    def _check_configuration(self, dataset: BilinearDataset, model_params: LayeredBilinearModuleParams,
                             activator_params: BilinearActivatorParams):
        # build and train a model for this configuration, then return the per-epoch metric vectors
        model = LayeredBilinearModule(model_params)
        activator = BilinearActivator(model, activator_params, dataset)
        activator.train(show_plot=False)
        return activator.accuracy_train_vec, activator.auc_train_vec, activator.loss_train_vec, \
            activator.accuracy_dev_vec, activator.auc_dev_vec, activator.loss_dev_vec, \
            activator.accuracy_test_vec, activator.auc_test_vec, activator.loss_test_vec
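    # Hedged sketch (assumption, not taken from the original file): _check_configuration looks like part
    # of a grid-search / experiment class; a driver method of that class might iterate over candidate
    # parameter objects and collect the returned metric vectors, roughly like this (names illustrative):
    #
    # def _run_configurations(self, dataset, candidate_params):
    #     results = {}
    #     for name, (model_params, activator_params) in candidate_params.items():
    #         results[name] = self._check_configuration(dataset, model_params, activator_params)
    #     return results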
def get_activator_protein(dev_split, test_split, topological_ftrs=True):
    # build the Protein dataset, module and multi-class activator for the given dev/test splits
    data_name = "Protein"
    ext_train = ExternalData(ProteinAllExternalDataParams())
    ds_params = ProteinDatasetAllParams()
    if not topological_ftrs:
        ds_params.FEATURES = []
    ds = BilinearDataset(ds_params, external_data=ext_train)
    activator_params = ProteinBilinearActivatorParams()
    activator_params.TEST_SPLIT = test_split
    activator_params.DEV_SPLIT = dev_split
    module_params = ProteinLayeredBilinearModuleParams(ftr_len=ds.len_features,
                                                       embed_vocab_dim=ext_train.len_embed())
    return data_name, BilinearMultiClassActivator(LayeredBilinearModule(module_params), activator_params, ds)
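# Hedged usage sketch (not part of the original file; the split values are illustrative only):
# data_name, protein_activator = get_activator_protein(dev_split=0.1, test_split=0.2)
# protein_activator.train(show_plot=False)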
def run_trial(params, dataset_param_class, module_param_class, activator_param_class, ext_data, is_multi_class):
    ds_params = dataset_param_class()
    ds_params.FEATURES = [globals()[ftr] for ftr in params['input_vec']]
    dataset = BilinearDataset(ds_params, external_data=ext_data)

    # model
    layers = []
    if params['layers_config']["_name"] == "2_layers":
        layers.append([None, int(params['layers_config']["h1_dim"])])
        layers.append([int(params['layers_config']["h1_dim"]), int(params['layers_config']["h2_dim"])])
    elif params['layers_config']["_name"] == "3_layers":
        layers.append([None, int(params['layers_config']["h1_dim"])])
        layers.append([int(params['layers_config']["h1_dim"]), int(params['layers_config']["h2_dim"])])
        layers.append([int(params['layers_config']["h2_dim"]), int(params['layers_config']["h3_dim"])])

    model_params = module_param_class(ftr_len=dataset.len_features, layer_dim=layers,
                                      embed_vocab_dim=ext_data.len_embed())
    model_params.DROPOUT = params['dropout']
    model_params.WEIGHT_DECAY = params['regularization']
    model_params.LR = params['learning_rate']
    model_params.OPTIMIZER = globals()[params['optimizer']]

    # activator
    activator_params = activator_param_class()
    activator_params.BATCH_SIZE = params['batch_size']
    activator_params.EPOCHS = params['epochs']

    model = LayeredBilinearModule(model_params)
    activator = BilinearMultiClassActivator(model, activator_params, dataset, nni=True) if is_multi_class else \
        BilinearActivator(model, activator_params, dataset, nni=True)
    activator.train(show_plot=False, early_stop=True)
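# Hedged usage sketch (assumption, not taken from the original file): an NNI trial script would
# typically fetch its hyperparameters via nni.get_next_parameter() and forward the experiment-specific
# parameter classes; the Protein classes used below are illustrative choices only.
import nni

if __name__ == '__main__':
    trial_params = nni.get_next_parameter()
    run_trial(trial_params, ProteinDatasetAllParams, ProteinLayeredBilinearModuleParams,
              ProteinBilinearActivatorParams, ExternalData(ProteinAllExternalDataParams()),
              is_multi_class=True)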
        self.NUM_LAYERS = 3
        self.LINEAR_PARAMS_LIST = [
            RefaelLinearLayerParams(in_dim=ftr_len, out_dim=50, dropout=self.DROPOUT),
            RefaelLinearLayerParams(in_dim=50, out_dim=100, dropout=self.DROPOUT),
            RefaelLinearLayerParams(in_dim=100, out_dim=4, dropout=self.DROPOUT),
        ]
        self.BILINEAR_PARAMS = RefaelBilinearLayerParams(self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
                                                         self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class RefaelBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        # self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        self.LOSS = functional.cross_entropy
        self.BATCH_SIZE = 16
        self.EPOCHS = 100


if __name__ == '__main__':
    refael_train_ds = BilinearDataset(RefaelDatasetParams())
    module = LayeredBilinearModule(RefaelLayeredBilinearModuleParams(ftr_len=refael_train_ds.len_features))
    activator = BilinearMultiClassActivator(module, RefaelBilinearActivatorParams(), refael_train_ds)
    activator.train(show_plot=True)
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        # self.BATCH_SIZE = 128
        self.EPOCHS = 250
        self.DATASET = "Aids"


if __name__ == '__main__':
    ALL = True

    if ALL:
        ext_train = ExternalData(AidsAllExternalDataParams())
        aids_train_ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
        activator = BilinearActivator(
            LayeredBilinearModule(AidsLayeredBilinearModuleParams(ftr_len=aids_train_ds.len_features,
                                                                  embed_vocab_dim=ext_train.len_embed())),
            AidsBilinearActivatorParams(), aids_train_ds)
        activator.train()

    if not ALL:
        ext_train = ExternalData(AidsTrainExternalDataParams())
        ext_dev = ExternalData(AidsDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(AidsTestExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        aids_train_ds = BilinearDataset(AidsDatasetTrainParams(), external_data=ext_train)
        aids_dev_ds = BilinearDataset(AidsDatasetDevParams(), external_data=ext_dev)
import time

from bilinear_model import LayeredBilinearModule
from dataset.dataset_model import BilinearDataset
from dataset.dataset_external_data import ExternalData
from multi_class_bilinear_activator import BilinearMultiClassActivator

t = time.time()
ALL = True

if ALL:
    ext_train = ExternalData(ProteinAllExternalDataParams())
    protein_train_ds = BilinearDataset(ProteinDatasetAllParams(), external_data=ext_train)
    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(ProteinLayeredBilinearModuleParams(ftr_len=protein_train_ds.len_features,
                                                                 embed_vocab_dim=ext_train.len_embed())),
        ProteinBilinearActivatorParams(), protein_train_ds)
    activator.train()

if not ALL:
    ext_train = ExternalData(ProteinTrainExternalDataParams())
    ext_dev = ExternalData(ProteinDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
    ext_test = ExternalData(ProteinTestExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
    protein_train_ds = BilinearDataset(ProteinDatasetTrainParams(), external_data=ext_train)
    protein_dev_ds = BilinearDataset(ProteinDatasetDevParams(), external_data=ext_dev)
            # calculate total loss
            loss_count += self._loss_func(output.squeeze(dim=1), l)
            true_labels += l.tolist()
            pred_labels += output.squeeze(dim=1).argmax(dim=1).tolist()
            pred_auc_labels += output.squeeze(dim=1).tolist()

        # update loss and accuracy
        loss = float(loss_count / len(data_loader))
        # pred_labels = [0 if np.isnan(i) else i for i in pred_labels]
        self._update_loss(loss, job=job)
        self._update_accuracy(pred_labels, true_labels, job=job)
        # self._update_auc(pred_auc_labels, true_labels, job=job)  # TODO: AUC check
        return loss


if __name__ == '__main__':
    ds = BilinearDataset(YanivDatasetParams())
    activator = BilinearActivator(
        LayeredBilinearModule(LayeredBilinearModuleParams(ftr_len=ds.len_features)),
        BilinearActivatorParams(), ds)
    # protein_train_ds = BilinearDataset(ProteinDatasetTrainParams())
    # protein_dev_ds = BilinearDataset(ProteinDatasetDevParams())
    # protein_test_ds = BilinearDataset(ProteinDatasetTestParams())
    # _activator = BilinearActivator(LayeredBilinearModule(LayeredBilinearModuleParams(
    #     ftr_len=protein_train_ds.len_features)), BilinearActivatorParams(), protein_train_ds,
    #     dev_data=protein_dev_ds, test_data=protein_test_ds)
    activator.train()
                                     dropout=self.DROPOUT)
        ]
        self.BILINEAR_PARAMS = CoilRagBilinearLayerParams(self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
                                                          self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class CoilRagBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        self.LOSS = cross_entropy  # f.factor_loss
        # self.BATCH_SIZE = 16
        self.EPOCHS = 500
        self.DATASET = "COIL_RAG - MultiClass"


if __name__ == '__main__':
    ext_train = ExternalData(CoilRagAllExternalDataParams())
    coil_rag_train_ds = BilinearDataset(CoilRagDatasetAllParams(), external_data=ext_train)
    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(CoilRagLayeredBilinearModuleParams(ftr_len=coil_rag_train_ds.len_features,
                                                                 embed_vocab_dim=ext_train.len_embed())),
        CoilRagBilinearActivatorParams(), coil_rag_train_ds)
    activator.train()
            # move the batch to GPU when available
            if self._gpu:
                A, x0, embed, l = A.cuda(), x0.cuda(), embed.cuda(), l.cuda()

            # print progress
            if not self._nni:
                self._print_progress(batch_index, len_data, job=VALIDATE_JOB)

            output = self._model(A, x0, embed)
            # calculate total loss
            loss_count += self._loss_func(output.squeeze(dim=1).squeeze(dim=1), l.float())
            true_labels += l.tolist()
            pred += output.squeeze(dim=1).squeeze(dim=1).tolist()

        # update loss and accuracy
        loss = float(loss_count / len(data_loader))
        # pred_labels = [0 if np.isnan(i) else i for i in pred_labels]
        self._update_loss(loss, job=job)
        self._update_accuracy(pred, true_labels, job=job)
        self._update_auc(pred, true_labels, job=job)
        return loss


if __name__ == '__main__':
    from params.aids_params import AidsDatasetTrainParams, AidsDatasetDevParams, AidsDatasetTestParams

    aids_train_ds = BilinearDataset(AidsDatasetTrainParams())
    aids_dev_ds = BilinearDataset(AidsDatasetDevParams())
    aids_test_ds = BilinearDataset(AidsDatasetTestParams())
    activator = BilinearActivator(
        LayeredBilinearModule(LayeredBilinearModuleParams(ftr_len=aids_train_ds.len_features)),
        BilinearActivatorParams(), aids_train_ds, dev_data=aids_dev_ds, test_data=aids_test_ds)
    activator.train()
                                   dropout=self.DROPOUT),
            YanivLinearLayerParams(in_dim=50, out_dim=10, dropout=self.DROPOUT),
            YanivLinearLayerParams(in_dim=50, out_dim=10, dropout=self.DROPOUT),
            YanivLinearLayerParams(in_dim=200, out_dim=1, dropout=self.DROPOUT)
        ]
        self.BILINEAR_PARAMS = YanivBilinearLayerParams(self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
                                                        self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class YanivBilinearActivatorParams(BilinearActivatorParams):
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        # self.BATCH_SIZE = 16
        self.EPOCHS = 100


if __name__ == '__main__':
    yaniv_train_ds = BilinearDataset(YanivDatasetParams())
    activator = BilinearActivator(
        LayeredBilinearModule(YanivLayeredBilinearModuleParams(ftr_len=yaniv_train_ds.len_features)),
        YanivBilinearActivatorParams(), yaniv_train_ds)
    activator.train()