def get_activator_web(dev_split, test_split):
    """Assemble the multi-class activator for the "Web" dataset.

    Args:
        dev_split: value stored on the activator params as ``DEV_SPLIT``.
        test_split: value stored on the activator params as ``TEST_SPLIT``.

    Returns:
        A ``(name, activator)`` tuple: the string ``"Web"`` and a
        ``BilinearMultiClassActivator`` wrapping a ``LayeredBilinearModuleGPU``.
    """
    external = ExternalData(WebAllExternalDataParams())
    dataset = BilinearDataset(WebDatasetAllParams(), external_data=external)

    # Apply the caller's split sizes to the activator configuration.
    run_params = WebBilinearActivatorParams()
    run_params.TEST_SPLIT = test_split
    run_params.DEV_SPLIT = dev_split

    # The module is sized from the dataset's feature length and the
    # external data's embedding size.
    model = LayeredBilinearModuleGPU(
        WebLayeredBilinearModuleParams(
            ftr_len=dataset.len_features,
            embed_vocab_dim=external.len_embed(),
        )
    )
    return "Web", BilinearMultiClassActivator(model, run_params, dataset)
def get_activator_protein(dev_split, test_split, topological_ftrs=True):
    """Assemble the multi-class activator for the "Protein" dataset.

    Args:
        dev_split: value stored on the activator params as ``DEV_SPLIT``.
        test_split: value stored on the activator params as ``TEST_SPLIT``.
        topological_ftrs: when falsy, the dataset params' ``FEATURES`` list is
            replaced by an empty list before the dataset is built.

    Returns:
        A ``(name, activator)`` tuple: the string ``"Protein"`` and a
        ``BilinearMultiClassActivator`` wrapping a ``LayeredBilinearModule``.
    """
    external = ExternalData(ProteinAllExternalDataParams())

    dataset_params = ProteinDatasetAllParams()
    if not topological_ftrs:
        dataset_params.FEATURES = []
    dataset = BilinearDataset(dataset_params, external_data=external)

    # Apply the caller's split sizes to the activator configuration.
    run_params = ProteinBilinearActivatorParams()
    run_params.TEST_SPLIT = test_split
    run_params.DEV_SPLIT = dev_split

    # The module is sized from the dataset's feature length and the
    # external data's embedding size.
    model = LayeredBilinearModule(
        ProteinLayeredBilinearModuleParams(
            ftr_len=dataset.len_features,
            embed_vocab_dim=external.len_embed(),
        )
    )
    return "Protein", BilinearMultiClassActivator(model, run_params, dataset)
def get_params_by_dataset(data):
    """Return the parameter bundle registered for a dataset key.

    Args:
        data: one of ``"AIDS"``, ``"PROTEIN"``, ``"MUTAGEN"`` or ``"GREC"``.

    Returns:
        A five-item list: the dataset-params class, the module-params class,
        the activator-params class, an ``ExternalData`` instance built from
        the dataset's external-data params, and a boolean flag.
        NOTE(review): the flag is presumably the multi-class switch -- confirm
        against the caller.

    Raises:
        KeyError: if ``data`` is not one of the registered keys.
    """
    # Each row keeps the external-data *params class* rather than a ready
    # ExternalData object, so a lookup only constructs the ExternalData of
    # the dataset that was actually requested instead of all four.
    registry = {
        "AIDS": (
            AidsDatasetAllParams,
            AidsLayeredBilinearModuleParams,
            AidsBilinearActivatorParams,
            AidsAllExternalDataParams,
            False,
        ),
        "PROTEIN": (
            ProteinDatasetAllParams,
            ProteinLayeredBilinearModuleParams,
            ProteinBilinearActivatorParams,
            ProteinAllExternalDataParams,
            True,
        ),
        "MUTAGEN": (
            MutagenDatasetAllParams,
            MutagenLayeredBilinearModuleParams,
            MutagenBilinearActivatorParams,
            MutagenAllExternalDataParams,
            False,
        ),
        "GREC": (
            GrecDatasetAllParams,
            GrecLayeredBilinearModuleParams,
            GrecBilinearActivatorParams,
            GrecAllExternalDataParams,
            True,
        ),
    }
    dataset_cls, module_cls, activator_cls, external_cls, flag = registry[data]
    return [dataset_cls, module_cls, activator_cls, ExternalData(external_cls()), flag]
class AidsBilinearActivatorParams(BilinearActivatorParams):
    """Activator settings for the "Aids" dataset.

    Overrides split sizes, loss function, batch size, epoch count and the
    dataset label on top of whatever ``BilinearActivatorParams`` sets up.
    """

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.1153
        self.TEST_SPLIT = 0.538
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        self.BATCH_SIZE = 128
        self.EPOCHS = 250
        self.DATASET = "Aids"


if __name__ == '__main__':
    # ALL picks between one dataset built from the "All" params (and trained
    # right away) and separately built train/dev external data.
    ALL = True
    if ALL:
        ext_train = ExternalData(AidsAllExternalDataParams())
        aids_train_ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
        activator = BilinearActivator(
            LayeredBilinearModule(
                AidsLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            AidsBilinearActivatorParams(), aids_train_ds)
        activator.train()
    else:
        ext_train = ExternalData(AidsTrainExternalDataParams())
        # The dev external data reuses the train symbol mapping.
        ext_dev = ExternalData(AidsDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
# NOTE(review): the three assignments below are presumably the tail of an
# ``__init__`` whose header lies outside this view; their indentation is
# assumed -- confirm against the full file.
        self.BATCH_SIZE = 32
        self.EPOCHS = 300
        self.DATASET = "Protein - MultiClass"


if __name__ == '__main__':
    # Script entry point: builds the Protein dataset and trains a multi-class
    # activator on it.
    from bilinear_model import LayeredBilinearModule
    from dataset.dataset_model import BilinearDataset
    from dataset.dataset_external_data import ExternalData
    from multi_class_bilinear_activator import BilinearMultiClassActivator
    t = time.time()  # NOTE(review): ``t`` is not read in the visible part of the script
    ALL = True  # selects which of the two branches below runs
    if ALL == True:
        ext_train = ExternalData(ProteinAllExternalDataParams())
        # NOTE(review): despite its name, ``aids_train_ds`` holds the Protein dataset here.
        aids_train_ds = BilinearDataset(ProteinDatasetAllParams(), external_data=ext_train)
        activator = BilinearMultiClassActivator(
            LayeredBilinearModule(
                ProteinLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            ProteinBilinearActivatorParams(), aids_train_ds)
        activator.train()
    if ALL == False:
        ext_train = ExternalData(ProteinTrainExternalDataParams())
        # The dev external data reuses the train symbol mapping.
        ext_dev = ExternalData(ProteinDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
# NOTE(review): this chunk opens in the middle of a call inside a list literal;
# the opening of that list and the enclosing method are outside this view, and
# the indentation of the next lines is assumed -- confirm against the full file.
                dropout=self.DROPOUT)
        ]
        # The bilinear layer params are built from the COL_DIM of the linear
        # params at index NUM_LAYERS - 1 and the ROW_DIM of the first one.
        self.BILINEAR_PARAMS = CoilRagBilinearLayerParams(
            self.LINEAR_PARAMS_LIST[self.NUM_LAYERS - 1].COL_DIM,
            self.LINEAR_PARAMS_LIST[0].ROW_DIM)


class CoilRagBilinearActivatorParams(BilinearActivatorParams):
    # Activator settings for the "COIL_RAG - MultiClass" dataset: split sizes,
    # loss function, batch size, epoch count and dataset label.
    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.15
        self.TEST_SPLIT = 0.15
        self.LOSS = cross_entropy  # f.factor_loss
        self.BATCH_SIZE = 16
        self.EPOCHS = 500
        self.DATASET = "COIL_RAG - MultiClass"


if __name__ == '__main__':
    # Script entry point: builds the COIL_RAG dataset and trains a multi-class
    # activator on it.
    ext_train = ExternalData(CoilRagAllExternalDataParams())
    # NOTE(review): despite its name, ``aids_train_ds`` holds the COIL_RAG dataset here.
    aids_train_ds = BilinearDataset(CoilRagDatasetAllParams(), external_data=ext_train)
    activator = BilinearMultiClassActivator(
        LayeredBilinearModule(
            CoilRagLayeredBilinearModuleParams(
                ftr_len=aids_train_ds.len_features,
                embed_vocab_dim=ext_train.len_embed())),
        CoilRagBilinearActivatorParams(), aids_train_ds)
    activator.train()
# NOTE(review): the statements up to ``return x2`` are the tail of a method
# whose header lies outside this view; their indentation (and whether the
# first statement sits inside a conditional) is assumed -- confirm.
        # Concatenate x0 with the tensors in list_embed along dim 2.
        x0 = torch.cat([x0] + list_embed, dim=2)
        x1 = x0
        self._sync()
        # Feed x1 through each of the ``_num_layers`` linear layers in order,
        # passing A to every layer.
        for i in range(self._num_layers):
            x1 = self._linear_layers[i](A, x1)
        # The bilinear layer receives A, the concatenated input x0 and the
        # output x1 of the last linear layer.
        x2 = self._bilinear_layer(A, x0, x1)
        return x2


if __name__ == "__main__":
    # Script entry point: loads the Aids dataset, builds a
    # LayeredBilinearModule and calls it on every batch of the loader.
    from dataset.datset_sampler import ImbalancedDatasetSampler
    from params.aids_params import AidsAllExternalDataParams, AidsDatasetAllParams
    from dataset.dataset_external_data import ExternalData
    from dataset.dataset_model import BilinearDataset
    ext_train = ExternalData(AidsAllExternalDataParams())
    ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
    dl = DataLoader(dataset=ds, collate_fn=ds.collate_fn, batch_size=64,
                    sampler=ImbalancedDatasetSampler(ds))
    m_params = LayeredBilinearModuleParams(
        ftr_len=ds.len_features, embed_vocab_dim=ext_train.len_embed())
    m_params.EMBED_DIMS = [20, 20]
    module = LayeredBilinearModule(m_params)
    # module = BilinearModule(BilinearModuleParams())
    for i, (_A, _D, _x0, _l) in enumerate(dl):
        _x2 = module(_A, _D, _x0)
    # NOTE(review): placement of this statement after (rather than inside)
    # the loop is assumed; the value is not read in the visible code.
    e = 0
# NOTE(review): the two statements before ``__getitem__`` are the tail of a
# method whose header lies outside this view; their indentation is assumed.
            labels_batch.append(l)
        # The adjacency and x batches are wrapped in Tensor as-is; the
        # embeddings and labels batches are additionally converted with .long().
        return Tensor(adjacency_batch), Tensor(x_batch), Tensor(
            embeddings_batch).long(), Tensor(labels_batch).long()

    def __getitem__(self, index):
        # Resolve the positional index to the stored item id, then unpack the
        # (A, x, embed, label) record kept for that id.
        gnx_id = self._idx_to_name[index]
        A, x, embed, label = self._data[gnx_id]
        # A missing embedding (None) is replaced by the integer 0; otherwise it
        # is wrapped in a long Tensor.
        embed = 0 if embed is None else Tensor(embed).long()
        return Tensor(A), Tensor(x), embed, label

    def __len__(self):
        # One item per entry of the index-to-name mapping.
        return len(self._idx_to_name)


if __name__ == "__main__":
    # Script entry point: loads the Aids dataset and prints every batch
    # produced by the loader.
    from dataset.datset_sampler import ImbalancedDatasetSampler
    from params.aids_params import AidsAllExternalDataParams, AidsDatasetAllParams
    ext_train = ExternalData(AidsAllExternalDataParams())
    ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
    # ds = BilinearDataset(AidsDatasetTestParams())
    dl = DataLoader(dataset=ds, collate_fn=ds.collate_fn, batch_size=64,
                    sampler=ImbalancedDatasetSampler(ds))
    p = []
    for i, (A, x, e, l) in enumerate(dl):
        print(i, A, x, e, l)
    # NOTE(review): placement of this statement after (rather than inside)
    # the loop is assumed; the value is not read in the visible code.
    e = 0
# NOTE(review): this chunk opens in the middle of an expression; the start of
# that statement and the enclosing routine are outside this view, and the
# indentation of the next lines is assumed -- confirm against the full file.
                config + ftrs
            res_list.append(config_line)
        # NOTE(review): str.strip(".txt") removes any leading/trailing '.', 't'
        # and 'x' characters rather than the ".txt" suffix, so a name such as
        # "test.txt" becomes "es" -- confirm whether suffix removal was intended.
        with open(os.path.join("grid_results", file_name.strip(".txt") + "_analyzed.csv"), "wt") as f:
            # Write all collected rows to the CSV in one call.
            writer = csv.writer(f)
            writer.writerows(res_list)


if __name__ == "__main__":
    # Script entry point: runs the grid search for the configuration selected
    # by ``n``; only the Aids branch is active, the others are commented out.
    # n = int(sys.argv[1])
    n = 0
    if n == 0:
        GridSearch(AidsDatasetAllParams, AidsLayeredBilinearModuleParams, AidsBilinearActivatorParams,
                   ExternalData(AidsAllExternalDataParams()), multi_class=False).go("_Aids")
    #
    # elif n == 1:
    #     GridSearch(WebDatasetAllParams, WebLayeredBilinearModuleParams, WebBilinearActivatorParams,
    #                ExternalData(WebAllExternalDataParams()), multi_class=True,
    #                layers=[[[None, 50], [50, 25]], [[None, 200], [200, 100], [100, 50]]]).go("_Web")
    #
    # elif n == 2:
    #     GridSearch(MutagenDatasetAllParams, MutagenLayeredBilinearModuleParams, MutagenBilinearActivatorParams,
    #                ExternalData(MutagenAllExternalDataParams()), multi_class=False).go("_Mutagen")
    #
    # elif n == 3:
    #     GridSearch(ProteinDatasetAllParams, ProteinLayeredBilinearModuleParams, ProteinBilinearActivatorParams,
    #                ExternalData(ProteinAllExternalDataParams()), multi_class=True,
    #                layers=[[[None, 25], [50, 25]], [[None, 100], [100, 50], [50, 25]]]).go("_Protein")
    #
# NOTE(review): the ``with`` statement below is the tail of a routine whose
# header lies outside this view; its indentation is assumed -- confirm.
        # NOTE(review): str.strip(".txt") removes any leading/trailing '.', 't'
        # and 'x' characters rather than the ".txt" suffix, so a name such as
        # "test.txt" becomes "es" -- confirm whether suffix removal was intended.
        with open(
                os.path.join("grid_results",
                             file_name.strip(".txt") + "_analyzed.csv"),
                "wt") as f:
            # Write all collected rows to the CSV in one call.
            writer = csv.writer(f)
            writer.writerows(res_list)


if __name__ == "__main__":
    # Script entry point: runs the grid search for the configuration selected
    # by ``n``.
    # NOTE(review): with ``n`` hard-coded to 8, none of the branches visible
    # here (0, 1, 2) is taken; further branches may follow outside this view.
    n = 8  # int(sys.argv[1])
    if n == 0:
        GridSearch(AidsDatasetAllParams, AidsLayeredBilinearModuleParams,
                   AidsBilinearActivatorParams,
                   ExternalData(AidsAllExternalDataParams()),
                   multi_class=False).go("_Aids")
    elif n == 1:
        GridSearch(WebDatasetAllParams, WebLayeredBilinearModuleParams,
                   WebBilinearActivatorParams,
                   ExternalData(WebAllExternalDataParams()),
                   multi_class=True).go("_Web")
    elif n == 2:
        GridSearch(MutagenDatasetAllParams, MutagenLayeredBilinearModuleParams,
                   MutagenBilinearActivatorParams,
                   ExternalData(MutagenAllExternalDataParams()),
                   multi_class=False).go("_Mutagen")
class AidsBilinearActivatorParams(BilinearActivatorParams):
    """Activator settings for the "Aids" dataset.

    Overrides split sizes, loss function, batch size, epoch count and the
    dataset label on top of whatever ``BilinearActivatorParams`` sets up.
    """

    def __init__(self):
        super().__init__()
        self.DEV_SPLIT = 0.1153
        self.TEST_SPLIT = 0.538
        self.LOSS = functional.binary_cross_entropy_with_logits  # f.factor_loss
        self.BATCH_SIZE = 128
        self.EPOCHS = 25
        self.DATASET = "Aids"


if __name__ == '__main__':
    # ALL picks between one dataset built from the "All" params (and trained
    # right away) and separately built train/dev/test datasets.
    ALL = True
    if ALL:
        ext_train = ExternalData(AidsAllExternalDataParams())
        aids_train_ds = BilinearDataset(AidsDatasetAllParams(), external_data=ext_train)
        activator = BilinearActivator(
            LayeredBilinearModule(
                AidsLayeredBilinearModuleParams(
                    ftr_len=aids_train_ds.len_features,
                    embed_vocab_dim=ext_train.len_embed())),
            AidsBilinearActivatorParams(), aids_train_ds)
        activator.train()
    else:
        ext_train = ExternalData(AidsTrainExternalDataParams())
        # Dev and test external data reuse the train symbol mapping.
        ext_dev = ExternalData(AidsDevExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        ext_test = ExternalData(AidsTestExternalDataParams(), idx_to_symbol=ext_train.idx_to_symbol_dict)
        aids_train_ds = BilinearDataset(AidsDatasetTrainParams(), external_data=ext_train)
        aids_dev_ds = BilinearDataset(AidsDatasetDevParams(), external_data=ext_dev)
        aids_test_ds = BilinearDataset(AidsDatasetTestParams(), external_data=ext_test)