def LoadData(DATASET_NAME):
    """
    Build the dataset object that corresponds to ``DATASET_NAME``.

    This function is called in the main.py file.

    Parameters:
        DATASET_NAME: name of the dataset to load. Supported names are
            'MNIST', 'CIFAR10', 'ZINC', 'BZR', 'COX2', 'DHFR', 'ENZYMES',
            'PROTEINS_full', 'Synthie', 'SBM_CLUSTER', 'SBM_PATTERN' and 'TSP'.

    Returns:
        The dataset object built by the matching dataset class.

    Raises:
        ValueError: if ``DATASET_NAME`` is not one of the supported names.
    """
    # handling for MNIST or CIFAR Superpixels
    if DATASET_NAME in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(DATASET_NAME)

    # handling for (ZINC) molecule dataset
    if DATASET_NAME == 'ZINC':
        return MoleculeDataset(DATASET_NAME)

    # handling for the TU Datasets
    if DATASET_NAME in ('BZR', 'COX2', 'DHFR', 'ENZYMES', 'PROTEINS_full', 'Synthie'):
        return TUsDataset(DATASET_NAME)

    # handling for SBM datasets
    if DATASET_NAME in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(DATASET_NAME)

    # handling for TSP dataset
    if DATASET_NAME == 'TSP':
        return TSPDataset(DATASET_NAME)

    # Fail loudly: falling through used to return None, which only surfaced
    # later as an unrelated AttributeError in the caller.
    raise ValueError("Unknown dataset: {!r}".format(DATASET_NAME))
def LoadData(DATASET_NAME):
    """
    Build the dataset object that corresponds to ``DATASET_NAME``.

    This function is called in the main.py file.

    Parameters:
        DATASET_NAME: name of the dataset to load. Supported names are
            'MNIST', 'CIFAR10', 'ZINC', 'ZINC-full', 'ENZYMES', 'DD',
            'PROTEINS_full', 'SBM_CLUSTER', 'SBM_PATTERN', 'TSP',
            'OGBL-COLLAB' and 'CSL'.

    Returns:
        The dataset object built by the matching dataset class.

    Raises:
        ValueError: if ``DATASET_NAME`` is not one of the supported names.
    """
    # handling for MNIST or CIFAR Superpixels
    if DATASET_NAME in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(DATASET_NAME)

    # handling for (ZINC) molecule dataset
    if DATASET_NAME in ('ZINC', 'ZINC-full'):
        return MoleculeDataset(DATASET_NAME)

    # handling for the TU Datasets
    if DATASET_NAME in ('ENZYMES', 'DD', 'PROTEINS_full'):
        return TUsDataset(DATASET_NAME)

    # handling for SBM datasets
    if DATASET_NAME in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(DATASET_NAME)

    # handling for TSP dataset
    if DATASET_NAME == 'TSP':
        return TSPDataset(DATASET_NAME)

    # handling for COLLAB dataset
    if DATASET_NAME == 'OGBL-COLLAB':
        return COLLABDataset(DATASET_NAME)

    # handling for the CSL (Circular Skip Links) Dataset
    if DATASET_NAME == 'CSL':
        return CSLDataset(DATASET_NAME)

    # Fail loudly: falling through used to return None, which only surfaced
    # later as an unrelated AttributeError in the caller.
    raise ValueError("Unknown dataset: {!r}".format(DATASET_NAME))
def LoadData(DATASET_NAME):
    """
    Build the dataset object that corresponds to ``DATASET_NAME``.

    This function is called in the main.py file.

    Parameters:
        DATASET_NAME: name of the dataset to load. Supported names are
            'MNIST', 'CIFAR10', 'ZINC', 'COLLAB', 'ENZYMES', 'DD',
            'PROTEINS_full', 'SBM_CLUSTER', 'SBM_PATTERN', 'TSP', 'CORA',
            'CITESEER' and 'PUBMED'.

    Returns:
        The dataset object built by the matching dataset class.

    Raises:
        ValueError: if ``DATASET_NAME`` is not one of the supported names.
    """
    # handling for MNIST or CIFAR Superpixels
    if DATASET_NAME in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(DATASET_NAME)

    # handling for (ZINC) experiments dataset
    if DATASET_NAME == 'ZINC':
        return MoleculeDataset(DATASET_NAME)

    # handling for the TU Datasets
    if DATASET_NAME in ('COLLAB', 'ENZYMES', 'DD', 'PROTEINS_full'):
        return TUsDataset(DATASET_NAME)

    # handling for SBM datasets
    if DATASET_NAME in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(DATASET_NAME)

    # handling for TSP dataset
    if DATASET_NAME == 'TSP':
        return TSPDataset(DATASET_NAME)

    # handling for the CITATIONGRAPHS Datasets
    if DATASET_NAME in ('CORA', 'CITESEER', 'PUBMED'):
        return CitationGraphsDataset(DATASET_NAME)

    # Fail loudly: falling through used to return None, which only surfaced
    # later as an unrelated AttributeError in the caller.
    raise ValueError("Unknown dataset: {!r}".format(DATASET_NAME))
def get_multiplicity(DATASET_NAME, first, second, tol, dim, norm, tol_scipy):
    """
    Count the graphs of a dataset whose eigenvalues at positions ``first``
    and ``second`` differ by more than ``tol``.

    The train, validation and test graphs are all included. For every graph,
    ``get_eig_val`` is called and the entries at ``first`` and ``second`` of
    its result are compared.

    Parameters:
        DATASET_NAME: 'ZINC', 'SBM_PATTERN' or 'CIFAR10'.
        first: index of the first value to compare in each graph's result.
        second: index of the second value to compare.
        tol: the two values count as distinct when their absolute difference
            is strictly greater than this threshold.
        dim: forwarded to ``get_eig_val`` as ``pos_enc_dim``.
        norm: forwarded to ``get_eig_val`` as ``norm``.
        tol_scipy: forwarded to ``get_eig_val`` as ``tol``.

    Returns:
        A tuple ``(ratio, count, total)``: ``count`` graphs out of ``total``
        exceed ``tol``, and ``ratio`` is ``count / total``.

    Raises:
        NotImplementedError: for 'COLLAB', which the original code selected
            but never actually processed (it crashed on undefined variables).
        ValueError: for any other unsupported dataset name.
        ZeroDivisionError: if the dataset contains no graphs at all.
    """
    # Reject COLLAB before paying for the dataset load: no graph lists were
    # ever extracted for it, so the computation could not complete.
    if DATASET_NAME == 'COLLAB':
        raise NotImplementedError(
            "get_multiplicity does not support the 'COLLAB' dataset")

    if DATASET_NAME == 'ZINC':
        dataset = MoleculeDataset(DATASET_NAME)
    elif DATASET_NAME == 'SBM_PATTERN':
        dataset = SBMsDataset(DATASET_NAME)
    elif DATASET_NAME == 'CIFAR10':
        dataset = SuperPixDataset(DATASET_NAME)
    else:
        # Previously `dataset` stayed unbound and the next line crashed with
        # an UnboundLocalError that hid the real problem.
        raise ValueError("Unknown dataset: {!r}".format(DATASET_NAME))

    # Same order as before: train, then validation, then test.
    eigs = []
    for split in (dataset.train, dataset.val, dataset.test):
        eigs.extend(
            get_eig_val(g, pos_enc_dim=dim, norm=norm, tol=tol_scipy)
            for g in split.graph_lists)

    n = len(eigs)
    count = sum(1 for eig in eigs if abs(eig[first] - eig[second]) > tol)
    return count / n, count, n
def LoadData(DATASET_NAME):
    """
    Build the molecule dataset identified by ``DATASET_NAME``.

    This function is called in the main.py file. Every name is forwarded
    unchanged to ``MoleculeDataset``; no other dataset family is handled here.

    Parameters:
        DATASET_NAME: name passed straight to ``MoleculeDataset``.

    Returns:
        The ``MoleculeDataset`` instance for that name.
    """
    # handling for (ZINC) molecule dataset
    molecule_dataset = MoleculeDataset(DATASET_NAME)
    return molecule_dataset
def LoadData(DATASET_NAME):
    """
    Build the dataset object that corresponds to ``DATASET_NAME``.

    This function is called in the main.py file.

    Parameters:
        DATASET_NAME: name of the dataset to load. Supported names are
            'ZINC', 'ZINC-full', 'SBM_CLUSTER' and 'SBM_PATTERN'.

    Returns:
        The dataset object built by the matching dataset class.

    Raises:
        ValueError: if ``DATASET_NAME`` is not one of the supported names.
    """
    # handling for (ZINC) molecule dataset
    if DATASET_NAME in ('ZINC', 'ZINC-full'):
        return MoleculeDataset(DATASET_NAME)

    # handling for SBM datasets
    if DATASET_NAME in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(DATASET_NAME)

    # Fail loudly: falling through used to return None, which only surfaced
    # later as an unrelated AttributeError in the caller.
    raise ValueError("Unknown dataset: {!r}".format(DATASET_NAME))
def load_data(args):
    """
    Instantiate the dataset selected by ``args.data``.

    Parameters:
        args: object exposing ``data`` (the dataset name) and, for 'QM9'
            only, ``extra`` (forwarded as the second constructor argument).

    Returns:
        The dataset object built by the matching dataset class.

    Raises:
        Exception: with the message 'Unknown dataset!' when ``args.data`` is
            not one of the supported names.
    """
    name = args.data

    if name == 'ZINC':
        return MoleculeDataset(name)
    if name == 'QM9':
        return QM9Dataset(name, args.extra)
    if name == 'TSP':
        return TSPDataset(name)
    if name in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(name)
    if name in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(name)
    if name == 'Cora':
        return CoraDataset(name)

    raise Exception('Unknown dataset!')
def LoadData(DATASET_NAME):
    """
    Build the dataset object that corresponds to ``DATASET_NAME``.

    This function is called in the main.py file.

    Parameters:
        DATASET_NAME: name of the dataset to load. Supported names are
            'MNIST', 'CIFAR10', 'ZINC', 'SBM_CLUSTER', 'SBM_PATTERN',
            'SGS_HIGH_PASS', 'SGS_BAND_PASS' and 'SGS_LOW_PASS'.

    Returns:
        The dataset object built by the matching dataset class.

    Raises:
        ValueError: if ``DATASET_NAME`` is not one of the supported names.
    """
    # handling for MNIST or CIFAR Superpixels
    if DATASET_NAME in ('MNIST', 'CIFAR10'):
        return SuperPixDataset(DATASET_NAME)

    # handling for (ZINC) molecule dataset
    if DATASET_NAME == 'ZINC':
        return MoleculeDataset(DATASET_NAME)

    # handling for SBM datasets
    if DATASET_NAME in ('SBM_CLUSTER', 'SBM_PATTERN'):
        return SBMsDataset(DATASET_NAME)

    # handling for the SGS (Synthetic Graph Spectrum) Dataset
    if DATASET_NAME in ('SGS_HIGH_PASS', 'SGS_BAND_PASS', 'SGS_LOW_PASS'):
        return SGSDataset(DATASET_NAME)

    # Fail loudly: falling through used to return None, which only surfaced
    # later as an unrelated AttributeError in the caller.
    raise ValueError("Unknown dataset: {!r}".format(DATASET_NAME))
def main():
    """
    USER CONTROLS

    Command-line entry point for the 'EIG' model: parse the options, merge
    them into the JSON config given by ``--config``, load the molecule
    dataset and start ``train_val_pipeline``.

    Precedence: a command-line option that is not ``None`` replaces the
    matching entry of ``config['params']`` / ``config['net_params']``.
    Options that declare a default (``--JK``, ``--towers``, ``--type_net``,
    ``--flip``, ``--NN_eig``, ``--not_pre``) are never ``None`` and therefore
    always replace the config value.

    ``--model`` is accepted for command-line compatibility, but the model
    name used for the output paths is always 'EIG'.

    Side effects: reads the config file, creates ``<out_dir>results`` and
    ``<out_dir>configs`` if missing, and runs training.
    """

    def parse_flag(text):
        # argparse converter for boolean options. `type=bool` treats every
        # non-empty string (including 'False') as True; here the usual
        # "false" spellings map to False and anything else stays True.
        return text.strip().lower() not in ('', 'false', 'f', '0', 'no', 'n', 'off')

    def is_true(text):
        # Legacy convention of the string-typed switches: only the exact
        # text 'True' enables the option.
        return text == 'True'

    def apply_overrides(parsed, target, specs):
        # Copy every option that was given (not None) into `target`,
        # converting it with `cast` unless `cast` is None.
        for key, cast in specs:
            value = getattr(parsed, key)
            if value is not None:
                target[key] = value if cast is None else cast(value)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    for name in ('out_dir', 'seed', 'epochs', 'batch_size', 'init_lr',
                 'lr_reduce_factor', 'lr_schedule_patience', 'min_lr',
                 'weight_decay', 'print_epoch_interval', 'L', 'hidden_dim',
                 'out_dim', 'residual'):
        parser.add_argument('--' + name, help="Please give a value for " + name)
    parser.add_argument('--JK', default='last', help='Jumping Knowledge')
    for name in ('edge_feat', 'readout', 'kernel', 'n_heads', 'gated',
                 'in_feat_dropout', 'dropout', 'graph_norm', 'batch_norm',
                 'sage_aggregator', 'data_mode', 'num_pool', 'gnn_per_block',
                 'embedding_dim', 'pool_ratio', 'linkpred', 'cat', 'self_loop',
                 'max_time'):
        parser.add_argument('--' + name, help="Please give a value for " + name)
    parser.add_argument('--expid', help='Experiment id.')
    parser.add_argument('--type_net', default='simple', help='Type of net')
    parser.add_argument('--lap_norm', default='none', help='Laplacian normalisation')
    parser.add_argument('--flip', action='store_true', default=False,
                        help='Flip eig each epoch')

    # hydra params
    parser.add_argument('--hydra', action='store_true', default=False,
                        help='Run in Hydra environment.')
    parser.add_argument('--hydra_checkpoint_every', type=int, default=100,
                        help='Save checkpoints to hydra every.')
    parser.add_argument('--hydra_eta_every', type=int, default=100,
                        help='Update ETA to hydra every.')
    parser.add_argument('--hydra_progress_bar_every', type=float, default=1,
                        help='Update progress hydra every (seconds).')

    # eig params
    parser.add_argument('--aggregators', type=str, help='Aggregators to use.')
    parser.add_argument('--scalers', type=str, help='Scalers to use.')
    parser.add_argument('--NN_eig', action='store_true', default=False,
                        help='NN eig aggr.')
    parser.add_argument('--towers', type=int, default=5, help='Towers to use.')
    parser.add_argument('--divide_input_first', type=parse_flag,
                        help='Whether to divide the input in first layer.')
    parser.add_argument('--divide_input_last', type=parse_flag,
                        help='Whether to divide the input in last layers.')
    parser.add_argument('--gru', type=parse_flag, help='Whether to use gru.')
    parser.add_argument('--edge_dim', type=int, help='Size of edge embeddings.')
    parser.add_argument('--pretrans_layers', type=int, help='pretrans_layers.')
    parser.add_argument('--posttrans_layers', type=int, help='posttrans_layers.')
    parser.add_argument('--not_pre', action='store_true', default=False,
                        help='Not applying pre-transformation')
    args = parser.parse_args()

    # hydra load: fall back to a plain run when hydra was requested but is
    # not available.
    if args.hydra:
        print('I am passing here 1')
        if not hydra.is_available():
            print('hydra: not available')
            args.hydra = False

    with open(args.config) as f:
        config = json.load(f)

    # device: giving --gpu_id also forces GPU usage on.
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'],
                       config['gpu']['id'],
                       verbose=hydra.is_first_execution())

    # dataset, out_dir
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = MoleculeDataset(DATASET_NAME,
                              norm=args.lap_norm,
                              verbose=hydra.is_first_execution())
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    apply_overrides(args, params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # hydra parameters
    params['hydra'] = args.hydra
    params['hydra_checkpoint_every'] = args.hydra_checkpoint_every
    params['hydra_eta_every'] = args.hydra_eta_every
    params['hydra_progress_bar_every'] = args.hydra_progress_bar_every

    # network parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    apply_overrides(args, net_params, (
        ('L', int),
        ('hidden_dim', int),
        ('out_dim', int),
        ('residual', is_true),
        ('JK', None),
        ('edge_feat', is_true),
        ('readout', None),
        ('kernel', int),
        ('n_heads', int),
        ('gated', is_true),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('graph_norm', is_true),
        ('batch_norm', is_true),
        ('sage_aggregator', None),
        ('data_mode', None),
        ('num_pool', int),
        ('gnn_per_block', int),
        ('embedding_dim', int),
        ('pool_ratio', float),
        ('linkpred', is_true),
        ('cat', is_true),
        ('self_loop', is_true),
        ('aggregators', None),
        ('scalers', None),
        ('towers', None),
        ('divide_input_first', None),
        ('divide_input_last', None),
        ('NN_eig', None),
        ('gru', None),
        ('edge_dim', None),
        ('pretrans_layers', None),
        ('posttrans_layers', None),
        ('not_pre', None),
        ('type_net', None),
        ('flip', None),
    ))

    # ZINC
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    # Row sums of every training graph's adjacency matrix, concatenated
    # (presumably the node degrees -- confirm against the DGL version in use),
    # then three averaged transforms of those values.
    D = torch.cat([
        torch.sparse.sum(g.adjacency_matrix(transpose=True), dim=-1).to_dense()
        for g in dataset.train.graph_lists
    ])
    net_params['avg_d'] = dict(lin=torch.mean(D),
                               exp=torch.mean(torch.exp(torch.div(1, D)) - 1),
                               log=torch.mean(torch.log(D + 1)))

    MODEL_NAME = 'EIG'

    # Build the run tag once so that the four output paths always carry the
    # same timestamp (separate strftime calls could straddle a second).
    run_tag = (MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id'])
               + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y'))
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    # exist_ok avoids the check-then-create race when several runs start
    # at the same time.
    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    net_params['total_param'] = view_model_param(
        net_params, verbose=hydra.is_first_execution())
    train_val_pipeline(dataset, params, net_params, dirs)
def main():
    """
    USER CONTROLS

    Command-line entry point for the 'PNA' model: parse the options, merge
    them into the JSON config given by ``--config``, load the molecule
    dataset and start ``train_val_pipeline``.

    Precedence: a command-line option that is not ``None`` replaces the
    matching entry of ``config['params']`` / ``config['net_params']``.
    String-typed switches (``--residual``, ``--gru``, ...) are enabled only
    by the exact text 'True'.

    ``--model`` is accepted but not read; the model name used for the output
    paths is always 'PNA'.

    Side effects: reads the config file, creates ``<out_dir>results`` and
    ``<out_dir>configs`` if missing, and runs training.
    """

    def is_true(text):
        # Only the exact text 'True' enables a string-typed switch.
        return text == 'True'

    def apply_overrides(parsed, target, specs):
        # Copy every option that was given (not None) into `target`,
        # converting it with `cast` unless `cast` is None.
        for key, cast in specs:
            value = getattr(parsed, key)
            if value is not None:
                target[key] = value if cast is None else cast(value)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    for name in ('out_dir', 'seed', 'epochs', 'batch_size', 'init_lr',
                 'lr_reduce_factor', 'lr_schedule_patience', 'min_lr',
                 'weight_decay', 'print_epoch_interval', 'L', 'hidden_dim',
                 'out_dim', 'residual', 'edge_feat', 'readout', 'kernel',
                 'n_heads', 'gated', 'in_feat_dropout', 'dropout',
                 'graph_norm', 'batch_norm', 'sage_aggregator', 'data_mode',
                 'num_pool', 'gnn_per_block', 'embedding_dim', 'pool_ratio',
                 'linkpred', 'cat', 'self_loop', 'max_time'):
        parser.add_argument('--' + name, help="Please give a value for " + name)
    parser.add_argument('--expid', help='Experiment id.')

    # pna params
    parser.add_argument('--aggregators', type=str, help='Aggregators to use.')
    parser.add_argument('--scalers', type=str, help='Scalers to use.')
    parser.add_argument('--towers', type=int, help='Towers to use.')
    parser.add_argument('--divide_input_first', type=str,
                        help='Whether to divide the input in first layers.')
    parser.add_argument('--divide_input_last', type=str,
                        help='Whether to divide the input in last layer.')
    parser.add_argument('--gru', type=str, help='Whether to use gru.')
    parser.add_argument('--edge_dim', type=int, help='Size of edge embeddings.')
    parser.add_argument('--pretrans_layers', type=int, help='pretrans_layers.')
    parser.add_argument('--posttrans_layers', type=int, help='posttrans_layers.')
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    # device: giving --gpu_id also forces GPU usage on.
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # dataset, out_dir
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = MoleculeDataset(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    apply_overrides(args, params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # network parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    apply_overrides(args, net_params, (
        ('L', int),
        ('hidden_dim', int),
        ('out_dim', int),
        ('residual', is_true),
        ('edge_feat', is_true),
        ('readout', None),
        ('kernel', int),
        ('n_heads', int),
        ('gated', is_true),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('graph_norm', is_true),
        ('batch_norm', is_true),
        ('sage_aggregator', None),
        ('data_mode', None),
        ('num_pool', int),
        ('gnn_per_block', int),
        ('embedding_dim', int),
        ('pool_ratio', float),
        ('linkpred', is_true),
        ('cat', is_true),
        ('self_loop', is_true),
        ('aggregators', None),
        ('scalers', None),
        ('towers', None),
        ('divide_input_first', is_true),
        ('divide_input_last', is_true),
        ('gru', is_true),
        ('edge_dim', None),
        ('pretrans_layers', None),
        ('posttrans_layers', None),
    ))

    # ZINC
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    MODEL_NAME = 'PNA'

    # Row sums of every training graph's adjacency matrix, concatenated
    # (presumably the node degrees -- confirm against the DGL version in use),
    # then three averaged transforms of those values.
    D = torch.cat([
        torch.sparse.sum(g.adjacency_matrix(transpose=True), dim=-1).to_dense()
        for g in dataset.train.graph_lists
    ])
    net_params['avg_d'] = dict(lin=torch.mean(D),
                               exp=torch.mean(torch.exp(torch.div(1, D)) - 1),
                               log=torch.mean(torch.log(D + 1)))

    # Build the run tag once so that the four output paths always carry the
    # same timestamp (separate strftime calls could straddle a second).
    run_tag = (MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id'])
               + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y'))
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    # exist_ok avoids the check-then-create race when several runs start
    # at the same time.
    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    net_params['total_param'] = view_model_param(net_params)
    train_val_pipeline(dataset, params, net_params, dirs)
def main():
    """
    USER CONTROLS

    Command-line entry point: parse the options, merge them into the JSON
    config given by ``--config``, load the molecule dataset (with the
    requested positional-encoding dimension and Laplacian normalisation)
    and start ``train_val_pipeline``.

    Precedence: a command-line option that is not ``None`` replaces the
    matching entry of ``config['params']`` / ``config['net_params']``.
    Options that declare a default (``--towers``, ``--type_net``,
    ``--flip``, ``--pos_enc_dim``) are never ``None`` and therefore always
    replace the config value.

    Side effects: reads the config file and runs training.
    """

    def parse_flag(text):
        # argparse converter for boolean options. `type=bool` treats every
        # non-empty string (including 'False') as True; here the usual
        # "false" spellings map to False and anything else stays True.
        return text.strip().lower() not in ('', 'false', 'f', '0', 'no', 'n', 'off')

    def is_true(text):
        # Legacy convention of the string-typed switches: only the exact
        # text 'True' enables the option.
        return text == 'True'

    def apply_overrides(parsed, target, specs):
        # Copy every option that was given (not None) into `target`,
        # converting it with `cast` unless `cast` is None.
        for key, cast in specs:
            value = getattr(parsed, key)
            if value is not None:
                target[key] = value if cast is None else cast(value)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--config',
        help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    for name in ('seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
                 'lr_schedule_patience', 'min_lr', 'weight_decay',
                 'print_epoch_interval', 'L', 'hidden_dim', 'out_dim',
                 'residual', 'edge_feat', 'readout', 'in_feat_dropout',
                 'dropout', 'graph_norm', 'batch_norm', 'max_time'):
        parser.add_argument('--' + name, help="Please give a value for " + name)
    parser.add_argument('--expid', help='Experiment id.')
    parser.add_argument('--type_net', default='simple', help='Type of net')
    parser.add_argument('--lap_norm', default='none', help='Laplacian normalisation')
    parser.add_argument('--flip', action='store_true', default=False,
                        help='Flip eig each epoch')

    # dgn params
    parser.add_argument('--aggregators', type=str, help='Aggregators to use.')
    parser.add_argument('--scalers', type=str, help='Scalers to use.')
    parser.add_argument('--towers', type=int, default=5, help='Towers to use.')
    parser.add_argument('--divide_input_first', type=parse_flag,
                        help='Whether to divide the input in first layer.')
    parser.add_argument('--divide_input_last', type=parse_flag,
                        help='Whether to divide the input in last layers.')
    parser.add_argument('--edge_dim', type=int, help='Size of edge embeddings.')
    parser.add_argument('--pretrans_layers', type=int, help='pretrans_layers.')
    parser.add_argument('--posttrans_layers', type=int, help='posttrans_layers.')
    parser.add_argument('--pos_enc_dim', default=0, type=int,
                        help='Positional encoding dimension')
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    # device: giving --gpu_id also forces GPU usage on.
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # dataset
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = MoleculeDataset(DATASET_NAME,
                              pos_enc_dim=int(args.pos_enc_dim),
                              norm=args.lap_norm)

    # parameters
    params = config['params']
    apply_overrides(args, params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # network parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    apply_overrides(args, net_params, (
        ('L', int),
        ('hidden_dim', int),
        ('out_dim', int),
        ('residual', is_true),
        ('edge_feat', is_true),
        ('readout', None),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('graph_norm', is_true),
        ('batch_norm', is_true),
        ('aggregators', None),
        ('scalers', None),
        ('towers', None),
        ('divide_input_first', None),
        ('divide_input_last', None),
        ('edge_dim', None),
        ('pretrans_layers', None),
        ('posttrans_layers', None),
        ('type_net', None),
        ('flip', None),
        ('pos_enc_dim', None),
    ))

    # ZINC
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    # calculate logarithmic average degree for scalers
    # (row sums of every training graph's adjacency matrix, concatenated;
    # presumably the node degrees -- confirm against the DGL version in use)
    D = torch.cat([
        torch.sparse.sum(g.adjacency_matrix(transpose=True), dim=-1).to_dense()
        for g in dataset.train.graph_lists
    ])
    net_params['avg_d'] = dict(lin=torch.mean(D),
                               exp=torch.mean(torch.exp(torch.div(1, D)) - 1),
                               log=torch.mean(torch.log(D + 1)))

    net_params['total_param'] = view_model_param(net_params)
    train_val_pipeline(dataset, params, net_params)
def _str_to_bool(text):
    """Convert a command-line string such as 'True'/'false'/'1'/'no' to a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value (including 'False') becomes True.  This converter is used
    instead so that an explicit 'False' really disables the option.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    lowered = text.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays falsy, as it was under ``bool('')``.
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r' % (text,))


def _is_true_literal(text):
    """Return True only for the exact string 'True' (legacy flag convention)."""
    return text == 'True'


def _apply_cli_overrides(target, args, spec):
    """Copy every CLI value that was supplied (is not None) into ``target``.

    ``spec`` is an ordered sequence of ``(name, converter)`` pairs; a converter
    of None stores the parsed value unchanged.  Order is preserved so that new
    keys are inserted into ``target`` in the same order as before.
    """
    for name, convert in spec:
        value = getattr(args, name)
        if value is None:
            continue
        target[name] = value if convert is None else convert(value)


def main():
    """Parse CLI arguments, merge them over the JSON config and start training.

    Values given on the command line override the corresponding entries of the
    config file's 'gpu', 'params' and 'net_params' sections.  The dataset is
    built with ``MoleculeDataset``, the model name is fixed to 'EIG', and the
    run is handed to ``train_val_pipeline`` together with the output paths.
    """
    parser = argparse.ArgumentParser()

    def add_plain(*names):
        # Untyped options that all share the same help-text pattern.
        for name in names:
            parser.add_argument('--' + name, help="Please give a value for " + name)

    # --config is opened unconditionally below, so it must be supplied.
    parser.add_argument('--config', required=True,
                        help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    add_plain('out_dir', 'seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
              'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval',
              'L', 'hidden_dim', 'out_dim', 'residual')
    parser.add_argument('--JK', default='last', help='Jumping Knowledge')
    add_plain('edge_feat', 'readout', 'kernel', 'n_heads', 'gated', 'in_feat_dropout',
              'dropout', 'graph_norm', 'batch_norm', 'sage_aggregator', 'data_mode',
              'num_pool', 'gnn_per_block', 'embedding_dim', 'pool_ratio', 'linkpred',
              'cat', 'self_loop', 'max_time')
    parser.add_argument('--expid', help='Experiment id.')
    parser.add_argument('--type_net', default='simple', help='Type of net')
    parser.add_argument('--lap_norm', default='none', help='Laplacian normalisation')

    # eig params
    parser.add_argument('--aggregators', type=str, help='Aggregators to use.')
    parser.add_argument('--scalers', type=str, help='Scalers to use.')
    parser.add_argument('--towers', type=int, default=5, help='Towers to use.')
    # type=bool would turn the string 'False' into True; parse the text explicitly.
    parser.add_argument('--divide_input_first', type=_str_to_bool,
                        help='Whether to divide the input in first layer.')
    parser.add_argument('--divide_input_last', type=_str_to_bool,
                        help='Whether to divide the input in last layers.')
    parser.add_argument('--gru', type=_str_to_bool, help='Whether to use gru.')
    parser.add_argument('--edge_dim', type=int, help='Size of edge embeddings.')
    parser.add_argument('--pretrans_layers', type=int, help='pretrans_layers.')
    parser.add_argument('--posttrans_layers', type=int, help='posttrans_layers.')
    parser.add_argument('--not_pre', action='store_true', default=False,
                        help='Not applying pre-transformation')

    # structure aware gnn
    # --features is split unconditionally below, so it must be supplied.
    parser.add_argument('--features', type=str, required=True,
                        help='Space separated list of node features.')
    parser.add_argument('--label', type=str, help='Single node label')
    parser.add_argument('--max_graphs', type=int, help='Maximum number of graphs considered')
    parser.add_argument('--embedding_size', type=int, help='Size of the graph embedding')
    parser.add_argument('--distance_function', type=str, help='Embeddings distance function')
    parser.add_argument('--normalization', type=str, help='Divide distances by max (Yes/No)')

    args = parser.parse_args()
    print(args.config)
    with open(args.config) as f:
        config = json.load(f)

    # device: an explicit --gpu_id also forces GPU usage on
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # dataset, out_dir
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    print(DATASET_NAME)
    distances_normalization = args.normalization == 'Yes'
    dataset = MoleculeDataset(DATASET_NAME,
                              [NODE_INFORMATION[feature] for feature in args.features.split()],
                              NODE_INFORMATION[args.label],
                              args.max_graphs,
                              normalization=distances_normalization)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    _apply_cli_overrides(params, args, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))
    params['features'] = args.features
    params['label'] = args.label

    # network parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    _apply_cli_overrides(net_params, args, (
        ('L', int),
        ('hidden_dim', int),
        ('out_dim', int),
        ('residual', _is_true_literal),
        ('JK', None),
        ('edge_feat', _is_true_literal),
        ('readout', None),
        ('kernel', int),
        ('n_heads', int),
        ('gated', _is_true_literal),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('graph_norm', _is_true_literal),
        ('batch_norm', _is_true_literal),
        ('sage_aggregator', None),
        ('data_mode', None),
        ('num_pool', int),
        ('gnn_per_block', int),
        ('embedding_dim', int),
        ('pool_ratio', float),
        ('linkpred', _is_true_literal),
        ('cat', _is_true_literal),
        ('self_loop', _is_true_literal),
        ('aggregators', None),
        ('scalers', None),
        ('towers', None),
        ('divide_input_first', None),
        ('divide_input_last', None),
        ('gru', None),
        ('edge_dim', None),
        ('pretrans_layers', None),
        ('posttrans_layers', None),
        ('not_pre', None),
        ('type_net', None),
        ('lap_norm', None),
        ('embedding_size', None),
        ('distance_function', None),
    ))

    # Node feature width is read from the first training graph.
    g0, _ = dataset.train[0]
    net_params['num_feat'] = len(g0.ndata['feat'][0])

    # Row sums of every training graph's adjacency matrix (presumably the node
    # degrees), concatenated, then averaged under three transforms.
    D = torch.cat([torch.sparse.sum(g.adjacency_matrix(transpose=True), dim=-1).to_dense()
                   for g in dataset.train.graph_lists])
    net_params['avg_d'] = dict(lin=torch.mean(D),
                               exp=torch.mean(torch.exp(torch.div(1, D)) - 1),
                               log=torch.mean(torch.log(D + 1)))

    MODEL_NAME = 'EIG'
    # Take the timestamp once so all four output names share the same suffix,
    # even if the clock ticks over while the paths are being built.
    run_tag = (MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id'])
               + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y'))
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    net_params['total_param'] = view_model_param(net_params)
    train_val_pipeline(dataset, params, net_params, dirs)