# --- Script body: train/search on a user-supplied molecule CSV. ---
# NOTE(review): top-level script code; `args`, `mkdir_p`, `split_dataset`,
# `get_configure`, `bayesian_optimization` and `main` are defined elsewhere
# in the project.

# Run on the first CUDA GPU when one is present, otherwise fall back to CPU.
if torch.cuda.is_available():
    args['device'] = torch.device('cuda:0')
else:
    args['device'] = torch.device('cpu')

# A comma-separated task string from the command line becomes a list of names.
if args['task_names'] is not None:
    args['task_names'] = args['task_names'].split(',')

args['node_featurizer'] = CanonicalAtomFeaturizer()

df = pd.read_csv(args['csv_path'])
mkdir_p(args['result_path'])

# Build molecular graphs from the SMILES column; constructed graphs are
# cached on disk so repeated runs skip featurization.
dataset = MoleculeCSVDataset(df=df,
                             smiles_to_graph=smiles_to_bigraph,
                             node_featurizer=args['node_featurizer'],
                             edge_featurizer=None,
                             smiles_column=args['smiles_column'],
                             cache_file_path=args['result_path'] + '/graph.bin',
                             task_names=args['task_names'])
args['n_tasks'] = dataset.n_tasks
train_set, val_set, test_set = split_dataset(args, dataset)

# Reuse previously found hyperparameters when available; otherwise search.
exp_config = get_configure(args)
if exp_config is None:
    print('Start hyperparameter search with Bayesian optimization')
    bayesian_optimization(args, train_set, val_set, test_set)
else:
    print('Use the best hyperparameters found before')
    args.update(exp_config)
    main(args, train_set, val_set, test_set)
# --- Script body: build a CSV dataset, optionally run Bayesian hyperparameter
# search, then copy the winning trial's artifacts to the result root. ---
# NOTE(review): top-level script code; `args`, `mkdir_p`, `split_dataset`,
# `get_configure`, `bayesian_optimization` and `main` are defined elsewhere
# in the project.

args['node_featurizer'] = CanonicalAtomFeaturizer()

df = pd.read_csv(args['csv_path'])
mkdir_p(args['result_path'])

# Build molecular graphs from the SMILES column; constructed graphs are
# cached on disk so repeated runs skip featurization.
dataset = MoleculeCSVDataset(df=df,
                             smiles_to_graph=smiles_to_bigraph,
                             node_featurizer=args['node_featurizer'],
                             edge_featurizer=None,
                             smiles_column=args['smiles_column'],
                             cache_file_path=args['result_path'] + '/graph.bin',
                             task_names=args['task_names'])
args['n_tasks'] = dataset.n_tasks
train_set, val_set, test_set = split_dataset(args, dataset)

if args['num_evals'] is not None:
    # Validate with an explicit raise rather than `assert`, so the check
    # survives `python -O` (asserts are stripped with optimization on).
    if args['num_evals'] <= 0:
        raise ValueError(
            'Expect the number of hyperparameter search trials to '
            'be greater than 0, got {:d}'.format(args['num_evals']))
    print('Start hyperparameter search with Bayesian '
          'optimization for {:d} trials'.format(args['num_evals']))
    trial_path = bayesian_optimization(args, train_set, val_set, test_set)
else:
    print('Use the manually specified hyperparameters')
    exp_config = get_configure(args['model'])
    main(args, exp_config, train_set, val_set, test_set)
    # With no search, results of the single run live under trial directory 1.
    trial_path = args['result_path'] + '/1'

# Copy final
copyfile(trial_path + '/model.pth', args['result_path'] + '/model.pth')
copyfile(trial_path + '/configure.json', args['result_path'] + '/configure.json')
copyfile(trial_path + '/eval.txt', args['result_path'] + '/eval.txt')
else: args['device'] = torch.device('cpu') args = init_featurizer(args) mkdir_p(args['result_path']) if args['dataset'] == 'FreeSolv': from dgllife.data import FreeSolv dataset = FreeSolv(smiles_to_graph=partial(smiles_to_bigraph, add_self_loop=True), node_featurizer=args['node_featurizer'], edge_featurizer=args['edge_featurizer'], n_jobs=1 if args['num_workers'] == 0 else args['num_workers']) elif args['dataset'] == 'Lipophilicity': from dgllife.data import Lipophilicity dataset = Lipophilicity(smiles_to_graph=partial(smiles_to_bigraph, add_self_loop=True), node_featurizer=args['node_featurizer'], edge_featurizer=args['edge_featurizer'], n_jobs=1 if args['num_workers'] == 0 else args['num_workers']) elif args['dataset'] == 'ESOL': from dgllife.data import ESOL dataset = ESOL(smiles_to_graph=partial(smiles_to_bigraph, add_self_loop=True), node_featurizer=args['node_featurizer'], edge_featurizer=args['edge_featurizer'], n_jobs=1 if args['num_workers'] == 0 else args['num_workers']) else: raise ValueError('Unexpected dataset: {}'.format(args['dataset'])) args['n_tasks'] = dataset.n_tasks train_set, val_set, test_set = split_dataset(args, dataset) exp_config = get_configure(args['model'], args['featurizer_type'], args['dataset']) main(args, exp_config, train_set, val_set, test_set)