Example no. 1
0
    # Select the compute device: prefer the first CUDA GPU when one is available.
    # NOTE(review): assumes `args` is a dict populated by an argument parser upstream.
    args['device'] = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    # A comma-separated task string from the CLI becomes a list of task names.
    if args['task_names'] is not None:
        args['task_names'] = args['task_names'].split(',')

    args['node_featurizer'] = CanonicalAtomFeaturizer()
    data_frame = pd.read_csv(args['csv_path'])
    mkdir_p(args['result_path'])
    # Build molecule graphs from the CSV; graphs are cached under the result dir.
    dataset = MoleculeCSVDataset(
        df=data_frame,
        smiles_to_graph=smiles_to_bigraph,
        node_featurizer=args['node_featurizer'],
        edge_featurizer=None,
        smiles_column=args['smiles_column'],
        cache_file_path=args['result_path'] + '/graph.bin',
        task_names=args['task_names'])
    args['n_tasks'] = dataset.n_tasks
    train_set, val_set, test_set = split_dataset(args, dataset)

    # With no saved configuration, search hyperparameters; otherwise reuse the best ones.
    exp_config = get_configure(args)
    if exp_config is None:
        print('Start hyperparameter search with Bayesian optimization')
        bayesian_optimization(args, train_set, val_set, test_set)
    else:
        print('Use the best hyperparameters found before')
        args.update(exp_config)
        main(args, train_set, val_set, test_set)
Example no. 2
0
    # Featurize atoms canonically and load the raw CSV of molecules.
    # NOTE(review): assumes `args` is a dict populated by an argument parser upstream.
    args['node_featurizer'] = CanonicalAtomFeaturizer()
    data_frame = pd.read_csv(args['csv_path'])
    mkdir_p(args['result_path'])
    # Build molecule graphs from the CSV; graphs are cached under the result dir.
    dataset = MoleculeCSVDataset(
        df=data_frame,
        smiles_to_graph=smiles_to_bigraph,
        node_featurizer=args['node_featurizer'],
        edge_featurizer=None,
        smiles_column=args['smiles_column'],
        cache_file_path=args['result_path'] + '/graph.bin',
        task_names=args['task_names'])
    args['n_tasks'] = dataset.n_tasks
    train_set, val_set, test_set = split_dataset(args, dataset)

    # Either run Bayesian hyperparameter search for the requested number of
    # trials, or train once with the manually specified configuration.
    num_evals = args['num_evals']
    if num_evals is not None:
        assert num_evals > 0, 'Expect the number of hyperparameter search trials to ' \
                              'be greater than 0, got {:d}'.format(args['num_evals'])
        print('Start hyperparameter search with Bayesian '
              'optimization for {:d} trials'.format(args['num_evals']))
        trial_path = bayesian_optimization(args, train_set, val_set, test_set)
    else:
        print('Use the manually specified hyperparameters')
        exp_config = get_configure(args['model'])
        main(args, exp_config, train_set, val_set, test_set)
        trial_path = args['result_path'] + '/1'

    # Promote the winning trial's artifacts to the top-level result directory.
    copyfile(trial_path + '/model.pth', args['result_path'] + '/model.pth')
    copyfile(trial_path + '/configure.json', args['result_path'] + '/configure.json')
    copyfile(trial_path + '/eval.txt', args['result_path'] + '/eval.txt')
Example no. 3
0
    else:
        args['device'] = torch.device('cpu')

    args = init_featurizer(args)
    mkdir_p(args['result_path'])

    # Every built-in dataset takes the same constructor arguments, so build
    # them once and dispatch on the dataset name.
    shared_kwargs = {
        'smiles_to_graph': partial(smiles_to_bigraph, add_self_loop=True),
        'node_featurizer': args['node_featurizer'],
        'edge_featurizer': args['edge_featurizer'],
        # n_jobs=0 is not accepted by the loaders; fall back to a single worker.
        'n_jobs': 1 if args['num_workers'] == 0 else args['num_workers'],
    }
    name = args['dataset']
    if name == 'FreeSolv':
        from dgllife.data import FreeSolv
        dataset = FreeSolv(**shared_kwargs)
    elif name == 'Lipophilicity':
        from dgllife.data import Lipophilicity
        dataset = Lipophilicity(**shared_kwargs)
    elif name == 'ESOL':
        from dgllife.data import ESOL
        dataset = ESOL(**shared_kwargs)
    else:
        raise ValueError('Unexpected dataset: {}'.format(args['dataset']))

    args['n_tasks'] = dataset.n_tasks
    train_set, val_set, test_set = split_dataset(args, dataset)
    exp_config = get_configure(args['model'], args['featurizer_type'], args['dataset'])
    main(args, exp_config, train_set, val_set, test_set)