コード例 #1
0
def load_dataset(args, df):
    """Construct a ``MoleculeCSVDataset`` from ``df`` using settings in ``args``.

    Parameters
    ----------
    args : dict
        Configuration mapping. The keys read here are ``'node_featurizer'``,
        ``'edge_featurizer'``, ``'smiles_column'``, ``'result_path'``,
        ``'task_names'`` and ``'num_workers'``.
    df
        Forwarded unchanged as the ``df`` argument of ``MoleculeCSVDataset``.

    Returns
    -------
    The ``MoleculeCSVDataset`` instance that was constructed.
    """
    # This variant unconditionally requests self loops from the graph builder.
    graph_builder = partial(smiles_to_bigraph, add_self_loop=True)

    return MoleculeCSVDataset(
        df=df,
        smiles_to_graph=graph_builder,
        node_featurizer=args['node_featurizer'],
        edge_featurizer=args['edge_featurizer'],
        smiles_column=args['smiles_column'],
        # The cache file is placed directly under the result directory.
        cache_file_path=args['result_path'] + '/graph.bin',
        task_names=args['task_names'],
        n_jobs=args['num_workers'],
    )
コード例 #2
0
ファイル: utils.py プロジェクト: yangkang98/dgl-lifesci
def load_dataset(args, df):
    """Construct a ``MoleculeCSVDataset`` from ``df`` using settings in ``args``.

    Self loops are requested from the graph builder only when
    ``args['model']`` is one of the four ``gin_supervised_*`` names listed
    in the body; for any other model they are disabled.

    Parameters
    ----------
    args : dict
        Configuration mapping. The keys read here are ``'model'``,
        ``'node_featurizer'``, ``'edge_featurizer'``, ``'smiles_column'``,
        ``'result_path'`` and ``'task_names'``.
    df
        Forwarded unchanged as the ``df`` argument of ``MoleculeCSVDataset``.

    Returns
    -------
    The ``MoleculeCSVDataset`` instance that was constructed.
    """
    models_with_self_loop = (
        'gin_supervised_contextpred',
        'gin_supervised_infomax',
        'gin_supervised_edgepred',
        'gin_supervised_masking',
    )
    # The membership test already yields the boolean the builder needs.
    graph_builder = partial(
        smiles_to_bigraph,
        add_self_loop=args['model'] in models_with_self_loop)

    return MoleculeCSVDataset(
        df=df,
        smiles_to_graph=graph_builder,
        node_featurizer=args['node_featurizer'],
        edge_featurizer=args['edge_featurizer'],
        smiles_column=args['smiles_column'],
        # The cache file is placed directly under the result directory.
        cache_file_path=args['result_path'] + '/graph.bin',
        task_names=args['task_names'])
コード例 #3
0
    if torch.cuda.is_available():
        args['device'] = torch.device('cuda:0')
    else:
        args['device'] = torch.device('cpu')

    if args['task_names'] is not None:
        args['task_names'] = args['task_names'].split(',')

    args['node_featurizer'] = CanonicalAtomFeaturizer()
    df = pd.read_csv(args['csv_path'])
    mkdir_p(args['result_path'])
    dataset = MoleculeCSVDataset(df=df,
                                 smiles_to_graph=smiles_to_bigraph,
                                 node_featurizer=args['node_featurizer'],
                                 edge_featurizer=None,
                                 smiles_column=args['smiles_column'],
                                 cache_file_path=args['result_path'] + '/graph.bin',
                                 task_names=args['task_names'])
    args['n_tasks'] = dataset.n_tasks
    train_set, val_set, test_set = split_dataset(args, dataset)

    if args['num_evals'] is not None:
        assert args['num_evals'] > 0, 'Expect the number of hyperparameter search trials to ' \
                                      'be greater than 0, got {:d}'.format(args['num_evals'])
        print('Start hyperparameter search with Bayesian '
              'optimization for {:d} trials'.format(args['num_evals']))
        trial_path = bayesian_optimization(args, train_set, val_set, test_set)
    else:
        print('Use the manually specified hyperparameters')
        exp_config = get_configure(args['model'])
コード例 #4
0
ファイル: main.py プロジェクト: BatoolMM/dgl-lifesci
        'all the columns except for the smiles_column in the CSV file. '
        '(default: None)')
    args = parser.parse_args().__dict__

    args['exp_name'] = '_'.join([args['model'], args['mode']])
    if args['tasks'] is not None:
        args['tasks'] = args['tasks'].split(',')
    args.update(configs[args['exp_name']])

    # Setup for experiments
    mkdir_p(args['result_path'])

    node_featurizer = atom_featurizer
    edge_featurizer = CanonicalBondFeaturizer(bond_data_field='he',
                                              self_loop=True)
    df = pd.read_csv(args['csv_path'])
    dataset = MoleculeCSVDataset(
        df,
        partial(smiles_to_bigraph, add_self_loop=True),
        node_featurizer=node_featurizer,
        edge_featurizer=edge_featurizer,
        smiles_column=args['smiles_column'],
        cache_file_path=args['result_path'] + '/graph.bin',
        task_names=args['tasks'])
    args['tasks'] = dataset.task_names
    args = setup(args)
    train_set, val_set, test_set = RandomSplitter.train_val_test_split(
        dataset, frac_train=0.8, frac_val=0.1, frac_test=0.1, random_state=0)

    main(args, node_featurizer, edge_featurizer, train_set, val_set, test_set)