Esempio n. 1
0
        args['device'] = torch.device('cuda:0')
    else:
        args['device'] = torch.device('cpu')

    # Load the SMILES strings to process. The reader is chosen from the file
    # extension: pandas for .csv / .csv.gz, load_smiles_from_txt for .txt.
    if args['file_path'].endswith(('.csv', '.csv.gz')):
        import pandas
        df = pandas.read_csv(args['file_path'])
        if args['smiles_column'] is not None:
            smiles = df[args['smiles_column']].tolist()
        else:
            # Without an explicit column the choice is only unambiguous for a
            # single-column file. Raise instead of using assert so the check
            # is still enforced when Python runs with -O.
            if len(df.columns) != 1:
                raise ValueError(
                    'The CSV file has more than 1 columns and '
                    '-sc (smiles-column) needs to be specified.')
            smiles = df[df.columns[0]].tolist()
    elif args['file_path'].endswith('.txt'):
        from dgllife.utils import load_smiles_from_txt
        smiles = load_smiles_from_txt(args['file_path'])
    else:
        raise ValueError(
            'Expect the input data file to be a .csv or a .txt file, '
            'got {}'.format(args['file_path']))
    args['smiles'] = smiles
    # From here on, args is whatever init_featurizer returns.
    args = init_featurizer(args)

    # Handle directories
    mkdir_p(args['inference_result_path'])
    # The saved training results must already exist. Raise explicitly rather
    # than assert so the check is not stripped when Python runs with -O.
    if not os.path.exists(args['train_result_path']):
        raise FileNotFoundError(
            'The path to the saved training results does not exist.')

    main(args)
Esempio n. 2
0
            'gin_supervised_contextpred', 'gin_supervised_infomax',
            'gin_supervised_edgepred', 'gin_supervised_masking'
        ],
        help='Pre-trained model to use for computing molecule embeddings')
    # Batch size used for the embedding computation.
    parser.add_argument(
        '-b', '--batch-size',
        type=int, default=256,
        help='Batch size for embedding computation')
    # Location of the computation results.
    parser.add_argument(
        '-o', '--out-dir',
        type=str, default='results',
        help='Path to the computation results')
    # Work with the parsed options as a plain dict keyed by option name.
    args = vars(parser.parse_args())
    mkdir_p(args['out_dir'])

    # Use the first CUDA device when CUDA is available, otherwise the CPU.
    args['device'] = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')

    # Local import: only needed here to build the output path portably.
    import os

    # Read the input SMILES, either through load_smiles_from_txt or from a
    # named column of a CSV file.
    if args['format'] == 'txt':
        smiles = load_smiles_from_txt(args['file'])
    else:
        # Fail early with a clear message; indexing the DataFrame with None
        # would otherwise surface as an opaque KeyError.
        if args['smiles_column'] is None:
            raise ValueError(
                'The smiles_column argument is required to read SMILES '
                'from a CSV file.')
        df = pd.read_csv(args['file'])
        smiles = df[args['smiles_column']].tolist()
    dataset, success = graph_construction_and_featurization(smiles)
    # Persist the second return value next to the other results.
    # NOTE(review): presumably one success flag per input SMILES -- confirm
    # against graph_construction_and_featurization.
    np.save(os.path.join(args['out_dir'], 'mol_parsed.npy'),
            np.array(success))
    main(args, dataset)