args['device'] = torch.device('cuda:0') else: args['device'] = torch.device('cpu') if args['file_path'].endswith('.csv') or args['file_path'].endswith( '.csv.gz'): import pandas df = pandas.read_csv(args['file_path']) if args['smiles_column'] is not None: smiles = df[args['smiles_column']].tolist() else: assert len(df.columns) == 1, 'The CSV file has more than 1 columns and ' \ '-sc (smiles-column) needs to be specified.' smiles = df[df.columns[0]].tolist() elif args['file_path'].endswith('.txt'): from dgllife.utils import load_smiles_from_txt smiles = load_smiles_from_txt(args['file_path']) else: raise ValueError( 'Expect the input data file to be a .csv or a .txt file, ' 'got {}'.format(args['file_path'])) args['smiles'] = smiles args = init_featurizer(args) # Handle directories mkdir_p(args['inference_result_path']) assert os.path.exists(args['train_result_path']), \ 'The path to the saved training results does not exist.' main(args)
'gin_supervised_contextpred', 'gin_supervised_infomax', 'gin_supervised_edgepred', 'gin_supervised_masking' ], help='Pre-trained model to use for computing molecule embeddings') parser.add_argument('-b', '--batch-size', type=int, default=256, help='Batch size for embedding computation') parser.add_argument('-o', '--out-dir', type=str, default='results', help='Path to the computation results') args = parser.parse_args().__dict__ mkdir_p(args['out_dir']) if torch.cuda.is_available(): args['device'] = torch.device('cuda:0') else: args['device'] = torch.device('cpu') if args['format'] == 'txt': smiles = load_smiles_from_txt(args['file']) else: df = pd.read_csv(args['file']) smiles = df[args['smiles_column']].tolist() dataset, success = graph_construction_and_featurization(smiles) np.save(args['out_dir'] + '/mol_parsed.npy', np.array(success)) main(args, dataset)