def __init__(self):
    """Training hyper-parameters, GNN settings, data locations and CLI inputs."""
    # Training details.
    self.batch_size = 50
    self.num_epochs = 30
    self.log_interval = 5
    self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

    # Molecule details.
    self.gnn_hidden_size: int = 101  # our molecule features have this dimensionality.
    self.edge_names = ['single', 'double', 'triple']
    self.gnn_time_steps = 4
    self.gnn_embedding_dim = 50

    # Data paths -- everything lives under the processed-data directory.
    data_dir = mchef_config.get_processed_data_dir()
    self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
    self.path_react_bags_train = path.join(data_dir, 'train_react_bags.txt')
    self.path_react_bags_val = path.join(data_dir, 'valid_react_bags.txt')
    self.path_products_train = path.join(data_dir, 'train_products.txt')
    self.path_products_val = path.join(data_dir, 'valid_products.txt')

    # Command line arguments (docopt parses the module docstring).
    cli_args = docopt(__doc__)
    self.weights_to_use = cli_args['<input_weights>']
def __init__(self):
    """Training hyper-parameters and data locations; run name comes from the environment."""
    # NOTE(review): os.getenv returns None when MCHEF_NAME is unset, so
    # run_name becomes the literal string "None" -- confirm this is intended.
    self.run_name = str(os.getenv("MCHEF_NAME"))
    print(f"Run name is {self.run_name}")

    # Locations of the preprocessed data files.
    data_dir = mchef_config.get_processed_data_dir()
    self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
    self.path_react_bags_train = path.join(data_dir, 'train_react_bags.txt')
    self.path_react_bags_val = path.join(data_dir, 'valid_react_bags.txt')
    self.path_products_train = path.join(data_dir, 'train_products.txt')
    self.path_products_val = path.join(data_dir, 'valid_products.txt')

    # Optimisation schedule.
    self.num_epochs = 100
    self.batch_size = 25
    self.learning_rate = 0.001
    self.lr_reduction_interval = 40
    self.lr_reduction_factor = 0.1
    self.cuda_details = gnn_utils.CudaDetails(
        use_cuda=torch.cuda.is_available(), gpu_id=0)

    # Loss / model hyper-parameters.
    self.lambda_value = 10.  # see WAE paper, section 4
    self.property_pred_factor = 50.
    self.latent_dim = 25
def collate_function(batch):
    """Collate (stacked-node-features, target) pairs into a single batch.

    The per-sample feature structures are concatenated into one object and
    converted to torch on CPU via ``to_torch`` (return value ignored, so
    presumably it mutates in place -- confirm); targets become a torch tensor.
    """
    # TODO: will not be able to pin memory at the moment.
    node_feats = [sample[0] for sample in batch]
    raw_targets = [sample[1] for sample in batch]

    combined = node_feats[0].concatenate(node_feats)
    combined.to_torch(cuda_details=utils.CudaDetails(use_cuda=False))

    return combined, torch.from_numpy(np.array(raw_targets))
def __init__(self):
    """Sampling settings, data locations and command-line inputs."""
    # How many molecules to sample, and in what sized chunks.
    self.num_to_generate = 20000
    self.batch_size = 2000

    data_dir = mchef_config.get_processed_data_dir()
    self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
    self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

    # Command line arguments (docopt parses the module docstring).
    cli_args = docopt(__doc__)
    self.weights_to_use = cli_args['<input_weights>']
    self.location_for_tokenized_reactants = cli_args['<output_name>']
def collate_function(batch):
    """Collate (adjacency-list graph, target) pairs into a single batch.

    The per-sample graphs are concatenated into one structure and converted
    to torch on CPU via ``to_torch`` (return value ignored, so presumably it
    mutates in place -- confirm); targets become a torch tensor.
    """
    # TODO: will not be able to pin memory at the moment.
    graphs = [sample[0] for sample in batch]
    raw_targets = [sample[1] for sample in batch]

    batched_graph = graphs[0].concatenate(graphs)
    batched_graph.to_torch(
        cuda_details=utils.CudaDetails(use_cuda=False))

    return batched_graph, torch.from_numpy(np.array(raw_targets))
def __init__(self):
    """Settings for molecule optimisation plus required data/weight locations."""
    # Optimisation loop settings.
    self.num_molecules_to_optimize: int = 250
    self.num_distinct_molecule_steps: int = 10
    self.epsilon: float = 0.5
    self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

    # Data paths.
    data_dir = mchef_config.get_processed_data_dir()
    self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
    self.path_react_bags_train = path.join(data_dir, 'train_react_bags.txt')

    # Command line arguments.
    cli_args = docopt(__doc__)
    self.weights_to_use = cli_args['<input_weights>']
def __init__(self):
    """Parse command-line arguments and set evaluation parameters.

    Bug fix: the ``--test_on_val`` help text was built from two implicitly
    concatenated string literals with no separating space, so it rendered as
    "...ratherthan..."; a space has been added.
    """
    # NOTE(review): the positional argument to ArgumentParser sets `prog`,
    # not `description` -- probably intended as a description; confirm.
    parser = argparse.ArgumentParser("Evaluate ELECTRO (or ELECTRO-LITE) on USPTO")
    parser.add_argument("checkpoint_path",
                        help="location of the checkpoint file, use the string 'none' for random weights")
    parser.add_argument("output_file",
                        help="where to store the predicted electron paths")
    parser.add_argument("--test_on_val", action="store_true",
                        help="if set then will use validation dataset rather "
                             "than the test dataset")
    parser.add_argument("--run_first_x", default=0, type=int,
                        help="number of test set to use, (0 means run all)")
    args = parser.parse_args()

    self.chkpt_loc = args.checkpoint_path
    self.output_location = args.output_file
    self.use_val_as_test_set = args.test_on_val
    self.num_test_set_to_use = args.run_first_x

    self.beam_width = 10
    self.cuda_details = utils.CudaDetails(use_cuda=torch.cuda.is_available())
def __init__(self):
    """GNN hyper-parameters, data locations and command-line weight paths."""
    self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

    # GNN details.
    self.gnn_args = dict(output_dim=25,
                         hidden_layer_size=101,
                         edge_names=['single', 'double', 'triple'],
                         embedding_dim=50,
                         T=4)

    # Data Paths.
    data_dir = mchef_config.get_processed_data_dir()
    self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
    self.product_files_to_try = [
        ('test_reachable', path.join(data_dir, 'test_products.txt')),
        ('test_unreachable', path.join(data_dir, 'test_unreachable_products.txt')),
    ]

    # Command line arguments.
    cli_args = docopt(__doc__)
    self.weights_to_use_mchef = cli_args['<input_weights_mchef>']
    self.weights_to_use_regressor = cli_args['<input_weights_regressor>']
def __init__(self):
    """Parse the command line and set the ELECTRO training schedule."""
    parser = argparse.ArgumentParser(
        "Train ELECTRO (or ELECTRO-LITE) on USPTO-LEF dataset")
    parser.add_argument("--electro_lite", action="store_true")
    cli = parser.parse_args()
    self.electro_lite_flag = cli.electro_lite

    # Optimisation schedule.
    self.num_epochs = 14
    self.initial_lr = 0.001
    self.lr_decay_epochs = [8, 12]
    self.lr_decay_factor = 0.1

    # Batching / hardware.
    self.batch_size_wrt_reactions = 30
    self.val_batch_size_wrt_reactions = 100
    self.cuda_details = utils.CudaDetails(use_cuda=torch.cuda.is_available())
    self.num_dataloader_workers = 10
def main():
    """Evaluate ELECTRO over the whole test set in parallel and report top-k accuracy.

    Bug fix: the final output used ``fo.writelines`` on a single joined string,
    which only works by accident (a str iterates per character); replaced with
    ``fo.write``, which produces byte-identical output and is the correct call.
    """
    params = eval_electro.Params()
    params.cuda_details = utils.CudaDetails(
        use_cuda=False)  # will not use GPUs when working in parallel
    params.num_workers = 17

    # We first load in the model.
    electro = eval_electro._get_model_and_loadin_weights(
        params.cuda_details, params)

    # Then the dataset.
    dataset = eval_electro._get_data(params.use_val_as_test_set)

    # Then we create the beam searcher.
    predictor = beam_searcher.PredictiveRanking(electro, params.cuda_details)

    # Then we go through and predict out the series of electron paths for each reaction.
    assert params.num_test_set_to_use == 0, "should be run on whole dataset"
    MAX_TOP_ACC_TO_EVAL = 10
    num_to_use = len(dataset)

    # Create a pool and assign the workers to go through the dataset.
    pool = Pool(params.num_workers)
    work_items = ((i, dataset, predictor, MAX_TOP_ACC_TO_EVAL)
                  for i in range(num_to_use))
    list_of_results = list(
        tqdm.tqdm(pool.imap(_worker_func, work_items), total=num_to_use))
    pool.close()
    pool.join()

    # Stitch the results back together:
    per_reaction_accs, result_lines = zip(*list_of_results)
    acc_storage = np.stack(per_reaction_accs)

    # Path-level top-k accuracy: a reaction counts as correct at k when any of
    # its first k predictions scored a hit (cumsum > 0.5 flags "at least one").
    top_k_accs = np.mean(
        (np.cumsum(acc_storage, axis=1) > 0.5).astype(np.float64), axis=0)
    for k, k_acc in enumerate(top_k_accs, start=1):
        print(f"The top-{k} accuracy is {k_acc}")

    # Finally we store the reaction paths in a text file.
    with open(params.output_location, 'w') as fo:
        fo.write('\n'.join(result_lines))
def __init__(self):
    """Default inference settings: checkpoint location, beam width, CPU-only, test split."""
    # Checkpoint to load (relative path, preserved exactly).
    self.chkpt_loc = "../train_electro/chkpts/electro.pth.pick"
    # Beam-search width used during prediction.
    self.beam_width = 10
    # Run on CPU by default.
    self.cuda_details = utils.CudaDetails(use_cuda=False)
    # Evaluate on the test set rather than the validation set.
    self.use_val_as_test_set = False