Esempio n. 1
0
    def __init__(self):
        """Collect training hyperparameters, GNN settings, data paths and CLI args."""
        # Training details
        self.batch_size = 50
        self.num_epochs = 30
        self.log_interval = 5
        self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

        # Molecule details
        self.gnn_hidden_size: int = 101  # dimensionality of our molecule features
        self.edge_names = ['single', 'double', 'triple']
        self.gnn_time_steps = 4
        self.gnn_embedding_dim = 50

        # Data paths
        data_dir = mchef_config.get_processed_data_dir()
        self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
        self.path_react_bags_train = path.join(data_dir, 'train_react_bags.txt')
        self.path_react_bags_val = path.join(data_dir, 'valid_react_bags.txt')
        self.path_products_train = path.join(data_dir, 'train_products.txt')
        self.path_products_val = path.join(data_dir, 'valid_products.txt')

        # Command line arguments.
        cli_args = docopt(__doc__)
        self.weights_to_use = cli_args['<input_weights>']
    def __init__(self):
        """Set the run name, data paths and optimisation hyperparameters."""
        # NOTE(review): os.getenv returns None when MCHEF_NAME is unset, so
        # run_name can become the literal string "None" — confirm intended.
        self.run_name = str(os.getenv("MCHEF_NAME"))
        print(f"Run name is {self.run_name}")

        # Data paths
        data_dir = mchef_config.get_processed_data_dir()
        self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
        self.path_react_bags_train = path.join(data_dir, 'train_react_bags.txt')
        self.path_react_bags_val = path.join(data_dir, 'valid_react_bags.txt')
        self.path_products_train = path.join(data_dir, 'train_products.txt')
        self.path_products_val = path.join(data_dir, 'valid_products.txt')

        # Optimisation details
        self.num_epochs = 100
        self.batch_size = 25
        self.learning_rate = 0.001
        self.lr_reduction_interval = 40
        self.lr_reduction_factor = 0.1

        self.cuda_details = gnn_utils.CudaDetails(
            use_cuda=torch.cuda.is_available(), gpu_id=0)

        # Model / loss hyperparameters
        self.lambda_value = 10.  # see WAE paper, section 4
        self.property_pred_factor = 50.
        self.latent_dim = 25
Esempio n. 3
0
def collate_function(batch):
    """Collate (stacked-nodes, target) pairs into one batched graph and a target tensor."""
    # todo: will not be able to pin memory at the moment.
    node_stacks = [pair[0] for pair in batch]
    labels = [pair[1] for pair in batch]

    batched_graph = node_stacks[0].concatenate(node_stacks)
    batched_graph.to_torch(cuda_details=utils.CudaDetails(use_cuda=False))

    label_tensor = torch.from_numpy(np.array(labels))
    return batched_graph, label_tensor
Esempio n. 4
0
    def __init__(self):
        """Settings for generating tokenized reactants from a trained model."""
        self.num_to_generate = 20000
        self.batch_size = 2000

        # Data paths
        self.path_mol_details = path.join(
            mchef_config.get_processed_data_dir(), 'reactants_feats.pick')

        self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

        # Command line arguments.
        cli_args = docopt(__doc__)
        self.weights_to_use = cli_args['<input_weights>']
        self.location_for_tokenized_reactants = cli_args['<output_name>']
Esempio n. 5
0
def collate_function(batch):
    """Batch adjacency-list graphs with their targets for a DataLoader."""
    # todo: will not be able to pin memory at the moment.
    adj_graphs = [item[0] for item in batch]
    raw_targets = [item[1] for item in batch]

    combined = adj_graphs[0].concatenate(adj_graphs)
    combined.to_torch(cuda_details=utils.CudaDetails(use_cuda=False))

    return combined, torch.from_numpy(np.array(raw_targets))
Esempio n. 6
0
    def __init__(self):
        """Settings for the molecule-optimisation run."""
        self.num_molecules_to_optimize: int = 250
        self.num_distinct_molecule_steps: int = 10
        self.epsilon: float = 0.5

        self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

        # Data paths
        data_dir = mchef_config.get_processed_data_dir()
        self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
        self.path_react_bags_train = path.join(data_dir, 'train_react_bags.txt')

        # Command line arguments.
        cli_args = docopt(__doc__)
        self.weights_to_use = cli_args['<input_weights>']
Esempio n. 7
0
    def __init__(self):
        """Parse command-line arguments and collect the evaluation settings."""
        parser = argparse.ArgumentParser("Evaluate ELECTRO (or ELECTRO-LITE) on USPTO")
        parser.add_argument("checkpoint_path", help="location of the checkpoint file, use the string 'none' for random weights")
        parser.add_argument("output_file", help="where to store the predicted electron paths")
        # Bug fix: the implicitly-concatenated help strings previously joined as
        # "ratherthan" — the space at the line break was missing.
        parser.add_argument("--test_on_val", action="store_true", help="if set then will use validation dataset rather "
                                                                       "than the test dataset")
        parser.add_argument("--run_first_x", default=0, type=int, help="number of test set to use, (0 means run all)")
        args = parser.parse_args()

        self.chkpt_loc = args.checkpoint_path
        self.output_location = args.output_file
        self.use_val_as_test_set = args.test_on_val
        self.num_test_set_to_use = args.run_first_x

        # Width of the beam search used when ranking predicted electron paths.
        self.beam_width = 10
        self.cuda_details = utils.CudaDetails(use_cuda=torch.cuda.is_available())
Esempio n. 8
0
    def __init__(self):
        """Settings for scoring reachable/unreachable product files."""
        self.cuda_details = gnn_utils.CudaDetails(use_cuda=torch.cuda.is_available())

        # GNN details
        self.gnn_args = dict(output_dim=25, hidden_layer_size=101,
                             edge_names=['single', 'double', 'triple'],
                             embedding_dim=50, T=4)

        # Data Paths
        data_dir = mchef_config.get_processed_data_dir()
        self.path_mol_details = path.join(data_dir, 'reactants_feats.pick')
        self.product_files_to_try = [
            ('test_reachable', path.join(data_dir, 'test_products.txt')),
            ('test_unreachable', path.join(data_dir, 'test_unreachable_products.txt')),
        ]

        # Command line arguments.
        cli_args = docopt(__doc__)
        self.weights_to_use_mchef = cli_args['<input_weights_mchef>']
        self.weights_to_use_regressor = cli_args['<input_weights_regressor>']
Esempio n. 9
0
    def __init__(self):
        """Parse flags and set the ELECTRO training hyperparameters."""
        arg_parser = argparse.ArgumentParser(
            "Train ELECTRO (or ELECTRO-LITE) on USPTO-LEF dataset")
        arg_parser.add_argument("--electro_lite", action="store_true")
        parsed = arg_parser.parse_args()
        self.electro_lite_flag = parsed.electro_lite

        # Optimisation schedule
        self.num_epochs = 14
        self.initial_lr = 0.001
        self.lr_decay_epochs = [8, 12]
        self.lr_decay_factor = 0.1

        # Batch sizes, counted in reactions
        self.batch_size_wrt_reactions = 30
        self.val_batch_size_wrt_reactions = 100

        self.cuda_details = utils.CudaDetails(
            use_cuda=torch.cuda.is_available())
        self.num_dataloader_workers = 10
Esempio n. 10
0
def main():
    """Evaluate ELECTRO over the whole test set in parallel and report top-k accuracies."""
    params = eval_electro.Params()
    # Force CPU: the evaluation fans out over worker processes, so no GPUs are used.
    params.cuda_details = utils.CudaDetails(use_cuda=False)
    params.num_workers = 17

    # We first load in the model
    electro = eval_electro._get_model_and_loadin_weights(
        params.cuda_details, params)

    # Then the dataset
    dataset = eval_electro._get_data(params.use_val_as_test_set)

    # Then we create the beam searcher
    predictor = beam_searcher.PredictiveRanking(electro, params.cuda_details)

    # Then we go through and predict out the series of electron paths for each reaction
    assert params.num_test_set_to_use == 0, "should be run on whole dataset"
    MAX_TOP_ACC_TO_EVAL = 10

    num_to_use = len(dataset)
    # Create a pool and assign the workers to go through the dataset. The
    # context manager guarantees the pool is torn down even if a worker raises.
    with Pool(params.num_workers) as pool:
        list_of_results = list(
            tqdm.tqdm(pool.imap(_worker_func,
                                ((i, dataset, predictor, MAX_TOP_ACC_TO_EVAL)
                                 for i in range(num_to_use))),
                      total=num_to_use))
        pool.close()
        pool.join()

    # Stitch the results back together:
    top_k_accs, result_lines = zip(*list_of_results)
    acc_storage = np.stack(top_k_accs)

    # Path-level top-k accuracy: a reaction counts as correct at k when a hit
    # appears anywhere in its first k predictions (hence the cumulative sum).
    top_k_accs = np.mean(
        (np.cumsum(acc_storage, axis=1) > 0.5).astype(np.float64), axis=0)
    for k, k_acc in enumerate(top_k_accs, start=1):
        print(f"The top-{k} accuracy is {k_acc}")

    # Finally we store the reaction paths in a text file.
    with open(params.output_location, 'w') as fo:
        # write() the single joined string: writelines() would iterate it
        # character by character, which works only by accident.
        fo.write('\n'.join(result_lines))
Esempio n. 11
0
 def __init__(self):
     """Default evaluation settings: fixed checkpoint, CPU only, beam width 10."""
     # Where the trained ELECTRO weights live.
     self.chkpt_loc = "../train_electro/chkpts/electro.pth.pick"
     self.use_val_as_test_set = False
     self.beam_width = 10  # beam search width
     self.cuda_details = utils.CudaDetails(use_cuda=False)