Example #1
0
def evaluate_generated_graphs(generated_graphs, termination, nlls, start_time,
                              ts_properties, generation_batch_idx):
    """ Evaluates one batch of generated graphs and writes the results to disk.

    The generated graphs are always handed to `util.write_molecules()`
    (presumably a SMILES writer, per the original notes -- confirm in `util`).
    Molecular properties are expensive to calculate, so they are only computed,
    written to CSV, and plotted when `generation_batch_idx` == 0 (i.e. for the
    first batch of generated molecules).

    Args:
      generated_graphs (list) : Contains `GenerationGraph`s.
      termination (torch.Tensor) : Molecular termination details; contains 1 at
        index if graph from `generated_graphs` was "properly" terminated, 0
        otherwise.
      nlls (torch.Tensor) : Contains final NLL of each item in
        `generated_graphs`.
      start_time (time) : Program start time.
      ts_properties (dict) : Contains training set properties.
      generation_batch_idx (int) : Generation batch index.
    """
    epoch_key = util.get_last_epoch()
    is_first_batch = generation_batch_idx == 0

    # only the first batch gets the (costly) molecular properties; later
    # batches start from an empty property dictionary
    prop_dict = (
        get_molecular_properties(molecules=generated_graphs,
                                 epoch_key=epoch_key,
                                 termination=termination)
        if is_first_batch
        else {}
    )

    # bookkeeping entries that are recorded for every batch
    prop_dict[(epoch_key, "final_nll")] = nlls
    prop_dict[(epoch_key, "run_time")] = round(time.time() - start_time, 2)

    # all output of this function lives under the job directory
    job_dir = C.job_dir

    # `epoch_key[6:]` drops the first six characters of the key (presumably a
    # fixed "Epoch " style prefix -- confirm against `util.get_last_epoch()`)
    epoch_suffix = epoch_key[6:]

    # writing the molecules also yields the validity statistics
    fraction_valid, validity_tensor = util.write_molecules(
        molecules=generated_graphs,
        final_nlls=nlls,
        epoch=epoch_suffix + "_" + str(generation_batch_idx),
    )
    prop_dict.update({
        (epoch_key, "fraction_valid"): fraction_valid,
        (epoch_key, "validity_tensor"): validity_tensor,
    })

    # CSV output and plots are produced for the first generation batch only
    if not is_first_batch:
        return

    util.properties_to_csv(prop_dict=prop_dict,
                           csv_filename=f"{job_dir}generation.csv",
                           epoch_key=epoch_key,
                           append=True)

    # training set properties are merged in so they appear in the same plot
    plot_molecular_properties(
        properties_dict={**prop_dict, **ts_properties},
        plot_filename=f"{job_dir}generation/features{epoch_suffix}.png",
    )
Example #2
0
    def create_output_files(self) -> None:
        """
        Sets up the output files and the `generation/` directory for a new job.

        Nothing is done for restart jobs (`self.constants.restart` is truthy), so
        that existing output files are left in place for later output to be
        appended to. For a new job, the `generation.log` file is started with the
        training set properties, `util.write_model_status()` is called with
        `append=False`, and the `generation/` subdirectory is created.
        """
        if self.constants.restart:
            return

        print("* Touching output files.", flush=True)
        job_dir = self.constants.job_dir

        # start `generation.log` with the training set properties
        util.properties_to_csv(prop_dict=self.ts_properties,
                               csv_filename=job_dir + "generation.log",
                               epoch_key="Training set",
                               append=False)

        # start the model status output (the `convergence.log` file)
        util.write_model_status(append=False)

        # generation output is written to the `generation/` subdirectory
        os.makedirs(job_dir + "generation/", exist_ok=True)