def evaluate_generated_graphs(generated_graphs, termination, nlls, start_time,
                              ts_properties, generation_batch_idx):
    """
    Evaluates one batch of generated graphs and records the results.

    Every batch is handed to `util.write_molecules()`, which writes the
    molecules out and reports their validity. Molecular properties are
    expensive to calculate, so they are only computed for the first batch
    (`generation_batch_idx` == 0); only that batch is appended to the
    generation CSV and plotted against the training set properties.

    Args:
      generated_graphs (list) : Contains `GenerationGraph`s.
      termination (torch.Tensor) : Molecular termination details; contains 1
        at index if the graph from `generated_graphs` was "properly"
        terminated, 0 otherwise.
      nlls (torch.Tensor) : Contains final NLL of each item in
        `generated_graphs`.
      start_time (time) : Program start time.
      ts_properties (dict) : Contains training set properties.
      generation_batch_idx (int) : Generation batch index.
    """
    epoch_key = util.get_last_epoch()
    is_first_batch = generation_batch_idx == 0

    # the property calculation is skipped for all but the first batch; later
    # batches start from an empty property dictionary
    prop_dict = (
        get_molecular_properties(molecules=generated_graphs,
                                 epoch_key=epoch_key,
                                 termination=termination)
        if is_first_batch
        else {}
    )

    # bookkeeping entries which are available for every batch
    prop_dict.update({
        (epoch_key, "final_nll"): nlls,
        (epoch_key, "run_time"): round(time.time() - start_time, 2),
    })

    # directory prefix for all output written below
    output = C.job_dir

    # the molecules are written (and their validity determined) at this point
    # so that the validity entries are in the dictionary before the CSV step
    # NOTE(review): `epoch_key[6:]` presumably strips a fixed 6-character
    # prefix from the epoch key -- confirm against `util.get_last_epoch()`
    epoch_id = epoch_key[6:] + "_" + str(generation_batch_idx)
    fraction_valid, validity_tensor = util.write_molecules(
        molecules=generated_graphs,
        final_nlls=nlls,
        epoch=epoch_id,
    )
    prop_dict.update({
        (epoch_key, "fraction_valid"): fraction_valid,
        (epoch_key, "validity_tensor"): validity_tensor,
    })

    # nothing further is written or plotted for later generation batches
    if not is_first_batch:
        return

    # append the properties of the first batch to the generation CSV
    util.properties_to_csv(prop_dict=prop_dict,
                           csv_filename=f"{output}generation.csv",
                           epoch_key=epoch_key,
                           append=True)

    # combine with the training set properties so both appear in the plot;
    # on duplicate keys the training set entry takes precedence
    merged_properties = {**prop_dict, **ts_properties}

    # one feature plot per epoch
    plot_filename = f"{output}generation/features{epoch_key[6:]}.png"
    plot_molecular_properties(properties_dict=merged_properties,
                              plot_filename=plot_filename)
def create_output_files(self) -> None:
    """
    Sets up the output files and directory for a fresh job.

    Nothing is done when the job is a restart (`self.constants.restart` is
    truthy), so that new output gets appended to the files which already
    exist. For a new job, the training set properties are written to a new
    `generation.log` file, the model status file is started, and the
    `generation/` subdirectory is created inside the job directory.
    """
    # restart jobs keep using their existing output files
    if self.constants.restart:
        return

    print("* Touching output files.", flush=True)

    job_dir = self.constants.job_dir

    # start `generation.log` with the training set properties (not appending,
    # so that the file begins fresh)
    # NOTE(review): generation results elsewhere are appended to
    # `generation.csv` -- confirm which filename is intended
    generation_log = job_dir + "generation.log"
    util.properties_to_csv(prop_dict=self.ts_properties,
                           csv_filename=generation_log,
                           epoch_key="Training set",
                           append=False)

    # start the model status (`convergence.log`) file
    util.write_model_status(append=False)

    # generation output goes in the `generation/` subdirectory; it is not an
    # error if that directory already exists
    os.makedirs(job_dir + "generation/", exist_ok=True)