def write_model_status(epoch=None, lr=None, loss=None, score=None, append=True):
    """
    Writes the current epoch, loss, learning rate, and model score to CSV.

    A row of `convergence.csv` is produced by two successive calls: one with
    `score=None`, which writes the epoch, learning rate and loss *without* a
    trailing newline, and a later one with a score, which completes the row.

    Args:
      epoch (int) : Current epoch; written to the CSV and used as the
        tensorboard step. Only used when `score` is `None`.
      lr (float) : Learning rate. Only used when `score` is `None`.
      loss (float) : Loss value. Only used when `score` is `None`.
      score (float or str) : `None` to write the epoch/lr/loss part of a row,
        the string "NA" to terminate the row with a placeholder, or a number
        (written with 6 decimals) to terminate the row with the score.
      append (bool) : If `False`, the file is (re)created containing only the
        header and all other arguments are ignored. Default `True`.
    """
    convergence_path = C.job_dir + "convergence.csv"

    if not append:
        # start a fresh file containing only the header
        with open(convergence_path, "w") as output_file:
            output_file.write("epoch, lr, avg_loss, model_score\n")
        return

    # only write a `convergence.csv` when training
    if C.job_type != "train":
        return

    if score is None:
        # first half of the row; deliberately no newline, the score follows
        with open(convergence_path, "a") as output_file:
            output_file.write(f"Epoch {epoch}, {lr:.8f}, {loss:.8f}, ")

        # write to tensorboard
        tb_writer.add_scalar("Train/loss", loss, epoch)
        tb_writer.add_scalar("Train/lr", lr, epoch)
    elif score == "NA":
        # placeholder score, written verbatim
        with open(convergence_path, "a") as output_file:
            output_file.write(f"{score}\n")
    else:
        # any other (non-None) score is expected to be numeric
        with open(convergence_path, "a") as output_file:
            output_file.write(f"{score:.6f}\n")
def write_validation_scores(output_dir, epoch_key, model_scores, append=True):
    """
    Writes a CSV with the model validation scores as a function of the epoch.

    The scores are also sent to tensorboard when `epoch_key` has the form
    "<word> <integer>" (e.g. "Epoch 10"); for any other key (e.g.
    "Training set") only the CSV is written.

    Args:
      output_dir (str) : Directory in which `validation.csv` is written. It is
        concatenated directly with the file name, so it must end with a path
        separator.
      epoch_key (str) : For example, "Training set" or "Epoch {n}".
      model_scores (dict) : Scores to write. The keys read are "avg_nll_val",
        "avg_nll_train", "avg_nll_gen", "abs_nll_diff", and "UC-JSD".
      append (bool) : Indicates whether to append to the output file or
        start a new one. Default `True`.

    Raises:
      KeyError : If one of the expected keys is missing from `model_scores`.
    """
    validation_file_path = output_dir + "validation.csv"

    avg_nll_val = model_scores["avg_nll_val"]
    avg_nll_train = model_scores["avg_nll_train"]
    avg_nll_gen = model_scores["avg_nll_gen"]
    abs_nll_diff = model_scores["abs_nll_diff"]
    uc_jsd = model_scores["UC-JSD"]

    if not append:
        # create (or truncate) the file and write the header
        with open(validation_file_path, "w") as output_file:
            output_file.write(
                "set, avg_nll_per_molecule_val, avg_nll_per_molecule_train, "
                "avg_nll_per_molecule_gen, abs_nll_diff, uc_jsd\n"
            )

    # append the properties of interest to the CSV file
    with open(validation_file_path, "a") as output_file:
        output_file.write(
            f"{epoch_key}, {avg_nll_val:.5f}, {avg_nll_train:.5f}, "
            f"{avg_nll_gen:.5f}, {abs_nll_diff:.5f}, {uc_jsd:.7f}\n"
        )

    # Only keys carrying an epoch number are logged to tensorboard. The `try`
    # covers the parsing alone so that genuine logging errors are not hidden.
    try:
        epoch = int(epoch_key.split()[1])
    except (AttributeError, IndexError, ValueError):
        # non-string key, single-word key, or non-numeric second word
        pass
    else:
        # scalars
        tb_writer.add_scalar("NLL/validation", avg_nll_val, epoch)
        tb_writer.add_scalar("NLL/training", avg_nll_train, epoch)
        tb_writer.add_scalar("NLL/generation", avg_nll_gen, epoch)
        tb_writer.add_scalar("NLL/diff", abs_nll_diff, epoch)
        tb_writer.add_scalar("NLL/uc_jsd", uc_jsd, epoch)
def properties_to_csv(prop_dict, csv_filename, epoch_key, append=True):
    """
    Writes a CSV summarizing how training is going by comparing the properties
    of the generated structures during evaluation to the training set.

    The same values are also sent to tensorboard when `epoch_key` has the form
    "<word> <integer>" (e.g. "Epoch 10"); for any other key (e.g.
    "Training set") only the CSV is written.

    Args:
      prop_dict (dict) : Contains molecular properties, keyed by
        `(epoch_key, property_name)` tuples.
      csv_filename (str) : Full path/filename to CSV file.
      epoch_key (str) : For example, "Training set" or "Epoch {n}".
      append (bool) : Indicates whether to append to the output file (if the
        file exists) or start a new one. Default `True`.

    Raises:
      KeyError : If "fraction_valid", "avg_n_nodes", "avg_n_edges", or
        "fraction_unique" is missing from `prop_dict` for `epoch_key`.
    """
    # get all the relevant properties from the dictionary
    frac_valid = prop_dict[(epoch_key, "fraction_valid")]
    avg_n_nodes = prop_dict[(epoch_key, "avg_n_nodes")]
    avg_n_edges = prop_dict[(epoch_key, "avg_n_edges")]
    frac_unique = prop_dict[(epoch_key, "fraction_unique")]

    # use the following properties if they exist e.g. for generation epochs, but
    # not for training set
    try:
        run_time = prop_dict[(epoch_key, "run_time")]
        frac_valid_pt = round(
            float(prop_dict[(epoch_key, "fraction_valid_properly_terminated")]), 5
        )
        frac_pt = round(
            float(prop_dict[(epoch_key, "fraction_properly_terminated")]), 5
        )
        has_termination_stats = True
    except KeyError:
        # any one missing key replaces all three values by the placeholder
        run_time = "NA"
        frac_valid_pt = "NA"
        frac_pt = "NA"
        has_termination_stats = False

    # the helper returns seven histograms, unpacked in this order
    (
        norm_n_nodes_hist,
        norm_atom_type_hist,
        norm_formal_charge_hist,
        norm_numh_hist,
        norm_n_edges_hist,
        norm_edge_feature_hist,
        norm_chirality_hist,
    ) = normalize_evaluation_metrics(prop_dict, epoch_key)

    if not append:
        # file does not exist yet, create it and write the file header
        with open(csv_filename, "w") as output_file:
            output_file.write(
                "set, fraction_valid, fraction_valid_pt, fraction_pt, run_time, "
                "avg_n_nodes, avg_n_edges, fraction_unique, atom_type_hist, "
                "formal_charge_hist, numh_hist, chirality_hist, "
                "n_nodes_hist, n_edges_hist, edge_feature_hist\n"
            )

    # append the properties of interest to the CSV file
    with open(csv_filename, "a") as output_file:
        output_file.write(
            f"{epoch_key}, {frac_valid:.3f}, {frac_valid_pt}, {frac_pt}, {run_time}, "
            f"{avg_n_nodes:.3f}, {avg_n_edges:.3f}, {frac_unique:.3f}, "
            f"{norm_atom_type_hist}, {norm_formal_charge_hist}, "
            f"{norm_numh_hist}, {norm_chirality_hist}, {norm_n_nodes_hist}, "
            f"{norm_n_edges_hist}, {norm_edge_feature_hist}\n"
        )

    # write to tensorboard, but only for keys that carry an epoch number
    try:
        epoch = int(epoch_key.split()[1])
    except (AttributeError, IndexError, ValueError):
        # non-string key, single-word key, or non-numeric second word
        return

    # Scalars
    tb_writer.add_scalar("Evaluation/valid", frac_valid, epoch)
    if has_termination_stats:
        # the "NA" placeholders are meant for the CSV only, not for plotting
        tb_writer.add_scalar("Evaluation/valid_pt", frac_valid_pt, epoch)
        tb_writer.add_scalar("Evaluation/pt", frac_pt, epoch)
    tb_writer.add_scalar("Evaluation/n_nodes", avg_n_nodes, epoch)
    tb_writer.add_scalar("Evaluation/unique", frac_unique, epoch)

    # Histogram
    histograms = (
        ("Distributions/atom_type", norm_atom_type_hist),
        ("Distributions/form_charge", norm_formal_charge_hist),
        ("Distributions/hydrogen", norm_numh_hist),
        ("Distributions/n_nodes", norm_n_nodes_hist),
        ("Distributions/n_edges", norm_n_edges_hist),
        ("Distributions/edge_features", norm_edge_feature_hist),
    )
    for tag, values in histograms:
        tb_writer.add_histogram(tag, np.array(values), epoch)