예제 #1
0
def main(argv=None):
    """
    Script entry point: read the list file named on the command line and plot its contents.

    :param argv: command-line arguments passed on to parse_cmdline (None by default)
    :return: the code returned by parse_cmdline when parsing did not succeed; otherwise GOOD_RET on
        success, IO_ERROR when an IOError is raised, or INVALID_DATA when an InvalidDataError is raised
    """
    print(f"Running GaussianWrangler script plot_steps version {__version__}")

    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        # Every non-blank line of the list file becomes a list of its whitespace-separated fields
        with open(args.list) as list_file:
            row_list = [fields for fields in (row.split() for row in list_file) if fields]

        # Name the plot after the requested output name when one was given; otherwise after the list file
        name_source = args.output_fname if args.output_fname else args.list
        plot_fname = create_out_fname(name_source, base_dir=args.out_dir, ext='.png')

        plot_delta_g(plot_fname, args.temp, row_list, args.conv,
                     args.fig_width, args.fig_height, args.y_axis_label)
        print("Wrote file: {}".format(plot_fname))
    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
예제 #2
0
def create_dynamics_plots(add_rate_str, bond_types, cfg, num_monos, num_oligs, sg_ratio):
    """
    Save two plots versus time step: monomer/oligomer counts, and the count of each bond type.

    When cfg[NUM_REPEATS] > 1, every run is truncated to the length of the shortest entry in num_monos and the
    mean and standard deviation across runs are plotted. Otherwise the first (only) run is plotted with no
    standard deviations.

    :param add_rate_str: str, used in the plot title and the output file names
    :param bond_types: dict keyed by the entries of BOND_TYPE_LIST, holding the bond-count data
    :param cfg: dict of configuration values; NUM_REPEATS and OUT_DIR are read here
    :param num_monos: list with one sequence of monomer counts per run
    :param num_oligs: list with one sequence of oligomer counts per run
    :param sg_ratio: number, used in the plot title and (formatted) in the output file names
    :return: n/a, saves two png files
    """
    len_y_val_key_list = [MONOMERS, OLIGOMERS]
    min_len = len(num_monos[0])
    avg_bond_types = {}
    std_bond_types = {}

    if cfg[NUM_REPEATS] > 1:
        # Runs may differ in length; everything is cut to the shortest monomer list before stacking
        min_len = min(len(mono_list) for mono_list in num_monos)

        def stack_truncated(runs):
            # one row per run, each row cut to min_len entries
            return np.asarray([np.array(run[:min_len]) for run in runs])

        mono_runs = stack_truncated(num_monos)
        olig_runs = stack_truncated(num_oligs)

        len_y_axis_val_dicts = {MONOMERS: np.mean(mono_runs, axis=0),
                                OLIGOMERS: np.mean(olig_runs, axis=0)}
        len_y_axis_std_dev_dicts = {MONOMERS: np.std(mono_runs, axis=0),
                                    OLIGOMERS: np.std(olig_runs, axis=0)}

        for bond_type in BOND_TYPE_LIST:
            bond_runs = stack_truncated(bond_types[bond_type])
            avg_bond_types[bond_type] = np.mean(bond_runs, axis=0)
            std_bond_types[bond_type] = np.std(bond_runs, axis=0)
    else:
        len_y_axis_val_dicts = {MONOMERS: num_monos[0], OLIGOMERS: num_oligs[0]}
        len_y_axis_std_dev_dicts = {MONOMERS: None, OLIGOMERS: None}

        # NOTE(review): the monomer/oligomer data above take element [0], while the bond data are passed
        #   through unindexed -- confirm this matches what plot_bond_error_bars expects
        for bond_type in BOND_TYPE_LIST:
            avg_bond_types[bond_type] = bond_types[bond_type]
            std_bond_types[bond_type] = None

    timesteps = list(range(min_len))
    title = f"S:G Ratio {sg_ratio}, Add rate {add_rate_str} monomer/s"
    # three significant figures, made file-name friendly (no '+', and '-' in place of '.')
    sg_str = f'{sg_ratio:.3g}'.replace("+", "").replace(".", "-")
    x_axis_label = 'Time step'

    fname = create_out_fname(f'mono_olig_v_step_{sg_str}_{add_rate_str}', base_dir=cfg[OUT_DIR],
                             ext='.png')
    plot_bond_error_bars(timesteps, len_y_axis_val_dicts, len_y_axis_std_dev_dicts, len_y_val_key_list,
                         x_axis_label, 'Number', title, fname)

    fname = create_out_fname(f'bond_dist_v_step_{sg_str}_{add_rate_str}', base_dir=cfg[OUT_DIR],
                             ext='.png')
    plot_bond_error_bars(timesteps, avg_bond_types, std_bond_types, BOND_TYPE_LIST,
                         x_axis_label, 'Number of Bonds', title, fname)
예제 #3
0
def create_convergence_plots(out_fname, step_list):
    """
    Save a png with one stacked subplot per convergence measure, each plotted against step number.

    :param out_fname: name of the base csv file; the png name is derived from it
    :param step_list: list of dicts, each holding STEP_NUM and a value for every plotted measure
    :return: n/a, saves a png file and prints its relative path
    """
    png_out = create_out_fname(out_fname, prefix='', ext='.png')

    png_titles = [CONVERG, ENERGY, MAX_FORCE, RMS_FORCE, MAX_DISPL, RMS_DISPL]
    num_lists = len(png_titles)
    steps = []
    png_lists = [[] for _ in png_titles]

    # Single pass over the steps, filling the x values and one y series per title
    for s_dict in step_list:
        steps.append(s_dict[STEP_NUM])
        for series, title in zip(png_lists, png_titles):
            series.append(s_dict[title])

    fig, axs = plt.subplots(num_lists, figsize=(7, 11.5))
    for ax, series, title in zip(axs, png_lists, png_titles):
        ax.plot(steps, series)
        ax.set_title(title)
    plt.subplots_adjust(hspace=0.4)
    # pyplot-level call, so the label is applied to the current axes only
    plt.xlabel("Step number")
    plt.savefig(png_out, transparent=True, bbox_inches='tight')
    plt.close()
    print(f"Wrote file: {os.path.relpath(png_out)}")
예제 #4
0
def process_gausscom_files(cfg, pdb_tpl_content):
    """
    Process every file in cfg[GAUSSLOG_FILES], choosing the output pdb name for each.

    :param cfg: dict of configuration values (COMBINE_LOGS, OUTFILE_NAME, OUT_BASE_DIR, GAUSSLOG_FILES,
        PDB_TPL_FILE are read here)
    :param pdb_tpl_content: dict of pdb sections; its SEC_HEAD and SEC_TAIL entries are overwritten for each
        file when no pdb template file is configured
    :return: n/a, output is produced by process_gausslog_file
    """
    f_name = ''
    if cfg[COMBINE_LOGS]:
        # One shared output file; remove it before the per-file processing starts
        f_name = create_out_fname(cfg[OUTFILE_NAME], ext='.pdb', base_dir=cfg[OUT_BASE_DIR])
        silent_remove(f_name)

    for gausslog_file in cfg[GAUSSLOG_FILES]:
        if not cfg[PDB_TPL_FILE]:
            # Without a template, supply a minimal head and tail naming the source file
            pdb_tpl_content[SEC_HEAD] = ["TITLE     {}".format(gausslog_file)]
            pdb_tpl_content[SEC_TAIL] = ["END"]

        if not cfg[COMBINE_LOGS]:
            # Separate outputs: use the configured name if there is one, else name after the input file
            out_name = cfg[OUTFILE_NAME] or gausslog_file
            f_name = create_out_fname(out_name, ext='.pdb', base_dir=cfg[OUT_BASE_DIR])

        process_gausslog_file(cfg, gausslog_file, pdb_tpl_content, f_name)
예제 #5
0
def process_pdb_file(cfg, gau_tpl_content, pdb_file):
    """
    Read a pdb file and write one '.com' file for each END record encountered.

    ATOM/HETATM records are collected as [element, coordinate text] pairs. At each END record the collected
    atoms are written between the template head and tail, and collection starts over.

    :param cfg: dict of configuration values; REMOVE_H and NUM are read here
    :param gau_tpl_content: dict with SEC_HEAD and SEC_TAIL lists that surround the atom lines
    :param pdb_file: str, location of the pdb file to read
    :return: n/a, writes files; stops early once cfg[NUM] is set and that many models have been written
    """
    with open(pdb_file) as d:
        mol_num = 0
        pdb_atom_line = []
        for line in d:
            pdb_section = line[:PDB_LINE_TYPE_LAST_CHAR]
            if pdb_section == 'MODEL ':
                mol_num += 1
            elif pdb_section in ('ATOM  ', 'HETATM'):
                element = line[PDB_BEFORE_ELE_LAST_CHAR:PDB_ELE_LAST_CHAR].strip()
                if element == '':
                    # no entry in the element columns, so fall back to the atom-type columns
                    element = line[PDB_ATOM_NUM_LAST_CHAR:PDB_ATOM_TYPE_LAST_CHAR].strip()
                pdb_xyz = line[PDB_MOL_NUM_LAST_CHAR:PDB_Z_LAST_CHAR]
                pdb_atom_line.append(["{:6}".format(element), pdb_xyz])
            elif pdb_section.strip() == 'END':
                # Comparing the stripped record name recognizes END whether it is followed directly by a
                #   newline, padded with spaces to the fixed record width, or is the last line of a file with
                #   no trailing newline. 'ENDMDL' does not match.
                mol_id = '' if mol_num == 0 else '_' + str(mol_num)
                d_out = create_out_fname(pdb_file, suffix=mol_id, ext='.com')
                if cfg[REMOVE_H]:
                    # drops the last atom read
                    del pdb_atom_line[-1]
                list_to_file(
                    gau_tpl_content[SEC_HEAD] + pdb_atom_line +
                    gau_tpl_content[SEC_TAIL], d_out)
                if cfg[NUM] and mol_num >= cfg[NUM]:
                    return
                pdb_atom_line = []
def plot_mz_v_intensity(fname, data_array_dict, num_decimals_ms_accuracy,
                        out_dir):
    """
    Make a vertical-line plot of M/Z versus intensity for every entry in data_array_dict.

    :param fname: str, name of the file where the data originated; the plot name is derived from it
    :param data_array_dict: dict, label: ndarray whose first column is read as M/Z and second as intensity
    :param num_decimals_ms_accuracy: int, passed on to make_vlines_plot
    :param out_dir: None or str, passed to create_out_fname as the base directory
    :return: n/a, nothing is returned; the plot is produced by make_vlines_plot
    """
    labels = list(data_array_dict)
    first_label = labels[0]
    lower_fname = fname.lower()

    if not isinstance(first_label, str):
        # non-string labels are treated as MS2 ionization energies; the energy is only added to the level
        #   name when there is a single label and the file name does not already contain it
        level = "ms2"
        if len(labels) == 1:
            ion_energy = f"hcd{first_label}"
            if ion_energy not in lower_fname:
                level = ion_energy if level in lower_fname else level + "_" + ion_energy
    elif "ms" in first_label:
        level = first_label
    else:
        level = f"ms{first_label}"

    title = f"M/Z versus Intensity from {level.upper()} Data"
    # only add the level to the file name if it is not already there
    suffix = "_mz_v_int" if level in lower_fname else "_" + level + "_mz_v_int"
    plot_fname = create_out_fname(fname, suffix=suffix, ext='png', base_dir=out_dir)

    default_x_max = 1000.
    data_max_x = 0.
    data_max_y = 0.
    for data_array in data_array_dict.values():
        # max(a, b) keeps a unless b is strictly greater, so the running maxima start from (and can stay at) 0.
        data_max_x = max(data_max_x, np.max(data_array[:, 0]))
        data_max_y = max(data_max_y, np.max(data_array[:, 1]))

    if data_max_x > default_x_max:
        x_value_warning(data_max_x, default_x_max)

    y_max = find_pos_plot_limit(data_max_y)

    make_vlines_plot(title, "M/Z Values", "Intensity (unscaled)",
                     data_array_dict, plot_fname, num_decimals_ms_accuracy,
                     default_x_max, y_max)
예제 #7
0
def create_coms_from_mol_list(conformer_list, gau_tpl_content, base_out_name,
                              max_num_coms, print_original):
    """
    From a list of RDKit mol objects, create Gaussian input (.com) files, lowest calculated energy first,
    optionally for only the specified number of objects.

    Files beyond max_num_coms are still written while their energy ties (np.isclose) with the last one written.

    :param conformer_list: list of RDKit mol objects; the first entry is skipped unless print_original is True
    :param gau_tpl_content: template content passed on to create_com_from_pdb_str
    :param base_out_name: str, base name from which each numbered .com file name is derived
    :param max_num_coms: int or infinity
    :param print_original: Boolean, whether to print the initial conformation
    :return: n/a, writes files and prints a summary
    """
    start_at = 0 if print_original else 1
    # Energies and molecules must come from the same slice so each energy stays paired with its own molecule
    candidate_mols = conformer_list[start_at:]

    RDLogger.DisableLog('rdApp.*')
    energy_list = []
    for current_mol in candidate_mols:
        # maxIters=0: no optimization steps are requested; only the reported energy is used
        opt_results = MMFFOptimizeMoleculeConfs(current_mol, maxIters=0)
        energy_list.append(opt_results[0][1])

    # sort on energy only, so mol objects are never compared with each other
    zipped_sorted = sorted(zip(energy_list, candidate_mols), key=itemgetter(0))

    mol_num = 0
    last_energy = np.nan
    print_note = False
    com_fname = None
    for energy, current_mol in zipped_sorted:
        if mol_num >= max_num_coms:
            # past the limit: keep going only while the energy ties with the previous file written
            if np.isclose(energy, last_energy):
                print_note = True
            else:
                break
        mol_num += 1
        last_energy = energy
        com_fname = create_out_fname(base_out_name,
                                     suffix=f"_{mol_num}",
                                     ext=".com",
                                     rel_path=True)
        pdb_str = MolToPDBBlock(current_mol)
        create_com_from_pdb_str(pdb_str, gau_tpl_content, com_fname)
        print(f"{int(energy):12,} {com_fname}")

    if com_fname:
        print(
            f"Wrote {mol_num} files, ending with: {os.path.relpath(com_fname)}"
        )
    else:
        print("No output created from rotating dihedrals.")
    if print_note:
        print(
            f"More than {max_num_coms} conformations were output to ties calculated energies."
        )
예제 #8
0
def create_bond_v_sg_plots(add_rate_str, cfg, sg_adjs):
    """
    Save a plot of bond type yield versus SG ratio, with error bars.

    :param add_rate_str: str, used in the plot title and the output file name
    :param cfg: dict of configuration values; SG_RATIOS, NUM_REPEATS, BREAK_CO, and OUT_DIR are read here
    :param sg_adjs: data passed on to get_avg_percent_bonds
    :return: n/a, saves a png file
    """
    sg_ratios = cfg[SG_RATIOS]
    all_avg_bonds, all_std_bonds = get_avg_percent_bonds(BOND_TYPE_LIST, len(sg_ratios), sg_adjs,
                                                         cfg[NUM_REPEATS], cfg[BREAK_CO])
    fname = create_out_fname(f'bond_dist_v_sg_{add_rate_str}', base_dir=cfg[OUT_DIR], ext='.png')
    plot_bond_error_bars(sg_ratios, all_avg_bonds, all_std_bonds, BOND_TYPE_LIST,
                         'SG Ratio', 'Bond Type Yield (%)', f"Add rate {add_rate_str} monomer/s", fname)
예제 #9
0
def setup_and_submit(cfg, current_job_list, tpl_dict, testing_mode, chk_warn):
    """
    Create the ini and slurm files for the given job list and, unless disabled, submit the slurm file.

    :param cfg: dict of configuration values; OUT_DIR, START_FROM_SAME_CHK, SBATCH_TPL, TPL_DICT, and NO_SUBMIT
        are read here
    :param current_job_list: non-empty list of str job names; they are joined with '_' to build the suffix
        added to the job name, and an empty-string first entry contributes nothing to that suffix
    :param tpl_dict: dict with JOB_NAME; its JOB_DESCRIP entry is set here
    :param testing_mode: Boolean; when True, 'sbatch' is not called and a note is printed instead
    :param chk_warn: passed to create_sbatch_dict as ignore_chk_warning
    :return: n/a, writes files and prints the submission result
    """
    if current_job_list[0] == '':
        suffix_parts = current_job_list[1:]
    else:
        suffix_parts = current_job_list
    suffix = '_' + '_'.join(suffix_parts) if suffix_parts else ''
    tpl_dict[JOB_DESCRIP] = tpl_dict[JOB_NAME] + suffix

    new_ini_fname = create_out_fname(tpl_dict[JOB_DESCRIP],
                                     ext='.ini',
                                     base_dir=cfg[OUT_DIR])
    new_sbatch_fname = create_out_fname(tpl_dict[JOB_DESCRIP],
                                        ext='.slurm',
                                        base_dir=cfg[OUT_DIR])

    sbatch_dict = create_sbatch_dict(
        cfg,
        tpl_dict,
        os.path.relpath(new_ini_fname),
        current_job_list,
        start_from_job_name_chk=cfg[START_FROM_SAME_CHK],
        ignore_chk_warning=chk_warn)
    tpl_str = read_tpl(cfg[SBATCH_TPL])
    fill_save_tpl(tpl_str, sbatch_dict, cfg[SBATCH_TPL], new_sbatch_fname)

    # read ini_tpl and check if it has fields for submitting spawned jobs, if needed
    create_ini_with_req_keys(current_job_list, cfg[TPL_DICT], cfg,
                             new_ini_fname)

    if not cfg[NO_SUBMIT]:
        # Do not want to actually (attempt to) submit a job during testing; this way, do not have to specify both
        #   testing mode and NO_SUBMIT
        if testing_mode:
            # Same text that running 'echo' produced, without needing an external executable
            sbatch_result = "Running in testing mode: 'sbatch' not called"
        else:
            #  Will not be covered in testing mode, as is not part of written code to be tested
            sbatch_result = subprocess.check_output(
                ["sbatch", new_sbatch_fname]).decode("utf-8").strip()
        print(sbatch_result)
예제 #10
0
def process_smiles(gau_tpl_fname, smi_list, max_num_confs, out_dir):
    """
    Write Gaussian input files for the conformers generated from each SMILES string.
    https://www.rdkit.org/docs/GettingStartedInPython.html

    :param gau_tpl_fname: str, the location of the template file to use to create input files
    :param smi_list: list of SMILES strings; any that RDKit cannot parse are skipped with a warning
    :param max_num_confs: int, the maximum number of conformations to generate
    :param out_dir: str, directory where files are to be saved (if None, saves to working directory)
    :return: N/A, writes files and prints notes on files created
    :raises InvalidDataError: if the template does not contain REQ_STR
    """
    gau_tpl_str = read_tpl(gau_tpl_fname)
    if REQ_STR not in gau_tpl_str:
        raise InvalidDataError(
            f"Did not find the required string '{REQ_STR}' in the provided Gaussian input "
            f"template file.")

    for smi in smi_list:
        parsed_mol = Chem.MolFromSmiles(smi)
        if parsed_mol is None:
            warning(f"Skipping SMILES input string '{smi}' due to error\n")
            continue

        Chem.Kekulize(parsed_mol)
        mol = AddHs(parsed_mol)
        confs = gen_conformers(mol, num_confs=max_num_confs)
        base_fname = create_out_fname(get_mol_name(smi),
                                      ext='com',
                                      base_dir=out_dir,
                                      rel_path=True)

        # NOTE(review): the count printed below is the last conformer id + 1, which presumes gen_conformers
        #   yields ids 0..n-1 in order -- confirm
        conf_id = -1  # so the summary reports 0 files when no conformers are returned
        for conf_id in confs:
            com_fname = create_out_fname(base_fname, suffix=f'_{conf_id}')
            coord_list = get_pdb_coord_list(MolToPDBBlock(mol, confId=conf_id))
            atom_block = "\n".join(coord_list)
            fill_save_tpl(gau_tpl_str, {ATOMS: atom_block},
                          gau_tpl_fname,
                          com_fname,
                          print_info=False)
        print(f"Wrote {conf_id + 1} files with base name '{base_fname}'")
예제 #11
0
def produce_output(adj_matrix, mono_list, cfg):
    """
    Write or print the requested output formats for the structure described by adj_matrix and mono_list.

    Unless SMILES output is suppressed and none of JSON/PNG/SVG is requested, an RDKit molecule is built; its
    SMILES string is then saved (when cfg[SAVE_SMI]) or printed. After that, a file is written for each of the
    TCL/JSON/PNG/SVG formats enabled in cfg.

    :param adj_matrix: adjacency data passed to generate_mol and gen_tcl
    :param mono_list: monomer data passed to generate_mol and gen_tcl
    :param cfg: dict of configuration values (output flags, BASENAME, OUT_DIR, IMAGE_SIZE, and TCL options)
    :return: n/a, prints and/or writes files
    :raises InvalidDataError: if producing the SMILES string fails
    """
    if cfg[SUPPRESS_SMI] and not (cfg[SAVE_JSON] or cfg[SAVE_PNG] or cfg[SAVE_SVG]):
        format_list = [SAVE_TCL]
        mol = None  # Make IDE happy
    else:
        # Default out is SMILES, which requires getting an rdKit molecule object; also required for everything
        #    except the TCL format
        format_list = [SAVE_TCL, SAVE_JSON, SAVE_PNG, SAVE_SVG]
        block = generate_mol(adj_matrix, mono_list)
        mol = MolFromMolBlock(block)
        try:
            smiles = MolToSmiles(mol)
            smi_str = smiles + '\n'
        except Exception as e:
            # 'Exception' (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate;
            #    chaining keeps the underlying cause in the traceback
            raise InvalidDataError("Error in producing SMILES string.") from e
        # if SMI is to be saved, don't output to stdout
        if cfg[SAVE_SMI]:
            fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=SAVE_SMI)
            str_to_file(smi_str, fname, print_info=True)
        else:
            print("\nSMILES representation: \n", smiles, "\n")
        if cfg[SAVE_PNG] or cfg[SAVE_SVG] or cfg[SAVE_JSON]:
            # PNG and SVG make 2D images and thus need coordinates
            # JSON will save coordinates--zero's if not computed; might as well compute and save non-zero values
            Compute2DCoords(mol)

    for save_format in format_list:
        if not cfg[save_format]:
            continue
        fname = create_out_fname(cfg[BASENAME], base_dir=cfg[OUT_DIR], ext=save_format)
        if save_format == SAVE_TCL:
            gen_tcl(adj_matrix, mono_list, tcl_fname=fname, chain_id=cfg[CHAIN_ID],
                    psf_fname=cfg[PSF_FNAME], toppar_dir=cfg[TOPPAR_DIR], out_dir=cfg[OUT_DIR])
        elif save_format == SAVE_JSON:
            json_str = MolToJSON(mol)
            str_to_file(json_str + '\n', fname)
        elif save_format in (SAVE_PNG, SAVE_SVG):
            MolToFile(mol, fname, size=cfg[IMAGE_SIZE])
        print(f"Wrote file: {fname}")
예제 #12
0
def save_mol_files(smi_list, out_dir):
    """
    Save a mol file with 2D coordinates (and no added hydrogens) for each SMILES string given.

    :param smi_list: iterable of str, each in standard SMILES format; each string is also the base of its
        output file name
    :param out_dir: None or str, if None saves file to current directory, if str to location in str
    :return: n/a, saves a mol file for each smi
    """
    for smiles in smi_list:
        mol_fname = create_out_fname(smiles, ext='mol', base_dir=out_dir)
        # NOTE(review): RDKit's MolFromSmiles is documented to return None for an unparsable string, which is
        #   not checked for here -- confirm inputs are validated by the caller
        rd_mol = Chem.MolFromSmiles(smiles)

        # Kekulize and compute 2D coordinates before writing
        Chem.Kekulize(rd_mol)
        Compute2DCoords(rd_mol)
        MolToMolFile(rd_mol, mol_fname, includeStereo=False)
예제 #13
0
def make_image_grid(file_label,
                    smi_list,
                    labels=None,
                    out_dir=PNG_DIR,
                    mol_img_size=(400, 300),
                    write_output=True):
    """
    Given a molecular formula (or other label) and the set of SMI, make an image grid of all smiles within
    https://www.rdkit.org/docs/GettingStartedInPython.html

    With exactly one molecule, a single-molecule image is saved and its label is appended to the file name;
    otherwise a grid with three molecules per row is saved, using the labels as legends.

    :param file_label: str, such as chemical formula that corresponds to all smiles in SMILES set
    :param smi_list: list or set of SMILES strings; used to generate images
    :param labels: if None (or empty), will use the smi_list as labels; otherwise a list to use
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    """
    # Materialize once: a set cannot be indexed for the default labels, and a single fixed ordering keeps each
    #   label next to the molecule built from it
    smi_list = list(smi_list)

    mols = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = labels if labels else smi_list

    single_mol = len(mols) == 1
    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not work
        # for one image), so add to file name
        file_label += '_' + img_labels[0]
    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)

    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols,
                                   molsPerRow=3,
                                   subImgSize=mol_img_size,
                                   legends=img_labels)
        img_grid.save(fname)
    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
def print_clean_csv(fname, fname_lower, ms_level, data_array, comment, direct_injection, omit_csv_headers,
                    numpy_save_fmt, out_dir):
    """
    Save data_array as a csv file whose name is built from fname plus descriptive suffixes.

    :param fname: str, name of the originating file; the output name is derived from it
    :param fname_lower: str, used for the "ms<level>" and "direct" checks (presumably fname in lower case)
    :param ms_level: str, concatenated with "ms" when building the suffix
    :param data_array: ndarray to save; only the first two columns are saved when headers are omitted
    :param comment: str, placed before the quoted column headers when headers are written
    :param direct_injection: Boolean, enables the "_unlabeled"/"_direct" suffixes
    :param omit_csv_headers: Boolean, whether to write no header (and only the first two columns)
    :param numpy_save_fmt: format passed to np.savetxt
    :param out_dir: None or str, passed to create_out_fname as the base directory
    :return: n/a, writes a file and prints its relative path
    """
    suffix_parts = []
    if "ms" + ms_level not in fname_lower:
        suffix_parts.append(f"_ms{ms_level}")
    # NOTE(review): this check reads fname while the neighboring checks read fname_lower -- confirm intended
    if "clean" not in fname:
        suffix_parts.append("_clean")
    if direct_injection:
        if omit_csv_headers:
            suffix_parts.append("_unlabeled")
        elif "direct" not in fname_lower:
            suffix_parts.append("_direct")
    suffix = "".join(suffix_parts)

    # data_array will already be properly sorted; not rounded but okay because printing takes care of this
    f_out = create_out_fname(fname, suffix=suffix, ext='csv', base_dir=out_dir)
    # noinspection PyTypeChecker
    if omit_csv_headers:
        np.savetxt(f_out, data_array[:, :2], fmt=numpy_save_fmt, delimiter=',')
    else:
        header_line = comment + quote('","'.join(CSV_RET_HEADER))
        np.savetxt(f_out, data_array, fmt=numpy_save_fmt, delimiter=',',
                   header=header_line, comments='')
    print(f"Wrote file: {os.path.relpath(f_out)}")
예제 #15
0
def make_dbe_mw_graphs(fkey, ion_energies_dict, out_dir=None):
    """
    Make and save a four-panel figure of DBE and M/Z statistics versus fragmentation energy.

    From top to bottom the panels are: weighted average DBE (with std dev error bars); DBE variance, skew,
    and kurtosis; weighted average M/Z (with std dev error bars); M/Z variance, skew, and kurtosis.
    The file name is built from fkey, GRAPH_SUFFIX, and a png extension.

    :param fkey: str, used to designate sets of MS2 data
    :param ion_energies_dict: dict keyed by energy level; each value holds AVG_DBE and AVG_MZ entries that
        unpack to (weighted average, std dev, variance, skew, kurtosis)
    :param out_dir: None if default output location is to be used
    :return: nothing
    """
    energy_levels = sorted(ion_energies_dict)

    # One list per statistic, in stored order: average, std dev, variance, skew, kurtosis
    dbe_stats = [[] for _ in range(5)]
    mz_stats = [[] for _ in range(5)]
    for energy_level in energy_levels:
        avg, dev, var, skew, kurt = ion_energies_dict[energy_level][AVG_DBE]
        for column, value in zip(dbe_stats, (avg, dev, var, skew, kurt)):
            column.append(value)
        avg, dev, var, skew, kurt = ion_energies_dict[energy_level][AVG_MZ]
        for column, value in zip(mz_stats, (avg, dev, var, skew, kurt)):
            column.append(value)
    dbe_list, dbe_dev, dbe_var, dbe_skew, dbe_kurt = dbe_stats
    mz_list, mz_dev, mz_var, mz_skew, mz_kurt = mz_stats

    def label_axes(ax, title, y_label):
        ax.set_title(title)
        ax.set_xlabel('Fragmentation Energy')
        ax.set_ylabel(y_label)

    def plot_higher_moments(ax, var_vals, skew_vals, kurt_vals, title):
        ax.plot(energy_levels, var_vals, 'b', label="variance")
        ax.plot(energy_levels, skew_vals, 'g', label="skew")
        ax.plot(energy_levels, kurt_vals, 'r', label="kurtosis")
        ax.legend(loc=0)
        label_axes(ax, title, 'Property value')

    out_filename = create_out_fname(fkey, suffix=GRAPH_SUFFIX, base_dir=out_dir, ext="png")
    fig = plt.figure(figsize=(9, 12))
    # The add_subplot sometimes throws a warning that we want to ignore
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        # ax1 and ax3 are the first two rows (DBE); ax2 and ax4 are the last two rows (M/Z)
        ax1 = fig.add_subplot(411)
        ax2 = fig.add_subplot(413)
        ax3 = fig.add_subplot(412)
        ax4 = fig.add_subplot(414)

    ax1.errorbar(energy_levels, dbe_list, yerr=dbe_dev, fmt='or-')
    label_axes(ax1, 'DBE vs. Fragmentation Energy', 'Double Bond Equivalent')
    plot_higher_moments(ax3, dbe_var, dbe_skew, dbe_kurt, 'DBE Statistics vs. Fragmentation Energy')

    ax2.errorbar(energy_levels, mz_list, yerr=mz_dev, fmt='ob-')
    label_axes(ax2, 'Weighted Average M/Z vs. Fragmentation Energy', 'Weighted Average M/Z')
    plot_higher_moments(ax4, mz_var, mz_skew, mz_kurt, 'M/Z Statistics vs. Fragmentation Energy')

    fig.tight_layout()
    fig.savefig(out_filename)
    print(f"Wrote file: {os.path.relpath(out_filename)}")
    plt.close()
예제 #16
0
def process_gausscom_file(cfg, gausscom_file, pdb_tpl_content):
    """
    Read the atom types and coordinates from a Gaussian input file and write them to a pdb file.

    When cfg[PDB_TPL_FILE] is set, a copy of the template's atom section is updated with the coordinates read
    (warning on any atom type mismatch); otherwise a HETATM row is built for each atom.

    :param cfg: dict of configuration values; PDB_TPL_FILE and OUT_BASE_DIR are read here
    :param gausscom_file: str, location of the Gaussian input file to read
    :param pdb_tpl_content: dict with SEC_HEAD and SEC_TAIL (and, when a template is used, SEC_ATOMS and
        NUM_ATOMS) entries
    :return: n/a, writes a pdb file named after gausscom_file
    :raises InvalidDataError: when a template is used and the number of atoms read differs from the template's
    """
    # NOTE(review): the header parsing advances the file with next(d); no StopIteration handling is visible
    #    here, so a file that ends inside the header presumably propagates StopIteration -- confirm
    with open(gausscom_file) as d:
        if cfg[PDB_TPL_FILE]:
            # work on a copy so the template's atom section is not modified
            pdb_data_section = copy.deepcopy(pdb_tpl_content[SEC_ATOMS])
        else:
            pdb_data_section = []
        section = SEC_HEAD
        atom_id = 0

        for line in d:
            line = line.strip()
            # not currently keeping anything from the header; just check num atoms
            if section == SEC_HEAD:
                # there may be some instructions (which start with %, and can have some blank lines) before the
                #    "route card lines" (which start with #)
                while not GAU_HEADER_PAT.match(line):
                    line = next(d).strip()
                # skip first line of route card
                line = next(d).strip()
                # for "route card" and then description, there may be more than one header line; look for blank line
                for i in range(2):
                    while len(line) > 0:
                        line = next(d).strip()
                    # now move past the blank line, and get the content of the following line
                    line = next(d).strip()
                # now on charge, multiplicity line, which we also skip with the "continue"
                section = SEC_ATOMS
                continue

            elif section == SEC_ATOMS:
                if len(line) == 0:
                    # Since the tail will come only from the template, nothing more is needed after reading atoms
                    break
                split_line = line.split()

                atom_type = split_line[0]
                # if working from a template, check atom type
                if cfg[PDB_TPL_FILE]:
                    try:
                        # the last space-separated token of item 8 in the template row is taken as the atom type
                        pdb_atom_type = pdb_data_section[atom_id][8].split(
                            ' ')[-1]
                    except IndexError:
                        raise InvalidDataError(
                            'Gausscom file: {}\n   has more atoms than the expected {} atoms in '
                            'the template file: {}'.format(
                                gausscom_file, pdb_tpl_content[NUM_ATOMS],
                                cfg[PDB_TPL_FILE]))
                    if atom_type != pdb_atom_type:
                        # atom_id reported here is the zero-based index
                        warning(
                            "Atom types do not match for atom number {}; pdb atom type is {} while gausscom type "
                            "is {}".format(atom_id, pdb_atom_type, atom_type))
                else:
                    # add a placeholder entry, immediately replaced by the full row on the next statement
                    pdb_data_section.append(atom_id)
                    pdb_data_section[atom_id] = [
                        'HETATM', '{:5d}'.format(atom_id + 1),
                        ' {:4} '.format(atom_type), 'UNL  ', 1, 0.0, 0.0, 0.0,
                        '  1.00  0.00          {:>2}'.format(atom_type)
                    ]
                # items 5-7 of the row are replaced by the three numbers that follow the atom type on the line
                pdb_data_section[atom_id][5:8] = map(float, split_line[1:4])
                atom_id += 1

    # Now that finished reading the file, first make sure didn't exit before reaching the desired number of atoms
    if cfg[PDB_TPL_FILE]:
        if atom_id != pdb_tpl_content[NUM_ATOMS]:
            raise InvalidDataError(
                'In gausscom file: {}\n  found {} atoms, while the pdb template has {} atoms'
                .format(gausscom_file, atom_id, pdb_tpl_content[NUM_ATOMS]))
    f_name = create_out_fname(gausscom_file,
                              ext='.pdb',
                              base_dir=cfg[OUT_BASE_DIR])
    list_to_file(pdb_tpl_content[SEC_HEAD] + pdb_data_section +
                 pdb_tpl_content[SEC_TAIL],
                 f_name,
                 list_format=PDB_FORMAT)
def write_output(fname, ms_level, num_matches, short_output_list, long_output_list, matched_formulas,
                 combined_out_fname, omit_mol_ion_flag, deprot_flag, prot_flag, write_mode, out_dir):
    """
    Print a match summary and write the output files from matching M/Z values to the lignin molecule library.

    :param fname: location of input file processed
    :param ms_level: int, type of MS output, for output name so there are separate files from multiple-channel input
    :param num_matches: the number of matches made between input M/Z and MW in lignin library
    :param short_output_list: list of dicts of summary matching data (one dict per match)
    :param long_output_list: list of dicts of extended matching data
    :param matched_formulas: set of formula names that were matched to M/Z values
    :param combined_out_fname: None or string if output from multiple files is to be written to one file
    :param omit_mol_ion_flag: boolean to indicate if molecular ion matches were not attempted (True) or sought (False)
    :param deprot_flag: boolean; when True, "deprotonated ion" is listed among the match types in the summary
    :param prot_flag: boolean; when True, "protonated ion" is listed among the match types in the summary
    :param write_mode: str, file mode used for the extended csv; when 'a', the summary text omits its header
    :param out_dir: location of output directory, or None if the current directory is the output directory
    :return: n/a; several output files created
    """
    # prepare string for txt output file; the header is left off when appending
    header = '' if write_mode == 'a' else MATCH_STR_HEADER
    match_lines = []
    for mz_dict in short_output_list:
        peak_str = MZ_STR_FMT.format(mz_dict[M_Z], mz_dict[INTENSITY], mz_dict[RET_TIME])
        match_lines.append(MATCH_STR_FMT.format(peak_str, mz_dict[REL_INTENSITY], mz_dict[CALC_MW],
                                                mz_dict[PPM_ERR], mz_dict[PARENT_FORMULA], mz_dict[DBE],
                                                mz_dict[MATCH_TYPE]))
    short_txt_output_str = header + "".join(match_lines)

    # only add the ms level to the output names if the input name does not already include it
    ms_str = f"_ms{ms_level}"
    level_prefix = "" if ms_str in fname else ms_str
    suffix = level_prefix + DEF_SUFFIX
    ext_suffix = level_prefix + DEF_LONG_SUFFIX

    f_out_txt = create_out_fname(fname, suffix=suffix, base_dir=out_dir, ext="txt")
    f_out_csv = create_out_fname(fname, suffix=suffix, base_dir=out_dir, ext="csv")
    if combined_out_fname:
        long_base, long_suffix = combined_out_fname, "_ext"
    else:
        long_base, long_suffix = fname, ext_suffix
    f_out_long = create_out_fname(long_base, suffix=long_suffix, base_dir=out_dir, ext="csv")

    # Print quick summary; first note which types of matches were investigated
    match_str_list = [] if omit_mol_ion_flag else ["molecular ion"]
    if deprot_flag:
        match_str_list.append("deprotonated ion")
    if prot_flag:
        match_str_list.append("protonated ion")
    print(f"    {num_matches} of these matched a MW in our dictionaries for a {' or a '.join(match_str_list)}")

    # save output to files; the summary files are always (over)written rather than appended
    short_write_mode = 'w'
    if num_matches != 0:
        str_to_file(short_txt_output_str, os.path.relpath(f_out_txt), print_info=True, mode=short_write_mode)
        write_csv(short_output_list, os.path.relpath(f_out_csv), SHORT_OUTPUT_HEADERS, extrasaction="ignore",
                  mode=short_write_mode)
    else:
        warning(f"No MW to MZ matches (within specified ppm error) found for file: {os.path.basename(fname)}\n    "
                f"Summary output will not be printed.")

    # structure images for each matched formula go in their own directory
    struct_dir = os.path.join(out_dir, STRUCT_DIR) if out_dir else STRUCT_DIR
    make_dir(struct_dir)
    for formula in matched_formulas:
        # the lookup key is the formula without any '*' characters
        smi_key = formula.replace("*", "")
        make_image_grid(formula, list(FORMULA_SMI_DICT[smi_key]), out_dir=struct_dir, write_output=False)

    # print long output even if no matches
    write_csv(long_output_list, os.path.relpath(f_out_long), OUTPUT_HEADERS, extrasaction="ignore", mode=write_mode)
예제 #18
0
def process_gausscom_file(gausscom_file, tpl_com_content, read_new_charge, out_dir):
    """
    Read the atom coordinates from a Gaussian input file and write a new input file that combines them with the
    head and tail sections of a template.
    :param gausscom_file: str, location of the Gaussian input file to read
    :param tpl_com_content: dict of template content; the entries for SEC_HEAD, SEC_ATOMS, ATOM_TYPES, and SEC_TAIL
        are read. When read_new_charge is set, the last SEC_HEAD entry is overwritten in place.
    :param read_new_charge: if truthy, the charge/multiplicity line found in gausscom_file is validated and then
        replaces the last line of the template head
    :param out_dir: passed as base_dir to create_out_fname when naming the output file
    :return: None; the new file is written with list_to_file
    :raises InvalidDataError: if the charge/multiplicity line is invalid, an atom type differs from the template,
        the file lists more atoms than the template, or the file cannot be decoded. The final atom count is also
        passed to check_num_atoms.
    """
    # to make the later part easier to read
    tpl_atoms = tpl_com_content[SEC_ATOMS]
    tpl_atom_types = tpl_com_content[ATOM_TYPES]
    tpl_atom_num = len(tpl_atom_types)
    base_fname = os.path.basename(gausscom_file)
    with open(gausscom_file) as d:
        section = SEC_HEAD
        atom_id = 0
        atom_content = []

        try:
            for line in d:
                line = line.strip()
                # not currently keeping anything from the header; just check num atoms
                if section == SEC_HEAD:
                    # there may be some instructions (which start with %, and can have some blank lines) before the
                    #    "route card lines" (which start with #)
                    while not GAU_HEADER_PAT.match(line):
                        line = next(d).strip()
                    # skip first line of route card
                    line = next(d).strip()
                    # for "route card" and then description, there may be more than one header line; look for blank line
                    for _ in range(2):
                        while len(line) > 0:
                            line = next(d).strip()
                        # now move past the blank line, and get the content of the following line
                        line = next(d).strip()
                    # now on charge, multiplicity line, which we also skip unless we use its charge/mult
                    if read_new_charge:
                        # make sure reading a valid charge/mult line, with at least 2 integers
                        try:
                            charge_mult = line.split()
                            int(charge_mult[0])
                            int(charge_mult[1])
                            # entries come in (charge, multiplicity) pairs, so an odd count is invalid
                            if len(charge_mult) % 2 != 0:
                                raise IndexError
                        except (IndexError, ValueError):
                            raise InvalidDataError("Problem while reading file: {}\nOption to read charge and "
                                                   "multiplicity from template not chosen, but found invalid data on "
                                                   "the expected line: {}".format(base_fname, line))
                        tpl_com_content[SEC_HEAD][-1] = line
                    section = SEC_ATOMS
                    continue

                elif section == SEC_ATOMS:
                    # stay in atom section until a blank line is reached
                    while len(line) > 0:
                        split_line = line.split()
                        # if there is a freeze/no freeze col, will be 5 columns (split by ' '); Keep atom info together
                        if len(split_line) == 5:
                            atom_info = "{:2}{:>8}".format(split_line[0], split_line[1])
                        else:
                            atom_info = split_line[0]

                        # if template has atoms, check atom type
                        if tpl_atom_num > 0:
                            # report extra atoms as invalid data, rather than failing with an IndexError on the
                            #    template lookups below
                            if atom_id >= tpl_atom_num:
                                raise InvalidDataError("Problem while reading file: {}\nFound more atoms than the "
                                                       "{} atoms in the tpl file.".format(base_fname, tpl_atom_num))
                            atom_type = atom_info.split()[0].split('(')[0]
                            if atom_type != tpl_atom_types[atom_id]:
                                # argument order matches the message: the file's type first, then the tpl's type
                                raise InvalidDataError("Problem while reading file: {}\nAtom types do not match for "
                                                       "atom number {}: file has type {} while tpl has type "
                                                       "{}".format(base_fname, atom_id + 1,
                                                                   atom_type, tpl_atom_types[atom_id]))
                            # use the template's atom entry in the output
                            atom_info = tpl_atoms[atom_id]

                        atom_xyz = ["{:>12}".format(x) for x in split_line[-3:]]
                        atom_content.append('{:18}'.format(atom_info) + '  '.join(atom_xyz))
                        atom_id += 1
                        line = next(d).strip()
                    # Don't need to read the tail, because we won't use it
                    break
        except StopIteration:
            # reached the end of the file while advancing with next(); use whatever has been read so far
            pass
        except UnicodeDecodeError:
            raise InvalidDataError(f"Error in reading file: {gausscom_file}\n           Exiting program.")

        # now loop is done; check atom number if atoms are in the tpl file
        check_num_atoms(atom_id, gausscom_file, tpl_atom_num)

        f_name = create_out_fname(gausscom_file, ext='.com', base_dir=out_dir)
        list_to_file(tpl_com_content[SEC_HEAD] + atom_content + tpl_com_content[SEC_TAIL], f_name)
예제 #19
0
def plot_total_intensity_v_ret_time(fname, ms_level, data_array,
                                    num_decimals_ret_time_accuracy, out_dir):
    """
    Sum the intensities recorded at each (rounded) retention time and plot the totals versus retention time.
    Note that the retention time column of the data_array that is passed in is rounded in place.
    :param fname: str, name of the file where the data originated
    :param ms_level: str, used to distinguish between different MS output of the same input file (no overwriting)
    :param data_array: ndarray (n x 3) with M/Z, intensity, and retention times
    :param num_decimals_ret_time_accuracy: number of decimal points in retention time accuracy, for rounding
    :param out_dir: None or str, provides location where new file should be saved (None for current directory)
    :return: ndarray, (m x 2), where m is the number of unique retention times, in first column. Second column is
        total intensity for that retention time.
    """
    intensity_col = 1
    ret_time_col = 2
    x_axis_limit = 16.

    # round before grouping, so retention times that agree within the requested accuracy are combined
    data_array[:, ret_time_col] = np.around(data_array[:, ret_time_col], num_decimals_ret_time_accuracy)
    # only the retention time order matters; the intensity and M/Z order does not
    sorted_data = data_array[data_array[:, ret_time_col].argsort()]
    ret_times = np.unique(sorted_data[:, ret_time_col])
    summed_intensities = np.array(
        [np.sum(sorted_data[sorted_data[:, ret_time_col] == ret_time][:, intensity_col]) for ret_time in ret_times],
        dtype=float)

    largest_ret_time = np.max(ret_times)
    largest_total = np.max(summed_intensities)
    if largest_ret_time > x_axis_limit:
        x_value_warning(largest_ret_time, x_axis_limit)
    y_axis_limit = find_pos_plot_limit(largest_total)

    # only add the MS level to the output name if the input name does not already note one
    if "_ms" in fname.lower():
        name_suffix = "_tot_int"
    else:
        name_suffix = f"_ms{ms_level}_tot_int"
    plot_fname = create_out_fname(fname, suffix=name_suffix, ext='png', base_dir=out_dir)

    ret_time_tot_intensity_array = np.column_stack((ret_times, summed_intensities))
    make_vlines_plot("Total Intensity Plot", "Retention time (min)", "Total intensity (unscaled)",
                     {"total_intensities": ret_time_tot_intensity_array}, plot_fname,
                     num_decimals_ret_time_accuracy, x_axis_limit, y_axis_limit, loc="upper left")
    return ret_time_tot_intensity_array
예제 #20
0
def process_gausslog_file(gausslog_file, com_tpl_content, charge_from_log_flag,
                          find_low_energy, step_num, base_dir, out_fname):
    """
    Extract one set of atomic coordinates from a Gaussian log file and write a new Gaussian input file made of the
    template head, the extracted coordinates, and the template tail.
    :param gausslog_file: str, location of the Gaussian log file to read
    :param com_tpl_content: dict of template content; the entries for SEC_HEAD, SEC_ATOMS, SEC_TAIL, NUM_ATOMS, and
        BASE_NAME are read. Entries of the SEC_HEAD list are overwritten in place: the third from the end with a
        comment naming the source file and, when the charge is read from the log, the last one with the
        charge and multiplicity.
    :param charge_from_log_flag: if falsy, lines matching GAU_CHARGE_PAT are read from the log file and replace the
        last line of the template head; if truthy, that line of the template is left unchanged
    :param find_low_energy: if truthy, keep the coordinates read before the lowest energy line (matched by
        GAU_E_PAT) rather than the last coordinates; only energies below 0.0 are considered
    :param step_num: None, or a value convertible to int; if used (and find_low_energy is falsy), reading stops at
        the line matching GAU_STEP_PAT that reports this step number
    :param base_dir: passed as base_dir to create_out_fname when naming the output file
    :param out_fname: if truthy, the name to use for the output file; otherwise the name is built from
        gausslog_file, the template's BASE_NAME, and the '.com' extension
    :return: None; the new file is written with list_to_file
    :raises InvalidDataError: if an atomic number is not in ATOM_NUM_DICT, an atom type or the number of atoms
        does not match the template, or no coordinates were selected
    """
    with open(gausslog_file) as d:
        rel_path_fname = os.path.relpath(gausslog_file)
        # The header may be more than 5 lines long--counting from end makes sure the comment goes in the correct line
        if find_low_energy:
            com_tpl_content[SEC_HEAD][-3] = "Low energy conformation from file {}".format(rel_path_fname)
        elif step_num:
            step_num = int(step_num)
            com_tpl_content[SEC_HEAD][-3] = "Conformation from step number {} in file {}".format(step_num,
                                                                                                 rel_path_fname)
        else:
            com_tpl_content[SEC_HEAD][-3] = "Last conformation from file {}".format(rel_path_fname)
        # starting value for the lowest-energy search; energies that are not below it are never selected
        lowest_energy_found = 0.0
        current_step_num = None
        final_atoms_section = []
        atom_type_list = []
        section = SEC_HEAD
        atom_id = 0
        # so don't change the flag that is passed it, so if there is another log file it will also be checked
        # NOTE(review): the charge is searched for in the log when charge_from_log_flag is falsy, which reads as the
        #    opposite of the parameter name -- confirm against the caller before changing
        find_charge = not charge_from_log_flag

        for line in d:
            line = line.strip()
            if len(line) == 0:
                continue
            # not currently keeping anything from the header
            if section == SEC_HEAD:
                if find_charge:
                    if GAU_CHARGE_PAT.match(line):
                        charge_mult = []
                        while find_charge:
                            split_line = line.split('=')
                            charge_mult.append('{}  {}'.format(int(split_line[1].split()[0]),
                                                               int(split_line[2].split()[0])))
                            line = next(d).strip()
                            if not GAU_CHARGE_PAT.match(line):
                                # more than one charge line: the initial coordinates follow directly, so read them
                                if len(charge_mult) > 1:
                                    section = SEC_INITIAL_COORDINATES
                                    final_atoms_section = []
                                    # already reading the next section, so grab the needed info
                                    atom_type_list.append(line.split()[0])
                                com_tpl_content[SEC_HEAD][-1] = '   '.join(charge_mult)
                                find_charge = False
                        continue
                if step_num and GAU_STEP_PAT.match(line):
                    split_line = line.split()
                    current_step_num = int(split_line[2])
                    if current_step_num == step_num:
                        break

                if GAU_COORD_PAT.match(line):
                    atoms_section = []
                    # skip the two lines between the coordinate heading and the first atom line
                    next(d)
                    next(d)
                    section = SEC_ATOMS
                    continue

            elif section == SEC_INITIAL_COORDINATES:
                while len(line) > 0:
                    # originally just added whole line to final. Then found that this section prints fewer sig figs
                    #   than the coordinate section, so taking those instead
                    atom_type_list.append(line.split()[0])
                    line = next(d).strip()
                while not GAU_COORD_PAT.match(line):
                    line = next(d).strip()
                next(d)
                next(d)
                line = next(d).strip()
                while not GAU_SEP_PAT.match(line):
                    split_line = line.split()
                    atom_xyz = ["{:>12}".format(x) for x in split_line[3:6]]
                    final_atoms_section.append('{:16}'.format(atom_type_list[atom_id]) + ' '.join(atom_xyz))
                    atom_id += 1
                    line = next(d).strip()
                break
            elif section == SEC_ATOMS:
                if GAU_SEP_PAT.match(line):
                    section = SEC_TAIL
                    continue

                split_line = line.split()
                try:
                    atom_type = ATOM_NUM_DICT[int(split_line[1])]
                except KeyError:
                    raise InvalidDataError("Currently, this code only expects atom numbers up to 36 (Kr), and the "
                                           "atomic number read was {}. Update the code to use this with your current "
                                           "output.".format(split_line[1]))
                if com_tpl_content[NUM_ATOMS]:
                    com_atom_type = re.split('[ (]', com_tpl_content[SEC_ATOMS][atom_id])[0].strip()
                    if com_atom_type != atom_type:
                        # the template may give the atomic number instead of the symbol; compare that way as well
                        try:
                            if ATOM_NUM_DICT[int(com_atom_type)] != atom_type:
                                raise ValueError
                        except ValueError:
                            raise InvalidDataError("For atom number {}, {} has atom type '{}', while the template has "
                                                   "atom type '{}'".format(atom_id + 1, gausslog_file, atom_type,
                                                                           com_atom_type))
                    atom_type = com_tpl_content[SEC_ATOMS][atom_id]  # This keeps the "fragment" number if there
                atom_type = '{:16}'.format(atom_type)

                atom_xyz = ["{:>12}".format(x) for x in split_line[3:6]]
                atoms_section.append(atom_type + ''.join(atom_xyz))
                atom_id += 1
            elif section == SEC_TAIL:
                if com_tpl_content[NUM_ATOMS] and atom_id != com_tpl_content[NUM_ATOMS]:
                    raise InvalidDataError('In gausslog file: {}\n  found {} atoms, but the tpl expects '
                                           '{} atoms'.format(gausslog_file, atom_id, com_tpl_content[NUM_ATOMS]))
                if GAU_E_PAT.match(line):
                    if find_low_energy:
                        split_line = line.split()
                        energy = float(split_line[4])
                        if energy < lowest_energy_found:
                            # record the new minimum, so that only a lower energy can replace these coordinates
                            lowest_energy_found = energy
                            final_atoms_section = atoms_section[:]
                    else:
                        final_atoms_section = atoms_section[:]
                    # get ready to read the next set of coordinates
                    section = SEC_HEAD
                    atom_id = 0

    if len(final_atoms_section) == 0:
        raise InvalidDataError("Check that the following log file has coordinates to use and/or specified step "
                               "number: {}".format(gausslog_file))
    if out_fname:
        f_name = create_out_fname(out_fname, base_dir=base_dir)
    else:
        f_name = create_out_fname(gausslog_file, suffix='_' + com_tpl_content[BASE_NAME], ext='.com',
                                  base_dir=base_dir)
    list_to_file(com_tpl_content[SEC_HEAD] + final_atoms_section + com_tpl_content[SEC_TAIL], f_name)
예제 #21
0
def plot_select_mz_intensity_v_ret_time(fname, ms_level, mz_list_to_plot,
                                        data_array, num_decimals_ms_accuracy,
                                        num_decimals_ret_time_accuracy,
                                        out_dir):
    """
    Plot intensity versus retention time for up to 5 selected M/Z values, all on the same plot.
    Note that the M/Z column of the data_array that is passed in is rounded in place.
    :param fname: str, name of the file where the data originated
    :param ms_level: str, used to distinguish between different MS output of the same input file (no overwriting)
    :param mz_list_to_plot: list, with up to 5 mz values to plot vs time on the same plot
    :param data_array: ndarray (n x 3) with M/Z, intensity, and retention times
    :param num_decimals_ms_accuracy: int, number of decimal points in MS accuracy, for rounding
    :param num_decimals_ret_time_accuracy: number of decimal points in retention time accuracy, for rounding
    :param out_dir: None or str, provides location where new file should be saved (None for current directory)
    :return: None if the plot is not attempted (no M/Z values, or more than 5 of them). Otherwise, a dict with a
        key for each M/Z value found in the data (formatted with num_decimals_ms_accuracy decimals), with values
        that are ndarrays with retention time in the first column and intensity in the second.
    """
    mz_col, intensity_col, ret_time_col = 0, 1, 2
    x_axis_limit = 16.

    num_mz_vals = len(mz_list_to_plot)
    if num_mz_vals > 5:
        warning("Error while attempting to plot select M/Z values versus retention times.\n    This "
                "method expects at most 5 M/Z values to display on one plot. This plot will not be produced.")
        return None
    if num_mz_vals == 0:
        warning("Error while attempting to plot select M/Z values versus retention times.\n    No "
                "M/Z values provided. This plot will not be produced.")
        return None
    # the title is built before rounding, so a single M/Z value is shown as it was provided
    title = (f"Intensity versus Retention Time for M/Z={mz_list_to_plot[0]}" if num_mz_vals == 1
             else "Intensity versus Retention Time for Selected M/Z Values")

    # At least sometimes, mz_list_to_plot and data_array are not already rounded, so doing so here
    rounded_mz_vals = np.around(mz_list_to_plot, num_decimals_ms_accuracy)
    data_array[:, mz_col] = np.around(data_array[:, mz_col], num_decimals_ms_accuracy)
    # the y-axis limit comes from all the data (not only the chosen M/Z values), for more consistent y-axis ranges;
    #    the maximum retention time is found below, from the chosen M/Z values only
    y_axis_limit = find_pos_plot_limit(np.max(data_array[:, intensity_col]))

    inten_time_dict = {}
    largest_ret_time = 0.
    for mz_val in rounded_mz_vals:
        mz_rows = data_array[data_array[:, mz_col] == mz_val]
        if len(mz_rows) < 1:
            warning(f"No retention time data found for M/Z value {mz_val} from {os.path.relpath(fname)}.\n    This "
                    f"M/Z will be omitted from the plot.")
            continue
        mz_ret_times = mz_rows[:, ret_time_col]
        # columns are x, y, so ret_time, intensity
        inten_time_dict[f"{mz_val:.{num_decimals_ms_accuracy}f}"] = np.column_stack(
            (mz_ret_times, mz_rows[:, intensity_col]))
        largest_ret_time = max(largest_ret_time, np.max(mz_ret_times))

    if largest_ret_time > x_axis_limit:
        warning(f"The default maximum x-axis value ({x_axis_limit}) is less than the maximum x-axis value in the "
                f"data ({largest_ret_time}). Not all data will be shown.")

    # only add the MS level to the output name if the input name does not already note one
    if "_ms" in fname.lower():
        name_suffix = "_int_v_time"
    else:
        name_suffix = f"_ms{ms_level}_int_v_time"
    plot_fname = create_out_fname(fname, suffix=name_suffix, ext='png', base_dir=out_dir)
    make_vlines_plot(title, "Retention time (min)", "Intensity (unscaled)", inten_time_dict, plot_fname,
                     num_decimals_ret_time_accuracy, x_axis_limit, y_axis_limit, loc="upper left")
    return inten_time_dict
예제 #22
0
def parse_cmdline(argv):
    """
    Build the command-line parser, parse the arguments, and fill in derived option values.
    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``
    :return: two values: the result of `parse_known_args` (a tuple of the options namespace and the list of
        unrecognized arguments), or None if parsing exited before producing a result; and the return code
        (GOOD_RET, or INPUT_ERROR if parsing or a value conversion failed)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    arg_parser = argparse.ArgumentParser(description='Calculates A and Ea from Gaussian output files using GoodVibes. '
                                                     'List files to be analyzed, reactant(s) first and ending with the '
                                                     'transition structure. These can be listed on the command line or in '
                                                     'a file (each line listing a set of reactant(s) and transition '
                                                     'structure).')
    arg_parser.add_argument("-d", "--out_dir", default=None,
                            help="A directory where output files should be saved. The default location "
                                 "is the current working directory.")
    arg_parser.add_argument("-f", dest="freq_cutoff", default="0",
                            help="Cut-off frequency for both entropy and enthalpy (wavenumbers) "
                                 "(default = 0)")
    arg_parser.add_argument("-l", "--list", default=None,
                            help="The location of the list of Gaussian output files. "
                                 "The default file name.")
    arg_parser.add_argument("-q", "--quasiharmonic", action='store_true',
                            help="Use the '-q' option in GoodVibes, which turns on turns on "
                                 "quasi-harmonic corrections to both entropy and enthalpy in the "
                                 "Gibbs free energy (qh-G(T)) output from GoodVibes. ")
    arg_parser.add_argument("--temp", default=None,
                            help="Temperature in K for calculating \u0394G. The default is the first "
                                 "temperature in 'temp_range' (if specified). If a value is given, the program "
                                 "will use the temperature closest to it in the temp_range.")
    arg_parser.add_argument("-ti", "--temp_range", default="300,600,30",
                            help="Initial temp, final temp, (and optionally) step size (K) for "
                                 "thermochemistry calculations. The default range is 300,600,30")
    arg_parser.add_argument("-v", "--vib_scale", default=None,
                            help="Scaling factor to be used for vibrational frequencies. If not "
                                 "provided, the GoodVibes default value will be used.")
    arg_parser.add_argument("-p", "--plot", action='store_true',
                            help="Make a \u0394G plot at the specified temp. The default is False.")
    arg_parser.add_argument("-pl", "--plot_labels", default=None,
                            help="Optional labels for \u0394G plot. Enter as a list.")
    arg_parser.add_argument("-c", "--vibes_check", action='store_true',
                            help="In addition to standard checks always run (matching solvent, "
                                 "level of theory, stoichiometry, charge, multiplicity, and "
                                 "Gaussian versions), run files through GoodVibes '--check' before "
                                 "performing calculations. The default is False.")
    arg_parser.add_argument("-o", "--output_fname", default=None,
                            help="The name of the output file to be created. The default is the "
                                 "list name with the extension '.csv', or '{}' if no list name "
                                 "provided.".format(DEF_OUT_FILE_NAME))
    arg_parser.add_argument("-s", "--save_vibes", action='store_true',
                            help="Save the output from running GoodVibes in separate files, "
                                 "named with the Gaussian log file prefix and '.dat'. "
                                 "The default is False.")
    arg_parser.add_argument("-t", "--tog_vibes", action='store_true',
                            help="Save the output from running GoodVibes in one file, "
                                 "renamed with the output file prefix and '.dat'. "
                                 "The default is False.")

    args = None
    try:
        # unrecognized arguments are kept (second item of the tuple) instead of causing an error
        args = arg_parser.parse_known_args(argv)
        opts = args[0]

        opts.out_dir = opts.out_dir or os.getcwd()
        # user can define a new directory as the output directory
        if not os.path.exists(opts.out_dir):
            os.makedirs(opts.out_dir)

        if opts.output_fname:
            opts.output_fname = os.path.abspath(os.path.join(opts.out_dir, opts.output_fname))
        else:
            # name the output after the list file if there is one, otherwise use the default name
            opts.output_fname = create_out_fname(opts.list or DEF_OUT_FILE_NAME, ext='.csv',
                                                 base_dir=opts.out_dir)

        opts.plot_labels = opts.plot_labels.split(',') if opts.plot_labels else ['']

        if opts.vib_scale:
            opts.vib_scale = float(opts.vib_scale)

    except (SystemExit, ValueError) as e:
        # an exit code of 0 (as when help is requested) is not an error
        if getattr(e, 'code', None) == 0:
            return args, GOOD_RET
        warning(e)
        arg_parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
예제 #23
0
def run_job(job, job_name_perhaps_with_dir, tpl_dict, cfg, testing_mode):
    """
    Fill in the job-run template for one job and make the resulting script executable. Unless in testing mode,
    also run the script and check the last line of the resulting log file for normal termination.
    :param job: str, '' for the first job (the job name in tpl_dict is used as is); otherwise, the job name is
        appended to the current job name, and the input file is looked up in cfg[TPL_DICT]
    :param job_name_perhaps_with_dir: str, combined with cfg[GAUSS_IN_EXT] to name the input file of the first job
    :param tpl_dict: dict of values used to fill in the template; updated in place
    :param cfg: dict of configuration values
    :param testing_mode: if truthy, the job script is created but not run, and a fixed node name is used
    :return: None
    :raises InvalidDataError: if the last line of the log file does not match GAU_GOOD_PAT
    :raises KeyError: if the template needs a key that is in neither tpl_dict nor cfg
    """
    # Determine if it will run fresh or from an old checkpoint
    prev_job_name = tpl_dict[JOB_NAME]
    if job == '':
        new_job_name = prev_job_name
        tpl_dict[INPUT_FILE] = job_name_perhaps_with_dir + cfg[GAUSS_IN_EXT]
        first_job_chk = cfg[FIRST_JOB_CHK]
        tpl_dict[OLD_CHECK_ECHO] = cfg[OLD_CHECK_ECHO].format(first_job_chk) if first_job_chk else ''
    else:
        new_job_name = prev_job_name + '_' + job
        tpl_dict[OLD_JOB_NAME] = prev_job_name
        tpl_dict[OLD_CHECK_ECHO] = cfg[OLD_CHECK_ECHO].format(prev_job_name)
        tpl_dict[INPUT_FILE] = cfg[TPL_DICT][job]

    tpl_file = cfg[JOB_RUN_TPL]
    job_runner_fname = create_out_fname(new_job_name, ext=".sh", base_dir=cfg[OUT_DIR])
    print("Running {}".format(new_job_name))

    tpl_dict[JOB_NAME] = new_job_name
    for key_name in (USER, MEM, PROC_LIST):
        if key_name in cfg:
            tpl_dict[key_name] = cfg[key_name]

    tpl_str = read_tpl(tpl_file)
    # if either MEM or PROC_LIST is the default (Nonetype), and is used to run the job, get info from the node before
    #    creating the job script
    get_mem = '{' + MEM + '}' in tpl_str and not tpl_dict[MEM]
    get_proc = '{' + PROC_LIST + '}' in tpl_str and not tpl_dict[PROC_LIST]
    default_gauss_required = '{' + DEF_ROUTE + '}' in tpl_str

    if get_mem or get_proc or default_gauss_required:
        # explicitly check each possible required info flag, because any or all can be requested
        #  The "hostname" command will not be covered in testing mode, as is not part of written code to be tested
        host_cmd = ["echo", "r1i7n35"] if testing_mode else ["hostname"]
        hostname = subprocess.check_output(host_cmd).decode("utf-8").strip()
        print("Obtaining available memory and/or number of processors on node {}.\n    "
              "Note: this program assumes the whole node will be allocated to Gaussian.\n".format(hostname))
        if get_mem:
            tpl_dict[MEM] = get_node_mem(testing_mode)

        max_cache = 1024 * 1024  # to make IDE happy; Gaussian default (conservative) is 1024 * 1024
        if get_proc or default_gauss_required:
            num_procs, proc_list, max_cache = get_proc_info(testing_mode)
            if get_proc:
                tpl_dict[PROC_LIST] = proc_list
                print("    Found {} processors. Will allow use of cpus {}.\n".format(num_procs, proc_list))

        if get_mem or get_proc:
            print("    The user may override these values by specifying the '{}' and/or '{}' keywords in the "
                  "configuration file.\n    Be sure to use the formatting Gaussian expects.\n".format(MEM, PROC_LIST))

        if default_gauss_required:
            max_disk = get_max_disk(testing_mode)
            max_cache = int(max_cache)
            print("Since '{}' found in the {}, read machine specs to determine CacheSize={} and "
                  "MaxDisk={}".format(DEF_ROUTE, JOB_RUN_TPL, max_cache, max_disk))
            route_fname = create_out_fname('Default.Route', base_dir=cfg[SCRATCH_DIR])
            list_to_file(["-#- CacheSize={}".format(max_cache), "-#- MaxDisk={}".format(max_disk)], route_fname)
            # there is an action triggered, not a value needed, so replaced with blank space
            tpl_dict[DEF_ROUTE] = ''

    # keep trying to fill in the template, each time copying a key it reports as missing from cfg (if it is there)
    while True:
        try:
            fill_save_tpl(tpl_str, tpl_dict, tpl_file, job_runner_fname)
            break
        except KeyError as e:
            # the name of the missing key is the text between the first pair of single quotes in the message
            missing_key = e.args[0].split("\'")[1]
            if missing_key not in cfg:
                raise
            tpl_dict[missing_key] = cfg[missing_key]

    subprocess.call(["chmod", "+x", job_runner_fname])
    if testing_mode:
        print("Testing mode; did not run job script or check Gaussian output for normal termination.\n")
        return

    # do not want this tested, as actually running Gaussian would take too long, and not what should be tested
    job_process = subprocess.Popen(job_runner_fname)
    job_process.wait()
    out_file = tpl_dict[JOB_NAME] + ".log"
    last_line = subprocess.check_output(["tail", "-1", out_file]).strip().decode("utf-8")
    if not GAU_GOOD_PAT.match(last_line):
        raise InvalidDataError('Job failed: {}'.format(out_file))
    print("Successfully completed {}\n".format(out_file))
    # the job script is only removed after a successful run
    os.remove(job_runner_fname)
예제 #24
0
def find_good_fit(x_vals, y_vals, x_fit, png_fname=None):
    """
    Find a good functional fit for scan data

    Candidate fits are the CHARMM dihedral equation with several sets of multipliers, followed by polynomials
    of order 1 through 11. The root mean squared error (RMSE) of each candidate against the input data is
    printed, and the curve from the candidate with the smallest RMSE is returned.
    :param x_vals: np array, x values for fitting
    :param y_vals: np array, y values for fitting
    :param x_fit: np array, x values to use for creating curve
    :param png_fname: str, path to save plot, if desired; two plots are saved, with the suffixes "_charmm" and
        "_poly" added to this name by create_out_fname
    :return: np array, y values at x_fit from the candidate with the smallest RMSE (None if no candidate had a
        residual smaller than infinity)
    """
    smallest_resid = np.inf
    best_y_fit = None

    print("Residuals from curve fitting:")

    charmm_n_multipliers = [
        np.ones(5, dtype=int),
        np.asarray([0, 1, 1, 1, 1]),
        np.asarray([1, 1, 1, 1, 0]),
        np.asarray([1, 1, 1, 0, 0]),
        np.asarray([1, 1, 0, 0, 0]),
        np.asarray([1, 0, 0, 0, 0]),
        np.asarray([0, 1, 0, 1, 1]),
        np.asarray([1, 0, 1, 0, 0])
    ]
    if png_fname:
        plt.plot(x_vals, y_vals, '.', label='data')

    for idx, multipliers in enumerate(charmm_n_multipliers):
        n_vals = multipliers * N_DIHE
        # label listing only the non-zero n values, used in the residual report
        n_label = ",".join([str(x) for x in n_vals[n_vals != 0]]) + ":"
        # fit curve
        ini_vals = np.ones(len(N_DIHE) * 2)
        with warnings.catch_warnings():
            # escalate OptimizeWarning to an exception so that it can be caught here
            warnings.simplefilter("error", OptimizeWarning)
            try:
                popt, _ = curve_fit(f=lambda x, *params: charmm_dihedral(
                    x, *params, *multipliers),
                                    xdata=x_vals,
                                    ydata=y_vals,
                                    p0=ini_vals)
            except OptimizeWarning:
                # curve_fit did not return parameters for this candidate. Skip it, rather than evaluating
                # the curve with parameters left over from a previous candidate (or with no parameters at
                # all, if this is the first candidate).
                print(
                    f'    CHARMM dihedral eq with n = {n_label:10} fit skipped (OptimizeWarning)'
                )
                continue

        y_fit = charmm_dihedral(x_fit, *popt, *multipliers)
        if png_fname:
            plt.plot(x_fit,
                     y_fit,
                     '-',
                     color=assign_color(idx),
                     label=f'fit: {multipliers}')

        y_from_fit = charmm_dihedral(x_vals, *popt, *multipliers)
        resid = np.sqrt(np.mean(np.square(y_from_fit -
                                          y_vals)))  # Root Mean Squared Error
        print(f'    CHARMM dihedral eq with n = {n_label:10} {resid:5.2f}')
        if resid < smallest_resid:
            smallest_resid = resid
            best_y_fit = y_fit

    if png_fname:
        # plt.legend()
        charmm_fname = create_out_fname(png_fname, suffix="_charmm")
        plt.savefig(
            charmm_fname,
            transparent=True,
            bbox_inches='tight',
        )
        plt.close()
        print(f"Saved: {charmm_fname}")

    if png_fname:
        plt.plot(x_vals, y_vals, '.', label='data')

    for idx, order in enumerate(range(1, 12)):
        # full=True is kept from the original call; only the coefficients (first returned item) are used
        p = np.polyfit(x_vals, y_vals, order, full=True)[0]
        y_fit = np.polyval(p, x_fit)

        if png_fname:
            plt.plot(x_fit,
                     y_fit,
                     '-',
                     color=COLOR_SEQUENCE[idx],
                     label=f'fit: poly order {order}')

        y_from_fit = np.polyval(p, x_vals)
        resid = np.sqrt(np.mean(np.square(y_from_fit - y_vals)))  # Root Mean Squared Error
        print(f'    Polynomial order {order:2}: {resid:5.2f}')

        if resid < smallest_resid:
            smallest_resid = resid
            best_y_fit = y_fit

    if png_fname:
        # plt.legend()
        poly_fname = create_out_fname(png_fname, suffix="_poly")
        plt.savefig(
            poly_fname,
            transparent=True,
            bbox_inches='tight',
        )
        plt.close()
        print(f"Saved: {poly_fname}")

    return best_y_fit
예제 #25
0
def check_convergence(check_file_list, step_converg, last_step, best_conv,
                      all_steps_to_stdout):
    """
    Reads Gaussian output files to check convergence

    A header line is always printed first. Then, for each file:
      - if step_converg is False, one line with the final convergence is printed;
      - if last_step or best_conv is set, steps are printed sorted by convergence;
      - else if all_steps_to_stdout is set, every step is printed in the order read;
      - otherwise, the last step is printed, all steps are saved to a csv file, and convergence plots are made.
    :param check_file_list: list of file names
    :param step_converg: boolean; if True, capture convergence of each step. If false, only the final convergence.
    :param last_step: None or int; if int, the last step number to check for convergence
    :param best_conv: Boolean; if true, print ten steps with the best convergence
    :param all_steps_to_stdout: Boolean to print convergence to standard out
    :return: nothing: either saves a file or prints to stdout
    """
    # widen the file name column, if needed, to fit the longest base name
    fname_str_length = max(
        [36] + [len(os.path.basename(fname)) for fname in check_file_list])
    conv_str_length = 11
    no_data_msg = "No convergence data found for file: {}"

    print(
        f"{F_NAME:{fname_str_length}} {CONVERG:{conv_str_length}} {CONVERG_ERR}"
    )
    headers = STEP_CONVERG_HEADERS if step_converg else FINAL_CONVERG_HEADERS
    # values copied from each step's entry in the convergence dict
    step_keys = (ENERGY, MAX_FORCE, RMS_FORCE, MAX_DISPL, RMS_DISPL, CONVERG,
                 CONVERG_ERR)

    for fname in check_file_list:
        log_content = process_gausslog_file(fname,
                                            find_converg=True,
                                            find_step_converg=step_converg,
                                            last_step_to_read=last_step)
        log_content[F_NAME] = os.path.basename(fname)

        if not step_converg:
            # this is the printing for final termination step only (not step_converg)
            print(
                f"{log_content[headers[0]]:{fname_str_length}} "
                f"{log_content[headers[1]]:{conv_str_length}.4f} "
                f"{log_content[headers[2]]}")
            continue

        # create list of dicts for each step, for all step_converg options
        step_list = []
        for step_num, step_vals in log_content[CONVERG_STEP_DICT].items():
            step_dict = {F_NAME: log_content[F_NAME], STEP_NUM: step_num}
            step_dict.update((key, step_vals[key]) for key in step_keys)
            step_list.append(step_dict)

        # different output depending on which step_converg option
        if last_step or best_conv:
            if not step_list:
                print(no_data_msg.format(log_content[F_NAME]))
                continue
            sorted_by_converg = sorted(step_list, key=itemgetter(CONVERG))
            if last_step:
                print(
                    "Steps sorted by convergence to step number {} for file: {}"
                    .format(last_step, log_content[F_NAME]))
                stop_step = last_step
            else:
                print(
                    "Best (up to 10) steps sorted by convergence for file: {}"
                    .format(log_content[F_NAME]))
                stop_step = 10
            print("    StepNum  Convergence")
            for print_num, step_dict in enumerate(sorted_by_converg):
                if print_num == stop_step:
                    # break this for, and go to next file if there is one
                    break
                print("    {:7} {:10.3f}".format(step_dict[STEP_NUM],
                                                 step_dict[CONVERG]))
        elif all_steps_to_stdout:
            # print all steps to stdout, not sorted by convergence
            print("Convergence of all steps for file: {}".format(
                log_content[F_NAME]))
            print("    StepNum  Convergence")
            for step_dict in step_list:
                print("    {:7} {:10.3f}".format(step_dict[STEP_NUM],
                                                 step_dict[CONVERG]))
        else:
            # save all steps, not sorted by convergence
            if not step_list:
                # without this check, step_list[-1] below raises an IndexError when no steps were read
                print(no_data_msg.format(log_content[F_NAME]))
                continue
            print(
                f"{log_content[F_NAME]:{fname_str_length}} {step_list[-1][CONVERG]:{conv_str_length}.4f} "
                f"{step_list[-1][CONVERG_ERR]}")
            # the output file name is only needed in this branch
            out_fname = create_out_fname(fname,
                                         prefix='',
                                         suffix='_conv_steps',
                                         ext='.csv')
            write_csv(step_list,
                      out_fname,
                      headers,
                      extrasaction="ignore",
                      round_digits=6)
            # also make plots of step versus convergence
            create_convergence_plots(out_fname, step_list)
예제 #26
0
def get_thermochem(file_set, results_dict, save_vibes, out_dir,
                   tog_output_fname, qh_h_opt, write_mode):
    """
    Collects thermochem at a range of temps from stored GoodVibes output

    For each file in file_set, the GoodVibes output stored in results_dict (looked up by the file's base name)
    is parsed. The separator entry (REACT_PROD_SEP) gets NaN arrays as placeholders. The GoodVibes output is
    optionally saved, per file and/or all together.
    :param file_set: list of reactant file(s), TS file (or separator), and optionally products
    :param results_dict: dictionary of results from running hartree and goodvibes
    :param save_vibes: boolean to determine whether to save each GoodVibes output separately
    :param out_dir: directory to save GoodVibes output files (if requested)
    :param tog_output_fname: None or string (file name) if saving each GoodVibes output together
    :param qh_h_opt: boolean to use the '-q' option in GoodVibes (corrections to both entropy and enthalpy);
        when True, the fourth column of each data line is read as the quasi-harmonic enthalpy
    :param write_mode: str, mode passed to list_to_file for the all-together output; if it is 'w', it is
        switched to 'a' after the first write
    :return: temps (np array, read from the first file only) and four lists (h, qh_h, gt, qh_gt) with one
        entry per item in file_set; entries are np arrays multiplied by EHPART_TO_KCAL_MOL, except that
        qh_h entries are left as read (not converted) when qh_h_opt is False
    """
    temps = []
    h, qh_h, gt, qh_gt = [], [], [], []

    for file_num, file in enumerate(file_set):
        base_name = os.path.basename(file)
        if file == REACT_PROD_SEP:
            # placeholder entries so list positions still line up with file_set
            for prop_list in (h, qh_h, gt, qh_gt):
                prop_list.append(np.full([len(temps)], np.nan))
            continue

        vibes_out = results_dict[base_name][GOODVIBES_OUT]
        # per-file value lists; the same objects are stored in the lists that will be returned
        file_h, file_qh_h, file_gt, file_qh_gt = [], [], [], []
        h.append(file_h)
        qh_h.append(file_qh_h)
        gt.append(file_gt)
        qh_gt.append(file_qh_gt)

        header_seen = False
        pending_skip = True
        # we know the last line should be dropped, and at least the first 10
        for line in vibes_out[10:-2]:
            if GOODVIBES_ERROR_PAT.match(line):
                raise InvalidDataError(
                    "See GoodVibes output: {}".format(vibes_out))
            if not header_seen:
                # nothing is read until the line matching the data pattern has been found
                if GOODVIBES_DATA_PAT.match(line):
                    header_seen = True
                continue
            if pending_skip:
                # one line after the matched line is skipped before reading values
                pending_skip = False
                continue

            vals = line.split()
            if file_num == 0:
                temps.append(float(vals[1]))
            file_h.append(float(vals[2]))
            if qh_h_opt:
                file_qh_h.append(float(vals[3]))
            file_gt.append(float(vals[-2]))
            file_qh_gt.append(float(vals[-1]))

        if save_vibes:
            vibes_out_fname = os.path.relpath(
                create_out_fname(file,
                                 suffix='_vibes',
                                 base_dir=out_dir,
                                 ext='.dat'))
            list_to_file(vibes_out, vibes_out_fname, print_message=False)
            print('Saved GoodVibes output as: {}'.format(vibes_out_fname))

        if tog_output_fname:
            list_to_file(vibes_out,
                         tog_output_fname,
                         mode=write_mode,
                         print_message=False)
            if write_mode == 'w':
                print("Adding all GoodVibes output to: {}".format(
                    tog_output_fname))
                # after the first write, append to the same file
                write_mode = "a"

    temps = np.asarray(temps)
    # for each molecule, multiply the array to convert to kcal/mol
    h = [np.asarray(vals) * EHPART_TO_KCAL_MOL for vals in h]
    if qh_h_opt:
        qh_h = [np.asarray(vals) * EHPART_TO_KCAL_MOL for vals in qh_h]
    gt = [np.asarray(vals) * EHPART_TO_KCAL_MOL for vals in gt]
    qh_gt = [np.asarray(vals) * EHPART_TO_KCAL_MOL for vals in qh_gt]

    return temps, h, qh_h, gt, qh_gt
예제 #27
0
def main(argv=None):
    """
    Command-line entry point for the goodvibes_helper script

    Gathers sets of files (from a list file and/or the command line), checks that they all exist, then
    processes each set, prints its results, and optionally makes plots.
    :param argv: command-line arguments, passed on to parse_cmdline
    :return: int, GOOD_RET on success; otherwise the code returned by parse_cmdline, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script goodvibes_helper version {__version__}")
    # Read input
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        # Make a list of lists; each inner list a set of reactant file(s) with TS
        # Include anything in the "list" file as well as entered on the command line
        options = args[0]
        row_list = []
        if options.list:
            with open(options.list) as f:
                split_rows = (row.strip().split() for row in f.readlines())
                # drop blank lines
                row_list = [fields for fields in split_rows if fields]
        if len(args[1]) > 0:
            row_list.append(args[1])
        if not row_list:
            raise InvalidDataError("No files or list of files found")

        # now a quick first check that all files exist, and get unique names
        missing_files = set()
        unique_fnames = set()
        for file_set in row_list:
            for file in file_set:
                if file == REACT_PROD_SEP:
                    continue
                if os.path.isfile(file):
                    unique_fnames.add(file)
                else:
                    missing_files.add(file)
        if missing_files:
            raise IOError(missing_files)

        # Initialization to make IDE happy; used for plotting
        g_ts_list, g_rxn_list, qh_g_ts_list, qh_g_rxn_list = [], [], [], []
        h_ts_list, h_rxn_list, qh_h_ts_list, qh_h_rxn_list = [], [], [], []
        g_temp = None
        # now the calculations and printing
        print_mode = 'w'  # for the AEa output, so only prints header once, and then appends to file
        print_message = True
        tog_fname = os.path.relpath(
            create_out_fname(options.output_fname, suffix='_vibes', ext='.dat')) if options.tog_vibes else None
        results_dict = get_gauss_results(options, unique_fnames)

        for file_set in row_list:
            # the called method returns values needed for printing and plotting
            (temps, a, ea, kt, delta_h_ts, delta_h_rxn, delta_gibbs_ts, delta_gibbs_rxn, qh_a, qh_ea, qh_kt,
             qh_delta_h_ts, qh_delta_h_rxn, qh_delta_gibbs_ts, qh_delta_gibbs_rxn) = process_file_set(
                file_set, options, print_mode, results_dict, tog_fname)

            temp_index = get_temp_index(options.temp, temps)
            if REACT_PROD_SEP in file_set:
                k_temp, qh_k_temp = "", ""
            else:
                k_temp = round_sig_figs(kt[temp_index])
                qh_k_temp = round_sig_figs(qh_kt[temp_index])
            g_temp = temps[temp_index]
            # values at the chosen temperature, rounded in the same order as they are listed
            g_ts, g_rxn, qh_g_ts, qh_g_rxn, h_ts, h_rxn = (
                round_sig_figs(vals[temp_index])
                for vals in (delta_gibbs_ts, delta_gibbs_rxn, qh_delta_gibbs_ts, qh_delta_gibbs_rxn,
                             delta_h_ts, delta_h_rxn))
            if options.quasiharmonic:
                qh_h_ts = round_sig_figs(qh_delta_h_ts[temp_index])
                qh_h_rxn = round_sig_figs(qh_delta_h_rxn[temp_index])
            else:
                qh_h_ts, qh_h_rxn = 0, 0  # So don't use an undefined variable below

            print_results(a, ea, qh_a, qh_ea, g_temp, k_temp, g_ts, g_rxn, qh_k_temp, qh_g_ts, qh_g_rxn,
                          file_set, options.output_fname, print_mode, print_message=print_message)

            if options.plot:
                collected = [(g_ts_list, g_ts), (g_rxn_list, g_rxn),
                             (qh_g_ts_list, qh_g_ts), (qh_g_rxn_list, qh_g_rxn),
                             (h_ts_list, h_ts), (h_rxn_list, h_rxn)]
                if options.quasiharmonic:
                    collected += [(qh_h_ts_list, qh_h_ts), (qh_h_rxn_list, qh_h_rxn)]
                for plot_list, value in collected:
                    plot_list.append(value)

            # only the first file set writes a new file with a header; later sets append
            print_mode = 'a'
            print_message = False

        if options.plot:
            # (file name suffix, TS values, reaction values, extra plot_delta keyword arguments)
            plot_specs = [
                ('_g', g_ts_list, g_rxn_list, {}),
                ('_g_qh', qh_g_ts_list, qh_g_rxn_list, {}),
                ('_h', h_ts_list, h_rxn_list, {'var': 'H'}),
            ]
            if options.quasiharmonic:
                plot_specs.append(('_h_qh', qh_h_ts_list, qh_h_rxn_list, {'var': 'H'}))
            for suffix, ts_list, rxn_list, extra_kwargs in plot_specs:
                plot_fname = create_out_fname(options.output_fname, suffix=suffix, ext='.png')
                plot_delta(plot_fname, g_temp, ts_list, rxn_list, options.plot_labels, **extra_kwargs)

    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success