Esempio n. 1
0
def main(argv=None):
    """
    Command-line entry point for the plot_steps script: reads the list file and writes a plot.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script plot_steps version {__version__}")

    # Read input; stop here if parsing did not produce usable arguments
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        # Each non-blank line of the list file becomes a list of its whitespace-separated entries
        with open(args.list) as list_file:
            row_list = [fields for fields in (line.split() for line in list_file) if fields]

        # Name the plot after the requested output name if given, otherwise after the list file
        name_source = args.output_fname if args.output_fname else args.list
        plot_fname = create_out_fname(name_source, base_dir=args.out_dir, ext='.png')

        plot_delta_g(plot_fname, args.temp, row_list, args.conv,
                     args.fig_width, args.fig_height, args.y_axis_label)
        print("Wrote file: {}".format(plot_fname))

    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
def parse_cmdline(argv):
    """
    Builds the argument parser and parses the command-line arguments.
    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``.
    :return: tuple of the parsed arguments (None if parsing did not complete) and a return code
    """
    arg_list = sys.argv[1:] if argv is None else argv

    # initialize the parser object:
    parser = argparse.ArgumentParser()
    # 'store_false': the attribute is False only when the flag is given on the command line
    parser.add_argument("-n", "--no_attribution", action='store_false',
                        help="Whether to include attribution")

    try:
        parsed_args = parser.parse_args(arg_list)
    except SystemExit as e:
        # an exit code of 0 (e.g. after argparse prints help) is not treated as an error
        if getattr(e, 'code', None) == 0:
            return None, GOOD_RET
        warning(e)
        parser.print_help()
        return None, INPUT_ERROR

    return parsed_args, GOOD_RET
def check_input(args, cfg):
    """
    Applies command-line overrides to the configuration, validates it, and creates the output directory.
    :param args: parsed command-line arguments; the attributes file, list, tpl, out_dir, out_fname,
        only_first, and only_final are read
    :param cfg: configuration mapping, updated in place
    :return: None
    :raises InvalidDataError: if log files are to be combined but no output file name is set
    :raises OSError: if the output directory cannot be created (including when the path exists but
        is not a directory)
    """
    # override config entries if command-line options used (each option is applied exactly once)
    value_overrides = (
        (GAUSSLOG_FILE, args.file),
        (GAUSSLOG_FILES_FILE, args.list),
        (PDB_TPL_FILE, args.tpl),
        (OUT_BASE_DIR, args.out_dir),
        (OUTFILE_NAME, args.out_fname),
    )
    for cfg_key, arg_val in value_overrides:
        if arg_val:
            cfg[cfg_key] = arg_val

    # flags can only switch an option on; when absent, the configuration value is kept
    if args.only_first:
        cfg[ONLY_FIRST] = True
    if args.only_final:
        cfg[ONLY_FINAL] = True

    # checking
    if cfg[COMBINE_LOGS] and not cfg[OUTFILE_NAME]:
        raise InvalidDataError("When combining outputs from multiple log files into one pdb, specify the output "
                               "file name")
    if cfg[COMBINE_LOGS] and not cfg[ONLY_FINAL]:
        warning("When combining outputs from multiple log files into one pdb, only the last coordinates of each "
                "log file will be kept.")
        cfg[ONLY_FINAL] = True

    if cfg[OUT_BASE_DIR]:
        # exist_ok avoids the race between checking for the directory and creating it
        os.makedirs(cfg[OUT_BASE_DIR], exist_ok=True)
def main(argv=None):
    """
    Command-line entry point for the gausslog2com script.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(
        f"Running GaussianWrangler script gausslog2com version {__version__}")
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        # Make sure there are files to process
        gausslog_files = check_for_files(args.file, args.list)

        # A template is mandatory: it must be both specified and an existing file
        tpl_fname = args.tpl
        if not tpl_fname:
            raise InvalidDataError("No template file ('-t' option) specified")
        if not os.path.isfile(tpl_fname):
            raise IOError(tpl_fname)

        # Read the template, then use it with each of the data files
        use_tpl_charge = args.charge_from_tpl
        com_tpl_content = process_gausscom_tpl(tpl_fname, use_tpl_charge)
        process_gausslog_files(gausslog_files, com_tpl_content, use_tpl_charge,
                               args.low_energy, args.step_num, args.out_dir,
                               args.output_fname)
    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except (InvalidDataError, UnicodeDecodeError) as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
def parse_cmdline(argv):
    """
    Returns the parsed argument list and return code.
    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``.
    :return: tuple of the parsed arguments (None if parsing did not complete) and a return code
    """
    arg_list = sys.argv[1:] if argv is None else argv

    # Help texts are assembled first so the option definitions below stay compact
    config_help = ("The location of the (optional) configuration file in ini format. The "
                   "default file name is {}, located in the base directory where the "
                   "program as run. The program will run using either the specifications "
                   "from the configuration file or from the command line. Command line "
                   "specifications will override those in the configuration "
                   "file.".format(DEF_CFG_FILE))
    out_dir_help = ("The directory where the output files will be placed. This will "
                    "override any '{}' entry in the configuration file. The default is "
                    "the same directory as the log file.".format(OUT_BASE_DIR))
    file_help = ("The location of a Gaussian output file. Will override any '{}' entry in "
                 "the configuration file.".format(GAUSSLOG_FILE))
    list_help = ("The location of the list of Gaussian output files. Will override any "
                 "'{}' entry in a configuration file.".format(GAUSSLOG_FILES_FILE))
    out_fname_help = ("The name for the created pdb file. If none is provided, it will "
                      "take the basename from the provided Gaussian output file name, "
                      "with the '.pdb' extension.")
    tpl_help = ("The location of the pdb template file. Will override any '{}'entry in the "
                "config file.".format(PDB_TPL_FILE))
    only_first_help = ("Flag to have the program output a pdb only from the first "
                       "set of coordinates in the log file. Will override any '{}' entry "
                       "in the config file. The default is False.".format(ONLY_FIRST))
    only_final_help = ("Flag to have the program output a pdb only from the last "
                       "set of coordinates in the log file. Will override any '{}' entry "
                       "in the config file. The default is False.".format(ONLY_FINAL))

    # initialize the parser object:
    parser = argparse.ArgumentParser(description='Creates pdb files from Gaussian input files, given a template pdb '
                                                 'file.')
    # the config option is converted by read_cfg, so args.config holds what read_cfg returns
    parser.add_argument("-c", "--config", help=config_help, default=DEF_CFG_FILE, type=read_cfg)
    parser.add_argument("-d", "--out_dir", help=out_dir_help, default=None)
    parser.add_argument("-f", "--file", help=file_help, default=None)
    parser.add_argument("-l", "--list", help=list_help, default=None)
    parser.add_argument("-o", "--out_fname", help=out_fname_help, default=None)
    parser.add_argument("-t", "--tpl", help=tpl_help, default=None)
    parser.add_argument("-a", "--only_first", help=only_first_help, action="store_true", default=False)
    parser.add_argument("-z", "--only_final", help=only_final_help, action="store_true", default=False)

    try:
        parsed_args = parser.parse_args(arg_list)
    except (KeyError, InvalidDataError, MissingSectionHeaderError, SystemExit) as e:
        # an exit code of 0 (e.g. after argparse prints help) is not treated as an error
        if getattr(e, 'code', None) == 0:
            return None, GOOD_RET
        warning(e)
        parser.print_help()
        return None, INPUT_ERROR

    return parsed_args, GOOD_RET
Esempio n. 6
0
def process_input_file(input_fname, mw_formula_dict, mw_deprot_formula_dict,
                       mw_prot_formula_dict, form_smi_dict, form_dbe_dict,
                       smi_name_dict, smi_source_dict):
    """
    Reads the file line by line and passes each entry to add_smi_to_dicts to update the dictionaries
    :param input_fname: str, the location of the file to read; each non-blank line holds up to three
        SEP_KEY-separated entries: a SMILES string, then optionally molecule name(s) and a data source
    :param mw_formula_dict: dict, passed to add_smi_to_dicts
    :param mw_deprot_formula_dict: dict, passed to add_smi_to_dicts
    :param mw_prot_formula_dict: dict, passed to add_smi_to_dicts
    :param form_smi_dict: dict, passed to add_smi_to_dicts
    :param form_dbe_dict: dict, passed to add_smi_to_dicts
    :param smi_name_dict: dict, passed to add_smi_to_dicts
    :param smi_source_dict: dict, passed to add_smi_to_dicts
    :return: the number of entries that were added to the dictionaries
    :raises InvalidDataError: if a line has more than three entries
    """
    rel_path_name = os.path.relpath(input_fname)
    new_entries = 0
    with open(input_fname) as f:
        for raw_line in f:
            stripped_line = raw_line.strip()
            if not stripped_line:
                continue
            fields = [entry.strip() for entry in stripped_line.split(SEP_KEY)]

            # without a SMILES string, there is no way to properly add any data to the library
            if not fields[0]:
                warning(
                    f"In reading file: {rel_path_name}\n    Line: '{stripped_line}'\n        does not "
                    f"provide a SMILES string as the first '|'-separated entry. This line will be skipped."
                )
                continue

            if len(fields) > 3:
                rel_path = os.path.relpath(input_fname)
                raise InvalidDataError(
                    f"Error while reading: {rel_path}\n    line: '{stripped_line}'\n"
                    f"    Expected no more than 3 comma-separated values: \n        SMILES "
                    f"string (only one per line),\n        molecule name(s) (separate "
                    f"multiple names with semicolons),\n        string description of the "
                    f"data source (with no commas or semicolons)")

            # the 2nd and 3rd entries are optional; use blank strings for any that are missing
            fields.extend([""] * (3 - len(fields)))
            smi_str, mol_name, mol_source = fields

            # only the dictionaries passed in are handed on; no global variables are referenced here
            was_added = add_smi_to_dicts(mw_formula_dict,
                                         mw_deprot_formula_dict,
                                         mw_prot_formula_dict,
                                         form_smi_dict,
                                         form_dbe_dict,
                                         smi_name_dict,
                                         smi_source_dict,
                                         smi_str,
                                         mol_name=mol_name,
                                         mol_source=mol_source)
            if was_added:
                new_entries += 1
    print(
        f"Completed reading file: {rel_path_name}\n    Added {new_entries} entries to the dictionaries\n"
    )
    return new_entries
def parse_cmdline(argv):
    """
    Returns the parsed argument list and return code.
    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``.
    :return: tuple of the parsed arguments (None if argparse could not parse them) and a return code:
        GOOD_RET on success (or when argparse exits with code 0), IO_ERROR when the template file
        or the file(s) to read were not specified, and INPUT_ERROR for other parsing problems
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(description="Creates Gaussian input files from Gaussian other files, given a "
                                                 "template input file. The default output name is the same as the "
                                                 "base name of the file with coordinates to use, with the '.com' "
                                                 "extension.")
    parser.add_argument("-c", "--charge_read_com", help="Flag to take the charge and multiplicity from the input file "
                                                        "to be read rather than from the template file. "
                                                        "The default is {}.".format(False),
                        action="store_true", default=False)
    parser.add_argument("-f", "--com_file", help="The location of the Gaussian input file with coordinates to be "
                                                 "input into the template file to create a new input file.",
                        default=None)
    parser.add_argument("-l", "--list_file", help="The location of a text file with a list of Gaussian input files "
                                                  "with coordinates to be input into the template file to create new "
                                                  "input files. Each file name should be on a separate line.",
                        default=None)
    parser.add_argument("-o", "--out_dir", help="A directory where output files should be saved. The default location "
                                                "is the current directory.", default=None)
    # The template has no default (it is checked below), so the help text must not advertise one;
    # the previous text also displayed a literal, unformatted '{}' placeholder.
    parser.add_argument("-t", "--tpl_file", help="The location of the Gaussian input file template (required).",
                        default=None)

    args = None
    try:
        args = parser.parse_args(argv)
        # missing required input is reported via IOError so that callers receive IO_ERROR
        if args.tpl_file is None:
            raise IOError("A template file (specified with '-t') must be provided.")
        if args.com_file is None and args.list_file is None:
            raise IOError("No files have been specified to be read. Use '-f' to specify a single file, or '-l' to "
                          "specify a file with a list of input files to be read and converted using the specified "
                          "template file.")
    except IOError as e:
        warning("Problems reading file:", e)
        parser.print_help()
        return args, IO_ERROR
    except (KeyError, InvalidDataError, MissingSectionHeaderError, SystemExit) as e:
        # an exit code of 0 (e.g. after argparse prints help) is not treated as an error
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
Esempio n. 8
0
def main(argv=None):
    """
    Command-line entry point: optionally reads an input file into copies of the library dictionaries
    and prints them, and optionally creates images for the library.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        if args.file_name:
            # work on copies of the global dictionaries, so that only the copies are altered;
            # the order matches the positional parameters of the two functions called below
            working_dicts = (
                MW_FORM_DICT.copy(),
                MW_DEPROT_FORM_DICT.copy(),
                MW_PROT_FORM_DICT.copy(),
                deepcopy(FORMULA_SMI_DICT),
                FORMULA_DBE_DICT.copy(),
                deepcopy(SMI_NAME_DICT),
                deepcopy(SMI_SOURCE_DICT),
            )
            number_additions = process_input_file(args.file_name, *working_dicts)

            # Reading complete; output only if something was added
            if number_additions:
                pretty_print_dicts(*working_dicts)

        if args.image_library:
            # use the comma-separated keys provided, if any; otherwise all keys in the library
            mw_keys = [x.strip() for x in args.mw_list.split(",")] if args.mw_list else MW_FORM_DICT.keys()
            if args.out_dir:
                make_dir(args.out_dir)
            image_grid_mult_mw(mw_keys, MW_FORM_DICT, FORMULA_SMI_DICT, out_dir=args.out_dir)

    except IOError as e:
        warning(e)
        return IO_ERROR
    except InvalidDataError as e:
        warning(e)
        return INVALID_DATA

    return GOOD_RET  # success
def main(argv=None):
    """
    Runs the main program: parses the arguments and prints the result of canvas.
    :param argv: The command line arguments.
    :return: The return code for the program's termination: GOOD_RET on success, the parse_cmdline
        return code if parsing did not produce usable arguments, or INVALID_DATA.
    """
    parsed, status = parse_cmdline(argv)
    if parsed is None or status != GOOD_RET:
        return status

    try:
        output = canvas(parsed.no_attribution)
        print(output)
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 10
0
def main(argv=None):
    """
    Command-line entry point for the check_gauss script: finds the files to check, then checks them
    for convergence, or processes scan output, or checks for termination, depending on the options.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script check_gauss version {__version__}")

    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    try:
        # Find files to process; 'dir_subdirs' takes precedence over 'directory' and is the only
        # option that turns on the search of subdirectories
        check_sub_dirs = bool(args.dir_subdirs)
        search_dir = args.dir_subdirs or args.directory or None
        check_file_list = check_for_files(args.file_name,
                                          args.file_list,
                                          search_pattern=args.extension,
                                          search_dir=search_dir,
                                          search_sub_dir=check_sub_dirs)

        # now check either for convergence or termination
        if args.step_converg or args.final_converg:
            check_convergence(check_file_list, args.step_converg, args.to_step,
                              args.best, args.all)
        else:
            # If output directory does not exist, make it:
            out_dir = args.output_directory
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            if not args.scan:
                check_termination(args, check_file_list)
            else:
                scan_array = collect_output_scan_steps(check_file_list)
                x_fit, y_fit = plot_scan(scan_array, args.scan)
                find_stable_points(x_fit, y_fit)

    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 11
0
def main(argv=None):
    """
    Command-line entry point for the gausslog2pdb script.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script gausslog2pdb version {__version__}")

    # Read input
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    cfg = args.config

    # Read template and data files
    try:
        check_input(args, cfg)

        # set up list of files to process: entries from the list file (if it exists) come first,
        # followed by the single file (if one was specified)
        cfg[GAUSSLOG_FILES] = []
        list_fname = cfg[GAUSSLOG_FILES_FILE]
        if os.path.isfile(list_fname):
            with open(list_fname) as f:
                cfg[GAUSSLOG_FILES].extend(data_file.strip() for data_file in f)
        single_fname = cfg[GAUSSLOG_FILE]
        if single_fname is not None:
            cfg[GAUSSLOG_FILES].append(single_fname)

        if not cfg[GAUSSLOG_FILES]:
            raise InvalidDataError("No files to process: no '{}' specified and "
                                   "no list of files found for: {}".format(GAUSSLOG_FILE, cfg[GAUSSLOG_FILES_FILE]))
        if cfg[ONLY_FIRST] and cfg[ONLY_FINAL]:
            raise InvalidDataError("Cannot specify both '{}' and '{}'".format(ONLY_FIRST, ONLY_FINAL))

        # now start the actual work; without a template file, an empty template is used
        pdb_tpl_content = process_pdb_file(cfg[PDB_TPL_FILE]) if cfg[PDB_TPL_FILE] else {}
        process_gausscom_files(cfg, pdb_tpl_content)
    except (IOError, UnicodeDecodeError) as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 12
0
def check_file_termination(output_file, good_output_dir, completed_list,
                           likely_failed_list, perhaps_running_list):
    """
    Classifies an output file by its last line and appends its name to exactly one of the lists;
    a file matching NORM_TERM_PAT is also moved into good_output_dir.
    :param output_file: str, the location of the file to check
    :param good_output_dir: str, directory into which a normally terminated file is moved
    :param completed_list: list, appended to when the last line matches NORM_TERM_PAT
    :param likely_failed_list: list, appended to when the last line matches a pattern in FAIL_PAT_LIST
    :param perhaps_running_list: list, appended to when no pattern matches
    :return: None; if the file has no lines, a warning is issued and no list is changed
    """
    try:
        with open(output_file, 'r') as fh:
            all_lines = fh.readlines()
            # raises IndexError when the file has no lines
            last_line = all_lines[-1].strip()
    except IndexError:
        warning(
            "Could not read the last line (may be blank) of file: {}".format(
                output_file))
        return

    if NORM_TERM_PAT.match(last_line):
        completed_list.append(output_file)
        destination = os.path.join(good_output_dir, os.path.basename(output_file))
        os.rename(output_file, destination)
    elif any(pattern.match(last_line) for pattern in FAIL_PAT_LIST):
        likely_failed_list.append(output_file)
    else:
        perhaps_running_list.append(output_file)
def check_input_csv_header(fname):
    """
    Checks the first line that is not a comment (a line starting with '#') for the expected header
    :param fname: str, the location of the file to check the header
    :return: num_header_lines, int: 1 by default, plus 1 for each leading comment line; 0 if it
        appears that the header is missing
    :raises InvalidDataError: if read_csv_header returns None (file may be blank) or if the file
        contains only comment lines
    """
    num_header_lines = 1
    potential_header = read_csv_header(fname)
    base_fname = os.path.relpath(fname)
    if potential_header is None:
        raise InvalidDataError(f"Input file may be blank: {base_fname}")
    if potential_header[0].startswith("#"):
        # A single pass finds the first non-comment row. The previous 'while' loop re-read the file
        # forever if every line was a comment, or if the first field of the first non-comment row
        # still started with '#' after stripping and dequoting.
        with open(fname) as f:
            for row in f:
                if row.startswith("#"):
                    num_header_lines += 1
                else:
                    potential_header = [dequote(x) for x in row.strip().split(",")]
                    break
            else:
                raise InvalidDataError(f"Found only comment lines (starting with '#') in file: {base_fname}")
    if potential_header != TYPICAL_CSV_HEADER and potential_header != CSV_RET_HEADER:
        try:
            # Still move on to reading values, but first check if there may not be a header
            if len(potential_header) > 1:
                # if right into values (that is, no trouble converting to float), continue to reading values
                float(potential_header[0])
                float(potential_header[1])
                # NOTE(review): this also discards the count of any leading comment lines -- confirm
                #     that callers do not need to skip them in this case
                num_header_lines = 0
                warning(f"No header found in file: {base_fname}\n    Will attempt to read data as M/Z and intensity.")
            else:
                raise ValueError
        except ValueError:
            # check that the difference is not a trivial difference in case
            if (len(potential_header) in [2, 3]) and (potential_header[0].lower() == TYPICAL_CSV_HEADER[0].lower()) \
                    and (potential_header[1].lower() == TYPICAL_CSV_HEADER[1].lower()):
                pass
            else:
                warning(f"While reading file: {base_fname}\n    Did not find the expected headers "
                        f"'{TYPICAL_CSV_HEADER}', but '{potential_header}'\n Will attempt to read data as M/Z, "
                        f"intensity, and, if there is a third column, retention time (in min).")
    return num_header_lines
Esempio n. 14
0
def main(argv=None):
    """
    Command-line entry point for the smi2gausscom script: hands the parsed options to process_smiles.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(
        f"Running GaussianWrangler script test_data.smi2gausscom version {__version__}"
    )

    # Read input
    parsed, status = parse_cmdline(argv)
    if parsed is None or status != GOOD_RET:
        return status

    try:
        process_smiles(parsed.gau_tpl_file, parsed.list_file,
                       parsed.max_confs, parsed.out_dir)
    except (IOError, UnicodeDecodeError) as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data template:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 15
0
def initiate_state(add_rate, cfg, rep, sg_ratio):
    """
    Creates the initial events and the initial state for one run
    :param add_rate: number, used as the rate of the GROW event and in the seed calculation
    :param cfg: configuration mapping; reads INI_MONOS, MAX_MONOS, RANDOM_SEED, and RXN_RATES
    :param rep: int, added to the random seed so that each repeat uses a different seed
    :param sg_ratio: number, converted to the fraction sg_ratio / (1 + sg_ratio) -- presumably the
        S-to-G monomer ratio; confirm against the caller
    :return: tuple of initial_events, initial_state
    """
    pct_s = sg_ratio / (1 + sg_ratio)
    ini_num_monos = cfg[INI_MONOS]
    if not cfg[RANDOM_SEED]:
        monomer_draw = np.random.rand(ini_num_monos)
    else:
        # we don't want the same random seed for every iteration
        run_seed = cfg[RANDOM_SEED] + int(add_rate / 100 + sg_ratio * 10) + rep
        np.random.seed(run_seed)
        raw_draw = np.random.rand(ini_num_monos)
        monomer_draw = np.around(raw_draw, MAX_NUM_DECIMAL)
    monomers = create_initial_monomers(pct_s, monomer_draw)
    # initial event must be oxidation to create reactive species; all monomers may be oxidized
    events = create_initial_events(monomers, cfg[RXN_RATES])
    # the monomers and events are grouped into the initial state
    state = create_initial_state(events, monomers)
    # a GROW event is added (after the state is created) only if more monomers may be added
    if cfg[MAX_MONOS] > cfg[INI_MONOS]:
        events.append(Event(GROW, [], rate=add_rate))
    elif cfg[MAX_MONOS] < cfg[INI_MONOS]:
        warning(f"The specified maximum number of monomers ({cfg[MAX_MONOS]}) is less than the "
                f"specified initial number of monomers ({cfg[INI_MONOS]}). \n The program will "
                f"proceed with the with the initial number of monomers with no addition of monomers.")
    return events, state
Esempio n. 16
0
def main(argv=None):
    """
    Command-line entry point for the gausscom2com script: reads the template, then processes each
    Gaussian input file with it.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script gausscom2com version {__version__}")
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    # Read template and data files
    try:
        # Make sure there are files to process
        gausscom_files = check_for_files(args.com_file, args.list_file)

        # the template is read with the opposite of the 'charge_read_com' flag
        read_com_charge = args.charge_read_com
        com_tpl_content = process_gausscom_tpl(args.tpl_file, not read_com_charge)
        for gausscom_file in gausscom_files:
            process_gausscom_file(gausscom_file, com_tpl_content, args.charge_read_com, args.out_dir)

    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 17
0
def process_smiles(gau_tpl_fname, smi_list, max_num_confs, out_dir):
    """
    Creates Gaussian input files for each SMILES string provided
    https://www.rdkit.org/docs/GettingStartedInPython.html
    :param gau_tpl_fname: str, the location of the template file to use to create input files
    :param smi_list: iterable of SMILES strings
    :param max_num_confs: int, passed to gen_conformers as the number of conformations
    :param out_dir: str, passed to create_out_fname as the base directory for the files
    :return: N/A, writes files and prints notes on files created
    :raises InvalidDataError: if the template does not contain REQ_STR
    """
    gau_tpl_str = read_tpl(gau_tpl_fname)
    if REQ_STR not in gau_tpl_str:
        raise InvalidDataError(
            f"Did not find the required string '{REQ_STR}' in the provided Gaussian input "
            f"template file.")

    for smi in smi_list:
        parsed_mol = Chem.MolFromSmiles(smi)
        if parsed_mol is None:
            # MolFromSmiles returned None for this string; report it and move on to the next
            warning(f"Skipping SMILES input string '{smi}' due to error\n")
            continue
        Chem.Kekulize(parsed_mol)
        mol_with_hs = AddHs(parsed_mol)
        confs = gen_conformers(mol_with_hs, num_confs=max_num_confs)
        base_fname = create_out_fname(get_mol_name(smi),
                                      ext='com',
                                      base_dir=out_dir,
                                      rel_path=True)
        # stays at -1 if there are no conformers, so that 0 files are reported below
        conf_id = -1
        for conf_id in confs:
            com_fname = create_out_fname(base_fname, suffix=f'_{conf_id}')
            coord_list = get_pdb_coord_list(MolToPDBBlock(mol_with_hs, confId=conf_id))
            tpl_vals = {ATOMS: "\n".join(coord_list)}
            fill_save_tpl(gau_tpl_str, tpl_vals, gau_tpl_fname, com_fname,
                          print_info=False)
        # the count is taken from the last conformer id -- assumes ids run from 0 upward
        print(f"Wrote {conf_id + 1} files with base name '{base_fname}'")
Esempio n. 18
0
def main(argv=None):
    """
    Command-line entry point for the pdbs2gausscoms script: reads the Gaussian template named in
    the configuration, then processes the pdb files with it.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(
        f"Running GaussianWrangler script pdbs2gausscoms version {__version__}"
    )

    # Read input
    parsed, status = parse_cmdline(argv)
    if parsed is None or status != GOOD_RET:
        return status

    # Read pdb files
    cfg = parsed.config
    try:
        tpl_fname = cfg[GAU_TPL_FILE]
        gau_tpl_content = process_gausscom_file(tpl_fname)
        process_pdb_files(cfg, gau_tpl_content)
    except (IOError, UnicodeDecodeError) as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data template:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 19
0
def parse_cmdline(argv):
    """
    Returns the parsed argument list and return code.
    :param argv: a list of arguments, or `None` for ``sys.argv[1:]``.
    :return: tuple of the parsed arguments (None if parsing did not complete) and a return code:
        GOOD_RET, IO_ERROR, or INPUT_ERROR
    """
    arg_list = sys.argv[1:] if argv is None else argv

    config_help = ("The location of the configuration file in ini format. "
                   "The default file name is {}, located in the "
                   "base directory where the program as run.".format(DEF_CFG_FILE))

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description='Creates pdb files from Gaussian input files, given a template pdb '
                    'file.')
    # the config option is converted by read_cfg, so args.config holds what read_cfg returns
    parser.add_argument("-c", "--config", help=config_help,
                        default=DEF_CFG_FILE, type=read_cfg)

    try:
        parsed_args = parser.parse_args(arg_list)
    except IOError as e:
        warning("Problems reading file:", e)
        parser.print_help()
        return None, IO_ERROR
    except (KeyError, InvalidDataError, MissingSectionHeaderError,
            SystemExit) as e:
        # an exit code of 0 (e.g. after argparse prints help) is not treated as an error
        if getattr(e, 'code', None) == 0:
            return None, GOOD_RET
        warning(e)
        parser.print_help()
        return None, INPUT_ERROR

    return parsed_args, GOOD_RET
Esempio n. 20
0
def main(argv=None):
    """
    Command-line entry point for the gausscom2pdb script.
    :param argv: list of command-line arguments, passed unchanged to parse_cmdline
    :return: GOOD_RET on success; otherwise the parse_cmdline return code, IO_ERROR, or INVALID_DATA
    """
    print(
        f"Running GaussianWrangler script gausscom2pdb version {__version__}")
    parsed, status = parse_cmdline(argv)
    if parsed is None or status != GOOD_RET:
        return status

    cfg = parsed.config

    # Read template and data files
    try:
        # without a template file, an empty template is used
        pdb_tpl_content = process_pdb_file(cfg[PDB_TPL_FILE]) if cfg[PDB_TPL_FILE] else {}
        process_gausscom_files(cfg, pdb_tpl_content)
    except (IOError, UnicodeDecodeError) as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 21
0
def main(argv=None):
    """
    Entry point for run_gauss: parse the command line, merge command-line choices into the
    configuration, then either set up and submit jobs or run them directly.

    Three modes are handled, in this order:
      1. `args.list_of_jobs`: `args.job_name` is a file listing jobs; every non-blank line is set up and
         submitted once per entry in cfg[JOB_LIST].
      2. `args.setup_submit`: the single job is set up and submitted once per entry in cfg[JOB_LIST].
      3. otherwise: each job in cfg[JOB_LIST] is run, followed by any follow-up job threads.

    :param argv: list of arguments, or None (passed on to `parse_cmdline`)
    :return: GOOD_RET on success; otherwise the parser's return code, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script run_gauss version {__version__}")
    # Read input
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    cfg = args.config

    try:
        # overwrite default values from reading config if they were specified in command line
        args_key_map = [(args.list_of_jobs, False, LIST_OF_JOBS),
                        (args.old_chk_fname, None, FIRST_JOB_CHK),
                        (args.setup_submit, False, SETUP_SUBMIT)]
        for arg_val, arg_default, cfg_key in args_key_map:
            if arg_val != arg_default:
                cfg[cfg_key] = arg_val
        # The following do not have default config options, so overwrite
        cfg[NO_SUBMIT] = args.no_submit
        if cfg[FIRST_JOB_CHK]:
            # remove extension (if any) from cfg[FIRST_JOB_CHK]
            cfg[FIRST_JOB_CHK] = os.path.splitext(cfg[FIRST_JOB_CHK])[0]

        # for the "list_of_jobs" option, "job_name" is actually the name of the file with the list of jobs
        if args.list_of_jobs:
            with open(args.job_name) as f:
                for line in f:
                    s_line = line.strip()
                    if not s_line:
                        continue
                    input_job_file = os.path.splitext(s_line)[0] + cfg[GAUSS_IN_EXT]
                    tpl_dict = {
                        JOB_NAME: get_fname_root(s_line),
                        INPUT_FILE: input_job_file
                    }
                    for thread in cfg[JOB_LIST]:
                        setup_and_submit(cfg, thread, tpl_dict, args.testing,
                                         args.ignore_chk_warning)
            return GOOD_RET

        # otherwise, job_name is actually the job name. We want to ignore any extension on it
        job_name_perhaps_with_dir = os.path.splitext(args.job_name)[0]
        job_name = os.path.basename(job_name_perhaps_with_dir)
        tpl_dict = {
            JOB_NAME: job_name,
            INPUT_FILE: job_name_perhaps_with_dir + cfg[GAUSS_IN_EXT]
        }
        if cfg[JOB_LIST][0] == '' and not os.path.isfile(tpl_dict[INPUT_FILE]):
            raise IOError("Could not find input file: {}".format(
                tpl_dict[INPUT_FILE]))

        if args.setup_submit:
            for thread in cfg[JOB_LIST]:
                setup_and_submit(cfg, thread, tpl_dict, args.testing,
                                 args.ignore_chk_warning)
            return GOOD_RET

        for job in cfg[JOB_LIST]:
            run_job(job, job_name_perhaps_with_dir, tpl_dict, cfg,
                    args.testing)

        if len(cfg[FOLLOW_JOBS_LIST]) > 1:
            for thread_index, thread in enumerate(cfg[FOLLOW_JOBS_LIST]):
                # unless all threads are to be newly submitted, the first thread is run directly below
                if thread_index == 0 and not cfg[ALL_NEW]:
                    continue
                setup_and_submit(cfg, thread, tpl_dict, args.testing,
                                 args.ignore_chk_warning)

        if len(cfg[FOLLOW_JOBS_LIST]) > 0 and not cfg[ALL_NEW]:
            for job in cfg[FOLLOW_JOBS_LIST][0]:
                run_job(job, job_name_perhaps_with_dir, tpl_dict, cfg,
                        args.testing)

    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except (subprocess.CalledProcessError, KeyError) as e:
        warning("", e)
        # a failed subprocess or missing key is a failure; do not fall through to the success return
        return INVALID_DATA
    except InvalidInputError as e:
        warning("Check input:", e)
        return INVALID_DATA
    except InvalidDataError as e:
        warning("Invalid data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 22
0
def main(argv=None):
    """
    Entry point for gausslog_unique: read the list of Gaussian output files, collect data from each,
    group them with `compare_gausslog_info`, and report the results with `print_results`.

    :param argv: list of arguments, or None (passed on to `parse_cmdline`)
    :return: GOOD_RET on success; otherwise the parser's return code, IO_ERROR, or INVALID_DATA
    """
    print(f"Running GaussianWrangler script gausslog_unique version {__version__}")
    # Read input
    args, ret = parse_cmdline(argv)
    if args is None or ret != GOOD_RET:
        return ret

    # Read template and data files
    try:
        # check input: a max_diff cutoff needs a numeric value; enthalpy is selected when neither
        #     energy nor gibbs was chosen
        if args.max_diff:
            args.max_diff = float(args.max_diff)
            if not (args.energy or args.gibbs):
                args.enthalpy = True

        # check that we have files, sorting the listed names by whether each can be found
        found_files = []
        not_found = []
        with open(args.list) as list_file:
            for raw_line in list_file:
                log_name = raw_line.strip()
                if not log_name:
                    continue
                destination = found_files if os.path.isfile(log_name) else not_found
                destination.append(log_name)
            if not_found:
                raise IOError(
                    "Could not find the following file(s) listed in '{}':\n    "
                    "{}".format(args.list, '\n    '.join(sorted(set(not_found)))))
            if len(found_files) < 2:
                raise InvalidDataError(
                    "This program expects at least two files to compare to determine if they "
                    "have the same conformation. Check input.")

        # get the data from the files, keyed by base file name
        log_info = {}
        for log_path in found_files:
            log_info[os.path.basename(log_path)] = process_gausslog_file(
                log_path, find_dih=True, find_converg=True)

        # process data from files
        list_of_conf_lists = compare_gausslog_info(log_info, args.tol)
        winner_str, warn_files_str = print_results(
            log_info, list_of_conf_lists, args.enthalpy, args.energy,
            args.max_diff, args.out_fname)
        if len(warn_files_str) > 0:
            warning("Check convergence of file(s):" + warn_files_str)

    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except (InvalidDataError, UnicodeDecodeError) as e:
        warning("Problems reading data:", e)
        return INVALID_DATA
    except ValueError as e:
        warning(e.args[0])
        return INVALID_DATA
    return GOOD_RET  # success
Esempio n. 23
0
def parse_cmdline(argv):
    """
    Set up the command-line parser for the library-expansion / image-library script and parse the arguments.

    At least one of '-f'/'--file_name' or '-i'/'--image_library' must be given; otherwise the problem is
    reported, the help is printed, and INPUT_ERROR is returned.

    :param argv: list of arguments, or `None` to use ``sys.argv[1:]``
    :return: tuple of (parsed arguments, or None if parsing did not finish; return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description="This script has two modes, chosen by selected '-f' or '-i': "
                    "1) The '-f' option: reads a file to add entries to "
                    "dictionaries of lignin decomposition molecules that may be "
                    "observed in mass spectrometry of lignin-derived compounds. Given "
                    "SMILES strings, and optionally/ideally molecular names and/or source "
                    "of the SMILES (e.g. observed in analysis of model compounds), the "
                    "dictionaries are expanded to include additional potentially "
                    "observed molecular weights and isomers. Note: it does not change "
                    "the original libraries within this package, but instead outputs "
                    "new libraries, which could be used to update the library in this "
                    "package. 2) The '-i' option: creates an image library of all "
                    "SMILES structures currently in the compound library (further details "
                    "provided under the '-i' option description).")
    parser.add_argument(
        "-d", "--out_dir",
        help="A directory where output files should be saved. The default location "
             "is the current working directory.",
        default=None)
    parser.add_argument(
        "-f", "--file_name",
        help=f"File name of values separated by '{SEP_KEY}' (to avoid conflicts "
             "with IUPAC molecule names) with up to 3 values per line: SMILES "
             "string (required), molecule name(s) (optional; split multiple "
             "names with a semicolon), source (e.g. model compound analysis)",
        default=None)
    # The output directory is chosen with '-d'/'--out_dir'; this parser has no '-o' option
    parser.add_argument(
        "-i", "--image_library",
        help="Flag to request that the program create a 2D image library of "
             "the SMILES strings in the library. One file will be created "
             "per exact molecular weight (calculated only from the most "
             "abundant isotope). If there are multiple SMILES matches for a "
             "molecular formula, the name of the file is '{molecular "
             "weight (with a '-' instead of a '.')}_{molecular formula}"
             ".png', and the images of each structure within the file will "
             "be labeled with its SMILES string. If there is only one "
             "structure in the library for a molecular formula, the SMILES "
             "string will be appended to the name. These files will be "
             "saved in the current directory, unless a different directory "
             "is specified with the '-d' option.",
        action='store_true')
    parser.add_argument(
        "-m", "--mw_list",
        help="A list of molecular weight keys for making an image library.",
        default=None)

    args = None
    try:
        args = parser.parse_args(argv)
        if not args.image_library and not args.file_name:
            raise InvalidDataError(
                "Please choose to either provide a file_name ('-f') to read new dictionary "
                "entries, or the image_library flag ('-i') to request 2D image library."
            )
    except (KeyError, InvalidDataError, IOError, SystemExit) as e:
        # an exit code of 0 (as raised after printing help) is not an error
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR
    return args, GOOD_RET
Esempio n. 24
0
def add_smi_to_dicts(mw_formula_dict,
                     mw_deprot_formula_dict,
                     mw_prot_formula_dict,
                     form_smi_dict,
                     form_dbe_dict,
                     smi_name_dict,
                     smi_source_dict,
                     entered_smi,
                     mol_name=None,
                     mol_source=None):
    """
    Given a SMILES string, and optionally a name and/or source, update the designated dictionaries in place

    :param mw_formula_dict: dict of strs, eg. {'77.03913': 'C6H6', ...},
           (keys=MW as str, values=molecular formulas (str))
    :param mw_deprot_formula_dict: dict of strs, eg. {'78.04695': 'C6H6', ...}, (MW's have H mass subtracted)
           (keys=MW as str, values=molecular formulas (str))
    :param mw_prot_formula_dict: dict of strs, eg. {'78.04695': 'C6H6', ...}, (MW's have H mass added)
           (keys=MW as str, values=molecular formulas (str))
    :param form_smi_dict: dict of sets, e.g. {'C6H6': {'C1=CC=CC=C1'}, ...}
           (keys=molecular formulas (str), values=set of corresponding SMILES strings)
    :param form_dbe_dict: dict of floats, e.g. {'C6H6': 4, ...}
           (keys=molecular formulas (str), values=double bond equivalents (float))
    :param smi_name_dict: dict of sets, e.g. {'C1=COCCC1': {'dihydropyran', '3,4-dihydro-2h-pyran'}, ...}
           (keys=SMILES strings as returned by smi_to_formula, values=set of lower-cased names)
    :param smi_source_dict: dict of sets, e.g. {'C1=CC=CC=C1': {'common_molecule'}, ...}
           (keys=SMILES strings as returned by smi_to_formula, values=set of lower-cased sources)
    :param entered_smi: str, the user-inputted SMILES string
    :param mol_name: str, optional, molecule name(s) for the string; multiple names are separated by ';'
    :param mol_source: str, optional, a note on the source of the molecule (e.g. if from model compound study)
    :return: boolean, True if any dictionary was changed
    """
    # this will track addition to any dictionary--not going granular; can do so if later wished
    addition_to_dict = False
    new_smi, formula, mw, mw_deprot, mw_prot, dbe = smi_to_formula(entered_smi)
    if formula in form_smi_dict:
        # Only ever add to the set of a known formula: an empty SMILES string must not replace the
        #     SMILES strings already collected for it
        if new_smi != '' and new_smi not in form_smi_dict[formula]:
            form_smi_dict[formula].add(new_smi)
            addition_to_dict = True
    else:
        form_smi_dict[formula] = {new_smi}
        form_dbe_dict[formula] = dbe
        addition_to_dict = True

    mw_dict_names = [
        "molecular ion MW dictionary", "deprotonated MW dictionary",
        "protonated MW dictionary"
    ]
    mw_dicts = [mw_formula_dict, mw_deprot_formula_dict, mw_prot_formula_dict]
    mw_inputs = [str(mw), str(mw_deprot), str(mw_prot)]
    for dict_name, mw_dict, new_mw in zip(mw_dict_names, mw_dicts, mw_inputs):
        if new_mw not in mw_dict:
            mw_dict[new_mw] = formula
            addition_to_dict = True
        elif mw_dict[new_mw] != formula:
            # hopefully never encountered; the existing entry is kept
            warning(
                f"Unexpectedly, the MW {new_mw} was already in the {dict_name} pared with molecular "
                f"formula {mw_dict[new_mw]}, while the input has formula {formula}"
            )

    for opt_val, opt_dict in ((mol_name, smi_name_dict), (mol_source, smi_source_dict)):
        if not opt_val:
            continue
        # user may add a list of names--unlikely multiple sources, but won't hurt it
        # cannot split on comma, because that can be a part of a name. PubChem splits on ; so seems safe
        for val in opt_val.split(";"):
            stripped_val = val.strip().lower()
            if not stripped_val:
                continue
            known_vals = opt_dict.setdefault(new_smi, set())
            # as above, check is to track if we are changing any dictionaries
            if stripped_val not in known_vals:
                known_vals.add(stripped_val)
                addition_to_dict = True
    return addition_to_dict
Esempio n. 25
0
def process_gausscom_file(cfg, gausscom_file, pdb_tpl_content):
    """
    Read the atom types and coordinates from one Gaussian input file and write them to a pdb file.

    When cfg[PDB_TPL_FILE] is set, the atom rows start as a deep copy of pdb_tpl_content[SEC_ATOMS] and only
    items 5-7 of each row (the coordinates) are replaced; a warning is printed when an atom type differs from
    the template's. Without a template, a generic 'HETATM' row is created for each atom.

    :param cfg: configuration; cfg[PDB_TPL_FILE] and cfg[OUT_BASE_DIR] are read here
    :param gausscom_file: str, location of the Gaussian input file to read
    :param pdb_tpl_content: dict of template content (keys SEC_HEAD, SEC_ATOMS, SEC_TAIL, NUM_ATOMS); may be
        empty when no template file is used
    :raises InvalidDataError: if the file ends before the atom section, an atom line does not have three
        numeric coordinates, or the number of atoms does not match the template
    """
    use_template = bool(cfg[PDB_TPL_FILE])
    with open(gausscom_file) as d:
        if use_template:
            pdb_data_section = copy.deepcopy(pdb_tpl_content[SEC_ATOMS])
        else:
            pdb_data_section = []
        section = SEC_HEAD
        atom_id = 0

        for line in d:
            line = line.strip()
            # not currently keeping anything from the header; just check num atoms
            if section == SEC_HEAD:
                try:
                    # there may be some instructions (which start with %, and can have some blank lines)
                    #    before the "route card lines" (which start with #)
                    while not GAU_HEADER_PAT.match(line):
                        line = next(d).strip()
                    # skip first line of route card
                    line = next(d).strip()
                    # for "route card" and then description, there may be more than one header line;
                    #    look for blank line
                    for _ in range(2):
                        while line:
                            line = next(d).strip()
                        # now move past the blank line, and get the content of the following line
                        line = next(d).strip()
                except StopIteration:
                    # a truncated file would otherwise leak a StopIteration that no caller handles
                    raise InvalidDataError(
                        "Reached the end of gausscom file: {}\n   before finding the atom section (expected "
                        "a route section and a description, each ended by a blank line, followed by the "
                        "charge and multiplicity line)".format(gausscom_file)) from None
                # now on charge, multiplicity line, which we also skip with the "continue"
                section = SEC_ATOMS
                continue

            elif section == SEC_ATOMS:
                if not line:
                    # Since the tail will come only from the template, nothing more is needed after reading atoms
                    break
                split_line = line.split()

                atom_type = split_line[0]
                # if working from a template, check atom type
                if use_template:
                    try:
                        pdb_atom_type = pdb_data_section[atom_id][8].split(' ')[-1]
                    except IndexError:
                        raise InvalidDataError(
                            'Gausscom file: {}\n   has more atoms than the expected {} atoms in '
                            'the template file: {}'.format(
                                gausscom_file, pdb_tpl_content[NUM_ATOMS],
                                cfg[PDB_TPL_FILE]))
                    if atom_type != pdb_atom_type:
                        warning(
                            "Atom types do not match for atom number {}; pdb atom type is {} while gausscom type "
                            "is {}".format(atom_id, pdb_atom_type, atom_type))
                else:
                    pdb_data_section.append([
                        'HETATM', '{:5d}'.format(atom_id + 1),
                        ' {:4} '.format(atom_type), 'UNL  ', 1, 0.0, 0.0, 0.0,
                        '  1.00  0.00          {:>2}'.format(atom_type)
                    ])

                # validate before the slice assignment, which would otherwise silently shorten the row
                try:
                    coords = [float(val) for val in split_line[1:4]]
                except ValueError:
                    coords = []
                if len(coords) != 3:
                    raise InvalidDataError(
                        "In gausscom file: {}\n   expected an atom type followed by three numeric "
                        "coordinates, but read: '{}'".format(gausscom_file, line))
                pdb_data_section[atom_id][5:8] = coords
                atom_id += 1

    # Now that finished reading the file, first make sure didn't exit before reaching the desired number of atoms
    if use_template and atom_id != pdb_tpl_content[NUM_ATOMS]:
        raise InvalidDataError(
            'In gausscom file: {}\n  found {} atoms, while the pdb template has {} atoms'
            .format(gausscom_file, atom_id, pdb_tpl_content[NUM_ATOMS]))
    f_name = create_out_fname(gausscom_file,
                              ext='.pdb',
                              base_dir=cfg[OUT_BASE_DIR])
    # without a template, the content may be an empty dict, so there is no head or tail section to add
    head_section = pdb_tpl_content.get(SEC_HEAD, [])
    tail_section = pdb_tpl_content.get(SEC_TAIL, [])
    list_to_file(head_section + pdb_data_section + tail_section,
                 f_name,
                 list_format=PDB_FORMAT)
Esempio n. 26
0
def _analyze_ms2_sets(ms2_dict, args):
    """
    For each set of related MS2 files, check that a parent peak can be identified and, if so, make the
    plots and search for substructure sets.

    :param ms2_dict: dict keyed by the designation of a set of files, as filled by make_ms2_dict
    :param args: arguments; reads num_decimals_ms_accuracy, out_dir, and threshold
    """
    for fkey, fkey_dict in ms2_dict.items():
        ion_energies = fkey_dict[ION_ENERGIES].keys()
        if len(ion_energies) < 2:
            warning(
                f"No chemical species matching will be attempted for files designated {fkey}.\n    "
                f"For this functionality, at least two MS2 data sets must be provided, one of them "
                f"from a 0 ionization energy run.\n    Note that the program matches sets of MS2 "
                f"output by searching for file names that differ only the number after\n    'HCD' "
                f"(case insensitive), which is read as the ionization energy."
            )
        elif 0 not in ion_energies:
            warning(
                f"Did not find 0 ionization energy output for the set of files designated {fkey}.\n    "
                f"This output is used to identify the parent. Contact the developers for "
                f"more options.")
        else:
            print(
                f"\nNow analyzing {fkey} files.\nUsing M/Z "
                f"{fkey_dict[PARENT_MZ]:.{args.num_decimals_ms_accuracy}f} as the parent peak, as "
                f"it is the closest peak in the 0 ionization output to the\n    specified precursor ion. "
                f"The closest matching molecular formula, {fkey_dict[PARENT_FORMULA]} (with "
                f"{fkey_dict[PARENT_MATCH_ERR]:.1f} ppm error),\n    will be used as the parent "
                f"formula.")
            plot_mz_v_intensity(fkey, fkey_dict[ION_MZ_DICT],
                                args.num_decimals_ms_accuracy,
                                args.out_dir)
            make_dbe_mw_graphs(fkey, fkey_dict[ION_ENERGIES],
                               args.out_dir)
            find_substructure_sets(fkey, fkey_dict, args.threshold,
                                   args.num_decimals_ms_accuracy,
                                   args.out_dir)


def wrangler_main(args, process_file_list):
    """
    Calls to other functions to perform all requested tasks

    Steps: process the blank file (if any, and not quitting after csv creation); process each file in
    process_file_list, writing matches for each MS level; then, once all files have been read, analyze the
    gathered MS2 data sets.

    :param args: arguments (namespace with the parsed command-line options)
    :param process_file_list: iterable of str, the names of the files to process
    :return: final_str: a string with all of the info that would normally be log outputs
    """
    final_str = ""
    write_mode = 'w'
    ms2_dict = defaultdict(lambda: defaultdict(lambda: None))
    blank_data_array_dict = {}
    max_mz_in_stdout = 5
    if args.quit_after_mzml_to_csv:
        max_unique_mz_to_collect = max_mz_in_stdout
    else:
        max_unique_mz_to_collect = 1e9
        # blank file processing
        if args.blank_file_name is not None:
            fname = args.blank_file_name
            fname_lower = os.path.basename(fname).lower()
            blank_data_array_dict = process_blank_file(args)
            for ms_level, ms_array in blank_data_array_dict.items():
                # only the output string is needed for the blank
                _, my_str = initial_output(
                    fname, fname_lower, ms_array, ms_level,
                    max_unique_mz_to_collect, max_mz_in_stdout, args.threshold,
                    args.num_decimals_ms_accuracy, args.ret_time_accuracy,
                    args.num_decimals_ret_time_accuracy, args.out_dir,
                    args.quit_after_mzml_to_csv, args.direct_injection)
                final_str += my_str

    # all other file processing
    gathered_ms2_data = False
    for fname in process_file_list:
        base_fname = os.path.basename(fname)
        fname_lower = base_fname.lower()
        data_array_dict = process_ms_run_file(args, fname,
                                              blank_data_array_dict)
        if data_array_dict is None:
            continue
        prot_flag = fnmatch.fnmatch(fname_lower, "*+*")
        deprot_flag = fnmatch.fnmatch(fname_lower, "*-*")
        for ms_level, ms_array in data_array_dict.items():
            trimmed_mz_array, my_str = initial_output(
                fname, fname_lower, ms_array, ms_level,
                max_unique_mz_to_collect, max_mz_in_stdout, args.threshold,
                args.num_decimals_ms_accuracy, args.ret_time_accuracy,
                args.num_decimals_ret_time_accuracy, args.out_dir,
                args.quit_after_mzml_to_csv, args.direct_injection)
            final_str += my_str
            if args.quit_after_mzml_to_csv or "blank" in fname_lower:
                # move to next file without further analysis
                continue

            num_matches, matched_formulas, short_output_list, long_output_dict, high_int_peak_str_list = \
                compare_mz_mw(base_fname, trimmed_mz_array, args.threshold, args.omit_mol_ion,
                              deprot_flag, prot_flag, args.non_dom_iso_flag, max_mz_in_stdout)

            write_output(fname, ms_level, num_matches, short_output_list,
                         long_output_dict.values(), matched_formulas,
                         args.combined_output_fname, args.omit_mol_ion,
                         deprot_flag, prot_flag, write_mode, args.out_dir)
            final_str += get_high_inten_long(ms_level, high_int_peak_str_list,
                                             long_output_dict)

            # if there is a combined_output_fname and more than one file, append the data, don't write over it
            if args.combined_output_fname:
                write_mode = 'a'

            if "2" in ms_level and "blank" not in fname_lower and "hcd" in fname_lower:
                if len(process_file_list) < 2:
                    warning(
                        f"Only one set of MS2 data has been read (from file {base_fname}).\n"
                        f"    No chemical species matching will be attempted, since at least two MS2 data "
                        f"sets must be provided for\n    this procedure, with one of the data sets coming "
                        f"from a 0 ionization energy run.")
                else:
                    make_ms2_dict(fname_lower, ms_level, ms2_dict,
                                  trimmed_mz_array, long_output_dict,
                                  args.threshold, args.ms_accuracy,
                                  args.num_decimals_ms_accuracy)
                    gathered_ms2_data = True

    # Now that finished reading each file, exit (if only saving csv) or graph. This must run once, after the
    #     loop over files, so each set of MS2 data is complete when it is analyzed.
    # If '-q' option used to exit without analysis, skip this section (which skips to the end of the program)
    if gathered_ms2_data and not args.quit_after_mzml_to_csv:
        _analyze_ms2_sets(ms2_dict, args)
    return final_str
Esempio n. 27
0
def parse_cmdline(argv):
    """
    Set up the gausslog_unique command-line parser and parse the given arguments.

    :param argv: list of arguments, or `None` to use ``sys.argv[1:]``
    :return: tuple of (parsed arguments, or None if parsing did not finish; return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description='Given a list of Gaussian output files, returns a list with '
                    'unique conformers, defined by having dihedral angles within the '
                    'specified tolerance.')
    parser.add_argument(
        "-e", "--energy",
        help="Sort output by lowest electronic energy (not ZPE corrected). "
             "The default is False. This flag is superseded by the '-n'/'--enthalpy' "
             "and '-g'/'--gibbs' flags.",
        action='store_true')
    parser.add_argument(
        "-g", "--gibbs",
        help="Sort output by lowest Gibbs free energy. If not found, the script will "
             "sort output by the lowest electronic energy. The default is False.",
        action='store_true')
    parser.add_argument(
        "-l", "--list",
        help="The file location of the list of Gaussian output files. There should "
             "be one output file listed per line. The default file name is '{}', "
             "located in the base directory where the program as "
             "run. This program assumes that all the given files have the same atom "
             "order.".format(DEF_LIST_FILE),
        default=DEF_LIST_FILE)
    parser.add_argument(
        "-m", "--max_diff",
        help="If a numerical value is provided with this option, the output list "
             "will be split between files within or not within this maximum "
             "difference (in kcal/mol) from the lowest energy or enthalpy. "
             "Additionally, the program will output a file with only the file "
             "names of conformations within the cutoff; see the '-o'/'--out_fname' "
             "option to specify the name of this file.",
        default=None)
    parser.add_argument(
        "-n", "--enthalpy",
        help="Sort output by lowest enthalpy. If no enthalpy is found, it will "
             "sort by the lowest electronic energy. The default is False.",
        action='store_true')
    parser.add_argument(
        "-o", "--out_fname",
        help="When using the '-m'/'--max_diff' option, a file will be created "
             "with only the names of the files within the specified cutoff, one "
             "per line. This option allows the user to specify the output "
             f"file name. By default, the name will be '{DEF_OUT_NAME}'.",
        default=DEF_OUT_NAME)
    # type=float so that a tolerance given on the command line is numeric, like the default, instead of
    #     being passed on as a string
    parser.add_argument(
        "-t", "--tol",
        help="The tolerance, in degrees, for concluding that dihedral angles are "
             "equivalent. The default value is {}.".format(DEF_DIH_TOL),
        default=DEF_DIH_TOL,
        type=float)

    args = None
    try:
        args = parser.parse_args(argv)
    except (KeyError, InvalidDataError, MissingSectionHeaderError,
            SystemExit) as e:
        # an exit code of 0 (as raised after printing help) is not an error
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
Esempio n. 28
0
def parse_cmdline(argv):
    """
    Set up the pdb-to-Gaussian-input command-line parser, parse the arguments, and, when the configuration
    does not name a template file, fill the configuration from the command-line options instead.

    :param argv: list of arguments, or `None` to use ``sys.argv[1:]``
    :return: tuple of (parsed arguments, or None if parsing did not finish; return code)
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description='Creates Gaussian input files from pdb files, given a template input '
                    'file. The required input file provides the name/location of the '
                    'template file and a file with a list of pdb files to convert.')
    # the flags named in this help text must match the options defined below (-t, -l, -f, -r, -n)
    parser.add_argument(
        "-c", "--config",
        help="Optional: the location of the configuration file. The default file "
             "name is '{}', located in the base directory where the program as run. "
             "If a config file is not provided, use the command-line options to "
             "specify the '{}' (-t) and '{}' (-l) or '{}' (-f). The command lines "
             "for the '{}' flag (-r) or only the first entry in the pdb ('{}', -n) "
             "may also be specified.".format(DEF_CFG_FILE, GAU_TPL_FILE,
                                             PDB_LIST_FILE, PDB_FILE, REMOVE_H,
                                             NUM),
        default=DEF_CFG_FILE,
        type=read_cfg)
    parser.add_argument(
        "-t", "--tpl_file",
        help="Specifies the '{}'".format(GAU_TPL_FILE),
        default=None)
    parser.add_argument(
        "-l", "--pdb_list_file",
        help="Option to specify a file with a list of pdbs ('{}') to convert "
             "(one file per line on the list).".format(PDB_LIST_FILE),
        default=None)
    parser.add_argument(
        "-f", "--file",
        help="Option to specify a pdb file ('{}') to convert.".format(PDB_FILE),
        default=None)
    parser.add_argument(
        "-n", "--num",
        help="Only read if a config file is not provided. This command can be used to "
             "specify only using the first '-n'/'--num' set(s) of coordinates in a pdb "
             "file to create gausscom file(s). The default is to use all coordinates, "
             "making as many input files as there are molecules/conformations in the "
             "pdb.",
        default=None,
        type=int)
    parser.add_argument(
        "-r", "--remove_final_h",
        help="Option to specify removing the last H atom from the PDB "
             "file(s) when creating the gausscom files. The default is "
             "False.",
        action='store_true')

    args = None
    try:
        args = parser.parse_args(argv)
        # the command-line options are only used when the configuration did not provide a template file
        if args.config[GAU_TPL_FILE] is None:
            if args.tpl_file is None:
                raise InvalidDataError(
                    "Could not read config file: {}\n    and did not specify a 'tpl_file' "
                    "('-t' option). A tpl_file is needed to run this "
                    "script.".format(args.config[CONFIG_NAME]))
            args.config[GAU_TPL_FILE] = args.tpl_file
            if args.num:
                args.config[NUM] = args.num
            if args.remove_final_h:
                args.config[REMOVE_H] = True
            if args.file:
                args.config[PDB_FILE] = args.file
            if args.pdb_list_file:
                args.config[PDB_LIST_FILE] = args.pdb_list_file
    except (IOError, KeyError, InvalidDataError, MissingSectionHeaderError,
            SystemExit) as e:
        # an exit code of 0 (as raised after printing help) is not an error
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR

    return args, GOOD_RET
def x_value_warning(data_max_x, default_x_max):
    """
    Warn that the default x-axis maximum is smaller than the largest x value in the data
    :param data_max_x: the maximum x-axis value found in the data
    :param default_x_max: the default maximum x-axis value
    :return: n/a; the message is passed to warning()
    """
    message = (
        f"The default maximum x-axis value ({default_x_max}) is less than the maximum x-axis value in the "
        f"data ({data_max_x}). Not all data will be shown.")
    warning(message)
def plot_select_mz_intensity_v_ret_time(fname, ms_level, mz_list_to_plot,
                                        data_array, num_decimals_ms_accuracy,
                                        num_decimals_ret_time_accuracy,
                                        out_dir):
    """
    Plot intensity versus retention time for up to 5 selected M/Z values (selects the rows for each M/Z value in
    this method; calls plotting function)
    Note: the M/Z column (column 0) of data_array is rounded in place to num_decimals_ms_accuracy decimals.
    :param fname: str, name of the file where the data originated
    :param ms_level: str, used to distinguish between different MS output of the same input file (no overwriting)
    :param mz_list_to_plot: list, with up to 5 mz values to plot vs time on the same plot
    :param data_array: ndarray (n x 3) with M/Z, intensity, and retention times
    :param num_decimals_ms_accuracy: int, number of decimal points in MS accuracy, for rounding
    :param num_decimals_ret_time_accuracy: number of decimal points in retention time accuracy, for rounding
    :param out_dir: None or str, provides location where new file should be saved (None for current directory)
    :return: None if no plot is produced (no M/Z values, or more than 5, were provided). Otherwise, a dict with
        one entry per M/Z value for which data was found: the key is the M/Z value as a str (formatted with
        num_decimals_ms_accuracy decimals) and the value is an ndarray (k x 2) with retention time in the first
        column and intensity in the second column.
    """
    default_x_max = 16.
    x_index = 2

    num_mz_vals = len(mz_list_to_plot)
    if num_mz_vals > 5:
        warning(
            "Error while attempting to plot select M/Z values versus retention times.\n    This "
            "method expects at most 5 M/Z values to display on one plot. This plot will not be produced."
        )
        return None
    if num_mz_vals == 0:
        warning(
            "Error while attempting to plot select M/Z values versus retention times.\n    No "
            "M/Z values provided. This plot will not be produced.")
        return None
    if num_mz_vals == 1:
        title = f"Intensity versus Retention Time for M/Z={mz_list_to_plot[0]}"
    else:
        title = "Intensity versus Retention Time for Selected M/Z Values"

    # At least sometimes, mz_list_to_plot and data_array are not already rounded, so doing so here; both must be
    #    rounded the same way for the equality selection below to find matching rows
    mz_list_to_plot = np.around(mz_list_to_plot, num_decimals_ms_accuracy)
    data_array[:, 0] = np.around(data_array[:, 0], num_decimals_ms_accuracy)
    # The y-axis limit is based on all intensities (not only those of the chosen M/Z values) to have more
    #    consistent y-axis ranges; the max retention time is checked only for the chosen M/Z values
    y_max = find_pos_plot_limit(np.max(data_array[:, 1]))

    inten_time_dict = {}
    data_x_max = 0.
    for mz_val in mz_list_to_plot:
        sub_data_array = data_array[data_array[:, 0] == mz_val]
        if len(sub_data_array) < 1:
            warning(
                f"No retention time data found for M/Z value {mz_val} from {os.path.relpath(fname)}.\n    This "
                f"M/Z will be omitted from the plot.")
            continue
        curve_label = f"{mz_val:.{num_decimals_ms_accuracy}f}"
        # make this x, y, so ret_time, intensity
        inten_time_dict[curve_label] = np.column_stack(
            (sub_data_array[:, x_index], sub_data_array[:, 1]))
        data_x_max = max(data_x_max, np.max(sub_data_array[:, x_index]))

    if data_x_max > default_x_max:
        # shared helper, so the wording stays the same as in other plots
        x_value_warning(data_x_max, default_x_max)

    x_label = "Retention time (min)"
    y_label = "Intensity (unscaled)"
    suffix = "_int_v_time"
    # add the MS level to the output name only if the input file name does not already appear to include it
    if "_ms" not in fname.lower():
        suffix = f"_ms{ms_level}" + suffix
    plot_fname = create_out_fname(fname,
                                  suffix=suffix,
                                  ext='png',
                                  base_dir=out_dir)
    make_vlines_plot(title,
                     x_label,
                     y_label,
                     inten_time_dict,
                     plot_fname,
                     num_decimals_ret_time_accuracy,
                     default_x_max,
                     y_max,
                     loc="upper left")
    return inten_time_dict