Esempio n. 1
0
def process_bin_max_min_vals(raw_key_val_tuple_list):
    """
    Convert raw (key, value-string) tuples into dictionaries of floats.

    Each value string must hold 1 or 3 comma-separated floats: the max hop
    (step) size, optionally followed by the min and max values that should be
    obtained from hopping.

    @param raw_key_val_tuple_list: raw entries to be processed
    @return: three dictionaries (hop, min, max) of keys and float values
    @raise InvalidDataError: on an unparseable value, wrong value count, or
        min >= max
    """
    hop_dict, min_dict, max_dict = {}, {}, {}
    for key, val in raw_key_val_tuple_list:
        try:
            float_vals = [float(entry.strip()) for entry in val.split(',')]
            num_vals = len(float_vals)
            if num_vals not in (1, 3):
                raise InvalidDataError(
                    "Unexpected number of values ({})".format(num_vals))
            hop_dict[key] = float_vals[0]
            if num_vals == 3:
                if not float_vals[1] < float_vals[2]:
                    raise InvalidDataError(
                        "Min value ({}) is not less than max value ({})"
                        "".format(round(float_vals[1], 6),
                                  round(float_vals[2], 6)))
                min_dict[key] = float_vals[1]
                max_dict[key] = float_vals[2]
        except (ValueError, InvalidDataError) as e:
            # re-wrap with context about what was expected for this key
            raise InvalidDataError(
                "Encountered error '{}' For key '{}' in section {}, read: {}.\n"
                "Expected 1 or 3 comma-separated floats for each variable (key): the max "
                "hop (step) size, \noptionally followed by the min value, max value that "
                "should be obtained from hopping.".format(
                    e.args[0], key, BASIN_HOP_MIN_MAX, val))
    return hop_dict, min_dict, max_dict
Esempio n. 2
0
def process_coords(cp2k_file, data_tpl_content):
    """
    Build a new atoms section using coordinates read from the cp2k file.

    @param cp2k_file: file being read (positioned at the first coordinate line)
    @param data_tpl_content: data from the template file
    @return: new atoms section, with replaced coordinates
    @raise InvalidDataError: on a premature blank line, a missing trailing
        blank line, or too few coordinate lines
    """
    updated_atoms = list(data_tpl_content[ATOMS_CONTENT])
    atoms_read = 0
    last_atom_id = 0
    for coord_line in cp2k_file:
        tokens = coord_line.split()
        if not tokens:
            raise InvalidDataError(
                "Encountered an empty line after reading {} atoms. Expected to read "
                "coordinates for {} atoms before encountering a blank line."
                "".format(last_atom_id, data_tpl_content[NUM_ATOMS]))
        last_atom_id = int(tokens[0])
        # overwrite the xyz columns of the template entry with the new coords
        updated_atoms[atoms_read][4:7] = [float(tok) for tok in tokens[3:6]]
        atoms_read += 1
        if last_atom_id == data_tpl_content[NUM_ATOMS]:
            # end of the atoms: the very next line should be blank
            trailing = next(cp2k_file).strip()
            if trailing:
                raise InvalidDataError(
                    "After reading the number of atoms found in the template data file "
                    "({}), did not encounter a blank line, but: {}"
                    "".format(data_tpl_content[NUM_ATOMS], trailing))
            return updated_atoms
    # ran out of lines before reading coordinates for all atoms
    raise InvalidDataError(
        "Did not read coordinates from {} atoms in file: {}".format(
            data_tpl_content[NUM_ATOMS], cp2k_file.name))
Esempio n. 3
0
def process_max_min_vals(raw_key_val_tuple_list, default_penalty):
    """
    Convert raw (key, value-string) tuples to a dict of float lists.

    Each value string holds either the edge of the potential and the penalty
    stiffness, or only the edge (the default penalty is then appended).

    @param raw_key_val_tuple_list: raw entries to be processed
    @param default_penalty: default penalty for the flat-bottomed potential
    @return: dictionary of keys and float values
    @raise InvalidDataError: on an unparseable value or wrong value count
    """
    val_dict = {}
    for key, val in raw_key_val_tuple_list:
        try:
            parsed = [float(piece.strip()) for piece in val.split(',')]
        except ValueError as e:
            raise InvalidDataError(
                "Error in reading max or min value provided for key '{}': {}"
                "".format(key, e.args[0]))
        if len(parsed) == 2:
            val_dict[key] = parsed
        elif len(parsed) == 1:
            # only the edge was given: fall back to the default penalty
            val_dict[key] = parsed + [default_penalty]
        else:
            raise InvalidDataError(
                "For key '{}' in max or min section, read: {}. \nExpected 1 or 2 values: "
                "either the edge of the potential and the penalty stiffness, or only the "
                "edge of the potential, which will be used with "
                "the default penalty for the flat-bottomed potential"
                "".format(key, val))
    return val_dict
Esempio n. 4
0
def find_atom_data(lammps_f, atom_ids):
    """Searches the given file location for atom data for the given IDs.

    :param lammps_f: The LAMMPS data file to search.
    :param atom_ids: The set of atom IDs to collect.
    :return: A tuple of (nested dict of atoms found, keyed first by time step
        then by atom ID; dict of box dimensions keyed by time step).
    :raises: InvalidDataError If the file is missing atom data or is otherwise malformed.
    """
    tstep_atoms = OrderedDict()
    tstep_box = {}
    atom_count = len(atom_ids)
    # nan-filled template marking "box dims not yet read" for a timestep
    empty_dims = np.full(3, np.nan)

    with open(lammps_f) as lfh:
        file_name = os.path.basename(lammps_f)
        tstep_id = None
        box_dim = np.copy(empty_dims)
        tstep_val = "(no value)"
        for line in lfh:
            if line.startswith(TSTEP_LINE):
                try:
                    tstep_val = next(lfh).strip()
                    tstep_id = int(tstep_val)
                except ValueError as e:
                    raise InvalidDataError(
                        "Invalid timestep value {}: {}".format(tstep_val, e))
            elif line.startswith(NUM_ATOM_LINE):
                # not needed, so just move along
                next(lfh)
            elif line.startswith(BOX_LINE):
                try:
                    # each of the next 3 lines holds lo/hi bounds for one axis
                    for coord_id in range(len(box_dim)):
                        box_vals = list(map(float, next(lfh).strip().split()))
                        if len(box_vals) == 2:
                            box_dim[coord_id] = box_vals[1] - box_vals[0]
                except (ValueError, KeyError) as e:
                    raise InvalidDataError(
                        "Invalid PBC value read on timestep {}: {}".format(
                            tstep_val, e))
            elif tstep_id is not None:
                atom_lines = find_atom_lines(lfh, atom_ids, tstep_id,
                                             file_name)
                if len(atom_lines) != atom_count:
                    try:
                        missing_atoms_err(atom_ids, atom_lines, tstep_id,
                                          file_name)
                    except InvalidDataError as e:
                        warning(e)
                        warning("Skipping timestep and continuing.")
                else:
                    tstep_atoms[tstep_id] = atom_lines
                    tstep_box[tstep_id] = box_dim
                    tstep_id = None
                    # BUG FIX: was `box_dim = empty_dims`, which made every
                    # later timestep share (and mutate) the same ndarray,
                    # corrupting previously stored box dims and the nan
                    # template itself. Each timestep needs a fresh copy.
                    box_dim = np.copy(empty_dims)
    return tstep_atoms, tstep_box
Esempio n. 5
0
def process_cfg_conv(raw_cfg, def_cfg_vals=None, req_keys=None, int_list=True):
    """
    Converts the given raw configuration, filling in defaults and converting the
    specified value (if any) to the default value's type.

    @param raw_cfg: The configuration map.
    @param def_cfg_vals: dictionary of default values (treated as empty if None)
    @param req_keys: dictionary of required types (treated as empty if None)
    @param int_list: flag to specify if lists should be converted to a list of integers
    @return: The processed configuration.
    @raise InvalidDataError: for an unexpected key or an unconvertible value
    @raise KeyError: if a required key is missing from raw_cfg
    """
    # BUG FIX: the parameters default to None but were used directly as dicts,
    # which raised a TypeError on `key in None` when either was omitted.
    if def_cfg_vals is None:
        def_cfg_vals = {}
    if req_keys is None:
        req_keys = {}
    proc_cfg = {}
    for key in raw_cfg:
        if not (key in def_cfg_vals or key in req_keys):
            raise InvalidDataError(
                "Unexpected key '{}' in configuration ('ini') file.".format(
                    key))
    key = None
    try:
        for key, def_val in def_cfg_vals.items():
            proc_cfg[key] = conv_raw_val(raw_cfg.get(key), def_val, int_list)
        for key, type_func in req_keys.items():
            proc_cfg[key] = type_func(raw_cfg[key])
    except KeyError as e:
        # BUG FIX: the format string had one placeholder for two arguments,
        # silently dropping the underlying KeyError detail.
        raise KeyError("Missing config val for key '{}': {}".format(key, e))
    except Exception as e:
        raise InvalidDataError('Problem with config vals on key {}: {}'.format(
            key, e))
    # normalize and sanity-check the scipy optimization method when non-default
    if proc_cfg[SCIPY_OPT_METHOD] != DEF_OPT_METHOD:
        proc_cfg[SCIPY_OPT_METHOD] = proc_cfg[SCIPY_OPT_METHOD].lower()
        if proc_cfg[SCIPY_OPT_METHOD] not in TESTED_SCIPY_MIN:
            warning(
                "Only the following optimization methods have been tested: scipy.optimize.minimize with {}."
                "".format(TESTED_SCIPY_MIN))
    # these basin-hopping options must be floats when provided
    for int_key in [TEMP, NITER_SUCCESS]:
        if proc_cfg[int_key] is not None:
            proc_cfg[int_key] = float(proc_cfg[int_key])

    # Remove any repeated parameters, or zero-character-length params (can happen if accidentally an additional comma)
    if len(proc_cfg[OPT_PARAMS]) > 0:
        filtered_opt_params = []
        for param in proc_cfg[OPT_PARAMS]:
            if len(param) > 0:
                if param in filtered_opt_params:
                    warning("'{}' repeated in '{}'; skipping repeated entry".
                            format(param, OPT_PARAMS))
                else:
                    filtered_opt_params.append(param)
        proc_cfg[OPT_PARAMS] = filtered_opt_params

    return proc_cfg
Esempio n. 6
0
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values
    supplemented by default values.

    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises InvalidDataError: on a malformed file, missing main section,
        unexpected keys, or an invalid section name.
    :raises IOError: if the file cannot be read.
    """
    config = ConfigParser()
    try:
        good_files = config.read(f_loc)
    except MissingSectionHeaderError:
        raise InvalidDataError(MISSING_SEC_HEADER_ERR_MSG.format(f_loc))
    if not good_files:
        raise IOError('Could not read file {}'.format(f_loc))

    # Start with empty template value dictionaries to be filled
    proc = {TPL_VALS: OrderedDict(), TPL_EQ_PARAMS: OrderedDict()}

    if MAIN_SEC not in config.sections():
        raise InvalidDataError(
            "The configuration file is missing the required '{}' section".
            format(MAIN_SEC))

    for section in config.sections():
        if section == MAIN_SEC:
            try:
                proc.update(
                    cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS,
                             REQ_KEYS))
            except InvalidDataError as e:
                # use e.args[0] rather than the Python-2-only `e.message`
                if 'Unexpected key' in e.args[0]:
                    raise InvalidDataError(
                        e.args[0] +
                        " Does this belong \nin a template value section such as '[{}]'?"
                        "".format(TPL_VALS_SEC))
                # BUG FIX: any other InvalidDataError was previously swallowed
                # silently, leaving `proc` without the main-section values.
                raise
        elif section in [TPL_VALS_SEC, TPL_EQS_SEC]:
            val_ordered_dict = process_tpl_vals(config.items(section))
            if section == TPL_EQS_SEC:
                # just keep the names, so we know special processing is required
                proc[TPL_EQ_PARAMS] = val_ordered_dict.keys()
            proc[TPL_VALS].update(val_ordered_dict)
        else:
            # BUG FIX: message previously read "in not one of"
            raise InvalidDataError(
                "Section name '{}' is not one of the valid section names: {}"
                "".format(section, VALID_SEC_NAMES))

    return proc
Esempio n. 7
0
def parse_cmdline(argv=None):
    """
    Parse the command line for this script.

    :param argv: A list of arguments, or `None` for ``sys.argv[1:]``.
    :return: the parsed argument namespace (or None) and a return code
    """
    if argv is None:
        argv = sys.argv[1:]

    # set up the argument parser
    parser = argparse.ArgumentParser(
        description='Finds the distances between each pair '
        'of atoms listed in the pair file for '
        'each time step in the given LAMMPS dump '
        'file.')
    parser.add_argument(
        "-p", "--pair_files", action="append", default=[],
        help="One or more files containing atom pairs (default {0})".format(
            DEF_PAIRS_FILE))
    parser.add_argument("-f", "--file", default=None,
                        help="The dump file to process")
    parser.add_argument("-l", "--list_file", default=None,
                        help="The file with a list of dump files to process")

    args = None
    try:
        args = parser.parse_args(argv)
        if not args.pair_files:
            # fall back to the default pair file, which must then exist
            args.pair_files.append(DEF_PAIRS_FILE)
            if not os.path.isfile(DEF_PAIRS_FILE):
                raise InvalidDataError(
                    "No pair file specified and did not find the default "
                    "pair file: {}".format(DEF_PAIRS_FILE))
        if args.file is None and args.list_file is None:
            raise InvalidDataError(
                "Specify either a file or list of files to process.")
    except (KeyError, InvalidDataError, SystemExit) as e:
        # SystemExit with code 0 means e.g. '-h' was requested: not an error
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR
    return args, GOOD_RET
Esempio n. 8
0
def make_tpl(cfg, tpl_name, filled_tpl_name):
    """
    Combines the dictionary and template file to create the new file(s)

    One filled template is produced for every combination (cartesian product)
    of the configured template values.

    @param cfg: configuration for the run
    @param tpl_name: the cfg key for the template file name
    @param filled_tpl_name: the cfg key for the filled template file name
    @raise KeyError: if an equation references a parameter with no value
    @raise InvalidDataError: if an equation string cannot be evaluated
    """

    tpl_str = read_tpl(tpl_name)
    tpl_vals_dict = {}

    # one filled template per combination of the configured parameter values
    for value_set in itertools.product(*cfg[TPL_VALS].values()):
        for param, val in zip(cfg[TPL_VALS].keys(), value_set):
            tpl_vals_dict[param] = val

        # equation parameters hold expressions that reference other parameters:
        # first substitute the current values, then evaluate the expression
        for eq_param in cfg[TPL_EQ_PARAMS]:
            try:
                string_to_eval = tpl_vals_dict[eq_param].format(
                    **tpl_vals_dict)
            except KeyError as e:
                raise KeyError(
                    "Missing parameter value {} needed to evaluate '{}' for the parameter '{}'."
                    "".format(e, tpl_vals_dict[eq_param], eq_param))
            try:
                # NOTE(review): eval on config-file text — acceptable only for
                # trusted input files; never feed untrusted configs here
                tpl_vals_dict[eq_param] = eval(string_to_eval)
            except NameError:
                raise InvalidDataError(
                    "Could not evaluate the string '{}' specifying the value for the parameter "
                    "'{}'. Check order of equation entry and/or input parameter values."
                    "".format(string_to_eval, eq_param))

        fill_save_tpl(cfg, tpl_str, tpl_vals_dict, tpl_name, filled_tpl_name)
Esempio n. 9
0
def main(argv=None):
    """
    Entry point: parse arguments, process the specified file, and return a
    status code (GOOD_RET on success, IO_ERROR or INVALID_DATA on failure).
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    len_buffer = None
    try:
        if args.buffer is not None:
            try:
                len_buffer = float(args.buffer)
            except ValueError:
                raise InvalidDataError(
                    "Input for buffer ({}) could not be converted to a float.".format(args.buffer))
        if args.out_dir is None:
            # default to writing next to the input file
            args.out_dir = os.path.dirname(args.file)
        min_max_dict = None
        if args.min_max_file is not None:
            min_max_dict = read_csv(args.min_max_file, quote_style=csv.QUOTE_NONNUMERIC)
        process_file(args.file, args.out_dir, len_buffer, args.delimiter, min_max_dict,
                     header=args.names, make_hist=args.histogram)
    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
Esempio n. 10
0
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted
    values supplemented by default values.

    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.
        Uses default values when the raw value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: if the file cannot be read.
    :raises InvalidDataError: if no data files are specified.
    """
    config = ConfigParser()
    if not config.read(f_loc):
        raise IOError('Could not read file {}'.format(f_loc))
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS, REQ_KEYS)

    # gather data files from the list file (if present) plus the single-file key
    data_files = []
    if os.path.isfile(main_proc[DATA_FILES_FILE]):
        with open(main_proc[DATA_FILES_FILE]) as list_fh:
            data_files.extend(row.strip() for row in list_fh)
    if main_proc[DATA_FILE] is not None:
        data_files.append(main_proc[DATA_FILE])
    main_proc[DATA_FILES] = data_files
    if not data_files:
        raise InvalidDataError("No files to process: no '{}' specified and "
                               "no list of files found for: {}".format(
                                   DATA_FILE, main_proc[DATA_FILES_FILE]))

    return main_proc
Esempio n. 11
0
def process_cp2k_file(cp2k_file, data_tpl_content, data_template_fname):
    """
    Read a cp2k output file, extract the QMMM energy and atom coordinates, and
    write a new data file based on the template content.

    @param cp2k_file: the cp2k output file to read
    @param data_tpl_content: processed data from the template file
    @param data_template_fname: name of the template file (recorded in the new header)
    @raise InvalidDataError: if no atom coordinates are found in the file
    """
    new_atoms_section = None
    # BUG FIX: initialize so the print below cannot raise NameError when
    # ENERGY_PAT never matches any line of the file
    qmmm_energy = None
    with open(cp2k_file) as f:
        data_tpl_content[HEAD_CONTENT][0] = "Created on {} by {} version {} from template file {} and " \
                                            "cp2k output file {}".format(datetime.now(), __name__, __version__,
                                                                         data_template_fname, cp2k_file
                                                                         )
        for line in f:
            line = line.strip()
            if ENERGY_PAT.match(line):
                qmmm_energy = line.split()[-1]
            if COORD_PAT.match(line):
                # Now advance to first line of coordinates
                for _ in range(3):
                    next(f)
                new_atoms_section = process_coords(f, data_tpl_content)

    # If we successfully returned the new_atoms_section, make new file
    if new_atoms_section is None:
        # BUG FIX: message previously read "Did not file atoms coordinates"
        raise InvalidDataError(
            "Did not find atom coordinates in file: {}".format(cp2k_file))
    print("{} energy: {}".format(cp2k_file, qmmm_energy))
    f_name = create_out_fname(cp2k_file, ext='.data')
    list_to_file(data_tpl_content[HEAD_CONTENT] + new_atoms_section +
                 data_tpl_content[TAIL_CONTENT],
                 f_name,
                 print_message=False)
Esempio n. 12
0
def process_file(file_to_process, cfg):
    """
    Will complete the work of this script based on the provided cfg

    @param file_to_process: the file with columns to be combined
    @param cfg: the configuration of this run
    @return: errors or nothing
    """
    # build the type-conversion dict only if any conversion was requested
    conv_dict = None
    if cfg[COL1_CONV] is not None or cfg[COL2_CONV] is not None:
        conv_dict = {}
        if cfg[COL1_CONV] is not None:
            conv_dict[cfg[COL1]] = cfg[COL1_CONV]
        if cfg[COL2_CONV] is not None:
            conv_dict[cfg[COL2]] = cfg[COL2_CONV]

    raw_col_data = read_csv(file_to_process, data_conv=conv_dict, quote_style=csv.QUOTE_NONNUMERIC)
    # both requested columns must appear in the file's header
    for header in (cfg[COL1], cfg[COL2]):
        if header not in raw_col_data[0]:
            raise InvalidDataError("Specified column header '{}' was not found in file: {}"
                                   "".format(header, file_to_process))
    combined_rows = [["".join(map(str, [cfg[PREFIX], row[cfg[COL1]], cfg[MIDDLE],
                                        row[cfg[COL2]], cfg[SUFFIX]]))]
                     for row in raw_col_data]

    list_to_csv(combined_rows, cfg[OUT_FILE], delimiter=',', quote_style=csv.QUOTE_MINIMAL)
Esempio n. 13
0
def find_section_state(line, current_section, section_order, content,
                       highlight_content):
    """
    Determine which section the given line starts (if any), updating the
    bookkeeping structures when a new section is found.

    @param line: current line of data file
    @param current_section: current section
    @param section_order: list keeping track of when a new section is found
    @param content: dictionary; a new key is added for each section found
    @param highlight_content: list of selected content to output (interactions with specified atoms)
    @return: the section currently being read, and a reset count (1)
    @raise InvalidDataError: if no section has yet been identified
    """
    for sec_name, sec_pattern in SEC_PAT_DICT.items():
        if sec_pattern.match(line):
            # new section: record its order and start fresh content lists
            section_order.append(sec_name)
            content[sec_name] = []
            highlight_content[sec_name] = []
            return sec_name, 1

    if current_section is None:
        raise InvalidDataError(
            "Could not identify section from line: {}".format(line))
    return current_section, 1
Esempio n. 14
0
def check_vals(config, sec_name):
    """
    Read the max or min vals section of the given config file, returning a dict
    that maps each original string key to a float limit value.

    If the section is absent, an empty dict is returned; invalid values result
    in an InvalidDataError.

    :param config: The parsed config file that contains a max and/or min section.
    :param sec_name: the name of the section with string/float pairs to digest
    :return: A dict mapping the original column key to the float limit value.
    """
    limit_vals = {}
    limit_val = np.nan
    col_name = None
    try:
        # duplicate col_names need no handling here: the parser has already
        # collapsed them by overwriting the earlier value for that key
        for col_name, limit_val in config.items(sec_name):
            limit_vals[col_name] = float(limit_val)
    except NoSectionError:
        # a missing section is fine; just return the empty dict
        pass
    except ValueError:
        raise InvalidDataError(
            "For section '{}' key '{}', could not convert value '{}' to a float."
            .format(sec_name, col_name, limit_val))
    return limit_vals
Esempio n. 15
0
def read_cfg(floc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted
    values supplemented by default values.

    :param floc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.
        Uses default values when the raw value is missing.
    :return: A dict of the processed configuration file's data.
    :raises InvalidDataError: on a parsing error or missing key.
    :raises IOError: if the file cannot be read.
    """
    config = ConfigParser()
    try:
        if not config.read(floc):
            raise IOError('Could not read file {}'.format(floc))
        main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS,
                             REQ_KEYS, int_list=False)
    except (ParsingError, KeyError) as e:
        raise InvalidDataError(e)
    # warn about any sections that will be ignored
    for sec in config.sections():
        if sec not in SECTIONS:
            warning(
                "Found section '{}', which will be ignored. Expected section names are: {}"
                .format(sec, ", ".join(SECTIONS)))
    # convert any requested column conversions from strings to functions
    for conv_key in (COL1_CONV, COL2_CONV):
        if main_proc[conv_key]:
            main_proc[conv_key] = conv_str_to_func(main_proc[conv_key])
    return main_proc
Esempio n. 16
0
def process_file(data_file, mcfg, delimiter=','):
    """
    Clamp the values in the given data file to the configured per-column
    max/min limits and write the result to a new csv file.

    @param data_file: csv file with a header row
    @param mcfg: processed configuration holding the max/min sub-sections
    @param delimiter: delimiter of the input data file
    @raise InvalidDataError: if a configured column is absent from the file
    """
    list_vectors, headers = read_csv_to_list(data_file,
                                             delimiter=delimiter,
                                             header=True)

    # map each configured column header to its index and limit value
    col_index_dict = {}
    for section in SUB_SECTIONS:
        section_limits = {}
        for col_header, limit in mcfg[section].items():
            if col_header not in headers:
                raise InvalidDataError(
                    "Key '{}' found in configuration file but not in data file: "
                    "{}".format(col_header, data_file))
            # parser already made sure that entries are unique
            section_limits[headers.index(col_header)] = limit
        col_index_dict[section] = section_limits

    edited_vectors = []
    for row in list_vectors:
        for col, max_val in col_index_dict[MAX_SEC].items():
            row[col] = min(row[col], max_val)
        for col, min_val in col_index_dict[MIN_SEC].items():
            row[col] = max(row[col], min_val)
        edited_vectors.append(row)

    f_name = create_out_fname(data_file, ext='.csv')
    list_to_csv([headers] + edited_vectors, f_name, delimiter=',')
Esempio n. 17
0
def adjust_atom_xyz(cfg, data_tpl_content):
    """
    If this option is selected, adjust the xyz coordinates as specified,
    writing one data file per step along the line from XYZ1 to XYZ2.

    @param cfg: configuration for the run
    @param data_tpl_content: processed data from the template
    @return: will print new data files or raise InvalidDataError
    """
    if cfg[ADJUST_ATOM] > data_tpl_content[NUM_ATOMS]:
        raise InvalidDataError(
            "Keyword '{}' specified atom index {} to have its XYZ coordinates adjusted, "
            "but found only "
            "{} atoms in the data template file: {}".format(
                ADJUST_ATOM, cfg[ADJUST_ATOM], data_tpl_content[NUM_ATOMS],
                cfg[DATA_TPL_FILE]))
    # per-step displacement along the XYZ1 -> XYZ2 line
    step_vector = np.divide(np.asarray(np.subtract(cfg[XYZ2], cfg[XYZ1])),
                            cfg[XYZ_STEPS])
    head_content = data_tpl_content[HEAD_CONTENT]
    atoms_content = data_tpl_content[ATOMS_CONTENT]
    tail_content = data_tpl_content[TAIL_CONTENT]
    # python is zero-based, so shift the 1-based atom index
    target_index = cfg[ADJUST_ATOM] - 1
    first_step = -cfg[XYZ_STEPS_EXTEND]
    last_step = cfg[XYZ_STEPS] + cfg[XYZ_STEPS_EXTEND]
    for step in range(first_step, last_step):
        out_name = create_out_fname(cfg[DATA_TPL_FILE],
                                    suffix='_' + str(step),
                                    ext='.data')
        atoms_content[target_index][4:7] = np.round(
            step * step_vector + cfg[XYZ1], 6)
        list_to_file(head_content + atoms_content + tail_content, out_name)
Esempio n. 18
0
def process_output_file(cfg):
    """
    Reads in an initial set of parameter values from a space-separated list, as
    provided by 'fit.best' output from fitEVB. The order is important; thus read
    through the sections and parameters from the (ordered) lists (specified in
    the constants).

    @param cfg: the configuration for this run
    @return: initial values to use in fitting, with both the high and low
        values set to that initial value; empty dict if no best-file was given
    @raise InvalidDataError: if the number of values read does not match the
        expected total parameter count
    """
    vals = {}
    best_file = cfg[MAIN_SEC][BEST_FILE]
    if best_file is not None:
        raw_vals = np.loadtxt(best_file, dtype=np.float64)
        if len(raw_vals) != cfg[MAIN_SEC][PARAM_NUM]:
            raise InvalidDataError(
                "The total number of parameters for the specified sections ({}) does not "
                "equal the total number of values ({}) in the specified fitEVB output file: {}"
                "".format(cfg[MAIN_SEC][PARAM_NUM], len(raw_vals), best_file))
        # FIX: removed a redundant inner `if best_file is not None` re-check
        # that was already guaranteed true by the enclosing branch
        param_index = 0
        for section in cfg[MAIN_SEC][SECTIONS]:
            vals[section] = {}
            for param in FIT_PARAMS[section]:
                # pin both bounds to the read value so fitting starts from it
                vals[section][param] = {
                    LOW: raw_vals[param_index],
                    HIGH: raw_vals[param_index]
                }
                param_index += 1
    return vals
Esempio n. 19
0
def read_cfg(floc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted
    values supplemented by default values.

    :param floc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.
        Uses default values when the raw value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: if the file cannot be read.
    :raises InvalidDataError: if no cp2k files to process are specified.
    """
    config = ConfigParser()
    if not config.read(floc):
        raise IOError("Could not read file '{}'".format(floc))
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS,
                         REQ_KEYS, int_list=False)

    # gather cp2k output files from the list file and/or the single-file key
    cp2k_files = []
    if os.path.isfile(main_proc[CP2K_LIST_FILE]):
        cp2k_files.extend(file_rows_to_list(main_proc[CP2K_LIST_FILE]))
    if main_proc[CP2K_FILE] is not None:
        cp2k_files.append(main_proc[CP2K_FILE])
    main_proc[CP2K_FILES] = cp2k_files

    if not cp2k_files:
        raise InvalidDataError(
            "Found no file names to process. Use the configuration ('ini') file to specify the name "
            "of a single file with the keyword '{}' or a file with listing files to process "
            "(one per line) with the keyword '{}'.".format(
                CP2K_FILE, CP2K_LIST_FILE))

    return main_proc
Esempio n. 20
0
def read_cfg(floc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted
    values supplemented by default values.

    :param floc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.
        Uses default values when the raw value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: if the file cannot be read.
    :raises InvalidDataError: on invalid hij parameters or alignment options.
    """
    config = ConfigParser()
    if not config.read(floc):
        raise IOError('Could not read file {}'.format(floc))
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS, REQ_KEYS)
    # the new hij is calculated only when at least one new parameter was given
    main_proc[CALC_HIJ_NEW] = any(
        main_proc[key] is not None for key in NEW_PARAMS)
    if main_proc[CALC_HIJ_NEW]:
        # then every new parameter must be convertible to a float
        for key in NEW_PARAMS:
            try:
                main_proc[key] = float(main_proc[key].split(',')[0])
            except (TypeError, ValueError, AttributeError):
                if main_proc[key] is None:
                    first_warn = "Missing input value for key '{}'. ".format(
                        key)
                else:
                    first_warn = "Found '{}' for key '{}'. ".format(
                        main_proc[key], key)
                raise InvalidDataError(first_warn +
                                       "Require float inputs for keys: {}"
                                       "".format(NEW_PARAMS))
    if main_proc[ALIGN_COL] not in (TIMESTEP, FILE_NAME):
        raise InvalidDataError(
            "The program currently can only align CEC data on either '{}' or '{}'"
            .format(TIMESTEP, FILE_NAME))
    if main_proc[CALC_CEC_DIST] and main_proc[EVB_SUM_FILE] is None:
        raise InvalidDataError(
            "To calculate CEC distances ('{}' set to True), an '{}' must be specified."
            .format(CALC_CEC_DIST, EVB_SUM_FILE))
    return main_proc
Esempio n. 21
0
def parse_cmdline(argv):
    """
    Returns the parsed argument list and return code.
    `argv` is a list of arguments, or `None` for ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    # initialize the parser object:
    parser = argparse.ArgumentParser(
        description=
        'For each timestep, gather the energy information output by LAMMPS '
        'in the log file.')
    parser.add_argument(
        "-f", "--file", help="The log file to be processed.", default=None)
    parser.add_argument(
        "-l", "--list_file",
        help="The a file with a list of log files to be processes.",
        default=None)
    args = None
    try:
        args = parser.parse_args(argv)
        # collect file names from the single-file flag and/or the list file
        args.file_list = []
        if args.file is not None:
            if not os.path.isfile(args.file):
                raise IOError("Could not find specified log file: {}".format(
                    args.file))
            args.file_list = [args.file]
            args.source_name = args.file
        if args.list_file is not None:
            args.file_list += file_rows_to_list(args.list_file)
            args.source_name = args.list_file
        if len(args.file_list) < 1:
            raise InvalidDataError(
                "Found no log file names to process. Specify one or more files as specified in "
                "the help documentation ('-h').")
    except IOError as e:
        warning("Problems reading file:", e)
        parser.print_help()
        return args, IO_ERROR
    except (KeyError, InvalidDataError, SystemExit) as e:
        # argparse raises SystemExit with code 0 for '-h'; that is not an error
        if hasattr(e, 'code') and e.code == 0:
            return args, GOOD_RET
        warning(e)
        parser.print_help()
        return args, INPUT_ERROR
    return args, GOOD_RET
Esempio n. 22
0
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    :raises IOError: if the file cannot be read.
    :raises InvalidDataError: if a residue-id entry is not an integer, or if mutually
        exclusive renumbering options are both specified.
    """
    config = ConfigParser()
    good_files = config.read(f_loc)
    if not good_files:
        raise IOError('Could not read file {}'.format(f_loc))

    # since not all string lists and not all int lists, import as string and selectively make ints
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)),
                         DEF_CFG_VALS,
                         REQ_KEYS,
                         int_list=False)
    for key in [RESID_QMMM, RESID_QM]:
        for index, entry in enumerate(main_proc[key]):
            try:
                main_proc[key][index] = int(entry)
            # catch only conversion failures; the previous bare `except:` would also
            # swallow SystemExit/KeyboardInterrupt and mask unrelated bugs
            except (TypeError, ValueError):
                raise InvalidDataError(
                    "Encountered '{}' when expected only integers in list for keyword '{}'"
                    "".format(entry, key))

    if (len(main_proc[RESID_QMMM]) + len(main_proc[RESID_QM])) > 0:
        main_proc[PRINT_FOR_CP2K] = True
        # fall back to the default dictionary files when the user did not supply them
        if main_proc[ELEMENT_DICT_FILE] is None:
            main_proc[ELEMENT_DICT_FILE] = DEF_ELEM_DICT_FILE
        if main_proc[RADII_DICT_FILE] is None:
            main_proc[RADII_DICT_FILE] = DEF_RADII_DICT_FILE
    if main_proc[RENUM_MOL] and main_proc[MOL_RENUM_FILE] is not None:
        raise InvalidDataError(
            "This program does not currently support both '{}' and '{}'"
            "".format(RENUM_MOL, MOL_RENUM_FILE))
    return main_proc
Esempio n. 23
0
def missing_atoms_err(atom_ids, found_atoms, tstep_id, file_name):
    """Creates and raises an exception when the function is unable to find atom
    data for all of the requested IDs.

    :param atom_ids: The atoms that were requested.
    :param found_atoms: The collection of atoms found.
    :param tstep_id: The time step ID where the atom data was missing.
    :param file_name: the file name with the time step ID where atom was missing.
    :raises: InvalidDataError Describing the missing atom data.
    """
    missing_ids = [str(atom_id) for atom_id in
                   atom_ids.difference(found_atoms.keys())]
    raise InvalidDataError(
        MISSING_ATOMS_MSG.format(",".join(missing_ids), tstep_id, file_name))
Esempio n. 24
0
def process_conv_tpl_keys(raw_key_val_tuple_list):
    """
    In case there are multiple (comma-separated) values, split on comma and strip. If possible, convert to int or float;
       otherwise. Return the tuple as a processed ordered dict

    @param raw_key_val_tuple_list: key-value dict read from configuration file;
       check for commas to indicate multiple parameters, and converted to int
       or floats if amenable
    @return val_dict: a dictionary of values
    @return dir_dict: a dictionary of initial directions for minimization
    """
    val_dict = OrderedDict()
    dir_dict = {}
    for key, raw_val in raw_key_val_tuple_list:
        entries = [piece.strip() for piece in raw_val.split(',')]
        num_entries = len(entries)
        # more than two entries per key is not supported
        if num_entries > 2:
            raise InvalidDataError(
                "For key '{}', {} values were found ({}). Each parameter should have either one or "
                "two specified values (x0, optionally followed by initial search direction, which "
                "defaults to {}.".format(key, num_entries, raw_val, DEF_DIR))
        if num_entries == 1:
            # if it can be converted, do so; this helps with my printing formatting
            val_dict[key] = conv_num(entries[0])
            dir_dict[key] = DEF_DIR
        else:
            # if there are two values, assume that it is a float with the ability to be optimized
            try:
                val_dict[key] = float(entries[0])
                dir_dict[key] = float(entries[1])
            except ValueError:
                raise InvalidDataError(
                    "For key '{}', read '{}', which could not be converted to floats. When two "
                    "values are provided, they are read as an initial float that may be optimized, "
                    "and the initial search direction for optimization.".
                    format(key, raw_val))
    return val_dict, dir_dict
Esempio n. 25
0
def check_atom_num(req_atom_num, last_line, file_name):
    """
    If applicable, make sure read the expected number of atoms
    @param req_atom_num: None if not specified; otherwise an int
    @param last_line: the last line read before a summary section; the first number of that line
        identifies the number of atoms in the last section
    @param file_name: name of file used for error message
    @return: raise InvalidDataError() if did not find the required
    """
    # nothing to verify when the user did not specify an atom count
    if req_atom_num is None:
        return
    found_atom_num = int(last_line.split()[0])
    if found_atom_num != req_atom_num:
        raise InvalidDataError(
            "Based on user specified num_atoms, expected to have read {} atoms, "
            "but read {} in file: {}".format(req_atom_num, found_atom_num,
                                             file_name))
Esempio n. 26
0
def gather_out_field_names(cfg):
    """
    Based on user options, determine which field names to use in printing output
    @param cfg: configuration for run
    @return: list of field names to be printed for selected options
    """
    chosen_names = []
    for option_name, fieldnames in OPT_FIELD_NAME_DICT.items():
        if not cfg[option_name]:
            continue
        # preserve first-seen order while avoiding duplicate field names
        for field_name in fieldnames:
            if field_name not in chosen_names:
                chosen_names.append(field_name)
    if not chosen_names:
        raise InvalidDataError(
            'None of the following options were selected, so no data will be collected: {}'
            ''.format(OPT_FIELD_NAME_DICT.keys()))
    return chosen_names
Esempio n. 27
0
def find_header_values(line, nums_dict):
    """
    Comprehend entries in lammps data file header
    @param line: line in header section
    @param nums_dict: dictionary keep track of total numbers for types (lammps header data)
    @return: updated nums_dict or error
    """
    try:
        for count_key, count_pat in HEADER_PAT_DICT.items():
            # skip entries whose count has already been captured
            if nums_dict[count_key] is not None:
                continue
            count_match = count_pat.match(line)
            if count_match:
                # regex is 1-based
                nums_dict[count_key] = int(count_match.group(1))
                return
    except (ValueError, KeyError) as e:
        raise InvalidDataError(
            "While reading a data file, encountered error '{}' on line: {}".
            format(e, line))
Esempio n. 28
0
def read_cfg(f_loc, cfg_proc=process_cfg):
    """
    Reads the given configuration file, returning a dict with the converted values supplemented by default values.

    :param f_loc: The location of the file to read.
    :param cfg_proc: The processor to use for the raw configuration values.  Uses default values when the raw
        value is missing.
    :return: A dict of the processed configuration file's data.
    """
    config = ConfigParser()
    if not config.read(f_loc):
        raise IOError('Could not read file {}'.format(f_loc))
    main_proc = cfg_proc(dict(config.items(MAIN_SEC)), DEF_CFG_VALS, REQ_KEYS)
    rel_e_proc = {}
    if REL_E_SEC in config.sections():
        for section_prefix, raw_entry in config.items(REL_E_SEC):
            vals = raw_entry.split(',')
            base_e_file_name = vals[0]
            try:
                base_e_timestep = int(vals[1])
            except ValueError:
                raise InvalidDataError(
                    "Could not convert second entry in '{}' to an integer (expected an "
                    "integer timestep)".format(raw_entry))
            # when the ini file is read, upper case becomes lower, so I'll ignore case in pattern matching
            rel_e_proc[section_prefix] = {
                REL_E_PAT: re.compile(r"^" + section_prefix + ".*", re.I),
                FILE_NAME: base_e_file_name,
                TIMESTEP: base_e_timestep,
                REL_E_REF: np.nan,
                MIN_DIAB_ENE: np.inf,
            }
    main_proc[REL_E_SEC] = rel_e_proc
    # default to per-list printing when neither printing mode was requested
    if not (main_proc[PRINT_PER_FILE] or main_proc[PRINT_PER_LIST]):
        main_proc[PRINT_PER_LIST] = True
        warning("'{}' set to '{}'; setting '{}' to '{}'".format(
            PRINT_PER_FILE, main_proc[PRINT_PER_FILE], PRINT_PER_LIST,
            main_proc[PRINT_PER_LIST]))

    return main_proc
Esempio n. 29
0
def deprotonate(cfg, protonatable_res, excess_proton, dump_h3o_mol,
                water_mol_dict, box, tpl_data):
    """
    Deprotonate the residue and assign the proton to the closest water
    so that the output data matches with the template.

    @param cfg: configuration for the run (provides atom type and charge keys)
    @param protonatable_res: atoms of the protonatable residue
    @param excess_proton: the atom entry for the proton being transferred
    @param dump_h3o_mol: list that is filled with the atoms of the new hydronium
    @param water_mol_dict: dict of water molecule id -> list of atom entries;
        the chosen water is removed from this dict
    @param box: periodic box dimensions (array-like)
    @param tpl_data: data from the template file
    @raises InvalidDataError: if no water oxygen is found, or the residue atom
        count does not match the template
    """
    # Convert excess proton to a hydronium proton
    excess_proton[1] = tpl_data[H3O_MOL][0][1]  # molecule number
    excess_proton[2] = cfg[H3O_H_TYPE]  # type
    excess_proton[3] = tpl_data[H3O_H_CHARGE]  # charge
    dump_h3o_mol.append(excess_proton)
    min_dist_id = None
    min_dist = np.linalg.norm(box)
    for mol_id, molecule in water_mol_dict.items():
        # Reset per molecule: previously `dist` was only assigned when an oxygen
        # matched, so a molecule without a WAT_O_TYPE atom reused the previous
        # molecule's distance (or raised NameError on the first molecule).
        dist = None
        for atom in molecule:
            if atom[2] == cfg[WAT_O_TYPE]:
                dist = pbc_dist(np.asarray(excess_proton[4:7]),
                                np.asarray(atom[4:7]), box)
        if dist is not None and dist < min_dist:
            min_dist_id = mol_id
            min_dist = dist
    if min_dist_id is None:
        # explicit error instead of the opaque KeyError that would follow below
        raise InvalidDataError(
            'Could not find a water oxygen atom to accept the excess proton; '
            'check the specified water atom types.')
    logger.debug('Deprotonated residue: the molecule ID of the closest water '
                 '(to become a hydronium) is {}.'.format(min_dist_id))
    # Now that have the closest water, add its atoms to the hydronium list
    for atom in water_mol_dict[min_dist_id]:
        dump_h3o_mol.append(atom)
    # Remove the closest water from the dictionary of water molecules, and convert it to a hydronium
    del water_mol_dict[min_dist_id]
    for atom in dump_h3o_mol:
        if atom[2] == cfg[WAT_O_TYPE]:
            atom[2] = cfg[H3O_O_TYPE]
            atom[3] = tpl_data[H3O_O_CHARGE]
        elif atom[2] == cfg[WAT_H_TYPE]:
            atom[2] = cfg[H3O_H_TYPE]
            atom[3] = tpl_data[H3O_H_CHARGE]
    # Make the atom type and charge of the protonatable residue the same as for the template file (switching
    # from protonated to deprotonated residue)
    if len(tpl_data[PROT_RES_MOL]) != len(protonatable_res):
        raise InvalidDataError(
            'Encountered dump file in which the number of atoms in the '
            'protonatable residue does not equal the number of atoms in the template data file.'
        )
Esempio n. 30
0
def eval_eqs(cfg, tpl_vals_dict):
    """
    Evaluates equations based on
    @param cfg: configuration for the run
    @param tpl_vals_dict: dictionary of variable values to be used to evaluate equations and fill templates
    """
    for eq_param in cfg[TPL_EQ_PARAMS]:
        try:
            string_to_eval = cfg[TPL_VALS][eq_param].format(**tpl_vals_dict)
        except KeyError as e:
            raise KeyError(
                "Missing parameter value {} needed to evaluate '{}' for the parameter '{}'."
                "".format(e, tpl_vals_dict[eq_param], eq_param))
        try:
            tpl_vals_dict[eq_param] = eval(string_to_eval)
        except NameError:
            raise InvalidDataError(
                "Could not evaluate the string '{}' specifying the value for the parameter "
                "'{}'. Check equation order, equations, and/or parameter values."
                "".format(string_to_eval, eq_param))