Example #1
def main(argv=None):
    """ Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != 0:
        return ret

    if args.src_file is not None:
        proc_data = calc_for_wham(args.src_file)
        write_csv(proc_data, create_out_fname(args.src_file, prefix=OUT_PFX), COLVAR_WHAM_KEY_SEQ)
    else:
        found_files = find_files_by_dir(args.base_dir, args.pattern)
        logger.debug("Found '%d' dirs with files to process", len(found_files))
        for f_dir, files in found_files.items():
            if not files:
                logger.warning("No files found for dir '%s'", f_dir)
                continue
            for colvar_path in [os.path.join(f_dir, tgt) for tgt in files]:
                proc_data = calc_for_wham(colvar_path)
                f_name = create_out_fname(colvar_path, prefix=OUT_PFX)
                if allow_write(f_name, overwrite=args.overwrite):
                    list_to_file([str(d['r']) for d in proc_data if 'r' in d], f_name)
                    # write_csv(proc_data, f_name, COLVAR_WHAM_KEY_SEQ, extrasaction="ignore")
    return 0  # success
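All of these examples lean on the same small file-handling helpers. Their implementations are not shown on this page; the sketch below is a plausible reconstruction of create_out_fname and allow_write inferred from the call sites (the signatures and behavior are assumptions, not the project's actual code):

import os


def create_out_fname(src_file, prefix='', suffix='', ext=None, base_dir=None):
    # Build an output path from src_file: optionally relocate to base_dir,
    # add a prefix/suffix to the base name, and optionally swap the extension.
    if base_dir is None:
        base_dir = os.path.dirname(src_file)
    base_name, old_ext = os.path.splitext(os.path.basename(src_file))
    if ext is None:
        ext = old_ext
    return os.path.abspath(os.path.join(base_dir, prefix + base_name + suffix + ext))


def allow_write(f_name, overwrite=False):
    # Writing is allowed when the target does not exist yet or the caller
    # explicitly requested overwriting.
    return overwrite or not os.path.exists(f_name)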
Example #2
def main(argv=None):
    """ Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    kbt = calc_kbt(args.temp)

    if args.src_file is not None:
        proc_data = to_zero_point(calc_rad(args.src_file, kbt))
        write_csv(proc_data, create_out_fname(args.src_file, prefix=OUT_PFX),
                  RAD_KEY_SEQ)
    else:
        found_files = find_files_by_dir(args.base_dir, args.pattern)
        logger.debug("Found '{}' dirs with files to process".format(
            len(found_files)))
        for f_dir, files in found_files.items():
            if not files:
                logger.warning("No files found for dir '{}'".format(f_dir))
                continue
            for pmf_path in [os.path.join(f_dir, tgt) for tgt in files]:
                proc_data = to_zero_point(calc_rad(pmf_path, kbt))
                f_name = create_out_fname(pmf_path, prefix=OUT_PFX)
                if allow_write(f_name, overwrite=args.overwrite):
                    write_csv(proc_data, f_name, RAD_KEY_SEQ)
    return GOOD_RET  # success
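calc_kbt is presumably a one-line conversion from temperature to a k_B*T energy. A minimal sketch, assuming Kelvin input and kcal/mol output (the unit convention is an assumption):

BOLTZ_CONST = 0.0019872041  # Boltzmann constant in kcal/(mol*K)


def calc_kbt(temp):
    # k_B * T for a temperature in Kelvin, returned in kcal/mol (assumed units)
    return BOLTZ_CONST * float(temp)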
Example #3
def main(argv=None):
    """ Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    kbt = calc_kbt(args.temp)

    if args.src_file is not None:
        proc_data = to_zero_point(calc_rad(args.src_file, kbt))
        write_csv(proc_data, create_out_fname(args.src_file, prefix=OUT_PFX), RAD_KEY_SEQ)
    else:
        found_files = find_files_by_dir(args.base_dir, args.pattern)
        logger.debug("Found '{}' dirs with files to process".format(len(found_files)))
        for f_dir, files in found_files.items():
            if not files:
                logger.warning("No files found for dir '{}'".format(f_dir))
                continue
            for pmf_path in [os.path.join(f_dir, tgt) for tgt in files]:
                proc_data = to_zero_point(calc_rad(pmf_path, kbt))
                f_name = create_out_fname(pmf_path, prefix=OUT_PFX)
                if allow_write(f_name, overwrite=args.overwrite):
                    write_csv(proc_data, f_name, RAD_KEY_SEQ)
    return GOOD_RET  # success
Example #4
def main(argv=None):
    """ Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != 0:
        return ret

    if args.src_file is not None:
        proc_data = calc_for_wham(args.src_file)
        write_csv(proc_data, create_out_fname(args.src_file, prefix=OUT_PFX),
                  COLVAR_WHAM_KEY_SEQ)
    else:
        found_files = find_files_by_dir(args.base_dir, args.pattern)
        logger.debug("Found '%d' dirs with files to process", len(found_files))
        for f_dir, files in found_files.items():
            if not files:
                logger.warning("No files found for dir '%s'", f_dir)
                continue
            for colvar_path in [os.path.join(f_dir, tgt) for tgt in files]:
                proc_data = calc_for_wham(colvar_path)
                f_name = create_out_fname(colvar_path, prefix=OUT_PFX)
                if allow_write(f_name, overwrite=args.overwrite):
                    list_to_file([str(d['r']) for d in proc_data if 'r' in d],
                                 f_name)
                    # write_csv(proc_data, f_name, COLVAR_WHAM_KEY_SEQ, extrasaction="ignore")
    return 0  # success
Example #5
def print_per_frame(dump_file, cfg, data_to_print, out_fieldnames, write_mode):
    f_out = create_out_fname(dump_file,
                             suffix='_sum',
                             ext='.csv',
                             base_dir=cfg[OUT_BASE_DIR])
    write_csv(data_to_print,
              f_out,
              out_fieldnames,
              extrasaction="ignore",
              mode=write_mode,
              round_digits=ROUND_DIGITS,
              print_message=cfg[PRINT_PROGRESS])
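Example #5 exercises most of write_csv's optional arguments in a single call. The library code itself is not shown here; below is a minimal sketch of such a write_csv built on csv.DictWriter, matching the keywords used throughout these examples (an assumption from the call sites, not the actual implementation):

import csv


def write_csv(data, out_fname, fieldnames, extrasaction='raise', mode='w',
              round_digits=None, print_message=True):
    # Write a list of dicts as CSV in the given fieldname order;
    # extrasaction='ignore' silently drops keys that are not in fieldnames.
    with open(out_fname, mode, newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames, extrasaction=extrasaction)
        if mode == 'w':
            writer.writeheader()
        for row in data:
            if round_digits is not None:
                row = {key: round(val, round_digits) if isinstance(val, float) else val
                       for key, val in row.items()}
            writer.writerow(row)
    if print_message:
        print("Wrote file: {}".format(out_fname))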
Example #6
def write_result(result, src_file, overwrite=False, basedir=None):
    """Writes the result to a file named for the given source file.

    :param result: The result to write.
    :param src_file: The original source file name.
    :param overwrite: Whether to overwrite an existing file name.
    :param basedir: The base directory to target (uses the source file's base directory
        if not specified).
    """
    f_name = create_out_fname(src_file, prefix=OUT_PFX, base_dir=basedir)
    if allow_write(f_name, overwrite=overwrite):
        write_csv(result, f_name, OUT_KEY_SEQ)
Example #7
def main(argv=None):
    """
    Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    deduped = compress_dups(read_csv(args.file, all_conv=float), args.column)
    write_csv(deduped, create_out_fname(args.file, prefix=PREFIX),
              read_csv_header(args.file))

    return GOOD_RET  # success
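compress_dups is not shown in this example; since read_csv is called with all_conv=float, one plausible behavior is to group rows on the given column and average the remaining fields. A hypothetical sketch under that assumption:

from collections import defaultdict


def compress_dups(data, column):
    # Group rows by their value in `column`, then average every other field
    # (assumes all values were already converted to float on read).
    grouped = defaultdict(list)
    for row in data:
        grouped[row[column]].append(row)
    deduped = []
    for key in sorted(grouped):
        rows = grouped[key]
        merged = {column: key}
        for field in rows[0]:
            if field != column:
                merged[field] = sum(row[field] for row in rows) / len(rows)
        deduped.append(merged)
    return deduped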
Example #8
    def testWriteCsv(self):
        tmp_dir = None
        data = csv_data()
        try:
            tmp_dir = tempfile.mkdtemp()
            tgt_fname = create_out_fname(SHORT_WHAM_PATH, prefix=OUT_PFX, base_dir=tmp_dir)

            write_csv(data, tgt_fname, RAD_KEY_SEQ)
            csv_result = read_csv(tgt_fname,
                                  data_conv={FREE_KEY: str_to_bool,
                                             CORR_KEY: float,
                                             COORD_KEY: str, })
            self.assertEqual(len(data), len(csv_result))
            for i, csv_row in enumerate(csv_result):
                self.assertDictEqual(data[i], csv_row)
        finally:
            if tmp_dir is not None:
                shutil.rmtree(tmp_dir)
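The try/finally bookkeeping around tempfile.mkdtemp is only needed on Python 2; on Python 3 the same test can lean on tempfile.TemporaryDirectory, which removes the directory automatically even when an assertion fails. An equivalent sketch reusing the fixtures above:

import tempfile
import unittest


class TestWriteCsv(unittest.TestCase):
    def testWriteCsv(self):
        data = csv_data()
        # the context manager removes tmp_dir on exit, even on failure
        with tempfile.TemporaryDirectory() as tmp_dir:
            tgt_fname = create_out_fname(SHORT_WHAM_PATH, prefix=OUT_PFX, base_dir=tmp_dir)
            write_csv(data, tgt_fname, RAD_KEY_SEQ)
            csv_result = read_csv(tgt_fname,
                                  data_conv={FREE_KEY: str_to_bool,
                                             CORR_KEY: float,
                                             COORD_KEY: str})
            self.assertEqual(len(data), len(csv_result))
            for i, csv_row in enumerate(csv_result):
                self.assertDictEqual(data[i], csv_row)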
Example #9
def process_log_files(source_name, log_file_list):
    """
    Loops through all files and prints output
    @param source_name: the source name to use as the base for creating an outfile name
    @param log_file_list: list of file names to read and process
    """

    result_list = []
    out_fname = create_out_fname(source_name, suffix='_sum', ext=".csv")

    for log_file in log_file_list:
        result_list += process_log(log_file)

    if len(result_list) == 0:
        warning(
            "Found no lammps log data to process from: {}".format(source_name))
    else:
        write_csv(result_list,
                  out_fname,
                  LOG_FIELDNAMES,
                  extrasaction="ignore")
Example #10
def find_rel_e(extracted_data, cfg, ref_e_dict):
    """
    calculate relative energy, if data found
    @param extracted_data: dictionary of data found from chk file
    @param cfg: configuration for run
    @param ref_e_dict: reference energies, if available
    @return:
    """

    tot_resid = 0
    num_resid = 0

    for data_dict in extracted_data:
        this_group = data_dict[REL_E_GROUP]
        # initialize to nan so a falsy/missing group cannot leave rel_ene_ref unbound
        rel_ene_ref = np.nan
        if this_group:
            rel_ene_ref = cfg[REL_E_SEC][this_group][REL_E_REF]
        if this_group is None or np.isnan(rel_ene_ref):
            data_dict[REL_E] = np.nan
        else:
            rel_e = data_dict[ENV_ENE] - rel_ene_ref
            data_dict[REL_E] = rel_e
            file_name = data_dict[FILE_NAME]
            if file_name in ref_e_dict:
                ref_e = ref_e_dict[file_name]
                resid = np.round(np.sqrt((ref_e - rel_e)**2), 6)

                data_dict[REF_E] = ref_e
                data_dict[E_RESID] = resid
                tot_resid += resid
                num_resid += 1

    f_out = create_out_fname(cfg[CHK_FILE_LIST],
                             suffix='_sum',
                             ext='.csv',
                             base_dir=cfg[OUT_BASE_DIR])
    write_csv(extracted_data, f_out, ENE_FIELD_NAMES, extrasaction="ignore")
    if len(ref_e_dict) > 1:
        print("Calculated total energy residual from {} files: {}".format(
            num_resid, tot_resid))
Example #11
def process_file(base_file, data_file):
    # TODO: add in reading vectors
    base_dict = read_csv(base_file, quote_style=csv.QUOTE_NONNUMERIC)[0]
    data_dict_list = read_csv(data_file, quote_style=csv.QUOTE_NONNUMERIC)

    data_headers = [INDEX, RMSD] + read_csv_header(data_file)

    num_vals = len(base_dict)
    for data_id, data_dict in enumerate(data_dict_list):
        rmsd = 0.0
        for key, val in base_dict.items():
            try:
                rmsd += (data_dict[key] - val)**2
            except KeyError:
                raise InvalidDataError(
                    "Could not find key '{}' from base file in compared data file."
                    .format(key))

        data_dict[INDEX] = data_id
        data_dict[RMSD] = round((rmsd / num_vals)**0.5, 2)

    out_name = create_out_fname(data_file, prefix=RMSD + '_')
    write_csv(data_dict_list, out_name, data_headers)
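The loop above computes a per-row root-mean-square deviation over the keys shared with the base file: rmsd = sqrt(sum_i (x_i - y_i)^2 / n). A tiny self-contained check of the same arithmetic with made-up numbers:

base_dict = {'a': 1.0, 'b': 3.0}
data_dict = {'a': 2.0, 'b': 5.0}
sq_sum = sum((data_dict[key] - val) ** 2 for key, val in base_dict.items())
rmsd = round((sq_sum / len(base_dict)) ** 0.5, 2)
assert rmsd == 1.58  # sqrt((1 + 4) / 2) = sqrt(2.5) ~ 1.58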
Example #12
def make_summary(cfg):
    """
    If the option is specified, add the last best fit output file to the list of outputs and evaluate changes
    @param cfg: configuration for the run
    @return:
    """
    best_file = cfg[MAIN_SEC][BEST_FILE]
    summary_file = cfg[MAIN_SEC][SUMMARY_FILE]

    low, high, headers = get_param_info(cfg)
    latest_output = np.loadtxt(best_file, dtype=np.float64)

    if os.path.isfile(summary_file):
        last_row = None
        percent_diffs = []
        previous_output = np.loadtxt(summary_file, dtype=np.float64)
        all_output = np.vstack((previous_output, latest_output))
        for row in all_output:
            if last_row is not None:
                diff = row - last_row
                percent_diff = {}
                # Check data for small values, hitting upper or lower bound, and calc % diff
                for index, val in enumerate(np.nditer(row)):
                    if abs(val) < TOL:
                        warning(
                            "Small value ({}) encountered for parameter {} (col {})"
                            "".format(val, headers[index], index))
                    if abs(diff[index]) > TOL:
                        if abs(last_row[index]) > TOL:
                            percent_diff[headers[index]] = round(
                                diff[index] / last_row[index] * 100, 2)
                        else:
                            percent_diff[headers[index]] = np.inf
                        if abs(val - low[index]) < TOL:
                            warning(
                                "Value ({}) near lower bound ({}) encountered for parameter {} (col {})."
                                "".format(val, low[index], headers[index],
                                          index))
                        if abs(val - high[index]) < TOL:
                            warning(
                                "Value ({}) near upper bound ({}) encountered for parameter {} (col {})."
                                "".format(val, high[index], headers[index],
                                          index))
                    else:
                        percent_diff[headers[index]] = np.nan
                percent_diffs.append(percent_diff)
            last_row = row
        if len(percent_diffs) > 0:
            max_percent_diff = 0
            max_diff_param = None
            for param, val in percent_diffs[-1].items():
                if abs(val) > abs(max_percent_diff):
                    max_percent_diff = val
                    max_diff_param = param
            print(
                "Maximum (absolute value) percent difference from last read line is {} % for parameter '{}'."
                "".format(max_percent_diff, max_diff_param))
            if cfg[MAIN_SEC][RESID_IN_BEST]:
                print("Percent change in residual: {} %"
                      "".format(percent_diffs[-1][RESIDUAL + cfg[MAIN_SEC][SUM_HEAD_SUFFIX]]))

        # format for gnuplot and np.loadtxt
        f_out = create_out_fname(summary_file,
                                 suffix='_perc_diff',
                                 ext='.csv',
                                 base_dir=cfg[MAIN_SEC][OUT_BASE_DIR])
        write_csv(percent_diffs, f_out, headers, extrasaction="ignore")

        f_out = create_out_fname(summary_file,
                                 ext='.csv',
                                 base_dir=cfg[MAIN_SEC][OUT_BASE_DIR])
        with open(f_out, 'w') as s_file:
            s_file.write(','.join(headers) + '\n')
            np.savetxt(s_file, all_output, fmt='%8.6f', delimiter=',')
        print('Wrote file: {}'.format(f_out))

        # in addition to csv (above), print format for gnuplot and np.loadtxt
        with open(summary_file, 'w') as s_file:
            np.savetxt(s_file, all_output, fmt='%12.6f')
        print("Wrote file: {}".format(summary_file))
    else:
        # have this as sep statement, because now printing a 1D array, handled differently than 2D array (newline=' ')
        with open(summary_file, 'w') as s_file:
            np.savetxt(s_file, latest_output, fmt='%12.6f', newline=' ')
        print("Wrote results from {} to new summary file {}".format(
            best_file, summary_file))
Example #13
def make_summary(output_file, summary_file, cfg):
    low, high, headers = get_param_info(cfg)
    latest_output = np.loadtxt(output_file, dtype=np.float64)

    # append last best resid
    low = np.append(low, np.nan)
    high = np.append(high, np.nan)
    headers.append('resid')
    base_dir = os.path.dirname(output_file)
    latest_output = np.append(latest_output, get_resid(base_dir))

    if os.path.isfile(summary_file):
        last_row = None
        percent_diffs = []
        previous_output = np.loadtxt(summary_file, dtype=np.float64)
        all_output = np.vstack((previous_output, latest_output))
        for row in all_output:
            if last_row is not None:
                diff = row - last_row
                percent_diff = {}
                # Check data for small values, hitting upper or lower bound, and calc % diff
                for index, val in enumerate(np.nditer(row)):
                    if abs(val) < TOL:
                        warning("Small value ({}) encountered for parameter {} (col {})"
                                "".format(val, headers[index], index))
                    if abs(diff[index]) > TOL:
                        if abs(last_row[index]) > TOL:
                            percent_diff[headers[index]] = "%8.2f" % (diff[index] / last_row[index] * 100)
                        else:
                            percent_diff[headers[index]] = '        '
                        if abs(val - low[index]) < TOL:
                            warning("Value ({}) near lower bound ({}) encountered for parameter {} (col {})."
                                    "".format(val, low[index], headers[index], index))
                        if abs(val - high[index]) < TOL:
                            warning("Value ({}) near upper bound ({}) encountered for parameter {} (col {})."
                                    "".format(val, high[index], headers[index], index))
                    else:
                        percent_diff[headers[index]] = '        '
                percent_diffs.append(percent_diff)
            last_row = row

        # format for gnuplot and np.loadtxt
        f_out = create_out_fname(summary_file, suffix='_perc_diff', ext='.csv', base_dir=cfg[MAIN_SEC][OUT_BASE_DIR])
        write_csv(percent_diffs, f_out, headers, extrasaction="ignore")
        print('Wrote file: {}'.format(f_out))

        f_out = create_out_fname(summary_file, ext='.csv', base_dir=cfg[MAIN_SEC][OUT_BASE_DIR])
        with open(f_out, 'w') as s_file:
            s_file.write(','.join(headers)+'\n')
            np.savetxt(s_file, all_output, fmt='%8.6f', delimiter=',')
        print('Wrote file: {}'.format(f_out))

        # in addition to csv (above), print format for gnuplot and np.loadtxt
        with open(summary_file, 'w') as s_file:
            np.savetxt(s_file, all_output, fmt='%12.6f')
        print("Wrote summary file {}".format(summary_file))
    else:
        # have this as sep statement, because now printing a 1D array, handled differently than 2D array (newline=' ')
        with open(summary_file, 'w') as s_file:
            np.savetxt(s_file, latest_output, fmt='%12.6f', newline=' ')
        print("Wrote results from {} to new summary file {}".format(output_file, summary_file))
Example #14
def process_evb_files(cfg, selected_fieldnames):
    """
    Want to grab the timestep and highest prot ci^2, highest wat ci^2, and print them
    @param selected_fieldnames: list of field names for output based on user-selected options
    @param cfg: configuration data read from ini file
    @return: all the gathered data
    @raise InvalidDataError: if no evb file names are found to read
    """
    first_file_flag = True
    all_data = []

    if cfg[EVB_FILE] is not None:
        evb_file_list = [cfg[EVB_FILE]]
    else:
        evb_file_list = []

    # Separate try/except block here because we want to continue rather than exit;
    # exit below if there are no files to process
    try:
        evb_file_list += file_rows_to_list(cfg[EVB_LIST_FILE])
    except IOError as e:
        if cfg[EVB_LIST_FILE] != DEF_EVB_LIST_FILE:
            raise IOError(e)

    if len(evb_file_list) == 0:
        raise InvalidDataError(
            "Found no evb file names to read. Specify one file with the keyword '{}' or \n"
            "a file containing a list of evb files with the keyword '{}'.".
            format(EVB_FILE, EVB_LIST_FILE))

    for evb_file in evb_file_list:
        data_to_print, subset_to_print, wat_mol_data_to_print = process_evb_file(
            evb_file, cfg)
        all_data += data_to_print
        if cfg[PRINT_PER_FILE] is True:
            if len(data_to_print) > 0:
                f_out = create_out_fname(evb_file,
                                         suffix='_evb_info',
                                         ext='.csv',
                                         base_dir=cfg[OUT_BASE_DIR])
                write_csv(data_to_print,
                          f_out,
                          selected_fieldnames,
                          extrasaction="ignore",
                          print_message=cfg[PRINT_PROGRESS],
                          round_digits=ROUND_DIGITS)
            if cfg[PRINT_CI_SUBSET]:
                if len(subset_to_print) > 0:
                    f_out = create_out_fname(evb_file,
                                             suffix='_ci_sq_ts',
                                             ext='.csv',
                                             base_dir=cfg[OUT_BASE_DIR])
                    write_csv(subset_to_print,
                              f_out,
                              CI_FIELDNAMES,
                              extrasaction="ignore",
                              print_message=cfg[PRINT_PROGRESS],
                              round_digits=ROUND_DIGITS)
                else:
                    warning("'{}' set to true, but found no data from: {} \n"
                            "No output will be printed for this file."
                            "".format(PRINT_CI_SUBSET, evb_file))
        if cfg[PRINT_PER_LIST]:
            if first_file_flag:
                print_mode = 'w'
                first_file_flag = False
            else:
                print_mode = 'a'
            if cfg[PRINT_CI_SUBSET]:
                if len(subset_to_print) > 0:
                    f_out = create_out_fname(cfg[EVB_LIST_FILE],
                                             suffix='_ci_sq_ts',
                                             ext='.csv',
                                             base_dir=cfg[OUT_BASE_DIR])
                    write_csv(subset_to_print,
                              f_out, [FILE_NAME] + CI_FIELDNAMES,
                              extrasaction="ignore",
                              mode=print_mode,
                              print_message=cfg[PRINT_PROGRESS],
                              round_digits=ROUND_DIGITS)
                else:
                    warning(
                        "'{}' set to true, but found no data meeting criteria."
                        "".format(PRINT_CI_SUBSET))
            f_out = create_out_fname(cfg[EVB_LIST_FILE],
                                     suffix='_evb_info',
                                     ext='.csv',
                                     base_dir=cfg[OUT_BASE_DIR])
            write_csv(data_to_print,
                      f_out, [FILE_NAME] + selected_fieldnames,
                      extrasaction="ignore",
                      mode=print_mode,
                      print_message=cfg[PRINT_PROGRESS],
                      round_digits=ROUND_DIGITS)
    return all_data
Example #15
def obj_fun(x0_trial,
            cfg,
            tpl_dict,
            tpl_str,
            fitting_sum,
            result_dict,
            result_headers,
            x0_full=None):
    """
    Objective function to be minimized. Also used to save trial input and output.
    @param x0_trial: initial parameter values to minimize
    @param x0_full: all parameter values to minimize (may be larger than x0_trial)
    @param cfg: configuration for the run
    @param tpl_dict: dictionary of values for filling in template strings
    @param tpl_str: template string (read from file)
    @param fitting_sum: list of dicts for saving all trial values (to be appended, if needed)
    @param result_dict: a dictionary of results already found, to keep the program from unnecessarily running
                the expensive function when we already have solved for that parameter set
    @param result_headers: list of headers for printing results
    @return: the result for the set of values being tested, obtained from the bash script specified in cfg
    """
    if x0_full is None:
        x0_full = x0_trial
    else:
        x0_full[:len(x0_trial)] = x0_trial

    resid_dict = {}
    penalty = 0
    for param_num, param_name in enumerate(cfg[OPT_PARAMS]):
        # Needed to add break for triangle/stepwise minimization
        if param_num >= len(x0_trial):
            break
        tpl_dict[param_name] = round(x0_trial[param_num],
                                     cfg[NUM_PARAM_DECIMALS])
        resid_dict[param_name] = tpl_dict[param_name]
        if param_name in cfg[LEFT_SIDE_POTENTIAL]:
            min_val = cfg[LEFT_SIDE_POTENTIAL][param_name][0]
            stiffness = cfg[LEFT_SIDE_POTENTIAL][param_name][1]
            if x0_trial[param_num] < min_val:
                penalty += stiffness * np.square(x0_trial[param_num] - min_val)
        if param_name in cfg[RIGHT_SIDE_PENALTY]:
            max_val = cfg[RIGHT_SIDE_PENALTY][param_name][0]
            stiffness = cfg[RIGHT_SIDE_PENALTY][param_name][1]
            if x0_trial[param_num] > max_val:
                penalty += stiffness * np.square(x0_trial[param_num] - max_val)

    eval_eqs(cfg, tpl_dict)
    fill_save_tpl(cfg,
                  tpl_str,
                  tpl_dict,
                  cfg[PAR_TPL],
                  cfg[PAR_FILE_NAME],
                  print_info=cfg[PRINT_INFO])

    # Note: found that the minimizer calls the function with the same inputs multiple times!
    #       only call this expensive function if we don't already have that answer, determined by checking for it in
    #       the result dictionary
    # to make the input hashable for a dictionary
    x0_str = str(x0_full)
    if x0_str in result_dict:
        trial_result = result_dict[x0_str]
    else:
        trial_result = float(
            check_output([cfg[BASH_DRIVER], tpl_dict[NEW_FNAME]]).strip())
        trial_result += penalty
        result_dict[x0_str] = trial_result
        tpl_dict[RESID] = round(trial_result, cfg[NUM_PARAM_DECIMALS])
        if cfg[PAR_COPY_NAME] is not None or cfg[RESULT_COPY] is not None:
            copy_par_result_file(cfg, tpl_dict, print_info=cfg[PRINT_INFO])
        if cfg[FITTING_SUM_FNAME] is not None:
            write_csv(fitting_sum,
                      cfg[FITTING_SUM_FNAME],
                      result_headers,
                      print_message=cfg[PRINT_INFO],
                      round_digits=cfg[NUM_PARAM_DECIMALS])
        if cfg[BEST_PARAMS_FNAME] is not None:
            if trial_result < cfg[LOWEST_RESID]:
                cfg[LOWEST_RESID] = trial_result
                with open(cfg[BEST_PARAMS_FNAME], 'w') as w_file:
                    for param_num, param_name in enumerate(cfg[OPT_PARAMS]):
                        w_file.write("{:} = {:f},{:f}\n".format(
                            param_name, x0_full[param_num],
                            cfg[INITIAL_DIR][param_name]))
    if cfg[PRINT_INFO]:
        print("Resid: {:11f} for parameters: {}".format(
            trial_result, ",".join(["{:11f}".format(x) for x in x0_trial])))
    if cfg[FITTING_SUM_FNAME] is not None:
        resid_dict[RESID] = trial_result
        fitting_sum.append(resid_dict)
    return trial_result
Example #16
def process_files(comp_f_list, col_name, base_out_name, delimiter, sep_out_flag, out_location):
    """
    Combines the columns from multiple CSV files, aligning rows on their values in the
    specified column, and writes the merged rows to a new output file
    @param comp_f_list: a list of lists of file names to process (file read during input processing)
    @param col_name: name of column to use for alignment
    @param base_out_name: name of file to be created, or suffix if multiple files to be created
    @param delimiter: string, delimiter separating file names on lines of the comp_f_list
    @param sep_out_flag: a boolean to note if separate output files should be made based on each row of input
    @param out_location: user-specified location for the output files, if specified
    @raise InvalidDataError: if no common values are found in the alignment column
    """
    all_dicts = defaultdict(dict)

    # if need multiple output files, designate them by adding a prefix
    prefix = ''
    # if there will be multiple output files, make sure we do not reuse a prefix, so keep a copy of used names
    prefix_used = []
    # if one output file combines multiple sets of files, write_mode will be changed to append later
    write_mode = 'w'

    # we don't have to specify run names in the output if there is only one row (set of files) to combine,
    # or if there will be separate output files
    if len(comp_f_list) < 2 or sep_out_flag:
        add_run_name = False
        headers = []
    else:
        add_run_name = True
        headers = [RUN_NAME]

    for line_num, line in enumerate(comp_f_list):
        dict_keys = None
        if sep_out_flag:
            headers = []
            all_dicts = defaultdict(dict)
        # separate on delimiter, strip any white space, and also get rid of empty entries
        # wrap in list() so comp_files can be iterated more than once under Python 3
        comp_files = list(filter(None, [c_file.strip() for c_file in line.split(delimiter)]))

        # get the common part of the name, if it exists; otherwise, give the name the line index
        run_name = ''
        for file_index, file_name in enumerate(comp_files):
            base_name = os.path.splitext(os.path.basename(file_name))[0]
            if file_index == 0:
                run_name = base_name
            else:
                run_name = longest_common_substring(run_name, base_name)
        if run_name == '':
            # no common substring found, so use the line index (as a string) instead
            run_name = str(line_num) + "_"

        for c_file in comp_files:
            new_dict = read_csv_to_dict(c_file, col_name)
            if dict_keys is None:
                dict_keys = new_dict.keys()
            else:
                dict_keys = set(dict_keys).intersection(new_dict.keys())
            new_dict_keys = six.next(six.itervalues(new_dict)).keys()
            # Get the keys for the inner dictionary; diff methods for python 2 and 3 so use six
            # expect to only get new headers when making a new file (write_mode == 'w')
            # for the next file, will not gather more headers. When printed, extra cols will be skipped, and
            #    missing columns will have no data shown
            if write_mode == 'w':
                for key in new_dict_keys:
                    if key in headers:
                        # it is okay to see the header again if it is the alignment column;
                        # if appending, we also expect to already have the header names
                        if key != col_name:
                            warning("Non-unique column name {} found in {}. "
                                    "Values will be overwritten.".format(key, c_file))
                    else:
                        headers.append(key)
            for align_val, row_dict in new_dict.items():
                all_dicts[align_val].update(row_dict)

        final_dict = []
        for key in sorted(dict_keys):
            final_dict.append(all_dicts[key])
            # final_dict.append(all_dicts[key].update({RUN_NAME: run_name}))

        if add_run_name:
            for each_dict in final_dict:
                each_dict.update({RUN_NAME: run_name})

        # Possible to have no overlap in align column
        if len(final_dict) > 0:
            # make sure col_name appears first by taking it out before sorting
            if sep_out_flag:
                prefix = run_name
                if prefix == '' or prefix in prefix_used:
                    prefix = str(line_num) + "_"
            # have a consistent output by sorting the headers, but keep the aligning column first
            # only needs to be done for printing the first time
            if write_mode == 'w':
                headers.remove(col_name)
                headers = [col_name] + sorted(headers)
                if add_run_name:
                    headers.remove(RUN_NAME)
                    headers = [RUN_NAME] + headers
            f_name = create_out_fname(base_out_name, prefix=prefix, base_dir=out_location)
            prefix_used.append(prefix)
            write_csv(final_dict, f_name, headers, mode=write_mode)
            if not sep_out_flag and write_mode == 'w':
                write_mode = 'a'
        else:
            raise InvalidDataError("No common values found for column {} among files: {}"
                                   "".format(col_name, ", ".join(comp_files)))
Example #17
def find_rel_e(extracted_data, cfg, out_field_names, ref_energy_dict):
    """
    calculate relative energies from the gathered data
    @param extracted_data: gathered data (based on flags)
    @param cfg: configuration for file
    @param out_field_names: field names chosen based on user-defined options
    @param ref_energy_dict: a dictionary of time names and the reference energy for calculating an energy RMSD
    @return: prints out a new outfile unless an error is raised
    """
    out_field_names = [
        FILE_NAME,
        TIMESTEP,
        REL_E_GROUP,
        RESID_E,
        REF_E,
        REL_ENE,
        REL_PROT_E,
        REL_HYD_E,
        REL_NEXT_HYD_E,
    ] + out_field_names[1:]

    tot_resid = 0
    num_resid = 0

    for data_dict in extracted_data:
        this_group = data_dict[REL_E_GROUP]
        # initialize to nan so a falsy/missing group cannot leave these unbound
        rel_ene_ref = np.nan
        ref_diab_e = np.nan
        if this_group:
            rel_ene_ref = cfg[REL_E_SEC][this_group][REL_E_REF]
            ref_diab_e = cfg[REL_E_SEC][this_group][MIN_DIAB_ENE]
        if this_group is None or np.isnan(rel_ene_ref):
            for key in [
                    RESID_E, REF_E, REL_ENE, REL_PROT_E, REL_HYD_E,
                    REL_NEXT_HYD_E
            ]:
                data_dict[key] = np.nan
        else:
            rel_e = data_dict[ENE_TOTAL] - rel_ene_ref
            data_dict[REL_ENE] = rel_e
            data_dict[REL_PROT_E] = data_dict[MAX_PROT_E] - ref_diab_e
            data_dict[REL_HYD_E] = data_dict[MAX_HYD_E] - ref_diab_e
            data_dict[REL_NEXT_HYD_E] = data_dict[NEXT_MAX_HYD_E] - ref_diab_e
            file_name = data_dict[FILE_NAME]
            if file_name in ref_energy_dict:
                ref_e = ref_energy_dict[file_name]
                resid = np.sqrt((ref_e - rel_e)**2)

                data_dict[REF_E] = ref_e
                data_dict[RESID_E] = resid
                tot_resid += resid
                num_resid += 1
            else:
                data_dict[REF_E] = np.nan
                data_dict[RESID_E] = np.nan

    f_out = create_out_fname(cfg[EVB_LIST_FILE],
                             suffix='_evb_info',
                             ext='.csv',
                             base_dir=cfg[OUT_BASE_DIR])
    write_csv(extracted_data,
              f_out,
              out_field_names,
              extrasaction="ignore",
              print_message=cfg[PRINT_PROGRESS],
              round_digits=ROUND_DIGITS)
    if len(ref_energy_dict) > 1:
        print("Calculated total energy residual from {} files: {}".format(
            num_resid, round(tot_resid, 6)))
Example #18
def process_evb_files(cfg):
    """
    Want to grab the timestep and highest prot ci^2, highest wat ci^2, and print them
    @param cfg: configuration data read from ini file
    @raise InvalidDataError: if no evb file names are found to read
    """
    first_file_flag = True
    evb_file_list = []

    if cfg[EVB_FILE] is not None:
        evb_file_list.append(cfg[EVB_FILE])

    # Separate try/except block here because we want to continue rather than exit;
    # exit below if there are no files to process
    try:
        with open(cfg[EVB_FILES]) as f:
            for evb_file in f:
                evb_file_list.append(evb_file.strip())
    except IOError as e:
        warning("Problems reading file:", e)

    if len(evb_file_list) == 0:
        raise InvalidDataError("Found no evb file names to read. Specify one file with the keyword '{}' or \n"
                               "a file containing a list of evb files with the keyword '{}'.".format(EVB_FILE,
                                                                                                     EVB_FILES))

    for evb_file in evb_file_list:
        data_to_print, subset_to_print, wat_mol_data_to_print = process_evb_file(evb_file, cfg)
        no_print = []
        if cfg[PRINT_PER_FILE] is True:
            if cfg[PRINT_KEY_PROPS]:
                if len(data_to_print) > 0:
                    f_out = create_out_fname(evb_file, suffix='_evb_info', ext='.csv',
                                             base_dir=cfg[OUT_BASE_DIR])
                    write_csv(data_to_print, f_out, KEY_PROPS_FIELDNAMES, extrasaction="ignore")
                else:
                    no_print.append(PRINT_KEY_PROPS)
            if cfg[PRINT_CI_SUBSET]:
                if len(subset_to_print) > 0:
                    f_out = create_out_fname(evb_file, suffix='_ci_sq_ts', ext='.csv',
                                             base_dir=cfg[OUT_BASE_DIR])
                    write_csv(subset_to_print, f_out, CI_FIELDNAMES, extrasaction="ignore")
                else:
                    no_print.append(PRINT_CI_SUBSET)
            if cfg[PRINT_CI_SQ]:
                if len(data_to_print) > 0:
                    f_out = create_out_fname(evb_file, suffix='_ci_sq', ext='.csv', base_dir=cfg[OUT_BASE_DIR])
                    write_csv(data_to_print, f_out, CI_FIELDNAMES, extrasaction="ignore")
                else:
                    no_print.append(PRINT_CI_SQ)
            if cfg[PRINT_CEC]:
                if len(data_to_print) > 0:
                    f_out = create_out_fname(evb_file, suffix='_cec', ext='.csv', base_dir=cfg[OUT_BASE_DIR])
                    write_csv(data_to_print, f_out, CEC_COORD_FIELDNAMES, extrasaction="ignore")
                else:
                    no_print.append(PRINT_CEC)
            if cfg[PRINT_WAT_MOL]:
                if len(wat_mol_data_to_print) > 0:
                    f_out = create_out_fname(evb_file, suffix='_wat_mols', ext='.csv',
                                             base_dir=cfg[OUT_BASE_DIR])
                    write_csv(wat_mol_data_to_print, f_out, PROT_WAT_FIELDNAMES, extrasaction="ignore")
                else:
                    no_print.append(PRINT_WAT_MOL)
        if len(no_print) > 0:
            warning("{} set to true, but found no data from: {} \n"
                    "No output will be printed for this file.".format(",".join(map(single_quote, no_print)), evb_file))
        if cfg[PRINT_PER_LIST]:
            if first_file_flag:
                print_mode = 'w'
                first_file_flag = False
            else:
                print_mode = 'a'
            if cfg[PRINT_CI_SQ]:
                f_out = create_out_fname(cfg[EVB_FILES], suffix='_ci_sq', ext='.csv',
                                         base_dir=cfg[OUT_BASE_DIR])
                write_csv(data_to_print, f_out, CI_FIELDNAMES, extrasaction="ignore", mode=print_mode)
            if cfg[PRINT_CI_SUBSET]:
                f_out = create_out_fname(cfg[EVB_FILES], suffix='_ci_sq_ts', ext='.csv',
                                         base_dir=cfg[OUT_BASE_DIR])
                write_csv(subset_to_print, f_out, CI_FIELDNAMES, extrasaction="ignore", mode=print_mode)
            if cfg[PRINT_WAT_MOL]:
                f_out = create_out_fname(cfg[EVB_FILES], suffix='_wat_mols', ext='.csv',
                                         base_dir=cfg[OUT_BASE_DIR])
                write_csv(wat_mol_data_to_print, f_out, PROT_WAT_FIELDNAMES, extrasaction="ignore", mode=print_mode)
            if cfg[PRINT_CEC]:
                f_out = create_out_fname(cfg[EVB_FILES], suffix='_cec', ext='.csv', base_dir=cfg[OUT_BASE_DIR])
                write_csv(data_to_print, f_out, CEC_COORD_FIELDNAMES, extrasaction="ignore", mode=print_mode)
            if cfg[PRINT_KEY_PROPS]:
                f_out = create_out_fname(cfg[EVB_FILES], suffix='_evb_info', ext='.csv',
                                         base_dir=cfg[OUT_BASE_DIR])
                write_csv(data_to_print, f_out, KEY_PROPS_FIELDNAMES, extrasaction="ignore", mode=print_mode)
Example #19
def print_per_frame(dump_file, cfg, data_to_print, out_fieldnames, write_mode):
    f_out = create_out_fname(dump_file, suffix="_sum", ext=".csv", base_dir=cfg[OUT_BASE_DIR])
    write_csv(data_to_print, f_out, out_fieldnames, extrasaction="ignore", mode=write_mode)
Example #20
def min_params(cfg, tpl_dict, tpl_str):
    num_opt_params = len(cfg[OPT_PARAMS])
    x0 = np.empty(num_opt_params)
    ini_direc = np.zeros((num_opt_params, num_opt_params))
    result_dict = {}
    fitting_sum = []
    result_sum_headers = [RESID]

    # setup minimization
    for param_num, param_name in enumerate(cfg[OPT_PARAMS]):
        x0[param_num] = cfg[TPL_VALS][param_name]
        ini_direc[param_num, param_num] = cfg[INITIAL_DIR][param_name]
        result_sum_headers.append(param_name)

    # arguments for objective function
    obj_fun_args = (cfg, tpl_dict, tpl_str, fitting_sum, result_dict,
                    result_sum_headers)

    # options for minimizer
    opt_options = {
        'maxiter': cfg[MAX_ITER],
        'disp': cfg[PRINT_INFO],
        'return_all': cfg[PRINT_CONV_ALL],
    }
    if cfg[SCIPY_OPT_METHOD] == POWELL:
        opt_options['direc'] = ini_direc
    if cfg[SCIPY_OPT_METHOD] in [POWELL, NELDER_MEAD]:
        opt_options['xtol'] = cfg[CONV_CUTOFF]
        opt_options['ftol'] = cfg[CONV_CUTOFF]
        opt_options['maxfev'] = cfg[MAX_ITER]

    if cfg[BASIN_HOP]:
        # for tests
        if cfg[BASIN_SEED]:
            np.random.seed(1)

        step_spec = False
        x_min = np.empty(num_opt_params)
        x_max = np.empty(num_opt_params)
        step_size = np.empty(num_opt_params)

        if BASIN_HOPS in cfg:
            hop_dict = cfg[BASIN_HOPS]
            min_dict = cfg[BASIN_MINS]
            max_dict = cfg[BASIN_MAXS]
            if len(hop_dict) > 0:
                for param_num, param_name in enumerate(cfg[OPT_PARAMS]):
                    if param_name in hop_dict:
                        step_size[param_num] = hop_dict[param_name]
                        step_spec = True
                    else:
                        step_size[param_num] = cfg[BASIN_DEF_STEP]
                    if param_name in min_dict:
                        x_min[param_num] = min_dict[param_name]
                        x_max[param_num] = max_dict[param_name]
                    else:
                        x_min[param_num] = -np.inf
                        x_max[param_num] = np.inf
        if step_spec:
            take_step = RandomDisplacementBounds(x_min, x_max, step_size,
                                                 cfg[PRINT_INFO])
        else:
            take_step = None

        minimizer_kwargs = dict(method=POWELL,
                                args=obj_fun_args,
                                options=opt_options)

        ret = basinhopping(obj_fun,
                           x0,
                           minimizer_kwargs=minimizer_kwargs,
                           disp=cfg[PRINT_INFO],
                           niter=cfg[BASIN_NITER],
                           niter_success=cfg[NITER_SUCCESS],
                           take_step=take_step)
        return_message = ret.message[-1] + "."
    else:
        # Number of minimization cycles set by default or user input
        num_minis = 0
        return_message = "No minimization cycles completed"
        ret = None
        trial_param_num = len(x0)

        while num_minis < cfg[MINI_CYCLES]:
            # Set up "triangle" or step-wise minimization
            if trial_param_num < 3 or not cfg[TRIANGLE_MINI]:
                x0_trial = x0
                # needed for after the first round of minimization
                trial_param_num = len(x0)
            else:
                trial_param_num = 2
                x0_trial = x0[:trial_param_num]
                obj_fun_args = (cfg, tpl_dict, tpl_str, fitting_sum,
                                result_dict, result_sum_headers, x0)
                if 'direc' in opt_options:
                    opt_options['direc'] = ini_direc[:trial_param_num, :trial_param_num]

            while trial_param_num <= len(x0):
                ret = minimize(obj_fun,
                               x0_trial,
                               args=obj_fun_args,
                               method=cfg[SCIPY_OPT_METHOD],
                               options=opt_options)
                x0_trial = ret.x
                return_message = ret.message
                x0[:trial_param_num] = x0_trial
                trial_param_num += 1
                if trial_param_num <= len(x0):
                    x0_trial = x0[:trial_param_num]
                    if 'direc' in opt_options:
                        opt_options['direc'] = ini_direc[:trial_param_num, :trial_param_num]
            num_minis += 1
            if cfg[MINI_CYCLES] - num_minis >= 0:
                print(return_message +
                      " Completed {} of {} minimization cycles".format(
                          num_minis, cfg[MINI_CYCLES]))

    if cfg[PRINT_CONV_ALL]:
        print(return_message +
              " Number of function calls: {}".format(ret.nfev))

    # Same final printing either way
    x_final = ret.x
    if x_final.size > 1:
        if cfg[FITTING_SUM_FNAME] is not None:
            write_csv(fitting_sum,
                      cfg[FITTING_SUM_FNAME],
                      result_sum_headers,
                      print_message=cfg[PRINT_INFO],
                      round_digits=cfg[NUM_PARAM_DECIMALS])
        print("Optimized parameters:")
        for param_num, param_name in enumerate(cfg[OPT_PARAMS]):
            print("{:>11} = {:11f}".format(param_name, x_final[param_num]))
    else:
        print("Optimized parameter:\n"
              "{:>11}: {:11f}".format(cfg[OPT_PARAMS][0], x_final.item()))
Example #21
def process_files(comp_f_list, col_name, base_out_name, delimiter,
                  sep_out_flag, out_location):
    """
    Combines the columns from multiple CSV files, aligning rows on their values in the
    specified column, and writes the merged rows to a new output file
    @param comp_f_list: a list of lists of file names to process (file read during input processing)
    @param col_name: name of column to use for alignment
    @param base_out_name: name of file to be created, or suffix if multiple files to be created
    @param delimiter: string, delimiter separating file names on lines of the comp_f_list
    @param sep_out_flag: a boolean to note if separate output files should be made based on each row of input
    @param out_location: user-specified location for the output files, if specified
    @raise InvalidDataError: if no common values are found in the alignment column
    """
    all_dicts = defaultdict(dict)

    # if need multiple output files, designate them by adding a prefix
    prefix = ''
    # if there will be multiple output files, make sure we do not reuse a prefix, so keep a copy of used names
    prefix_used = []
    # if one output file combines multiple sets of files, write_mode will be changed to append later
    write_mode = 'w'

    # we don't have to specify run names in the output if there is only one row (set of files) to combine,
    # or if there will be separate output files
    if len(comp_f_list) < 2 or sep_out_flag:
        add_run_name = False
        headers = []
    else:
        add_run_name = True
        headers = [RUN_NAME]

    for line_num, line in enumerate(comp_f_list):
        dict_keys = None
        if sep_out_flag:
            headers = []
            all_dicts = defaultdict(dict)
        # separate on delimiter, strip any white space, and also get rid of empty entries
        # wrap in list() so comp_files can be iterated more than once under Python 3
        comp_files = list(filter(
            None, [c_file.strip() for c_file in line.split(delimiter)]))

        # get the common part of the name, if it exists; otherwise, give the name the line index
        run_name = ''
        for file_index, file_name in enumerate(comp_files):
            base_name = os.path.splitext(os.path.basename(file_name))[0]
            if file_index == 0:
                run_name = base_name
            else:
                run_name = longest_common_substring(run_name, base_name)
        if run_name == '':
            # no common substring found, so use the line index (as a string) instead
            run_name = str(line_num) + "_"

        for c_file in comp_files:
            new_dict = read_csv_to_dict(c_file, col_name)
            if dict_keys is None:
                dict_keys = new_dict.keys()
            else:
                dict_keys = set(dict_keys).intersection(new_dict.keys())
            new_dict_keys = six.next(six.itervalues(new_dict)).keys()
            # Get the keys for the inner dictionary; diff methods for python 2 and 3 so use six
            # expect to only get new headers when making a new file (write_mode == 'w')
            # for the next file, will not gather more headers. When printed, extra cols will be skipped, and
            #    missing columns will have no data shown
            if write_mode == 'w':
                for key in new_dict_keys:
                    if key in headers:
                        # it is okay to see the header again if it is the alignment column;
                        # if appending, we also expect to already have the header names
                        if key != col_name:
                            warning("Non-unique column name {} found in {}. "
                                    "Values will be overwritten.".format(
                                        key, c_file))
                    else:
                        headers.append(key)
            for align_val, row_dict in new_dict.items():
                all_dicts[align_val].update(row_dict)

        final_dict = []
        for key in sorted(dict_keys):
            final_dict.append(all_dicts[key])
            # final_dict.append(all_dicts[key].update({RUN_NAME: run_name}))

        if add_run_name:
            for each_dict in final_dict:
                each_dict.update({RUN_NAME: run_name})

        # Possible to have no overlap in align column
        if len(final_dict) > 0:
            # make sure col_name appears first by taking it out before sorting
            if sep_out_flag:
                prefix = run_name
                if prefix == '' or prefix in prefix_used:
                    prefix = str(line_num) + "_"
            # have a consistent output by sorting the headers, but keep the aligning column first
            # only needs to be done for printing the first time
            if write_mode == 'w':
                headers.remove(col_name)
                headers = [col_name] + sorted(headers)
                if add_run_name:
                    headers.remove(RUN_NAME)
                    headers = [RUN_NAME] + headers
            f_name = create_out_fname(base_out_name,
                                      prefix=prefix,
                                      base_dir=out_location)
            prefix_used.append(prefix)
            write_csv(final_dict, f_name, headers, mode=write_mode)
            if not sep_out_flag and write_mode == 'w':
                write_mode = 'a'
        else:
            raise InvalidDataError(
                "No common values found for column {} among files: {}"
                "".format(col_name, ", ".join(comp_files)))