Example no. 1
0
def main(argv=None):
    """
    Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        # Parsing failed (or e.g. help was requested); pass the parser's
        # return code straight back to the caller.
        return ret

    # Read the csv with every value converted via float (all_conv=float),
    # then collapse rows -- presumably de-duplicating on args.column; verify
    # against compress_dups -- and write the result to a new file named from
    # the input with PREFIX prepended, keeping the original header.
    deduped = compress_dups(read_csv(args.file, all_conv=float), args.column)
    write_csv(deduped, create_out_fname(args.file, prefix=PREFIX),
              read_csv_header(args.file))

    return GOOD_RET  # success
Example no. 2
0
def main(argv=None):
    """
    Run the csv de-duplication workflow.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        # Hand the parser's failure code straight back to the caller.
        return ret

    # Read, de-duplicate, and write out under a prefixed file name.
    rows = read_csv(args.file, all_conv=float)
    deduped = compress_dups(rows, args.column)
    out_fname = create_out_fname(args.file, prefix=PREFIX)
    header = read_csv_header(args.file)
    write_csv(deduped, out_fname, header)

    return GOOD_RET  # success
Example no. 3
0
def process_file(base_file, data_file):
    """
    Tag every row of a data csv with its RMSD from a reference row.

    Reads the first row of ``base_file`` as the reference values, computes
    the RMSD of each row in ``data_file`` against that reference, adds an
    index and the (2-decimal-rounded) RMSD to each row, and writes the
    augmented rows to a new csv whose name is the data file's prefixed
    with the RMSD label.

    :param base_file: csv file whose first row supplies the reference values.
    :param data_file: csv file whose rows are compared against the reference.
    :raises InvalidDataError: if a key from the base file is missing from a
        compared row.
    """
    # TODO: add in reading vectors
    base_dict = read_csv(base_file, quote_style=csv.QUOTE_NONNUMERIC)[0]
    data_dict_list = read_csv(data_file, quote_style=csv.QUOTE_NONNUMERIC)

    data_headers = [INDEX, RMSD] + read_csv_header(data_file)

    # len(dict) already counts the entries; no need to materialize .values().
    num_vals = len(base_dict)
    for data_id, data_dict in enumerate(data_dict_list):
        rmsd = 0.0
        for key, val in base_dict.items():
            try:
                rmsd += (data_dict[key] - val)**2
            except KeyError:
                raise InvalidDataError(
                    "Could not find key '{}' from base file in compared data file."
                    .format(key))

        data_dict[INDEX] = data_id
        data_dict[RMSD] = round((rmsd / num_vals)**0.5, 2)

    out_name = create_out_fname(data_file, prefix=RMSD + '_')
    write_csv(data_dict_list, out_name, data_headers)
Example no. 4
0
 def testEmptyFile(self):
     """A csv with no rows has no header, so None is expected back."""
     header = read_csv_header(EMPTY_CSV)
     self.assertIsNone(header)
Example no. 5
0
 def testFirstRow(self):
     """The first row read back should equal the expected header list."""
     header = read_csv_header(CSV_FILE)
     self.assertListEqual(CSV_HEADER, header)
Example no. 6
0
def process_dump_files(cfg):
    """
    Process every dump file named in the configuration: optionally gather
    radial-distribution (g(r)) bin counts, optionally align rows from an EVB
    summary file, and write per-frame output and/or g(r) output.

    @param cfg: configuration data read from ini file
    """

    dump_file_list = []

    # Gather dump files from a list file and/or a single-file keyword; both
    # sources may contribute.
    if os.path.isfile(cfg[DUMP_FILE_LIST]):
        dump_file_list += file_rows_to_list(cfg[DUMP_FILE_LIST])
    if cfg[DUMP_FILE] is not None:
        dump_file_list.append(cfg[DUMP_FILE])

    if len(dump_file_list) == 0:
        raise InvalidDataError(
            "Found no dump files to process. Use the configuration ('ini') file to specify the name "
            "of a single dump file with the keyword '{}' or a file listing dump files with the "
            "keyword '{}'.".format(DUMP_FILE, DUMP_FILE_LIST))

    gofr_data = {}
    evb_dict = {}
    out_fieldnames = None

    # If RDFs are to be calculated, initialize empty data structures
    if cfg[GOFR_OUTPUT]:
        g_dr = cfg[GOFR_DR]
        g_max = cfg[GOFR_MAX]
        # Bin edges from 0.0 up to (and including) g_max, spaced g_dr apart.
        gofr_data[GOFR_BINS] = np.arange(0.0, g_max + g_dr, g_dr)
        if len(gofr_data[GOFR_BINS]) < 2:
            raise InvalidDataError(
                "Insufficient number of bins to calculate RDFs. Check input: "
                "{}: {}, {}: {},".format(GOFR_DR, cfg[GOFR_DR], GOFR_MAX,
                                         cfg[GOFR_MAX]))
        # One bin-count / steps-counted pair per requested atom-pair RDF.
        if cfg[CALC_HO_GOFR]:
            ini_gofr_data(gofr_data, HO_BIN_COUNT, GOFR_BINS, HO_STEPS_COUNTED)
        if cfg[CALC_OO_GOFR]:
            ini_gofr_data(gofr_data, OO_BIN_COUNT, GOFR_BINS, OO_STEPS_COUNTED)
        if cfg[CALC_HH_GOFR]:
            ini_gofr_data(gofr_data, HH_BIN_COUNT, GOFR_BINS, HH_STEPS_COUNTED)
        if cfg[CALC_OH_GOFR]:
            ini_gofr_data(gofr_data, OH_BIN_COUNT, GOFR_BINS, OH_STEPS_COUNTED)
        if cfg[CALC_TYPE_GOFR]:
            ini_gofr_data(gofr_data, TYPE_BIN_COUNT, GOFR_BINS,
                          TYPE_STEPS_COUNTED)

    if cfg[PER_FRAME_OUTPUT]:
        out_fieldnames = setup_per_frame_output(cfg)

    # Read the EVB summary, keeping only headers that are not already in the
    # per-frame field names, and append those extras to the output columns.
    # NOTE(review): if EVB_SUM_FILE is set while PER_FRAME_OUTPUT is false,
    # out_fieldnames is still None here, so the 'in' test and '+=' below
    # would raise TypeError -- confirm these options are meant to be coupled.
    if cfg[EVB_SUM_FILE] is not None:
        evb_dict = read_csv_to_dict(cfg[EVB_SUM_FILE], cfg[ALIGN_COL])
        evb_headers = read_csv_header(cfg[EVB_SUM_FILE])
        cfg[EVB_SUM_HEADERS] = []
        for header in evb_headers:
            if header not in out_fieldnames:
                cfg[EVB_SUM_HEADERS].append(header)
        if cfg[CALC_CEC_DIST]:
            # CEC distance needs the three CEC coordinate columns present.
            for header in [CEC_X, CEC_Y, CEC_Z]:
                if header not in cfg[EVB_SUM_HEADERS]:
                    raise InvalidDataError(
                        "If '{}' is set to True, these headers must be found in the '{}': {}."
                        "".format(CALC_CEC_DIST, EVB_SUM_FILE,
                                  [CEC_X, CEC_Y, CEC_Z]))
        out_fieldnames += cfg[EVB_SUM_HEADERS]

    per_frame_write_mode = 'w'
    base_out_file_name = cfg[DUMP_FILE_LIST]
    for dump_file in dump_file_list:
        data_to_print = []
        # output file base name to change at each iteration if not combining output
        if cfg[COMBINE_OUTPUT] is False:
            base_out_file_name = dump_file
        read_dump_file(dump_file, cfg, data_to_print, gofr_data,
                       out_fieldnames, per_frame_write_mode, evb_dict)
        if cfg[PER_FRAME_OUTPUT]:
            print_per_frame(base_out_file_name, cfg, data_to_print,
                            out_fieldnames, per_frame_write_mode)
        # if combining files, after first loop, always append to file
        if cfg[COMBINE_OUTPUT]:
            per_frame_write_mode = 'a'

    if cfg[GOFR_OUTPUT]:
        print_gofr(cfg, gofr_data)
Example no. 7
0
 def testEmptyFile(self):
     """Reading the header of an empty csv should return None."""
     self.assertIsNone(read_csv_header(EMPTY_CSV))
Example no. 8
0
 def testFirstRow(self):
     """The header read from CSV_FILE should match the expected list."""
     self.assertListEqual(CSV_HEADER, read_csv_header(CSV_FILE))