def main(argv=None):
    """
    Runs the program: parses the command line, reads the input CSV,
    compresses duplicate rows on the requested column, and writes the result.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    rows = read_csv(args.file, all_conv=float)
    unique_rows = compress_dups(rows, args.column)
    out_file = create_out_fname(args.file, prefix=PREFIX)
    write_csv(unique_rows, out_file, read_csv_header(args.file))
    return GOOD_RET  # success
def process_file(base_file, data_file):
    """
    Computes the RMSD of each row of data_file against the first row of
    base_file, tags each row with its index and RMSD, and writes the
    augmented rows to a new CSV.

    :param base_file: CSV whose first row is the reference values.
    :param data_file: CSV whose rows are compared to the reference.
    :raises InvalidDataError: if a base-file key is absent from a data row.
    """
    # TODO: add in reading vectors
    base_row = read_csv(base_file, quote_style=csv.QUOTE_NONNUMERIC)[0]
    cmp_rows = read_csv(data_file, quote_style=csv.QUOTE_NONNUMERIC)
    headers = [INDEX, RMSD] + read_csv_header(data_file)
    base_len = len(base_row.values())

    for row_id, row in enumerate(cmp_rows):
        sq_diff_sum = 0.0
        for key, base_val in base_row.items():
            try:
                sq_diff_sum += (row[key] - base_val) ** 2
            except KeyError:
                raise InvalidDataError(
                    "Could not find key '{}' from base file in compared data file."
                    .format(key))
        row[INDEX] = row_id
        row[RMSD] = round((sq_diff_sum / base_len) ** 0.5, 2)

    write_csv(cmp_rows, create_out_fname(data_file, prefix=RMSD + '_'), headers)
def testEmptyFile(self):
    """Reading the header of an empty CSV should yield None."""
    header = read_csv_header(EMPTY_CSV)
    self.assertIsNone(header)
def testFirstRow(self):
    """The first row of the CSV should come back as the header list."""
    header = read_csv_header(CSV_FILE)
    self.assertListEqual(CSV_HEADER, header)
def process_dump_files(cfg):
    """
    Processes each configured LAMMPS-style dump file: optionally computes
    RDF (g(r)) data, optionally writes per-frame output (optionally merged
    with columns from an EVB summary file), and prints final results.

    @param cfg: configuration data read from ini file
    @raise InvalidDataError: if no dump files are specified, RDF bin setup
        is invalid, or required CEC headers are missing from the EVB file.
    """
    dump_file_list = []
    # Collect dump files from an optional list file and/or a single named file.
    if os.path.isfile(cfg[DUMP_FILE_LIST]):
        dump_file_list += file_rows_to_list(cfg[DUMP_FILE_LIST])
    if cfg[DUMP_FILE] is not None:
        dump_file_list.append(cfg[DUMP_FILE])
    if len(dump_file_list) == 0:
        raise InvalidDataError(
            "Found no dump files to process. Use the configuration ('ini') file to specify the name "
            "of a single dump file with the keyword '{}' or a file listing dump files with the "
            "keyword '{}'.".format(DUMP_FILE, DUMP_FILE_LIST))
    gofr_data = {}
    evb_dict = {}
    out_fieldnames = None
    # If RDFs are to be calculated, initialize empty data structures
    if cfg[GOFR_OUTPUT]:
        g_dr = cfg[GOFR_DR]
        g_max = cfg[GOFR_MAX]
        # Bin edges from 0 to g_max (inclusive of the last edge) in steps of g_dr.
        gofr_data[GOFR_BINS] = np.arange(0.0, g_max + g_dr, g_dr)
        if len(gofr_data[GOFR_BINS]) < 2:
            raise InvalidDataError(
                "Insufficient number of bins to calculate RDFs. Check input: "
                "{}: {}, {}: {},".format(GOFR_DR, cfg[GOFR_DR], GOFR_MAX, cfg[GOFR_MAX]))
        # One accumulator (bin counts + frames counted) per requested atom-pair RDF.
        if cfg[CALC_HO_GOFR]:
            ini_gofr_data(gofr_data, HO_BIN_COUNT, GOFR_BINS, HO_STEPS_COUNTED)
        if cfg[CALC_OO_GOFR]:
            ini_gofr_data(gofr_data, OO_BIN_COUNT, GOFR_BINS, OO_STEPS_COUNTED)
        if cfg[CALC_HH_GOFR]:
            ini_gofr_data(gofr_data, HH_BIN_COUNT, GOFR_BINS, HH_STEPS_COUNTED)
        if cfg[CALC_OH_GOFR]:
            ini_gofr_data(gofr_data, OH_BIN_COUNT, GOFR_BINS, OH_STEPS_COUNTED)
        if cfg[CALC_TYPE_GOFR]:
            ini_gofr_data(gofr_data, TYPE_BIN_COUNT, GOFR_BINS, TYPE_STEPS_COUNTED)
    if cfg[PER_FRAME_OUTPUT]:
        out_fieldnames = setup_per_frame_output(cfg)
    if cfg[EVB_SUM_FILE] is not None:
        # Read the EVB summary keyed by the alignment column; add only the
        # EVB headers not already present in the per-frame fieldnames.
        # NOTE(review): if EVB_SUM_FILE is set while PER_FRAME_OUTPUT is False,
        # out_fieldnames is still None here and 'header not in out_fieldnames'
        # raises TypeError — presumably config validation forbids that
        # combination; confirm upstream.
        evb_dict = read_csv_to_dict(cfg[EVB_SUM_FILE], cfg[ALIGN_COL])
        evb_headers = read_csv_header(cfg[EVB_SUM_FILE])
        cfg[EVB_SUM_HEADERS] = []
        for header in evb_headers:
            if header not in out_fieldnames:
                cfg[EVB_SUM_HEADERS].append(header)
        if cfg[CALC_CEC_DIST]:
            # CEC distance calculation requires the CEC coordinate columns.
            for header in [CEC_X, CEC_Y, CEC_Z]:
                if header not in cfg[EVB_SUM_HEADERS]:
                    raise InvalidDataError(
                        "If '{}' is set to True, these headers must be found in the '{}': {}."
                        "".format(CALC_CEC_DIST, EVB_SUM_FILE, [CEC_X, CEC_Y, CEC_Z]))
        out_fieldnames += cfg[EVB_SUM_HEADERS]
    per_frame_write_mode = 'w'
    base_out_file_name = cfg[DUMP_FILE_LIST]
    for dump_file in dump_file_list:
        data_to_print = []
        # output file base name to change at each iteration if not combining output
        if cfg[COMBINE_OUTPUT] is False:
            base_out_file_name = dump_file
        read_dump_file(dump_file, cfg, data_to_print, gofr_data, out_fieldnames,
                       per_frame_write_mode, evb_dict)
        if cfg[PER_FRAME_OUTPUT]:
            print_per_frame(base_out_file_name, cfg, data_to_print, out_fieldnames,
                            per_frame_write_mode)
        # if combining files, after first loop, always append to file
        if cfg[COMBINE_OUTPUT]:
            per_frame_write_mode = 'a'
    if cfg[GOFR_OUTPUT]:
        print_gofr(cfg, gofr_data)