def main(argv=None):
    # Read input
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    len_buffer = None
    try:
        if args.buffer is not None:
            try:
                len_buffer = float(args.buffer)
            except ValueError:
                raise InvalidDataError("Input for buffer ({}) could not be converted to "
                                       "a float.".format(args.buffer))
        if args.out_dir is None:
            args.out_dir = os.path.dirname(args.file)
        if args.min_max_file is None:
            min_max_dict = None
        else:
            min_max_dict = read_csv(args.min_max_file, quote_style=csv.QUOTE_NONNUMERIC)
        process_file(args.file, args.out_dir, len_buffer, args.delimiter, min_max_dict,
                     header=args.names, make_hist=args.histogram)
    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
def process_file(file_to_process, cfg):
    """
    Will complete the work of this script based on the provided cfg.

    @param file_to_process: the file with columns to be combined
    @param cfg: the configuration of this run
    @return: errors or nothing
    """
    to_print = []

    # determine if any type conversion has been specified & create conv dict if needed
    if cfg[COL1_CONV] is None and cfg[COL2_CONV] is None:
        conv_dict = None
    else:
        conv_dict = {}
        if cfg[COL1_CONV] is not None:
            conv_dict[cfg[COL1]] = cfg[COL1_CONV]
        if cfg[COL2_CONV] is not None:
            conv_dict[cfg[COL2]] = cfg[COL2_CONV]

    raw_col_data = read_csv(file_to_process, data_conv=conv_dict, quote_style=csv.QUOTE_NONNUMERIC)
    for header in cfg[COL1], cfg[COL2]:
        if header not in raw_col_data[0]:
            raise InvalidDataError("Specified column header '{}' was not found in file: {}"
                                   "".format(header, file_to_process))
    for row in raw_col_data:
        to_print.append(["".join(map(str, [cfg[PREFIX], row[cfg[COL1]], cfg[MIDDLE],
                                           row[cfg[COL2]], cfg[SUFFIX]]))])
    list_to_csv(to_print, cfg[OUT_FILE], delimiter=',', quote_style=csv.QUOTE_MINIMAL)
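# Usage sketch (not from the source): the row-formatting step above joins a
# configured prefix, the first column's value, a middle string, the second
# column's value, and a suffix into a single output field. The names and
# values below are hypothetical stand-ins for the cfg entries and CSV row data.
prefix, middle, suffix = "mv ", " ", ".pdb"
col1_val, col2_val = "frame", 42

line = "".join(map(str, [prefix, col1_val, middle, col2_val, suffix]))
print(line)  # mv frame 42.pdb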
def main(argv=None):
    """
    Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    kbt = calc_kbt(args.temp)
    if args.coord_ts is not None:
        logger.info("Read TS coordinate value: '{:8.3f}'".format(args.coord_ts))

    try:
        if args.src_file is not None:
            file_data = read_csv(args.src_file, data_conv=KEY_CONV)
            f_base_name = os.path.basename(args.src_file)
            try:
                pka, cur_corr, cur_coord = calc_pka(file_data, kbt, args.coord_ts)
                result = [{SRC_KEY: f_base_name, PKA_KEY: pka,
                           MAX_VAL: cur_corr, MAX_LOC: cur_coord}]
            except NoMaxError:
                result = [{SRC_KEY: f_base_name, PKA_KEY: NO_MAX_RET,
                           MAX_VAL: NO_MAX_RET, MAX_LOC: NO_MAX_RET}]
            write_result(result, args.src_file, args.overwrite)
        else:
            found_files = find_files_by_dir(args.base_dir, args.pattern)
            logger.debug("Found '{}' dirs with files to process".format(len(found_files)))
            if len(found_files) == 0:
                raise IOError("No files found in specified directory '{}'".format(args.base_dir))
            for f_dir, files in found_files.items():
                results = []
                for pmf_path, fname in [(os.path.join(f_dir, tgt), tgt) for tgt in sorted(files)]:
                    file_data = read_csv(pmf_path, data_conv=KEY_CONV)
                    try:
                        pka, cur_corr, cur_coord = calc_pka(file_data, kbt, args.coord_ts)
                        results.append({SRC_KEY: fname, PKA_KEY: pka,
                                        MAX_VAL: cur_corr, MAX_LOC: cur_coord})
                    except NoMaxError:
                        results.append({SRC_KEY: fname, PKA_KEY: NO_MAX_RET,
                                        MAX_VAL: NO_MAX_RET, MAX_LOC: NO_MAX_RET})
                write_result(results, os.path.basename(f_dir), args.overwrite,
                             basedir=os.path.dirname(f_dir))
    except IOError as e:
        warning(e)
        return IO_ERROR

    return GOOD_RET  # success
def calc_avg_stdev(coord_bin):
    collect_coord = defaultdict(list)
    for csv_data in (read_csv(c_file, data_conv=KEY_CONV) for c_file in coord_bin):
        for d_row in csv_data:
            collect_coord[d_row[COORD_KEY]].append(d_row[CORR_KEY])

    results = []
    for coord, freng_vals in collect_coord.items():
        results.append((coord, np.mean(freng_vals), np.std(freng_vals, ddof=1)))
    return results
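# Illustrative sketch (not from the source) of the aggregation pattern used in
# calc_avg_stdev: group values by coordinate with a defaultdict, then take the
# mean and the sample standard deviation (ddof=1) per group. COORD_KEY,
# CORR_KEY, and the inline rows are hypothetical stand-ins for the CSV data
# the real function reads from files.
from collections import defaultdict

import numpy as np

COORD_KEY = "coord"
CORR_KEY = "corr"

rows = [{COORD_KEY: 1.0, CORR_KEY: 0.50},
        {COORD_KEY: 1.0, CORR_KEY: 0.54},
        {COORD_KEY: 2.0, CORR_KEY: 0.80},
        {COORD_KEY: 2.0, CORR_KEY: 0.86}]

collect_coord = defaultdict(list)
for d_row in rows:
    collect_coord[d_row[COORD_KEY]].append(d_row[CORR_KEY])

results = [(coord, np.mean(vals), np.std(vals, ddof=1))
           for coord, vals in collect_coord.items()]
print(results)  # one (coordinate, mean, sample stdev) tuple per coordinate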
def testGood(self):
    pka_val = calc_pka(read_csv(GOOD_RAD_PATH,
                                data_conv={FREE_KEY: float, CORR_KEY: float, COORD_KEY: float}),
                       calc_kbt(EXP_TEMP))
    self.assertAlmostEqual(4.7036736, pka_val[0])
def testNoMax(self):
    with self.assertRaises(NoMaxError) as context:
        calc_pka(read_csv(NO_MAX_RAD_PATH,
                          data_conv={FREE_KEY: float, CORR_KEY: float, COORD_KEY: float}),
                 calc_kbt(EXP_TEMP))
    self.assertTrue(NO_MAX_ERR in context.exception.args)
def testReadTypedCsvAllConv(self):
    """
    Verifies the contents of the CSV file using the all_conv keyword argument.
    """
    result = read_csv(CSV_FILE, all_conv=float)
    self.assertTrue(result)
    for row in result:
        self.assertEqual(3, len(row))
        self.assertIsNotNone(row.get(FREE_KEY, None))
        self.assertTrue(is_one_of_type(row[FREE_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(CORR_KEY, None))
        self.assertTrue(is_one_of_type(row[CORR_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(COORD_KEY, None))
        self.assertIsInstance(row[COORD_KEY], float)
def process_file(base_file, data_file):
    # TODO: add in reading vectors
    base_dict = read_csv(base_file, quote_style=csv.QUOTE_NONNUMERIC)[0]
    data_dict_list = read_csv(data_file, quote_style=csv.QUOTE_NONNUMERIC)

    data_headers = [INDEX, RMSD] + read_csv_header(data_file)

    num_vals = len(base_dict.values())
    for data_id, data_dict in enumerate(data_dict_list):
        rmsd = 0.0
        for key, val in base_dict.items():
            try:
                rmsd += (data_dict[key] - val) ** 2
            except KeyError:
                raise InvalidDataError("Could not find key '{}' from base file in compared "
                                       "data file.".format(key))
        data_dict[INDEX] = data_id
        data_dict[RMSD] = round((rmsd / num_vals) ** 0.5, 2)

    out_name = create_out_fname(data_file, prefix=RMSD + '_')
    write_csv(data_dict_list, out_name, data_headers)
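# Arithmetic sketch (not from the source) of the per-row RMSD computed in
# process_file above: sum the squared differences over the base file's keys,
# divide by the number of values, take the square root, and round to two
# decimal places. The dictionaries and key names here are hypothetical.
base_dict = {"a": 1.0, "b": 2.0, "c": 3.0}
data_dict = {"a": 1.5, "b": 2.5, "c": 2.0}

sq_diff_sum = 0.0
for key, val in base_dict.items():
    sq_diff_sum += (data_dict[key] - val) ** 2

rmsd = round((sq_diff_sum / len(base_dict)) ** 0.5, 2)
print(rmsd)  # sqrt((0.25 + 0.25 + 1.0) / 3) rounds to 0.71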
def testReadCsv(self):
    """
    Verifies the contents of the CSV file.
    """
    result = read_csv(CSV_FILE)
    self.assertTrue(result)
    for row in result:
        self.assertEqual(3, len(row))
        self.assertIsNotNone(row.get(FREE_KEY, None))
        self.assertIsInstance(row[FREE_KEY], str)
        self.assertIsNotNone(row.get(CORR_KEY, None))
        self.assertIsInstance(row[CORR_KEY], str)
        self.assertIsNotNone(row.get(COORD_KEY, None))
        self.assertIsInstance(row[COORD_KEY], str)
def testAvgStdev(self):
    results = calc_avg_stdev(IN_FILES)
    directory_name = None
    try:
        directory_name = tempfile.mkdtemp()
        tgt_file = OUT_FNAME_FMT.format("02")
        write_avg_stdev(results, tgt_file, basedir=directory_name)
        csv_data = read_csv(os.path.join(directory_name, tgt_file), data_conv=AVG_KEY_CONV)
        for entry in csv_data:
            self.assertEqual(3, len(entry))
            for c_key, c_val in entry.items():
                self.assertIsInstance(c_val, float)
                self.assertTrue(c_key in OUT_KEY_SEQ)
    finally:
        # guard against mkdtemp having failed before directory_name was set
        if directory_name is not None:
            shutil.rmtree(directory_name)
def main(argv=None):
    """
    Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    deduped = compress_dups(read_csv(args.file, all_conv=float), args.column)
    write_csv(deduped, create_out_fname(args.file, prefix=PREFIX), read_csv_header(args.file))

    return GOOD_RET  # success
def testReadTypedCsv(self):
    """
    Verifies the contents of the CSV file.
    """
    result = read_csv(CSV_FILE,
                      data_conv={FREE_KEY: float, CORR_KEY: float, COORD_KEY: float})
    self.assertTrue(result)
    for row in result:
        self.assertEqual(3, len(row))
        self.assertIsNotNone(row.get(FREE_KEY, None))
        self.assertTrue(is_one_of_type(row[FREE_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(CORR_KEY, None))
        self.assertTrue(is_one_of_type(row[CORR_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(COORD_KEY, None))
        self.assertIsInstance(row[COORD_KEY], float)
def testWriteCsv(self):
    tmp_dir = None
    data = csv_data()
    try:
        tmp_dir = tempfile.mkdtemp()
        tgt_fname = create_out_fname(SHORT_WHAM_PATH, prefix=OUT_PFX, base_dir=tmp_dir)
        write_csv(data, tgt_fname, RAD_KEY_SEQ)
        csv_result = read_csv(tgt_fname,
                              data_conv={FREE_KEY: str_to_bool, CORR_KEY: float, COORD_KEY: str})
        self.assertEqual(len(data), len(csv_result))
        for i, csv_row in enumerate(csv_result):
            self.assertDictEqual(data[i], csv_row)
    finally:
        # guard against mkdtemp having failed before tmp_dir was set
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir)