def main(argv=None):
    # Read input
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    len_buffer = None
    try:
        if args.buffer is not None:
            try:
                len_buffer = float(args.buffer)
            except ValueError:
                raise InvalidDataError("Input for buffer ({}) could not be converted to "
                                       "a float.".format(args.buffer))
        if args.out_dir is None:
            args.out_dir = os.path.dirname(args.file)
        if args.min_max_file is None:
            min_max_dict = None
        else:
            min_max_dict = read_csv(args.min_max_file, quote_style=csv.QUOTE_NONNUMERIC)
        process_file(args.file, args.out_dir, len_buffer, args.delimiter, min_max_dict,
                     header=args.names, make_hist=args.histogram)
    except IOError as e:
        warning("Problems reading file:", e)
        return IO_ERROR
    except InvalidDataError as e:
        warning("Problems reading data:", e)
        return INVALID_DATA

    return GOOD_RET  # success
def process_file(file_to_process, cfg):
    """
    Will complete the work of this script based on the provided cfg.

    @param file_to_process: the file with columns to be combined
    @param cfg: the configuration of this run
    @return: errors or nothing
    """
    to_print = []

    # determine if any type conversion has been specified & create conv dict if needed
    if cfg[COL1_CONV] is None and cfg[COL2_CONV] is None:
        conv_dict = None
    else:
        conv_dict = {}
        if cfg[COL1_CONV] is not None:
            conv_dict[cfg[COL1]] = cfg[COL1_CONV]
        if cfg[COL2_CONV] is not None:
            conv_dict[cfg[COL2]] = cfg[COL2_CONV]

    raw_col_data = read_csv(file_to_process, data_conv=conv_dict, quote_style=csv.QUOTE_NONNUMERIC)
    for header in cfg[COL1], cfg[COL2]:
        if header not in raw_col_data[0]:
            raise InvalidDataError("Specified column header '{}' was not found in file: {}"
                                   "".format(header, file_to_process))
    for row in raw_col_data:
        to_print.append(["".join(map(str, [cfg[PREFIX], row[cfg[COL1]], cfg[MIDDLE],
                                           row[cfg[COL2]], cfg[SUFFIX]]))])
    list_to_csv(to_print, cfg[OUT_FILE], delimiter=',', quote_style=csv.QUOTE_MINIMAL)
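# Usage sketch (not from the source): the row-formatting step above joins a
# configured prefix, the first column's value, a middle string, the second
# column's value, and a suffix into a single output field. The names and
# values below are hypothetical stand-ins for the cfg entries and CSV row data.
prefix, middle, suffix = "mv ", " ", ".pdb"
col1_val, col2_val = "frame", 42

line = "".join(map(str, [prefix, col1_val, middle, col2_val, suffix]))
print(line)  # mv frame 42.pdb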
def main(argv=None):
    """
    Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    kbt = calc_kbt(args.temp)
    if args.coord_ts is not None:
        logger.info("Read TS coordinate value: '{:8.3f}'".format(args.coord_ts))

    try:
        if args.src_file is not None:
            file_data = read_csv(args.src_file, data_conv=KEY_CONV)
            f_base_name = os.path.basename(args.src_file)
            try:
                pka, cur_corr, cur_coord = calc_pka(file_data, kbt, args.coord_ts)
                result = [{SRC_KEY: f_base_name, PKA_KEY: pka,
                           MAX_VAL: cur_corr, MAX_LOC: cur_coord}]
            except NoMaxError:
                result = [{SRC_KEY: f_base_name, PKA_KEY: NO_MAX_RET,
                           MAX_VAL: NO_MAX_RET, MAX_LOC: NO_MAX_RET}]
            write_result(result, args.src_file, args.overwrite)
        else:
            found_files = find_files_by_dir(args.base_dir, args.pattern)
            logger.debug("Found '{}' dirs with files to process".format(len(found_files)))
            if len(found_files) == 0:
                raise IOError("No files found in specified directory '{}'".format(args.base_dir))
            for f_dir, files in found_files.items():
                results = []
                for pmf_path, fname in [(os.path.join(f_dir, tgt), tgt) for tgt in sorted(files)]:
                    file_data = read_csv(pmf_path, data_conv=KEY_CONV)
                    try:
                        pka, cur_corr, cur_coord = calc_pka(file_data, kbt, args.coord_ts)
                        results.append({SRC_KEY: fname, PKA_KEY: pka,
                                        MAX_VAL: cur_corr, MAX_LOC: cur_coord})
                    except NoMaxError:
                        results.append({SRC_KEY: fname, PKA_KEY: NO_MAX_RET,
                                        MAX_VAL: NO_MAX_RET, MAX_LOC: NO_MAX_RET})
                write_result(results, os.path.basename(f_dir), args.overwrite,
                             basedir=os.path.dirname(f_dir))
    except IOError as e:
        warning(e)
        return IO_ERROR

    return GOOD_RET  # success
def calc_avg_stdev(coord_bin):
    collect_coord = defaultdict(list)
    for csv_data in (read_csv(c_file, data_conv=KEY_CONV) for c_file in coord_bin):
        for d_row in csv_data:
            collect_coord[d_row[COORD_KEY]].append(d_row[CORR_KEY])

    results = []
    for coord, freng_vals in collect_coord.items():
        results.append((coord, np.mean(freng_vals), np.std(freng_vals, ddof=1)))
    return results
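# Illustrative sketch (not from the source) of the aggregation pattern used in
# calc_avg_stdev: group values by coordinate with a defaultdict, then take the
# mean and the sample standard deviation (ddof=1) per group. COORD_KEY,
# CORR_KEY, and the inline rows are hypothetical stand-ins for the CSV data
# the real function reads from files.
from collections import defaultdict

import numpy as np

COORD_KEY = "coord"
CORR_KEY = "corr"

rows = [{COORD_KEY: 1.0, CORR_KEY: 0.50},
        {COORD_KEY: 1.0, CORR_KEY: 0.54},
        {COORD_KEY: 2.0, CORR_KEY: 0.80},
        {COORD_KEY: 2.0, CORR_KEY: 0.86}]

collect_coord = defaultdict(list)
for d_row in rows:
    collect_coord[d_row[COORD_KEY]].append(d_row[CORR_KEY])

results = [(coord, np.mean(vals), np.std(vals, ddof=1))
           for coord, vals in collect_coord.items()]
print(results)  # one (coordinate, mean, sample stdev) tuple per coordinate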
def testGood(self):
    pka_val = calc_pka(read_csv(GOOD_RAD_PATH,
                                data_conv={FREE_KEY: float, CORR_KEY: float, COORD_KEY: float}),
                       calc_kbt(EXP_TEMP))
    self.assertAlmostEqual(4.7036736, pka_val[0])
def testNoMax(self):
    with self.assertRaises(NoMaxError) as context:
        calc_pka(read_csv(NO_MAX_RAD_PATH,
                          data_conv={FREE_KEY: float, CORR_KEY: float, COORD_KEY: float}),
                 calc_kbt(EXP_TEMP))
    self.assertTrue(NO_MAX_ERR in context.exception.args)
def testReadTypedCsvAllConv(self):
    """
    Verifies the contents of the CSV file using the all_conv keyword argument.
    """
    result = read_csv(CSV_FILE, all_conv=float)
    self.assertTrue(result)
    for row in result:
        self.assertEqual(3, len(row))
        self.assertIsNotNone(row.get(FREE_KEY, None))
        self.assertTrue(is_one_of_type(row[FREE_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(CORR_KEY, None))
        self.assertTrue(is_one_of_type(row[CORR_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(COORD_KEY, None))
        self.assertIsInstance(row[COORD_KEY], float)
def process_file(base_file, data_file):
    # TODO: add in reading vectors
    base_dict = read_csv(base_file, quote_style=csv.QUOTE_NONNUMERIC)[0]
    data_dict_list = read_csv(data_file, quote_style=csv.QUOTE_NONNUMERIC)

    data_headers = [INDEX, RMSD] + read_csv_header(data_file)

    num_vals = len(base_dict.values())
    for data_id, data_dict in enumerate(data_dict_list):
        rmsd = 0.0
        for key, val in base_dict.items():
            try:
                rmsd += (data_dict[key] - val) ** 2
            except KeyError:
                raise InvalidDataError("Could not find key '{}' from base file in compared "
                                       "data file.".format(key))
        data_dict[INDEX] = data_id
        data_dict[RMSD] = round((rmsd / num_vals) ** 0.5, 2)

    out_name = create_out_fname(data_file, prefix=RMSD + '_')
    write_csv(data_dict_list, out_name, data_headers)
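# Arithmetic sketch (not from the source) of the per-row RMSD computed in
# process_file above: sum the squared differences over the base file's keys,
# divide by the number of values, take the square root, and round to two
# decimal places. The dictionaries and key names here are hypothetical.
base_dict = {"a": 1.0, "b": 2.0, "c": 3.0}
data_dict = {"a": 1.5, "b": 2.5, "c": 2.0}

sq_diff_sum = 0.0
for key, val in base_dict.items():
    sq_diff_sum += (data_dict[key] - val) ** 2

rmsd = round((sq_diff_sum / len(base_dict)) ** 0.5, 2)
print(rmsd)  # sqrt((0.25 + 0.25 + 1.0) / 3) rounds to 0.71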
def testReadCsv(self):
    """
    Verifies the contents of the CSV file.
    """
    result = read_csv(CSV_FILE)
    self.assertTrue(result)
    for row in result:
        self.assertEqual(3, len(row))
        self.assertIsNotNone(row.get(FREE_KEY, None))
        self.assertIsInstance(row[FREE_KEY], str)
        self.assertIsNotNone(row.get(CORR_KEY, None))
        self.assertIsInstance(row[CORR_KEY], str)
        self.assertIsNotNone(row.get(COORD_KEY, None))
        self.assertIsInstance(row[COORD_KEY], str)
def testAvgStdev(self):
    results = calc_avg_stdev(IN_FILES)
    directory_name = None
    try:
        directory_name = tempfile.mkdtemp()
        tgt_file = OUT_FNAME_FMT.format("02")
        write_avg_stdev(results, tgt_file, basedir=directory_name)
        csv_data = read_csv(os.path.join(directory_name, tgt_file), data_conv=AVG_KEY_CONV)
        for entry in csv_data:
            self.assertEqual(3, len(entry))
            for c_key, c_val in entry.items():
                self.assertIsInstance(c_val, float)
                self.assertTrue(c_key in OUT_KEY_SEQ)
    finally:
        # guard against mkdtemp having failed before directory_name was set
        if directory_name is not None:
            shutil.rmtree(directory_name)
def main(argv=None):
    """
    Runs the main program.

    :param argv: The command line arguments.
    :return: The return code for the program's termination.
    """
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET:
        return ret

    deduped = compress_dups(read_csv(args.file, all_conv=float), args.column)
    write_csv(deduped, create_out_fname(args.file, prefix=PREFIX), read_csv_header(args.file))

    return GOOD_RET  # success
def testReadTypedCsv(self):
    """
    Verifies the contents of the CSV file.
    """
    result = read_csv(CSV_FILE,
                      data_conv={FREE_KEY: float, CORR_KEY: float, COORD_KEY: float})
    self.assertTrue(result)
    for row in result:
        self.assertEqual(3, len(row))
        self.assertIsNotNone(row.get(FREE_KEY, None))
        self.assertTrue(is_one_of_type(row[FREE_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(CORR_KEY, None))
        self.assertTrue(is_one_of_type(row[CORR_KEY], FRENG_TYPES))
        self.assertIsNotNone(row.get(COORD_KEY, None))
        self.assertIsInstance(row[COORD_KEY], float)
def testWriteCsv(self):
    tmp_dir = None
    data = csv_data()
    try:
        tmp_dir = tempfile.mkdtemp()
        tgt_fname = create_out_fname(SHORT_WHAM_PATH, prefix=OUT_PFX, base_dir=tmp_dir)
        write_csv(data, tgt_fname, RAD_KEY_SEQ)
        csv_result = read_csv(tgt_fname,
                              data_conv={FREE_KEY: str_to_bool, CORR_KEY: float, COORD_KEY: str})
        self.assertEqual(len(data), len(csv_result))
        for i, csv_row in enumerate(csv_result):
            self.assertDictEqual(data[i], csv_row)
    finally:
        # guard against mkdtemp having failed before tmp_dir was set
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir)