Example #1
0
    def test_basic_empty_cells_num(self):
        filedata = b"""A,B,C,D,E,F
#,1,#,#,2,#
3,#,#,4,5,#
6,#,#,#,#,#
#,7,#,#,#,#
#,#,8,#,#,#
#,#,#,9,#,#
#,#,#,#,10,#
#,#,#,#,#,11
#,#,12,#,#,13
14,#,#,15,16,17
"""
        filedata = filedata.replace(b"#", b"")
        with generate_tempfile(filedata) as fn:
            expected = {
                "A": [0, 3, 6, 0, 0, 0, 0, 0, 0, 14],
                "B": [1, 0, 0, 7, 0, 0, 0, 0, 0, 0],
                "C": [0, 0, 0, 0, 8, 0, 0, 0, 12, 0],
                "D": [0, 4, 0, 0, 0, 9, 0, 0, 0, 15],
                "E": [2, 5, 0, 0, 0, 0, 10, 0, 0, 16],
                "F": [0, 0, 0, 0, 0, 0, 0, 11, 13, 17]
            }
            logging.debug("filename: %s" % fn)
            actual = paratext.load_csv_to_pandas(fn, number_only=True)
            assert_dictframe_almost_equal(actual, expected)
Example #2
0
 def do_basic_nums(self, dtype, num_rows, num_columns, num_threads,
                   number_only, no_header):
     """Round-trip a generated numeric CSV through ``load_csv_to_pandas``.

     Builds ``num_rows`` rows of ``num_columns`` values, writes them to a
     temporary file, loads the file back and compares the result with the
     generated values.

     Args:
         dtype: Selects the generated data. For a NumPy integer dtype the
             cell at (row, i) is ``row * i``; for anything else each row is
             drawn from ``np.random.random``.
         num_rows: Number of data rows to write.
         num_columns: Number of values per row.
         num_threads: Forwarded to ``load_csv_to_pandas``.
         number_only: Forwarded to ``load_csv_to_pandas``.
         no_header: When true no header line is written and the expected
             column names are ``col0``, ``col1``, ...; otherwise the header
             uses the letters ``A`` to ``J``.

     NOTE(review): only ten header letters are defined, so with a header and
     ``num_columns > 10`` the rows are wider than the header line. Callers
     presumably stay within ten columns -- confirm.
     """
     if no_header:
         keys = ["col%d" % k for k in range(num_columns)]
         lines = []
     else:
         keys = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
         keys = keys[:num_columns]
         lines = [",".join(keys)]
     expected = {key: [] for key in keys}
     # The dtype check does not depend on the row, so evaluate it once.
     use_integers = np.issubdtype(dtype, np.integer)
     for row in range(num_rows):
         if use_integers:
             row_data = [row * i for i in range(num_columns)]
         else:
             row_data = np.random.random((num_columns, ))
         lines.append(",".join(str(v) for v in row_data))
         # zip stops at the shorter sequence, so surplus values in a row
         # (more columns than keys) are ignored, as before.
         for key, value in zip(keys, row_data):
             expected[key].append(value)
     # Every line, including the last, is newline-terminated; joining once
     # avoids rebuilding the string on every row.
     filedata = "".join(line + "\n" for line in lines)
     with generate_tempfile(filedata.encode("utf-8")) as fn:
         logging.debug("filename: %s", fn)
         actual = paratext.load_csv_to_pandas(fn,
                                              num_threads=num_threads,
                                              number_only=number_only,
                                              no_header=no_header)
         assert_dictframe_almost_equal(actual, expected)
Example #3
0
    def test_basic_3x0x(self):
        """Load a file with a three-column header and no data rows.

        Each of the columns A, B and C is expected to be empty.
        """
        header_only = b"A,B,C\n"
        # A separate empty list per column, in header order.
        empty_columns = {name: [] for name in ("A", "B", "C")}
        with generate_tempfile(header_only) as csv_path:
            logging.debug("filename: %s" % csv_path)
            frame = paratext.load_csv_to_pandas(csv_path)
            assert_dictframe_almost_equal(frame, empty_columns)
Example #4
0
    def test_edge_case1(self):
        """Load one row whose cells mix letters, digits and a dot.

        Both cells ("A.1" and "3ABC") are expected back as the original
        strings.
        """
        csv_bytes = b"A,B\nA.1,3ABC\n"
        wanted = {
            "A": ["A.1"],
            "B": ["3ABC"],
        }
        with generate_tempfile(csv_bytes) as csv_path:
            logging.debug("filename: %s" % csv_path)
            frame = paratext.load_csv_to_pandas(csv_path)
            assert_dictframe_almost_equal(frame, wanted)
Example #5
0
    def test_basic_strange1(self):
        filedata = b"""A,B,C
"\\\"","",7
"\\\\","X",8
"\n","\\\\\\"",9"""
        with generate_tempfile(filedata) as fn:
            expected = {"A": ["\"","\\","\n"], "B": ["","X","\\\""], "C": [7,8,9]}
            logging.debug("filename: %s" % fn)
            actual = paratext.load_csv_to_pandas(fn, allow_quoted_newlines=True, out_encoding="utf-8")
            assert_dictframe_almost_equal(actual, expected)
Example #6
0
 def do_basic_empty(self, file_body, num_threads):
     """Load ``file_body`` and compare the result with an empty DataFrame.

     Args:
         file_body: Bytes written to the temporary input file.
         num_threads: Forwarded to ``load_csv_to_pandas``.
     """
     empty_frame = pandas.DataFrame()
     with generate_tempfile(file_body) as csv_path:
         logging.debug("filename: %s" % csv_path)
         loaded = paratext.load_csv_to_pandas(csv_path,
                                              num_threads=num_threads)
         assert_dictframe_almost_equal(loaded, empty_frame)