예제 #1
0
 def test_parse_invalid_xlsx(self):
     # Bytes that are not a real workbook, presented under an .xlsx name:
     # parse_file is expected to return an error string, not a DataFrame.
     upload = MockPath(["x.xlsx"], b"not an xlsx")
     outcome = parse_file(upload, True)
     expected_message = (
         "Error reading Excel file: Unsupported format, "
         "or corrupt file: Expected BOF record; found b'not an x'"
     )
     self.assertEqual(outcome, expected_message)
예제 #2
0
 def test_parse_fill_gaps_at_start_with_na(self):
     # The first data row ("a") is one field short of the header, so its
     # missing "B" cell is expected to come back as NaN.
     short_first_row = MockPath(["x.csv"], b"A,B\na\nb,c")
     parsed = parse_file(short_first_row, True)
     expected = pd.DataFrame({"A": ["a", "b"], "B": [np.nan, "c"]})
     assert_frame_equal(parsed, expected)
예제 #3
0
 def test_parse_xls(self):
     # Parse the example.xls fixture; test_data is a sibling of the directory
     # that contains this file.
     fixture_dir = Path(__file__).parent.parent / "test_data"
     parsed = parse_file(fixture_dir / "example.xls", True)
     expected = pd.DataFrame({"foo": [1, 2], "bar": [2, 3]})
     assert_frame_equal(parsed, expected)
예제 #4
0
 def test_parse_csv_allow_empty_str(self):
     # Cells that are present but empty ("a," and ",b") are expected to be
     # kept as empty strings.
     upload = MockPath(["x.csv"], b"A,B\na,\n,b")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A": ["a", ""], "B": ["", "b"]})
     assert_frame_equal(parsed, expected)
예제 #5
0
 def test_parse_xlsx(self):
     # mock_xlsx_path is defined outside this method; the workbook it points
     # to is expected to hold the Month/Amount table asserted below.
     parsed = parse_file(Path(mock_xlsx_path), True)
     expected = pd.DataFrame({"Month": ["Jan", "Feb"], "Amount": [10, 20]})
     assert_frame_equal(parsed, expected)
예제 #6
0
 def test_parse_has_header_false(self):
     # With False as the second argument, the first line ("A,B") is expected
     # to be returned as data under default names "Column 1" / "Column 2",
     # and the values "1" / "2" are expected to remain strings.
     upload = MockPath(["x.csv"], b"A,B\n1,2")
     parsed = parse_file(upload, False)
     expected = pd.DataFrame({"Column 1": ["A", "1"], "Column 2": ["B", "2"]})
     assert_frame_equal(parsed, expected)
예제 #7
0
 def test_parse_fill_gaps_at_end_with_na(self):
     # The last data row ("e") is one field short of the header, so its
     # missing "B" cell is expected to come back as NaN.
     short_last_row = MockPath(["x.csv"], b"A,B\na,b\nc,d\ne")
     parsed = parse_file(short_last_row, True)
     expected = pd.DataFrame({"A": ["a", "c", "e"], "B": ["b", "d", np.nan]})
     assert_frame_equal(parsed, expected)
예제 #8
0
 def test_parse_too_many_bytes(self):
     # Three data rows go in; the result is expected to carry an "error"
     # message reporting 2 removed rows and a "dataframe" with only the first.
     # NOTE(review): the size limit that triggers truncation is configured
     # outside this method and is not visible here -- confirm.
     upload = MockPath(["x.csv"], b"A,B\na,b\nc,d\ne,f")
     outcome = parse_file(upload, True)
     self.assertEqual(
         outcome["error"], "The input was too large, so we removed 2 rows"
     )
     truncated = outcome["dataframe"]
     assert_frame_equal(truncated, pd.DataFrame({"A": ["a"], "B": ["b"]}))
예제 #9
0
 def test_parse_auto_categorize(self):
     parsed = parse_file(MockPath(["x.csv"], b"A,B\na,a\na,b\nb,c"), True)
     # Column A repeats a string ('a', 'a', 'b'), so it is expected to be
     # categorized; column B is expected to remain a string column.
     column_a = pd.Series(["a", "a", "b"], dtype="category")
     column_b = pd.Series(["a", "b", "c"], dtype=str)
     assert_frame_equal(parsed, pd.DataFrame({"A": column_a, "B": column_b}))
예제 #10
0
 def test_parse_too_many_columns(self):
     # Four columns go in; the result is expected to report 2 removed columns
     # and to keep only the first two ("A" and "B").
     # NOTE(review): the column limit is configured outside this method and
     # is not visible here -- confirm.
     upload = MockPath(["x.csv"], b"A,B,C,D\na,b,c,d")
     outcome = parse_file(upload, True)
     expected_error = "The input had too many columns, so we removed 2 columns"
     self.assertEqual(outcome["error"], expected_error)
     truncated = outcome["dataframe"]
     assert_frame_equal(truncated, pd.DataFrame({"A": ["a"], "B": ["b"]}))
예제 #11
0
 def test_parse_default_column_headers(self):
     # The header row is ['A', '', None]: the second name is an empty string
     # and the third is missing entirely.
     upload = MockPath(["x.csv"], b'A,""\na,b,c')
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({
         "A": ["a"],
         # "" falls back to the positional default name
         "Column 2": ["b"],
         # None falls back to the positional default name
         "Column 3": ["c"],
     })
     assert_frame_equal(parsed, expected)
예제 #12
0
 def test_parse_csv_repair_errors(self):
     # Reporting "warnings" on invalid input would be ideal, but Python's
     # `csv` module does not offer that: the only choices are mangling the
     # input or raising an exception. Both are awful; mangling is slightly
     # preferable, so that is the behavior pinned here.
     #
     # The input contains two CSV errors:
     #
     # * Data after a close-quote: mangled by appending
     # * An unclosed quote: mangled by auto-closing
     malformed = MockPath(["x.csv"], b'A,B\n"x" y,"foo\nB')
     parsed = parse_file(malformed, True)
     expected = pd.DataFrame({"A": ["x y"], "B": ["foo\nB"]})
     assert_frame_equal(parsed, expected)
예제 #13
0
 def test_rewrite_conflicting_column_headers(self):
     # Columns 1 and 2 are both named 'A'.
     # Columns 3 and 4 (the latter defaulted) would both be 'Column 4'.
     clashing_headers = MockPath(["x.csv"], b"A,A,Column 4,\na,b,c,d")
     parsed = parse_file(clashing_headers, True)
     expected = pd.DataFrame({
         "A": ["a"],
         # second 'A' is rewritten
         "A 2": ["b"],
         "Column 4": ["c"],
         # defaulted name is rewritten to avoid the clash
         "Column 5": ["d"],
     })
     assert_frame_equal(parsed, expected)
예제 #14
0
 def test_parse_txt_sniff_delimiter(self):
     # For a .txt input the ";" is expected to be used as the delimiter, so
     # the comma in "a,b" stays inside the cell value.
     upload = MockPath(["x.txt"], b"A;B\na,b;c")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A": ["a,b"], "B": ["c"]})
     assert_frame_equal(parsed, expected)
예제 #15
0
 def test_parse_csv_detect_character_set(self):
     # Per the original author's note, this test exists to confirm that
     # `chardet` is invoked: the input is windows-1252 encoded and contains a
     # non-ASCII character that must round-trip intact.
     encoded = "A\nfôo\nbar".encode("windows-1252")
     parsed = parse_file(MockPath(["x.csv"], encoded), True)
     expected = pd.DataFrame({"A": ["fôo", "bar"]})
     assert_frame_equal(parsed, expected)
예제 #16
0
 def test_parse_invalid_mime_type(self):
     # An unrecognized ".bin" extension is expected to yield an error string
     # that names the extension.
     outcome = parse_file(MockPath(["x.bin"], b"A"), True)
     expected_message = (
         "Unknown file extension '.bin'. Please upload a different file."
     )
     self.assertEqual(outcome, expected_message)
예제 #17
0
 def test_parse_skip_empty_row(self):
     # The blank line between the header and "a" is expected to be dropped
     # rather than returned as a row.
     upload = MockPath(["x.csv"], b"A\n\na")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A": ["a"]})
     assert_frame_equal(parsed, expected)
예제 #18
0
 def test_parse_has_header_true(self):
     # With True as the second argument, the first line ("A,B") is expected
     # to supply the column names and only "a,b" is returned as data.
     upload = MockPath(["x.csv"], b"A,B\na,b")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A": ["a"], "B": ["b"]})
     assert_frame_equal(parsed, expected)
예제 #19
0
 def _parse_json(self, b: bytes):
     # Helper: wrap the raw bytes in a MockPath named "x.json" and parse it,
     # passing False as parse_file's second argument.
     json_upload = MockPath(["x.json"], b)
     return parse_file(json_upload, False)
예제 #20
0
 def test_parse_too_many_rows(self):
     # Four lines go in and, with False as the second argument, all of them
     # count as data; the result is expected to report 2 removed rows and to
     # keep only the first two ("A" and "a").
     # NOTE(review): the row limit is configured outside this method and is
     # not visible here -- confirm.
     upload = MockPath(["x.csv"], b"A\na\nb\nc")
     outcome = parse_file(upload, False)
     expected_error = "The input was too large, so we removed 2 rows"
     self.assertEqual(outcome["error"], expected_error)
     truncated = outcome["dataframe"]
     assert_frame_equal(truncated, pd.DataFrame({"Column 1": ["A", "a"]}))
예제 #21
0
 def test_parse_empty_csv(self):
     # A zero-byte .csv is expected to produce an error string, not a
     # DataFrame.
     empty_upload = MockPath(["x.csv"], b"")
     outcome = parse_file(empty_upload, True)
     self.assertEqual(outcome, "This file is empty")
예제 #22
0
 def test_parse_tsv(self):
     # Tab-separated input under a .tsv name is expected to split on tabs.
     upload = MockPath(["x.tsv"], b"A\tB\na\tb")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A": ["a"], "B": ["b"]})
     assert_frame_equal(parsed, expected)
예제 #23
0
 def test_parse_txt_sniff_delimiter_not_found(self):
     # The spaces are not expected to be treated as a delimiter: each line is
     # expected to come back whole, as a single column named "A B".
     upload = MockPath(["x.txt"], b"A B\na b c")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A B": ["a b c"]})
     assert_frame_equal(parsed, expected)
예제 #24
0
 def test_parse_txt_sniff_delimiter_empty_file(self):
     # A zero-byte .txt is expected to produce the same "empty" error string
     # instead of failing during delimiter detection.
     empty_upload = MockPath(["x.txt"], b"")
     outcome = parse_file(empty_upload, False)
     self.assertEqual(outcome, "This file is empty")
예제 #25
0
 def test_parse_autocast_numbers(self):
     # With a header row, all-numeric columns are expected to be cast:
     # "1"/"3" to integers and "2.0"/"4.1" to floats.
     upload = MockPath(["x.csv"], b"A,B\n1,2.0\n3,4.1")
     parsed = parse_file(upload, True)
     expected = pd.DataFrame({"A": [1, 3], "B": [2.0, 4.1]})
     assert_frame_equal(parsed, expected)