コード例 #1
0
 def test_parse_autocast_numbers(self):
     """Numeric-looking CSV cells are parsed into int and float columns."""
     source = MockPath(['x.csv'], b'A,B\n1,2.0\n3,4.1')
     parsed = upload.parse_file(source, True)
     expected = pd.DataFrame({'A': [1, 3], 'B': [2.0, 4.1]})
     assert_frame_equal(parsed, expected)
コード例 #2
0
 def test_parse_csv_allow_empty_str(self):
     """Empty CSV cells are kept as empty strings in the parsed table."""
     csv_bytes = b"A,B\na,\n,b"
     table = parse_file(MockPath(["x.csv"], csv_bytes), True)
     wanted = pd.DataFrame({"A": ["a", ""], "B": ["", "b"]})
     assert_frame_equal(table, wanted)
コード例 #3
0
 def test_parse_fill_gaps_at_start_with_na(self):
     """A short first data row gets NaN in the columns it lacks."""
     source = MockPath(['x.csv'], b'A,B\na\nb,c')
     parsed = upload.parse_file(source, True)
     # Row 1 has no value for column B, so NaN is expected there.
     expected = pd.DataFrame({'A': ['a', 'b'], 'B': [np.nan, 'c']})
     assert_frame_equal(parsed, expected)
コード例 #4
0
 def test_parse_invalid_xlsx(self):
     """Garbage bytes under an .xlsx name produce an error string."""
     bogus_workbook = MockPath(["x.xlsx"], b"not an xlsx")
     expected_message = (
         "Error reading Excel file: Unsupported format, "
         "or corrupt file: Expected BOF record; found b'not an x'"
     )
     outcome = parse_file(bogus_workbook, True)
     self.assertEqual(outcome, expected_message)
コード例 #5
0
 def test_parse_csv_allow_empty_str(self):
     """Empty CSV cells come back as '' in the parsed table."""
     source = MockPath(['x.csv'], b'A,B\na,\n,b')
     parsed = upload.parse_file(source, True)
     expected = pd.DataFrame({'A': ['a', ''], 'B': ['', 'b']})
     assert_frame_equal(parsed, expected)
コード例 #6
0
 def test_parse_fill_gaps_at_start_with_na(self):
     """A first data row with too few cells is padded with NaN."""
     csv_bytes = b"A,B\na\nb,c"
     table = parse_file(MockPath(["x.csv"], csv_bytes), True)
     # The first data row supplies only column A.
     wanted = pd.DataFrame({"A": ["a", "b"], "B": [np.nan, "c"]})
     assert_frame_equal(table, wanted)
コード例 #7
0
 def test_parse_has_header_false(self):
     """Without a header row, the first line is data under default names."""
     source = MockPath(['x.csv'], b'A,B\n1,2')
     parsed = upload.parse_file(source, False)
     # 'A' and 'B' stay in the data; columns are named 'Column N'.
     expected = pd.DataFrame({
         'Column 1': ['A', '1'],
         'Column 2': ['B', '2'],
     })
     assert_frame_equal(parsed, expected)
コード例 #8
0
 def test_parse_fill_gaps_at_end_with_na(self):
     """A final row with too few cells is padded with NaN."""
     csv_bytes = b"A,B\na,b\nc,d\ne"
     table = parse_file(MockPath(["x.csv"], csv_bytes), True)
     # The last row supplies only column A.
     wanted = pd.DataFrame({
         "A": ["a", "c", "e"],
         "B": ["b", "d", np.nan],
     })
     assert_frame_equal(table, wanted)
コード例 #9
0
 def test_parse_has_header_false(self):
     """With has_header False, line one is data and names are defaulted."""
     csv_bytes = b"A,B\n1,2"
     table = parse_file(MockPath(["x.csv"], csv_bytes), False)
     wanted = pd.DataFrame({
         "Column 1": ["A", "1"],
         "Column 2": ["B", "2"],
     })
     assert_frame_equal(table, wanted)
コード例 #10
0
 def test_parse_fill_gaps_at_end_with_na(self):
     """A trailing short row gets NaN in the columns it lacks."""
     source = MockPath(['x.csv'], b'A,B\na,b\nc,d\ne')
     parsed = upload.parse_file(source, True)
     expected = pd.DataFrame({
         'A': ['a', 'c', 'e'],
         'B': ['b', 'd', np.nan],  # last row had no second cell
     })
     assert_frame_equal(parsed, expected)
コード例 #11
0
 def test_parse_too_many_bytes(self):
     """Oversized input is truncated and the truncation is reported.

     NOTE(review): the size limit in effect is not visible in this method;
     presumably it is lowered by test configuration -- confirm.
     """
     csv_bytes = b"A,B\na,b\nc,d\ne,f"
     outcome = parse_file(MockPath(["x.csv"], csv_bytes), True)
     self.assertEqual(
         outcome["error"],
         "The input was too large, so we removed 2 rows",
     )
     # Only the first data row is expected to survive.
     surviving = pd.DataFrame({"A": ["a"], "B": ["b"]})
     assert_frame_equal(outcome["dataframe"], surviving)
コード例 #12
0
 def test_filename_in_traceback(self):
     """Tracebacks from a loaded module name the module, not its file.

     The module is loaded from ``badname.py`` under the name ``goodname``;
     a ValueError raised inside it must show ``<Module goodname>`` as the
     file in the formatted traceback.
     """
     path = MockPath(["root", "badname.py"], b"def intify(x):\n    return int(x)")
     module = load_python_module("goodname", path)
     try:
         module.intify("not-a-number")
     except ValueError:
         s = traceback.format_exc()
     else:
         # Without this branch the test would pass vacuously whenever
         # intify() fails to raise.
         self.fail("intify('not-a-number') did not raise ValueError")
     self.assertRegex(
         s, 'File "<Module goodname>", line 2, in intify\nValueError'
     )
コード例 #13
0
 def test_parse_auto_categorize(self):
     """Columns with repeated strings become categorical; others stay str."""
     source = MockPath(['x.csv'], b'A,B\na,a\na,b\nb,c')
     parsed = upload.parse_file(source, True)
     # 'a', 'a', 'b' has repeated strings, so we categorize
     repeated = pd.Series(['a', 'a', 'b'], dtype='category')
     distinct = pd.Series(['a', 'b', 'c'], dtype=str)
     expected = pd.DataFrame({'A': repeated, 'B': distinct})
     assert_frame_equal(parsed, expected)
コード例 #14
0
 def test_parse_too_many_columns(self):
     """Excess columns are dropped and the drop is reported.

     NOTE(review): the column limit in effect is not visible in this
     method; presumably it is lowered by test configuration -- confirm.
     """
     csv_bytes = b"A,B,C,D\na,b,c,d"
     outcome = parse_file(MockPath(["x.csv"], csv_bytes), True)
     expected_error = "The input had too many columns, so we removed 2 columns"
     self.assertEqual(outcome["error"], expected_error)
     # Columns C and D are expected to be gone.
     surviving = pd.DataFrame({"A": ["a"], "B": ["b"]})
     assert_frame_equal(outcome["dataframe"], surviving)
コード例 #15
0
 def test_filename_in_traceback(self):
     """Tracebacks from a loaded module name the module, not its file.

     The module is loaded from ``badname.py`` under the name ``goodname``;
     a ValueError raised inside it must show ``<Module goodname>`` as the
     file in the formatted traceback.
     """
     path = MockPath(['root', 'badname.py'],
                     b'def intify(x):\n    return int(x)')
     module = load_python_module('goodname', path)
     try:
         module.intify('not-a-number')
     except ValueError:
         s = traceback.format_exc()
     else:
         # Without this branch the test would pass vacuously whenever
         # intify() fails to raise.
         self.fail("intify('not-a-number') did not raise ValueError")
     self.assertRegex(
         s, 'File "<Module goodname>", line 2, in intify\nValueError')
コード例 #16
0
 def test_parse_auto_categorize(self):
     """A column with repeated strings is categorized; a unique one is not."""
     csv_bytes = b"A,B\na,a\na,b\nb,c"
     table = parse_file(MockPath(["x.csv"], csv_bytes), True)
     # 'a', 'a', 'b' has repeated strings, so we categorize
     col_a = pd.Series(["a", "a", "b"], dtype="category")
     col_b = pd.Series(["a", "b", "c"], dtype=str)
     wanted = pd.DataFrame({"A": col_a, "B": col_b})
     assert_frame_equal(table, wanted)
コード例 #17
0
 def test_parse_default_column_headers(self):
     """Blank or missing header cells get default 'Column N' names."""
     # First row is ['A', '', None]
     source = MockPath(['x.csv'], b'A,""\na,b,c')
     parsed = upload.parse_file(source, True)
     expected = pd.DataFrame({
         'A': ['a'],
         'Column 2': ['b'],  # "" => default, 'Column 2'
         'Column 3': ['c'],  # None => default, 'Column 3'
     })
     assert_frame_equal(parsed, expected)
コード例 #18
0
 def test_parse_too_many_bytes(self):
     """Oversized input is truncated and the result carries an error.

     NOTE(review): the size limit in effect is not visible in this method;
     presumably it is lowered by test configuration -- confirm.
     """
     source = MockPath(['x.csv'], b'A,B\na,b\nc,d\ne,f')
     outcome = upload.parse_file(source, True)
     expected_error = 'The input was too large, so we removed 2 rows'
     self.assertEqual(outcome['error'], expected_error)
     # Only the first data row is expected to survive.
     surviving = pd.DataFrame({'A': ['a'], 'B': ['b']})
     assert_frame_equal(outcome['dataframe'], surviving)
コード例 #19
0
 def test_parse_default_column_headers(self):
     """Empty and absent header cells are replaced by 'Column N'."""
     # First row is ['A', '', None]
     csv_bytes = b'A,""\na,b,c'
     table = parse_file(MockPath(["x.csv"], csv_bytes), True)
     wanted = pd.DataFrame({
         "A": ["a"],
         "Column 2": ["b"],  # "" => default, 'Column 2'
         "Column 3": ["c"],  # None => default, 'Column 3'
     })
     assert_frame_equal(table, wanted)
コード例 #20
0
 def test_parse_too_many_columns(self):
     """Excess columns are removed and the removal is reported.

     NOTE(review): the column limit in effect is not visible in this
     method; presumably it is lowered by test configuration -- confirm.
     """
     source = MockPath(['x.csv'], b'A,B,C,D\na,b,c,d')
     outcome = upload.parse_file(source, True)
     expected_error = 'The input had too many columns, so we removed 2 columns'
     self.assertEqual(outcome['error'], expected_error)
     # Columns C and D are expected to be gone.
     surviving = pd.DataFrame({'A': ['a'], 'B': ['b']})
     assert_frame_equal(outcome['dataframe'], surviving)
コード例 #21
0
 def test_rewrite_conflicting_column_headers(self):
     """Duplicate column names are rewritten so every header is unique."""
     # Columns 1 and 2 both have name, 'A'
     # Columns 3 and 4 (defaulted) both have name, 'Column 4'
     source = MockPath(['x.csv'], b'A,A,Column 4,\na,b,c,d')
     parsed = upload.parse_file(source, True)
     expected = pd.DataFrame({
         'A': ['a'],
         'A 2': ['b'],  # rewritten
         'Column 4': ['c'],
         'Column 5': ['d'],  # rewritten
     })
     assert_frame_equal(parsed, expected)
コード例 #22
0
 def test_parse_csv_repair_errors(self):
     """Malformed CSV quoting is repaired (mangled) instead of rejected."""
     # Ideally invalid input would be reported as "warnings". Python's `csv`
     # module offers no such option: the choice is between mangling the
     # input and raising an exception. Both are awful; mangling is the
     # lesser evil, so that is the behavior pinned here.
     #
     # CSV errors in the input below:
     #
     # * Data after close-quote: mangle by appending
     # * Unclosed quote: mangle by auto-closing
     broken_csv = MockPath(["x.csv"], b'A,B\n"x" y,"foo\nB')
     table = parse_file(broken_csv, True)
     wanted = pd.DataFrame({"A": ["x y"], "B": ["foo\nB"]})
     assert_frame_equal(table, wanted)
コード例 #23
0
 def test_rewrite_conflicting_column_headers(self):
     """Clashing header names are renamed so each column name is unique."""
     # Columns 1 and 2 both have name, 'A'
     # Columns 3 and 4 (defaulted) both have name, 'Column 4'
     csv_bytes = b"A,A,Column 4,\na,b,c,d"
     table = parse_file(MockPath(["x.csv"], csv_bytes), True)
     wanted = pd.DataFrame({
         "A": ["a"],
         "A 2": ["b"],  # rewritten
         "Column 4": ["c"],
         "Column 5": ["d"],  # rewritten
     })
     assert_frame_equal(table, wanted)
コード例 #24
0
 def test_parse_invalid_xlsx(self):
     """Non-Excel bytes under an .xlsx name yield an error string."""
     bogus_workbook = MockPath(['x.xlsx'], b'not an xlsx')
     expected_message = (
         'Error reading Excel file: Unsupported format, '
         "or corrupt file: Expected BOF record; found b'not an x'")
     outcome = upload.parse_file(bogus_workbook, True)
     self.assertEqual(outcome, expected_message)
コード例 #25
0
 def test_parse_txt_sniff_delimiter(self):
     """A .txt file split on ';' keeps commas inside cell values."""
     source = MockPath(['x.txt'], b'A;B\na,b;c')
     parsed = upload.parse_file(source, True)
     # ';' separates the columns, so 'a,b' stays one value.
     expected = pd.DataFrame({'A': ['a,b'], 'B': ['c']})
     assert_frame_equal(parsed, expected)
コード例 #26
0
 def test_parse_csv_detect_character_set(self):
     """windows-1252 encoded CSV bytes are decoded to the right text."""
     # tests that `chardet` is invoked
     encoded = 'A\nfôo\nbar'.encode('windows-1252')
     parsed = upload.parse_file(MockPath(['x.csv'], encoded), True)
     expected = pd.DataFrame({'A': ['fôo', 'bar']})
     assert_frame_equal(parsed, expected)
コード例 #27
0
 def test_parse_skip_empty_row(self):
     """A blank line between the header and the data produces no row."""
     source = MockPath(['x.csv'], b'A\n\na')
     parsed = upload.parse_file(source, True)
     expected = pd.DataFrame({'A': ['a']})
     assert_frame_equal(parsed, expected)
コード例 #28
0
 def test_parse_invalid_mime_type(self):
     """An unrecognized file extension yields an error string."""
     unsupported = MockPath(['x.bin'], b'A')
     expected_message = (
         "Unknown file extension '.bin'. Please upload a different file.")
     outcome = upload.parse_file(unsupported, True)
     self.assertEqual(outcome, expected_message)
コード例 #29
0
 def _load(self, filename, data):
     """Load a ModuleSpec from a mock file ``root/<filename>`` holding ``data``."""
     return ModuleSpec.load_from_path(MockPath(["root", filename], data))
コード例 #30
0
 def test_parse_too_many_rows(self):
     """Excess rows are dropped and the drop is reported.

     NOTE(review): the row limit in effect is not visible in this method;
     presumably it is lowered by test configuration -- confirm.
     """
     source = MockPath(['x.csv'], b'A\na\nb\nc')
     outcome = upload.parse_file(source, False)
     expected_error = 'The input was too large, so we removed 2 rows'
     self.assertEqual(outcome['error'], expected_error)
     # has_header is False, so the 'A' line counts as a data row.
     surviving = pd.DataFrame({'Column 1': ['A', 'a']})
     assert_frame_equal(outcome['dataframe'], surviving)