def test_convert(self):
    """Check the table produced from a small CSV under two type layouts.

    The input has a header row (``I, J, K``) followed by three data rows
    that contain empty cells and the tokens ``?``, ``.`` and ``NA``. It is
    loaded twice with different per-column types, and the resulting
    table's ``metas`` / ``X`` columns are compared with the expected
    values.
    """
    raw = (
        b'I, J, K\n'
        b' , A, \n'
        b'B, , 1\n'
        b'?, ., NA'
    )

    class SkipSpaceExcel(csv.excel):
        # The cells are written with a space after each comma.
        skipinitialspace = True

    def as_table(*kinds):
        # `kinds` gives one ColumnType per column, in column order; the
        # first row is always declared as the header.
        options = owcsvimport.Options(
            encoding="ascii",
            dialect=SkipSpaceExcel(),
            columntypes=[
                (range(index, index + 1), kind)
                for index, kind in enumerate(kinds)
            ],
            rowspec=[(range(0, 1), RowSpec.Header)],
        )
        frame = owcsvimport.load_csv(io.BytesIO(raw), options)
        return pandas_to_table(frame)

    # Text / Categorical / Text: both text columns are expected in metas
    # with their cell strings kept verbatim, the categorical column in X.
    table = as_table(
        ColumnType.Text, ColumnType.Categorical, ColumnType.Text
    )
    assert_array_equal(table.metas[:, 0], ["", "B", "?"])
    assert_array_equal(table.metas[:, 1], ["", "1", "NA"])
    assert_array_equal(table.X[:, 0], [0.0, np.nan, np.nan])

    # Categorical / Categorical / Numeric: all three columns are expected
    # in X, with NaN wherever the cell is empty, "?", "." or "NA".
    table = as_table(
        ColumnType.Categorical, ColumnType.Categorical, ColumnType.Numeric
    )
    assert_array_equal(table.X[:, 0], [np.nan, 0, np.nan])
    assert_array_equal(table.X[:, 1], [0, np.nan, np.nan])
    assert_array_equal(table.X[:, 2], [np.nan, 1, np.nan])
def test_load_csv(self):
    """Check ``load_csv`` column typing and column/row skipping.

    The input has three rows of five comma-separated fields (the last
    field is empty) and no header row. The first pass assigns one type
    per column and checks the resulting dtypes. The second pass skips
    columns 0, 2 and 4 and the middle row, then checks the remaining
    shape, dtypes and values.
    """
    raw = (
        b'1/1/1990,1.0,[,one,\n'
        b'1/1/1990,2.0,],two,\n'
        b'1/1/1990,3.0,{,three,'
    )

    def column_spec(*kinds):
        # One (single-column range, ColumnType) pair per column.
        return [
            (range(index, index + 1), kind)
            for index, kind in enumerate(kinds)
        ]

    # Pass 1: every column is kept and given an explicit type.
    typed_options = owcsvimport.Options(
        encoding="ascii",
        dialect=csv.excel(),
        columntypes=column_spec(
            ColumnType.Time,
            ColumnType.Numeric,
            ColumnType.Text,
            ColumnType.Categorical,
            ColumnType.Auto,
        ),
        rowspec=[],
    )
    typed = owcsvimport.load_csv(io.BytesIO(raw), typed_options)
    self.assertEqual(typed.shape, (3, 5))
    expected_dtypes = [
        np.dtype("M8[ns]"),
        np.dtype(float),
        np.dtype(object),
        "category",
        np.dtype(float),
    ]
    self.assertSequenceEqual(list(typed.dtypes), expected_dtypes)

    # Pass 2: only the numeric and categorical columns are kept, and the
    # row at index 1 is marked as skipped.
    skipping_options = owcsvimport.Options(
        encoding="ascii",
        dialect=csv.excel(),
        columntypes=column_spec(
            ColumnType.Skip,
            ColumnType.Numeric,
            ColumnType.Skip,
            ColumnType.Categorical,
            ColumnType.Skip,
        ),
        rowspec=[(range(1, 2), RowSpec.Skipped)],
    )
    trimmed = owcsvimport.load_csv(io.BytesIO(raw), skipping_options)
    self.assertEqual(trimmed.shape, (2, 2))
    self.assertSequenceEqual(
        list(trimmed.dtypes), [np.dtype(float), "category"]
    )
    numbers = list(trimmed.iloc[:, 0])
    labels = list(trimmed.iloc[:, 1])
    self.assertSequenceEqual(numbers, [1.0, 3.0])
    self.assertSequenceEqual(labels, ["one", "three"])
def test_decimal_format(self):
    """Check numeric parsing with custom decimal and group separators.

    The input is semicolon-delimited and writes numbers with ``,`` as the
    decimal separator and ``.`` as the group separator, so ``1.000,142``
    is expected to load as ``1000.142``.
    """

    class SemicolonExcel(csv.excel):
        # The comma is used inside the numbers, so fields are split
        # on ";" instead.
        delimiter = ";"

    raw = b'3,21;3,37\n4,13;1.000,142'
    options = owcsvimport.Options(
        encoding="ascii",
        dialect=SemicolonExcel(),
        decimal_separator=",",
        group_separator=".",
        columntypes=[(range(0, 2), ColumnType.Numeric)],
        rowspec=[],
    )
    frame = owcsvimport.load_csv(io.BytesIO(raw), options)

    expected = np.array([
        [3.21, 3.37],
        [4.13, 1000.142],
    ])
    assert_array_equal(frame.values, expected)