def test_columns_from_csv_source(self):
    """Check column construction when CSV headers need a Parquet name map.

    Building a ``CsvSource`` from ``TEST_CSV_MAP`` with no mapping is
    expected to raise ``InvalidColumnNames``; with a mapping supplied, each
    column should pair the CSV header with its Parquet name.
    """
    # No override given for the Parquet-invalid column names: must raise.
    with self.assertRaises(csv2parquet.InvalidColumnNames):
        csv2parquet.CsvSource(TEST_CSV_MAP)

    # Second attempt, this time overriding each offending header.
    # (CSV header, Parquet name) pairs, kept in the expected column order.
    remapped = [
        ('Adj. Open', 'Adj Open'),
        ('Adj. High', 'Adj High'),
        ('Adj. Low', 'Adj Low'),
        ('Adj. Close', 'Adj Close'),
        ('Adj. Volume', 'Adj Volume'),
    ]
    source = csv2parquet.CsvSource(TEST_CSV_MAP, dict(remapped))

    # Headers absent from the mapping keep the same name on both sides.
    untouched = [
        'Date',
        'Open',
        'High',
        'Low',
        'Close',
        'Volume',
        'Ex-Dividend',
        'Split Ratio',
    ]
    wanted = [Column(name, name, None) for name in untouched]
    for csv_name, parquet_name in remapped:
        wanted.append(Column(csv_name, parquet_name, None))

    self.assertEqual(wanted, source.columns.items)
def test_headers_simple(self):
    """Check headers and columns of a ``CsvSource`` built from ``TEST_CSV``.

    The source's headers should match the expected list, and every column
    should use the same name on the CSV side and the Parquet side.
    """
    names = [
        'Date', 'Open', 'High', 'Low', 'Close', 'Volume',
        'ExDividend', 'SplitRatio',
        'AdjOpen', 'AdjHigh', 'AdjLow', 'AdjClose', 'AdjVolume',
    ]
    source = csv2parquet.CsvSource(TEST_CSV)
    self.assertEqual(names, source.headers)

    # CSV and Parquet column names should be identical for this file.
    wanted = []
    for name in names:
        wanted.append(Column(name, name, None))
    self.assertEqual(wanted, source.columns.items)
def test_real_path_to_prevent_drill_script_errors(self):
    """Check that ``CsvSource.path`` equals the real path of ``TEST_CSV``.

    A CSV file path such as "../something.csv" will confuse Drill, so the
    source is expected to expand the path it was given.
    """
    source = csv2parquet.CsvSource('./test-simple.csv')
    actual = source.path
    resolved = os.path.realpath(TEST_CSV)
    self.assertEqual(actual, resolved)