def read_and_load_file(file):
    """Read rows from a CSV file and hand each one to ``load_data``.

    ``f.read_header`` is called on the CSV reader first to obtain the
    column map. Every row the reader yields afterwards is converted to a
    tuple and passed to ``load_data`` together with that column map. Once
    the reader is exhausted, ``load_data`` is called one more time with
    ``None`` in place of a row (presumably an end-of-input signal --
    confirm against ``load_data``).

    Parameters
    ----------
    file : file_object
        The open file to read and load
    """
    csv_reader = f.get_csv_reader(file)
    column_map = f.read_header(csv_reader)
    f.debug("Column map: {0}".format(column_map))

    for row in csv_reader:
        record = tuple(row)
        load_data(column_map, record)

    # Trailing call without a row, issued after the last row was handed over.
    load_data(column_map, None)
def test_read_header(self):
    """Check that ``f.read_header`` returns the expected column names.

    The header of the bundled sample file is read and compared, order
    independent, against the list of expected column names.
    """
    print("test_read_header")

    expected_columns = sorted([
        "BIKEID",
        "BIRTH_YEAR",
        "END_STATION_ID",
        "END_STATION_LATITUDE",
        "END_STATION_LONGITUDE",
        "END_STATION_NAME",
        "GENDER",
        "STARTTIME",
        "START_STATION_ID",
        "START_STATION_LATITUDE",
        "START_STATION_LONGITUDE",
        "START_STATION_NAME",
        "STOPTIME",
        "TRIPDURATION",
        "USERTYPE",
    ])

    sample_path = "../resources/201811-citibike-tripdata.csv.gz"
    with f.open_file(sample_path) as csv_file:
        actual_columns = f.read_header(f.get_csv_reader(csv_file))
        # Sort in place so the comparison does not depend on header order.
        actual_columns.sort()
        self.assertListEqual(expected_columns, actual_columns)
def generate_table_sql(file_names, column_data_type):
    """Generates SQL for the table to load data.

    The header of every given file is read and its columns are merged into
    one collection through ``add_to_col_set``. The merged collection and
    the column data type are then passed to ``print_table_and_col_set``.

    Parameters
    ----------
    file_names : iterable of str
        The file names to scan for columns
    column_data_type : str
        The column data type to use
    """
    col_set = set()
    for file_name in file_names:
        # Open each file in a context manager so its handle is closed
        # before the next file is scanned; previously every opened file
        # was left open.
        with f.open_file(file_name) as file:
            reader = f.get_csv_reader(file)
            columns_to_add = f.read_header(reader)
            col_set = add_to_col_set(col_set, columns_to_add)
    print_table_and_col_set(col_set, column_data_type)
def generate_table_sql(file_names, column_data_type):
    """Generate the SQL for the table that the data is loaded into.

    The header of every given file is read and its columns are collected
    in first-seen order, each column being kept only once. The collected
    columns and the column data type are then passed to
    ``print_table_and_columns``.

    Parameters
    ----------
    file_names : iterable of str
        The file names to scan for columns
    column_data_type : str
        The column data type to use
    """
    ordered_columns = []
    for file_name in file_names:
        f.debug("Reading file {0}".format(file_name))
        with f.open_file(file_name) as csv_file:
            header_columns = f.read_header(f.get_csv_reader(csv_file))
            f.debug("Columns to add {0}".format(header_columns))
            # Skip columns that are already collected so that headers
            # repeated across multiple files end up in the list only once.
            for column in header_columns:
                if column not in ordered_columns:
                    ordered_columns.append(column)
    print_table_and_columns(ordered_columns, column_data_type)