Exemple #1
0
def read_and_load_file(file):
    """Feed every row of an open file to ``load_data``.

    A reader is obtained from ``f.get_csv_reader`` and its header is
    consumed by ``f.read_header`` to produce the column map. Each
    remaining row is then converted to a tuple and handed to
    ``load_data`` together with that column map.

    Parameters
    ----------
    file : file_object
        The already opened file to read rows from
    """
    csv_rows = f.get_csv_reader(file)
    header_map = f.read_header(csv_rows)
    f.debug("Column map: {0}".format(header_map))

    # map() is lazy, so each row is still converted and loaded one at a time.
    for row in map(tuple, csv_rows):
        load_data(header_map, row)

    # NOTE(review): the trailing call with None presumably tells load_data
    # that the input is exhausted (e.g. to flush a pending batch) -- confirm
    # against load_data.
    load_data(header_map, None)
Exemple #2
0
 def test_read_header(self):
     """Check the column names ``f.read_header`` returns for the sample file.

     Both the expected and the actual names are sorted before being
     compared, so the order in which the header lists them is irrelevant.
     """
     print("test_read_header")
     wanted = sorted([
         "BIKEID", "BIRTH_YEAR", "END_STATION_ID",
         "END_STATION_LATITUDE", "END_STATION_LONGITUDE",
         "END_STATION_NAME", "GENDER", "STARTTIME", "START_STATION_ID",
         "START_STATION_LATITUDE", "START_STATION_LONGITUDE",
         "START_STATION_NAME", "STOPTIME", "TRIPDURATION", "USERTYPE"
     ])
     sample_path = "../resources/201811-citibike-tripdata.csv.gz"
     with f.open_file(sample_path) as handle:
         found = f.read_header(f.get_csv_reader(handle))
         # Sort in place, exactly as the expected list was ordered above.
         found.sort()
         self.assertListEqual(wanted, found)
Exemple #3
0
def generate_table_sql(file_names, column_data_type):
    """Generates SQL for the table to load data.

    The header of every file is read and its columns are merged into a
    single set via ``add_to_col_set``; the merged set is then passed to
    ``print_table_and_col_set`` together with the requested data type.

    Parameters
    ----------
    file_names : iterable of str
        The file_names to scan for columns
    column_data_type : str
        The column data type to use
    """
    col_set = set()
    for file_name in file_names:
        # Use the handle as a context manager so each file is closed before
        # the next one is opened; previously every handle was left open.
        with f.open_file(file_name) as file:
            reader = f.get_csv_reader(file)
            columns_to_add = f.read_header(reader)
            col_set = add_to_col_set(col_set, columns_to_add)
    print_table_and_col_set(col_set, column_data_type)
Exemple #4
0
def generate_table_sql(file_names, column_data_type):
    """Generates SQL for the table to load data.

    The header of every file is read in turn and its columns are collected
    into one list, keeping the order of first appearance and skipping any
    column already collected. The resulting list is passed to
    ``print_table_and_columns`` together with the requested data type.

    Parameters
    ----------
    file_names : iterable of str
        The file_names to scan for columns
    column_data_type : str
        The column data type to use
    """
    col_list = []
    for file_name in file_names:
        f.debug("Reading file {0}".format(file_name))
        with f.open_file(file_name) as file:
            reader = f.get_csv_reader(file)
            columns_to_add = f.read_header(reader)
            f.debug("Columns to add {0}".format(columns_to_add))
            # De-duplicate explicitly, one column at a time. The previous
            # form fed list.extend() a generator that inspected the very
            # list being extended, so its result depended on extend()
            # appending items incrementally.
            for col in columns_to_add:
                if col not in col_list:
                    col_list.append(col)
    print_table_and_columns(col_list, column_data_type)