Exemple #1
0
 def test_pipe_separated_file(self):
     """Read the pipe-separated fixture and check how many rows come back.

     The header returned by ``f.read_header`` is counted together with
     every remaining row yielded by the reader; 11 entries are expected.
     """
     # NOTE(review): the separator is not restored after the test —
     # confirm that other tests reset it before relying on a default.
     cfg.column_separator = "|"
     with f.open_file("../resources/201812-citibike-tripdata.psv") as handle:
         csv_reader = f.get_csv_reader(handle)
         # Header goes in first, then everything the reader still yields.
         rows = [f.read_header(csv_reader)]
         rows.extend(csv_reader)
         self.assertEqual(len(rows), 11)
Exemple #2
0
 def test_tab_separated_file(self):
     """Read the tab-separated fixture and check how many rows come back.

     The header returned by ``f.read_header`` plus the remaining rows of
     the reader are expected to total 11 entries.
     """
     print("test_tab_separated_file")
     # NOTE(review): the separator is not restored after the test —
     # confirm that other tests reset it before relying on a default.
     cfg.column_separator = "\t"
     with f.open_file("../resources/201812-citibike-tripdata.tsv") as handle:
         csv_reader = f.get_csv_reader(handle)
         header = f.read_header(csv_reader)
         # Whatever the reader still yields after the header was consumed.
         rows = [header] + list(csv_reader)
         self.assertEqual(11, len(rows))
Exemple #3
0
 def test_read_header(self):
     """Check the header of the gzipped CSV fixture, ignoring column order.

     Both the expected names and the list returned by ``f.read_header``
     are sorted before being compared with ``assertListEqual``.
     """
     with f.open_file("../resources/201811-citibike-tripdata.csv.gz") as handle:
         csv_reader = f.get_csv_reader(handle)
         wanted = sorted([
             "BIKEID", "BIRTH_YEAR", "END_STATION_ID", "END_STATION_LATITUDE",
             "END_STATION_LONGITUDE", "END_STATION_NAME", "GENDER", "STARTTIME",
             "START_STATION_ID", "START_STATION_LATITUDE", "START_STATION_LONGITUDE",
             "START_STATION_NAME", "STOPTIME", "TRIPDURATION", "USERTYPE",
         ])
         found = f.read_header(csv_reader)
         # Sorted in place so the comparison does not depend on file order.
         found.sort()
         self.assertListEqual(found, wanted)
Exemple #4
0
 def test_read_header(self):
     """Check that ``f.read_header`` yields the expected set of columns.

     The opened file object is handed straight to ``f.read_header`` and
     the result is compared for equality against a set of column names.
     """
     wanted_columns = {
         "BIKEID", "BIRTH_YEAR", "END_STATION_ID",
         "END_STATION_LATITUDE", "END_STATION_LONGITUDE",
         "END_STATION_NAME", "GENDER", "STARTTIME",
         "START_STATION_ID", "START_STATION_LATITUDE",
         "START_STATION_LONGITUDE", "START_STATION_NAME",
         "STOPTIME", "TRIPDURATION", "USERTYPE",
     }
     fixture_path = "../resources/201811-citibike-tripdata.csv.gz"
     with f.open_file(fixture_path) as handle:
         self.assertEqual(f.read_header(handle), wanted_columns)
Exemple #5
0
def read_and_load_file(file):
    """Read a delimited file and pass each row to ``load_data``.

    The header is read first and logged through ``f.debug``; every
    following row is converted to a tuple and handed to ``load_data``
    together with that header.

    Parameters
    ----------
    file : file_object
        The already opened file to read from
    """
    csv_reader = f.get_csv_reader(file)
    header = f.read_header(csv_reader)
    f.debug("Column map: {0}".format(header))
    for row in map(tuple, csv_reader):
        load_data(header, row)
    # Closing call with no row; presumably signals end-of-input so that
    # load_data can flush pending work — confirm against load_data.
    load_data(header, None)
Exemple #6
0
def generate_table_sql(file_names, column_data_type):
    """Generates SQL for the table to load data.

    The header of every file is read and merged into one column set via
    ``add_to_col_set``; the merged set is then handed to
    ``print_table_and_col_set``.

    Parameters
    ----------
    file_names : iterable of str
        The file names to scan for columns
    column_data_type : str
        The column data type to use
    """
    col_set = set()
    for file_name in file_names:
        # The context manager closes the file even when reading the header
        # or merging the columns raises, so no handle is leaked.
        with f.open_file(file_name) as file:
            columns_to_add = f.read_header(file)
            col_set = add_to_col_set(col_set, columns_to_add)
    print_table_and_col_set(col_set, column_data_type)
Exemple #7
0
def generate_table_sql(file_names, column_data_type):
    """Build the table definition from the headers of several files.

    Each file's header is read in turn and its columns are collected in
    first-seen order without duplicates; the resulting list is passed to
    ``print_table_and_columns``.

    Parameters
    ----------
    file_names : iterable of str
        The file names to scan for columns
    column_data_type : str
        The column data type to use
    """
    unique_columns = []
    for path in file_names:
        f.debug("Reading file {0}".format(path))
        with f.open_file(path) as handle:
            header = f.read_header(f.get_csv_reader(handle))
            f.debug("Columns to add {0}".format(header))
            # Keep only columns not collected yet, so names repeated across
            # files (or within one header) appear a single time.
            for column in header:
                if column in unique_columns:
                    continue
                unique_columns.append(column)
    print_table_and_columns(unique_columns, column_data_type)