def get_tables(year, month):
    """Collect tables parsed from the interim CSV file for *year*, *month*.

    The file at ``InterimCSV(year, month).path`` is opened with
    ``reader.open_csv()`` and split by ``reader.Reader(csvfile, spec.SPEC)``
    into (CSV segment, parsing definition) pairs. Each pair is handed to
    ``extract_tables()`` and the results are concatenated.

    Args:
        year: year passed to ``InterimCSV``
        month: month passed to ``InterimCSV``

    Returns:
        flat list of all items produced by the ``extract_tables()`` calls,
        in the order the segments were yielded
    """
    # Imports stay local to the function and keep their original order.
    from config import InterimCSV
    import csv2df.specification as spec
    import csv2df.reader as reader

    interim_file = InterimCSV(year, month).path
    with reader.open_csv(interim_file) as csvfile:
        segments = reader.Reader(csvfile, spec.SPEC).items()
        # Flatten the per-segment results into a single list.
        return [
            table
            for csv_segment, pdef in segments
            for table in extract_tables(csv_segment, pdef)
        ]
Yield CSV segments as Row() instances and corresponding parsing definitons based on *spec* parsing specification. Args: spec: parsing specification as spec.Specification() instance Yields: Yield CSV segments (list of Row() instances) and parsing definiton pairs as tuples """ for pdef in pdef_segments: start, end = pdef.get_bounds(self.rows) csv_segment = self.pop(start, end) yield csv_segment, pdef yield self.remaining_rows(), pdef_default if __name__ == "__main__": from config import InterimCSV, LATEST_DATE import csv2df.specification as spec # print all rows from csvpath year, month = LATEST_DATE csv_path = InterimCSV(year, month).path csvfile = open_csv(csv_path) reader = Reader(csvfile, spec=spec.SPEC) for csv_segment, pdef in reader.items(): for row in csv_segment: print(row) csvfile.close()
def __init__(self, year, month):
    """Remember the date and load data from the matching interim CSV file.

    Args:
        year: stored as ``self.year`` and passed to ``InterimCSV``
        month: stored as ``self.month`` and passed to ``InterimCSV``
    """
    self.year = year
    self.month = month
    # self.dfs holds whatever get_dataframes() returns for the CSV path.
    self.dfs = get_dataframes(InterimCSV(year, month).path)
def test_get_path_property_method_returns_tab_csv(self):
    """The ``path`` of ``InterimCSV(2015, 5)`` must be named 'tab.csv'."""
    path = InterimCSV(2015, 5).path
    assert path.name == 'tab.csv'
def test_get_path_property_method_returns_existing_file(self):
    """The ``path`` of ``InterimCSV(2015, 5)`` must exist."""
    assert InterimCSV(2015, 5).path.exists()
def test_csv_has_no_null_byte():
    """The interim CSV for 2015-02, read as UTF-8, must contain no NUL."""
    text = InterimCSV(2015, 2).path.read_text(encoding='utf-8')
    assert "\0" not in text