def all_values():
    """Yield every cell of every data row of every table, one at a time.

    Debugging aid for to_float(). The csv file is the one located by
    cfg.get_path_csv() called without arguments.
    """
    source = cfg.get_path_csv()
    for table in get_all_tables(source):
        for datarow in table.datarows:
            # hand out the cells of this row in their original order
            yield from datarow
def approve_csv(year, month, valid_datapoints=VALID_DATAPOINTS_SAMPLE):
    """Check that the csv file for (year, month) yields the expected values.

    The file located by cfg.get_path_csv(year, month) is parsed into
    tables and wrapped in Datapoints. Every item of *valid_datapoints*
    must be reported as included by Datapoints.is_included(). After that
    a Frame is constructed from the datapoints; its result is discarded,
    the call only serves to show that construction does not fail.

    Progress is reported with print().

    Args:
        year: passed to cfg.get_path_csv() and used in the error message.
        month: passed to cfg.get_path_csv() and used in the error message.
        valid_datapoints: iterable of control datapoints, each one is
            handed to Datapoints.is_included().

    Raises:
        ValueError: on the first control datapoint that is not included
            in the parsed dataset. The message has three lines: the
            missing datapoint, the date and the file path.
    """
    csv_path = cfg.get_path_csv(year, month)
    print("File:", csv_path)
    tables = get_all_valid_tables(csv_path)
    dps = Datapoints(tables)
    for x in valid_datapoints:
        if not dps.is_included(x):
            msg1 = "Not found in dataset: {}".format(x)
            msg2 = "Date: {}, {}".format(year, month)
            msg3 = "File: {}".format(csv_path)
            # join the three parts as separate items so that each one
            # lands on its own line of the error message
            raise ValueError("\n".join([msg1, msg2, msg3]))
    print("Test values parsed OK.")
    # constructed only to confirm that dataframes can be built
    Frame(dps)
    print("Dataframes created OK.")
def __for_testing__():
    """Holder of boilerplate code for __main__.

    Runs one interim-to-processed data cycle for a fixed (year, month):
    csv file -> tables -> datapoints -> Frame -> saved to the processed
    folder.

    Calls that were previously parked here as comments and can be run
    instead: approve_latest(), approve_all(), save_all_dfs().
    """
    # fixed reporting period used for the manual run
    period = (2017, 4)

    # locate the source csv file and split it into tables with variable names
    source_csv = cfg.get_path_csv(*period)
    valid_tables = get_all_valid_tables(source_csv)

    # stream of values emitted from the tables, then converted by Frame
    # (pandas dataframes, per the original note - TODO confirm in Frame)
    result = Frame(datapoints=Datapoints(valid_tables))

    # write the result into the processed folder for the same period
    target_folder = cfg.get_processed_folder(*period)
    result.save(target_folder)
def test_csv_has_no_null_byte():
    """The csv file for 2015-02, decoded with parse.ENC, has no NUL character."""
    null_char = "\0"
    content = cfg.get_path_csv(2015, 2).read_text(encoding=parse.ENC)
    assert null_char not in content
# -*- coding: utf-8 -*- import parse import cfg import pytest # TESTING END TO END csv_path = cfg.get_path_csv(2017, 4) # break csv to tables with variable names tables = parse.get_all_valid_tables(csv_path) # emit values from tables dpoints = parse.Datapoints(tables) # convert stream values to pandas dataframes frame = parse.Frame(datapoints=dpoints) def test_Datapoints_is_included_annual_1999_values_in_2017_4(): test_datapoints = [{ 'freq': 'm', 'label': 'EXPORT_GOODS_TOTAL__bln_usd', 'month': 1, 'value': 4.5, 'year': 1999 }, { 'freq': 'a', 'label': 'EXPORT_GOODS_TOTAL__bln_usd', 'value': 75.6, 'year': 1999 }, { 'freq': 'a', 'label': 'EXPORT_GOODS_TOTAL__yoy', 'value': 101.5,
def all_heads():
    """Yield the 'head' entry of every dict produced by read_csv().

    Debugging aid for get_year(). The csv file is the one located by
    cfg.get_path_csv() called without arguments.
    """
    source = cfg.get_path_csv()
    yield from (record['head'] for record in read_csv(source))
def get_frame(year=None, month=None):
    """Return a Frame built from the csv file for *year* and *month*.

    Both arguments are forwarded unchanged to cfg.get_path_csv(); the
    file found there is split into valid tables, wrapped in Datapoints
    and passed to Frame.
    """
    source_csv = cfg.get_path_csv(year, month)
    return Frame(Datapoints(get_all_valid_tables(source_csv)))