def get_spec_and_cfg_varnames(data_folder):
    """Return the unpacked spec headers and unpacked cfg segments.

    The spec and cfg file names are obtained from ``get_filenames(data_folder)``.
    The result is the pair
    ``(unpack_header_dict(header_dict), unpack_segments(segments))``.
    """
    # The csv path and the unit dictionary are not needed here.
    _csv_path, spec_path, cfg_path = get_filenames(data_folder)
    cfg_segments = load_cfg(cfg_path)
    spec_headers, _spec_units = load_spec(spec_path)
    return unpack_header_dict(spec_headers), unpack_segments(cfg_segments)
def get_complete_dicts(data_folder):
    """Return header and unit dictionaries merged across spec and cfg.

    Starts from the two dictionaries returned by ``load_spec`` and updates
    them, in segment order, with the dictionaries found at ``seg[2][0]``
    (headers) and ``seg[2][1]`` (units) of every segment from ``load_cfg``.
    Later updates overwrite earlier values for the same key.
    """
    _csv_path, spec_path, cfg_path = get_filenames(data_folder)
    cfg_segments = load_cfg(cfg_path)
    all_headers, all_units = load_spec(spec_path)
    for segment in cfg_segments:
        segment_headers = segment[2][0]
        segment_units = segment[2][1]
        # In-place merge into the dictionaries loaded from the spec.
        all_headers.update(segment_headers)
        all_units.update(segment_units)
    return all_headers, all_units
def import_csv(data_folder, kill_existing_data=True):
    """Send the csv/spec/cfg files of *data_folder* to the database.

    When *kill_existing_data* is truthy, ``wipe_db_tables()`` is called
    before ``to_database`` runs.
    """
    csv_path, spec_path, cfg_path = get_filenames(data_folder)
    if kill_existing_data:
        wipe_db_tables()
    to_database(csv_path, spec_path, cfg_path)
def import_csv(data_folder, kill_existing_data=True):
    """Import the data files located in *data_folder* into the database.

    The file names come from ``get_filenames(data_folder)``. If
    *kill_existing_data* is truthy, the tables are wiped with
    ``wipe_db_tables()`` first; ``to_database`` is called in either case.
    """
    source_csv, source_spec, source_cfg = get_filenames(data_folder)
    if kill_existing_data:
        wipe_db_tables()
    to_database(source_csv, source_spec, source_cfg)
from kep.inspection.var_check import count_entries, unique, get_complete_dicts
from kep.file_io.common import get_filenames, write_file
from kep.importer.parser.label_csv import get_nondata_rows

data_folder = "data/2015/ind10"
csv, spec, cfg = get_filenames(data_folder)
rows = get_nondata_rows(csv)
# First element of every non-data row.
fst_elements = [r[0] for r in rows]


def is_almost_in(x, lst):
    """Return how many items of *lst* contain the first 10 characters of *x*.

    Containment is tested with ``in``, so despite the name the result is a
    count, not a boolean.
    """
    prefix = x[0:10]
    return sum(1 for y in lst if prefix in y)


# Alternative counting via count_entries, kept for reference:
# rows_with_count = [[r, count_entries(r[0], fst_elements)] for r in rows]
rows_with_count = [[r, is_almost_in(r[0], fst_elements)] for r in rows]

# Keep first elements longer than one character whose 10-character prefix
# occurs in more than one first element.
interest = unique(
    [x[0][0] for x in rows_with_count if x[1] > 1 and len(x[0][0]) > 1]
)
# NOTE: error on printing to screen - Unicode
def test_lab_rows():
    """Check that row_exists() finds PROD_AUTO_TRUCKS_AND_CHASSIS.

    The labelled rows are built from the files of CURRENT_MONTH_DATA_FOLDER.
    """
    csv_path, spec_path, cfg_path = get_filenames(CURRENT_MONTH_DATA_FOLDER)
    labelled = get_labelled_rows(csv_path, spec_path, cfg_path)
    assert row_exists('PROD_AUTO_TRUCKS_AND_CHASSIS', labelled)
def test_lab_rows(self):
    """Check that TestLabRows.row_exists() finds PROD_AUTO_TRUCKS_AND_CHASSIS.

    The labelled rows are built from the files of DATA_FOLDER.
    """
    csv_path, spec_path, cfg_path = get_filenames(DATA_FOLDER)
    labelled = get_labelled_rows(csv_path, spec_path, cfg_path)
    found = TestLabRows.row_exists('PROD_AUTO_TRUCKS_AND_CHASSIS', labelled)
    self.assertTrue(found)
from kep.inspection.var_check import count_entries, unique, get_complete_dicts
from kep.file_io.common import get_filenames, write_file
from kep.importer.parser.label_csv import get_nondata_rows

data_folder = "data/2015/ind10"
csv, spec, cfg = get_filenames(data_folder)
rows = get_nondata_rows(csv)
# First element of every non-data row.
fst_elements = [r[0] for r in rows]


def is_almost_in(x, lst):
    """Return how many items of *lst* contain the first 10 characters of *x*.

    Containment is tested with ``in``, so despite the name the result is a
    count, not a boolean.
    """
    prefix = x[0:10]
    return sum(1 for y in lst if prefix in y)


# Alternative counting via count_entries, kept for reference:
# rows_with_count = [[r, count_entries(r[0], fst_elements)] for r in rows]
rows_with_count = [[r, is_almost_in(r[0], fst_elements)] for r in rows]

# Keep first elements longer than one character whose 10-character prefix
# occurs in more than one first element.
interest = unique(
    [x[0][0] for x in rows_with_count if x[1] > 1 and len(x[0][0]) > 1]
)

# NOTE: error on printing to screen - Unicode
write_file("\n".join(interest), "i.txt")

# TODO: failures are where too many entries (>1) of spec/cfg dictionaries
#       are found in fst_elements
# TODO: get big specification
# TODO: see how many times header_dict keys hit fst_elements
def test_lab_rows(self):
    """Assert that the PROD_AUTO_TRUCKS_AND_CHASSIS row is reported present.

    Labelled rows come from get_labelled_rows() applied to the DATA_FOLDER
    files; presence is decided by TestLabRows.row_exists().
    """
    data_csv, data_spec, data_cfg = get_filenames(DATA_FOLDER)
    labelled_rows = get_labelled_rows(data_csv, data_spec, data_cfg)
    is_present = TestLabRows.row_exists('PROD_AUTO_TRUCKS_AND_CHASSIS', labelled_rows)
    self.assertTrue(is_present)
def test_lab_rows():
    """Assert that the PROD_AUTO_TRUCKS_AND_CHASSIS row is reported present.

    Labelled rows come from get_labelled_rows() applied to the
    CURRENT_MONTH_DATA_FOLDER files; presence is decided by row_exists().
    """
    data_csv, data_spec, data_cfg = get_filenames(CURRENT_MONTH_DATA_FOLDER)
    labelled_rows = get_labelled_rows(data_csv, data_spec, data_cfg)
    assert row_exists("PROD_AUTO_TRUCKS_AND_CHASSIS", labelled_rows)