def inspect_labelled_output(p):
    """Hand the raw csv rows for *p* to ``print_rows_with_labels``.

    The raw csv filename is resolved from *p* with
    ``get_raw_csv_filename``, its rows are obtained from
    ``yield_csv_rows`` and the two dictionaries returned by
    ``load_spec(p)`` are passed along with them. Returns None.
    """
    raw_csv_path = get_raw_csv_filename(p)
    # Source of raw rows.
    raw_rows = yield_csv_rows(raw_csv_path)
    # Label dictionaries from the spec.
    headlines, supports = load_spec(p)
    print_rows_with_labels(raw_rows, headlines, supports)
def yield_labelled_rows(p):
    """Return the result of ``yield_row_with_labels`` for the raw csv of *p*.

    The raw csv filename is resolved from *p* with
    ``get_raw_csv_filename``, its rows come from ``yield_csv_rows`` and
    the two dictionaries returned by ``load_spec(p)`` are passed along
    as the labelling spec.
    """
    raw_csv_path = get_raw_csv_filename(p)
    # Source of raw rows.
    raw_rows = yield_csv_rows(raw_csv_path)
    # Label dictionaries from the spec.
    headlines, supports = load_spec(p)
    return yield_row_with_labels(raw_rows, headlines, supports)
def make_headers(p):
    """Write the non-year, non-empty first cells of the csv rows to a text file.

    The output lists table headers and footers, one per line. It is used
    to review file contents and to build label dictionaries by hand.

    Returns the name of the file written, as given by
    ``get_headers_filename(p)``.
    """
    headers_path = get_headers_filename(p)
    with open(headers_path, "w") as out:
        for row in yield_csv_rows(p):
            first_cell = row[0]
            # Skip data rows (first cell is a year) and rows with an empty
            # first cell; everything else is treated as header/footer text.
            if is_year(first_cell) or len(first_cell) == 0:
                continue
            out.write(first_cell + "\n")
    return headers_path
def yield_labelled_row(p):
    """Yield ``(var_name, year, annual, quarterly, monthly)`` for labelled rows.

    Rows are read from the labelled csv file of *p*. Rows whose first
    cell is ``"unknown_var"`` are skipped. For the others, the variable
    name is the first two cells joined by an underscore, and the
    remaining cells are cleaned with ``filter_value`` and split by the
    reader that ``get_reader_func_by_row_length`` selects.
    """
    labelled_csv_path = get_labelled_csv_filename(p)
    for row in yield_csv_rows(labelled_csv_path):
        if row[0] == "unknown_var":
            continue
        var_name = row[0] + "_" + row[1]
        data_cells = row[2:]
        cleaned_cells = [filter_value(cell) for cell in data_cells]
        # The reader is chosen from the uncleaned data cells.
        split_row = get_reader_func_by_row_length(data_cells)
        year, annual, quarterly, monthly = split_row(cleaned_cells)
        yield var_name, int(year), annual, quarterly, monthly
def check_vars_not_in_labelled_csv(p):
    """Return spec variable names that are absent from the labelled rows.

    The raw csv of *p* is labelled with ``yield_row_with_labels`` using
    the dictionaries from ``load_spec(p)``. The first element of every
    value in the headline dictionary is taken as a variable name expected
    by the spec; the first element of every labelled row is taken as a
    variable name actually produced.

    Prints both name lists and an explanation of the outcome.

    Returns:
        List of spec variable names not found among the labelled rows,
        in headline-dictionary order (empty when everything matches).
    """
    # NOTE(review): a function with this same name is defined again right
    # after this one in the file; the later definition rebinds the name.
    raw_csv_path = get_raw_csv_filename(p)
    headline_dict, support_dict = load_spec(p)
    raw_rows = yield_csv_rows(raw_csv_path)
    labelled_rows = yield_row_with_labels(raw_rows, headline_dict, support_dict)

    spec_vars = [spec_entry[0] for spec_entry in headline_dict.values()]
    print("\nVars in spec:")
    print(list_as_string(spec_vars))

    # Kept as a set so the membership test below is O(1) per name.
    labelled_vars = {row[0] for row in labelled_rows}
    print("Vars in labelled csv:")
    print(list_as_string(list(labelled_vars)))

    not_in_file = [name for name in spec_vars if name not in labelled_vars]
    if not_in_file:
        print("Not loaded to labelled csv:")
        print(list_as_string(not_in_file))
    else:
        print("Variables in spec and in labelled csv file match.\n")
    return not_in_file
def check_vars_not_in_labelled_csv(p):
    """Return spec variable names that are absent from the labelled rows.

    The raw csv of *p* is labelled with ``yield_row_with_labels`` using
    the dictionaries from ``load_spec(p)``. The first element of every
    value in the headline dictionary is taken as a variable name expected
    by the spec; the first element of every labelled row is taken as a
    variable name actually produced.

    Prints both name lists and an explanation of the outcome.

    Returns:
        List of spec variable names not found among the labelled rows,
        in headline-dictionary order (empty when everything matches).
    """
    # NOTE(review): this rebinds a function of the same name defined just
    # before it in the file; consider keeping only one definition.
    raw_csv_path = get_raw_csv_filename(p)
    headline_dict, support_dict = load_spec(p)
    raw_rows = yield_csv_rows(raw_csv_path)
    labelled_rows = yield_row_with_labels(raw_rows, headline_dict, support_dict)

    spec_vars = [spec_entry[0] for spec_entry in headline_dict.values()]
    print("\nVars in spec:")
    print(list_as_string(spec_vars))

    # Kept as a set so the membership test below is O(1) per name.
    labelled_vars = {row[0] for row in labelled_rows}
    print("Vars in labelled csv:")
    print(list_as_string(list(labelled_vars)))

    not_in_file = [name for name in spec_vars if name not in labelled_vars]
    if not_in_file:
        print("Not loaded to labelled csv:")
        print(list_as_string(not_in_file))
    else:
        print("Variables in spec and in labelled csv file match.\n")
    return not_in_file
def get_labelled_rows_by_segment(raw_data_file, yaml_spec_file, yaml_cfg_file):
    """Label the rows of *raw_data_file* segment by segment.

    All raw csv rows are read into a list and passed to
    ``_label_raw_rows_by_segment`` together with the default spec loaded
    from *yaml_spec_file* and the segment specs obtained from
    *yaml_cfg_file*. The helper's result is returned unchanged.
    """
    return _label_raw_rows_by_segment(
        list(yield_csv_rows(raw_data_file)),
        load_spec(yaml_spec_file),
        _get_segment_specs_no_header_doc(yaml_cfg_file),
    )
def get_labelled_rows_no_segments(raw_data_file, yaml_spec_file):
    """Return a list of labelled rows for *raw_data_file* using one spec.

    The raw csv rows and the spec loaded from *yaml_spec_file* are passed
    to ``yield_valid_rows_with_labels``; everything it produces is
    collected into a list.
    """
    return list(
        yield_valid_rows_with_labels(
            yield_csv_rows(raw_data_file),
            load_spec(yaml_spec_file),
        )
    )