def inspect_labelled_output(p):
    """Print the labelled rows obtained for *p*.

    The csv filename is taken from ``get_raw_csv_filename(p)`` and turned
    into a row stream by ``yield_csv_rows``. The headline and support
    dictionaries come from ``load_spec(p)``. All three are passed to
    ``print_rows_with_labels``; nothing is returned.
    """
    # Row stream over the raw csv file.
    raw_rows = yield_csv_rows(get_raw_csv_filename(p))
    # Label specification used to produce the new rows.
    headlines, support = load_spec(p)
    print_rows_with_labels(raw_rows, headlines, support)
def yield_labelled_rows(p):
    """Return the result of ``yield_row_with_labels`` for *p*.

    The csv filename is taken from ``get_raw_csv_filename(p)`` and turned
    into a row stream by ``yield_csv_rows``. The headline and support
    dictionaries come from ``load_spec(p)``. The value returned by
    ``yield_row_with_labels`` is handed back to the caller unchanged.
    """
    # Row stream over the raw csv file.
    csv_rows = yield_csv_rows(get_raw_csv_filename(p))
    # Label specification used to produce the new rows.
    headlines, support = load_spec(p)
    return yield_row_with_labels(csv_rows, headlines, support)
def check_vars_not_in_labelled_csv(p):
    """Return varnames from the spec that are not written to the labelled csv.

    Prints an explanation: the varnames listed in the spec, the varnames
    found in the labelled rows, and then either the missing varnames or a
    message saying that both match.

    Args:
        p: value passed to ``get_raw_csv_filename`` and ``load_spec``.

    Returns:
        list: spec varnames (in ``headline_dict`` iteration order) that do
        not occur as the first element of any labelled row; empty when
        nothing is missing.
    """
    infile = get_raw_csv_filename(p)
    headline_dict, support_dict = load_spec(p)
    gen_in = yield_csv_rows(infile)
    gen_out = yield_row_with_labels(gen_in, headline_dict, support_dict)

    # Only the dict values are used: the first element of each is the varname.
    spec_vars = [v[0] for v in headline_dict.values()]
    print("\nVars in spec:")
    print(list_as_string(spec_vars))

    # A set removes duplicates and keeps the membership test below O(1)
    # per lookup instead of scanning a list for every spec varname.
    labelled_vars = set(row[0] for row in gen_out)
    print("Vars in labelled csv:")
    print(list_as_string(list(labelled_vars)))

    not_in_file = [name for name in spec_vars if name not in labelled_vars]
    if not_in_file:
        print("Not loaded to labelled csv:")
        print(list_as_string(not_in_file))
    else:
        print("Variables in spec and in labelled csv file match.\n")
    return not_in_file
def check_vars_not_in_labelled_csv(p):
    """Return varnames from the spec that are not written to the labelled csv.

    Prints an explanation: the varnames listed in the spec, the varnames
    found in the labelled rows, and then either the missing varnames or a
    message saying that both match.

    Args:
        p: value passed to ``get_raw_csv_filename`` and ``load_spec``.

    Returns:
        list: spec varnames (in ``headline_dict`` iteration order) that do
        not occur as the first element of any labelled row; empty when
        nothing is missing.
    """
    infile = get_raw_csv_filename(p)
    headline_dict, support_dict = load_spec(p)
    gen_in = yield_csv_rows(infile)
    gen_out = yield_row_with_labels(gen_in, headline_dict, support_dict)

    # The dict keys are not needed here; each value starts with the varname.
    spec_vars = [v[0] for v in headline_dict.values()]
    print("\nVars in spec:")
    print(list_as_string(spec_vars))

    # Collect distinct varnames once; the set also serves the lookups below
    # so the comparison does not rescan a list for every spec varname.
    labelled_vars = set(row[0] for row in gen_out)
    print("Vars in labelled csv:")
    print(list_as_string(list(labelled_vars)))

    not_in_file = [name for name in spec_vars if name not in labelled_vars]
    if not_in_file:
        print("Not loaded to labelled csv:")
        print(list_as_string(not_in_file))
    else:
        print("Variables in spec and in labelled csv file match.\n")
    return not_in_file
# ______________________________________________________________________________

# Sample raw text consumed by test_iter() in place of rows read from a file.
# NOTE(review): test_iter() splits this literal on newline and tab characters;
# confirm the literal's internal whitespace against the original file.
doc = """1.10. Внешнеторговый оборот – всего1), млрд.долларов США / Foreign trade turnover – total1), bln US dollars 1999 115,1 24,4 27,2 28,4 35,1 7,2 7,9 9,3 9,8 8,0 9,3 9,5 9,3 9,6 10,4 11,1 13,7"""


def test_iter():
    """Yield one list of cells for every line of ``doc``.

    ``doc`` is cut into lines on the newline character and every line is
    cut into cells on the tab character.
    """
    for line in doc.split("\n"):
        yield line.split("\t")


if __name__ == "__main__":
    # Alternative run, kept commented out as in the original:
    # import os
    # p = os.path.abspath("../data/1-07/1-07.txt")
    # gen = yield_labelled_rows(p)
    # for x in gen:
    #     print(x)
    #
    # inspect_labelled_output(p)

    import os

    p = os.path.abspath("../data/minitab/minitab.csv")
    # The returned filename is not used below; the call itself is kept.
    f = get_raw_csv_filename(p)
    # Rows come from the in-memory sample rather than from the csv file.
    gen_in = test_iter()
    # Label specification used to produce the new rows.
    headline_dict, support_dict = load_spec(p)
    print_rows_with_labels(gen_in, headline_dict, support_dict)
if is_label_found_func(text, pat): return label_dict[pat] # False will not cause change in labels return False #______________________________________________________________________________ doc = """1.10. Внешнеторговый оборот – всего1), млрд.долларов США / Foreign trade turnover – total1), bln US dollars 1999 115,1 24,4 27,2 28,4 35,1 7,2 7,9 9,3 9,8 8,0 9,3 9,5 9,3 9,6 10,4 11,1 13,7""" def test_iter(): for row in [x.split("\t") for x in doc.split("\n")]: yield row if __name__ == "__main__": # import os # p = os.path.abspath("../data/1-07/1-07.txt") # gen = yield_labelled_rows(p) # for x in gen: # print(x) # # inspect_labelled_output(p) import os p = os.path.abspath("../data/minitab/minitab.csv") f = get_raw_csv_filename(p) # open csv gen_in = test_iter() # produce new rows headline_dict, support_dict = load_spec(p) print_rows_with_labels(gen_in, headline_dict, support_dict)