def inspect_labelled_output(p):
    """Feed the raw CSV rows for *p* to ``print_rows_with_labels``.

    The CSV filename is derived from *p* with ``get_raw_csv_filename`` and
    the two label dictionaries are taken from ``load_spec(p)``.
    """
    raw_rows = yield_csv_rows(get_raw_csv_filename(p))
    headlines, supports = load_spec(p)
    print_rows_with_labels(raw_rows, headlines, supports)
def yield_labelled_rows(p):
    """Return the result of ``yield_row_with_labels`` for the data at *p*.

    Rows come from the raw CSV file named by ``get_raw_csv_filename(p)``;
    the headline and support dictionaries come from ``load_spec(p)``.
    """
    raw_rows = yield_csv_rows(get_raw_csv_filename(p))
    headlines, supports = load_spec(p)
    return yield_row_with_labels(raw_rows, headlines, supports)
Esempio n. 3
0
def inspect_labelled_output(p):
    """Hand the raw CSV rows and spec dictionaries for *p* to the row printer."""
    # Source rows: the raw csv whose name is derived from *p*.
    csv_rows = yield_csv_rows(get_raw_csv_filename(p))
    # Label definitions: the (headline, support) pair returned by the spec loader.
    spec = load_spec(p)
    headline_labels, support_labels = spec
    print_rows_with_labels(csv_rows, headline_labels, support_labels)
Esempio n. 4
0
def yield_labelled_rows(p):
    """Build and return the labelled-row iterable for the data at *p*."""
    # Source rows: the raw csv whose name is derived from *p*.
    csv_rows = yield_csv_rows(get_raw_csv_filename(p))
    # Label definitions: the (headline, support) pair returned by the spec loader.
    spec = load_spec(p)
    headline_labels, support_labels = spec
    return yield_row_with_labels(csv_rows, headline_labels, support_labels)
Esempio n. 5
0
def dump_doc_files_to_csv(file_list, csv=None):
    """Dump the rows gathered from all files in *file_list* into one csv file.

    If *csv* is omitted, the output filename is derived from the first entry
    of *file_list* with ``get_raw_csv_filename``. Returns the csv filename
    that was written to.
    """
    target = get_raw_csv_filename(file_list[0]) if csv is None else csv
    dump_iter_to_csv(yield_rows_from_many_files(file_list), target)
    return target
Esempio n. 6
0
def dump_doc_files_to_csv(file_list, csv=None):
    """Collect rows from each file in *file_list* and dump them to *csv*.

    *csv* defaults to the name ``get_raw_csv_filename`` derives from
    ``file_list[0]``. The filename actually used is returned.
    """
    out_name = csv
    if out_name is None:
        # No explicit target: name the output after the first input file.
        out_name = get_raw_csv_filename(file_list[0])
    all_rows = yield_rows_from_many_files(file_list)
    dump_iter_to_csv(all_rows, out_name)
    return out_name
def check_vars_not_in_labelled_csv(p):
    """Return spec varnames missing from the labelled rows; print an explanation.

    The varnames declared in the spec (first element of every value in the
    headline dictionary) are compared with the first element of every row
    produced by ``yield_row_with_labels`` for the raw csv file of *p*.
    Both collections and the outcome of the comparison are printed.

    Returns a list of spec varnames that never occur in the labelled rows,
    in spec order; the list is empty when everything matches.
    """
    infile = get_raw_csv_filename(p)
    headline_dict, support_dict = load_spec(p)
    gen_in = yield_csv_rows(infile)
    gen_out = yield_row_with_labels(gen_in, headline_dict, support_dict)

    # Only the dictionary values are needed, so iterate them directly.
    spec_vars = [v[0] for v in headline_dict.values()]
    print("\nVars in spec:")
    print(list_as_string(spec_vars))

    # Keep the set itself for O(1) membership tests below; a list copy is
    # made only for printing.
    labelled_vars = {row[0] for row in gen_out}
    print("Vars in labelled csv:")
    print(list_as_string(list(labelled_vars)))

    not_in_file = [x for x in spec_vars if x not in labelled_vars]

    if not_in_file:
        print("Not loaded to labelled csv:")
        print(list_as_string(not_in_file))
    else:
        print("Variables in spec and in labelled csv file match.\n")

    return not_in_file
Esempio n. 8
0
def check_vars_not_in_labelled_csv(p):
    """Return spec varnames missing from the labelled rows; print an explanation.

    Varnames from the spec (first element of each headline dictionary value)
    are checked against the first element of each row that
    ``yield_row_with_labels`` produces for the raw csv file of *p*.
    The two collections and the result of the check are printed.

    Returns the spec varnames not found among the labelled rows, in spec
    order; an empty list means the spec and the labelled rows match.
    """
    infile = get_raw_csv_filename(p)
    headline_dict, support_dict = load_spec(p)
    gen_in = yield_csv_rows(infile)
    gen_out = yield_row_with_labels(gen_in, headline_dict, support_dict)

    # The dictionary keys are not used here, so iterate over values only.
    spec_vars = [v[0] for v in headline_dict.values()]
    print("\nVars in spec:")
    print(list_as_string(spec_vars))

    # A set gives constant-time lookups in the comparison below; it is
    # converted to a list only for printing.
    labelled_vars = {row[0] for row in gen_out}
    print("Vars in labelled csv:")
    print(list_as_string(list(labelled_vars)))

    not_in_file = [x for x in spec_vars if x not in labelled_vars]

    if not_in_file:
        print("Not loaded to labelled csv:")
        print(list_as_string(not_in_file))
    else:
        print("Variables in spec and in labelled csv file match.\n")

    return not_in_file

# ______________________________________________________________________________

# Sample input consumed by test_iter(): two newline-separated lines whose
# fields are tab-separated (a header line padded with trailing tabs, followed
# by one data row). The tabs are significant - do not reformat this literal.
doc = """1.10. Внешнеторговый оборот – всего1),  млрд.долларов США / Foreign trade turnover – total1),  bln US dollars																	
1999	115,1	24,4	27,2	28,4	35,1	7,2	7,9	9,3	9,8	8,0	9,3	9,5	9,3	9,6	10,4	11,1	13,7"""


def test_iter():
    """Yield each line of the module-level ``doc`` sample as a list of tab-separated fields."""
    yield from (line.split("\t") for line in doc.split("\n"))


if __name__ == "__main__":
    import os

    # Demo run: label the in-module sample rows from test_iter() using the
    # spec loaded for the path below, then print the result.
    p = os.path.abspath("../data/minitab/minitab.csv")
    f = get_raw_csv_filename(p)  # value is not used further in this block
    headline_dict, support_dict = load_spec(p)
    print_rows_with_labels(test_iter(), headline_dict, support_dict)
Esempio n. 10
0
        if is_label_found_func(text, pat): 
            return label_dict[pat]
    # False will not cause change in labels    
    return False

#______________________________________________________________________________

# Sample input consumed by test_iter(): two newline-separated lines whose
# fields are tab-separated (a header line padded with trailing tabs, followed
# by one data row). The tabs are significant - do not reformat this literal.
doc = """1.10. Внешнеторговый оборот – всего1),  млрд.долларов США / Foreign trade turnover – total1),  bln US dollars																	
1999	115,1	24,4	27,2	28,4	35,1	7,2	7,9	9,3	9,8	8,0	9,3	9,5	9,3	9,6	10,4	11,1	13,7"""

def test_iter():
    """Yield the tab-separated fields of every line in the module-level ``doc`` sample."""
    for line in doc.split("\n"):
        yield line.split("\t")
        

if __name__ == "__main__":
    import os

    # Demo run: the rows come from the in-module sample (test_iter) instead
    # of a csv file, while the spec is still loaded for the path below.
    p = os.path.abspath("../data/minitab/minitab.csv")
    f = get_raw_csv_filename(p)  # value is not used further in this block
    headline_dict, support_dict = load_spec(p)
    print_rows_with_labels(test_iter(), headline_dict, support_dict)
Esempio n. 11
0
def dump_doc_to_single_csv_file(p):
    """Dump the rows yielded for *p* into its raw csv file; return that filename."""
    target = get_raw_csv_filename(p)
    dump_iter_to_csv(yield_continious_rows(p), target)
    return target
Esempio n. 12
0
def dump_doc_to_single_csv_file(p):
    """Write everything ``yield_continious_rows(p)`` produces to one csv file.

    The output name is derived from *p* with ``get_raw_csv_filename`` and is
    returned to the caller.
    """
    out_name = get_raw_csv_filename(p)
    rows = yield_continious_rows(p)
    dump_iter_to_csv(rows, out_name)
    return out_name