コード例 #1
0
def write_json(file, data, ignore_tags):
    """Write one JSON object per line to ``file``, one for each item of ``data``.

    Each record carries three keys:

    * ``"content"``: the first element of every entry in the sequence,
      joined with single spaces.
    * ``"annotation"``: whatever ``get_annotation(seq, ignore_tags)`` returns.
    * ``"extras"``: a fixed placeholder mapping.

    After the file is closed, the destination path is reported via ``title``.

    Args:
        file: Path of the output file; it is opened in write mode.
        data: Iterable of sequences whose entries are indexable (entry[0] is
            used as the token text).
        ignore_tags: Passed through unchanged to ``get_annotation``.

    Returns:
        None.
    """
    with open(file, "w") as out:
        for seq in data:
            record = {
                "content": " ".join(token[0] for token in seq),
                "annotation": get_annotation(seq, ignore_tags),
                "extras": {"Name": "ColumnName", "Class": "ColumnValue"},
            }
            # One serialized record per line.
            out.write(json.dumps(record) + "\n")
    title(f"Annotations Saved to: {file}")
コード例 #2
0
ファイル: text_app.py プロジェクト: gcallah/text_menu
def menu_repr(menu):
    """
    Builds the display text for a menu.

    The text starts with the formatted menu title, followed by one line per
    entry of menu[CHOICES] in the form "<TAB><key>. <text>", and ends with a
    separator line. The entry whose key equals the string form of
    menu[DEFAULT] gets DEF_MARKER appended after a space.
    """
    header = fmt.title(menu[TITLE], True)
    # Choice keys are compared against the default rendered as a string.
    default_key = f"{menu[DEFAULT]}"
    rows = []
    for key, choice in menu[CHOICES].items():
        row = f"{TAB}{key}. {choice[TEXT]}"
        if default_key == key:
            row = row + " " + DEF_MARKER
        rows.append(row + "\n")
    footer = f"{fmt.sep()}\n"
    return header + "".join(rows) + footer
コード例 #3
0
ファイル: text_app.py プロジェクト: gcallah/text_menu
def run_form(form):
    """
    Walks the user through a form and returns that same form.

    Prints the formatted title, then for every key of form[FLDS] obtains an
    answer with get_fld_input and hands it to change_form_fld together with
    the field. If the form has a truthy SUBMIT entry, its TEXT is passed to
    my_input afterwards.
    """
    print(fmt.title(form[TITLE]))
    fields = form[FLDS]
    for name in fields:
        field = fields[name]
        change_form_fld(field, get_fld_input(field))
    submit = form.get(SUBMIT)
    if submit:
        my_input(submit[TEXT])
    return form
コード例 #4
0
ファイル: text_app.py プロジェクト: gcallah/text_menu
def data_repr(data):
    """
    Renders a data object as display text.

    Returns a (text, ret_url) tuple. The text is the formatted title followed
    by one line per key of data[DATA]: "<index>. <key>" and then each value of
    that key's record preceded by a tab. ret_url is data's RETURN entry, or
    None when it is absent.
    """
    ret_url = data.get(RETURN)
    pieces = [fmt.title(data[TITLE])]
    records = data[DATA]
    for index, key in enumerate(records):
        columns = "".join(f"\t{val}" for val in records[key].values())
        pieces.append(f"{index}. {key}{columns}\n")
    return ("".join(pieces), ret_url)
コード例 #5
0
              all_possible_transitions=False)

    if eval:
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.1,
                                                            random_state=42)
        crf.fit(x_train, y_train)
        pred = crf.predict(x_test)
        report = classification_report(y_test, pred)
        print("Test Results:\n")
        line(60)
        print(report)
        line(60)
        log_results(y_test, pred)
        line(60)

    else:
        crf.fit(x, y)

    if save:
        save_crf(crf)

    return crf


if __name__ == "__main__":
    # Script entry point: run train_crf on the sample labelled TSV file.
    labelled_files = ["./data/labelled/sample_labelled.tsv"]
    title("Testing CRF training...")
    # Only the file list is passed; all other train_crf arguments keep their defaults.
    crf = train_crf(labelled_files)
コード例 #6
0
 def test_title(self):
     """Checks that the output of title() contains the text it was given."""
     heading = "Well?"
     rendered = title(heading)
     self.assertIn(heading, rendered)
コード例 #7
0
    Desc:
    
    Loads annotated data, trains crf model. Makes predictions on unannottated sentances and saves
    output for upload to dataturks annotation service. 
    
    
    '''

    _, seqs = format_unlabelled_data(text_file)
    crf = load_crf(model_path)
    data = create_dataset(crf, seqs)
    write_json(save_file, data, ignore_tags)
    print("\nSample Raw Text:")
    print(' '.join([a for a, b in seqs[0]]))
    print("\nSample Prediction:")
    print(data[0])
    print("\n", "-" * 40)
    return


if __name__ == "__main__":
    # Script entry point: run pre_annotate_unlabelled with the sample paths below.
    # NOTE(review): the model file name contains ':' characters, which are not
    # valid in Windows file names -- confirm the target platform.
    crf_path = "./models/crf_Thu_Jan__2_12:54:14_2020"
    unlabelled_file = "./data/unlabelled/sample_unlabelled.txt"
    save_file = "./data/pre_annotated/pre_annotated_sample.txt"
    # Tags handed to pre_annotate_unlabelled as ignore_tags, which forwards
    # them to write_json.
    ignore_tags = [
        "O",
        "Duration",
        "BODY",
    ]
    title("Testing Pre-Annotation...")
    pre_annotate_unlabelled(crf_path, unlabelled_file, save_file, ignore_tags)