def write_json(file, data, ignore_tags):
    """
    Writes one JSON object per line to `file`, one object per sequence
    in `data`.

    Each object has three keys:
      - "content": the first element of every item in the sequence,
        joined with single spaces (presumably the tokens -- confirm
        against the caller).
      - "annotation": the result of get_annotation(seq, ignore_tags).
      - "extras": a fixed placeholder mapping.

    After the file is written, a confirmation is emitted via title().
    """
    with open(file, "w") as out:
        for seq in data:
            record = {
                "content": " ".join(item[0] for item in seq),
                "annotation": get_annotation(seq, ignore_tags),
                "extras": {"Name": "ColumnName", "Class": "ColumnValue"},
            }
            # One JSON document per line.
            out.write(json.dumps(record) + "\n")
    title(f"Annotations Saved to: {file}")
def menu_repr(menu):
    """
    Builds the display text for a menu.

    The text is the formatted title, then one line per entry of
    menu[CHOICES] in the form "<TAB><key>. <text>", then a separator
    line. The entry whose key equals the string form of menu[DEFAULT]
    gets DEF_MARKER appended after a space.
    """
    header = fmt.title(menu[TITLE], True)
    default_key = f"{menu[DEFAULT]}"

    rows = []
    for choice_key, choice in menu[CHOICES].items():
        entry = f"{TAB}{choice_key}. {choice[TEXT]}"
        if default_key == choice_key:
            entry += " " + DEF_MARKER
        rows.append(entry + "\n")

    footer = f"{fmt.sep()}\n"
    return header + "".join(rows) + footer
def run_form(form):
    """
    Prints the form title, collects an answer for every field, and
    stores each answer on its field.

    If the form has a truthy SUBMIT entry, its text is passed to
    my_input() once all fields are done. Returns the same form object.
    """
    print(fmt.title(form[TITLE]))

    fields = form[FLDS]
    for name in fields:
        field = fields[name]
        change_form_fld(field, get_fld_input(field))

    submit = form.get(SUBMIT)
    if submit:
        my_input(submit[TEXT])

    return form
def data_repr(data):
    """
    Renders a data object as display text.

    Returns a tuple (text, return_url). The text is the formatted
    title followed by one line per entry of data[DATA]:
    "<index>. <key>" and then every value of that entry's record, each
    preceded by a tab. return_url is data[RETURN], or None when that
    key is absent.
    """
    ret_url = data.get(RETURN, None)
    heading = fmt.title(data[TITLE])

    records = data[DATA]
    rows = []
    for idx, name in enumerate(records):
        columns = "".join(f"\t{value}" for value in records[name].values())
        rows.append(f"{idx}. {name}{columns}\n")

    return (heading + "".join(rows), ret_url)
all_possible_transitions=False) if eval: x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42) crf.fit(x_train, y_train) pred = crf.predict(x_test) report = classification_report(y_test, pred) print("Test Results:\n") line(60) print(report) line(60) log_results(y_test, pred) line(60) else: crf.fit(x, y) if save: save_crf(crf) return crf if __name__ == "__main__": labelled_files = ["./data/labelled/sample_labelled.tsv"] title("Testing CRF training...") crf = train_crf(labelled_files)
def test_title(self):
    """ The text passed to title() must appear in its output. """
    text = "Well?"
    rendered = title(text)
    self.assertIn(text, rendered)
Desc: Loads annotated data, trains crf model. Makes predictions on unannottated sentances and saves output for upload to dataturks annotation service. ''' _, seqs = format_unlabelled_data(text_file) crf = load_crf(model_path) data = create_dataset(crf, seqs) write_json(save_file, data, ignore_tags) print("\nSample Raw Text:") print(' '.join([a for a, b in seqs[0]])) print("\nSample Prediction:") print(data[0]) print("\n", "-" * 40) return if __name__ == "__main__": crf_path = "./models/crf_Thu_Jan__2_12:54:14_2020" unlabelled_file = "./data/unlabelled/sample_unlabelled.txt" save_file = "./data/pre_annotated/pre_annotated_sample.txt" ignore_tags = [ "O", "Duration", "BODY", ] title("Testing Pre-Annotation...") pre_annotate_unlabelled(crf_path, unlabelled_file, save_file, ignore_tags)