def source():
    """Yield a 'plaintext' document for every .txt file under ``input_dir``.

    Each file is read with undecodable bytes replaced, an Event is created
    whose id is the file's path relative to ``input_dir``, and the document
    created on that event is yielded.
    """
    for txt_path in input_dir.rglob('*.txt'):
        with txt_path.open('r', errors='replace') as reader:
            contents = reader.read()
        event_id = str(txt_path.relative_to(input_dir))
        event = Event(event_id=event_id,
                      client=default_pipeline.events_client)
        yield event.create_document('plaintext', contents)
def test_event_to_dict_include_label_text():
    """Serializing with include_label_text=True attaches each label's covered text."""
    event = Event()
    document = event.create_document('plaintext', text)
    document.add_labels('sentences', [label(0, 117)])
    document.add_labels('tokens', [label(begin, end) for begin, end in tokens])

    serialized = event_to_dict(event, include_label_text=True)
    doc_dict = serialized['documents']['plaintext']

    # The single sentence spans the whole text.
    sentence_labels = doc_dict['label_indices']['sentences']['json_labels']
    assert sentence_labels[0]['_text'] == text

    # Each token label carries exactly the slice of text it covers.
    token_labels = doc_dict['label_indices']['tokens']['json_labels']
    for idx, token_label in enumerate(token_labels):
        begin, end = tokens[idx]
        assert token_label['_text'] == text[begin:end]
def test_print_debug_all():
    """print_debug='all' writes both false-positive and false-negative reports."""
    event = Event()
    doc = event.create_document(
        'test', 'The quick brown fox jumps over the lazy dog.')
    with doc.get_labeler('target') as label_target:
        label_target(16, 19)
    with doc.get_labeler('tested') as label_tested:
        label_tested(10, 15)

    debug_out = StringIO()
    metric = FirstTokenConfusion(print_debug='all', debug_handle=debug_out)
    metric.update(doc, doc.labels['tested'], doc.labels['target'])

    expected = ('False Positives\n'
                'The quick {brown} fox jumps over the lazy dog.\n\n'
                'False Negatives\n'
                'The quick brown {fox} jumps over the lazy dog.\n\n')
    assert debug_out.getvalue() == expected