Python TokenOverlap.TokenOverlap Examples

Programming Language: Python

Namespace/Package Name: bratiaa.utils

Class/Type: TokenOverlap

Method/Function: TokenOverlap

Examples at hotexamples.com: 6

Python TokenOverlap.TokenOverlap - 6 examples found. These are the top-rated real-world Python examples of bratiaa.utils.TokenOverlap.TokenOverlap, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

TokenOverlap(6)

overlapping_tokens(4)

Example #1

Show file

File: agree.py Project: oya163/bratiaa

 def _compute_tp_total(self, input_gen):
     """Accumulate pairwise agreement counts for every document.

     For each document yielded by ``input_gen()``, every pair of its
     annotation files is passed to ``self._eval_func``; the three values
     it returns are handed to ``self._increment_counts`` together with
     the indices of the annotator pair and of the document.

     Args:
         input_gen: Zero-argument callable returning an iterable of
             documents. Each document must expose ``ann_files`` and
             ``doc_id`` (and ``txt_path`` when a tokenizer is
             configured via ``self._token_func``).

     Returns:
         None. Results are recorded through ``self._increment_counts``.

     Raises:
         AssertionError: If more documents are yielded than there are
             entries in ``self._documents``.
     """
     for doc_index, document in enumerate(input_gen()):
         assert doc_index < len(
             self._documents
         ), 'Input generator yields more documents than expected!'
         # Token offsets are only built when a tokenizer is configured;
         # otherwise the evaluation function receives tokens=None.
         to = None
         if self._token_func:
             text = read(document.txt_path)
             tokens = list(self._token_func(text))
             to = TokenOverlap(text, tokens)
         for anno_file_1, anno_file_2 in combinations(
                 document.ann_files, 2):
             tp, exp, pred = self._eval_func(anno_file_1.ann_path,
                                             anno_file_2.ann_path,
                                             tokens=to)
             pair_idx = self._pair2idx[(anno_file_1.annotator_id,
                                        anno_file_2.annotator_id)]
             doc_idx = self._doc2idx[document.doc_id]
             self._increment_counts(tp, pair_idx, doc_idx, 0)
             # NOTE(review): exp and pred are both accumulated into
             # slot 1 -- presumably a combined total, as the method
             # name suggests; confirm this is not a typo for slot 2.
             self._increment_counts(exp, pair_idx, doc_idx, 1)
             self._increment_counts(pred, pair_idx, doc_idx, 1)

Example #2

Show file

File: test_token_overlap.py Project: soras/bratiaa

def test_spans_overlapping_tokens():
    """Spans that cut into tokens must return every token they touch."""
    sentence = 'This is a sentence.'
    token_spans = [(0, 4), (5, 7), (8, 9), (10, 18), (18, 19)]
    overlap = TokenOverlap(sentence, token_spans)

    # Both query spans reach from "is" into "sentence", so the same
    # three tokens are expected each time.
    expected = [(5, 7), (8, 9), (10, 18)]
    for begin, end in ((6, 11), (5, 15)):
        assert overlap.overlapping_tokens(begin, end) == expected

Example #3

Show file

File: test_token_overlap.py Project: soras/bratiaa

def test_empty_overlap_between_tokens():
    """Spans lying strictly between tokens must not match any token."""
    sentence = 'This is a sentence.'
    token_spans = [(0, 4), (5, 7), (8, 9), (10, 18), (18, 19)]
    overlap = TokenOverlap(sentence, token_spans)

    # (4, 5) and (7, 8) cover the spaces between words; (18, 18) is a
    # zero-length span on the boundary between "sentence" and ".".
    gap_spans = ((4, 5), (7, 8), (18, 18))
    for begin, end in gap_spans:
        assert len(overlap.overlapping_tokens(begin, end)) == 0

Example #4

Show file

File: test_token_overlap.py Project: soras/bratiaa

def test_empty_overlap_at_beginning():
    """Spans inside the leading whitespace must not match any token."""
    sentence = '   This is a sentence.'
    token_spans = [(3, 7), (8, 10), (11, 12), (13, 21), (21, 22)]
    overlap = TokenOverlap(sentence, token_spans)

    # The first token starts at offset 3, so every span below ends at or
    # before it.
    leading_spans = ((0, 2), (1, 3), (0, 3))
    for begin, end in leading_spans:
        assert len(overlap.overlapping_tokens(begin, end)) == 0

Example #5

Show file

File: test_token_overlap.py Project: soras/bratiaa

def test_identical_spans():
    """Querying with a token's own offsets must return just that token."""
    sentence = 'This is a sentence.'
    token_spans = [(0, 4), (5, 7), (8, 9), (10, 18), (18, 19)]
    overlap = TokenOverlap(sentence, token_spans)

    for span in token_spans:
        found = overlap.overlapping_tokens(*span)
        assert len(found) == 1
        assert found[0] == span

Example #6

Show file

 def find_all_matches_and_mismatches(self, input_gen):
     """Collect agreements and disagreements for each annotator pair.

     Every document yielded by ``input_gen()`` is read, and each pair of
     its annotation files is compared with ``self._eval_func``. One
     result dictionary is produced per (document, annotator pair).

     Args:
         input_gen: Zero-argument callable returning an iterable of
             documents exposing ``txt_path`` and ``ann_files``.

     Returns:
         A list of dicts with the keys ``doc_id``, ``text_file``,
         ``ann_file``, ``annotator_1``, ``annotator_2``, ``matches``
         and ``mismatches``.

     Raises:
         AssertionError: If more documents are yielded than there are
             entries in ``self._documents``.
     """
     collected = []
     for doc_index, document in enumerate(input_gen()):
         assert doc_index < len(self._documents), 'Input generator yields more documents than expected!'
         text = read(document.txt_path)
         token_overlap = None
         if self._token_func:
             token_overlap = TokenOverlap(text, list(self._token_func(text)))
         # TODO: pairwise comparison works robustly for 2 annotators;
         #       with more than 2 annotators the per-pair results
         #       should ideally be consolidated further.
         for first, second in combinations(document.ann_files, 2):
             tp, exp, pred = self._eval_func(first.ann_path, second.ann_path, tokens=token_overlap)
             # Derive false positives / false negatives from exp & pred.
             if self._token_func is None:
                 fp = pred.difference(exp)
                 fn = exp.difference(pred)
             else:
                 fp = counter2list(Counter(pred) - Counter(exp))
                 fn = counter2list(Counter(exp) - Counter(pred))
             # Metadata identifying the document and the annotator pair.
             entry = {
                 'doc_id': doc_index,
                 'text_file': os.path.basename(document.txt_path),
                 'ann_file': first.ann_path.name,
                 'annotator_1': first.annotator_id,
                 'annotator_2': second.annotator_id,
             }
             # True positives: annotations both annotators agree on.
             agreed = self._extract_sorted_annotation_text_tuples(tp, text)
             entry['matches'] = self._format_text_tuples_for_pretty_printing(agreed, skip_str_conversion=[0, 1])
             # Marked by annotator 2 but not by annotator 1.
             only_second = self._extract_sorted_annotation_text_tuples(fp, text, annotator=second.annotator_id)
             # Marked by annotator 1 but not by annotator 2.
             only_first = self._extract_sorted_annotation_text_tuples(fn, text, annotator=first.annotator_id)
             disagreed = sorted(only_second + only_first, key=lambda item: item[2])
             entry['mismatches'] = self._format_text_tuples_for_pretty_printing(disagreed, skip_str_conversion=[0, 1, 2])
             collected.append(entry)
     return collected