def test_record_to_set(self):
    ideal_set = {(7, 13, 'record'), (15, 16, 'refno')}
    records = evaluation.records(parse_brs(BRS_SAMPLE_11))
    test_set = evaluation.record_to_set(ev.scan(next(records)))
    assert test_set == ideal_set

    ideal_set = {(26, 32, 'sample'), (47, 53, 'sample'), (54, 58, 'data')}
    records = evaluation.records(parse_brs(BRS_SAMPLE_21))
    test_set = evaluation.record_to_set(ev.scan(next(records)))
    assert test_set == ideal_set
def test_02():
    xml = et.fromstring('<a>Hello, <i>bright</i> <b>world</b></a>')
    segments = [
        e['text']
        for e in segment_text(ev.scan(xml), {0, 2, 8})
        if e['type'] == ev.TEXT
    ]
    assert segments == ['He', 'llo, ', 'b', 'right', ' ', 'world']
def fuse(xml1, xml2, auto_segment=True, prefer_slave_inner=True,
         strip_slave_top_tag=True):
    '''Fuses the markup of two XML trees (xml1 is the "master", xml2 the
    "slave") carrying the same character data, returning a single tree.'''
    nsmap = xml2.nsmap or {}
    nsmap.update(xml1.nsmap or {})  # master namespaces win on conflict
    xml1 = list(ev.scan(xml1))
    xml2 = list(ev.scan(xml2))
    if strip_slave_top_tag:
        _, *xml2, _ = xml2  # drop the slave's enclosing ENTER/EXIT pair
    events = fuse_events(xml1, xml2,
                         prefer_slave_inner=prefer_slave_inner,
                         auto_segment=auto_segment)
    return ev.unscan(events, nsmap=nsmap)
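# A hedged usage sketch for fuse(), assuming (as the code above suggests)
# that xml1 is the "master" markup layer and xml2 the "slave" layer, and
# that both trees carry identical character data. The element names below
# are illustrative, not taken from this repo.
import lxml.etree as et

master = et.fromstring('<doc>Hello, <b>world</b></doc>')
slave = et.fromstring('<doc>Hello, <name>world</name></doc>')

# One tree now carries both markup layers over the shared text; the exact
# nesting of <b> and <name> depends on prefer_slave_inner.
fused = fuse(master, slave)
print(et.tostring(fused))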
def test_03a():
    xml = et.fromstring('<a>Hello, bright<br/> <b>world</b></a>')
    tokens = list(as_token_stream(ev.scan(xml)))

    a = dict(type=ev.ENTER, tag='a')
    a_ = dict(type=ev.EXIT, peer=a)
    b = dict(type=ev.ENTER, tag='b')
    b_ = dict(type=ev.EXIT, peer=b)
    br = dict(type=ev.ENTER, tag='br')
    br_ = dict(type=ev.EXIT, peer=br)

    assert tokens == [
        Token(prefix=[a], text='Hello, bright'),
        Token(prefix=[{'type': 'spot', 'spot': [br, br_]}], text=' '),
        Token(prefix=[b], text='world', suffix=[b_, a_]),
    ]
def test_03():
    xml = et.fromstring('<a>Hello, <i><s>bright</s></i> <b>world</b></a>')
    tokens = list(as_token_stream(ev.scan(xml)))

    a = dict(type=ev.ENTER, tag='a')
    i = dict(type=ev.ENTER, tag='i')
    s = dict(type=ev.ENTER, tag='s')
    b = dict(type=ev.ENTER, tag='b')

    assert tokens == [
        Token(prefix=[a], text='Hello, '),
        Token(prefix=[i, s], text='bright',
              suffix=[dict(type=ev.EXIT, peer=s), dict(type=ev.EXIT, peer=i)]),
        Token(text=' '),
        Token(prefix=[b], text='world',
              suffix=[dict(type=ev.EXIT, peer=b), dict(type=ev.EXIT, peer=a)]),
    ]
def tokens_and_iob_labels_from_record(r, tokenizer):
    '''
    Given a BRS record and a text tokenizer, parses the record and returns
    two lists: the list of tokens and the list of corresponding IOB labels.
    '''
    assert r.tag == BRS_R, r

    inputs = []
    targets = []
    label = None
    first = True
    for obj, peer in ev.with_peer(ev.scan(r)):
        if obj['type'] == ev.TEXT:
            span = list(tokenizer(obj['text']))
            if span:
                inputs.extend(span)
                if label is None:
                    targets.extend(['O'] * len(span))
                elif first:
                    # first token of a labeled span gets the B- prefix
                    targets.extend(['B-' + label] + ['I-' + label] * (len(span) - 1))
                    first = False
                else:
                    targets.extend(['I-' + label] * len(span))
        elif obj['type'] == ev.ENTER:
            if obj['tag'] == BRS_S:
                if label is not None:
                    raise RuntimeError('Nesting of <brs:s> not supported')
                label = obj['attrib']['l']
                first = True
        elif obj['type'] == ev.EXIT:
            if peer['tag'] == BRS_S:
                assert peer['attrib']['l'] == label
                label = None

    return inputs, targets
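# A hedged usage sketch for tokens_and_iob_labels_from_record(). The
# whitespace tokenizer and the brs namespace URI below are illustrative;
# the namespace must actually match whatever BRS_R and BRS_S are bound to
# in this module.
import lxml.etree as et

def whitespace_tokenizer(text):
    return text.split()

record = et.fromstring(
    '<brs:r xmlns:brs="http://example.com/brs">'
    'Call <brs:s l="refno">A123</brs:s> today</brs:r>')

tokens, labels = tokens_and_iob_labels_from_record(record, whitespace_tokenizer)
# Under the assumptions above:
#   tokens == ['Call', 'A123', 'today']
#   labels == ['O', 'B-refno', 'O']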
def test_01():
    xml = et.fromstring('<a>Hello, <i>bright</i> <b>world</b></a>')
    offsets = text_offsets(ev.scan(xml))
    assert offsets == {0, 7, 13, 14, 19}
import lxml.etree as et
import lxmlx.event as ev

xml = et.fromstring('<a>Hello<?pi?> world!</a>')
print(et.tostring(ev.unscan(ev.scan(xml))))
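# For a closer look, each event yielded by ev.scan() is a plain dict. A
# sketch of the stream for the snippet above (the exact shape of the
# processing-instruction event may differ):
#
#   {'type': ev.ENTER, 'tag': 'a'}
#   {'type': ev.TEXT, 'text': 'Hello'}
#   {'type': ev.PI, ...}                 # the <?pi?> node
#   {'type': ev.TEXT, 'text': ' world!'}
#   {'type': ev.EXIT}
for obj in ev.scan(xml):
    print(obj)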
def evaluate(golden_records, predicted_records, confidence=10.0):
    """Evaluate tagger service result against the golden truth.

    :param golden_records: XML with "true" labeling in BRS format
    :type golden_records: Parsed XML
    :param predicted_records: XML with predicted records in BRS format
    :type predicted_records: Parsed XML
    :param confidence: Record-level confidence threshold value
    :type confidence: float

    Note that the number of records in both files must be the same, and
    each record in the predicted file must have a corresponding record in
    golden_records.

    :returns: Evaluation statistics
    :rtype: dict

    Contents:
        record_count:         Number of records evaluated
        gold_tag_count:       Number of annotations in golden records
        pred_tag_count:       Number of annotations in predicted records
        tp:                   Number of true positives
        fp:                   Number of false positives
        fn:                   Number of false negatives
        correct:              Number of correct records
        incorrect:            Number of incorrect records
        high_conf_records:    Number of records in the high-confidence channel
        low_conf_records:     Number of records in the low-confidence channel
        high_conf_error_rate: Error rate in the high-confidence channel
        low_conf_error_rate:  Error rate in the low-confidence channel
        record_accuracy:      Record-level accuracy of the tagger service
        tag_accuracy:         Tag-level accuracy of the tagger service
        precision:            Precision of the tagger service
        recall:               Recall of the tagger service
        f1-score:             F1-score of the tagger service
    """
    if not (isinstance(golden_records, et._Element)
            and isinstance(predicted_records, et._Element)):
        raise TypeError(
            'Invalid input object type. Expected object of type {}'.format(
                et._Element))
    if not (golden_records.tag == BRS_B and predicted_records.tag == BRS_B):
        raise ValueError('Invalid XML format. Expected XML in BRS format')
    if len(golden_records) != len(predicted_records):
        raise ValueError(
            'Received mismatched number of golden records and predicted '
            'records. Number of golden records and predicted records must '
            'be the same')

    golden_recorder = records(golden_records)
    prediction_recorder = records(predicted_records)

    stats = collections.defaultdict(int)
    for gold, pred in zip(golden_recorder, prediction_recorder):
        true_annotations = record_to_set(ev.scan(gold))
        predicted_annotations = record_to_set(ev.scan(pred))

        score = float(pred.attrib.get('c', 10.0))
        if score >= confidence:
            stats['high_conf_records'] += 1
        else:
            stats['low_conf_records'] += 1

        num_tp = len(predicted_annotations & true_annotations)
        num_fp = len(predicted_annotations - true_annotations)
        num_fn = len(true_annotations - predicted_annotations)

        stats['record_count'] += 1
        stats['gold_tag_count'] += len(true_annotations)
        stats['pred_tag_count'] += len(predicted_annotations)
        stats['tp'] += num_tp
        stats['fp'] += num_fp
        stats['fn'] += num_fn

        if num_fp == 0 and num_fn == 0:
            stats['correct'] += 1
        else:
            stats['incorrect'] += 1
            if score >= confidence:
                stats['incorrect_high_conf_records'] += 1
            else:
                stats['incorrect_low_conf_records'] += 1

    stats['high_conf_error_rate'] = 100 * stats['incorrect_high_conf_records'] / (
        stats['high_conf_records'] + 1e-9)
    stats['low_conf_error_rate'] = 100 * stats['incorrect_low_conf_records'] / (
        stats['low_conf_records'] + 1e-9)
    stats['record_accuracy'] = 100 * stats['correct'] / (
        stats['record_count'] + 1e-9)
    stats['tag_accuracy'] = 100 * stats['tp'] / (stats['gold_tag_count'] + 1e-9)

    precision = stats['tp'] / (stats['tp'] + stats['fp'] + 1e-8)
    recall = stats['tp'] / (stats['tp'] + stats['fn'] + 1e-8)
    stats['precision'] = precision * 100
    stats['recall'] = recall * 100
    stats['f1-score'] = (2.0 * precision * recall /
                         (precision + recall + 1e-8)) * 100
    return stats
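# A hedged usage sketch for evaluate(). The file names and the parse_brs
# call are illustrative; both inputs must be parsed BRS documents with the
# same number of records.
with open('gold.brs') as f:
    gold = parse_brs(f.read())
with open('pred.brs') as f:
    pred = parse_brs(f.read())

stats = evaluate(gold, pred, confidence=10.0)
print('precision: %.1f  recall: %.1f  f1: %.1f' % (
    stats['precision'], stats['recall'], stats['f1-score']))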
def md_text(p, prefix=None):
    if prefix:
        yield prefix + ' '
    for obj in md(ev.scan(p)):
        yield obj['text']
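# A hedged usage sketch for md_text(), assuming md() renders an event
# stream as Markdown text events; the prefix (e.g. a heading marker) is
# emitted verbatim before the rendered text.
import lxml.etree as et

p = et.fromstring('<p>Hello, <b>world</b></p>')
print(''.join(md_text(p, prefix='#')))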