Example #1
0
def test_yml_serializer():
    """Round-trip an event through ``YamlSerializer`` via a temporary file.

    An event holding one 'plaintext' document with three label indices
    ('one', 'two' and the distinct index 'three') is written with
    ``YamlSerializer.event_to_file`` and read back with
    ``YamlSerializer.file_to_event``. The restored event must match the
    source event's id, metadata, document text and labels. Labels in index
    'two' carry the labels of index 'one' as their ``b`` field.
    """
    source_event = Event(event_id='1')
    source_event.metadata['foo'] = "bar"
    source_doc = Document('plaintext', text='Some text.')
    source_event.add_document(source_doc)

    one = label(start_index=0, end_index=5, x=10)
    two = label(start_index=6, end_index=10, x=15)
    source_doc.add_labels('one', [one, two])
    source_doc.add_labels(
        'two',
        [
            label(start_index=0, end_index=25, a='b', b=one),
            label(start_index=26, end_index=42, a='c', b=two),
        ],
    )
    source_doc.add_labels(
        'three',
        [
            label(start_index=0, end_index=10, foo=True),
            label(start_index=11, end_index=15, foo=False),
        ],
        distinct=True,
    )

    with TemporaryFile('w+') as stream:
        YamlSerializer.event_to_file(source_event, stream)
        # Rewind so the same handle can be read back from the beginning.
        stream.flush()
        stream.seek(0)
        restored_event = YamlSerializer.file_to_event(stream)

    assert restored_event.event_id == source_event.event_id
    assert restored_event.metadata['foo'] == 'bar'

    restored_doc = restored_event.documents['plaintext']
    assert restored_doc.text == source_doc.text

    assert restored_doc.labels['one'] == [one, two]

    # Expected labels are built fresh rather than reusing the added objects.
    expected_two = [
        label(start_index=0, end_index=25, a='b', b=one),
        label(start_index=26, end_index=42, a='c', b=two),
    ]
    assert restored_doc.labels['two'] == expected_two

    expected_three = [
        label(start_index=0, end_index=10, foo=True),
        label(start_index=11, end_index=15, foo=False),
    ]
    assert restored_doc.labels['three'] == expected_three
Example #2
0
def test_event_to_dict_include_label_text():
    """Check that ``event_to_dict(..., include_label_text=True)`` emits label text.

    Creates an event with a 'plaintext' document, adds one 'sentences' label
    and one 'tokens' label per ``(start, end)`` pair in ``tokens``, converts
    the event to a dictionary and verifies the ``'_text'`` entry of each
    serialized label against the matching slice of ``text``.

    ``text`` and ``tokens`` are defined outside this function.
    """
    event = Event()
    doc = event.create_document('plaintext', text)
    # NOTE(review): 117 is presumably len(text), since the sentence's '_text'
    # is asserted equal to the full text below -- confirm against the fixture.
    doc.add_labels('sentences', [label(0, 117)])
    doc.add_labels('tokens', [label(start, end) for start, end in tokens])

    d_event = event_to_dict(event, include_label_text=True)
    d_label_indices = d_event['documents']['plaintext']['label_indices']

    d_sentences = d_label_indices['sentences']
    assert d_sentences['json_labels'][0]['_text'] == text

    d_tokens = d_label_indices['tokens']['json_labels']
    # Without this check the loop below would pass vacuously if serialization
    # dropped some (or all) of the token labels.
    assert len(d_tokens) == len(tokens)
    for d_token, (start, end) in zip(d_tokens, tokens):
        assert d_token['_text'] == text[start:end]
Example #3
0
def test_yml_deserialization():
    """Load the ``event.yml`` fixture and verify the deserialized event.

    The fixture sits in the same directory as this test module. The event id,
    metadata, document text, the number of label indices, and the contents of
    the 'one', 'two' and 'three' label indices are all checked.
    """
    fixture_path = Path(__file__).parent.joinpath('event.yml')
    loaded = YamlSerializer.file_to_event(fixture_path)

    assert loaded.event_id == '12345'
    assert loaded.metadata['foo'] == 'bar'

    plaintext = loaded.documents['plaintext']
    assert plaintext.text == "The quick brown fox jumps over the lazy dog."
    assert len(plaintext.get_label_indices_info()) == 3

    expected_one = [
        label(start_index=0, end_index=10, a="b"),
        label(start_index=12, end_index=25, a="c"),
        label(start_index=26, end_index=52, a="d"),
        label(start_index=53, end_index=85, a="e"),
    ]
    assert plaintext.get_label_index("one") == expected_one

    expected_two = [
        label(start_index=0, end_index=10, x=1),
        label(start_index=3, end_index=9, x=3),
        label(start_index=4, end_index=25, x=2),
        label(start_index=5, end_index=25, x=4),
    ]
    assert plaintext.get_label_index("two") == expected_two

    expected_three = [
        label(start_index=0, end_index=10, x=True),
        label(start_index=3, end_index=9, x=True),
        label(start_index=4, end_index=25, x=False),
        label(start_index=5, end_index=25, x=False),
    ]
    assert plaintext.get_label_index("three") == expected_three
Example #4
0
def test_yml_serializer():
    """Serialize an event with ``YamlSerializer`` and inspect the raw YAML.

    The event is written to a temporary file with
    ``YamlSerializer.event_to_file`` and then parsed back with ``yaml.load``,
    so the assertions run against the plain mapping structure of the output:
    the event id, metadata, document text, and each label index's
    ``json_labels`` list and ``distinct`` flag.
    """
    source_event = Event(event_id='1')
    source_event.metadata['foo'] = "bar"
    source_doc = Document('plaintext', text='Some text.')
    source_event.add_document(source_doc)

    source_doc.add_labels(
        'one',
        [
            label(start_index=0, end_index=5, x=10),
            label(start_index=6, end_index=10, x=15),
        ],
    )
    source_doc.add_labels(
        'two',
        [
            label(start_index=0, end_index=25, a='b'),
            label(start_index=26, end_index=42, a='c'),
        ],
    )
    source_doc.add_labels(
        'three',
        [
            label(start_index=0, end_index=10, foo=True),
            label(start_index=11, end_index=15, foo=False),
        ],
        distinct=True,
    )

    with TemporaryFile('w+') as stream:
        YamlSerializer.event_to_file(source_event, stream)
        # Rewind so the YAML just written can be parsed from the start.
        stream.flush()
        stream.seek(0)
        parsed = yaml.load(stream, Loader=Loader)

    assert parsed['event_id'] == '1'
    assert parsed['metadata']['foo'] == 'bar'

    parsed_doc = parsed['documents']['plaintext']
    assert parsed_doc['text'] == 'Some text.'

    parsed_indices = parsed_doc['label_indices']
    assert len(parsed_indices) == 3

    expected_one = {
        'json_labels': [
            {'start_index': 0, 'end_index': 5, 'x': 10},
            {'start_index': 6, 'end_index': 10, 'x': 15},
        ],
        'distinct': False,
    }
    assert parsed_doc['label_indices']['one'] == expected_one

    expected_two = {
        'json_labels': [
            {'start_index': 0, 'end_index': 25, 'a': 'b'},
            {'start_index': 26, 'end_index': 42, 'a': 'c'},
        ],
        'distinct': False,
    }
    assert parsed_doc['label_indices']['two'] == expected_two

    expected_three = {
        'json_labels': [
            {'start_index': 0, 'end_index': 10, 'foo': True},
            {'start_index': 11, 'end_index': 15, 'foo': False},
        ],
        'distinct': True,
    }
    assert parsed_doc['label_indices']['three'] == expected_three