Ejemplo n.º 1
0
    def test_docs_serialization_format(self):
        '''
        Checks that Jeremia's output for a document list and for a single
        document equals the stored reference serializations.
        '''
        import json
        from eWRT.util.module_path import get_resource

        def load_reference(relative_path):
            # close the reference file deterministically instead of leaving
            # the handle to the garbage collector
            with open(get_resource(__file__, relative_path)) as handle:
                return json.load(handle)

        def canonical(item):
            # The list items are presumably dicts, which are not orderable
            # under Python 3; a key-sorted JSON dump gives a stable sort key.
            # NOTE(review): assumes the items are JSON-serializable - confirm.
            return json.dumps(item, sort_keys=True)

        DOCS = [{'id': 7,
                 'body': 'Ehre sei Gott.',
                 'title': '',
                 'format': 'text/html',
                 'header': {'test': 'testvalue'}},
                {'id': 8,
                 'body': '',
                 'title': 'Guten Tag!',
                 'format': 'text/html',
                 'header': {}}]
        REFERENCE_MULTI = load_reference(
            'data/jeremia_reference_output_documents.json')
        REFERENCE_SINGLE = load_reference(
            'data/jeremia_reference_output_single_document.json')

        # document list (compared independently of the returned order)
        j = Jeremia()
        result = j.submit_documents(DOCS)
        assert (sorted(REFERENCE_MULTI, key=canonical)
                == sorted(result, key=canonical))

        # single document
        result = j.submit_document(DOCS[0])
        assert REFERENCE_SINGLE == result
Ejemplo n.º 2
0
    def test_single_document_with_annotations(self):
        '''
        Tests the handling of single document annotations.

        Submits one document carrying two title and two body annotations,
        checks that four annotations come back and verifies the start/end
        offsets of the annotations whose md5sum is known. The test returns
        early, without checking anything, for Jeremia versions below 0.0.4.
        '''
        import re

        DOC = {'id': 12,
               'body': 'UBS has finally succeeded. They obtained a 10% share of CS.',
               'title': 'UBS versus Credit Suisse.',
               'format': 'text/html',
               'title_annotation': [{'start': 0, 'end': 3, 'surfaceForm': 'UBS', 'key': 'http://dbpedia.org/UBS'},
                                    {'start': 11, 'end': 24, 'surfaceForm': 'Credit Suisse', 'key': 'http://dbpedia.org/Credit Suisse'}],
               'body_annotation': [{'start': 0, 'end': 3, 'surfaceForm': 'UBS', 'key': 'http://dbpedia.org/UBS'},
                                   {'start': 56, 'end': 58, 'surfaceForm': 'CS', 'key': 'http://dbpedia.org/Credit Suisse'}],
               'header': {},
               }

        j = Jeremia()

        # this test requires Jeremia version 0.0.4+
        # Compare the numeric components: a plain string comparison is
        # lexicographic and would rank e.g. "0.0.10" below "0.0.4".
        version = j.version()
        if isinstance(version, bytes):
            version = version.decode('ascii', 'replace')
        if tuple(int(part) for part in re.findall(r'\d+', version)) < (0, 0, 4):
            return

        print('submitting document with annotations...')
        result = j.submit_document(DOC)

        # check: all annotations have been preserved
        print(result)
        assert len(result['annotation']) == 4

        # check: annotations
        for annotation in result['annotation']:
            # title (both title annotations share the same md5sum)
            if annotation['md5sum'] == '8e3f3deac5e6c01dab521c07e3a60d7b':
                assert annotation['start'] in (0, 11)
                assert annotation['end'] in (3, 24)
            # first body sentence
            elif annotation['md5sum'] == 'ffafdc744dcda3d58ab6eafc86ad99b1':
                assert annotation['start'] == 0
                assert annotation['end'] == 3
            # second body sentence with adjusted indices
            elif annotation['md5sum'] == '25faaf0960a68ae741125ca436b330ee':
                assert annotation['start'] == 29
                assert annotation['end'] == 31
Ejemplo n.º 3
0
    def test_docs_serialization_format(self):
        '''
        Checks that Jeremia's output for a document list and for a single
        document equals the stored reference serializations.
        '''
        import json
        from eWRT.util.module_path import get_resource

        def load_reference(relative_path):
            # close the reference file deterministically instead of leaving
            # the handle to the garbage collector
            with open(get_resource(__file__, relative_path)) as handle:
                return json.load(handle)

        def canonical(item):
            # The list items are presumably dicts, which are not orderable
            # under Python 3; a key-sorted JSON dump gives a stable sort key.
            # NOTE(review): assumes the items are JSON-serializable - confirm.
            return json.dumps(item, sort_keys=True)

        DOCS = [{
            'id': 7,
            'body': 'Ehre sei Gott.',
            'title': '',
            'format': 'text/html',
            'header': {
                'test': 'testvalue'
            }
        }, {
            'id': 8,
            'body': '',
            'title': 'Guten Tag!',
            'format': 'text/html',
            'header': {}
        }]
        REFERENCE_MULTI = load_reference(
            'data/jeremia_reference_output_documents.json')
        REFERENCE_SINGLE = load_reference(
            'data/jeremia_reference_output_single_document.json')

        # document list (compared independently of the returned order)
        j = Jeremia()
        result = j.submit_documents(DOCS)
        assert (sorted(REFERENCE_MULTI, key=canonical)
                == sorted(result, key=canonical))

        # single document
        result = j.submit_document(DOCS[0])
        assert REFERENCE_SINGLE == result
Ejemplo n.º 4
0
    def test_single_document_with_annotations(self):
        '''
        Tests the handling of single document annotations.

        Submits one document carrying two title and two body annotations,
        checks that four annotations come back and verifies the start/end
        offsets of the annotations whose md5sum is known. The test returns
        early, without checking anything, for Jeremia versions below 0.0.4.
        '''
        import re

        DOC = {
            'id':
            12,
            'body':
            'UBS has finally succeeded. They obtained a 10% share of CS.',
            'title':
            'UBS versus Credit Suisse.',
            'format':
            'text/html',
            'title_annotation': [{
                'start': 0,
                'end': 3,
                'surfaceForm': 'UBS',
                'key': 'http://dbpedia.org/UBS'
            }, {
                'start': 11,
                'end': 24,
                'surfaceForm': 'Credit Suisse',
                'key': 'http://dbpedia.org/Credit Suisse'
            }],
            'body_annotation': [{
                'start': 0,
                'end': 3,
                'surfaceForm': 'UBS',
                'key': 'http://dbpedia.org/UBS'
            }, {
                'start': 56,
                'end': 58,
                'surfaceForm': 'CS',
                'key': 'http://dbpedia.org/Credit Suisse'
            }],
            'header': {},
        }

        j = Jeremia()

        # this test requires Jeremia version 0.0.4+
        # Compare the numeric components: a plain bytes/string comparison is
        # lexicographic and would rank e.g. "0.0.10" below "0.0.4".
        version = j.version()
        if isinstance(version, bytes):
            version = version.decode('ascii', 'replace')
        if tuple(int(part)
                 for part in re.findall(r'\d+', version)) < (0, 0, 4):
            return

        print('submitting document with annotations...')
        result = j.submit_document(DOC)

        # check: all annotations have been preserved
        print(result)
        assert len(result['annotation']) == 4

        # check: annotations
        for annotation in result['annotation']:
            # title (both title annotations share the same md5sum)
            if annotation['md5sum'] == '8e3f3deac5e6c01dab521c07e3a60d7b':
                assert annotation['start'] in (0, 11)
                assert annotation['end'] in (3, 24)
            # first body sentence
            elif annotation['md5sum'] == 'ffafdc744dcda3d58ab6eafc86ad99b1':
                assert annotation['start'] == 0
                assert annotation['end'] == 3
            # second body sentence with adjusted indices
            elif annotation['md5sum'] == '25faaf0960a68ae741125ca436b330ee':
                assert annotation['start'] == 29
                assert annotation['end'] == 31
Ejemplo n.º 5
0
 def test_single_document_processing(self):
     '''
     Submits the second entry of ``self.DOCS`` as a single document and
     checks that the result is not an empty string.
     '''
     j = Jeremia()
     print('submitting document...')
     document_annotated = j.submit_document(self.DOCS[1])
     # assertNotEqual shows the compared values when the check fails,
     # whereas assertTrue(x != "") only reports "False is not true"
     self.assertNotEqual(document_annotated, "")
Ejemplo n.º 6
0
    '''
    add support for calling Jeremia tests as part of a test suite
    '''
    suite = unittest.TestSuite()
    suite.addTest(unittest.makeSuite(JeremiaTest, 'test'))
    return suite


if __name__ == '__main__':
    # Without a command-line argument the unit tests are run; with one, the
    # argument is submitted to Jeremia as the body of an ad-hoc document
    # (carrying one test annotation) and the response is printed.
    if len(argv) <= 1:
        unittest.main()
    else:
        document = {
            'id': '192292',
            'body': argv[1],
            'title': '',
            'format': 'text/html',
            'header': {
                'test': 'testvalue'
            },
            'body_annotation': [{
                'start': 0,
                'end': 3,
                'key': 'test annotation'
            }],
        }
        client = Jeremia()
        print(client.submit_document(document))
Ejemplo n.º 7
0
 def test_single_document_processing(self):
     '''
     Submits the second entry of ``self.DOCS`` as a single document and
     checks that the result is not an empty string.
     '''
     j = Jeremia()
     print('submitting document...')
     document_annotated = j.submit_document(self.DOCS[1])
     # assertNotEqual shows the compared values when the check fails,
     # whereas assertTrue(x != "") only reports "False is not true"
     self.assertNotEqual(document_annotated, "")
Ejemplo n.º 8
0
        # single document
        result = j.submit_document(DOCS[0])
        assert REFERENCE_SINGLE == result


def test_suite():
    '''
    add support for calling Jeremia tests as part of a test suite

    Returns a ``unittest.TestSuite`` holding the ``test``-prefixed methods
    of ``JeremiaTest``.
    '''
    # unittest.makeSuite is deprecated since Python 3.11 and removed in 3.13;
    # the default loader collects the same ``test``-prefixed methods.
    suite = unittest.TestSuite()
    suite.addTest(
        unittest.defaultTestLoader.loadTestsFromTestCase(JeremiaTest))
    return suite


if __name__ == '__main__':
    # Without a command-line argument the unit tests are run; with one, the
    # argument is submitted to Jeremia as the body of an ad-hoc document
    # (carrying one test annotation) and the response is printed.
    if len(argv) <= 1:
        unittest.main()
    else:
        document = {'id': '192292',
                    'body': argv[1],
                    'title': '',
                    'format': 'text/html',
                    'header': {'test': 'testvalue'},
                    'body_annotation': [
                        {'start': 0, 'end': 3, 'key': 'test annotation'}]}
        client = Jeremia()
        print(client.submit_document(document))