コード例 #1
0
    def _collect(  # type: ignore
        self, nif_context: str
    ) -> Iterator[Dict[str, str]]:
        """Yield one record for every NIF context string in ``nif_context``.

        Each statement produced by ``NIFParser(nif_context)`` is examined. It
        becomes a record when the ``nif`` attribute of its subject is
        ``"context"`` and the fragment of its predicate is ``"isString"``.
        Matching statements whose document name or ``oldid`` attribute
        resolves to ``None`` are skipped.

        Args:
            nif_context: Handed unchanged to ``NIFParser``.

        Yields:
            A newly created dictionary with the keys ``"text"`` (the statement
            object converted with ``toPython()``), ``"doc_name"`` and
            ``"oldid"``.
        """
        for context_statements in NIFParser(nif_context):
            for s, v, o, c in context_statements:
                nif_type = get_resource_attribute(s, "nif")
                print_progress(f"Collecting DBpedia resource: [{c.identifier}]")

                fragment = get_resource_fragment(v)
                if nif_type != "context" or fragment != "isString":
                    continue

                text = o.toPython()
                doc_name: Optional[str] = get_resource_name(s)
                old_id: Optional[str] = get_resource_attribute(
                    c.identifier, "oldid"
                )
                if doc_name is None or old_id is None:
                    continue

                # Build a new dict for every record. Yielding one shared dict
                # that is mutated between yields would make every record a
                # caller keeps (e.g. ``list(self._collect(...))``) alias the
                # most recent one.
                yield {"text": text, "doc_name": doc_name, "oldid": old_id}
コード例 #2
0
    def _collect(self, nif_context: str  # type: ignore
                 ) -> Iterator[Dict[str, str]]:
        """Yield one record for every NIF context string in ``nif_context``.

        Each statement produced by ``NIFParser(nif_context)`` is examined. It
        becomes a record when the ``nif`` attribute of its subject is
        ``'context'`` and the fragment of its predicate is ``'isString'``.

        Args:
            nif_context: Handed unchanged to ``NIFParser``.

        Yields:
            A newly created dictionary with the keys ``'text'`` (the statement
            object converted with ``toPython()``), ``'doc_name'`` and
            ``'oldid'``. Records whose name or ``oldid`` lookup gives ``None``
            are not yielded.
        """
        for context_statements in NIFParser(nif_context):
            for s, v, o, c in context_statements:
                nif_type = get_resource_attribute(s, 'nif')
                print_progress(f'Collecting DBpedia resource: [{c.identifier}]')

                if nif_type != 'context':
                    continue
                if get_resource_fragment(v) != 'isString':
                    continue

                text = o.toPython()
                doc_name = get_resource_name(s)
                old_id = get_resource_attribute(c.identifier, 'oldid')
                if doc_name is None or old_id is None:
                    # The declared record type is Dict[str, str]; a missing
                    # name or oldid must not leak out as a None value.
                    # NOTE(review): assumes the helpers signal "not found"
                    # with None -- confirm against their signatures.
                    continue

                # Build a new dict for every record. Yielding one shared dict
                # that is mutated between yields would make every record a
                # caller keeps alias the most recent one.
                yield {'text': text, 'doc_name': doc_name, 'oldid': old_id}
コード例 #3
0
    def test_nif_parser(self):
        """Compare every statement parsed from the sample file to a fixture.

        ``NIFParser`` is run over ``nif_page_structure.tql`` in
        ``self.data_dir``. For each statement the context base, the predicate
        fragment, the subject's resource name and the subject URL without
        parameters are collected and compared, in order, with the expected
        tuples.
        """
        nif_path = os.path.join(self.data_dir, "nif_page_structure.tql")

        parsed = [
            (
                context_base(c),
                get_resource_fragment(v),
                get_resource_name(s),
                strip_url_params(s),
            )
            for statements in NIFParser(nif_path)
            for s, v, _o, c in statements
        ]

        # Every expected tuple shares these three values; only the predicate
        # fragment changes from one statement to the next.
        wiki_page = "http://en.wikipedia.org/wiki/Animalia_(book)"
        resource_name = "Animalia_(book)"
        resource_url = "http://dbpedia.org/resource/Animalia_(book)"

        # Predicate fragments in the exact order the statements are expected.
        fragments = [
            "type",
            "notation",
            "beginIndex",
            "endIndex",
            "referenceContext",
            "superString",
            "hasSection",
            "firstSection",
            "lastSection",
            "type",
            "beginIndex",
            "endIndex",
            "referenceContext",
            "superString",
            "hasParagraph",
            "lastParagraph",
            "type",
            "notation",
            "beginIndex",
            "endIndex",
            "referenceContext",
            "superString",
            "hasSection",
            "firstSection",
            "type",
            "referenceContext",
            "beginIndex",
            "endIndex",
        ]
        expected = [
            (wiki_page, fragment, resource_name, resource_url)
            for fragment in fragments
        ]
        self.assertEqual(parsed, expected)
コード例 #4
0
    def test_nif_parser(self):
        """Check the statements ``NIFParser`` yields for the sample file.

        Parses ``nif_page_structure.tql`` from ``self.data_dir`` and, for each
        statement, records (context base, predicate fragment, subject resource
        name, subject URL without parameters). The recorded list must equal
        the expected list, including order.
        """
        path = os.path.join(self.data_dir, 'nif_page_structure.tql')

        actual = []
        for batch in NIFParser(path):
            actual.extend(
                (context_base(ctx), get_resource_fragment(pred),
                 get_resource_name(subj), strip_url_params(subj))
                for subj, pred, _, ctx in batch
            )

        # All expected tuples share the page, name and resource values; only
        # the predicate fragment differs between them.
        page = 'http://en.wikipedia.org/wiki/Animalia_(book)'
        name = 'Animalia_(book)'
        resource = 'http://dbpedia.org/resource/Animalia_(book)'

        # Fragments in expected statement order (rows are layout only).
        fragments = (
            'type', 'notation', 'beginIndex', 'endIndex',
            'referenceContext', 'superString', 'hasSection',
            'firstSection', 'lastSection',
            'type', 'beginIndex', 'endIndex', 'referenceContext',
            'superString', 'hasParagraph', 'lastParagraph',
            'type', 'notation', 'beginIndex', 'endIndex',
            'referenceContext', 'superString', 'hasSection',
            'firstSection',
            'type', 'referenceContext', 'beginIndex', 'endIndex',
        )
        expected = [(page, fragment, name, resource)
                    for fragment in fragments]
        self.assertEqual(actual, expected)