Ejemplo n.º 1
0
def _process_json(arguments):
    """
    Build an Entity from a (title, json_string) pair.

    The decoded document is searched for a "claims" list. Each statement
    in it supplies its main snak under "m", its qualifier snaks under "q"
    and groups of reference snaks under "refs". Snaks for which
    _parse_json_snak returns a falsy value are dropped, and a statement
    whose main snak is dropped contributes no claim at all.

    @type arguments: two-item sequence (title, json_string)
    @rtype : Entity
    """
    title, json_string = arguments
    document = json.loads(json_string)
    if "claims" not in document:
        return Entity(title, [])

    def parse_all(raw_snaks):
        # Parse every raw snak in order, then keep only the truthy results.
        parsed = [_parse_json_snak(raw) for raw in raw_snaks]
        return [snak for snak in parsed if snak]

    claims = []
    for statement in document["claims"]:
        # Evaluation order matters for lookups and parsing side effects:
        # references first, then qualifiers, then the main snak.
        references = parse_all(
            raw for group in statement["refs"] for raw in group)
        qualifiers = parse_all(statement["q"])

        main_snak = _parse_json_snak(statement["m"])
        if main_snak:
            claims.append(Claim(main_snak, qualifiers, references))

    return Entity(title, claims)
Ejemplo n.º 2
0
def _process_json(data):
    """
    Build an Entity from an already decoded entity document.

    The title is taken from data["id"]. If the document has a "claims"
    mapping, its statements are visited in sorted key order. For every
    statement the optional "references" and "qualifiers" snaks and the
    "mainsnak" are parsed with _parse_json_snak; falsy parse results are
    dropped, and a statement whose main snak is dropped yields no claim.

    @type data: dict
    @rtype : Entity
    """
    title = data["id"]
    if "claims" not in data:
        return Entity(title, [])

    def parse_grouped(snaks_by_key):
        # Walk the groups in sorted key order and keep the truthy results.
        kept = []
        for _, group in sorted(snaks_by_key.items()):
            for raw in group:
                parsed = _parse_json_snak(raw)
                if parsed:
                    kept.append(parsed)
        return kept

    claims = []
    for _, statements in sorted(data["claims"].items()):
        for statement in statements:
            references = []
            if "references" in statement:
                # TODO: group reference snaks correctly
                for reference in statement["references"]:
                    # References with no snaks (empty or None) are skipped.
                    if reference["snaks"]:
                        references.extend(parse_grouped(reference["snaks"]))

            if "qualifiers" in statement:
                qualifiers = parse_grouped(statement["qualifiers"])
            else:
                qualifiers = []

            main_snak = _parse_json_snak(statement["mainsnak"])
            if main_snak:
                claims.append(Claim(main_snak, qualifiers, references))

    return Entity(title, claims)
def read_csv(input_file, delimiter=","):
    """
    Lazily read entities from a CSV stream of snak rows.

    Every row must have five columns: title, type, property id, datatype
    and value. Consecutive rows with the same title are collected into one
    Entity, which is yielded as soon as a row with a different title (or
    the end of the input) is reached. A 'claim' row starts a new Claim;
    'reference' and 'qualifier' rows are attached to the most recent claim
    of the same entity.

    Rows with a different column count, rows with an unknown type, and
    reference/qualifier rows that have no preceding claim within their
    entity are reported with logging.warning and otherwise ignored.

    @rtype : collections.Iterable[Entity]
    @type input_file: file or StringIO.StringIO
    @type delimiter: str
    @raise ValueError: if a property id cannot be converted to int
    """
    current_title = None
    current_claim = None
    claims = []
    csv_reader = csv.reader(input_file,
                            delimiter=delimiter,
                            quoting=csv.QUOTE_MINIMAL)

    for row in csv_reader:
        if len(row) != 5:
            logging.warning("error: {0}".format(row))
            continue
        title, typ, property_id, datatype, value = row
        if current_title != title:
            if current_title is not None:
                yield Entity(current_title, claims)
            current_title = title
            claims = []
            # The previous entity has already been yielded; its last claim
            # must not pick up references or qualifiers of the new entity.
            current_claim = None
        snak = Snak(int(property_id), datatype, value)
        if typ == 'claim':
            current_claim = Claim(snak)
            claims.append(current_claim)
        elif typ in ('reference', 'qualifier'):
            if current_claim is None:
                # Orphan row: there is no claim in this entity to attach to.
                logging.warning("{0} without claim: {1}".format(typ, row))
            elif typ == 'reference':
                current_claim.references.append(snak)
            else:
                current_claim.qualifiers.append(snak)
        else:
            logging.warning("unknown type: {0}".format(typ))

    if current_title is not None:
        yield Entity(current_title, claims)
Ejemplo n.º 4
0
 def test_special_cases(self):
     """A document without a "claims" key yields an entity with no claims."""
     document = {"id": "Q1", "type": "item"}
     expected = Entity("Q1", [])
     assert expected == JsonReader._process_json(document)
from propertysuggester.analyzer import RuleGenerator
from propertysuggester.analyzer.rule import Rule
from propertysuggester.utils.datamodel import Entity, Claim, Snak

# Three entities: Q15 and Q16 share the same value for property 31 while
# Q17 has a different one; only Q15 carries a second claim (property 373).
test_data1 = [
    Entity(
        'Q15',
        [
            Claim(Snak(31, 'wikibase-entityid', 'Q5107')),
            Claim(Snak(373, 'string', 'Africa')),
        ],
    ),
    Entity(
        'Q16',
        [Claim(Snak(31, 'wikibase-entityid', 'Q5107'))],
    ),
    Entity(
        'Q17',
        [Claim(Snak(31, 'wikibase-entityid', 'Q1337'))],
    ),
]

# A single entity in which property 373 occurs twice with different values.
test_data2 = [
    Entity(
        'Q15',
        [
            Claim(Snak(31, 'wikibase-entityid', 'Q5107')),
            Claim(Snak(373, 'string', 'Africa')),
            Claim(Snak(373, 'string', 'Europe')),
        ],
    ),
]

# A single entity with one claim; the claim's second and third positional
# arguments are two-element snak lists (properties 12/13 and 22/23).
test_data3 = [
    Entity(
        'Q15',
        [
            Claim(
                Snak(31, 'wikibase-entityid', 'Q5107'),
                [
                    Snak(12, 'wikibase-entityid', 'Q13'),
                    Snak(13, 'string', 'qual'),
                ],
                [
                    Snak(22, 'wikibase-entityid', 'Q345'),
                    Snak(23, 'string', 'rel'),
                ],
            ),
        ],
    ),
]
Ejemplo n.º 6
0
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

import gzip
from pkg_resources import resource_filename

from propertysuggester.parser import XmlReader
from propertysuggester.parser import CsvWriter
from propertysuggester.utils.datamodel import Entity, Claim, Snak

# One entity with two claims: a bare claim on property 31 and a claim on
# property 373 that additionally carries one 'qual' and one 'ref' snak.
test_data = [
    Entity(
        'Q51',
        [
            Claim(Snak(31, 'wikibase-entityid', 'Q5107')),
            Claim(
                Snak(373, 'string', 'Europe'),
                [Snak(1, 'string', 'qual')],
                [Snak(2, 'string', 'ref')],
            ),
        ],
    ),
]


class TestCsvWriter():
    """Tests for CsvWriter.write_csv, driven by the module-level test_data."""

    def test_write_csv(self):
        """Write test_data to an in-memory buffer and check the CSV lines."""
        out = StringIO()
        CsvWriter.write_csv(test_data, out)
        # Rewind so the rows just written can be read back from the start.
        out.seek(0)

        # First row: the claim on property 31 of entity Q51.
        line = out.readline()
        assert 'Q51,claim,31,wikibase-entityid,Q5107' == line.strip()

        # NOTE(review): the visible excerpt ends after this read; whatever is
        # asserted about the second row is not shown here.
        line = out.readline()
Ejemplo n.º 7
0
 def test_special_cases(self):
     """Documents with no claims, or only a "bad"/"unknown" main snak,
     must produce an entity without claims."""
     documents = (
         "{}",
         '{"claims":[{"m":["value","","bad"], "refs":[],"q":[]}]}',
         '{"claims":[{"m":["value","","unknown"], "refs":[],"q":[]}]}',
     )
     for document in documents:
         assert Entity("Q1", []) == XmlReader._process_json(("Q1", document))
 def test_tostring(self):
     """Converting an entity with a single claim to str must not raise."""
     snak = Snak(2, "string", "a")
     entity = Entity("Q1", [Claim(snak)])
     str(entity)