Esempio n. 1
0
    def set_claim_with_start_and_end(self, prop, target_values, main_key,
                                     protoclaims):
        """
        Store claims qualified with start and end dates in protoclaims.

        The main entry of every value is resolved via kulturnav2Wikidata,
        wrapped in a WD.Statement and passed, together with the value's
        u'start' and u'end' entries, to helpers.add_start_end_qualifiers.
        Nothing is stored if target_values is empty or None.

        @param prop: the property of the claim
        @type prop: str
        @param target_values: the values for the claim
        @type target_values: dict|list (of dict)|None
        @param main_key: the key for the main entry of the target_values dict
        @type main_key: str
        @param protoclaims: the dict of claims to add
        @type protoclaims: dict
        """
        if not target_values:
            return

        qualified = [
            helpers.add_start_end_qualifiers(
                WD.Statement(self.kulturnav2Wikidata(entry[main_key])),
                entry[u'start'],
                entry[u'end'])
            for entry in helpers.listify(target_values)
        ]
        if qualified:
            protoclaims[prop] = qualified
    def withClaimTest(self, hitItem, P, Q, descr, orNone=True):
        """
        Execute base test that an item contains a certain claim.

        The test passes if hitItem has a P claim matching any of the Q
        values. If hitItem has no P claim at all the outcome is decided
        by orNone. If hitItem has P claims but none of them match, a
        warning is output and the test fails.

        param hitItem: item to check
        param P: the property to look for (with or without the 'P' prefix)
        param Q: (list) of Q claim to look for
        param descr: a descriptive text (used in the warning message)
        param orNone: if complete absence of the Property is also ok
        return bool
        """
        P = u'P%s' % P.lstrip('P')
        testItems = [self.wd.QtoItemPage(q) for q in helpers.listify(Q)]

        if P not in hitItem.claims:
            # no P claim: always answer with a real bool rather than
            # silently falling through and returning None
            return bool(orNone)

        if any(self.wd.has_claim(P, testItem, hitItem)
               for testItem in testItems):
            return True

        # the property is present but none of the wanted values matched
        pywikibot.output(u'%s is identified as something other '
                         u'than a %s. Check!' %
                         (hitItem.title(), descr))
        return False
Esempio n. 3
0
    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Build a statement and append it to the item's statement list.

        The value is converted with make_pywikibot_item, wrapped with
        make_statement, decorated with the given qualifiers and finally
        stored (together with its property and reference) under
        self.wd_item["statements"].

        :param prop_name: P-item representing property
            (a key into self.props)
        :type prop_name: string
        :param value: content of the statement
        :type value: it can be a string representing
                      a Q-item or a dictionary of an amount
        :param quals: possibly qualifier items
        :type quals: a wikidatastuff Qualifier item,
                      or a list of them
        :param ref: reference item
        :type ref: a wikidatastuff Reference item
        """
        # look these up first so an unknown property fails early
        statement_list = self.wd_item["statements"]
        pid = self.props[prop_name]

        qualifiers = [] if quals is None else quals
        statement = self.make_statement(self.make_pywikibot_item(value))
        for qualifier in helpers.listify(qualifiers):
            statement.addQualifier(qualifier)

        entry = {"prop": pid, "value": statement, "ref": ref}
        statement_list.append(entry)
Esempio n. 4
0
    def format_protoclaims(self):
        """
        Create a preview table (wikitext) for the protoclaims.

        One row is produced per non-empty statement in self.protoclaims.
        A References column is only added if at least one statement carries
        its own reference; in that case, and if self.ref is set, rows
        without a reference display a 'default reference' placeholder.

        :return: the wikitext of the table
        """
        header = ("{| class='wikitable'\n"
                  "|-\n"
                  "! Property\n"
                  "! Value\n"
                  "! Qualifiers\n")
        footer = '|}'
        row_template = '|-\n| {prop} \n| {value} \n| {quals} \n'

        # collect the cell contents of each table row
        rows = []
        for prop, statements in self.protoclaims.items():
            if not statements:
                continue
            prop = PreviewItem.make_wikidata_template(prop)
            for statement in helpers.listify(statements):
                if statement is None or statement.isNone():
                    continue

                # multiple qualifiers are displayed as a bullet list
                if not statement.quals:
                    qual_lines = []
                elif len(statement.quals) > 1:
                    qual_lines = [
                        '* {}'.format(PreviewItem.format_qual(qual))
                        for qual in statement.quals
                    ]
                else:
                    qual_lines = [
                        PreviewItem.format_qual(statement.quals[0])]

                if statement.ref:
                    reference = '\n{}'.format(
                        PreviewItem.format_reference(statement.ref))
                else:
                    reference = ''

                rows.append({
                    'prop': prop,
                    'value': PreviewItem.format_itis(statement),
                    'quals': ' \n'.join(qual_lines),
                    'references': reference
                })

        # the reference column is only needed if some statement has one
        if any(row.get('references') for row in rows):
            placeholder = PreviewItem.make_text_italics('default reference')
            header += '! References\n'
            row_template += '| {references} \n'
            if self.ref:
                for row in rows:
                    if not row['references']:
                        row['references'] = placeholder

        # assemble the table
        body = ''.join(row_template.format(**row) for row in rows)
        return header + body + footer
Esempio n. 5
0
 def add_statement(self, prop_name, value, quals=None, ref=None):
     """
     Append a statement for the given property to the item's statements.

     :param prop_name: key into self.props identifying the property
     :param value: content of the statement (passed to
         make_pywikibot_item)
     :param quals: a qualifier, or a list of qualifiers, to attach
     :param ref: reference stored alongside the statement
     """
     # resolve the target list and the property before building anything
     statements = self.wd_item["statements"]
     pid = self.props[prop_name]

     qualifiers = quals if quals is not None else []
     converted = self.make_pywikibot_item(value)
     new_statement = self.make_statement(converted)
     for qualifier in helpers.listify(qualifiers):
         new_statement.addQualifier(qualifier)

     statements.append({
         "prop": pid,
         "value": new_statement,
         "ref": ref,
     })
def get_wdq(dataset=None, data=None):
    """Find all links from Wikidata to Kulturnav using WDQ.

    Queries WDQ for items with a P1248 claim, optionally limited to
    claims with a P972 qualifier matching one of the given datasets, and
    then reads the WDQ stats page to get the timestamp of the data.

    @todo:
    To replace with wdqs we need something like:
    SELECT ?item ?value
      WHERE {
          ?item p:P1248 ?data .
          ?item wdt:P1248 ?value .
          {?data pq:P972 wd:Q20742915} UNION
          {?data pq:P972 wd:Q20734454}
     }

    @param dataset: Q-id (or list of Q-ids) corresponding to a dataset.
    @type dataset: str or list of str
    @param data: dictionary to which data should be added. It is updated
        in place (also when it is empty) and returned.
    @type data: dict
    @return: (timestamp, dict {qid: uuid})
    @rtype: tuple (str, dict)
    """
    # initialise if needed. Compare against None so that an empty dict
    # supplied by the caller is filled rather than silently replaced.
    if data is None:
        data = {}
    dataset = helpers.listify(dataset) or []

    # make query
    pid = '1248'
    query = u'CLAIM[%s]' % pid
    if dataset:
        qualifiers = u','.join(u'972:%s' % d.lstrip('Q') for d in dataset)
        query += u'{CLAIM[%s]}' % qualifiers

    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    result = wd_query.query(wd_queryset, props=[str(pid)])

    # extract pairs: element 0 is the numeric part of the Q-id and
    # element 2 the claim value
    for entry in result['props'][pid]:
        data[u'Q%d' % entry[0]] = entry[2]

    # get current timestamp, making sure the connection gets closed
    needle = u'Times :'
    response = urllib2.urlopen(u'http://wdq.wmflabs.org/stats')
    try:
        stats = response.read()
    finally:
        response.close()
    # keep the text from the needle to the end of that line
    stats = stats[stats.find(needle):]
    timestamp = stats[len(needle):stats.find('\n')].strip(' -')

    return (timestamp, data)
    def resolve(self, entries, ids):
        """
        Resolve a rule to return the resulting value.

        The first-pass value is entries[self.target], provided that all of
        self.keys are present. If the rule has a viaId each first-pass
        value is then resolved through Rule.resolve_via_id. A failure to
        resolve any one of the values makes the whole rule resolve to None.

        :param entries: all the data under @graph
        :param ids: a dict of all @id values
        :return: the matching value. For viaId rules this is a single
            resolved value if there was exactly one, otherwise a list of
            resolved values. None if there was no hit.
        """
        if not Rule.hasKeys(self.keys, entries):
            return None

        value = entries[self.target]
        if not value:
            # break here if no hit
            return None

        if not self.viaId:
            return value

        # convert values for viaId rules
        resolved = []
        multi_path = isinstance(self.viaId, dict)
        # value can be either a single entry or a list
        for val in helpers.listify(value):
            if multi_path:
                # follow every path of the dict for this one value
                result = {}
                for key, via_id in self.viaId.iteritems():
                    result[key] = Rule.resolve_via_id(via_id, val, ids)
                try:
                    all_none = set(result.values()) == set([None])
                except TypeError:
                    # building the set fails for unhashable values
                    pywikibot.output("Could not handle: %s" % result)
                    return None
                if all_none:
                    # @todo: should log these to spot schema changes
                    return None
            else:  # self.viaId is either string or tuple
                result = Rule.resolve_via_id(self.viaId, val, ids)
                if not result:
                    # @todo: should log these to spot schema changes
                    return None
            resolved.append(result)

        # reformat for output (a single result undoes the listify)
        return resolved[0] if len(resolved) == 1 else resolved
Esempio n. 8
0
    def get_nationality(bot, values):
        """Get the nationality/nationalities.

        Each nationality is resolved via bot.location2Wikidata and wrapped
        in a WD.Statement. As a side effect the u'person.nationality' entry
        of values is converted to a list.

        @param bot: the instance of the bot calling upon the template
        @type bot: KulturnavBot
        @param values: the values extracted using the rules
        @type values: dict
        @return: nationalities, or None if there are none
        @rtype: list of WD.Statement|None
        """
        key = u'person.nationality'
        if not values.get(key):
            return None

        # there can be multiple values
        values[key] = helpers.listify(values[key])
        statements = [
            WD.Statement(bot.location2Wikidata(nationality))
            for nationality in values[key]
        ]
        return statements if statements else None
Esempio n. 9
0
    def set_registration_no(self, values, protoclaims):
        """Identify registration number (P879) and add to claims.

        Adds the claim to the protoclaims dict. Only registrations whose
        'type' equals self.IKNO_K are used; nothing is added if there are
        no such registrations.

        @param values: the values extracted using the rules
        @type values: dict
        @param protoclaims: the dict of claims to add
        @type protoclaims: dict
        """
        registrations = values['navalVessel.registration']
        if not registrations:
            return

        # only one type is currently mapped
        claims = [
            WD.Statement(registration['number'])
            for registration in helpers.listify(registrations)
            if registration['type'] == self.IKNO_K
        ]
        if claims:
            protoclaims[u'P879'] = claims
Esempio n. 10
0
        def claims(self, values):
            """Add protoclaims.

            Starts from the basic person claims and adds a P2538 claim
            using the objectId of the first seeAlso link pointing to the
            Nationalmuseum eMuseumPlus collection (if any).

            @param values: the values extracted using the rules
            @type values: dict
            @return: the protoclaims
            @rtype: dict PID-WD.Statement pairs
            """
            # get basic person claims
            protoclaims = Person.get_claims(self, values)

            see_also = values[u'seeAlso']
            if see_also is None:
                return protoclaims

            # a lone link is stored as a string rather than as a list
            if helpers.is_str(see_also):
                see_also = values[u'seeAlso'] = helpers.listify(see_also)

            # add claim about natmus_artist_id (first matching link only)
            marker = u'collection.nationalmuseum.se/eMuseumPlus'
            for link in see_also:
                if marker not in link:
                    continue
                object_id = link.split('objectId=')[-1].split('&')[0]
                protoclaims['P2538'] = WD.Statement(object_id)
                break

            return protoclaims
Esempio n. 11
0
    def extractStatements(self, riksdagdata):
        """Extract possible statements and names from the riksdag data.

        Also stores the intressent_id of the entry as self.current_id.

        param riksdagdata: a dict
        return tuple (dict of properties and statements, list of names)
        """
        riksdagId = riksdagdata['intressent_id']
        self.current_id = riksdagId

        # Handle statements
        protoclaims = {}
        protoclaims[GENDER_P] = self.matchGender(riksdagdata['kon'])
        protoclaims[PARTY_P] = self.matchParty(riksdagdata['parti'])
        protoclaims[LAST_NAME_P] = self.matchName(
            riksdagdata['efternamn'], 'lastName')
        protoclaims[FIRST_NAME_P] = self.matchName(
            riksdagdata['tilltalsnamn'], 'firstName')
        protoclaims[BIRTH_DATE_P] = self.matchBirth(riksdagdata['fodd_ar'])
        protoclaims[DEATH_DATE_P] = self.matchDeath(riksdagdata['status'])

        # position data is inconsistent as single entries are sometimes
        # not in a list. hence the listify
        protoclaims[POSITION_P] = self.handlePositions(
            helpers.listify(riksdagdata['personuppdrag']['uppdrag']))
        # valkrets
        # personuppgifter

        # Handle aliases
        # Note that this gives a mistake in names such as "A von B" since
        # the "von" is not part of the sort key.
        fullName = helpers.reorder_names(riksdagdata['sorteringsnamn'])
        iortAlias = self.makeIortAlias(riksdagdata['iort'], fullName)
        # Wrap the name before building the set: set() applied directly to
        # a string yields its individual characters instead of the name.
        # NOTE(review): assumes reorder_names returns a single name string
        # (a list of names is also handled) - confirm against helpers.
        names = set(helpers.listify(fullName) or [])
        if iortAlias:
            names.add(iortAlias)
        names = list(names)

        return protoclaims, names
    def __init__(self, target, keys=None, viaId=None):
        """
        Initialize the rule.

        Keys describes the environment in which the resolver expects to
        find the target.
        Target is the first-pass value to be extracted from the data. The
        result can be a list of values.
        ViaId describes the path through which a second-pass value can be
        extracted from Target.

        For data which must remain connected (such as the start and end dates
        of a particular claim) a dict can be supplied to ViaId. Each entry is
        looked for under the same Target value but follows a different ViaId
        path.

        :param target: string key for which the value is wanted
        :param keys: list|string|None of keys which must be present
            (in addition to target). If none is supplied it defaults to
            'inDataset', a key in the top node of the graph.
        :param viaId: string|tuple|dict|None if the value of "target" should be
            matched to an @id entry where this key then gives the wanted value.
            If a tuple is supplied then each intermediate entry is matched to
            an @id entry.
            If a dict is supplied then each value is treated as its own
            viaId and the returned value is a dict where each entry has been
            resolved.
        """
        self.target = target
        self.viaId = viaId

        # default to the top node in the graph; the target itself must
        # always be present so it goes last in the list of required keys
        required = list(helpers.listify(keys or 'inDataset'))
        required.append(target)
        self.keys = required
    def commit_claims(self, protoclaims, item, default_ref):
        """
        Add each claim (if new) and source it.

        Empty entries, None and Statement(None) values are skipped.
        The item is reloaded after every added claim.

        :param protoclaims: a dict of claims with
            key: Prop number
            val: Statement|list of Statements
        :param item: the target entity
        :param default_ref: main/default reference to use
        """
        for prop, statements in protoclaims.items():
            if not statements:
                continue

            # going via a set eliminates potential duplicates
            unique_statements = set(helpers.listify(statements))
            for statement in unique_statements:
                # skip if None or a Statement(None)
                if statement is None or statement.isNone():
                    continue

                # use internal reference if present, else the general
                reference = statement.ref or default_ref
                self.wd.addNewClaim(prop, statement, item, reference)

                # reload item so that next call is aware of changes
                item = self.wd.QtoItemPage(item.title())
                item.exists()
Esempio n. 14
0
    def set_homeport(self, values, protoclaims):
        """Identify homePort (P504) and add, with start/end dates, to claims.

        Adds the claim to the protoclaims dict. The 'location' of each
        home port is resolved via location2Wikidata and its u'start' and
        u'end' entries are attached as qualifiers.

        @param values: the values extracted using the rules
        @type values: dict
        @param protoclaims: the dict of claims to add
        @type protoclaims: dict
        """
        home_ports = values[u'homePort']
        if not home_ports:
            return

        claims = []
        for port in helpers.listify(home_ports):
            statement = WD.Statement(
                self.location2Wikidata(port['location']))
            qualified = helpers.add_start_end_qualifiers(
                statement, port[u'start'], port[u'end'])
            claims.append(qualified)

        if claims:
            protoclaims[u'P504'] = claims
    def populateValues(self, values, rules, hit):
        """
        Populate values and check results given a hit.

        Given a dict of values and a kulturnav hit, populate the values
        and check if the result is problem free.

        @todo: raise Error instead of using problemFree solution

        param values: dict with keys and every value as None
        param rules: a dict with keys and values either:
            None: the exact key is present in hit and its value is wanted
            a Rule: according to the Rule class
        param hit: a kulturnav entry
        return bool problemFree
        raise pywikibot.Error: if no identifier could be found
        """
        graph = hit[u'@graph']

        # first pass: index all nodes by @id for use by viaId rules
        ids = {}
        for node in graph:
            if '@id' not in node:
                continue
            node_id = node['@id']
            if node_id in ids:
                pywikibot.output('Non-unique viaID key: \n%s\n%s' %
                                 (node, ids[node_id]))
            ids[node_id] = node

        # second pass: apply every rule to every node
        problemFree = True
        for node in graph:
            for key, rule in rules.iteritems():
                if rule is None:
                    found = node.get(key)
                elif isinstance(rule, Rule):
                    found = rule.resolve(node, ids)
                else:
                    found = None

                # test and register found value
                if found is None:
                    continue
                if values[key] is None:
                    values[key] = found
                else:
                    pywikibot.output(u'duplicate entries for %s' % key)
                    problemFree = False

        # the minimum which must have been identified
        if values[u'identifier'] is None:
            raise pywikibot.Error(u'Could not isolate the identifier from the '
                                  u'KulturNav object! JSON layout must have '
                                  u'changed. Crashing!')

        # dig into sameAs/exactMatch and seeAlso
        KulturnavBot.set_sameas_values(values)

        # only look at seeAlso if we found no Wikidata link and require one
        if self.require_wikidata and not values[u'wikidata'] and \
                values[u'seeAlso']:
            values[u'seeAlso'] = helpers.listify(values[u'seeAlso'])
            for link in values[u'seeAlso']:
                if u'wikipedia' not in link:
                    continue
                pywikibot.output(u'Found a Wikipedia link but no '
                                 u'Wikidata link: %s %s' %
                                 (link, values[u'identifier']))
            # a required Wikidata link is missing, so flag the hit
            problemFree = False

        if not problemFree:
            pywikibot.output(u'Found an issue with %s (%s), skipping' %
                             (values['identifier'], values['wikidata']))
        return problemFree