コード例 #1
0
ファイル: utils.py プロジェクト: Vesihiisi/Biblioteksdata
def sanitize_wdqs_result(data):
    """
    Strip the url component out of wdqs results.

    Source: deprecated wdqsLookup.py.
    Everything up to and including the last '/' is dropped, which strips
    out e.g. http://www.wikidata.org/entity/
    For dicts it is assumed that it is the key which should be sanitized,
    the values are passed through unchanged.

    The input is never modified: lists and dicts are returned as new
    objects.

    @param data: data to sanitize
    @type data: str, or list of str or dict
    @return: sanitized data, of the same kind as the input
    @rtype: str, list of str or dict
    @raise pywikibot.Error: if data is neither a string, a list nor a dict
    """
    if isinstance(data, list):
        # build a fresh list rather than overwriting the caller's entries
        return [entry.split('/')[-1] for entry in data]
    if isinstance(data, dict):
        return {key.split('/')[-1]: value for key, value in data.items()}
    if helpers.is_str(data):
        return data.split('/')[-1]
    raise pywikibot.Error('sanitize_wdqs_result() requires a string, dict '
                          'or a list of strings. Not a %s' % type(data))
コード例 #2
0
    def testInput(self, text, language=None, term_type=None, entities=None):
        """
        Test that the user input is valid.

        @param text: the text to search for
        @type text: basestring
        @param language: the language to search in, defaults to None=any
        @type language: basestring or None
        @param entities: the language to search in, defaults to None=any
        @type entities: list (of basestring), or None
        @param term_type: field to search in, defaults
            to None=['label', 'alias']
        @type term_type: basestring or None
        @return: if input is valid
        @rtype: bool
        """
        # test text
        if not text.strip():
            self._print('You cannot send stringSearch an empty string')
            return False

        # test language
        if language and language not in self.languages:
            self._print('%s is not a recognised language' % language)
            return False

        # test term_type
        if term_type and term_type not in self.term_types:
            self._print('%s is not a recognised term_type' % term_type)
            return False

        # test list of entities
        if entities:
            if not isinstance(entities, (tuple, list)):
                self._print('Entities must be a non-zero list')
                return False

            # Check each is correctly formatted
            if not all(
                    e.startswith('Q') and helpers.is_str(e)
                    and WikidataStringSearch.is_int(e[1:]) for e in entities):
                self._print('Each entity must be a string like Q<integer>')
                return False

        # nothing flagged
        return True
コード例 #3
0
        def claims(self, values):
            """Build the protoclaims for this entry.

            Starts from the generic person claims and, where a seeAlso link
            points to the Nationalmuseum eMuseumPlus collection, adds the
            objectId of the first such link as a P2538 statement.

            A plain string under values[u'seeAlso'] is replaced in values by
            the result of helpers.listify.

            @param values: the values extracted using the rules
            @type values: dict
            @return: the protoclaims
            @rtype: dict PID-WD.Statement pairs
            """
            protoclaims = Person.get_claims(self, values)

            # without any seeAlso links there is no artist id to extract
            if values[u'seeAlso'] is None:
                return protoclaims

            if helpers.is_str(values[u'seeAlso']):
                values[u'seeAlso'] = helpers.listify(values[u'seeAlso'])

            for link in values[u'seeAlso']:
                if u'collection.nationalmuseum.se/eMuseumPlus' not in link:
                    continue
                # the id is whatever sits between 'objectId=' and the next '&'
                after_key = link.split('objectId=')[-1]
                protoclaims['P2538'] = WD.Statement(after_key.split('&')[0])
                break

            return protoclaims
コード例 #4
0
    def resolve_via_id(via_id, value, ids):
        """
        Follow a viaId chain through the @id lookup and return the result.

        For every step of the chain the current value is looked up in ids
        and replaced by the entry stored under that step's key. The walk is
        abandoned as soon as either lookup is impossible.

        :param via_id: string|tuple of strings, viaId chains
        :param value: string the entry matching "target" of the rule.
        :param ids: a dict of all @id values
        :return: the matching value, or None if the chain cannot be followed
        """
        # a lone string is treated as a chain with a single step
        chain = (via_id, ) if helpers.is_str(via_id) else via_id

        for step in chain:
            if value not in ids.keys() or step not in ids[value].keys():
                return None
            value = ids[value][step]
        return value
コード例 #5
0
    def is_uuid(self, uuid):
        """Check that the given value is a well-formed uuid string.

        For strings only the part after the last '/' is checked, so a url
        ending in a uuid is accepted. Rejected values are reported through
        pywikibot.output together with self.current_uuid.

        @param uuid: uuid to test
        @type uuid: str
        @return: whether the test passed
        @rtype: bool
        """
        candidate = uuid
        is_valid = False

        if helpers.is_str(uuid):
            candidate = uuid.split('/')[-1]  # in case of url
            pattern = r'[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}' \
                      r'\-[0-9a-f]{4}\-[0-9a-f]{12}'
            found = re.search(pattern, candidate)
            # the match must span the whole candidate, not just a part of it
            is_valid = found is not None and found.group(0) == candidate

        if not is_valid:
            pywikibot.output(u'Not an uuid in %s: %s' %
                             (self.current_uuid, candidate))
        return is_valid
コード例 #6
0
    def make_wikidata_template(wd_entry, special=False):
        """
        Build the wikitext template call for an item, property or snaktype.

        Q/P prefixed ids give {{Q|<id>}} or {{P|<id>}}. With special set,
        'somevalue' and 'novalue' give {{Q'|some value}} and
        {{Q'|no value}} respectively.

        @param wd_entry: a Q/P prefixed item/property id or an
            ItemPage/PropertyPage
        @type wd_entry: pywikibot.ItemPage|pywikibot.PropertyPage|str
        @param special: if it is a special type of value
            (i.e. novalue, somevalue)
        @type special: bool
        @rtype: str
        @raise ValueError: if the entry is neither a Q/P id nor, with
            special set, a recognized snaktype
        """
        page_types = (pywikibot.ItemPage, pywikibot.PropertyPage)
        wd_id = wd_entry.id if isinstance(wd_entry, page_types) else wd_entry

        # ordinary items and properties use their prefix as template name
        if helpers.is_str(wd_id) and wd_id.startswith(('Q', 'P')):
            return '{{%s|%s}}' % (wd_id[0], wd_id)

        if not special:
            raise ValueError(
                'Sorry only items and properties are supported, not whatever '
                '"{}" is.'.format(wd_id))

        # convert to format used by the template
        if wd_id == 'somevalue':
            wd_id = 'some value'
        elif wd_id == 'novalue':
            wd_id = 'no value'
        else:
            raise ValueError('Sorry but "{}" is not a recognized special '
                             'value/snaktype.'.format(wd_id))

        return '{{%s|%s}}' % ("Q'", wd_id)