コード例 #1
0
    def test_reference_get_all_sources(self):
        """Verify get_all_sources() for each way of supplying sources."""
        # a single claim given only as a test source
        only_test = WD.Reference(source_test=self.ref_1)
        self.assertEqual(only_test.get_all_sources(), [self.ref_1])

        # a single claim given only as a no-test source
        only_notest = WD.Reference(source_notest=self.ref_1)
        self.assertEqual(only_notest.get_all_sources(), [self.ref_1])

        # one claim of each kind, passed positionally
        mixed = WD.Reference(self.ref_1, self.ref_2)
        expected = [self.ref_1, self.ref_2]
        self.assertEqual(mixed.get_all_sources(), expected)
コード例 #2
0
    def test_reference_init_single_claim_gives_list(self):
        """A bare claim given to the constructor ends up inside a list."""
        # keyword: test source only
        only_test = WD.Reference(source_test=self.ref_1)
        self.assertEqual(only_test.source_test, [self.ref_1])
        self.assertEqual(only_test.source_notest, [])

        # keyword: no-test source only
        only_notest = WD.Reference(source_notest=self.ref_1)
        self.assertEqual(only_notest.source_test, [])
        self.assertEqual(only_notest.source_notest, [self.ref_1])

        # positional: first argument is the test source, second the no-test
        mixed = WD.Reference(self.ref_1, self.ref_2)
        self.assertEqual(mixed.source_test, [self.ref_1])
        self.assertEqual(mixed.source_notest, [self.ref_2])
コード例 #3
0
    def test_reference_init_non_claim_error(self):
        """A non-Claim source is rejected for either keyword argument."""
        expected_msg = (
            'You tried to create a reference with a non-Claim source')

        # same check for both keywords, source_test first
        for keyword in ('source_test', 'source_notest'):
            with self.assertRaises(pwbError) as cm:
                WD.Reference(**{keyword: 'foo'})
            self.assertEqual(str(cm.exception), expected_msg)
コード例 #4
0
    def test_reference_init_with_list(self):
        """Lists of claims given to the constructor keep their order."""
        forward = [self.ref_1, self.ref_2]
        backward = [self.ref_2, self.ref_1]

        # list given as test source only
        only_test = WD.Reference(source_test=[self.ref_1, self.ref_2])
        self.assertEqual(only_test.source_test, forward)
        self.assertEqual(only_test.source_notest, [])

        # list given as no-test source only
        only_notest = WD.Reference(source_notest=[self.ref_1, self.ref_2])
        self.assertEqual(only_notest.source_test, [])
        self.assertEqual(only_notest.source_notest, forward)

        # a list for each kind, passed positionally
        mixed = WD.Reference([self.ref_1, self.ref_2],
                             [self.ref_2, self.ref_1])
        self.assertEqual(mixed.source_test, forward)
        self.assertEqual(mixed.source_notest, backward)
コード例 #5
0
    def make_ref(self, date):
        """Build the WD.Reference used to source claims.

        The reference is assembled from four claims:
        * P248: Stated in <the kulturnav dataset>
        * P577: Publication date <from the document>
        * P854: Reference url <using the current uuid>
        * P813: Retrieval date <current date>

        Only P248 is placed in source_test. P854 should be in source_test
        as well (after retroactively fixing older references), but by
        keeping it in source_notest we ensure that duplicate uuids don't
        source the statement twice.

        @param date: The "last modified" time of the document
        @type date: pywikibot.WbTime
        @return: the formatted reference
        @rtype: WD.Reference
        """
        url = 'http://kulturnav.org/%s' % self.current_uuid

        # claims are created in the same order as they are listed above
        stated_in = self.wd.make_simple_claim(
            'P248', self.wd.QtoItemPage(self.DATASET_Q))
        published = self.wd.make_simple_claim('P577', date)
        ref_url = self.wd.make_simple_claim('P854', url)
        retrieved = self.wd.make_simple_claim(
            'P813', helpers.today_as_WbTime())

        return WD.Reference(
            source_test=stated_in,
            source_notest=[published, ref_url, retrieved])
コード例 #6
0
    def make_url_ref(self, url, fetch_date, publish_date=None):
        """Build a Reference object pointing at a url.

        The reference holds up to three claims:
        * P854: Reference url <the input url>, placed in source_test
        * P577: Publication date <publish_date>, only if one was given
        * P813: Retrieval date <fetch_date>

        Both date claims are placed in source_notest.

        :param url: the source url
        :param fetch_date: the retrieval date (iso)
        :param publish_date: the publication date (iso), optional
        :return: WdS.Reference
        """
        notest_claims = []
        if publish_date:
            published = helpers.iso_to_WbTime(publish_date)
            notest_claims.append(self.wd.make_simple_claim('P577', published))
        retrieved = helpers.iso_to_WbTime(fetch_date)
        notest_claims.append(self.wd.make_simple_claim('P813', retrieved))

        url_claim = self.wd.make_simple_claim('P854', url)
        return WdS.Reference(source_test=[url_claim],
                             source_notest=notest_claims)
コード例 #7
0
 def test_reference_repr(self):
     """Check repr() of a Reference, which also exercises repr() of Claim."""
     expected_repr = ('WD.Reference('
                      'test: [WD.Claim(P55: foo)], '
                      'no_test: [WD.Claim(P55: bar)])')
     reference = WD.Reference(self.ref_1, self.ref_2)
     self.assertEqual(repr(reference), expected_repr)
コード例 #8
0
    def make_lido_ref(self, lido_data):
        """
        Make a Reference object for the dataset.

        NOTE(review): the body begins with a bare exit() call, so nothing
        after it can run. Assuming ``exit`` is the interpreter's built-in
        helper, calling this method stops the program -- confirm whether
        this is a deliberate guard while the 'P?' placeholder is unresolved.

        As written, the (currently unreachable) code would build a reference
        from four claims:
        * P854: Reference url <a hard-coded GitHub archive url>
        * P577: Publication date <hard-coded 2016-09-30>
        * P?: placeholder property id, valued with the 'source_file' entry
        * P813: Retrieval date <current date>

        The first three would go in source_test, P813 in source_notest.

        :param lido_data: object whose get('source_file') gives the value
                          used for the placeholder claim
        :return: WD.Reference
        """
        # Everything below this call is unreachable (see docstring).
        exit()
        # NOTE(review): no P248 ("stated in") claim is built below, although
        # an earlier version of this comment and docstring mentioned one.
        xml_file = lido_data.get('source_file')
        date = helpers.today_as_WbTime()
        pub_date = helpers.iso_to_WbTime(u'2016-09-30')
        zip_url = u'https://github.com/NationalmuseumSWE/WikidataCollection/' \
                  u'blob/master/valid_items_transform_1677.tgz'
        ref = WD.Reference(source_test=[
            self.wd.make_simple_claim(u'P854', zip_url),
            self.wd.make_simple_claim(u'P577', pub_date),
            # 'P?' is a placeholder, not a usable property id
            self.wd.make_simple_claim(u'P?', xml_file),
        ],
                           source_notest=self.wd.make_simple_claim(
                               u'P813', date))
        return ref
コード例 #9
0
 def setUp(self):
     """Create two qualifiers and a single-claim reference as fixtures."""
     test_site = Site('test', 'wikidata')

     # two qualifiers sharing a property but differing in value
     self.q_1 = WD.Qualifier('P123', 'foo')
     self.q_2 = WD.Qualifier('P123', 'bar')

     # a reference whose only (test) source is a P55 claim targeting 'foo'
     source_claim = Claim(test_site, 'P55')
     source_claim.setTarget('foo')
     self.ref = WD.Reference(source_test=[source_claim])
コード例 #10
0
    def make_url_reference(self, uri):
        """Build a Reference from a retrieval url and today's date.

        The url becomes a P854 claim in source_test; the date becomes a
        P813 claim in source_notest.

        @param uri: retrieval uri/url
        @type uri: str
        @rtype: WD.Reference
        """
        today = helpers.today_as_WbTime()
        url_claim = self.wd.make_simple_claim(u'P854', uri)
        retrieved_claim = self.wd.make_simple_claim(u'P813', today)
        return WD.Reference(source_test=url_claim,
                            source_notest=retrieved_claim)
コード例 #11
0
class WikidataItem(object):
    """Collect labels, descriptions and statements for one Wikidata item.

    The collected data lives in the ``wd_item`` dict created by
    construct_wd_item(); data that could not be processed is collected
    in the ``problem_report`` dict.
    """

    def __init__(self, db_row_dict, repository, data_files, existing, caches):
        """
        Initialize the data object.

        :param db_row_dict: raw data for the item, kept as ``raw_data``
        :param repository: data repository, handed to WDS and WbQuantity
        :param data_files: mapping files; its "properties" entry is
                           kept as ``props``
        :param existing: kept as ``existing``
        :param caches: kept as ``caches``, see get_caches()
        """
        self.repo = repository
        self.existing = existing
        self.wdstuff = WDS(self.repo)
        self.raw_data = db_row_dict
        self.caches = caches
        # The report must exist while construct_wd_item() runs.
        self.problem_report = {}
        self.props = data_files["properties"]
        self.construct_wd_item()

        # NOTE(review): the report is reset after construction, so anything
        # reported while construct_wd_item() ran is discarded. Kept as-is
        # because subclasses may depend on it -- confirm this is intended.
        self.problem_report = {}

    def get_caches(self):
        """Return the caches object given at construction."""
        return self.caches

    def make_q_item(self, qnumber):
        """
        Turn a Q-number into an item page via wikidatastuff.

        :param qnumber: Q-number of the wanted item
        :return: the result of WDS.QtoItemPage
        """
        return self.wdstuff.QtoItemPage(qnumber)

    def make_pywikibot_item(self, value):
        """
        Convert a raw value to the matching pywikibot object.

        A one-element list is unwrapped first. After that:
        * a Q-string (per utils.string_is_q_item) gives an item page
        * a dict with 'monolingual_value' (and 'lang') gives a
          WbMonolingualText
        * a dict with 'quantity_value' (and optionally 'unit') gives a
          WbQuantity
        * a dict with 'date_value' gives a WbTime
        * anything else, including "novalue", is returned unchanged

        :param value: the raw value
        :return: the converted value
        """
        val_item = None
        if isinstance(value, list) and len(value) == 1:
            value = value[0]
        if utils.string_is_q_item(value):
            val_item = self.make_q_item(value)
        elif value == "novalue":
            val_item = value
        elif isinstance(value, dict) and 'monolingual_value' in value:
            val_item = pywikibot.WbMonolingualText(
                text=value['monolingual_value'],
                language=value['lang'])
        elif isinstance(value, dict) and 'quantity_value' in value:
            unit = None
            if 'unit' in value:
                unit = self.wdstuff.QtoItemPage(value["unit"])
            val_item = pywikibot.WbQuantity(amount=value['quantity_value'],
                                            unit=unit,
                                            site=self.repo)
        elif isinstance(value, dict) and 'date_value' in value:
            date_dict = value["date_value"]
            # missing parts are passed on as None
            val_item = pywikibot.WbTime(year=date_dict.get("year"),
                                        month=date_dict.get("month"),
                                        day=date_dict.get("day"))
        else:
            val_item = value
        return val_item

    def make_statement(self, value):
        """
        Create a wikidatastuff Statement.

        The values 'somevalue' and 'novalue' are flagged as special.

        :param value: the content of the statement
        :return: a wikidatastuff Statement
        """
        special = value in ['somevalue', 'novalue']
        return self.wdstuff.Statement(value, special=special)

    def make_qualifier_applies_to(self, value):
        """
        Create a qualifier using the "applies_to_part" property.

        :param value: Q-number that the qualifier points at
        :return: a wikidatastuff Qualifier
        """
        prop_item = self.props["applies_to_part"]
        target_item = self.wdstuff.QtoItemPage(value)
        return self.wdstuff.Qualifier(prop_item, target_item)

    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Add a statement to the data object.

        :param prop_name: key of the property in ``props``
        :param value: raw value, converted with make_pywikibot_item()
        :param quals: a qualifier or a list of them, optional
        :param ref: reference stored alongside the statement, optional
        """
        base = self.wd_item["statements"]
        prop = self.props[prop_name]
        if quals is None:
            quals = []
        wd_claim = self.make_pywikibot_item(value)
        statement = self.make_statement(wd_claim)
        for qual in helpers.listify(quals):
            statement.addQualifier(qual)
        base.append({"prop": prop, "value": statement, "ref": ref})

    def make_stated_in_ref(self,
                           value,
                           pub_date=None,
                           ref_url=None,
                           retrieved_date=None):
        """
        Make a reference object of type 'stated in'.

        The url and retrieval date are only used when both are given.
        Dates are parsed by utils.date_to_dict with the format "%Y-%m-%d".

        :param value: Q-number of the source
        :param pub_date: publication date, optional
        :param ref_url: reference url, optional
        :param retrieved_date: retrieval date, optional
        :return: a wikidatastuff Reference
        """
        item_prop = self.props["stated_in"]
        published_prop = self.props["publication_date"]
        published_claim = None
        if pub_date:
            pub_date = utils.date_to_dict(pub_date, "%Y-%m-%d")
            timestamp = self.make_pywikibot_item({"date_value": pub_date})
            published_claim = self.wdstuff.make_simple_claim(
                published_prop, timestamp)
        source_item = self.wdstuff.QtoItemPage(value)
        source_claim = self.wdstuff.make_simple_claim(item_prop, source_item)

        if not (ref_url and retrieved_date):
            return self.wdstuff.Reference(source_test=[source_claim],
                                          source_notest=published_claim)

        ref_url_prop = self.props["reference_url"]
        retrieved_date_prop = self.props["retrieved"]

        retrieved_date = utils.date_to_dict(retrieved_date, "%Y-%m-%d")
        retrieved_date = self.make_pywikibot_item(
            {"date_value": retrieved_date})

        ref_url_claim = self.wdstuff.make_simple_claim(
            ref_url_prop, ref_url)
        retrieved_on_claim = self.wdstuff.make_simple_claim(
            retrieved_date_prop, retrieved_date)

        # the publication claim is only included when there is one
        if published_claim:
            notest = [published_claim, retrieved_on_claim]
        else:
            notest = [retrieved_on_claim]
        return self.wdstuff.Reference(
            source_test=[source_claim, ref_url_claim],
            source_notest=notest)

    def associate_wd_item(self, wd_item):
        """
        Associate the data object with a Wikidata item.

        :param wd_item: the item to store; None is ignored
        """
        if wd_item is not None:
            self.wd_item["wd-item"] = wd_item

    def set_upload(self, booln):
        """
        Set the "upload" flag of the data object.

        :param booln: the new value of the flag
        """
        self.wd_item["upload"] = booln

    def add_label(self, language, text):
        """
        Add a label in a specific language.

        :param language: code of the language
        :param text: content of the label
        """
        self.wd_item["labels"].append({"language": language, "value": text})

    def add_description(self, language, text):
        """
        Add a description in a specific language.

        :param language: code of the language
        :param text: content of the description
        """
        self.wd_item["descriptions"].append(
            {"language": language, "value": text})

    def add_to_report(self, key_name, raw_data, id_no, prop_name=None):
        """
        Add data to problem report json.

        The Q-number currently associated with the item (or an empty
        string if there is none) is always written to the "Q" entry of
        the report, and id_no to its "url" entry.

        Optionally, assign a Property ID that the data
        should have been used as a value for.

        :param key_name: name of the field containing
                         the problematic data, e.g. the header of the column
        :type key_name: string
        :param raw_data: the data that we failed to process
        :type raw_data: string
        :param id_no: unique id assigned to item, e.g. url
        :type id_no: string
        :param prop_name: name of the property,
                          as stated in the props library file;
                          names starting with '_' are stored verbatim
        :type prop_name: string
        """
        prop = None
        if prop_name:
            if prop_name.startswith('_'):
                prop = prop_name
            else:
                prop = self.props.get(prop_name)
        self.problem_report[key_name] = {"value": raw_data, "target": prop}
        # The Q entry used to be guarded by a check for the key "wd-item",
        # which is never used for the Q-number; it only had an effect (and
        # dropped the Q entry) when a reported field was named "wd-item".
        q_number = self.wd_item["wd-item"]
        self.problem_report["Q"] = q_number if q_number is not None else ""
        self.problem_report["url"] = id_no

    def print_report(self):
        """Print the problem report on screen."""
        print(
            json.dumps(self.problem_report,
                       sort_keys=True,
                       indent=4,
                       ensure_ascii=False,
                       default=utils.datetime_convert))

    def get_report(self):
        """Retrieve the problem report."""
        return self.problem_report

    def construct_wd_item(self):
        """
        Create the empty structure of the data object.

        This creates self.wd_item -- a dict container
        of all the data content of the item.
        """
        self.wd_item = {
            "upload": True,
            "statements": [],
            "labels": [],
            "descriptions": [],
            "wd-item": None,
        }
コード例 #12
0
 def test_reference_init_empty_error(self):
     """Creating a Reference without any sources raises pwbError."""
     expected_msg = 'You tried to create a reference without any sources'
     with self.assertRaises(pwbError) as cm:
         WD.Reference()
     self.assertEqual(str(cm.exception), expected_msg)
コード例 #13
0
class WikidataItem(object):
    """Collect labels, descriptions and statements for one Wikidata item.

    The collected data lives in the ``wd_item`` dict created by
    construct_wd_item().
    """

    def __init__(self, db_row_dict, repository, data_files, existing):
        """
        Initialize the data object.

        :param db_row_dict: raw data for the item, kept as ``raw_data``
        :param repository: data repository, handed to WDS and WbQuantity
        :param data_files: mapping files; its "properties" entry is
                           kept as ``props``
        :param existing: kept as ``existing``
        """
        self.repo = repository
        self.existing = existing
        self.wdstuff = WDS(self.repo)
        self.raw_data = db_row_dict
        self.props = data_files["properties"]
        self.construct_wd_item()

        self.problem_report = {}

    def make_q_item(self, qnumber):
        """
        Turn a Q-number into an item page via wikidatastuff.

        :param qnumber: Q-number of the wanted item
        :return: the result of WDS.QtoItemPage
        """
        return self.wdstuff.QtoItemPage(qnumber)

    def make_pywikibot_item(self, value):
        """
        Convert a raw value to the matching pywikibot object.

        A one-element list is unwrapped first. After that:
        * a Q-string (per utils.string_is_q_item) gives an item page
        * a dict with 'quantity_value' (and optionally 'unit') gives a
          WbQuantity
        * a dict with 'date_value' gives a WbTime
        * anything else, including "novalue", is returned unchanged

        :param value: the raw value
        :return: the converted value
        """
        val_item = None
        if isinstance(value, list) and len(value) == 1:
            value = value[0]
        if utils.string_is_q_item(value):
            val_item = self.make_q_item(value)
        elif value == "novalue":
            val_item = value
        elif isinstance(value, dict) and 'quantity_value' in value:
            unit = None
            if 'unit' in value:
                unit = self.wdstuff.QtoItemPage(value["unit"])
            val_item = pywikibot.WbQuantity(amount=value['quantity_value'],
                                            unit=unit,
                                            site=self.repo)
        elif isinstance(value, dict) and 'date_value' in value:
            date_dict = value["date_value"]
            # missing parts are passed on as None
            val_item = pywikibot.WbTime(year=date_dict.get("year"),
                                        month=date_dict.get("month"),
                                        day=date_dict.get("day"))
        else:
            val_item = value
        return val_item

    def make_statement(self, value):
        """
        Create a wikidatastuff Statement.

        The values 'somevalue' and 'novalue' are flagged as special.

        :param value: the content of the statement
        :return: a wikidatastuff Statement
        """
        special = value in ['somevalue', 'novalue']
        return self.wdstuff.Statement(value, special=special)

    def make_qualifier_applies_to(self, value):
        """
        Create a qualifier using the "applies_to_part" property.

        :param value: Q-number that the qualifier points at
        :return: a wikidatastuff Qualifier
        """
        prop_item = self.props["applies_to_part"]
        target_item = self.wdstuff.QtoItemPage(value)
        return self.wdstuff.Qualifier(prop_item, target_item)

    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Add a statement to the data object.

        :param prop_name: key of the property in ``props``
        :param value: raw value, converted with make_pywikibot_item()
        :param quals: a qualifier or a list of them, optional
        :param ref: reference stored alongside the statement, optional
        """
        base = self.wd_item["statements"]
        prop = self.props[prop_name]
        if quals is None:
            quals = []
        wd_claim = self.make_pywikibot_item(value)
        statement = self.make_statement(wd_claim)
        for qual in helpers.listify(quals):
            statement.addQualifier(qual)
        base.append({"prop": prop, "value": statement, "ref": ref})

    def make_stated_in_ref(self,
                           value,
                           pub_date,
                           ref_url=None,
                           retrieved_date=None):
        """
        Make a reference object of type 'stated in'.

        The url and retrieval date are only used when both are given.
        Dates are parsed by utils.date_to_dict with the format "%Y-%m-%d".

        :param value: Q-number of the source
        :param pub_date: publication date
        :param ref_url: reference url, optional
        :param retrieved_date: retrieval date, optional
        :return: a wikidatastuff Reference
        """
        item_prop = self.props["stated_in"]
        published_prop = self.props["publication_date"]
        pub_date = utils.date_to_dict(pub_date, "%Y-%m-%d")
        timestamp = self.make_pywikibot_item({"date_value": pub_date})
        published_claim = self.wdstuff.make_simple_claim(
            published_prop, timestamp)
        source_item = self.wdstuff.QtoItemPage(value)
        source_claim = self.wdstuff.make_simple_claim(item_prop, source_item)

        if not (ref_url and retrieved_date):
            return self.wdstuff.Reference(source_test=[source_claim],
                                          source_notest=published_claim)

        ref_url_prop = self.props["reference_url"]
        retrieved_date_prop = self.props["retrieved"]

        retrieved_date = utils.date_to_dict(retrieved_date, "%Y-%m-%d")
        retrieved_date = self.make_pywikibot_item(
            {"date_value": retrieved_date})

        ref_url_claim = self.wdstuff.make_simple_claim(
            ref_url_prop, ref_url)
        retrieved_on_claim = self.wdstuff.make_simple_claim(
            retrieved_date_prop, retrieved_date)

        return self.wdstuff.Reference(
            source_test=[source_claim, ref_url_claim],
            source_notest=[published_claim, retrieved_on_claim])

    def associate_wd_item(self, wd_item):
        """
        Associate the data object with a Wikidata item.

        :param wd_item: the item to store; None is ignored
        """
        if wd_item is not None:
            self.wd_item["wd-item"] = wd_item

    def add_label(self, language, text):
        """
        Add a label in a specific language.

        :param language: code of the language
        :param text: content of the label
        """
        self.wd_item["labels"].append({"language": language, "value": text})

    def add_description(self, language, text):
        """
        Add a description in a specific language.

        :param language: code of the language
        :param text: content of the description
        """
        self.wd_item["descriptions"].append(
            {"language": language, "value": text})

    def construct_wd_item(self):
        """
        Create the empty structure of the data object.

        This creates self.wd_item -- a dict container
        of all the data content of the item.
        """
        self.wd_item = {
            "upload": True,
            "statements": [],
            "labels": [],
            "descriptions": [],
            "wd-item": None,
        }
コード例 #14
0
class WikidataItem(object):
    """Basic data object for upload to Wikidata."""

    def __init__(self, db_row_dict, repository, data_files, existing):
        """
        Initialize the data object.

        :param db_row_dict: raw data from the data source
        :type db_row_dict: presumably a dict, going by the name --
                           TODO confirm against the callers
        :param repository: data repository (Wikidata site)
        :type repository: site instance
        :param data_files: dict of various mapping files; the entries
                           "properties" and "items" are required
        :type data_files: dictionary
        :param existing: WD items that already have an unique id
        :type existing: dictionary
        """
        self.repo = repository
        self.existing = existing
        self.wdstuff = WDS(self.repo)
        self.raw_data = db_row_dict
        self.props = data_files["properties"]
        self.items = data_files["items"]
        self.construct_wd_item()

        self.problem_report = {}

    def make_q_item(self, qnumber):
        """
        Create a regular Wikidata ItemPage.

        :param qnumber: Q-item that we want to get an ItemPage of
        :type qnumber: string

        :return: an ItemPage for pywikibot
        """
        return self.wdstuff.QtoItemPage(qnumber)

    def make_pywikibot_item(self, value):
        """
        Create a statement in pywikibot-ready format.

        A one-element list is unwrapped first. The statement can then be:
        * an item (value is Q-string)
        * an amount with or without unit (value is a dict with
          'quantity_value' and optionally 'unit')
        * a date (value is a dict whose 'date_value' entry holds
          'year', 'month' and 'day')
        * anything else, e.g. a plain string or "novalue", which is
          returned unchanged

        :param value: the content of the item
        :type value: it can be a string or
                      a dictionary, see above.

        :return: a pywikibot item of the type determined
                 by the input data: ItemPage, WbQuantity, WbTime
                 or the unchanged input.
        """
        val_item = None
        if isinstance(value, list) and len(value) == 1:
            value = value[0]
        if utils.string_is_q_item(value):
            val_item = self.make_q_item(value)
        elif value == "novalue":
            val_item = value
        elif isinstance(value, dict) and 'quantity_value' in value:
            unit = None
            if 'unit' in value:
                unit = self.wdstuff.QtoItemPage(value["unit"])
            val_item = pywikibot.WbQuantity(
                amount=value['quantity_value'], unit=unit, site=self.repo)
        elif isinstance(value, dict) and 'date_value' in value:
            date_dict = value["date_value"]
            # all three parts are required here; a missing one is a KeyError
            val_item = pywikibot.WbTime(year=date_dict["year"],
                                        month=date_dict["month"],
                                        day=date_dict["day"])
        else:
            val_item = value
        return val_item

    def make_statement(self, value):
        """
        Create a Wikidatastuff statement.

        Supports the special data types 'somevalue'
        and 'novalue'.

        :param value: the content of the statement
        :type value: pywikibot item

        :return: a wikidatastuff statement
        """
        special = value in ['somevalue', 'novalue']
        return self.wdstuff.Statement(value, special=special)

    def make_qualifier_applies_to(self, value):
        """
        Create a qualifier to a statement with type 'applies to part'.

        :param value: Q-item that this applies to
        :type value: string

        :return: a wikidatastuff Qualifier
        """
        prop_item = self.props["applies_to_part"]
        target_item = self.wdstuff.QtoItemPage(value)
        return self.wdstuff.Qualifier(prop_item, target_item)

    def add_statement(self, prop_name, value, quals=None, ref=None):
        """
        Add a statement to the data object.

        :param prop_name: key of the property in the properties mapping
        :type prop_name: string
        :param value: content of the statement
        :type value: it can be a string representing
                      a Q-item or a dictionary of an amount
        :param quals: possibly qualifier items
        :type quals: a wikidatastuff Qualifier item,
                      or a list of them
        :param ref: reference item
        :type ref: a wikidatastuff Reference item
        """
        base = self.wd_item["statements"]
        prop = self.props[prop_name]
        if quals is None:
            quals = []
        wd_claim = self.make_pywikibot_item(value)
        statement = self.make_statement(wd_claim)
        for qual in helpers.listify(quals):
            statement.addQualifier(qual)
        base.append({"prop": prop,
                     "value": statement,
                     "ref": ref})

    def make_stated_in_ref(self,
                           value,
                           pub_date,
                           ref_url=None,
                           retrieved_date=None):
        """
        Make a reference object of type 'stated in'.

        The url and retrieval date are only used when both are given.

        :param value: Q-item where sth is stated
        :type value: string
        :param pub_date: timestamp in format "1999-09-30"
        :type pub_date: string
        :param ref_url: optionally a reference url
        :type ref_url: string
        :param retrieved_date: timestamp in format "1999-09-30"
        :type retrieved_date: string

        :return: a wikidatastuff Reference item
        """
        item_prop = self.props["stated_in"]
        published_prop = self.props["publication_date"]
        pub_date = utils.date_to_dict(pub_date, "%Y-%m-%d")
        timestamp = self.make_pywikibot_item({"date_value": pub_date})
        published_claim = self.wdstuff.make_simple_claim(
            published_prop, timestamp)
        source_item = self.wdstuff.QtoItemPage(value)
        source_claim = self.wdstuff.make_simple_claim(item_prop, source_item)

        if not (ref_url and retrieved_date):
            return self.wdstuff.Reference(
                source_test=[source_claim],
                source_notest=published_claim
            )

        ref_url_prop = self.props["reference_url"]
        retrieved_date_prop = self.props["retrieved"]

        retrieved_date = utils.date_to_dict(retrieved_date, "%Y-%m-%d")
        retrieved_date = self.make_pywikibot_item(
            {"date_value": retrieved_date})

        ref_url_claim = self.wdstuff.make_simple_claim(
            ref_url_prop, ref_url)
        retrieved_on_claim = self.wdstuff.make_simple_claim(
            retrieved_date_prop, retrieved_date)

        return self.wdstuff.Reference(
            source_test=[source_claim, ref_url_claim],
            source_notest=[published_claim, retrieved_on_claim])

    def associate_wd_item(self, wd_item):
        """
        Associate the data object with a Wikidata item.

        Nothing is stored or printed when wd_item is None.

        :param wd_item: Q-item that shall be assigned to the
                        data object.
        :type wd_item: string
        """
        if wd_item is not None:
            self.wd_item["wd-item"] = wd_item
            print("Associated WD item: ", wd_item)

    def add_label(self, language, text):
        """
        Add a label in a specific language.

        :param language: code of language, e.g. "fi"
        :type language: string
        :param text: content of the label
        :type text: string
        """
        self.wd_item["labels"].append({"language": language, "value": text})

    def add_description(self, language, text):
        """
        Add a description in a specific language.

        :param language: code of language, e.g. "fi"
        :type language: string
        :param text: content of the description
        :type text: string
        """
        self.wd_item["descriptions"].append(
            {"language": language, "value": text})

    def construct_wd_item(self):
        """
        Create the empty structure of the data object.

        This creates self.wd_item -- a dict container
        of all the data content of the item.
        """
        self.wd_item = {
            "upload": True,
            "statements": [],
            "labels": [],
            "descriptions": [],
            "wd-item": None,
        }
コード例 #15
0
 def make_commons_reference(self):
     """Build a Reference stating 'imported from' (P143) Wikimedia Commons."""
     imported_from = self.wd.make_simple_claim(
         u'P143', self.wd.QtoItemPage(COMMONS_Q))
     return WD.Reference(source_test=imported_from)