Ejemplo n.º 1
0
    def set_location(self):
        """
        Add a location statement derived from the 'by' column.

        The raw value is used as a dawp page title; when it contains
        exactly one wikilink, the target of that link is used instead.
        If such a page exists on dawp, its Wikidata item is looked up
        and the item's P31 values are compared against the "settlement"
        data file (presumably the settlement types downloaded by the
        importer). The statement is added only on a match; nothing is
        reported otherwise.
        """
        if not self.has_non_empty_attribute("by"):
            return

        page_title = self.by
        if utils.count_wikilinks(page_title) == 1:
            page_title = utils.get_wikilinks(page_title)[0].title

        if not utils.wp_page_exists("da", page_title):
            return
        candidate = utils.q_from_wikipedia("da", page_title)
        if not candidate:
            return

        p31_values = utils.get_P31(candidate, self.repo)
        # An item can have several P31 values; the first hit is enough,
        # and any() stops looking as soon as it finds one.
        if any(p31 in self.data_files["settlement"] for p31 in p31_values):
            self.add_statement("location", candidate)
Ejemplo n.º 2
0
    def set_location(self):
        """
        Add a location statement derived from the 'localidad' field.

        When the field holds exactly one wikilink, the candidate item
        is resolved from that link; otherwise the raw field value is
        tried as an eswp article title. The candidate is accepted only
        if it yields exactly one match in the "settlements" data file.
        Values that cannot be resolved this way are added to the report.
        """
        settlements = self.data_files["settlements"]

        if not self.has_non_empty_attribute("localidad"):
            return

        raw_value = self.localidad
        if utils.count_wikilinks(raw_value) == 1:
            candidate = utils.q_from_first_wikilink("es", raw_value)
        else:
            candidate = utils.q_from_wikipedia("es", raw_value)

        # Both lookups are validated the same way against the list of
        # known settlements.
        matches = utils.get_item_from_dict_by_key(
            dict_name=settlements, search_term=candidate, search_in="item")

        if len(matches) == 1 and candidate:
            self.add_statement("location", candidate)
        else:
            self.add_to_report("localidad", self.localidad, "location")
Ejemplo n.º 3
0
    def set_location(self):
        """
        Set location (P276) of object.

        Two independent sources are used, and both may add a statement:

        * 'plats': used when it contains exactly one wikilink, e.g.
          [[Tyresta]], and that link resolves to a Wikidata item via
          svwp. Anything else is added to the report.
        * 'socken': resolved with get_socken() using socken and
          landskap. If that fails, fall back to the single entry in the
          "socken" data file whose label starts with the socken name.
          If there is no unambiguous match, the raw "socken (landskap)"
          value is added to the report.
        """
        if self.has_non_empty_attribute("plats"):
            wikilinks = utils.get_wikilinks(self.plats)
            wd_item = None
            if len(wikilinks) == 1:
                wd_item = utils.q_from_wikipedia("sv", wikilinks[0].title)
            # Only add a statement for a link that actually resolved to
            # an item; unresolved links are reported like unlinked text.
            if wd_item:
                self.add_statement("location", wd_item)
            else:
                self.add_to_report("plats", self.plats)
        if self.has_non_empty_attribute("socken"):
            socken_dict = self.data_files["socken"]
            socken = self.get_socken(self.socken, self.landskap)
            if not socken:
                possible_socken = [
                    x["item"] for x in socken_dict
                    if x["itemLabel"].startswith(self.socken)
                ]
                # Accept the prefix match only when it is unambiguous.
                if len(possible_socken) == 1:
                    socken = possible_socken[0]
            if socken:
                self.add_statement("location", socken)
            else:
                raw_socken = "{} ({})".format(self.socken, self.landskap)
                self.add_to_report("socken", raw_socken)
Ejemplo n.º 4
0
 def set_parts(self):
     """
     Add a has_part statement for every wikilink in 'enlace'.

     Each link target is looked up on eswp. Links that do not resolve
     to a Wikidata item cause the whole 'enlace' value to be added to
     the report.
     """
     if not self.has_non_empty_attribute("enlace"):
         return
     for wikilink in utils.get_wikilinks(self.enlace):
         item = utils.q_from_wikipedia("es", wikilink.title)
         if not item:
             self.add_to_report("enlace", self.enlace, "has_part")
             continue
         self.add_statement("has_part", item)
def process_wp_reserves():
    """
    Match svwp nature reserve articles against the source file.

    Every article title from the Petscan-generated list is looked up
    in the source data, except titles starting with "Lista" or
    containing "nationalpark". The outcome is sorted into one of three
    reports depending on how many candidates were found: exactly one,
    several, or none.

    Each report is rewritten to disk in full right after the lookup
    that changed it, not once at the end, so that intermediate results
    can be inspected while the run is still in progress.
    """
    exact_path = "svwp_to_nature_id_exact.json"
    none_path = "svwp_to_nature_id_none.json"
    multiple_path = "svwp_to_nature_id_multiple.json"
    exact_matches = []
    unmatched = []
    ambiguous_matches = []

    wp_titles = read_wp_nr_list()
    total = len(wp_titles)
    source_rows = read_reserve_csv()
    print("Processing {} svwp articles.".format(total))

    processed = 0
    for title in wp_titles:
        # Skip list articles and national parks.
        if title.startswith("Lista") or "nationalpark" in title.lower():
            continue

        processed += 1
        if processed % 10 == 0:
            print("Processed {}/{}...".format(processed, total))

        candidates = find_wp_reserve_in_data_file(title, source_rows)
        hits = len(candidates)
        if hits == 1:
            exact_matches.append({
                "wp_article": title,
                "source_name": candidates[0]["name"],
                "nature_id": candidates[0]["nature_id"],
                "item": utils.q_from_wikipedia("sv", title),
            })
            utils.json_to_file(exact_path, exact_matches)
        elif hits > 1:
            ambiguous_matches.append({
                "wp_article": title,
                "nature_id": [row["nature_id"] for row in candidates],
            })
            utils.json_to_file(multiple_path, ambiguous_matches)
        else:
            unmatched.append({"wp_article": title, "nature_id": ""})
            utils.json_to_file(none_path, unmatched)
Ejemplo n.º 6
0
    def exists_with_monument_article(self,
                                     language,
                                     article_keyword="monument_article"):
        """
        Look up the wd item for the article named in article_keyword.

        Returns None when the attribute is missing or empty, and also
        when the article title contains '#', since that indicates a
        link to a section rather than to a whole article. Otherwise
        returns whatever the Wikipedia lookup for that title in the
        given language yields.
        """
        if not self.has_non_empty_attribute(article_keyword):
            return None

        title = getattr(self, article_keyword)
        if "#" in title:
            return None

        return utils.q_from_wikipedia(language, title)
Ejemplo n.º 7
0
    def set_architect(self):
        """
        Add architect statements from the wikilinks in 'arkitekt'.

        Every wikilink is looked up on svwp, so several architects can
        be added. Links without a Wikidata item are skipped silently.
        An item is added only if it passes the P31 whitelist check for
        Q5 (human); otherwise the whole 'arkitekt' value is added to
        the report.
        """
        if not self.has_non_empty_attribute("arkitekt"):
            return

        for wikilink in utils.get_wikilinks(self.arkitekt):
            person = utils.q_from_wikipedia("sv", wikilink.title)
            if not person:
                continue
            if utils.is_whitelisted_P31(person, self.repo, ["Q5"]):
                self.add_statement("architect", person)
            else:
                self.add_to_report("arkitekt", self.arkitekt)
Ejemplo n.º 8
0
    def set_location(self):
        """
        Set Location property from article linked in localitate.

        Run this after set_adm_location. localitate can
        contain several links (we take the 1st which seems to
        be the most granular one) and a mix of administrative
        types. The linked item is only added when it differs from
        the first 'located_adm' value, so that the two statements
        are not the same. If no administrative location has been
        set, there is nothing to compare against and the item is
        added as is.

        The raw value is added to the report when no item could be
        resolved from it.
        """
        if not self.has_non_empty_attribute("localitate"):
            return

        loc_item = None
        if utils.count_wikilinks(self.localitate) > 0:
            loc_link = utils.get_wikilinks(self.localitate)[0]
            loc_item = utils.q_from_wikipedia("ro", loc_link.title)
            adm_items = self.get_statement_values("located_adm")
            # The admin location may be missing (e.g. it was reported
            # rather than set), so never index into an empty result.
            same_as_adm = bool(adm_items) and loc_item == adm_items[0]
            if loc_item and not same_as_adm:
                self.add_statement("location", loc_item)

        if not loc_item:
            self.add_to_report("localitate", self.localitate, "location")
Ejemplo n.º 9
0
    def set_adm_location(self):
        """
        Add the administrative location (located_adm) statement.

        The 'gemeinde' value is first tried as a dewp article title.
        If that lookup gives None, the region ISO code is searched for
        in the "councils" data file and used when it matches exactly
        one entry. If neither approach yields an item, the 'gemeinde'
        value is added to the report.
        """
        municipality = self.gemeinde
        adm_q = utils.q_from_wikipedia("de", municipality)

        if adm_q is None:
            # Fall back to the ISO code mapping; only an unambiguous
            # match is accepted.
            iso_matches = utils.get_item_from_dict_by_key(
                dict_name=self.data_files["councils"],
                search_term=self.region_iso,
                search_in="iso")
            if len(iso_matches) == 1:
                adm_q = iso_matches[0]

        if not adm_q:
            self.add_to_report("gemeinde", self.gemeinde, "located_adm")
            return
        self.add_statement("located_adm", adm_q)