Example #1
0
def get_listpages() -> set:
    """Return all list pages (with already resolved redirects).

    The set is computed on the first call and stored in the module-level
    ``__LISTPAGES__`` variable; every later call returns that same set
    object, so callers should not mutate the result.
    """
    global __LISTPAGES__
    if '__LISTPAGES__' not in globals():
        # Resolve every page exactly once and reuse the result for both the
        # filter and the stored value. Only redirect targets that are
        # themselves list pages are kept.
        resolved_pages = map(dbp_store.resolve_redirect, get_listpages_with_redirects())
        __LISTPAGES__ = {lp for lp in resolved_pages if list_util.is_listpage(lp)}

    return __LISTPAGES__
Example #2
0
def _parse_listpages() -> dict:
    """Collect the parsed articles that can be used as list pages.

    An entry of ``wikipedia.get_parsed_articles()`` is kept only if its
    resource is a list page, the resource resolves to itself (presumably
    meaning it is not a redirect), and its content is non-empty and contains
    a 'sections' key.

    Returns:
        A dict mapping each retained resource to its parsed content.
    """
    return {
        page: parsed
        for page, parsed in wikipedia.get_parsed_articles().items()
        if list_util.is_listpage(page)
        and page == dbp_store.resolve_redirect(page)
        and parsed
        and 'sections' in parsed
    }
Example #3
0
def _get_resource_surface_scores(text):
    """Return resource lexicalisation scores for the given text.

    The result maps candidate keys to scores: the text itself (score 1), the
    redirect-resolved resource derived from the text if it is a known
    resource (score 1), and every inverse lexicalisation of the lower-cased
    text with its frequency. A falsy text yields an empty dict.
    """
    if not text:
        return {}

    scores = {text: 1}

    candidate = dbp_store.resolve_redirect(dbp_util.name2resource(text))
    if candidate in dbp_store.get_resources():
        scores[candidate] = 1

    # Insert lexicalisations in ascending frequency order; a lexicalisation
    # that equals an existing key overwrites that key's score.
    lexicalisations = dbp_store.get_inverse_lexicalisations(text.lower())
    scores.update(sorted(lexicalisations.items(), key=operator.itemgetter(1)))
    return scores
Example #4
0
    def create_from_dbpedia(cls):
        """Initialise the graph by combining list categories with list pages.

        Nodes are all list categories plus all list pages. Directed edges run
        from a list category to each of its child list categories and to each
        list page it contains. Every node gets a name and a parts set holding
        only itself. The root node is added as a node; no edges to or from it
        are created here.

        Returns:
            The ``ListGraph`` wrapping the assembled ``nx.DiGraph``.
        """
        # Work on copies: the node set is extended with list pages below, and
        # the set returned by list_store may be shared with other callers
        # (NOTE(review): presumably a module-level cache - confirm).
        listcats = set(list_store.get_listcategories())
        nodes = set(listcats)
        edges = set()

        # add edges between listcategories
        for listcat in listcats:
            children = cat_store.get_children(listcat, include_listcategories=True)
            edges.update((listcat, child) for child in children if child in listcats)

        # add nodes and edges for listpages
        for listcat in listcats:
            resolved_pages = {
                dbp_store.resolve_redirect(page)
                for page in cat_store.get_resources(listcat)
                if list_util.is_listpage(page)
            }
            # filter out redirects on non-listpages
            listpages = {lp for lp in resolved_pages if list_util.is_listpage(lp)}
            nodes.update(listpages)
            edges.update((listcat, lp) for lp in listpages)

        # make sure that all listpages are in the graph
        nodes.update(list_store.get_listpages())

        # initialise graph: edges first, then the nodes that have no edge
        graph = nx.DiGraph(incoming_graph_data=list(edges))
        graph.add_nodes_from(nodes - set(graph.nodes))
        list_graph = ListGraph(graph)

        for node in graph.nodes:
            list_graph._set_name(node, list_util.list2name(node))
            list_graph._set_parts(node, {node})

        # add root node
        root = list_graph.root_node
        graph.add_node(root)
        list_graph._set_name(root, cat_util.category2name(root))
        list_graph._set_parts(root, {root})

        return list_graph
Example #5
0
 def rejects_resource(self, dbp_resource: str) -> bool:
     """Return True if the resource's properties contradict this predicate/value pair.

     A resource is rejected only when the predicate is functional according
     to ``dbp_store.is_functional``, the resource has that predicate, and
     neither this object's value nor its redirect-resolved form is among the
     resource's values for it.
     """
     if not dbp_store.is_functional(self.predicate):
         return False

     props = dbp_store.get_properties(dbp_resource)
     if self.predicate not in props:
         return False

     values = props[self.predicate]
     if self.value in values:
         return False
     # Only fall back to the redirect target when the raw value is absent.
     return dbp_store.resolve_redirect(self.value) not in values
Example #6
0
 def accepts_resource(self, dbp_resource: str) -> bool:
     """Return True if the resource has this predicate with a matching value.

     The value matches when either this object's value itself or its
     redirect-resolved form is among the resource's values for the predicate.
     """
     props = dbp_store.get_properties(dbp_resource)
     if self.predicate not in props:
         return False

     values = props[self.predicate]
     if self.value in values:
         return True
     # Only fall back to the redirect target when the raw value is absent.
     return dbp_store.resolve_redirect(self.value) in values