def get_listpages() -> set:
    """Return all list pages (with already resolved redirects).

    Every entry of ``get_listpages_with_redirects()`` is resolved through
    ``dbp_store.resolve_redirect`` and kept only if the resolved target is
    itself a list page according to ``list_util.is_listpage``.

    The result is computed on the first call and stored in the module-level
    ``__LISTPAGES__`` global; later calls return that very same set object,
    so callers should treat it as read-only.
    """
    global __LISTPAGES__
    if '__LISTPAGES__' not in globals():
        listpages = set()
        for page in get_listpages_with_redirects():
            # Resolve once and reuse the target for both the filter and the
            # stored value instead of resolving the same redirect twice.
            target = dbp_store.resolve_redirect(page)
            if list_util.is_listpage(target):
                listpages.add(target)
        __LISTPAGES__ = listpages
    return __LISTPAGES__
def _parse_listpages() -> dict:
    """Return the parsed content of all usable list pages.

    An entry of ``wikipedia.get_parsed_articles()`` is kept only if

    * its resource is a list page (``list_util.is_listpage``),
    * the resource is not a redirect, i.e. it resolves to itself, and
    * its content is non-empty and contains a ``'sections'`` key.

    Returns:
        A dict mapping each kept resource to its parsed content.
    """
    return {
        resource: content
        for resource, content in wikipedia.get_parsed_articles().items()
        if list_util.is_listpage(resource)
        and resource == dbp_store.resolve_redirect(resource)
        and content
        and 'sections' in content
    }
def _get_resource_surface_scores(text):
    """Return resource lexicalisation scores for the given text.

    For empty/falsy ``text`` an empty dict is returned. Otherwise the result
    contains, in this insertion order:

    1. ``text`` itself with score 1,
    2. the redirect-resolved resource derived from ``text`` with score 1, if
       it is contained in ``dbp_store.get_resources()``,
    3. every entry of the inverse lexicalisations of the lower-cased text
       with its frequency, written in ascending frequency order. These
       overwrite the score of a key that was already set in step 1 or 2.
    """
    if not text:
        return {}

    scores = {text: 1}

    candidate = dbp_store.resolve_redirect(dbp_util.name2resource(text))
    if candidate in dbp_store.get_resources():
        scores[candidate] = 1

    lexicalisations = dbp_store.get_inverse_lexicalisations(text.lower())
    by_ascending_frequency = sorted(lexicalisations.items(), key=operator.itemgetter(1))
    scores.update(by_ascending_frequency)
    return scores
def create_from_dbpedia(cls):
    """Initialise the graph by combining list categories with list pages.

    Nodes are all list categories plus all list pages. Edges connect a list
    category to those of its children that are list categories, and to the
    (redirect-resolved) list pages among its resources. Every node is given
    a name and a singleton parts set; the root node is added last.

    Returns:
        The ``ListGraph`` wrapping the constructed ``nx.DiGraph``.
    """
    # NOTE(review): the graph is built with ListGraph directly, `cls` is unused.

    # Work on a copy: `nodes` is extended with list pages below, and the set
    # handed out by list_store (which may be a shared/cached object) must not
    # be modified as a side effect of building the graph.
    nodes = set(list_store.get_listcategories())
    edges = set()

    # add nodes and edges for listcategories
    for listcat in nodes:
        edges.update(
            (listcat, child)
            for child in cat_store.get_children(listcat, include_listcategories=True)
            if child in nodes
        )

    # add nodes and edges for listpages
    for listcat in list(nodes):  # snapshot, because `nodes` grows inside the loop
        listpages = {
            dbp_store.resolve_redirect(page)
            for page in cat_store.get_resources(listcat)
            if list_util.is_listpage(page)
        }
        # filter out redirects on non-listpages
        listpages = {lp for lp in listpages if list_util.is_listpage(lp)}
        nodes.update(listpages)
        edges.update((listcat, lp) for lp in listpages)

    # make sure that all listpages are in the graph
    nodes.update(list_store.get_listpages())

    # initialise graph
    graph = nx.DiGraph(incoming_graph_data=list(edges))
    # the edge list only creates nodes that take part in an edge; add the rest
    graph.add_nodes_from(nodes.difference(graph.nodes))

    list_graph = ListGraph(graph)
    for node in graph.nodes:
        list_graph._set_name(node, list_util.list2name(node))
        list_graph._set_parts(node, {node})

    # add root node
    graph.add_node(list_graph.root_node)
    list_graph._set_name(list_graph.root_node, cat_util.category2name(list_graph.root_node))
    list_graph._set_parts(list_graph.root_node, {list_graph.root_node})

    return list_graph
def rejects_resource(self, dbp_resource: str) -> bool:
    """Return True if the resource's properties contradict this predicate/value pair.

    A resource is rejected only if ``self.predicate`` is functional, the
    resource has values for that predicate, and neither ``self.value`` nor
    its redirect-resolved form is among those values.
    """
    if not dbp_store.is_functional(self.predicate):
        return False

    props = dbp_store.get_properties(dbp_resource)
    if self.predicate not in props:
        return False

    values = props[self.predicate]
    if self.value in values:
        return False
    # The redirect is only resolved when the raw value did not match.
    return dbp_store.resolve_redirect(self.value) not in values
def accepts_resource(self, dbp_resource: str) -> bool:
    """Return True if the resource's properties contain this predicate/value pair.

    The resource is accepted if it has values for ``self.predicate`` and
    either ``self.value`` or its redirect-resolved form is among them.
    """
    props = dbp_store.get_properties(dbp_resource)
    if self.predicate not in props:
        return False

    values = props[self.predicate]
    if self.value in values:
        return True
    # The redirect is only resolved when the raw value did not match.
    return dbp_store.resolve_redirect(self.value) in values