def calc_v1_matrix(self, v0):
    """Build a sparse link matrix around the entities in ``v0``.

    For every entity id in ``v0`` an entry of value 1 is added for each id
    returned by ``WikipediaDataset.get_links_to`` (row = the entity,
    column = the returned id), followed by the mirrored entry
    (row = the returned id, column = the entity) for each distinct
    returned id.

    Args:
        v0: iterable of entity ids. The ids are used directly as matrix
            indices, so they are assumed to be non-negative integers.

    Returns:
        A tuple ``(mtx, full_set)`` where ``mtx`` is a square
        ``scipy.sparse.coo_matrix`` whose side is the largest index used
        plus one, and ``full_set`` is the set of every id that appears as
        a row or column index.
    """
    from_list = []
    to_list = []
    value_list = []
    max_id = 0
    wds = WikipediaDataset()

    for from_entity_id in v0:
        link_to = wds.get_links_to(from_entity_id)
        for v in link_to:
            from_list.append(from_entity_id)
            to_list.append(v)
            value_list.append(1)
            # The source id is used as an index too, so it must be counted
            # when sizing the matrix; otherwise coo_matrix raises
            # ValueError whenever the source id exceeds all linked ids.
            max_id = max(max_id, v, from_entity_id)

        # NOTE(review): the name suggests inbound links, but this calls
        # get_links_to again (not get_links_from), which mirrors the
        # entries added above. Confirm whether that is intended.
        link_from = set(wds.get_links_to(from_entity_id))
        for v in link_from:
            from_list.append(v)
            to_list.append(from_entity_id)
            value_list.append(1)
            max_id = max(max_id, v, from_entity_id)

    mtx = sparse.coo_matrix(
        (value_list, (from_list, to_list)),
        shape=(max_id + 1, max_id + 1),
    )
    full_set = set(to_list)
    full_set.update(from_list)
    return mtx, full_set
def calc_v1_matrix(self, entity_id_list):
    """Build a sparse link matrix around the given entities.

    Finds links that are within the set of nodes passed in, plus all those
    inbound to them and outbound from them. For every entity id an entry
    of value 1 is added for each id returned by
    ``WikipediaDataset.get_links_to`` (row = the entity, column = the
    returned id) and for each distinct id returned by
    ``WikipediaDataset.get_links_from`` (row = the returned id,
    column = the entity).

    Args:
        entity_id_list: iterable of entity ids. The ids are used directly
            as matrix indices, so they are assumed to be non-negative
            integers.

    Returns:
        A square ``scipy.sparse.coo_matrix`` whose side is the largest
        index used plus one, or ``None`` (after logging a warning) if
        scipy rejects the indices with a ``ValueError``.
    """
    from_list = []
    to_list = []
    value_list = []
    max_id = 0
    wds = WikipediaDataset()

    for from_entity_id in entity_id_list:
        link_to = wds.get_links_to(from_entity_id)
        for v in link_to:
            from_list.append(from_entity_id)
            to_list.append(v)
            value_list.append(1)
            # The source id is itself a row index, so it has to be counted
            # when sizing the matrix. Tracking only ``v`` made coo_matrix
            # raise "row index exceeds matrix dimensions" whenever the
            # source id was larger than every id it is linked with.
            max_id = max(max_id, v, from_entity_id)

        link_from = set(wds.get_links_from(from_entity_id))
        for v in link_from:
            from_list.append(v)
            to_list.append(from_entity_id)
            value_list.append(1)
            # Here the source id is a column index; same reasoning applies.
            max_id = max(max_id, v, from_entity_id)

    # Kept as a safety net for indices scipy still rejects (for example
    # negative ids); callers already handle a None result.
    try:
        mtx = sparse.coo_matrix(
            (value_list, (from_list, to_list)),
            shape=(max_id + 1, max_id + 1),
        )
    except ValueError as e:
        self.logger.warning(
            'An error occurred returning None rather that a V1 matrix. %s', e)
        return None
    return mtx
def get_links_totally_within(self, entity_id_list):
    """Collect the links whose two endpoints both belong to ``entity_id_list``.

    Each distinct entity id is looked up with
    ``WikipediaDataset.get_links_to``; a returned id is kept only when it
    is also one of the given entities.

    Args:
        entity_id_list: iterable of hashable entity ids; duplicates are
            collapsed before the lookup.

    Returns:
        A tuple ``(from_list, to_list, value_list)`` of equal-length lists.
        For each kept link, ``from_list`` holds the id returned by
        ``get_links_to``, ``to_list`` holds the entity that was queried,
        and ``value_list`` holds the constant 1.
    """
    members = set(entity_id_list)
    dataset = WikipediaDataset()

    sources = []
    targets = []
    for queried_id in members:
        for linked_id in dataset.get_links_to(queried_id):
            if linked_id not in members:
                # The other endpoint is outside the given set: skip it.
                continue
            # NOTE(review): the returned id goes into the "from" list and
            # the queried id into the "to" list; confirm this matches the
            # direction get_links_to is meant to report.
            sources.append(linked_id)
            targets.append(queried_id)

    # Every kept link carries the same weight.
    weights = [1] * len(sources)
    return sources, targets, weights