def _links(db, node):
    """Return the set of link ids attached to *node*.

    Combines the node's own outbound links with the inbound link ids
    found through the ``links_index`` for the node's owner.
    """
    links = set(node.links)
    for link_id in db.index("links_index", "get_ids", node.id,
                            model.account_key(node.owner)):
        # BUG FIX: ``set.update`` iterates its argument, so a string id
        # was being added character-by-character; ``add`` inserts the
        # id itself as a single element.
        links.add(link_id)
    return links
def map(self, db, obj_id, obj):
    """Index the outbound links of a node object.

    When *obj* is a node with links, resolves each linked node and
    inserts an (obj_id, link_hash, link_owner) row into this index's
    table so inbound links can be queried later.  Dangling links whose
    target no longer exists are skipped.
    """
    if 'links' in obj and 'type' in obj and obj['type'] == 'node':
        # NOTE(review): membership is tested with ``'links' in obj`` but
        # the value is read via attribute access (``obj.links``) — obj
        # presumably supports both styles; confirm against the model type.
        for link_uri in obj.links:
            link_hash = scarecrow.ident(model.node_key(link_uri))
            link_obj = self.model[link_hash]
            # Skip links whose target node cannot be resolved.
            # (Idiom fix: compare to None with ``is``, not ``==``.)
            if link_obj is None:
                continue
            link_owner = scarecrow.ident(model.account_key(link_obj.owner))
            db.execute("INSERT INTO " + self.name + " VALUES (%s, %s, %s)",
                       obj_id, link_hash, link_owner)
def delete_to(self, uri):
    """Delete every inbound link pointing at the node identified by *uri*.

    Iterates the nodes that link to *uri* (for the current user) and
    removes the link from each, writing the modified nodes back.

    Raises:
        web.HTTPError: 404 when no links were removed and the target
            node does not exist.
    """
    # Renamed from ``hash`` to avoid shadowing the builtin.
    node_hash = scarecrow.ident(model.node_key(uri))
    changed = False
    # Iterate through all the linked nodes and delete the link if it
    # still exists
    for node in self.db.index('links_index', 'get', node_hash,
                              model.account_key(self.current_user)):
        if uri not in node.links:
            continue
        changed = True
        del node.links[uri]
        self.db[model.node_key(node.id)] = node
    # If no changes were made, the node might not exist; throw a not
    # found if it doesn't.
    # BUG FIX: the original tested the loop variable ``node`` here,
    # which is unbound (NameError) when the index returned no rows —
    # precisely the case this branch handles.  Check the target node's
    # key in the store instead.
    if not changed and model.node_key(uri) not in self.db:
        raise web.HTTPError(404, 'could not find node')
def candidates(node_store, root, max_visit):
    """
    Return candidates that could be used by recommendation algorithms
    for a given node, as a dict mapping each candidate uri to its
    averaged weight.  (The original docstring described a list of
    sub-lists, but the function builds and returns a dict.)

    Traversal is breadth-first over both outbound and inbound links,
    visiting at most *max_visit* nodes so recommendations are returned
    quickly even when there are many candidates.  The result is also
    cached on ``root._cache.candidates`` and the root node is written
    back to the store.
    """
    owner = scarecrow.ident(model.account_key(root.owner))
    candidates = {}
    # Store the already visited links so we don't revisit them
    # (iterating a dict yields its keys, so ``set(root.links)`` suffices).
    visited_links = set(root.links)
    visited_links.add(root.id)
    # Store the already visited nodes so we don't revisit them.
    # BUG FIX: ``set(root.id)`` built a set of the id string's
    # characters, so the "already visited" check never matched a full
    # id; the set must contain the id itself.
    visited_nodes = {root.id}
    # A queue of [uri, accumulated weight, hop count] entries to process
    queue = [[uri, root.links[uri].weight, 1] for uri in root.links]
    # Keep processing all the items in the queue until we reach max_visit
    # to ensure that the recommendations are returned quickly enough if
    # there are a lot of candidates
    while max_visit > 0:
        next_queue = []
        next_visited_links = set()
        # Process all nodes in the current queue
        for uri, weight, count in queue:
            if max_visit <= 0:
                break
            if uri in visited_nodes:
                continue
            # Renamed from ``hash`` to avoid shadowing the builtin.
            node_hash = model.node_key(uri)
            node = node_store[node_hash]
            # Visit each outbound link in the currently processed node
            for link_uri in node.links:
                link_weight = node.links[link_uri].weight
                _visit(candidates, visited_links, next_visited_links,
                       next_queue, link_uri, weight, link_weight, count)
            # Visit each inbound link to the currently processed node
            for link_node in node_store.db.index("links_index", "get",
                                                 node_hash, owner):
                if uri in link_node.links:
                    link_uri = link_node.id
                    # NOTE(review): inbound links fold the current path
                    # weight into link_weight while outbound links pass
                    # the raw link weight — asymmetric; confirm this is
                    # intended by checking ``_visit``.
                    link_weight = weight + link_node.links[uri].weight
                    _visit(candidates, visited_links, next_visited_links,
                           next_queue, link_uri, weight, link_weight,
                           count)
            max_visit -= 1
            visited_nodes.add(uri)
        # Skip any further logic if we've processed the maximum number
        # of nodes or there is nothing left to expand
        if max_visit <= 0 or not queue:
            break
        queue = next_queue
        visited_links.update(next_visited_links)
    # Each node has been potentially visited multiple times.  Average
    # out the scores to create an overall weight
    for uri in candidates:
        weight, count = candidates[uri]
        candidates[uri] = weight / count
    # Store the results in the cache
    root._cache.candidates = candidates
    node_store.db[model.node_key(root.id)] = root
    return candidates