def recommendations(node_store, node, tags, settings):
    """Gets the recommendations for a given node"""
    max_nodes = settings["max_nodes"]
    max_visit = settings["max_visit"]
    min_threshold = settings["min_threshold"]
    recommended = []
    # Get a list of potential candidates for the node
    for candidate_uri in candidates(node_store, node, max_visit):
        to_node = node_store[model.node_key(candidate_uri)]
        # Skip the candidate if it doesn't have any of the requested tags
        if not has_any_tag(to_node, tags):
            continue
        # Get the recommendation score between the two nodes and add it to the
        # list if it is high enough
        score = recommendation(node_store, node, to_node, settings)
        if score > min_threshold:
            recommended.append([candidate_uri, score])
    # Return a sorted list of recommendations; ensure there are at most
    # max_nodes recommendations
    recommended.sort(cmp=_recommendation_comparator)
    return recommended[:max_nodes]

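# Hedged usage sketch for recommendations() (not taken from the handlers
# below): the settings keys mirror the ones the function reads, but the
# numeric values, the URI, and the tag set are hypothetical, and the
# NodeStore sizing simply copies what the recommendations handler does.
def example_recommendations(db):
    settings = {'max_nodes': 10, 'max_visit': 100, 'min_threshold': 0.1}
    node_store = cache.NodeStore(db, settings['max_visit'] + 1)
    node = node_store[model.node_key('http://example.com/article/1')]
    # Assumed to be a plain set of tag names, as has_any_tag() suggests
    tags = set(['music'])
    return recommendations(node_store, node, tags, settings)
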
def get_from(self, uri):
    try:
        node = self.db[model.node_key(uri)]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    serialize(self, node.links)

def get(self, from_uri, to_uri):
    node_store = cache.NodeStore(self.db, 4)
    try:
        from_node = node_store[model.node_key(from_uri)]
    except KeyError:
        raise web.HTTPError(404, 'could not find from node')
    try:
        to_node = node_store[model.node_key(to_uri)]
    except KeyError:
        raise web.HTTPError(404, 'could not find to node')
    rec = engine.recommendation(node_store, from_node, to_node,
                                self.application.settings)
    if not rec:
        rec = 0.0
    serialize(self, rec)

def delete_to(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    results = False
    #Iterate through all the linked nodes and delete the link if it still
    #exists
    for node in self.db.index('links_index', 'get', hash,
                              model.account_key(self.current_user)):
        if not uri in node.links:
            continue
        results = True
        del node.links[uri]
        self.db[model.node_key(node.id)] = node
    #If no changes were made, the node might not exist; throw a not found
    #if it doesn't
    if not results and not hash in self.db:
        raise web.HTTPError(404, 'could not find node')

def get(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    try:
        node = self.db[hash]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404)
    serialize(self, node.tags)

def map(self, db, obj_id, obj):
    if 'links' in obj and 'type' in obj and obj['type'] == 'node':
        #Index every outbound link of the node along with the link owner
        for link_uri in obj['links']:
            link_hash = scarecrow.ident(model.node_key(link_uri))
            link_obj = self.model[link_hash]
            if link_obj == None:
                continue
            link_owner = scarecrow.ident(model.account_key(link_obj.owner))
            db.execute("INSERT INTO " + self.name + " VALUES (%s, %s, %s)",
                       obj_id, link_hash, link_owner)

def put(self, from_node, to_node):
    from_hash = scarecrow.ident(model.node_key(from_node))
    to_hash = scarecrow.ident(model.node_key(to_node))
    weight = util.check_weight(self.get_argument('weight', None))
    tags = util.check_tags(self.get_argument('tags', None))
    try:
        node = self.db[from_hash]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find from node')
    if not to_hash in self.db:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find to node')
    #Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the from node')
    if to_node in node.links:
        #Update the link if it already exists
        link = node.links[to_node]
        if weight != None:
            link.weight = weight
        if tags:
            link.tags = tags
    else:
        #Require the weight parameter if the link doesn't exist yet
        if weight == None:
            raise web.HTTPError(400, "requires 'weight' parameter")
        #Create a new link if it doesn't exist yet
        link = model.Storage()
        node.links[to_node] = link
        link.weight = weight
        link.tags = tags if tags else set([])
    link.update_date = datetime.now()
    self.db[from_hash] = node
    serialize(self, link)

def delete(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    try:
        node = self.db[hash]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    #Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the node')
    #Iterate through each linked node and delete the link
    for link_node in self.db.index('links_index', 'get', hash):
        if uri in link_node.links:
            del link_node.links[uri]
            self.db[model.node_key(link_node.id)] = link_node
    del self.db[hash]

def get(self, uri):
    max_visit = self.application.settings['max_visit']
    tags = util.check_tags(self.get_argument('tags', ''))
    node_store = cache.NodeStore(self.db, max_visit + 1)
    try:
        from_node = node_store[model.node_key(uri)]
    except KeyError:
        raise web.HTTPError(404, 'could not find node')
    recs = engine.recommendations(node_store, from_node, tags,
                                  self.application.settings)
    serialize(self, recs)

def get(self, from_node, to_node):
    try:
        node = self.db[model.node_key(from_node)]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    try:
        link = node.links[to_node]
    except KeyError:
        #Return a not found if the link doesn't exist
        raise web.HTTPError(404, 'could not find link')
    serialize(self, link)

def delete_from(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    try:
        node = self.db[hash]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    #Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the node')
    node.links = {}
    self.db[hash] = node

def bridging(node_store, node):
    """
    Gets the bridging score of a node. This is based on the TANGENT
    algorithm.
    """
    score = 0.0
    total = 0
    for first_uri in node._cache.candidates:
        first_node = node_store[model.node_key(first_uri)]
        for second_uri in node._cache.candidates:
            if second_uri in first_node.links:
                score += first_node.links[second_uri].weight
                total += 1
            elif second_uri in first_node._cache.candidates:
                score += first_node._cache.candidates[second_uri]
                total += 1
    score = 1 / (score / total) if total > 0 else 0.0
    return score

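# Hedged worked example for bridging() (all URIs and weights hypothetical):
# with two cached candidates A and B, where A links to B with weight 0.5,
# B's own candidate cache scores A at 0.25, and neither node links to or
# caches itself, the double loop accumulates score = 0.75 over total = 2
# pairs. The bridging score is then 1 / (0.75 / 2) ~= 2.67, so weaker
# connections between a node's candidates yield a higher score (assuming
# weights are positive, which also keeps the division safe).
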
def delete(self, from_node, to_node):
    from_hash = scarecrow.ident(model.node_key(from_node))
    try:
        node = self.db[from_hash]
    except KeyError:
        #Return a not found if the node doesn't exist
        raise web.HTTPError(404, 'could not find node')
    #Return a forbidden if the current user doesn't own the node
    if node.owner != self.current_user:
        raise web.HTTPError(403, 'you do not own the from node')
    if to_node in node.links:
        del node.links[to_node]
    else:
        #Return a not found if the link doesn't exist
        raise web.HTTPError(404, 'could not find link')
    self.db[from_hash] = node

def get_to(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    nodes = self.db.index('links_index', 'get', hash)
    links = {}
    #Iterate through all the linked nodes and ensure the link still exists
    #since the index could be stale
    for node in nodes:
        try:
            link = node.links[uri]
            links[node.id] = link
        except KeyError:
            pass
    #If there were no results, check to see that the node exists; if not,
    #return a not found
    if len(links) == 0 and not hash in self.db:
        raise web.HTTPError(404, 'could not find node')
    serialize(self, links)

def put(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    tags = util.check_tags(self.get_argument('tags', None))
    if not tags:
        raise web.HTTPError(400, "requires 'tags' parameter")
    try:
        node = self.db[hash]
    except KeyError:
        #return a not found if the node doesn't exist
        raise web.HTTPError(404)
    if node.owner != self.current_user:
        #return a forbidden if the current user doesn't own the node
        raise web.HTTPError(403)
    for tag in tags:
        node.tags.add(tag)
    self.db[hash] = node
    serialize(self, tags)

def put_node(request, uri):
    """Updates an existing or creates a new node identified by the given URI"""
    hash = scarecrow.ident(model.node_key(uri))
    tags = util.check_tags(request.get_argument('tags', None))
    date = util.check_datetime(request.get_argument('creation_date', None))
    try:
        node = request.db[hash]
        #Update an existing node
        if node.owner != request.current_user:
            raise web.HTTPError(403, 'you do not own the node')
        if tags:
            node.tags = tags
        if date:
            node.creation_date = date
    except KeyError:
        if not tags:
            tags = set([])
        if not date:
            date = datetime.now()
        #Create a new node if it doesn't exist
        node = model.Entity(uri, 'node')
        node.owner = request.current_user
        node.creation_date = date
        node.tags = tags
        node.links = {}
        node._cache = model.Storage()
        node._cache.candidates = model.Storage()
        node._cache.expired = False
    node.update_date = datetime.now()
    request.db[hash] = node
    serialize(request, node)

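# Hedged sketch of the entity put_node() persists for a brand new URI. The
# field names come from the code above; the URI, owner, and timestamps are
# hypothetical, and node.id holding the URI is an assumption based on how the
# other handlers call model.node_key(node.id).
#
#   node.id            = 'http://example.com/article/42'
#   node.owner         = 'alice'
#   node.creation_date = datetime(2011, 1, 1)   # now() when not supplied
#   node.tags          = set(['music'])         # set([]) when not supplied
#   node.links         = {}                     # filled in by the link PUT handler
#   node._cache.candidates = model.Storage()    # filled in by candidates()
#   node._cache.expired    = False
#   node.update_date   = datetime.now()
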
def delete(self, uri):
    hash = scarecrow.ident(model.node_key(uri))
    delete_tags = util.check_tags(self.get_argument('tags', None))
    try:
        node = self.db[hash]
    except KeyError:
        #return a not found if the node doesn't exist
        raise web.HTTPError(404)
    if node.owner != self.current_user:
        #return a forbidden if the current user doesn't own the node
        raise web.HTTPError(403)
    try:
        if delete_tags == None:
            #Clear all tags if no specific tags were requested
            node.tags = set([])
        else:
            #Otherwise remove only the requested tags; a missing tag raises
            #KeyError and results in a not found
            for tag in delete_tags:
                node.tags.remove(tag)
    except KeyError:
        raise web.HTTPError(404)
    self.db[hash] = node

def candidates(node_store, root, max_visit):
    """
    Returns the candidates that could be used by recommendation algorithms
    for a given node, as a dict mapping each candidate uri to its resized
    weight.
    """
    owner = scarecrow.ident(model.account_key(root.owner))
    candidates = {}
    # Store a list of already visited links so we don't revisit them
    visited_links = set([uri for uri in root.links])
    visited_links.add(root.id)
    # Store a list of already visited nodes so we don't revisit them
    visited_nodes = set([root.id])
    # A queue of nodes to process
    queue = [[uri, root.links[uri].weight, 1] for uri in root.links]
    # Keep processing all the items in the queue until we reach max_visit to
    # ensure that the recommendations are returned quickly enough if there are
    # a lot of candidates
    while max_visit > 0:
        next_queue = []
        next_visited_links = set([])
        # Process all nodes in the current queue
        for uri, weight, count in queue:
            if max_visit <= 0:
                break
            if uri in visited_nodes:
                continue
            hash = model.node_key(uri)
            node = node_store[hash]
            # Visit each outbound link in the currently processed node
            for link_uri in node.links:
                link_weight = node.links[link_uri].weight
                _visit(candidates, visited_links, next_visited_links,
                       next_queue, link_uri, weight, link_weight, count)
            # Visit each inbound link to the currently processed node
            for link_node in node_store.db.index("links_index", "get", hash,
                                                 owner):
                if uri in link_node.links:
                    link_uri = link_node.id
                    link_weight = weight + link_node.links[uri].weight
                    _visit(candidates, visited_links, next_visited_links,
                           next_queue, link_uri, weight, link_weight, count)
            max_visit -= 1
            visited_nodes.add(uri)
        # Skip any further logic if we've processed the maximum number of
        # nodes
        if max_visit <= 0 or len(queue) == 0:
            break
        queue = next_queue
        visited_links.update(next_visited_links)
    # Each node has been potentially visited multiple times. Average out the
    # scores to create an overall weight
    for uri in candidates:
        weight, count = candidates[uri]
        candidates[uri] = weight / count
    # Store the results in the cache
    root._cache.candidates = candidates
    node_store.db[model.node_key(root.id)] = root
    return candidates

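# Hedged worked example of the averaging step in candidates() (the URI and
# weights are hypothetical): if 'http://example.com/b' was reached twice
# during the breadth-first walk, once with a propagated weight of 0.8 and
# once with 0.4, _visit would have left
# candidates['http://example.com/b'] = [1.2, 2], and the averaging loop
# collapses that pair to 1.2 / 2 = 0.6 before the result is cached on the
# root node.
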