Ejemplo n.º 1
def recommendations(node_store, node, tags, settings):
    """Gets the recommendations for a given node.

    Every candidate produced by candidates() that carries at least one of the
    requested tags is scored with recommendation(); candidates whose score is
    above settings["min_threshold"] are kept.

    Returns a list of [candidate_uri, score] pairs ordered from highest to
    lowest score and holding at most settings["max_nodes"] entries.
    """
    max_nodes = settings["max_nodes"]
    max_visit = settings["max_visit"]
    min_threshold = settings["min_threshold"]

    recommended = []

    # Get a list of potential candidates for the node
    for candidate_uri in candidates(node_store, node, max_visit):
        to_node = node_store[model.node_key(candidate_uri)]

        # Skip the candidate if it doesn't have any of the requested tags
        if not has_any_tag(to_node, tags):
            continue

        # Get the recommendation score between the two nodes and add it to the
        # list if it is high enough
        score = recommendation(node_store, node, to_node, settings)
        if score > min_threshold:
            recommended.append([candidate_uri, score])

    # Sort best-first so that truncating to max_nodes keeps the strongest
    # recommendations rather than whichever candidates happened to come first
    recommended.sort(key=lambda pair: pair[1], reverse=True)
    return recommended[:max_nodes]
Ejemplo n.º 2
 def get_from(self, uri):
     """Serializes the outbound links of the node identified by uri.

     Raises web.HTTPError(404) if the node is not in the database.
     """
     try:
         node = self.db[model.node_key(uri)]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find node')
     serialize(self, node.links)
Ejemplo n.º 3
 def get(self, from_uri, to_uri):
     """Serializes the recommendation score between two nodes.

     Both nodes are loaded through a cache.NodeStore wrapped around the
     database. A falsy result from engine.recommendation() is reported as 0.0.

     Raises web.HTTPError(404) if either node cannot be found.
     """
     node_store = cache.NodeStore(self.db, 4)
     try:
         from_node = node_store[model.node_key(from_uri)]
     except KeyError:
         raise web.HTTPError(404, 'could not find from node')
     try:
         to_node = node_store[model.node_key(to_uri)]
     except KeyError:
         raise web.HTTPError(404, 'could not find to node')
     rec = engine.recommendation(node_store, from_node, to_node, self.application.settings)
     if not rec:
         #Normalize None/empty results to a numeric score
         rec = 0.0
     serialize(self, rec)
Ejemplo n.º 4
 def delete_to(self, uri):
     """Deletes the links that point at the node identified by uri.

     The linking nodes are taken from the 'links_index' index, queried with
     the node hash and the current user's account key.

     Raises web.HTTPError(404) if nothing was deleted and the node itself is
     not in the database.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     results = False
     #Iterate through all the linked nodes and delete the link if it still
     #exists; an indexed node may no longer hold the link
     for node in self.db.index('links_index', 'get', node_hash, model.account_key(self.current_user)):
         if uri not in node.links:
             continue
         results = True
         del node.links[uri]
         self.db[model.node_key(node.id)] = node
     #If no changes were made, the node might not exist; throw a not found
     #if it doesn't. The check uses the node hash: the loop variable is
     #unbound when the index returns nothing and is never the target node.
     if not results and node_hash not in self.db:
         raise web.HTTPError(404, 'could not find node')
Ejemplo n.º 5
 def get(self, uri):
     """Serializes the tags of the node identified by uri.

     Raises web.HTTPError(404) if the node is not in the database.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     try:
         node = self.db[node_hash]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404)
     serialize(self, node.tags)
Ejemplo n.º 6
 def map(self, db, obj_id, obj):
     """Writes one index row per link of a stored node object.

     Objects without a 'links' entry, or whose 'type' is not 'node', are
     ignored. For every link URI the linked object is looked up in
     self.model; if that lookup yields None the link is skipped. Otherwise a
     row (obj_id, hash of the linked node, hash of the linked node's owner
     account) is inserted into the table named self.name.
     """
     if not ('links' in obj and 'type' in obj and obj['type'] == 'node'):
         return
     for link_uri in obj.links:
         link_hash = scarecrow.ident(model.node_key(link_uri))
         link_obj = self.model[link_hash]
         if link_obj is None:
             continue
         link_owner = scarecrow.ident(model.account_key(link_obj.owner))
         #Only the table name is concatenated; the values are passed as
         #parameters to db.execute
         db.execute("INSERT INTO " + self.name + " VALUES (%s, %s, %s)", obj_id, link_hash, link_owner)
Ejemplo n.º 7
 def put(self, from_node, to_node):
     """Creates or updates the link from from_node to to_node.

     The 'weight' and 'tags' request arguments are validated with
     util.check_weight / util.check_tags. An existing link only has the
     supplied fields overwritten; a new link requires 'weight' and gets an
     empty tag set when no tags are given. The link's update_date is set to
     the current time, the from node is saved and the link is serialized.

     Raises web.HTTPError:
         404 if either node does not exist,
         403 if the current user does not own the from node,
         400 if a new link is requested without a 'weight' argument.
     """
     from_hash = scarecrow.ident(model.node_key(from_node))
     to_hash = scarecrow.ident(model.node_key(to_node))
     weight = util.check_weight(self.get_argument('weight', None))
     tags = util.check_tags(self.get_argument('tags', None))
     try:
         node = self.db[from_hash]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find from node')
     if to_hash not in self.db:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find to node')
     #Return a forbidden if the current user doesn't own the node
     if node.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the from node')
     if to_node in node.links:
         #Update the link if it already exists
         link = node.links[to_node]
         if weight is not None:
             link.weight = weight
         if tags:
             link.tags = tags
     else:
         #Require the weight parameter if the link doesn't exist yet
         if weight is None:
             raise web.HTTPError(400, "requires 'weight' parameter")
         #Create a new link if it doesn't exist yet
         link = model.Storage()
         node.links[to_node] = link
         link.weight = weight
         link.tags = tags if tags else set([])
     link.update_date = datetime.now()
     self.db[from_hash] = node
     serialize(self, link)
Ejemplo n.º 8
 def delete(self, uri):
     """Deletes the node identified by uri together with links pointing at it.

     Every node returned by the 'links_index' index for this node has its
     link to uri removed (when still present) and is written back before the
     node itself is deleted.

     Raises web.HTTPError:
         404 if the node does not exist,
         403 if the current user does not own the node.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     try:
         node = self.db[node_hash]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find node')
     #Return a forbidden if the current user doesn't own the node
     if node.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the node')
     #Iterate through each linked node and delete the link
     for link_node in self.db.index('links_index', 'get', node_hash):
         if uri in link_node.links:
             del link_node.links[uri]
         #The linking node is re-saved even when it no longer held the link
         self.db[model.node_key(link_node.id)] = link_node
     del self.db[node_hash]
Ejemplo n.º 9
 def get(self, uri):
     """Serializes the recommendations for the node identified by uri.

     The optional 'tags' request argument is validated with util.check_tags
     and passed on to engine.recommendations() together with the application
     settings.

     Raises web.HTTPError(404) if the node cannot be found.
     """
     max_visit = self.application.settings['max_visit']
     tags = util.check_tags(self.get_argument('tags', ''))
     node_store = cache.NodeStore(self.db, max_visit + 1)
     try:
         from_node = node_store[model.node_key(uri)]
     except KeyError:
         raise web.HTTPError(404, 'could not find node')
     recs = engine.recommendations(node_store, from_node, tags, self.application.settings)
     serialize(self, recs)
Ejemplo n.º 10
 def get(self, from_node, to_node):
     """Serializes the link from from_node to to_node.

     Raises web.HTTPError(404) if the from node does not exist or if it has
     no link to to_node.
     """
     try:
         node = self.db[model.node_key(from_node)]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find node')
     try:
         link = node.links[to_node]
     except KeyError:
         #Return a not found if the link doesn't exist
         raise web.HTTPError(404, 'could not find link')
     serialize(self, link)
Ejemplo n.º 11
 def delete_from(self, uri):
     """Removes all outbound links of the node identified by uri.

     Raises web.HTTPError:
         404 if the node does not exist,
         403 if the current user does not own the node.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     try:
         node = self.db[node_hash]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find node')
     #Return a forbidden if the current user doesn't own the node
     if node.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the node')
     node.links = {}
     self.db[node_hash] = node
Ejemplo n.º 12
def bridging(node_store, node):
    """Gets the bridging score of a node. This is based on the TANGENT algorithm.

    For every ordered pair of the node's cached candidates, the weight that
    connects the first to the second is accumulated: the direct link weight
    when the first candidate links to the second, otherwise the first
    candidate's own cached candidate weight for the second (if any).

    Returns the inverse of the average accumulated weight, or 0.0 when no
    pair is connected or the accumulated weight is zero.
    """
    score = 0.0
    total = 0

    own_candidates = node._cache.candidates
    for first_uri in own_candidates:
        first_node = node_store[model.node_key(first_uri)]

        for second_uri in own_candidates:
            if second_uri in first_node.links:
                score += first_node.links[second_uri].weight
                total += 1
            elif second_uri in first_node._cache.candidates:
                score += first_node._cache.candidates[second_uri]
                total += 1

    # Guard both divisions: no connected pairs, or weights that sum to zero,
    # would otherwise raise ZeroDivisionError
    if total == 0 or score == 0:
        return 0.0
    return 1 / (score / total)
Ejemplo n.º 13
 def delete(self, from_node, to_node):
     """Deletes the link from from_node to to_node.

     Raises web.HTTPError:
         404 if the from node or the link does not exist,
         403 if the current user does not own the from node.
     """
     from_hash = scarecrow.ident(model.node_key(from_node))
     try:
         node = self.db[from_hash]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find node')
     #Return a forbidden if the current user doesn't own the node
     if node.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the from node')
     if to_node in node.links:
         del node.links[to_node]
     else:
         #Return a not found if the link doesn't exist
         raise web.HTTPError(404, 'could not find link')
     self.db[from_hash] = node
Ejemplo n.º 14
 def get_to(self, uri):
     """Serializes the inbound links of the node identified by uri.

     The result maps the id of each linking node to its link to uri. Linking
     nodes come from the 'links_index' index.

     Raises web.HTTPError(404) if there are no links and the node itself is
     not in the database.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     nodes = self.db.index('links_index', 'get', node_hash)
     links = {}
     #Iterate through all the linked nodes and ensure the link still exists
     #since the index could be stale
     for node in nodes:
         if uri in node.links:
             links[node.id] = node.links[uri]
     #If there were no results, check to see that the node exists; if not,
     #return a not found
     if not links and node_hash not in self.db:
         raise web.HTTPError(404, 'could not find node')
     serialize(self, links)
Ejemplo n.º 15
 def put(self, uri):
     """Adds the tags given in the 'tags' request argument to a node.

     The validated tags are serialized back to the client after the node has
     been saved.

     Raises web.HTTPError:
         400 if no tags were supplied,
         404 if the node does not exist,
         403 if the current user does not own the node.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     tags = util.check_tags(self.get_argument('tags', None))
     if not tags:
         raise web.HTTPError(400, "requires 'tags' parameter")
     try:
         node = self.db[node_hash]
     except KeyError:
         #return a not found if the node doesn't exist
         raise web.HTTPError(404)
     if node.owner != self.current_user:
         #return a forbidden if the current user doesn't own the node
         raise web.HTTPError(403)
     #NOTE(review): the loop body was missing in the source; adding each tag
     #assumes node.tags is a set, as it is when nodes are created - confirm
     for tag in tags:
         node.tags.add(tag)
     self.db[node_hash] = node
     serialize(self, tags)
Ejemplo n.º 16
def put_node(request, uri):
    """Updates an existing or creates a new node identified by the given URI.

    The optional 'tags' and 'creation_date' request arguments are validated
    with util.check_tags / util.check_datetime. An existing node only has the
    supplied fields overwritten; a new node is owned by the current user and
    defaults to an empty tag set and the current time as creation date. In
    both cases update_date is refreshed, the node is saved and serialized.

    Raises web.HTTPError(403) if the node exists and is owned by someone else.
    """
    node_hash = scarecrow.ident(model.node_key(uri))
    tags = util.check_tags(request.get_argument('tags', None))
    date = util.check_datetime(request.get_argument('creation_date', None))
    try:
        node = request.db[node_hash]
        #Update an existing node
        if node.owner != request.current_user:
            raise web.HTTPError(403, 'you do not own the node')
        if tags:
            node.tags = tags
        if date:
            node.creation_date = date
    except KeyError:
        if not tags:
            tags = set([])
        if not date:
            date = datetime.now()
        #Create a new node if it doesn't exist
        node = model.Entity(uri, 'node')
        node.owner = request.current_user
        node.creation_date = date
        node.tags = tags
        node.links = {}
        #Start with an empty candidate cache
        node._cache = model.Storage()
        node._cache.candidates = model.Storage()
        node._cache.expired = False
    node.update_date = datetime.now()
    request.db[node_hash] = node
    serialize(request, node)
Ejemplo n.º 17
 def delete(self, uri):
     """Removes tags from the node identified by uri.

     When the 'tags' request argument yields None all tags are cleared;
     otherwise each listed tag is removed from the node.

     Raises web.HTTPError:
         404 if the node does not exist or a listed tag is not on the node,
         403 if the current user does not own the node.
     """
     node_hash = scarecrow.ident(model.node_key(uri))
     delete_tags = util.check_tags(self.get_argument('tags', None))
     try:
         node = self.db[node_hash]
     except KeyError:
         #return a not found if the node doesn't exist
         raise web.HTTPError(404)
     if node.owner != self.current_user:
         #return a forbidden if the current user doesn't own the node
         raise web.HTTPError(403)
     try:
         if delete_tags is None:
             #Reset to an empty set (not a list) so tags keep the same type
             #they have when nodes are created
             node.tags = set([])
         else:
             #NOTE(review): the loop body was missing in the source;
             #set.remove raises the KeyError handled below - confirm
             for tag in delete_tags:
                 node.tags.remove(tag)
     except KeyError:
         raise web.HTTPError(404)
     self.db[node_hash] = node
Ejemplo n.º 18
def candidates(node_store, root, max_visit):
    """Returns the candidates that could be used by recommendation algorithms
    for a given node.

    Starting from the root's outbound links, nodes are processed level by
    level; for each processed node both its outbound links and the inbound
    links found through the 'links_index' index are handed to _visit(). At
    most max_visit nodes are processed.

    Returns a dict mapping candidate uri to its averaged weight. The same
    dict is stored in root._cache.candidates and the root is written back to
    the database.
    """
    owner = scarecrow.ident(model.account_key(root.owner))
    found = {}

    # Store a list of already visited links so we don't revisit them
    visited_links = set(root.links)

    # Store a list of already visited nodes so we don't revisit them. The id
    # is wrapped in a list: set(root.id) would build a set of its characters.
    visited_nodes = set([root.id])

    # A queue of nodes to process
    queue = [[uri, root.links[uri].weight, 1] for uri in root.links]

    # Keep processing all the items in the queue until we reach max_visit to
    # ensure that the recommendations are returned quickly enough if there are
    # a lot of candidates
    while max_visit > 0:
        next_queue = []
        # NOTE(review): filled by _visit; whether it should be merged into
        # visited_links after each level cannot be told from here - confirm
        next_visited_links = set([])

        # Process all nodes in the current queue
        for uri, weight, count in queue:
            if max_visit <= 0:
                break
            if uri in visited_nodes:
                continue
            # NOTE(review): the source never recorded processed nodes, which
            # made the check above ineffective - confirm this is intended
            visited_nodes.add(uri)

            node_key = model.node_key(uri)
            node = node_store[node_key]

            # Visit each outbound link in the currently processed node
            for link_uri in node.links:
                link_weight = node.links[link_uri].weight
                _visit(found, visited_links, next_visited_links, next_queue, link_uri, weight, link_weight, count)

            # Visit each inbound link to the currently processed node
            for link_node in node_store.db.index("links_index", "get", node_key, owner):
                if uri in link_node.links:
                    link_uri = link_node.id
                    link_weight = weight + link_node.links[uri].weight
                    _visit(
                        found, visited_links, next_visited_links, next_queue, link_uri, weight, link_weight, count
                    )

            max_visit -= 1

        # Skip any further logic if we've processed the maximum number of nodes
        if max_visit <= 0 or len(queue) == 0:
            break

        queue = next_queue

    # Each node has been potentially visited multiple times. Average out the
    # scores to create an overall weight
    for uri in found:
        weight, count = found[uri]
        found[uri] = weight / count

    # Store the results in the cache
    root._cache.candidates = found
    node_store.db[model.node_key(root.id)] = root

    return found