コード例 #1
0
ファイル: engine.py プロジェクト: heliumpigs/snowball
def recommendations(node_store, node, tags, settings):
    """
    Builds the ranked list of recommendations for a node.

    Each URI yielded by candidates() is loaded from node_store, filtered with
    has_any_tag() against the requested tags and scored with
    recommendation(). Candidates scoring strictly above
    settings["min_threshold"] are kept as [uri, score] pairs. The pairs are
    sorted with _recommendation_comparator and at most settings["max_nodes"]
    of them are returned.
    """
    max_nodes = settings["max_nodes"]
    max_visit = settings["max_visit"]
    min_threshold = settings["min_threshold"]

    scored = []

    for uri in candidates(node_store, node, max_visit):
        target = node_store[model.node_key(uri)]

        # Only candidates carrying at least one requested tag are scored
        if has_any_tag(target, tags):
            rating = recommendation(node_store, node, target, settings)

            # Matches at or below the threshold are dropped
            if rating > min_threshold:
                scored.append([uri, rating])

    # Order with the module's comparator, then trim to the configured size.
    # NOTE: list.sort(cmp=...) is only available on Python 2.
    scored.sort(cmp=_recommendation_comparator)
    return scored[:max_nodes]
コード例 #2
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def get_from(self, uri):
     """
     Serializes the links stored on the node identified by uri.

     Raises a 404 web.HTTPError when the node is not in the database.
     """
     try:
         source = self.db[model.node_key(uri)]
     except KeyError:
         # An unknown node is reported as not found
         raise web.HTTPError(404, 'could not find node')
     else:
         serialize(self, source.links)
コード例 #3
0
 def get(self, from_uri, to_uri):
     """
     Serializes the recommendation score between two nodes.

     Both nodes are loaded through a cache.NodeStore built over self.db.
     Raises a 404 web.HTTPError if either node cannot be found. A falsy
     score from engine.recommendation() is reported as 0.0.
     """
     store = cache.NodeStore(self.db, 4)

     try:
         source = store[model.node_key(from_uri)]
     except KeyError:
         raise web.HTTPError(404, 'could not find from node')

     try:
         target = store[model.node_key(to_uri)]
     except KeyError:
         raise web.HTTPError(404, 'could not find to node')

     # Fall back to 0.0 whenever the engine returns a falsy value
     score = engine.recommendation(store, source, target, self.application.settings) or 0.0

     serialize(self, score)
コード例 #4
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def delete_to(self, uri):
     """
     Deletes the links that point at the node identified by uri.

     The 'links_index' index is queried with the node's hash and the current
     user's account key (presumably restricting the result to nodes owned by
     the current user -- confirm against the index definition). Each returned
     node that still holds a link to uri has that link removed and is written
     back to the database.

     Raises a 404 web.HTTPError when nothing was deleted and the target node
     itself is not in the database.
     """
     hash = scarecrow.ident(model.node_key(uri))
     results = False

     #Iterate through all the linked nodes and delete the link if it still
     #exists
     for node in self.db.index('links_index', 'get', hash, model.account_key(self.current_user)):
         if uri not in node.links:
             continue

         results = True

         del node.links[uri]
         self.db[model.node_key(node.id)] = node

     #If no changes were made, the node might not exist; throw a not found
     #if it doesn't. The check must use the target's hash: the loop variable
     #is unbound when the index returns nothing and otherwise refers to a
     #linking node rather than the target.
     if not results and hash not in self.db:
         raise web.HTTPError(404, 'could not find node')
コード例 #5
0
ファイル: tags.py プロジェクト: heliumpigs/snowball
 def get(self, uri):
     """
     Serializes the tags of the node identified by uri.

     Raises a 404 web.HTTPError when the node is not in the database.
     """
     hash = scarecrow.ident(model.node_key(uri))

     # The lookup happens only inside the try so that a missing node is
     # reported as a 404 instead of escaping as an unhandled error.
     try:
         node = self.db[hash]
     except KeyError:
         raise web.HTTPError(404)

     serialize(self, node.tags)
コード例 #6
0
ファイル: mysql.py プロジェクト: heliumpigs/snowball
 def map(self, db, obj_id, obj):
     """
     Writes one row per link of a node object into this index's table.

     Objects without a 'links' key or whose 'type' is not 'node' are ignored.
     For each link URI the linked object is looked up in self.model; links
     whose target is None are skipped. Each row holds obj_id, the hash of the
     linked node and the hash of the linked node's owner account.
     """
     if 'links' in obj and 'type' in obj and obj['type'] == 'node':
         for link_uri in obj.links:
             link_hash = scarecrow.ident(model.node_key(link_uri))
             link_obj = self.model[link_hash]

             # Identity test: None is a singleton and an overridden __eq__
             # must not influence the check
             if link_obj is None:
                 continue
             link_owner = scarecrow.ident(model.account_key(link_obj.owner))

             # The table name cannot be bound as a parameter; the row values
             # are passed separately to db.execute
             db.execute("INSERT INTO " + self.name + " VALUES (%s, %s, %s)", obj_id, link_hash, link_owner)
コード例 #7
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def put(self, from_node, to_node):
     """
     Creates or updates the link from from_node to to_node.

     The 'weight' and 'tags' request arguments are validated with
     util.check_weight and util.check_tags. An existing link only has the
     supplied fields overwritten; a new link requires 'weight' and defaults
     to an empty tag set. The link's update_date is refreshed, the from node
     is written back and the link is serialized.

     Raises web.HTTPError:
         404 if either node does not exist,
         403 if the current user does not own the from node,
         400 if a new link is requested without a weight.
     """
     from_hash = scarecrow.ident(model.node_key(from_node))
     to_hash = scarecrow.ident(model.node_key(to_node))

     weight = util.check_weight(self.get_argument('weight', None))
     tags = util.check_tags(self.get_argument('tags', None))

     try:
         node = self.db[from_hash]
     except KeyError:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find from node')

     if to_hash not in self.db:
         #Return a not found if the node doesn't exist
         raise web.HTTPError(404, 'could not find to node')

     #Return a forbidden if the current user doesn't own the node
     if node.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the from node')

     if to_node in node.links:
         #Update the link if it already exists
         link = node.links[to_node]
         #Identity test so that a weight of 0 still counts as supplied
         if weight is not None:
             link.weight = weight
         if tags:
             link.tags = tags
     else:
         #Require the weight parameter if the link doesn't exist yet
         if weight is None:
             raise web.HTTPError(400, "requires 'weight' parameter")

         #Create a new link if it doesn't exist yet
         link = model.Storage()
         node.links[to_node] = link

         link.weight = weight
         link.tags = tags if tags else set()

     link.update_date = datetime.now()
     self.db[from_hash] = node
     serialize(self, link)
コード例 #8
0
ファイル: nodes.py プロジェクト: heliumpigs/snowball
 def delete(self, uri):
     """
     Deletes the node identified by uri along with the links that reference
     it from the nodes returned by the 'links_index' index.

     Raises web.HTTPError 404 if the node does not exist and 403 if the
     current user is not its owner.
     """
     node_hash = scarecrow.ident(model.node_key(uri))

     try:
         target = self.db[node_hash]
     except KeyError:
         # Unknown node: report not found
         raise web.HTTPError(404, 'could not find node')

     # Only the owner may delete the node
     if target.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the node')

     # Drop the link held by each node the index reports as linking here
     for linker in self.db.index('links_index', 'get', node_hash):
         if uri in linker.links:
             del linker.links[uri]

         # The linking node is written back whether or not a link was removed
         self.db[model.node_key(linker.id)] = linker

     del self.db[node_hash]
コード例 #9
0
 def get(self, uri):
     """
     Serializes the recommendations for the node identified by uri.

     The optional 'tags' request argument is validated with util.check_tags
     and passed to engine.recommendations(). Nodes are read through a
     cache.NodeStore constructed with max_visit + 1 as its second argument.
     Raises a 404 web.HTTPError when the node cannot be found.
     """
     settings = self.application.settings
     visit_limit = settings['max_visit']

     wanted_tags = util.check_tags(self.get_argument('tags', ''))
     store = cache.NodeStore(self.db, visit_limit + 1)

     try:
         source = store[model.node_key(uri)]
     except KeyError:
         raise web.HTTPError(404, 'could not find node')

     serialize(self, engine.recommendations(store, source, wanted_tags, self.application.settings))
コード例 #10
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def get(self, from_node, to_node):
     """
     Serializes the link stored on from_node under the key to_node.

     Raises a 404 web.HTTPError when the from node does not exist or when it
     holds no link to to_node.
     """
     try:
         source = self.db[model.node_key(from_node)]
     except KeyError:
         # The from node is unknown
         raise web.HTTPError(404, 'could not find node')

     try:
         found = source.links[to_node]
     except KeyError:
         # The node exists but has no such link
         raise web.HTTPError(404, 'could not find link')
     else:
         serialize(self, found)
コード例 #11
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def delete_from(self, uri):
     """
     Removes all links stored on the node identified by uri.

     Raises web.HTTPError 404 if the node does not exist and 403 if the
     current user is not its owner.
     """
     node_hash = scarecrow.ident(model.node_key(uri))

     try:
         source = self.db[node_hash]
     except KeyError:
         # Unknown node: report not found
         raise web.HTTPError(404, 'could not find node')

     # Only the owner may clear the node's links
     if source.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the node')

     # Replace the link mapping with an empty one and persist the node
     source.links = {}
     self.db[node_hash] = source
コード例 #12
0
ファイル: engine.py プロジェクト: heliumpigs/snowball
def bridging(node_store, node):
    """
    Gets the bridging score of a node. This is based on the TANGENT algorithm.

    Every ordered pair (first, second) of the node's cached candidates is
    examined. If first links directly to second, the link weight is added;
    otherwise, if second is among first's own cached candidates, that cached
    value is added. The result is the reciprocal of the mean of the collected
    values.

    Returns 0.0 when no pair contributed or when the collected values sum to
    zero (which would otherwise divide by zero).
    """
    score = 0.0
    total = 0

    for first_uri in node._cache.candidates:
        first_node = node_store[model.node_key(first_uri)]

        for second_uri in node._cache.candidates:
            if second_uri in first_node.links:
                score += first_node.links[second_uri].weight
                total += 1
            elif second_uri in first_node._cache.candidates:
                score += first_node._cache.candidates[second_uri]
                total += 1

    # Guard both denominators: total for the mean, score for the reciprocal
    if total <= 0 or score == 0:
        return 0.0

    return 1 / (score / total)
コード例 #13
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def delete(self, from_node, to_node):
     """
     Deletes the link stored on from_node under the key to_node.

     Raises web.HTTPError 404 if the from node or the link does not exist
     and 403 if the current user does not own the from node.
     """
     source_hash = scarecrow.ident(model.node_key(from_node))

     try:
         source = self.db[source_hash]
     except KeyError:
         # Unknown from node: report not found
         raise web.HTTPError(404, 'could not find node')

     # Only the owner may remove links from the node
     if source.owner != self.current_user:
         raise web.HTTPError(403, 'you do not own the from node')

     # A missing link is reported as not found
     if to_node not in source.links:
         raise web.HTTPError(404, 'could not find link')

     del source.links[to_node]
     self.db[source_hash] = source
コード例 #14
0
ファイル: links.py プロジェクト: heliumpigs/snowball
 def get_to(self, uri):
     """
     Serializes the links that point at the node identified by uri.

     The result maps the id of each linking node to its link. Linking nodes
     come from the 'links_index' index; entries whose node no longer holds a
     link to uri are skipped.

     Raises a 404 web.HTTPError when no links were found and the node itself
     is not in the database.
     """
     hash = scarecrow.ident(model.node_key(uri))

     nodes = self.db.index('links_index', 'get', hash)
     links = {}

     #Iterate through all the linked nodes and ensure the link still exists
     #since the index could be stale
     for node in nodes:
         try:
             link = node.links[uri]
         except KeyError:
             #Only a missing link is ignored; any other error propagates
             continue

         links[node.id] = link

     #If there were no results, check to see that the node exists; if not,
     #return a not found
     if not links and hash not in self.db:
         raise web.HTTPError(404, 'could not find node')

     serialize(self, links)
コード例 #15
0
ファイル: tags.py プロジェクト: heliumpigs/snowball
 def put(self, uri):
     """
     Adds the tags from the 'tags' request argument to the node identified
     by uri and serializes the submitted tags.

     Raises web.HTTPError 400 if no tags were supplied, 404 if the node does
     not exist and 403 if the current user is not its owner.
     """
     node_hash = scarecrow.ident(model.node_key(uri))

     new_tags = util.check_tags(self.get_argument('tags', None))
     if not new_tags:
         raise web.HTTPError(400, "requires 'tags' parameter")

     try:
         target = self.db[node_hash]
     except KeyError:
         # Unknown node: report not found
         raise web.HTTPError(404)

     # Only the owner may tag the node
     if target.owner != self.current_user:
         raise web.HTTPError(403)

     for new_tag in new_tags:
         target.tags.add(new_tag)

     self.db[node_hash] = target
     serialize(self, new_tags)
コード例 #16
0
ファイル: nodes.py プロジェクト: heliumpigs/snowball
def put_node(request, uri):
    """
    Creates the node identified by uri, or updates it if it already exists.

    The optional 'tags' and 'creation_date' request arguments are validated
    with util.check_tags and util.check_datetime. On an existing node only
    the supplied fields are overwritten, and a 403 web.HTTPError is raised
    if the current user is not the owner. A new node defaults to an empty
    tag set, the current time as creation date, no links and an empty,
    unexpired candidate cache. The node's update_date is always refreshed
    before it is stored and serialized.
    """
    node_hash = scarecrow.ident(model.node_key(uri))
    new_tags = util.check_tags(request.get_argument('tags', None))
    created = util.check_datetime(request.get_argument('creation_date', None))

    try:
        entity = request.db[node_hash]

        # Existing node: verify ownership, then apply the supplied fields
        if entity.owner != request.current_user:
            raise web.HTTPError(403, 'you do not own the node')
        if new_tags:
            entity.tags = new_tags
        if created:
            entity.creation_date = created
    except KeyError:
        # Missing node: fill in defaults for anything not supplied
        new_tags = new_tags or set([])
        created = created or datetime.now()

        entity = model.Entity(uri, 'node')
        entity.owner = request.current_user
        entity.creation_date = created
        entity.tags = new_tags
        entity.links = {}

        # Start with an empty recommendation cache
        entity._cache = model.Storage()
        entity._cache.candidates = model.Storage()
        entity._cache.expired = False

    entity.update_date = datetime.now()
    request.db[node_hash] = entity
    serialize(request, entity)
コード例 #17
0
ファイル: tags.py プロジェクト: heliumpigs/snowball
 def delete(self, uri):
     """
     Removes tags from the node identified by uri.

     When the 'tags' request argument is absent, all tags are cleared;
     otherwise each listed tag is removed.

     Raises web.HTTPError 404 if the node does not exist or a listed tag is
     not present on it, and 403 if the current user is not the owner.
     """
     hash = scarecrow.ident(model.node_key(uri))
     delete_tags = util.check_tags(self.get_argument('tags', None))

     try:
         node = self.db[hash]
     except KeyError:
         #return a not found if the node doesn't exist
         raise web.HTTPError(404)

     if node['owner'] != self.current_user:
         #return a forbidden if the current user doesn't own the node
         raise web.HTTPError(403)

     try:
         if delete_tags is None:
             #Reset to an empty set (not a list) so later add/remove calls
             #on node.tags keep working
             node.tags = set()
         else:
             for tag in delete_tags:
                 node.tags.remove(tag)
     except KeyError:
         #A tag that is not on the node is reported as not found
         raise web.HTTPError(404)

     self.db[hash] = node
コード例 #18
0
ファイル: engine.py プロジェクト: heliumpigs/snowball
def candidates(node_store, root, max_visit):
    """
    Returns the candidates that could be used by recommendation algorithms
    for a given node, as a dict mapping each candidate URI to its averaged
    weight.

    The graph is walked level by level starting from the root's own links.
    For every processed node both its outbound links and the inbound links
    reported by the "links_index" index are handed to _visit(), which
    accumulates (weight, count) pairs per candidate. At most max_visit nodes
    are processed. The result is also stored in root._cache.candidates and
    the root is written back to node_store.db.
    """
    owner = scarecrow.ident(model.account_key(root.owner))
    candidates = {}

    # Store a list of already visited links so we don't revisit them
    visited_links = set([uri for uri in root.links])
    visited_links.add(root.id)

    # Store a list of already visited nodes so we don't revisit them. The id
    # is wrapped in a list: set(root.id) would build a set of the id's
    # individual characters instead of a set containing the id.
    visited_nodes = set([root.id])

    # A queue of nodes to process
    queue = [[uri, root.links[uri].weight, 1] for uri in root.links]

    # Keep processing all the items in the queue until we reach max_visit to
    # ensure that the recommendations are returned quickly enough if there are
    # a lot of candidates
    while max_visit > 0:
        next_queue = []
        next_visited_links = set([])

        # Process all nodes in the current queue
        for uri, weight, count in queue:
            if max_visit <= 0:
                break
            if uri in visited_nodes:
                continue

            hash = model.node_key(uri)
            node = node_store[hash]

            # Visit each outbound link in the currently processed node
            for link_uri in node.links:
                link_weight = node.links[link_uri].weight
                _visit(candidates, visited_links, next_visited_links, next_queue, link_uri, weight, link_weight, count)

            # Visit each inbound link to the currently processed node
            for link_node in node_store.db.index("links_index", "get", hash, owner):
                if uri in link_node.links:
                    link_uri = link_node.id
                    # NOTE(review): unlike the outbound case, the path weight
                    # is added into link_weight here -- confirm this is
                    # intended.
                    link_weight = weight + link_node.links[uri].weight
                    _visit(
                        candidates, visited_links, next_visited_links, next_queue, link_uri, weight, link_weight, count
                    )

            max_visit -= 1
            visited_nodes.add(uri)

        # Skip any further logic if we've processed the maximum number of nodes
        if max_visit <= 0 or len(queue) == 0:
            break

        queue = next_queue
        visited_links.update(next_visited_links)

    # Each node has been potentially visited multiple times. Average out the
    # scores to create an overall weight
    for uri in candidates:
        weight, count = candidates[uri]
        candidates[uri] = weight / count

    # Store the results in the cache
    root._cache.candidates = candidates
    node_store.db[model.node_key(root.id)] = root

    return candidates