Esempio n. 1
0
def filter_spells(G,window):
    """Strip node/edge attributes that fall outside a time window.

    Every node is reduced to a small whitelist of attributes (anything
    else, including 'spells', is deleted). Each interaction list still
    present on a node or an edge is then cut down to the actions whose
    date lies inside ``window``. This makes the JSON files much less
    bulky and easier to read. The graph is modified in place.

    :param G: NetworkX graph
    :param window: 2-tuple containing start date and end date
    :returns: the same graph object ``G``, after filtering
    """
    #Node attributes that survive the clean-up
    node_keys_to_keep = ('kcore', 'id', 'date', 'times_active', 'binary_active')

    #First, filter node spells/actions
    for (n,c) in G.nodes(data=True):
        #Get rid of some unnecessaries that can clog the JSON files.
        #The doomed keys are collected first so that the dictionary is
        #not resized while it is being iterated.
        for val in [key for key in c if key not in node_keys_to_keep]:
            del c[val]
        for action_key in cf.interaction_keys:
            if action_key in c:
                c[action_key] = [action for action in c[action_key]
                                 if cf.in_date(window,action[1])]

    #Do the same for edges
    #TODO: Do edge spells need to be filtered too?
    for (u,v,c) in G.edges(data=True):
        for action_key in cf.interaction_keys:
            #Not every edge carries every interaction type; skipping the
            #missing ones avoids a KeyError on the lookup below
            if action_key not in c or len(c[action_key]) == 0:
                continue
            actions_to_keep = []
            for action in c[action_key]:
                if len(action) == 1:
                    #A one-element entry has no date to test. The entry
                    #itself becomes the attribute value and the rest of
                    #the list is dropped.
                    #NOTE(review): confirm consumers expect this shape
                    actions_to_keep = action
                    break
                if cf.in_date(window,action[1]):
                    actions_to_keep.append(action)
            c[action_key] = actions_to_keep
        #Edge metadata is reset to an empty list here
        c['edgemeta'] = []
    return G
Esempio n. 2
0
def filter_spells(G, window):
    """Trim node data to a time window and label nodes/edges by activity.

    For every node, 'spells' and each interaction list named in
    ``cf.INTERACTIONS`` are reduced to the entries dated inside
    ``window``; the label of every interaction type that still has
    entries is appended to the node's 'nodemeta'. For every edge an
    'edgemeta' list is built holding one label per in-window action;
    the edge's own action lists are left untouched.

    The graph is modified in place and nothing is returned.

    :param G: NetworkX graph
    :param window: 2-tuple containing start date and end date
    """
    #Nodes: drop out-of-window spells/actions and collect labels
    for _node, attrs in G.nodes(data=True):
        attrs['spells'] = [spell for spell in attrs['spells']
                           if cf.in_date(window, spell[0])]

        labels = []
        for key, spec in cf.INTERACTIONS.items():
            if key not in attrs:
                continue
            kept = [action for action in attrs[key]
                    if cf.in_date(window, action[1])]
            attrs[key] = kept
            #Only interaction types that are still present get a label
            if len(kept) != 0:
                labels.append(spec[0])

        #If node is a story, its nodemeta always contains 'story'
        if attrs['type'] == 'story':
            labels.append('story')
        attrs['nodemeta'] = attrs['nodemeta'] + labels

    #Edges: only the labels are computed
    #TODO: Do edge spells need to be filtered too?
    for _u, _v, attrs in G.edges(data=True):
        labels = []
        for key, spec in cf.INTERACTIONS.items():
            if key not in attrs or len(attrs[key]) == 0:
                continue
            #One label per action that lies inside the window
            labels.extend(spec[0] for action in attrs[key]
                          if cf.in_date(window, action[1]))
        attrs['edgemeta'] = labels
Esempio n. 3
0
def colluding(G,n1,n2,n1_weight,n2_weight,window):
    """Check if two nodes are colluding

    Basic collusion checking algorithm. Determines if the weight of
    the edge between two nodes is greater than a threshold percentage
    of a node's overall weight.

    Algorithm adapted from the following paper:
    H.Shen, Y.Lin, K.Sapra and Z.Li, "Enhancing Collusion Resilience
    in Reputation Systems," in IEEE Transactions on Parallel and
    Distributed Systems, vol.27, no.8, pp.2274-2287, 1 Aug. 2016.

    :param G: NetworkX graph of all interactions in a time window
    :param n1: string representing Node 1 ID
    :param n2: string representing Node 2 ID
    :param n1_weight: float, sum of edge weights of Node 1
    :param n2_weight: float, sum of edge weights of Node 2
    :param window: 2-tuple of start and end dates of time window
    :returns: bool, True if strong suspicion of collusion. False when
              the nodes are identical, are not connected, or when both
              node weights are zero (no ratio can be computed).
    """
    if n1 == n2 or not G.has_edge(n1,n2):
        return False
    edge = G[n1][n2]
    if 'edgeweight' in edge:
        #A precomputed weight is stored per node ID; use n1's share
        edgeweight = edge['edgeweight'][n1]
    else:
        #Otherwise add up the first weight of every in-window action
        edgeweight = 0
        for action_key in cf.interaction_keys:
            if action_key not in edge:
                continue
            for action in edge[action_key]:
                if cf.in_date(window,action[1]):
                    edgeweight = edgeweight + cf.weights[action_key][0]

    #Compare against the smaller node weight, ignoring a zero weight
    #when the other node has a usable one
    if n1_weight == 0:
        n_weight = n2_weight
    elif n2_weight == 0:
        n_weight = n1_weight
    else:
        n_weight = min(n1_weight,n2_weight)
    if n_weight == 0:
        #Both weights are zero: dividing would raise ZeroDivisionError
        return False
    return ((edgeweight/n_weight)*100) > cf.PERCENTAGE_THRESHOLD
Esempio n. 4
0
def nodeweight(G,node_id,window,suspect_nodes,cumulative):
    """Calculate weight of a node in the interactions graph.

    This calculates a node's 'weight' by looking at the
    number, type, and date of interactions with other nodes
    in the graph. The node's weight is the sum of the weights
    computed for each of its edges.

    Side effects: every edge of the node gets 'edgeweight' and
    'initiated' entries for ``node_id`` plus an updated 'maxweight';
    both endpoints of each edge get an updated 'maxweight'; interaction
    lists on edges weighted action by action are trimmed to the
    actions inside ``window``.

    :param G: NetworkX graph of all interactions in a time window
    :param node_id: string ID of node to determine weight of
    :param window: 2-tuple with start and end dates of time window
    :param suspect_nodes: dictionary mapping node ID's of high
    activity to their overall weight (updated in place)
    :param cumulative: if true, each edge weight is scaled down by the
    neighbour's 'influence'; if false, the node is instead flagged
    when an edge carries more than 7 actions
    :returns: tuple containing:
              1. The graph G
              2. Dictionary of weight contributed per action type
              3. Sum of the node's edge weights (0 if it has no edges)
              4. False if the node has no edges, True otherwise
    """
    #This holds the contribution of each type of action to
    #the node's overall weight
    action_weights = {}
    for meta in cf.meta_networks:
        action_weights[meta] = 0
    edges = G.edges(node_id,data=True)
    edgeweights = []

    #Without a window start there is no activity history to read
    if window[0] is not None:
        total_weeks_active = G.nodes[node_id]['times_active']
    else:
        total_weeks_active = None
    influence = 0
    flagged = False
    for (u,v,c) in edges:
        #Constant used to decrease the 'value' of multiple interactions
        #between the same two nodes
        depreciating_constant = 0.75

        #Number of entries in 'initiator' attributed to this node
        initiated = 0
        if 'initiator' in c:
            for edgey in c['initiator']:
                if edgey[0] == node_id:
                    initiated += 1

        overallweight = 0.0
        #We already have the edge weight, no need to do it manually
        #HOWEVER we have to separate it from the node ID
        if cf.WEIGHT_KEY in c:
            #However it might just be a float if it's an undirected graph,
            #in which case iterating/indexing it raises TypeError
            try:
                for some_weight in c[cf.WEIGHT_KEY]:
                    if not cf.in_date(window,some_weight[1]):
                        continue
                    sourceweight = c[cf.WEIGHT_KEY][0]
                    if cf.DIRECTED == False:
                        targetweight = sourceweight
                    elif len(c[cf.WEIGHT_KEY]) == 1:
                        targetweight = None
                    else:
                        targetweight = c[cf.WEIGHT_KEY][1]
                    #If A rates B 5 and B rates A 2, it's stored as [A/5,B/2]
                    if sourceweight[0].split('/')[0] == node_id:
                        #This node gave the first rating, so it receives
                        #the second one (nothing if there is none)
                        if targetweight is not None:
                            overallweight += float(targetweight[0].split('/')[1])
                    else:
                        overallweight += float(sourceweight[0].split('/')[1])
                    #Only the first in-window entry is used
                    break
            except TypeError:
                overallweight = c[cf.WEIGHT_KEY]

        #This happens when things are static
        elif total_weeks_active is None:
            overallweight = 1

        else:
            if cumulative:
                #Estimate the 'influence' of this node.
                #For each neighbour node, determine its activity
                #since connecting with this node
                spelldate = cf.to_date(c['first_active'])
                #NOTE(review): with true division this is a float, so the
                #equality test below only stops the loop when the day
                #count is a multiple of 7 - confirm this is intended
                after_weeks = ((window[1] - spelldate).days / 7)
                count = 0
                active_count = 0
                for x in reversed(G.nodes[v]['binary_active']):
                    if count == after_weeks:
                        break
                    active_count += float(x)
                    count += 1
                if after_weeks > 0:
                    influence = active_count / after_weeks
                else:
                    influence = 0
            action_count = 0

            #Iterate over all actions between two nodes. Increment the
            #overall edge weight based on the type and date of the action
            for action_key in cf.interaction_keys:
                if action_key not in c:
                    continue
                actions_to_keep = []
                for action in c[action_key]:
                    if not cf.in_date(window,action[1]):
                        continue
                    if node_id == action[0].split("-")[0]: #Node initiated the action
                        edge_weight = cf.weights[action_key][0]
                    else: #This node was the recipient of the action
                        edge_weight = cf.weights[action_key][1]
                    actions_to_keep.append(action)
                    action_count += 1
                    #Depreciate weight of the edge as a function of
                    #the number of days old it is.
                    #e^(-days/50) seems to work well.
                    tdelta = window[1] - cf.to_date(action[1])
                    agefraction = math.exp(-(float(tdelta.days)/50))
                    #Also depreciate the value of subsequent
                    #interactions along the same edge by 25% each time
                    contribution = edge_weight*agefraction*depreciating_constant
                    overallweight = overallweight + contribution
                    #Also store the contribution to this edge's weight of
                    #each different action type involved
                    action_weights[cf.interaction_types[action_key]] \
                    += round(contribution,2)
                    depreciating_constant = depreciating_constant*0.75
                #Only the in-window actions stay on the edge
                c[action_key] = actions_to_keep

            if overallweight == 0:
                #Give the edge a weight based on the 'initiator' attvalues
                #if nothing else is there: 3 per entry that is not
                #attributed to this node. The key is optional (see the
                #checks above and below), so a missing one adds nothing.
                for edgey in c.get('initiator', ()):
                    if edgey[0] != node_id:
                        overallweight += 3
            if cumulative:
                #Reduce edgeweight based on its influence and
                #weeks active of the node. Reduction value =
                #(e^influence)-1 * square_root(weeks active) + 0.1
                #Minimum reduction = 0.1 when influence is 0
                overallweight *= min(((math.exp(influence)-1)
                * math.sqrt(total_weeks_active) +0.1),1)
            else: #'flag' a node if it has been particularly active
                if action_count > 7 or ('initiator' in c and len(c['initiator']) > 7):
                    flagged = True

        edgeweights.append(overallweight)
        #Adds the edge's weight as an attribute
        if 'edgeweight' not in c:
            c['edgeweight'] = {}
            c['initiated'] = {}
            c['maxweight'] = overallweight
        c['edgeweight'][node_id] = round(overallweight,2)
        c['initiated'][node_id] = initiated
        c['maxweight'] = round(max(c['maxweight'],overallweight),2)

        #Both endpoints remember the heaviest edge seen so far
        for endpoint in (u,v):
            node_attrs = G.nodes[endpoint]
            node_attrs['maxweight'] = max(
                node_attrs.get('maxweight',c['maxweight']),c['maxweight'])

    if len(edgeweights) == 0:
        return (G,action_weights,0,False)

    if flagged: #High activity node, add it to dictionary
        suspect_nodes[node_id] = sum(edgeweights)

    return (G,action_weights,sum(edgeweights),True)
Esempio n. 5
0
def make_graphs(G, window, index, communities, commoner_graphs):
    """
    Build the JSON description of one time step of the graph.

    The graph is restricted to the edges and nodes that have a spell
    inside the window, filtered with filter_spells, run through the
    weighted k-core calculation, partitioned into communities and
    finally serialised as node-link JSON together with summary counts.
    The 'dynamic communities' and individual commoner graphs are
    updated through make_dynamic_communities and build_commoner_data.

    Index 0 is treated as the cumulative graph: it is processed in
    place and feeds the recommender data. Any other index works on a
    deep copy of G.

    :param G: NetworkX graph of interactions in time window
    :param window: 2-tuple containing start and end dates
    :param index: integer representing time step
    :param communities: list holding NetworkX dynamic communities
    (filled in by make_dynamic_communities method)
    :param commoner_graphs: dictionary mapping each commoner node to
    its interaction history (filled in by build_commoner_data method)
    :returns: tuple containing:
              1. Updated dynamic communities
              2. Updated commoner_graphs
              3. JSON representation of NetworkX graph
    """

    def _count_in_window(actions):
        #Number of actions whose date lies inside the window
        return sum(1 for action in actions if cf.in_date(window, action[1]))

    cumulative = (index == 0)
    stale_edges = []
    tag_edges = []
    tag_counts = {}  #Holds counts of each of the tags
    create_count = 0
    comment_count = 0
    convo_count = 0
    trans_count = 0

    #To avoid screwing future iterations, later steps work on a copy
    graph_copy = copy.deepcopy(G) if index > 0 else G

    #Filter edges outside time window and add count stats
    for (u, v, c) in G.edges(data=True):
        edge_exists = False
        for interval in c['spells']:
            if window[0] <= cf.to_date(interval[0]) < window[1]:
                edge_exists = True
        if not edge_exists:
            stale_edges.append((u, v, c))
            continue

        u_attrs = G.nodes[u]
        v_attrs = G.nodes[v]

        #Find node that wrote story, add it to their 'nodemeta'
        #NOTE(review): 'nodemeta' is reset to [] for every node of
        #graph_copy further down - confirm this marking is still needed
        if u_attrs["type"] == "story":
            if "create_story" in u_attrs:
                v_attrs['nodemeta'] = ['story']
        elif v_attrs["type"] == "story":
            if "create_story" in v_attrs:
                u_attrs['nodemeta'] = ['story']

        #Count how many different edge types there are
        if "create_story" in c and cf.in_date(window, c["create_story"][0][1]):
            create_count += 1
        if "comment_story" in c:
            comment_count += _count_in_window(c["comment_story"])
        if "conversation" in c:
            convo_count += _count_in_window(c["conversation"])
        if "transaction" in c:
            trans_count += _count_in_window(c["transaction"])

        #Special actions if the edge connects a node to a tag
        if u_attrs["type"] == "tag":
            tag_is_u = True
        elif v_attrs["type"] == "tag":
            tag_is_u = False
        else:
            continue
        tag_edges.append((u, v, c))
        tagname = u_attrs["name"] if tag_is_u else v_attrs["name"]
        graph_copy.nodes[u]["tags"].append(tagname)
        graph_copy.nodes[v]["tags"].append(tagname)
        tag_counts[tagname] = tag_counts.get(tagname, 0) + 1

    #Remove non-existent edges
    graph_copy.remove_edges_from(stale_edges)

    #Also remove the tag edges so not to influence k-core calculation
    graph_copy.remove_edges_from(tag_edges)

    #Filter nodes outside the time window
    stale_nodes = []
    zero_nodes = []
    for (n, c) in G.nodes(data=True):
        copy_attrs = graph_copy.nodes[n]
        copy_attrs['nodemeta'] = []
        copy_attrs['date'] = cf.to_str(window[0])
        c['date'] = cf.to_str(window[0])  #TODO: Do both lines need to be here?

        node_exists = False
        if 'spells' in c:
            for interval in c['spells']:
                if cf.in_date(window, interval[0]):
                    node_exists = True
        if not node_exists:
            stale_nodes.append(n)
            #Inactive commoners are handed to build_commoner_data later
            if c['type'] == 'commoner':
                zero_nodes.append((n, c))
    graph_copy.remove_nodes_from(stale_nodes)

    #Get rid of spells and actions that fall outside the window range
    filter_spells(graph_copy, window)

    #DO THE KCORE CALCULATIONS HERE
    colluders = dx.weighted_core(graph_copy, window)

    #Add the tags back in
    graph_copy.add_edges_from(tag_edges)

    #Collected but not used further inside this function
    tag_nodes = {n: c for (n, c) in graph_copy.nodes(data=True)
                 if c['type'] == 'tag'}

    #Recommender data is built from the cumulative graph
    if cumulative:
        print('making rec data HERE')
        make_recommender_data(copy.deepcopy(graph_copy), window, tag_edges)
    else:
        build_commoner_data(graph_copy, commoner_graphs, zero_nodes)

    #Remove isolated nodes that exist after removing Basic Income
    graph_copy.remove_nodes_from(list(nx.isolates(graph_copy)))

    #Give each edge the weight of its primary direction
    #TODO: Why not take into account other direction?
    for (u, v, c) in graph_copy.edges(data=True):
        c['edgeweight'] = c['edgeweight'][u] if 'edgeweight' in c else 1

    #Now compare fronts to previous partitions
    if cumulative:
        partition = community.best_partition(graph_copy, weight='edgeweight')
    else:
        partition = make_dynamic_communities(graph_copy, communities, index)

    #Simple counting of different node types
    type_totals = {'commoner': 0, 'story': 0, 'tag': 0, 'listing': 0}
    for (n, c) in graph_copy.nodes(data=True):
        if c['type'] in type_totals:
            type_totals[c['type']] += 1
        c['cluster'] = partition[n]

    n_count = nx.number_of_nodes(graph_copy)
    e_count = nx.number_of_edges(graph_copy)
    core_graph_json = json_graph.node_link_data(graph_copy)

    #Tags ordered from most to least used
    tags = sorted(tag_counts.items(), key=lambda pair: pair[1], reverse=True)

    #Additional info about the graph itself
    core_graph_json.update({
        'commoners': type_totals['commoner'],
        'stories': type_totals['story'],
        'listings': type_totals['listing'],
        'tags': type_totals['tag'],
        'create': create_count,
        'comment': comment_count,
        'convo': convo_count,
        'trans': trans_count,
        'nodenum': n_count,
        'edge_num': e_count,
        'tagcount': tags,
        'date': cf.to_str(window[1]),
        'colluders': colluders
    })
    return (communities, commoner_graphs, core_graph_json)
Esempio n. 6
0
def nodeweight(G,node_id,window,suspect_nodes):
    """Calculate weight of a node in the interactions graph.

    The weight is the sum of the weights of the node's edges, each
    built from the number, type and date of the in-window actions on
    that edge. Edges that end up with weight 0 are skipped.

    When the window spans more than 52 weeks the graph is treated as
    the aggregate graph: each edge weight is scaled by the neighbour's
    'influence' and the node's weeks of activity. Otherwise the node
    is flagged when an edge carries more than
    ``cf.FREQUENCY_THRESHOLD`` actions.

    Side effects: interaction lists on the node's edges are trimmed
    to the window, 'edgeweight'/'maxweight' are written on edges and
    endpoints, and ``cf.MAX_WEIGHT`` is raised when exceeded.

    :param G: NetworkX graph of all interactions in a time window
    :param node_id: string ID of node to determine weight of
    :param window: 2-tuple with start and end dates of time window
    :param suspect_nodes: dictionary mapping node ID's of high
    activity to their overall weight (updated in place)
    :returns: tuple containing:
              1. The graph G
              2. Dictionary of weight contributed per action type
              3. Sum of the node's edge weights (0 if there are none)
    """
    #This holds the contribution of each type of action to
    #the node's overall weight
    action_weights = dict.fromkeys(cf.TYPES, 0)
    edges = G.edges(node_id,data=True)
    edgeweights = []

    #One slot per week of the window; more than 52 slots means
    #this is the aggregate graph
    active_weeks = [0] * int((window[1]-window[0]).days / 7)
    aggregate = len(active_weeks) > 52
    if aggregate:
        #Find no. weeks this node is active from start of commonfare.net
        for spell in G.nodes[node_id]['spells']:
            spell_end = cf.to_date(spell[1])
            slot = ((spell_end-window[0]).days / 7) - 1
            active_weeks[int(slot)] = 1
        total_weeks_active = float(sum(active_weeks))

    influence = 0
    flagged = False
    for (u,v,c) in edges:
        #Constant used to decrease the 'value' of multiple interactions
        #between the same two nodes
        depreciating_constant = 0.75
        overallweight = 0.0

        if aggregate:
            #Estimate the 'influence' of this node.
            #For each neighbour node, determine its activity
            #since connecting with this node
            connected_on = cf.to_date(c['spells'][0][0])
            after_weeks = [0] * int((window[1] - connected_on).days / 7)
            for spell in G.nodes[v]['spells']:
                spell_end = cf.to_date(spell[1])
                days_after = (spell_end - connected_on).days
                if days_after < 7:
                    #Only spells at least a week later are counted
                    continue
                slot = (days_after / 7) - 1
                after_weeks[int(slot)] = 1
            if len(after_weeks) > 0:
                influence = float(sum(after_weeks))/len(after_weeks)
            else:
                influence = 0

        #Iterate over all actions between two nodes. Increment the
        #overall edge weight based on the type and date of the action
        action_count = 0
        for k,val in cf.INTERACTIONS.items():
            if k not in c:
                continue
            kept = []
            for action in c[k]:
                if not cf.in_date(window,action[1]):
                    continue
                #val[1] applies when this node initiated the action,
                #val[2] when it was the recipient
                initiated_here = (node_id == action[0].split("-")[0])
                edge_weight = val[1] if initiated_here else val[2]
                kept.append(action)
                action_count += 1

                #Depreciate weight of the edge as a function of
                #the number of days old it is.
                #e^(-days/100) seems to work well.
                tdelta = window[1] - cf.to_date(action[1])
                agefraction = math.exp(-(float(tdelta.days)/100))

                #Also depreciate the value of subsequent
                #interactions along the same edge by 25% each time
                contribution = edge_weight*agefraction*depreciating_constant
                overallweight = overallweight + contribution

                #Also store the contribution to this edge's weight of
                #each different action type involved
                action_weights[val[0]] += round(contribution,2)

                depreciating_constant = depreciating_constant*0.75
            c[k] = kept

        #Edges without any weight do not count towards the node
        if overallweight == 0:
            continue

        if aggregate:
            #Reduce edgeweight based on its influence and
            #weeks active of the node. Reduction value =
            #(1.3^influence)-1 * square_root(weeks active) + 0.1
            #Minimum reduction = 0.1 when influence is 0
            reduction = ((1.3 ** influence) - 1) * math.sqrt(total_weeks_active) + 0.1
            overallweight *= min(reduction,1)
        elif action_count > cf.FREQUENCY_THRESHOLD:
            #'flag' a node if it has been particularly active
            flagged = True

        edgeweights.append(overallweight)

        #Adds the edge's weight as an attribute
        if 'edgeweight' not in c:
            c['edgeweight'] = {}
            c['maxweight'] = overallweight
        c['edgeweight'][node_id] = round(overallweight,2)
        c['maxweight'] = round(max(c['maxweight'],overallweight),2)

        #Update max edge weight for this time step
        if overallweight > cf.MAX_WEIGHT:
            cf.MAX_WEIGHT = overallweight

        #Both endpoints remember the heaviest edge seen so far
        for endpoint in (u,v):
            node_attrs = G.nodes[endpoint]
            node_attrs['maxweight'] = max(
                node_attrs.get('maxweight',c['maxweight']),c['maxweight'])

    if len(edgeweights) == 0:
        return (G,action_weights,0)

    total = sum(edgeweights)
    if flagged: #High activity node, add it to dictionary
        suspect_nodes[node_id] = total
    return (G,action_weights,total)