def filter_spells(G, window):
    """Strip bulky node attributes and drop out-of-window edge actions.

    Every node loses its 'spells' entry and every attribute that is not on
    a short keep-list ('kcore', 'id', 'date', 'times_active',
    'binary_active'). Any interaction list that survives on a node is
    reduced to the actions dated inside the window. On every edge, each
    interaction list named in ``cf.interaction_keys`` is replaced by the
    actions dated inside the window, and 'edgemeta' is reset to an empty
    list. The graph is modified in place.

    :param G: NetworkX graph
    :param window: 2-tuple containing start date and end date
    :returns: the same graph object, after filtering
    """
    kept_attrs = ('kcore', 'id', 'date', 'times_active', 'binary_active')

    #Nodes: prune attributes first, then filter whatever actions remain
    for _node, attrs in G.nodes(data=True):
        nodemeta = []
        if 'spells' in attrs:
            del attrs['spells']
        #Snapshot the doomed keys: a dict cannot shrink while it is iterated
        for name in [key for key in attrs if key not in kept_attrs]:
            del attrs[name]
        for action_key in cf.interaction_keys:
            if action_key not in attrs:
                continue
            in_window = [act for act in attrs[action_key]
                         if cf.in_date(window, act[1])]
            attrs[action_key] = in_window
            if in_window:
                #Record the interaction type that is still present
                nodemeta.append(cf.interaction_types[action_key])

    #Edges: same filtering of the action lists
    #TODO: Do edge spells need to be filtered too?
    for _src, _dst, attrs in G.edges(data=True):
        for action_key in cf.interaction_keys:
            surviving = []
            if len(attrs[action_key]) > 0:
                for act in attrs[action_key]:
                    if len(act) == 1:
                        #A one-element entry replaces the kept list
                        #wholesale and ends the scan for this key
                        surviving = act
                        break
                    if cf.in_date(window, act[1]):
                        surviving.append(act)
            attrs[action_key] = surviving
        attrs['edgemeta'] = []
    return G
def filter_spells(G, window):
    """Drop node spells/actions outside the time window and tag nodes/edges.

    For every node, 'spells' and each interaction list named in
    ``cf.INTERACTIONS`` are reduced to the entries dated inside the window,
    and the node's 'nodemeta' list is extended with the label (first item
    of the ``cf.INTERACTIONS`` value) of every interaction type that still
    has at least one action, plus 'story' for story nodes. For every edge,
    'edgemeta' is rebuilt with one label per in-window action; the edge's
    own action lists are left untouched. The graph is modified in place
    and nothing is returned.

    :param G: NetworkX graph
    :param window: 2-tuple containing start date and end date
    """
    #Nodes: filter spells and actions, collect the surviving action types
    for _node, attrs in G.nodes(data=True):
        attrs['spells'] = [spell for spell in attrs['spells']
                           if cf.in_date(window, spell[0])]
        labels = []
        for key, info in cf.INTERACTIONS.items():
            if key not in attrs:
                continue
            in_window = [act for act in attrs[key]
                         if cf.in_date(window, act[1])]
            attrs[key] = in_window
            if in_window:
                labels.append(info[0])
        #If node is a story, its nodemeta always contains 'story'
        if attrs['type'] == 'story':
            labels.append('story')
        attrs['nodemeta'] = attrs['nodemeta'] + labels

    #Edges: only the metadata is rebuilt
    #TODO: Do edge spells need to be filtered too?
    for _src, _dst, attrs in G.edges(data=True):
        edgemeta = []
        for key, info in cf.INTERACTIONS.items():
            if key in attrs and len(attrs[key]) > 0:
                #One label per in-window action (duplicates are kept)
                edgemeta.extend(info[0] for act in attrs[key]
                                if cf.in_date(window, act[1]))
        attrs['edgemeta'] = edgemeta
def colluding(G, n1, n2, n1_weight, n2_weight, window):
    """Check if two nodes are colluding

    Basic collusion checking algorithm. Determines if actions from one node
    are greater than a threshold of the recipient node's overall weight.
    Algorithm adapted from the following paper:

    H.Shen, Y.Lin, K.Sapra and Z.Li, "Enhancing Collusion Resilience in
    Reputation Systems," in IEEE Transactions on Parallel and Distributed
    Systems, vol.27, no.8, pp.2274-2287, 1 Aug. 2016.

    The edge weight is read from the edge's precomputed 'edgeweight'
    mapping (entry for ``n1``) when present; otherwise it is summed from
    the in-window actions on the edge using ``cf.weights``. It is compared,
    as a percentage, with the smaller of the two node weights (or with the
    non-zero one if the other is zero).

    :param G: NetworkX graph of all interactions in a time window
    :param n1: string representing Node 1 ID
    :param n2: string representing Node 2 ID
    :param n1_weight: float, sum of edge weights of Node 1
    :param n2_weight: float, sum of edge weights of Node 2
    :param window: 2-tuple of start and end dates of time window
    :returns: bool, True if strong suspicion of collusion. False when the
        nodes are identical, share no edge, or both have zero weight.
    """
    if n1 == n2:
        return False
    if not G.has_edge(n1, n2):
        return False

    edge = G[n1][n2]
    if 'edgeweight' in edge:
        edgeweight = edge['edgeweight'][n1]
    else:
        #No cached weight: add up the initiator weight of every action
        #on this edge that falls inside the window
        edgeweight = 0
        for action_key in cf.interaction_keys:
            if action_key in edge:
                for action in edge[action_key]:
                    if cf.in_date(window, action[1]):
                        edgeweight = edgeweight + cf.weights[action_key][0]

    #Compare against the smaller node weight, ignoring a zero weight
    if n1_weight == 0:
        n_weight = n2_weight
    elif n2_weight == 0:
        n_weight = n1_weight
    else:
        n_weight = min(n1_weight, n2_weight)

    #Both nodes weightless: nothing to compare against, and dividing
    #would raise ZeroDivisionError
    if n_weight == 0:
        return False
    return ((edgeweight / n_weight) * 100) > cf.PERCENTAGE_THRESHOLD
def nodeweight(G, node_id, window, suspect_nodes, cumulative):
    """Calculate weight of a node in the interactions graph.

    This calculates a node's 'weight' by looking at the number, type, and
    date of interactions with other nodes in the graph. Every edge incident
    to the node is annotated in place with 'edgeweight', 'initiated' and
    'maxweight', and the 'maxweight' attribute of both endpoint nodes is
    raised accordingly.

    :param G: NetworkX graph of all interactions in a time window
    :param node_id: string ID of node to determine weight of
    :param window: 2-tuple with start and end dates of time window
        (a start of None is treated as a static graph)
    :param suspect_nodes: dictionary mapping node ID's of high activity
        to their overall weight (updated in place)
    :param cumulative: bool; when True each edge weight is scaled by the
        neighbour's 'influence', when False high-activity nodes are
        flagged instead
    :returns: 4-tuple ``(G, action_weights, total_weight, has_edges)``;
        ``has_edges`` is False (and the total 0) when the node has no edges
    """
    #This holds the contribution of each type of action to
    #the node's overall weight
    action_weights = {}
    for meta in cf.meta_networks:
        action_weights[meta] = 0

    edges = G.edges(node_id, data=True)
    edgeweights = []
    if window[0] is not None:
        total_weeks_active = G.nodes[node_id]['times_active']
    else:
        total_weeks_active = None
    influence = 0
    flagged = False

    for (u, v, c) in edges:
        initiated = 0
        #Constant used to decrease the 'value' of multiple interactions
        #between the same two nodes
        depreciating_constant = 0.75
        if 'initiator' in c:
            for edgey in c['initiator']:
                if edgey[0] == node_id:
                    initiated += 1
        overallweight = 0.0

        if cf.WEIGHT_KEY in c:
            #We already have the edge weight, no need to do it manually.
            #HOWEVER we have to separate it from the node ID.
            #It might also just be a float if it's an undirected graph,
            #in which case iterating/indexing raises TypeError.
            try:
                for some_weight in c[cf.WEIGHT_KEY]:
                    if not cf.in_date(window, some_weight[1]):
                        continue
                    sourceweight = c[cf.WEIGHT_KEY][0]
                    if cf.DIRECTED == False:
                        targetweight = sourceweight
                    elif len(c[cf.WEIGHT_KEY]) == 1:
                        targetweight = None
                    else:
                        targetweight = c[cf.WEIGHT_KEY][1]
                    #If A rates B 5 and B rates A 2, it's stored as [A/5,B/2]
                    if sourceweight[0].split('/')[0] == node_id:
                        if targetweight is not None:
                            overallweight += float(targetweight[0].split('/')[1])
                    else:
                        overallweight += float(sourceweight[0].split('/')[1])
                    #Only the first in-window weight entry is needed
                    break
            except TypeError:
                overallweight = c[cf.WEIGHT_KEY]
        elif total_weeks_active is None:
            #This happens when things are static
            overallweight = 1
        else:
            if cumulative:
                #Estimate the 'influence' of this node.
                #For each neighbour node, determine its activity
                #since connecting with this node
                spelldate = cf.to_date(c['first_active'])
                #Whole weeks only: with true division the counter below
                #would never equal a fractional week count and the whole
                #activity history would be summed
                after_weeks = (window[1] - spelldate).days // 7
                count = 0
                active_count = 0
                for x in reversed(G.nodes[v]['binary_active']):
                    if count == after_weeks:
                        break
                    active_count += float(x)
                    count += 1
                if after_weeks > 0:
                    influence = active_count / after_weeks
                else:
                    influence = 0

            action_count = 0
            #Iterate over all actions between two nodes. Increment the
            #overall edge weight based on the type and date of the action
            for action_key in cf.interaction_keys:
                if action_key not in c:
                    continue
                actions_to_keep = []
                for action in c[action_key]:
                    if not cf.in_date(window, action[1]):
                        continue
                    if node_id == action[0].split("-")[0]:
                        #Node initiated the action
                        edge_weight = cf.weights[action_key][0]
                    else:
                        #This node was the recipient of the action
                        edge_weight = cf.weights[action_key][1]
                    actions_to_keep.append(action)
                    action_count += 1
                    #Depreciate weight of the edge as a function of the
                    #number of days old it is: e^(-days/50)
                    tdelta = window[1] - cf.to_date(action[1])
                    agefraction = math.exp(-(float(tdelta.days) / 50))
                    #Also depreciate the value of subsequent interactions
                    #along the same edge by 25% each time
                    contribution = edge_weight * agefraction * depreciating_constant
                    overallweight = overallweight + contribution
                    #Also store the contribution to this edge's weight of
                    #each different action type involved
                    action_weights[cf.interaction_types[action_key]] += round(contribution, 2)
                    depreciating_constant = depreciating_constant * 0.75
                c[action_key] = actions_to_keep

            if overallweight == 0:
                #Fall back to the 'initiator' attvalues if nothing else is
                #there: 3 per entry not initiated by this node. The key is
                #optional, as everywhere else in this function.
                for edgey in c.get('initiator', ()):
                    if edgey[0] != node_id:
                        overallweight += 3

            if cumulative:
                #Reduce edgeweight based on its influence and weeks active
                #of the node.
                #Reduction = (e^influence - 1) * sqrt(weeks active) + 0.1,
                #capped at 1; minimum reduction = 0.1 when influence is 0
                overallweight *= min(((math.exp(influence) - 1)
                                      * math.sqrt(total_weeks_active) + 0.1), 1)
            else:
                #'flag' a node if it has been particularly active
                if action_count > 7 or ('initiator' in c and len(c['initiator']) > 7):
                    flagged = True

        edgeweights.append(overallweight)
        #Adds the edge's weight as an attribute
        if 'edgeweight' not in c:
            c['edgeweight'] = {}
            c['initiated'] = {}
            c['maxweight'] = overallweight
        c['edgeweight'][node_id] = round(overallweight, 2)
        c['initiated'][node_id] = initiated
        c['maxweight'] = round(max(c['maxweight'], overallweight), 2)
        #Propagate the edge maximum to both endpoint nodes
        if 'maxweight' not in G.nodes[u]:
            G.nodes[u]['maxweight'] = c['maxweight']
        G.nodes[u]['maxweight'] = max(G.nodes[u]['maxweight'], c['maxweight'])
        if 'maxweight' not in G.nodes[v]:
            G.nodes[v]['maxweight'] = c['maxweight']
        G.nodes[v]['maxweight'] = max(G.nodes[v]['maxweight'], c['maxweight'])

    if len(edgeweights) == 0:
        return (G, action_weights, 0, False)
    if flagged:
        #High activity node, add it to dictionary
        suspect_nodes[node_id] = sum(edgeweights)
    return (G, action_weights, sum(edgeweights), True)
def make_graphs(G, window, index, communities, commoner_graphs):
    """Generate JSON for NetworkX graph. Update commoner graphs.

    This method generates all necessary information from a NetworkX graph
    representation and returns it in a JSON format. It also updates the
    'dynamic communities' and individual commoner graphs (using
    make_dynamic_communities and build_commoner_data methods).

    For ``index > 0`` the work is done on a deep copy of ``G``; for
    ``index == 0`` (the cumulative graph) ``G`` itself is modified.

    :param G: NetworkX graph of interactions in time window
    :param window: 2-tuple containing start and end dates
    :param index: integer representing time step
    :param communities: list holding NetworkX dynamic communities
        (filled in by make_dynamic_communities method)
    :param commoner_graphs: dictionary mapping each commoner node to its
        interaction history (filled in by build_commoner_data method)
    :returns: tuple containing:
        1. Updated dynamic communities
        2. Updated commoner_graphs
        3. JSON representation of NetworkX graph
    """
    edges_to_remove = []
    tag_edges = []
    tag_nodes = {}
    tag_counts = {}  #Holds counts of each of the tags
    cumulative = (index == 0)
    create_count = 0
    comment_count = 0
    convo_count = 0
    trans_count = 0

    if index > 0:
        graph_copy = copy.deepcopy(G)  #To avoid screwing future iterations
    else:
        graph_copy = G
    nodeiter = G.nodes(data=True)
    edgeiter = G.edges(data=True)

    #Filter edges outside time window and add count stats
    for (u, v, c) in edgeiter:
        edge_exists = any(
            window[0] <= cf.to_date(intervals[0]) < window[1]
            for intervals in c['spells']
        )
        if not edge_exists:
            edges_to_remove.append((u, v, c))
        else:
            #Find node that wrote story, add it to their 'nodemeta'
            if G.nodes[u]["type"] == "story":
                if "create_story" in G.nodes[u]:
                    G.nodes[v]['nodemeta'] = ['story']
            elif G.nodes[v]["type"] == "story":
                if "create_story" in G.nodes[v]:
                    G.nodes[u]['nodemeta'] = ['story']

            #Count how many different edge types there are
            if "create_story" in c:
                if cf.in_date(window, c["create_story"][0][1]):
                    create_count += 1
            if "comment_story" in c:
                for comment in c["comment_story"]:
                    if cf.in_date(window, comment[1]):
                        comment_count += 1
            if "conversation" in c:
                for convo in c["conversation"]:
                    if cf.in_date(window, convo[1]):
                        convo_count += 1
            if "transaction" in c:
                for trans in c["transaction"]:
                    if cf.in_date(window, trans[1]):
                        trans_count += 1

            #Special actions if the edge connects a node to a tag
            if G.nodes[u]["type"] == "tag" or G.nodes[v]["type"] == "tag":
                tag_edges.append((u, v, c))
                if G.nodes[u]["type"] == "tag":
                    tagname = G.nodes[u]["name"]
                else:
                    tagname = G.nodes[v]["name"]
                graph_copy.nodes[u]["tags"].append(tagname)
                graph_copy.nodes[v]["tags"].append(tagname)
                if tagname not in tag_counts:
                    tag_counts[tagname] = 0
                tag_counts[tagname] += 1

    #Remove non-existent edges
    graph_copy.remove_edges_from(edges_to_remove)
    #Also remove the tag edges so not to influence k-core calculation
    graph_copy.remove_edges_from(tag_edges)

    #Filter nodes outside the time window
    nodes_to_remove = []
    zero_nodes = []
    for (n, c) in nodeiter:
        graph_copy.nodes[n]['nodemeta'] = []
        node_exists = False
        graph_copy.nodes[n]['date'] = cf.to_str(window[0])
        c['date'] = cf.to_str(window[0])  #TODO: Do both lines need to be here?
        if 'spells' in c:
            for intervals in c['spells']:
                if cf.in_date(window, intervals[0]):
                    node_exists = True
        if not node_exists:
            nodes_to_remove.append(n)
            if c['type'] == 'commoner':
                zero_nodes.append((n, c))
    graph_copy.remove_nodes_from(nodes_to_remove)

    #Get rid of spells and actions that fall outside the window range
    filter_spells(graph_copy, window)

    #DO THE KCORE CALCULATIONS HERE
    colluders = dx.weighted_core(graph_copy, window)

    #Add the tags back in
    graph_copy.add_edges_from(tag_edges)
    nodeiter = graph_copy.nodes(data=True)
    for (n, c) in nodeiter:
        if c['type'] == 'tag':
            tag_nodes[n] = c

    #Recommender data is built from the cumulative graph
    if not cumulative:
        build_commoner_data(graph_copy, commoner_graphs, zero_nodes)
    else:
        print('making rec data HERE')
        make_recommender_data(copy.deepcopy(graph_copy), window, tag_edges)

    #Remove isolated nodes that exist after removing Basic Income
    graph_copy.remove_nodes_from(list(nx.isolates(graph_copy)))

    #Give each edge the weight of its primary direction
    #TODO: Why not take into account other direction?
    #(named edge_view so the builtin iter() is not shadowed)
    edge_view = graph_copy.edges(data=True)
    for (u, v, c) in edge_view:
        if 'edgeweight' in c:
            c['edgeweight'] = c['edgeweight'][u]
        else:
            c['edgeweight'] = 1

    #Now compare fronts to previous partitions
    if not cumulative:
        partition = make_dynamic_communities(graph_copy, communities, index)
    else:
        partition = community.best_partition(graph_copy, weight='edgeweight')

    #Simple counting of different node types
    c_count = 0
    s_count = 0
    t_count = 0
    l_count = 0
    nodeiter = graph_copy.nodes(data=True)
    for n, c in nodeiter:
        if c['type'] == 'commoner':
            c_count += 1
        elif c['type'] == 'story':
            s_count += 1
        elif c['type'] == 'tag':
            t_count += 1
        elif c['type'] == 'listing':
            l_count += 1
        c['cluster'] = partition[n]

    n_count = nx.number_of_nodes(graph_copy)
    e_count = nx.number_of_edges(graph_copy)
    core_graph_json = json_graph.node_link_data(graph_copy)

    #Tags ordered from most to least used
    tags = [(k, tag_counts[k])
            for k in sorted(tag_counts, key=tag_counts.get, reverse=True)]

    #Additional info about the graph itself
    meta_info = {
        'commoners': c_count,
        'stories': s_count,
        'listings': l_count,
        'tags': t_count,
        'create': create_count,
        'comment': comment_count,
        'convo': convo_count,
        'trans': trans_count,
        'nodenum': n_count,
        'edge_num': e_count,
        'tagcount': tags,
        'date': cf.to_str(window[1]),
        'colluders': colluders,
    }
    core_graph_json.update(meta_info)
    return (communities, commoner_graphs, core_graph_json)
def nodeweight(G,node_id,window,suspect_nodes):
    """Calculate weight of a node in the interactions graph.

    This calculates a node's 'weight' by looking at the number, type, and
    date of interactions with other nodes in the graph. Each weighted edge
    incident to the node is annotated in place with 'edgeweight' and
    'maxweight', the 'maxweight' attribute of both endpoint nodes is
    raised, and ``cf.MAX_WEIGHT`` is raised when a larger edge weight is
    seen. Edges whose computed weight is 0 are skipped entirely.

    :param G: NetworkX graph of all interactions in a time window
    :param node_id: string ID of node to determine weight of
    :param window: 2-tuple with start and end dates of time window
    :param suspect_nodes: dictionary mapping node ID's of high activity
        to their overall weight (updated in place)
    :returns: 3-tuple ``(G, action_weights, total_weight)``; the total is
        0 when no edge contributed any weight
    """
    #This holds the contribution of each type of action to
    #the node's overall weight
    action_weights = {}
    for meta in cf.TYPES:
        action_weights[meta] = 0
    edges = G.edges(node_id,data=True)
    edgeweights = []
    #'if len(active_weeks) > 52' is used throughout to check
    #if this is the aggregate graph
    #Find no. weeks this node is active from start of commonfare.net
    #(one slot per whole week of the window)
    active_weeks = [0] * int((window[1]-window[0]).days / 7)
    if len(active_weeks) > 52: #If this is the aggregate graph...
        for spell in G.nodes[node_id]['spells']:
            nodespell = cf.to_date(spell[1])
            #NOTE(review): a spell dated exactly on window[0] gives index -1,
            #which marks the LAST slot - confirm this is intended
            index = ((nodespell-window[0]).days / 7) -1
            active_weeks[int(index)] = 1
    total_weeks_active = float(sum(active_weeks))
    maxweight = 0
    #influence keeps its previous value between edges unless recomputed
    influence = 0
    flagged = False
    for (u,v,c) in edges:
        #Constant used to decrease the 'value' of multiple interactions
        #between the same two nodes
        depreciating_constant = 0.75
        overallweight = 0.0
        if len(active_weeks) > 52:
            #Estimate the 'influence' of this node.
            #For each neighbour node, determine its activity
            #since connecting with this node
            spelldate = cf.to_date(c['spells'][0][0])
            after_weeks = [0]*int(((window[1] - spelldate).days / 7))
            for spell in G.nodes[v]['spells']:
                nodespell = cf.to_date(spell[1])
                if (nodespell - spelldate).days >= 7: #If this spell happened afterwards...
                    index = ((nodespell-spelldate).days / 7)-1
                    after_weeks[int(index)] = 1
            #Influence = fraction of weeks since the first edge spell in
            #which the neighbour had a spell
            if len(after_weeks) > 0:
                influence = float(sum(after_weeks))/len(after_weeks)
            else:
                influence = 0
        action_count = 0
        #Iterate over all actions between two nodes. Increment the
        #overall edge weight based on the type and date of the action
        #val is indexed as [0] type label, [1] initiator weight,
        #[2] recipient weight
        for k,val in iter(cf.INTERACTIONS.items()):
            if k not in c:
                continue
            actions_to_keep = []
            for action in c[k]:
                if not cf.in_date(window,action[1]):
                    continue
                if node_id == action[0].split("-")[0]:
                    #Node initiated the action
                    edge_weight = val[1]
                else:
                    #This node was the recipient of the action
                    edge_weight = val[2]
                actions_to_keep.append(action)
                action_count += 1
                '''
                Depreciate weight of the edge as a function of the number of days old it is.
                e^(-days/100) seems to work well.
                '''
                tdelta = window[1] - cf.to_date(action[1])
                agefraction = math.exp(-(float(tdelta.days)/100))
                '''
                Also depreciate the value of subsequent interactions along the same edge by
                25% each time
                '''
                overallweight = (overallweight + (edge_weight*agefraction*depreciating_constant))
                '''
                Also store the contribution to this edge's weight of each different action type
                involved
                '''
                action_weights[val[0]] \
                += round((edge_weight*agefraction*depreciating_constant),2)
                depreciating_constant = depreciating_constant*0.75
            #Side effect: the edge keeps only its in-window actions
            c[k] = actions_to_keep
        #Edges without any in-window weight are not annotated or counted
        if overallweight == 0:
            continue
        if len(active_weeks) > 52: #Aggregate graph
            '''
            Reduce edgeweight based on its influence and weeks active of the node.
            Reduction value = (1.3^influence)-1 * square_root(weeks active) + 0.1
            Minimum reduction = 0.1 when influence is 0
            '''
            overallweight *= min(((pow(1.3,influence)-1) * math.sqrt(total_weeks_active) +0.1),1)
        else:
            #'flag' a node if it has been particularly active
            if action_count > cf.FREQUENCY_THRESHOLD:
                flagged = True
        edgeweights.append(overallweight)
        #Adds the edge's weight as an attribute
        if 'edgeweight' not in c:
            c['edgeweight'] = {}
            c['maxweight'] = overallweight
        c['edgeweight'][node_id] = round(overallweight,2)
        c['maxweight'] = round(max(c['maxweight'],overallweight),2)
        if overallweight > cf.MAX_WEIGHT:
            cf.MAX_WEIGHT = overallweight #Update max edge weight for this time step
        #maxweight is tracked here but not read again in this function
        maxweight = max(c['maxweight'],maxweight)
        #Propagate the edge maximum to both endpoint nodes
        if 'maxweight' not in G.nodes[u]:
            G.nodes[u]['maxweight'] = c['maxweight']
        G.nodes[u]['maxweight'] = max(G.nodes[u]['maxweight'],c['maxweight'])
        if 'maxweight' not in G.nodes[v]:
            G.nodes[v]['maxweight'] = c['maxweight']
        G.nodes[v]['maxweight'] = max(G.nodes[v]['maxweight'],c['maxweight'])
    if len(edgeweights) == 0:
        return (G,action_weights,0)
    if flagged:
        #High activity node, add it to dictionary
        suspect_nodes[node_id] = sum(edgeweights)
    return (G,action_weights,sum(edgeweights))