def get_components_by_clustering(self, graph):
    """Return the nodes of ``graph`` grouped by affinity-propagation cluster.

    The second value returned by ``clusterUsingAffinityPropagation`` is
    treated as a sequence of (node, cluster_id) pairs. The pairs are grouped
    on the cluster id with ``GeneralMethods.group_items_by`` and the cluster
    ids are then discarded.

    Returns one tuple of nodes per cluster (Python 2 ``zip`` semantics: the
    outer container is a tuple as well).

    NOTE: if the grouping produces no clusters at all, the final indexing
    into an empty list raises IndexError.
    """
    _, node_cluster_pairs = clusterUsingAffinityPropagation(graph)
    grouped_by_cluster = GeneralMethods.group_items_by(
        node_cluster_pairs, itemgetter(1)
    )
    # For every group keep the cluster id and only the node half of each
    # (node, cluster_id) pair.
    cluster_id_and_nodes = [
        (cluster_id, zip(*members)[0])
        for cluster_id, members in grouped_by_cluster
    ]
    # Transpose and keep the second row: the per-cluster node tuples.
    return zip(*cluster_id_and_nodes)[1]
def plotLocationClustersOnMap(graph):
    """Cluster ``graph`` and draw its intra-cluster edges on a US map.

    ``graph`` nodes are location ids; underscores are replaced by spaces
    before each id is handed to ``getLocationFromLid``. The graph is
    clustered with ``clusterUsingAffinityPropagation``; every cluster id in
    ``range(noOfClusters)`` gets a random colour, and each edge whose two
    endpoints share a cluster is drawn as a great circle in that colour.
    Edges that cross clusters are not drawn. Ends by calling ``plt.show()``.

    Edge attributes are not read, so edges are not required to carry a
    ``'w'`` entry.

    NOTE: an empty graph still fails when the (point, colour) pairs are
    unpacked, because there is nothing to unpack.
    """
    noOfClusters, clusters = clusterUsingAffinityPropagation(graph)
    nodeToClusterIdMap = dict(clusters)
    colorMap = dict(
        (clusterId, GeneralMethods.getRandomColor())
        for clusterId in range(noOfClusters)
    )

    # Parse every node id into coordinates once and reuse the result for the
    # edges below, instead of re-parsing both endpoints of every edge.
    nodeToPoint = dict(
        (node, getLocationFromLid(node.replace('_', ' ')))
        for node in graph.nodes()
    )
    points, colors = zip(*[
        (nodeToPoint[node], colorMap[nodeToClusterIdMap[node]])
        for node in graph.nodes()
    ])

    # s=0 / lw=0: the points are passed with zero marker size and line width;
    # the map object returned by the helper is what the edges are drawn on.
    _, m = plotPointsOnUSMap(points, s=0, lw=0, c=colors, returnBaseMapObject=True)

    for u, v in graph.edges():
        clusterId = nodeToClusterIdMap[u]
        if clusterId != nodeToClusterIdMap[v]:
            continue
        start, end = nodeToPoint[u], nodeToPoint[v]
        # Index 1 is passed before index 0 -- presumably the points are
        # (lat, lon) and the map call wants longitude first; TODO confirm.
        m.drawgreatcircle(start[1], start[0], end[1], end[0],
                          color=colorMap[clusterId], alpha=0.5)
    plt.show()
def getRunningTime(graphs, linear): graphMap = dict(graphs) startingGraphId, endingGraphId = min(graphMap.keys()), max(graphMap.keys()) timeDifference = endingGraphId-startingGraphId LocationGraphs.updateLogarithmicGraphs(graphMap, edgesToKeep=edgesToKeep) dataToReturn = [] for j, intervalInSeconds in enumerate(range(0, timeDifference, int(timeDifference/numberOfPoints))): ts = time.time() graph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=linear, edgesToKeep=edgesToKeep) noOfClusters, clusters = clusterUsingAffinityPropagation(graph) clusters = [[str(c), [l[0]for l in lst]] for c, lst in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))] te = time.time() edgeWeights = sum(data['w'] for _,_,data in graph.edges(data=True)) print graphType, linear, len(clusters), graph.number_of_nodes(), graph.number_of_edges(), edgeWeights, j, te-ts dataToReturn.append({'intervalInSeconds': intervalInSeconds, 'runningTime': te-ts, 'clusters': clusters, 'noOfNodes': graph.number_of_nodes()}) return dataToReturn
def influence_clusters(model_ids, min_cluster_size=15): influence_type = InfluenceMeasuringModels.TYPE_INCOMING_INFLUENCE for model_id in model_ids: digraph_of_location_and_location_similarity = nx.DiGraph() for line_count, (location, tuo_neighbor_location_and_mf_influence_type_and_similarity) in \ enumerate(FileIO.iterateJsonFromFile(tuo_location_and_tuo_neighbor_location_and_mf_influence_type_and_similarity_file%model_id)): # print line_count for neighbor_location, mf_influence_type_to_similarity in tuo_neighbor_location_and_mf_influence_type_and_similarity: if isWithinBoundingBox(getLocationFromLid(location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY) and \ isWithinBoundingBox(getLocationFromLid(neighbor_location.replace('_', ' ')), PARTIAL_WORLD_BOUNDARY): digraph_of_location_and_location_similarity.add_edge(location, neighbor_location, {'w': mf_influence_type_to_similarity[influence_type]}) no_of_clusters, tuo_location_and_cluster_id = clusterUsingAffinityPropagation(digraph_of_location_and_location_similarity) tuo_cluster_id_to_locations = [ (cluster_id, zip(*ito_tuo_location_and_cluster_id)[0]) for cluster_id, ito_tuo_location_and_cluster_id in groupby( sorted(tuo_location_and_cluster_id, key=itemgetter(1)), key=itemgetter(1) ) ] mf_location_to_cluster_id = dict(tuo_location_and_cluster_id) mf_cluster_id_to_cluster_color = dict([(i, GeneralMethods.getRandomColor()) for i in range(no_of_clusters)]) mf_valid_locations_to_color = {} for cluster_id, locations in \ sorted(tuo_cluster_id_to_locations, key=lambda (cluster_id, locations): len(locations))[-10:]: # if len(locations)>min_cluster_size: print cluster_id, len(locations) for location in locations: mf_valid_locations_to_color[location] \ = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[location]] locations, colors = zip(*mf_valid_locations_to_color.iteritems()) locations = [getLocationFromLid(location.replace('_', ' ')) for location in locations] _, m = plotPointsOnWorldMap(locations, 
blueMarble=False, bkcolor='#CFCFCF', c=colors, s=0, returnBaseMapObject=True, lw = 0) for u, v, data in digraph_of_location_and_location_similarity.edges(data=True): if u in mf_valid_locations_to_color and v in mf_valid_locations_to_color \ and mf_location_to_cluster_id[u]==mf_location_to_cluster_id[v]: color, u, v, w = mf_cluster_id_to_cluster_color[mf_location_to_cluster_id[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w'] m.drawgreatcircle(u[1], u[0], v[1], v[0], color=color, alpha=0.6) plt.show()
def load_checkins_graph(checkins_graph_file): graph = nx.Graph() for data in iterateJsonFromFile(checkins_graph_file): (u, v) = data['e'].split('__') graph.add_edge(u , v, {'w': data['w']}) noOfClusters, clusters = clusterUsingAffinityPropagation(graph) # for cluster in clusters: # print len(cluster), cluster nodeToClusterIdMap = dict(clusters) colorMap = dict([(i, GeneralMethods.getRandomColor()) for i in range(noOfClusters)]) clusters = [(c, list(l)) for c, l in groupby(sorted(clusters, key=itemgetter(1)), key=itemgetter(1))] points, colors = zip(*map(lambda l: (getLocationFromLid(l.replace('_', ' ')), colorMap[nodeToClusterIdMap[l]]), graph.nodes())) _, m =plotPointsOnWorldMap(points[:1], s=0, lw=0, c=colors[:1], returnBaseMapObject=True) for u, v, data in graph.edges(data=True): if nodeToClusterIdMap[u]==nodeToClusterIdMap[v]: color, u, v, w = colorMap[nodeToClusterIdMap[u]], getLocationFromLid(u.replace('_', ' ')), getLocationFromLid(v.replace('_', ' ')), data['w'] m.drawgreatcircle(u[1],u[0],v[1],v[0],color=color, alpha=1.5) # plt.title(title) plt.show() print noOfClusters print graph.number_of_edges() print graph.number_of_nodes()
def getQualityScore(graphMap, edgesToKeep, timeDifference): dataToReturn = [] for j, intervalInSeconds in enumerate([1]): intervalInSeconds*=timeDifference linearGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=True, edgesToKeep=edgesToKeep) logGraph = LocationGraphs.combineLocationGraphs(graphMap, startingGraphId, datetime.datetime.fromtimestamp(endingGraphId+1), intervalInSeconds, linear=False, edgesToKeep=edgesToKeep) linearClusters = [[str(c), [l[0]for l in lst]] for c, lst in groupby(sorted(clusterUsingAffinityPropagation(linearGraph)[1], key=itemgetter(1)), key=itemgetter(1))] logarithmicClusters = [[str(c), [l[0]for l in lst]] for c, lst in groupby(sorted(clusterUsingAffinityPropagation(logGraph)[1], key=itemgetter(1)), key=itemgetter(1))] score = LocationGraphs.getClusterQualityScore(linearClusters, logarithmicClusters) print intervalInSeconds, edgesToKeep, score dataToReturn.append(score) return dataToReturn