def get_monthly_venue_graphs():
    """Load every monthly venue graph stored under ``DIR_PATH``.

    Only directory entries whose name starts with ``FILE_PREFIX`` are
    loaded.  ``os.listdir`` returns names in arbitrary order, so the names
    are sorted first; the returned list is therefore the same on every run
    and on every filesystem.

    Returns:
        list: one graph per matching file, as returned by
        ``GH.load_graph``, ordered by file name.
    """
    # NOTE(review): lexicographic order matches chronological order only if
    # the month part of the file name is zero-padded -- confirm.
    month_files = sorted(
        fn for fn in os.listdir(DIR_PATH) if fn.startswith(FILE_PREFIX)
    )
    return [GH.load_graph(DIR_PATH, fn) for fn in month_files]
def get_monthly_trans_graphs():
    """Load every monthly transition graph stored under ``TRANS_PATH``.

    Every directory entry is loaded except those whose name ends with
    ``'DS_Store'`` (the macOS Finder metadata file).  ``os.listdir``
    returns names in arbitrary order, so the names are sorted first; the
    returned list is therefore the same on every run and on every
    filesystem.

    Returns:
        list: one graph per remaining file, as returned by
        ``GH.load_graph``, ordered by file name.
    """
    # NOTE(review): lexicographic order matches chronological order only if
    # the month part of the file name is zero-padded -- confirm.
    month_files = sorted(
        fn for fn in os.listdir(TRANS_PATH) if not fn.endswith('DS_Store')
    )
    return [GH.load_graph(TRANS_PATH, fn) for fn in month_files]
def main():
    """Run community detection on the small SF venue graph and print it.

    Loads ``sf_venue_center_small`` from ``../CS224W_Dataset/GraphData``,
    builds the weight table, the degree table and the undirected copy of
    the graph (in that order), hands all three to ``detect_community`` and
    prints whatever it returns.
    """
    directed = GH.load_graph('../CS224W_Dataset/GraphData',
                             'sf_venue_center_small')

    # Keep the construction order of the three inputs unchanged.
    weights = create_weight_hash(directed)
    degrees = create_degree_hash(directed)
    undirected = create_undirected(directed)

    # A parenthesised single argument prints exactly like the bare
    # Python 2 print statement.
    print("start community detection")
    found = detect_community(weights, undirected, degrees)
    print(found)
def generate_node_change():
    """Plot the node count of the first 13 snapshots.

    For each index 0..12 the graph named ``snapshot_list[index]`` is
    loaded from ``graph_path``; its node count is printed and recorded.
    The counts are then drawn as a solid line against the snapshot index
    and the figure is shown.
    """
    indices = []
    node_counts = []
    for idx in range(13):
        snapshot = GH.load_graph(graph_path, snapshot_list[idx])
        n_nodes = snapshot.GetNodes()
        print(n_nodes)
        indices.append(idx)
        node_counts.append(n_nodes)

    plt.plot(indices, node_counts, '-')
    plt.show()
def _edge_and_transition_totals(g):
    """Return ``(edge count, summed 'trsn_cnt' edge attribute)`` for *g*."""
    # Start from 0.0 so the total is a float and the later division is
    # true division under Python 2 as well.
    transitions = sum(
        (g.GetIntAttrDatE(edge.GetId(), 'trsn_cnt') for edge in g.Edges()),
        0.0,
    )
    return g.GetEdges(), transitions


def generate_edge_ratio():
    """Plot new edges per new transition between consecutive snapshots.

    Snapshot 0 is the baseline.  For each snapshot ``i`` in 1..12 the
    plotted value is::

        (edges[i] - edges[i-1]) / (transitions[i] - transitions[i-1])

    where ``transitions`` is the sum of the ``'trsn_cnt'`` edge attribute
    over all edges of the snapshot.  When the transition total did not
    change between two snapshots the ratio is undefined; NaN is recorded
    for that point instead of raising ``ZeroDivisionError`` (matplotlib
    leaves a gap in the line there).
    """
    xs, ys = [], []
    prev_edges, prev_transitions = _edge_and_transition_totals(
        GH.load_graph(graph_path, snapshot_list[0]))

    for i in range(1, 13):
        g = GH.load_graph(graph_path, snapshot_list[i])
        edges, transitions = _edge_and_transition_totals(g)

        delta_transitions = transitions - prev_transitions
        if delta_transitions == 0:
            ratio = float('nan')
        else:
            ratio = (edges - prev_edges) / delta_transitions

        xs.append(i)
        ys.append(ratio)
        prev_edges, prev_transitions = edges, transitions

    plt.plot(xs, ys, '-')
    plt.show()
''' Currently, the graph has node attributes: - vid - ckn (number of check-ins so far) - sts (start timestamp) - ets (end timestamp) - lat - lng - category - pcategory And edge attributes: - trsn_cnt - duration ''' venue_g = GH.load_graph(graph_path, graph_name) category_list = VH.get_category_list(venue_path, category_name) pcategory_list = VH.get_category_list(venue_path, pcategory_name) #GH.print_node_attr_names(venue_g) #GH.print_edge_attr_names(venue_g) #print category_list GH.print_nids(venue_g) # create snapshot of the graph - nodes are accurate, but edges aren't center = (37.76010, -122.44779) radius = 0.095 print venue_g.GetNodes() print venue_g.GetEdges() i = 0 for edge in venue_g.Edges():
import snap
import os
import numpy as np
import Helper.GraphHelper as GH
import Helper.AnalysisHelper as AH
import pylab as plt

''' Import Graph: graph is stored in binary form to save space, available in dropbox folder sf_venue_graph_small: A small test graph with only a few venues in sf -- you can use this to test your script first sf_venue_graph: up-to-date venue graph of sf '''
# Input graph location and analysis output directory, both relative to the
# working directory the script is started from.
data_path = '../DataSet/GraphData/'
result_path = '../DataSet/Analysis/'
filename = 'sf_venue_graph'
venue_g = GH.load_graph(data_path, filename)

'''Analysis 1: graph structure - graph size - SCC, bowtie structure '''
# Overall size of the graph.
g_size = venue_g.GetNodes()
edge_size = venue_g.GetEdges()

# Largest strongly connected component and its node count.
max_scc = snap.GetMxScc(venue_g)
num_max_scc_n = max_scc.GetNodes()

# Two BFS trees over the full graph, both rooted at one random node of the
# largest SCC.  Per the Snap.py signature GetBfsTree(Graph, StartNId,
# FollowOut, FollowIn), the first follows out-edges only and the second
# in-edges only.
# NOTE(review): presumably these feed the OUT / IN parts of the bow-tie
# analysis; the code that consumes them is outside this view -- confirm.
rand_node = max_scc.GetRndNId()
out_combined = snap.GetBfsTree( venue_g, rand_node, True, False )
in_combined = snap.GetBfsTree( venue_g, rand_node, False, True )

# Largest weakly connected component.
max_wcc = snap.GetMxWcc( venue_g )
sf_trsn_graph_small: A small test graph with only a few venues in sf -- you can use this to test your script first sf_trsn_graph: up-to-date venue graph of sf '''


def counter_to_arrays(c):
    """Split a counter-like mapping into parallel key and count lists.

    Args:
        c: a mapping that yields its keys when iterated and supports
            ``c[key]`` (the script below passes a ``cl.Counter``).

    Returns:
        list: ``[values, frequencies]`` where ``values[i]`` is a key of
        *c* and ``frequencies[i]`` is ``c[values[i]]``.
    """
    values = []
    frequencies = []
    for n in c:
        values.append(n)
        frequencies.append(c[n])
    return [values, frequencies]


data_path = '../Dataset/GraphData'
result_path = '../Dataset/Analysis/'
graph = GH.load_graph(data_path, 'sf_venue_graph')

# Tally how many nodes share each value of the 'ckn' node attribute, and
# keep the raw per-node values for the fit below.
occurrences = cl.Counter()
dataset = []
for node in graph.Nodes():
    ckn = graph.GetIntAttrDatN(node.GetId(), 'ckn')
    occurrences[ckn] += 1
    dataset.append(ckn)

# x: distinct 'ckn' values, y: number of nodes having each value.
x, y = counter_to_arrays(occurrences)

# NOTE(review): presumably a maximum-likelihood power-law exponent fitted
# with min(dataset) as the lower cut-off, and the matching curve scaled by
# the total node count np.sum(y) -- confirm against AH.get_mle_alpha and
# AH.get_powerlaw_y.
alpha = AH.get_mle_alpha(dataset, min(dataset))
powerlaw_y = AH.get_powerlaw_y(dataset, alpha, min(dataset), np.sum(y))
print "check-in distribution: the estimated alpha is", alpha

# Start a new figure with a logarithmic x axis.
plt.figure()
plt.xscale('log')
node_count = 0 edge_count = 0 for node in g.Nodes(): node_count += 1 un_g.AddNode(node.GetId()) for edge in g.Edges(): edge_count += 1 un_g.AddEdge(edge.GetSrcNId(), edge.GetDstNId()) print "node: %d edge: %d" % (node_count, edge_count) return un_g data_path = '../CS224W_Dataset/GraphData' filename = 'sf_venue_center' trsn_g = GH.load_graph(data_path, filename) un_trsn_g = to_PUNGraph(trsn_g) # try to use the SNAP library function to get the community structure communities = snap.TCnComV() modularity = snap.CommunityCNM(un_trsn_g, communities) print "Community detection complete, modularity score is", modularity # communities = [[3280, 2414, 2662, 2878, 3551], [848, 1106, 1474, 1915, 2089, 3139, 3400, 5759, 6280, 7848]] # fetch venue info and produce a csv for visualization data_path = '../CS224W_Dataset' out_csv = '../CS224W_Dataset/transition-SF-community.csv' venue_hash = VH.GetFullVenueDict(data_path, 'venues-CA-new.json') with open(out_csv, 'w') as fout: a = csv.writer(fout, delimiter=',', quoting=csv.QUOTE_ALL)
venue_dict[data['id']] = data fin.close() return venue_dict # add attributes for graph nodes def AddNodeAttr(graph, full_venue_dict): ''' for each node in the graph, add the following attributes 1. two float values: latitude, longitude 2. category 3. parent-category ''' for NI in graph.Nodes(): vid = graph.GetStrAttrDatN(NI.GetId(), 'vid') if vid in full_venue_dict: graph.AddFltAttrDatN(NI.GetId(), float(full_venue_dict[vid]['lat']), 'lat') graph.AddFltAttrDatN(NI.GetId(), float(full_venue_dict[vid]['lng']), 'lng') graph.AddStrAttrDatN(NI.GetId(), full_venue_dict[vid]['category'], 'category') graph.AddStrAttrDatN(NI.GetId(), full_venue_dict[vid]['parentcategory'], 'pcategory') #GH.save_graph(graph, result_path, result_filename) #return None trsn_g = GH.load_graph(graph_data_path, graph_filename) full_venue_dict = VH.GetFullVenueDict(venue_graph_data_path, venue_filename) category_dict = VH.load_json(venue_graph_data_path, 'category_map.json') pcategory_dict = VH.load_json(venue_graph_data_path, 'pcategory_map.json') GH.add_category(trsn_g, full_venue_dict, category_dict, pcategory_dict) GH.save_graph(trsn_g, graph_data_path, 'sf_venue_graph') print 'successfully build venue_graph!'