def generate_monthly_trans_graphs(): graphs = get_monthly_venue_graphs() for idx in range(len(graphs)-1, 0, -1): cur_graph = graphs[idx] prev_graph = graphs[idx-1] for edge in prev_graph.Edges(): src_nid = edge.GetSrcNId() dst_nid = edge.GetDstNId() cur_eid = cur_graph.GetEId(src_nid, dst_nid) cur_weight = cur_graph.GetIntAttrDatE(cur_eid, 'trsn_cnt') prev_eid = prev_graph.GetEId(src_nid, dst_nid) prev_weight = prev_graph.GetIntAttrDatE(prev_eid, 'trsn_cnt') diff = cur_weight - prev_weight # cur_graph.AddIntAttrDatE(cur_eid, cur_weight - prev_weight, 'trsn_cnt') # month_coeff = float(idx) month_coeff = 1 cur_graph.AddIntAttrDatE(cur_eid, ORI_FACTOR*cur_weight + month_coeff*MUL_FACTOR*diff, 'trsn_cnt') print "updated trans graph for month ", idx for idx, G in enumerate(graphs): idx_str = str(idx) if idx < 10 else '9'+str(idx) GH.save_graph(G, TRANS_PATH, TRANS_FILE_PREFIX + idx_str)
"""
Currently, the graph has node attributes:
    - vid
    - ckn (insofar, checkin number)
    - sts (start timestamp)
    - ets (end timestamp)
    - lat
    - lng
    - category
    - pcategory
And edge attributes:
    - trsn_cnt
    - duration
"""
# NOTE(review): graph_path, graph_name, venue_path, category_name and
# pcategory_name are not defined in the visible code -- presumably
# parameters or module-level names declared earlier; confirm.
venue_g = GH.load_graph(graph_path, graph_name)
# NOTE(review): category_list and pcategory_list are not read again in the
# visible code.
category_list = VH.get_category_list(venue_path, category_name)
pcategory_list = VH.get_category_list(venue_path, pcategory_name)
# GH.print_node_attr_names(venue_g)
# GH.print_edge_attr_names(venue_g)
# print category_list
GH.print_nids(venue_g)
# create snapshots of the graph - nodes are accurate, but edges aren't
ts_list = TH.gen_ts_list("201201010000", "201301010000", 30)
# The timestamps are processed in reversed order, and the *same* venue_g
# object is filtered again and again, so each saved snapshot is the result
# of all filters applied so far.
ts_list.reverse()
for ts in ts_list:
    # presumably drops nodes by their start timestamp ('sts') relative to
    # ts -- verify against GH.filter_node_sts
    GH.filter_node_sts(venue_g, ts)
    # one file per timestamp, named "sf_venue_<ts>"
    GH.save_graph(venue_g, graph_path, "sf_venue_" + ts)
# NOTE(review): this fragment starts part-way through a definition;
# node_set, node_hash, trsn_list, time_list and graph_path are defined
# outside the visible code.
# Assign consecutive integer node ids (0 .. n-1) to the venue ids in node_set.
for nid, vid in enumerate(node_set):
    node_hash[vid] = nid
trsn_g = snap.TNEANet.New()
# node_id: 0 to n-1
# Create one node per venue and store its venue id as string attribute 'vid'.
for vid, nid in node_hash.iteritems():
    trsn_g.AddNode(nid)
    trsn_g.AddStrAttrDatN(nid, vid, 'vid')
# freq: frequency (cnt) of edge
print trsn_g.GetNodes()
# trsn_list and time_list are indexed in parallel: time_list[idx] is passed
# along with transition trsn_list[idx].
for idx, trsn in enumerate(trsn_list):
    # trsn[0] / trsn[1] are the source / destination venue ids.
    src_nid = node_hash[trsn[0]]
    dst_nid = node_hash[trsn[1]]
    print src_nid, dst_nid
    #TODO: add timestamp filter
    if not trsn_g.IsEdge(src_nid, dst_nid):
        # First time this (src, dst) pair is seen: use the add_* helpers.
        GH.add_edge_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        GH.add_node_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        print "add a new edge, hoho~"
    else:
        # The pair already has an edge: use the update_* helpers instead.
        GH.update_edge_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        GH.update_node_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        print "update node info, haha~"
    # NOTE(review): indentation was lost in this view; this print is assumed
    # to run once per transition -- confirm.
    print idx, trsn
print len(trsn_list)
GH.save_graph(trsn_g, graph_path, 'sf_trsn_graph')
print "succesfully build the graph!"
# NOTE(review): the `return False` below is the tail of a definition that
# begins before the visible code; its indentation is reconstructed -- confirm.
    return False


# Input / output locations, relative to the working directory.
data_path = "../DataSet/Transition/"
graph_path = "../DataSet/GraphData/"
venue_path = "../DataSet/VenueData/"

# trsn_list and time_list are indexed in parallel below
# (time_list[trsn_idx] belongs to trsn_list[trsn_idx]).
trsn_list = VH.load_pickle_file(data_path, "sf_trsn_small_new")
time_list = VH.load_pickle_file(data_path, "sf_time_small_new")
full_venue_dict = VH.GetFullVenueDict(venue_path, "venues-CA-new.json")
category_dict = VH.load_json(venue_path, "category_map.json")
pcategory_dict = VH.load_json(venue_path, "pcategory_map.json")

# vid_map is indexed by venue id and yields the node id used in the graph.
vid_map = create_vid_map(trsn_list)
ts_list = TH.gen_ts_list("201201010000", "201301010000", 30)

# A single graph is created once and never reset, so every snapshot saved
# below also contains everything added for earlier timestamps.
venue_g = snap.TNEANet.New()
# NOTE(review): ts_idx is not used in the visible code.
for ts_idx, ts in enumerate(ts_list):
    for trsn_idx, trsn in enumerate(trsn_list):
        src_ts = time_list[trsn_idx][0]
        # only need check one ts
        dst_ts = time_list[trsn_idx][1]
        # Only the source timestamp decides whether the transition is added
        # for this ts; the whole list is re-scanned for every ts.
        if within_ts_range(ts, src_ts):
            src_nid = vid_map[trsn[0]]
            dst_nid = vid_map[trsn[1]]
            GH.add_node(venue_g, src_nid, trsn[0], src_ts)
            GH.add_node(venue_g, dst_nid, trsn[1], dst_ts)
            GH.add_edge(venue_g, src_nid, dst_nid, time_list[trsn_idx])
    # NOTE(review): indentation was lost in this view; the three statements
    # below are assumed to run once per timestamp (the file name uses ts)
    # -- confirm.
    GH.add_category(venue_g, full_venue_dict, category_dict, pcategory_dict)
    print venue_g.GetNodes()
    GH.save_graph(venue_g, graph_path, "sf_venue_small_" + str(ts))
# NOTE(review): the three statements below are the tail of a definition that
# begins before the visible code; their indentation is reconstructed -- confirm.
        venue_dict[data['id']] = data
    fin.close()
    return venue_dict


# add attributes for graph nodes
def AddNodeAttr(graph, full_venue_dict):
    '''
    Add venue attributes to the nodes of a graph.

    For each node whose 'vid' string attribute is a key of full_venue_dict,
    set four node attributes from that venue's entry:
    1. 'lat', 'lng': latitude and longitude, stored as floats
    2. 'category': taken from the entry's 'category' value
    3. 'pcategory' (parent category): taken from the entry's
       'parentcategory' value

    Nodes whose 'vid' is not in full_venue_dict are left unchanged.
    The graph is modified in place; nothing is returned.
    '''
    for NI in graph.Nodes():
        vid = graph.GetStrAttrDatN(NI.GetId(), 'vid')
        if vid in full_venue_dict:
            # values are passed through float() before being stored
            graph.AddFltAttrDatN(NI.GetId(), float(full_venue_dict[vid]['lat']), 'lat')
            graph.AddFltAttrDatN(NI.GetId(), float(full_venue_dict[vid]['lng']), 'lng')
            graph.AddStrAttrDatN(NI.GetId(), full_venue_dict[vid]['category'], 'category')
            # note the key differs from the attribute name:
            # 'parentcategory' in the dict, 'pcategory' on the node
            graph.AddStrAttrDatN(NI.GetId(), full_venue_dict[vid]['parentcategory'], 'pcategory')
    #GH.save_graph(graph, result_path, result_filename)
    #return None


# NOTE(review): graph_data_path, graph_filename, venue_graph_data_path and
# venue_filename are not defined in the visible code -- presumably declared
# earlier; confirm. AddNodeAttr above is not called in the visible code;
# GH.add_category is used instead.
trsn_g = GH.load_graph(graph_data_path, graph_filename)
full_venue_dict = VH.GetFullVenueDict(venue_graph_data_path, venue_filename)
category_dict = VH.load_json(venue_graph_data_path, 'category_map.json')
pcategory_dict = VH.load_json(venue_graph_data_path, 'pcategory_map.json')
GH.add_category(trsn_g, full_venue_dict, category_dict, pcategory_dict)
# The annotated graph is saved under a new name, 'sf_venue_graph', in the
# same directory the transition graph was loaded from.
GH.save_graph(trsn_g, graph_data_path, 'sf_venue_graph')
print 'successfully build venue_graph!'