Esempio n. 1
0
def get_monthly_trans_graphs():
  """Load every monthly transition graph stored under TRANS_PATH.

  Every directory entry whose name does not end with 'DS_Store' is handed to
  GH.load_graph. os.listdir yields names in arbitrary order, so the names are
  sorted first to give callers a deterministic, name-ordered list.

  Returns:
    list: one loaded graph per file, ordered by file name.
  """
  file_month_list = sorted(
      fn for fn in os.listdir(TRANS_PATH) if not fn.endswith('DS_Store'))
  return [GH.load_graph(TRANS_PATH, fn) for fn in file_month_list]
Esempio n. 2
0
def get_monthly_venue_graphs():
  """Load every monthly venue graph stored under DIR_PATH.

  Only directory entries whose name starts with FILE_PREFIX are loaded.
  os.listdir yields names in arbitrary order, so the names are sorted first;
  callers that compare consecutive list entries then see a stable,
  name-ordered sequence.

  Returns:
    list: one loaded graph per matching file, ordered by file name.
  """
  file_month_list = sorted(
      fn for fn in os.listdir(DIR_PATH) if fn.startswith(FILE_PREFIX))
  return [GH.load_graph(DIR_PATH, fn) for fn in file_month_list]
def main():
	"""Run community detection on the small SF venue graph and print the result.

	Loads 'sf_venue_center_small', feeds it through create_weight_hash,
	create_degree_hash and create_undirected, and passes the three results
	to detect_community.
	"""
	graph = GH.load_graph('../CS224W_Dataset/GraphData', 'sf_venue_center_small')

	# The three helper results are built in this order before detection starts.
	weights = create_weight_hash(graph)
	degrees = create_degree_hash(graph)
	undirected = create_undirected(graph)

	print("start community detection")
	found = detect_community(weights, undirected, degrees)
	print(found)
def generate_node_change():
  """Plot the node count of the first 13 entries of snapshot_list.

  Each snapshot is loaded from graph_path; its node count is printed and
  then drawn against the snapshot index as a line chart.
  """
  indices, node_counts = [], []
  for idx in range(13):
    snapshot = GH.load_graph(graph_path, snapshot_list[idx])
    print(snapshot.GetNodes())
    indices.append(idx)
    node_counts.append(snapshot.GetNodes())

  plt.plot(indices, node_counts, '-')
  plt.show()
def generate_edge_ratio():
  """Plot edge-count growth relative to growth of the summed 'trsn_cnt'.

  For snapshots 1..12 of snapshot_list the plotted value is
  (edges - previous edges) / (trsn_cnt total - previous total), where the
  total is the sum of the 'trsn_cnt' edge attribute over all edges.
  Snapshot 0 only provides the first baseline.
  """
  steps, ratios = [], []
  last_edges, last_total = None, None

  for step in range(13):
    snapshot = GH.load_graph(graph_path, snapshot_list[step])
    edge_count = snapshot.GetEdges()
    # Accumulate as float so the ratio below is a true division.
    trsn_total = 0.0
    for edge in snapshot.Edges():
      trsn_total += snapshot.GetIntAttrDatE(edge.GetId(), 'trsn_cnt')

    if step > 0:
      steps.append(step)
      ratios.append((edge_count - last_edges) / (trsn_total - last_total))
    last_total = trsn_total
    last_edges = edge_count

  plt.plot(steps, ratios, '-')
  plt.show()
Esempio n. 6
0
def generate_monthly_trans_graphs():
  graphs = get_monthly_venue_graphs()
  for idx in range(len(graphs)-1, 0, -1):
    cur_graph = graphs[idx]
    prev_graph = graphs[idx-1]
    for edge in prev_graph.Edges():
      src_nid = edge.GetSrcNId()
      dst_nid = edge.GetDstNId()

      cur_eid = cur_graph.GetEId(src_nid, dst_nid)
      cur_weight = cur_graph.GetIntAttrDatE(cur_eid, 'trsn_cnt')
      prev_eid = prev_graph.GetEId(src_nid, dst_nid)
      prev_weight = prev_graph.GetIntAttrDatE(prev_eid, 'trsn_cnt')

      diff = cur_weight - prev_weight
      # cur_graph.AddIntAttrDatE(cur_eid, cur_weight - prev_weight, 'trsn_cnt')
      # month_coeff = float(idx)
      month_coeff = 1
      cur_graph.AddIntAttrDatE(cur_eid, ORI_FACTOR*cur_weight + month_coeff*MUL_FACTOR*diff, 'trsn_cnt')
    print "updated trans graph for month ", idx

  for idx, G in enumerate(graphs):
    idx_str = str(idx) if idx < 10 else '9'+str(idx)
    GH.save_graph(G, TRANS_PATH, TRANS_FILE_PREFIX + idx_str)
Esempio n. 7
0
'''
    Currently, the graph has node attribute:
    - vid
    - ckn (insofar, checkin number)
    - sts (start timestamp)
    - ets (end timestamp)
    - lat
    - lng
    - category
    - pcategory
    
    And edge attribute:
    - trsn_cnt
    - duration
'''
# Load the venue graph and the category / pcategory lists from disk.
venue_g = GH.load_graph(graph_path, graph_name)
category_list = VH.get_category_list(venue_path, category_name)
pcategory_list = VH.get_category_list(venue_path, pcategory_name)
#GH.print_node_attr_names(venue_g)
#GH.print_edge_attr_names(venue_g)
#print category_list
GH.print_nids(venue_g)


# create snapshot of the graph - nodes are accurate, but edges aren't
# NOTE(review): center looks like a (lat, lng) pair and radius like a distance
# in degrees — confirm against the code that consumes them.
center = (37.76010, -122.44779)
radius = 0.095
# Print the node count, then the edge count, of the loaded graph.
print venue_g.GetNodes()
print venue_g.GetEdges()
# Counter initialised for the edge loop that follows.
i = 0
for edge in venue_g.Edges():
Esempio n. 8
0
import snap
import os
import numpy as np
import Helper.GraphHelper as GH
import Helper.AnalysisHelper as AH
import pylab as plt

''' Import Graph: graph is stored in binary form to save space, available in dropbox folder
    sf_venue_graph_small: A small test graph with only a few venues in sf -- you can use this to test your script first
    sf_venue_graph: up-to-date venue graph of sf
'''
# Input graph location and output directory for analysis results.
data_path = '../DataSet/GraphData/'
result_path = '../DataSet/Analysis/'
filename = 'sf_venue_graph'
venue_g = GH.load_graph(data_path, filename)


'''Analysis 1: graph structure
   - graph size
   - SCC, bowtie structure
'''
# Node and edge counts of the whole graph.
g_size = venue_g.GetNodes()
edge_size = venue_g.GetEdges()
# Largest strongly connected component and its node count.
max_scc = snap.GetMxScc(venue_g)
num_max_scc_n = max_scc.GetNodes()
# Two BFS trees rooted at a random node of the largest SCC. Per SNAP's
# GetBfsTree(Graph, StartNId, FollowOut, FollowIn), the first follows
# out-links only and the second follows in-links only.
rand_node = max_scc.GetRndNId()
out_combined = snap.GetBfsTree( venue_g, rand_node, True, False )
in_combined = snap.GetBfsTree( venue_g, rand_node, False, True )

# Largest weakly connected component.
max_wcc = snap.GetMxWcc( venue_g )
Esempio n. 9
0
'''
    Currently, the graph has node attribute:
    - vid
    - ckn (insofar, checkin number)
    - sts (start timestamp)
    - ets (end timestamp)
    - lat
    - lng
    - category
    - pcategory
    
    And edge attribute:
    - trsn_cnt
    - duration
'''
# Load the graph and report its node and edge counts.
g = GH.load_graph(graph_path, graph_name)
n = g.GetNodes()
print g.GetNodes(), g.GetEdges()

# Per-edge and per-node weights derived from the directed graph.
edge_weight = get_edge_weight(g)
node_weight = get_node_weight(g)

# Build B from the undirected version of the graph and persist it.
# NOTE(review): B is presumably the modularity matrix and m the total
# weight — confirm against get_B / get_m.
undirected_g = GH.convert_undirected_graph(g)
m = get_m(node_weight)
B = get_B(undirected_g, node_weight, m)
save_list(B, community_path, 'B_matrix')

# State for the community loop that follows (it runs while num_community < 10).
num_community = 0

mod_list = []
while num_community < 10:
"""
    Currently, the graph has node attribute:
    - vid
    - ckn (insofar, checkin number)
    - sts (start timestamp)
    - ets (end timestamp)
    - lat
    - lng
    - category
    - pcategory
    
    And edge attribute:
    - trsn_cnt
    - duration
"""
# Load the venue graph and the category / pcategory lists from disk.
venue_g = GH.load_graph(graph_path, graph_name)
category_list = VH.get_category_list(venue_path, category_name)
pcategory_list = VH.get_category_list(venue_path, pcategory_name)
# GH.print_node_attr_names(venue_g)
# GH.print_edge_attr_names(venue_g)
# print category_list
GH.print_nids(venue_g)


# create snapshot of the graph - nodes are accurate, but edges aren't
ts_list = TH.gen_ts_list("201201010000", "201301010000", 30)
# Process the timestamps in reverse of the order gen_ts_list produced them.
ts_list.reverse()
for ts in ts_list:
    # The same venue_g object is filtered on every pass and the return value
    # is ignored, so filter_node_sts presumably mutates the graph in place and
    # each saved snapshot is derived from the previously filtered one.
    GH.filter_node_sts(venue_g, ts)
    GH.save_graph(venue_g, graph_path, "sf_venue_" + ts)
Esempio n. 11
0
    sf_trsn_graph_small: A small test graph with only a few venues in sf -- you can use this to test your script first
    sf_trsn_graph: up-to-date venue graph of sf
'''

def counter_to_arrays(c):
	"""Split a counter-like mapping into two parallel lists.

	Returns a two-element list [values, frequencies]: values holds the keys
	of c in iteration order and frequencies holds c[key] for each of them.
	"""
	values = list(c)
	frequencies = [c[key] for key in values]
	return [values, frequencies]

# Input graph location and output directory for analysis results.
data_path = '../Dataset/GraphData'
result_path = '../Dataset/Analysis/'

graph = GH.load_graph(data_path, 'sf_venue_graph')
# occurrences counts how many nodes share each 'ckn' value;
# dataset keeps the raw 'ckn' value of every node.
occurrences = cl.Counter()
dataset = []

for node in graph.Nodes():
	ckn = graph.GetIntAttrDatN(node.GetId(), 'ckn')
	occurrences[ckn] += 1
	dataset.append(ckn)

# x: distinct 'ckn' values, y: number of nodes with each value.
x, y = counter_to_arrays(occurrences)
# The smallest observed value is passed as the second argument to both helpers
# (presumably the power-law x_min — confirm in AnalysisHelper).
alpha = AH.get_mle_alpha(dataset, min(dataset))
powerlaw_y = AH.get_powerlaw_y(dataset, alpha, min(dataset), np.sum(y))   
print "check-in distribution: the estimated alpha is", alpha

# Start a new figure with a logarithmic x axis.
plt.figure()
plt.xscale('log')
        return False


# Locations of the transition pickles, the saved graphs and the venue metadata.
data_path = "../DataSet/Transition/"
graph_path = "../DataSet/GraphData/"
venue_path = "../DataSet/VenueData/"

# trsn_list and time_list are read in parallel below:
# time_list[i] holds the (source, destination) timestamps of trsn_list[i].
trsn_list = VH.load_pickle_file(data_path, "sf_trsn_small_new")
time_list = VH.load_pickle_file(data_path, "sf_time_small_new")
full_venue_dict = VH.GetFullVenueDict(venue_path, "venues-CA-new.json")
category_dict = VH.load_json(venue_path, "category_map.json")
pcategory_dict = VH.load_json(venue_path, "pcategory_map.json")

vid_map = create_vid_map(trsn_list)
ts_list = TH.gen_ts_list("201201010000", "201301010000", 30)

# The graph is created once, outside the timestamp loop, and never reset, so
# nodes and edges accumulate: each saved snapshot also contains everything
# added for earlier timestamps.
venue_g = snap.TNEANet.New()
for ts_idx, ts in enumerate(ts_list):
    # Every timestamp rescans the full transition list.
    for trsn_idx, trsn in enumerate(trsn_list):
        src_ts = time_list[trsn_idx][0]  # only need check one ts
        dst_ts = time_list[trsn_idx][1]
        # Only the source timestamp decides whether the transition is added.
        if within_ts_range(ts, src_ts):
            src_nid = vid_map[trsn[0]]
            dst_nid = vid_map[trsn[1]]
            GH.add_node(venue_g, src_nid, trsn[0], src_ts)
            GH.add_node(venue_g, dst_nid, trsn[1], dst_ts)
            GH.add_edge(venue_g, src_nid, dst_nid, time_list[trsn_idx])
    # Attach category data, report the node count, then save this snapshot.
    GH.add_category(venue_g, full_venue_dict, category_dict, pcategory_dict)
    print venue_g.GetNodes()
    GH.save_graph(venue_g, graph_path, "sf_venue_small_" + str(ts))
Esempio n. 13
0
# Assign each venue id in node_set a node id equal to its enumeration index.
for nid, vid in enumerate(node_set):
    node_hash[vid] = nid

trsn_g = snap.TNEANet.New()
#node_id: 0 to n-1
# Create one node per venue and store the venue id as its 'vid' attribute.
for vid, nid in node_hash.iteritems():
    trsn_g.AddNode(nid)
    trsn_g.AddStrAttrDatN(nid, vid, 'vid')

#freq: frequency (cnt) of edge
print trsn_g.GetNodes()
# trsn_list and time_list are read in parallel: time_list[idx] belongs to trsn_list[idx].
for idx, trsn in enumerate(trsn_list):
    src_nid = node_hash[trsn[0]]
    dst_nid = node_hash[trsn[1]]
    print src_nid, dst_nid
    #TODO: add timestamp filter
    # First transition between this ordered pair: call the add_* helpers.
    # Later transitions for the same pair go through the update_* helpers.
    if not trsn_g.IsEdge(src_nid, dst_nid):
        GH.add_edge_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        GH.add_node_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        print "add a new edge, hoho~"
    else:
        GH.update_edge_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        GH.update_node_attrs(trsn_g, src_nid, dst_nid, time_list[idx])
        print "update node info, haha~"
    # Progress output: current index and transition, then the total count.
    print idx, trsn
    print len(trsn_list)    

GH.save_graph(trsn_g, graph_path, 'sf_trsn_graph')
print "succesfully build the graph!"

Esempio n. 14
0
    else:
        print "catch a false"
        return False
    

# Locations of the transition pickles, the saved graphs and the venue metadata.
data_path = '../DataSet/'
graph_path = '../DataSet/GraphData/'
venue_path = '../DataSet/VenueData/'

trsn_list = VH.load_pickle_file(data_path, 'sf_trsn')
time_list = VH.load_pickle_file(data_path, 'sf_time')

full_venue_dict = VH.GetFullVenueDict(venue_path, 'venues-CA-new.json')
category_dict = VH.load_json(venue_path, 'category_map.json')
pcategory_dict = VH.load_json(venue_path, 'pcategory_map.json')
# Look up the second venue (trsn[1]) of every transition, sort the results in
# descending order of element 1 and print the first entry.
# NOTE(review): get_lat_lng presumably returns (lat, lng), which would make
# this the entry with the largest longitude — confirm in GraphHelper.
lng_list = [GH.get_lat_lng(full_venue_dict, trsn[1]) for trsn in trsn_list]
lng_list.sort(key=lambda t:t[1], reverse=True)
print lng_list[0]
'''
vid_map = create_vid_map(trsn_list)

venue_g = snap.TNEANet.New()
lngs = []
for trsn_idx, trsn in enumerate(trsn_list):
    # only need check one vid
    lat, lng = GH.get_lat_lng(full_venue_dict, trsn[0])
    if within_geo_range(center, radius, lat, lng):
        lngs.append(lng)
        src_nid = vid_map[trsn[0]]
        dst_nid = vid_map[trsn[1]]
        src_ts = time_list[trsn_idx][0]
Esempio n. 15
0
	node_count = 0
	edge_count = 0
	for node in g.Nodes():
		node_count += 1
		un_g.AddNode(node.GetId())

	for edge in g.Edges():
		edge_count += 1
		un_g.AddEdge(edge.GetSrcNId(), edge.GetDstNId())

	print "node: %d edge: %d" % (node_count, edge_count)
	return un_g

# Load the transition graph and convert it with to_PUNGraph before detection.
data_path = '../CS224W_Dataset/GraphData'
filename = 'sf_venue_center'
trsn_g = GH.load_graph(data_path, filename)
un_trsn_g = to_PUNGraph(trsn_g)

# try to use the SNAP library function to get the community structure
# CommunityCNM fills `communities` and returns the modularity score.
communities = snap.TCnComV()
modularity = snap.CommunityCNM(un_trsn_g, communities)
print "Community detection complete, modularity score is", modularity
# communities = [[3280, 2414, 2662, 2878, 3551], [848, 1106, 1474, 1915, 2089, 3139, 3400, 5759, 6280, 7848]]

# fetch venue info and produce a csv for visualization
# data_path is rebound here: from now on it points at the dataset root,
# not the GraphData folder used above.
data_path = '../CS224W_Dataset'
out_csv = '../CS224W_Dataset/transition-SF-community.csv'
venue_hash = VH.GetFullVenueDict(data_path, 'venues-CA-new.json')

with open(out_csv, 'w') as fout:
	a = csv.writer(fout, delimiter=',', quoting=csv.QUOTE_ALL)
    venue_dict[data['id']] = data
  fin.close()
  return venue_dict

# add attributes for graph nodes
def AddNodeAttr(graph, full_venue_dict):
  ''' Copy venue metadata from full_venue_dict onto the nodes of graph.

      Each node's 'vid' string attribute is looked up in full_venue_dict.
      When it is present, four attributes are written to the node:
      1. 'lat' and 'lng' as floats (latitude, longitude)
      2. 'category' as a string
      3. 'pcategory' as a string, taken from the 'parentcategory' field
      Nodes whose vid is not in the dict are left untouched.
      The graph is modified in place; nothing is returned.
  '''
  for node in graph.Nodes():
    nid = node.GetId()
    vid = graph.GetStrAttrDatN(nid, 'vid')
    if vid not in full_venue_dict:
      continue

    venue = full_venue_dict[vid]
    graph.AddFltAttrDatN(nid, float(venue['lat']), 'lat')
    graph.AddFltAttrDatN(nid, float(venue['lng']), 'lng')
    graph.AddStrAttrDatN(nid, venue['category'], 'category')
    graph.AddStrAttrDatN(nid, venue['parentcategory'], 'pcategory')

# Load the transition graph together with the venue dictionary and the
# category / pcategory maps.
trsn_g = GH.load_graph(graph_data_path, graph_filename)
full_venue_dict = VH.GetFullVenueDict(venue_graph_data_path, venue_filename)
category_dict = VH.load_json(venue_graph_data_path, 'category_map.json')
pcategory_dict = VH.load_json(venue_graph_data_path, 'pcategory_map.json')

# Annotate the graph via GH.add_category, then save it as 'sf_venue_graph'
# in the same directory the input graph was loaded from.
GH.add_category(trsn_g, full_venue_dict, category_dict, pcategory_dict)
GH.save_graph(trsn_g, graph_data_path, 'sf_venue_graph')
print 'successfully build venue_graph!'