# Component-size summary of the graph relative to its largest SCC / WCC.
num_max_wcc_n = max_wcc.GetNodes()

# The largest SCC's node count is subtracted from each combined component,
# presumably because in_combined/out_combined include the SCC itself -- confirm.
num_out = out_combined.GetNodes() - num_max_scc_n
num_in = in_combined.GetNodes() - num_max_scc_n
# Whatever is left of g_size once the largest WCC's nodes are removed.
numDiscon = g_size - num_max_wcc_n

# Single-argument print(...) emits the same text under Python 2 and Python 3,
# whereas the bare `print "..."` statement is a SyntaxError on Python 3.
print("The size of the graph is %d, %d" % (g_size, edge_size))
print("The largest SCC ratio is: %0.4f " % (float(num_max_scc_n) / g_size))
print("The In-component of the largest SCC ratio is: %0.4f " % (float(num_in) / g_size))
print("The Out-component of the largest SCC ratio is: %0.4f" % (float(num_out) / g_size))
print("The disconnected components has the percentage: %0.4f" % (float(numDiscon) / g_size))


''' Analysis 2: graph structure
    - node degree distribution
'''
# Degree histogram of the venue graph, with each count divided by g_size.
deg_cnt_v = AH.gen_degree_hist(venue_g)
# Each item exposes GetVal1()/GetVal2(); presumably (degree, node count)
# pairs -- confirm against AH.gen_degree_hist.
deg_v, deg_prob = zip(*[(item.GetVal1(), float(item.GetVal2()) / g_size) for item in deg_cnt_v])

plt.figure()
# The format string now carries only line and marker style ('-o'); the colour
# comes solely from `color`. The previous '-yo' asked for yellow while
# color='red' overrode it, which newer matplotlib reports as a redundant
# colour definition. The drawn line is red either way.
plt.plot(deg_v, deg_prob, '-o', color='red', label='node degree distribution')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.title('log-log node degree distribution of transition graph')
plt.savefig(os.path.join(result_path, 'node_dist.png'))


'''Analysis 3: edge (transition) features
   - transition frequency distribution
   - transition duration distribution
'''
	return [values, frequencies]

# Input graph location and output directory for the generated figures.
data_path = '../Dataset/GraphData'
result_path = '../Dataset/Analysis/'

graph = GH.load_graph(data_path, 'sf_venue_graph')

# One 'ckn' integer attribute per node (presumably the venue's check-in
# count -- confirm against the code that builds the graph).
dataset = [graph.GetIntAttrDatN(node.GetId(), 'ckn') for node in graph.Nodes()]
# Histogram: ckn value -> number of nodes carrying that value.
occurrences = cl.Counter(dataset)

x, y = counter_to_arrays(occurrences)
# The smallest observed value is passed to both AH helpers (presumably as the
# power-law lower bound -- confirm); compute it once instead of twice.
min_ckn = min(dataset)
alpha = AH.get_mle_alpha(dataset, min_ckn)
powerlaw_y = AH.get_powerlaw_y(dataset, alpha, min_ckn, np.sum(y))
# Single-argument print(...) yields the same "text value" line as the old
# `print "text", value` statement and also parses on Python 3.
print("check-in distribution: the estimated alpha is %s" % (alpha,))

# Figure: observed check-in histogram (scatter) with the fitted curve, both
# on log-log axes, written to ck_freq_dist.png under result_path.
plt.figure()
plt.yscale('log')
plt.xscale('log')
plt.scatter(x, y, label='check-in distribution', color='crimson')
# NOTE(review): powerlaw_y is drawn against its positional index because no
# x values are passed -- confirm this lines up with the scatter's x axis.
# The legend shows only the first four characters of str(alpha).
plt.plot(powerlaw_y, label='MLE-PDF alpha: %s' % str(alpha)[:4], color='blue')
plt.xlabel('total check-ins')
plt.ylabel('number of venues')
plt.title('check-in distribution of venues')
plt.legend()
plt.savefig(os.path.join(result_path, 'ck_freq_dist.png'))

# Rebind to an empty Counter, dropping the per-'ckn' tallies collected earlier;
# presumably reused by the next distribution analysis -- confirm.
occurrences = cl.Counter()