def plot_entropy_ccdf():
    entropy = read_pickle('output/normalized_entropy.obj')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    powerlaw.plot_ccdf(entropy, ax, label='normalized entropy')
    # further plotting
    ax.set_xlabel("Normalized entropy e")
    ax.set_ylabel("Pr(X>=e)")
    plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size': 5})
    plt.tight_layout()
    plt.savefig('output/normalized_entropy_distribution_ccdf.pdf')

    fig = plt.figure()
    ax = fig.add_subplot(111)
    powerlaw.plot_cdf(entropy, ax, label='normalized entropy', color='r')
    # further plotting
    ax.set_xlabel("Normalized entropy e")
    ax.set_ylabel("Pr(X<=e)")
    plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size': 5})
    plt.tight_layout()
    plt.savefig('output/normalized_entropy_distribution_cdf.pdf')
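# Note: the snippets in this collection are excerpts from larger scripts and
# assume a preamble roughly like the one below. `read_pickle` and `HOME` are
# project-specific helpers that are only sketched here (assumed, not the
# original implementations).
import pickle

import matplotlib.pyplot as plt
import numpy as np
import powerlaw

HOME = ''  # assumed project root prefix used by some snippets


def read_pickle(path):
    """Hypothetical helper: load a pickled object from `path`."""
    with open(path, 'rb') as fin:
        return pickle.load(fin)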
def plot_counts_category_distributions_ccdf():
    category_distributions = read_pickle(HOME + 'output/category_counts_distribution.obj')
    for i in category_distributions.values():
        print len(i)

    colors = {'lead': 'r', 'infobox': 'b', 'body': 'g',
              'left-body': 'm', 'navbox': 'c', 'counts': 'k'}

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for category in ['lead', 'infobox', 'body', 'left-body', 'navbox', 'counts']:
        data = category_distributions[category]
        data = [x[0] for x in data]
        powerlaw.plot_ccdf(data, ax, label=category, color=colors[category])
    # further plotting
    ax.set_xlabel("Number of clicks n")
    ax.set_ylabel("Pr(X>=n)")
    plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
    #leg = plt.gca().get_legend()
    #ltext = leg.get_texts()   # all the text.Text instances in the legend
    #llines = leg.get_lines()
    #plt.setp(ltext, fontsize='small')  # the legend text fontsize
    #plt.setp(llines, linewidth=1)
    plt.tight_layout()
    plt.savefig('output/category_counts_distributions.pdf')

    data = category_distributions['counts']
    data = [int(x[0]) for x in data]
    hist, bin_edges = np.histogram(data, 100, density=True)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(bin_edges[:-1], hist, marker='o')
    ax.set_xlabel('#Counts')
    ax.set_ylabel('#Pages')
    ax.set_yscale('log')
    ax.set_xscale('log')
    plt.legend(fancybox=True, loc=3, prop={'size': 4})
    plt.tight_layout()
    plt.savefig('output/counts_distribution.pdf')
def plot_ccdf(place, points, thresholds=None):
    """
    plot ccdf
    :param place: tuple (name [string], (north_lat [float], south_lat [float],
                  east_lon [float], west_lon [float]))
    :param points: list of tuples (length [float], asymmetry_factor [float])
    :param thresholds: list of floats of minimum length threshold to filter points and plot ccdf
    :return:
    """
    print(' Plotting ccdf ...')
    name, bbox = place
    cpoints = points
    # cpoints = points.copy()  # careful with overwriting list vs. memory overflow
    if thresholds is None:
        thresholds = [0, 250, 500, 1000, 1500, 3000, 4500]
    cmap = plt.get_cmap('Set1')
    colors = [cmap(i) for i in np.linspace(0, 1, len(thresholds))]

    plt.figure()
    for idx, threshold in enumerate(thresholds):
        above_threshold = []
        asymmetry_factors = []
        while cpoints:
            point = cpoints.pop()
            length, asymmetry_factor = point
            if length >= threshold:
                above_threshold.append(point)
                asymmetry_factors.append(asymmetry_factor)
        powerlaw.plot_ccdf(asymmetry_factors, color=colors[idx], linewidth=1.5,
                           label='$Length \geq {0} \ m$'.format(threshold))
        cpoints = above_threshold

    plt.xlabel('Asymmetry Factor', fontsize=16)
    plt.ylabel('$P(X \geq x)$', fontsize=16)
    plt.legend()
    plt.grid()
    plt.savefig('./figs_dir/ccdf {0}.png'.format(name), format='png', dpi=200)
    print(' Done!')
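# Hypothetical usage sketch for plot_ccdf above. The place and points values
# are invented purely to illustrate the shapes described in the docstring
# (lengths in metres, asymmetry factors as floats); they are not real data.
example_place = ('Example City', (48.9, 48.1, 16.6, 16.2))
example_points = [(120.0, 1.1), (560.0, 2.3), (3100.0, 1.7), (80.0, 4.2)]
plot_ccdf(example_place, example_points, thresholds=[0, 500])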
def plotView(self):
    self.clearView()
    f = Figure(figsize=(5, 4), dpi=100)
    a = f.add_subplot(111)
    test = powerlaw.plot_ccdf(self.orderedFreq.values(), ax=a, color='b')
    a.plot()

    canvas = FigureCanvasTkAgg(f, master=self)
    canvas.show()
    canvas.get_tk_widget().pack(side=TOP, fill=BOTH, expand=1)

    toolbar = NavigationToolbar2TkAgg(canvas, self)
    toolbar.update()
    canvas._tkcanvas.pack(side=TOP, fill=BOTH, expand=1)
def main():
    timer = Timer()
    timer.start()

    app_name = 'cyberbullying'

    sample_cascade_size = {}
    sample_inter_arrival_time = []
    sample_cascade_influence = {}
    sample_cascade_influence_10m = defaultdict(int)
    sample_cascade_influence_1h = defaultdict(int)
    with open('../data/{0}_out/sample_retweet_{0}.txt'.format(app_name), 'r') as fin:
        for line in fin:
            root_tweet, cascades = line.rstrip().split(':')
            cascades = cascades.split(',')
            root_tweet = root_tweet.split('-')[0]
            retweets = [x.split('-')[0] for x in cascades]
            influences = [int(x.split('-')[1]) for x in cascades]
            sample_cascade_size[root_tweet] = len(retweets)
            sample_cascade_influence[root_tweet] = sum(influences)
            root_timestamp = melt_snowflake(root_tweet)[0] / 1000
            retweet_timestamp_list = [root_timestamp]
            for i in range(len(retweets)):
                retweet_time = melt_snowflake(retweets[i])[0] / 1000
                relative_retweet_time = retweet_time - root_timestamp
                retweet_timestamp_list.append(melt_snowflake(retweets[i])[0] / 1000)
                if relative_retweet_time < 10 * 60:
                    sample_cascade_influence_10m[root_tweet] += influences[i]
                if relative_retweet_time < 60 * 60:
                    sample_cascade_influence_1h[root_tweet] += influences[i]
            for i in range(len(retweet_timestamp_list) - 1):
                sample_inter_arrival_time.append(retweet_timestamp_list[i + 1] - retweet_timestamp_list[i])

    complete_cascade_size = {}
    complete_inter_arrival_time = []
    complete_cascade_influence = {}
    complete_cascade_influence_10m = defaultdict(int)
    complete_cascade_influence_1h = defaultdict(int)
    with open('../data/{0}_out/complete_retweet_{0}.txt'.format(app_name), 'r') as fin:
        for line in fin:
            root_tweet, cascades = line.rstrip().split(':')
            cascades = cascades.split(',')
            root_tweet = root_tweet.split('-')[0]
            retweets = [x.split('-')[0] for x in cascades]
            complete_cascade_size[root_tweet] = len(retweets)
            if len(retweets) >= 50:
                influences = [int(x.split('-')[1]) for x in cascades]
                complete_cascade_influence[root_tweet] = sum(influences)
                root_timestamp = melt_snowflake(root_tweet)[0] / 1000
                retweet_timestamp_list = [root_timestamp]
                for i in range(len(retweets)):
                    retweet_time = melt_snowflake(retweets[i])[0] / 1000
                    relative_retweet_time = retweet_time - root_timestamp
                    retweet_timestamp_list.append(melt_snowflake(retweets[i])[0] / 1000)
                    if relative_retweet_time < 10 * 60:
                        complete_cascade_influence_10m[root_tweet] += influences[i]
                    if relative_retweet_time < 60 * 60:
                        complete_cascade_influence_1h[root_tweet] += influences[i]
                for i in range(len(retweet_timestamp_list) - 1):
                    complete_inter_arrival_time.append(retweet_timestamp_list[i + 1] - retweet_timestamp_list[i])

    print('number of cascades in the complete set', len(complete_cascade_size))
    print('number of cascades in the sample set', len(sample_cascade_size))
    print('mean complete size', np.mean(list(complete_cascade_size.values())))
    print('mean sample size', np.mean(list(sample_cascade_size.values())))
    print('complete #cascades (≥50 retweets)',
          sum([1 for x in list(complete_cascade_size.values()) if x >= 50]))
    print('sample #cascades (≥50 retweets)',
          sum([1 for x in list(sample_cascade_size.values()) if x >= 50]))

    num_complete_cascades_in_sample = 0
    complete_cascades_in_sample_size_list = []
    num_complete_cascades_in_sample_50 = 0
    for root_tweet in sample_cascade_size:
        if sample_cascade_size[root_tweet] == complete_cascade_size[root_tweet]:
            num_complete_cascades_in_sample += 1
            complete_cascades_in_sample_size_list.append(complete_cascade_size[root_tweet])
            if complete_cascade_size[root_tweet] >= 50:
                num_complete_cascades_in_sample_50 += 1
    print('number of complete cascades in the sample set', num_complete_cascades_in_sample)
    print('number of complete cascades (>50 retweets) in the sample set', num_complete_cascades_in_sample_50)
    print('max: {0}, mean: {1}'.format(max(complete_cascades_in_sample_size_list),
                                       np.mean(complete_cascades_in_sample_size_list)))

    fig, axes = plt.subplots(1, 2, figsize=(10, 3.3))
    cc4 = ColorPalette.CC4
    blue = cc4[0]
    red = cc4[3]

    sample_median = np.median(sample_inter_arrival_time)
    complete_median = np.median(complete_inter_arrival_time)

    plot_ccdf(sample_inter_arrival_time, ax=axes[0], color=blue, ls='-', label='sample')
    plot_ccdf(complete_inter_arrival_time, ax=axes[0], color='k', ls='-', label='complete')
    axes[0].plot([sample_median, sample_median], [0, 1], color=blue, ls='--', lw=1)
    axes[0].plot([complete_median, complete_median], [0, 1], color='k', ls='--', lw=1)

    print('\ninter_arrival_time sample median', sample_median)
    print('inter_arrival_time complete median', complete_median)

    axes[0].set_xscale('symlog')
    axes[0].set_xticks([0, 1, 100, 10000, 1000000])
    axes[0].set_yscale('linear')
    axes[0].set_xlabel('inter-arrival time (sec)', fontsize=16)
    axes[0].set_ylabel('$P(X \geq x)$', fontsize=16)
    axes[0].legend(frameon=False, fontsize=16, ncol=1, fancybox=False, shadow=True, loc='upper right')
    axes[0].tick_params(axis='both', which='major', labelsize=16)
    axes[0].set_title('(a)', fontsize=18, pad=-3 * 72, y=1.0001)

    influence_list = []
    influence_list_10m = []
    influence_list_1h = []
    for root_tweet in sample_cascade_size:
        if complete_cascade_size[root_tweet] >= 50:
            if complete_cascade_influence[root_tweet] > 0:
                influence_list.append(sample_cascade_influence[root_tweet] /
                                      complete_cascade_influence[root_tweet])
            if complete_cascade_influence_10m[root_tweet] > 0:
                influence_list_10m.append(sample_cascade_influence_10m[root_tweet] /
                                          complete_cascade_influence_10m[root_tweet])
            if complete_cascade_influence_1h[root_tweet] > 0:
                influence_list_1h.append(sample_cascade_influence_1h[root_tweet] /
                                         complete_cascade_influence_1h[root_tweet])

    plot_ccdf(influence_list_10m, ax=axes[1], color=red, ls='-', label='10m')
    plot_ccdf(influence_list_1h, ax=axes[1], color=blue, ls='-', label='1h')
    plot_ccdf(influence_list, ax=axes[1], color='k', ls='-', label='14d')

    print('influence_list median', np.median(influence_list))
    print('influence_list_1h median', np.median(influence_list_1h))
    print('influence_list_10m median', np.median(influence_list_10m))

    print('influence_list 0.25', percentileofscore(influence_list, 0.25))
    print('influence_list 0.25', percentileofscore(influence_list_1h, 0.25))
    print('influence_list 0.25', percentileofscore(influence_list_10m, 0.25))

    print('influence_list 0.75', percentileofscore(influence_list, 0.75))
    print('influence_list 0.75', percentileofscore(influence_list_1h, 0.75))
    print('influence_list 0.75', percentileofscore(influence_list_10m, 0.75))

    axes[1].set_xscale('linear')
    axes[1].set_yscale('linear')
    axes[1].set_xlabel('relative potential reach', fontsize=16)
    # axes[1].set_ylabel('$P(X \geq x)$', fontsize=16)
    axes[1].legend(frameon=False, fontsize=16, ncol=1, fancybox=False, shadow=True, loc='upper right')
    axes[1].tick_params(axis='both', which='major', labelsize=16)
    axes[1].set_title('(b)', fontsize=18, pad=-3 * 72, y=1.0001)

    hide_spines(axes)

    timer.stop()

    plt.tight_layout(rect=[0, 0.05, 1, 1])
    plt.savefig('../images/cascades_measures.pdf', bbox_inches='tight')
    if not platform.system() == 'Linux':
        plt.show()
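# The script above relies on melt_snowflake() to recover a tweet's creation
# time from its ID. A minimal sketch of such a helper, assuming the standard
# Twitter Snowflake layout (millisecond timestamp in the top bits, offset by
# the Twitter epoch); the project's own helper may differ in detail.
TWITTER_EPOCH_MS = 1288834974657


def melt_snowflake(snowflake_id):
    """Return (timestamp_ms, machine_id, sequence) decoded from a tweet ID (assumed layout)."""
    sid = int(snowflake_id)
    timestamp_ms = (sid >> 22) + TWITTER_EPOCH_MS
    machine_id = (sid >> 12) & 0x3FF
    sequence = sid & 0xFFF
    return timestamp_ms, machine_id, sequence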
for x in range(0, max(degree) + 1):
    dist.append(degree.count(x) / float(g.vcount()))

# log-log degree distribution
plt.xscale('log')
plt.yscale('log')
plt.xlabel('K')
plt.ylabel('Pk')
plt.plot(dist, linestyle=(0, (1, 3)))

results = pl.Fit(degree)
print("Alpha is {}".format(results.power_law.alpha))
print("Xmin is {}".format(results.power_law.xmin))

pl.plot_ccdf(degree, color='r')
plt.show()

print("The network diameter is {}".format(g.diameter()))

print('top betweenness')
# centrality top 10
between = g.betweenness()
between.sort()
print(between[-10:])

print(g.vs.find(_degree=2)["id"])

degree.sort()
maxd = degree[-10:]
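# Optional follow-up sketch: overlay the fitted power law on the empirical
# CCDF so the fit can be judged visually. `plot_ccdf` and
# `results.power_law.plot_ccdf` are part of the powerlaw package; `degree`,
# `results` and `pl` come from the snippet above.
fig_ccdf = pl.plot_ccdf(degree, color='r', label='empirical')
results.power_law.plot_ccdf(ax=fig_ccdf, color='b', linestyle='--', label='power-law fit')
plt.legend()
plt.show()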
        weight = float(route[2][:-1])
        G.add_edge(airport1, airport2, weight=weight)

N = len(G)
L = len(G.edges())
degrees = nx.degree(G).values()
kmax = max(degrees)
kmin = min(degrees)
kavg = 1.0 * sum(degrees) / len(degrees)

print "Number of nodes:", N
print "Number of links:", L
print "Max degree:", kmax
print "Min degree:", kmin
print "Average degree:", kavg

powerlaw.plot_ccdf(degrees, marker="o", color="b", linestyle="none")
plt.ylabel(r"Cumulative $P(k)$", fontsize=16)
plt.xlabel(r"$k$", fontsize=16)
plt.savefig("Degree distribution" + "_Airline" + ".png")
# plt.show()


"""random walk"""
def rndWalk(G, T):
    node_list = G.nodes()
    """initialization"""
    walker_path = []
    walker_path.append(random.choice(node_list))
    """let's walk!"""
    for t in range(1, T + 1):
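        # NOTE: the original loop body is cut off in this excerpt. What follows
        # is an assumed minimal completion, not the author's code: move the
        # walker to a uniformly random neighbour and record the visit.
        current = walker_path[-1]
        current = random.choice(list(G.neighbors(current)))
        walker_path.append(current)
    return walker_path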
def plot_stats():
    # wikipedia graph structural statistics
    print 'before load'
    network = load_graph("output/wikipedianetwork.xml.gz")
    print 'after load'

    out_hist = vertex_hist(network, "out")
    plt.gca().set_yscale('log')
    plt.gca().set_xscale('log')
    plt.plot(out_hist[1][:-1], out_hist[0], marker='o')
    plt.xlabel('Out-degree')
    plt.ylabel('Frequency')
    plt.gca().set_ylim([1, 10**6])
    #plt.title('Out-degree Distribution')
    plt.tight_layout()
    plt.savefig('output/wikipedia-out-deg-dist.pdf')
    plt.clf()

    in_hist = vertex_hist(network, "in")
    plt.gca().set_yscale('log')
    plt.gca().set_xscale('log')
    plt.plot(in_hist[1][:-1], in_hist[0], marker='o')
    plt.xlabel('In-degree')
    plt.ylabel('Frequency')
    plt.gca().set_ylim([1, 10**6])
    #plt.title('In-degree Distribution')
    plt.tight_layout()
    plt.savefig('output/wikipedia-in-deg-dist.pdf')
    plt.clf()

    total_hist = vertex_hist(network, "total")
    plt.gca().set_yscale('log')
    plt.gca().set_xscale('log')
    plt.plot(total_hist[1][:-1], total_hist[0], marker='o')
    plt.xlabel('Degree')
    plt.ylabel('Frequency')
    plt.gca().set_ylim([1, 10**6])
    #plt.title('Degree Distribution')
    plt.tight_layout()
    plt.savefig('output/wikipedia-deg-dist.pdf')
    plt.clf()

    clust = network.vertex_properties["local_clust"]
    #clust = local_clustering(network, undirected=False)
    #hist, bin_edges = np.histogram(clust.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Local Clustering Coefficient C')
    #plt.ylabel('P(x<=C)')
    #plt.title('Clustering Coefficient Distribution')
    #plt.savefig('output/wikipedia-clust-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(clust.get_array(), ax)
    #ax.set_title('Clustering Coefficient Distribution')
    ax.set_xlabel('Local Clustering Coefficient C')
    ax.set_ylabel('P(x<=C)')
    ax.set_ylim([0, 1])
    fig.tight_layout()
    fig.savefig('output/wikipedia-clust-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(clust.get_array(), ax)
    #ax.set_title('Clustering Coefficient Distribution')
    ax.set_xlabel('Local Clustering Coefficient C')
    ax.set_ylabel('P(x>=C)')
    ax.set_ylim([10**-4, 10**-0.5])
    fig.tight_layout()
    fig.savefig('output/wikipedia-clust-ccdf.pdf')
    plt.clf()

    prank = network.vertex_properties["page_rank"]
    #hist, bin_edges = np.histogram(prank.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Page rank Pr')
    #plt.ylabel('P(x<=Pr)')
    #plt.title('Page rank Distribution')
    #plt.savefig('output/wikipedia-prank-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(prank.get_array(), ax)
    #ax.set_title('Page Rank Distribution')
    ax.set_xlabel('Page rank Pr')
    ax.set_ylabel('P(x<=Pr)')
    ax.set_ylim([0, 1])
    fig.tight_layout()
    fig.savefig('output/wikipedia-prank-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(prank.get_array(), ax)
    #ax.set_title('Page Rank Distribution')
    ax.set_xlabel('Page rank Pr')
    ax.set_ylabel('P(x>=Pr)')
    fig.tight_layout()
    fig.savefig('output/wikipedia-prank-ccdf.pdf')
    plt.clf()

    kcore = network.vertex_properties["kcore"]
    #hist, bin_edges = np.histogram(kcore.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Kcore kC')
    #plt.ylabel('P(x<=kC)')
    #plt.title('K-Core Distribution')
    #plt.savefig('output/wikipedia-kcore-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(kcore.get_array(), ax)
    #ax.set_title('K-Core Distribution')
    ax.set_xlabel('k-Core kC')
    ax.set_ylabel('P(x<=kC)')
    ax.set_ylim([0, 1])
    fig.tight_layout()
    fig.savefig('output/wikipedia-kcore-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(kcore.get_array(), ax)
    #ax.set_title('K-Core Distribution')
    ax.set_xlabel('k-Core kC')
    ax.set_ylabel('P(x>=kC)')
    fig.tight_layout()
    fig.savefig('output/wikipedia-kcore-ccdf.pdf')
    plt.clf()

    eigenvector_centr = network.vertex_properties["eigenvector_centr"]
    #hist, bin_edges = np.histogram(eigenvector_centr.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Eigenvector Centrality E')
    #plt.ylabel('P(x<=E)')
    #plt.title('Eigenvector Centrality Distribution')
    #plt.savefig('output/wikipedia-eigenvcentr-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(eigenvector_centr.get_array(), ax)
    #ax.set_title('Eigenvector Centrality E')
    ax.set_xlabel('Eigenvector Centrality E')
    ax.set_ylabel('P(x<=E)')
    ax.set_ylim([0, 1])
    fig.tight_layout()
    fig.savefig('output/wikipedia-eigenvcentr-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(eigenvector_centr.get_array(), ax)
    #ax.set_title('Eigenvector Centrality E')
    ax.set_xlabel('Eigenvector Centrality E')
    ax.set_ylabel('P(x>=E)')
    fig.tight_layout()
    fig.savefig('output/wikipedia-eigenvcentr-ccdf.pdf')
    plt.clf()

    colors = {'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g',
              'kcore': 'm', 'hub': 'c', 'authority': 'k'}
    labels = {'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.',
              'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub',
              'authority': 'authority'}
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for f in ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']:
        feature = network.vertex_properties[f]
        powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f])
    ax.set_xlabel('Feature $f$')
    ax.set_ylabel('$P(X>=f)$')
    ax.set_ylim([0, 1])
    plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
    plt.tight_layout()
    plt.savefig('output/wikipedia-features-cdf.pdf')
    plt.clf()

    colors = {'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g',
              'kcore': 'm', 'hub': 'c', 'authority': 'k'}
    labels = {'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.',
              'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub',
              'authority': 'authority'}
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for f in ['local_clust', 'eigenvector_centr', 'page_rank', 'hub', 'authority', 'kcore']:
        feature = network.vertex_properties[f]
        powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f])
    ax.set_xlabel('Feature $f$')
    ax.set_ylabel('$P(X<=f)$')
    plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
    plt.tight_layout()
    plt.savefig('output/wikipedia-features-ccdf.pdf')
    plt.clf()

    # wikipedia transitions graph structural statistics
    print 'before load'
    network_transitions = load_graph("output/transitionsnetwork.xml.gz")
    print 'after load'

    out_hist = vertex_hist(network_transitions, "out")
    plt.gca().set_yscale('log')
    plt.gca().set_xscale('log')
    plt.plot(out_hist[1][:-1], out_hist[0], marker='o')
    plt.xlabel('Out-degree')
    plt.ylabel('Frequency')
    plt.gca().set_ylim([1, 10**6])
    #plt.title('Out-degree Distribution')
    plt.savefig('output/wikipedia-transitions-out-deg-dist.pdf')
    plt.clf()

    in_hist = vertex_hist(network_transitions, "in")
    plt.gca().set_yscale('log')
    plt.gca().set_xscale('log')
    plt.plot(in_hist[1][:-1], in_hist[0], marker='o')
    plt.xlabel('In-degree')
    plt.ylabel('Frequency')
    #plt.title('In-degree Distribution')
    plt.gca().set_ylim([1, 10**6])
    plt.savefig('output/wikipedia-transitions-in-deg-dist.pdf')
    plt.clf()

    total_hist = vertex_hist(network_transitions, "total")
    plt.gca().set_yscale('log')
    plt.gca().set_xscale('log')
    plt.plot(total_hist[1][:-1], total_hist[0], marker='o')
    plt.xlabel('Degree')
    plt.ylabel('Frequency')
    #plt.title('Degree Distribution')
    plt.gca().set_ylim([1, 10**6])
    plt.savefig('output/wikipedia-transitions-deg-dist.pdf')
    plt.clf()

    #clust = local_clustering(network_transitions, undirected=False)
    clust = network_transitions.vertex_properties["local_clust"]
    #hist, bin_edges = np.histogram(clust.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Local Clustering Coefficient C')
    #plt.ylabel('P(x<=C)')
    #plt.title('Clustering Coefficient Distribution')
    #plt.savefig('output/wikipedia-transitions-clust-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(clust.get_array(), ax)
    #ax.set_title('Clustering Coefficient Distribution')
    ax.set_xlabel('Local Clustering Coefficient C')
    ax.set_ylabel('P(x<=C)')
    fig.savefig('output/wikipedia-transitions-clust-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(clust.get_array(), ax)
    ax.set_title('Clustering Coefficient Distribution')
    ax.set_xlabel('Local Clustering Coefficient C')
    ax.set_ylabel('P(x>=C)')
    fig.savefig('output/wikipedia-transitions-clust-ccdf.pdf')
    plt.clf()

    prank = network_transitions.vertex_properties["page_rank"]
    #hist, bin_edges = np.histogram(prank.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Page rank Pr')
    #plt.ylabel('P(x<=Pr)')
    #plt.title('Page rank Distribution')
    #plt.savefig('output/wikipedia-transitions-prank-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(prank.get_array(), ax)
    #ax.set_title('Page Rank Distribution')
    ax.set_xlabel('Page rank Pr')
    ax.set_ylabel('P(x<=Pr)')
    fig.savefig('output/wikipedia-transitions-prank-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(prank.get_array(), ax)
    #ax.set_title('Page Rank Distribution')
    ax.set_xlabel('Page rank Pr')
    ax.set_ylabel('P(x>=Pr)')
    fig.savefig('output/wikipedia-transitions-prank-ccdf.pdf')
    plt.clf()

    kcore = network_transitions.vertex_properties["kcore"]
    #hist, bin_edges = np.histogram(kcore.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Kcore kC')
    #plt.ylabel('P(x<=kC)')
    #plt.title('K-Core Distribution')
    #plt.savefig('output/wikipedia-transitions-kcore-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(kcore.get_array(), ax)
    #ax.set_title('K-Core Distribution')
    ax.set_xlabel('k-Core kC')
    ax.set_ylabel('P(x<=kC)')
    fig.savefig('output/wikipedia-transitions-kcore-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(kcore.get_array(), ax)
    #ax.set_title('K-Core Distribution')
    ax.set_xlabel('k-Core kC')
    ax.set_ylabel('P(x>=kC)')
    fig.savefig('output/wikipedia-transitions-kcore-ccdf.pdf')
    plt.clf()

    eigenvector_centr = network_transitions.vertex_properties["eigenvector_centr"]
    #hist, bin_edges = np.histogram(eigenvector_centr.get_array(), 100, density=True)
    #cdf = np.cumsum(hist)
    #plt.plot(bin_edges[1:], cdf, marker='o')
    #plt.xlabel('Eigenvector centrality E')
    #plt.ylabel('P(x<=E)')
    #plt.title('Eigenvector Centrality Distribution')
    #plt.savefig('output/wikipedia-transitions-eigenvcentr-cdf.pdf')
    fig, ax = plt.subplots()
    powerlaw.plot_cdf(eigenvector_centr.get_array(), ax)
    #ax.set_title('Eigenvector Centrality Distribution')
    ax.set_xlabel('Eigenvector centrality E')
    ax.set_ylabel('P(x<=E)')
    fig.savefig('output/wikipedia-transitions-eigenvcentr-cdf.pdf')
    plt.clf()

    fig, ax = plt.subplots()
    powerlaw.plot_ccdf(eigenvector_centr.get_array(), ax)
    #ax.set_title('Eigenvector Centrality Distribution')
    ax.set_xlabel('Eigenvector centrality E')
    ax.set_ylabel('P(x>=E)')
    fig.savefig('output/wikipedia-transitions-eigenvcentr-ccdf.pdf')
    plt.clf()

    print 'before hits'
    #ee, authority, hub = hits(network_transitions)
    #network_transitions.vertex_properties["authority"] = authority
    #network_transitions.vertex_properties["hub"] = hub
    #network_transitions.save("output/transitionsnetwork.xml.gz")
    print 'after hits'

    colors = {'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g',
              'kcore': 'm', 'hub': 'c', 'authority': 'k'}
    labels = {'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.',
              'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub',
              'authority': 'authority'}
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for f in ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']:
        feature = network_transitions.vertex_properties[f]
        powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f])
    ax.set_xlabel('Feature $f$')
    ax.set_ylabel('$P(X>=f)$')
    ax.set_ylim([0, 1])
    plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
    plt.tight_layout()
    plt.savefig('output/wikipedia-transitions-features-cdf.pdf')
    plt.clf()

    colors = {'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g',
              'kcore': 'm', 'hub': 'c', 'authority': 'k'}
    labels = {'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.',
              'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub',
              'authority': 'authority'}
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for f in ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']:
        feature = network_transitions.vertex_properties[f]
        powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f])
    ax.set_xlabel('Feature $f$')
    ax.set_ylabel('$P(X<=f)$')
    plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
    plt.tight_layout()
    plt.savefig('output/wikipedia-transitions-features-ccdf.pdf')
    plt.clf()
# Plot adjacency matrix indexed by locations
ind1, ind2 = np.nonzero(np.triu(Z1new, 1))  # returns indices of non-zero elements
fig, ax = plt.subplots()
ax.plot(x1[ind1], x1[ind2], 'b.', x1[ind2], x1[ind1], 'b.')
ax.set(xlabel='x_i', ylabel='x_j', title='Adjacency matrix')

# Plot degree distribution.
deg = deg[ind]
a = np.sum(deg <= 100)
fit = pl.Fit(np.sort(deg)[0:a], discrete=True)  # fit power law to low degrees? very empirical
figCCDF = pl.plot_ccdf(deg, label='alpha=10')
figCCDF.set(xlabel='degree', ylabel='distribution',
            title='Double power law degree distribution')

# TO DO: add lines of power law. This does not work.
y = np.linspace(1, 100, 100)
plt.plot(y, y**(-sigma))
y = np.linspace(100, 1000, 1000)
plt.plot(y, y**(-tau))
#fit.plot_ccdf(color='r', linewidth=2, ax=figCCDF)
#fit.power_law.plot_ccdf(color='r', linestyle='--', ax=figCCDF)

# second way: with Poisson
# accept = (np.random.poisson(XYw / (1 + XY ** beta)) > 0)
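# Regarding the "TO DO" above: a reference line for a CCDF should use the CCDF
# exponent (alpha - 1 when alpha is the density exponent) and be anchored to
# the empirical curve, otherwise it starts at 1 and misses the data entirely.
# A minimal sketch, assuming `deg`, `sigma` and `tau` from the surrounding
# code and that sigma/tau are density exponents of the two regimes.
def add_ccdf_guide_line(ax, data, alpha, x_start, x_end):
    """Draw a dashed x^-(alpha-1) reference line anchored at x_start."""
    data = np.asarray(data)
    anchor = (data >= x_start).mean()  # empirical P(X >= x_start)
    xs = np.linspace(x_start, x_end, 200)
    ax.plot(xs, anchor * (xs / x_start) ** (-(alpha - 1)), 'k--')

add_ccdf_guide_line(figCCDF, deg, sigma, 1, 100)     # low-degree regime
add_ccdf_guide_line(figCCDF, deg, tau, 100, 1000)    # high-degree regime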
graph = nx.configuration_model(sequence)
loops = graph.selfloop_edges()
graph = nx.Graph(graph)
graph.remove_edges_from(loops)
Gcc = sorted(nx.connected_component_subgraphs(graph), key=len, reverse=True)
G = Gcc[0]
degrees = nx.degree(G)
#powerlaw.plot_ccdf(degrees.values(), color='b', marker='o', linestyle="none")
#plt.show()
N_new.append(len(G))
print len(G)

data = G.degree().values()
powerlaw.plot_ccdf(data, color=color.pop(), marker='o', label=str(n))
fit = powerlaw.Fit(G.degree().values())
print fit.power_law.alpha, fit.power_law.sigma
plt.show()

node_list = G.nodes()
WalkerNum = 300
T = []
walker = 0
#for walker in range(WalkerNum):
while len(T) <= 50000:
    #if walker <= 1000:
    walker += 1
    print walker
    # if walker % 100 == 0:
    #     print walker
    source, target = random.sample(node_list, 2)
#np_edges = T.get_n_edge_lists(500)
for meas in range(N_meas):
    edges = get_fast_edge_list(N, covariance, t)
    ks = get_degrees_from_edge_list(N, edges).tolist()
    k1.extend(ks)

k1 = np.array(k1, dtype=int)
k1pos = k1[k1 >= 1]

import powerlaw
results = powerlaw.Fit(k1pos, discrete=True, xmin=1)

fig = pl.figure()
powerlaw.plot_ccdf(k1pos)
#powerlaw.plot_pdf(k1)
#pl.hist(k1, bins=np.arange(1, max(k1)+1), histtype='step', density=True)
x = np.arange(1, max(k1pos))
results.lognormal.plot_ccdf(ax=pl.gca())
#results.lognormal.plot_pdf(ax=pl.gca())
pl.xscale('log')
pl.yscale('log')

fig = pl.figure()
pl.hist(
    k1,
    bins=np.arange(max(k1) + 1),
    histtype='step',
    density=True,
histplot(indian_6_fork, binsize, 'Users', 'Fork count', 'green',
         'User Jan-Jun 2019 Forks Received by Count, LogLog Scale Plot')
plt.savefig('logscale_jan-jun_forks_received.png')
plt.close()

powerlaw.plot_pdf(russian_followers_all, color='black')
powerlaw.plot_pdf(chinese_followers_all, color='red')
powerlaw.plot_pdf(american_followers_all, color='blue')
powerlaw.plot_pdf(indian_followers_all, color='green')
plt.ylabel('Users')
plt.xlabel('Follow Count')
plt.title('All Follows Received by Count, PDF')
plt.savefig('pdf_all_follows_received.png')
plt.close()

powerlaw.plot_ccdf(russian_followers_all, color='black')
powerlaw.plot_ccdf(chinese_followers_all, color='red')
powerlaw.plot_ccdf(american_followers_all, color='blue')
powerlaw.plot_ccdf(indian_followers_all, color='green')
plt.ylabel('Users')
plt.xlabel('Follow Count')
plt.title('All Follows Received by Count, CCDF')
plt.savefig('ccdf_all_follows_received.png')
plt.close()

powerlaw.plot_pdf(russian_watchers_all, color='black')
powerlaw.plot_pdf(chinese_watchers_all, color='red')
powerlaw.plot_pdf(american_watchers_all, color='blue')
powerlaw.plot_pdf(indian_watchers_all, color='green')
plt.ylabel('Users')
plt.xlabel('Star Count')
def main():
    # == == == == == == Part 1: Set up environment == == == == == == #
    timer = Timer()
    timer.start()

    data_prefix = '../data/'
    target_day_indices = [0, 15, 30, 45]
    color_cycle_4 = ColorPalette.CC4
    date_labels = ['Sep 01, 2018', 'Sep 16, 2018', 'Oct 01, 2018', 'Oct 16, 2018']

    # == == == == == == Part 2: Load video views == == == == == == #
    data_loader = DataLoader()
    data_loader.load_video_views()
    embed_view_dict = data_loader.embed_view_dict
    embed_avg_view_dict = data_loader.embed_avg_view_dict
    num_videos = data_loader.num_videos

    target_day_view_list = [[], [], [], []]
    for embed in range(num_videos):
        for target_idx, target_day in enumerate(target_day_indices):
            target_day_view_list[target_idx].append(embed_view_dict[embed][target_day])

    # == == == == == == Part 3: Load dynamic network snapshot == == == == == == #
    embed_indegree_dict = {embed: np.zeros((T,)) for embed in np.arange(num_videos)}  # daily indegree for each embed
    zero_indegree_list = []  # percentage of zero indegree for each day
    num_edges_list = []  # number of total edges for each day
    for t in range(T):
        filename = 'network_{0}.p'.format((datetime(2018, 9, 1) + timedelta(days=t)).strftime('%Y-%m-%d'))
        indegree_list = []
        with open(os.path.join(data_prefix, 'network_pickle', filename), 'rb') as fin:
            network_dict = pickle.load(fin)
            # embed_tar: [(embed_src, pos_src, view_src), ...]
            for tar_embed in range(num_videos):
                indegree_value = len([1 for x in network_dict[tar_embed] if x[1] < NUM_REL])
                embed_indegree_dict[tar_embed][t] = indegree_value
                indegree_list.append(indegree_value)
        indegree_counter = Counter(indegree_list)
        zero_indegree_list.append(indegree_counter[0] / num_videos)
        num_edges_list.append(sum(indegree_list))
        print('>>> Finish loading day {0}...'.format(t + 1))
    print('>>> Network structure has been loaded!')
    print('\n>>> Average number of edges: {0:.0f}, max: {1:.0f}, min: {2:.0f}'.format(
        sum(num_edges_list) / len(num_edges_list), max(num_edges_list), min(num_edges_list)))

    fig, axes = plt.subplots(1, 3, figsize=(12, 4.5))
    ax1, ax2, ax3 = axes.ravel()

    # == == == == == == Part 4: Plot ax1 indegree CCDF == == == == == == #
    embed_avg_indegree_dict = defaultdict(float)
    for t in range(T):
        for embed in range(num_videos):
            embed_avg_indegree_dict[embed] += embed_indegree_dict[embed][t] / T

    indegree_ranked_embed_list = [x[0] for x in sorted(embed_avg_indegree_dict.items(),
                                                       key=lambda kv: kv[1], reverse=True)]
    top_20_indegree_embeds = indegree_ranked_embed_list[:20]
    popular_ranked_embed_list = [x[0] for x in sorted(embed_avg_view_dict.items(),
                                                      key=lambda kv: kv[1], reverse=True)]
    top_20_popular_embeds = popular_ranked_embed_list[:20]

    for target_idx, target_day in enumerate(target_day_indices):
        indegree_list = []
        for embed in range(num_videos):
            indegree_list.append(embed_indegree_dict[embed][target_day])
        print('video with 10 indegree has more in-links than {0:.2f}% videos on date {1}'
              .format(percentileofscore(indegree_list, 10), date_labels[target_idx]))
        print('video with 20 indegree has more in-links than {0:.2f}% videos on date {1}'
              .format(percentileofscore(indegree_list, 20), date_labels[target_idx]))
        plot_ccdf(indegree_list, ax=ax1, color=color_cycle_4[target_idx], label=date_labels[target_idx])

    # compute the powerlaw fit
    powerlaw_fit = Fit(list(embed_avg_indegree_dict.values()))
    infer_alpha = powerlaw_fit.power_law.alpha
    p = powerlaw_fit.power_law.ccdf()
    ins_x_axis = powerlaw_fit.power_law.__dict__['parent_Fit'].__dict__['data'][:int(0.9 * len(p))]
    ins_y_axis = 0.1 * p[:int(0.9 * len(p))]

    ax1.plot(ins_x_axis, ins_y_axis, 'k:')
    ax1.text(0.4, 0.6, r'$x^{{{0:.2f}}}$'.format(-infer_alpha + 1), size=12,
             ha='right', va='bottom', transform=ax1.transAxes)

    ax1.set_xscale('log')
    ax1.set_yscale('log')
    ax1.set_xlabel('indegree', fontsize=11)
    ax1.set_ylabel('$P(X) \geq x$', fontsize=11)
    ax1.tick_params(axis='both', which='major', labelsize=10)
    ax1.set_title('(a) indegree distribution', fontsize=12)
    ax1.legend(frameon=False, fontsize=11, ncol=1, fancybox=False, shadow=True)

    mean_zero_indegree = sum(zero_indegree_list) / len(zero_indegree_list)
    ax1.axhline(y=1 - mean_zero_indegree, color='k', linestyle='--', zorder=30)
    ax1.text(0.96, 0.9, '{0:.0f}% with 0 indegree'.format(mean_zero_indegree * 100),
             size=11, transform=ax1.transAxes, ha='right', va='top')

    # == == == == == == Part 5: Plot ax2 views distribution == == == == == == #
    for target_idx, views_list in enumerate(target_day_view_list):
        x_values = range(100)
        y_values = [np.percentile(views_list, x) for x in x_values]
        ax2.plot(x_values, y_values, color=color_cycle_4[target_idx], label=date_labels[target_idx])

    ax2.set_yscale('log')
    ax2.set_xlabel('views percentile', fontsize=11)
    ax2.set_ylabel('num of views', fontsize=11)
    ax2.tick_params(axis='both', which='major', labelsize=10)
    ax2.set_title('(b) daily views vs. its percentile', fontsize=12)

    avg_views_list = sorted(list(embed_avg_view_dict.values()), reverse=True)
    gini_coef = gini(avg_views_list)
    print('top 1% videos occupy {0:.2f}% views'.format(
        sum(avg_views_list[:int(0.01 * num_videos)]) / sum(avg_views_list) * 100))
    print('top 10% videos occupy {0:.2f}% views'.format(
        sum(avg_views_list[:int(0.1 * num_videos)]) / sum(avg_views_list) * 100))
    print('Gini coef: {0:.3f}'.format(gini_coef))

    spearman_degree = [embed_avg_indegree_dict[embed] for embed in range(num_videos)]
    spearman_views = [embed_avg_view_dict[embed] for embed in range(num_videos)]
    print('Spearman correlation between views and indegree: {0:.4f}, pvalue: {1:.2f}'
          .format(*spearmanr(spearman_views, spearman_degree)))

    median_views = np.median(avg_views_list)
    top_views_90th = np.percentile(avg_views_list, 90)
    top_views_99th = np.percentile(avg_views_list, 99)
    ax2_xmin = ax2.get_xlim()[0]
    ax2_ymin = ax2.get_ylim()[0]
    ax2.plot((50, 50), (ax2_ymin, median_views), color='k', linestyle='--', zorder=30)
    ax2.plot((ax2_xmin, 50), (median_views, median_views), color='k', linestyle='--', zorder=30)
    ax2.text(0.49, 0.45, 'median views {0:,.0f}'.format(median_views),
             size=11, transform=ax2.transAxes, ha='right', va='bottom')
    ax2.plot((90, 90), (ax2_ymin, top_views_90th), color='k', linestyle='--', zorder=30)
    ax2.plot((ax2_xmin, 90), (top_views_90th, top_views_90th), color='k', linestyle='--', zorder=30)
    ax2.text(0.88, 0.75, '90th views {0:,.0f}'.format(top_views_90th),
             size=11, transform=ax2.transAxes, ha='right', va='bottom')
    ax2.plot((99, 99), (ax2_ymin, top_views_99th), color='k', linestyle='--', zorder=30)
    ax2.plot((ax2_xmin, 99), (top_views_99th, top_views_99th), color='k', linestyle='--', zorder=30)
    ax2.text(0.91, 0.95, '99th views {0:,.0f}'.format(top_views_99th),
             size=11, transform=ax2.transAxes, ha='right', va='bottom')

    # == == == == == == Part 7: Plot ax3 video uploading trend == == == == == == #
    x_axis = range(2009, 2018)
    x_labels = ["'09", "'10", "'11", "'12", "'13", "'14", "'15", "'16", "'17"]
    upload_mat = np.zeros((len(x_axis), 8))
    target_topics = ['Pop_music', 'Rock_music', 'Hip_hop_music', 'Independent_music',
                     'Country_music', 'Electronic_music', 'Soul_music', 'Others']
    topic_labels = ['Pop', 'Rock', 'Hip hop', 'Independent', 'Country', 'Electronic', 'Soul', 'Others']
    color_cycle_8 = ColorPalette.CC8

    data_loader.load_embed_content_dict()
    embed_title_dict = data_loader.embed_title_dict
    embed_uploadtime_dict = data_loader.embed_uploadtime_dict
    embed_genre_dict = data_loader.embed_genre_dict

    for embed in range(num_videos):
        upload_year = int(embed_uploadtime_dict[embed][:4])
        if 2009 <= upload_year <= 2017:
            year_idx = upload_year - 2009
            genres = embed_genre_dict[embed]
            if len(genres) == 0:
                # add one to "Others" genre
                upload_mat[year_idx, 7] += 1
            else:
                for genre in genres:
                    upload_mat[year_idx, target_topics.index(genre)] += 1 / len(genres)

    print()
    print(['{0}: {1}'.format(topic, int(num))
           for topic, num in zip(target_topics, np.sum(upload_mat, axis=0))])

    stackedBarPlot(ax=ax3, data=upload_mat, cols=color_cycle_8, edgeCols=['#000000'] * 8,
                   xlabel='uploaded year', ylabel='num of videos', scale=False, endGaps=True)
    ax3.tick_params(axis='both', which='major', labelsize=9)
    ax3.set_xticks(np.arange(len(x_axis)))
    ax3.set_xticklabels(x_labels)
    ax3.yaxis.set_major_formatter(FuncFormatter(concise_fmt))
    ax3.legend([plt.Rectangle((0, 0), 1, 1, fc=c, ec='k', alpha=0.6) for c in color_cycle_8],
               topic_labels, fontsize=9, frameon=False, handletextpad=0.2, columnspacing=0.3,
               ncol=4, bbox_to_anchor=(1, -0.12), bbox_transform=ax3.transAxes,
               fancybox=False, shadow=True)
    ax3.set_title('(c) VEVO videos uploading trend', fontsize=12)

    union_top_set = set(top_20_indegree_embeds).union(top_20_popular_embeds)
    print('\n>>> Size of the union set at cutoff 15:', len(union_top_set))
    print('{0:>24} | {1:>17} | {2:>5} | {3:>8} | {4:>6} | {5:>10} | {6:>5}'
          .format('Video title', 'Artist', 'Age', 'Indegree', '-rank', 'Views', '-rank'))
    for embed in top_20_indegree_embeds:
        print('{0:>24} & {1:>17} & {2:>5} & {3:>8} & {4:>6} & {5:>10} & {6:>5} \\\\'
              .format(embed_title_dict[embed].split(' - ', 1)[1].split('(')[0].split('ft')[0].strip(),
                      embed_title_dict[embed].split(' - ', 1)[0].split('&')[0].split(',')[0].strip(),
                      '{0:,}'.format((datetime(2018, 11, 2) - str2obj(embed_uploadtime_dict[embed])).days),
                      '{0:,}'.format(int(embed_avg_indegree_dict[embed])),
                      '{0:,}'.format(top_20_indegree_embeds.index(embed) + 1),
                      '{0:,}'.format(int(embed_avg_view_dict[embed])),
                      '{0:,}'.format(popular_ranked_embed_list.index(embed) + 1)))

    print('\n{0:>24} | {1:>17} | {2:>5} | {3:>8} | {4:>6} | {5:>10} | {6:>5}'
          .format('Video title', 'Artist', 'Age', 'Indegree', '-rank', 'Views', '-rank'))
    for embed in top_20_popular_embeds:
        print('{0:>24} & {1:>17} & {2:>5} & {3:>8} & {4:>6} & {5:>10} & {6:>5} \\\\'
              .format(embed_title_dict[embed].split(' - ', 1)[1].split('(')[0].split('ft')[0].strip(),
                      embed_title_dict[embed].split(' - ', 1)[0].split('&')[0].split(',')[0].strip(),
                      '{0:,}'.format((datetime(2018, 11, 2) - str2obj(embed_uploadtime_dict[embed])).days),
                      '{0:,}'.format(int(embed_avg_indegree_dict[embed])),
                      '{0:,}'.format(indegree_ranked_embed_list.index(embed) + 1),
                      '{0:,}'.format(int(embed_avg_view_dict[embed])),
                      '{0:,}'.format(top_20_popular_embeds.index(embed) + 1)))

    hide_spines(axes)

    timer.stop()

    plt.tight_layout()
    plt.savefig('../images/measure_basic_statistics.pdf', bbox_inches='tight')
    if not platform.system() == 'Linux':
        plt.show()
print 'Mu = ', fit.lognormal.mu
print 'Sigma = ', fit.lognormal.sigma

step_data_xmin = [i for i in step_data if i > fit.power_law.xmin]

figure()
powerlaw.plot_pdf(step_data_xmin, color='b', linewidth=2)   # PDF of data
fit.power_law.plot_pdf(color='b', linestyle='--')           # PL theoretical fit
fit.exponential.plot_pdf(color='r', linestyle='--')         # EXP theoretical fit
fit.lognormal.plot_pdf(color='g', linestyle='--')           # LN theoretical fit
xlabel('Step Length, x [cm]')
ylabel('P(x)')
plt.legend(('Data', 'Power Law Fit', 'Exponential Fit', 'Lognormal Fit'))

figure()
powerlaw.plot_ccdf(step_data_xmin, color='b', linewidth=2)  # CCDF of data
fit.power_law.plot_ccdf(color='b', linestyle='--')          # PL theoretical fit
fit.exponential.plot_ccdf(color='r', linestyle='--')        # EXP theoretical fit
fit.lognormal.plot_ccdf(color='g', linestyle='--')          # LN theoretical fit
xlabel('Step Length, x [cm]')
ylabel('P(x)')
plt.legend(('Data', 'Power Law Fit', 'Exponential Fit', 'Lognormal Fit'))

##print '\nCompare PL with TRUNCATED PL:'
##
##R1, p1 = fit.distribution_compare('power_law', 'truncated_power_law')
##
##if R1 > 0:
##    print 'Power law more likely for data. R = ', R1, ' and p = ', p1
##else:
##    print 'Truncated PL more likely for data. R = ', R1, 'and p = ', p1
         bins=np.linspace(np.log10(np.min(grado)), np.log10(np.max(grado)), 15))

# Power-law fit, see: https://github.com/jeffalstott/powerlaw
ajuste = powerlaw.Fit(grado, xmin=1.0)
print(ajuste.power_law.alpha)
print(ajuste.power_law.xmin)
R, p = ajuste.distribution_compare('power_law', 'lognormal')
# The power-law exponent is given by ajuste.power_law.alpha

# Plotting
fig = plt.figure(figsize=(15, 10))
#plt.suptitle('Histogramas Datos Newman', fontsize=22)
#plt.subplot(2, 2, 1)
powerlaw.plot_pdf(grado, color='b')
ajuste.power_law.plot_pdf(color='b', linestyle='--')
#plt.subplot(2, 2, 2)
#powerlaw.plot_cdf(grado, color='b')
#plt.subplot(2, 2, 3)
powerlaw.plot_ccdf(grado, color='r')
#plt.subplot(2, 2, 4)
A = np.diff(bines_log)
ydata = np.divide(datos_log + 0.0, np.amax(datos_log))
print(ydata)
plt.loglog(np.diff(bines_log), ydata, 'o')
#plt.plot(np.diff(bines_log)*ajuste.power_law.alpha, datos_log, '-')
plt.show()
#plt.savefig('maps.png', dpi=300)
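# Interpretation note for distribution_compare above: R is the log-likelihood
# ratio between the two candidates, so R > 0 favours the first ('power_law')
# and R < 0 the second ('lognormal'); p is the significance of that sign.
# A small reporting sketch using the R, p computed above:
if R > 0:
    print('Power law favoured over lognormal (R = {0:.3f}, p = {1:.3f})'.format(R, p))
else:
    print('Lognormal favoured over power law (R = {0:.3f}, p = {1:.3f})'.format(R, p))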
legend(legend_refs[::-1], theoretical_alphas[::-1],
       loc='center right',
       bbox_to_anchor=(.1, 0, 1, 1),
       bbox_transform=plt.gcf().transFigure,
       title=r'$\alpha$ of Data')

savefig('Fig_powerlaw_validation_%itrials_%idata.pdf' % (int(n_trials), int(n_data)),
        bbox_inches='tight')

# <markdowncell>

# # Validation of Simulated Data Generators for Other Distributions

# <codecell>

param = [2.5, .5]
dist = powerlaw.Truncated_Power_Law

theoretical_dist = dist(xmin=2.0, parameters=param, discrete=True)
simulated_data = theoretical_dist.generate_random(1000)
powerlaw.plot_ccdf(simulated_data, linewidth=2, linestyle='--')
theoretical_dist.plot_ccdf(simulated_data)
figure()
powerlaw.plot_pdf(simulated_data, linewidth=2, linestyle='--')
theoretical_dist.plot_pdf(simulated_data)

theoretical_dist = dist(xmin=2.0, parameters=param, discrete=False)
figure()
simulated_data = theoretical_dist.generate_random(1000)
powerlaw.plot_ccdf(simulated_data, linewidth=2, linestyle='--')
theoretical_dist.plot_ccdf(simulated_data)
figure()
powerlaw.plot_pdf(simulated_data, linewidth=2, linestyle='--')
theoretical_dist.plot_pdf(simulated_data)
binsize = int(np.max(indian_all) / multiplier)
histplot(indian_6, binsize, 'Users', 'Star Count', 'green',
         'All Stars Given by Count, LogLog Scale Plot')
plt.savefig('logscale_all_stars_given.png')
plt.close()

powerlaw.plot_pdf(russian_6, color='black')
powerlaw.plot_pdf(chinese_6, color='red')
powerlaw.plot_pdf(american_6, color='blue')
powerlaw.plot_pdf(indian_6, color='green')
plt.ylabel('Users')
plt.xlabel('Star Count')
plt.title('Jan-Jun 2019 Stars Given by Count, PDF')
plt.savefig('pdf_jan-jun_stars_given.png')
plt.close()

powerlaw.plot_ccdf(russian_6, color='black')
powerlaw.plot_ccdf(chinese_6, color='red')
powerlaw.plot_ccdf(american_6, color='blue')
powerlaw.plot_ccdf(indian_6, color='green')
plt.ylabel('Users')
plt.xlabel('Star Count')
plt.title('Jan-Jun 2019 Stars Given by Count, CCDF')
plt.savefig('ccdf_jan-jun_stars_given.png')
plt.close()

powerlaw.plot_pdf(russian_all, color='black')
powerlaw.plot_pdf(chinese_all, color='red')
powerlaw.plot_pdf(american_all, color='blue')
powerlaw.plot_pdf(indian_all, color='green')
plt.ylabel('Users')
plt.xlabel('Star Count')
# (assumes project helpers defined elsewhere in the source: Timer, ColorPalette,
#  infer_missing_num, hide_spines; Fit and plot_ccdf come from the powerlaw package)
def main():
    timer = Timer()
    timer.start()

    app_name = 'cyberbullying'
    archive_dir = '../data/{0}_out'.format(app_name)
    entities = ['user', 'hashtag']
    rho = 0.5272

    fig, axes = plt.subplots(1, 2, figsize=(10, 3.3))
    cc4 = ColorPalette.CC4
    blue = cc4[0]

    for ax_idx, entity in enumerate(entities):
        sample_datefile = open(os.path.join(archive_dir, '{0}_{1}_all.txt'.format(entity, app_name)),
                               'r', encoding='utf-8')
        complete_datefile = open(os.path.join(archive_dir, 'complete_{0}_{1}.txt'.format(entity, app_name)),
                                 'r', encoding='utf-8')

        sample_entity_freq_dict = defaultdict(int)
        complete_entity_freq_dict = defaultdict(int)
        uni_random_entity_freq_dict = defaultdict(int)

        if entity == 'user':
            for line in sample_datefile:
                sample_entity_freq_dict[line.rstrip().split(',')[1]] += 1
            for line in complete_datefile:
                complete_entity_freq_dict[line.rstrip().split(',')[1]] += 1
                toss = np.random.random_sample()
                if toss <= rho:
                    uni_random_entity_freq_dict[line.rstrip().split(',')[1]] += 1
        else:
            for line in sample_datefile:
                for item in line.rstrip().split(',')[1:]:
                    sample_entity_freq_dict[item.lower()] += 1
            for line in complete_datefile:
                for item in line.rstrip().split(',')[1:]:
                    complete_entity_freq_dict[item.lower()] += 1
                toss = np.random.random_sample()
                if toss <= rho:
                    for item in line.rstrip().split(',')[1:]:
                        uni_random_entity_freq_dict[item.lower()] += 1

        sample_datefile.close()
        complete_datefile.close()

        # compute the powerlaw fit in the complete set
        complete_freq_list = list(complete_entity_freq_dict.values())
        complete_powerlaw_fit = Fit(complete_freq_list)
        complete_alpha = complete_powerlaw_fit.power_law.alpha
        complete_xmin = complete_powerlaw_fit.power_law.xmin
        print('{0} complete set alpha {1}, xmin {2}'.format(entity, complete_alpha, complete_xmin))
        plot_ccdf(complete_freq_list, ax=axes[ax_idx], color='k', ls='-', label='complete')

        # compute the powerlaw fit in the sample set
        # infer the number of missing entities
        sample_freq_list = list(sample_entity_freq_dict.values())
        sample_freq_counter = Counter(sample_freq_list)
        # we observe the frequency of entities appearing less than 100 times
        num_interest = 100
        sample_freq_list_top100 = [0] * num_interest
        for freq in range(1, num_interest + 1):
            sample_freq_list_top100[freq - 1] = sample_freq_counter[freq]
        inferred_num_missing = infer_missing_num(sample_freq_list_top100, rho=rho, m=num_interest)
        corrected_sample_freq_list = sample_freq_list + [0] * inferred_num_missing
        sample_powerlaw_fit = Fit(corrected_sample_freq_list)
        sample_alpha = sample_powerlaw_fit.power_law.alpha
        sample_xmin = sample_powerlaw_fit.power_law.xmin
        print('{0} sample set alpha {1}, xmin {2}'.format(entity, sample_alpha, sample_xmin))
        plot_ccdf(corrected_sample_freq_list, ax=axes[ax_idx], color=blue, ls='-', label='sample')

        # compute the powerlaw fit in uniform random sample
        uni_random_num_missing = len(complete_entity_freq_dict) - len(uni_random_entity_freq_dict)
        uni_random_freq_list = list(uni_random_entity_freq_dict.values())
        uni_random_freq_list = uni_random_freq_list + [0] * uni_random_num_missing
        uni_random_powerlaw_fit = Fit(uni_random_freq_list)
        uni_random_alpha = uni_random_powerlaw_fit.power_law.alpha
        uni_random_xmin = uni_random_powerlaw_fit.power_law.xmin
        print('{0} uniform random sampling alpha {1}, xmin {2}'.format(entity, uni_random_alpha, uni_random_xmin))
        plot_ccdf(uni_random_freq_list, ax=axes[ax_idx], color='k', ls='--', label='uniform random')

        print('inferred missing', inferred_num_missing)
        print('empirical missing', len(complete_entity_freq_dict) - len(sample_entity_freq_dict))
        print('uniform random missing', uni_random_num_missing)

        print('KS test (sample, uniform)')
        print(stats.ks_2samp(corrected_sample_freq_list, uni_random_freq_list))
        print('KS test (sample, complete)')
        print(stats.ks_2samp(corrected_sample_freq_list, complete_freq_list))
        print('KS test (uniform, complete)')
        print(stats.ks_2samp(uni_random_freq_list, complete_freq_list))

        axes[ax_idx].set_xscale('symlog')
        axes[ax_idx].set_yscale('log')
        axes[ax_idx].set_xlabel('frequency', fontsize=16)
        axes[ax_idx].tick_params(axis='both', which='major', labelsize=16)

    axes[0].set_xticks([0, 1, 100, 10000])
    axes[0].set_yticks([1, 0.01, 0.0001, 0.000001])
    axes[0].set_ylabel(r'$P(X \geq x)$', fontsize=16)
    axes[0].legend(frameon=False, fontsize=16, ncol=1, fancybox=False, shadow=True, loc='lower left')
    axes[0].set_title('(a) user posting', fontsize=18, pad=-3 * 72, y=1.0001)

    axes[1].set_xticks([0, 1, 100, 10000, 1000000])
    axes[1].set_yticks([1, 0.1, 0.001, 0.00001])
    axes[1].set_title('(b) hashtag', fontsize=18, pad=-3 * 72, y=1.0001)

    hide_spines(axes)

    timer.stop()

    plt.tight_layout(rect=[0, 0.05, 1, 1])
    plt.savefig('../images/entity_freq_dist.pdf', bbox_inches='tight')
    if not platform.system() == 'Linux':
        plt.show()
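# --- Illustration only (not part of the original pipeline): the 'uniform random' baseline
# above keeps each post with probability rho, which Bernoulli-thins every entity's count.
# This standalone sketch shows how such thinning biases the fitted power-law exponent and
# how many entities vanish entirely; rho = 0.5272 matches the value used above.
import numpy as np
import powerlaw

rng = np.random.default_rng(0)
true_freqs = powerlaw.Power_Law(xmin=1.0, parameters=[2.5], discrete=True).generate_random(10000)
thinned = [int(rng.binomial(int(k), 0.5272)) for k in true_freqs]
thinned = [k for k in thinned if k > 0]   # entities whose every occurrence is dropped disappear

print('alpha (complete):', powerlaw.Fit(true_freqs, discrete=True).power_law.alpha)
print('alpha (thinned): ', powerlaw.Fit(thinned, discrete=True).power_law.alpha)
print('entities missed: ', len(true_freqs) - len(thinned))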
# stashfig("degree-synapse-sequences")

# %%
from powerlaw import plot_ccdf, plot_cdf, plot_pdf

fig, axs = plt.subplots(4, 5, figsize=(20, 20), sharex=True)
for i in range(len(GRAPH_TYPES)):
    g_type = GRAPH_TYPES[i]
    g_type_label = GRAPH_TYPE_LABELS[i]
    adj = load_everything(g_type, version=BRAIN_VERSION)

    in_sum = np.sort(adj.sum(axis=0))
    out_sum = np.sort(adj.sum(axis=1))
    ax = plot_ccdf(in_sum, ax=axs[0, i])
    # ax.set(yscale="log")
    ax.set_xticklabels([])
    ax = plot_ccdf(out_sum, ax=axs[1, i])
    # ax.set(yscale="log")
    ax.set_xticklabels([])

    in_degree = np.sort(np.count_nonzero(adj, axis=0))
    out_degree = np.sort(np.count_nonzero(adj, axis=1))
    ax = plot_ccdf(in_degree, ax=axs[2, i])
    # ax.set(yscale="log")
    ax.set_xticklabels([])
    ax = plot_ccdf(out_degree, ax=axs[3, i])
import networkx as nx
import powerlaw
import matplotlib.pyplot as plt

"""C.Elegan"""
G = nx.read_gml("celegansneural/celegansneural.gml")
G = nx.DiGraph(G)
print "C.Elegan"
print "Number of Nodes: ", len(G)
print "Number of Edges: ", len(G.edges())

degrees = G.degree().values()
in_degrees = G.in_degree().values()
out_degrees = G.out_degree().values()

plt.figure(1)
powerlaw.plot_ccdf(degrees, marker='o', color='b', linestyle="none")
plt.xlabel(r"$k$", fontsize=16)
plt.ylabel(r"$P(k)$", fontsize=16)
plt.title("Degree Distribution of C.Elegan")
plt.savefig("C.Elegan - Degree Distribution.png")

plt.figure(2)
powerlaw.plot_ccdf(in_degrees, marker='o', color='b', linestyle="none")
plt.xlabel(r"$k$", fontsize=16)
plt.ylabel(r"$P(k)$", fontsize=16)
plt.title("In Degree Distribution of C.Elegan")
plt.savefig("C.Elegan - In Degree Distribution.png")

plt.figure(3)
powerlaw.plot_ccdf(out_degrees, marker='o', color='b', linestyle="none")
plt.xlabel(r"$k$", fontsize=16)