def create_cits_plots(data, title_=""):
    """Show the binned PDF of ``data`` with a fitted power-law line on top.

    A discrete ``powerlaw.Fit`` supplies the exponent and ``xmin``. The PDF
    from ``powerlaw.pdf`` is drawn as markers at the bin centres and again via
    ``powerlaw.plot_pdf``. The dashed line is produced by the helpers
    ``find_pdf_at_x`` and ``fitted_pl_xy``, which are defined outside this
    block.

    Args:
        data: Sample handed unchanged to ``powerlaw.Fit`` / ``powerlaw.pdf``.
        title_: Text for the figure super-title.
    """
    figure, axis = plt.subplots(1, 1, figsize=(7, 7))
    fitted = powerlaw.Fit(data, discrete=True)
    plt.suptitle(title_, fontsize=30)
    exponent = fitted.power_law.alpha
    cutoff = fitted.power_law.xmin

    # powerlaw.pdf returns (bin edges, density per bin).
    edges, density = powerlaw.pdf(data)
    left_edges = edges[0:-1]
    midpoints = left_edges + 0.5 * (edges[1:] - left_edges)

    anchor_x, anchor_y = find_pdf_at_x(cutoff, edges, density)
    line_x, line_y = fitted_pl_xy(exponent, anchor_x, anchor_y, cutoff,
                                  np.max(edges))

    axis.set_xlabel("Number of word occurances", fontsize=20)
    axis.set_ylabel("Probability density function", fontsize=20)
    axis.plot(midpoints, density, 'o')
    powerlaw.plot_pdf(data, ax=axis, color='b', label='APS data')
    fit_label = r'Power law fit: $x^{-%4.3f}$' % (exponent)
    axis.plot(line_x, line_y, 'r--', label=fit_label)
    axis.legend(loc=0, fontsize=15)

    figure.text(0.95, 0.05, '(c) 2018, P.G.', fontsize=10, color='gray',
                ha='right', va='bottom', alpha=0.5)
    plt.show()
def drawDegreeDistributionWithPowerLaw(degrees):
    """Draw a degree histogram with linearly and logarithmically binned PDFs.

    The degrees are sorted through a pandas Series, drawn as a 20-bin density
    histogram, overlaid with two ``powerlaw.plot_pdf`` curves and then handed
    to ``multiple_line_chart`` (defined outside this block) with an empty
    distributions mapping. The figure is shown interactively.

    Args:
        degrees: Sequence of node degrees.
    """
    figure = plt.figure()
    axis = plt.axes()
    quantity = 'Degree frequencies'
    ordered = pd.Series(degrees).sort_values().values

    axis.hist(ordered, 20, density=True, edgecolor='grey')

    # Blue: linearly spaced bins; red: logarithmically spaced bins.
    for use_linear_bins, colour in ((True, 'b'), (False, 'r')):
        powerlaw.plot_pdf(ordered, ax=axis, linear_bins=use_linear_bins,
                          color=colour)

    multiple_line_chart(axis, ordered, dict(), 'Best fit for %s' % quantity,
                        quantity, 'probability')
    figure.tight_layout()
    plt.legend(['Linearly spaced bins', 'logarithmically spaced bins'])
    plt.show()
def plot_loglog(avalanche, label):
    """Plot the PDF of ``avalanche``, save it as SVG, show it, then clear.

    Args:
        avalanche: Sample passed to ``powerlaw.plot_pdf``.
        label: String used in the title and in the output file name
            ``figs/<label>_powerlaw.svg``.
    """
    powerlaw.plot_pdf(avalanche)
    # NOTE(review): `feature` is not defined in this function; presumably a
    # module-level name - confirm against the rest of the file.
    plt.ylabel(str(feature))
    heading = 'Avalanche ' + label + ' — Power Law Fit'
    plt.title(heading)
    target = "figs/" + label + "_powerlaw.svg"
    plt.savefig(target)
    plt.show()
    plt.clf()
def plot_powerlaw_combined(data, data_inst, fig, units):
    """Draw two stacked power-law panels for one dataset column of ``fig``.

    Upper panel: PDF of the data with power-law fits for ``xmin=1`` (dotted)
    and for the xmin chosen by ``powerlaw.Fit`` (dashed). Lower panel: the
    fitted region with power-law and exponential candidates. Grid dimensions
    come from the names ``n_graphs`` and ``n_data`` defined outside this
    block.

    Args:
        data: Sample to fit.
        data_inst: 1-based column index; column 1 also gets panel letters and
            the y label.
        fig: Figure the subplots are added to.
        units: x-axis label of the lower panel.
    """
    from powerlaw import plot_pdf, Fit, pdf
    from pylab import setp

    label_xy = (-.4, .95)
    is_first_column = data_inst == 1

    upper = fig.add_subplot(n_graphs, n_data, data_inst)
    plot_pdf(data, ax=upper, color='b', linewidth=2)
    fixed_fit = Fit(data, xmin=1, discrete=True)
    fixed_fit.power_law.plot_pdf(ax=upper, linestyle=':', color='g')
    fixed_fit.power_law.pdf()  # evaluated as in the original; result unused
    best_fit = Fit(data, discrete=True)
    best_fit.power_law.plot_pdf(ax=upper, linestyle='--', color='g')
    setp(upper.get_xticklabels(), visible=False)
    if is_first_column:
        upper.annotate("A", label_xy, xycoords="axes fraction", fontsize=14)
        upper.set_ylabel(r"$p(X)$")

    lower = fig.add_subplot(n_graphs, n_data, n_data + data_inst)
    best_fit.power_law.plot_pdf(ax=lower, linestyle='--', color='g')
    best_fit.exponential.plot_pdf(ax=lower, linestyle='--', color='r')
    best_fit.plot_pdf(ax=lower, color='b', linewidth=2)
    lower.set_ylim(upper.get_ylim())
    lower.set_yticks(lower.get_yticks()[::2])  # keep every second tick
    lower.set_xlim(upper.get_xlim())
    if is_first_column:
        lower.annotate("B", label_xy, xycoords="axes fraction", fontsize=14)
    lower.set_xlabel(units)
def plot_powerlaw(X):
    """Plot the PDF of ``X`` with power-law and lognormal fits and show it.

    Two discrete fits are made: one with ``xmin=1`` (dotted green) and one
    with the xmin selected by ``powerlaw.Fit`` (dashed green power law, dashed
    red lognormal). The second fit's alpha, xmin, sigma and D are logged
    through the ``logger`` name defined outside this block.

    Args:
        X: Sample to fit.
    """
    import powerlaw
    import matplotlib.pyplot as plt

    axis = plt.figure().add_subplot(111)
    powerlaw.plot_pdf(X, ax=axis, color='b', linewidth=2)

    fixed_fit = powerlaw.Fit(X, xmin=1, discrete=True)
    fixed_fit.power_law.plot_pdf(ax=axis, linestyle=':', color='g')

    best_fit = powerlaw.Fit(X, discrete=True)
    for attribute in ('alpha', 'xmin', 'sigma', 'D'):
        logger.info(getattr(best_fit.power_law, attribute))

    best_fit.power_law.plot_pdf(color='g', linestyle='--', ax=axis)
    best_fit.lognormal.plot_pdf(color='r', linestyle='--', ax=axis)
    plt.show()
def plot_dist(data, columns, rows, outdir, filename_base):
    """Plot a rows x columns grid of PDFs with fits and write a text report.

    For every (row, column) pair the positive values of ``data[row][column]``
    are plotted with ``powerlaw.plot_pdf``, fitted with a discrete
    ``powerlaw.Fit`` using ``xmin=2``, and annotated with the power-law and
    lognormal parameters. The same numbers go to ``<outdir>/<filename_base>.txt``
    and the figure is saved as ``.pdf`` and ``.png`` next to it.

    Args:
        data: Nested mapping indexed as ``data[row][column]`` yielding numbers.
        columns: Column keys; also used as titles of the first grid row.
        rows: Row keys; also used as y labels of the first grid column.
        outdir: Output directory (must already exist).
        filename_base: File name stem for the report and the figures.
    """
    ncols = len(columns)
    nrows = len(rows)
    fig = plt.figure(figsize=(4*ncols, 3*nrows))
    plt.subplots_adjust(hspace=.2, wspace=0.2)
    fig.patch.set_facecolor('white')

    reportfilename = "%s/%s.txt" % (outdir, filename_base)
    # Text mode: the report is written as str. The context manager closes the
    # file even if a fit or plot call raises.
    with open(reportfilename, 'w') as reportfile:
        n = 1
        for row in rows:
            for column in columns:
                reportfile.write("= %s: %s =\n" % (column, row))
                values = [value for value in data[row][column] if value > 0]

                if n <= ncols:  # first row carries the column titles
                    ax1 = plt.subplot(nrows, ncols, n, title=column)
                else:
                    ax1 = plt.subplot(nrows, ncols, n)
                # (n - 1) % ncols is also correct for a single-column grid,
                # where n % ncols == 1 can never be true.
                if (n - 1) % ncols == 0:  # first column carries the row labels
                    plt.ylabel(row)

                powerlaw.plot_pdf(values, ax=ax1, color='k')
                ax1.tick_params(axis='both', which='major', labelsize='x-small')
                ax1.tick_params(axis='both', which='minor', labelsize='xx-small')

                fit = powerlaw.Fit(values, discrete=True, xmin=2)

                reportfile.write("Lognormal:\n")
                reportfile.write("  mu = %f\n" % (fit.lognormal.mu))
                reportfile.write("  sigma = %f\n" % (fit.lognormal.sigma))
                reportfile.write("  xmin = %d\n" % (fit.lognormal.xmin))

                reportfile.write("Power-law:\n")
                reportfile.write("  alpha = %f\n" % (fit.power_law.alpha))
                reportfile.write("  sigma = %f\n" % (fit.power_law.sigma))
                reportfile.write("  xmin = %d\n" % (fit.power_law.xmin))

                # Other candidates accepted by distribution_compare:
                # 'exponential', 'truncated_power_law',
                # 'stretched_exponential', 'gamma'.
                R, p = fit.distribution_compare('lognormal', 'power_law')
                reportfile.write(
                    "Lognormal fit compared to power-law distribution: R=%f, p=%f\n" % (R, p))
                reportfile.write("\n")

                fit.power_law.plot_pdf(linestyle='--', color='b', ax=ax1,
                                       label='Power-law fit')
                info = "Power-law:\nalpha=%.3f\nsigma=%.3f\nxmin=%d" % (
                    fit.power_law.alpha, fit.power_law.sigma, fit.power_law.xmin)
                plt.text(0.1, 0.1, info, transform=ax1.transAxes, color='b',
                         ha='left', va='bottom', size='small')

                fit.lognormal.plot_pdf(linestyle='--', color='r', ax=ax1,
                                       label='Lognormal fit')
                info = "Lognormal:\nmu=%.3f\nsigma=%.3f\nxmin=%d" % (
                    fit.lognormal.mu, fit.lognormal.sigma, fit.lognormal.xmin)
                plt.text(0.9, 0.9, info, transform=ax1.transAxes, color='r',
                         ha='right', va='top', size='small')

                n += 1

    plt.savefig("%s/%s.pdf" % (outdir, filename_base), bbox_inches='tight')
    plt.savefig("%s/%s.png" % (outdir, filename_base), bbox_inches='tight')
def plot_basics(data, data_inst, fig, units):
    '''
    Main plotting function: three stacked power-law panels for one dataset.

    (Original note: adapted from Newman's powerlaw package.)

    Panel 1 shows the linearly binned PDF as points plus the PDF of the
    positive values. Panel 2 shows the PDF with a power-law fit at the xmin
    chosen by ``Fit`` ('w/o xmin') and one with ``xmin=3`` ('w xmin'). Panel 3
    shows the ``xmin=3`` fit region with power-law and exponential candidates.
    Global matplotlib rc settings (tick padding, sans-serif font, size 10,
    no TeX) are changed as a side effect.

    data: numpy array of observations (boolean-mask indexing is used).
    data_inst: 1-based subplot position of the first panel in a 4 x 1 grid.
    fig: matplotlib figure the panels are added to.
    units: x-axis label of the last panel.
    '''
    import pylab
    pylab.rcParams['xtick.major.pad'] = '8'
    pylab.rcParams['ytick.major.pad'] = '8'
    pylab.rcParams['font.sans-serif'] = 'Arial'

    from matplotlib import rc
    rc('font', family='sans-serif')
    rc('font', size=10.0)
    rc('text', usetex=False)

    from powerlaw import plot_pdf, Fit, pdf
    from pylab import setp

    n_data = 1
    n_graphs = 4
    positive = data[data > 0]

    ax1 = fig.add_subplot(n_graphs, n_data, data_inst)
    x, y = pdf(data, linear_bins=True)
    ind = y > 0
    # pdf() returns one more edge than densities; drop the last edge so the
    # mask lines up, then keep only non-empty bins.
    x = x[:-1][ind]
    y = y[ind]
    ax1.scatter(x, y, color='r', s=.5, label='data')
    plot_pdf(positive, ax=ax1, color='b', linewidth=2, label='PDF')
    setp(ax1.get_xticklabels(), visible=False)
    # 'best' is the valid location name ('bestloc' is rejected by current
    # matplotlib); calling legend on the axes avoids relying on pyplot state.
    ax1.legend(loc='best')

    ax2 = fig.add_subplot(n_graphs, n_data, n_data + data_inst, sharex=ax1)
    plot_pdf(positive, ax=ax2, color='b', linewidth=2, label='PDF')
    fit = Fit(data, discrete=True)
    fit.power_law.plot_pdf(ax=ax2, linestyle=':', color='g', label='w/o xmin')
    ax2.set_xlim(ax1.get_xlim())
    fit = Fit(data, discrete=True, xmin=3)
    fit.power_law.plot_pdf(ax=ax2, linestyle='--', color='g', label='w xmin')
    setp(ax2.get_xticklabels(), visible=False)
    ax2.legend(loc='best')

    ax3 = fig.add_subplot(n_graphs, n_data, n_data * 2 + data_inst)
    fit.power_law.plot_pdf(ax=ax3, linestyle='--', color='g', label='powerlaw')
    fit.exponential.plot_pdf(ax=ax3, linestyle='--', color='r', label='exp')
    fit.plot_pdf(ax=ax3, color='b', linewidth=2)
    ax3.set_ylim(ax2.get_ylim())
    ax3.set_xlim(ax1.get_xlim())
    ax3.legend(loc='best')
    ax3.set_xlabel(units)
def plot_basics(data, data_inst, fig, units):
    '''
    Main plotting function: three stacked power-law panels for one dataset.

    (Original note: adapted from Newman's powerlaw package.)

    Panel 1: linearly binned PDF as points plus the PDF of the positive
    values. Panel 2: PDF with power-law fits at the xmin chosen by ``Fit``
    ('w/o xmin') and at ``xmin=3`` ('w xmin'). Panel 3: the ``xmin=3`` fit
    region with power-law and exponential candidates. Global matplotlib rc
    settings are modified as a side effect.

    data: numpy array of observations (boolean-mask indexing is used).
    data_inst: 1-based subplot position of the first panel in a 4 x 1 grid.
    fig: matplotlib figure the panels are added to.
    units: x-axis label of the last panel.
    '''
    import pylab
    pylab.rcParams['xtick.major.pad']='8'
    pylab.rcParams['ytick.major.pad']='8'
    pylab.rcParams['font.sans-serif']='Arial'

    from matplotlib import rc
    rc('font', family='sans-serif')
    rc('font', size=10.0)
    rc('text', usetex=False)

    from powerlaw import plot_pdf, Fit, pdf
    from pylab import setp

    n_data = 1
    n_graphs = 4
    positive = data[data>0]

    ax1 = fig.add_subplot(n_graphs,n_data,data_inst)
    x, y = pdf(data, linear_bins=True)
    ind = y>0
    # One more bin edge than densities: drop the last edge before masking
    # out the empty bins.
    x = x[:-1][ind]
    y = y[ind]
    ax1.scatter(x, y, color='r', s=.5, label='data')
    plot_pdf(positive, ax=ax1, color='b', linewidth=2, label='PDF')
    setp(ax1.get_xticklabels(), visible=False)
    # Valid location name is 'best'; 'bestloc' is rejected by current
    # matplotlib. Using the axes method avoids depending on pyplot state.
    ax1.legend(loc='best')

    ax2 = fig.add_subplot(n_graphs,n_data,n_data+data_inst, sharex=ax1)
    plot_pdf(positive, ax=ax2, color='b', linewidth=2, label='PDF')
    fit = Fit(data, discrete=True)
    fit.power_law.plot_pdf(ax=ax2, linestyle=':', color='g',label='w/o xmin')
    ax2.set_xlim(ax1.get_xlim())
    fit = Fit(data, discrete=True,xmin=3)
    fit.power_law.plot_pdf(ax=ax2, linestyle='--', color='g', label='w xmin')
    setp(ax2.get_xticklabels(), visible=False)
    ax2.legend(loc='best')

    ax3 = fig.add_subplot(n_graphs,n_data,n_data*2+data_inst)
    fit.power_law.plot_pdf(ax=ax3, linestyle='--', color='g',label='powerlaw')
    fit.exponential.plot_pdf(ax=ax3, linestyle='--', color='r',label='exp')
    fit.plot_pdf(ax=ax3, color='b', linewidth=2)
    ax3.set_ylim(ax2.get_ylim())
    ax3.set_xlim(ax1.get_xlim())
    ax3.legend(loc='best')
    ax3.set_xlabel(units)
def comp_diff_dim(iterations=2000):
    """Compare the cluster size distribution for 2 and 3 dimensions.

    Runs a 20x20 and a 20x20x3 ``Lattice`` for ``iterations`` steps, pools
    the recorded cluster sizes of each, fits a discrete power law to both,
    plots the two PDFs into one saved figure and reports alpha together with
    the normalised power-law vs exponential likelihood ratio for each lattice.

    Args:
        iterations: Number of iterations passed to both lattices; also
            embedded in the output file name.
    """
    lattice_options = dict(torus_mode=True,
                           rand_dist=('uniform', ),
                           free_percent=0,
                           iterations=iterations,
                           age_fraction=1 / 10)
    grid = Lattice(size=(20, 20), **lattice_options)
    cube = Lattice(size=(20, 20, 3), **lattice_options)

    steps = ["mutation", "update_age", "get_cluster"]
    grid.run(steps)
    cube.run(steps)

    grid_hist = np.concatenate(
        [grid.cluster_size[x] for x in grid.cluster_size])
    cube_hist = np.concatenate(
        [cube.cluster_size[x] for x in cube.cluster_size])

    # Fit each lattice on its OWN data. The cube fit previously reused
    # grid_hist and passed a misspelled `dicsrete` keyword, which Fit
    # accepts silently through **kwargs.
    grid_results = powerlaw.Fit(grid_hist, discrete=True, verbose=False)
    cube_results = powerlaw.Fit(cube_hist, discrete=True, verbose=False)

    r_grid, p_grid = grid_results.distribution_compare('power_law',
                                                       'exponential',
                                                       normalized_ratio=True)
    r_cube, p_cube = cube_results.distribution_compare('power_law',
                                                       'exponential',
                                                       normalized_ratio=True)

    plot_setting()
    powerlaw.plot_pdf(grid_hist, label='2 Dimensions')
    powerlaw.plot_pdf(cube_hist, label='3 Dimensions')
    plt.title("Cluster Distribution for 2D and 3D")
    plt.xlabel("Cluster size ")
    plt.ylabel("Probability ")
    plt.grid()
    plt.legend()
    plt.tight_layout()
    plt.savefig(path.join(
        dir_path,
        'figures/different_dimentions_itr={}.png'.format(iterations)),
                dpi=300)
    plt.show()

    print_statement(grid_results.power_law.alpha, r_grid, p_grid, "2D")
    print_statement(cube_results.power_law.alpha, r_cube, p_cube, "3D")
def plot_basics(data, data_inst, fig, units):
    """Draw the three stacked panels (A, B, C) for one dataset column.

    Panel A: linearly binned PDF as points, log-binned PDF of the positive
    values, and an inset histogram of the raw data. Panel B: PDF with
    power-law fits for ``xmin=1`` (dotted) and for the xmin chosen by ``Fit``
    (dashed). Panel C: the fitted region with power-law and exponential
    candidates. ``n_graphs``, ``n_data`` and ``panel_label_font`` are read
    from outside this block.

    Args:
        data: numpy array of observations (boolean-mask indexing is used).
        data_inst: 1-based column index; column 1 also gets the panel letters
            and the y label.
        fig: Figure the subplots are added to.
        units: x-axis label of panel C.
    """
    from powerlaw import plot_pdf, Fit, pdf
    from pylab import setp
    # axes_grid1 is the maintained home of inset_axes; the old
    # mpl_toolkits.axes_grid package no longer exists in current matplotlib.
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    annotate_coord = (-.4, .95)

    ax1 = fig.add_subplot(n_graphs, n_data, data_inst)
    x, y = pdf(data, linear_bins=True)
    ind = y > 0
    # One more bin edge than densities: drop the last edge, then keep only
    # non-empty bins.
    x = x[:-1][ind]
    y = y[ind]
    ax1.scatter(x, y, color='r', s=.5)
    plot_pdf(data[data > 0], ax=ax1, color='b', linewidth=2)
    setp(ax1.get_xticklabels(), visible=False)

    if data_inst == 1:
        ax1.annotate("A", annotate_coord, xycoords="axes fraction",
                     fontproperties=panel_label_font)

    ax1in = inset_axes(ax1, width="30%", height="30%", loc=3)
    # `density` replaces the removed `normed` keyword of hist().
    ax1in.hist(data, density=True, color='b')
    ax1in.set_xticks([])
    ax1in.set_yticks([])

    ax2 = fig.add_subplot(n_graphs, n_data, n_data + data_inst, sharex=ax1)
    plot_pdf(data, ax=ax2, color='b', linewidth=2)
    fit = Fit(data, xmin=1, discrete=True)
    fit.power_law.plot_pdf(ax=ax2, linestyle=':', color='g')
    ax2.set_xlim(ax1.get_xlim())

    fit = Fit(data, discrete=True)
    fit.power_law.plot_pdf(ax=ax2, linestyle='--', color='g')
    setp(ax2.get_xticklabels(), visible=False)

    if data_inst == 1:
        ax2.annotate("B", annotate_coord, xycoords="axes fraction",
                     fontproperties=panel_label_font)
        ax2.set_ylabel(u"p(X)")

    ax3 = fig.add_subplot(n_graphs, n_data, n_data*2 + data_inst)
    fit.power_law.plot_pdf(ax=ax3, linestyle='--', color='g')
    fit.exponential.plot_pdf(ax=ax3, linestyle='--', color='r')
    fit.plot_pdf(ax=ax3, color='b', linewidth=2)
    ax3.set_ylim(ax2.get_ylim())
    ax3.set_xlim(ax1.get_xlim())

    if data_inst == 1:
        ax3.annotate("C", annotate_coord, xycoords="axes fraction",
                     fontproperties=panel_label_font)

    ax3.set_xlabel(units)
def powerlaw_figures(dpath, fit=False):
    """Plot lifetime PDFs with power-law fits for one simulation directory.

    Loads ``namespace.p`` and ``lts.p`` from ``dpath``, keeps rows whose
    column 1 is positive and whose column 3 lies in the first half of the
    run, and takes lifetimes as column 2 minus column 3. Two fits are drawn
    on the same axes: one with lifetimes capped at half the run length
    (``xmax = t_split + 1``) and one on the uncapped lifetimes. The figure is
    written to ``figures/lts_traces_half-split-necessary/``.

    Args:
        dpath: Directory holding the pickles; its last four characters become
            the output file name.
        fit: Unused; kept for backward compatibility. (The original code
            overwrote it immediately with the fit object.)
    """
    fig, ax = pl.subplots()
    fig.set_size_inches(5.2, 3)

    # NOTE: pickle.load executes arbitrary code from the file; only use this
    # on simulation output you produced yourself.
    with open(dpath + '/namespace.p', 'rb') as pfile:
        nsp = pickle.load(pfile)
    with open(dpath + '/lts.p', 'rb') as pfile:
        lts_df = np.array(pickle.load(pfile))

    # discard synapses present at beginning
    lts_df = lts_df[lts_df[:, 1] > 0]

    t_split = nsp['Nsteps'] / 2
    lts_df = lts_df[lts_df[:, 3] < t_split]

    # Raw strings: '\g' and '\m' are not valid escape sequences.
    label_template = r'$\gamma = %.4f$, $x_{\mathrm{min}}=%.1f$'

    # First curve: lifetimes capped at the half-run length.
    lts = lts_df[:, 2] - lts_df[:, 3]
    lts[lts > t_split] = t_split
    capped_fit = powerlaw.Fit(lts, xmin=1, xmax=t_split + 1)
    label = label_template % (capped_fit.power_law.alpha,
                              capped_fit.power_law.xmin)
    powerlaw.plot_pdf(lts[lts > capped_fit.power_law.xmin], ax=ax,
                      label=label, alpha=1.)
    capped_fit.power_law.plot_pdf(ax=ax, linestyle='--')

    # Second curve: uncapped lifetimes.
    lts = lts_df[:, 2] - lts_df[:, 3]
    fit2 = powerlaw.Fit(lts, xmin=1)
    label = label_template % (fit2.power_law.alpha, fit2.power_law.xmin)
    powerlaw.plot_pdf(lts[lts > fit2.power_law.xmin], ax=ax,
                      label=label, alpha=1.)
    fit2.power_law.plot_pdf(ax=ax, linestyle='--')

    ax.legend()

    directory = 'figures/lts_traces_half-split-necessary/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    fname = dpath[-4:]
    fig.savefig(directory + '/' + fname + '.png', dpi=300,
                bbox_inches='tight')
def power_law_plot(graph, log=True, linear_binning=False, bins=90, draw=True, x_min=None):
    """Return the binned degree PDF of ``graph`` and optionally plot it.

    Zero degrees are dropped before binning (the original comment notes that
    powerlaw does not work if a bin is empty). If ``x_min`` is given, only
    degrees strictly greater than it are kept.

    Args:
        graph: Object providing ``degree()`` and ``len()``.
        log: Not used by this function.
        linear_binning: When true, the overlaid PDF uses linear bins.
        bins: Forwarded to ``pwl.pdf`` as ``bins``.
        draw: When true, plot the raw degree frequencies (red dots, log-log
            axes) and the binned PDF (black line).
        x_min: Optional exclusive lower bound on the degrees used.

    Returns:
        The value returned by ``pwl.pdf`` for the filtered degrees.
    """
    all_degrees = list(dict(graph.degree()).values())
    kept = [d for d in all_degrees if d != 0]
    if x_min is not None:
        kept = [d for d in kept if d > x_min]

    binned_pdf = pwl.pdf(kept, bins=bins)
    if not draw:
        return binned_pdf

    # Empirical frequency of every observed degree (zeros included).
    counts = Counter(all_degrees)
    ks = sorted(counts)
    freqs = [counts[k] / len(graph) for k in ks]

    plt.yscale('log')
    plt.xscale('log')
    plt.plot(ks, freqs, 'ro')
    plt.xticks(fontsize=15)
    plt.yticks(fontsize=15)
    plt.xlabel('$k$', fontsize=16)
    plt.ylabel('$P(k)$', fontsize=16)

    line_options = {'color': 'black', 'linewidth': 2}
    if linear_binning:
        line_options['linear_bins'] = True
    pwl.plot_pdf(kept, **line_options)
    return binned_pdf
def report_dist(data, measures, outdir, filename_base, discrete=False, min_distinc_values=5):
    """Plot and report power-law / lognormal fits for each measure.

    Axes come from ``plot_matrix(measures, [1])`` (defined outside this
    block). For each measure the positive numeric values of ``data[measure]``
    are fitted with ``powerlaw.Fit`` at ``xmin=2``; measures with too few
    distinct values get a greyed-out, frameless panel instead. Fit parameters
    are written to ``<outdir>/<filename_base>.txt`` and the figure is saved as
    ``.pdf`` and ``.png``.

    Args:
        data: Mapping from measure name to an iterable of values.
        measures: Measure names to process.
        outdir: Output directory (must already exist).
        filename_base: File name stem for the report and the figures.
        discrete: Forwarded to ``powerlaw.Fit``.
        min_distinc_values: Minimum number of distinct values needed to fit.
    """
    reportfilename = "%s/%s.txt" % (outdir, filename_base)
    # Text mode: the report is written as str. The context manager closes the
    # file even if a fit or plot call raises.
    with open(reportfilename, 'w') as reportfile:
        for (measure, temp, ax1) in plot_matrix(measures, [1]):
            reportfile.write("= %s =\n" % measure)
            values = [value for value in data[measure]
                      if is_numeric(value) and value > 0]

            if len(set(values)) < min_distinc_values:
                # set_facecolor replaces the removed Axes.set_axis_bgcolor.
                ax1.set_facecolor('#eeeeee')
                plt.setp(ax1.spines.values(), color='none')
            else:
                powerlaw.plot_pdf(values, ax=ax1, color='k')
                ax1.tick_params(axis='both', which='major', labelsize='x-small')
                ax1.tick_params(axis='both', which='minor', labelsize='xx-small')

                fit = powerlaw.Fit(values, discrete=discrete, xmin=2)

                reportfile.write("Lognormal:\n")
                reportfile.write("  mu = %f\n" % (fit.lognormal.mu))
                reportfile.write("  sigma = %f\n" % (fit.lognormal.sigma))
                reportfile.write("  xmin = %d\n" % (fit.lognormal.xmin))

                reportfile.write("Power-law:\n")
                reportfile.write("  alpha = %f\n" % (fit.power_law.alpha))
                reportfile.write("  sigma = %f\n" % (fit.power_law.sigma))
                reportfile.write("  xmin = %d\n" % (fit.power_law.xmin))

                # Other candidates accepted by distribution_compare:
                # 'exponential', 'truncated_power_law',
                # 'stretched_exponential', 'gamma'.
                R, p = fit.distribution_compare('lognormal', 'power_law')
                reportfile.write(
                    "Lognormal fit compared to power-law distribution: R=%f, p=%f\n" % (R, p))
                reportfile.write("\n")

                fit.power_law.plot_pdf(linestyle='--', color='b', ax=ax1,
                                       label='Power-law fit')
                info = "Power-law:\nalpha=%.3f\nsigma=%.3f\nxmin=%d" % (
                    fit.power_law.alpha, fit.power_law.sigma, fit.power_law.xmin)
                plt.text(0.1, 0.1, info, transform=ax1.transAxes, color='b',
                         ha='left', va='bottom', size='small')

                fit.lognormal.plot_pdf(linestyle='--', color='r', ax=ax1,
                                       label='Lognormal fit')
                info = "Lognormal:\nmu=%.3f\nsigma=%.3f\nxmin=%d" % (
                    fit.lognormal.mu, fit.lognormal.sigma, fit.lognormal.xmin)
                plt.text(0.9, 0.9, info, transform=ax1.transAxes, color='r',
                         ha='right', va='top', size='small')

    plt.savefig("%s/%s.pdf" % (outdir, filename_base), bbox_inches='tight')
    plt.savefig("%s/%s.png" % (outdir, filename_base), bbox_inches='tight')
def power_law_fit():
    """Fit a power law to an integer range derived from graph ``G`` and plot it.

    ``G`` is read from outside this block. The sample that is fitted is
    ``1 .. (non-zero-degree nodes - zero-in-degree nodes + 1)``; the in-degree
    vector fetched from ``snap`` is filled but not used for the fit. The fit's
    alpha, xmin and KS distance are printed, then the PDF (blue) and CCDF
    (red) of data and power-law fit are drawn on shared axes.
    """
    in_degree_pairs = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_degree_pairs)

    upper_bound = snap.CntNonZNodes(G) - snap.CntInDegNodes(G, 0) + 2
    sample = np.arange(1, upper_bound)
    fit = pl.Fit(sample)

    pl.plot_pdf(sample, color='r')
    fit.plot_pdf(color='b', linewidth=2)

    print("Power Law Data\n")
    print("Power Law Exponential:", fit.alpha)
    print("Min value for X:", fit.xmin)
    print("Kolmogorov-Smirnov test:", fit.D)

    # Solid lines: data; dashed lines: power-law fit.
    axes = fit.plot_pdf(color='b', linewidth=2)
    fit.power_law.plot_pdf(color='b', linestyle='--', ax=axes)
    fit.plot_ccdf(color='r', linewidth=2, ax=axes)
    fit.power_law.plot_ccdf(color='r', linestyle='--', ax=axes)

    axes.set_ylabel(u"p(X), p(X≥x)")
    axes.set_xlabel(r"in-degree")
def create_cits_plots(data, title_=""):
    """Plot the citation PDF of ``data`` with its power-law fit and xmin marker.

    A discrete ``powerlaw.Fit`` provides the exponent and ``xmin``. The binned
    PDF is drawn as markers at the bin centres and via ``powerlaw.plot_pdf``.
    The dashed red line comes from the helpers ``find_pdf_at_x`` and
    ``fitted_pl_xy`` (defined outside this block), and a dashed grey vertical
    line marks ``xmin``.

    Args:
        data: Sample handed unchanged to ``powerlaw.Fit`` / ``powerlaw.pdf``.
        title_: Text for the figure super-title.
    """
    figure, panel = plt.subplots(1, 1, figsize=(7, 7))
    result = powerlaw.Fit(data, discrete=True)
    plt.suptitle(title_, fontsize=30)

    slope = result.power_law.alpha
    x_start = result.power_law.xmin

    # powerlaw.pdf returns (bin edges, density per bin).
    bin_edges, bin_density = powerlaw.pdf(data)
    lower = bin_edges[0:-1]
    bin_centres = lower + 0.5 * (bin_edges[1:] - lower)

    ref_x, ref_y = find_pdf_at_x(x_start, bin_edges, bin_density)
    fit_x, fit_y = fitted_pl_xy(slope, ref_x, ref_y, x_start,
                                np.max(bin_edges))

    panel.set_xlabel("Number of citations", fontsize=20)
    panel.set_ylabel("Probability density function", fontsize=20)
    panel.plot(bin_centres, bin_density, 'o')
    powerlaw.plot_pdf(data, ax=panel, color='b', label='APS data')
    panel.plot(fit_x, fit_y, 'r--',
               label=r'Power law fit: $x^{-%4.3f}$' % (slope))
    panel.plot([x_start, x_start], [10**(-16), 1], '--', color="grey",
               label=r'$\rm x_{min}=%d$' % (x_start))
    panel.legend(loc=0, fontsize=15)

    figure.text(0.95, 0.05, '(c) 2018, P.G.', fontsize=10, color='gray',
                ha='right', va='bottom', alpha=0.5)
    plt.show()
def draw_pdf(self, sequence, fig_ax=None, title=None, legend_label=None, x_label="Counts", y_label="p(X)", style='b-', marker='o'):
    """Plot the PDF of ``sequence`` with its lognormal fit.

    A ``powerlaw.Fit`` with ``xmin=1`` is computed. The power-law KS distance
    and the power-law vs lognormal comparison ``(R, p)`` are printed, and the
    legend entry carries the fitted lognormal mu and sigma. Sizes and line
    width come from names defined outside this block (``figsize``,
    ``linewidth``, ``label_fontsize``, ``tick_fontsize``, ``legend_fontsize``,
    ``title_fontsize``).

    Args:
        sequence: Sample to plot and fit.
        fig_ax: Optional ``(figure, axes)`` pair to draw into; created when
            falsy.
        title: Optional axes title.
        legend_label: Text prefix of the legend entry; the legend is only
            drawn when this is truthy.
        x_label: x-axis label.
        y_label: y-axis label.
        style: Style string; only its first character is used, as the colour.
        marker: Marker for the data curve.

    Returns:
        The ``(figure, axes)`` pair that was drawn into.
    """
    if not fig_ax:
        fig_ax = plt.subplots(figsize=figsize)
    axes = fig_ax[1]
    colour = style[0]

    fit = powerlaw.Fit(sequence, xmin=1)
    ks_distance = fit.power_law.D
    mu = fit.lognormal.mu
    sigma = fit.lognormal.sigma
    ratio, p_value = fit.distribution_compare('power_law', 'lognormal')
    print(ks_distance, ratio, p_value)

    data_label = r"%s ($\mu$=%.2f, $\sigma$=%.2f)" % (legend_label, mu, sigma)
    powerlaw.plot_pdf(sequence, linewidth=linewidth, marker=marker,
                      color=colour, ax=axes, label=data_label)
    fit.lognormal.plot_pdf(color=colour, linestyle='--', ax=axes)

    axes.set_xlabel(x_label, fontsize=label_fontsize)
    axes.set_ylabel(y_label, fontsize=label_fontsize)
    axes.tick_params(size=tick_fontsize)
    if legend_label:
        axes.legend(fontsize=legend_fontsize)
    if title:
        axes.set_title(title, fontsize=title_fontsize)

    fig_ax[0].tight_layout()
    return fig_ax
def distribution_of_citations(male, female, after_yr=1990, plot=0):
    """Collect citation counts of authors who first published from ``after_yr``.

    Args:
        male: Iterable of mappings with ``'first_publish'`` and
            ``'citations'`` entries.
        female: Same structure as ``male``.
        after_yr: Inclusive lower bound on ``'first_publish'``.
        plot: When truthy, plot the PDFs of the positive citation counts for
            both groups and save them as ``.eps`` and ``.png`` under the
            ``out_folder`` prefix (a name defined outside this block).

    Returns:
        ``(male_cit_u, female_cit_u)``: numpy arrays of the selected citation
        counts, in input order.
    """
    male_cit_u = np.array(
        [m['citations'] for m in male if m['first_publish'] >= after_yr])
    female_cit_u = np.array(
        [f['citations'] for f in female if f['first_publish'] >= after_yr])

    if plot:
        plt.figure(figsize=(8.2, 5))
        powerlaw.plot_pdf(male_cit_u[male_cit_u > 0], label="Male",
                          color='b', linewidth=4)
        powerlaw.plot_pdf(female_cit_u[female_cit_u > 0], label="Female",
                          color='orange', linewidth=4)
        plt.xscale('linear')
        plt.ticklabel_format(style='sci', axis='x', scilimits=(-1, 3))
        plt.legend(loc='best', shadow=False, fontsize=20)

        # Label and file names use the year that was actually applied as the
        # filter (a hard-coded 1980 used to override it here).
        plt.xlabel("Total Citations (Papers after " + str(after_yr) + ")",
                   fontsize=20)
        plt.ylabel("Probability Mass Function", fontsize=20)
        stem = out_folder + 'citation_unweighted_' + str(after_yr)
        plt.savefig(stem + '.eps', format='eps', dpi=500)
        plt.savefig(stem + '.png', format='png', dpi=150)

    return male_cit_u, female_cit_u
def plot_basics(data, data_inst, fig, units):
    """Draw the three stacked panels (A, B, C) for one dataset column.

    Panel A: linearly binned PDF (thin red line and points), log-binned PDF
    of the positive values, and an inset histogram of the raw data. Panel B:
    PDF with power-law fits for ``xmin=1`` (dotted) and for the xmin chosen
    by ``Fit`` (dashed), with the upper y limit capped at 1. Panel C: the
    fitted region with power-law and exponential candidates. Every panel
    keeps only every second y tick. ``n_graphs`` and ``n_data`` are read from
    outside this block.

    Args:
        data: numpy array of observations (boolean-mask indexing is used).
        data_inst: 1-based column index; column 1 also gets the panel letters
            and the y label.
        fig: Figure the subplots are added to.
        units: x-axis label of panel C.
    """
    from powerlaw import plot_pdf, Fit, pdf
    from pylab import setp
    # axes_grid1 is the maintained home of inset_axes; the old
    # mpl_toolkits.axes_grid package no longer exists in current matplotlib.
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes

    annotate_coord = (-.4, .95)
    positive = data[data > 0]

    ax1 = fig.add_subplot(n_graphs, n_data, data_inst)
    plot_pdf(positive, ax=ax1, linear_bins=True, color='r', linewidth=.5)
    x, y = pdf(data, linear_bins=True)
    ind = y > 0
    # One more bin edge than densities: drop the last edge, then keep only
    # non-empty bins.
    x = x[:-1][ind]
    y = y[ind]
    ax1.scatter(x, y, color='r', s=.5)
    plot_pdf(positive, ax=ax1, color='b', linewidth=2)
    setp(ax1.get_xticklabels(), visible=False)
    ax1.set_yticks(ax1.get_yticks()[::2])

    if data_inst == 1:
        ax1.annotate("A", annotate_coord, xycoords="axes fraction", fontsize=14)

    ax1in = inset_axes(ax1, width="30%", height="30%", loc=3)
    # `density` replaces the removed `normed` keyword of hist().
    ax1in.hist(data, density=True, color='b')
    ax1in.set_xticks([])
    ax1in.set_yticks([])

    ax2 = fig.add_subplot(n_graphs, n_data, n_data + data_inst, sharex=ax1)
    plot_pdf(data, ax=ax2, color='b', linewidth=2)
    fit = Fit(data, xmin=1, discrete=True)
    fit.power_law.plot_pdf(ax=ax2, linestyle=':', color='g')
    ax2.set_xlim(ax1.get_xlim())

    fit = Fit(data, discrete=True)
    fit.power_law.plot_pdf(ax=ax2, linestyle='--', color='g')
    setp(ax2.get_xticklabels(), visible=False)

    # A probability axis should not extend above 1.
    if ax2.get_ylim()[1] > 1:
        ax2.set_ylim(ax2.get_ylim()[0], 1)
    ax2.set_yticks(ax2.get_yticks()[::2])

    if data_inst == 1:
        ax2.annotate("B", annotate_coord, xycoords="axes fraction", fontsize=14)
        ax2.set_ylabel(r"$p(X)$")

    ax3 = fig.add_subplot(n_graphs, n_data, n_data*2 + data_inst)
    fit.power_law.plot_pdf(ax=ax3, linestyle='--', color='g')
    fit.exponential.plot_pdf(ax=ax3, linestyle='--', color='r')
    fit.plot_pdf(ax=ax3, color='b', linewidth=2)

    ax3.set_ylim(ax2.get_ylim())
    ax3.set_yticks(ax3.get_yticks()[::2])
    ax3.set_xlim(ax1.get_xlim())

    if data_inst == 1:
        ax3.annotate("C", annotate_coord, xycoords="axes fraction", fontsize=14)

    ax3.set_xlabel(units)
if __name__ == '__main__':
    # Ad-hoc driver: load a table of token counts, plot its CDF and PDF, fit
    # a discrete power law to the positive counts and dump the fit object's
    # attributes.
    # NOTE(review): the input location is a hard-coded absolute path.
    estimate_dir = "/home/valentin/Desktop/Thesis II/Zipf Error/Estimates"
    lang = "NO"
    estimate_file = lang + "_ToktokTokenizer_ArticleSplitter"

    # TableReader is defined outside this block; the list presumably gives
    # the column types - TODO confirm.
    reader = TableReader(estimate_dir + "/" + estimate_file, [str, int, int])
    data = reader.read_data()

    counts = data["count"]
    # The PDF and the fit only use strictly positive counts.
    pos_counts = [c for c in counts if c > 0]

    print(counts[:10])
    print(min(counts))

    powerlaw.plot_cdf(counts)
    # plt.show()

    powerlaw.plot_pdf(pos_counts)
    # plt.show()

    fitted_dist = powerlaw.Fit(pos_counts, discrete=True)

    # Print every attribute of the fit; values that have no length, or a
    # length of 100 or more, are replaced by a placeholder.
    for key, val in fitted_dist.__dict__.items():
        print(
            key, ":\t", val
            if hasattr(val, "__len__") and len(val) < 100 else "val too long")
        print()

    print("\n\n", fitted_dist.find_xmin())
print "sum :%g, mean :%g" % (np.sum(data), np.mean(data)) return data #--------------------------------------------------------------# fig, ax = pl.subplots(1, figsize=(8, 10)) N = 5000 n = -2.6 xmin, xmax = 2.0, 10000.0 seed = 1234785 data = generate_power_law_dist(N, n, xmin, xmax, seed) counter = collections.Counter(data) pk = counter.values() k = counter.keys() pk = np.asarray(pk) / float(np.sum(pk)) fit = Fit(data) fit.power_law.plot_pdf(ax=ax, linestyle=':', color='g') # fit = Fit(data) print fit.power_law.alpha print fit.power_law.sigma ax.loglog(k, pk, '.') plot_pdf(data, color='r') pl.show()
def plot_basics(data, data_inst, fig, units):
    """Draw three stacked power-law panels; the first two are created hidden.

    Panel 1 (created with ``visible=False``): linearly binned PDF as points
    plus the PDF of the positive values. Panel 2 (also created with
    ``visible=False``): PDF with power-law fits for ``xmin=1`` and for the
    xmin chosen by ``Fit``, labelled "B". Panel 3: the fitted region with
    power-law and exponential candidates, labelled "C". ``n_graphs``,
    ``n_data`` and ``panel_label_font`` are read from outside this block.

    Args:
        data: numpy array of observations (boolean-mask indexing is used).
        data_inst: 1-based subplot index of the first panel.
        fig: Figure the subplots are added to.
        units: x-axis label for panels 2 and 3.
    """
    from powerlaw import plot_pdf, Fit, pdf
    from pylab import setp

    letter_xy = (-.1, .95)

    top = fig.add_subplot(n_graphs, n_data, data_inst, visible=False)
    edges, density = pdf(data, linear_bins=True)
    nonempty = density > 0
    density = density[nonempty]
    # One more bin edge than densities: drop the last edge before masking.
    edges = edges[:-1]
    edges = edges[nonempty]
    top.scatter(edges, density, color='r', s=.5)
    plot_pdf(data[data > 0], ax=top, color='b', linewidth=2)
    setp(top.get_xticklabels(), visible=False)

    middle = fig.add_subplot(n_graphs, n_data, n_data + data_inst,
                             sharex=top, visible=False)
    plot_pdf(data, ax=middle, color='b', linewidth=2, label="pdf of data")
    fixed_fit = Fit(data, xmin=1, discrete=True)
    fixed_fit.power_law.plot_pdf(ax=middle, linestyle=':', color='g',
                                 label="power law fit")
    fixed_fit.power_law.pdf()  # evaluated as in the original; result unused
    middle.set_xlim(top.get_xlim())

    best_fit = Fit(data, discrete=True)
    best_fit.power_law.plot_pdf(ax=middle, linestyle='--', color='g',
                                label="power law fit--opt xmin")
    setp(middle.get_xticklabels(), visible=True)

    middle.annotate("B", letter_xy, xycoords="axes fraction",
                    fontproperties=panel_label_font)
    middle.set_ylabel(u"p(X)")
    legend_handles, legend_labels = middle.get_legend_handles_labels()
    middle.legend(legend_handles, legend_labels, loc=3)
    middle.set_xlabel(units)

    bottom = fig.add_subplot(n_graphs, n_data, n_data * 2 + data_inst)
    best_fit.power_law.plot_pdf(ax=bottom, linestyle='--', color='g',
                                label="power law fit\n(opt-min)")
    best_fit.exponential.plot_pdf(ax=bottom, linestyle='--', color='r',
                                  label="exponential fit\n(opt-min)")
    best_fit.plot_pdf(ax=bottom, color='b', linewidth=2,
                      label="PDF\n(opt-min)")

    bottom.set_ylim(middle.get_ylim())
    bottom.set_xlim(top.get_xlim())
    legend_handles, legend_labels = bottom.get_legend_handles_labels()
    bottom.legend(legend_handles, legend_labels, loc=3, fontsize=12)
    bottom.set_xlabel(units, fontsize=15)

    bottom.annotate("C", letter_xy, xycoords="axes fraction",
                    fontproperties=panel_label_font)
    bottom.set_ylabel(u"p(X)", fontsize=15)
def doPlot(ff, evotype):
    """Fit heavy-tailed distributions to evolution-time data and plot them.

    One CSV file is read per value in the hard-coded ``p`` list. For each
    non-empty dataset the first column is fitted with ``powerlaw.Fit``, the
    candidate distributions are compared pairwise with likelihood-ratio
    tests, and the empirical PDF is drawn with the preferred fit in its own
    subplot. The figure is saved under ``figures/``.

    Parameters
    ----------
    ff : str
        Tag inserted into the input file names and the output file name.
    evotype : str
        Tag inserted into the output file name.

    Returns
    -------
    numpy.ndarray
        A ``(number of files, 2)`` array of zeros. The function never
        populates it; it is kept so existing callers receive the same value.
    """
    # Make files from directory listing
    numgens = '1000000000'
    directry = '../../data'
    contexttag = 'nc3_I16-4-1_T20-5-10'
    p = [0.03, 0.05, 0.1, 0.15, 0.2, 0.3]

    # Make file names and the matching per-panel labels
    files = []
    lbls = []
    for pp in p:
        files.append('{4}/evolve_{3}_{0}_{1}_gens_{2}.csv'.format(
            ff, numgens, pp, contexttag, directry))
        lbls.append('p={0}'.format(pp))

    graphtag = 'powerlaw. Violet: data PDF, Red: truncated PDF, Blue: powerlaw fit, Green: lognormal, Pink: exponential'

    print('files has length {0}'.format(len(files)))
    nf = len(files)

    # Set a default fontsize for matplotlib
    fs = 16
    fnt = {'family': 'DejaVu Sans', 'weight': 'regular', 'size': fs}
    matplotlib.rc('font', **fnt)

    f1 = plt.figure(figsize=(24, 8))
    M = np.zeros([nf, 2])

    # add_subplot needs integer grid dimensions; true division yields a float
    # on Python 3. Rounding up keeps two rows able to hold every panel.
    ncols = (nf + 1) // 2

    gcount = 0  # number of panels drawn so far (skipped files do not count)
    a1 = []  # list of axes

    for y, fil in enumerate(files):
        print('----------------\nProcessing file: {0}'.format(fil))
        D = readDataset(fil)
        if D.size == 0:
            continue
        print('D min: {0}, D max: {1}'.format(np.min(D), np.max(D)))

        # Powerlaw fitting (continuous; discrete fitting was tried and disabled)
        fit = powerlaw.Fit(D[:, 0])
        print('powerlaw.alpha: {0}'.format(fit.power_law.alpha))
        print('powerlaw.sigma: {0}'.format(fit.power_law.sigma))
        print('powerlaw.xmin: {0}'.format(fit.power_law.xmin))

        # NOTE: a bootstrap goodness-of-fit test (simulate from the fitted
        # power law, refit, compare KS distances) was sketched here in the
        # original but never enabled.

        # Comparative tests.
        # bestfitter codes: 0 unknown, 1 powerlaw better than exp,
        # 2 lognormal, 3 exponential, 4 powerlaw better than lognormal
        if fit.power_law.sigma < 0.05:
            # then assume powerlaw is an ok fit
            bestfitter = 1
        else:
            bestfitter = 0

        if bestfitter == 1:
            Rex, pex = fit.distribution_compare('power_law', 'exponential')
            print(
                'Comparison with exponential: R={0}, p={1} (+ve: powerlaw more likely, -ve: exponential more likely)'
                .format(Rex, pex))
            if pex < 0.05:
                if Rex < 0:
                    bestfitter = 3  # exp
                else:
                    bestfitter = 1  # powerlaw
            Rln, pln = fit.distribution_compare('power_law', 'lognormal')
            print(
                'Comparison with lognormal: R={0}, p={1} (+ve: powerlaw more likely, -ve: lognormal more likely)'
                .format(Rln, pln))
            if pln < 0.05:
                if Rln < 0:
                    bestfitter = 2  # ln
                else:
                    print('Power to the law!')
                    bestfitter = 4  # powerlaw
        else:
            Rlnex, plnex = fit.distribution_compare('lognormal', 'exponential')
            print(
                'Comparison lognormal to exponential: R={0}, p={1} (+ve: lognormal more likely, -ve: exponential more likely)'
                .format(Rlnex, plnex))
            if plnex < 0.05:
                if Rlnex < 0:
                    bestfitter = 3  # exp
                else:
                    bestfitter = 2  # ln

        # Create subplot
        print('gcount+1 is {0}'.format(gcount + 1))
        ax = f1.add_subplot(2, ncols, gcount + 1)
        a1.append(ax)

        # Plot the data: full empirical PDF, then the PDF truncated at xmin
        powerlaw.plot_pdf(D[:, 0], color=col.darkviolet, ax=ax)
        fit.plot_pdf(color=col.red2, ax=ax, linewidth=3)

        if bestfitter == 1:
            # Plot the powerlaw fit
            fit.power_law.plot_pdf(color=col.blue3, linestyle='-.', ax=ax)
        elif bestfitter == 2:
            # Plot the lognormal fit
            fit.lognormal.plot_pdf(color=col.mediumspringgreen,
                                   linestyle='-.', ax=ax)
        elif bestfitter == 3:
            # NOTE(review): code 3 means "exponential" and graphtag says
            # "Pink: exponential", yet the truncated power law is drawn.
            # Confirm whether fit.exponential was intended.
            fit.truncated_power_law.plot_pdf(color=col.deeppink2,
                                             linestyle='-.', ax=ax)
        elif bestfitter == 4:
            fit.power_law.plot_pdf(color=col.blue3, linestyle='-.',
                                   linewidth=4, ax=ax)
        else:
            # Unknown which fit is best: show all candidates with thin lines.
            fit.power_law.plot_pdf(color=col.blue3, linestyle='-.',
                                   linewidth=1, ax=ax)
            fit.lognormal.plot_pdf(color=col.mediumspringgreen,
                                   linestyle='-.', linewidth=1, ax=ax)
            fit.truncated_power_law.plot_pdf(color=col.deeppink2,
                                             linestyle='-.', linewidth=1,
                                             ax=ax)

        ax.set_ylabel('log (evolutions)', fontsize=fs)
        ax.set_xlabel('log (generations)', fontsize=fs)
        # Index the label by file position (y), not by panel count, so the
        # title stays correct after an empty dataset has been skipped.
        ax.set_title('{0}, al={1:.2f}, xmin={3:.2f} D={2:.2f}'.format(
            lbls[y], fit.power_law.alpha, fit.power_law.D,
            fit.power_law.xmin), fontsize=9)

        gcount = gcount + 1

    # rect=[left bottom right top]
    f1.tight_layout(rect=[0.01, 0.01, 0.99, 0.9])
    f1.text(0.2, 0.9, graphtag, fontsize=20)
    plt.savefig('figures/evospeed_pl_' + evotype + '_' + ff + contexttag +
                '.png')
    return M
def comp_cluster_sizes(iterations=2000):
    """
    Compares the cluster sizes of different sizes of grid using
    uniform random generator for the fitness.

    Three square lattices (20, 50 and 70 cells per side) are run, a discrete
    power law is fitted to the pooled cluster sizes of each, the power law is
    compared against an exponential, and the three empirical PDFs are drawn
    in one figure that is saved and shown.

    :param iterations: number of steps to run for the Bak-Sneppen model
    """
    print(
        colored("Warning this function might take long (2000 itr ~ 40 min)",
                'red'))

    grid_sides = (20, 50, 70)

    # Build every lattice first and run them afterwards, in the same order
    # as before.
    lattices = [
        Lattice(size=(side, side),
                torus_mode=True,
                rand_dist=('uniform', ),
                free_percent=0,
                iterations=iterations,
                age_fraction=1 / 10) for side in grid_sides
    ]
    for lattice in lattices:
        lattice.run(["mutation", "update_age", "get_cluster"])

    # Pool the per-key cluster-size records of each lattice into one array.
    histograms = [
        np.concatenate([lattice.cluster_size[x] for x in lattice.cluster_size])
        for lattice in lattices
    ]

    # get the power law
    # Every grid is fitted as discrete data. The medium grid used to pass a
    # misspelt `dicsrete=True`, which Fit swallowed through **kwargs, so it
    # silently got a continuous fit.
    fits = [
        powerlaw.Fit(hist, discrete=True, verbose=False) for hist in histograms
    ]
    comparisons = [
        fit.distribution_compare('power_law',
                                 'exponential',
                                 normalized_ratio=True) for fit in fits
    ]

    # plot the power law
    plt.figure()
    for side, hist in zip(grid_sides, histograms):
        powerlaw.plot_pdf(hist, label='{0}X{0} Grid'.format(side))
    plt.title("Compare cluster size for different grid sizes")
    plt.xlabel("Cluster size ")
    plt.ylabel("Probability")
    plt.legend()
    plt.grid()
    plt.tight_layout()
    plt.savefig(path.join(
        dir_path, 'figures/cluster-sizes_rep={}.png'.format(iterations)),
                dpi=300)
    plt.show()

    for side, fit, (ratio, p_value) in zip(grid_sides, fits, comparisons):
        print_statement(fit.power_law.alpha, ratio, p_value,
                        "the {0}X{0} grid's".format(side))
def comp_diff_neighbours(size=(20, 20), iteration=2000, repetition=10):
    """
    Plots the avalanche distribution in a log-log plot for a model using
    Moore/von Neumann neighbourhood.

    For each repetition one lattice per neighbourhood type is run. Avalanche
    times and distances between consecutive mutations are pooled over the
    repetitions, fitted with a discrete power law, compared against an
    exponential, and plotted. Only the second figure is saved to disk.

    :param size: lattice size used for both neighbourhood types
    :param iteration: number of model steps per run
    :param repetition: number of independent runs pooled together
    """
    mutation_dist_vonNeumann_list = []
    mutation_dist_moore_list = []
    avalanche_moore_list = []
    avalanche_vonNeumann_list = []

    for _ in range(repetition):
        # Each lattice is built once. The original built both twice and the
        # surviving von Neumann lattice hard-coded size=(50, 50), so the two
        # neighbourhoods were compared on different grids.
        moore = Lattice(
            size=size,
            torus_mode=True,
            neighbourhood='Moore',
            rand_dist=('uniform', ),
            free_percent=0,
            iterations=iteration,
        )
        vonNeumann = Lattice(
            size=size,
            torus_mode=True,
            neighbourhood='von Neumann',
            rand_dist=('uniform', ),
            free_percent=0,
            iterations=iteration,
        )
        moore.run(["mutation", "avalanche_time", "get_dist_btw_mutation"])
        vonNeumann.run(["mutation", "avalanche_time", "get_dist_btw_mutation"])

        avalanche_moore_list.extend(
            moore.avalanche_time_list['avalanche_time'])
        avalanche_vonNeumann_list.extend(
            vonNeumann.avalanche_time_list['avalanche_time'])
        mutation_dist_moore_list.extend(moore.distance_btw_mutation_list)
        mutation_dist_vonNeumann_list.extend(
            vonNeumann.distance_btw_mutation_list)

    # --- Avalanche times -------------------------------------------------
    result_moore = powerlaw.Fit(avalanche_moore_list,
                                discrete=True,
                                verbose=False)
    R_moore, p_moore = result_moore.distribution_compare(
        'power_law', 'exponential', normalized_ratio=True)
    result_vonNeumann = powerlaw.Fit(avalanche_vonNeumann_list,
                                     discrete=True,
                                     verbose=False)
    R_vonNeumann, p_vonNeumann = result_vonNeumann.distribution_compare(
        'power_law', 'exponential', normalized_ratio=True)
    # Report these before the names are reused for the distance fits below;
    # previously the avalanche-time results were computed and discarded.
    print_statement(result_moore.power_law.alpha, R_moore, p_moore,
                    "Moore avalanche time")
    print_statement(result_vonNeumann.power_law.alpha, R_vonNeumann,
                    p_vonNeumann, "von Neumann avalanche time")

    # plot for comparison
    plot_setting()
    powerlaw.plot_pdf(avalanche_moore_list, color='b', label='Moore')
    powerlaw.plot_pdf(avalanche_vonNeumann_list,
                      color='r',
                      label='von Neumann')
    plt.legend()
    plt.title("Avalanche Time")
    plt.ylabel("Probability ")
    plt.xlabel("Avalanche Time")
    plt.yscale('log')
    plt.xscale('log')
    plt.grid()
    plt.tight_layout()
    plt.show()

    # --- Distances between consecutive mutations (new figure) ------------
    result_moore = powerlaw.Fit(mutation_dist_moore_list,
                                discrete=True,
                                verbose=False)
    R_moore, p_moore = result_moore.distribution_compare(
        'power_law', 'exponential', normalized_ratio=True)
    result_vonNeumann = powerlaw.Fit(mutation_dist_vonNeumann_list,
                                     discrete=True,
                                     verbose=False)
    R_vonNeumann, p_vonNeumann = result_vonNeumann.distribution_compare(
        'power_law', 'exponential', normalized_ratio=True)
    print_statement(result_moore.power_law.alpha, R_moore, p_moore,
                    "More Neighbour")
    print_statement(result_vonNeumann.power_law.alpha, R_vonNeumann,
                    p_vonNeumann, "von Neumann Neighbourhood")

    n_moore, bins_moore = np.histogram(mutation_dist_moore_list, density=True)
    n_vonNeumann, bins_vonNeumann = np.histogram(
        mutation_dist_vonNeumann_list, density=True)

    # plot for comparison (densities are drawn at the left bin edges)
    plot_setting()
    plt.plot(bins_moore[:-1], n_moore, label='Moore Neighbourhood')
    plt.plot(bins_vonNeumann[:-1],
             n_vonNeumann,
             label='von Neumann Neighbourhood')
    plt.legend()
    plt.title("Distribution of the distances between consecutive mutations")
    plt.ylabel("Probability")
    plt.xlabel("Distances between consecutive mutations")
    plt.yscale('log')
    plt.xscale('log')
    plt.grid()
    plt.tight_layout()
    plt.savefig(path.join(
        dir_path, 'figures/diff_neighbours_s={}_itr={}_rep={}.png'.format(
            size, iteration, repetition)),
                dpi=300)
    plt.show()

    print_statement(result_moore.power_law.alpha, R_moore, p_moore,
                    "More Neighbour")
    print_statement(result_vonNeumann.power_law.alpha, R_vonNeumann,
                    p_vonNeumann, 'von Neumann')
plt.ylabel("Count")
plt.xlabel("Degree")
plt.show()

#%%
import powerlaw

# Degree of every node in G; nx.degree yields (node, degree) pairs.
data = [x[1] for x in list(nx.degree(G))]
fit = powerlaw.Fit(data)
print(f"Powerlaw coeffecient from fit: {fit.power_law.alpha}")

# R is the Loglikelihood ratio of the two distributions' fit to the data. If greater than 0,
# the first distribution is preferred. If less than 0, the second distribution is preferred.
# P is the significance of R.
R, p = fit.distribution_compare('power_law', 'lognormal')
print(f"Logliklihood ratio: {R} with a p-value of {p}.")

powerlaw.plot_pdf(data, linear_bins=False)
# The title previously read `results.power_law.alpha`; no `results` exists in
# this cell (the fit object is `fit`), so the line raised NameError.
plt.title(
    f"AIS Data Power Law Plot with Coefficent of {round(fit.power_law.alpha,3)}"
)
plt.show()

# Empirical CCDF with the two candidate fits on the same axes.
# Note: `fig` is the Axes object returned by plot_ccdf, not a Figure.
fig = fit.plot_ccdf(label='Emprical Data')
fit.power_law.plot_ccdf(ax=fig,
                        color='r',
                        linestyle='--',
                        label='Power law fit')
fit.lognormal.plot_ccdf(ax=fig,
                        color='g',
                        linestyle='--',
                        label='Lognormal fit')
handles, labels = fig.get_legend_handles_labels()
# Constrain the lognormal fit to non-negative mu before comparing models.
range_dict = {'mu': [0.0, None]}
fit.lognormal.parameter_range(range_dict)

# R1 > 0 favours the power law, R1 < 0 favours the lognormal; p1 is the
# significance of R1.
R1, p1 = fit.distribution_compare('power_law', 'lognormal')

# Single-argument print with %-formatting runs on both Python 2 and 3
# (the original print statements are a syntax error on Python 3).
if R1 > 0:
    print('Power law more likely for data. R =  %s  and p =  %s' % (R1, p1))
    print("Power law's alpha: %f" % fit.power_law.alpha)
else:
    print('Log normal more likely for data. R =  %s and p =  %s' % (R1, p1))
    print('Mu =  %s' % fit.lognormal.mu)
    print('Sigma =  %s' % fit.lognormal.sigma)

# Restrict the empirical curve to the fitted tail. The fit keeps
# values >= xmin, so use the same inclusive bound here.
step_data_xmin = [i for i in step_data if i >= fit.power_law.xmin]

figure()
powerlaw.plot_pdf(step_data_xmin, color='b', linewidth=2)  # PDF of data
fit.power_law.plot_pdf(color='b', linestyle='--')  # PL theoretical fit
fit.exponential.plot_pdf(color='r', linestyle='--')  # EXP theoretical fit
fit.lognormal.plot_pdf(color='g', linestyle='--')  # LN theoretical fit
xlabel('Step Length, x [cm]')
ylabel('P(x)')
plt.legend(('Data', 'Power Law Fit', 'Exponential Fit', 'Lognormal Fit'))

figure()
powerlaw.plot_ccdf(step_data_xmin, color='b', linewidth=2)  # CCDF of data
fit.power_law.plot_ccdf(color='b', linestyle='--')  # PL theoretical fit
fit.exponential.plot_ccdf(color='r', linestyle='--')  # EXP theoretical fit
fit.lognormal.plot_ccdf(color='g', linestyle='--')  # LN theoretical fit
xlabel('Step Length, x [cm]')
# NOTE(review): this axis shows a CCDF but is still labelled 'P(x)'.
ylabel('P(x)')
plt.legend(('Data', 'Power Law Fit', 'Exponential Fit', 'Lognormal Fit'))
def comp_moving_vs_stationary(size=(20, 20), iteration=2000, repetition=10):
    """
    Compares the avalanche times between a stationary model and a model
    where the nodes can move to free space.

    For each repetition a fully occupied lattice is run with the "mutation"
    step and a lattice with 30% free cells is run with the "moving" step.
    Avalanche times are pooled over the repetitions, fitted with a discrete
    power law, compared against an exponential, and plotted. The figure is
    saved and shown.

    :param size: lattice size used for both models
    :param iteration: number of model steps per run
    :param repetition: number of independent runs pooled together
    """
    avalanche_move_list = []
    avalanche_stationary_list = []

    for _ in range(repetition):
        stationary = Lattice(
            size=size,
            torus_mode=True,
            neighbourhood='Moore',
            rand_dist=('uniform', ),
            free_percent=0,
            iterations=iteration,
        )
        # Use the requested size for the migrating lattice too. It used to be
        # hard-coded to (50, 50), so the two models ran on different grids
        # while the output file name reported `size` for both.
        move = Lattice(
            size=size,
            torus_mode=True,
            neighbourhood='Moore',
            rand_dist=('uniform', ),
            free_percent=0.3,
            iterations=iteration,
        )
        stationary.run(["mutation", "avalanche_time"])
        move.run(["moving", "avalanche_time"])

        avalanche_move_list.extend(move.avalanche_time_list['avalanche_time'])
        avalanche_stationary_list.extend(
            stationary.avalanche_time_list['avalanche_time'])

    result_move = powerlaw.Fit(avalanche_move_list,
                               discrete=True,
                               verbose=False)
    R_move, p_move = result_move.distribution_compare('power_law',
                                                      'exponential',
                                                      normalized_ratio=True)
    result_stationary = powerlaw.Fit(avalanche_stationary_list,
                                     discrete=True,
                                     verbose=False)
    R_stationary, p_stationary = result_stationary.distribution_compare(
        'power_law', 'exponential', normalized_ratio=True)

    # plot for comparison
    plot_setting()
    powerlaw.plot_pdf(avalanche_move_list, color='b', label='Migration')
    powerlaw.plot_pdf(avalanche_stationary_list,
                      color='r',
                      label='No Migration')
    plt.legend()
    plt.title("Avalanche sizes")
    plt.ylabel("Probability ")
    plt.xlabel("Avalanche sizes ")
    plt.grid()
    plt.tight_layout()
    plt.savefig(path.join(
        dir_path,
        'figures/moving-vs-stationary_size={}_itr{}_rep={}.png'.format(
            size, iteration, repetition)),
                dpi=300)
    plt.show()

    print_statement(result_move.power_law.alpha, R_move, p_move, "migration")
    print_statement(result_stationary.power_law.alpha, R_stationary,
                    p_stationary, "no migration")
plt.savefig('logscale_jan-jun_stars_given.png')
plt.close()

# All-time stars given, one histogram per country on shared axes.
# Each series gets a bin size of max(series) / multiplier.
multiplier = 15
binsize = int(np.max(russian_all) / multiplier)
histplot(russian_all, binsize, 'Users', 'Star Count', 'black', '')
binsize = int(np.max(chinese_all) / multiplier)
histplot(chinese_all, binsize, 'Users', 'Star Count', 'red', '')
# The American and Indian series previously plotted the Jan-Jun data
# (american_6 / indian_6) with a bin size computed from the all-time data;
# plot the all-time series so the data matches its bins and the title.
binsize = int(np.max(american_all) / multiplier)
histplot(american_all, binsize, 'Users', 'Star Count', 'blue', '')
binsize = int(np.max(indian_all) / multiplier)
histplot(indian_all, binsize, 'Users', 'Star Count', 'green',
         'All Stars Given by Count, LogLog Scale Plot')
plt.savefig('logscale_all_stars_given.png')
plt.close()

# Jan-Jun 2019: empirical PDFs on log-log axes.
powerlaw.plot_pdf(russian_6, color='black')
powerlaw.plot_pdf(chinese_6, color='red')
powerlaw.plot_pdf(american_6, color='blue')
powerlaw.plot_pdf(indian_6, color='green')
plt.ylabel('Users')
plt.xlabel('Star Count')
plt.title('Jan-Jun 2019 Stars Given by Count, PDF')
plt.savefig('pdf_jan-jun_stars_given.png')
plt.close()

# Jan-Jun 2019: empirical complementary CDFs.
powerlaw.plot_ccdf(russian_6, color='black')
powerlaw.plot_ccdf(chinese_6, color='red')
powerlaw.plot_ccdf(american_6, color='blue')
powerlaw.plot_ccdf(indian_6, color='green')
plt.ylabel('Users')
plt.xlabel('Star Count')
# # Basic Methods # <markdowncell> # ## Visualization # <markdowncell> # ### PDF Linear vs Logarithmic Bins # <codecell> data = words #### figPDF = powerlaw.plot_pdf(data, color='b') powerlaw.plot_pdf(data, linear_bins=True, color='r', ax=figPDF) #### figPDF.set_ylabel("p(X)") figPDF.set_xlabel(r"Word Frequency") figname = 'FigPDF' savefig(figname+'.eps', bbox_inches='tight') #savefig(figname+'.tiff', bbox_inches='tight', dpi=300) # <markdowncell> # ### Figure 2 # <codecell> data = words
#%% #parameter_range = {'alpha': [0, 0.9]} energies = sandpile.get_avalanche_energies() times = sandpile.get_avalanche_times() linear_sizes = sandpile.get_avalanche_linear_sizes() energyfit = powerlaw.Fit(energies, xmin=1, xmax=sandpile.N, discrete=True) timefit = powerlaw.Fit(times, xmin=1, xmax=100, discrete=True) sizefit = powerlaw.Fit(linear_sizes, xmin=1, discrete=True) # Create plots for avalanche sizes and durations fig, ax = plt.subplots(1, 3, figsize=(18, 7)) # Plot distribution of avalanche sizes powerlaw.plot_pdf(energies, ax=ax[0], color='black', label='Empirical pdf') energyfit.power_law.plot_pdf(ax=ax[0], color='blue', linestyle='--', \ label=r'Power law, $\alpha = $' + f'{energyfit.power_law.alpha:.2f}') energyfit.truncated_power_law.plot_pdf(ax=ax[0], color='red', linestyle='--', \ label=r'Truncated power law, $\alpha = $' + \ f'{energyfit.truncated_power_law.alpha:.2f}' + \ r', $\lambda = $' + f'{energyfit.truncated_power_law.parameter2:.2e}') ax[0].set_xlabel('Avalanche energy') ax[0].set_ylabel(r'$p(X)$') ax[0].legend(loc='lower left', fontsize=14) # Plot distribution of avalanche durations powerlaw.plot_pdf(times, ax=ax[1], color='black', label='Empirical pdf') timefit.power_law.plot_pdf(ax=ax[1], color='blue', linestyle='--', \ label=r'Power law, $\alpha = $' + f'{timefit.power_law.alpha:.2f}') timefit.truncated_power_law.plot_pdf(ax=ax[1], color='red', linestyle='--', \
def plot_basics(data, data_inst, fig, units):
    """Draw the three stacked "basics" panels for one dataset column.

    Panel A shows the linearly binned PDF as red dots with the default
    ``plot_pdf`` curve of the positive data in blue. Panel B shows the PDF of
    all data with a discrete power-law fit at ``xmin=1``. Panel C shows the
    fitted region together with the power-law, exponential and lognormal
    fits. Panel letters and the y-label are only added when
    ``data_inst == 1``.

    :param data: array-like supporting boolean-mask indexing (``data[data > 0]``)
    :param data_inst: 1-based column index of this dataset in the subplot grid
    :param fig: figure object the subplots are added to
    :param units: x-axis label for the bottom panel
    """
    ### Setup ###
    from powerlaw import plot_pdf, Fit, pdf
    import pylab
    pylab.rcParams['xtick.major.pad'] = '8'
    pylab.rcParams['ytick.major.pad'] = '8'
    from matplotlib.font_manager import FontProperties
    from pylab import setp

    label_font = FontProperties().copy()
    label_font.set_weight("bold")
    label_font.set_size(30.0)
    label_font.set_family("sans-serif")

    columns, rows = 2, 4
    label_xy = (-.4, .95)

    def tag_panel(axes, letter):
        # Place the bold panel letter just outside the top-left corner.
        axes.annotate(letter,
                      label_xy,
                      xycoords="axes fraction",
                      fontproperties=label_font)

    first_column = data_inst == 1
    #############

    # Panel A: linear-bin scatter plus default PDF of the positive data.
    top = fig.add_subplot(rows, columns, data_inst)
    edges, density = pdf(data, linear_bins=True)
    nonzero = density > 0
    density = density[nonzero]
    # Drop the last bin edge, then keep the edges of the non-empty bins.
    edges = edges[:-1][nonzero]
    top.scatter(edges, density, color='r', s=.5)
    plot_pdf(data[data > 0], ax=top, color='b', linewidth=2)
    setp(top.get_xticklabels(), visible=False)
    if first_column:
        tag_panel(top, "A")

    # Panel B: PDF of all data with the xmin=1 discrete power-law fit.
    middle = fig.add_subplot(rows, columns, columns + data_inst, sharex=top)
    plot_pdf(data, ax=middle, color='b', linewidth=2)
    fit = Fit(data, xmin=1, discrete=True)
    fit.power_law.plot_pdf(ax=middle, linestyle='--', color='g')
    fit.power_law.pdf()  # result unused; call kept as in the original
    middle.set_xlim((1, max(edges)))
    setp(middle.get_xticklabels(), visible=False)
    if first_column:
        tag_panel(middle, "B")
        middle.set_ylabel(u"p(X)")

    # Panel C: candidate fits against the fitted portion of the data.
    bottom = fig.add_subplot(rows, columns, columns * 2 + data_inst)
    fit.power_law.plot_pdf(ax=bottom, linestyle='--', color='g')
    fit.exponential.plot_pdf(ax=bottom, linestyle='--', color='r')
    fit.lognormal.plot_pdf(ax=bottom, linestyle=':', color='r')
    fit.plot_pdf(ax=bottom, color='b', linewidth=2)
    bottom.set_ylim(middle.get_ylim())
    bottom.set_xlim(top.get_xlim())
    if first_column:
        tag_panel(bottom, "C")
    bottom.set_xlabel(units)
def is_free_variation(i_min=0, i_max=1, i_iter=6, iterations=2000):
    '''
    Runs several instances of the lattice with different percentages of
    empty nodes in the lattice. Avalanche times and thresholds are then
    compared between runs: figure 1 overlays the avalanche-time PDFs and
    figure 2 overlays the threshold traces. Figure 2 is saved to disk.
    ____
    i_min: lower percentage of the range (within [0,1])
    i_max: upper percentage (within [0,1])
    i_iter: number of steps between i_min and i_max to be taken (Integer)
    iterations: number of model steps per lattice run
    '''
    # figure & settings for plots
    plot_setting()
    plt.figure(1)
    plt.title(
        'Avalanche times for different percentages of empty space in the lattice'
    )
    plt.xlabel('Avalanche Time')
    plt.ylabel('Instances')
    plt.figure(2)
    plt.title(
        'Threshold values for different percentages of empty space in the lattice'
    )

    # looping over the different percentages
    for i in np.linspace(i_min, i_max, i_iter):
        i = round(i, 1)
        free_iter = Lattice(size=(20, 20),
                            torus_mode=True,
                            rand_dist=('uniform', ),
                            free_percent=i,
                            iterations=iterations,
                            age_fraction=1 / 10)
        free_iter.run("all")

        av_times = free_iter.avalanche_time_list['avalanche_time']
        thresholds = free_iter.threshold_list['threshold']
        thresh_time = free_iter.threshold_list['time_step']

        # Label each curve with its own free fraction so the legend follows
        # whatever i_min / i_max / i_iter were requested. The original
        # legend was five hard-coded colour patches (0.0 ... 0.8).
        plt.figure(1)
        powerlaw.plot_pdf(av_times, label=str(i))

        plt.figure(2)
        plt.plot(thresh_time, thresholds, label=str(i))

    plt.figure(1)
    plt.legend()

    plt.figure(2)
    plt.xlabel('Iteration Number')
    plt.ylabel('Threshold Fitness Level')
    plt.legend(loc='upper right')
    plt.grid()
    plt.tight_layout()
    # Save before showing: a show() used to run ahead of savefig, which can
    # leave nothing to save once the windows are closed. The "iterations="
    # slot now receives `iterations`; it used to receive `i_iter`.
    plt.savefig(path.join(
        dir_path,
        'figures/free_variation_imin={}_imax={}_iterations={}.png'.format(
            i_min, i_max, iterations)),
                dpi=300)
    plt.show()
# # Basic Methods # <markdowncell> # ## Visualization # <markdowncell> # ### PDF Linear vs Logarithmic Bins # <codecell> data = words #### figPDF = powerlaw.plot_pdf(data, color='b') powerlaw.plot_pdf(data, linear_bins=True, color='r', ax=figPDF) #### figPDF.set_ylabel("p(X)") figPDF.set_xlabel(r"Word Frequency") figname = 'FigPDF' savefig(figname + '.eps', bbox_inches='tight') #savefig(figname+'.tiff', bbox_inches='tight', dpi=300) # <markdowncell> # ### Figure 2 # <codecell> data = words
from pandas.plotting import autocorrelation_plot from statsmodels.graphics.tsaplots import plot_acf, plot_pacf # Open file in a dataframe file_name = 'commits_django.csv' df = pd.read_csv(file_name) # Extract pure data from 2º column and transform it to a numpy array raw_commits = df['Lines changed'] data = np.array(raw_commits) # fit the data in a powerlaw (standard method) fit = powerlaw.Fit(data, discrete=True) # plot the raw pdf and the others fitted distributions pdf powerlaw.plot_pdf(data[data >= fit.power_law.xmin], label="Data as PDF") fit.power_law.plot_pdf(label="Fitted powerlaw PDF", ls=":") fit.lognormal.plot_pdf(label="fitted log normal pdf", ls="--") fit.truncated_power_law.plot_pdf(label="fitted truncated powerlaw pdf", ls=":") plt.legend(loc=3, fontsize=14) plt.show() # Print data from the fitted process, parameters and estimations print('Summary\n') print('Estimated value of alpha parameter: ', fit.power_law.alpha) print('Estimated first value where powerlaw is exposed: ', fit.power_law.xmin) print('Estimated last value where powerlaw is exposed: ', fit.power_law.xmax) print('Estimated precision of alpha parameter: +/-', fit.power_law.sigma) R, p = fit.distribution_compare('power_law', 'lognormal',
def powerlaw_figures(fit=False, manual=False):
    """Plot synapse-lifetime PDFs with power-law fits, one figure per ``k``.

    Every sub-directory of ``data/`` is expected to hold ``namespace.p`` (a
    pickled parameter dict) and ``true_lts_equal.p`` (a pickled sequence of
    result dicts). Each result dict is merged with its namespace. For every
    ``k >= 10`` the records with ``pl_alpha == 1.5`` are plotted and the
    figure is written to ``figures/true_lts/k<k>.png``.

    :param fit: when true, fit a power law to each record's lifetimes and
        draw the empirical PDF above ``xmin`` with the fitted line. When
        false, nothing is drawn for the record.
    :param manual: when true (and ``fit`` is true), also overlay a manually
        log-binned density histogram of the lifetimes.
    """
    data_dirs = sorted(['data/' + pth for pth in next(os.walk("data/"))[1]])

    df_all = []
    for dpath in data_dirs:
        print('Loading ', dpath)
        try:
            # NOTE: pickle.load runs arbitrary code from the file; only use
            # this on data directories you produced yourself.
            with open(dpath + '/namespace.p', 'rb') as pfile:
                nsp = pickle.load(pfile)
            with open(dpath + '/true_lts_equal.p', 'rb') as pfile:
                true_lts_df = np.array(pickle.load(pfile))
            for true_lts in true_lts_df:
                df_all.append({**nsp, **true_lts})
        except FileNotFoundError:
            print(dpath[-4:], "reports: No namespace or " +
                  "synsrv_prb data. Skipping.")

    all_k = np.unique([df['k'] for df in df_all])

    directory = 'figures/true_lts/'

    for k in all_k[all_k >= 10]:
        for df in df_all:
            if not (df['pl_alpha'] == 1.5 and df['k'] == k):
                continue

            label = "$k = " + str(df['k']) + "$"
            # Lifetime = column 2 minus column 3 of the record table.
            # NOTE(review): column meanings are not visible here; confirm.
            lts_dat = np.array(df['df_newins'][:, 2] - df['df_newins'][:, 3])

            if not fit:
                continue

            # Keep the fit in its own name. Assigning it to `fit` replaced
            # the boolean flag with an always-truthy Fit object.
            pl_fit = powerlaw.Fit(lts_dat, xmin=df['dts'][1])
            # Raw string: '\g' and '\m' are invalid escape sequences.
            label += r', $\gamma = %.4f$, $x_{\mathrm{min}}=%.1f$' % (
                pl_fit.power_law.alpha, pl_fit.power_law.xmin)
            print(df['k'], df['Npool'])

            figPDF = powerlaw.plot_pdf(
                lts_dat[lts_dat > pl_fit.power_law.xmin],
                label=label,
                alpha=0.2)
            pl_fit.power_law.plot_pdf(ax=figPDF, linestyle='--')

            if manual:
                # 15 log-spaced bin edges from xmin to the longest lifetime.
                bins = np.logspace(np.log10(pl_fit.power_law.xmin),
                                   np.log10(np.max(lts_dat)), 15)
                cts, edgs = np.histogram(
                    lts_dat[lts_dat >= pl_fit.power_law.xmin],
                    density=True,
                    bins=bins)
                centers = (edgs[:-1] + edgs[1:]) / 2
                figPDF.plot(centers, cts)

        if not os.path.exists(directory):
            os.makedirs(directory)

        pl.legend()
        pl.savefig(directory + 'k%d.png' % k, bbox_inches='tight')
        pl.clf()
def degree_distribution(G, name, extention):
    """Plot the degree distribution of a graph and save it to a file.

    The empirical degree PDF is drawn on log-log axes, a power law is fitted
    to the degrees, and the fitted exponent is shown in the plot title. The
    figure is written to ``save_dir + name + "." + extention``.

    Parameters
    ----------
    G : graph
        A network graph.
    name : string
        A graph name, used as the output file stem.
    extention : string
        A graph file extension (without the leading dot).
    """
    degree_sequence = sorted([G.degree(n) for n in G])
    data = np.asarray(degree_sequence, dtype=np.float64)

    # The fit may not work well unless xmin is pinned near the lowest degree.
    # The original passed `xlim=2.0`, which Fit does not recognise and
    # silently ignores; the author's note ("xmin=2.0 or so") shows that
    # xmin was the intended argument.
    fit = powerlaw.Fit(data, xmin=2.0)

    # A likelihood-ratio test between the power law and an exponential can
    # be run with fit.distribution_compare('power_law', 'exponential').
    # R > 0 means the power law is the more plausible of the two, and p is
    # the significance of R.

    alpha = fit.power_law.alpha
    # `fig` is the Axes object returned by plot_pdf, not a Figure.
    fig = powerlaw.plot_pdf(data, color='b', label='Empirical Data')

    # Drawing the fitted line (fit.power_law.plot_pdf) and a sample generated
    # from the fitted distribution were tried here and left disabled.

    slope = "slope = " + str(alpha)

    handles, labels = fig.get_legend_handles_labels()
    fig.legend(handles, labels, loc=3)
    fig.set_ylabel("p(k)")
    fig.set_xlabel("degree k")
    plt.title("Degree Distribution [ " + slope + " ]")

    save_file = save_dir + name + "." + extention
    plt.savefig(save_file)

    print("----------------------------------------")
    print("Finish deistribution analysis.")
    # Single-argument form so the line parses on both Python 2 and 3
    # (the original was a Python 2 print statement).
    print("Save -> %s" % save_file)
    print("----------------------------------------\n")
    plt.close()