def findPowerLawExponent(degree_sequence):
    results = powerlaw.Fit(degree_sequence)
    exponent = results.power_law.alpha
    outputFile = 'PowerLawExponentValue.txt'
    with open(outputFile, 'w') as out_file:
        out_file.write('The power-law exponent for the obtained graph is ' + str(exponent))
Example #2
def powerlaw(data, ax=None, show_fit=True, xmin=1):
    """Plots the probability distribution of data with a power-law fit

	Args:
		data (list): list/numpy array of observations
		ax (None, optional): ax to plot the distribution
		show_fit (bool, optional): whether to show the power-law fit
		xmin (int, optional): smallest value to fit
	"""
    if ax is None:
        ax = plt.gca()

    # Varies fitting method based on the package's recommendation
    if xmin > 6:
        estimate_discrete = True
    else:
        estimate_discrete = False

    # Plots data
    data_nonzero = data[data > 0]
    pl_obj = plw.Fit(data_nonzero,
                     xmin=xmin,
                     estimate_discrete=estimate_discrete)
    str_label = r'N = {:0.0f}, R = {:0.1f}'.format(
        data_nonzero.size,
        np.sum(data_nonzero) / data_nonzero.size)
    pl_obj.plot_pdf(ax=ax, original_data=True, label=str_label)

    if show_fit:
        str_label_fit = r'$\alpha$ = {:0.3f}'.format(pl_obj.power_law.alpha)
        pl_obj.power_law.plot_pdf(ax=ax,
                                  color='k',
                                  linestyle='--',
                                  label=str_label_fit)
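A minimal usage sketch for the plotting helper above (not part of the original example); the `plw`/`plt`/`np` aliases match what the helper assumes, and the synthetic sample is an illustrative assumption.

# Usage sketch (illustrative only): call the powerlaw() helper above on a
# synthetic heavy-tailed sample.
import matplotlib.pyplot as plt
import numpy as np
import powerlaw as plw

rng = np.random.default_rng(0)
sample = np.ceil(rng.pareto(1.5, size=5000)).astype(int)  # discrete heavy-tailed data

fig, ax = plt.subplots()
powerlaw(sample, ax=ax, show_fit=True, xmin=1)  # the plotting helper defined above
ax.legend()
plt.show()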
Example #3
def get_alpha(data):
        data = np.array(data)

        # alpha2 is the estimate from the powerlaw package
        result = powerlaw.Fit(data)
        alpha2 = result.power_law.alpha

        # xminimum is taken as the smallest value in the data
        # (the package's own estimate, result.power_law.xmin, is not used)
        xminimum = min(data)

        # keep only observations strictly above xminimum
        datanew = [dt for dt in data if dt > xminimum]

        # alpha is the continuous MLE (Clauset et al.):
        # alpha = 1 + n * (sum_i ln(x_i / xmin))^(-1)
        summand1 = sum(np.log(dt / xminimum) for dt in datanew)
        alpha = 1 + len(datanew) * summand1 ** (-1)
        # approximate standard error of the estimate
        sigma = (alpha - 1) / (len(datanew) ** 0.5) + 1 / len(datanew)
        print(alpha)
        print(sigma)
        print(xminimum)
        return alpha, sigma, alpha2
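A brief check of get_alpha (not from the original source); the Pareto sample below is an assumption used only to compare the hand-rolled MLE with the powerlaw package's estimate.

import numpy as np

# synthetic Pareto sample with density exponent ~2.5 (illustrative assumption)
sample = np.random.pareto(1.5, size=5000) + 1.0
alpha, sigma, alpha2 = get_alpha(sample)
print('manual MLE:', alpha, '+/-', sigma, '| powerlaw package:', alpha2)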
Example #4
def do_stuff(data):
    fit = powerlaw.Fit(data)
    # plot_pdf returns a matplotlib Axes, which is reused for the other curves
    ax = fit.plot_pdf(color='b', linewidth=2)
    fit.power_law.plot_pdf(color='b', linestyle='--', ax=ax)
    fit.plot_ccdf(color='r', linewidth=2, ax=ax)
    fit.power_law.plot_ccdf(color='r', linestyle='--', ax=ax)
    plt.show()
Example #5
 def __fitting(self, data):
     _, y = data['degs']['in']
     y = np.array(y)
     mask = y >= 3
     fit = powerlaw.Fit(y[mask], verbose=False)
     fig = fit.plot_ccdf(label='CCDF',
                         color='r',
                         linestyle='--',
                         marker='o')
     fit.lognormal.plot_ccdf(ax=fig,
                             color='c',
                             linestyle='--',
                             label='log-normal fit')
     fit.power_law.plot_ccdf(ax=fig,
                             color='g',
                             linestyle='--',
                             label='power-law fit')
     fit.exponential.plot_ccdf(ax=fig,
                               color='b',
                               linestyle='--',
                               label='exponential fit')
     fit.truncated_power_law.plot_ccdf(ax=fig,
                                       color='k',
                                       linestyle='--',
                                       label='truncated power law fit')
     plt.legend()
     plt.title(f"{self.name} fit")
     plt.ylabel(r"$P(k>=x)$")
     plt.xlabel(r'$x$')
     plt.savefig(f'../plots/{self.name}/fitting-{self.name}.png', dpi=300)
     plt.close()
Example #6
def display_fitting(X):
	import powerlaw
	fit = powerlaw.Fit(X, discrete=True)
	logger.info('alpha: %.3f'%fit.power_law.alpha)
	logger.info('xmin: %.3f'%fit.power_law.xmin)
	logger.info('sigma: %.3f'%fit.power_law.sigma)
	logger.info('D: %.3f'%fit.power_law.D)
Example #7
def nodeDegree(graph):
    degree_sequence = sorted([d for n, d in graph.degree()], reverse=True)
    degreeCount = collections.Counter(degree_sequence)
    deg, cnt = zip(*degreeCount.items())

    # # plot node degrees
    # fig, ax = plt.subplots()
    # plt.bar(deg, cnt, width=0.80, color='b')
    # plt.title("Degree Histogram")
    # plt.ylabel("Count")
    # plt.xlabel("Degree")
    # ax.set_xticks([d + 0.4 for d in deg])
    # ax.set_xticklabels(deg)
    # plt.savefig('nodeDegrees')

    np.seterr(divide='ignore', invalid='ignore')
    fitgen = powerlaw.Fit(deg, discrete=True)

    global RgenL
    global PgenL

    RgenL = []
    PgenL = []

    Rgen, pgen = fitgen.distribution_compare('power_law',
                                             'lognormal',
                                             normalized_ratio=True)
    print(Rgen, pgen)
    RgenL.append(Rgen)
    PgenL.append(pgen)
Example #8
def powlaw_of_total(pid_citnum):

    # dict values -> list so numpy/powerlaw can consume them
    values = list(pid_citnum.values())

    # passing xmin as a (min, max) pair restricts the xmin search range
    results = powerlaw.Fit(values, xmin=(1, 10))

    return results.power_law.alpha
Example #9
def Barabasi_Albert(start, width, role_start=0, m=4):
    graph = nx.barabasi_albert_graph(width, m)
    graph.add_nodes_from(range(start, start + width))
    nids = sorted(graph)
    mapping = {nid: start + i for i, nid in enumerate(nids)}
    graph = nx.relabel_nodes(graph, mapping)
    roles = [role_start for i in range(width)]
    # get degree distribution & power-law exponent
    #print(list(nx.isolates(graph)))
    # clustering coefficient & diameter
    graph.remove_nodes_from(list(nx.isolates(graph)))
    con_graph = graph  # the initial seed nodes may be disconnected, which breaks the diameter calculation
    cluster_coeff = nx.algorithms.cluster.clustering(con_graph)
    avg_cluster_coeff = nx.algorithms.cluster.average_clustering(con_graph)
    print('average clustering coefficients: ', avg_cluster_coeff)
    diameter = nx.diameter(con_graph)
    print('diameter',diameter)
   
    #plot graph
    degrees = sorted([d for n, d in con_graph.degree()], reverse=True)
    fit = powerlaw.Fit(degrees,xmin=1)
    fig2 = fit.plot_pdf(color='b', linewidth=2)
    fit.power_law.plot_pdf(color='g', linestyle='--', ax=fig2)
    #print('power law exponent: ', power_fit.power_law.alpha)    
    #degHistogram(con_graph)
    #nx.draw(graph, with_labels=True) 
    plt.show()
    
    return graph, roles 
Example #10
def fs_digraph_using_indegree(D,
                              stats,
                              options={
                                  'features': [],
                                  'skip_features': []
                              }):
    """"""

    # compute once
    degree_list = D.get_in_degrees(D.get_vertices())

    # feature: h_index_d
    if 'h_index' in options['features']:
        degree_list[::-1].sort()

        h = 0
        for x in degree_list:
            if x >= h + 1:
                h += 1
            else:
                break

        stats['h_index_d'] = h
        log.debug('done h_index_d')

    # feature: p_law_exponent
    if 'powerlaw' in options['features']:
        fit = powerlaw.Fit(degree_list)

        stats['powerlaw_exponent_in_degree'] = float(fit.power_law.alpha)
        stats['powerlaw_exponent_in_degree_dmin'] = float(fit.power_law.xmin)
        log.debug('done powerlaw_exponent')

    # plot degree distribution
    if 'plots' in options['features'] and (
            'skip_features' not in options
            or 'plots' not in options['skip_features']):
        degree_counted = collections.Counter(degree_list)
        degree, counted = zip(*degree_counted.items())

        with lock:
            fig, ax = plt.subplots()
            plt.plot(degree, counted)

            plt.title('In-Degree Histogram')
            plt.ylabel('Frequency')
            plt.xlabel('In-Degree')

            ax.set_xticklabels(degree)

            ax.set_xscale('log')
            ax.set_yscale('log')

            plt.tight_layout()
            plt.savefig('/'.join([
                os.path.dirname(stats['path_edgelist']),
                'distribution_in-degree.pdf'
            ]))
            log.debug('done plotting in-degree distribution')
Example #11
def fitPowerLaw(rand, ax, label):

    ax.set_title(label, fontsize=18)

    # histogram
    print('Fitting lognormal...')
    x_rand, p_rand = getDistribution(rand)
    counts, bins, bars = ax.hist(
        rand,
        density=True,
        bins=10**np.linspace(np.log10(min(x_rand)), np.log10(max(x_rand)), 15),
        log=True,
        alpha=0.0)  #,   histtype='step', linewidth = 0)
    ax.plot((bins[1:] + bins[:-1]) / 2,
            counts,
            's-',
            color='royalblue',
            alpha=0.5,
            markersize=12,
            linewidth=2)

    # get the lognormal
    param = stats.lognorm.fit(rand)
    pdf_fitted = stats.lognorm.pdf(x_rand,
                                   param[0],
                                   loc=param[1],
                                   scale=param[2])  #
    mu = np.log(param[2])
    sigma = param[0]
    sk_results_norm = stats.kstest(
        np.asarray(pdf_fitted),
        lambda x: stats.lognorm.cdf(x_rand, param[0], loc=param[1], scale=param[2]),
    )  # stats.ks_2samp(np.cumsum(p_rand), np.cumsu
    ax.plot(x_rand,
            pdf_fitted,
            'k-',
            linewidth=4,
            label='$\\mu$=' + str(round(mu, 2)) + ' $\\sigma$=' +
            str(round(sigma, 2)) + ', $D$=' +
            str(round(sk_results_norm[0], 2)))

    # fit and plot the powerlaw
    results = powerlaw.Fit(rand, xmin=min(x_rand), fit_method='KS')
    alpha = results.power_law.alpha
    xmin = results.power_law.xmin
    D = results.power_law.KS()

    results.power_law.plot_pdf(color='r',
                               ax=ax,
                               linestyle='-',
                               linewidth=4,
                               label='$\\alpha$= ' + str(round(alpha, 2)) +
                               ', $x_{min}$=' + str(round(xmin, 2)) +
                               '\n$D$=' + str(round(D, 2)))

    ax.set_ylim([min(counts), 1.1])
    ax.set_xlim([min(x_rand), max(bins)])

    return alpha, xmin, D
Example #12
    def _fit(self, xmin, xmax):
        '''
        Estimate parameters by minimizing the KS distance
        '''
        if not xmin:
            with io.capture_output() as captured:
                best_fit = pl.Fit(data=self.data_original, discrete=True)
        else:
            with io.capture_output() as captured:
                best_fit = pl.Fit(data=self.data_original,
                                  xmin=xmin,
                                  discrete=True)

        self.xmin = best_fit.xmin
        self.alpha = best_fit.alpha

        return best_fit
Example #13
    def _fit(self, xmin, xmin_range=False):
        '''
        Estimate parameters by minimizing the KS distance
        '''
        if not xmin:
            if not xmin_range:
                xmins = np.unique(self.data_original)[:-1]
            else:
                # to save time, we can restrict the xmin search to a range
                # when bootstrapping; the range should be based on the
                # xmin estimate of the hypothesized model
                xmins = xmin_range

            estimates = []
            for xmin_ in xmins:
                try:
                    fit = pl.Fit(data=self.data_original,
                                 xmin=xmin_,
                                 discrete=True)
                except ZeroDivisionError:
                    # skip candidate xmin values that break the fit
                    continue
                estimates.append(fit)
            try:
                estimates.sort(key=lambda dist: dist.truncated_power_law.D)
                best_fit = estimates[0]
            except ZeroDivisionError:
                Ds = []
                for est in estimates:
                    try:
                        D = est.truncated_power_law.D
                    except ZeroDivisionError:
                        D = np.nan
                    Ds.append([est, D])
                Ds.sort(key=lambda l: l[-1])
                self.Ds = Ds
                best_fit = Ds[0][0]

            self.xmin = best_fit.xmin
        else:
            best_fit = pl.Fit(data=self.data_original,
                              xmin=xmin,
                              discrete=True)
        self.Lambda = best_fit.truncated_power_law.Lambda
        self.alpha = best_fit.truncated_power_law.alpha

        return best_fit
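A standalone sketch of the xmin scan that both `_fit` methods above rely on, written against the `powerlaw` package directly; the use of the plain power law (rather than the truncated one), the Zipf test sample, and the helper name are assumptions for illustration.

import numpy as np
import powerlaw as pl

def scan_xmin_by_ks(data):
    # Illustrative only: refit at every candidate xmin and keep the fit
    # whose Kolmogorov-Smirnov distance D is smallest.
    data = np.asarray(data)
    best = None
    for xmin_ in np.unique(data)[:-1]:
        fit = pl.Fit(data, xmin=xmin_, discrete=True)
        if best is None or fit.power_law.D < best.power_law.D:
            best = fit
    return best

# e.g.: best = scan_xmin_by_ks(np.random.zipf(2.0, 2000))
#       best.power_law.alpha, best.xmin, best.power_law.D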
Example #14
def plot_ccdf():
    df1 = pd.read_csv('../../data/channel_chat_lines_count.csv', header=None)
    df1.columns = ['word', 'chat_count']
    data1 = df1.chat_count.values.tolist()
    fit1 = powerlaw.Fit(data1, xmin=1.0, discrete=True)

    df2 = pd.read_csv('../../data/user_chat_counts.csv', header=None)
    df2.columns = ['user', 'chat_count']
    data2 = df2.chat_count.values.tolist()
    fit2 = powerlaw.Fit(data2, xmin=1.0, discrete=True)

    df3 = pd.read_csv('../../data/users_per_channel_counts.csv', header=None)
    df3.columns = ['user_count', 'channel_count']
    data3 = df3.channel_count.values.tolist()
    fit3 = powerlaw.Fit(data3, xmin=1.0, discrete=True)

    fig, (ax1, ax2) = plt.subplots(figsize=(4, 3),
                                   nrows=1,
                                   ncols=2,
                                   sharey=True)
    #    ax1.set_title('Messages')

    fit1.plot_ccdf(color='black', linewidth=2, label='Channels', ax=ax1)
    fit2.plot_ccdf(color='black',
                   linewidth=2,
                   linestyle='--',
                   ax=ax1,
                   label='Users')

    ax1.set_xlabel('No. messages $n$')
    ax1.set_ylabel(r'$p(N \geq n)$')
    # ax1.legend(loc='best', frameon=False, fontsize='x-small')
    handles, labels = ax1.get_legend_handles_labels()

    fit3.plot_ccdf(color='black', linewidth=2, label='Channels', ax=ax2)

    ax2.set_xlabel('No. users $n$')
    ax2.legend(handles, labels, loc='best', frameon=False, fontsize='x-small')
    #    ax2.set_ylabel(r'$p(U \geq u)$')
    #    ax2.set_title('Users')

    for ax in [ax1, ax2]:
        ax.xaxis.set_major_locator(matplotlib.ticker.LogLocator(numticks=4))
    plt.tight_layout()

    fig.savefig('power_law.eps', format='eps')
Example #15
def calculate_comparison_ratio(data):
    fit = powerlaw.Fit(data, xmin=1, discrete=True)
    # R > 0 favours the power law over the exponential; the ratio is kept
    # only when the comparison is significant (p < 0.1)
    R_exp, p_exp = fit.distribution_compare('power_law',
                                            'exponential',
                                            normalized_ratio=True)
    R = R_exp if p_exp < 0.1 else 0

    return R
Example #16
 def fitPowerLaw(self):
     # get degree distribution (in-degrees plus out-degrees from the adjacency matrix)
     rowSums = np.asarray(self.adjMat).sum(0)
     colSums = np.asarray(self.adjMat).sum(1)
     total = rowSums + colSums
     results = powerlaw.Fit(total)
     return (results.power_law.alpha, results.power_law.xmin)
Example #17
def estimate_alpha_simple_random_walk(size):
    reg_sizes = np.divide(get_random_walk_regeneration_block_sizes(size), 2)
    fit_estimating_discrete = pw.Fit(data=reg_sizes,
                                     discrete=True,
                                     estimate_discrete=True)
    return (fit_estimating_discrete.power_law.alpha,
            fit_estimating_discrete.power_law.xmin,
            sum(np.greater_equal(reg_sizes,
                                 fit_estimating_discrete.power_law.xmin)),
            len(reg_sizes))
Example #18
 def soiTest(result_path):
     elems = []
     with open(result_path, "r") as f:
         for line in f:
             soi_key = int(line.strip().split()[0])
             soi_value = int(line.strip().split()[1])
             elems += [soi_key for _ in range(soi_value)]
     return powerlaw.Fit(elems, discrete=True)
Example #19
 def setUpClass(cls):
     for k in references.keys():
         data = references[k]['data']
         fit = powerlaw.Fit(data, discrete=references[k]['discrete'],
                            estimate_discrete=False)
         results[k]['alpha'] = fit.alpha
         results[k]['xmin'] = fit.xmin
         results[k]['fit'] = fit
Example #20
def update(selected=None):
    t1, t2 = ticker1.value, ticker2.value
    data = get_data(t1, t2)
    update_stats(data, data[1], data[2])
    source.data = source.from_df(
        data[0][~data[0]["word"].isin(ss[0:int(stopwords_1.value)])])
    #source.data["rank_y_new"] = source.data["rank_y_new"].rank(ascending=False, method="first")
    #source.data["rank_x_new"] = source.data["rank_x_new"].rank(ascending=False, method="first")

    #selection_1 = np.array(load_ticker(t1)[~load_ticker(t1)["word"].isin(ss[0:int(stopwords_1.value)])]["freq"].astype(float))
    selection_1 = np.array(
        data[0][~data[0]["word"].isin(ss[0:int(stopwords_1.value)])]
        ["freq_x"].astype(float))
    #selection_2 = np.array(load_ticker(t2)[~load_ticker(t2)["word"].isin(ss[0:int(stopwords_1.value)])]["freq"].astype(float))
    selection_2 = np.array(
        data[0][~data[0]["word"].isin(ss[0:int(stopwords_1.value)])]
        ["freq_y"].astype(float))
    title_1 = '%s, TTR = %s' % (
        t1, round(len(selection_1) / sum(selection_1), 2)
    ) + ', Gini = %s' % round(gini(selection_1), 2) + ', ⍺ = %s' % str(
        round(powerlaw.Fit(selection_1, discrete=True).alpha,
              2)) + ', 𝑯 = %s' % str(round(ent(pd.Series(selection_1)), 2))
    title_2 = '%s, TTR = %s' % (
        t2, round(len(selection_2) / sum(selection_2), 2)
    ) + ', Gini = %s' % round(gini(selection_2), 2) + ', ⍺ = %s' % str(
        round(powerlaw.Fit(selection_2, discrete=True).alpha,
              2)) + ', 𝑯 = %s' % str(round(ent(pd.Series(selection_2)), 2))
    left_lin.title.text = title_1
    left_log.title.text = title_1
    right_lin.title.text = title_2
    right_log.title.text = title_2
    left_lin.title.text_font_size = '7pt'
    left_log.title.text_font_size = '7pt'
    right_lin.title.text_font_size = '7pt'
    right_log.title.text_font_size = '7pt'
    right_lin.x_range = left_lin.x_range
    right_lin.y_range = left_lin.y_range
    right_log.x_range = left_log.x_range
    right_log.y_range = left_log.y_range
Example #21
  def fatness(self):
    with warnings.catch_warnings():
      warnings.simplefilter("ignore")
      self.g_obs = powerlaw.Fit(nx.degree_histogram(self.G), verbose=False).alpha
    self.cc = nx.average_clustering(self.G)
    self.dmoy = nx.average_shortest_path_length(self.G)
    fatness = sum([(1 - self.g_obs / self.param[0])**2,
                   (self.dmoy / self.param[1])**2,
                   (self.cc / self.param[2])**2])

    return fatness
Example #22
def naivesample(data):
    """sample for case resampling bootstrap"""
    sample = np.random.choice(data, len(data), replace=True)
    result = powerlaw.Fit(sample, discrete=True, verbose=False)  # , xmin=xmin
    return {
        "D": result.power_law.D,
        "xmin": result.power_law.xmin,
        "alpha": result.power_law.alpha,
    }
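A sketch of the case-resampling bootstrap the helper above is written for (not from the original source); the Zipf sample and the number of replicates are assumptions.

import numpy as np

data = np.random.zipf(2.0, size=1000)           # illustrative sample
boot = [naivesample(data) for _ in range(100)]  # 100 bootstrap replicates
alphas = [b["alpha"] for b in boot]
print(np.mean(alphas), np.std(alphas))          # bootstrap mean and spread of alpha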
Example #23
def estimate_alpha_pareto_sample(size):
    sample = np.ceil(pareto.rvs(b=1.5, size=size))
    fit_estimating_discrete = pw.Fit(data=sample,
                                     discrete=True,
                                     estimate_discrete=True)
    print(fit_estimating_discrete.power_law.alpha)
    print(fit_estimating_discrete.power_law.sigma)
    print(fit_estimating_discrete.power_law.xmin)
    return fit_estimating_discrete.power_law.alpha
Example #24
def plotPowerlaw(data, ax, col, xlab):
    fit = powerlaw.Fit(data, xmin=2)
    #fit = powerlaw.Fit(data)
    fit.plot_pdf(color=col, linewidth=2)
    a, x = (fit.power_law.alpha, fit.power_law.xmin)
    fit.power_law.plot_pdf(color=col, linestyle='dotted', ax=ax,
                           label=r"$\alpha = %d \:\:, x_{min} = %d$" % (a, x))
    ax.set_xlabel(xlab, fontsize=20)
    ax.set_ylabel('$Probability$', fontsize=20)
    plt.legend(loc=0, frameon=False)
Example #25
 def singTest(result_path):
     elems = []
     with open(result_path, "r") as f:
         for line in f:
             sv_key = int(line.strip().split()[0])
             sv_value = float(line.strip().split()[1])
             elems += [sv_key for _ in range(int(sv_value * 100))]
             elems.append(sv_value)
     return powerlaw.Fit(elems, discrete=True)
Example #26
def fit_compare(data, estimate_discrete = True, p_lim = 0.05):

	fit_obj = plw.Fit(data, estimate_discrete=estimate_discrete)

	distributions = ['power_law', 'truncated_power_law', 'exponential','lognormal_positive']

	results = {}

	results['power_law_score'] = 0
	results['truncated_power_law_score'] = 0
	results['exponential_score'] = 0
	results['lognormal_positive_score'] = 0

	#Tests all distribution combinations
	for (a,b) in combinations(distributions,2):
		(likelihood_ratio, p) = fit_obj.distribution_compare(a,b)
		if p < p_lim:
			if likelihood_ratio > 0:
				results[a+'_score'] += 1
			else:
				results[b+'_score'] += 1

	#Selects a best fit if it is better than all the others
	results['best_fit'] = None
	for dist in distributions:
		if results[dist + '_score'] == 3:
			results['best_fit'] = dist

	#Fills data
	results['power_law_alpha'] = fit_obj.power_law.alpha
	try:
		results['power_law_xmin'] = fit_obj.power_law.xmin
		results['power_law_xmax'] = fit_obj.pdf()[0][-1]
		results['power_law_orders'] = np.log10(results['power_law_xmax']) - np.log10(results['power_law_xmin'])
	except:
		results['power_law_xmin'] = fit_obj.power_law.xmin
		results['power_law_xmax'] = fit_obj.power_law.xmin
		results['power_law_orders'] = 0

	results['truncated_power_law_alpha'] = fit_obj.truncated_power_law.parameter1
	results['truncated_power_law_lambda'] = fit_obj.truncated_power_law.parameter2
	try:
		results['truncated_power_law_xmin'] = fit_obj.truncated_power_law.xmin
		results['truncated_power_law_xmax'] = fit_obj.pdf()[0][-1]
		results['truncated_power_law_orders'] = np.log10(results['truncated_power_law_xmax']) - np.log10(results[
			'truncated_power_law_xmin'])
	except:
		results['truncated_power_law_xmin'] = fit_obj.truncated_power_law.xmin
		results['truncated_power_law_xmax'] = fit_obj.truncated_power_law.xmin
		results['truncated_power_law_orders'] = 0

	results['exp_lambda'] = fit_obj.exponential.parameter1

	results['lognormal_positive_mu'] = fit_obj.lognormal_positive.parameter1
	results['lognormal_positive_sigma'] = fit_obj.lognormal_positive.parameter2

	return results
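A brief usage sketch for `fit_compare` above (not from the original source); the Zipf sample is an assumption, and `np`, `plw`, and `combinations` are expected to be in scope as the function itself requires.

import numpy as np

sample = np.random.zipf(2.3, size=5000)                            # illustrative sample
summary = fit_compare(sample, estimate_discrete=True, p_lim=0.05)
print(summary['best_fit'])                                         # e.g. 'power_law', or None
print(summary['power_law_alpha'], summary['power_law_xmin'])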
Example #27
    def fit_power_law(
        self,
        eig_dict=None,
        plot_alpha: bool = False,
        plot_eig=False,
    ) -> Dict[int, Tuple[float, float]]:
        r"""
        Fits the eigenvalue spectrum distribution of
        the layer weights :math:`X = W W^T` with a power-law distribution.
        Uses the MLE approach from https://arxiv.org/abs/0706.1062.

        Parameters
        ----------
        eig_dict: Dict[int, Tuple[np.ndarray, float]]
            Optional, useful if pre-computed with `.spectral_analysis()`.
            Dictionary with keys of the nth layer provided,
            values of :attr:`(eigenvalues, Q)`, where :attr:`eigenvalues`
            are those of the weight matrix for the layer, and :attr:`Q`
            is the aspect ratio of the matrix.
        plot_alpha: bool
            Plot per-layer power-law fit of the
            eigenvalue spectrum distribution.
        plot_eig: bool
            Plot per-layer eigenvalue spectrum distribution

        Returns
        -------
        alpha_dict: Dict[int, Tuple[float, float]]
            Dictionary with keys of the nth layer provided,
            values of `(alpha, eig_max)`, where `alpha`
            is the power-law exponent of the fit, i.e.
            :math:`\rho(\lambda) \sim \lambda^{-\alpha}`,
            and `eig_max` is the largest eigenvalue.
        """
        if not eig_dict:
            eig_dict = self.spectral_analysis(plot=plot_eig)
        all_layers = list(eig_dict.keys())
        alpha_dict = {}
        for layer in all_layers:
            eigenvalues, Q = eig_dict[layer]
            eig_max = np.max(eigenvalues)
            results = powerlaw.Fit(eigenvalues, verbose=False)
            alpha = results.power_law.alpha
            alpha_dict[layer] = alpha, eig_max
            if plot_alpha:
                results.plot_pdf(color="b")
                results.power_law.plot_pdf(color="r",
                                           linewidth=2,
                                           linestyle="--")  # noqa
                plt.title(
                    f"Linear layer {layer} power law fit \n alpha = {round(alpha, 3)}"
                )  # noqa
                plt.ylabel("Spectral density (log)")
                plt.xlabel("Eigenvalues of $W_{FC}W_{FC}^T$ (log)")
                plt.show()
        return alpha_dict
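A standalone illustration of what `fit_power_law` above describes (not from the original class): fit the eigenvalue spectrum of X = W Wᵀ for a random weight matrix. The matrix shape and the zero-eigenvalue cutoff are assumptions made for the demo.

import numpy as np
import powerlaw

W = np.random.randn(512, 256) / np.sqrt(256)    # stand-in weight matrix (assumption)
eigenvalues = np.linalg.eigvalsh(W @ W.T)       # spectrum of X = W W^T
eigenvalues = eigenvalues[eigenvalues > 1e-12]  # drop numerically zero modes

results = powerlaw.Fit(eigenvalues, verbose=False)
print(results.power_law.alpha, np.max(eigenvalues))  # (alpha, eig_max), as in alpha_dict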
Example #28
 def R(self, state):
     "Return a numeric reward for this state."
     sim = HChem(self.rules, state)
     chain_lengths = sim.calculate_chain_lengths()
     results = powerlaw.Fit(chain_lengths)
     R, p = results.distribution_compare('power_law', 'lognormal')
     reward = self.reward
     if R > 0:
         reward = p
     return reward
Example #29
def simulate_zipf(alpha=1.5, n=10**4, repetitions=10, x_min=None):
    indexes = list()
    estimations_alpha = list()
    estimations_xmin = list()
    bigger_than_min = list()
    for k in range(1, repetitions + 1):
        _zipf_rv = zipf(alpha)

        discrete_sample = np.sort(_zipf_rv.rvs(size=n))

        if x_min is not None:
            fit_estimating_discrete = pw.Fit(data=discrete_sample,
                                             discrete=True,
                                             estimate_discrete=False,
                                             xmin=x_min)
        else:
            fit_estimating_discrete = pw.Fit(data=discrete_sample,
                                             discrete=True,
                                             estimate_discrete=False)
        print(fit_estimating_discrete.alpha)
        print(fit_estimating_discrete.xmin)
        indexes.append(k)
        estimations_alpha.append(fit_estimating_discrete.alpha)
        estimations_xmin.append(fit_estimating_discrete.xmin)
        if x_min:
            bigger_than_min.append(
                sum(np.greater_equal(discrete_sample, x_min)))
        else:
            bigger_than_min.append(
                sum(
                    np.greater_equal(discrete_sample,
                                     fit_estimating_discrete.xmin)))

    if not x_min:
        plot_results(rep_nums=indexes,
                     alphas=estimations_alpha,
                     xmins=estimations_xmin,
                     resampling=bigger_than_min)
    else:
        plot_results(rep_nums=indexes,
                     alphas=estimations_alpha,
                     xmins=None,
                     resampling=bigger_than_min)
Example #30
def calculate_data_score(data):
  fit = powerlaw.Fit(data, xmin=1, discrete=True)
  alpha = fit.power_law.alpha
  ksdist = fit.power_law.D
  # R > 0 favours the power law over the exponential; keep the ratio only
  # when the comparison is significant (p < 0.1)
  R_exp, p_exp = fit.distribution_compare('power_law', 'exponential', normalized_ratio=True)
  R_exp = R_exp if p_exp < 0.1 else 0

  R = R_exp

  return alpha, ksdist, R