import numpy as np
from scipy.stats import gumbel_r, powerlaw, genextreme

def fit_incs_pd(ascs, ascs_month, months, flag=0):
    # This function accepts a list of increasing periods and the desired month(s),
    # and returns the fitted parameters of the distribution of increments for
    # those months. fitweibull and fitlognorm are helpers defined elsewhere
    # in the module.

    # Keep only the periods whose month is among the requested months.
    ascs_select = list()
    for m in range(len(ascs)):
        if np.in1d(ascs_month[m], months):
            ascs_select.append(ascs[m])

    # Collect the first differences (increments) of every selected period.
    incs = list()
    for asc_temp in ascs_select:
        if hasattr(asc_temp, "__len__"):
            incs.extend(np.subtract(asc_temp[1:], asc_temp[:-1]))

    # Keep the positive increments and fit the distribution chosen by `flag`.
    incs = list(filter(lambda a: a > 0, incs))
    if flag == 0:
        optparms = fitweibull(incs)
    elif flag == 1:
        optparms = fitlognorm(incs)
    elif flag == 2:
        optparms = gumbel_r.fit(incs)
    elif flag == 3:
        optparms = powerlaw.fit(incs)
    elif flag == 4:
        optparms = genextreme.fit(incs)  # was genextreme.it, a typo
    return optparms
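A minimal usage sketch (the data below is hypothetical, and flag=2 is chosen because scipy's gumbel_r.fit returns a plain (loc, scale) tuple):

ascs = [np.array([1.0, 2.5, 4.0]), np.array([0.5, 1.5]), np.array([2.0, 2.2, 3.1])]  # hypothetical periods
ascs_month = [1, 1, 2]  # month tag for each period

loc, scale = fit_incs_pd(ascs, ascs_month, months=[1], flag=2)  # Gumbel fit on the January increments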
Example #2
import pandas as pd
from scipy.stats import (shapiro, normaltest, kstest,
                         norm, powerlaw, expon)

def distest_loose(x):
    # Run a loose battery of distribution tests on x and tabulate the p-values.
    # _series (coerce the input to a pandas Series) and DEC (rounding precision)
    # are helpers assumed to be defined elsewhere in the module.
    x = _series(x)
    data = {
        'Shapiro-Wilk (normal)': shapiro(x),
        'D\'Agostino-Pearson (normal)': normaltest(x),
        'Kolmogorov-Smirnov (normal)': kstest(x, norm.cdf, norm.fit(x)),
        'Kolmogorov-Smirnov (powerlaw)': kstest(x, powerlaw.cdf, powerlaw.fit(x)),
        'Kolmogorov-Smirnov (exponential)': kstest(x, expon.cdf, expon.fit(x)),
    }
    keys = data.keys()
    values = (p for _, p in data.values())  # each test result is (statistic, p-value)
    return pd.DataFrame(values, keys, ['p-value']).round(DEC)
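A usage sketch under the stated assumptions about _series and DEC, feeding the battery a shifted normal sample:

import numpy as np

rng = np.random.default_rng(0)
print(distest_loose(rng.normal(10, 1, size=200)))  # the normality rows should generally show large p-values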
Example #3
import random as r
import networkx as nx
import powerlaw as pw

def Generation(nb_graph):
    N = []     # list of graphs
    fitC = []  # power-law exponents fitted to the clustering coefficients
    fitD = []  # power-law exponents fitted to the degree sequences
    diam = []  # graph diameters

    for i in range(nb_graph):
        n = r.randint(80, 120)
        p = r.random()
        N.append(nx.fast_gnp_random_graph(n, p))

        # NOTE: nx.diameter raises NetworkXError if the graph is disconnected,
        # which can happen for small p.
        diam.append(nx.diameter(N[i]))
        C = nx.clustering(N[i])
        D = nx.degree(N[i])
        # The powerlaw package exposes a Fit class (not a fit function);
        # the fitted exponent is its .power_law.alpha attribute.
        fitC.append(pw.Fit(list(C.values()), discrete=True).power_law.alpha)
        fitD.append(pw.Fit(list(dict(D).values()), discrete=True).power_law.alpha)
    # fitness is a helper defined elsewhere in the module.
    F = fitness(fitC, fitD, diam)
    print(F)
    a = []
    for j in range(nb_graph):
        a.append((N[j], F[j]))
    return a
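A usage sketch (fitness, called inside Generation, is assumed to come from the same module and to return one score per graph):

population = Generation(5)  # five (graph, fitness) pairs
best_graph, best_score = max(population, key=lambda pair: pair[1])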
Example #4
import pandas as pd
from collections import Counter
from scipy.optimize import curve_fit
from scipy.stats import powerlaw

csv_data = pd.read_csv(
    '/Users/zhangchenhan/Desktop/sanguo_network/sanguozhiiiiiiiiiiiiiiiiiiiiiiiiiiiiii/sanguozhi_topo_features.csv'
)
data = list(csv_data.Degree)
data = dict(Counter(data))
X = list(data.keys())
Y = list(data.values())
# X = np.array(X)
# Y = np.array(Y)

df = pd.DataFrame({'X': X, 'Y': Y})
df.to_csv('sanguozhi_powerlaw.csv')

# regr = linear_model.LinearRegression()
# regr.fit(X, Y)  # fit
#
# print('Coefficients: \n', regr.coef_,)
# print("Intercept:\n", regr.intercept_)
# print("Residual sum of squares: %.8f" % np.mean((regr.predict(X) - Y) ** 2))  # residual sum of squares

results = powerlaw.fit(X)  # note: this fits the unique degree values, not the full degree list
print(results)
# visualization
# plt.title('power_law')
# plt.scatter(X, Y, color='black')
# plt.plot(X, regr.predict(X), color='blue',linewidth=3)
# plt.show()
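For reference, scipy.stats.powerlaw.fit returns a (shape, loc, scale) tuple; a minimal sketch of unpacking it (the variable names are just for illustration):

a, loc, scale = results
print("shape a = %.3f, loc = %.3f, scale = %.3f" % (a, loc, scale))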
Example #5
plt.xlabel("Swaps")
plt.ylabel("Total length")
plt.title("Trajectory of Search for Min Total Length, Average Min = %.1f" %
          np.mean(mins))

#------------------------ Power Law Networks ----------------------------------
o_dist2 = data[1][1]
f_dist2 = data[1][2]

#original and final length
original_len = sum(o_dist2) / realizations
final_len = sum(f_dist2) / realizations

#fitting: scipy's powerlaw.fit returns a (shape a, loc, scale) tuple
r = powerlaw.fit(o_dist2)
rx = np.linspace(0, max(o_dist2), 100)
rp = powerlaw.pdf(rx, *r)
l1_fit = 1 / r[1]  # reciprocal of the fitted loc

fig3, (ax1, ax2) = plt.subplots(2)
# Invisible histogram (alpha=0): we only want the bin densities and edges
# for the log-log scatter below.
n2, bins2, patches2 = ax1.hist(o_dist2, bins=100, density=True, alpha=0)
ax1.set(xlabel="log(Edge Length)", ylabel="log(counts)")
ax1.set_title('Original Length Distribution length = %.2f' %
              (sum(o_dist2) / realizations))
ax1.set_yscale('log')
ax1.set_xscale('log')
# Scatter the densities at the bin centers.
ax1.scatter(bins2[:-1] + 0.5 * (bins2[1:] - bins2[:-1]),
            n2,
            marker='x',
            c='red')
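A minimal continuation sketch, assuming the fitted pdf computed above (rx, rp) was meant to be overlaid on the same axes:

ax1.plot(rx, rp, c='blue', label='powerlaw fit')
ax1.legend()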
Example #6
from scipy.stats import powerlaw
import matplotlib.pyplot as plt
import numpy as np

pl = powerlaw(.8, loc=0, scale=2)
samples = pl.rvs(10000)  # create random variables
alpha, loc, scale = powerlaw.fit(samples)  # fit the variables

# plotting on linear axes
plt.figure(0)
plt.clf()
plt.hist(samples, bins=50, density=True, histtype='stepfilled', alpha=.9)  # `normed` was removed in Matplotlib 3
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
plt.plot(x, pl.pdf(x), linewidth=2, label="fit")
plt.legend()

# the same plot on log-log axes
plt.figure(1)
plt.clf()
plt.hist(samples, bins=50, density=True, histtype='stepfilled', alpha=.9)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
plt.plot(x, pl.pdf(x), linewidth=2, label="fit")
plt.legend()
plt.xscale("log", base=10, nonpositive='clip')  # the old basex/nonposy keywords are obsolete
plt.yscale("log", base=10, nonpositive='clip')

plt.show()
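A quick sanity check one might append (an assumption, not part of the original): compare the recovered parameters with the ones used to generate the sample.

print("true:   a=0.800, loc=0.000, scale=2.000")
print("fitted: a=%.3f, loc=%.3f, scale=%.3f" % (alpha, loc, scale))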
Example #7
# However, if this is not figured out, the costs are hourly and unbounded,
# especially in multi-year trials, so you'd need to model them with that uncertainty.
# This Monte Carlo _does not_ model this!

trial_odds_yes = .05
trial_odds_no = .95  # Odds of going to trial.

regulation_odds_yes = .05  # Odds of having an audit requirement imposed.
regulation_odds_no = .95  # This can be modeled further, to include several other costs we are leaving out.

# Statistical Values

# Requires (from the full script): numpy as np, loadtxt from numpy, and
# powerlaw from scipy.stats; `c` below is the script's progress helper.
settlements = loadtxt('settlements.dat')  # load external settlement data
fit = powerlaw.fit(settlements)  # fit a power law, which we think is a reasonable model

incidents = powerlaw(a=fit[0], loc=fit[1], scale=fit[2])  # frozen distribution of settlement sizes
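# A sketch (an assumption, not part of the original script): draw one
# simulated settlement amount per Monte Carlo run from the fitted distribution.
simulated_settlements = incidents.rvs(simulations)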

c.progress("Disclosure Legal")

# Disclosure complexity (Legal)
# (Lawyers * Lawyer Rate * Hours) + (Engineers * Eng Pay * Hours)
disclosure_lawyers = np.random.uniform(
    lawyers_min, lawyers_max,
    simulations)  # What's the minimum? What's the maximum?
disclosure_lawyer_rate = np.random.normal(
    lawyer_rate_average, lawyer_rate_variance, simulations
)  # Using https://thervo.com/costs/attorney-fees as stand-in data
disclosure_lawyer_hours = np.random.uniform(