def fit_incs_pd(ascs, ascs_month, months, flag=0):
    """Fit a distribution to the positive increments of the selected periods.

    Every entry of ``ascs`` whose matching ``ascs_month`` entry is contained
    in ``months`` is selected.  For each selected entry the differences
    between consecutive values are computed; only strictly positive
    differences are kept and handed to the fitting routine chosen by
    ``flag``.

    Args:
        ascs: Sequence of increasing periods.  Entries without ``__len__``
            (e.g. bare scalars) are ignored.
        ascs_month: Month label of each entry of ``ascs``, in the same order.
        months: The desired month, or a collection of desired months.
        flag: Selects the fitting routine:
            0 -> ``fitweibull``, 1 -> ``fitlognorm``, 2 -> ``gumbel_r.fit``,
            3 -> ``powerlaw.fit``, 4 -> ``genextreme.fit``.

    Returns:
        Whatever the selected fitting routine returns for the increments.

    Raises:
        ValueError: If ``flag`` is not one of the supported values.
    """
    incs = []
    for m, asc in enumerate(ascs):
        # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
        if not np.isin(ascs_month[m], months):
            continue
        if not hasattr(asc, "__len__"):
            # Scalars carry no consecutive values to difference.
            continue
        incs.extend(np.subtract(asc[1:], asc[:-1]))

    # Only strictly positive increments take part in the fit.
    incs = [inc for inc in incs if inc > 0]

    # An if/elif chain (rather than a dispatch table) keeps the lookup of
    # each fitting routine lazy: only the selected one has to be available.
    if flag == 0:
        optparms = fitweibull(incs)
    elif flag == 1:
        optparms = fitlognorm(incs)
    elif flag == 2:
        optparms = gumbel_r.fit(incs)
    elif flag == 3:
        optparms = powerlaw.fit(incs)
    elif flag == 4:
        # Was ``genextreme.it(incs)``, which raised AttributeError.
        optparms = genextreme.fit(incs)
    else:
        raise ValueError(
            "flag must be one of 0, 1, 2, 3 or 4, got %r" % (flag,))
    return optparms
def distest_loose(x):
    """Return a one-column table of p-values from several distribution tests.

    The input is first passed through ``_series``.  The sample is then
    tested for normality (Shapiro-Wilk, D'Agostino-Pearson) and compared,
    via Kolmogorov-Smirnov, against a normal, a powerlaw and an exponential
    distribution whose parameters are fitted from the same sample.

    Returns:
        A ``pandas.DataFrame`` indexed by test name with a single
        ``'p-value'`` column, rounded with ``DEC``.
    """
    sample = _series(x)

    # (label, test result) pairs; each result unpacks to (statistic, p-value).
    outcomes = [
        ('Shapiro-Wilk (normal)', shapiro(sample)),
        ("D'Agostino-Pearson (normal)", normaltest(sample)),
        ('Kolmogorov-Smirnov (normal)',
         kstest(sample, norm.cdf, norm.fit(sample))),
        ('Kolmogorov-Smirnov (powerlaw)',
         kstest(sample, powerlaw.cdf, powerlaw.fit(sample))),
        ('Kolmogorov-Smirnov (exponential)',
         kstest(sample, expon.cdf, expon.fit(sample))),
    ]

    labels = []
    p_values = []
    for label, outcome in outcomes:
        _, p_value = outcome
        labels.append(label)
        p_values.append(p_value)

    table = pd.DataFrame(p_values, labels, ['p-value'])
    return table.round(DEC)
def Generation(nb_graph):
    """Generate ``nb_graph`` random graphs and pair each with its fitness.

    Each graph comes from ``nx.fast_gnp_random_graph`` with a node count
    drawn from ``r.randint(80, 120)`` and an edge probability drawn from
    ``r.random()``.  For every graph the diameter is recorded together with
    the first element of ``pw.fit`` applied to its clustering coefficients
    and to its node degrees.  The three collected lists are handed to
    ``fitness``; the result is printed and indexed per graph.

    Returns:
        A list of ``(graph, F[j])`` tuples, one per generated graph, where
        ``F`` is the value returned by ``fitness``.
    """
    graphs = []  # list of graphs
    clustering_fits = []
    degree_fits = []
    diameters = []

    for _ in range(nb_graph):
        node_count = r.randint(80, 120)
        edge_prob = r.random()
        graph = nx.fast_gnp_random_graph(node_count, edge_prob)
        graphs.append(graph)
        diameters.append(nx.diameter(graph))

        clustering = nx.clustering(graph)
        degrees = nx.degree(graph)
        clustering_values = list(clustering.values())
        degree_values = list(dict(degrees).values())
        clustering_fits.append(pw.fit(clustering_values, discrete=True)[0])
        degree_fits.append(pw.fit(degree_values, discrete=True)[0])

    F = fitness(clustering_fits, degree_fits, diameters)
    print(F)

    # Index F explicitly: only ``F[j]`` is known to be supported here.
    return [(graphs[j], F[j]) for j in range(nb_graph)]
from collections import Counter  # was missing: Counter is used below

import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import powerlaw

# Script: read the "Degree" column of the topology-feature CSV, tabulate how
# often each degree value occurs, save that table, and fit a power law.
csv_data = pd.read_csv(
    '/Users/zhangchenhan/Desktop/sanguo_network/sanguozhiiiiiiiiiiiiiiiiiiiiiiiiiiiiii/sanguozhi_topo_features.csv'
)

# Degree value -> number of rows having that degree.
data = list(csv_data.Degree)
data = dict(Counter(data))

X = list(data.keys())    # distinct degree values
Y = list(data.values())  # occurrence count of each degree value
# X = np.array(X)
# Y = np.array(Y)

# Persist the (degree, count) table next to the script.
df = pd.DataFrame({'X': X, 'Y': Y})
df.to_csv('sanguozhi_powerlaw.csv')

# regr = linear_model.LinearRegression()
# regr.fit(X,Y) # fitting
#
# print('Coefficients: \n', regr.coef_,)
# print("Intercept:\n",regr.intercept_)
# print("Residual sum of squares: %.8f" % np.mean((regr.predict(X) - Y) ** 2)) # residual sum of squares

# NOTE(review): this fits scipy.stats.powerlaw to the distinct degree values
# only; the counts in Y are not used. Confirm that this is the intended input.
results = powerlaw.fit(X)
print(results)

# Visualization
# plt.title('power_law')
# plt.scatter(X, Y, color='black')
# plt.plot(X, regr.predict(X), color='blue',linewidth=3)
# plt.show()
# Label the current axes; the title reports the mean of `mins`.
plt.xlabel("Swaps")
plt.ylabel("Total length")
plt.title("Trajectory of Search for Min Total Length, Average Min = %.1f" %
          np.mean(mins))

#------------------------ Power Law Networks ----------------------------------
# presumably the original / final edge-length samples of the second data
# set -- TODO confirm against where `data` is built
o_dist2 = data[1][1]
f_dist2 = data[1][2]

#original and final length
# Summed length divided by `realizations` (defined outside this excerpt).
original_len = sum(o_dist2) / realizations
final_len = sum(f_dist2) / realizations

#fitting
# Fit `powerlaw` to the original sample and evaluate the fitted pdf on 100
# evenly spaced points between 0 and the sample maximum.
r = powerlaw.fit(o_dist2)
rx = np.linspace(0, max(o_dist2), 100)
rp = powerlaw.pdf(rx, *r)
# NOTE(review): if `powerlaw` is scipy.stats.powerlaw, fit() returns
# (a, loc, scale), so r[1] would be the location -- confirm this index.
l1_fit = 1 / r[1]

fig3, (ax1, ax2) = plt.subplots(2)
# alpha=0 makes the bars fully transparent: the histogram call is used for
# its returned densities (n2) and bin edges (bins2), which are scattered below.
n2, bins2, patches2 = ax1.hist(o_dist2, bins=100, density=True, alpha=0)
ax1.set(xlabel="log(Edge Length)", ylabel="log(counts)")
ax1.set_title('Original Length Distribution length = %.2f' %
              (sum(o_dist2) / realizations))
ax1.set_yscale('log')
ax1.set_xscale('log')
# Scatter the densities at the bin mid-points (left edge + half bin width).
# The call continues beyond the visible excerpt.
ax1.scatter(bins2[:-1] + 0.5 * (bins2[1:] - bins2[:-1]), n2, marker='x', c='red',
from scipy.stats import powerlaw
import matplotlib.pyplot as plt
import numpy as np

# Demo: draw a sample from a known scipy.stats.powerlaw distribution,
# re-estimate its parameters with powerlaw.fit, and overlay the fitted
# density on the sample histogram -- linear axes in figure 0, log-log axes
# in figure 1.
pl = powerlaw(.8, loc=0, scale=2)
samples = pl.rvs(10000)  # create random variables
alpha, loc, scale = powerlaw.fit(samples)  # fit the variables

# plotting
plt.figure(0)
plt.clf()
# `density=True` replaces the `normed` keyword, which Matplotlib removed.
plt.hist(samples, bins=50, density=True, histtype='stepfilled', alpha=.9)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
# Draw the density of the *fitted* parameters; previously the curve labelled
# "fit" was the pdf of the generating distribution and the fit was unused.
plt.plot(x, powerlaw.pdf(x, alpha, loc, scale), linewidth=2, label="fit")

plt.figure(1)
plt.clf()
plt.hist(samples, bins=50, density=True, histtype='stepfilled', alpha=.9)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
plt.plot(x, powerlaw.pdf(x, alpha, loc, scale), linewidth=2, label="fit")
# `base` / `nonpositive` replace the removed `basex`/`basey` / `nonposy`
# keywords of the log scale.
plt.xscale("log", base=10, nonpositive='clip')
plt.yscale("log", base=10, nonpositive='clip')

plt.show()
# However, if this is not figured out, the costs are hourly and unbridled. # Especially in multi-year trials, and you'd need to model it with that uncertainty. # This Monte Carlo _does not_ model this! trial_odds_yes = .05 trial_odds_no = .95 # Odds of going to trial. regulation_odds_yes = .05 # Odds of having an audit requirement imposed. regulation_odds_no = .95 # This can be modeled further, to include several other costs we are leaving out. # Statistical Values settlements = loadtxt( 'settlements.dat') # Loading in external data for settlements fit = powerlaw.fit( settlements ) # Fitting data to a simulated Power Law, which we think is reasonable. incidents = powerlaw(a=fit[0], loc=fit[1], scale=fit[2]) c.progress("Disclosure Legal") # Disclosure complexity (Legal) # (Lawyers * Lawyer Rate * Hours) + (Engineers * Eng Pay * Hours) disclosure_lawyers = np.random.uniform( lawyers_min, lawyers_max, simulations) # What's the minimum? What's the maximum? disclosure_lawyer_rate = np.random.normal( lawyer_rate_average, lawyer_rate_variance, simulations ) # Using https://thervo.com/costs/attorney-fees as stand-in data disclosure_lawyer_hours = np.random.uniform(