def generateCallData(data, mean_calls=5, call_dur=1.15, mean_sms=25, mean_mms=10, seed=None):
    """Takes a generateDateCallers() dataframe and returns one with call info.

    Adds four new columns (in place) to the dataframe:
      1) 'min'   : total minutes of calls between nodes (per day)
      2) 'calls' : total number of calls between nodes (per day)
      3) 'sms'   : total number of SMS between nodes (per day)
      4) 'mms'   : total number of MMS between nodes (per day)

    Parameters
    ----------
    data : pandas.DataFrame
        Dataframe as created by generateDateCallers.
    mean_calls : float
        Mean of the Poisson distribution from which to draw the number of
        calls between nodes for that day.
    call_dur : float
        Shape parameter of the log-logistic (Fisk) distribution describing
        the duration of a single call. Drawn once per call, then summed.
        (Docstring previously referred to this as ``min_dur``.)
    mean_sms : float
        Mean of the Poisson distribution from which to draw SMS data.
    mean_mms : float
        Mean of the Poisson distribution from which to draw MMS data.
    seed : int or None
        Optional seed for reproducible draws.

    Returns
    -------
    pandas.DataFrame
        The same dataframe with the four new columns added.
    """
    dfrows = len(data)
    if seed is not None:
        # Bug fix: every draw below uses numpy's global RNG, so numpy --
        # not the stdlib `random` module -- must be the one seeded.
        np.random.seed(seed)
    calls = np.random.poisson(mean_calls, dfrows)
    data['calls'] = calls
    # Per row: draw one Fisk duration per call and sum, rounded to 0.1 min.
    data['min'] = [round(np.sum(fisk.rvs(call_dur, size=x)), 1) for x in calls]
    data['sms'] = np.random.poisson(mean_sms, dfrows)
    data['mms'] = np.random.poisson(mean_mms, dfrows)
    return (data)
def _sim_cash_buffer( self, size: int, median_solvency_days: float, cash_buffer: float, max_cash_buffer_days: Optional[float] = np.inf, ) -> np.array: """ Simulate cash buffer of companies Parameters ---------- size: number of companies in simulated population median_solvency_days: median number of days till companies go insolvent cash_buffer: total annual cash buffer for the simulated companies max_cash_buffer_days: hard cap on maximum number of cash buffer days a company can have """ # Rejection sampling to get truncated log-logistic distribution of days till insolvency solvent_days = np.zeros((0,)) while solvent_days.shape[0] < size: s = fisk.rvs(size=(size,), c=self.beta, scale=median_solvency_days) accepted = s[s <= max_cash_buffer_days] solvent_days = np.concatenate((solvent_days, accepted), axis=0) solvent_days = solvent_days[:size] total_solvent_days = solvent_days.sum() if total_solvent_days == 0: corp_cash_buffer = np.zeros(size) else: corp_cash_buffer = np.array( [days / total_solvent_days * cash_buffer for days in solvent_days] ) return corp_cash_buffer
def loglogistic(rng, n_samples, sigma=10, c=2.2):
    """Draw log-logistic (Fisk) noise scaled by ``sigma``.

    Parameters
    ----------
    rng : int seed or numpy RandomState/Generator
        Source of randomness for the draw. (Bug fix: previously accepted
        but silently ignored, so results were irreproducible.)
    n_samples : int
        Number of samples to draw.
    sigma : float
        Scale factor applied to the raw Fisk draws.
    c : float
        Fisk shape parameter; the second moment below is finite only for
        c > 2.

    Returns
    -------
    (noise, expect_noise, noise_2nd_moment) where the last two are the
    closed-form first and second moments of the scaled distribution.
    """
    from scipy.stats import fisk
    # Bug fix: route the draw through the supplied RNG instead of numpy's
    # global state.
    noise = sigma * fisk.rvs(c, size=n_samples, random_state=rng)
    expect_noise = sigma * (np.pi / c) / np.sin(np.pi / c)
    noise_2nd_moment = (sigma ** 2) * (2 * np.pi / c) / np.sin(2 * np.pi / c)
    return noise, expect_noise, noise_2nd_moment
def check_fiskprior():
    """Smoke-test the truncated-Fisk prior fit/sample round trip.

    Draws Fisk data clipped at 1.0, fits ``TruncatedFisk_Prior`` to it,
    samples from the fitted parameters via ``truncfiskprior_rvs`` and
    prints the fraction of mass at the 1.0 truncation point for both the
    original and the resampled data so they can be compared by eye.
    """
    data = fisk.rvs(c=0.5, scale=1.0, size=1000)
    data[data > 1.0] = 1.0  # clip to the truncation point
    # Bug fix: Python 2 print statements are syntax errors under the
    # Python 3 used elsewhere in this file -- converted to print() calls
    # with identical output.
    print("Orig Frac=" + str(np.sum(data == 1.0) / float(data.shape[0])))
    rv = TruncatedFisk_Prior(data)
    res = rv.fit()
    print(res.params)
    print("================================")
    x = truncfiskprior_rvs(res.params[0], res.params[1], res.params[2], size=1000)
    print(min(x), max(x))
    print("Frac=" + str(np.sum(x == 1.0) / float((x.shape[0]))))
def truncfiskprior_rvs(prob, c, scale, size):
    """Sample from a Fisk distribution truncated at 1.0 with a point mass
    at the truncation point.

    Rejection-samples ``size`` Fisk(c, scale) values <= 1.0, then forces a
    fraction ``prob`` of randomly chosen entries to exactly 1.0.

    Parameters
    ----------
    prob : float
        Probability mass at the truncation point; clamped to [1e-10, 1.0].
    c, scale : float
        Fisk shape and scale parameters.
    size : int
        Number of samples to return.

    Returns
    -------
    numpy array of length ``size``; all zeros if rejection sampling fails
    repeatedly (e.g. nearly all of the mass lies above 1.0).
    """
    # Bug fix: clamp from above as well -- prob > 1 would make
    # int(prob * size) exceed the population and np.random.choice raise.
    prob = min(max(prob, 1e-10), 1.0)
    falseEntries = np.zeros((0, ))
    failure_ctr = 5  # give up after 5 consecutive draws with no acceptance
    while falseEntries.shape[0] < size and failure_ctr > 0:
        s = fisk.rvs(c, loc=0.0, scale=scale, size=size)
        accepted = s[(s <= 1.0)]
        if len(accepted) == 0:
            failure_ctr -= 1
        falseEntries = np.concatenate((falseEntries, accepted), axis=0)
    falseEntries = falseEntries[:size]
    if failure_ctr <= 0:
        falseEntries = np.zeros(size)
    if size > 0:
        indexes = np.random.choice(range(size), size=int(prob * size), replace=False)
        falseEntries[indexes] = 1.0
    return falseEntries
def _fit_candidate(story_id, name, dist, data):
    """Fit one scipy distribution (loc fixed at 0) to ``data``.

    Returns (nll, ks_result, bic, params) where nll is the penalized
    negative log-likelihood, ks_result is the two-sample KS test against
    a same-size resample, and bic is from compute_BIC.
    """
    print("[" + str(story_id) + "]Fitting " + name)
    params = dist.fit(data, floc=0)
    nll = dist.nnlf(params, data)
    rvs = dist.rvs(*params, size=data.shape[0])
    ks = ks_2samp(data, rvs)
    bic = compute_BIC(data, len(params), nll)
    return nll, ks, bic, params


def run_Parametric(story_id, data):
    """Fit five parametric distributions to ``data`` and collect metrics.

    Fits Fisk, inverse-Gaussian, log-normal, Weibull and Gamma (loc fixed
    at 0) and returns a flat list: the five NLLs, then the five KS
    results, then the five BICs, then the five parameter tuples -- in
    that distribution order. (Bug fix: converted Python 2 print
    statements to print() calls; the repeated fit stanzas were folded
    into ``_fit_candidate``.)
    """
    candidates = [
        ("Fisk", fisk),
        ("IG", invgauss),
        ("LN", lognorm),
        ("Weibull", weibull_min),
        ("Gamma", gamma),
    ]
    results = [_fit_candidate(story_id, name, dist, data) for name, dist in candidates]
    nlls = [r[0] for r in results]
    ks_stats = [r[1] for r in results]
    bics = [r[2] for r in results]
    params = [r[3] for r in results]
    # Same flat ordering as the original hand-written return list.
    return nlls + ks_stats + bics + params
# Evaluate the pdf over the central 98% of the distribution's support.
x = np.linspace(fisk.ppf(0.01, c), fisk.ppf(0.99, c), 100)
ax.plot(x, fisk.pdf(x, c), 'r-', lw=5, alpha=0.6, label='fisk pdf')
# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.
# Freeze the distribution and display the frozen ``pdf``:
rv = fisk(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
# Check accuracy of ``cdf`` and ``ppf``:
vals = fisk.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], fisk.cdf(vals, c))
# True
# Generate random numbers:
r = fisk.rvs(c, size=1000)
# And compare the histogram:
# Bug fix: `normed` was deprecated and then removed from matplotlib;
# `density=True` is the equivalent normalized-histogram option.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()