def tst_sim_2(n_sim=5000):
    """Estimate the correlation of window counts from two Lomax renewal processes.

    Every replication draws two separate sets of 800 Lomax inter-arrival
    times (shape ``k = 0.8``, scale ``1 / lmb`` with ``lmb = 2.0``), turns each
    into arrival times with a cumulative sum, and counts the arrivals that
    fall strictly inside ``(400, 900)``.  The Pearson correlation of the two
    counts across replications is built from running sums using population
    (divide-by-``n_sim``) moments.

    Args:
        n_sim: Number of replications.  Must be positive, and the counts
            must vary across replications, otherwise a denominator is zero.

    Returns:
        The estimated correlation, which is also printed.
    """
    k = 0.8
    lmb = 2.0
    s_n1n2 = 0
    s_n1 = 0
    s_n2 = 0
    s_n1_sq = 0
    s_n2_sq = 0
    for _ in range(n_sim):
        intervals1 = lomax.rvs(c=k, scale=(1 / lmb), size=800)
        intervals2 = lomax.rvs(c=k, scale=(1 / lmb), size=800)
        time_stamps1 = np.cumsum(intervals1)
        time_stamps2 = np.cumsum(intervals2)
        # Count inside NumPy; the built-in sum() would iterate element by
        # element in Python.
        n1 = np.count_nonzero((time_stamps1 > 400) & (time_stamps1 < 900))
        n2 = np.count_nonzero((time_stamps2 > 400) & (time_stamps2 < 900))
        s_n1n2 += n1 * n2
        s_n1_sq += n1 * n1
        s_n2_sq += n2 * n2
        s_n1 += n1
        s_n2 += n2
    cov = s_n1n2 / n_sim - (s_n1 / n_sim) * (s_n2 / n_sim)
    v_n1 = s_n1_sq / n_sim - (s_n1 / n_sim)**2
    v_n2 = s_n2_sq / n_sim - (s_n2 / n_sim)**2
    corln = cov / np.sqrt(v_n1 * v_n2)
    print("correlation: " + str(corln))
    return corln
def get_number_records_lomax(c=0.5, size=1000, discretize=True):
    """Count the records found in one random Lomax sample.

    Args:
        c: Lomax shape parameter passed to ``lomax.rvs``.
        size: Number of variates to draw.
        discretize: When true, every variate is rounded up with ``np.ceil``
            before the records are looked up.

    Returns:
        The length of whatever ``GeneralUtils.get_record_indexes`` returns
        for the (optionally rounded) sample.
    """
    draws = lomax.rvs(c=c, size=size)
    if discretize:
        draws = np.ceil(draws)
    record_positions = GeneralUtils.get_record_indexes(draws)
    return len(record_positions)
def tst_sim_3(k=7.0, theta=0.5, n_sim=5000):
    """Estimate the correlation of window counts after randomly splitting one process.

    Every replication draws 2000 Lomax inter-arrival times, accumulates them
    into arrival times, and assigns each arrival to one of two streams with
    ``np.random.choice(2, ...)``.  The arrivals of each stream that fall
    strictly inside ``(50, 90)`` are counted, and the Pearson correlation of
    the two counts across replications is built from running sums using
    population (divide-by-``n_sim``) moments.

    Args:
        k: Lomax shape parameter (SciPy's ``c``).
        theta: Lomax scale parameter.
        n_sim: Number of replications.  Must be positive, and the counts
            must vary across replications, otherwise a denominator is zero.

    Returns:
        The estimated correlation, which is also printed.
    """
    s_n1n2 = 0
    s_n1 = 0
    s_n2 = 0
    s_n1_sq = 0
    s_n2_sq = 0
    for _ in range(n_sim):
        intervals = lomax.rvs(c=k, scale=theta, size=2000)
        time_stamps = np.cumsum(intervals)
        # One 0/1 label per arrival decides which stream receives it.
        bi_furcator = np.random.choice(2, size=len(time_stamps))
        time_stamps1 = time_stamps[bi_furcator == 1]
        time_stamps2 = time_stamps[bi_furcator == 0]
        # Count inside NumPy; the built-in sum() would iterate element by
        # element in Python.
        n1 = np.count_nonzero((time_stamps1 > 50) & (time_stamps1 < 90))
        n2 = np.count_nonzero((time_stamps2 > 50) & (time_stamps2 < 90))
        s_n1n2 += n1 * n2
        s_n1_sq += n1 * n1
        s_n2_sq += n2 * n2
        s_n1 += n1
        s_n2 += n2
    cov = s_n1n2 / n_sim - (s_n1 / n_sim) * (s_n2 / n_sim)
    v_n1 = s_n1_sq / n_sim - (s_n1 / n_sim)**2
    v_n2 = s_n2_sq / n_sim - (s_n2 / n_sim)**2
    corln = cov / np.sqrt(v_n1 * v_n2)
    print("correlation: " + str(corln))
    return corln
def lomax_exponmix(k=4, theta=0.1, size=1000):
    """Compare a gamma-mixed exponential sample with a direct Lomax sample.

    An exponential variable whose rate is itself gamma distributed (shape
    ``k``, rate ``theta``) follows a Lomax distribution with shape ``k`` and
    scale ``theta``.  This draws one sample each way and returns both sample
    means next to the closed-form mean so the three can be compared.

    Args:
        k: Gamma/Lomax shape parameter.  Must exceed 1 for the closed-form
            mean ``theta / (k - 1)`` to be meaningful.
        theta: Lomax scale parameter (the rate of the mixing gamma).
        size: Number of variates drawn for each sample.

    Returns:
        Tuple ``(mean1, mean2, mean3)``: the mean of the exponential-mixture
        sample, the mean of the direct Lomax sample, and ``theta / (k - 1)``.
    """
    # NumPy's gamma takes a scale, i.e. the inverse of the rate ``theta``.
    lm = np.random.gamma(k, 1 / theta, size=size)
    # NumPy's exponential also takes a scale, hence 1 / rate.
    lomax_mix = np.random.exponential(1 / lm)
    mean1 = np.mean(lomax_mix)
    lomax_direct = lomax.rvs(c=k, scale=theta, size=size)
    mean2 = np.mean(lomax_direct)
    mean3 = theta / (k - 1)
    return mean1, mean2, mean3
def samples(self, k=None, lmb=None, size=1000, params=None):
    '''
    Draws random variates from a Lomax distribution.
    args:
        k: Shape of the Lomax; handed to ``determine_params``.
        lmb: Rate-like parameter of the Lomax; SciPy receives its
            reciprocal as the scale.  Handed to ``determine_params``.
        size: How many variates to generate.
        params: Alternative way of supplying the two parameters (the
            previous docstring called it a 2-d array with shape and
            scale); it is resolved by ``determine_params``.
    returns:
        The output of ``lomax.rvs`` for the resolved parameters.
    '''
    shape, rate = self.determine_params(k, lmb, params)
    return lomax.rvs(c=shape, scale=1 / rate, size=size)
def sim_lomax(n_sim=10000):
    """Simulate how often a Lomax renewal process hits a random unit window.

    First prints the reciprocal of the Lomax mean for shape ``c = 1.88``
    (presumably the theoretical benchmark for the simulated figure).  Each
    replication then picks a window start ``j`` uniformly on ``[0, 1000)``,
    accumulates Lomax inter-arrival times from zero and counts every arrival
    that lands strictly inside ``(j, j + 1)``.

    Args:
        n_sim: Number of replications.  Must be positive.

    Returns:
        Average number of arrivals caught per replication, which is also
        printed.
    """
    c = 1.88
    # Only the mean is used, so only the mean is requested.
    mean = lomax.stats(c, moments='m')
    print(1 / mean)
    catches = 0
    for _ in range(n_sim):
        j = np.random.uniform() * 1000
        t_i = 0
        # NOTE(review): arrivals past j + 1 can never be counted, so the
        # + 500 margin only consumes extra draws; kept so that seeded runs
        # consume the random stream exactly as before.
        while t_i < j + 500:
            t_i += lomax.rvs(c)
            if j < t_i < j + 1:
                catches += 1
    rate = catches / n_sim
    print(rate)
    return rate
def sim_lomax(intr_strt=20, n_sim=10000):
    """Simulate how often a Lomax renewal process hits a fixed unit window.

    First prints the reciprocal of the Lomax mean for shape ``c = 1.88``
    (presumably the theoretical benchmark for the simulated figure).  Each
    replication then accumulates Lomax inter-arrival times from zero and
    counts every arrival that lands strictly inside
    ``(intr_strt, intr_strt + 1)``.

    Args:
        intr_strt: Start of the unit-length observation window.
        n_sim: Number of replications.  Must be positive.

    Returns:
        Average number of arrivals caught per replication, which is also
        printed.
    """
    c = 1.88
    # Only the mean is used, so only the mean is requested.
    mean = lomax.stats(c, moments='m')
    print(1 / mean)
    catches = 0
    for _ in range(n_sim):
        j = intr_strt
        t_i = 0
        # NOTE(review): arrivals past j + 1 can never be counted, so the
        # + 50 margin only consumes extra draws; kept so that seeded runs
        # consume the random stream exactly as before.
        while t_i < j + 50:
            t_i += lomax.rvs(c)
            if j < t_i < j + 1:
                catches += 1
    rate = catches / n_sim
    print(rate)
    return rate
def lomax_renewal_correlation(k=2.0, theta=1.0, n_sim=5000):
    """Simulate the mean number of Lomax renewal arrivals before time 100.

    Every replication draws 1200 Lomax inter-arrival times, accumulates them
    into arrival times and counts those strictly below 100.  The average
    count is printed together with a closed-form reference value.

    Args:
        k: Lomax shape parameter (SciPy's ``c``).
        theta: Lomax scale parameter.
        n_sim: Number of replications.  Must be positive.

    Returns:
        The simulated mean count, which is also printed.
    """
    s_n1 = 0
    for _ in range(n_sim):
        intervals = lomax.rvs(c=k, scale=theta, size=1200)
        time_stamps = np.cumsum(intervals)
        # Count inside NumPy; the built-in sum() would iterate element by
        # element in Python.
        n1 = np.count_nonzero(time_stamps < 100)
        s_n1 += n1
    e_n1 = s_n1 / n_sim
    print("simulated mean: " + str(e_n1))
    # NOTE(review): the reference value below uses a horizon of 200 while
    # the count above covers [0, 100) -- confirm which one is intended.
    print("actual mean-2: " + str((k - 1) * 200 / theta))
    return e_n1
def sim_lomax_v2(n_sim=20000):
    """Estimate the arrival rate of a Lomax renewal process in two ways.

    Each replication picks a window start ``j`` uniformly on ``[0, 1000)``,
    accumulates Lomax inter-arrival times (shape ``c = 1.88``) from zero and
    looks at the arrivals landing strictly inside ``(j, j + 30)``.

    Two figures are printed and returned:

    * arrivals caught per replication per unit of window length
      (``catches / n_sim / 30``);
    * arrivals after the first one in each window, divided by the summed
      time from the window start to the last arrival caught in it.

    Args:
        n_sim: Number of replications.  Must be positive.

    Returns:
        Tuple ``(window_rate, arrival_rate)`` holding the two figures above.

    Raises:
        ZeroDivisionError: If no arrival landed in any window, so the
            accumulated time is zero.
    """
    c = 1.88
    catches = 0
    catches2 = 0
    total_t = 0
    for _ in range(n_sim):
        j = np.random.uniform() * 1000
        t_i = 0
        tt = 0
        # Starts at -1 so that the first arrival in the window is not counted.
        catches1 = -1
        # NOTE(review): arrivals past j + 30 can never be counted, so the
        # + 100 margin only consumes extra draws; kept so that seeded runs
        # consume the random stream exactly as before.
        while t_i < j + 100:
            t_i += lomax.rvs(c)
            if j < t_i < j + 30:
                tt = t_i  # remember the latest arrival inside the window
                catches += 1
                catches1 += 1
        # Windows without any arrival leave tt at 0 and catches1 at -1;
        # both contributions are clamped to zero.
        total_t += max((tt - j), 0)
        catches2 += max(0, catches1)
    window_rate = catches / n_sim / 30
    print(window_rate)
    arrival_rate = catches2 / total_t
    print(arrival_rate)
    return window_rate, arrival_rate
def _generate_timestamps(self, last=None): """Generate list of timestamps. Args: last (datetime, optional): Datetime of last message. If ``None``, defaults to current date. Returns: list: List with timestamps. """ if not last: last = datetime.now() last = last.replace(microsecond=0) c = 1.0065 scale = 40.06 loc = 30 ts_ = [0] + lomax.rvs(c=c, loc=loc, scale=scale, size=self.size - 1, random_state=self.seed).cumsum().tolist() ts = [last - timedelta(seconds=t * 60) for t in ts_] return ts[::-1]
def comparisson_number_of_records_lomax(c):
    """Plot record counts of raw versus rounded-up Lomax samples by sample size.

    For sample sizes 10, 10010, 20010, ... up to one million, a fresh Lomax
    sample is drawn and ``GeneralUtils.get_record_indexes`` is applied to the
    raw values and to the same values rounded up with ``np.ceil``.  Both
    series of counts are printed and then shown as scatter plots against the
    sample size.

    Args:
        c: Lomax shape parameter passed to ``lomax.rvs``.
    """
    upper = 10**6
    sizes = []
    record_counts = []
    integer_record_counts = []
    for n in range(10, upper + 1, 10000):
        draws = lomax.rvs(c=c, size=n)
        rounded_up = np.ceil(draws)
        raw_records = GeneralUtils.get_record_indexes(draws)
        integer_records = GeneralUtils.get_record_indexes(rounded_up)
        sizes.append(n)
        record_counts.append(len(raw_records))
        integer_record_counts.append(len(integer_records))
    print(record_counts)
    print(integer_record_counts)
    for series in (record_counts, integer_record_counts):
        plt.scatter(sizes, series)
    plt.show()
def rvs_fn4(n):
    """Draw ``n`` variates from a Lomax distribution with shape 0.9.

    Location and scale are left at SciPy's defaults.
    """
    shape = .9
    variates = lomax.rvs(c=shape, size=n)
    return variates
def samples_(k, lmb, size=1000):
    """Draw Lomax variates with shape ``k`` and scale ``1 / lmb``.

    Args:
        k: Lomax shape parameter (SciPy's ``c``).
        lmb: Rate-like parameter; its reciprocal is used as SciPy's scale.
        size: Number of variates to draw.

    Returns:
        The output of ``lomax.rvs`` for those parameters.
    """
    scale = 1 / lmb
    return lomax.rvs(c=k, scale=scale, size=size)
# Plot the probability density function (``pdf``) between the 1st and the
# 99th percentile of the distribution:
x = np.linspace(
    start=lomax.ppf(0.01, c),
    stop=lomax.ppf(0.99, c),
    num=100,
)
ax.plot(
    x,
    lomax.pdf(x, c),
    'r-',
    lw=5,
    alpha=0.6,
    label='lomax pdf',
)

# Calling the distribution object like a function fixes its shape, location
# and scale and gives back a "frozen" RV object that carries them.
# Freeze the distribution and draw the frozen ``pdf`` on the same axes:
rv = lomax(c)
ax.plot(
    x,
    rv.pdf(x),
    'k-',
    lw=2,
    label='frozen pdf',
)

# Round-trip a few probabilities through ``ppf`` and ``cdf`` as an accuracy
# check (the result is not stored):
vals = lomax.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], lomax.cdf(vals, c))
# True

# Draw random variates:
r = lomax.rvs(c, size=1000)

# Overlay their normalised histogram for comparison with the curves:
ax.hist(
    r,
    density=True,
    histtype='stepfilled',
    alpha=0.2,
)
ax.legend(loc='best', frameon=False)
plt.show()
# Scratch / notebook-style exploration.  The text mentions a scatter plot of
# grid.predict(x_test) against y_test, throwaway uniform.rvs / lomax.rvs
# draws, and loading 'amnes/data/test.csv' before splitting it into numeric
# and categorical columns with VariableSelector.
# NOTE(review): the line breaks of this snippet were lost, so it cannot be
# determined from here which statements are live and which are commented
# out -- restore the original formatting before relying on it.
# NOTE(review): os.path.abspath("__file__") receives the string literal, so
# the path resolves against the current working directory -- confirm that
# this is intended.
# 947,649,385 import matplotlib.pyplot as plt plt.scatter(grid.predict(x_test), y_test) # # plt.hist(alpha.rvs(1000, size = 100)) # # plt.hist(uniform.rvs(1e-20, 1e-3, size=1000)) # gamma.rvs(0.1, size = 1000) from scipy.stats import lognorm # plt.hist([grid.predict(x_test), y_test]) # plt.hist(gamma.rvs(10, size = 1000)) # plt.hist(gamma.rvs(100, size = 1000)) # plt.hist(gamma.rvs(1000, size = 1000)) # plt.hist(gamma.rvs(10000, size = 1000)) # # betaprime.rvs(12, 100000, size=1000) # lomax.rvs(100000, size=10000) uniform.rvs(0.00001, 0.1, size=1000) lomax.rvs(2, size=100) # # test_data = pd.read_csv(os.path.join( os.path.dirname(os.path.abspath("__file__")), 'amnes/data/test.csv'), index_col="Id") # # test_data_num = VariableSelector( variable_type="numeric").fit_transform(test_data) test_data_cat = VariableSelector( variable_type="categorical").fit_transform(test_data) test_data_num.shape
def samples(self, k=None, lmb=None, size=1000, params=None):
    """Generate Lomax random variates.

    ``k``, ``lmb`` and ``params`` are first resolved into a shape and a
    rate-like value by ``self.determine_params``; SciPy then receives the
    shape as ``c`` and the reciprocal of the rate as ``scale``.

    Args:
        k: Lomax shape, forwarded to ``determine_params``.
        lmb: Rate-like parameter, forwarded to ``determine_params``.
        size: Number of variates to generate.
        params: Forwarded to ``determine_params`` unchanged.

    Returns:
        The output of ``lomax.rvs`` for the resolved parameters.
    """
    resolved = self.determine_params(k, lmb, params)
    shape, rate = resolved
    inverse_rate = 1 / rate
    return lomax.rvs(c=shape, scale=inverse_rate, size=size)