Example No. 1
def get_number_records_pareto(b=3, power=6, discretize=True):
    size = 10**power
    if discretize:
        sample = np.ceil(pareto.rvs(b=b, size=size))
    else:
        sample = pareto.rvs(b=b, size=size)

    fig = plt.figure(figsize=(8, 4), dpi=200)
    ax = fig.add_subplot(111)
    ax.scatter(range(0, size),
               sample,
               c='blue',
               alpha=0.25,
               s=0.5,
               label="realization")

    record_indexes = GeneralUtils.get_record_indexes(sample)
    # for c_p in record_indexes:
    ax.scatter(record_indexes,
               sample[record_indexes],
               c='green',
               marker='*',
               label="records = {0}".format(len(record_indexes)))
    ax.legend()
    if discretize:
        message = 'Discretized Pareto distribution, alpha={alpha}, n=10^{power}'.format(
            alpha=b, power=power)
    else:
        message = 'Continuous Pareto distribution, alpha={alpha}, n=10^{power}'.format(
            alpha=b, power=power)
    plt.title(message)
    plt.show()

    return len(record_indexes)
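
GeneralUtils.get_record_indexes is not shown in this listing; a minimal sketch of a compatible helper, assuming a "record" means a strict running maximum of the sample, could look like this:

import numpy as np

def get_record_indexes(sample):
    # indices i where sample[i] strictly exceeds every earlier value; the first point is always a record
    sample = np.asarray(sample)
    is_record = np.empty(sample.size, dtype=bool)
    is_record[0] = True
    is_record[1:] = sample[1:] > np.maximum.accumulate(sample)[:-1]
    return np.flatnonzero(is_record)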
Example No. 2
    def make_population(self, n_people):
        age_class = pm.draw_ac(n_people)
        profession = pm.draw_prof(n_people)
        health_status = pm.draw_hs(n_people)
        education_level = pm.draw_el(n_people)
        income = pareto.rvs(
            b=1,
            scale=pm.person_params['income'],
            size=n_people,
        )
        cobb_c = [pm.person_params['cobb_c']] * n_people
        cobb_d = [pm.person_params['cobb_d']] * n_people

        population = pd.DataFrame(list(
            zip(age_class, profession, health_status, education_level, income,
                cobb_c, cobb_d)),
                                  columns=[
                                      'age_class', 'profession',
                                      'health_status', 'education_level',
                                      'income', 'cobb_c', 'cobb_d'
                                  ])

        population.to_sql('person',
                          self.connection,
                          index=False,
                          if_exists='append')
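
The income draw above can be reproduced on its own; note that for b=1 the Pareto distribution has an infinite mean, so occasional draws are huge relative to the scale. A stand-alone sketch in which pm.person_params['income'] is replaced by a hypothetical positive scale:

from scipy.stats import pareto

income_scale = 1000.0  # hypothetical stand-in for pm.person_params['income']
incomes = pareto.rvs(b=1, scale=income_scale, size=5, random_state=0)
print(incomes)  # every draw is >= income_scale; the b=1 tail is extremely heavy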
Example No. 3
 def draw_rdn_powerlaw(self):
     drawed = pareto.rvs(self.alpha)
     drawed /= (self.dt**(-self.alpha) - self.tau_c **
                (-self.alpha))**(1 / (self.alpha + 1))
     if drawed <= self.tau_c:
         return drawed
     else:
         return self.draw_rdn_powerlaw()
 def random_sample(self, count):
     sequence = []
     i = 1
     while len(sequence) < count:
         sequence = np.array(pareto.rvs(
             self.__alpha, size=i * count)) + (self.__low_bound - 1)
         sequence = sequence[sequence < self.__up_bound]
         i += 1
     return sequence[:count]
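
random_sample above depends on the private attributes __alpha, __low_bound and __up_bound; a stand-alone sketch of the same oversample-and-reject idea, with those names turned into explicit (hypothetical) parameters:

import numpy as np
from scipy.stats import pareto

def bounded_pareto_sample(alpha, low_bound, up_bound, count):
    # draw progressively larger batches of a shifted Pareto and keep only values below up_bound
    kept = np.empty(0)
    i = 1
    while kept.size < count:
        draws = pareto.rvs(alpha, size=i * count) + (low_bound - 1)
        kept = draws[draws < up_bound]
        i += 1
    return kept[:count]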
Example No. 5
def estimate_alpha_pareto_sample(size):
    sample = np.ceil(pareto.rvs(b=1.5, size=size))
    fit_estimating_discrete = pw.Fit(data=sample,
                                     discrete=True,
                                     estimate_discrete=True)
    print(fit_estimating_discrete.power_law.alpha)
    print(fit_estimating_discrete.power_law.sigma)
    print(fit_estimating_discrete.power_law.xmin)
    return fit_estimating_discrete.power_law.alpha
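
For reference (not stated in the snippet): scipy's pareto(b) has density b / x**(b + 1) on x >= 1, so the exponent that powerlaw reports as power_law.alpha should land near b + 1, i.e. roughly 2.5 for b=1.5 here (the np.ceil discretization shifts it slightly). A quick sanity check that avoids the powerlaw package, using the analytic ML estimator for the continuous case:

import numpy as np
from scipy.stats import pareto

b = 1.5
sample = pareto.rvs(b=b, size=100_000, random_state=0)
alpha_hat = 1.0 + sample.size / np.sum(np.log(sample))  # ML estimate of the density exponent, xmin = 1
print(alpha_hat)  # close to b + 1 = 2.5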
Example No. 6
def get_pareto_data(shape, location, number_of_data):
    """
    function to generate data from Pareto distribution specified by shape (alpha) and location (gamma) parameters
    :param shape: alpha parameter of pareto distribution
    :param location: gamma parameter of pareto distribution
    :param number_of_data: number of data samples to generate
    :return: numpy.ndarray with samples from pareto distribution specified by :param shape and :param location.
    """
    data = pareto.rvs(shape, scale=location, size=number_of_data)
    return data
 def generate_pareto(self, n):
     b = 1.5
     to_return = np.zeros(n, dtype=float)
     for i in range(n):
         while True:
             potential_value = pareto.rvs(b, size=1)
             if potential_value[0] <= 10.0:
                 to_return[i] = potential_value/10.0
                 break
     return to_return
Example No. 8
def generate_synth_data(anom_mode, noise_percentage):
    if anom_mode == 1:
        n_anom1 = 200
        n_anom2 = 200
        anom_views = []

        for i in range(n_anom1):
            nViews = int(np.random.uniform(low=100, high=1000))
            falseEntries = np.ones((nViews, ))
            anom_views.append(falseEntries)

        # Dirac Delta
        for i in range(n_anom2):
            nViews = int(np.random.uniform(low=100, high=1000))
            complete_prob = np.random.uniform(low=0.8, high=0.9)
            falseEntries = np.random.uniform(low=0.8, high=0.9, size=nViews)
            compl_indexes = np.random.choice(range(falseEntries.shape[0]),
                                             size=int(complete_prob * nViews))
            falseEntries[compl_indexes] = 1.0
            anom_views.append(falseEntries)

    elif anom_mode == 2:
        ## EXPON Param 0.05
        n_anom1 = 400
        anom_views = []

        for i in range(n_anom1):
            nViews = int(np.random.uniform(low=100, high=1000))
            views = expon.rvs(0.05, size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 3:
        ##
        n_anom1 = 400
        anom_views = []

        for i in range(n_anom1):
            nViews = int(np.random.uniform(low=100, high=1000))
            views = pareto.rvs(50.0, size=nViews) - 1.0
            views[views > 1.0] = 1.0
            anom_views.append(views)

    anom_views = np.array(anom_views)

    pbar = ProgressBar()
    synth_fisk_params = []
    for i in pbar(range(len(anom_views))):
        synth_single = fit_fisk(anom_views[i], 0)
        synth_fisk_params.append(
            [synth_single[0], synth_single[1][0], synth_single[1][2]])

    print len(synth_fisk_params)
    synth_fisk_params = np.array(synth_fisk_params)
    return synth_fisk_params
Example No. 9
def KS_MC(a, n_events, n_draws=10000):
    """
    Run MC trials of computing KS D values for data draw from power law with cumulative index a.
    """

    D = []
    for _ in range(n_draws):
        rvs = pareto.rvs(a, size=n_events)
        aML = ML_index_analytic(rvs, 1.)
        cdf = lambda x: pareto.cdf(x, aML)
        D.append(kstest(rvs, cdf)[0])
    return  np.sort(D)
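
ML_index_analytic is not shown here; for a Pareto sample with known lower bound xmin, the analytic maximum-likelihood estimate of the index is n / sum(ln(x_i / xmin)). A sketch of what such a helper might compute:

import numpy as np

def ml_index_analytic(rvs, xmin=1.0):
    # analytic ML estimate of the Pareto index for samples bounded below by xmin
    rvs = np.asarray(rvs)
    return rvs.size / np.sum(np.log(rvs / xmin))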
Example No. 10
    def sample_service_time(self, time):
        """
        Sample next availability for service unit
        """

        if self.service_time_dist == 'expon':
            service_avail_time = time + self.exp_dist()

        elif self.service_time_dist == 'pareto':
            service_avail_time = time + pareto.rvs(self.mean_service_time)

        self.next_service = service_avail_time
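
Note that the first argument of pareto.rvs is the shape parameter b, not the mean, so passing self.mean_service_time only yields that mean if it happens to equal b/(b - 1). If the intent is a unit-scale Pareto service time with a given mean m > 1, one option (an assumption, not part of the original class) is to solve for the shape:

from scipy.stats import pareto

def pareto_service_time(mean_service_time):
    # pick the shape so the unit-scale Pareto has the requested mean (requires mean > 1)
    b = mean_service_time / (mean_service_time - 1.0)
    return pareto.rvs(b)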
Example No. 11
	def GetBoulders(self, pos, R, distr='exp', alpha=1.1):
		"""
		we choose to base all this on the volume
		possible distributions: 'exp' or 'pareto'
		alpha is the shape parameter for pareto.
		"""
		#new, based on volume
		boul=[]
		area=R**2*pi
		uniform=random.uniform
		nboul=int(self.boulderFreq*area)
		thresh=0.125/1000.0 #minimum volume
		meanR=pow(self.meanBoulderV/pi*3.0/4.0, 1.0/3.0) #from mean volume
		meanV=self.meanBoulderV
		Vols=[]
		#randomly dist.
		if distr=='exp':
			lambd=1./(meanV-thresh) #for exponential
			for i in range(nboul):
				vol=thresh+random.expovariate(lambd)
				Vols.append(vol)
		elif distr=='pareto':
			xm=(alpha-1)/alpha*(meanV-thresh) #pareto
			for i in range(nboul):
				vol=thresh+pareto.rvs(alpha, scale=xm)
				Vols.append(vol)
		else:
			raise Exception('unknown distribution %s'%str(distr))
		Vols=sorted(Vols, key=lambda vol: -vol) #sort, biggest first to get it in there..
		for vol in Vols:
			placed = False
			radius=pow(vol/pi*3.0/4.0, 1.0/3.0)
			#end of the new stuff
			#print "A: %f n:%f %f %f %f"%(area, nboul, i, nboul, radius)
			i=0
			while not placed:
				placed=True
				z=-uniform(0+radius,0.2+radius)
				x=uniform(pos[0]-R-radius/2., pos[0]+R+radius/2.)
				y=uniform(pos[1]-R-radius/2., pos[1]+R+radius/2.)
				for o in boul:
					if sqrt(pow(x-o.pos[0],2)+pow(y-o.pos[1],2)+pow(z-o.z,2))<(radius+o.radius):
						placed=False
						#print "i=%f nboul=%f fcould not place boulder b.x:%f b.y:%f b.z: %f x:%f y:%f z:%f"%(i,nboul,o.pos[0],o.pos[1], o.z,x,y,z)
						break
				if placed:
					boul.append(Boulder([x,y],radius,z))
				i+=1
				if i>50: 
					print "could not place all stones.. frequency/area is obviously too big."
					break
		return boul
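
In the 'pareto' branch, xm = (alpha - 1)/alpha * (meanV - thresh) is chosen so that the expected boulder volume equals meanV: a Pareto with shape alpha and scale xm has mean alpha*xm/(alpha - 1) for alpha > 1, and adding thresh recovers meanV. A quick empirical check with arbitrary illustration values (not taken from the class):

from scipy.stats import pareto

alpha, meanV, thresh = 2.5, 0.5, 0.125 / 1000.0
xm = (alpha - 1) / alpha * (meanV - thresh)
vols = thresh + pareto.rvs(alpha, scale=xm, size=200_000, random_state=0)
print(vols.mean(), meanV)  # the sample mean should sit close to meanV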
Example No. 12
		def oracle():
			rs = pareto.rvs(2./3,size=BATCH_SIZE)
			# mask for which of the points to sample from [0,1] versus from pareto
			signs = np.random.random(BATCH_SIZE) < 1/2.
			# choose a random subset of the batch of samples to come from the "augmented" part of the pareto
			unit_interval_points = np.random.random(BATCH_SIZE)
			rs = rs * signs + unit_interval_points * (1 - signs)
			ids = np.random.choice(len(weights), BATCH_SIZE, p=weights)
			v = random_unit_vectors()
			xs = v[:,0] * rs + np.array(centers)[ids]
			ys = v[:,1] * rs
			out = np.vstack((xs,ys)).T
			return out
Example No. 13
def get_number_records_pareto_discrete_vs_continuous(b=4, power=6):
    size = 10**power
    sample = pareto.rvs(b=b, size=size)
    fig = plt.figure(figsize=(8, 4), dpi=200)
    ax = fig.add_subplot(111)
    ax.scatter(range(0, size),
               sample,
               c='blue',
               alpha=0.25,
               s=0.5,
               label="realization")
    record_indexes = GeneralUtils.get_record_indexes(sample)
    ax.scatter(record_indexes,
               sample[record_indexes],
               c='green',
               marker='*',
               label="records = {0}".format(len(record_indexes)))
    ax.legend()
    message = 'Continuous Pareto distribution, alpha={alpha}, n=10^{power}'.format(
        alpha=b, power=power)
    plt.title(message)
    plt.show()

    # hill_estimator(sample)

    _integer_sample = np.ceil(sample)
    fig2 = plt.figure(figsize=(8, 4), dpi=200)
    ax = fig2.add_subplot(111)
    ax.scatter(range(0, size),
               _integer_sample,
               c='blue',
               alpha=0.25,
               s=0.5,
               label="realization")
    integer_record_indexes = GeneralUtils.get_record_indexes(_integer_sample)
    ax.scatter(integer_record_indexes,
               _integer_sample[integer_record_indexes],
               c='green',
               marker='*',
               label="records = {0}".format(len(integer_record_indexes)))
    ax.legend()
    message = 'Discretized Pareto distribution, alpha={alpha}, n=10^{power}'.format(
        alpha=b, power=power)
    plt.title(message)
    plt.show()

    # hill_estimator(_integer_sample)

    return sample, len(record_indexes), _integer_sample, len(
        integer_record_indexes)
Example No. 14
 def grant_wealth(self, person_ids, bank: Bank, transaction_date):
     """
     assign an initial amount of starting wealth per person
     """
     accounts = query_accounts_by_person_id(person_ids, bank.name, 'cash')
     accounts['transaction_amount'] = pareto.rvs(
         b=1,
         scale=pm.person_params['income'],
         size=accounts.shape[0],
     )
     accounts = accounts[['account_id', 'transaction_amount']]
     accounts = accounts.rename(columns={'account_id': 'debit_account'})
     accounts['credit_account'] = bank.liability_account
     accounts['transaction_date'] = transaction_date
     bank.make_transactions(accounts)
Example No. 15
def _generate_KS_cube():
    """
    Generate a grid of D values for KS tests of power-law behavior.
    """
    a_grid = np.arange(0.2, 2, 0.05)
    n_grid = [3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 20, 25, 30, 40, 50, 75, 100, 125, 150, 200]
    m = 1000
    Dcube = np.zeros([len(a_grid), len(n_grid), m], dtype='f4')
    for i, a in enumerate(a_grid):
        for j, n in enumerate(n_grid):
            D = []
            for k in range(m):
                rvs = pareto.rvs(a, size=n)
                aML = ML_index_analytic(rvs, 1.)
                cdf = lambda x: pareto.cdf(x, aML)
                D.append(kstest(rvs, cdf)[0])
            Dcube[i,j] = np.sort(D)
    np.save(_path_ks_grid, np.array((a_grid, n_grid, Dcube)))
Example No. 16
    def distribution_generator(flag, para_pow, para_normal, para_zip, t):
        if flag == "power_law":
            #            dist = np.random.pareto(para_pow, t)
            dist = pareto.rvs(para_pow, size=t) - 1
            dist = dist / max(dist)


#            dist = 1 - np.random.power(para_pow, t) # R^{k}
#            dist = np.random.uniform(0,1,t)
#            dist = (dist ** para_pow)
        elif flag == "uniform":
            dist = np.random.uniform(0, 1, t)
        elif flag == "normal":
            dist = np.random.normal(0.5, para_normal, t)
        elif flag == "zipfian":
            dist = np.random.zipf(para_zip, t)

        return dist
Example No. 17
def generate_flows(num_flows):
    print "Generating flows..."
    all_flows = Queue.Queue()
    inter_arrivals = np.random.exponential(1.0/lamb, num_flows)
    flow_lengths = pareto.rvs(shape, scale=scale, size=num_flows)
    if debug_flag:
        print "average flow length:", sum(flow_lengths)/num_flows
    prev_time = 0
    for i in range(num_flows):
        curr_time = prev_time + inter_arrivals[i]
        flow = Flow(curr_time, int(8*packet_size*flow_lengths[i]), inter_arrivals[i])
        all_flows.put(flow)
        prev_time = curr_time
        if debug_flag:
            print "flow created: (%.8f, %d)." % (flow.arrival, flow.packet_length)
    max_packets = int(max(flow_lengths))
    print "Finished flow generation."
    print
    return inter_arrivals, all_flows, max_packets
Example No. 18
def simulate(algorithms, a, alpha, T, trials):
    cum_regret = np.zeros((len(algorithms), T + 1))
    for trial in range(trials):
        inst_regret = np.zeros((len(algorithms), T + 1))
        for alg in algorithms:
            alg.initialize()

        for t in range(1, T + 1):
            for i, alg in enumerate(algorithms):
                idx = alg.output()
                arm = alg.active_arms[idx]
                inst_regret[i, t] = min(abs(arm - 0.4), abs(arm - 0.8))
                y = a - min(
                    abs(arm - 0.4),
                    abs(arm - 0.8)) + pareto.rvs(alpha) - alpha / (alpha - 1)
                alg.observe(t, y)

        cum_regret += np.cumsum(inst_regret, axis=-1)
    return cum_regret / trials
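
The subtracted term alpha / (alpha - 1) is the mean of a unit-scale Pareto with shape alpha (finite only for alpha > 1), so the reward noise pareto.rvs(alpha) - alpha/(alpha - 1) is zero-mean and heavy-tailed. A quick check with an arbitrary alpha:

from scipy.stats import pareto

alpha = 3.0  # illustration value; the mean exists only for alpha > 1
noise = pareto.rvs(alpha, size=500_000, random_state=0) - alpha / (alpha - 1)
print(noise.mean())  # close to 0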
Example No. 19
def generate_flows(num_flows):
    print "Generating flows..."
    all_flows = Queue.Queue()
    inter_arrivals = np.random.exponential(1.0 / lamb, num_flows)
    flow_lengths = pareto.rvs(shape, scale=scale, size=num_flows)
    if debug_flag:
        print "average flow length:", sum(flow_lengths) / num_flows
    prev_time = 0
    for i in range(num_flows):
        curr_time = prev_time + inter_arrivals[i]
        flow = Flow(curr_time, int(8 * packet_size * flow_lengths[i]),
                    inter_arrivals[i])
        all_flows.put(flow)
        prev_time = curr_time
        if debug_flag:
            print "flow created: (%.8f, %d)." % (flow.arrival,
                                                 flow.packet_length)
    max_packets = int(max(flow_lengths))
    print "Finished flow generation."
    print
    return inter_arrivals, all_flows, max_packets
def paretoF(sizeSamples, Ex, Dx):
    n = sizeSamples
    #generate the samples
    values = np.array([ pareto.rvs(k, size=n) for x in range(1000)])
    #compute the sample means
    meanVal = values.mean(axis = 1)
    plt.hist(meanVal, normed=True, alpha=0.5, label='hist mean n ' + str(n))
    
    #expected value and sigma of the normal distribution
    mu = Ex
    sigma = math.sqrt(Dx/n)
    print 'expected value=' , mu
    print 'sigma=' , sigma
    # define the normal distribution
    norm_rv = sts.norm(loc=mu, scale=sigma)
    x = np.linspace(0.5,2,100)
    pdf = norm_rv.pdf(x)
    plt.plot(x, pdf, 'r-', lw=3, alpha=0.7, label='pareto pdf n ' + str(n))
    plt.ylabel('samples')
    plt.xlabel('$x$')
    plt.legend(loc='best')
Example No. 21
def paretoF(sizeSamples, Ex, Dx):
    n = sizeSamples
    #generate the samples
    values = np.array([pareto.rvs(k, size=n) for x in range(1000)])
    #compute the sample means
    meanVal = values.mean(axis=1)
    plt.hist(meanVal, normed=True, alpha=0.5, label='hist mean n ' + str(n))

    #expected value and sigma of the normal distribution
    mu = Ex
    sigma = math.sqrt(Dx / n)
    print 'expected value=', mu
    print 'sigma=', sigma
    # define the normal distribution
    norm_rv = sts.norm(loc=mu, scale=sigma)
    x = np.linspace(0.5, 2, 100)
    pdf = norm_rv.pdf(x)
    plt.plot(x, pdf, 'r-', lw=3, alpha=0.7, label='pareto pdf n ' + str(n))
    plt.ylabel('samples')
    plt.xlabel('$x$')
    plt.legend(loc='best')
Example No. 22
def dispatch_rvs(alpha, xmin, xmax, discrete, size=1, random_state=None):
    if discrete:
        if np.isinf(xmax):
            ll = genzipf.rvs(alpha, xmin, size=size, random_state=random_state)
        else:
            ll = truncated_zipf.rvs(alpha,
                                    xmin,
                                    xmax,
                                    size=size,
                                    random_state=random_state)
    else:
        if np.isinf(xmax):
            ll = pareto.rvs(alpha - 1,
                            scale=xmin,
                            size=size,
                            random_state=random_state)
        else:
            ll = truncated_pareto.rvs(alpha - 1,
                                      float(xmax) / xmin,
                                      scale=xmin,
                                      size=size,
                                      random_state=random_state)
    return ll
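
The continuous branches pass alpha - 1 as scipy's shape, which is consistent with alpha being the exponent of the density p(x) ∝ x**(-alpha): scipy's pareto(b, scale=xmin) has density b * xmin**b / x**(b + 1), so b = alpha - 1 makes the two conventions agree. A small check under that reading (illustration values):

import numpy as np
from scipy.stats import pareto

alpha, xmin = 2.5, 3.0
sample = pareto.rvs(alpha - 1, scale=xmin, size=200_000, random_state=0)
print(1.0 + sample.size / np.sum(np.log(sample / xmin)))  # ML estimate of the density exponent, ~2.5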
Example No. 23
    def iperfFM(self, poisson_mean=100,r_int=0.5, r_tcp=0.8, last_time=1.7, t_threshold=8, size_1=4, size_2=1, base_port=5001):
        '''
        use iperf to generate flow model
        '''
        generate_flows = poisson.rvs(poisson_mean,size=30)
        for n in range(len(generate_flows)):
            flows_num = generate_flows[n]
            print flows_num
            for i in range(flows_num):
                # is_interior = True if bernoulli.rvs(r_int,size=1)[0]==1 else False
                client = random.choice(self.hosts[0:5])
                server = client
                # if not is_interior:
                while server == client:
                    server = random.choice(self.hosts)
                is_tcp =  True if bernoulli.rvs(r_tcp,size=1)[0]==1 else False
                flow_t = pareto.rvs(b=last_time,scale=1,size=1)[0] # b: shape parameter
                if flow_t < t_threshold:
                    flow_s = weibull_min.rvs(c=size_1,scale=5,size=1)[0] # c: shape parameter
                else:
                    flow_s = weibull_min.rvs(c=size_2,scale=1,size=1)[0]
                if is_tcp:
                    # self._iperfSingleTCP(hosts=[client,server], )
                    if flow_t < t_threshold:
                        self._iperfSingleTCPN(hosts=[client,server],bytes=str(flow_s)+'K',port=base_port) # hosts=None, bytes='10K', port=5001
                    else:
                        self._iperfSingleTCPN(hosts=[client,server],bytes=str(flow_s)+'M',port=base_port)

                else:
                    # self._iperfSingleUDP(hosts=[client,server],)
                    if flow_t < t_threshold:
                        self._iperfSingleUDPN(hosts=[client,server],bytes=str(flow_s)+'K',port=base_port)
                    else:
                        self._iperfSingleUDPN(hosts=[client,server],bytes=str(flow_s)+'M',port=base_port)
                base_port = random.randint(base_port,base_port+500)
                sleep(0.1)
        print 'iperfFM test has done'
Example No. 24
def sim(n, r, k, f, d):
    '''
    function for creating values for hill estimator.
    n = sample size
    r = number of estimates
    k = parameter for hill estimator
    f = file path for estimates
    d = specifies the distribution
    '''
    hill_est = np.array([])

    if d == "pareto":
        for _ in range(0, r):
            X = pareto.rvs(3, size=n)
            hill_est = np.append(hill_est, hill(X, k))

    if d == "cauchy":
        for _ in range(0, r):
            X = cauchy.rvs(1, size=n)
            hill_est = np.append(hill_est, hill(X, k))

    estimates = pd.DataFrame(hill_est, columns=np.array([d]))
    estimates.to_csv("%s/%s-%s-%s-%s.csv" % (f, n, r, k, d),
                     sep=",", encoding="utf-8")
Example No. 25
def efr(data):  # empirical distribution function of the data
    yList = []
    step = 0
    xList = sorted(set(data))
    dataLen = len(data)
    for elem in xList:
        count = data.count(elem)
        step += count
        yList.append(step / dataLen)
    return xList, yList

def print_quantile(n):
    quan = [0.1, 0.5, 0.7]
    for qq in quan:
        print("level =", qq, "n =", n, end=": ")
        res = []
        for i in range(5):
            data = np.zeros(n)
            for iteration in range(n):
                xi = np.random.rand()
                r = xm / xi ** (1 / a)
                data[iteration] = r
            x, y = efr(data.tolist())
            res.append(quantile(x,y, qq))
        print(res)

[a, xm] = [5, 1]
N = [5, 10, 100, 1000, 10000]
for n in N:
    print_quantile(n)
print("Theoretical quantiles: ", np.quantile(pareto.rvs(a, scale=xm,size=10**5), [0.1, 0.5, 0.7]))
# theoretical probability density of the random variable
left = pareto.ppf(0.01, k)
right = pareto.ppf(0.99, k)
x = np.linspace(left, right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')

# In[150]:

# values = np.array([pareto.rvs(k, size=10) for x in range(10)])
# print values
# plt.hist(values.mean(axis=1), normed=True)

m = []
for _ in xrange(20):
    m.append(np.mean(pareto.rvs(k, size=1000)))
# plt.hist(m, normed=True, alpha=0.5, label='hist samples')

mean = pareto.mean(k)
EX = mean
print mean
std = pareto.std(k)
print std
DX = std**2
print DX

n = 50
values = np.array([pareto.rvs(k, size=n) for x in range(1000)])
# print 'values ', values
# print 'mean ', values.mean(axis = 1)
meanAr = values.mean(axis=1)
Example No. 27
File: test_RLS.py Project: RJTK/LMS
def tracking_example4():
  '''
  Shows the RLS algorithm tracking a process with fat tailed noise.
  We compare performance with and without a clamped input range.

  Obviously, simply clipping the input range is a pretty naive method
  for dealing with fat tailed noise.
  '''
  np.random.seed(2718)

  N = 1000 #Length of data
  lmbda = 0.98 #Forgetting factor
  p = 6 #Filter order

  #Filter for generating d(n)
  b = [1, -0.5, .3]
  a = [1, 0.2, 0.16, -0.21, -0.0225]
  sv2 = .25 #Innovations noise variance
  beta = 1.25
  psv = 0.025

  t = np.linspace(0, 1, N)
  f = 2
  v = 2*np.sin(2*np.pi*f*t) + \
      gaussian.rvs(size = N, scale = math.sqrt(sv2)) #Innovations
  d = lfilter(b, a, v) #Desired process
  d = d + pareto.rvs(beta, size = N, scale = math.sqrt(psv)) #fat tailed noise

  def clamp(x, M, m):
    return M*(x >= M) + m*(x <= m) + x*(x < M and x > m)

  M = 4.
  m = -4.
  d_clamp = np.array([clamp(di, M, m) for di in d])

  #Initialize RLS filter and then
  #Get function closure implementing 1 step prediction

  #-------CLAMPED INPUT-----------
  F = RLS(p = p, lmbda = lmbda)
  ff_fb = one_step_pred_setup(F)

  d_hat_clamp = np.array([0] + [ff_fb(di) for di in d_clamp])[:-1]
  err_clamp = (d - d_hat_clamp)
  MSE_avg_clamp = np.average(abs(err_clamp)**2)

  #--------UNCLAMPED INPUT---------
  F = RLS(p = p, lmbda = lmbda)
  ff_fb = one_step_pred_setup(F)

  #Run it through the filter and get the error
  d_hat = np.array([0] + [ff_fb(di) for di in d])[:-1]
  err = (d - d_hat)
  MSE_avg = np.average(abs(err)**2)

  plt.subplot(2,1,1)
  plt.plot(range(N), d, linewidth = 1, linestyle = ':',
           label = 'True Process')
  plt.plot(range(N), d_clamp, linewidth = 1, linestyle = '--',
           label = 'Clamped Process')
  plt.plot(range(N), d_hat, linewidth = 1, label = 'Prediction')
  plt.plot(range(N), d_hat_clamp, linewidth = 1, label = 'Prediction (clamped)')
  plt.legend()
  plt.xlabel('$n$')
  plt.ylabel('Process Value')
  plt.title('RLS tracking a process ' \
            '$\\lambda = %s$, $p = %d$' % (lmbda, p))

  plt.subplot(2,1,2)
  plt.plot(range(N), err, linewidth = 2, label = 'err')
  plt.plot(range(N), err_clamp, linewidth = 2, label = 'err (clamped)')
  plt.hlines(MSE_avg, 0, N, linestyle = '--', label = 'MSE', linewidth = 3,
             color = 'r')
  plt.hlines(MSE_avg_clamp, 0, N, linestyle = '--', label = 'MSE (clamped)',
             linewidth = 3, color = 'y')
  plt.legend()
  plt.xlabel('$n$')
  plt.ylabel('Error')
  plt.title('Prediction Error')

  plt.show()
  return
 def normal_and_pareto():
     return [norm.rvs(size=1)[0], pareto.rvs(b, size=1)[0]]
Example No. 29
import butools
import butools.fitting as bfit
import butools.trace as btrace
import butools.ph as bph
from scipy.stats import erlang
from scipy.stats import pareto
import numpy as np
import matplotlib.pyplot as plt

butools.verbose = True

# generate 10000 samples from a Pareto distribution
tr = pareto.rvs(3, size=10000)

# plot the empirical CDF of the samples
(xt, yt) = btrace.CdfFromTrace(tr)
plt.plot(xt, yt, label='Trace')

# try fitting using 3, 5, and 7 states
nstates = [3, 5, 7]
for ns in nstates:

    # use phase type distributions to fit this trace
    # tr: the trace (samples) to be fitted
    # ns: number of transient states to use
    alpha, A, logli = bfit.PHFromTrace(tr, ns)
    # alpha: initial probability of the Markov chain
    # A: transition rate matrix between transient states

    # plot the fitted CDF
    intBounds = np.linspace(0, 20, 1000)
Example No. 30
def generateMonteCarlo(rand_seed, N_MC=int(1e8), d=2, grid_size=4,
                       alpha=4, independence=False,
                       tau=0, verbose=False, pickle_=True):
    """ Approximate the angular measure with Monte Carlo procedure
    Params:
    @N_MC          (int): MC sample size
    @d             (int): dimension
    @grid_size     (int): paving size of the L_inf sphere
    @alpha   (float > 1): Dirichlet concentration param
    @independence (bool): central or axis concentration param
    @tau         (float): min angular region from the axis to avoid on the L_inf sphere
    @verbose      (bool): bool to print output
    @pickle       (bool): dump the generated samples
    Returns:
        None if pickle is True or rectangle (dict) containing the angular measure 
    """
    if independence:
        # extreme features may be large independently
        if alpha:
            alpha_ = np.ones(d)/alpha
        else:
            alpha_ = np.ones(d)/d
    else:
        # extremes features are large simultaneously
        if alpha:
            alpha_ = np.ones(d)*alpha
        else:  
            alpha_ = np.ones(d)*d
    # initialize paving of the L_inf sphere
    grid = np.linspace(tau, 1, num=grid_size+1) 
    # initialize rectangle dictionary
    rectangle = dict()
    # samples radius and norm for MC estimation
    theta = dirichlet.rvs(alpha=alpha_, size=N_MC, random_state=rand_seed)
    R  = pareto.rvs(b=1, size=N_MC, random_state=rand_seed)
    # polar decomposition X = R*theta
    X_MC = R.reshape(-1, 1) * theta
    # norms of all the generated samples
    norm_X_MC = np.linalg.norm(X_MC, axis=1, ord=np.inf)

    # display information
    if verbose:
        print("MC sample size   :", N_MC) 
        print("dimension        :", d)
        print("grid_size of cube:", grid_size)
        print("-------------------------------------------------------")
        
    # loop over the extremes generated samples
    for idx, theta_i in enumerate(theta[norm_X_MC >= 1]):
        # display information
        if idx % int(1e6) == 0:
            if verbose:
                print(idx, time.ctime())
        # current face
        key = str(theta_i.argmax()) + '-'
        # loop over the faces of the current sample
        for l in range(len(theta_i)):
                if l != theta_i.argmax():
                    key += str(np.min(np.where(theta_i[l] / np.linalg.norm(theta_i, ord=np.inf) <= grid )))
        # update value of MC estimate
        if key in rectangle.keys():
            rectangle[key] += d/N_MC
        else:
            rectangle[key] = d/N_MC
    if pickle_:  # save result
        with open('rectangle.'+'N_MC='+str(np.format_float_scientific(N_MC))+
                  '.d='+str(d)+'.alpha='+str(alpha_)+'.tau='+str(tau)+
                  '.grid_size='+str(grid_size)+'.pickle', 'wb') as handle:
            pickle.dump(rectangle, handle, protocol=pickle.HIGHEST_PROTOCOL)
    else: # return rectangle dictionary
        return rectangle
Example No. 31
def make_data(rand_seed, n_train, n_test,
              d, tau, quantile=95, kappa=1.,
              alpha_plus=None, alpha_minus=None,
              verbose=False):
    """ Generate data of three different types:
    - Theta       (with known margins)  
    - hat_Theta   (based on the Pareto standardization)
    - hat_Theta_M (based on the Pareto standardization + Truncation)
    Params:
    @rand_seed    (int): random_seed for reproducibility 
    @n            (int): number of samples to generate 
    @d            (int): dimension of the samples
    @tau        (float): angular region to avoid
    @quantile     (int): [0, 100] quantile of the ||\widehat T(X_i)|| to remove samples
    @kappa      (float): multiplicative factor
    alpha_plus  (float): Dirichlet concentration coef for the data labeled +1
    alpha_minus (float): Dirichlet concentration coef for the data labeled -1
    
    Returns:
    @Theta_train      : Angle of train extreme samples based on V = d R * Theta such that R * Theta > 1 
    @hat_Theta_train  : Angle of train extreme samples based on \hat V = \hat T(X)
    @hat_Theta_train_M:
    @Theta_test       : Angle of test  extreme samples based on V = d R * Theta such that R * Theta > 1 
    @hat_Theta_test   : Angle of test  extreme samples based on \hat V = \hat T(X)
    @y_train          : label corresponding to extreme train samples V
    @y_hat_train      : label corresponding to extreme train samples \hat V
    @y_hat_train_M    :
    @y_test           : label of extreme test samples V 
    @y_hat_test       : label of extreme test samples \hat V 
    """
    # threshold for selection extremes
    k = np.sqrt(n_train)    
    # sanity check to have samples on the sphere
    if not (n_train/k) * tau > d or verbose:
        print("Condition (n_train / k) * tau > d is ", (n_train / k) * tau > d)
    n_train = int(n_train/2) # because we generate data labeled +1 and data labeled -1
    n_test  = int(n_test/2) # because we generate data labeled +1 and data labeled -1
    # weights of Dirichlet for data labeled +1
    if alpha_plus:
        alpha_plus_ = np.ones(d)*alpha_plus
    else:
        alpha_plus_ = np.ones(d)/d
    # weights of Dirichlet for data labeled -1
    if alpha_minus:
        alpha_minus_ = np.ones(d)*alpha_minus
    else:  
        alpha_minus_ = np.ones(d)*d
    # Generate angular vectors: train data 
    theta_plus_train  = dirichlet.rvs(alpha=alpha_plus_,
                                      size=n_train,
                                      random_state=rand_seed)    
    theta_minus_train = dirichlet.rvs(alpha=alpha_minus_,
                                      size=n_train,
                                      random_state=rand_seed)
    # Generate angular vectors: test data 
    theta_plus_test   = dirichlet.rvs(alpha=alpha_plus_,  
                                      size=n_test, 
                                      random_state=rand_seed + 700) #+ 700 for test samples
    theta_minus_test  = dirichlet.rvs(alpha=alpha_minus_, 
                                      size=n_test, 
                                      random_state=rand_seed + 700)
    
    # Generate Radius of train data
    R_plus_train   = pareto.rvs(b=1, size=n_train, random_state=rand_seed)
    R_minus_train  = pareto.rvs(b=1, size=n_train, random_state=rand_seed + 123) #+ 123 to change radius
    
    # Generate Radius of test data
    R_plus_test   = pareto.rvs(b=1, size=n_test, random_state=rand_seed + 700)
    R_minus_test  = pareto.rvs(b=1, size=n_test, random_state=rand_seed + 123 + 700)
    
    # Build X = R * theta for train data
    X_plus_train  = R_plus_train.reshape(-1, 1)  * theta_plus_train
    X_minus_train = R_minus_train.reshape(-1, 1) * theta_minus_train
    
    # Build X = R * theta for test data
    X_plus_test  = R_plus_test.reshape(-1, 1)  * theta_plus_test
    X_minus_test = R_minus_test.reshape(-1, 1) * theta_minus_test
    
    # Build V = d * X for train data
    V_plus_train  = d * X_plus_train#[np.min(X_plus_train, axis=1) > 1]
    V_minus_train = d * X_minus_train#[np.min(X_minus_train, axis=1) > 1]
    V_train = np.vstack((V_plus_train, V_minus_train))
    
    is_min_V_train_g_d = np.min(V_train, axis=1) > d 
    
    # Build V = d * X for test data
    V_plus_test  = d * X_plus_test#[np.min(X_plus_test, axis=1) > 1]
    V_minus_test = d * X_minus_test#[np.min(X_minus_test, axis=1) > 1]
    V_test  = np.vstack((V_plus_test, V_minus_test))
    
    # sanity check
    is_min_V_test_g_d = np.min(V_test, axis=1) > d
    
    # Labels for both V_train and V_test 
    y_train = np.hstack((np.ones(n_train), np.zeros(n_train)))
    y_test  = np.hstack((np.ones(n_test),  np.zeros(n_test)))
    
    #label for both train and test for all data
    y_hat_train = np.hstack((np.ones(n_train), np.zeros(n_train)))
    y_hat_test  = np.hstack((np.ones(n_test), np.zeros(n_test)))
    
    # building X_train and X_test
    X_train = np.vstack((X_plus_train, X_minus_train))
    X_test  = np.vstack((X_plus_test, X_minus_test))
    
    # Pareto standardization
    order_X = order(X_train)
    hat_V_train = transform(order_X, X_train)
    hat_V_test  = transform(order_X, X_test)
    
    #Computing norms for train and test
    norm_V_train = np.linalg.norm(V_train, axis=1, ord=np.inf)
    norm_hat_V_train = np.linalg.norm(hat_V_train, axis=1, ord=np.inf)
    
    norm_V_test = np.linalg.norm(V_test, axis=1, ord=np.inf)
    norm_hat_V_test = np.linalg.norm(hat_V_test, axis=1, ord=np.inf) 
    
    # Assessing train samples with norms greater than n / k
    is_extreme_V_train     = norm_V_train >  (2 * n_train) / k # we multiply by 2 cause we divided by 2 before
    is_extreme_hat_V_train = norm_hat_V_train > (2 * n_train) / k
    
    # M value
    M = np.percentile(norm_hat_V_train[is_extreme_hat_V_train], q=quantile)
    if M < 1:
        print("Warning M < 1:", M)
    
    # Assessing train samples with norms smaller than M 
    #(M being a quantile of ||\hat V||[||\hat V|| > n/k])
    is_smaller = norm_hat_V_train <= M 
    
    # Computing angular vectors
    Theta_train = V_train / norm_V_train.reshape(-1, 1)
    Theta_test  = V_test  / norm_V_test.reshape(-1, 1)
     
    hat_Theta_train = hat_V_train / norm_hat_V_train.reshape(-1,1)
    hat_Theta_test  = hat_V_test  / norm_hat_V_test.reshape(-1,1)
    
    # Assessing train samples which are tau far from axes
    is_tau_valid_V_train = np.min(Theta_train, axis=1) > tau
    is_tau_valid_hat_V_train = np.min(hat_Theta_train, axis=1) > tau
    
    # Assessing test samples which are tau far from axes
    is_tau_valid_V_test = np.min(Theta_test, axis=1) > tau
    is_tau_valid_hat_V_test = np.min(hat_Theta_test, axis=1) > tau
    
    # Finding the samples which verify both conditions for F known and F unknown
    #bool_condition_train = (is_extreme_V_train * is_extreme_hat_V_train) * (is_tau_valid_V_train * is_tau_valid_hat_V_train)
    
    #defining the angular train samples on the truncated subspace
    hat_Theta_train_M = hat_Theta_train[is_extreme_hat_V_train * is_tau_valid_hat_V_train * is_smaller ]
    y_hat_train_M = y_hat_train[is_extreme_hat_V_train * is_tau_valid_hat_V_train * is_smaller ]
    
    # defining the angular train samples verifying all conditions    
    Theta_train = Theta_train[is_extreme_V_train * is_tau_valid_V_train]#[bool_condition_train] 
    hat_Theta_train = hat_Theta_train[is_extreme_hat_V_train * is_tau_valid_hat_V_train]
  
    is_extreme_V_test     = norm_V_test >= kappa * (2 * n_train) / k # we multiply by 2 cause we divided by 2 before
    is_extreme_hat_V_test = norm_hat_V_test >= kappa * (2 * n_train) / k
    
    #bool_condition_test  = (is_extreme_V_test  * is_extreme_hat_V_test)  * (is_tau_valid_V_test  * is_tau_valid_hat_V_test)
    
    hat_Theta_test = hat_Theta_test[is_extreme_hat_V_test * is_tau_valid_hat_V_test]
    Theta_test     = Theta_test[is_extreme_V_test * is_tau_valid_V_test]
    
    if verbose:
        # csq of (n_train / k) * tau > d 
        print((is_min_V_train_g_d[is_extreme_V_train * is_tau_valid_V_train]).mean()) 
        print((is_tau_valid_V_train * is_extreme_V_train == is_tau_valid_hat_V_train * is_extreme_hat_V_train).mean())
        
        print("shapes")
        print("Theta_train.shape:",Theta_train.shape)
        print("hat_Theta_train.shape:",hat_Theta_train.shape)
        print("Theta_test.shape:",Theta_test.shape)
        print("hat_Theta_test.shape:",hat_Theta_test.shape)  
    
    # Focusing on extreme samples
    y_train = y_train[is_extreme_V_train * is_tau_valid_V_train]
    y_hat_train = y_hat_train[is_extreme_hat_V_train * is_tau_valid_hat_V_train]
    
    y_test = y_test[is_extreme_V_test * is_tau_valid_V_test]
    y_hat_test = y_hat_test[is_extreme_hat_V_test * is_tau_valid_hat_V_test]
       
    return Theta_train, hat_Theta_train, hat_Theta_train_M, Theta_test, hat_Theta_test, y_train, y_hat_train, y_hat_train_M, y_test, y_hat_test
Example No. 32
def generateParetoStandardization(rand_seed, n=100, d=2, grid_size=4, alpha=4,
                       independence=False, tau=0,quantile = 66, verbose=False, pickle_=True,):

    """
    Estimate the influence of the Pareto standardization
    - Phi       (with known margins)  
    - hat Phi   (based on the Pareto standardization)
    - hat_Phi_M (based on the Pareto standardization + Truncation)
    Params:
        @rand_seed     (int): random_seed for reproducibility
	@n             (int): number of generated samples
        @d             (int): dimension of the samples
        @grid_size     (int): the paving size of the L_inf sphere
        @alpha   (float > 1): Dirichlet concentration param
        @independence (bool): central or axis concentration param
        @tau         (float): min angular region from the axis to avoid on the L_inf sphere
        @quantile  ([0,100]): build M such that a ratio quantile of extreme points are kept
        @verbose      (bool): bool to print output
        @pickle       (bool): dump the generated samples
    Returns
        None if pickle is True or rectangle (dict) containing the angular measure 
    """
     ####################################
    # sanity check to make sure that given n is an int
    n = int(n)
    # set k to define the extreme-region threshold
    k = np.sqrt(n)
    # sanity check 
    if np.sqrt(n)*tau <= d:
        print("error n/k * \tau > d is false:", np.sqrt(n)*tau,  d)
    if independence:
	## extreme features may be large independently
        if alpha:
            alpha_ = np.ones(d) * 1 / (alpha)
        else:
            alpha_ = np.ones(d) * 1/(d)
    else:
	# extreme features are large simultaneously
        if alpha:
            alpha_ = np.ones(d) * (alpha)
        else:  
            alpha_  = np.ones(d) * (d)
    grid = np.linspace(tau, 1, num=grid_size+1) #np.arange(tau, 1+1/grid_size , 1/grid_size) 

    # initialize all rectangle dictionaries
    # true input data
    rectangle_V = dict()
    # Pareto standardized data
    rectangle_hat_V = dict()
    # Pareto standardized data + Truncation
    rectange_hat_V_M = dict()
    
    # sample radius and angular components for simulation study
    theta = dirichlet.rvs(alpha=alpha_, size=n, random_state=rand_seed)
    R  = pareto.rvs(b=1, size=n, random_state=rand_seed)

    X = R.reshape(-1, 1) * theta
    V = d * X#[np.min(X, axis=1) > 1]
    hat_V = transform(order(X), X)
    
    norm_V = np.linalg.norm(V, axis=1, ord=np.inf)
    norm_hat_V = np.linalg.norm(hat_V, axis=1, ord=np.inf)

    is_extreme_V    = norm_V >= n / k
    is_extreme_hatV = norm_hat_V >= n / k
        
    # all samples with norms smaller than a given quantile
    M = np.percentile(norm_hat_V[is_extreme_hatV], q=quantile)
    
    if M < 1:
        print("Warning M < 1:", M)
    is_smaller = norm_hat_V <= M 
    
    is_V_tau_valide = np.min(V / norm_V.reshape(-1, 1), axis=1) >= tau
    is_hat_V_tau_valide = np.min(hat_V / norm_hat_V.reshape(-1,1), axis=1) >= tau
    
    N_removed = np.sum(is_extreme_hatV * is_hat_V_tau_valide) - np.sum(is_extreme_hatV * is_hat_V_tau_valide * is_smaller) 
    
    if verbose:
        print("The dimension of the problem              :", d)
        print("The size of the grid for the cube         :", grid_size)
        print("The number of points to compute phi by MC :", N_MC)
        print("-------------------------------------------------------")

    ## Computing the mass on V = d * X[X > 1]
    for idx, V_i in enumerate(V[is_extreme_V * is_V_tau_valide]):
        key = str(V_i.argmax()) + '-'
        for l in range(len(V_i)):
                if l != V_i.argmax():
                    key += str(np.min(np.where(V_i[l] / np.linalg.norm(V_i, ord=np.inf) <= grid )))
        if key in rectangle_V.keys():
            rectangle_V[key] += 1 / k #* is_tau_valide
        else:
            rectangle_V[key] = 1 / k #* is_tau_valide
        
    #-----------------------------------------------------------------------
    
    ## Computing the mass on hat V with the regular estimator
    for idx, V_i in enumerate(hat_V[is_extreme_hatV * is_hat_V_tau_valide]):
        key = str(V_i.argmax()) + '-'
        for l in range(len(V_i)):
                if l != V_i.argmax():
                    key += str(np.min(np.where(V_i[l] / np.linalg.norm(V_i, ord=np.inf) <= grid )))
        if key in rectangle_hat_V.keys():
            rectangle_hat_V[key] += 1 / k 
        else:
            rectangle_hat_V[key] = 1 / k         
            
     #-----------------------------------------------------------------------
    
    ## Computing the mass on hat V with the truncated estimator
    for idx, V_i in enumerate(hat_V[is_extreme_hatV * is_hat_V_tau_valide * is_smaller]):
        key = str(V_i.argmax()) + '-'
        for l in range(len(V_i)):
                if l != V_i.argmax():
                    key += str(np.min(np.where(V_i[l] / np.linalg.norm(V_i, ord=np.inf) <= grid )))
        if key in rectange_hat_V_M.keys():
            rectange_hat_V_M[key] += (M/(M-1)) * (1 / k) 
        else:
            rectange_hat_V_M[key] =(M/(M-1)) * (1 / k) 
    if pickle_:
        with open('rectangleV.'+'n='+str(n)+'.d='+str(d)+'.alpha='+str(alpha_)+'.tau='+str(tau)+'.grid_size='+str(grid_size)+'.pickle', 'wb') as handle:
            pickle.dump((rectangle_hat_V, rectange_hat_V_M, rectangle_V, N_removed), handle, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        return rectangle_hat_V,rectange_hat_V_M, rectangle_V, N_removed
# In[42]:

import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pareto
import scipy.stats as sts
# get_ipython().magic(u'matplotlib inline')


# In[43]:

# Choose parameters for the distribution
k = 10
# Generate a sample of size 1000 from it
sampleRange = pareto.rvs(k, size=1000)
# Plot a histogram of the sample and overlay the theoretical density of the random variable.
plt.hist(sampleRange, normed=True, bins=20, alpha=0.5, label='hist samples pareto')
plt.ylabel('number of samples')
plt.xlabel('$x$')

# theoretical probability density of the random variable
left = pareto.ppf(0.01, k)
right =  pareto.ppf(0.99, k)
x = np.linspace(left,  right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')


# In[57]:
Example No. 34
# Display the probability density function (``pdf``):

x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
ax.plot(x, pareto.pdf(x, b), 'r-', lw=5, alpha=0.6, label='pareto pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = pareto(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = pareto.ppf([0.001, 0.5, 0.999], b)
np.allclose([0.001, 0.5, 0.999], pareto.cdf(vals, b))
# True

# Generate random numbers:

r = pareto.rvs(b, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
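
This is the standard scipy.stats.pareto docstring walkthrough; the preamble that defines b and ax is not included above. A setup that makes the snippet self-contained (the shape value is an arbitrary choice for illustration):

import numpy as np
from scipy.stats import pareto
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)
b = 2.62  # any positive shape works for the plots in the snippet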
Example No. 35
# m = []
# for _ in xrange(20):
#     m.append(np.mean(pareto.rvs(k, size=1000)))
# # plt.hist(m, normed=True, alpha=0.5, label='hist samples')

EX = pareto.mean(k)
print EX
std = pareto.std(k)
print std
DX = std**2
print DX
print
# For several values of n (e.g., 5, 10, 50), generate 1000 samples of size n and
# plot histograms of the distributions of their sample means.
n = 100
values = np.array([ pareto.rvs(k, size=n) for x in range(1000)])
meanVal = values.mean(axis = 1)
plt.hist(meanVal, normed=True, alpha=0.5, label='hist mean n ' + str(n))

mu = EX
sigma = math.sqrt(DX/n)

# define a normally distributed random variable
norm_rv = sts.norm(loc=mu, scale=sigma)
x = np.linspace(0.5,2,100)
# print x
pdf = norm_rv.pdf(x)
plt.plot(x, pdf, 'r-', lw=3, alpha=0.7, label='pareto pdf')


plt.show()
Example No. 36
# hill = HillEstimator(reg_sizes, "Reflexive pareto RW", n)
#
# hill.plot_estimator()
#
# qi = QiEstimator(sizes=reg_sizes, name="Reflexive pareto RW", step_n=n, r=r)
#
# qi.plot_estimator()


def powerlaw_usage():
    n = 100000

    pareto_rw = ParetoReflectiveRandomWalk(step_n=n,
                                           name="Reflexive pareto RW")
    pareto_rw.generate_path()

    reg_sizes = pareto_rw.get_regeneration_block_sizes()

    fit = powerlaw.Fit(reg_sizes)

    return fit.power_law.alpha


# print(powerlaw_usage())

steps = pareto.rvs(1, size=1000)

fit = powerlaw.Fit(steps)

print(fit.power_law.alpha)
Example No. 37
left = pareto.ppf(0.01, k)
right =  pareto.ppf(0.99, k)
x = np.linspace(left,  right, 100)
plt.plot(x, pareto.pdf(x, k), 'r-', lw=5, alpha=0.7, label='pareto pdf')
plt.legend(loc='best')


# In[150]:

# values = np.array([pareto.rvs(k, size=10) for x in range(10)])
# print values
# plt.hist(values.mean(axis=1), normed=True)

m = []
for _ in xrange(20):
    m.append(np.mean(pareto.rvs(k, size=1000)))
# plt.hist(m, normed=True, alpha=0.5, label='hist samples')

mean = pareto.mean(k)
EX = mean
print mean
std = pareto.std(k)
print std
DX = std**2
print DX

n = 50
values = np.array([ pareto.rvs(k, size=n) for x in range(1000)])
# print 'values ', values
# print 'mean ', values.mean(axis = 1)
meanAr = values.mean(axis = 1)
Example No. 38
# from scipy.stats.rv_continuous import rvs
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import statistics

from scipy.stats import pareto
from matplotlib import colors as mcolors

colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)

b = 0.9

np.random.seed(seed=200)
case01 = pareto.rvs(b, loc=0, scale=1, size=5)

np.random.seed(seed=200)
case02 = pareto.rvs(b, loc=0, scale=1, size=5)

assert any(case01 == case02)
'''
rnorm2 <- function(n,mean,sd) { mean+sd*scale(rnorm(n)) }
r <- rnorm2(100,4,1)
mean(r)  ## 4
sd(r)    ## 1
'''
'''
# Define real pars mu and sigma, sample 100x
trueMu <- 5
trueSig <- 2
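
Since the two seeded draws above are element-wise identical, np.all(case01 == case02) would be the stricter assertion (any() passes as soon as a single element matches). scipy's rvs also accepts a random_state argument, which gives the same reproducibility without resetting NumPy's global seed; a small sketch:

import numpy as np
from scipy.stats import pareto

b = 0.9
case03 = pareto.rvs(b, loc=0, scale=1, size=5, random_state=200)
case04 = pareto.rvs(b, loc=0, scale=1, size=5, random_state=200)
assert np.all(case03 == case04)  # identical draws from the same random_state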
def nuevo_enlace2(
    i
):  #A node is selected and connected to another nearby one; the first node's position is perturbed.

    global g, delta_omega, b, delta_c

    max_intentos = 1000
    exito = False

    n_nodes = len(g.nodes())
    n_enlaces = len(g.edges())
    max_enalces = 0.5 * n_nodes * (n_nodes - 1.0)
    x0 = g.node[i]['x']
    y0 = g.node[i]['y']

    next_neighbors = []  #J: first collect the 2nd neighbors
    for ii in nx.neighbors(g, i):
        for nnb in nx.neighbors(g, ii):
            #print('i=',i,'ii=',ii)
            next_neighbors.append(
                nnb)  #J: we only consider edges to 2nd neighbors
            #print('next_neigh=',next_neighbors)

    if (n_enlaces < max_enalces) and (len(next_neighbors) > 0):

        #neighbs = nx.neighbors(g,i)
        intentos = 0
        while (exito != True) and (intentos < max_intentos):
            intentos += 1  #J: always leave an escape route in while loops
            #i = rd.choice(g.nodes()) # the agent is chosen in update2()

            rad = pareto.rvs(
                b, size=1
            )  #Generate random numbers from a pareto density distribution b/(x^(1+b))
            rad = rad[
                0]  #the previous call returns a one-element array, which we extract here

            #candidates0 = [nb for nb in g.nodes() if ( ((g.node[nb]['x']-x0)**2 + (g.node[nb]['y']-y0)**2) < rad**2) and (nb != i) ]
            candidates0 = [
                nb for nb in next_neighbors
                if (((g.node[nb]['x'] - x0)**2 +
                     (g.node[nb]['y'] - y0)**2) < rad**2) and (nb != i)
            ]

            candidates = [
                nb for nb in candidates0 if (nb in nx.non_neighbors(g, i))
            ]  #J: before choosing a neighbor, check that there is no existing edge
            n_candidates = len(
                candidates)  #limited by nodes of its own species and the other one
            #print('i:',i,'n_candidates= ',n_candidates)

            if n_candidates > 0:

                j = rd.choice(candidates)

                if j in nx.non_neighbors(
                        g, i
                ):  #nx.non_neighbors(g,i) gives the list of non-neighbors of i
                    g.add_edge(i, j)
                    exito = True

            #angulo = rd.random()*2*math.pi
            #modulo = rd.random()*delta_c
            #g.node[i]['x'] += modulo*np.cos(angulo)
            #g.node[i]['y'] += modulo*np.sin(angulo)

        return i  #return the node to which the newly created node has been connected

    else:  #If the graph is complete, we add a new node

        #i = rd.choice(g.nodes()) #J: if no edge can be added, RETURN
        #nuevo_nodo2(i)
        return i
Example No. 40
def generate_Anom_data(anom_mode, num_noise_samples, params):
    anom_views = []

    if anom_mode == 1:
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=1000))
            falseEntries = np.ones((nViews, ))
            anom_views.append(falseEntries)

    elif anom_mode == 2:
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=1000))
            complete_prob = np.random.uniform(low=0.8, high=0.9)
            falseEntries = np.random.uniform(low=0.8, high=0.9, size=nViews)
            compl_indexes = np.random.choice(range(falseEntries.shape[0]),
                                             size=int(complete_prob * nViews))
            falseEntries[compl_indexes] = 1.0
            anom_views.append(falseEntries)

    elif anom_mode == 3:
        ## EXPON
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=10000))
            views = expon.rvs(loc=params[0], scale=params[1], size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 4:
        ## Pareto
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=1000))
            views = pareto.rvs(params[0], loc=params[1], size=nViews) - 1.0
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 5:
        ## LogNorm
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=1000))
            views = lognorm.rvs(s=params[0], scale=params[1], size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 6:
        ## Weibull_Min
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=1000))
            views = weibull_min.rvs(params[0], scale=params[1], size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 7:
        ## Uniform
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=10000))
            views = np.random.uniform(low=params[0],
                                      high=params[1],
                                      size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 8:
        ## Uniform SHORT
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=10000))
            views = np.random.uniform(low=params[0],
                                      high=params[1],
                                      size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 9:
        ## Gamma Short
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=10000))
            views = gamma.rvs(params[0], scale=params[1], loc=0.0, size=nViews)
            views[views > 1.0] = 1.0
            anom_views.append(views)

    elif anom_mode == 10:
        for i in range(num_noise_samples):
            nViews = int(np.random.uniform(low=100, high=10000))
            views = 1 - gamma.rvs(
                params[0], scale=params[1], loc=0.0, size=nViews)
            views[views > 1.0] = 1.0
            views[views < 0.0] = 0.0
            anom_views.append(views)

    anom_views = np.array(anom_views)

    pbar = ProgressBar()
    synth_fisk_params = []
    for i in pbar(range(len(anom_views))):
        synth_single = fit_fisk(anom_views[i], 0)
        synth_fisk_params.append(
            [synth_single[0], synth_single[1][0], synth_single[1][2]])

    print len(synth_fisk_params)
    synth_fisk_params = np.array(synth_fisk_params)
    return synth_fisk_params
Example No. 41
def account_balance():
    """Generate account balances according to a Pareto distribution.
    We should expect balances to be distributed as with other income
    distributions.  The power exponent is chosen here to replicate
    the 80-20 rule."""
    return pareto.rvs(1.161)
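
The exponent 1.161 is approximately log(5)/log(4), the Pareto index for which the top 20% of the population holds about 80% of the total, which is the 80-20 rule the docstring cites. A rough empirical check (the estimate fluctuates, since the tail is heavy):

import numpy as np
from scipy.stats import pareto

draws = np.sort(pareto.rvs(1.161, size=1_000_000, random_state=0))
top20_share = draws[int(0.8 * draws.size):].sum() / draws.sum()
print(top20_share)  # roughly 0.8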