def generate_random_exponential_timeseries(dlamb, ndays=100, num=100): """ This function will generate some random exponential 2d data in a time series. parameters: dlamb: float - the exponential lambda parameter ndays: int - number of days in the time series num: int - total number of locations returns: numpy array of coordinates with day index """ a = np.zeros([ndays * num, 3]) b = np.array(range(0, ndays)) b = np.repeat(b, num) a[:, 0] = b for row in a: row[1] = expon.rvs(loc=0, scale=dlamb) row[2] = expon.rvs(loc=0, scale=dlamb) return a
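# A minimal usage sketch for the generator above, assuming numpy and
# scipy.stats.expon are imported at module level as the snippet requires.
# Parameter values are arbitrary and only illustrate the output layout:
# one row per location per day, with columns [day index, x, y].
import numpy as np
from scipy.stats import expon

points = generate_random_exponential_timeseries(dlamb=2.0, ndays=5, num=10)
print(points.shape)   # (50, 3): ndays * num rows
print(points[:3])     # three locations sampled on day 0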
def bcc_sim(S, lambd, mu, simtime):
    remaining = simtime
    i = 0  # current state
    ts = 0
    time = np.zeros(S + 1)
    while remaining > 0:
        if i == 0:
            T1 = expon.rvs(scale=1. / lambd, size=1)
            T2 = np.inf
        elif i == S:
            T1 = np.inf
            T2 = expon.rvs(scale=1. / (i * mu), size=1)
        else:
            T1 = expon.rvs(scale=1. / lambd, size=1)
            T2 = expon.rvs(scale=1. / (i * mu), size=1)
        if np.all(T1 < T2):
            ts = T1
            time[i] = time[i] + ts
            i = i + 1
        else:
            ts = T2
            time[i] = time[i] + ts
            i = i - 1
        remaining = remaining - ts[0]
        progress = (simtime - remaining) / simtime
        # print("{0}% --> {1} remaining".format(progress * 100.0, remaining))
    return time / simtime
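# Sanity-check sketch for bcc_sim: the long-run fraction of time spent in the
# full state S of an M/M/S/S loss system should approach the Erlang B blocking
# probability. Assumes numpy and scipy.stats.expon are imported as above; the
# parameter values below are arbitrary and the simulation length is kept short.
def erlang_b(S, a):
    # standard recursion: B(0) = 1, B(k) = a*B(k-1) / (k + a*B(k-1))
    b = 1.0
    for k in range(1, S + 1):
        b = a * b / (k + a * b)
    return b

lambd, mu, S = 3.0, 1.0, 5
fractions = bcc_sim(S, lambd, mu, simtime=5000)
print(fractions[S], erlang_b(S, lambd / mu))   # simulated vs analytic blocking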
def poisson_rate(exposuretime: u.s, geomarea: u.cm**2) -> u.s:
    '''Generate Poisson distributed times.

    Parameters
    ----------
    exposuretime : `~astropy.units.quantity.Quantity`
        Exposure time
    geomarea : `~astropy.units.quantity.Quantity`
        Geometric opening area of telescope

    Returns
    -------
    times : `~astropy.units.quantity.Quantity`
        Poisson distributed times.
    '''
    fullrate = rate * geomarea
    # Make 10 % more numbers than we expect to need, because it's random
    times = expon.rvs(scale=1. / fullrate.to(1 / u.s),
                      size=int((exposuretime * fullrate * 1.1).to(u.dimensionless_unscaled)))
    # If we don't have enough numbers right now, add some more.
    while (times.sum() * u.s) < exposuretime:
        times = np.hstack([times,
                           expon.rvs(scale=1 / fullrate.to(1 / u.s),
                                     size=int(((exposuretime - times.sum() * u.s) * fullrate * 1.1).to(u.dimensionless_unscaled)))])
    times = np.cumsum(times) * u.s
    return times[times < exposuretime]
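# Usage sketch for the units-aware poisson_rate above. The snippet relies on a
# module-level constant `rate` (events per unit time and area) that is not
# shown; the value below is hypothetical and chosen only for illustration.
import astropy.units as u
import numpy as np
from scipy.stats import expon

rate = 0.5 / (u.s * u.cm**2)                  # assumed module-level constant
times = poisson_rate(1000 * u.s, 10 * u.cm**2)
print(len(times))                             # roughly rate * area * exposure events
print(times[:5])                              # event times within the exposure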
def lindley(m=55000, d=5000):
    '''
    Estimates waiting time with m customers, discarding the first d customers.
    Lindley approximation for waiting time in an M/G/1 queue.
    '''
    replications = 10
    lindley = []
    for rep in range(replications):
        y = 0
        SumY = 0
        for i in range(1, d):
            # Random variate generation from scipy.stats:
            # exponential interarrival gap with loc=0, scale=1
            a = expon.rvs(0, 1)
            # Erlang service time with shape=3, scale=0.8/3 (mean 0.8)
            x = erlang.rvs(3, scale=0.8 / 3, size=1)
            y = max(0, y + x - a)
        for i in range(d, m):
            a = expon.rvs(0, 1)
            # Erlang service time with shape=3, scale=0.8/3 (mean 0.8)
            x = erlang.rvs(3, scale=0.8 / 3, size=1)
            y = max(0, y + x - a)
            SumY += y
        result = SumY / (m - d)
        lindley.append(result)
    return lindley
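# Usage sketch: the loop above is the Lindley recursion
#   W_{n+1} = max(0, W_n + S_n - A_n)
# with Exp(1) interarrival gaps and Erlang(3, scale=0.8/3) service times
# (mean 0.8, so utilisation is about 0.8). Assumes scipy.stats.expon and
# erlang are imported as in the snippet; the smaller m, d below are only to
# keep the example quick.
import numpy as np
waits = lindley(m=5000, d=500)
print(np.mean(waits), np.std(waits))   # average waiting time across replications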
def fast_sim(x, tH=1 / 70, nodec=5, isi=35., gen_var=1): """ Simulates a dataset with x trials and true hazardrate tH. Does so faster. nodec = minimum points between decisions. Nodec points are shown, after that 'isi' determines decision probability. """ inter_choice_dists = np.cumsum(expon.rvs(scale=1 / (1 / isi), size=10000)) inter_choice_dists = np.array([int(j + nodec + nodec * (np.where(inter_choice_dists == j)[0])) for j in inter_choice_dists]) # adds 5 (nodec) points between every decision inter_choice_dists = inter_choice_dists[inter_choice_dists < x] mus = [] values = [] start = random.choice([0.5, -0.5]) cnt = 0 while cnt < x: i = 1 + int(np.round(expon.rvs(scale=1 / tH))) mus.append([start] * i) values.append(norm.rvs(start, gen_var, size=i)) start *= -1 cnt += i df = pd.DataFrame({'rule': np.concatenate(mus)[:x], 'value': np.concatenate(values)[:x]}) # df.columns = ['rule', 'values'] df.loc[:, 'message'] = 'GL_TRIAL_LOCATION' df.loc[inter_choice_dists, 'message'] = 'decision' df.loc[:, 'index'] = np.arange(len(df)) return df
def sim(): state = 0 p0h_cnt = 0 p1h_cnt = 0 l_p0hat = [] l_p1hat = [] t = 0 while t < 10001: if state == 0: state = 1 ran = int(round(e.rvs(1 / lam))) t += ran p0h_cnt += ran l_p0hat.extend([p0h_cnt / 10000 for _ in range(0, ran)]) l_p1hat.extend([p1h_cnt / 10000 for _ in range(0, ran)]) else: state = 0 ran = int(round(e.rvs(1 / mu))) t += ran p1h_cnt += ran l_p0hat.extend([p0h_cnt / 10000 for _ in range(0, ran)]) l_p1hat.extend([p1h_cnt / 10000 for _ in range(0, ran)]) x = np.arange(10000) / 100 print(len(l_p0hat)) plt.plot(l_p0hat, '-', label='p0hat', markersize=0.1, linewidth=1) plt.plot(l_p1hat, '-', label='p1hat', markersize=0.1, linewidth=1) plt.legend()
def schedule_processes(self): id = 0 while True: waiting_time = expon.rvs(loc=0, scale=self.params['expected_wait']) std_norm = norm.rvs() gas_required = std_norm * self.params[ 'gas_required_std'] + self.params['gas_required_std'] gas_required = max([0, gas_required]) lay_time = expon.rvs(loc=0, scale=self.params['expected_lay_time']) yield self.env.timeout(waiting_time) self.processes.append( Car(id=id, gas_required=gas_required, lay_time=lay_time, gas_station=self.gas_station, env=self.env, params=self.params)) id += 1
def __expon_churn(self): while True: delay = expon.rvs(scale=self.__expon_online_beta) if delay >= self.__expon_online_threshold: delay = min(self.__expon_max_online, max(self.__expon_min_online, delay)) if self._community is None: if __debug__: dprint("expon wants us online for the next ", delay, " seconds") self.log("scenario-expon", state="online", duration=delay) self._community = self.community_class.load_community( self._master_member, *self.community_args, **self.community_kargs ) else: if __debug__: dprint("expon wants us online for the next ", delay, " seconds (we are already online)") self.log("scenario-expon", state="stay-online", duration=delay) yield float(delay) delay = expon.rvs(scale=self.__expon_offline_beta) if delay >= self.__expon_offline_threshold: delay = min(self.__expon_max_offline, max(self.__expon_min_offline, delay)) if self._community is None: if __debug__: dprint("expon wants us offline for the next ", delay, " seconds (we are already offline)") self.log("scenario-expon", state="stay-offline", duration=delay) else: if __debug__: dprint("expon wants us offline for the next ", delay, " seconds") self.log("scenario-expon", state="offline", duration=delay) self._community.unload_community() self._community = None yield float(delay)
def __init__(self, numCustomers=100):
    """Initializes the simulation."""
    self.numCustomers = numCustomers
    self.customers = []
    # initialize State Variables
    self.clock = 0
    self.Idle = 0
    self.Busy = 1
    self.s1 = self.Idle
    self.s2 = self.Idle
    self.q1 = 0
    self.q2 = 0
    ### additional values to be tracked during simulation
    # average waiting time for all customers
    self.averageWaitingTime = 0
    self.averageQ1Time = 0
    self.averageQ2Time = 0
    # avg waiting time for customers who wait
    self.averageWaitTimeWhoWait = 0
    self.averageQ1TimeWait = 0
    self.averageQ2TimeWait = 0
    # average service time for all customers
    self.averageServiceTime = 0
    self.averageService1Time = 0
    self.averageService2Time = 0
    # avg interarrival time for all customers
    self.averageInterarrrivalTime = 0
    # avg total system time for all customers
    self.averageSystemTime = 0
    # probability a customer has to wait
    self.waitProbability = 0
    # percentage of time server is idle
    self.idleProbability = 0
    # queue sizes at the moment each customer arrived
    self.q1sizes = {}
    self.q2sizes = {}
    ### generate values for random variables
    self.interarrivalTimeValues = poisson.rvs(4, 0, size=numCustomers).tolist()
    self.serviceTime1Values = expon.rvs(6, size=numCustomers).tolist()
    self.serviceTime2Values = expon.rvs(8, size=numCustomers).tolist()
    self.balkValues = [random.random() for x in range(numCustomers)]
    # print self.interarrivalTimeValues
    # print expon.rvs(6, 0, size=numCustomers)
    # print self.serviceTime2Values
    # print self.balkValues
    self.populate()
    # Ready. Set. Go!
def rvs(self): if not self.size: self.size = randint.rvs(low = self.min_size, high = self.max_size, size = 1) if self.scale: return expon.rvs(loc = self.loc * 0.09, scale = self.scale, size = self.size) else: return expon.rvs(loc = self.loc * 0.09, scale = self.loc * 8.0, size = self.size)
def create_data(n=N_SAMPLES, c=N_CLUSTERS, f=N_FEATURES, param=PARAM, time_str=""): feat_means = np.array([expon.rvs(size=f) for _ in range(c)]) feat_clusters = assign_clusters(f, c, param) cov = np.zeros([f,f]) for ii in range(f): for jj in range(ii,f): if feat_clusters[ii] == feat_clusters[jj]: cv = expon.rvs(size=1) cov[ii,jj] = cv cov[jj,ii] = cv # feat_sds = np.array([expon.rvs(size=f) for _ in range(c)]) cluster_assignments = assign_clusters(n, c, param) cluster_assignments_test = assign_clusters(TEST_SIZE, c, param) features = create_features(feat_means, cov, cluster_assignments) features_test = create_features(feat_means, cov, cluster_assignments_test) labels = cluster_assignments%2 labels_test = cluster_assignments_test%2 fstr = "data/{}_" + time_str np.save(fstr.format("importances"), feat_means) np.save(fstr.format("X"), features) np.save(fstr.format("y"), labels) np.save(fstr.format("X_test"), features_test) np.save(fstr.format("y_test"), labels_test) np.save(fstr.format("cl"), cluster_assignments) np.save(fstr.format("cl_test"), cluster_assignments_test) return features, labels
def random_bow(df_scale=0.1, **kwargs): return ('bow', { 'max_df': 1 - expon.rvs(loc=0, scale=df_scale, size=1).item(), # max % of times a term can be found 'min_df': expon.rvs(loc=0, scale=df_scale, size=1).item(), # min % of times a term can be found # 'max_features': # not sure how to randomly pick this 'binary': bernoulli.rvs(0.2, size=1).item(), # whether to make bow binary })
def random_tfidf(df_scale=0.1, **kwargs):
    return ('tfidf', {
        'max_df': 1 - expon.rvs(loc=0, scale=df_scale, size=1).item(),  # max % of times a term can be found
        'min_df': expon.rvs(loc=0, scale=df_scale, size=1).item(),  # min % of times a term can be found
        # 'max_features':  # not sure how to randomly pick this
        'binary': bernoulli.rvs(0.2, size=1).item(),  # whether to make bow binary
        'norm': np.random.choice(['l2', 'l1', None], p=[0.8, 0.15, 0.05]),  # how to normalize the vectors
    })
def __init__(self): self.state=estats.GeneradorInactiu self.index=-1 self.tempsArribada = expon.rvs(size=configuracio.aliments_a_processar, loc=configuracio.loc_temps_entre_arribades, scale=configuracio.scale_temps_entre_arribades) for i in range(len(self.tempsArribada)): while self.tempsArribada[i] < 0: self.tempsArribada[i] = expon.rvs(size=1, loc=configuracio.loc_temps_entre_arribades, scale=configuracio.scale_temps_entre_arribades) self.tempsArribada[i] = round(self.tempsArribada[i], 2)
def BLDGenerator(p,samplesize,lmbd,C): #Going to do a fixed species tree with given probabilities of introgression. outputList=[] for x in range (0,samplesize): if numpy.random.random_sample()<p: outputList.append(expon.rvs()*lmbd) else: outputList.append(expon.rvs()*lmbd+(C-truncexpon.rvs(C))*lmbd) # print (expon.rvs()*lmbd+(C-truncexpon.rvs(C))*lmbd) return [[('testA','testB','testC'),outputList,[],[]]]
def _resimulate(node, factor=1.0, skewed_admixture_prior=False): if node[2] is not None: if skewed_admixture_prior: node[2] = linear_distribution.rvs() else: node[2] = uniform.rvs() if node[3] is not None: node[3] = expon.rvs() / factor if node[4] is not None: node[4] = expon.rvs() / factor return node
def driving_process(params, step, sL, s):
    '''
    Driving process for adding new participants (their funds) and new proposals.
    '''
    sentiment = s['sentiment']
    arrival_rate = 10 / (1 + sentiment)
    rv1 = np.random.rand()
    new_participant = bool(rv1 < 1 / arrival_rate)
    supporters = get_edges_by_type(s['network'], 'support')
    len_parts = len(get_nodes_by_type(s['network'], 'participant'))
    supply = s['supply']
    expected_holdings = .1 * supply / len_parts
    if new_participant:
        h_rv = expon.rvs(loc=0.0, scale=expected_holdings)
        new_participant_holdings = h_rv
    else:
        new_participant_holdings = 0

    network = s['network']
    affinities = [network.edges[e]['affinity'] for e in supporters]
    median_affinity = np.median(affinities)
    proposals = get_nodes_by_type(network, 'proposal')
    fund_requests = [network.nodes[j]['funds_requested'] for j in proposals
                     if network.nodes[j]['status'] == 'candidate']
    funds = s['funds']
    total_funds_requested = np.sum(fund_requests)
    proposal_rate = 1 / median_affinity * (1 + total_funds_requested / funds)
    rv2 = np.random.rand()
    new_proposal = bool(rv2 < 1 / proposal_rate)

    scale_factor = funds * sentiment**2 / 10000
    if scale_factor < 1:
        scale_factor = 1
    # this shouldn't happen but expon is throwing domain errors
    if sentiment > .4:
        funds_arrival = expon.rvs(loc=0, scale=scale_factor)
    else:
        funds_arrival = 0

    return ({
        'new_participant': new_participant,
        'new_participant_holdings': new_participant_holdings,
        'new_proposal': new_proposal,
        'funds_arrival': funds_arrival
    })
def generate_parametric_services(new_ai, sol_m1, sol_m2): # ti are the same new_si = [] for a_new in new_ai: if a_new == 1: new_si.append(expon.rvs(scale=1 / sol_m2)) else: new_si.append(expon.rvs(scale=1 / sol_m1)) #print("New average S_i: ", np.mean(new_si)) return new_si
def InitialRepu(dist): if dist == "no": temp = 0 elif dist == "norm": temp = norm.rvs(loc=0.5, scale=0.15) while temp < 0 or 1 < temp: temp = norm.rvs(loc=0.5, scale=0.5) elif dist == "exp": temp = expon.rvs(scale=0.3) while temp < 0 or 1 < temp: temp = expon.rvs(scale=0.3) return temp
def supermarket_log(starting_time, finish_time, warehouse, file):  # one day operation
    """
    Simulates one day of restocks and sells in a supermarket. Events follow an
    exponential distribution with an average time between events of 5 minutes.
    Each time an event occurs, the next event (restock or sell) is chosen with
    a binomial distribution where a sell has probability 0.65 and a restock 0.35.
    When a client buys a product, that product is selected uniformly at random,
    whereas the quantity is chosen from a binomial with n=(max quantity of the
    product chosen) and p=0.15.
    We make one restock at a time; each time a restock is made the product is
    chosen uniformly at random, while the quantity to restock is drawn from a
    binomial with n=(max quantity allowed on shelves), p=0.65.

    Parameters
    ----------
    starting_time: supermarket opening time
    finish_time: supermarket closing time
    warehouse: class Warehouse where our products catalog is saved, we need this
        information to know products and their codes in our supermarket
    file: file path in which to save our daily log
    """
    log = []
    last_hour = starting_time
    while last_hour < finish_time:  # our loop finishes when the last transaction has passed finish_time
        if binom.rvs(1, 0.65):
            product_chosen = list(warehouse.products.keys())[randint.rvs(1, 19) - 1]
            last_hour += timedelta(minutes=float(expon.rvs(scale=5, size=1)))
            aux = ['venta', last_hour, product_chosen,
                   binom.rvs(n=warehouse[product_chosen][0], p=0.15, loc=1)]
            log.append(aux)
        else:
            last_hour += timedelta(minutes=float(expon.rvs(scale=5, size=1)))
            product_chosen = list(warehouse.products.keys())[randint.rvs(0, len(warehouse.products) - 1)]
            log.append(['repo', last_hour, product_chosen,
                        binom.rvs(n=warehouse[product_chosen][0], p=0.65, loc=1)])
    with open(file, 'w') as f:
        text = ""
        for el in log:
            text += el[0] + ' ' + format_date(el[1]) + " " + el[2] + " " + str(el[3]) + "\n"
        f.write(text)
def handle_xml_request(self, msg):
    if 'http://' not in msg.get_payload():
        raise ValueError('url_mpd parameter should start with http://')

    url_tokens = msg.get_payload().split('/')[2:]
    port = '80'
    host_name = url_tokens[0]
    path_name = '/' + '/'.join(url_tokens[1:])
    mdp_file = ''

    try:
        connection = http.client.HTTPConnection(host_name, port)
        connection.request('GET', path_name)
        mdp_file = connection.getresponse().read().decode()
        connection.close()
    except Exception as err:
        print('> Houston, we have a problem!')
        print(f'> trying to connect to: {msg.get_payload()}')
        print(f'Execution Time {self.timer.get_current_time()} > msg obj: {msg}')
        print(err)
        exit(-1)

    msg = Message(MessageKind.XML_RESPONSE, mdp_file)
    msg.add_bit_length(8 * len(mdp_file))

    parsed_mpd = parse_mpd(msg.get_payload())
    self.qi = parsed_mpd.get_qi()

    increase_factor = 1
    low = round(self.qi[len(self.qi) - 1] * increase_factor)
    medium = round(self.qi[(len(self.qi) // 2) - 1] * increase_factor)
    high = round(self.qi[0] * increase_factor)

    self.traffic_shaping_values.append(
        expon.rvs(scale=1, loc=low, size=1000, random_state=self.traffic_shaping_seed))
    self.traffic_shaping_values.append(
        expon.rvs(scale=1, loc=medium, size=1000, random_state=self.traffic_shaping_seed))
    self.traffic_shaping_values.append(
        expon.rvs(scale=1, loc=high, size=1000, random_state=self.traffic_shaping_seed))

    self.send_up(msg)
def __expon_churn(self): while True: delay = expon.rvs(scale=self.__expon_online_beta) if delay >= self.__expon_online_threshold: delay = float(min(self.__expon_max_online, max(self.__expon_min_online, delay))) self.scenario_churn("online", delay) yield delay delay = expon.rvs(scale=self.__expon_offline_beta) if delay >= self.__expon_offline_threshold: delay = float(min(self.__expon_max_offline, max(self.__expon_min_offline, delay))) self.scenario_churn("offline", delay) yield delay
def JumpDiffusionKou(mu, sigma, lam, p, e1, e2, ts, j_):
    # Simulate a double-exponential (Kou) jump-diffusion process
    # INPUTS
    #  mu    :[scalar] mean parameter of Gaussian distribution
    #  sigma :[scalar] standard deviation of Gaussian distribution
    #  lam   :[scalar] Poisson intensity of jumps
    #  p     :[scalar] binomial parameter of jumps
    #  e1    :[scalar] exponential parameter for the up-jumps
    #  e2    :[scalar] exponential parameter for the down-jumps
    #  ts    :[vector] time steps with ts[0]=0
    #  j_    :[scalar] number of simulations
    # OUTPUTS
    #  x     :[matrix](j_ x len(ts)) matrix of simulated paths

    ## Code
    tau = ts[-1]
    # simulate number of jumps
    n_jump = poisson.rvs(lam * tau, size=(j_))
    k_ = len(ts)
    jumps = zeros((j_, k_))
    for j in range(j_):
        # simulate jump arrival time
        t = tau * rand(n_jump[j], 1)
        t = sort(t)
        # simulate jump size
        ww = binom.rvs(1, p, size=(n_jump[j], 1))
        S = ww * expon.rvs(scale=e1, size=(n_jump[j], 1)) - (1 - ww) * expon.rvs(scale=e2, size=(n_jump[j], 1))
        # put things together
        CumS = cumsum(S)
        for k in range(1, k_):
            events = npsum(t <= ts[k])
            if events:
                jumps[j, k] = CumS[events - 1]

    # simulate the arithmetic Brownian motion component
    d_BM = zeros((j_, k_))
    for k in range(1, k_):
        dt = ts[k] - ts[k - 1]
        d_BM[:, [k]] = mu * dt + sigma * sqrt(dt) * randn(j_, 1)

    # put together the arithmetic BM with the jumps
    x = cumsum(d_BM, 1) + jumps
    return x
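# Usage sketch for the Kou jump-diffusion simulator above. The snippet relies
# on zeros, rand, randn, sort, cumsum, sqrt and npsum being available at module
# level (e.g. via numpy imports); the parameter values here are arbitrary.
import numpy as np
ts = np.linspace(0, 1, 253)            # roughly one year of daily steps, ts[0] = 0
paths = JumpDiffusionKou(mu=0.05, sigma=0.2, lam=3, p=0.6,
                         e1=0.02, e2=0.03, ts=ts, j_=100)
print(paths.shape)                     # (100, 253) simulated paths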
def testExponManyEvents(self):
    """
    generate and fit an exponential distribution with lifetime of 25
    make a plot in testExponManyEvents.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    taulist = []
    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        param = expon.fit(timeStamps)
        fit = expon.pdf(x, loc=param[0], scale=param[1])
        fit *= size
        print("i=", i, " param[1]=", param[1])
        taulist.append(param[1])

    hist, bins = np.histogram(taulist, bins=20, range=(15, 25))
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    plt.step(center, hist, where='post')
    plt.savefig(inspect.stack()[0][3] + ".png")
def testExponaverage(self):
    """
    generate and fit a histogram of an exponential distribution with many events
    using the average time to find the fit. The histogram is then saved in
    testExponaverage.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    taulist = []
    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        # the sample mean is the maximum-likelihood estimate of the scale (tau)
        param = sum(timeStamps) / len(timeStamps)
        fit = expon.pdf(x, scale=param)
        fit *= size
        taulist.append(param)

    hist, bins = np.histogram(taulist, bins=20, range=(15, 35))
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    # plt.bar(center, hist, align='center', width=width) produces a bar graph
    plt.step(center, hist, where='post')
    plt.savefig(inspect.stack()[0][3] + ".png")
def generate_random_data_from_dist(param, shape, nrows, ncols): if shape == 'normal': data = norm.rvs(0, param, size=(nrows, ncols)) # link the two sliders and make the param for t dfs (yolked to sample size in other slider) # elif shape=='t': # data = t.rvs(df=ncols-1) elif shape == 'lognormal': data = lognorm.rvs(param, size=(nrows, ncols)) elif shape == 'contaminated chi-squared': # data = chi2.rvs(4, 0, param, size=size) data = chi2.rvs(4, size=(nrows, ncols)) contam_inds = np.random.randint(ncols, size=int(param * ncols)) data[:, contam_inds] *= 10 elif shape == 'contaminated normal': sub_size = round(param * ncols) norm_size = int(ncols - sub_size) standard_norm_values = norm.rvs(0, 1, size=(nrows, norm_size)) contam_values = norm.rvs(0, 10, size=(nrows, sub_size)) #print(standard_norm_values.shape) #print(contam_values.shape) data = np.concatenate([standard_norm_values, contam_values], axis=1) #print(data.shape) elif shape == 'exponential': data = expon.rvs(0, param, size=(nrows, ncols)) return data
def get_data(n): data = np.concatenate( (expon.rvs(scale=1, size=n // 2), skewnorm.rvs(5, loc=3, size=n // 2))) #getting exp dist data #now shuffle the data np.random.shuffle(data) return data
def testExpon(self):
    """
    generate and fit an exponential distribution with lifetime of 25
    make a plot in testExpon.png
    """
    tau = 25.0
    nBins = 400
    size = 100
    x = range(nBins)
    timeHgValues = np.zeros(nBins, dtype=np.int64)
    timeStamps = expon.rvs(loc=0, scale=tau, size=size)
    ts64 = timeStamps.astype(np.uint64)
    tsBinner.tsBinner(ts64, timeHgValues)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Note: this line causes a RuntimeWarning in optimize.py:301
        param = expon.fit(timeStamps)

    fit = expon.pdf(x, loc=param[0], scale=param[1])
    fit *= size
    tvf = timeHgValues.astype(np.double)
    # tvf[tvf<1] = 1e-3  # the plot looks nicer if zero values are replaced
    plt.plot(x, tvf, label="data")
    plt.plot(x, fit, label="fit")
    plt.yscale('symlog', linthreshy=0.9)
    plt.xlim(xmax=100)
    plt.ylim(ymin=-0.1)
    plt.legend()
    plt.title("true tau=%.1f fit tau=%.1f" % (tau, param[1]))
    plt.savefig(inspect.stack()[0][3] + ".png")
def simulate(ks, p, tf=10):
    n = len(ks)
    X = [1] * n + [0] * n + [p]  # state is a vector of n empty sites, n complexes, protein
    t = 0
    history = [(t, X[:])]
    while t < tf:
        p = X[-1]
        print("X:", X)
        rates = [p * ks[i] * X[i] for i in range(n)] + [X[n + i] for i in range(n)]
        print("rates:", rates)
        master_rate = float(sum(rates))
        # exponential waiting time with mean 1/master_rate (no loc offset)
        dt = expon.rvs(scale=1 / master_rate)
        print("dt:", dt)
        print("normalized rates:", normalize(rates))
        j = inverse_cdf_sample(range(len(rates)), normalize(rates))
        print("chose reaction:", j)
        if j < n:
            print("forming complex")
            # update state for complex formation
            X[j] = 0
            X[n + j] = 1
            X[-1] -= 1
        else:
            # update state for complex dissociation
            print("dissolving complex")
            X[j] = 0
            X[j - n] = 1
            X[-1] += 1
        t += dt
        history.append((t, X[:]))
    return history
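# The simulator above calls two helpers, normalize and inverse_cdf_sample, that
# are not shown in the snippet. These are minimal sketches of what they
# plausibly look like: normalize rescales a rate vector into probabilities and
# inverse_cdf_sample draws one item according to those probabilities.
import random

def normalize(ws):
    total = float(sum(ws))
    return [w / total for w in ws]

def inverse_cdf_sample(items, probs):
    # walk the cumulative distribution until it exceeds a uniform draw
    r = random.random()
    acc = 0.0
    for item, prob in zip(items, probs):
        acc += prob
        if r <= acc:
            return item
    return list(items)[-1]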
def sample_multiple_random_variables(self, size: int): """Sample a number of random variables from the distribution. Args: size (int): Number of random variables to be sampled. """ return expon.rvs(scale=self.scale, size=size)
def mutation_signatures_sim_data(N, L, K, output_prefix, mloc=100, mscale=50):
    from scipy.stats import expon
    # Create the signatures
    p_alphas = [2. / L] * L
    e_alphas = [1.] * K
    P = np.random.dirichlet(p_alphas, size=K)
    E = np.random.dirichlet(e_alphas, size=N)

    # Sample data
    samples = ['Sample-%s' % (i + 1) for i in range(N)]
    categories = ['C%s' % (i + 1) for i in range(L)]
    M = np.zeros((N, L), dtype=int)
    for i, (sample, e_i) in enumerate(zip(samples, E)):
        # Sample the number of mutations for this sample, and the exposures
        n_muts = int(expon.rvs(loc=mloc, scale=mscale, size=1)[0])
        e_i = np.random.dirichlet(e_alphas)
        # Generate mutations
        for _ in range(n_muts):
            k = np.where(np.random.multinomial(1, pvals=e_i) == 1)[0][0]
            j = np.where(np.random.multinomial(1, pvals=P[k]) == 1)[0][0]
            M[i, j] += 1

    # Add some Poisson noise
    M += np.random.poisson(1, size=(N, L))

    # Output to file
    np.save(output_prefix + '-signatures.npy', P)
    np.save(output_prefix + '-exposure.npy', E)
    np.save(output_prefix + '-mutation-counts.npy', M)
    with open(output_prefix + '-mutation-counts.tsv', 'w') as OUT:
        OUT.write('\t%s\n' % '\t'.join(categories))
        for i, sample in enumerate(samples):
            OUT.write('%s\t%s\n' % (sample, '\t'.join(map(str, M[i]))))
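# Usage sketch for the signature simulator above; it writes .npy/.tsv files
# with the given prefix, so the prefix below is only a hypothetical example path.
mutation_signatures_sim_data(N=20, L=96, K=4, output_prefix='sim-data')
# writes sim-data-signatures.npy, sim-data-exposure.npy,
# sim-data-mutation-counts.npy and sim-data-mutation-counts.tsv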
def sim_nd_na(E,N=1000, size_mean=100): """Simulate an exponential-size burst distribution with binomial (nd,na) """ nt = np.ceil(expon.rvs(scale=size_mean, size=N)).astype(int) na = binom.rvs(nt, E) nd = nt - na return nd, na
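# Quick check sketch for sim_nd_na: the overall acceptor fraction
# na / (na + nd) should scatter around the requested proximity ratio E.
# Assumes numpy and scipy.stats expon/binom are imported as in the snippet.
import numpy as np
nd, na = sim_nd_na(E=0.3, N=5000, size_mean=100)
print(na.sum() / (na.sum() + nd.sum()))   # approximately 0.3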
def generate_data_samples(MLE_lambda_inv, no_samples): generated_beer_visits = expon.rvs(scale=MLE_lambda_inv, loc=0, size=no_samples) return generated_beer_visits
def MBdist(n, e_photon, thick):
    # n: particle number, loct: start point (x-x0), scale: sigma,
    # wl: wavelength, thick: thickness of the cathode
    assert e_photon > bandgap
    if e_photon - bandgap - 0.8 <= 0:
        scale = e_photon - bandgap
        loct = 0
    else:
        scale = 0.8
        loct = e_photon - bandgap - scale
    data = maxwell.rvs(loc=loct, scale=scale, size=n)
    data_ene = np.array(data)
    params = maxwell.fit(data, floc=0)
    data_v = np.sqrt(2 * data_ene * ec / me) * 10**9
    p2D = []
    wl = ((19.82 - 27.95 * e_photon + 11.15 * e_photon**2) * 10**-3)**-1
    pens = expon.rvs(loc=0, scale=wl, size=n)
    # keep only penetration depths inside the cathode (a list, so len() works in Python 3)
    penss = [x for x in pens if x <= thick]
    params_exp = expon.fit(pens, floc=0)
    for i in range(len(penss)):
        phi = random.uniform(0, 2 * math.pi)  # initial angle
        poy = random.uniform(-1 * 10**6, 1 * 10**6)  # initial y position
        p2D.append([penss[i], poy, data_v[i] * math.cos(phi),
                    data_v[i] * math.sin(phi), data_v[i], data[i]])
        # p2D: (z, y, vz, vy, v, ene)
    p2D = np.array(p2D)
    return params, p2D, penss, params_exp
def p_randomly(params, step, sL, s, **kwargs): commons = s["commons"] sentiment = s["sentiment"] ans = { "new_participant": False, "new_participant_investment": None, "new_participant_tokens": None } arrival_rate = (1 + sentiment) / 10 if probability(arrival_rate): ans["new_participant"] = True # Here we randomly generate each participant's post-Hatch # investment, in DAI/USD. # # expon.rvs() arguments: # # loc is the minimum number, so if loc=100, there will be no # investments < 100 # # scale is the standard deviation, so if scale=2, investments will # be around 0-12 DAI or even 15, if scale=100, the investments will be # around 0-600 DAI. ans["new_participant_investment"] = expon.rvs(loc=0.0, scale=100) ans["new_participant_tokens"] = commons.dai_to_tokens( ans["new_participant_investment"]) return ans
def expon_naive(n_reads=10000, scale=100, D=50): distances = np.zeros(n_reads, dtype=np.int64) for i in tqdm(range(n_reads)): distance = int(expon.rvs(scale=scale)) distances[i] = distance distances_D = distances[distances > D] Lambda = 1 / distances.mean() P = lambda x: Lambda * np.exp(-Lambda * x) quad_value, _ = quad(lambda x: P(x), 0, np.inf) print(f'quad_value: {quad_value}') log_likelihoods = [] d_range = range(0, 2 * D) for d in d_range: log_likelihood = 0 quad_value, _ = quad(lambda x: P(x), d, np.inf) P_d = lambda x: np.where(x <= d, 0, P(x) / quad_value) for distance in distances_D: if distance > d: log_likelihood += np.log(P_d(distance)) else: log_likelihood = -np.inf log_likelihoods.append(log_likelihood) plt.plot(d_range, log_likelihoods) plt.title('Log likelihood for naive estimation') plt.xlabel('d, distance estimate') plt.ylabel('log likelihood for d') plt.show()
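# Side note sketch related to the truncated-likelihood scan above: by the
# memoryless property, if the distances are exponential and we only observe
# those above a cutoff D, the excess (distance - D) is again exponential with
# the same scale, so a simple moment estimate of the scale is
# mean(observed) - D. Assumes numpy and scipy.stats.expon are imported as above.
import numpy as np
from scipy.stats import expon

scale, D = 100, 50
obs = expon.rvs(scale=scale, size=200000)
obs = obs[obs > D]
print(obs.mean() - D)    # close to 100: the scale recovered from truncated data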
def __sample_trend(self, size, bias=0):
    cursor = 0.0
    cum_sum = bias
    trend = []
    while cursor < size:
        previous = cursor
        cursor += expon.rvs(scale=self.__lambda)
        cursor = min(cursor, size)
        tan = norm.rvs(loc=self.__tan_mean, scale=self.__tan_var)
        left_idx = int(np.floor(previous))
        right_idx = int(np.floor(cursor))
        supp = np.arange(left_idx, right_idx) - previous
        if len(supp) > 0:
            trend.append(tan * supp + cum_sum)
        cum_sum += tan * (cursor - previous)
    trend = np.hstack(trend)
    return trend
def generate_dates(population, earliest_date, latest_date, rate):
    """Produce a sample of event dates whose frequency either increases
    exponentially towards the latest date (events become increasingly common)
    or is uniform over the interval.
    """
    low = datetime.strptime(earliest_date, "%Y-%m-%d").date()
    high = datetime.strptime(latest_date, "%Y-%m-%d").date()
    elapsed_days = (high - low).days
    if rate == "exponential_increase":
        # We oversample the distribution to trim the long tail of the
        # exponential function
        oversample_ratio = 1.5
        distribution = (expon.rvs(loc=0, scale=0.1, size=int(population * oversample_ratio))
                        * elapsed_days).astype("int")
        distribution = distribution[distribution <= elapsed_days]
    elif rate == "uniform":
        distribution = uniform.rvs(size=int(population)) * elapsed_days
        distribution = distribution.astype("int")
    else:
        raise ValueError(
            "Only exponential_increase and uniform distributions currently supported"
        )
    # And then sample it back down to the requested population size
    distribution = np.random.choice(distribution, population, replace=False)
    df = pd.DataFrame(distribution, columns=["days"])
    shifts = pd.TimedeltaIndex(df["days"], unit="D")
    df["d"] = high
    df["d"] = pd.to_datetime(df["d"])
    df["date"] = df["d"] - shifts
    return df[["date"]]
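# Usage sketch for generate_dates; the date strings and population size are
# arbitrary. Assumes numpy, pandas, datetime and scipy.stats expon/uniform are
# imported as the snippet requires.
df = generate_dates(population=1000,
                    earliest_date="2019-01-01",
                    latest_date="2020-12-31",
                    rate="exponential_increase")
print(df["date"].min(), df["date"].max())
print(df["date"].dt.year.value_counts())   # later years should dominate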
def testExponOneEvent(self): """ generate and fit an exponential distribution with lifetime of 25 make a plot in testExpon.png """ tau = 25.0 nBins = 400 size = 100 x = range(nBins) timeHgValues = np.zeros(nBins, dtype=np.int64) timeStamps = expon.rvs(loc=0, scale=tau, size=size) ts64 = timeStamps.astype(np.uint64) tsBinner.tsBinner(ts64, timeHgValues) param = expon.fit(timeStamps) fit = expon.pdf(x,loc=param[0],scale=param[1]) fit *= size tvf = timeHgValues.astype(np.double) tvf[tvf<1] = 1e-3 # the plot looks nicer if zero values are replaced plt.plot(x, tvf, label="data") plt.plot(x, fit, label="fit") plt.yscale('log') plt.xlim(xmax=100) plt.ylim(ymin=0.09) plt.legend() plt.title("true tau=%.1f fit tau=%.1f"%(tau,param[1])) plt.savefig(inspect.stack()[0][3]+".png")
def displayFits(self):
    """
    generates two histograms on the same plot. One uses maximum likelihood to
    fit the data while the other uses the average time.
    """
    tau = 25.0
    nBins = 400
    size = 100
    taulist = []
    taulistavg = []

    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        # average-time estimate: the sample mean estimates the scale (tau)
        param = sum(timeStamps) / len(timeStamps)
        fit = expon.pdf(x, scale=param)
        fit *= size
        taulistavg.append(param)

    for i in range(1000):
        x = range(nBins)
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size)
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        param = expon.fit(timeStamps)
        fit = expon.pdf(x, loc=param[0], scale=param[1])
        fit *= size
        taulist.append(param[1])

    hist, bins = np.histogram(taulistavg, bins=20, range=(15, 35))
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    plt.step(center, hist, where='post', label="averagetime", color='g')

    hist, bins = np.histogram(taulist, bins=20, range=(15, 35))
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    plt.step(center, hist, where='post', label="maxlikelihood")
    plt.legend()
    plt.savefig(inspect.stack()[0][3] + ".png")
def _rstable0(alpha): U = uniform.rvs(size=1) while True: # generate non-zero exponential random variable W = expon.rvs(size=1) if W != 0: break return np.power(_A(math.pi * U, alpha) / np.power(W, 1.0 - alpha), 1.0 / alpha)
def generate_population(mu, N=1000, max_sigma=0.5, mean_sigma=0.08):
    """Extract samples from a normal distribution with variance distributed
    as an exponential distribution.
    """
    exp_min_size = 1. / max_sigma**2
    exp_mean_size = 1. / mean_sigma**2
    sigma = 1 / np.sqrt(expon.rvs(loc=exp_min_size, scale=exp_mean_size, size=N))
    return np.random.normal(mu, scale=sigma, size=N), sigma
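# Usage sketch for generate_population: samples are normal around mu with
# per-sample sigmas whose precision (1/sigma^2) is shifted-exponential, so the
# sigmas never exceed max_sigma. Assumes numpy and scipy.stats.expon are imported.
import numpy as np
samples, sigmas = generate_population(mu=5.0, N=10000)
print(samples.mean(), sigmas.max())   # mean close to 5, sigmas.max() <= 0.5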
def central_limit_theorem(): y = [] n=100 for i in range(1000): r = expon.rvs(scale=1, size=n) rsum=np.sum(r) z=(rsum-n)/np.sqrt(n) y.append(z) plt.hist(y,color='grey') plt.savefig('central_limit_theorem.png')
def poisson_rate(exposuretime):
    '''Generate Poisson distributed times.

    Parameters
    ----------
    exposuretime : float

    Returns
    -------
    times : `numpy.ndarray`
        Poisson distributed times.
    '''
    # Make 10 % more numbers than we expect to need, because it's random
    times = expon.rvs(scale=1. / rate, size=int(exposuretime * rate * 1.1))
    # If we don't have enough numbers right now, add some more.
    while times.sum() < exposuretime:
        times = np.hstack([times,
                           expon.rvs(scale=1 / rate,
                                     size=int((exposuretime - times.sum()) * rate * 1.1))])
    times = np.cumsum(times)
    return times[times < exposuretime]
def generate_random_exponential_data(dlamb, num=100):
    """
    This function will generate some random exponential 2d data. There is no
    time series information here. This is just a one-time bunch of points.

    parameters:
        dlamb: float - the exponential lambda parameter
        num: int - total number of coordinates
    returns: numpy array of coordinates.
    """
    xcoord = expon.rvs(loc=0, scale=dlamb, size=num)
    ycoord = expon.rvs(loc=0, scale=dlamb, size=num)
    # column_stack works under Python 3, where zip() returns an iterator
    return np.column_stack((xcoord, ycoord)).astype(np.float64)
def reducedChiHist(self):
    def funcExpon(x, a, b, c, d):
        retval = a * np.exp(-b * (x - d)) + c
        retval[x < d] = 0
        return retval

    tau = 25.0
    t0 = 40
    nBins = 800
    size = 10000
    taulist = []
    for i in range(100):
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size) + t0
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        xPoints = []
        yPoints = []
        for x in range(nBins):
            y = timeHgValues[x]
            if y > 2:
                xPoints.append(x)
                yPoints.append(y)
        bGuess = 1 / (np.mean(timeStamps))
        aGuess = bGuess * size
        cGuess = 0
        dGuess = t0
        pGuess = [aGuess, bGuess, cGuess, dGuess]
        xArray = np.array(xPoints)  # curve_fit needs arrays, not lists
        yArray = np.array(yPoints)
        ySigma = np.sqrt(yArray)  # Poisson errors; yArray**1/2 would just halve the counts
        popt, pcov = curve_fit(funcExpon, xArray, yArray, p0=pGuess, sigma=ySigma)
        xPlot = np.linspace(xArray[0], xArray[-1], 1000)
        yPlot = funcExpon(xPlot, *popt)
        yPlotAtPoints = funcExpon(xArray, *popt)
        chi_2 = sum(((yArray - yPlotAtPoints)**2) / ySigma)
        red_chi_2 = chi_2 / (len(yArray) - len(popt))
        print(red_chi_2)

    hist, bins = np.histogram(red_chi_2, bins=20)
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    plt.step(center, hist, where='post')
    plt.savefig(inspect.stack()[0][3] + ".png")
def buildMat(_M=1,_N=1,_pConJ=0.1,_pConR=1,_J=1.,_sdJ=0.,_R=10.,_sdR=0.,_dist="bernouilli",_isSym=True,_isUni=False,_type="all"): ####pick values####### minDim=min(_N,_M); if _dist=="bernouilli": temp=np.triu(bernoulli.rvs(_pConJ,size=(_M,_N))); temp[:minDim,:minDim]=temp[:minDim,:minDim]-diag(diagonal(temp))+diag(bernoulli.rvs(_pConR,size=(minDim))); if _dist=="poisson": temp=np.triu(poisson.rvs(_pConJ,size=(_M,_N))); temp[:minDim,:minDim]=temp[:minDim,:minDim]-diag(diagonal(temp))+diag(poisson.rvs(_pConR,size=(minDim))); elif _dist=="uniform": temp=_J+_sdJ*(0.5-np.triu(uniform.rvs(size=(_M,_N)))); temp[:minDim,:minDim]=temp[:minDim,:minDim]-diag(diagonal(temp))+diag(_R+_sdR*(0.5-uniform.rvs(size=(minDim)))); elif _dist=="expon": temp=np.triu(expon.rvs(_J,size=(_M,_N))); temp[:minDim,:minDim]=temp[:minDim,:minDim]-diag(diagonal(temp))+diag(expon.rvs(_R,size=(minDim))); elif _dist=="norm": temp=_sdJ*np.triu(norm.rvs(_J,size=(_M,_N))); temp[:minDim,:minDim]=temp[:minDim,:minDim]-diag(diagonal(temp))+diag(_sdR*norm.rvs(_R,size=(minDim))); ####symmetrize matrix### if _isSym==True: if _N==_M: temp=_J*(temp.T+temp-2*diag(diagonal(temp)))+_R*diag(diagonal(temp)); else: print("buildMat : N!=M, cannot 'symmetrize' the matrix"); ####unitarize matrix### if _isUni==True: if _isSym==True: print("buildMat : WARNING : isUni=True, the matrix will not be symetric..."); temp,s,VT=svd(temp); ####render dense or sparse matrix#### if _type=="all": return temp; elif _type=="sparse": return scipy.sparse.lil_matrix(temp);
def testScipyExponential():
    data0 = expon.rvs(scale=10, size=1000)

    ###################
    data = data0
    plt.figure()
    x = np.linspace(0, 100, 100)
    plt.hist(data, bins=x, normed=True)
    plt.plot(x, expon.pdf(x, loc=0, scale=10), color='g')
    # loc, scale = expon.fit(data, floc=0)
    # plt.plot(x, expon.pdf(x, loc=loc, scale=scale), color='r')

    removedHeadLength = 1.0
    dataNoHead = [v for v in data if v > removedHeadLength]
    loc1, scale1 = expon.fit(dataNoHead)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale1), color='b')
    loc, scale = expon.fit(dataNoHead, floc=removedHeadLength)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale), color='r')

    # non-normed graphs
    # plt.figure()
    # plt.hist(data0, bins=x, normed=False)
    # plt.plot(x, expon.pdf(x, loc=0, scale=10)*len(data0), color='r')

    plt.figure()
    plt.hist(dataNoHead, bins=x, normed=False)
    # s = len(dataNoHead) / sInvNormalisation = integral(pdf, removedHeadLength, infty)
    int_0_removedHeadLength_expon = np.exp(-float(removedHeadLength) / scale)
    s = len(dataNoHead) / int_0_removedHeadLength_expon
    s0 = len(data0) / 1.0
    print(s0, len(dataNoHead), s, file=sys.stderr)
    plt.plot(x, expon.pdf(x, loc=0, scale=scale) * s, color='r')

    ##############################################################
    # deprecated
    ##############################################################
    # non-linear fit
    # A, K, C = fit_exp_nonlinear(t, noisy)
    # linear fit with the constant set to 0
    # C = 0
    # A, K = fit_exp_linear(t, noisy, C)
    # ysModel = model_func(t, A, K, C)

    # plt.tight_layout()
    # plt.xlim(0, 100)
    # plt.title("OSB length distribution in Human-Mouse comparison \n Confidence Interval : %s*sigma around mean" % arguments["ICfactorOfSigma"])
    # plt.title("")
    #
    # plt.legend()
    # plt.savefig(sys.stdout, format='svg')
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    x = np.linspace(-5, 5, 100)
    ax.plot(x, norm.pdf(x))
    # Central limit theorem: the sample mean is approximately normal
    # once the sample size is large enough.
    y = []
    n = 100
    for i in range(1000):
        r = expon.rvs(scale=2, size=100)
        rsum = np.sum(r)
        z = (rsum - 100 * 2) / np.sqrt(4 * 100)
        y.append(z)
    ax.hist(y, normed=True, alpha=0.2)
    plt.show()
def poisson_rate(exposuretime, geomarea):
    '''Generate Poisson distributed times.

    Parameters
    ----------
    exposuretime : float
        Exposure time in sec.
    geomarea : `astropy.unit.Quantity`
        Geometric opening area of telescope

    Returns
    -------
    times : `numpy.ndarray`
        Poisson distributed times.
    '''
    fullrate = rate * geomarea.to(u.cm**2).value
    # Make 10 % more numbers than we expect to need, because it's random
    times = expon.rvs(scale=1. / fullrate, size=int(exposuretime * fullrate * 1.1))
    # If we don't have enough numbers right now, add some more.
    while times.sum() < exposuretime:
        times = np.hstack([times,
                           expon.rvs(scale=1 / fullrate,
                                     size=int((exposuretime - times.sum()) * fullrate * 1.1))])
    times = np.cumsum(times)
    return times[times < exposuretime]
def sampling_distribution(): fig, ax = plt.subplots(1, 1) #display the probability density function x = np.linspace(-4, 4, 100) ax.plot(x, norm.pdf(x)) #simulate the sampling distribution y = [] n=100 for i in range(1000): r = expon.rvs(scale=1, size=n) rsum=np.sum(r) z=(rsum-n)/np.sqrt(n) y.append(z) ax.hist(y, normed=True, alpha=0.2) plt.show()
def printReducedChi(self):
    def funcExpon(x, a, b, c, d):
        retval = a * np.exp(-b * (x - d)) + c
        retval[x < d] = 0
        return retval

    tau = 25.0
    t0 = 40
    nBins = 800
    size = 10000
    taulist = []
    for i in range(100):
        timeHgValues = np.zeros(nBins, dtype=np.int64)
        timeStamps = expon.rvs(loc=0, scale=tau, size=size) + t0
        ts64 = timeStamps.astype(np.uint64)
        tsBinner.tsBinner(ts64, timeHgValues)
        xPoints = []
        yPoints = []
        for x in range(nBins):
            y = timeHgValues[x]
            if y > 2:
                xPoints.append(x)
                yPoints.append(y)
        bGuess = 1 / (np.mean(timeStamps))
        aGuess = bGuess * size
        cGuess = 0
        dGuess = t0
        pGuess = [aGuess, bGuess, cGuess, dGuess]
        xArray = np.array(xPoints)  # curve_fit needs arrays, not lists
        yArray = np.array(yPoints)
        ySigma = np.sqrt(yArray)  # Poisson errors; yArray**1/2 would just halve the counts
        popt, pcov = curve_fit(funcExpon, xArray, yArray, p0=pGuess, sigma=ySigma)
        xPlot = np.linspace(xArray[0], xArray[-1], 1000)
        yPlot = funcExpon(xPlot, *popt)
        yPlotAtPoints = funcExpon(xArray, *popt)
        chi_2 = sum(((yArray - yPlotAtPoints)**2) / ySigma)
        red_chi_2 = chi_2 / (len(yArray) - len(popt))
        print(red_chi_2)
def central_limit_theorem():
    y = []
    n = 100
    for i in range(1000):
        r = expon.rvs(scale=1, size=n)
        rsum = np.sum(r)
        z = (rsum - n) / np.sqrt(n)
        y.append(z)
    plt.subplot(211)
    print(y)
    plt.hist(y, color='grey')

    y2 = []
    for i in range(10000):
        r = poisson.rvs(3, size=n)
        rsum = np.sum(r)
        z = (rsum - n) / np.sqrt(n)
        y2.append(z)
    plt.subplot(212)
    plt.hist(y2, color='r')
    plt.show()
def gillespie(rate_funcs, stop_time, init_state, updates):
    n_reacts = len(rate_funcs)
    t = 0
    s = tuple(init_state)
    path = [(t, s)]
    while True:
        jump_rates = [f(s) for f in rate_funcs]
        exit_rate = sum(jump_rates)
        if exit_rate == 0:
            break
        probs = [r / exit_rate for r in jump_rates]
        index = np.random.choice(n_reacts, p=probs)
        # the waiting time to the next event is exponential in the total exit rate
        t = t + expon.rvs(scale=1 / exit_rate)
        if t >= stop_time:
            break
        s = update_state(s, updates[index])
        #s = tuple(map(add,s,updates[index])) #extra tuple() for Python 3.x
        #s = tuple(x+y for (x,y) in zip(s,updates[index]))
        path = path + [(t, s)]
    path = path + [(stop_time, s)]
    return path
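# The gillespie driver above calls update_state, which is not shown; a minimal
# sketch consistent with the commented-out alternatives is elementwise addition
# of the update vector. The usage below simulates a simple birth-death process
# (constant birth rate, per-individual death rate); all values are illustrative.
from scipy.stats import expon
import numpy as np

def update_state(state, update):
    return tuple(x + y for x, y in zip(state, update))

rate_funcs = [lambda s: 1.0,            # birth at constant rate 1.0
              lambda s: 0.1 * s[0]]     # death proportional to population
updates = [(1,), (-1,)]
path = gillespie(rate_funcs, stop_time=100.0, init_state=(10,), updates=updates)
print(len(path), path[-1])              # number of jumps and final (time, state)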
def _plot_correct_weights(): # draw data from an exponential distribution from scipy.stats import expon import pylab as P scale = 1.0 data = expon.rvs(scale=scale, size=8000) bandwidth = 20/np.sqrt(data.shape[0]) range = np.array([0, 7]) n = 500 y = np.linspace(range[0], range[1], num=n) eps = 1e-5 ## Use corrected samples q = _correct_weights(data, bandwidth, range, filter=False) target_densities1 = figtree(data, y, q, bandwidth, epsilon=eps, eval="auto", verbose=True) # now try again with uncorrected densities q = np.ones(data.shape) target_densities2 = figtree(data, y, q, bandwidth, epsilon=eps, eval="auto", verbose=True) print("Smallest sample at %g" % min(data)) # plot the exponential density with max. likelihood estimate of the scale P.plot(y, expon.pdf(y, scale=np.mean(data))) P.plot(y, target_densities1 , 'ro') P.title("Gaussian Kernel Density Estimation") # P.show() P.savefig("KDE_50000_h-0.05.pdf")
from scipy.stats import lognorm, expon # Trying out different broad distributions with linear and logarithmic PDFs: n_points = 100000 # power law: # slope = -2! one_over_rands = 1/np.random.rand(n_points) # http://en.wikipedia.org/wiki/Power_law # exponential distribution exps = expon.rvs(size=1000) # http://en.wikipedia.org/wiki/Exponential_distribution # lognormal (looks like a normal distribution in a log-log scale!) lognorms = lognorm.rvs(1.0, size=1000) # http://en.wikipedia.org/wiki/Log-normal_distribution fig = plt.figure(figsize=(15,15)) fig.suptitle('Different broad distribution PDFs in lin-lin, log-log, and lin-log axes') n_bins = 30 for i, (rands, name) in enumerate(zip([one_over_rands, exps, lognorms], ["power law", "exponential", "lognormal"])): # linear-linear scale ax = fig.add_subplot(4, 3, i+1) ax.hist(rands, n_bins, normed=True)