def find_best_annualized_usage_params(target_annualized_usage, model,
        start_params, params_to_change, weather_normal_source, n_guesses=100):

    best_params = start_params
    meter = AnnualizedUsageMeter(model=model, temperature_unit_str=TEMPERATURE_UNIT_STR)

    best_result = meter.evaluate_raw(model_params=best_params, weather_normal_source=weather_normal_source)
    best_ann_usage = best_result["annualized_usage"][0]

    for n in range(n_guesses):

        resolution = abs((target_annualized_usage - best_ann_usage) / target_annualized_usage)

        param_dict = best_params.to_dict()
        for param_name, scale_factor in params_to_change:
            # perturb the current best value; redraw until it is non-negative
            current_value = norm.rvs(param_dict[param_name], resolution * scale_factor)
            while current_value < 0:
                current_value = norm.rvs(param_dict[param_name], resolution * scale_factor)
            param_dict[param_name] = current_value

        model_params = model.param_type(param_dict)

        result = meter.evaluate_raw(model_params=model_params, weather_normal_source=weather_normal_source)
        ann_usage = result["annualized_usage"][0]

        if abs(target_annualized_usage - ann_usage) < abs(target_annualized_usage - best_ann_usage):
            best_params = model_params
            best_ann_usage = ann_usage

    return best_params, best_ann_usage
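The routine above is a shrinking-step random search: each guess perturbs the best parameters with Gaussian noise whose spread scales with the remaining relative error. A minimal standalone sketch of that idea on a toy one-parameter model (the names and the quadratic "usage" model are illustrative, not part of the original meter API):

import numpy as np
from scipy.stats import norm

def toy_random_search(target, start=1.0, n_guesses=100, seed=0):
    rng = np.random.default_rng(seed)
    best = start
    best_usage = best ** 2                     # toy stand-in for annualized usage
    for _ in range(n_guesses):
        resolution = abs((target - best_usage) / target)
        guess = norm.rvs(best, resolution, random_state=rng)
        while guess < 0:                       # keep the parameter non-negative
            guess = norm.rvs(best, resolution, random_state=rng)
        usage = guess ** 2
        if abs(target - usage) < abs(target - best_usage):
            best, best_usage = guess, usage
    return best, best_usage

# toy_random_search(4.0) moves the parameter towards 2.0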
Example #2
def simulate(d, c, N, S, decide=decide_yn):
    """Simulate data under the modified YN model.

    Args:
        d: Sensitivity.
        c: Bias.
        N: Number of noise trials.
        S: Number of signal trials.
        decide: Decision-rule function.

    Returns:
        [(f1, h1, m1, r1) ... ]

    """
    out = []
    for _d, _c, _N, _S in zip(d, c, N, S):
        k = _d/2. + _c
        psi_0 = norm.rvs(0, 1, _N)
        rsp_0 = np.array([x for x in decide(psi_0, k)])
        r, f = [sum(rsp_0 == i) for i in range(2)]
        psi_1 = norm.rvs(_d, 1, _S)
        rsp_1 = np.array([x for x in decide(psi_1, k)])
        m, h = [sum(rsp_1 == i) for i in range(2)]
        out.append((f, h, m, r))
    return out
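decide_yn is not included in the snippet, so a plausible threshold decision rule is assumed below; with it, simulate() can be called directly (it also relies on the module-level numpy/scipy imports used above):

import numpy as np

def decide_yn(psi, k):
    # respond 1 ("yes") when the internal response exceeds the criterion k (assumed rule)
    return (np.asarray(psi) > k).astype(int)

# one condition: d'=1, no bias, 100 noise trials and 100 signal trials
# results = simulate([1.0], [0.0], [100], [100], decide=decide_yn)
# results[0] is (false alarms, hits, misses, correct rejections)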
Example #3
def get_rate(amount, month, period):
    rate  = 0.0
    while rate < rate_min or rate > rate_max:
        rv = random.uniform(0, 1)
        if rv < 0.8:
            rate = int(norm.rvs(loc=65, scale=8)) / 1000.0
        elif rv < 0.9:
            rate = int(norm.rvs(loc=88, scale=2)) / 1000.0
        else:
            rate = int(norm.rvs(loc=35, scale=50)) / 1000.0
        # Adjusting rate using needs (the rate for low amounts is forced to be large)
        if amount < amount_wm:
            rate = round(rate * amount_wm / amount, 3)

    # set low rate when period is small  
    rate = rate * (0.5 * period / period_max + 0.5 * amount_wm / amount)

    # Campaign
    if month % 100 >= 10:
        rate = rate + 0.035

    # Adjusting
    rate = round(rate, 3)
    if rate < rate_min:
        rate = rate_min
    elif rate > rate_max:
        rate = rate_max

    return rate
def sim_regular_yn(d, c, N, S):
    """Simulate data under the modified YN model.

    Parameters
    ----------
    d : float
        Measure of sensitivity.
    c : float
        Measure of bias.
    N : int
        Number of trials with stimuli from the first
        class.
    S : int
        Number of trials with stimuli from the second
        class.

    Returns
    -------
    f : int
        Count of observed false alarms.
    h : int
        Count of observed hits.
    m : int
        Count of observed misses.
    r : int
        Count of observed correct rejections.
    """
    k = d/2. + c
    psi_0 = norm.rvs(0, 1, N)
    rsp_0 = np.array([x for x in decide(psi_0, k)])
    r, f = [sum(rsp_0 == i) for i in range(2)]
    psi_1 = norm.rvs(d, 1, S)
    rsp_1 = np.array([x for x in decide(psi_1, k)])
    m, h = [sum(rsp_1 == i) for i in range(2)]
    return f, h, m, r
Example #5
def simulate_stupidDPM(iter_num, M):
	# Generate mixture sample
	N = 1000
	mu = [0.0, 10.0, 3.0]

	components = np.random.choice(range(3), size = N, replace = True, p = [0.3, 0.5, 0.2])
	samples = [norm.rvs(size = 1, loc = mu[components[i]], scale = 1)[0] for i in range(N)]

	## Sample G from DP(M, G0)
	v = beta.rvs(a = 1.0, b = M, size = N)
	prob_vector = np.append(np.array(v[0]), v[1:] * np.cumprod(1.0 - v[:-1]))
	thetas = norm.rvs(size = N, loc = 1.0, scale = 1.0)

	### Initialize thetas
	thetas = np.random.choice(thetas, size = N, replace = True, p = prob_vector)

	### Start MCMC chain
	for i in range(iter_num):
		for j in range(N):
			theta_temp = np.append(thetas[:j], thetas[j+1:])
			p = np.append(norm.pdf(samples[j], loc = theta_temp, scale = 1.0), M * norm.pdf(samples[j], loc = 1.0, scale = np.sqrt(2.0)))
			p = p / sum(p)
			temp = np.random.choice(np.append(theta_temp, N), size = 1, replace = True, p = p)[0]
			if temp == N:
				# the sentinel value N means "draw a new theta from the posterior base measure"
				thetas[j] = norm.rvs(loc = 0.5 * (samples[j] + 1), scale = np.sqrt(0.5))
			else:
				thetas[j] = temp
		print(thetas)
	return {"thetas": thetas, "y": samples}
def _test():
    '''
    Smoke test for density_based() on two-dimensional data with injected outliers.
    '''
    dim1_mean = 0
    dim1_std = 3

    dim2_mean = 20
    dim2_std = 3
    # 20 normal RVs with mean=0, std=3
    dim1 = list(norm.rvs(dim1_mean, dim1_std, size=20))
    # Add a couple of obvious outliers
    dim1.append(-10)
    dim1.append(10)

    dim2 = list(norm.rvs(dim2_mean, dim2_std, size=20))
    dim2.append(10)
    dim2.append(30)

    data = list(zip(dim1, dim2))

    confs = density_based(data)

    print('Dim1 params:', dim1_mean, dim1_std)
    print('Dim2 params:', dim2_mean, dim2_std)
    for d, conf in zip(data, confs):
        print(d, conf)
def test_simulated_correlations():

  # Get standard brain mask
  mr_directory = get_data_directory()
  standard = "%s/MNI152_T1_2mm_brain_mask.nii.gz" %(mr_directory)
  thresholds = [0.0,0.5,1.0,1.5,1.96,2.0]

  # Generate random data inside brain mask, run 10 iterations
  standard = nibabel.load(standard)
  number_values = len(numpy.where(standard.get_data()!=0)[0])
  numpy.random.seed(9191986)
  for x in range(0,10):  
    data1 = norm.rvs(size=number_values)
    data2 = norm.rvs(size=number_values)
    corr = pearsonr(data1,data2)[0]
      
    # Put into faux nifti images
    mr1 = numpy.zeros(standard.shape)
    mr1[standard.get_data()!=0] = data1
    mr1 = nibabel.nifti1.Nifti1Image(mr1,affine=standard.get_affine(),header=standard.get_header())
    mr2 = numpy.zeros(standard.shape)
    mr2[standard.get_data()!=0] = data2
    mr2 = nibabel.nifti1.Nifti1Image(mr2,affine=standard.get_affine(),header=standard.get_header())  
    pdmask = make_binary_deletion_mask([mr1,mr2])
    pdmask = nibabel.Nifti1Image(pdmask,header=mr1.get_header(),affine=mr1.get_affine())
    score = calculate_correlation(images = [mr1,mr2],mask=pdmask)  
    assert_almost_equal(corr,score,decimal=5)
Example #8
def reamostrar(particulas, n_particulas = num_particulas):
    """
        Reamostra as partículas devolvendo novas particulas sorteadas
        de acordo com a probabilidade e deslocadas de acordo com uma variação normal    
        
        O notebook como_sortear tem dicas que podem ser úteis
        
        Depois de reamostradas todas as partículas precisam novamente ser deixadas com probabilidade igual
        
        Use 1/n ou 1, não importa desde que seja a mesma
    """
    probs = [p.w for p in particulas]

    print("Probabilidades: ")
    print(probs)
    print("Soma probs")
    print(sum(probs))

    pfinal = draw_random_sample(particulas, probs, n_particulas)
    
    
    for p in pfinal:
        p.x+=norm.rvs(scale=std_resample_x)
        p.y+=norm.rvs(scale=std_resample_y)
        p.theta+=norm.rvs(scale=std_resample_theta)
        p.w = 1.0

    return pfinal
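Particle and draw_random_sample are defined elsewhere in that project; a minimal standalone sketch of the same resample-and-jitter pattern, with a hypothetical Particle class and numpy.random.choice standing in for draw_random_sample:

import numpy as np
from scipy.stats import norm

class Particle:
    def __init__(self, x, y, theta, w):
        self.x, self.y, self.theta, self.w = x, y, theta, w

def resample(particles, std_xy=0.1, std_theta=0.05):
    probs = np.array([p.w for p in particles])
    probs = probs / probs.sum()
    chosen = np.random.choice(particles, size=len(particles), p=probs)
    # jitter each drawn particle and reset all weights to the same value
    return [Particle(p.x + norm.rvs(scale=std_xy),
                     p.y + norm.rvs(scale=std_xy),
                     p.theta + norm.rvs(scale=std_theta),
                     1.0) for p in chosen]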
def MakeSamples(parameters_A, parameters_B, percentage_A, TotalSize=200000):
    sizeA = int(percentage_A * TotalSize)
    sizeB = TotalSize - sizeA

    setA = []
    for mu0, sigma0 in parameters_A:
        setA.append(norm.rvs(loc=mu0, scale=sigma0, size=sizeA))
    setA = np.array(setA)
    ones = np.ones([1, setA.shape[1]])
    setA = np.transpose(np.append(setA, ones, axis=0))

    setB = []
    for mu0, sigma0 in parameters_B:
        setB.append(norm.rvs(loc=mu0, scale=sigma0, size=sizeB))
    setB = np.array(setB)
    zeros = np.zeros([1, setB.shape[1]])
    setB = np.transpose(np.append(setB, zeros, axis=0))

    npout = np.vstack([setA, setB])

    npout = np.concatenate((npout,
                            percentage_A * np.ones([npout.shape[0], 1])
                            ),
                           axis=1)

    return npout
Example #10
    def setup(self):

        #########
        # PART 1: Make model calcium data
        #########

        # Data parameters
        RATE = 1  # mean firing rate of poisson spike train (Hz)
        STEPS = 100  # number of time steps in data
        STEPS_LONG = 5000  # number of time steps in data
        TAU = 0.6  # time constant of calcium indicator (seconds)
        DELTAT = 1 / 30  # time step duration (seconds)
        self.sigma = 0.1  # standard deviation of gaussian noise
        SEED = 2222  # random number generator seed

        # Make a Poisson spike train
        self.spikes = sima.spikes.get_poisson_spikes(deltat=DELTAT, rate=RATE, steps=STEPS, seed=SEED)

        # longer time-series for parameter estimation
        self.spikes_long = sima.spikes.get_poisson_spikes(deltat=DELTAT, rate=RATE, steps=STEPS_LONG, seed=SEED)

        # Convolve with kernel to make calcium signal
        np.random.seed(SEED)
        self.gamma = 1 - (DELTAT / TAU)
        CALCIUM = signal.lfilter([1], [1, -self.gamma], self.spikes)
        CALCIUM_LONG = signal.lfilter([1], [1, -self.gamma], self.spikes_long)

        # Make fluorescence traces with random gaussian noise and baseline
        self.fluors = CALCIUM + norm.rvs(scale=self.sigma, size=STEPS) + uniform.rvs()
        self.fluors_long = CALCIUM_LONG + norm.rvs(scale=self.sigma, size=STEPS_LONG) + uniform.rvs()
Example #11
    def epsilon(self, asset):
        """Sample from the standard normal distribution for the given asset.

        For uncorrelated risk calculation jobs we sample the standard normal
        distribution for each asset.
        In the opposite case ("perfectly correlated" assets) we sample for each
        building typology i.e. two assets with the same typology will "share"
        the same standard normal distribution sample.

        Two assets are considered to be of the same building typology if their
        taxonomy is the same. The asset's `taxonomy` is only needed for
        correlated jobs and unlikely to be available for uncorrelated ones.
        """
        correlation = getattr(self, "ASSET_CORRELATION", None)
        if not correlation:
            # Sample per asset
            return norm.rvs(loc=0, scale=1)
        elif correlation != "perfect":
            raise ValueError('Invalid "ASSET_CORRELATION": %s' % correlation)
        else:
            # Sample per building typology
            samples = getattr(self, "samples", None)
            if samples is None:
                # These are two references for the same dictionary.
                samples = self.samples = dict()

            taxonomy = asset.get("taxonomy")
            if taxonomy is None:
                raise ValueError("Asset %s has no taxonomy" % asset["assetID"])

            if taxonomy not in samples:
                samples[taxonomy] = norm.rvs(loc=0, scale=1)
            return samples[taxonomy]
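Stripped of the job machinery, the per-taxonomy caching in the correlated branch reduces to a small memoization pattern (hypothetical standalone form):

from scipy.stats import norm

_samples = {}

def correlated_epsilon(taxonomy):
    # every asset of the same taxonomy shares one standard-normal draw
    if taxonomy not in _samples:
        _samples[taxonomy] = norm.rvs(loc=0, scale=1)
    return _samples[taxonomy]

# correlated_epsilon("RC") == correlated_epsilon("RC")  -> True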
 def __init__(self, d, w = None, rate = 1.0):
     self.d = d
     if w is None:
         # default to small random initial weights
         self.w = normal.rvs(loc = 0, scale = 0.1, size = d)
     else:
         self.w = w
     self.rate = rate
Example #13
def get_rate(amount, month, period):
    rate  = 0.0
    while rate < rate_min or rate > rate_max:
        rv = random.uniform(0, 1)
        if rv < 0.7:
            rate = int(norm.rvs(loc=65, scale=8)) / 1000.0
        elif rv < 0.9:
            rate = int(norm.rvs(loc=88, scale=2)) / 1000.0
        else:
            rate = int(norm.rvs(loc=35, scale=50)) / 1000.0

    # set low rate when period is small
    rate = rate + rate * (amount_wm - amount) * period / amount_wm / period_max

    # Campaign
    if month % 100 >= 10:
        rate = rate + 0.015

    # Adjusting
    rate = round(rate, 3)
    if rate < rate_min:
        rate = rate_min
    elif rate > rate_max:
        rate = rate_max

    return rate
def MakeMultiGSamples(parameters_A,
                      parameters_B,
                      percentage_A,
                      percentage_A_Expected,
                      TotalSize=200000):

    sizeA = int(percentage_A * TotalSize)
    sizeB = TotalSize - sizeA
    print(sizeA, sizeB)

    setA = []
    for feature in parameters_A:
        # print feature
        tmp_feat = np.array([])
        for (mu0, sigma0), percent in feature:
            # print mu0, sigma0, percent
            tmp_feat = np.append(tmp_feat,
                                 norm.rvs(loc=mu0,
                                          scale=sigma0,
                                          size=int(sizeA * percent)
                                          )
                                 )
            # print "tmp_feat:",tmp_feat
            np.random.shuffle(tmp_feat)
        setA.append(tmp_feat)
        # print setA

    setA = np.array(setA)
    ones = np.ones([1, setA.shape[1]])
    setA = np.transpose(np.append(setA, ones, axis=0))
    # print setA

    setB = []
    for feature in parameters_B:
        tmp_feat = np.array([])
        for (mu0, sigma0), percent in feature:
            # print mu0, sigma0, percent
            tmp_feat = np.append(tmp_feat,
                                 norm.rvs(loc=mu0,
                                          scale=sigma0,
                                          size=int(sizeB * percent)
                                          )
                                 )
            np.random.shuffle(tmp_feat)
        setB.append(tmp_feat)
    setB = np.array(setB)
    zeros = np.zeros([1, setB.shape[1]])
    setB = np.transpose(np.append(setB, zeros, axis=0))

    print "Set1 shape:", setA.shape
    print "Set2 shape:", setB.shape

    npout = np.vstack([setA, setB])
    npout = np.concatenate((npout,
                            percentage_A_Expected * np.ones([npout.shape[0], 1])
                            ),
                           axis=1)

    return npout
def simulate_normal_model(means, serrs, count, taus=None, do_thetas=False):
    # Check for any zero standard errors (delta distributions) and defer to them
    for ii in range(len(means)):
        if serrs[ii] == 0:
            if do_thetas:
                results = np.zeros((count, 2 + len(means)))
                results[:, 0] = 0
                results[:, 1:] = means[ii]
            else:
                results = np.zeros((count, 2))
                results[:, 0] = 0
                results[:, 1] = means[ii]

            return results

    means = np.array(means, dtype=np.float_)
    varis = np.square(np.array(serrs, dtype=np.float_))

    if taus is None:
        taus = np.linspace(0, 2 * max(serrs), 100)

    p_tau = np.array([p_tau_given_y(tau, means, varis) for tau in taus])
    F_tau = np.cumsum(p_tau)
    F_tau = F_tau / F_tau[-1]

    if do_thetas:
        results = np.zeros((count, 2 + len(means)))
    else:
        results = np.zeros((count, 2))

    rands = get_random(taus, F_tau, count)

    for ii in range(count):
        tau = rands[ii]
        (vari_tau_sqrs, v_mu, mu_hat) = helper_params(means, varis, tau)
        if np.isnan(mu_hat):
            mu = np.nan
        else:
            mu = norm.rvs(loc=mu_hat, scale=math.sqrt(v_mu))

        results[ii, 0] = tau
        results[ii, 1] = mu

        if do_thetas:
            if tau == 0:
                # with tau = 0 every theta collapses exactly to mu
                thetas = mu * np.ones(varis.size)
            else:
                tau_sqr = tau * tau
                denoms = 1.0 / varis + 1.0 / tau_sqr
                vs = 1.0 / denoms
                theta_hats = (means / varis + mu / tau_sqr) / denoms
                # vs are variances, so the normal scale is their square root
                thetas = norm.rvs(loc=theta_hats, scale=np.sqrt(vs))

            results[ii, 2:] = thetas

    return results
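get_random is not shown with this snippet; it draws tau values from the tabulated CDF F_tau. A plausible inverse-transform sketch of such a helper (an assumption, not the original implementation):

import numpy as np

def get_random(taus, F_tau, count):
    # inverse-transform sampling: map uniforms through the tabulated CDF by interpolation
    u = np.random.uniform(size=count)
    return np.interp(u, F_tau, taus)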
Example #16
def test():
    from scipy.stats import norm
    rvs = np.append(norm.rvs(loc=2,scale=1,size=(200,1)),
                    norm.rvs(loc=1,scale=3,size=(200,1)),
                    axis=1).T

    scatter_kde(rvs[0,:], rvs[1,:])

    pl.show()
Example #17
def normalDisPrior(fileName, avgBurstTime, procNumber, priorNum):
    normalDisList = norm.rvs(avgBurstTime, avgBurstTime / 6, procNumber)
    priorList = norm.rvs(priorNum, 9 / 6, procNumber)
    # write one line per process; the with-block closes the file automatically
    with open(fileName, 'a') as f:
        for i, j in zip(normalDisList, priorList):
            n = int(i)
            r = random.randint(0, 69)
            p = int(j)
            f.write(str(n) + ' ' + str(r) + ' ' + str(p) + '\n')
Example #18
def updateSpare(spare_list, spare_amount):
    '''
    Updates the spare list of gaussian distributed random variables when they are used.
    '''
    # the old list is simply replaced with freshly drawn values
    mov_spare_x = norm.rvs(size = spare_amount).tolist()
    mov_spare_y = norm.rvs(size = spare_amount).tolist()
    spare_list = [mov_spare_x, mov_spare_y]
    return spare_list
def get_normal_example(sample_count):
    loc = 1.0
    scale = 2.0
    samples0 = norm.rvs(loc, scale, sample_count)
    samples1 = norm.rvs(loc, scale, sample_count)
    scores0 = norm.logpdf(samples0, loc, scale)
    scores1 = norm.logpdf(samples1, loc, scale)
    samples = numpy.array(list(zip(samples0, samples1)))
    scores = scores0 + scores1
    return {'name': 'normal', 'samples': samples, 'scores': scores}
Example #20
 def sim_data(self, K, N):
     """
     Draws K stationary time series of length N from the Vasicek model and
     returns them as a K x N array.
     """
     X = np.zeros((K, N))
     X[:,0] = norm.rvs(size=K, loc=self.stat_mean, scale=self.stat_sd)
     for t in range(1, N):
         X[:,t] = self.beta + self.alpha * X[:,t-1] + self.s * norm.rvs(size=K)
     return X
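For the AR(1) recursion X_t = beta + alpha * X_{t-1} + s * Z_t used above, the stationary moments are mean = beta / (1 - alpha) and sd = s / sqrt(1 - alpha**2); a hedged standalone sketch with those attributes filled in (the class name and defaults are illustrative):

import numpy as np
from scipy.stats import norm

class Vasicek:
    def __init__(self, alpha=0.9, beta=0.1, s=0.05):
        self.alpha, self.beta, self.s = alpha, beta, s
        self.stat_mean = beta / (1 - alpha)                # stationary mean
        self.stat_sd = s / np.sqrt(1 - alpha ** 2)         # stationary standard deviation

    def sim_data(self, K, N):
        X = np.zeros((K, N))
        X[:, 0] = norm.rvs(size=K, loc=self.stat_mean, scale=self.stat_sd)
        for t in range(1, N):
            X[:, t] = self.beta + self.alpha * X[:, t - 1] + self.s * norm.rvs(size=K)
        return X

# X = Vasicek().sim_data(K=5, N=200)   # 5 stationary paths of length 200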
Example #21
def get_amount(amount_min):
    amount = 0
    while amount < amount_min:
        rv = random.uniform(0, 1)
        if rv < 0.65:
            amount = int(norm.rvs(loc=100, scale=35)) * 1000
        elif rv < 0.95:
            amount = int(norm.rvs(loc=285, scale=50)) * 1000
        else:
            amount = int(norm.rvs(loc=450, scale=7)) * 1000
    return amount
Example #22
 def ts(self, n): 
     Z = norm.rvs(size=n)
     W = norm.rvs(size=n)
     X = np.empty(n)
     s = np.empty(n)  # Holds log of s
     s[0] = self.s0
     X[0] = self.beta / (1 - self.alpha)
     for t in range(1, n): 
         s[t] = self.b * (s[t-1]**(1 - self.rho)) * np.exp(self.gamma * W[t])
         X[t] = self.beta + self.alpha * X[t-1] + s[t-1] * Z[t]
     return X
Example #23
def get_amount(amount_min):
    amount = 0
    while amount < amount_min or amount > amount_max:
        rv = random.uniform(0, 1)
        if rv < 0.3:
            amount = amount_max - int(norm.rvs(loc=120, scale=30)) * 1000
        elif rv < 0.55:
            amount = amount_max - int(chi2.rvs(5, loc=380, scale=145)) * 1000
        else:
            amount = amount_max - int(norm.rvs(loc=660, scale=145)) * 1000
    return amount
Example #24
def generate(n, mu, sigma, gap, c_min, c_max, distribution="lognormal"):
	r=100 #readlength
	if distribution == 'normal':
		samples = norm.rvs(loc=mu, scale=sigma, size=2*n)

	elif distribution == 'lognormal':
		logsample = norm.rvs(loc=mu, scale=sigma, size=max(1, int(gap) // 100) * n)
		samples = np.exp(logsample)
	else:
		print("Specify normal, lognormal or do not set this argument.")
		return None

	min_sample = min(samples)
	max_sample = float(max(samples))


	mean_samples =  sum(samples)/len(samples)
	# print 'Mean all observations:', mean_samples
	std_dev_samples = (sum(list(map((lambda x: x ** 2 - 2 * x * mean_samples + mean_samples ** 2), samples))) / (len(samples) - 1)) ** 0.5
	# print 'STDDEV all samples:', std_dev_samples

	#observations_over_gap = [ int(round(max(s-gap,0),0)) for s in samples]
	#observations_over_gap = filter(lambda x: x>0, observations_over_gap)
	#print sum(observations_over_gap)/len(observations_over_gap)

	samples_kept = []
	for s in samples:
		if s > c_min + c_max + gap or s < gap or s < -gap or c_min <= -gap:
			continue
		p = random.uniform(0,1)
		# print 'lol',max_sample, gap, (s-gap-2*r) / max(0,(max_sample-gap-2*r))
		if p < (s-gap-2*r) / max(0,(max_sample-gap-2*r)):
			samples_kept.append(s)

	# print len(samples_kept)
	if len(samples_kept) <= 1:
		return []
	# print "gap:", gap

	mean_samples =  sum(samples_kept)/len(samples_kept)
	# print 'Mean conditional fragment size:', mean_samples
	std_dev_samples = (sum(list(map((lambda x: x ** 2 - 2 * x * mean_samples + mean_samples ** 2), samples_kept))) / (len(samples_kept) - 1)) ** 0.5
	# print 'STDDEV conditional fragment size:', std_dev_samples


	observations_over_gap = [int(round(max(s - gap, 0), 0)) for s in samples_kept]
	observations_kept = list(filter(lambda x: x > 0, observations_over_gap))
	mean_obs =  sum(observations_kept)/len(observations_kept)
	# print 'Mean conditional observed size:', mean_obs
	std_dev_obs = (sum(list(map((lambda x: x ** 2 - 2 * x * mean_obs + mean_obs ** 2), observations_kept))) / (len(observations_kept) - 1)) ** 0.5
	# print 'STDDEV conditional observed size:', std_dev_obs
	# print
	# print
	return observations_kept
Example #25
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x=np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x,df))
    
    #simulate the chi2 distribution
    y = []
    n=10
    for i in range(1000):
        chi2r=0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r=chi2r+r[j]**2
        y.append(chi2r)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    df = 10
    x=np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x,df))
    
    #simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx/np.sqrt(ry/df)
        y.append(rt)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
    
    fig, ax = plt.subplots(1, 1)
    #display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    
    #simulate the F-distribution
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)  # F statistic: ratio of chi-square variates scaled by their degrees of freedom
        y.append(rf)

    ax.hist(y, density=True, alpha=0.2)
    plt.show()
Example #26
def get_period(min, max):
    # period = random.randint(min, max)
    period = 0
    while period < min or period > max:
        rv = random.uniform(0, 1)
        if rv < 0.45:
            period = int(norm.rvs(loc=12, scale=2))
        elif rv < 0.8:
            period = int(norm.rvs(loc=6, scale=1))
        else:
            period = int(norm.rvs(loc=14, scale=4))
    return period
Example #27
    def update(self):
        system_dict = {s.get_data()['name']: s for s in self.systems}
        self.data['core_power'] *= (2 ** (1 / system_dict['reac'].double_time()))

        power = self.data['core_power']
        src1_accuracy = self.data['src1_accuracy']
        src2_accuracy = self.data['src2_accuracy']
        irc1_accuracy = self.data['irc1_accuracy']
        irc2_accuracy = self.data['irc2_accuracy']
        # each detector uses its own accuracy when adding relative Gaussian noise
        self.data['irc1'] = power / self.data['irc1a2fp'] * (1 + norm_module.rvs(0, irc1_accuracy, 1)[0])
        self.data['irc2'] = power / self.data['irc2a2fp'] * (1 + norm_module.rvs(0, irc2_accuracy, 1)[0])
        self.data['src1'] = power / self.data['src1cps2fp'] * (1 + norm_module.rvs(0, src1_accuracy, 1)[0])
        self.data['src2'] = power / self.data['src2cps2fp'] * (1 + norm_module.rvs(0, src2_accuracy, 1)[0])
def reduce_and_save(filename, add_noise=False, rms_noise=0.001,
                    output_path="", cube_output=None,
                    nsig=3, slicewise_noise=True):
    '''
    Load the cube in and derive the property arrays.
    '''

    if add_noise:
        if rms_noise is None:
            raise TypeError("Must specify value of rms noise.")

        cube, hdr = getdata(filename, header=True)

        # Optionally scale noise by 1/10th of the 98th percentile in the cube
        if rms_noise == 'scaled':
            rms_noise = 0.1*np.percentile(cube[np.isfinite(cube)], 98)

        from scipy.stats import norm
        if not slicewise_noise:
            cube += norm.rvs(0.0, rms_noise, cube.shape)
        else:
            spec_shape = cube.shape[0]
            slice_shape = cube.shape[1:]
            for i in range(spec_shape):
                cube[i, :, :] += norm.rvs(0.0, rms_noise, slice_shape)

        sc = SpectralCube(data=cube, wcs=WCS(hdr))

        mask = LazyMask(np.isfinite, sc)
        sc = sc.with_mask(mask)

    else:
        sc = filename

    reduc = Mask_and_Moments(sc, scale=rms_noise)
    reduc.make_mask(mask=reduc.cube > nsig * reduc.scale)

    reduc.make_moments()
    reduc.make_moment_errors()

    # Remove .fits from filename
    save_name = filename.split("/")[-1][:-4]

    reduc.to_fits(output_path+save_name)

    # Save the noisy cube too
    if add_noise:
        if cube_output is None:
            reduc.cube.hdu.writeto(output_path+save_name)
        else:
            reduc.cube.hdu.writeto(cube_output+save_name)
def kernelDensity():
    # creating data with two peaks
    sampD1 = norm.rvs(loc=-1.0,scale=1,size=300)
    sampD2 = norm.rvs(loc=2.0,scale=0.5,size=300)
    samp = hstack([sampD1,sampD2])

    # obtaining the pdf (my_pdf is a function!)
    my_pdf = gaussian_kde(samp)

    # plotting the result
    x = linspace(-5,5,100)
    plot(x,my_pdf(x),'r') # distribution function
    hist(samp, density=True, alpha=.3) # histogram
    show()
def rprior(size, hyperparameters):
    """ returns untransformed parameters """
    mu = norm.rvs(size = size, loc = hyperparameters["mu_mean"], scale = hyperparameters["mu_sd"])
    beta = norm.rvs(size = size, loc = hyperparameters["beta_mean"], scale = hyperparameters["beta_sd"])
    xi = random.exponential(scale = 1 / hyperparameters["xi_rate"], size = size)
    omega2 = random.exponential(scale = 1 / hyperparameters["omega2_rate"], size = size)
    lamb = random.exponential(scale = 1 / hyperparameters["lambda_rate"], size = size)
    parameters = zeros((5, size))
    parameters[0, :] = mu
    parameters[1, :] = beta
    parameters[2, :] = xi
    parameters[3, :] = omega2
    parameters[4, :] = lamb
    return parameters
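A usage sketch showing the hyperparameter keys rprior expects (placeholder values; it also assumes numpy's random and zeros plus scipy.stats.norm are imported at module level, as above):

hyperparameters = {"mu_mean": 0.0, "mu_sd": 1.0,
                   "beta_mean": 0.0, "beta_sd": 1.0,
                   "xi_rate": 1.0, "omega2_rate": 1.0, "lambda_rate": 1.0}
# params = rprior(size=100, hyperparameters=hyperparameters)
# params has shape (5, 100); rows are mu, beta, xi, omega2, lambda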
Example #31
def multiprocessing_deconvolution(argument_list):

	negative_control_scores, sgRNA_indices, perturbation_profile, gamma_list, simulations_n, replicates, guideindices2bin, averaging_method, scale, rescaled_sgRNA_indices_w_obs, groups, maximum_distance = argument_list

	# # Iterate through n simulations
	# beta_distributions = {}
	# for n in range(1, simulations_n + 1):

	# 	if n%100 == 0:
	# 		logger.info('Simulation %s out of %s ...' % (str(n), str(simulations_n)))

	replicate_store = {}
	for r in range(replicates):

		if negative_control_scores[0] == 'gaussian':
			if scale > 1:
				rescaled_observations = []
				for scaled_index in rescaled_sgRNA_indices_w_obs:
					rescaled_observations.append(np.mean(norm.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(guideindices2bin[scaled_index]))))

			else:
				rescaled_observations = norm.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(rescaled_sgRNA_indices_w_obs))

		elif negative_control_scores[0] == 'laplace':
			if scale > 1:
				rescaled_observations = []
				for scaled_index in rescaled_sgRNA_indices_w_obs:
					rescaled_observations.append(np.mean(laplace.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(guideindices2bin[scaled_index]))))

			else:
				rescaled_observations = laplace.rvs(loc = negative_control_scores[1][r][0], scale = negative_control_scores[1][r][1], size = len(rescaled_sgRNA_indices_w_obs))

		elif negative_control_scores[0] == 'negative_control_guides':
			if scale > 1:
				rescaled_observations = []
				for scaled_index in rescaled_sgRNA_indices_w_obs:
					rescaled_observations.append(np.mean(np.random.choice(negative_control_scores[1][r], len(guideindices2bin[scaled_index]), replace = True)))

			else:
				rescaled_observations = np.random.choice(negative_control_scores[1][r], len(rescaled_sgRNA_indices_w_obs), replace = True)

		# Set up regularized deconvolution optimization problem
		df = pd.DataFrame({'pos':rescaled_sgRNA_indices_w_obs, 'lfc':rescaled_observations, 'group':groups})

		genomic_coordinates = []
		gammas2betas = {}
		delete_gammas = []

		# Iterate through groups and perform deconvolution
		for group in df.group.unique():

			# Filtered dataframe to separate individual groups
			dff = df[df.group == group]

			# Make sure >1 sgRNA exists per group
			# if len(dff.index) > 1:

			# Assign relevant variables for optimization problem
			y = dff.lfc.tolist()
			# y = np.array(y).reshape(len(y), 1)
			betas = Variable(len(np.arange(dff.pos.tolist()[0], dff.pos.tolist()[-1], scale).tolist()) + maximum_distance)

			x_shift = [int(maximum_distance + (x - dff.pos.tolist()[0])/int(scale)) for x in dff.pos.tolist()]

			gamma = Parameter(sign = "positive")
			# gamma = Parameter(nonneg = True)

			genomic_coordinates += np.arange(int(dff.pos.tolist()[0]), int(dff.pos.tolist()[-1]) + scale, scale).tolist()

			# Formulate optimization problem
			objective = Minimize(0.5*sum_squares(y - conv(perturbation_profile, betas)[x_shift]) + gamma*sum_entries(abs(diff(betas))))
			# objective = Minimize(0.5*sum_squares(y - conv(perturbation_profile, betas)[x_shift]) + gamma*tv(betas))
			p = Problem(objective)

			# Solve for varying lambdas
			for g in gamma_list:

				# Make sure solver converges, otherwise delete gammas that fail
				try:

					if g not in gammas2betas:
						gammas2betas[g] = []

					gamma.value = g
					result = p.solve()
					gammas2betas[g] += np.array(betas.value).reshape(-1).tolist()[int(maximum_distance/2):-int(maximum_distance/2)]

				except:

					delete_gammas.append(g)
					continue

		# Delete gammas that failed to converge
		for g in delete_gammas:
			del gammas2betas[g]

		gammas2betas['indices'] = genomic_coordinates

		# Add to replicate store
		replicate_store[r] = gammas2betas[gamma_list[0]]

	# Create combined deconvolved signals from replicates for simulation
	deconvolved_signal = {}
	for i in replicate_store.keys():

		for j in range(len(replicate_store[i])):

			if j not in deconvolved_signal:
				deconvolved_signal[j] = []

			deconvolved_signal[j].append(replicate_store[i][j])

	# Create mean or median profile
	if averaging_method == 'mean':
		combine_simulations = [np.mean(deconvolved_signal[x]) for x in deconvolved_signal]

	elif averaging_method == 'median':
		combine_simulations = [np.median(deconvolved_signal[x]) for x in deconvolved_signal]

	# for i in range(len(combine_simulations)):
	# 	try:
	# 		beta_distributions[i].append(combine_simulations[i])
	# 	except:
	# 		beta_distributions[i] = [combine_simulations[i]]

	return combine_simulations
 def sample_mixed(self, pis, mus, sigmas, j, size=1):
     choice = np.random.choice(np.arange(0, pis.shape[1]), p=pis[j])
     return norm.rvs(size=size, loc=mus[j][choice], scale=sigmas[j][choice])
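The same pick-a-component-then-sample idea in standalone form for a single Gaussian mixture (names here are illustrative):

import numpy as np
from scipy.stats import norm

def sample_mixture(pis, mus, sigmas, size=1):
    # choose a component with probability pis, then sample from that Gaussian
    choice = np.random.choice(len(pis), p=pis)
    return norm.rvs(loc=mus[choice], scale=sigmas[choice], size=size)

# sample_mixture([0.3, 0.7], [0.0, 5.0], [1.0, 0.5], size=3)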
Example #33
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 19 17:42:04 2019

@author: flori
"""

from pandas import Series, DataFrame
import pandas as pd
import numpy as np
methodeA = Series([
    79.98, 80.04, 80.02, 80.04, 80.03, 80.03, 80.04, 79.97, 80.05, 80.03,
    80.02, 80.00, 80.02
])
print(methodeA.mean())
print(methodeA.std())

##########################

from scipy.stats import norm
np.random.seed(1)
methodeA_sim1 = Series(np.round(norm.rvs(size=6, loc=80, scale=0.02), 2))
print(methodeA_sim1)
print(methodeA_sim1.mean())
print(methodeA_sim1.std())
Example #34
def normal_rvs(mu, sigma=1, random_state=None):
    return norm.rvs(loc=mu, scale=sigma, random_state=random_state)
    # 1 simulate characteristics Cij,t

    data_low = 0.9
    data_scale = 0.1
    data_size = pc1
    pj = uniform.rvs(loc=data_low, scale=data_scale, size=data_size)
    data_mean = 0
    data_std = 1
    data_size = id_num

    # epsilon_ij_t = norm.rvs(loc=data_mean, scale=data_std, size=data_size)

    c = np.zeros(shape=(id_num * T_num, pc1))

    for j in range(pc1):
        c[0:200, j] = norm.rvs(loc=data_mean, scale=data_std, size=data_size)
        for t in range(1, T_num):
            c[200 * t:200 * (t + 1),
              j] = c[200 * (t - 1):200 * t, j] * pj[j] + norm.rvs(
                  loc=data_mean, scale=data_std,
                  size=data_size) * np.sqrt(1 - pj[j]**2)
    c_rank = np.zeros(shape=(id_num * T_num, pc1))

    # rank over cross-section

    for j in range(pc1):
        temp_series = pd.Series(c[:, j])
        temp_series = temp_series.rank()
        temp_series = 2 * temp_series / (len(temp_series) + 1) - 1
        c_rank[:, j] = temp_series.copy()
Example #36
 def resample(self, aj, ai, params):
     if isinstance(params, list):
         mu, kappa, theta, sigma, nu, eta, lda, omega = self._unwrap_params(params)
     else:
         mu, kappa, theta, sigma, nu, eta, lda, omega = self._unwrap_param_states(params)
     neg_idxs = np.where(aj<0)[0]
     for i in neg_idxs:
         while aj[i] < 0:
             aj[i] = ai[i] + kappa[i]*(eta[i]-ai[i])*self.dt + lda[i]*np.sqrt(ai[i]*self.dt)*norm.rvs()
     return aj
Example #37
 def observation_predict(self, x_pred, particles, y_prev, mu):
     y_hat = y_prev + (mu-1/2*x_pred)*self.dt + np.sqrt(particles*self.dt)*norm.rvs()
     py_hat = np.array([np.mean(self.prediction_density(y_hat[k], y_prev, x_pred, mu)) for k in range(len(y_hat))])
     py_hat = py_hat/sum(py_hat)
     return np.sum(py_hat * y_hat)
 def moveBrownian(self):
     self.x += norm.rvs(scale=self.T)
     self.y += norm.rvs(scale=self.T)
Example #39
    def filter(self, params, is_bounds=True, simple_resample=False, predict_obs=False):
        """
        Performs sequential monte-carlo sampling particle filtering
        Note: Currently only supports a bound of parameters
        """
        y = self.y
        N = self.N

        if not is_bounds: # params is an array of param values, not particles
            mu, kappa, theta, sigma, rho, v0 = self._unwrap_params(params)
        else:
            # initialize param states, N particles for each param sampled uniformly
            v0 = params[-1] # params is shape [(lb, ub)_1,...,k, v0]
            params_states = self._init_parameter_states(N, params[:-1])

        observations = np.zeros(len(y))
        hidden = np.zeros(len(y))
        observations[0] = y[0]
        hidden[0] = v0

        # particles = np.maximum(1e-3, self.proposal_sample(self.N, v, dy, params))
        weights = np.array([1/self.N] * self.N)

        # initialize v particles
        particles = norm.rvs(v0, 0.02, N)
        particles = np.maximum(1e-4, particles)

        # storing the estimated parameters each step
        params_steps = np.zeros((len(params)-1, len(y)))
        params_steps.transpose()[0] = np.mean(params_states, axis=1)

        for i in range(1, len(y)):
            dy = y[i] - y[i-1]

            # prediction
            # proposal sample
            x_pred = self.proposal_sample(N, particles, dy, params_states)
            x_pred = np.maximum(1e-3, x_pred)

            # weights
            Li = self.likelihood(y[i], x_pred, particles, y[i-1], params_states)
            I = self.proposal(x_pred, particles, dy, params_states)
            T = self.transition(x_pred, particles, params_states)
            weights = weights * (Li*T/I)
            weights = weights/sum(weights)

            # Resampling
            if self._neff(weights) < 0.7*self.N:
                print('resampling since: {}'.format(self._neff(weights)))
                if simple_resample:
                    x_pred, weights, params_states = self._simple_resample(x_pred, weights, params_states)
                else:
                    x_pred, weights, params_states = self._systematic_resample(x_pred, weights, params_states)

            # observation prediction
            if predict_obs:
                y_hat = self.observation_predict(x_pred, particles, y[i-1], np.mean(params_states[0])) # mu is the 0 index
                observations[i] = y_hat
                print("Done with iter: {}".format(i))

            hidden[i] = np.sum(x_pred * weights)
            particles = x_pred
            params_steps.transpose()[i] = np.sum(np.multiply(params_states, weights[np.newaxis, :]), axis=1)

        return (hidden, params_steps, observations) if predict_obs else (hidden, params_steps)
Example #40
 def noise(self, relpos):
     ell = norm.rvs(loc=relpos[0],
                    scale=relpos[0] * self.distance_noise_rate)
     phi = norm.rvs(loc=relpos[1], scale=self.direction_noise)
     return np.array([ell, phi]).T
Example #41
def dgv(mu, nu, minv=1, maxv=20):
    "Discrete Gaussian variate"
    rv = round(norm.rvs(mu, nu))
    return min(max(rv, minv), maxv)
def test_river_discharge_simulation():
    # Module activation and deactivation flags
    # analysis = False
    # cdf_pdf_representation = False
    # temporal_dependency = False
    # climatic_events_fitting = True
    # threshold_checking_for_simulation = False
    # simulation_cycles = True
    analysis = True
    cdf_pdf_representation = False
    temporal_dependency = False
    climatic_events_fitting = True
    threshold_checking_for_simulation = False
    simulation_cycles = True

    #%% Input data
    # Initial year, number of years, fraction of valid data required in a year
    anocomienzo, duracion, umbralano = (2018, 10, 0.8)
    # Type of fit (0-GUI, 1-stationary, 2-nonstationary)
    ant = [2]
    # Fourier order for nonstationary analysis
    no_ord_cycles = [2]
    no_ord_calms = [2]
    # Number of simulations
    no_sim = 1
    # Type of fit functions
    fun_cycles = [st.exponweib]
    fun_calms = [st.norm]
    # Number of normals
    no_norm_cycles = [False]
    no_norm_calms = [False]
    f_mix_cycles = [False]
    mod_cycles = [[0, 0, 0, 0]]

    # Cycles River discharge
    threshold_cycles = 25
    # minimum_interarrival_time = pd.Timedelta('250 days')
    # minimum_cycle_length = pd.Timedelta('5 days')
    minimum_interarrival_time = pd.Timedelta('7 days')
    minimum_cycle_length = pd.Timedelta('2 days')

    # Cycles SPEI
    threshold_spei = 0
    minimum_interarrival_time_spei = pd.Timedelta('150 days')
    minimum_cycle_length_spei = pd.Timedelta('150 days')

    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = True
    extra_info = True

    #%% Read data
    # Import river discharge data when all dams were active
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_river_discharge.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_rd = MetOceanDF.read_file(path_name)

    # Group into dataframe
    river_discharge = pd.DataFrame(modf_rd)

    # Delete rows with no common values
    river_discharge.dropna(how='any', inplace=True)

    # Import the complete historic river discharge data
    # All historic river discharge
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'caudales.txt'
    path_name = os.path.join(data_path, modf_file_name)
    modf_all = pd.read_table(path_name, header=None, delim_whitespace=True)
    date_col = dates.extract_date(modf_all.iloc[:, 0:4])
    modf_all.index = date_col
    modf_all.drop(modf_all.columns[0:4], axis=1, inplace=True)
    modf_all.columns = ['Q']

    #%% Preprocessing
    t_step = missing_values.find_timestep(river_discharge)  # Find tstep
    data_gaps = missing_values.find_missing_values(river_discharge, t_step)
    river_discharge = missing_values.fill_missing_values(
        river_discharge,
        t_step,
        technique='interpolation',
        method='nearest',
        limit=16 * 24,
        limit_direction='both')
    data_gaps_after = missing_values.find_missing_values(
        river_discharge, t_step)

    # Add noise for VAR
    noise = np.random.rand(river_discharge.shape[0],
                           river_discharge.shape[1]) * 1e-2
    river_discharge = river_discharge + noise

    # Save_to_pickle
    river_discharge.to_pickle('river_discharge.p')

    # Group into list of dataframes
    df = list()
    df.append(pd.DataFrame(river_discharge['Q']))

    #%% Cycles and calms calculation
    cycles, calm_periods, info = extremal.extreme_events(
        river_discharge, 'Q', threshold_cycles, minimum_interarrival_time,
        minimum_cycle_length, interpolation, interpolation_method,
        interpolation_freq, truncate, extra_info)
    # Calculate duration of the cycles
    dur_cycles = extremal.events_duration(cycles)
    dur_cycles_description = dur_cycles.describe()

    sample_cycles = pd.DataFrame(info['data_cycles'].iloc[:, 0])
    noise = np.random.rand(sample_cycles.shape[0],
                           sample_cycles.shape[1]) * 1e-2
    sample_cycles = sample_cycles + noise

    sample_calms = pd.DataFrame(info['data_calm_periods'])
    noise = np.random.rand(sample_calms.shape[0], sample_calms.shape[1]) * 1e-2
    sample_calms = sample_calms + noise

    #%% CLIMATIC INDICES
    # Sunspots
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'sunspot.csv'
    path_name = os.path.join(data_path, modf_file_name)
    sunspot = pd.read_csv(path_name,
                          header=None,
                          delim_whitespace=True,
                          parse_dates=[[0, 1]],
                          index_col=0)
    sunspot = sunspot.drop([2, 4, 5], axis=1)

    # SPEI
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'spei_cadiz.csv'
    path_name = os.path.join(data_path, modf_file_name)
    spei = pd.read_csv(path_name, sep=',')
    spei.index = sunspot.index[2412:3233]

    # Calculate cycles over SPEI
    spei = pd.DataFrame(spei.loc[:, 'SPEI_12'] * 100).dropna()
    cycles_spei, calm_periods_spei, info_spei = extremal.extreme_events(
        spei, 'SPEI_12', threshold_spei, minimum_interarrival_time_spei,
        minimum_cycle_length_spei, interpolation, interpolation_method,
        interpolation_freq, truncate, extra_info)
    peaks_over_thres_spei = extremal.events_max(cycles_spei)

    # Plot peaks
    peaks_over_thres = extremal.events_max(cycles)

    # Represent cycles
    fig1 = plt.figure(figsize=(20, 20))
    ax = plt.axes()
    ax.plot(river_discharge)
    ax.axhline(threshold_cycles, color='lightgray')
    ax.plot(spei.loc[:, 'SPEI_12'] * 100, color='0.75', linewidth=2)
    # Plot cycles
    # for cycle in cycles_all:
    #     ax.plot(cycle, 'sandybrown', marker='.', markersize=5)
    #     # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
    #     # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
    for cycle in cycles:
        ax.plot(cycle, 'g', marker='.', markersize=5)
        # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
        # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
    for cycle in cycles_spei:
        ax.plot(cycle, 'k', marker='.', markersize=5, linewidth=2)
        ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=15)
        ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=15)
    ax.plot(peaks_over_thres, '.r', markersize=15)
    ax.plot(peaks_over_thres_spei, '.c', markersize=15)
    ax.grid()
    ax.set_xlim([datetime.date(1970, 1, 1), datetime.date(2018, 4, 11)])
    ax.set_ylim([-5, 500])
    fig1.savefig(
        os.path.join('output', 'analisis', 'graficas',
                     'ciclos_river_discharge_spei.png'))

    #%% # CLIMATIC ANALYSIS (0: skip it, 1: run it; the same applies to all these ifs)
    if analysis:
        if cdf_pdf_representation:
            for i in range(len(df)):
                # Plot the CDF and PDF of the records
                plot_analisis.cdf_pdf_registro(df[i], df[i].columns[0])
                plt.pause(0.5)

        #%%  THEORETICAL FIT CYCLES
        data_cycles = sample_cycles['Q']

        # Empirical cdf
        ecdf = empirical_distributions.ecdf_histogram(data_cycles)
        # Fit the variable to an extremal distribution
        (param, x, cdf_expwbl, pdf_expwbl) = theoretical_fit.fit_distribution(
            data_cycles,
            fit_type=fun_cycles[0].name,
            x_min=min(data_cycles),
            x_max=2 * max(data_cycles),
            n_points=1000)
        par0_cycles = list()
        par0_cycles.append(np.asarray(param))
        # Save the parameters
        np.save(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'), par0_cycles)

        # Check the goodness of the fit
        fig1 = plt.figure(figsize=(20, 20))
        ax = plt.axes()
        ax.plot(ecdf.index, ecdf, '.')
        ax.plot(x, cdf_expwbl)
        ax.set_xlabel('Q (m3/s)')
        ax.set_ylabel('CDF')
        ax.legend([
            'ECDF',
            'Exponweib Fit',
        ])
        ax.grid()
        ax.set_xlim([0, 500])
        fig1.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'cdf_fit_ciclos_river_discharge.png'))

        # PP - Plot values
        (yppplot_emp,
         yppplot_teo) = theoretical_fit.pp_plot(x, cdf_expwbl, ecdf)
        # QQ - Plot values
        (yqqplot_emp,
         yqqplot_teo) = theoretical_fit.qq_plot(x, cdf_expwbl, ecdf)
        # Plot Goodness of fit
        theoretical_fit.plot_goodness_of_fit(cdf_expwbl, ecdf, river_discharge,
                                             'Q', x, yppplot_emp, yqqplot_emp,
                                             yppplot_teo, yqqplot_teo)

        # Non-stationary fit for cycles
        par_cycles, mod_cycles, f_mix_cycles, data_graph_cycles = list(), list(
        ), list(), list()
        df = list()
        df.append(data_cycles)
        for i in range(len(df)):
            # The last 7 years were selected so that the analysis runs faster
            analisis_ = analisis.analisis(df[i],
                                          fun_cycles[i],
                                          ant[i],
                                          ordg=no_ord_cycles[i],
                                          nnorm=no_norm_cycles[i],
                                          par0=par0_cycles[i])

            par_cycles.append(analisis_[0])
            mod_cycles.append(analisis_[1])
            f_mix_cycles.append(analisis_[2])

            aux = list(analisis_[3])
            aux[5] = i
            aux = tuple(aux)
            data_graph_cycles.append(aux)

            # Plot the results (a wide range of plotting functions is available; see the manual)
            plot_analisis.cuantiles_ne(*data_graph_cycles[i])
            plt.pause(0.5)

        fig2 = plt.figure(figsize=(20, 20))
        plt.plot(x, pdf_expwbl)
        _ = plt.hist(data_cycles,
                     bins=np.linspace(0, 500, 100),
                     density=True,
                     alpha=0.5)
        plt.xlim([0, 400])
        fig2.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'pdf_fit_ciclos_river_discharge.png'))

        # %%  THEORETICAL FIT CALMS
        param0_calms = list()
        data_calms = sample_calms['Q']
        (param, x, cdf, pdf) = theoretical_fit.fit_distribution(
            data_calms,
            fit_type=fun_calms[0].name,
            x_min=np.min(data_calms),
            x_max=1.1 * np.max(data_calms),
            n_points=1000)
        param0_calms.append(np.asarray(param))
        # Empirical cdf
        ecdf = empirical_distributions.ecdf_histogram(data_calms)
        epdf = empirical_distributions.epdf_histogram(data_calms, bins=0)
        # PP - Plot values
        (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf, ecdf)
        # QQ - Plot values
        (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf, ecdf)
        # Plot Goodness of fit
        theoretical_fit.plot_goodness_of_fit(cdf, ecdf, sample_calms, 'Q', x,
                                             yppplot_emp, yqqplot_emp,
                                             yppplot_teo, yqqplot_teo)

        # Non-stationary fit for calms
        par_calms, mod_calms, f_mix_calms, data_graph_calms = list(), list(
        ), list(), list()
        df = list()
        df.append(data_calms)
        for i in range(len(df)):
            # The last 7 years were selected so that the analysis runs faster
            analisis_ = analisis.analisis(df[i],
                                          fun_calms[i],
                                          ant[i],
                                          ordg=no_ord_calms[i],
                                          nnorm=no_norm_calms[i],
                                          par0=param0_calms[i])

            par_calms.append(analisis_[0])
            mod_calms.append(analisis_[1])
            f_mix_calms.append(analisis_[2])
            data_graph_calms.append(analisis_[3])

            # Plot the results (a wide range of plotting functions is available; see the manual)
            plot_analisis.cuantiles_ne(*data_graph_calms[i])
            plt.pause(0.5)

        # Save the parameters
        np.save(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'), par_calms)
        np.save(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'), mod_calms)
        np.save(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'), f_mix_calms)

    #%% TEMPORAL DEPENDENCY
    if temporal_dependency:
        # The output parameters of the previous analysis are used
        # Read data
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        par_calms = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'))
        mod_calms = np.load(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'))
        f_mix_calms = np.load(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'))

        (df_dt_cycles,
         cdf_) = analisis.dependencia_temporal(sample_cycles, par_cycles,
                                               mod_cycles, no_norm_cycles,
                                               f_mix_cycles, fun_cycles)

        # Save the VAR model parameters
        df_dt_cycles.to_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))

        (df_dt_calms,
         cdf_) = analisis.dependencia_temporal(sample_calms, par_calms,
                                               mod_calms, no_norm_calms,
                                               f_mix_calms, fun_calms)

        # Save the VAR model parameters
        df_dt_calms.to_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_calms.p'))

    if climatic_events_fitting:
        #%% FIT NUMBER OF EVENTS DURING WET CYCLES
        events_wet_cycle = pd.Series([5, 2, 1, 3, 2, 2, 0, 6, 1])
        ecdf_events_wet_cycle = empirical_distributions.ecdf_histogram(
            events_wet_cycle)

        mu = np.mean(events_wet_cycle)
        simulated_number_events = pd.Series(
            poisson.rvs(mu, loc=0, size=100, random_state=None))
        ecdf_simulated_events_wet_cycle = empirical_distributions.ecdf_histogram(
            simulated_number_events)
        x_poisson = np.linspace(0, 10, 100)
        cdf_poisson = poisson.cdf(x_poisson, mu, loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_events_wet_cycle.index, ecdf_events_wet_cycle, '.')
        ax.plot(ecdf_simulated_events_wet_cycle.index,
                ecdf_simulated_events_wet_cycle, '.')
        ax.plot(x_poisson, cdf_poisson)
        ax.legend(['ECDF', 'ECDF Sim', 'Poisson Fit'])
        ax.grid()

        #%% FIT TIME BETWEEN WET CYCLES
        t_wet_cycles = peaks_over_thres_spei.index.to_series().diff().dropna(
        ).astype('m8[s]').astype(np.float32)
        ecdf_t_wet_cycle = empirical_distributions.ecdf_histogram(t_wet_cycles)

        norm_param = norm.fit(t_wet_cycles, loc=0)
        simulated_t_wet_cycles = pd.Series(
            norm.rvs(*norm_param, size=100, random_state=None))
        ecdf_simulated_t_wet_cycles = empirical_distributions.ecdf_histogram(
            simulated_t_wet_cycles)
        x_norm = np.linspace(0, 2 * max(t_wet_cycles), 100)
        cdf_norm = norm.cdf(x_norm, *norm_param)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_wet_cycle.index, ecdf_t_wet_cycle, '.')
        ax.plot(ecdf_simulated_t_wet_cycles.index, ecdf_simulated_t_wet_cycles,
                '.')
        ax.plot(x_norm, cdf_norm)
        ax.legend(['ECDF', 'ECDF Sim', 'Normal Fit'])
        ax.grid()

        simulated_t_wet_cycles_days = simulated_t_wet_cycles.astype('m8[s]')
        # Remove negative values
        simulated_t_wet_cycles_days = simulated_t_wet_cycles_days[
            simulated_t_wet_cycles_days.values > datetime.timedelta(days=1)]

        #%% FIT TIME BETWEEN EVENTS DURING WET CYCLES
        t_between_events = peaks_over_thres.index.to_series().diff().dropna()
        t_between_events = t_between_events[
            t_between_events < datetime.timedelta(days=400)]
        t_between_events = t_between_events.astype('m8[s]').astype(np.float32)
        ecdf_t_between_events = empirical_distributions.ecdf_histogram(
            t_between_events)

        lambda_par = expon.fit(t_between_events, loc=0)
        simulated_t_between_events = pd.Series(
            expon.rvs(scale=lambda_par[1], size=100, random_state=None))
        ecdf_simulated_t_between_events = empirical_distributions.ecdf_histogram(
            simulated_t_between_events)
        x_expon = np.linspace(0, 2 * max(t_between_events), 100)
        cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_between_events.index, ecdf_t_between_events, '.')
        ax.plot(ecdf_simulated_t_between_events.index,
                ecdf_simulated_t_between_events, '.')
        ax.plot(x_expon, cdf_expon)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        simulated_t_between_events_days = simulated_t_between_events.astype(
            'm8[s]')

        #%% FIT TIME BETWEEN ALL EVENTS
        # Fit time between events (without considering wet cycles) 2 method
        t_between_events_2method = peaks_over_thres.index.to_series().diff(
        ).dropna()
        t_between_events_2method = t_between_events_2method.astype(
            'm8[s]').astype(np.float32)
        ecdf_t_between_events_2method = empirical_distributions.ecdf_histogram(
            t_between_events_2method)

        lambda_par = expon.fit(t_between_events_2method, loc=0)
        simulated_t_between_events_2method = pd.Series(
            expon.rvs(scale=lambda_par[1], size=100, random_state=None))
        ecdf_simulated_t_between_events_2method = empirical_distributions.ecdf_histogram(
            simulated_t_between_events_2method)
        x_expon = np.linspace(0, 2 * np.max(t_between_events_2method), 100)
        cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_between_events_2method.index,
                ecdf_t_between_events_2method, '.')
        ax.plot(ecdf_simulated_t_between_events_2method.index,
                ecdf_simulated_t_between_events_2method, '.')
        ax.plot(x_expon, cdf_expon)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        simulated_t_between_events_2method_days = simulated_t_between_events_2method.astype(
            'm8[s]')
        # nul_values = simulated_t_between_events_2method_days.values > datetime.timedelta(days=2000)

    #%% CLIMATE SIMULATION: CHECK OF THE OPTIMAL THRESHOLD TO FIT CYCLE DURATIONS
    if threshold_checking_for_simulation:
        # LOAD PARAMETERS
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        df_dt_cycles = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))
        vars_ = ['Q']

        # Load the SPEI index to fit the time between wet cycles, the number of events
        # per wet cycle and the time between events within each wet cycle

        # Figure with the empirical cdf and pdf
        fig1, axes1 = plt.subplots(1, 2, figsize=(20, 7))

        cont = 0
        iter = 0
        while cont < no_sim:
            df_sim = simulacion.simulacion(anocomienzo,
                                           duracion,
                                           par_cycles,
                                           mod_cycles,
                                           no_norm_cycles,
                                           f_mix_cycles,
                                           fun_cycles,
                                           vars_,
                                           sample_cycles,
                                           df_dt_cycles, [0, 0, 0, 0, 0],
                                           semilla=int(
                                               np.random.rand(1) * 1e6))

            iter += 1

            # First check for values above the threshold; if any, discard the series
            if df_sim['Q'].max() <= sample_cycles['Q'].max() * 1.25:
                # Plot the series
                plt.figure()
                ax = plt.axes()
                ax.plot(df_sim)
                ax.plot(sample_cycles, '.')
                ax.plot(df_sim * 0 + max(sample_cycles['Q']), 'r')
                ax.grid()

                # Cdf Pdf
                data = df_sim['Q']
                ecdf = empirical_distributions.ecdf_histogram(data)
                epdf = empirical_distributions.epdf_histogram(data, bins=0)
                axes1[0].plot(epdf.index, epdf, '--', color='0.75')
                axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

                # Extract cycles from data for different thresholds to fix the duration
                fig2, axes2 = plt.subplots(1, 2, figsize=(20, 7))
                if cont == 0:
                    dur_cycles = dur_cycles.astype('m8[s]').astype(
                        np.float32)  # Convert to seconds as float
                ecdf_dur = empirical_distributions.ecdf_histogram(dur_cycles)
                epdf_dur = empirical_distributions.epdf_histogram(dur_cycles,
                                                                  bins=0)
                axes2[0].plot(epdf_dur.index, epdf_dur, 'r', lw=2)
                axes2[1].plot(ecdf_dur.index, ecdf_dur, 'r', lw=2)

                threshold = np.arange(20, 110, 10)
                color_sequence = [
                    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c',
                    '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
                    '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f',
                    '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5'
                ]
                for j, th in enumerate(threshold):
                    minimum_interarrival_time = pd.Timedelta('1 hour')
                    minimum_cycle_length = pd.Timedelta('2 days')
                    cycles, calm_periods, info = extremal.extreme_events(
                        df_sim, 'Q', th, minimum_interarrival_time,
                        minimum_cycle_length, interpolation,
                        interpolation_method, interpolation_freq, truncate,
                        extra_info)

                    # Calculate duration of the cycles
                    dur_cycles_sim = extremal.events_duration(cycles)
                    dur_cycles_sim_description = dur_cycles_sim.describe()

                    # Represent cycles
                    fig3 = plt.figure(figsize=(20, 20))
                    ax = plt.axes()
                    ax.plot(df_sim)
                    ax.axhline(th, color='lightgray')
                    ax.grid()
                    ax.legend([
                        'Threshold: ' + str(th) + ' (m3/s)' + '/ Dur_min ' +
                        str(dur_cycles_description['min']) + ' - ' +
                        str(dur_cycles_sim_description['min']) +
                        '/ Dur_mean ' + str(dur_cycles_description['mean']) +
                        ' - ' + str(dur_cycles_sim_description['mean']) +
                        '/ Dur_max ' + str(dur_cycles_description['max']) +
                        ' - ' + str(dur_cycles_sim_description['max'])
                    ])

                    for cycle in cycles:
                        ax.plot(cycle, 'g', marker='.', markersize=5)
                        ax.plot(cycle.index[0],
                                cycle[0],
                                'gray',
                                marker='.',
                                markersize=10)
                        ax.plot(cycle.index[-1],
                                cycle[-1],
                                'black',
                                marker='.',
                                markersize=10)
                    ax.set_xlim([
                        datetime.date(2018, 4, 1),
                        datetime.date(2030, 1, 1)
                    ])
                    ax.set_ylim([0, 600])

                    fig_name = 'ciclos_sim_' + str(cont) + '_threshold_' + str(
                        th) + '.png'
                    fig3.savefig(
                        os.path.join('output', 'simulacion', 'graficas',
                                     'descarga_fluvial', 'umbral_optimo',
                                     fig_name))

                    # Calculate the cdf and pdf of the cycle duration
                    dur_cycles_sim = dur_cycles_sim.astype('m8[s]').astype(
                        np.float32)
                    ecdf_dur_sim = empirical_distributions.ecdf_histogram(
                        dur_cycles_sim)
                    epdf_dur_sim = empirical_distributions.epdf_histogram(
                        dur_cycles_sim, bins=0)
                    axes2[0].plot(epdf_dur_sim.index,
                                  epdf_dur_sim,
                                  '--',
                                  color=color_sequence[j],
                                  label=['Threshold: ' + str(threshold[j])])
                    axes2[1].plot(ecdf_dur_sim.index,
                                  ecdf_dur_sim,
                                  '--',
                                  color=color_sequence[j],
                                  label=['Threshold: ' + str(threshold[j])])
                    axes2[0].legend()
                    axes2[1].set_xlim([0, 5000000])
                    axes2[0].set_xlim([0, 5000000])

                fig_name = 'ciclos_dur_sim_' + str(cont) + '.png'
                fig2.savefig(
                    os.path.join('output', 'simulacion', 'graficas',
                                 'descarga_fluvial', 'umbral_optimo',
                                 fig_name))

                cont += 1

            data = sample_cycles['Q']
            ecdf = empirical_distributions.ecdf_histogram(data)
            epdf = empirical_distributions.epdf_histogram(data, bins=0)
            axes1[0].plot(epdf.index, epdf, 'r', lw=2)
            axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)

        fig_name = 'pdf_cdf_descarga_fluvial.png'
        fig1.savefig(
            os.path.join('output', 'simulacion', 'graficas',
                         'descarga_fluvial', 'umbral_optimo', fig_name))

    #%% CLIMATE SIMULATION
    threshold = 50
    minimum_interarrival_time = pd.Timedelta('1 hour')
    minimum_cycle_length = pd.Timedelta('2 days')
    if simulation_cycles:
        # LOAD PARAMETERS
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        par_calms = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'))
        mod_calms = np.load(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'))
        f_mix_calms = np.load(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'))

        df_dt_cycles = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))
        df_dt_calms = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_calms.p'))
        vars_ = ['Q']

        # Figure with the empirical cdf and pdf
        fig2, axes1 = plt.subplots(1, 2, figsize=(20, 7))

        cont = 0
        iter = 0
        while cont < no_sim:
            df_sim = simulacion.simulacion(anocomienzo,
                                           duracion,
                                           par_cycles,
                                           mod_cycles,
                                           no_norm_cycles,
                                           f_mix_cycles,
                                           fun_cycles,
                                           vars_,
                                           sample_cycles,
                                           df_dt_cycles, [0, 0, 0, 0, 0],
                                           semilla=int(
                                               np.random.rand(1) * 1e6))

            iter += 1

            # First check for values above the threshold; if any, discard the series
            if df_sim['Q'].max() <= sample_cycles['Q'].max() * 1.25:
                df_sim = df_sim.resample('1H').interpolate()

                # Extract cycles from data for different thresholds to fix the duration
                if cont == 0:
                    dur_cycles = dur_cycles.astype('m8[s]').astype(
                        np.float32)  # Convert to seconds as float
                # Calculate cycles
                cycles, calm_periods, info = extremal.extreme_events(
                    df_sim, 'Q', threshold, minimum_interarrival_time,
                    minimum_cycle_length, interpolation, interpolation_method,
                    interpolation_freq, truncate, extra_info)

                # # Represent cycles
                # fig3 = plt.figure(figsize=(20, 20))
                # ax = plt.axes()
                # ax.plot(df_sim)
                # ax.axhline(threshold, color='lightgray')
                # ax.grid()
                #
                # for cycle in cycles:
                #     ax.plot(cycle, 'g', marker='.', markersize=5)
                #     ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
                #     ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
                # ax.set_xlim([datetime.date(2018, 01, 01), datetime.date(2021, 01, 01)])
                # ax.set_ylim([0, 600])
                # fig3.savefig(os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial',
                #                           'ciclos_cadiz_simulado_' + str(cont).zfill(4) + '.png'))

                # Start to construct the time series
                indices = pd.date_range(start='2018', end='2100', freq='1H')
                df_simulate = pd.DataFrame(np.zeros((len(indices), 1)) + 25,
                                           dtype=float,
                                           index=indices,
                                           columns=['Q'])

                # The start is in wet cycles
                cont_wet_cicles = 0
                cont_df_events = 1
                t_ini = datetime.datetime(2018, 1, 1)
                t_end = datetime.datetime(2018, 1, 1)
                while t_end < datetime.datetime(2090, 1, 1):
                    if cont_wet_cicles != 0:
                        t_ini = t_end + simulated_t_wet_cycles_days[
                            cont_wet_cicles]
                        year = t_ini.year
                    else:
                        year = 2018

                    # Select the number of events during wet cycle
                    n_events = simulated_number_events[cont_wet_cicles] - 1
                    cont_wet_cicles += 1

                    if n_events != 0:

                        # for j in range(0, n_events):
                        cont_df_events_in_wet_cycles = 0
                        while cont_df_events_in_wet_cycles <= n_events:
                            if cont_df_events_in_wet_cycles != 0:
                                # Time between events
                                year = year + 1

                            # Select the event
                            cycle = cycles[cont_df_events]

                            if np.max(cycle) >= 150:
                                # Simulate date
                                month1 = [
                                    random.randint(1, 3),
                                    random.randint(10, 12)
                                ]
                                rand_pos = random.randint(0, 1)
                                month = month1[rand_pos]
                                day = random.randint(1, 28)
                                hour = random.randint(0, 23)
                            else:
                                # Simulate date
                                month = random.randint(1, 12)
                                day = random.randint(1, 28)
                                hour = random.randint(0, 23)
                            t_ini = datetime.datetime(year, month, day, hour)
                            pos_ini = np.where(
                                df_simulate.index == t_ini)[0][0]
                            pos_end = pos_ini + cycle.shape[0]

                            # Insert cycle
                            df_simulate.iloc[pos_ini:pos_end, 0] = cycle.values
                            t_end = df_simulate.index[pos_end]
                            year = df_simulate.index[pos_end].year
                            cont_df_events += 1
                            cont_df_events_in_wet_cycles += 1

                    else:
                        t_end = t_ini

                # Simulation of calm periods
                df_sim_calms = simulacion.simulacion(
                    anocomienzo,
                    85,
                    par_calms,
                    mod_calms,
                    no_norm_calms,
                    f_mix_calms,
                    fun_calms,
                    vars_,
                    sample_calms,
                    df_dt_calms, [0, 0, 0, 0, 0],
                    semilla=int(np.random.rand(1) * 1e6))

                # Remove negative values
                df_sim_calms[df_sim_calms < 0] = np.random.randint(1, 5)

                # Combine both dataframes with cycles and calms
                pos_cycles = df_simulate >= 50
                df_river_discharge = df_sim_calms
                df_river_discharge[pos_cycles] = df_simulate

                # Hourly interpolation
                df_river_discharge = df_river_discharge.resample(
                    'H').interpolate()

                # Representation of results
                fig1 = plt.figure(figsize=(20, 10))
                ax = plt.axes()
                ax.plot(river_discharge)
                ax.plot(df_river_discharge)
                ax.legend(['Hindcast', 'Forecast'])
                ax.grid()
                ax.set_ylim([-5, 500])
                fig1.savefig(
                    os.path.join(
                        'output', 'simulacion', 'graficas', 'descarga_fluvial',
                        'descarga_fluvial_cadiz_simulado_' +
                        str(cont).zfill(4) + '.png'))

                # Cdf Pdf
                data = df_river_discharge['Q']
                ecdf = empirical_distributions.ecdf_histogram(data)
                epdf = empirical_distributions.epdf_histogram(data, bins=0)
                axes1[0].plot(epdf.index, epdf, '--', color='0.75')
                axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

                # Save output files
                df_river_discharge.to_csv(os.path.join(
                    'output', 'simulacion', 'series_temporales',
                    'descarga_fluvial_500', 'descarga_fluvial_guadalete_sim_' +
                    str(cont).zfill(4) + '.txt'),
                                          sep='\t')
                cont += 1

        data = river_discharge['Q']
        ecdf = empirical_distributions.ecdf_histogram(data)
        epdf = empirical_distributions.epdf_histogram(data, bins=0)
        axes1[0].plot(epdf.index, epdf, 'r', lw=2)
        axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)
        fig_name = 'pdf_cdf_descarga_fluvial.png'
        fig2.savefig(
            os.path.join('output', 'simulacion', 'graficas',
                         'descarga_fluvial', fig_name))
Beispiel #43
0
#coding:utf8
import numpy as np
from scipy.stats import norm

from bokeh.io import push_notebook, show, output_notebook, curdoc
from bokeh.layouts import row, column, widgetbox, layout, gridplot
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import CustomJS, Select, Slider, TextInput, Spinner
from bokeh.models.glyphs import MultiLine
from bokeh.models.widgets import Div
from bokeh import palettes



size = 20
X = norm.rvs(size=(size, 2), random_state=42) * 2
X = np.dot(X, np.linalg.cholesky([[1, .8], [.8, .8]]))


x = X[:, 0]
y = X[:, 1]

index = np.argsort(x)
# import ipdb; ipdb.set_trace()
x = np.sort(x)
y = y[index]

pred = np.nan*np.zeros(len(X))
error = np.nan*np.zeros(len(X))
# points representing the residuals
error_0s = [np.array(np.nan*np.zeros(2)) for i in range(0,len(X))]
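# Quick check (sketch): the empirical covariance of the transformed sample should be
# roughly 4 * L.T @ L, where L is the Cholesky factor used above and the factor 4 comes
# from the initial scaling by 2 (only approximate with 20 points).
L = np.linalg.cholesky([[1, .8], [.8, .8]])
print(np.cov(X, rowvar=False))
print(4 * L.T @ L)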
Beispiel #44
0
    page_source = page_response.content
    print('{}/{}'.format(num_pages - page_iter + 1, num_pages), page_response,
          page_link)
    if (page_response.status_code != 200) or (page_source is None):
        print('Connection aborted. Pause for {} seconds'.format(BREAK_TIME))
        time.sleep(BREAK_TIME)
        page_link = QUERY.format(page_iter)
        page_response = requests.get(
            page_link, headers={'User-Agent': UserAgent().chrome})
    access_time.append(datetime.now())
    page_source = page_response.content
    page_soup = BeautifulSoup(page_source, "lxml")
    page_descriptions_soup = page_soup.find_all('div',
                                                {'class': "description"})
    query_soup.append(page_descriptions_soup)
    time.sleep(expon.rvs(28, 9) + norm.rvs(5, 7))  # polite random delay: expon(loc=28, scale=9) plus normal(5, 7) noise
    page_iter = page_iter + 1
print('len(access time): ', len(access_time))
print('len(query soup): ', len(query_soup))

flats = []
for page_iter, page in enumerate(query_soup):
    for description in page:
        flat_soup = {}
        for key in TAGS:
            for param in TAGS[key]:
                flat_soup[param] = description.find(TAGS[key][param][0],
                                                    TAGS[key][param][1])
        flat_params = {}
        for param in TAGS['text_tags']:
            if flat_soup[param] is not None:
Beispiel #45
0
    def __call__(self):

        self.clouds = []
        np.random.seed(11 + self.random_seed)

        a = np.random.uniform(low=0., high=1., size=self.n)
        self.phi = 2. * np.pi * a

        if self.model == 'Spherical':
            self.r = norm.rvs(loc=self.R_params[0],
                              scale=self.R_params[1],
                              size=self.n)

            v = np.random.uniform(low=0., high=1., size=self.n)
            # arccos(2v - 1) makes cos(theta) uniform on [-1, 1], which gives
            # directions uniformly distributed over the sphere
            self.theta = np.arccos(2. * v - 1.)

            coord_array = coord.PhysicsSphericalRepresentation(
                self.phi * u.rad, self.theta * u.rad, self.r * u.kpc)
            self.cartesian_galactocentric = self.cartesianize_coordinates(
                coord_array)
            self.heliocentric_coordinates()

            for i, x, p, t, d, latit, longit in zip(np.arange(self.n), self.r,
                                                    self.phi, self.theta,
                                                    self.d_sun, self.lat,
                                                    self.long):
                if x <= 0.:
                    self.r[i] = np.random.uniform(low=0., high=1., size=1)
                    x = self.r[i]
                c = Cloud(i, x, p, t, size=None, em=None)
                c.assign_sun_coord(d, latit, longit)
                self.clouds.append(c)

        else:
            self.r = self.phi * 0.
            rbar = self.R_params[2]

            if self.model == 'Axisymmetric':
                np.random.seed(self.random_seed + 29)
                self.r = norm.rvs(loc=self.R_params[0],
                                  scale=self.R_params[1],
                                  size=self.n)
                negs = np.ma.masked_less(self.r, 0.)
                #central molecular zone
                self.r[negs.mask] = 0.
            elif self.model == 'LogSpiral':
                #the bar is assumed axisymmetric with an inclination angle phi0 ~ 25 deg,
                #as measured by Fux et al. 1999

                phi_0 = np.deg2rad(25.)
                self.phi += phi_0
                subsize = int(self.n / 10)
                self.r[0:subsize] = norm.rvs(loc=self.R_params[0],
                                             scale=self.R_params[1],
                                             size=subsize)
                #np.random.uniform(low=0.,high=8.,size=self.n/4)
                rscale = rbar / 1.5
                self.r[subsize:self.n], self.phi[subsize:self.n] = \
                    log_spiral_radial_distribution2(
                        rbar, phi_0, self.n - subsize,
                        self.R_params[0], self.R_params[1])
                #self.r[subsize:self.n]=log_spiral_radial_distribution(self.phi[subsize:self.n],rbar,phi_0)
                #simulate the bar
                arr = np.ma.masked_less(self.r, rbar)
                self.r[arr.mask] = abs(
                    np.random.normal(loc=0.,
                                     scale=rscale,
                                     size=len(self.r[arr.mask])))
                negs = np.ma.masked_less(self.r, 0.)
                #central molecular zone
                self.r[negs.mask] = 0.

            #the thickness of the Galactic plane is a function of the Galactic radius, roughly ~ 100 pc * cosh(x / R0), with R0 ~ 10 kpc
            # for reference see fig.6 of Heyer and Dame, 2015
            sigma_z0 = self.z_distr[0]
            R_z0 = self.z_distr[1]

            sigma_z = lambda R: sigma_z0 * np.cosh((R / R_z0))
            self.zeta = self.phi * 0.
            np.random.seed(self.random_seed + 19)
            for i, x, p in zip(np.arange(self.n), self.r, self.phi):
                self.zeta[i] = np.random.normal(loc=0., scale=sigma_z(x))
                self.clouds.append(
                    Cloud(i, x, p, self.zeta[i], size=None, em=None))
            coord_array = coord.CylindricalRepresentation(
                self.r * u.kpc, self.phi * u.rad, self.zeta * u.kpc)
            self.cartesian_galactocentric = self.cartesianize_coordinates(
                coord_array)
            self.heliocentric_coordinates()

            for c, d, latit, longit in zip(self.clouds, self.d_sun, self.lat,
                                           self.long):
                c.assign_sun_coord(d, latit, longit)
        self.L = np.array(self.sizes)
        self.healpix_vecs = self.compute_healpix_vec()
        self.W = self.get_pop_emissivities_sizes()[0]
Beispiel #46
0
# plot observed data
x_plot = np.linspace(136, 180, len(d2))[:, np.newaxis]
kde = KernelDensity(kernel='gaussian', bandwidth=2)
kde.fit(d2)
y = np.exp(kde.score_samples(x_plot))
plt.plot(x_plot, y)
plt.show()

pm.kdeplot(d2)
plt.xlabel('height')
plt.ylabel('density')
plt.title('Prior')
plt.show()

# code chunk 4.13 (set up prior)
sample_mu = norm.rvs(loc=178, scale=20, size=1000)
sample_sigma = uniform.rvs(0, 50, 1000)
prior_h = norm.rvs(sample_mu, sample_sigma, 1000)
sns.set_theme(style='darkgrid')
ax = sns.kdeplot(prior_h, bw=2)
ax.set(xlabel='height', title='Prior')
plt.show()

# code chunk 4.14 (grid estimation)
mu_grid = np.linspace(140, 160, 200)
sigma_grid = np.linspace(4, 9, 200)
post_list = [sum(norm.logpdf(d2, m, s)) for m in mu_grid for s in sigma_grid]
post_ll = np.asarray(post_list)  # each entry is a scalar log-likelihood sum over d2

mu_grid_rep = np.repeat(mu_grid, 200)
sigma_grid_rep = np.tile(sigma_grid, 200)
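# Sketch of the usual continuation of the grid approximation (assumes the grids above
# and that uniform was imported from scipy.stats alongside norm): add the log-priors,
# then exponentiate after subtracting the maximum for numerical stability.
post_log = (post_ll
            + norm.logpdf(mu_grid_rep, 178, 20)
            + uniform.logpdf(sigma_grid_rep, 0, 50))
post_prob = np.exp(post_log - post_log.max())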
Beispiel #47
0
def white_noise(n, nb_sensor):
    noise = []
    for i in range(n):
        noise += [norm.rvs(size=nb_sensor, loc=mean, scale=standard_deviation)]
    return noise
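# Usage sketch: mean and standard_deviation are module-level settings assumed by the
# function above; the values below are only illustrative.
mean, standard_deviation = 0.0, 0.1
samples = white_noise(1000, 4)  # list of 1000 arrays, each with 4 sensor readings
# An equivalent vectorised draw would be:
# norm.rvs(size=(1000, 4), loc=mean, scale=standard_deviation)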
Beispiel #48
0
        'G3vOTHER':
        [-1 / 8, -1 / 8, 1, -1 / 8, -1 / 8, -1 / 8, -1 / 8, -1 / 8, -1 / 8]
    }

if dataSource == "Random":
    np.random.seed(47405)
    ysdtrue = 4.0
    a0true = 100
    atrue = [2, -2]  # sum to zero
    npercell = 8
    x = []
    y = []
    for xidx in range(len(atrue)):
        for subjidx in range(npercell):
            x.append(xidx)
            y.append(a0true + atrue[xidx] + norm.rvs(1, ysdtrue))
    Ntotal = len(y)
    NxLvl = len(set(x))
    #  # Construct list of all pairwise comparisons, to compare with NHST TukeyHSD:
    contrast_dict = None
    for g1idx in range(NxLvl):
        for g2idx in range(g1idx + 1, NxLvl):
            cmpVec = np.repeat(0, NxLvl)
            cmpVec[g1idx] = -1
            cmpVec[g2idx] = 1
            contrast_dict = (contrast_dict, cmpVec)

z = (y - np.mean(y)) / np.std(y)

## THE MODEL.
with pm.Model() as model:
 def sample(self, N):
     return self.m + np.sqrt(self.sigma2) * norm.rvs(size=N)
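 # Equivalent draw (sketch): shifting and scaling a standard normal is the same as
 # norm.rvs(loc=self.m, scale=np.sqrt(self.sigma2), size=N).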
def particle_dynamics(BoxDim, nPart, nTime, dyN, speed, dt,
                      dir_pos_data, dir_posCon_data, dir_vel_data, dir_dynamics,
                      start_time):

	###################################################################################################
	#### generate initial particle positions
	xPos0,yPos0,zPos0,phi0,theta0 = \
		initialize_particle_pos(BoxDim, nPart, dir_pos_data,dir_posCon_data,dir_vel_data)
	
	xPos = xPos0
	yPos = yPos0
	zPos = zPos0
		
	phi = phi0
	theta = theta0

	# ##### continous positions for MSD calculations
	# xPosCon = xPos0
	# yPosCon = yPos0
	# zPosCon = zPos0
	
	
	###################################################################################################
	### allocate for dynamics output
	m_speed = np.zeros(nTime)

	### create gaussian distribution for velocity distribution of particles (ensemble NOT time)
	v_distr = norm.rvs(size=1*nPart, scale=1.) ## std = 1, mean = 0
	
	

	#####################################################################
	#### loop over timesteps to move the particles 
	# particle positions are updated every timestep
	for nt in (np.arange(nTime)+1): # can not be parallized in a simple way
		#### time processed
		if nt % 500 == 0:
			print ('working on timestep ' + str(nt) +
				' out of ' + str(nTime+1))
			elapsed_time = time.time() - start_time
			el_min, el_sec = divmod(elapsed_time, 60)
			el_hrs, el_min = divmod(el_min, 60) 
			print ('elapsed time: %d:%02d:%02d' % (el_hrs,el_min,el_sec))
	
		#### particle displacement
		xPos, yPos, zPos, phi, theta, m_speed_nt = particle_displacement\
			(BoxDim, nPart, speed, dyN, \
			xPos, yPos, zPos, phi, theta, dt, nt, v_distr, \
			dir_pos_data, dir_posCon_data, dir_vel_data)

		m_speed[nt-1] = m_speed_nt
			
	##########################################################	
	## end loop over time steps

	
	##############################################################################################
	### save mean speed to file
	file_dy_out = (dir_dynamics + 'dynamics_' +
		str('{:0>8d}'.format(nPart)) +
		'_Ntimestep' + str('{:0>4d}'.format(nTime)) + '.dat')
	
	np.savetxt(file_dy_out, np.transpose((np.arange(nTime),m_speed, np.ones(nTime)*speed)), \
	fmt='%e', delimiter=' ', newline='\n')
def run(popsize, max_years, mutation_probability):
    '''
    The arguments to this function are what they sound like.
    Runs genetic_algorithm on various knapsack problem instances and keeps track of tabular information with this schema:
    DIFFICULTY YEAR HIGH_SCORE AVERAGE_SCORE BEST_PLAN
    '''
    table = pd.DataFrame(columns=[
        "DIFFICULTY", "YEAR", "HIGH_SCORE", "AVERAGE_SCORE", "BEST_PLAN"
    ])
    sanity_check = (10, [10, 5, 8], [100, 50, 80])
    chromosomes = genetic_algorithm(sanity_check, popsize, max_years,
                                    mutation_probability)
    for year, data in enumerate(chromosomes):
        year_chromosomes, fitnesses = data
        table = table.append(
            {
                'DIFFICULTY': 'sanity_check',
                'YEAR': year,
                'HIGH_SCORE': max(fitnesses),
                'AVERAGE_SCORE': np.mean(fitnesses),
                'BEST_PLAN': year_chromosomes[np.argmax(fitnesses)]
            },
            ignore_index=True)
    easy = (20, [20, 5, 15, 8, 13], [10, 4, 11, 2, 9])
    chromosomes = genetic_algorithm(easy, popsize, max_years,
                                    mutation_probability)
    for year, data in enumerate(chromosomes):
        year_chromosomes, fitnesses = data
        table = table.append(
            {
                'DIFFICULTY': 'easy',
                'YEAR': year,
                'HIGH_SCORE': max(fitnesses),
                'AVERAGE_SCORE': np.mean(fitnesses),
                'BEST_PLAN': year_chromosomes[np.argmax(fitnesses)]
            },
            ignore_index=True)
    medium = (100, [
        13, 19, 34, 1, 20, 4, 8, 24, 7, 18, 1, 31, 10, 23, 9, 27, 50, 6, 36, 9,
        15
    ], [
        26, 7, 34, 8, 29, 3, 11, 33, 7, 23, 8, 25, 13, 5, 16, 35, 50, 9, 30,
        13, 14
    ])
    chromosomes = genetic_algorithm(medium, popsize, max_years,
                                    mutation_probability)
    for year, data in enumerate(chromosomes):
        year_chromosomes, fitnesses = data
        table = table.append(
            {
                'DIFFICULTY': 'medium',
                'YEAR': year,
                'HIGH_SCORE': max(fitnesses),
                'AVERAGE_SCORE': np.mean(fitnesses),
                'BEST_PLAN': year_chromosomes[np.argmax(fitnesses)]
            },
            ignore_index=True)
    hard = (5000, norm.rvs(50, 15, size=100), norm.rvs(200, 60, size=100))
    chromosomes = genetic_algorithm(hard, popsize, max_years,
                                    mutation_probability)
    for year, data in enumerate(chromosomes):
        year_chromosomes, fitnesses = data
        table = table.append(
            {
                'DIFFICULTY': 'hard',
                'YEAR': year,
                'HIGH_SCORE': max(fitnesses),
                'AVERAGE_SCORE': np.mean(fitnesses),
                'BEST_PLAN': year_chromosomes[np.argmax(fitnesses)]
            },
            ignore_index=True)
    for difficulty_group in ['sanity_check', 'easy', 'medium', 'hard']:
        group = table[table['DIFFICULTY'] == difficulty_group]
        bestrow = group.loc[group['HIGH_SCORE'].idxmax()]
        print(
            "Best year for difficulty {} is {} with high score {} and chromosome {}"
            .format(difficulty_group, int(bestrow['YEAR']),
                    bestrow['HIGH_SCORE'], bestrow['BEST_PLAN']))
    table.to_pickle(
        "results.pkl"
    )  #saves the performance data, in case you want to refer to it later. pickled python objects can be loaded back at any later point.
    pass
                0.1])  #with probability weights in 100 iterations

simulate(1000)
simulate(1000, [0.2, 0.3, 0.2, 0.1, 0.1, 0.1])
"""2nd question for generating random data for Multi-Linear regression"""

import numpy as np
import pandas as pd
import random
from scipy.stats import norm
random.seed(1)  # note: norm.rvs draws from numpy's RNG, so np.random.seed(1) would be needed for reproducible draws

#Y=b0+b1x1+b2x2 is the equation I choose
X = []
for i in range(2):
    X_i = norm.rvs(0, 1, 100)
    X.append(X_i)

eps = norm.rvs(0, 0.25, 100)
y = 1 + (0.4 * X[0]) + eps + (0.5 * X[1])
data_mlr = {'X0': X[0], 'X1': X[1]}
df = pd.DataFrame(data_mlr)
print(df)
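# Sketch: a plain least-squares fit should roughly recover the chosen coefficients
# (intercept 1.0, slopes 0.4 and 0.5); np.linalg.lstsq and the names here are illustrative.
A = np.column_stack([np.ones(100), X[0], X[1]])
coef, *_ = np.linalg.lstsq(A, y, rcond=None)
print(coef)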
"""Data for logistic regression"""

n_features = 4
X = []
for i in range(n_features):
    X_i = norm.rvs(0, 1, 100)
    X.append(X_i)
a1 = (np.exp(1 + (0.5 * X[0]) + (0.4 * X[1]) + (0.3 * X[2]) + (0.5 * X[3])) /
Beispiel #53
0
def sqrt_normal_rvs(mu, sigma=1, random_state=None):
    return norm.rvs(loc=mu**0.5, scale=sigma, random_state=random_state)**2
plot_x = arange_inc(1, 9, 0.05)
plot_f = f(plot_x)

# Create a proposal distribution by hand by looking at the chart
# Experiment to get M as small as possible
mu1 = 5.8
sigma1: float = np.std(plot_x)*0.9
# Proposal distribution g(x) (NOT majorized)
g1: funcType = lambda x : norm.pdf(x, loc=mu1, scale=sigma1)
plot_g1 = g1(plot_x)
M1: float = np.max(plot_f / plot_g1)*1.01
plot_g1_maj = M1 * plot_g1
print(f'Proposal Distribution and Majorizer for rejection sampling.')
print(f'mu={mu1:0.6f}, sigma={sigma1:0.6f}, M={M1:0.6f}')
# Define the sampling distribution for the chosen proposal distribution g(x)
g1_sample = lambda : norm.rvs(loc=mu1, scale=sigma1)

# Plot the PDF f_X(x) and the majorizing distribution Mg(x)
fig, ax = plt.subplots()
fig.set_size_inches([16, 8])
ax.set_title('PDF $f_X(x)$ and its Majorizer $Mg(x)$')
ax.set_xlabel('x')
ax.set_ylabel('$f_X(x)$')
ax.set_xlim([1,9])
ax.plot(plot_x, plot_f, label='PDF')
ax.plot(plot_x, plot_g1_maj, label='Mg(x)')
ax.legend()
ax.grid()
plt.show()
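# Sketch of the accept/reject step itself (assumes f, g1, M1 and g1_sample from above,
# and that f accepts scalar inputs): draw a candidate from the proposal and keep it
# with probability f(x) / (M1 * g1(x)).
def rejection_sample(n_samples):
    samples = []
    while len(samples) < n_samples:
        x = g1_sample()
        u = np.random.uniform()
        if u <= f(x) / (M1 * g1(x)):
            samples.append(x)
    return np.array(samples)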

 
Beispiel #55
0
sigma = z_sigma * y_sd

# Posterior prediction:
# Specify x values for which predicted y's are needed:
x_post_pred = np.arange(55, 81)
# Define matrix for recording posterior predicted y values at each x value.
# One row per x value, with each row holding random predicted y values.
post_samp_size = len(b1)
y_post_pred = np.zeros((len(x_post_pred), post_samp_size))
# Define matrix for recording HDI limits of posterior predicted y values:
y_HDI_lim = np.zeros((len(x_post_pred), 2))
# Generate posterior predicted y values.
# This gets only one y value, at each x, for each step in the chain.
for chain_idx in range(post_samp_size):
    y_post_pred[:, chain_idx] = norm.rvs(
        loc=b0[chain_idx] + b1[chain_idx] * x_post_pred,
        scale=np.repeat([sigma[chain_idx]], [len(x_post_pred)]),
        size=len(x_post_pred))

for x_idx in range(len(x_post_pred)):
    y_HDI_lim[x_idx] = hpd(y_post_pred[x_idx])

## Display believable beta0 and b1 values
plt.figure()
plt.subplot(1, 2, 1)
thin_idx = 50
plt.plot(z1[::thin_idx], z0[::thin_idx], 'b.', alpha=0.7)
plt.ylabel('Standardized Intercept')
plt.xlabel('Standardized Slope')
plt.subplot(1, 2, 2)
plt.plot(b1[::thin_idx], b0[::thin_idx], 'b.', alpha=0.7)
plt.ylabel('Intercept (ht when wt=0)')
Beispiel #56
0
 def bm_change(self, dt, delta):
     # Brownian increment: the standard deviation scales as delta * sqrt(dt)
     # (delta**2 * dt would be the variance, not the scale parameter)
     change = norm.rvs(loc=0, size=1, scale=delta * dt**0.5)
     return change
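# Path-construction sketch (illustrative values): a Brownian path is the cumulative sum
# of independent increments, each with standard deviation delta * sqrt(dt).
import numpy as np
from scipy.stats import norm
dt, delta, n_steps = 0.01, 2.0, 1000
increments = norm.rvs(loc=0, scale=delta * np.sqrt(dt), size=n_steps)
path = np.cumsum(increments)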
Beispiel #57
0
def cgv(mu, nu, minv=0., maxv=1.):
    "Continuous Gaussian variate"
    rv = norm.rvs(mu, nu)
    return min(max(rv, minv), maxv)
 def _impl(y):
     mu, std = gp(y)
     return norm.rvs(mu, std)
                                    dyn_e_T, dyn_tau_T,\
                                    T, t_step)

#%%
plt.figure()
plt.plot(control_coef_MKV['eta'])
plt.title('eta')

plt.figure()
plt.plot(control_coef_MKV['chi'])
plt.title('chi')

# %% simulate a trajectory of X_t
np.random.seed(seed=0)

dW_t = norm.rvs(loc=0, scale=sigma * np.sqrt(t_step), size=n_step)

X_t = np.zeros(n_step, dtype=float)
X_t[0] = X0
for i in range(1, n_step, 1):
    X_t[i] = X_t[i-1] + ((dyn_a_t + dyn_b_t *control_coef_MKV['eta'][i-1]) * X_t[i-1] \
                          + dyn_b_t * control_coef_MKV['chi'][i-1] + dyn_c_t[i-1]) * t_step \
             + dW_t[i-1]
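# Discretisation note (sketch): the loop above is an Euler-Maruyama step for
#   dX_t = [(dyn_a_t + dyn_b_t * eta_t) * X_t + dyn_b_t * chi_t + dyn_c_t] dt + sigma dW_t,
# where the pre-drawn dW_t array already carries the diffusion coefficient
# (scale = sigma * sqrt(t_step), i.e. variance sigma**2 * t_step per step).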

alpha_t = opt_control(X_t, control_coef_MKV['eta'], control_coef_MKV['chi'],
                      -b2 / rt)

# simulated running cost
f_t = .5 * (qt * X_t**2 + bar_qt *
            (X_t - st * bar_mu_t_MKV)**2 + rt * alpha_t**2)
# terminal cost
Beispiel #60
0
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

theme_blue = '#0C2B36'
theme_red = '#E04D4F'
theme_green = '#00F900'

mu = 5
sig = 1

# Generate some data for this demonstration.
data = norm.rvs(10.0, 2.5, size=500)

# Fit a normal distribution to the data:
mu, std = norm.fit(data)

# Plot the histogram.
plt.hist(data,
         bins=9,
         density=True,
         alpha=0.8,
         color=theme_blue,
         edgecolor='gray')

# Plot the PDF.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, theme_red, linewidth=4)
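# A small finishing touch (sketch): annotate the figure with the fitted parameters.
plt.title('Fit results: mu = %.2f, std = %.2f' % (mu, std))
plt.show()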