Example #1
def test_multivariate_normal(session_tf, x, mu, cov_sqrt):
    cov = np.dot(cov_sqrt, cov_sqrt.T)
    L = np.linalg.cholesky(cov)

    if len(x.shape) != 2 or len(mu.shape) != 2:
        with pytest.raises(Exception) as e_info:
            gp_result = logdensities.multivariate_normal(
                tf.convert_to_tensor(x),
                tf.convert_to_tensor(mu),
                tf.convert_to_tensor(L))
    else:
        x_tf = tf.placeholder(settings.float_type)
        mu_tf = tf.placeholder(settings.float_type)
        gp_result = logdensities.multivariate_normal(
            x_tf, mu_tf, tf.convert_to_tensor(L))

        gp_result = session_tf.run(gp_result, feed_dict={x_tf: x, mu_tf: mu})

        if mu.shape[1] > 1:
            if x.shape[1] > 1:
                sp_result = [mvn.logpdf(x[:,i], mu[:,i], cov) for i in range(mu.shape[1])]
            else:
                sp_result = [mvn.logpdf(x.ravel(), mu[:, i], cov) for i in range(mu.shape[1])]
        else:
            sp_result = mvn.logpdf(x.T, mu.ravel(), cov)
        assert_allclose(gp_result, sp_result)
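For reference, a minimal standalone sketch of the SciPy comparison this test performs: gpflow's logdensities.multivariate_normal takes the Cholesky factor L, while scipy.stats takes the full covariance. The dimensions and the random cov_sqrt below are illustrative assumptions, not the fixtures of the original suite.

import numpy as np
from scipy.stats import multivariate_normal as mvn

rng = np.random.RandomState(0)
D, num_cols = 3, 4
cov_sqrt = rng.randn(D, D)
cov = np.dot(cov_sqrt, cov_sqrt.T)      # full covariance
L = np.linalg.cholesky(cov)             # what gpflow's multivariate_normal receives
x = rng.randn(D, num_cols)
mu = rng.randn(D, num_cols)

# scipy reference, one log-density per column, mirroring sp_result above
sp_result = [mvn.logpdf(x[:, i], mu[:, i], cov) for i in range(num_cols)]
print(sp_result)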
Example #2
def recluster_DUP(df):

    #priors
    mu_0={1: np.array([0.03, 2]), 2:np.array([0.27,3]), 3:np.array([0.45,4])}
    psi={1:np.matrix('0.00128 -0.00075; -0.00075 1.1367'), 
      2:np.matrix('0.013 -0.0196; -0.0196 0.4626'),
      3:np.matrix('0.0046 -0.0112; -0.0112 0.07556')}
    lambda_0=1
    nu_0=1

    gpd=df.loc[:, ['gtn', 'CN', 'AB']].groupby(['gtn'])
    covs=gpd[['AB','CN']].cov()
    mns=gpd[['AB', 'CN']].mean()
    cts=gpd.size()
    
    df.loc[:, 'gt_adj']=df.loc[:, 'gtn'].copy()

    mu_map={1: get_mu_map(1, cts, lambda_0, mu_0, mns),
            2: get_mu_map(2, cts, lambda_0, mu_0, mns),
            3: get_mu_map(3, cts, lambda_0, mu_0, mns)}
    sigma_map={1: get_sigma_map(1, cts, lambda_0, psi, covs, mns, mu_0),
               2: get_sigma_map(2, cts, lambda_0, psi, covs, mns, mu_0),
               3: get_sigma_map(3, cts, lambda_0, psi, covs, mns, mu_0)}

    df.loc[:, 'lld1']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[1], cov=sigma_map[1])
    df.loc[:, 'lld2']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[2], cov=sigma_map[2])
    df.loc[:, 'lld3']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[3], cov=sigma_map[3])
    lld_code={'lld1':1, 'lld2':2, 'lld3':3}
    df.loc[:,'gt_new']=df.loc[:, ['lld1', 'lld2', 'lld3']].idxmax(1).map(lld_code)
    df.loc[:, 'gq']=df.loc[:, ['lld1', 'lld2', 'lld3']].max(axis=1)-df.loc[:, ['lld1', 'lld2', 'lld3']].median(axis=1)
    df.loc[:, 'med_gq']=df.loc[:, 'gq'].median()
    df.loc[:, 'q10_gq']=df.loc[:, 'gq'].quantile(0.1)
    return 
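The helpers get_mu_map and get_sigma_map are not shown on this page. A plausible sketch, assuming the usual Normal-Inverse-Wishart conjugate MAP updates; the signatures mirror the calls above, while the nu_0 default and the exact normalisation are assumptions that may differ from the original project.

import numpy as np

def get_mu_map(k, cts, lambda_0, mu_0, mns):
    # posterior mean: prior mean shrunk toward the per-genotype empirical mean
    n = cts.get(k, 0)
    if n == 0:
        return np.atleast_1d(mu_0[k])
    xbar = np.atleast_1d(mns.loc[k].values)
    return (lambda_0 * np.atleast_1d(mu_0[k]) + n * xbar) / (lambda_0 + n)

def get_sigma_map(k, cts, lambda_0, psi, covs, mns, mu_0, nu_0=1):
    # MAP covariance under a Normal-Inverse-Wishart prior (one common convention)
    n = cts.get(k, 0)
    prior_scale = np.atleast_2d(np.asarray(psi[k], dtype=float))
    d = prior_scale.shape[0]
    if n < 2:
        return prior_scale
    S = (n - 1) * np.atleast_2d(covs.loc[k].values)           # empirical scatter
    diff = (np.atleast_1d(mns.loc[k].values) - np.atleast_1d(mu_0[k])).reshape(-1, 1)
    scale_n = prior_scale + S + (lambda_0 * n / (lambda_0 + n)) * np.dot(diff, diff.T)
    return scale_n / (nu_0 + n + d + 2)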
Example #3
def recluster_INV_BND(df):

    #priors
    mu_0={1: 0.03, 2:0.46, 3:0.94}
    psi={1:0.00128, 2:0.013, 3:0.0046}
    
    lambda_0=1
    nu_0=1

    gpd=df.loc[:, ['gtn', 'AB']].groupby(['gtn'])
    covs=gpd[['AB']].cov()
    mns=gpd[['AB']].mean()
    cts=gpd.size()
    
    df.loc[:, 'gt_adj']=df.loc[:, 'gtn'].copy()
    mu_map={1: get_mu_map(1, cts, lambda_0, mu_0, mns),
            2: get_mu_map(2, cts, lambda_0, mu_0, mns),
            3: get_mu_map(3, cts, lambda_0, mu_0, mns)}
    sigma_map={1: get_sigma_map(1, cts, lambda_0, psi, covs, mns, mu_0),
               2: get_sigma_map(2, cts, lambda_0, psi, covs, mns, mu_0),
               3: get_sigma_map(3, cts, lambda_0, psi, covs, mns, mu_0)}

    # INV/BND model is 1-D (AB only), matching the scalar priors above
    df.loc[:, 'lld1']=multivariate_normal.logpdf(df.loc[:, 'AB'], mean=mu_map[1], cov=sigma_map[1])
    df.loc[:, 'lld2']=multivariate_normal.logpdf(df.loc[:, 'AB'], mean=mu_map[2], cov=sigma_map[2])
    df.loc[:, 'lld3']=multivariate_normal.logpdf(df.loc[:, 'AB'], mean=mu_map[3], cov=sigma_map[3])
    lld_code={'lld1':1, 'lld2':2, 'lld3':3}
    df.loc[:,'gt_new']=df.loc[:, ['lld1', 'lld2', 'lld3']].idxmax(1).map(lld_code)
    df.loc[:, 'gq']=df.loc[:, ['lld1', 'lld2', 'lld3']].max(axis=1)-df.loc[:, ['lld1', 'lld2', 'lld3']].median(axis=1)
    df.loc[:, 'med_gq']=df.loc[:, 'gq'].median()
    df.loc[:, 'q10_gq']=df.loc[:, 'gq'].quantile(0.1)
    return 
Example #4
def recluster_DEL(df):

    #priors
    mu_0={1: np.array([0.03, 2]), 2:np.array([0.46,1.1]), 3:np.array([0.94,0.1])}
    psi={1:np.matrix('0.00128 -0.00075; -0.00075 1.1367'), 
      2:np.matrix('0.013 -0.0196; -0.0196 0.4626'),
      3:np.matrix('0.0046 -0.0112; -0.0112 0.07556')}
    lambda_0=1
    nu_0=1

    gpd=df.loc[:, ['gtn', 'CN', 'AB']].groupby(['gtn'])
    covs=gpd[['AB','CN']].cov()
    mns=gpd[['AB', 'CN']].mean()
    cts=gpd.size()

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        lin_fit=smf.ols('CN~AB',df).fit()
        df.loc[:, 'gt_adj']=df.loc[:, 'gtn'].copy()
        #check that CN, AB are correlated, and in the right direction
        if (lin_fit.rsquared>0.5) and (-1*lin_fit.params[1]>0.5):
             x_int=-lin_fit.params[0]/lin_fit.params[1]
             #adjust init GT calls if AB shifted toward 0 
             if x_int<1:
                 #find mdpts between neighboring GT
                 mins=gpd['AB'].min()
                 maxes=gpd['AB'].max()
                 bound1=0.2 
                 bound2=0.7 
                 if (2 in mins) and (1 in maxes):
                     bound1=0.5*(mins[2]+maxes[1])
                 if (3 in mins) and (2 in maxes):
                     bound2=0.5*(mins[3]+maxes[2])
                 newbound1=bound1*x_int
                 newbound2=bound2*x_int
                 df.loc[:, 'gt_adj']=pd.to_numeric(pd.cut(df['AB'], bins=[-1, newbound1, newbound2, 1], labels=['1', '2', '3']))
                 gpd=df.loc[:,['gt_adj', 'CN', 'AB']].groupby(['gt_adj'])
                 covs=gpd[['AB', 'CN']].cov()
                 mns=gpd[['AB', 'CN']].mean()
                 cts=gpd.size()

    mu_map={1: get_mu_map(1, cts, lambda_0, mu_0, mns),
            2: get_mu_map(2, cts, lambda_0, mu_0, mns),
            3: get_mu_map(3, cts, lambda_0, mu_0, mns)}
    sigma_map={1: get_sigma_map(1, cts, lambda_0, psi, covs, mns, mu_0),
               2: get_sigma_map(2, cts, lambda_0, psi, covs, mns, mu_0),
               3: get_sigma_map(3, cts, lambda_0, psi, covs, mns, mu_0)}

    df.loc[:, 'lld1']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[1], cov=sigma_map[1])
    df.loc[:, 'lld2']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[2], cov=sigma_map[2])
    df.loc[:, 'lld3']=multivariate_normal.logpdf(df.loc[:, ['AB', 'CN']], mean=mu_map[3], cov=sigma_map[3])
    lld_code={'lld1':1, 'lld2':2, 'lld3':3}
    df.loc[:,'gt_new']=df.loc[:, ['lld1', 'lld2', 'lld3']].idxmax(1).map(lld_code)
    df.loc[:, 'gq']=df.loc[:, ['lld1', 'lld2', 'lld3']].max(axis=1)-df.loc[:, ['lld1', 'lld2', 'lld3']].median(axis=1)
    df.loc[:, 'med_gq']=df.loc[:, 'gq'].median()    
    df.loc[:, 'q10_gq']=df.loc[:, 'gq'].quantile(0.1)
    return
Example #5
def log_joint(y, z, lam, prior_cov, likelihood_cov):
    '''
    The log joint distribution of the model.
    :return: plog_joint, plog_prior, plog_likelihood
    '''
    plog_prior = mvn.logpdf(z, lam.T, prior_cov)
    plog_likelihood = 0
    for ii in xrange(len(y)):
        plog_likelihood += mvn.logpdf(y[ii,:], z.T, likelihood_cov)
    plog_joint = plog_prior + plog_likelihood

    return plog_joint, plog_prior, plog_likelihood
Example #6
 def test_logpdf_default_values(self):
     # Check that the log of the pdf is in fact the logpdf
     # with default parameters Mean=None and cov = 1
     np.random.seed(1234)
     x = np.random.randn(5)
     d1 = multivariate_normal.logpdf(x)
     d2 = multivariate_normal.pdf(x)
     # check whether default values are being used
     d3 = multivariate_normal.logpdf(x, None, 1)
     d4 = multivariate_normal.pdf(x, None, 1)
     assert_allclose(d1, np.log(d2))
     assert_allclose(d3, np.log(d4))
Example #7
 def condition_on_2(mu_x, sigma_x, A, y, sigma_obs):
     sigma_xy = sigma_x.dot(A.T)
     sigma_yy = A.dot(sigma_x).dot(A.T) + sigma_obs
     mu = mu_x + sigma_xy.dot(np.linalg.solve(sigma_yy, y - A.dot(mu_x)))
     sigma = sigma_x - sigma_xy.dot(np.linalg.solve(sigma_yy, sigma_xy.T))
     ll = mvn.logpdf(y, A.dot(mu_x), sigma_yy)
     return (mu, sigma), ll
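A usage sketch with made-up 2-D numbers, assuming condition_on_2 above is available at module level together with numpy as np and scipy.stats.multivariate_normal as mvn:

import numpy as np

mu_x = np.array([0.0, 1.0])
sigma_x = np.array([[1.0, 0.2], [0.2, 2.0]])
A = np.eye(2)                       # observation matrix (hypothetical)
sigma_obs = 0.5 * np.eye(2)         # observation noise covariance
y = np.array([0.3, 0.8])

(mu_post, sigma_post), ll = condition_on_2(mu_x, sigma_x, A, y, sigma_obs)
print(mu_post, ll)                  # conditioned mean and marginal log-likelihood of y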
Example #8
    def nll(self, x, y, z, theta_x, theta_y, theta_z, sigma_x, sigma_y, sigma_z):
        '''
        '''
        
        if self.verbose:
            print '\n=========='
            print 'x      :\t', x      , '[cm]'
            print 'y      :\t', y      , '[cm]'
            print 'z      :\t', z      , '[cm]'
            print 'theta_x:\t', theta_x, '[rad]'
            print 'theta_y:\t', theta_y, '[rad]'
            print 'theta_z:\t', theta_z, '[rad]'
            print 'sigma x:\t', sigma_x, '[cm]'
            print 'sigma y:\t', sigma_y, '[cm]'
            print 'sigma z:\t', sigma_z, '[cm]'

        cov = self._compute_covariance_matrix(theta_x, theta_y, theta_z, sigma_x, sigma_y, sigma_z)
        
        if self.verbose:
            print 'covariance matrix', cov
            print 'determinant: ', np.linalg.det(cov) 
        
        # check singularity / invertibility
        if np.linalg.det(cov) > 0.:
            nll = -multivariate_normal.logpdf(self.events,
                                              mean=np.array([x, y, z]),
                                              cov=cov).sum()
        else:
            print 'WARNING! Singular covariance matrix, cannot invert!'
            return float('nan')

        if self.verbose:
            print 'nLL: ', nll
        
        return nll
Example #9
    def _compute_sum_nll_vtx(self, cov_beam, x, y, z):

        nlls = np.array([]).astype(np.float64)

        for i in self.rnevents:
            vtx_xx = np.power(self.errorscale * self.uncertainties[i][0], 2)
            vtx_yy = np.power(self.errorscale * self.uncertainties[i][1], 2)
            vtx_zz = np.power(self.errorscale * self.uncertainties[i][2], 2)
    
            vtx_xy = self.correlations[i][0] * self.errorscale * self.uncertainties[i][0] * self.errorscale * self.uncertainties[i][1]
            vtx_xz = self.correlations[i][1] * self.errorscale * self.uncertainties[i][0] * self.errorscale * self.uncertainties[i][2]
            vtx_yz = self.correlations[i][2] * self.errorscale * self.uncertainties[i][1] * self.errorscale * self.uncertainties[i][2]
    
            cov_vtx = np.matrix([
                [vtx_xx, vtx_xy, vtx_xz],
                [vtx_xy, vtx_yy, vtx_yz],
                [vtx_xz, vtx_yz, vtx_zz],
            ]).astype(np.float64)

            cov_tot = cov_vtx + cov_beam
            
            nll = -multivariate_normal.logpdf(self.events[i],
                                              mean=np.array([x, y, z]),
                                              cov=cov_tot,
                                              allow_singular=True) # this was needed because?
            
            if self.verbose and i%20==0:
                print '\t====> evaluated %d/%d vertex, nll = %f, sum nll = %f' %(i, self.nevents, nll, nlls.sum())

            nlls = np.append(nlls, nll)
                        
        return nlls.sum()
Example #10
File: get_spec.py  Project: HIPS/DESI-MCMC
def simple_model_prior_logpdf(values):
    means = np.sum([uniform_logpdf(val, START, END) for val in values[components:(2*components)]])
    scales = np.sum([invgamma.logpdf(val, A, LOC, SCALE) for val in values[(2*components):(3*components)]])
    weights = multivariate_normal.logpdf(values[:components],
                                         mean=np.zeros(components),
                                         cov=COV_ALPHA*np.eye(components))
    return weights + means + scales
Example #11
def loglikelihood(A):
    """Compute log likelihood function of GMM.
    Assume hard assignments, i.e. A = [A_1, A_2, ...] so each set A_1
    corresponds to one cluster.
    """
    return sum([multivariate_normal.logpdf(a, mean=a.mean(axis=0), 
                cov=np.cov(a.T)).sum() for a in A])
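A usage sketch for the hard-assignment likelihood above, assuming the snippet's imports (numpy as np, scipy.stats.multivariate_normal) are in scope; the two clusters of 2-D points are random, illustrative data:

import numpy as np

rng = np.random.RandomState(0)
A = [rng.randn(50, 2), rng.randn(40, 2) + 5.0]   # two hard-assigned clusters
print(loglikelihood(A))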
Example #12
def test(test_img, test_label, m_array, mu_array, sigma_array):
    error = 0
    abstain = 0
    thre = 0
    pi_array = np.log(m_array / np.sum(m_array))
    test_n = len(test_label)
    print "Run ", test_n, " Tests"

    test_img_array = np.array(test_img)
    p_x = np.zeros((10, test_n))
    for k in range(10):
        p_x[k] = multivariate_normal.logpdf(test_img_array, mean=mu_array[k], cov=sigma_array[k])
    # px_T = p_x.T + pi_array
    px_T = p_x.T
    # np.save("p_x.npy", px_T)
    for i in range(test_n):
        log_sum = logsumexp(px_T[i])
        py_x = np.exp(px_T[i] - log_sum)
        print i, py_x
        class_id = np.argmax(px_T[i])
        print "Predict: ", class_id, "Accurate: ", test_label[i]
        m = np.sort(px_T[i])
        if m[9] - m[8] < thre:
            abstain += 1
            continue
        if class_id != test_label[i]:
            # print i
            error += 1
    print "Abstain ", abstain
    return error * 100.0 / (test_n - abstain)
Example #13
 def predict_log_probs(self,X,bias_term = None):
     '''
     Calculates log of probabilities
     
     Parameters:
     -----------
     
     X: numpy array of size 'unknown x m'
         Explanatory variables
         
     bias_term: bool
         If True , explanatory variables matrix contains bias_term (bias term should be 
         in last column of design matrix)
         
     Returns:
     --------
     
     posterior_log_prob: numpy array of size 'unknown x k'
         Log posterior probability that each observation belongs to each class
     
     '''
     X         = self._bias_term_pre_processing_X(X,bias_term)
     n,m       = np.shape(X)
     log_posterior = np.zeros([n,self.k])
     for i in range(self.k):
         log_posterior[:,i]  = mvn.logpdf(X,self.means[:,i], cov = self.cov)
         log_posterior[:,i] += self.log_priors[i]
     normaliser         = logsumexp(log_posterior, axis = 1)
     posterior_log_prob = (log_posterior.T - normaliser).T
     return posterior_log_prob
Example #14
def loglike(rho,x,y):
    beta = rho[0:K+1]
    sigma2 = np.exp(rho[K+1])
    residual = y-np.dot(x,beta)
    contributions = mn.logpdf(residual,0,sigma2)
    loglikelihood = np.sum(contributions)
    return -loglikelihood
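Since loglike returns the negative log-likelihood, it is typically handed to an optimiser. A sketch under the assumptions that loglike lives in the same module, that mn is scipy.stats.multivariate_normal, and that K is the module-level number of slope coefficients:

import numpy as np
from scipy.optimize import minimize

rng = np.random.RandomState(0)
K = 2
x = np.column_stack([np.ones(100), rng.randn(100, K)])   # design matrix with intercept
y = x.dot(np.array([1.0, 2.0, -1.0])) + rng.randn(100)

rho0 = np.zeros(K + 2)            # K+1 regression coefficients plus the log-variance
res = minimize(loglike, rho0, args=(x, y), method='BFGS')
print(res.x)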
Example #15
def test_miner_init_smoke(miner_df):
    logcf = lambda row, x: mvn.logpdf(x, np.zeros(2), np.eye(len(x)))
    miner = MInER(miner_df, logcf, ['x_2', 'x_3'], n_models=2, use_mp=False)
    miner.init_models()
    assert hasattr(miner, '_logcf')
    assert hasattr(miner, '_miner_cols')
    assert hasattr(miner, '_miner_col_idxs')
    assert not hasattr(miner, 'combat_wombat')
Example #16
def test_fit_smoke(miner_df):
    logcf = lambda row, x: mvn.logpdf(x, np.zeros(2), np.eye(len(x)))
    miner = MInER(miner_df, logcf, ['x_2', 'x_3'], n_models=2, use_mp=False)
    miner.init_models()
    miner.fit(1, 5)

    assert(not np.any(np.isnan(miner._df['x_2'].values)))
    assert(not np.any(np.isnan(miner._df['x_3'].values)))
Example #17
def loglikelihood(X, Z, W):
  ZW = Z.dot(W.T)
  LL = 0
  for i in xrange(N):
    ll = mvn.logpdf(X[i], mean=ZW[i], cov=sigmaI)
    LL += ll
  LL += norm.logpdf(W.flatten(), scale=1/lam).sum()
  return LL
Example #18
def mvn_likelihood(x, mu, Sigma):
    # Workaround for multivariate_normal.logpdf, which expects 1-D arrays for x and mean
    # Reshape arrays to 1 dim
    if mu.ndim != 1 and not isinstance(mu, float):
        (rows, cols) = mu.shape
        x = x.reshape((rows))
        mu = mu.reshape((rows))
    return multivariate_normal.logpdf(x=x, mean=mu, cov=Sigma, allow_singular=True)
Example #19
 def predict(self, X):
     N, D = X.shape
     K = len(self.gaussians)
     P = np.zeros((N, K))
     for c, g in self.gaussians.iteritems():
         mean, cov = g['mean'], g['cov']
         P[:,c] = mvn.logpdf(X, mean=mean, cov=cov) + np.log(self.priors[c])
     return np.argmax(P, axis=1)
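For context, a minimal sketch (an assumption, not the original class) of how the self.gaussians and self.priors used by predict are typically populated in a Gaussian Bayes classifier:

import numpy as np

def fit_gaussians(X, Y, smoothing=1e-3):
    gaussians, priors = {}, {}
    for c in np.unique(Y):
        Xc = X[Y == c]
        gaussians[c] = {
            'mean': Xc.mean(axis=0),
            'cov': np.cov(Xc.T) + smoothing * np.eye(X.shape[1]),  # regularised covariance
        }
        priors[c] = float(len(Xc)) / len(Y)
    return gaussians, priors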
Example #20
    def kalman_filter(self, observ):
        """Kalman filter using the model on a set of observations"""
        
        # Get system matrices
        F = self.transition_matrix()
        Q = self.transition_covariance()
        H = self.observation_matrix()
        R = self.observation_covariance()

        # Initialise arrays of Gaussian densities and (log-)likelihood
        num_time_instants = len(observ)
        flt = GaussianDensityTimeSeries(num_time_instants, self.ds)
        prd = GaussianDensityTimeSeries(num_time_instants, self.ds)
        lhood = 0

        # Loop through time instants
        for kk in range(num_time_instants):

            # Prediction
            if kk > 0:
                prd_kk = kal.predict(flt.get_instant(kk-1), F, Q)
            else:
                prd_kk = self.initial_state_prior
            prd.set_instant(kk, prd_kk)

            # Correction - handles missing data indicated by NaNs
            y = observ[kk]
            if not np.any(np.isnan(y)):
                # Nothing missing - full update
                flt_kk,innov = kal.correct(prd.get_instant(kk), y, H, R)
                lhood = lhood + mvn.logpdf(observ[kk], innov.mn, innov.vr)
            elif np.all(np.isnan(y)):
                # All missing - no update
                flt_kk = prd_kk
            else:
                # Partially missing - delete missing elements
                missing = np.where( np.isnan(y) )
                yp = np.delete(y, missing, axis=0)
                Hp = np.delete(H, missing, axis=0)
                Rp = np.delete(np.delete(R, missing, axis=0), missing, axis=1)
                flt_kk,innov = kal.correct(prd.get_instant(kk), yp, Hp, Rp)
                lhood = lhood + mvn.logpdf(yp, innov.mn, innov.vr)
                
            flt.set_instant(kk, flt_kk)

        return flt, prd, lhood
Example #21
    def compute_likelihood(self, data):
        ps, covs = zip(*self.get_weights_and_covariances())

        # get the log prob under each covariance matrix
        lps = map(lambda c: multivariate_normal.logpdf(data, mean=origin, cov=c), covs)

        ## TODO: DOUBLE CHECK THIS:

        return sum(logsumexp([lp + log(p) for p, lp in zip(ps, lps)], axis=0))
Example #22
def test_logpdf():
    # Check that the log of the pdf is in fact the logpdf
    np.random.seed(1234)
    x = np.random.randn(5)
    mean = np.random.randn(5)
    cov = np.abs(np.random.randn(5))
    d1 = multivariate_normal.logpdf(x, mean, cov)
    d2 = multivariate_normal.pdf(x, mean, cov)
    assert_allclose(d1, np.log(d2))
Example #23
def test_frozen():
    # The frozen distribution should agree with the regular one
    np.random.seed(1234)
    x = np.random.randn(5)
    mean = np.random.randn(5)
    cov = np.abs(np.random.randn(5))
    norm_frozen = multivariate_normal(mean, cov)
    assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov))
    assert_allclose(norm_frozen.logpdf(x),
                    multivariate_normal.logpdf(x, mean, cov))
Example #24
def loglikelihood(X, parameters):
    pis, mus, Sigmas = parameters
    N = X.shape[0]
    K = len(pis)
    log_probs_flat = np.asarray(
                [np.log(pis[k]) + mvn.logpdf(X[n, :], mus[k, :], Sigmas[k, :, :]) 
                 for k in range(K) for n in range(N)])
    log_probs = np.reshape(log_probs_flat, (K, N)).T
    L = np.sum(logsumexp(log_probs, axis=1))
    return L
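The per-point loop above can be avoided, since scipy's logpdf accepts an (N, d) array: only the loop over the K components is needed. A vectorised sketch of the same computation, assuming the same parameter layout:

import numpy as np
from scipy.stats import multivariate_normal as mvn
from scipy.special import logsumexp

def loglikelihood_vectorized(X, parameters):
    pis, mus, Sigmas = parameters
    # (N, K) matrix of log pi_k + log N(x_n | mu_k, Sigma_k)
    log_probs = np.column_stack(
        [np.log(pis[k]) + mvn.logpdf(X, mus[k, :], Sigmas[k, :, :])
         for k in range(len(pis))])
    return np.sum(logsumexp(log_probs, axis=1))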
Example #25
def get_supernovae(n, data=True):
    redshifts = RedshiftSampler()

    # Redshift distribution
    zs = redshifts.sample(size=n)

    # import matplotlib.pyplot as plt
    # plt.hist(zs, 100)
    # plt.show()
    # exit()

    # Population stats
    vals = get_truths_labels_significance()
    mapping = {k[0]: k[1] for k in vals}
    cosmology = FlatwCDM(70.0, mapping["Om"])
    mus = cosmology.distmod(zs).value

    alpha = mapping["alpha"]
    beta = mapping["beta"]
    dscale = mapping["dscale"]
    dratio = mapping["dratio"]
    p_high_masses = np.random.uniform(low=0.0, high=1.0, size=n)
    means = np.array([mapping["mean_MB"], mapping["mean_x1"], mapping["mean_c"]])
    sigmas = np.array([mapping["sigma_MB"], mapping["sigma_x1"], mapping["sigma_c"]])
    sigmas_mat = np.dot(sigmas[:, None], sigmas[None, :])
    correlations = np.dot(mapping["intrinsic_correlation"], mapping["intrinsic_correlation"].T)
    pop_cov = correlations * sigmas_mat

    results = []
    for z, p, mu in zip(zs, p_high_masses, mus):
        try:
            MB, x1, c = np.random.multivariate_normal(means, pop_cov)
            mass_correction = dscale * (1.9 * (1 - dratio) / (0.9 + np.power(10, 0.95 * z)) + dratio)
            adjustment = - alpha * x1 + beta * c - mass_correction * p
            MB_adj = MB + adjustment
            mb = MB_adj + mu
            result = get_ia_summary_stats(z, MB_adj, x1, c, cosmo=cosmology, data=data)
            d = {
                "MB": MB,
                "mB": mb,
                "x1": x1,
                "c": c,
                "m": p,
                "z": z,
                "pc": result["passed_cut"],
                "lp": multivariate_normal.logpdf([MB, x1, c], means, pop_cov),
                "dp": result.get("delta_p"),
                "parameters": result.get("params"),
                "covariance": result.get("cov"),
                "lc": None if data else result.get("lc")
            }
            results.append(d)
        except RuntimeError:
            print("Error on nova: %0.2f %0.2f %0.2f %0.3f" % (MB, x1, c, z))
    return results
Example #26
 def _e_step(self):
     '''
     Calculates posterior distribution of latent variable for each class
     '''
     log_lvpr    = np.log(self.latent_var_prior)
     for i,resp_k in enumerate(self.responsibilities):
         for j in range(self.clusters[i]):
             log_prior        = mvn.logpdf(self.X,self.mu[i][:,j],self.covar)
             resp_k[:,j]      = log_prior + log_lvpr[i][j]
         normaliser = logsumexp(resp_k, axis = 1)
         self.responsibilities[i]    = np.exp((resp_k.T - normaliser).T)
Example #27
def logpdf(x, mean, cov, allow_singular=True):
    """Computes the log of the probability density function of the normal
    N(mean, cov) for the data x. The normal may be univariate or multivariate.

    Wrapper for older versions of scipy's multivariate_normal.logpdf, which
    did not support the allow_singular keyword prior to version 0.15.0.

    If allow_singular is not supported and cov is singular or not positive
    semi-definite, you may get an exception.

    `x` and `mean` may be column vectors, row vectors, or lists.
    """

    flat_mean = np.asarray(mean).flatten()
    flat_x = np.asarray(x).flatten()

    if _support_singular:
        return multivariate_normal.logpdf(flat_x, flat_mean, cov, allow_singular)
    else:
        return multivariate_normal.logpdf(flat_x, flat_mean, cov)
Example #28
 def predict_score(self, X):
     m = self.mu.shape[0]  # number of classes
     n, p = X.shape
     ans = np.zeros((n, m))
     for k in range(0, m):
         mu_k = self.mu[k, :]
         sigma_k = self.sigma[k, :, :]
         ans[:, k] = multivariate_normal.logpdf(X, mu_k, sigma_k)
     log_prior = [log(p) for p in self.prior]
     ans += log_prior
     return ans
Example #29
    def addTraces(self, traces, plaintexts, ciphertexts, knownkeys=None, progressBar=None, pointRange=None):

        # Hack for now - just use last template found
        template = self.loadTemplatesFromProject()[-1]
        pois = template["poi"]
        numparts = len(template['mean'][0])
        results = np.zeros((16, 256))

        tdiff = self._reportinginterval

        if progressBar:
            progressBar.setMinimum(0)
            progressBar.setMaximum(16 * len(traces))
            pcnt = 0

        for tnum in range(0, len(traces)):
            for bnum in range(0, 16):
                newresultsint = [multivariate_normal.logpdf(traces[tnum][pois[bnum]], mean=template['mean'][bnum][i], cov=np.diag(template['cov'][bnum][i])) for i in range(0, numparts)]

                ptype = template["partitiontype"]

                if ptype == "PartitionHWIntermediate":
                    newresults = []
                    # Map to key guess format
                    for i in range(0, 256):
                        # Get hypothetical hamming weight
                        hypint = HypHW(plaintexts[tnum], None, i, bnum)
                        newresults.append(newresultsint[ hypint ])
                elif ptype == "PartitionHDLastRound":
                    newresults = []
                    # Map to key guess format
                    for i in range(0, 256):
                        # Get hypothetical hamming distance
                        # hypint = HypHD(plaintexts[tnum], None, i, bnum)
                        hypint = HypHD(None, ciphertexts[tnum], i, bnum)
                        newresults.append(newresultsint[ hypint ])
                else:
                    newresults = newresultsint

                results[bnum] += newresults
                self.stats.updateSubkey(bnum, results[bnum], tnum=(tnum + 1))

                if progressBar:
                    progressBar.setValue(pcnt)
                    progressBar.updateStatus((tnum, len(traces)), bnum)
                    pcnt += 1

                    if progressBar.wasCanceled():
                        raise KeyboardInterrupt


            # Do plotting if required
            if (tnum % tdiff) == 0 and self.sr:
                self.sr()
Example #30
    def condition_on(mu_x, sigma_x, C, sigma_obs, y):
        p, n = C.shape
        sigma_xy = sigma_x.dot(C.T)
        sigma_yy = C.dot(sigma_x).dot(C.T) + np.diag(sigma_obs)
        mu_y = C.dot(mu_x)
        mu = mu_x + sigma_xy.dot(np.linalg.solve(sigma_yy, y - mu_y))
        sigma = sigma_x - sigma_xy.dot(np.linalg.solve(sigma_yy, sigma_xy.T))

        ll = multivariate_normal.logpdf(y, mu_y, sigma_yy)

        return ll, mu, sigma
Example #31
def log_pdf_ppca(X, W_k, mu_k, sigma_k):
    """Calculate the log density of each point in a dataset
    w.r.t a specific local PPCA model
    $p(x_n | mu_k, sigma_k) ~ N(x_n | mu_k, W_k*W_k^T + sigma_k I)$

    Args:
        X: (D, N)
        W_k: (D, M)
        mu_k: (D)
        sigma_k: (scalar)

    Returns:
        log_density: (N)
        T_inv: (M, M)
    """
    D, N = X.shape
    D, M = W_k.shape

    C = W_k @ W_k.T + sigma_k * np.eye(D)  # (D, D)
    T = W_k.T @ W_k + sigma_k * np.eye(M)  # (M, M)
    T_inv = np.linalg.inv(T)  # (M, M)
    # C_inv = 1.0 / sigma_k * (           # (D, D)
    #     np.ones(D) - W_k @ T_inv @ W_k.T
    # )
    # log_det = - 0.5 * np.log(np.linalg.det(C))  # scalar
    # coeff = -0.5 * D * np.log(2 * np.pi)  # scalar

    # X_centered = X - mu_k.reshape(D, 1)  # (D, N)
    # log_density = coeff + log_det \
    #     - 0.5 * (X_centered.T @ C_inv @ X_centered).sum(axis=0)  # (N, 1)

    log_density2 = multivariate_normal.logpdf(X.T, mean=mu_k, cov=C)
    # # print('my implementation', log_density.sum())
    # # print('scipy: ', log_density2.sum())

    return log_density2, T_inv
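A usage sketch with arbitrary sizes (D=4 observed dimensions, M=2 latent dimensions, N=10 points); the random values only illustrate the expected shapes and assume log_pdf_ppca and its imports are in scope:

import numpy as np

rng = np.random.RandomState(0)
D, M, N = 4, 2, 10
X = rng.randn(D, N)
W_k = rng.randn(D, M)
mu_k = rng.randn(D)
sigma_k = 0.5

log_density, T_inv = log_pdf_ppca(X, W_k, mu_k, sigma_k)
print(log_density.shape, T_inv.shape)   # (10,) and (2, 2)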
Example #32
    def get_state_sequence(self, x):
        # returns the most likely state sequence given observed sequence x
        # using the Viterbi algorithm
        T = len(x)

        # make the emission matrix B
        logB = np.zeros((self.M, T))
        for j in range(self.M):
            for t in range(T):
                for k in range(self.K):
                    p = np.log(self.R[j, k]) + mvn.logpdf(
                        x[t], self.mu[j, k], self.sigma[j, k])
                    logB[j, t] += p
        print("logB:", logB)

        # perform Viterbi as usual
        delta = np.zeros((T, self.M))
        psi = np.zeros((T, self.M))

        # smooth pi in case it is 0
        pi = self.pi + 1e-10
        pi /= pi.sum()

        delta[0] = np.log(pi) + logB[:, 0]
        for t in range(1, T):
            for j in range(self.M):
                next_delta = delta[t - 1] + np.log(self.A[:, j])
                delta[t, j] = np.max(next_delta) + logB[j, t]
                psi[t, j] = np.argmax(next_delta)

        # backtrack
        states = np.zeros(T, dtype=np.int32)
        states[T - 1] = np.argmax(delta[T - 1])
        for t in range(T - 2, -1, -1):
            states[t] = psi[t + 1, states[t + 1]]
        return states
Example #33
    def compute_vlb(self, observations, pi, mu, sigma, gamma):
        """
        Each input is numpy array:
        X: (N x d), data points
        gamma: (N x C), distribution q(T)
        pi: (C)
        mu: (C x d)
        sigma: (C x d x d)

        Returns value of variational lower bound
        """
        number_of_observations = observations.shape[0]
        number_of_clusters = gamma.shape[1]

        loss_per_observation = np.zeros(number_of_observations)
        for k in range(number_of_clusters):
            loss_per_observation += gamma[:, k] * (
                np.log(pi[k]) + multivariate_normal.logpdf(
                    observations, mean=mu[k, :], cov=sigma[k, ...]))
            loss_per_observation -= gamma[:, k] * np.log(gamma[:, k])

        total_loss = np.sum(loss_per_observation)

        return total_loss
Example #34
File: GMM.py  Project: ArrogantL/ML
def recalPKX(pk, means, vars, XX):
    """
    E-step: recompute the class posterior probability matrix P(Y|X)
    :param pk: mixture weights
    :param means: component means
    :param vars: component covariance matrices
    :param XX: data points
    :return: pkx_array, posterior responsibilities for each point and component
    """
    logpxi_array = np.zeros(len(XX) * len(pk)).reshape(len(XX), len(pk))
    pkx_array = np.zeros(len(XX) * len(pk)).reshape(len(XX), len(pk))
    for j in range(len(XX)):
        for i in range(len(pk)):
            try:
                logpxi_array[j, i] = multivariate_normal.logpdf(
                    XX[j], mean=means[i], cov=vars[i]) + np.log(pk[i])
            except:
                print("Singular Matrix!!")
    for k in range(len(pk)):
        pkx_array[:, k] = np.sum(
            np.exp(logpxi_array - np.tile(logpxi_array[:, k], (len(pk), 1)).T),
            axis=1)
    pkx_array = 1.0 / pkx_array
    return pkx_array
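The loop at the end of recalPKX is a log-space softmax: p(k|x) = exp(logp_k - logsumexp(logp)). An equivalent, more direct sketch using scipy.special.logsumexp, with the same inputs assumed:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def recalPKX_logsumexp(pk, means, vars, XX):
    # (N, K) joint log densities: log N(x | mu_i, var_i) + log pk_i
    logpxi = np.column_stack(
        [multivariate_normal.logpdf(XX, mean=means[i], cov=vars[i]) + np.log(pk[i])
         for i in range(len(pk))])
    # normalise in log space, then exponentiate to get responsibilities
    return np.exp(logpxi - logsumexp(logpxi, axis=1, keepdims=True))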
Example #35
    def nll(self, x, y, z, theta_x, theta_y, theta_z, sigma_x, sigma_y,
            sigma_z):
        '''
        '''

        if self.verbose:
            print '\n=========='
            print 'x      :\t', x, '[cm]'
            print 'y      :\t', y, '[cm]'
            print 'z      :\t', z, '[cm]'
            print 'theta_x:\t', theta_x, '[rad]'
            print 'theta_y:\t', theta_y, '[rad]'
            print 'theta_z:\t', theta_z, '[rad]'
            print 'sigma x:\t', sigma_x, '[cm]'
            print 'sigma y:\t', sigma_y, '[cm]'
            print 'sigma z:\t', sigma_z, '[cm]'

        cov = self._compute_covariance_matrix(theta_x, theta_y, theta_z,
                                              sigma_x, sigma_y, sigma_z)

        if self.verbose:
            print 'covariance matrix', np.matrix(cov)
            print 'determinant: ', np.linalg.det(cov)

        # check singularity / invertibility
        if np.linalg.det(cov) > 0.:
            nll = -multivariate_normal.logpdf(
                self.events, mean=np.array([x, y, z]), cov=cov).sum()
        else:
            print 'WARNING! Singular covariance matrix, cannot invert!'
            return float('nan')

        if self.verbose:
            print 'nLL: ', nll

        return nll
Example #36
    def _log_likelihood(self, features, k_idx):
        """
        Compute the likelihood of the features given the index of the Gaussian
        in the mixture model. This function computes the log multivariate_normal
        distribution for the features given the mean and covariance of the ```k_idx```th
        Gaussian. To do this, you can use the function:

            scipy.stats.multivariate_normal.logpdf

        Read the documentation of this function to understand how it is used here:

            https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.multivariate_normal.html

        Once the raw likelihood is computed, incorporate the mixing_weights for the Gaussian
        via:

            log(mixing_weight) + logpdf

        Where logpdf is the output of multivariate_normal.

        Arguments:
            features {np.ndarray} -- Features to compute multivariate_normal distribution
                on.
            k_idx {int} -- Which Gaussian to use (e.g. use self.means[k_idx],
                self.covariances[k_idx], self.mixing_weights[k_idx]).

        Returns:
            np.ndarray -- log likelihoods of each feature given a Gaussian.
        """

        r = np.empty(features.shape[0])
        for i, value in enumerate(features):
            y = multivariate_normal.logpdf(value, mean = self.means[k_idx], cov = self.covariances[k_idx])
            r[i] = y + np.log(self.mixing_weights[k_idx])

        return r
Example #37
 def critical_values(self, sims):
   #this is the case where we consider all configurations of how the SNP may affect the traits
   if self.model == 'config':
     #priors are already in terms of log
     null = multivariate_normal.logpdf(sims, self.mean, self.null_cov)
     null = null - self.nullprior
     one = multivariate_normal.logpdf(sims, self.mean, self.alt_cov[0])
     one += self.altprior[0]
     two = multivariate_normal.logpdf(sims, self.mean, self.alt_cov[1])
     two += self.altprior[1]
     alt = Likelihood_Ratio.sumlog(one, two)
     for i in range(2,len(self.alt_cov)):
       add = multivariate_normal.logpdf(sims, self.mean, self.alt_cov[i])
       add += self.altprior[i]
       alt = Likelihood_Ratio.sumlog(add, alt)
     alt = math.log(len(self.alt_cov)) + alt
     ratio = alt - null
     self.sets = np.sort(ratio)
   #this is the case where we are only considering the full model (the SNP affects all traits)
   else:
     null = multivariate_normal.logpdf(sims, self.mean, self.null_cov)
     alt = multivariate_normal.logpdf(sims, self.mean, self.alt_cov)
     ratio = alt - null
     self.sets = np.sort(ratio)
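Likelihood_Ratio.sumlog is not shown on this page; from its use above it combines two log-densities as log(exp(a) + exp(b)). A plausible standalone sketch (numpy already provides this as np.logaddexp):

import numpy as np

def sumlog(a, b):
    # numerically stable log(exp(a) + exp(b)), elementwise
    return np.logaddexp(a, b)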
Example #38
File: gmm.py  Project: jayy-zou/Clustering
    def _log_likelihood(self, features, k_idx):
        log=np.log(self.mixing_weights[k_idx])
        pdf=multivariate_normal.logpdf(features, self.means[k_idx], self.covariances[k_idx])

        return log+pdf
Example #39
def tseriescm(data,
              maxiter=400,
              burnin=sentinel,
              thinning=5,
              level=False,
              trend=True,
              seasonality=True,
              deg=2,
              c0eps=2,
              c1eps=1,
              c0beta=2,
              c1beta=1,
              c0alpha=2,
              c1alpha=1,
              priora=False,
              pia=0.5,
              q0a=1,
              q1a=1,
              priorb=False,
              q0b=1,
              q1b=1,
              a=0.25,
              b=0,
              indlpml=False,
              **kwargs):

    if burnin == sentinel:
        burnin = math.floor(0.1 * maxiter)

    if deg % 1 != 0 or deg <= 0:
        raise ValueError("deg must be a positive integer number.")

    if maxiter % 1 != 0 or maxiter <= 0:
        raise ValueError("maxiter must be a positive (large) integer number.")

    if burnin % 1 != 0 or burnin < 0:
        raise ValueError("burnin must be a non-negative integer number.")

    if thinning % 1 != 0 or thinning < 0:
        raise ValueError("thinning must be a non-negative integer number.")

    if maxiter <= burnin:
        raise ValueError("maxiter cannot be less than or equal to burnin.")

    if c0eps <= 0 or c1eps <= 0 or c0beta <= 0 or c1beta <= 0 or c0alpha <= 0 or c1alpha <= 0:
        raise ValueError(
            "c0eps,c1eps,c0beta,c1beta,c0alpha and c1alpha must be positive numbers."
        )

    if pia <= 0 or pia >= 1:
        raise ValueError(
            "The mixing proportion pia must be a number in (0,1).")

    if q0a <= 0 or q1a <= 0:
        raise ValueError("q0a and q1a must be positive numbers.")

    if a < 0 or a >= 1:
        raise ValueError("'a' must be a number in [0,1).")

    if q0b <= 0 or q1b <= 0:
        raise ValueError("q0b and q1b must be positive numbers.")

    if b <= -a:
        raise ValueError("'b' must be greater than '-a'.")

    periods, mydata, cts = scaleandperiods(data)

    ##### Construction of the design matrices#####
    T = mydata.shape[0]  # Number of periods of the time series
    n = mydata.shape[1]  # Number of time series present in the data
    p, d, X, Z = designmatrices(level, trend, seasonality, deg, T)

    ##### Initial Values for the parameters that will be part of the gibbs sampling #####
    sig2eps = np.ones(
        n
    )  # Vector that has the diagonal entries of the variance-covariance matrix for every epsilon_i.
    sig2the = 1  # Initial value for sig2the.
    rho = 0  # Initial value for rho.

    P = np.zeros((T, T))  # Initial matrix P.

    for j in np.arange(1, T + 1):
        for k in np.arange(1, T + 1):
            P[j - 1, k - 1] = rho**(abs(j - k))

    R = sig2the * P  # Initial matrix R.

    if level + trend + seasonality == 0:
        sig2alpha = np.ones(
            p
        )  # Vector that has the diagonal entries of the variance-covariance matrix for alpha.
        sigmaalpha = np.diag(
            sig2alpha)  # Variance-covariance matrix for alpha.
        invsigmaalpha = np.diag(
            1 / sig2alpha)  # Inverse variance-covariance matrix for alpha.

        alpha = np.random.multivariate_normal(
            np.zeros(p), sigmaalpha, size=n
        ).T  # alpha is a matrix with a vector value of alpha for every time series in its columns.
        theta = np.random.multivariate_normal(
            np.zeros(T), R, size=n
        ).T  # theta is a matrix with a vector value of theta for every time series in its columns.
        gamma = theta  # gamma is the union by rows of the beta and theta matrices

    elif level + trend + seasonality == 3:
        sig2beta = np.ones(d)
        sigmabeta = np.diag(sig2beta)
        invsigmabeta = np.diag(1 / sig2beta)

        beta = np.random.multivariate_normal(np.zeros(d), sigmabeta, size=n).T
        theta = np.random.multivariate_normal(np.zeros(T), R, size=n).T
        gamma = np.concatenate((beta, theta))
    else:
        sig2beta = np.ones(d)
        sigmabeta = np.diag(sig2beta)
        invsigmabeta = np.diag(1 / sig2beta)
        sig2alpha = np.ones(p)
        sigmaalpha = np.diag(sig2alpha)
        invsigmaalpha = np.diag(1 / sig2alpha)

        alpha = np.random.multivariate_normal(np.zeros(p), sigmaalpha,
                                              size=n).T
        beta = np.random.multivariate_normal(np.zeros(d), sigmabeta, size=n).T
        theta = np.random.multivariate_normal(np.zeros(T), R, size=n).T
        gamma = np.concatenate((beta, theta))

    iter0 = 0
    iter1 = 0  # Counter for the number of iterations saved during the Gibbs sampling.
    arrho = 0  # Variable that will contain the acceptance rate of rho in the Metropolis-Hastings step.
    ara = 0  # Variable that will contain the acceptance rate of a in the Metropolis-Hastings step.
    arb = 0  # Variable that will contain the acceptance rate of b in the Metropolis-Hastings step.
    sim = np.zeros((n, n))  # Initialization of the similarities matrix.

    if thinning == 0:
        CL = math.floor(maxiter - burnin)
    else:
        CL = math.floor((maxiter - burnin) / thinning)

    memory = np.zeros(
        (CL * n, n)
    )  # Matrix that will contain the cluster configuration of every iteration that is saved during the Gibbs sampling.
    memorygn = np.zeros(
        (CL, n)
    )  # Matrix that will save the group number to which each time series belongs in every iteration saved.
    sig2epssample = np.zeros(
        (CL, n)
    )  # Matrix that in its columns will contain the sample of each sig2eps_i's posterior distribution after Gibbs sampling.
    sig2thesample = np.zeros(
        (CL, 1)
    )  # Vector that will contain the sample of sig2the's posterior distribution after Gibbs sampling.
    rhosample = np.zeros(
        (CL, 1)
    )  # Vector that will contain the sample of rho's posterior distribution after Gibbs sampling.
    asample = np.zeros(
        (CL, 1)
    )  # Vector that will contain the sample of a's posterior distribution after Gibbs sampling.
    bsample = np.zeros(
        (CL, 1)
    )  # Vector that will contain the sample of b's posterior distribution after Gibbs sampling.
    msample = np.zeros(
        (CL, 1)
    )  # Vector that will contain the sample of the number of groups at each Gibbs sampling iteration.

    if level + trend + seasonality == 0:
        sig2alphasample = np.zeros(
            (CL, p)
        )  # Matrix that in its columns will contain the sample of each sig2alpha_i's posterior distribution after Gibbs sampling.
    elif level + trend + seasonality == 3:
        sig2betasample = np.zeros(
            (CL, d)
        )  # Matrix that in its columns will contain the sample of each sig2beta_i's posterior distribution after Gibbs sampling.
    else:
        sig2alphasample = np.zeros((CL, p))
        sig2betasample = np.zeros((CL, d))

    if indlpml != 0:
        iter2 = 0
        auxlpml = np.zeros((math.floor((maxiter - burnin) / 10), n))

##### BEGINNING OF GIBBS SAMPLING #####

    while iter0 < maxiter:

        ##### 1) SIMULATION OF ALPHA'S POSTERIOR DISTRIBUTION #####

        if level + trend + seasonality != 3:
            if level + trend + seasonality == 0:
                for i in range(0, n):
                    sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                    Q = sigmaeps + R
                    Qinv = inv(Q)
                    Winv = Qinv
                    W = Q

                    Valphainv = (
                        np.transpose(Z).dot(Winv).dot(Z)) + invsigmaalpha
                    Valpha = inv(Valphainv)

                    mualpha = Valpha.dot(np.transpose(Z)).dot(Winv).dot(
                        mydata[:, i])

                    alpha[:, i] = np.random.multivariate_normal(mualpha,
                                                                Valpha,
                                                                size=1)
            else:
                for i in range(0, n):
                    sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                    Q = sigmaeps + R
                    Qinv = inv(Q)
                    Vinv = (np.transpose(X).dot(Qinv).dot(X)) + invsigmabeta
                    V = inv(Vinv)

                    Winv = Qinv + Qinv.dot(X).dot(V).dot(
                        np.transpose(X)).dot(Qinv)
                    W = inv(Winv)

                    Valphainv = (
                        np.transpose(Z).dot(Winv).dot(Z)) + invsigmaalpha
                    Valpha = inv(Valphainv)

                    mualpha = Valpha.dot(np.transpose(Z)).dot(Winv).dot(
                        mydata[:, i])

                    alpha[:, i] = np.random.multivariate_normal(mualpha,
                                                                Valpha,
                                                                size=1)

##### 2) SIMULATION OF GAMMA'S = (BETA,THETA) POSTERIOR DISTRIBUTION #####
        for i in range(0, n):
            jstar, nstar, mi, gn = comp(
                np.delete(gamma[0, :], i)
            )  # Only the first entries of gamma[,-i] are compared to determine the cluster configuration
            gmi = np.delete(gamma, i, axis=1)
            gammastar = gmi[:,
                            jstar]  # Matrix with all the elements of gamma, except for the i-th element
            if level + trend + seasonality == 0:
                thetastar = gammastar[d:(T + d), :]
            else:
                if d == 1:
                    betastar = gammastar[
                        0:
                        d, :]  # Separation of unique vectors between betastar and thetastar
                    thetastar = gammastar[d:(T + d), :]
                else:
                    betastar = gammastar[0:d, :]
                    thetastar = gammastar[d:(T + d), :]

            sigmaeps = sig2eps[i] * np.diag(np.repeat(1, T))
            invsigmaeps = (1 / sig2eps[i]) * np.diag(np.repeat(1, T))
            Q = sigmaeps + R
            Qinv = inv(Q)

            if level + trend + seasonality == 0:
                Winv = Qinv
                W = Q
            else:
                Vinv = (np.transpose(X).dot(Qinv).dot(X)) + invsigmabeta
                V = inv(Vinv)

                Winv = Qinv + (Qinv.dot(X).dot(V).dot(
                    np.transpose(X)).dot(Qinv))
                W = inv(Winv)

        # Computing weights for gamma(i)'s posterior distribution
            if level + trend + seasonality == 0:
                dj = np.zeros((mi))
                d0 = (b + a * mi) * multivariate_normal.pdf(
                    mydata[:, i], (Z.dot(alpha[:, i])), W)

                den = 0

                for j in range(0, mi):
                    dj[j] = (nstar[j] - a) * multivariate_normal.pdf(
                        mydata[:, i],
                        (Z.dot(alpha[:, i]) + thetastar[:, j]), sigmaeps)

                den = d0 + sum(dj)
                if den == 0:
                    d0 = (b + a * mi) + multivariate_normal.logpdf(
                        mydata[:, i], (Z.dot(alpha[:, i])), W)
                    for j in range(0, mi):
                        dj[j] = (nstar[j] - a) + multivariate_normal.logpdf(
                            mydata[:, i],
                            (Z.dot(alpha[:, i]) + thetastar[:, j]), sigmaeps)
                    dj = np.append(dj, d0)
                    aa = min(dj)
                    q = (1 + (dj - aa) +
                         (dj - aa)**2 / 2) / sum(1 + (dj - aa) +
                                                 (dj - aa)**2 / 2)
                else:
                    q = dj / den
                    q = np.append(q, (d0 / den))

            elif level + trend + seasonality == 3:
                dj = np.zeros((mi))
                d0 = (b + a * mi) * multivariate_normal.pdf(
                    mydata[:, i], np.zeros((T)), W)

                den = 0

                for j in range(0, mi):
                    dj[j] = (nstar[j] - a) * multivariate_normal.pdf(
                        mydata[:, i],
                        (X.dot(betastar[:, j]) + thetastar[:, j]), sigmaeps)

                den = d0 + sum(dj)
                if den == 0:
                    d0 = (b + a * mi) + multivariate_normal.logpdf(
                        mydata[:, i], np.zeros((T)), W)
                    for j in range(0, mi):
                        dj[j] = (nstar[j] - a) + multivariate_normal.logpdf(
                            mydata[:, i],
                            (X.dot(betastar[:, j]) + thetastar[:, j]),
                            sigmaeps)
                    dj = np.append(dj, d0)
                    aa = min(dj)
                    q = (1 + (dj - aa) +
                         (dj - aa)**2 / 2) / sum(1 + (dj - aa) +
                                                 (dj - aa)**2 / 2)
                else:
                    q = dj / den
                    q = np.append(q, (d0 / den))

            else:
                dj = np.zeros((mi))
                d0 = (b + a * mi) * multivariate_normal.pdf(
                    mydata[:, i], (Z.dot(alpha[:, i])), W)

                den = 0

                for j in range(0, mi):
                    dj[j] = (nstar[j] - a) * multivariate_normal.pdf(
                        mydata[:, i],
                        (Z.dot(alpha[:, i]) + X.dot(betastar[:, j]) +
                         thetastar[:, j]), sigmaeps)

                den = d0 + sum(dj)
                if den == 0:
                    d0 = (b + a * mi) + multivariate_normal.logpdf(
                        mydata[:, i], (Z.dot(alpha[:, i])), W)
                    for j in range(0, mi):
                        dj[j] = (nstar[j] - a) + multivariate_normal.logpdf(
                            mydata[:, i],
                            (Z.dot(alpha[:, i]) + X.dot(betastar[:, j]) +
                             thetastar[:, j]), sigmaeps)
                    dj = np.append(dj, d0)
                    aa = min(dj)
                    q = (1 + (dj - aa) +
                         (dj - aa)**2 / 2) / sum(1 + (dj - aa) +
                                                 (dj - aa)**2 / 2)
                else:
                    q = dj / den
                    q = np.append(q, (d0 / den))

    # Sampling a number between 1 and (mi+1) to determine what will be the simulated value for gamma(i)
    # The probabilities of the sample are based on the weights previously computed
            y = np.random.choice(np.arange(1, (mi + 2)),
                                 size=1,
                                 replace=False,
                                 p=q)

            # If sample returns the value (mi+1), a new vector from g0 will be simulated and assigned to gamma(i)
            if y == (mi + 1):
                if level + trend + seasonality == 0:
                    Sthetai = inv(invsigmaeps + inv(R))
                    muthetai = Sthetai.dot(invsigmaeps).dot(mydata[:, i] -
                                                            (Z.dot(alpha[:,
                                                                         i])))
                    theta0 = np.random.multivariate_normal(muthetai, Sthetai)
                    gamma[:, i] = theta0
                elif level + trend + seasonality == 3:
                    Sthetai = inv(invsigmaeps + inv(R))
                    muthetai = Sthetai.dot(invsigmaeps).dot(mydata[:, i] -
                                                            (X.dot(beta[:,
                                                                        i])))
                    mubetai = V.dot(np.transpose(X)).dot(Qinv).dot(mydata[:,
                                                                          i])
                    beta0 = np.random.multivariate_normal(mubetai, V)
                    theta0 = np.random.multivariate_normal(muthetai, Sthetai)
                    gamma[:, i] = np.concatenate((beta0, theta0))
                else:
                    Sthetai = inv(invsigmaeps + inv(R))
                    muthetai = Sthetai.dot(invsigmaeps).dot(mydata[:, i] - (
                        Z.dot(alpha[:, i])) - (X.dot(beta[:, i])))
                    mubetai = V.dot(
                        np.transpose(X)).dot(Qinv).dot(mydata[:, i] -
                                                       (Z.dot(alpha[:, i])))
                    beta0 = np.random.multivariate_normal(mubetai, V)
                    theta0 = np.random.multivariate_normal(muthetai, Sthetai)
                    gamma[:, i] = np.concatenate((beta0, theta0))
            else:
                gamma[:, i] = gammastar[:, y - 1].reshape(
                    len(gammastar)
                )  # Otherwise, column y from gammastar will be assigned to gamma(i)

        ##### 2.1) ACCELERATION STEP AND CONSTRUCTION OF SIMILARITIES MATRIX #####
        jstar, nstar, m, gn = comp(gamma[0, :])
        gammastar = gamma[:, jstar]

        if level + trend + seasonality == 0:
            theta = (gamma[d:(T + d), :])
            thetastar = gammastar[d:(T + d), :]
        else:
            if d == 1:
                beta = gamma[0:d, :]
                theta = gamma[d:(T + d), :]
                betastar = gammastar[0:d, :]
                thetastar = gammastar[d:(T + d), :]
            else:
                beta = gamma[0:d, :]
                theta = gamma[d:(T + d), :]
                betastar = gammastar[0:d, :]
                thetastar = gammastar[d:(T + d), :]

        for j in range(0, m):

            if level + trend + seasonality == 0:
                cc = np.where(
                    gn ==
                    j)  # Identifying the cluster configuration of each group.
                aux = np.zeros(
                    (T, T)
                )  # Calculating the necessary matrices for the simulation of the distributions for the acceleration step.
                aux1 = np.zeros((T, 1))
                aux2 = np.zeros((T, 1))

                for i in range(0, nstar[j]):
                    aux = aux + np.diag(np.repeat(1 / sig2eps[cc[0][i]], T))
                    aux1 = aux1 + (np.diag(np.repeat(
                        1 / sig2eps[cc[0][i]],
                        T)).dot(mydata[:, i] - Z.dot(alpha[:, i]))).reshape(
                            (T, 1))

                Sthetastar = inv(aux + inv(R))
                muthetastar = Sthetastar.dot(aux1)

                theta[:, cc[0]] = np.random.multivariate_normal(
                    muthetastar.flatten(), Sthetastar).reshape(
                        (len(muthetastar), 1))

            elif level + trend + seasonality == 3:
                cc = np.where(gn == j)
                aux = np.zeros((T, T))
                aux1 = np.zeros((T, 1))
                aux2 = np.zeros((T, 1))

                for i in range(0, nstar[j]):
                    aux = aux + np.diag(np.repeat(1 / sig2eps[cc[0][i]], T))
                    aux1 = aux1 + (np.diag(np.repeat(
                        1 / sig2eps[cc[0][i]],
                        T)).dot(mydata[:, i] - X.dot(betastar[:, j]))).reshape(
                            (T, 1))
                    aux2 = aux2 + (np.diag(np.repeat(
                        1 / sig2eps[cc[0][i]],
                        T)).dot(mydata[:, i] - thetastar[:, j])).reshape(
                            (T, 1))

                Sthetastar = inv(aux + inv(R))
                muthetastar = Sthetastar.dot(aux1)
                Sbetastar = inv(np.transpose(X).dot(aux).dot(X) + invsigmabeta)
                mubetastar = Sbetastar.dot(np.transpose(X)).dot(aux2)

                beta[:, cc[0]] = np.random.multivariate_normal(
                    mubetastar.flatten(), Sbetastar).reshape(
                        (len(mubetastar), 1))
                theta[:, cc[0]] = np.random.multivariate_normal(
                    muthetastar.flatten(), Sthetastar).reshape(
                        (len(muthetastar), 1))

            else:
                cc = np.where(gn == j)
                aux = np.zeros((T, T))
                aux1 = np.zeros((T, 1))
                aux2 = np.zeros((T, 1))

                for i in range(0, nstar[j]):
                    aux = aux + np.diag(np.repeat(1 / sig2eps[cc[0][i]], T))
                    aux1 = aux1 + (np.diag(np.repeat(
                        1 / sig2eps[cc[0][i]],
                        T)).dot(mydata[:, i] - Z.dot(alpha[:, i]) -
                                X.dot(betastar[:, j]))).reshape((T, 1))
                    aux2 = aux2 + (np.diag(
                        np.repeat(1 / sig2eps[cc[0][i]],
                                  T)).dot(mydata[:, i] - Z.dot(alpha[:, i]) -
                                          thetastar[:, j])).reshape((T, 1))

                Sthetastar = inv(aux + inv(R))
                muthetastar = Sthetastar.dot(aux1)
                Sbetastar = inv(np.transpose(X).dot(aux).dot(X) + invsigmabeta)
                mubetastar = Sbetastar.dot(np.transpose(X)).dot(aux2)

                beta[:, cc[0]] = np.random.multivariate_normal(
                    mubetastar.flatten(), Sbetastar).reshape(
                        (len(mubetastar), 1))
                theta[:, cc[0]] = np.random.multivariate_normal(
                    muthetastar.flatten(), Sthetastar).reshape(
                        (len(muthetastar), 1))

            if (thinning == 0 or iter0 % thinning == 0) and (iter0 >= burnin):
                for i1 in range(0, nstar[j]):
                    for i2 in range(i1, nstar[j]):
                        sim[cc[0][i1],
                            cc[0][i2]] = sim[cc[0][i1], cc[0][i2]] + 1
                        sim[cc[0][i2],
                            cc[0][i1]] = sim[cc[0][i2], cc[0][i1]] + 1
                        memory[cc[0][i1] + (n * iter1),
                               cc[0][i2]] = memory[cc[0][i1] +
                                                   (n * iter1), cc[0][i2]] + 1
                        memory[cc[0][i2] + (n * iter1),
                               cc[0][i1]] = memory[cc[0][i2] +
                                                   (n * iter1), cc[0][i1]] + 1

        if level + trend + seasonality == 0:
            gamma = theta
        else:
            gamma = np.concatenate(
                (beta, theta), axis=0
            )  # Obtaining all gamma vectors after the acceleration step.

        jstar, nstar, m, gn = comp(gamma[0, :])
        gammastar = gamma[:, jstar]

        if level + trend + seasonality == 0:
            theta = gamma[d:(T + d), :]
            thetastar = gammastar[d:(T + d), :]
        else:
            if d == 1:
                beta = gamma[0:d, :]
                theta = gamma[d:(T + d), :]
                betastar = gammastar[0:d, :]
                thetastar = gammastar[d:(T + d), :]
            else:
                beta = gamma[0:d, :]
                theta = gamma[d:(T + d), :]
                betastar = gammastar[0:d, :]
                thetastar = gammastar[d:(T + d), :]

##### 3) SIMULATION OF SIG2EPS' POSTERIOR DISTRIBUTION #####
        if level + trend + seasonality == 0:
            M = np.transpose(mydata - Z.dot(alpha) -
                             theta).dot(mydata - Z.dot(alpha) - theta)
        elif level + trend + seasonality == 3:
            M = np.transpose(mydata - X.dot(beta) -
                             theta).dot(mydata - X.dot(beta) - theta)
        else:
            M = np.transpose(mydata - Z.dot(alpha) - X.dot(beta) -
                             theta).dot(mydata - Z.dot(alpha) - X.dot(beta) -
                                        theta)

        sig2eps = scipy.stats.invgamma.rvs((c0eps + T / 2),
                                           scale=(c1eps + M.diagonal() / 2),
                                           size=n)

        ##### 4) SIMULATION OF SIGMAALPHA'S POSTERIOR DISTRIBUTION #####
        if level + trend + seasonality != 3:
            sig2alpha = scipy.stats.invgamma.rvs(
                (c0alpha + n / 2),
                scale=(c1alpha + (alpha**2).sum(axis=1)),
                size=p)
            sigmaalpha = np.diag(sig2alpha)
            invsigmaalpha = np.diag(1 / sig2alpha)

##### 5) SIMULATION OF SIGMABETA'S POSTERIOR DISTRIBUTION #####
        if level + trend + seasonality != 0:
            diff_in_shape = d - betastar.shape[1]
            if diff_in_shape < 0:
                sig2beta = 1 / scipy.stats.invgamma.rvs(
                    (c0beta + m / 2),
                    scale=(c1beta + ((betastar**2).sum(axis=0) / 2)))[0:d]
            elif diff_in_shape <= (betastar.shape[1] / 2):
                sig2beta = 1/np.concatenate((scipy.stats.invgamma.rvs((c0beta + m/2), scale = (c1beta + ((betastar**2).sum(axis=0)/2)), size = betastar.shape[1]),\
                                 scipy.stats.invgamma.rvs((c0beta + m/2), scale = (c1beta + ((betastar**2).sum(axis=0)/2)), size = betastar.shape[1])[:diff_in_shape]))
            else:
                beta_vector = []
                for v in range(0, (math.floor(d / betastar.shape[1]))):
                    beta_vector = np.concatenate(
                        (beta_vector,
                         scipy.stats.invgamma.rvs(
                             (c0beta + m / 2),
                             scale=(c1beta + ((betastar**2).sum(axis=0) / 2)),
                             size=betastar.shape[1])))
                sig2beta = 1 / np.concatenate(
                    (beta_vector,
                     scipy.stats.invgamma.rvs(
                         (c0beta + m / 2),
                         scale=(c1beta + ((betastar**2).sum(axis=0) / 2)),
                         size=betastar.shape[1])[:(d % betastar.shape[1])]))

            sigmabeta = np.diag(sig2beta)
            invsigmabeta = np.diag(1 / sig2beta)

##### 6) SIMULATION OF SIG2THE'S POSTERIOR DISTRIBUTION #####
        cholP = np.linalg.cholesky(P)
        Pinv = inv(cholP)
        s1 = 0

        # Calculating the sum necessary for the rate parameter of the posterior distribution.
        for j in range(0, m):
            s1 = s1 + np.transpose(thetastar[:, j]).dot(Pinv).dot(thetastar[:,
                                                                            j])
            if s1 < 0:
                s1 = s1 * -1
        sig2the = scipy.stats.invgamma.rvs((m * T / 2), scale=(s1 / 2), size=1)

        ##### 7) SIMULATION OF RHO'S POSTERIOR DISTRIBUTION (Metropolis-Hastings step) #####
        rhomh = np.random.uniform(low=-1, high=1, size=1)
        Pmh = np.zeros((T, T))

        # Calculating the matrix P for the proposed value rhomh.
        for j in range(1, T + 1):
            for k in range(1, T + 1):
                Pmh[j - 1, k - 1] = rhomh**(abs(j - k))
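
        # An equivalent vectorized construction of the same AR(1)-style correlation
        # matrix (a sketch added for clarity, not part of the original sampler; it
        # reproduces the double loop above).
        idx_mh = np.arange(T)
        Pmh = rhomh ** np.abs(idx_mh[:, None] - idx_mh[None, :])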

        cholPmh = scipy.linalg.cholesky(Pmh)
        Pmhinv = inv(cholPmh)
        s = 0

        # Calculating the sum necessary for the computation of the acceptance probability.
        for j in range(0, m):
            s = np.add(
                s,
                np.asmatrix(thetastar[:, j]).dot(Pmhinv - Pmh).dot(
                    np.transpose(np.asmatrix(thetastar[:, j]))))

        # Computation of the acceptance probability.
        q = (-m) * (np.log(np.prod(np.diag(cholPmh))) - np.log(
            np.prod(np.diag(cholP)))) - ((1 / (2 * sig2the)) * s) + (1 / 2) * (
                np.log(1 + rhomh * rhomh) - np.log(1 + rho * rho)) - np.log(
                    1 - rhomh * rhomh) + np.log(1 - rho * rho)

        # Definition of the acceptance probability.
        quot = min(0, q)

        # Sampling a uniform random variable in [0,1] to determine if the proposal is accepted or not.
        unif1 = np.random.uniform(low=0, high=1, size=1)

        # Acceptance step.
        if np.log(unif1) <= quot:
            rho = rhomh
            arrho = arrho + 1

            for j in np.arange(1, T + 1):
                for k in np.arange(1, T + 1):
                    P[j - 1, k - 1] = rho**(abs(j - k))

        R = sig2the * P

        ##### 8) SIMULATION OF A'S POSTERIOR DISTRIBUTION (METROPOLIS-HASTINGS WITH UNIFORM PROPOSALS) #####
        if priora == 1:
            if b < 0:
                amh = np.random.uniform(low=-b, high=1, size=1)
            else:
                unif2 = np.random.uniform(low=0, high=1, size=1)
                if unif2 <= 0.5:
                    amh = 0
                else:
                    amh = np.random.uniform(low=0, high=1, size=1)

            # If b is not greater than -a, then accept the proposal directly.
            if a + b <= 0:
                a = amh
                print("a + b < 0")
            else:
                quot1 = 0

                if (m > 1):
                    for j in range(0, m - 1):
                        quot1 = quot1 + np.log(b + (j + 1) * amh) + np.log(
                            scipy.special.gamma(nstar[j] - amh)) - np.log(
                                scipy.special.gamma(1 - amh)
                            ) - np.log(b + (j + 1) * a) - np.log(
                                scipy.special.gamma(nstar[j] - a)) + np.log(
                                    scipy.special.gamma(1 - a))

                quot1 = quot1 + np.log(
                    scipy.special.gamma(nstar[m - 1] - amh)) - np.log(
                        scipy.special.gamma(1 - amh)) - np.log(
                            scipy.special.gamma(nstar[m - 1] - a)) + np.log(
                                scipy.special.gamma(1 - a))

                if a == 0:
                    fa = 0.5
                else:
                    fa = 0.5 * scipy.stats.beta.pdf(a, q0a, q1a)

                if amh == 0:
                    famh = 0.5
                else:
                    famh = 0.5 * scipy.stats.beta.pdf(amh, q0a, q1a)

                # Quotient to evaluate the Metropolis-Hastings step in logs
                quot1 = quot1 + np.log(famh) - np.log(fa)

                # Determination of the probability for the Metropolis-Hastings step
                alphamh1 = min(quot1, 0)

                unif3 = np.random.uniform(low=0, high=1, size=1)

                # Acceptance step
                if np.log(unif3) <= alphamh1:
                    a = amh
                    ara = ara + 1

##### 9) SIMULATION OF B'S POSTERIOR DISTRIBUTION (METROPOLIS-HASTINGS WITH GAMMA PROPOSALS) #####
        if priorb == 1:
            y1 = scipy.stats.gamma.rvs(1, 1, scale=10)
            bmh = y1 - a

            # If b is not greater than -a, then accept the proposal directly.
            if a + b <= 0:
                b = bmh
                print("a+b < 0")
            else:
                quot2 = 0

                if m > 1:
                    for j in range(0, m - 1):
                        quot2 = quot2 + np.log(bmh + (j + 1) *
                                               a) - np.log(b + (j + 1) * a)

                fb = scipy.stats.gamma.pdf(a + b, q0b, scale=q1b)
                fbmh = scipy.stats.gamma.pdf(y1, q0b, scale=q1b)

                # Quotient to evaluate the Metropolis-Hastings step in logs
                quot2 = quot2 + (np.log(scipy.special.gamma(bmh + 1)) -
                                 np.log(scipy.special.gamma(bmh + n)) -
                                 np.log(scipy.special.gamma(b + 1)) +
                                 np.log(scipy.special.gamma(b + n))) + (
                                     np.log(fbmh) -
                                     np.log(fb)) - 0.1 * (b - bmh)

                # Determination of the probability for the Metropolis-Hastings step
                alphamh2 = min(quot2, 0)

                unif4 = np.random.uniform(low=0, high=1, size=1)

                # Acceptance step
                if np.log(unif4) <= alphamh2:
                    b = bmh
                    arb = arb + 1

        if (iter0 % thinning == 0) & (iter0 >= burnin):
            iter1 = iter1 + 1
            sig2epssample[iter1 - 1, :] = sig2eps
            sig2thesample[iter1 - 1] = sig2the
            rhosample[iter1 - 1] = rho
            asample[iter1 - 1] = a
            bsample[iter1 - 1] = b
            msample[iter1 - 1, :] = m
            memorygn[iter1 - 1, :] = gn

            if level + trend + seasonality == 0:
                sig2alphasample[iter1 - 1, :] = sig2alpha
            elif level + trend + seasonality == 3:
                sig2betasample[iter1 - 1, :] = sig2beta
            else:
                sig2alphasample[iter1 - 1, :] = sig2alpha
                sig2betasample[iter1 - 1, :] = sig2beta

        if indlpml != 0:
            if (iter0 % 10 == 0) & (iter0 >= burnin):
                iter2 = iter2 + 1
                for i in range(0, n):
                    if level + trend + seasonality == 0:
                        for j in range(0, m):
                            auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                                (nstar[j] - a) /
                                (b + n)) * scipy.stats.multivariate_normal.pdf(
                                    mydata[:, i],
                                    ((Z.dot(alpha[:, i])) + thetastar[:, j]),
                                    np.diag(np.repeat(sig2eps[i], T)))

                        sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                        invsigmaeps = np.diag(np.repeat(1 / sig2eps[i], T))

                        Q = sigmaeps + R
                        Qinv = inv(Q)

                        Winv = Qinv
                        W = Q

                        auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                            (b + (a * m)) /
                            (b + n)) * scipy.stats.multivariate_normal.pdf(
                                mydata[:, i], (Z.dot(alpha[:, i])), W)

                    elif level + trend + seasonality == 3:
                        for j in range(0, m):
                            auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                                (nstar[j] - a) /
                                (b + n)) * scipy.stats.multivariate_normal.pdf(
                                    mydata[:, i],
                                    (X.dot(betastar[:, j]) + thetastar[:, j]),
                                    np.diag(np.repeat(sig2eps[i], T)))

                        sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                        invsigmaeps = np.diag(np.repeat(1 / sig2eps[i], T))

                        Q = sigmaeps + R
                        Qinv = inv(Q)

                        Vinv = np.transpose(X).dot(Qinv).dot(X) + invsigmabeta
                        V = inv(Vinv)
                        Winv = Qinv + (Qinv.dot(X).dot(V).dot(
                            np.transpose(X)).dot(Qinv))
                        W = inv(Winv)

                        auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                            (b + (a * m)) /
                            (b + n)) * scipy.stats.multivariate_normal.pdf(
                                mydata[:, i], np.zeros(T), W)

                    else:
                        for j in range(0, m):
                            auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                                (nstar[j] - a) /
                                (b + n)) * scipy.stats.multivariate_normal.pdf(
                                    mydata[:, i],
                                    (Z.dot(alpha[:, i]) +
                                     X.dot(betastar[:, j]) + thetastar[:, j]),
                                    np.diag(np.repeat(sig2eps[i], T)))

                        sigmaeps = np.diag(np.repeat(sig2eps[i], T))
                        invsigmaeps = np.diag(np.repeat(1 / sig2eps[i], T))

                        Q = sigmaeps + R
                        Qinv = inv(Q)

                        Vinv = np.transpose(X).dot(Qinv).dot(X) + invsigmabeta
                        V = inv(Vinv)
                        Winv = Qinv + (Qinv.dot(X).dot(V).dot(
                            np.transpose(X)).dot(Qinv))
                        W = inv(Winv)

                        auxlpml[iter2 - 1, i] = auxlpml[iter2 - 1, i] + (
                            (b + (a * m)) /
                            (b + n)) * scipy.stats.multivariate_normal.pdf(
                                mydata[:, i], Z.dot(alpha[:, i]), W)

        iter0 = iter0 + 1
        if iter0 % 50 == 0:
            print("Iteration Number: ", iter0, "Progress: ",
                  round((iter0 / maxiter), 2) * 100, "% \n")
##### END OF GIBBS SAMPLING #####

# Calculation of acceptance rates and similarities matrix
    arrho = arrho / iter0
    ara = ara / iter0
    arb = arb / iter0
    sim = sim / iter1

    dist = np.zeros(CL)

    # Calculating the distance from each cluster configuration to the similarities matrix
    for i in range(0, CL):
        aux4 = memory[(i * n):((i + 1) * n), :] - sim
        dist[i] = np.linalg.norm(aux4)

    # Determining which cluster configuration minimizes the distance to the similarities matrix
    mstar = msample[np.argmin(dist)]
    gnstar = memorygn[np.argmin(dist), :]

    ##### HM MEASURE CALCULATION #####
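    # HM accumulates, within each chosen cluster, the pairwise squared Euclidean
    # distances between the member series, scaled by 2 / (cluster size - 1), so
    # smaller values indicate a more homogeneous cluster configuration.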
    HM = 0

    for j in range(0, mstar[0].astype(int)):
        cc = np.where(gnstar == j)[0]
        HM1 = 0

        if len(cc) > 1:
            for i1 in range(0, len(cc)):
                for i2 in range(0, i1):
                    HM1 = HM1 + sum((mydata[:, cc[i2]] - mydata[:, cc[i1]])**2)
            HM = HM + (2 / (len(cc) - 1)) * HM1

##### PRINT FINAL CLUSTER ASSIGNMENTS AND HM MEASURE #####
    print("Number of groups of the chosen cluster configuration: ",
          mstar[0].astype(int))
    for i in range(0, mstar[0].astype(int)):
        print("Time series in group ", i,
              np.where(gnstar == i)[0].astype(int), "\n")

    print("HM Measure: ", HM)

    if indlpml != 0:
        auxlpml = 1 / auxlpml
        cpo = auxlpml.mean(axis=0)
        cpo = 1 / cpo
        lpml = sum(np.log(cpo))

    ##### PLOT FINAL CLUSTER ASSIGNMENTS (ONE PLOT PER CLUSTER)#####
    for j in range(0, mstar[0].astype(int)):
        plt.figure()
        plt.axes([0, 0, 1, 1])
        cc_plot = np.where(gnstar == j)[0]
        plt.xlabel('Time Period')
        plt.ylabel('Scaled Value')
        title = "Group " + str(j)
        plt.title(title)
        plt.plot(mydata[:, cc_plot], c=np.random.rand(3))
        plt.show()
Example #40
0
File: methods.py Project: koditaraszka/pat
 def lr_real(self, data):
     null = multivariate_normal.logpdf(data, self.mean, self.sigmaE)
     alt = multivariate_normal.logpdf(data, self.mean,
                                      np.add(self.sigmaE, self.sigmaG))
     return np.asarray(np.subtract(alt, null))
Example #41
0
    delta_2 = np.tanh(theta_draw[4])
    sigma_R = np.exp(theta_draw[5])
    sigma_Q = np.exp(theta_draw[6])

    #print(delta_2, sigma_R, sigma_Q)

    #print(theta_draw)
    params_m = [theta_draw[0], theta_draw[1], sigma_R]
    params_t = [theta_draw[2], theta_draw[3], delta_2, sigma_Q]

    z_t = z(params_m, params_t, x_t, y_t, M, c_init, r_star)

    # Prior for gamma_0, gamma_1, delta_0 and delta_1
    prior = multivariate_normal.logpdf(theta_draw[:4],
                                       mean=np.array([2.74, -1.19, 0.5, 0.8]),
                                       cov=2 * np.eye(4))

    prior -= multivariate_normal.logpdf(theta[i][:4],
                                        mean=np.array([2.74, -1.19, 0.5, 0.8]),
                                        cov=2 * np.eye(4))
    # Prior for delta_2
    prior += uniform.logpdf(delta_2, -1, 2)
    prior -= uniform.logpdf(np.tanh(theta[i][4]), -1, 2)

    # Prior Sigma_R
    prior += gamma.logpdf(sigma_R, a=5, scale=1 / 5)
    prior -= gamma.logpdf(np.exp(theta[i][5]), a=5, scale=1 / 5)

    # Prior Sigma_Q
    prior += gamma.logpdf(sigma_Q, a=5, scale=1 / 5)
Example #42
0
def log_partition(kernel_x,
                  kernel_y,
                  base_density,
                  X,
                  Y,
                  base_x,
                  base_y,
                  beta,
                  lmbda,
                  num_samples=1000):

    # Data need to be centered and normalized

    d_node = Y.shape[1]
    sigmas = 4.
    mu = np.mean(Y, axis=0)

    num_samples = 1000
    num_x = X.shape[0]
    X_rep = np.repeat(X, num_samples, axis=0)
    tmp_sigmas = sigmas * np.ones([X_rep.shape[0], 1])

    samples = np.random.multivariate_normal(mu, np.eye(mu.shape[0]),
                                            num_samples * num_x)
    samples = np.multiply(samples, tmp_sigmas)
    chunk_size = 10000
    if num_samples * num_x > 50000:
        log_diff = np.zeros([num_samples * num_x])
        num_chunks = 2000

        chunk_size = num_samples * num_x // num_chunks
        for i in range(num_chunks):
            log_diff[i * chunk_size:(i + 1) * chunk_size] = log_pdf(
                kernel_x, kernel_y, base_density,
                X_rep[i * chunk_size:(i + 1) * chunk_size],
                samples[i * chunk_size:(i + 1) * chunk_size], base_x, base_y,
                beta, lmbda)

    else:
        log_diff = log_pdf(kernel_x, kernel_y, base_density, X_rep, samples,
                           base_x, base_y, beta, lmbda)
    # computing proposal log-pdf
    tmp = multivariate_normal.logpdf(
        samples, mean=mu) + (d_node / 2.) * np.log(2 * np.pi)
    tmp = np.reshape(tmp, [-1, 1])
    tmp = np.multiply(tmp, 1. / tmp_sigmas**2)
    tmp = tmp - d_node * (np.log(2 * np.pi) / 2. + np.log(tmp_sigmas))
    tmp = np.reshape(tmp, [-1])
    # subtracting proposal log-pdf
    log_diff -= tmp
    if base_x.shape[0] > 0:
        log_diff = np.reshape(log_diff, [-1, num_samples])
        max_diff = np.max(log_diff, axis=1)
        log_diff -= np.reshape(max_diff, [-1, 1])
        shifted_log_Z = np.log(np.mean(np.exp(log_diff), axis=1))

        log_Z = max_diff + shifted_log_Z

        shifted_log_Z_2 = np.log(np.mean(np.exp(2 * log_diff), axis=1))

    else:
        log_diff = np.reshape(log_diff, [-1, 1])
        max_diff = np.max(log_diff)
        log_diff -= max_diff

        shifted_log_Z = np.log(np.mean(np.exp(log_diff)))

        log_Z = max_diff + shifted_log_Z

        shifted_log_Z_2 = np.log(np.mean(np.exp(2 * log_diff)))

    std_log_Z = shifted_log_Z_2 - 2 * shifted_log_Z
    std_log_Z = np.sqrt((np.exp(std_log_Z) - 1) / num_samples)
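    # Both branches above apply the log-sum-exp trick: the maximum log-ratio is
    # subtracted before exponentiating, so log_Z = max_diff + log(mean(exp(log_diff - max_diff)))
    # is evaluated without overflow.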

    if base_x.shape[0] == 0:
        log_Z = log_Z * np.ones(Y.shape[0])
        std_log_Z = std_log_Z * np.ones(Y.shape[0])

    return log_Z, std_log_Z
Example #43
0
File: methods.py Project: koditaraszka/pat
 def lr_null(self, sims, weigh, percent):
     null = multivariate_normal.logpdf(sims, self.mean, self.sigmaE)
     alt = multivariate_normal.logpdf(sims, self.mean,
                                      np.add(self.sigmaE, self.sigmaG))
     lrcrit = np.asarray(np.subtract(alt, null))
     return self.process_crit(lrcrit, weigh, percent)
Example #44
0
    def _compute_accept_prob(self, current_state, proposed_state):
        if self.current_iter < 2.0 * self.settings['memory_length']:
            proposed_state.update({'accept_prob': 1.0})
            return True

        if type(self.emp_hessian) is np.ndarray:
            sr1_trust_region_cov = self.settings['sr1_trust_region_scale'] * \
                self.emp_hessian
        else:
            sr1_trust_region_cov = self.settings['sr1_trust_region_cov']

        try:
            if self.qn_method == 'sr1' and self.settings['sr1_trust_region']:
                # Using trust-region approach for the SR1 update.
                current = current_state['params_free']
                current_mean = current + current_state['nat_gradient']

                proposed = proposed_state['params_free']
                proposed_mean = proposed + proposed_state['nat_gradient']

                proposed_probability = pmvn.logpdf(proposed, current_mean,
                                                   current,
                                                   current_state['hessian'],
                                                   sr1_trust_region_cov)

                current_probability = pmvn.logpdf(current, proposed_mean,
                                                  proposed,
                                                  proposed_state['hessian'],
                                                  sr1_trust_region_cov)
            else:
                current = current_state['params_free']
                proposed = proposed_state['params_free']
                current_mean = current + current_state['nat_gradient']
                current_hess = current_state['hessian']

                proposed_mean = proposed + proposed_state['nat_gradient']
                proposed_hess = proposed_state['hessian']

                proposed_probability = mvn.logpdf(proposed, current_mean,
                                                  current_hess)
                current_probability = mvn.logpdf(current, proposed_mean,
                                                 proposed_hess)

            tar_diff = proposed_state['log_target'] - \
                current_state['log_target']
            jac_diff = proposed_state['log_jacobian'] - \
                current_state['log_jacobian']
            pro_diff = current_probability - proposed_probability

            accept_prob = np.min((1.0, np.exp(tar_diff + jac_diff + pro_diff)))

        except Exception as e:
            if self.settings['show_overflow_warnings']:
                current_hess = current_state['hessian']
                proposed_hess = proposed_state['hessian']
                print("")
                print("Iteration: {}. Overflow in accept prob calculation.".
                      format(self.current_iter))
                print(
                    "This is probably due to a mismatch in the current and proposed Hessians."
                )
                print("Diag of current Hessian: {}.".format(
                    np.diag(current_hess)))
                print("Diag of candidate Hessian: {}.".format(
                    np.diag(proposed_hess)))
                print("")
            if self.settings['remove_overflow_iterations']:
                return False
            else:
                proposed_state.update({'accept_prob': 1.0})
                return True

        proposed_state.update({'accept_prob': accept_prob})
        return True
Example #45
0
File: infer.py Project: wuyangf7/fizi
 def obj(vars):
     V = sum(As[i] * vars[i] for i in range(r))
     logL = -mvn.logpdf(zscores, cov=V, allow_singular=True)
     print("NLL({}) = {}".format(",".join(map(str, vars)), logL))
     return logL
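 # Hypothetical usage sketch (not part of the original project): the objective
 # above can be passed to a bounded optimizer to estimate the non-negative
 # variance components, e.g.
 #     from scipy.optimize import minimize
 #     res = minimize(obj, x0=np.ones(r), bounds=[(0.0, None)] * r)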
Example #46
0
import numpy as np
from numpy.linalg import inv
import random
import scipy.linalg as linalg
import scipy.sparse as sp
import scipy.sparse.linalg as spln
import scipy.stats
from scipy.stats import norm, multivariate_normal
import warnings


# Older versions of scipy do not support the allow_singular keyword. I could
# check the version number explicitly, but perhaps this is clearer
_support_singular = True
try:
    multivariate_normal.logpdf(1, 1, 1, allow_singular=True)
except Exception:
    _support_singular = False
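

# A minimal compatibility wrapper (a sketch based on the flag above, not part of
# the original module): pass allow_singular only when the installed SciPy
# version supports it.
def _logpdf_compat(x, mean, cov, allow_singular=True):
    if _support_singular:
        return multivariate_normal.logpdf(x, mean, cov,
                                          allow_singular=allow_singular)
    return multivariate_normal.logpdf(x, mean, cov)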



def _validate_vector(u, dtype=None):
    # this is taken from scipy.spatial.distance. Internal function, so
    # redefining here.

    u = np.asarray(u, dtype=dtype).squeeze()
    # Ensure values such as u=1 and u=[1] still return 1-D arrays.
    u = np.atleast_1d(u)
    if u.ndim > 1:
        raise ValueError("Input vector should be 1-D.")
    return u
Example #47
0
      c_dm[m][i] = c_dm[m][i] / sum_c_dm
  #print c_dm[m], sum(c_dm[m])


#### Computing the action selection
Ad_candidate = [m for m in xrange(M)]
za_candidate = [k for k in xrange(Ka)]
CDP = [[pi_a[k] / float(M) for m in xrange(M)] for k in xrange(Ka)] #candidate probability
#F_temp = [f for f in itertools.permutations(modality,N)]  ## permutations of the modalities

for c in list(itertools.product(za_candidate, Ad_candidate)):
    #print c[0],c[1]
    temp_ocpw = 0.0
    logpdf = []
    for zok in xrange(Ko):
      logpdf += [multivariate_normal.logpdf(o_dm[c[1]], mean=Mu_o[zok], cov=Sig_o[zok])]
    #print logpdf
    max_log = np.max(logpdf)
    for zok in xrange(Ko):
      temp_cpw = 0.0
      for zck in xrange(Kc):
        temp_pw = 0.0
        for zpk in xrange(Kp):
          temp_w = 0.0
          for F_temp in itertools.permutations(modality,N):
            #print c[0],zok,zck,zpk,F_temp
            temp = 1e+1#00#1.0
            for n in xrange(N):
              #print i,n,N[d],M[d]
              if F_temp[n] == "a":
                temp = temp * theta[c[0]           ][W_list.index(w_dn[n])]
Example #48
0
    def addTraces(self,
                  traceSource,
                  tracerange,
                  progressBar=None,
                  pointRange=None):
        data = []
        textins = []
        textouts = []

        for i in range(tracerange[0], tracerange[1] + 1):
            d = traceSource.getTrace(i)

            if d is None:
                continue

            startingPoint, endingPoint = pointRange  # TODO:support start/end point different per byte
            d = d[startingPoint:endingPoint]

            data.append(d)
            textins.append(traceSource.getTextin(i))
            textouts.append(traceSource.getTextout(i))

        try:
            from scipy.stats import multivariate_normal
        except ImportError:
            raise Warning(
                "Version of SciPy too old, require >= 0.14, have %s. "
                "Update to support this attack" % (scipy.version.version))

        # Hack for now - just use last template found
        template = self.loadTemplatesFromProject()[-1]
        pois = template["poi"]
        numparts = len(template['mean'][0])
        results = np.zeros(
            (self.model.getNumSubKeys(), self.model.getPermPerSubkey()))

        if progressBar:
            progressBar.setStatusMask("Current Trace = %d Current Subkey = %d",
                                      (0, 0))
            progressBar.setMaximum(self.model.getNumSubKeys() * len(data))
        pcnt = 0

        for tnum in range(0, len(data)):
            for bnum in self.brange:
                try:
                    newresultsint = [
                        multivariate_normal.logpdf(
                            data[tnum][pois[bnum]],
                            mean=template['mean'][bnum][i],
                            cov=np.diag(template['cov'][bnum][i]))
                        for i in range(0, numparts)
                    ]
                except np.linalg.LinAlgError as e:
                    logging.warning(
                        'Error in applying template, probably template is poorly formed or POI incorrect. Byte %d for tnum %d skipped.'
                        % (bnum, tnum))
                    logging.debug(e)
                    newresultsint = [0] * self.model.getPermPerSubkey()

                ptype = template["partitiontype"]

                newresults = []
                # Map to key guess format
                for i in range(0, self.model.getPermPerSubkey()):
                    if ptype == "PartitionHWIntermediate":
                        self.model.setHwModel(
                            self.model.
                            hwModels['HW: AES SBox Output, First Round (Enc)'])
                        hypint = self.model.leakage(textins[tnum],
                                                    textouts[tnum], i, bnum,
                                                    None)
                    elif ptype == "PartitionHDLastRound":
                        self.model.setHwModel(
                            self.model.hwModels['HD: AES Last-Round State'])
                        hypint = self.model.leakage(textins[tnum],
                                                    textouts[tnum], i, bnum,
                                                    None)
                    # TODO Temp
                    elif ptype == "PartitionHDRounds":
                        if bnum == 0:
                            hypint = self.model.getHW(textins[tnum][bnum] ^ i)
                        else:
                            knownkey = [
                                0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
                                0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c
                            ]
                            s1 = textins[tnum][bnum - 1] ^ knownkey[bnum - 1]
                            s2 = textins[tnum][bnum] ^ i
                            hypint = self.model.getHW(s1 ^ s2)
                    else:
                        hypint = i
                    newresults.append(newresultsint[hypint])

                results[bnum] += newresults
                self.stats.updateSubkey(bnum, results[bnum], tnum=(tnum + 1))

                pcnt += 1
                if progressBar:
                    progressBar.updateStatus(pcnt, (tnum, bnum))
                    if progressBar.wasAborted():
                        return

            # Do plotting if required
            if (tnum % self._reportingInterval) == 0 and self.sr:
                self.sr()
Example #49
0
#### Computing the action selection
Ad_candidate = [m for m in xrange(M)]
za_candidate = [k for k in xrange(Ka)]
CDP = [[pi_a[k] / float(M) for m in xrange(M)]
       for k in xrange(Ka)]  #candidate probability
#F_temp = [f for f in itertools.permutations(modality,N)]  ## permutations of the modalities

for c in list(itertools.product(za_candidate, Ad_candidate)):
    #print c[0],c[1]
    temp_ocpw = 0.0
    logpdf = []
    for zok in xrange(Ko):
        logpdf += [
            multivariate_normal.logpdf(o_dm[c[1]],
                                       mean=Mu_o[zok],
                                       cov=Sig_o[zok])
        ]
    #print logpdf
    max_log = np.max(logpdf)
    for zok in xrange(Ko):
        temp_cpw = 0.0
        for zck in xrange(Kc):
            temp_pw = 0.0
            for zpk in xrange(Kp):
                temp_w = 0.0
                F_temp2 = [
                    f for f in itertools.product(modality, repeat=N)
                ]  # Cartesian product: repetition is allowed (1,1 can occur) and order matters (1,2 and 2,1 are distinct)
                #for F_temp in itertools.product(modality,repeat=N):
                for i in xrange(len(F_temp2)):
Example #50
0
def trainHSdataPGfull(Train_HS, labels_HS, diagonal=False):
    #define a function to train HS data
    Train = Train_HS  #get Train data
    labels = labels_HS  #get Train label
    Classes = np.sort(np.unique(labels))  #get class
    M = 15
    X_train, X_valid, label_train, label_valid = train_test_split(
        Train, labels, test_size=0.33, random_state=M)
    X_train_class = []  #initialize classification
    for j in range(Classes.shape[0]):
        #classify train data according to label
        jth_class = X_train[label_train == Classes[j], :]
        X_train_class.append(jth_class)
    class0 = X_train_class[0]  #get class
    class1 = X_train_class[1]  #get class
    class2 = X_train_class[2]  #get class
    class3 = X_train_class[3]  #get class
    class4 = X_train_class[4]  #get class
    mu0 = np.mean(class0, axis=0)  #get mean
    mu1 = np.mean(class1, axis=0)  #get mean
    mu2 = np.mean(class2, axis=0)  #get mean
    mu3 = np.mean(class3, axis=0)  #get mean
    mu4 = np.mean(class4, axis=0)  #get mean
    if diagonal == 1:
        cov0 = np.cov(class0.T) * np.eye(class0.shape[1])  #get diagonal matrix
        cov1 = np.cov(class1.T) * np.eye(class1.shape[1])  #get diagonal matrix
        cov2 = np.cov(class2.T) * np.eye(class2.shape[1])  #get diagonal matrix
        cov3 = np.cov(class3.T) * np.eye(class3.shape[1])  #get diagonal matrix
        cov4 = np.cov(class4.T) * np.eye(class4.shape[1])  #get diagonal matrix
    else:
        constant = 1e-1
        cov0 = np.cov(class0.T) + np.eye(
            class0.shape[1]
        ) * constant  #get covariance and solve problem of singular matrix
        cov1 = np.cov(class1.T) + np.eye(
            class1.shape[1]
        ) * constant  #get covariance and solve problem of singular matrix
        cov2 = np.cov(class2.T) + np.eye(
            class2.shape[1]
        ) * constant  #get covariance and solve problem of singular matrix
        cov3 = np.cov(class3.T) + np.eye(
            class3.shape[1]
        ) * constant  #get covariance and solve problem of singular matrix
        cov4 = np.cov(class4.T) + np.eye(
            class4.shape[1]
        ) * constant  #get covariance and solve problem of singular matrix
    psum = (class0.shape[0] + class1.shape[0] + class2.shape[0] +
            class3.shape[0] + class4.shape[0])  #calculate N
    pc0 = class0.shape[0] / psum  #calculate P(Ck)
    pc1 = class1.shape[0] / psum  #calculate P(Ck)
    pc2 = class2.shape[0] / psum  #calculate P(Ck)
    pc3 = class3.shape[0] / psum  #calculate P(Ck)
    pc4 = class4.shape[0] / psum  #calculate P(Ck)
    PG_predicted = np.zeros((X_valid.shape[0], 1))  #initialization
    for i in range(X_valid.shape[0]):
        y0 = multivariate_normal.logpdf(X_valid[i, :], mu0,
                                        cov0)  #class-conditional log-likelihood
        y1 = multivariate_normal.logpdf(X_valid[i, :], mu1,
                                        cov1)  #class-conditional log-likelihood
        y2 = multivariate_normal.logpdf(X_valid[i, :], mu2,
                                        cov2)  #class-conditional log-likelihood
        y3 = multivariate_normal.logpdf(X_valid[i, :], mu3,
                                        cov3)  #class-conditional log-likelihood
        y4 = multivariate_normal.logpdf(X_valid[i, :], mu4,
                                        cov4)  #class-conditional log-likelihood
        # pall = y0 * pc0 + y1 * pc1 + y2 * pc2 + y3 * pc3 + y4 * pc4 #P(x)
        pos0 = y0 + math.log(
            pc0)  #unnormalized log posterior; P(x) is the same for every class, so it is omitted
        pos1 = y1 + math.log(
            pc1)  #unnormalized log posterior; P(x) is the same for every class, so it is omitted
        pos2 = y2 + math.log(
            pc2)  #unnormalized log posterior; P(x) is the same for every class, so it is omitted
        pos3 = y3 + math.log(
            pc3)  #unnormalized log posterior; P(x) is the same for every class, so it is omitted
        pos4 = y4 + math.log(
            pc4)  #unnormalized log posterior; P(x) is the same for every class, so it is omitted
        a = {
            1: pos0,
            2: pos1,
            3: pos2,
            4: pos3,
            5: pos4
        }  #get dictionary of classes
        PG_predicted[i] = max(
            a, key=a.get)  #get classes responding to max posterior
    if diagonal == 1:
        accuracy_PGdiag = accuracy_score(label_valid,
                                         PG_predicted)  #compare and get score
        print(
            '\nThe accuracy of Probabilistic Generative classifier HS with diagonal covariance is: ',
            accuracy_PGdiag * 100, '%')
    else:
        accuracy_PG = accuracy_score(label_valid,
                                     PG_predicted)  #compare and get score
        print(
            '\nThe accuracy of Probabilistic Generative classifier HS with full covariance is: ',
            accuracy_PG * 100, '%', 'M is: ', M)
    return PG_predicted
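
# A possible vectorization of the per-sample scoring loop above (a sketch using
# the same local names, not part of the original function):
# multivariate_normal.logpdf accepts a 2-D array of query points, so the whole
# validation set can be scored class by class.
#
#   log_post = np.column_stack([
#       multivariate_normal.logpdf(X_valid, mu_k, cov_k) + math.log(pc_k)
#       for mu_k, cov_k, pc_k in [(mu0, cov0, pc0), (mu1, cov1, pc1),
#                                 (mu2, cov2, pc2), (mu3, cov3, pc3),
#                                 (mu4, cov4, pc4)]
#   ])
#   PG_predicted = log_post.argmax(axis=1) + 1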
Example #51
0
sigmaE = np.array( [[ 1.00000000e+00,2.21634186e-01,-6.59629045e-02,1.45811322e-01],
    [ 2.21634186e-01,1.00000000e+00,-9.71533404e-04,6.77997010e-01],
    [-6.59629045e-02,-9.71533404e-04,1.00000000e+00,-4.56618686e-02],
    [ 1.45811322e-01,6.77997010e-01,-4.56618686e-02,1.00000000e+00]])


maximum = -np.inf
alpha = -1

for x in range(int(count),100,-25):
  if x == 0:
    x = 1
  x = float(x)
  mult = count/x
  covar = sigmaE + mult*sigmaG
  pdf = multivariate_normal.logpdf(alphaData,np.array([0.0,0.0,0.0,0.0]), covar)
  total = np.sum(pdf)
  if total > maximum:
    maximum = total
    alpha = x

print(alpha)
sigmaG = (count/alpha)*sigmaG
mat = sigmaE + sigmaG
value = multivariate_normal.logpdf(zs, np.array([0.0,0.0,0.0,0.0]), mat)
all_configs = np.copy(value)
mvalues = [np.copy(value) for i in range(k)]
for z in range(1,len(include)):
  alt = np.copy(mat)
  loc = include[z]
  for i in range(0,k):
Example #52
0
 def get_log_prior_at(self, *log_kernel_parameters):
     N = len(log_kernel_parameters)
     return multivariate_normal.logpdf(
         np.array(log_kernel_parameters).reshape(1, N),
         mean=np.zeros(N),
         cov=(self.sigma_prior_parameter**2) * np.identity(N))
Example #53
0
 def loglike(self, x):
     return multivariate_normal.logpdf(x, mean=np.zeros(self.x_dim),
                                       cov=np.eye(self.x_dim) + self.corr * (1 - np.eye(self.x_dim)))
Example #54
0
File: lmm.py Project: pschulam/Snippets
 def log_likelihood(self, y, X, Z):
     m = np.dot(X, self._coef)
     S = np.dot(Z, np.dot(self._ranef_cov, Z.T))
     S += self._noise_var * np.eye(len(y))
     return mvn.logpdf(y, m, S)
Example #55
0
print(mu)

var1 = variance(ip_file, "CS Score (USNews)")
var2 = variance(ip_file, "Research Overhead %")
var3 = variance(ip_file, "Admin Base Pay$")
var4 = variance(ip_file, "Tuition(out-state)$")

sigma1 = std(ip_file, "CS Score (USNews)")
sigma2 = std(ip_file, "Research Overhead %")
sigma3 = std(ip_file, "Admin Base Pay$")
sigma4 = std(ip_file, "Tuition(out-state)$")

df = ip_file.iloc[0:49, 2:6]
cov_mat = df.cov().round(3)
print(cov_mat)
print(df.corr().round(3))  #do correlation using numpy

#log likelihood independent variable

X = 0
for i in range(0, 49):
    X += (multivariate_normal.logpdf(df.iloc[i, :],
                                     mu,
                                     cov_mat,
                                     allow_singular=True))
print(X)

#model = BayesianNetwork.from_samples(df, algorithm='exact')

mpl.plot()
Example #56
0
def main():

    ## speed: Make vectors of length iter for each round so we can store all the estimates from each iteration and
    ## take summary statistics at the end
    true_L = []
    true_U = []
    true_MLE = []
    L_vec = []
    U_vec = []
    L_vec_noise = []
    U_vec_noise = []
    L_vec_g_noise = []
    U_vec_g_noise = []
    MLE_hat = []
    MLE_var = []
    a_store = []
    b_store = []
    a_reparam_store = []
    a_var_store = []
    b_var_store = []
    a_reparam_var_store = []

    ## These will be updated additively, could have stored as a vector and taken mean, but no need
    SD_g = 0
    bias_g = 0

    ### Begin Simulation ###

    coverage_iter_number = 2  # number of simulations I will use to assess the coverage
    for k in range(coverage_iter_number):
        ## Here I have to reset the seed because, if not, the seed reset in my optimization will mess up
        ## my data
        np.random.seed(k)

        ### Create data set for this iteration and true parameter values ###

        ## Simulation parameter
        sigma = 2 * np.array([[1, .5], [.5, 3]])
        mu = np.array([-5.1, 5.2])
        n = 20

        data_dict = data_generator(n, mu, sigma)  ## Generate data
        data_mean = data_dict['data_mean']
        true_MLE.append(data_dict['true_MLE'])
        true_L.append(data_dict['true_L'])  # storage
        true_U.append(data_dict['true_U'])  # storage

        mle_like = multivariate_normal.logpdf(data_mean,
                                              mean=data_mean,
                                              cov=sigma / n)

        ### Generate points 'below' the profile likelihood that I will use to estimate true profile likelihood ###

        ## Estimation parameters
        t_g = 10  # Controls the allotted horizontal error in each point; the larger t_g is, the smaller the error
        sample = 20  # Number of points I will generate to estimate the profile likelihood

        estimation_points_dict = estimation_points(data_mean, sigma, n, sample,
                                                   t_g)

        likehood_sample = estimation_points_dict['likehood_sample']
        mu_hat_max = estimation_points_dict['mu_hat_max']
        y_star_max = max(likehood_sample)
        x_star_sd = estimation_points_dict['epsilon_sd']
        bias_g = bias_g + estimation_points_dict['sum_epsilon']
        SD_g = SD_g + x_star_sd  # storage

        ### Given my points, I get an estimate of the profile likelihood  ###

        ## Get initial quadratic guess
        ## I can alter this to get better initial estimates
        curvature = -5  # Initial estimate of curvature
        center = np.mean(
            mu_hat_max)  # Initial estimate of center of my quadratic
        height = mle_like  # My height is based on the likelihood of true mle which is known

        ## Get the corresponding values for a quadratic function
        a_init = curvature
        b_init = -2 * curvature * center
        c_reparam = height - y_star_max

        ## Find the optimized quadratic parameters, i.e. my PL estimate
        optimized_parameters = meta_model_optimization(a_init, b_init,
                                                       c_reparam, mu_hat_max,
                                                       likehood_sample,
                                                       x_star_sd, y_star_max,
                                                       10000, sample)

        a_reparam = optimized_parameters.x[0]
        a = -np.exp(a_reparam)
        b = optimized_parameters.x[1]
        #information_inv_est = optimized_parameters.hess_inv	## keep this positive since I minimized the negative log likelihood

        ### Storage of values and finding new cut offs for our profile likelihood ###

        MLE_hat.append(-b / (2 * a))  # Store estimate of MLE based on PL
        grad_mle = np.array(
            [-b / (2 * np.exp(a_reparam)), 1 / (2 * np.exp(a_reparam))])
        #cur_inv = information_inv_est		# get error estimates of parameters based on hessian
        #cur_MLE_var = grad_mle.dot(cur_inv).dot(grad_mle)	# get estimate of MLE variance
        #MLE_var.append(cur_MLE_var)	# store MLE variance estimate
        a_store.append(a)  # store curvature
        b_store.append(b)  # store b value in quadratic
        #a_reparam_store.append(a_reparam)
        #a_var_store.append(np.exp(-2*a_reparam)*cur_inv[0,0])	# variance in a estimate
        #b_var_store.append(cur_inv[1,1])	# variance in b estimate
        #a_reparam_var_store.append(cur_inv[0,0])		# variance in reparameterized a

        ## obtain new profile likelihood cutoff based on estimated PL
        new_cut_off = y_star_max - 1.92  # Tim double check this should be y_star_max vs mle_like

        L_vec_noise_cur = -np.sqrt(
            (new_cut_off - (c_reparam + y_star_max)) / a) - b / (
                2 * a)  # New estimated lower bound
        U_vec_noise_cur = np.sqrt((new_cut_off - (c_reparam + y_star_max)) /
                                  a) - b / (2 * a)  # New estimated upper bound

        L_vec_noise.append(L_vec_noise_cur)
        U_vec_noise.append(U_vec_noise_cur)

        # Add in plotting?

        ## Print the iteration
        print k

    ## write the valid file in tab delimited format
    print "Noisy Upper Bound"
    print U_vec_noise
    print "True Upper Bound"
    print true_U
    print "Noisy Lower Bound"
    print L_vec_noise
    print "True Lower Bound"
    print true_L
Example #57
0
def log_target(X, b, v):
    Y = np.copy(X)  # copy so the caller's X is not modified in place
    Y[1] = X[1] - b * ((X[0]**2) - v)
    Y[0] = X[0] / np.sqrt(v)
    return multivariate_normal.logpdf(Y, np.zeros([2]), np.eye(2))
Example #58
0
 def log_likelihood(self, X, Y, beta):
     return mvn.logpdf(Y,
                       np.zeros(len(X)),
                       nearestSPD(self.cov_matrix_(X, X, beta)),
                       allow_singular=True)
Example #59
0
def log_emission_prob(X, mu, sigma2):
  # scipy's multivariate_normal.logpdf handles a single sample directly, so no
  # extra singleton dimension is needed here.
  return multivariate_normal.logpdf(X, mean=mu, cov=sigma2)
Example #60
0
            AxesStyle="Normal2",
            color="g")

    ############ ESTIMATE THEM ################
    theta1 = Gae.get_Gaussian_muSigma_ML(X1.T)
    print("mu1:")
    print(theta1[0])
    print("Sigma1")
    print(theta1[1])

    ############## Estimate Likelihood ###################
    ll = Gad.Gaussian_pdf_log(X1, [mu1, cov1])
    ll2 = []
    for i in range(ll.size):
        ll2.append(
            multivariate_normal.logpdf(X1[:, i], mean=mu1.flatten(), cov=cov1))
    ll2 = np.array(ll2).reshape(ll.shape)

    print("ll ours")
    print(ll.T)
    print("ll scipy")
    print(ll2.T)
    print("Difference in ll")
    print((ll - ll2).T)

    ###### Multiple clusters case
    ll_K = Gad.Gaussian_K_pdf_log(X1, [[mu1, cov1], [mu2, cov2]])

########################################################################################
#####################   Crossvalidate Using EM   ####################################
#########################################################################################