Example #1
def mc_test():
    import os
    import numpy as np
    from pyemu import MonteCarlo, Cov
    jco = os.path.join("pst","pest.jcb")
    pst = jco.replace(".jcb",".pst")

    out_dir = os.path.join("mc")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    #write testing
    mc = MonteCarlo(jco=jco,verbose=True,sigma_range=6)
    cov = Cov.from_parameter_data(mc.pst,sigma_range=6)
    assert np.abs((mc.parcov.x - cov.x).sum()) == 0.0
    mc.draw(10,obs=True)
    mc.write_psts(os.path.join("temp","real_"))
    mc.parensemble.to_parfiles(os.path.join("mc","real_"))
    mc = MonteCarlo(jco=jco,verbose=True)
    mc.draw(10,obs=True)
    print("prior ensemble variance:",
          np.var(mc.parensemble.loc[:,"mult1"]))
    projected_en = mc.project_parensemble(inplace=False)
    print("projected ensemble variance:",
          np.var(projected_en.loc[:,"mult1"]))

    import pyemu
    sc = pyemu.Schur(jco=jco)

    mc = MonteCarlo(pst=pst,parcov=sc.posterior_parameter,verbose=True)
    mc.draw(10)
    print("posterior ensemble variance:",
          np.var(mc.parensemble.loc[:,"mult1"]))
Example #2
 def test_mat_output(self):
     samples = GMM1([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001],
             rng=self.rng,
             size=[40, 20])
     assert samples.shape == (40, 20)
     assert -.001 < np.mean(samples) < .001, np.mean(samples)
     assert np.var(samples) < .0001, np.var(samples)
 def testPdfOfSampleMultiDims(self):
   student = student_t.StudentT(df=[7., 11.], loc=[[5.], [6.]], scale=3.)
   self.assertAllEqual([], student.event_shape)
   self.assertAllEqual([], self.evaluate(student.event_shape_tensor()))
   self.assertAllEqual([2, 2], student.batch_shape)
   self.assertAllEqual([2, 2], self.evaluate(student.batch_shape_tensor()))
   num = 50000
   samples = student.sample(num, seed=123456)
   pdfs = student.prob(samples)
   sample_vals, pdf_vals = self.evaluate([samples, pdfs])
   self.assertEqual(samples.get_shape(), (num, 2, 2))
   self.assertEqual(pdfs.get_shape(), (num, 2, 2))
   self.assertNear(5., np.mean(sample_vals[:, 0, :]), err=.03)
   self.assertNear(6., np.mean(sample_vals[:, 1, :]), err=.03)
   self._assertIntegral(sample_vals[:, 0, 0], pdf_vals[:, 0, 0], err=0.02)
   self._assertIntegral(sample_vals[:, 0, 1], pdf_vals[:, 0, 1], err=0.02)
   self._assertIntegral(sample_vals[:, 1, 0], pdf_vals[:, 1, 0], err=0.02)
   self._assertIntegral(sample_vals[:, 1, 1], pdf_vals[:, 1, 1], err=0.02)
   if not stats:
     return
   self.assertNear(
        stats.t.var(7., loc=0., scale=3.),  # loc does not affect var
       np.var(sample_vals[:, :, 0]),
       err=.4)
   self.assertNear(
        stats.t.var(11., loc=0., scale=3.),  # loc does not affect var
       np.var(sample_vals[:, :, 1]),
       err=.4)
Example #4
    def _call(self, dataset):
        """Computes featurewise scores."""

        attrdata = dataset.sa[self.__attr].value
        if np.issubdtype(attrdata.dtype, 'c'):
            raise ValueError("Correlation coefficent measure is not meaningful "
                             "for datasets with literal labels.")

        samples = dataset.samples
        pvalue_index = self.__pvalue
        result = np.empty((dataset.nfeatures,), dtype=float)

        for ifeature in xrange(dataset.nfeatures):
            samples_ = samples[:, ifeature]
            corr = pearsonr(samples_, attrdata)
            corrv = corr[pvalue_index]
            # Should be safe to assume 0 corr_coef (or 1 pvalue) if value
            # is actually NaN, although it might not be the case (covar of
            # 2 constants would be NaN although should be 1)
            if np.isnan(corrv):
                if np.var(samples_) == 0.0 and np.var(attrdata) == 0.0 \
                   and len(samples_):
                    # constant terms
                    corrv = 1.0 - pvalue_index
                else:
                    corrv = pvalue_index
            result[ifeature] = corrv

        return Dataset(result[np.newaxis])
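As a side note, the NaN fallback above can be reproduced in isolation; a minimal sketch (assuming scipy, with made-up vectors) showing that a constant feature yields a NaN correlation and a zero variance:

import numpy as np
from scipy.stats import pearsonr

constant_feature = np.ones(10)
targets = np.arange(10.0)
r, p = pearsonr(constant_feature, targets)   # SciPy warns about the constant input
print(np.isnan(r), np.var(constant_feature) == 0.0)  # True True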
Example #5
def average_data(data):
    """
    Find mean and std. deviation of data returned by ``simulate``.
    """
    numnodes = data['nodes']
    its = data['its']
    its_mean = numpy.average(its)
    its_std = math.sqrt(numpy.var(its))
    dead = data['dead']
    dead_mean = 100.0*numpy.average(dead)/numnodes
    dead_std = 100.0*math.sqrt(numpy.var(dead))/numnodes
    immune = data['immune']
    immune_mean = 100.0*numpy.average(immune)/numnodes
    immune_std = 100.0*math.sqrt(numpy.var(immune))/numnodes
    max_contam = data['max_contam']
    max_contam_mean = 100.0*numpy.average(max_contam)/numnodes
    max_contam_std = 100.0*math.sqrt(numpy.var(max_contam))/numnodes
    normal = data['normal']
    normal_mean = 100.0*numpy.average(normal)/numnodes
    normal_std = 100.0*math.sqrt(numpy.var(normal))/numnodes
    return {'its': (its_mean, its_std),
            'nodes': numnodes,
            'dead': (dead_mean, dead_std),
            'immune': (immune_mean, immune_std),
            'max_contam': (max_contam_mean, max_contam_std),
            'normal': (normal_mean, normal_std)}
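A small aside on the pattern used above: math.sqrt(numpy.var(x)) gives the same value as numpy.std(x) (both default to the population definition), so either spelling works. Illustrative snippet, assuming only numpy and math:

import math
import numpy

x = numpy.array([1.0, 2.0, 4.0, 8.0])
print(math.sqrt(numpy.var(x)), numpy.std(x))  # same value (up to floating point)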
def featArray(data):
    sh = n.shape(data)
    freqs = n.linspace(0,sh[1],sh[1])
    NNvar = n.zeros_like(data)
    dvar = n.var(n.abs(data))
    for i in range(sh[0]):
        for j in range(sh[1]):
            #samples = []
            #for p in range(100):
            k = n.random.randint(-1,1,size=1000)
            l = n.random.randint(-1,1,size=1000)
            #    try:
            #samples = n.abs(data[k+i,l+j])
            #    except:
            #        pass
            NNvar[i,j] = n.var(n.abs(data[k+i,l+j]))
    X1 = n.zeros((sh[0]*sh[1],3))
    X1[:,0] = (n.real(data)).reshape(sh[0]*sh[1])
    X1[:,1] = (n.imag(data)).reshape(sh[0]*sh[1])
    #X1[:,2] = (n.log10(n.abs(NNvar)) - n.median(n.log10(n.abs(NNvar)))).reshape(sh[0]*sh[1])
    NNvar = NNvar - n.median(NNvar)
    X1[:,2] = (n.log10(n.abs(NNvar))).reshape(sh[0]*sh[1])
    #X1[:,3] = (n.array([freqs]*sh[0])).reshape(sh[0]*sh[1])
    #X1[:,4] = (n.array([times]*sh[1])).reshape(sh[0]*sh[1])
    X1[n.abs(X1)>10**100] = 0
    for m in range(X1.shape[1]):
        X1[:,m] = X1[:,m]/n.abs(X1[:,m]).max()
    X1 = n.nan_to_num(X1)
    return X1
Example #7
def r2_score(y_true, y_pred, round_to=2):
    R"""R-squared for Bayesian regression models. Only valid for linear models.
    http://www.stat.columbia.edu/%7Egelman/research/unpublished/bayes_R2.pdf

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.
    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.
    round_to : int
        Number of decimals used to round results (default 2).

    Returns
    -------
    `namedtuple` with the following elements:
    R2_median: median of the Bayesian R2
    R2_mean: mean of the Bayesian R2
    R2_std: standard deviation of the Bayesian R2
    """
    dimension = None
    if y_true.ndim > 1:
        dimension = 1

    var_y_est = np.var(y_pred, axis=dimension)
    var_e = np.var(y_true - y_pred, axis=dimension)

    r2 = var_y_est / (var_y_est + var_e)
    r2_median = np.around(np.median(r2), round_to)
    r2_mean = np.around(np.mean(r2), round_to)
    r2_std = np.around(np.std(r2), round_to)
    r2_r = namedtuple('r2_r', 'r2_median, r2_mean, r2_std')
    return r2_r(r2_median, r2_mean, r2_std)
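A minimal usage sketch of the ratio computed above (hypothetical arrays, plain numpy; not taken from the original project):

import numpy as np

rng = np.random.default_rng(1)
y_true = rng.normal(size=200)
y_pred = 0.8 * y_true + rng.normal(scale=0.3, size=200)

var_fit = np.var(y_pred)            # variance captured by the fit
var_res = np.var(y_true - y_pred)   # residual variance
print(np.round(var_fit / (var_fit + var_res), 2))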
def test_hash_functions():
    # Checks randomness of hash functions.
    # Variance and mean of each hash function (projection vector)
    # should be different from flattened array of hash functions.
    # If hash functions are not randomly built (seeded with
    # same value), variances and means of all functions are equal.
    n_samples = 12
    n_features = 2
    n_estimators = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
        n_estimators=n_estimators,
        random_state=rng.randint(0, np.iinfo(np.int32).max))
    ignore_warnings(lshf.fit)(X)

    hash_functions = []
    for i in range(n_estimators):
        hash_functions.append(lshf.hash_functions_[i].components_)

    for i in range(n_estimators):
        assert_not_equal(np.var(hash_functions),
                         np.var(lshf.hash_functions_[i].components_))

    for i in range(n_estimators):
        assert_not_equal(np.mean(hash_functions),
                         np.mean(lshf.hash_functions_[i].components_))
Example #9
    def test_bernoulli_extract(self):
        fit = self.fit
        extr = fit.extract(permuted=True)
        assert -7.4 < np.mean(extr['lp__']) < -7.0
        assert 0.1 < np.mean(extr['theta']) < 0.4
        assert 0.01 < np.var(extr['theta']) < 0.02

        # use __getitem__
        assert -7.4 < np.mean(fit['lp__']) < -7.0
        assert 0.1 < np.mean(fit['theta']) < 0.4
        assert 0.01 < np.var(fit['theta']) < 0.02

        # permuted=False
        extr = fit.extract(permuted=False)
        self.assertEqual(extr.shape, (1000, 4, 2))
        self.assertTrue(0.1 < np.mean(extr[:, 0, 0]) < 0.4)

        # permuted=True
        extr = fit.extract('lp__', permuted=True)
        assert -7.4 < np.mean(extr['lp__']) < -7.0
        extr = fit.extract('theta', permuted=True)
        assert 0.1 < np.mean(extr['theta']) < 0.4
        assert 0.01 < np.var(extr['theta']) < 0.02
        extr = fit.extract('theta', permuted=False)
        assert extr.shape == (1000, 4, 2)
        assert 0.1 < np.mean(extr[:, 0, 0]) < 0.4
def test_reductions():
    assert compute(t.a.sum(), b) == 6
    assert compute(t.a.min(), b) == 1
    assert compute(t.a.max(), b) == 3
    assert compute(t.a.mean(), b) == 2.0
    assert abs(compute(t.a.std(), b) - np.std([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.var(), b) - np.var([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.std(unbiased=True), b) - np.std([1, 2, 3],
                                                           ddof=1)) < 1e-5
    assert abs(compute(t.a.var(unbiased=True), b) - np.var([1, 2, 3],
                                                           ddof=1)) < 1e-5
    assert len(list(compute(t.distinct(), b))) == 3
    assert len(list(compute(t.a.distinct(), b))) == 3

    assert compute(t.a.nunique(), b) == 3
    assert isinstance(compute(t.a.nunique(), b), np.integer)

    assert compute(t.a.count(), b) == 3
    assert isinstance(compute(t.date.count(), b), np.integer)

    assert compute(t.date.nunique(), b) == 2
    assert isinstance(compute(t.date.nunique(), b), np.integer)

    assert compute(t.date.count(), b) == 2
    assert isinstance(compute(t.a.count(), b), np.integer)

    assert compute(t.a[0], b) == 1
    assert compute(t.a[-1], b) == 3
    assert compute(t[0], b) == compute(t[0], b)
    assert compute(t[-1], b) == compute(t[-1], b)
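The unbiased=True branches above map onto numpy's ddof argument; a quick standalone illustration:

import numpy as np

a = np.array([1, 2, 3])
print(np.var(a))           # 0.666..., population variance (ddof=0)
print(np.var(a, ddof=1))   # 1.0, sample variance (ddof=1)
print(np.std(a, ddof=1))   # 1.0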
    def _get_likelihood(self, model):
        """Compute the marginal likelihood of the linear model with a g-prior on betas.

        Parameters
        ----------
        model : np.ndarray in R^ndim
            vector of variable inclusion indicators

        Returns
        -------
        float
            log marginal likelihood
        """

        X = self.X[:, model == 1]
        y = self.y
        nobs, ndim = X.shape
        design = np.hstack((np.ones((nobs, 1)), X))

        mle = np.linalg.solve(np.dot(design.T, design), np.dot(design.T, y))
        residuals = y - np.dot(design, mle)
        rsquared = 1 - np.var(residuals) / np.var(y)

        return (log_gamma((nobs - 1) / 2)
            - (nobs - 1) / 2 * np.log(np.pi)
            - 0.5 * np.log(nobs)
            - (nobs - 1) / 2 * np.log(np.dot(residuals, residuals))
            + (nobs - ndim - 1) / 2 * np.log(1 + self.par["penalty"])
            - (nobs - 1) / 2 * np.log(1 + self.par["penalty"] * (1 - rsquared)))
def welch_ttest (X, y):

    classes = np.unique(y)
    n_class = len(classes)
    n_feats = X.shape[1]

    b = np.zeros(n_feats)
    for i in np.arange(n_class):
        for j in np.arange(i+1, n_class):
            if j > i:
                xi = X[y == i, :]
                xj = X[y == j, :]
                yi = y[y == i]
                yj = y[y == j]

                mi = np.mean (xi, axis=0)
                mj = np.mean (xj, axis=0)

                vi = np.var  (xi, axis=0)
                vj = np.var  (xj, axis=0)

                n_subjsi = len(yi)
                n_subjsj = len(yj)

                # vi and vj are already variances, so they enter the Welch
                # denominator unsquared
                t = (mi - mj) / np.sqrt((vi / n_subjsi) + (vj / n_subjsj))
                t[np.isnan(t)] = 0
                t[np.isinf(t)] = 0

                b = np.maximum(b, t)

    return b
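For reference, the textbook Welch statistic for a single feature can be cross-checked against scipy; this sketch uses synthetic data and sample variances (ddof=1), so it is not byte-for-byte the function above, which uses population variances:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
xi = rng.normal(0.0, 1.0, size=50)
xj = rng.normal(0.5, 2.0, size=80)

t_manual = (np.mean(xi) - np.mean(xj)) / np.sqrt(
    np.var(xi, ddof=1) / len(xi) + np.var(xj, ddof=1) / len(xj))
t_scipy, _ = stats.ttest_ind(xi, xj, equal_var=False)
print(t_manual, t_scipy)  # agree up to floating point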
def curv_fit(x=None, y=None, model=None):
    
    x = np.array(x)    
    y = np.array(y)    
    params = lmfit.Parameters()
    
    if model == 'gaussian':
        mod = lmfit.models.GaussianModel()
        params = mod.guess(y, x=x)
        out = mod.fit(y,params, x=x)
        r_sq = 1 - out.residual.var()/np.var(y)
        
    elif model == '4PL':
        mod = lmfit.Model(logistic_4p)
        params.add('la', value=1.0)
        params.add('gr', value=120.0, vary=False)
        params.add('ce', value=150.0)
        params.add('ua', value=3.0)
        out = mod.fit(y, params,x=x)
        r_sq = 1 - out.residual.var()/np.var(y)
        
    elif model == '5PL':
        mod = lmfit.Model(logistic_5p)
        params.add('la', value=1.0)
        params.add('gr', value=1.0)
        params.add('ce', value=1.0)
        params.add('ua', value=1.0)
        params.add('sy', value=1.0)
        out = mod.fit(y, params, x=x)
        r_sq = 1 - out.residual.var()/np.var(y)
    
    out.R_sq = r_sq
    return out
Example #14
def explained_variance_score(y_true, y_pred):
    """Explained variance regression score function

    Best possible score is 1.0, lower values are worse.

    Note: the explained variance is not a symmetric function.

    return the explained variance

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like

    """
    y_true, y_pred = check_arrays(y_true, y_pred)
    numerator = np.var(y_true - y_pred)
    denominator = np.var(y_true)
    if denominator == 0.0:
        if numerator == 0.0:
            return 1.0
        else:
            # arbitrarily set to zero to avoid -inf scores, having a constant
            # y_true is not interesting for scoring a regression anyway
            return 0.0
    return 1 - numerator / denominator
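A tiny self-contained illustration of the score above (values chosen by hand, plain numpy):

import numpy as np

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
print(1 - np.var(y_true - y_pred) / np.var(y_true))  # about 0.957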
def bhattacharyya_dist (X, y):

    classes = np.unique(y)
    n_class = len(classes)
    n_feats = X.shape[1]

    b = np.zeros(n_feats)
    for i in np.arange(n_class):
        for j in np.arange(i+1, n_class):
            if j > i:
                xi = X[y == i, :]
                xj = X[y == j, :]

                mi = np.mean (xi, axis=0)
                mj = np.mean (xj, axis=0)

                vi = np.var  (xi, axis=0)
                vj = np.var  (xj, axis=0)

                si = np.std  (xi, axis=0)
                sj = np.std  (xj, axis=0)

                d  = 0.25 * (np.square(mi - mj) / (vi + vj)) + 0.5  * (np.log((vi + vj) / (2*si*sj)))
                d[np.isnan(d)] = 0
                d[np.isinf(d)] = 0

                b = np.maximum(b, d)

    return b
    def indivConfInter(self, data):
        if type(data) is float:
            med = numpy.median(data)
            mean = numpy.mean(data)
            stdDev = math.sqrt(numpy.var(data))
            #confidence interval
            ci95low = mean - 10 * (1.96 * stdDev)
            ci95up = mean + 10 * (1.96 * stdDev)

            #confidence level
            cl95low = med - (1.96 * stdDev)
            cl95up = med + (1.96 * stdDev)

            return [med, mean, ci95low, ci95up, cl95low, cl95up]
        elif len(data) > 0:
            med = numpy.median(data)
            mean = numpy.mean(data)
            stdDev = math.sqrt(numpy.var(data))
            ci95low = mean - 10 * (1.96 * (stdDev / math.sqrt(len(data))))
            ci95up = mean + 10 * (1.96 * (stdDev / math.sqrt(len(data))))

            cl95low = med - (1.96 * (stdDev / math.sqrt(len(data))))
            cl95up = med + (1.96 * (stdDev / math.sqrt(len(data))))

            return [med, mean, ci95low, ci95up, cl95low, cl95up]
        else:
            return [None, None, None, None, None, None]
Example #17
def calc_twosample_ts(propGroup1, propGroup2):
    n1 = len(propGroup1[0])
    n2 = len(propGroup2[0])
    numFeatures = len(propGroup1)

    T_statistics = []
    effectSizes = []
    notes = []
    for r in xrange(0, numFeatures):
        meanG1 = float(sum(propGroup1[r])) / n1
        varG1 = var(propGroup1[r], ddof=1)
        stdErrG1 = varG1 / n1

        meanG2 = float(sum(propGroup2[r])) / n2
        varG2 = var(propGroup2[r], ddof=1)
        stdErrG2 = varG2 / n2

        dp = meanG1 - meanG2
        effectSizes.append(dp * 100)

        denom = math.sqrt(stdErrG1 + stdErrG2)

        if denom == 0:
            notes.append("degenerate case: zero variance for both groups; variance set to 1e-6.")
            T_statistics.append(dp / 1e-6)
        else:
            notes.append("")
            T_statistics.append(dp / denom)

    return T_statistics, effectSizes, notes
Example #18
	def drawPlot(m):
		print("Poly degree is %d" % m)
		A = getAMatrix(x,m,sigma)
		B = getBColumn(x,y,m,sigma)
		#print("A matrix is")
		#print(A)
		#print("B column is")
		#print(B)
		c,v = solveKramer(A, B)
		polyCurve = np.poly1d(c)
		ty = polyCurve(x) #teoretic y
		print("calculated coeficients are:")
		print(c)
		print("coef variance:")
		print(v)
		# polyfit sometimes raises this error when rank(A) < m (i.e. A cannot be inverted and there is no unique solution)
		try:
			polyC, polyV = np.polyfit(x,y,m, full=False, cov=True)
			print("coef from polyfit are:")
			print(polyC)
			print("covariance matrix from polyfit are:")
			print(polyV)
		except ValueError as err:
			print("Error in numpy.polyfit " )
			print(err)
		print("-------------------------------------------------------------------")
		print("goodness=R**2=%4.3f"% (1  - np.var(np.subtract(ty,y)) / np.var(y)) )
		#corrected division by 0 error when m==n
		print("avg unc=sigma**2=%4.3f"% ((1.0 / ((n-m) if n!=m  else 1) ) *  np.sum(np.power(np.subtract(ty,y),2))) )
		l.set_ydata(ty)
		ax.relim()
		ax.autoscale_view(True,True,True)
		plt.draw()
Example #19
def fig_spesen(spe, sen, fname='fig_model.png', leg=False):
    """Plot the specificity for the early, middle and end section"""
    fig, ax = plt.subplots(figsize=(3, 3))
    fa_rate = 1 - np.mean(spe, axis=0)
    fa_err = np.var(spe, axis=0)
    hits_rate = np.mean(sen, axis=0)
    hits_err = np.var(sen, axis=0)
    ax.plot(np.arange(1, 4), hits_rate, color='#47bb3a', linewidth=1)
    ax.plot(np.arange(1, 4), fa_rate, color='#ec1d2a', linewidth=1)
    width = 0.5
    ax.bar(np.arange(1, 4) - width/2., hits_rate,
           width, yerr=hits_err, color='#a7db60', label='HIT rate',
           error_kw=dict(ecolor='black', lw=1, capsize=2.5, capthick=1))

    ax.bar(np.arange(1, 4) - width/2., fa_rate,
           width, yerr=fa_err, color='#f2507b', label='FA rate',
           error_kw=dict(ecolor='black', lw=1, capsize=2.5, capthick=1))
    plt.xlim(0, 4)
    plt.ylim(0, 0.9)
    plt.ylabel("Response Rate")
    plt.xlabel(r'Session Start $\rightarrow$ Session End')
    ax.set_xticks(range(1, 4))
    ax.set_xticklabels(["Initial", "Middle", "Final"],  fontsize=10)
    colors = ("red", "orange", "green")
    [t.set_color(colors[i]) for i, t in enumerate(plt.gca().get_xticklabels())]
    adjust_spines(ax, ["bottom", "left"])
    if leg:
        ax.legend(fontsize=10)
    plt.tight_layout()
    plt.savefig(fname)
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
                                                y_is_x):
    # check that pairwise_distances give the same result in sequential and
    # parallel, when metric has data-derived parameters.
    with config_context(working_memory=1):  # to have more than 1 chunk
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))

        if y_is_x:
            Y = X
            expected_dist_default_params = squareform(pdist(X, metric=metric))
            if metric == "seuclidean":
                params = {'V': np.var(X, axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(X.T)).T}
        else:
            Y = rng.random_sample((1000, 10))
            expected_dist_default_params = cdist(X, Y, metric=metric)
            if metric == "seuclidean":
                params = {'V': np.var(np.vstack([X, Y]), axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(np.vstack([X, Y]).T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        dist = np.vstack(tuple(dist_function(X, Y,
                                             metric=metric, n_jobs=n_jobs)))

        assert_allclose(dist, expected_dist_explicit_params)
        assert_allclose(dist, expected_dist_default_params)
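The data-derived parameter for "seuclidean" can be seen in isolation; a standalone sketch (assumes scipy) showing that passing V=np.var(X, axis=0, ddof=1) explicitly reproduces pdist's default, which is exactly what the test above relies on:

import numpy as np
from scipy.spatial.distance import cdist, pdist, squareform

rng = np.random.RandomState(0)
X = rng.random_sample((20, 5))

d_default = squareform(pdist(X, metric="seuclidean"))
d_explicit = cdist(X, X, metric="seuclidean", V=np.var(X, axis=0, ddof=1))
print(np.allclose(d_default, d_explicit))  # True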
Example #21
def statprint(host_per_pg, pg_per_host):
    val = pg_per_host.values()  # sets val to a list of the values in pg_per_host
    mean = numpy.mean(val)
    maxvalue = numpy.amax(val)
    minvalue = numpy.amin(val)
    std = numpy.std(val)
    median = numpy.median(val)
    variance = numpy.var(val)
    print("for placement groups on hosts: ")
    print( "the mean is: ", mean)
    print( "the max value is: ", maxvalue)
    print( "the min value is: ", minvalue)
    print( "the standard deviation is: ", std)
    print( "the median is: ", median)
    print( "the variance is: ", variance)
    # prints statements for stats
    host_mean = numpy.mean(host_per_pg)
    host_max = numpy.amax(host_per_pg)
    host_min = numpy.amin(host_per_pg)
    host_std = numpy.std(host_per_pg)
    host_median = numpy.median(host_per_pg)
    host_variance = numpy.var(host_per_pg)
    # these are the variables for hosts/pgs
    print("hosts per placement group: ")
    print("the mean is: ", host_mean)
    print("the max value is: ", host_max)
    print("the min value is: ", host_min)
    print("the standard deviation is: ", host_std)
    print("the median is: ", host_median)
    print("the variance is: ", host_variance)
Example #22
def XDapogee(options,args):
    #First load the chains
    savefile= open(args[0],'rb')
    thesesamples= pickle.load(savefile)
    savefile.close()
    vcs= numpy.array([s[0] for s in thesesamples])*_APOGEEREFV0/_REFV0
    dvcdrs= numpy.array([s[6] for s in thesesamples])*30. #To be consistent with this project's dlnvcdlnr 
    print numpy.mean(vcs)
    print numpy.mean(dvcdrs)
    #Now fit XD to the 2D PDFs
    ydata= numpy.zeros((len(vcs),2))
    ycovar= numpy.zeros((len(vcs),2))
    ydata[:,0]= numpy.log(vcs)
    ydata[:,1]= dvcdrs
    vcxamp= numpy.ones(options.g)/options.g
    vcxmean= numpy.zeros((options.g,2))
    vcxcovar= numpy.zeros((options.g,2,2))
    for ii in range(options.g):
        vcxmean[ii,:]= numpy.mean(ydata,axis=0)+numpy.std(ydata,axis=0)*numpy.random.normal(size=(2))/4.
        vcxcovar[ii,0,0]= numpy.var(ydata[:,0])
        vcxcovar[ii,1,1]= numpy.var(ydata[:,1])
    extreme_deconvolution.extreme_deconvolution(ydata,ycovar,
                                                vcxamp,vcxmean,vcxcovar)
    save_pickles(options.plotfile,
                 vcxamp,vcxmean,vcxcovar)
    print vcxamp
    print vcxmean[:,0]
    print vcxmean[:,1]
    return None
def AsianCallSimPrice(S0, K, T, r, sigma, M, I, CV=False):
    dt = T / M
    S = np.zeros((M + 1, I))
    z = np.random.standard_normal((M + 1, I))  # pseudorandom numbers
    Savg = np.zeros(I)
    S[0] = S0
    S = S0 * np.exp(np.cumsum((r - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * z, axis=0))
    Savg = np.average(S, axis=0)
    if CV == False:
        price = np.exp(-r * T) * np.sum(np.maximum(Savg - K, 0)) / I
        error = math.sqrt(np.var(np.maximum(Savg - K, 0))) / math.sqrt(I)
        result = (price, error)
    else:
        Tvector = np.arange(dt, T + dt, dt)
        T_avg = Tvector.mean()
        i_vector = np.arange(1, 2 * M + 1, 2)
        sigma_avg = math.sqrt(sigma ** 2 / (M ** 2 * T_avg) * np.dot(i_vector, Tvector[::-1]))
        delta = 0.5 * (sigma ** 2 - sigma_avg ** 2)
        d = (math.log(S0 / K) + (r - delta + 0.5 * sigma_avg ** 2) * T_avg) / (sigma_avg * math.sqrt(T_avg))
        GeomAsianCall = np.exp(-delta * T_avg) * S0 * scipy.stats.norm.cdf(d) - np.exp(
            -r * T_avg
        ) * K * scipy.stats.norm.cdf(d - sigma_avg * math.sqrt(T_avg))
        S_CV = scipy.stats.mstats.gmean(S, axis=0)
        X = np.exp(-r * T) * np.maximum(S_CV - K, 0)
        Y = np.exp(-r * T) * np.maximum(Savg - K, 0)
        b = np.cov(X, Y)[0][1] / X.var()
        price = Y.mean() - b * (X.mean() - GeomAsianCall)
        error = math.sqrt(np.var(Y - b * X)) / math.sqrt(I)
        rho = np.corrcoef(X, Y)[0][1]
        result = (price, error, rho)
    return result
Example #24
def log_evidence(X, y, g):
    """Compute the model's log evidence (a.k.a. marginal likelihood).

    Parameters
    ----------
    X : np.ndarray in R^(nobs x ndim)
        feature matrix
    y : np.ndarray in R^nobs
        target vector
    g : float (0, inf)
        dimensionality penalty

    Returns
    -------
    float
        log evidence
    """

    n, d = X.shape
    X_int = np.hstack((np.ones((n, 1)), X))

    mle = np.linalg.solve(np.dot(X_int.T, X_int), np.dot(X_int.T, y))
    resid = y - np.dot(X_int, mle)
    rsq = (d > 0 and 1 - np.var(resid) / np.var(y)) or 0

    return (log_gamma((n - 1) / 2)
        - (n - 1) / 2 * np.log(np.pi)
        - 0.5 * np.log(n)
        - (n - 1) / 2 * np.log(np.dot(resid, resid))
        + (n - d - 1) / 2 * np.log(1 + 1 / g)
        - (n - 1) / 2 * np.log(1 + 1 / g * (1 - rsq)))
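The rsq term above is just the in-sample R-squared of an OLS fit with an intercept; a brief synthetic-data sketch of that piece (plain numpy, hypothetical coefficients):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.5, size=100)

X_int = np.hstack((np.ones((100, 1)), X))
mle, *_ = np.linalg.lstsq(X_int, y, rcond=None)
resid = y - X_int @ mle
print(1 - np.var(resid) / np.var(y))  # close to 1 for this low-noise fit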
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    unl = load_unlabeled_training(flatten=True)
    unl = standardize(unl)
    test = load_public_test(flatten=True)
    test = standardize(test)
    shuffle_in_unison(images, labels)
    #d = DictionaryLearning().fit(images)
    d = MiniBatchDictionaryLearning(n_components=500, n_iter=500, verbose=True).fit(images)
    s = SparseCoder(d.components_)
    proj_test = s.transform(images)
    pt = s.transform(test)
    #kpca = KernelPCA(kernel="rbf")
    #kpca.fit(unl)
    #test_proj = kpca.transform(images)
    #pt = kpca.transform(test)
    #spca = SparsePCA().fit(unl)
    #test_proj = spca.transform(images)
    #pt = spca.transform(test)
    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_test, labels, cv=10)
    print scores
    print np.mean(scores)
    print np.var(scores)
    svc.fit(proj_test, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
Example #26
	def findvdisp3(self,r,v,mags,r200,maxv):
		"use red sequence to find members"
		binedge = np.arange(0,r200+1,0.3)
		rin = r
		vin = v
		colin = mags.T[1] - mags.T[2]
		avg_c = np.average(colin)
		vfinal = np.array([])
		for i in range(binedge.size-1):
			i += 1
			x = rin[np.where((rin>binedge[i-1]) & (rin<binedge[i]))]
			y = vin[np.where((rin>binedge[i-1]) & (rin<binedge[i]))]
			c = colin[np.where((rin>binedge[i-1]) & (rin<binedge[i]))]
			for k in range(6):
				y2 = y
				x2 = x
				c2 = c
				stv = 3.5 * np.std(y2)
				y = y2[np.where((y2 > -stv) & (y2 < stv) | ((c2<avg_c+0.04) & (c2>avg_c-0.04)))]
				x = x2[np.where((y2 > -stv) & (y2 < stv) | ((c2<avg_c+0.04) & (c2>avg_c-0.04)))]
				c = c2[np.where((y2 > -stv) & (y2 < stv) | ((c2<avg_c+0.04) & (c2>avg_c-0.04)))]
			vstd2 = np.std(y)
			vvar2 = np.var(y)
			print 'standard dev of zone %i = %f' % (i,vstd2)
			vfinal = np.append(y[np.where((y<vvar2) & (y>-vvar2))],vfinal)
		return np.var(vfinal)
def classify_2d(data_a, data_b, x):
    x1 = x[0]
    x2 = x[1]

    probability_a = data_a.shape[1] / (data_a.shape[1] + data_b.shape[1])
    probability_b = data_b.shape[1] / (data_a.shape[1] + data_b.shape[1])

    mean_x1_a = np.mean(data_a[0,:])
    mean_x2_a = np.mean(data_a[1,:])

    mean_x1_b = np.mean(data_b[0,:])
    mean_x2_b = np.mean(data_b[1,:])

    variance_x1_a = np.var(data_a[0,:])
    variance_x2_a = np.var(data_a[1,:])

    variance_x1_b = np.var(data_b[0,:])
    variance_x2_b = np.var(data_b[1,:])

    pd_x1_given_a = mlab.normpdf(x1, mean_x1_a, variance_x1_a)
    pd_x2_given_a = mlab.normpdf(x2, mean_x2_a, variance_x2_a)
    pd_x1_given_b = mlab.normpdf(x1, mean_x1_b, variance_x1_b)
    pd_x2_given_b = mlab.normpdf(x2, mean_x2_b, variance_x2_b)

    posterior_numerator_a = probability_a * pd_x1_given_a * pd_x2_given_a
    posterior_numerator_b = probability_b * pd_x1_given_b * pd_x2_given_b

    posterior_numerators = { 'A': posterior_numerator_a, 'B': posterior_numerator_b }

    return max(posterior_numerators.iterkeys(), key=(lambda k: posterior_numerators[k]))
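An equivalent per-feature Gaussian likelihood can be written with scipy.stats.norm; note that norm.pdf expects a standard deviation rather than a variance, so np.std (or the square root of the np.var value) is the appropriate scale argument. Hypothetical numbers, just for illustration:

import numpy as np
from scipy.stats import norm

feature_values = np.array([1.0, 1.2, 0.8, 1.1])
x1 = 1.05
pd_x1 = norm.pdf(x1, loc=np.mean(feature_values), scale=np.std(feature_values))
print(pd_x1)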
Example #28
def calc_com(mask):
    pts = index_to_zyx( mask )

    z = pts[0,:].astype(float).mean()
    # Correct Center of Mass for reentrant domain
    y1 = pts[1,:].astype(float)
    x1 = pts[2,:].astype(float)
    y2 = (y1 < ny/2.)*y1 + (y1>= ny/2.)*(y1 - ny)
    x2 = (x1 < nx/2.)*x1 + (x1>= nx/2.)*(x1 - nx)
    y1m = y1.mean()
    y2m = y2.mean()
    x1m = x1.mean()
    x2m = x2.mean()
    
    if numpy.var(y2 - y2m) > numpy.var(y1 - y1m):
        y = y1m
    else:
        y = (y2m + .5)%ny - .5
        
    if numpy.var(x2 - x2m) > numpy.var(x1 - x1m):
        x = x1m
    else:
        x = (x2m + .5)%nx - .5
        
    return numpy.array((z, y, x))
    def _tTest(x, y, exclude=95):
        """Compute a one-sided Welsh t-statistic."""
        with np.errstate(all="ignore"):
            def cappedSlog(v):
                q = np.percentile(v, exclude)
                v2 = v.copy()
                v2 = v2[~np.isnan(v2)]
                v2[v2 > q] = q
                v2[v2 <= 0] = 1. / (75 + 1)
                return np.log(v2)
            x1 = cappedSlog(x)
            x2 = cappedSlog(y)
            sx1 = np.var(x1) / len(x1)
            sx2 = np.var(x2) / len(x2)
            totalSE = np.sqrt(sx1 + sx2)
            if totalSE == 0:
                stat = 0
            else:
                stat = (np.mean(x1) - np.mean(x2)) / totalSE

            #df   = (sx1 + sx2)**2 / (sx1**2/(len(x1)-1) + sx2**2/(len(x2) - 1))
            #pval = 1 - scidist.t.cdf(stat, df)

            # Scipy's t distribution CDF implementation has inadequate
            # precision.  We have switched to the normal distribution for
            # better behaved p values.
            pval = 0.5 * erfc(stat / sqrt(2))

            return {'testStatistic': stat, 'pvalue': pval}
Example #30
def calc_error(data):
    """
    Error estimation for a time series of simulation observables, taking into
    account that such series are correlated to some degree (which inflates
    the estimated statistical error).
    """
    # calculate the normalized autocorrelation function of data
    acf = autocorrelation(data)
    # calculate the integrated correlation time tau_int
    # (Janke, Wolfhard. "Statistical analysis of simulations: Data correlations
    # and error estimation." Quantum Simulations of Complex Many-Body Systems:
    # From Theory to Algorithms 10 (2002): 423-445.)
    tau_int = 0.5
    for i in range(len(acf)):
        tau_int += acf[i]
        if ( i >= 6 * tau_int ):
            break
    # mean value of the time series
    data_mean = np.mean(data)
    # calculate the so called effective length of the time series N_eff
    if (tau_int > 0.5):
        N_eff = len(data) / (2.0 * tau_int)
        # finally the error is sqrt(var(data)/N_eff)
        stat_err = np.sqrt(np.var(data) / N_eff)
    else:
        stat_err = np.sqrt(np.var(data) / len(data))
    return data_mean, stat_err
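The autocorrelation() helper is not included in this snippet; one common normalized estimator (an assumption, not necessarily the original author's implementation) that fits the tau_int loop above is:

import numpy as np

def autocorrelation(data):
    # normalized autocorrelation for lags >= 1, so that
    # tau_int = 0.5 + sum_k acf[k] matches the loop in calc_error
    x = np.asarray(data, dtype=float)
    x = x - x.mean()
    acf = np.correlate(x, x, mode="full")[x.size - 1:]
    return acf[1:] / acf[0]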
                     sys.getsizeof(graph_data[1].Ri) + 
                     sys.getsizeof(graph_data[1].Ro) + 
                     sys.getsizeof(graph_data[1].y) + 
                     sys.getsizeof(graph_data[1].a))/10.**6)

        n_nodes, n_edges = get_shape(graph_data)
        node_counts.append(n_nodes)
        edge_counts.append(n_edges)
        
        truth_eff = get_truth_efficiency(i, graph_data, truth_table)
        seg_eff = get_segment_efficiency(graph_data)
        truth_effs.append(truth_eff)
        seg_effs.append(seg_eff)

        
    avg_seg_eff   = [np.mean(seg_effs), np.sqrt(np.var(seg_effs))]
    avg_truth_eff = [np.mean(truth_effs), np.sqrt(np.var(truth_effs))]
    
    avg_nodes     = [np.mean(node_counts), np.sqrt(np.var(node_counts))]
    avg_edges     = [np.mean(edge_counts), np.sqrt(np.var(edge_counts))]
    avg_size      = [np.mean(sizes), np.sqrt(np.var(sizes))]

    # print out a brief report of the measurements
    data_tag = " ***** pt=" + pt_cuts[i] + " data ***** "
    print("{0}\n \t seg_eff: {1} +/- {2} \n \t truth_eff: {3} +/- {4}"
          .format(data_tag, np.round(avg_seg_eff[0], decimals=3),
                  np.round(avg_seg_eff[1], decimals=3),
                  np.round(avg_truth_eff[0], decimals=3),
                  np.round(avg_truth_eff[1], decimals=3)))
    print("\t nodes: {0} +/- {1} \n \t edges: {2} +/- {3}"
          .format(np.round(avg_nodes[0], decimals=3),
    frame = pyfits.getdata(dir+filename_sub+str(files[i])+'.fits')
    cds = fluxdir*(frame-f9)

    #Define reference pixels
    ref = [0,1,2,3, len(cds)-1, len(cds)-2, len(cds)-3, len(cds)-4]
    #subtract median of reference pixels
    for j in range((cds.shape)[1]):
        cds[ : , j] -= np.median(cds[ref, j])

    for j in range(n_crops):
        crop  = cds[center+yshifts[j] : center+yshifts[j]+70 , 
                    center+1+(j-n_crops/2)*64 : center-1+(j-n_crops/2)*64 +64 ]
    
        #crosstalk correction increases the variance but does not affect the signal
        mask = masks[j*2:j*2+2]
        noisearr[i , j] = np.var(crop[mask], ddof=1) * correction
        fluxarr[i , j] = np.mean(crop[mask])

        #verify masking is being reasonable
        if i == len(files)-1:
            myrange = [np.median(crop)-10.*np.std(crop[mask]) , 
                       np.median(crop)+10.*np.std(crop[mask])]
            junk = plt.hist(crop.flatten(), bins=50, range=myrange)
            
            plt.title("Pixel Brightness Histogram, CDS "+str(max(files))+'-09')
            plt.xlabel('ADU')
            plt.ylabel('N_Occurences')
            plt.vlines( (np.min(crop[mask]) , np.max(crop[mask])) , 
                        0.1, 1.2*np.max(junk[0]), colors='r')
            plt.show()
Example #33
        if j > 11 and j < (length - 1):
            data = line.split(",")
            #print ("J= ", j)

            interface = data[2]
            #print ("interafce= ", interface)
            inter = interface.split("/")
            #print (inter[0], inter[1], inter[2])

            if data[3] == "6\n" and inter[2] == str(i):
                #print (data[0], data[1], data[2], data[3])
                portno = int(data[1])
                #print ("port no type = ", type(portno))
                #print ("port no = ", portno)
                port.insert(k, portno)
                k = k + 1

    portlen = len(port)
    port.sort()

    if portlen > 0:
        max = port[portlen - 1]
        min = port[0]
        avg = math.ceil(numpy.mean(port))
        var = math.ceil(numpy.var(port))
        std = math.ceil(numpy.std(port))

        print("### interface= G1/0/", i, "tcp port numbers= ", portlen,
              "gap= ", max - min, "max= ", max, "min =", min, "avg= ", avg,
              "var= ", var, "std= ", std, "tcp ports are= ", port)
Example #34
min_res = min(res)

inversas = []

for value in res:
    k = int(math.floor((value - delta) / w))  # this is my bin index
    # interpolate the values of F^-1(x)
    inv = ((value - (delta + k * w)) / w) * (x[k + 1] - x[k]) + x[k]
    #inv = np.interp(value,[delta+k*w, delta+(k+1)*w],[x[k],x[k+1]])
    inversas.append(inv)
#print inversas

max_inv = max(inversas)
min_inv = min(inversas)
#h = [0] * m
#for value in inversas:
#    k = int(math.floor((value+gamma)/q))
#    h[k] = h[k]+1
#print sum(h)
#p, bins, patches = plt.hist(inversas, m , density=True, facecolor='g', alpha=0.75)
inversas_2 = []
for i in inversas:
    inversas_2.append(round(i, 2))

print "Media Calculada: " + str(np.mean(inversas_2)) + " vs " + "0"
print "Desvio Calculado: " + str(np.var(inversas_2)) + " vs " + "1"
print "Moda Calculada: " + str(stats.mode(inversas_2)[0]) + " vs " + "0"

plt.hist(inversas_2, 50)
plt.show()
Example #35
print(np.mean(a2, axis = 0))
print(np.mean(a2, axis = 1))
print()

# std() : compute standard deviation
print('std() : compute standard deviation')
print(a2)
print(np.std(a2))
print(np.std(a2, axis = 0))
print(np.std(a2, axis = 1))
print()

# var() : compute variance
print('var() : compute variance')
print(a2)
print(np.var(a2))
print(np.var(a2, axis = 0))
print(np.var(a2, axis = 1))
print()

# min() : minimum value
print('min() : minimum value')
print(a2)
print(np.min(a2))
print(np.min(a2, axis = 0))
print(np.min(a2, axis = 1))
print()

# max() : maximum value
print('max() : maximum value')
print(a2)
Y = np.array([f(V2X(V[i,:])) for i in range(ned)])

# reformat if necessary
if np.size(np.shape(Y))==1:
    Y = np.expand_dims(Y, axis=1)

# build surrogates
# fpce,_   = MM.build_fpce(V, Y, ppce)
fpce     = MM.build_spce(V, Y, Q, ppce, q)
flra,_,_ = MM.build_cp(V, Y, R, plra)

# compute generalization error (MC estimate)
Ypce  = np.array([fpce(Ve[i,:]) for i in range(nmc)]).flatten()
Ylra  = np.array([flra(Ve[i,:]) for i in range(nmc)]).flatten()

err_pce = np.mean((Ye -  Ypce)**2)/np.var(Ye)
err_lra = np.mean((Ye -  Ylra)**2)/np.var(Ye)

print "\nGeneralization Error Comparison\n\n"\
        "PCE: g_e = {0}\n"\
        "LRA: g_e = {1}".format(err_pce,err_lra)

# ---------------------------------------------------------------------------
# density plots
# ---------------------------------------------------------------------------

de   = ss.kde.gaussian_kde(np.abs(Ye))
dpce = ss.kde.gaussian_kde(np.abs(Ypce))
dlra = ss.kde.gaussian_kde(np.abs(Ylra))
y    = np.linspace(min(np.abs(Ye)),max(np.abs(Ye)),100)
Example #37
 def var(x):
     return np.var(x, ddof=1)
Example #38
    def processData(self):
        self.NcascStart = self.par_obj.NcascStart
        self.NcascEnd = self.par_obj.NcascEnd
        self.Nsub = self.par_obj.Nsub
        self.winInt = self.par_obj.winInt

        #self.subChanArr, self.trueTimeArr, self.dTimeArr,self.resolution = pt3import(self.filepath)
        if self.ext == 'pt2':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt2import(
                self.filepath)
        if self.ext == 'pt3':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt3import(
                self.filepath)
        if self.ext == 'csv':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = csvimport(
                self.filepath)
        if self.ext == 'spc':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = spc_file_import(
                self.filepath)
        if self.ext == 'asc':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = asc_file_import(
                self.filepath)

        self.subArrayGeneration(self.xmin, self.xmax)

        self.dTimeMin = self.parentId.dTimeMin
        self.dTimeMax = self.parentId.dTimeMax
        self.subDTimeMin = self.dTimeMin
        self.subDTimeMax = self.dTimeMax

        #Time series of photon counts. For visualisation.
        self.timeSeries1, self.timeSeriesScale1 = delayTime2bin(
            np.array(self.trueTimeArr) / 1000000, np.array(self.subChanArr),
            self.ch_present[0], self.photonCountBin)

        unit = self.timeSeriesScale1[-1] / self.timeSeriesScale1.__len__()
        self.kcount_CH1 = np.average(self.timeSeries1)

        raw_count = np.average(
            self.timeSeries1
        )  #This is the unnormalised intensity count for int_time duration (the first moment)
        var_count = np.var(self.timeSeries1)

        self.brightnessNandBCH0 = (((var_count - raw_count) / (raw_count)) /
                                   (float(unit)))
        if (var_count - raw_count) == 0:
            self.numberNandBCH0 = 0
        else:
            self.numberNandBCH0 = (raw_count**2 / (var_count - raw_count))

        if self.numOfCH == 2:

            self.timeSeries2, self.timeSeriesScale2 = delayTime2bin(
                np.array(self.trueTimeArr) / 1000000,
                np.array(self.subChanArr), self.ch_present[1],
                self.photonCountBin)
            unit = self.timeSeriesScale2[-1] / self.timeSeriesScale2.__len__()
            self.kcount_CH2 = np.average(self.timeSeries2)
            raw_count = np.average(
                self.timeSeries2
            )  #This is the unnormalised intensity count for int_time duration (the first moment)
            var_count = np.var(self.timeSeries2)
            self.brightnessNandBCH1 = (((var_count - raw_count) /
                                        (raw_count)) / (float(unit)))
            if (var_count - raw_count) == 0:
                self.numberNandBCH1 = 0
            else:
                self.numberNandBCH1 = (raw_count**2 / (var_count - raw_count))

            self.CV = calc_coincidence_value(self)

        #Adds names to the fit function for later fitting.
        if self.objId1 == None:
            corrObj = corrObject(self.filepath, self.fit_obj)
            self.objId1 = corrObj.objId
            self.objId1.parent_name = 'pt FCS tgated -tg0: ' + str(
                np.round(self.xmin, 0)) + ' -tg1: ' + str(
                    np.round(self.xmax, 0))
            self.objId1.parent_uqid = 'pt FCS tgated -tg0: ' + str(
                np.round(self.xmin, 0)) + ' -tg1: ' + str(
                    np.round(self.xmax, 0))
            self.fit_obj.objIdArr.append(corrObj.objId)
            self.objId1.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId1.name = self.name + '_CH0_Auto_Corr'
            self.objId1.ch_type = 0  #channel 0 Auto
            self.objId1.siblings = None
            self.objId1.prepare_for_fit()

            self.objId1.kcount = self.kcount_CH1
        self.objId1.autoNorm = np.array(self.autoNorm[:, 0, 0]).reshape(-1)
        self.objId1.autotime = np.array(self.autotime).reshape(-1)
        self.objId1.param = copy.deepcopy(self.fit_obj.def_param)

        if self.numOfCH == 2:
            self.objId1.CV = self.CV
            if self.objId3 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId3 = corrObj.objId
                self.objId3.parent_name = 'pt FCS tgated -tg0: ' + str(
                    np.round(self.xmin, 0)) + ' -tg1: ' + str(
                        np.round(self.xmax, 0))
                self.objId3.parent_uqid = 'pt FCS tgated -tg0: ' + str(
                    np.round(self.xmin, 0)) + ' -tg1: ' + str(
                        np.round(self.xmax, 0))
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId3.name = self.name + '_CH1_Auto_Corr'
                self.objId3.ch_type = 1  #channel 1 Auto
                self.objId3.siblings = None
                self.objId3.prepare_for_fit()
                self.objId3.kcount = self.kcount_CH2

            self.objId3.autoNorm = np.array(self.autoNorm[:, 1, 1]).reshape(-1)
            self.objId3.autotime = np.array(self.autotime).reshape(-1)
            self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId3.CV = self.CV
            if self.objId2 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId2 = corrObj.objId
                self.objId2.parent_name = 'pt FCS tgated -tg0: ' + str(
                    np.round(self.xmin, 0)) + ' -tg1: ' + str(
                        np.round(self.xmax, 0))
                self.objId2.parent_uqid = 'pt FCS tgated -tg0: ' + str(
                    np.round(self.xmin, 0)) + ' -tg1: ' + str(
                        np.round(self.xmax, 0))
                self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId2.name = self.name + '_CH01_Cross_Corr'
                self.objId2.ch_type = 2  #channel 01 Cross
                self.objId2.siblings = None
                self.objId2.prepare_for_fit()

            self.objId2.autoNorm = np.array(self.autoNorm[:, 0, 1]).reshape(-1)
            self.objId2.autotime = np.array(self.autotime).reshape(-1)
            self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId2.CV = self.CV
            if self.objId4 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId4 = corrObj.objId

                self.objId4.parent_name = 'pt FCS tgated -tg0: ' + str(
                    np.round(self.xmin, 0)) + ' -tg1: ' + str(
                        np.round(self.xmax, 0))
                self.objId4.parent_uqid = 'pt FCS tgated -tg0: ' + str(
                    np.round(self.xmin, 0)) + ' -tg1: ' + str(
                        np.round(self.xmax, 0))
                self.objId4.param = copy.deepcopy(self.fit_obj.def_param)
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId4.name = self.name + '_CH10_Cross_Corr'
                self.objId4.ch_type = 3  #channel 10 Cross
                self.objId4.siblings = None
                self.objId4.prepare_for_fit()

            self.objId4.autoNorm = np.array(self.autoNorm[:, 1, 0]).reshape(-1)
            self.objId4.autotime = np.array(self.autotime).reshape(-1)
            self.objId4.CV = self.CV

        self.fit_obj.fill_series_list()
        #del self.subChanArr
        #self.trueTimeArr
        del self.dTimeArr
Example #39
def get_video_feat(video, feat_dates):
    # source data
    history = video[video['day'].map(lambda x: x in feat_dates)]
    history['cnt'] = 1
    # features to return
    feature = pd.DataFrame(columns=['user_id'])

    ## count statistics
    pivot = pd.pivot_table(history,
                           index=['user_id', 'day'],
                           values='cnt',
                           aggfunc=len)
    pivot = pivot.unstack(level=-1)
    pivot.fillna(0, downcast='infer', inplace=True)
    feat = pd.DataFrame()
    feat['user_id'] = pivot.index
    feat.index = pivot.index
    # per-day features
    for i in range(1, len(feat_dates) + 1):
        feat['user_video_cnt_before_' + str(i) +
             '_day'] = pivot[pivot.columns.tolist()[-i]]
    # sum
    feat['user_video_cnt_sum'] = pivot.sum(1)
    # mean
    feat['user_video_cnt_mean'] = pivot.mean(1)
    # variance
    feat['user_video_cnt_var'] = pivot.var(1)
    # maximum
    feat['user_video_cnt_max'] = pivot.max(1)
    # minimum
    feat['user_video_cnt_min'] = pivot.min(1)
    # merge into the feature frame
    feature = pd.merge(feature, feat, on=['user_id'], how='outer')

    #    ## differences and their statistics
    #    diff = pivot.diff(axis = 1)
    #    diff = diff[diff.columns.tolist()[1:]]
    #    feat = pd.DataFrame()
    #    feat['user_id'] = diff.index
    #    feat.index = diff.index
    #    # each difference
    #    for i in range(1,len(feat_dates)):
    #        feat['user_video_diff_before_' + str(i) + '_day'] = diff[diff.columns.tolist()[-i]]
    #    # sum
    #    feat['user_video_diff_sum'] = diff.sum(1)
    #    # mean
    #    feat['user_video_diff_mean'] = diff.mean(1)
    #    # variance
    #    feat['user_video_diff_var'] = diff.var(1)
    #    # maximum
    #    feat['user_video_diff_max'] = diff.max(1)
    #    # minimum
    #    feat['user_video_diff_min'] = diff.min(1)
    #    # merge into the feature frame
    #    feature = pd.merge(feature,feat,on = ['user_id'],how = 'outer')

    ## consecutive shooting days
    feat = pd.DataFrame()
    feat['user_id'] = pivot.index
    feat.index = pivot.index
    pivot = pivot.applymap(lambda x: 1 if x != 0 else 0)
    feat['video_list'] = pivot.apply(
        lambda x: reduce(lambda y, z: str(y) + str(z), x), axis=1)
    # consecutive shooting days: mean
    feat['user_video_continue_mean'] = feat['video_list'].map(
        lambda x: np.mean([len(y) for y in re.split('0+', x.strip('0'))]))
    # consecutive shooting days: variance
    feat['user_video_continue_var'] = feat['video_list'].map(
        lambda x: np.var([len(y) for y in re.split('0+', x.strip('0'))]))
    # consecutive shooting days: maximum
    feat['user_video_continue_max'] = feat['video_list'].map(
        lambda x: np.max([len(y) for y in re.split('0+', x.strip('0'))]))
    # consecutive shooting days: minimum
    feat['user_video_continue_min'] = feat['video_list'].map(
        lambda x: np.min([len(y) for y in re.split('0+', x.strip('0'))]))
    # drop the helper column
    feat.drop(['video_list'], axis=1, inplace=True)
    # merge into the feature frame
    feature = pd.merge(feature, feat, on=['user_id'], how='outer')

    ## time intervals
    # interval between the most recent / earliest shot and the label day
    near = 'nearest_day_video'
    fur = 'furest_day_video'
    pivot_n = pd.pivot_table(history,
                             index=['user_id'],
                             values='day',
                             aggfunc=max)
    pivot_n.rename(columns={'day': near}, inplace=True)
    pivot_n.reset_index(inplace=True)
    pivot_f = pd.pivot_table(history,
                             index=['user_id'],
                             values='day',
                             aggfunc=min)
    pivot_f.rename(columns={'day': fur}, inplace=True)
    pivot_f.reset_index(inplace=True)
    feature = pd.merge(feature, pivot_n, on=['user_id'], how='left')
    feature = pd.merge(feature, pivot_f, on=['user_id'], how='left')
    feature[near +
            '_to_label'] = feature[near].map(lambda x: feat_dates[-1] + 1 - x)
    feature[fur +
            '_to_label'] = feature[fur].map(lambda x: feat_dates[-1] + 1 - x)
    feature.drop([near, fur], axis=1, inplace=True)

    ## fill missing values
    feature.fillna(0, downcast='infer', inplace=True)
    ## return
    return feature
Example #40
    def processData(self):

        self.NcascStart = self.par_obj.NcascStart
        self.NcascEnd = self.par_obj.NcascEnd
        self.Nsub = self.par_obj.Nsub
        self.winInt = self.par_obj.winInt
        self.photonCountBin = 25  #self.par_obj.photonCountBin

        #File import
        if self.ext == 'spc':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = spc_file_import(
                self.filepath)
        if self.ext == 'asc':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = asc_file_import(
                self.filepath)
        if self.ext == 'pt2':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt2import(
                self.filepath)
        if self.ext == 'pt3':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt3import(
                self.filepath)
        if self.ext == 'ptu':
            out = ptuimport(self.filepath)
            if out != False:
                self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = out
            else:
                self.par_obj.data.pop(-1)
                self.par_obj.objectRef.pop(-1)
                self.exit = True

                return
        if self.ext == 'csv':
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = csvimport(
                self.filepath)
            #If the file is empty.
            if self.subChanArr == None:
                #Undoes any preparation of resource.
                self.par_obj.data.pop(-1)
                self.par_obj.objectRef.pop(-1)
                self.exit = True

                return

        #Colour assigned to file.
        self.color = self.par_obj.colors[self.unqID % len(self.par_obj.colors)]

        #How many channels there are in the files.

        self.ch_present = np.sort(np.unique(np.array(self.subChanArr)))
        if self.ext == 'pt3' or self.ext == 'ptu' or self.ext == 'pt2':
            self.numOfCH = self.ch_present.__len__(
            ) - 1  #Minus 1 because not interested in channel 15.
        else:
            self.numOfCH = self.ch_present.__len__()

        #Finds the numbers which address the channels.

        #Calculates decay function for both channels.
        self.photonDecayCh1, self.decayScale1 = delayTime2bin(
            np.array(self.dTimeArr), np.array(self.subChanArr),
            self.ch_present[0], self.winInt)

        if self.numOfCH == 2:
            self.photonDecayCh2, self.decayScale2 = delayTime2bin(
                np.array(self.dTimeArr), np.array(self.subChanArr),
                self.ch_present[1], self.winInt)

        #Time series of photon counts. For visualisation.

        self.timeSeries1, self.timeSeriesScale1 = delayTime2bin(
            np.array(self.trueTimeArr) / 1000000, np.array(self.subChanArr),
            self.ch_present[0], self.photonCountBin)

        unit = self.timeSeriesScale1[-1] / self.timeSeriesScale1.__len__()

        #Converts to counts per
        self.kcount_CH1 = np.average(self.timeSeries1)

        raw_count = np.average(
            self.timeSeries1
        )  #This is the unnormalised intensity count for int_time duration (the first moment)
        var_count = np.var(self.timeSeries1)

        self.brightnessNandBCH0 = (((var_count - raw_count) / (raw_count)) /
                                   (float(unit)))
        if (var_count - raw_count) == 0:
            self.numberNandBCH0 = 0
        else:
            self.numberNandBCH0 = (raw_count**2 / (var_count - raw_count))

        if self.numOfCH == 2:

            self.timeSeries2, self.timeSeriesScale2 = delayTime2bin(
                np.array(self.trueTimeArr) / 1000000,
                np.array(self.subChanArr), self.ch_present[1],
                self.photonCountBin)
            unit = self.timeSeriesScale2[-1] / self.timeSeriesScale2.__len__()
            self.kcount_CH2 = np.average(self.timeSeries2)
            raw_count = np.average(
                self.timeSeries2
            )  #This is the unnormalised intensity count for int_time duration (the first moment)
            var_count = np.var(self.timeSeries2)
            self.brightnessNandBCH1 = (((var_count - raw_count) /
                                        (raw_count)) / (float(unit)))
            if (var_count - raw_count) == 0:
                self.numberNandBCH1 = 0
            else:
                self.numberNandBCH1 = (raw_count**2 / (var_count - raw_count))
            self.CV = calc_coincidence_value(self)

        #Calculates the Auto and Cross-correlation functions.
        self.crossAndAuto(np.array(self.trueTimeArr),
                          np.array(self.subChanArr))

        if self.fit_obj != None:
            #If fit object provided then creates fit objects.
            if self.objId1 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId1 = corrObj.objId
                self.objId1.parent_name = 'point FCS'
                self.objId1.parent_uqid = 'point FCS'
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId1.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId1.name = self.name + '_CH0_Auto_Corr'
                self.objId1.ch_type = 0  #channel 0 Auto
                self.objId1.siblings = None
                self.objId1.prepare_for_fit()
                self.objId1.kcount = self.kcount_CH1

            self.objId1.autoNorm = np.array(self.autoNorm[:, 0, 0]).reshape(-1)
            self.objId1.autotime = np.array(self.autotime).reshape(-1)
            self.objId1.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId1.max = np.max(self.objId1.autoNorm)
            self.objId1.min = np.min(self.objId1.autoNorm)

            if self.numOfCH == 2:
                self.objId1.CV = self.CV

                if self.objId3 == None:
                    corrObj = corrObject(self.filepath, self.fit_obj)
                    self.objId3 = corrObj.objId
                    self.objId3.parent_name = 'point FCS'
                    self.objId3.parent_uqid = 'point FCS'
                    self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
                    self.fit_obj.objIdArr.append(corrObj.objId)
                    self.objId3.name = self.name + '_CH1_Auto_Corr'
                    self.objId3.ch_type = 1  #channel 1 Auto
                    self.objId3.siblings = None
                    self.objId3.prepare_for_fit()
                    self.objId3.kcount = self.kcount_CH2

                self.objId3.autoNorm = np.array(self.autoNorm[:, 1,
                                                              1]).reshape(-1)
                self.objId3.autotime = np.array(self.autotime).reshape(-1)
                self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId3.max = np.max(self.objId3.autoNorm)
                self.objId3.min = np.min(self.objId3.autoNorm)
                self.objId3.CV = self.CV
                if self.objId2 is None:
                    corrObj = corrObject(self.filepath, self.fit_obj)
                    self.objId2 = corrObj.objId
                    self.objId2.parent_name = 'point FCS'
                    self.objId2.parent_uqid = 'point FCS'
                    self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
                    self.fit_obj.objIdArr.append(corrObj.objId)
                    self.objId2.name = self.name + '_CH01_Cross_Corr'
                    self.objId2.ch_type = 2  #01cross
                    self.objId2.siblings = None
                    self.objId2.prepare_for_fit()

                self.objId2.autoNorm = np.array(self.autoNorm[:, 0,
                                                              1]).reshape(-1)
                self.objId2.autotime = np.array(self.autotime).reshape(-1)
                self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId2.max = np.max(self.objId2.autoNorm)
                self.objId2.min = np.min(self.objId2.autoNorm)
                self.objId2.CV = self.CV

                if self.objId4 is None:
                    corrObj = corrObject(self.filepath, self.fit_obj)
                    self.objId4 = corrObj.objId
                    self.objId4.parent_name = 'point FCS'
                    self.objId4.parent_uqid = 'point FCS'
                    self.objId4.param = copy.deepcopy(self.fit_obj.def_param)
                    self.fit_obj.objIdArr.append(corrObj.objId)
                    self.objId4.name = self.name + '_CH10_Cross_Corr'
                    self.objId4.ch_type = 3  #10cross
                    self.objId4.siblings = None
                    self.objId4.prepare_for_fit()

                self.objId4.autoNorm = np.array(self.autoNorm[:, 1,
                                                              0]).reshape(-1)
                self.objId4.autotime = np.array(self.autotime).reshape(-1)
                self.objId4.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId4.max = np.max(self.objId4.autoNorm)
                self.objId4.min = np.min(self.objId4.autoNorm)
                self.objId4.CV = self.CV
            self.fit_obj.fill_series_list()
        self.dTimeMin = 0
        self.dTimeMax = np.max(self.dTimeArr)
        self.subDTimeMin = self.dTimeMin
        self.subDTimeMax = self.dTimeMax
        self.exit = False
        #del self.subChanArr
        #del self.trueTimeArr
        del self.dTimeArr
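The Number & Brightness step above reduces each binned count trace to an apparent brightness, B = (variance - mean) / mean scaled by the bin width, and an apparent number, N = mean**2 / (variance - mean), guarding against a zero denominator. A minimal stand-alone sketch of that calculation on a synthetic trace (the function name and the bin width are illustrative, not part of the original code):

import numpy as np

def number_and_brightness(counts, bin_width):
    """Apparent number and brightness (N&B) from a binned photon-count trace."""
    mean_count = np.average(counts)   # first moment of the trace
    var_count = np.var(counts)        # second central moment
    excess = var_count - mean_count
    brightness = (excess / mean_count) / float(bin_width)
    number = 0 if excess == 0 else mean_count ** 2 / excess
    return number, brightness

rng = np.random.default_rng(0)
trace = rng.poisson(lam=4.0, size=10_000)   # pure shot noise, so B should be close to 0
print(number_and_brightness(trace, bin_width=0.001))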
Exemple #41
0
def produce_plots():
    # Generate Arrays of Random numbers
    test_arrays = [
        np.random.random((2**5, 2**5)),
        np.random.random((2**6, 2**6)),
        np.random.random(
            (2**7, 2**7)),  # DFT Mean around 5.5s, var around 0.0119s
        np.random.random(
            (2**8, 2**8)),  # DFT Mean around 42.838s, var around 1.19s
        np.random.random((
            2**9,
            2**9)),  # DFT Mean around 372.367s, var around 5.299s (Super long)
        np.random.random((2**10, 2**10))  # (Going to take way too long to run)
    ]
    # Store results
    dimensions_array = []
    dft_mean_array = []
    dft_variance_array = []
    fft_mean_array = []
    fft_variance_array = []

    for array in test_arrays:
        # Store problem size and append to designated array
        dimension = array.shape[0]
        dimensions_array.append(dimension)

        dft_results = []
        fft_results = []
        for i in range(10):
            # Naive DFT Method
            start_time = time.time()
            fouriertransform.dft_2d(array)
            end_time = time.time()
            dft_results.append(end_time - start_time)

            # FFT Method
            start_time = time.time()
            fouriertransform.fft_2d(array)
            end_time = time.time()
            fft_results.append(end_time - start_time)

        # Store dft mean and variance
        dft_mean = np.mean(dft_results)
        dft_mean_array.append(dft_mean)
        dft_variance = np.var(dft_results)
        dft_variance_array.append(dft_variance)

        # Store fft mean and variance
        fft_mean = np.mean(fft_results)
        fft_mean_array.append(fft_mean)
        fft_variance = np.var(fft_results)
        fft_variance_array.append(fft_variance)

        # Print mean and variance
        print('Array Dimensions: {} by {}'.format(dimension, dimension))
        print("----------------------------------------")
        print("DFT Mean: ", dft_mean)
        print("FFT Mean: ", np.mean(fft_results))
        print("DFT Variance: ", dft_variance)
        print("FFT Variance: ", np.var(fft_results))
        print("----------------------------------------\n")

    # Plot Results
    # Error is standard deviation * 2
    dft_errors = [math.sqrt(i) * 2 for i in dft_variance_array]
    fft_errors = [math.sqrt(i) * 2 for i in fft_variance_array]

    plt.errorbar(dimensions_array,
                 dft_mean_array,
                 yerr=dft_errors,
                 ecolor="red",
                 label='DFT')
    plt.errorbar(dimensions_array,
                 fft_mean_array,
                 color='green',
                 yerr=fft_errors,
                 ecolor="red",
                 label='FFT')
    plt.title('Mean Time vs Problem Size')
    plt.xlabel('Problem Size', fontsize=14)
    plt.ylabel('Runtime (s)', fontsize=14)
    plt.legend(loc='upper left')
    plt.grid(True)
    plt.show()
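produce_plots above summarizes ten timed runs per problem size with np.mean and np.var and draws error bars of two standard deviations. A compact sketch of just that summary step, using time.perf_counter and np.fft.fft2 as a placeholder workload in place of the fouriertransform module:

import time
import numpy as np

def time_runs(fn, arg, repeats=10):
    """Return (mean, variance, yerr) of repeated runtimes, with yerr = 2 * std."""
    runtimes = []
    for _ in range(repeats):
        start = time.perf_counter()
        fn(arg)
        runtimes.append(time.perf_counter() - start)
    mean, var = np.mean(runtimes), np.var(runtimes)
    return mean, var, 2.0 * np.sqrt(var)

print(time_runs(np.fft.fft2, np.random.random((2**8, 2**8))))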
    def run_CV(self):

        cvIter = 0

        totalInstanceNum = len(self.m_targetLabel)
        print("totalInstanceNum\t", totalInstanceNum)
        indexList = [i for i in range(totalInstanceNum)]

        totalTransferNumList = []
        np.random.seed(3)
        np.random.shuffle(indexList)

        foldNum = 10
        foldInstanceNum = int(totalInstanceNum * 1.0 / foldNum)
        foldInstanceList = []

        for foldIndex in range(foldNum - 1):
            foldIndexInstanceList = indexList[foldIndex *
                                              foldInstanceNum:(foldIndex + 1) *
                                              foldInstanceNum]
            foldInstanceList.append(foldIndexInstanceList)

        foldIndexInstanceList = indexList[foldInstanceNum * (foldNum - 1):]
        foldInstanceList.append(foldIndexInstanceList)

        totalAccList = [[] for i in range(10)]
        humanAccList = [[] for i in range(10)]

        correctTransferRatioList = []
        totalTransferNumList = []
        correctTransferLabelNumList = []
        correctUntransferRatioList = []

        totalAuditorPrecisionList = []
        totalAuditorRecallList = []
        totalAuditorAccList = []

        for foldIndex in range(foldNum):
            if self.m_multipleClass:
                self.m_clf = LR(multi_class="multinomial",
                                solver='lbfgs',
                                random_state=3)
            else:
                self.m_clf = LR(random_state=3)
            self.m_auditor0 = LR(random_state=3)
            self.m_auditor1 = LR(random_state=3)

            train = []
            for preFoldIndex in range(foldIndex):
                train.extend(foldInstanceList[preFoldIndex])

            test = foldInstanceList[foldIndex]
            for postFoldIndex in range(foldIndex + 1, foldNum):
                train.extend(foldInstanceList[postFoldIndex])

            trainNum = int(totalInstanceNum * 0.9)

            targetNameFeatureTrain = self.m_targetNameFeature[train]
            targetLabelTrain = self.m_targetLabel[train]
            # targetDataFeatureTrain = self.m_targetDataFeature[train]

            targetNameFeatureTest = self.m_targetNameFeature[test]
            targetLabelTest = self.m_targetLabel[test]

            # transferLabelTest = self.m_transferLabel[test]
            transferLabelTest = []

            initExList = []
            initExList = self.pretrainSelectInit(train, foldIndex)
            # random.seed(101)
            # initExList = random.sample(train, 3)

            targetNameFeatureInit = self.m_targetNameFeature[initExList]
            targetLabelInit = self.m_targetLabel[initExList]

            print("initExList\t", initExList, targetLabelInit)

            queryIter = 0
            labeledExList = []
            unlabeledExList = []
            ###labeled index
            labeledExList.extend(initExList)
            unlabeledExList = list(set(train) - set(labeledExList))

            activeLabelNum = 3.0
            transferLabelNum = 0.0
            transferFeatureList = []
            transferFlagList0 = []
            transferFlagList1 = []

            featureDim = len(targetNameFeatureTrain[0])
            self.init_confidence_bound(featureDim, labeledExList,
                                       unlabeledExList)

            targetNameFeatureIter = targetNameFeatureInit
            targetLabelIter = targetLabelInit

            correctTransferLabelNum = 0.0
            wrongTransferLabelNum = 0.0
            correctUntransferLabelNum = 0.0
            wrongUntransferLabelNum = 0.0

            # auditorPrecisionList = []
            # auditorRecallList = []
            auditorAccList = []

            while activeLabelNum < rounds:

                # targetNameFeatureIter = self.m_targetNameFeature[labeledExList]
                # targetLabelIter = self.m_targetLabel[labeledExList]

                self.m_clf.fit(targetNameFeatureIter, targetLabelIter)

                exId = self.select_example(unlabeledExList)
                # self.update_select_confidence_bound(exId)

                # print(idx)
                activeLabelFlag = False
                transferLabelFlag, weakOracleIndex, transferLabel = self.get_transfer_flag(
                    transferFeatureList, transferFlagList0, transferFlagList1,
                    exId, activeLabelNum)

                exLabel = -1
                if transferLabelFlag:
                    self.m_weakLabeledIDList.append(exId)

                    transferLabelNum += 1.0
                    activeLabelFlag = False

                    exLabel = transferLabel
                    targetNameFeatureIter = np.vstack(
                        (targetNameFeatureIter,
                         self.m_targetNameFeature[exId]))
                    targetLabelIter = np.hstack((targetLabelIter, exLabel))
                    # targetNameFeatureIter.append(self.m_targetNameFeature[exId])
                    # targetLabelIter.append(exLabel)

                    if exLabel == self.m_targetLabel[exId]:
                        correctTransferLabelNum += 1.0
                        print("queryIter\t", queryIter)
                    else:
                        wrongTransferLabelNum += 1.0
                        print("query iteration", queryIter,
                              "error transfer label\t", exLabel, "true label",
                              self.m_targetLabel[exId])
                else:
                    self.m_strongLabeledIDList.append(exId)
                    self.update_judge_confidence_bound(exId)
                    activeLabelNum += 1.0
                    activeLabelFlag = True

                    exLabel = self.m_targetLabel[exId]
                    targetNameFeatureIter = np.vstack(
                        (targetNameFeatureIter,
                         self.m_targetNameFeature[exId]))
                    targetLabelIter = np.hstack((targetLabelIter, exLabel))
                    # targetNameFeatureIter.append(self.m_targetNameFeature[exId])
                    # targetLabelIter.append(exLabel)

                    weakLabel0 = self.m_transferLabel0[exId]
                    weakLabel1 = self.m_transferLabel1[exId]

                    transferFeatureList.append(self.m_targetNameFeature[exId])

                    if weakLabel0 == exLabel:
                        correctUntransferLabelNum += 1.0
                        transferFlagList0.append(1.0)
                    else:
                        wrongUntransferLabelNum += 1.0
                        transferFlagList0.append(0.0)

                    if weakLabel1 == exLabel:
                        correctUntransferLabelNum += 1.0
                        transferFlagList1.append(1.0)
                    else:
                        wrongUntransferLabelNum += 1.0
                        transferFlagList1.append(0.0)

                    auditorAcc = self.getAuditorMetric(transferFeatureList,
                                                       transferFlagList0,
                                                       transferFlagList1,
                                                       targetNameFeatureTest,
                                                       transferLabelTest,
                                                       targetLabelTest)
                    print("auditorAcc", auditorAcc)

                    auditorAccList.append(auditorAcc)

                labeledExList.append(exId)
                unlabeledExList.remove(exId)

                acc = self.get_pred_acc(targetNameFeatureTest, targetLabelTest,
                                        targetNameFeatureIter, targetLabelIter)
                totalAccList[cvIter].append(acc)
                if activeLabelFlag:
                    humanAccList[cvIter].append(acc)
                queryIter += 1

            totalAuditorAccList.append(auditorAccList)

            transferLabelNum = len(self.m_weakLabeledIDList)
            totalTransferNumList.append(transferLabelNum)
            correctTransferLabelNumList.append(correctTransferLabelNum)

            cvIter += 1

        print("transfer num\t", np.mean(totalTransferNumList),
              np.sqrt(np.var(totalTransferNumList)))
        print("correct transfer num\t", np.mean(correctTransferLabelNumList),
              np.sqrt(np.var(correctTransferLabelNumList)))

        AuditorAccFile = modelVersion + "_auditor_acc.txt"
        writeFile(totalAuditorAccList, AuditorAccFile)

        totalACCFile = modelVersion + "_acc.txt"
        writeFile(totalAccList, totalACCFile)

        humanACCFile = modelVersion + "_human_acc.txt"
        writeFile(humanAccList, humanACCFile)
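run_CV above builds its folds by shuffling the index list and slicing it into foldNum equal chunks, with the leftover indices appended to the last fold. A small sketch of that splitting logic in isolation, with a toy instance count:

import numpy as np

def make_folds(total_instance_num, fold_num=10, seed=3):
    index_list = list(range(total_instance_num))
    np.random.seed(seed)
    np.random.shuffle(index_list)
    fold_size = int(total_instance_num * 1.0 / fold_num)
    folds = [index_list[i * fold_size:(i + 1) * fold_size]
             for i in range(fold_num - 1)]
    folds.append(index_list[fold_size * (fold_num - 1):])   # remainder goes to the last fold
    return folds

print([len(fold) for fold in make_folds(103)])   # nine folds of 10 and one fold of 13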
    def find_best_network(self, T_val=100):
        self._open_hp_files()
        
        best_network = None
        best_ll = -float('inf')
        best_tau = 0
        best_dropout = 0
        best_HIDDEN_UNITS = []
        for dropout_rate in self._DROPOUT_RATES:
            for tau in self._TAU_VALUES:
                for n_hidden in self._HIDDEN_UNITS_FILE:
                    
                    print('Grid search step: Tau: ' + str(tau) +
                          ' Dropout rate: ' + str(dropout_rate) +
                          ' Hidden units: ' + str(n_hidden))

                    network = self.mcd_model.model_runner(
                        self.X_train, self.y_train,
                        dropout_prob=dropout_rate,
                        n_epochs=self.n_epochs,
                        tau=tau,
                        batch_size=self.batch_size,
                        lengthscale=1e-2,
                        n_hidden=n_hidden
                    )
                    
                    print('Starting prediction using validation data..')
                    probs_mc_dropout = []
                    self.model = network
                    T = T_val
                    for t_i in range(T):
                        print('T: ', t_i)
                        probs_mc_dropout += [self.model.predict(self.X_val, batch_size=self.batch_size, verbose=1)]
                    predictive_mean = np.mean(probs_mc_dropout, axis=0)
                    predictive_variance = np.var(probs_mc_dropout, axis=0)

                    # obtain the predictive log-likelihood on the validation set
                    ll = self.log_likelihood(self.y_val, predictive_mean, tau, T)
                    
                    if ll > best_ll:
                        best_ll = ll
                        best_network = network
                        best_tau = tau
                        best_dropout = dropout_rate
                        best_HIDDEN_UNITS = n_hidden
                        print('Best log_likelihood changed to: ' + str(best_ll))
                        print('Best tau changed to: ' + str(best_tau))
                        print('Best dropout rate changed to: ' + str(best_dropout))


        self.best_tau_val = best_tau
        self.best_dropout_val = best_dropout
        self.best_HIDDEN_UNITS_lay = best_HIDDEN_UNITS
        self.best_MSD_model = best_network

        best_val = {
            'best_tau': self.best_tau_val,
            'best_dropout': self.best_dropout_val,
            'best_HIDDEN_UNITS': self.best_HIDDEN_UNITS_lay

        }

        with open(self.hp_output_PATH, 'w') as fp:
            json.dump(best_val, fp)

        self.best_MSD_model.save(self.MCDmodel_output_PATH)
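find_best_network scores each configuration with Monte Carlo dropout: T stochastic forward passes over the validation set, whose element-wise mean and variance give the predictive mean and the predictive uncertainty that feed the log-likelihood. A minimal sketch of that aggregation step, with random arrays standing in for self.model.predict:

import numpy as np

rng = np.random.default_rng(0)
T, n_val, n_out = 100, 32, 1          # hypothetical shapes for the sketch

# each entry would normally be model.predict(X_val, ...) with dropout kept active
probs_mc_dropout = [rng.normal(loc=0.5, scale=0.1, size=(n_val, n_out))
                    for _ in range(T)]

predictive_mean = np.mean(probs_mc_dropout, axis=0)       # shape (n_val, n_out)
predictive_variance = np.var(probs_mc_dropout, axis=0)    # per-sample model uncertainty
print(predictive_mean.shape, float(predictive_variance.mean()))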
def plot_results(results,
                 W,
                 poss,
                 shape,
                 epochs,
                 explore_prop,
                 performance_check,
                 sample_amount=3):

    # Make sure we do not sample more states than are available in the results
    sample_amount_check = True
    while sample_amount_check:
        if sample_amount * 2 > len(results):
            sample_amount -= 1
            print("sample_amount reduced by 1")
        else:
            sample_amount_check = False

    top_results = []
    mean_var_top_results = []
    top_poss = []
    bottom_results = []
    mean_var_bottom_results = []
    bottom_poss = []
    title = f"Value at highest {sample_amount} states"

    # Remove end state (2,2)
    del results[2 * shape[1] + 2]
    del poss[2 * shape[1] + 2]

    for i in range(sample_amount):
        index_top = np.argmax(results, axis=0)[epochs - 1]
        top_results.append(results[index_top])
        mean_var_top_results.append(
            (np.mean(results[index_top][:int(explore_prop * epochs)]),
             np.var(results[index_top][:int(explore_prop * epochs)])))
        top_poss.append(poss[index_top])

        del results[index_top]
        del poss[index_top]

        is_wall = True
        while is_wall:
            index_bottom = np.argmin(results, axis=0)[epochs - 1]
            if poss[index_bottom] in W:
                del results[index_bottom]
                del poss[index_bottom]
            else:
                is_wall = False

        bottom_results.append(results[index_bottom])
        mean_var_bottom_results.append(
            (np.mean(results[index_bottom][:int(explore_prop * epochs)]),
             np.var(results[index_bottom][:int(explore_prop * epochs)])))
        bottom_poss.append(poss[index_bottom])

        del results[index_bottom]
        del poss[index_bottom]

    print_var = False
    for i in range(len(top_poss)):
        plt.plot(top_results[i],
                 label=(f"{top_poss[i]}" + f" Top No. {i+1}" +
                        (" #Start" if top_poss[i] == (5, 0) else
                         "  #End" if top_poss[i] == (2, 2) else "") +
                        (f" {mean_var_top_results[i]}" if print_var else "")))
    plt.xlabel("Epochs")
    plt.ylabel("Value")
    xmin, xmax, ymin, ymax = plt.axis()
    # plt.vlines(epochs*explore_prop, ymin, ymax, label="explore to exploit", linestyle="dotted")
    plt.legend(loc="best", framealpha=1)
    plt.title(f"Value at the highest {sample_amount} states")
    plt.draw()
    plt.waitforbuttonpress()
    plt.clf()

    plt.plot(performance_check)
    plt.title("Learning curve")
    plt.xlabel("Epochs")
    plt.ylabel("Error in shortest paths")
    plt.draw()
    plt.waitforbuttonpress()
    plt.clf()

    # for i in range(len(bottom_poss)):
    #     plt.plot(bottom_results[i], label=(f"{bottom_poss[i]}" + f" Bottom No. {i+1}" + (" #Start" if bottom_poss[i]==(5,0) else "  #End" if bottom_poss[i] == (2,2) else "") + (f" {mean_var_bottom_results[i]}" if print_var else "")))
    # plt.xlabel("Epochs")
    # plt.ylabel("Value")
    # xmin, xmax, ymin, ymax = plt.axis()
    # plt.vlines(epochs*explore_prop, ymin, ymax, label="explore to exploit", linestyle="dotted")
    # plt.legend(loc="best", framealpha=1)
    # plt.title(f"Value at the lowest {sample_amount} states")
    # plt.draw()
    # plt.waitforbuttonpress()
    # plt.clf()

    plt.close()
Exemple #45
0
def batchnorm_forward(x, gamma, beta, bn_param):
    """
    Forward pass for batch normalization.

    During training the sample mean and (uncorrected) sample variance are
    computed from minibatch statistics and used to normalize the incoming data.
    During training we also keep an exponentially decaying running mean of the
    mean and variance of each feature, and these averages are used to normalize
    data at test-time.

    At each timestep we update the running averages for mean and variance using
    an exponential decay based on the momentum parameter:

    running_mean = momentum * running_mean + (1 - momentum) * sample_mean
    running_var = momentum * running_var + (1 - momentum) * sample_var

    Note that the batch normalization paper suggests a different test-time
    behavior: they compute sample mean and variance for each feature using a
    large number of training images rather than using a running average. For
    this implementation we have chosen to use running averages instead since
    they do not require an additional estimation step; the torch7
    implementation of batch normalization also uses running averages.

    Input:
    - x: Data of shape (N, D)
    - gamma: Scale parameter of shape (D,)
    - beta: Shift parameter of shape (D,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance.
      - running_mean: Array of shape (D,) giving running mean of features
      - running_var: Array of shape (D,) giving running variance of features

    Returns a tuple of:
    - out: of shape (N, D)
    - cache: A tuple of values needed in the backward pass
    """
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    N, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))

    out, cache = None, None
    if mode == 'train':
        #######################################################################
        # TODO: Implement the training-time forward pass for batch norm.      #
        # Use minibatch statistics to compute the mean and variance, use      #
        # these statistics to normalize the incoming data, and scale and      #
        # shift the normalized data using gamma and beta.                     #
        #                                                                     #
        # You should store the output in the variable out. Any intermediates  #
        # that you need for the backward pass should be stored in the cache   #
        # variable.                                                           #
        #                                                                     #
        # You should also use your computed sample mean and variance together #
        # with the momentum variable to update the running mean and running   #
        # variance, storing your result in the running_mean and running_var   #
        # variables.                                                          #
        #######################################################################
        sample_mean = np.mean(x, axis=0)
        sample_var = np.var(x, axis=0)
        x_norm = (x - sample_mean) / np.sqrt(sample_var + eps)
        out = x_norm * gamma + beta

        running_mean = momentum * running_mean + (1 - momentum) * sample_mean
        running_var = momentum * running_var + (1 - momentum) * sample_var

        cache = (x_norm, gamma, x - sample_mean, 1 / np.sqrt(sample_var + eps))
        #######################################################################
        #                           END OF YOUR CODE                          #
        #######################################################################
    elif mode == 'test':
        #######################################################################
        # TODO: Implement the test-time forward pass for batch normalization. #
        # Use the running mean and variance to normalize the incoming data,   #
        # then scale and shift the normalized data using gamma and beta.      #
        # Store the result in the out variable.                               #
        #######################################################################
        x_norm = (x - running_mean) / np.sqrt(running_var + eps)
        out = x_norm * gamma + beta
        #######################################################################
        #                          END OF YOUR CODE                           #
        #######################################################################
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # Store the updated running means back into bn_param
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache
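A quick sanity check of the training branch above: with gamma = 1 and beta = 0, every feature of the output should have roughly zero mean and unit variance, and the running statistics written back into bn_param should have moved toward the batch statistics. A hedged usage sketch, assuming batchnorm_forward from above is in scope with numpy imported as np:

import numpy as np

N, D = 100, 4
x = np.random.randn(N, D) * 3.0 + 7.0
gamma, beta = np.ones(D), np.zeros(D)
bn_param = {'mode': 'train'}

out, _ = batchnorm_forward(x, gamma, beta, bn_param)
print(np.mean(out, axis=0))       # approximately 0 for every feature
print(np.var(out, axis=0))        # approximately 1 for every feature
print(bn_param['running_mean'])   # (1 - momentum) * sample_mean after one step

bn_param['mode'] = 'test'
out_test, _ = batchnorm_forward(x, gamma, beta, bn_param)   # normalizes with the running averages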
Exemple #46
0
def outputTemp(Mean, Variance):
    print(
        time.strftime("%H:%M:%S", time.localtime()) +
        "  {:>5.2f} {:>4.3f}".format(Mean, Variance))


if __name__ == '__main__':
    outputTimeStep = 1
    screenOutput = False

    filename = time.strftime("%m-%d-at-%H:%M-year%YRecordLog.txt",
                             time.localtime())
    with open(filename, 'w') as f:
        if screenOutput:
            outputBanner()
        f.write("{} {} {}\n".format('Time', 'ObjectiveTemp', 'Variance'))
        while True:
            startTime = time.time()
            tempList = []
            while time.time() - startTime < outputTimeStep:
                tempList.append(chanRef.voltage * 10)
                time.sleep(outputTimeStep / 50)  # integer division would sleep 0 s and busy-wait
            tempArray = np.array(tempList)
            Mean, Var = np.mean(tempArray), np.var(tempArray)
            f.write(
                time.strftime("%H:%M:%S", time.localtime()) +
                "  {:>5.2f} {:>4.3f}\n".format(Mean, Var))
            if screenOutput:
                outputTemp(Mean, Var)
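The logging loop above collects roughly one second of voltage samples, converts them to temperatures, and records the window's mean and variance. A sketch of the same windowed summary with a stubbed sensor read in place of chanRef.voltage (the stub is hypothetical):

import time
import numpy as np

def read_temperature():
    """Hypothetical stand-in for chanRef.voltage * 10."""
    return 25.0 + np.random.normal(scale=0.05)

output_time_step = 1.0
start_time = time.time()
temp_list = []
while time.time() - start_time < output_time_step:
    temp_list.append(read_temperature())
    time.sleep(output_time_step / 50)        # about 50 samples per window

temp_array = np.array(temp_list)
mean, var = np.mean(temp_array), np.var(temp_array)
print(time.strftime("%H:%M:%S"), "{:>5.2f} {:>4.3f}".format(mean, var))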
    def test_hermitian(self, device, tol, skip_if):
        """Test that a tensor product involving qml.Hermitian works correctly"""
        n_wires = 3
        dev = device(n_wires)
        skip_if(dev, {"supports_tensor_observables": False})

        theta = 0.432
        phi = 0.123
        varphi = -0.543

        A_ = 0.1 * np.array([
            [-6, 2 + 1j, -3, -5 + 2j],
            [2 - 1j, 0, 2 - 1j, -5 + 4j],
            [-3, 2 + 1j, 0, -4 + 3j],
            [-5 - 2j, -5 - 4j, -4 - 3j, -6],
        ])

        @qml.qnode(dev)
        def circuit():
            qml.RX(theta, wires=[0])
            qml.RX(phi, wires=[1])
            qml.RX(varphi, wires=[2])
            qml.CNOT(wires=[0, 1])
            qml.CNOT(wires=[1, 2])
            return qml.sample(
                qml.PauliZ(wires=[0]) @ qml.Hermitian(A_, wires=[1, 2]))

        res = circuit()

        # res should only contain the eigenvalues of the
        # tensor product of Z with the Hermitian matrix A_
        Z = np.diag([1, -1])
        eigvals = np.linalg.eigvalsh(np.kron(Z, A_))
        assert np.allclose(sorted(np.unique(res)),
                           sorted(eigvals),
                           atol=tol(False))

        mean = np.mean(res)
        expected = (0.1 * 0.5 *
                    (-6 * np.cos(theta) *
                     (np.cos(varphi) + 1) - 2 * np.sin(varphi) *
                     (np.cos(theta) + np.sin(phi) - 2 * np.cos(phi)) +
                     3 * np.cos(varphi) * np.sin(phi) + np.sin(phi)))
        assert np.allclose(mean, expected, atol=tol(False))

        var = np.var(res)
        expected = (
            0.01 *
            (1057 - np.cos(2 * phi) + 12 *
             (27 + np.cos(2 * phi)) * np.cos(varphi) -
             2 * np.cos(2 * varphi) * np.sin(phi) *
             (16 * np.cos(phi) + 21 * np.sin(phi)) + 16 * np.sin(2 * phi) - 8 *
             (-17 + np.cos(2 * phi) + 2 * np.sin(2 * phi)) * np.sin(varphi) -
             8 * np.cos(2 * theta) *
             (3 + 3 * np.cos(varphi) + np.sin(varphi))**2 - 24 * np.cos(phi) *
             (np.cos(phi) + 2 * np.sin(phi)) * np.sin(2 * varphi) -
             8 * np.cos(theta) *
             (4 * np.cos(phi) *
              (4 + 8 * np.cos(varphi) + np.cos(2 * varphi) -
               (1 + 6 * np.cos(varphi)) * np.sin(varphi)) + np.sin(phi) *
              (15 + 8 * np.cos(varphi) - 11 * np.cos(2 * varphi) +
               42 * np.sin(varphi) + 3 * np.sin(2 * varphi)))) / 16)
        assert np.allclose(var, expected, atol=tol(False))
def makeViolinPlots(dataForViolinO, theAx, ytitle):
    dataForViolin = dict()
    for ke in dataForViolinO.keys():
        # if ke != 'GSE47652':
        #	continue
        dataForViolin[ke] = dataForViolinO[ke]

    ax = theAx

    titles = dataForViolin.keys()
    allData = []
    numFns = len(titles)
    for ke in titles:
        dataRAND = dataForViolin[ke][1]
        if np.var(dataRAND) < 0.000001:
            dataRAND[0] = dataRAND[1] + 0.0001
        dataRSS = dataForViolin[ke][2]
        if np.var(dataRSS) < 0.000001:
            dataRSS[0] = dataRSS[1] + 0.0001
        allData.append(dataRAND)
        allData.append(dataRSS)

    positions = [[0.7 * i, 0.7 * i] for i in range(1, numFns + 1)]
    positions = [item for sublist in positions for item in sublist]
    ax.plot([positions[0] - 1.05, positions[-1] + 0.35], [1, 1],
            'r:',
            alpha=0.7,
            zorder=-1)

    violin_parts = ax.violinplot(allData,
                                 showmeans=True,
                                 positions=positions,
                                 showextrema=False)
    # print(str(violin_parts['cmeans'].get_segments()))
    count = 0
    Osegs = violin_parts['cmeans'].get_segments()
    segs = []
    for b in violin_parts['bodies']:
        # b.set_alpha(0.75)
        thisSeg = Osegs[count]
        midPointX = thisSeg[0][0] + (thisSeg[1][0] - thisSeg[0][0]) / 2.0
        if count % 2 == 0:
            m = np.mean(b.get_paths()[0].vertices[:, 0])
            b.get_paths()[0].vertices[:, 0] = np.clip(
                b.get_paths()[0].vertices[:, 0], -np.inf, m)
            b.set_color('b')
            b.set_facecolor('blue')
            newSeg = np.array([[thisSeg[0][0], thisSeg[0][1]],
                               [midPointX + 0.01, thisSeg[1][1]]])
        else:
            m = np.mean(b.get_paths()[0].vertices[:, 0])
            b.get_paths()[0].vertices[:, 0] = np.clip(
                b.get_paths()[0].vertices[:, 0], m, np.inf)
            b.set_color('g')
            b.set_facecolor('green')
            newSeg = np.array([[midPointX - 0.01, thisSeg[0][1]],
                               [thisSeg[1][0], thisSeg[1][1]]])
        count += 1
        segs.append(newSeg)
        b.set_alpha(0.99)

    violin_parts['cmeans'].set_color('black')
    # print("seg:")

    violin_parts['cmeans'].set_segments(segs)

    # print(str(violin_parts['cmeans'].get_segments()))
    # print("__\n")

    ax.set_xticks([0.7 * i for i in range(1, numFns + 1)])
    ax.set_xticklabels(titles, rotation='vertical')
    ax.set_ylabel(ytitle)

    ylim = list(ax.get_ylim())
    if ylim[0] < 1 and ylim[1] <= 1:
        ylim[1] = 1.1
    if ylim[0] >= 1 and ylim[1] > 1:
        ylim[0] = 0.9
    ax.set_ylim(ylim)
    ax.set_yticks(ax.get_yticks()[1:-1])

    # Custom legend
    import matplotlib.patches as mpatches
    pB = mpatches.Patch(color='blue', linewidth=0)
    pG = mpatches.Patch(color='green', linewidth=0)
Exemple #49
0
MpsdWelch = periodogramaWelch(x,K,O)     # This one is my own implementation
psdWelch = np.zeros(shape=(201,R)) 
for i in range(R):
    f,psdWelch[:,i] = signal.welch(x[:,i], fs, 'bartlett',nperseg=400, noverlap=200)    # see: scipy.signal.welch

plt.figure(1)
plt.plot(f,20*np.log10(psdWelch))

Mf = np.arange(len(MpsdWelch))*(fs/(len(MpsdWelch)*2))
plt.figure(2)
plt.plot(Mf,20*np.log10(MpsdWelch))

# Obtain the frequency of the sinusoid using the estimator
estF0 = f[np.argmax(psdWelch,axis=0)]
valEspF0 = np.mean(estF0)
varEstF0 = np.var(estF0)

print(estF0)
print(valEspF0)
print(varEstF0)



'''
f = np.arange(len(psdWelch))*(fs/(len(psdWelch)*2))

# Obtain the frequency of the sinusoid using the estimator
estF0 = f[np.argmax(psdWelch,axis=0)]

valEspF0 = np.mean(estF0)
varEstF0 = np.var(estF0)
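The script above estimates the sinusoid frequency as the argmax of each Welch periodogram and then summarizes the estimator with its mean and variance over the R realizations. A self-contained sketch of that estimator on synthetic data, assuming a 1 kHz tone sampled at fs = 8000 Hz (all values here are illustrative):

import numpy as np
from scipy import signal

fs, f0, N, R = 8000.0, 1000.0, 4000, 20
t = np.arange(N) / fs
rng = np.random.default_rng(0)

estF0 = []
for _ in range(R):
    x_r = np.sin(2 * np.pi * f0 * t) + rng.normal(scale=0.5, size=N)
    f, psd = signal.welch(x_r, fs, 'bartlett', nperseg=400, noverlap=200)
    estF0.append(f[np.argmax(psd)])          # frequency of the spectral peak

print(np.mean(estF0), np.var(estF0))         # mean and variance of the estimator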
Exemple #50
0
    except Exception:                                 # protect against API changes
        pass
    if origVariance:
        fig.suptitle("Diffim residuals: Normalized by sqrt(input variance)", fontsize=titleFs)
    else:
        fig.suptitle("Diffim residuals: Normalized by sqrt(diffim variance)", fontsize=titleFs)

    sp1 = pylab.subplot(221)
    sp2 = pylab.subplot(222, sharex=sp1, sharey=sp1)
    sp3 = pylab.subplot(223, sharex=sp1, sharey=sp1)
    sp4 = pylab.subplot(224, sharex=sp1, sharey=sp1)
    xs  = np.arange(-5, 5.05, 0.1)
    ys  = 1. / np.sqrt(2 * np.pi) * np.exp( -0.5 * xs**2 )

    sp1.hist(candidateResids, bins=xs, normed=True, alpha=0.5, label="N(%.2f, %.2f)"
             % (np.mean(candidateResids), np.var(candidateResids)))
    sp1.plot(xs, ys, "r-", lw=2, label="N(0,1)")
    sp1.set_title("Candidates: basis fit", fontsize=titleFs-2)
    sp1.legend(loc=1, fancybox=True, shadow=True, prop = FontProperties(size=titleFs-6))

    sp2.hist(spatialResids, bins=xs, normed=True, alpha=0.5, label="N(%.2f, %.2f)"
             % (np.mean(spatialResids), np.var(spatialResids)))
    sp2.plot(xs, ys, "r-", lw=2, label="N(0,1)")
    sp2.set_title("Candidates: spatial fit", fontsize=titleFs-2)
    sp2.legend(loc=1, fancybox=True, shadow=True, prop = FontProperties(size=titleFs-6))

    sp3.hist(nonfitResids, bins=xs, normed=True, alpha=0.5, label="N(%.2f, %.2f)"
             % (np.mean(nonfitResids), np.var(nonfitResids)))
    sp3.plot(xs, ys, "r-", lw=2, label="N(0,1)")
    sp3.set_title("Control sample: spatial fit", fontsize=titleFs-2)
    sp3.legend(loc=1, fancybox=True, shadow=True, prop = FontProperties(size=titleFs-6))
data = mat_file.get('arrhythmia')

data = data[~np.all(data == 0, axis=1)]  # drop any all-zero rows
class_id = data[:, -1]
class_id[np.where(class_id > 1)] = 2
class_id = class_id - 1

data = data[:, :-1]
(N, F) = np.shape(data)

mean = np.mean(data)
std = np.std(data)
x_norm = (data - mean) / std

mean = np.mean(x_norm, 0)
var = np.var(x_norm, 0)

n_healthy = sum(class_id == 0)
n_ill = sum(class_id == 1)

# initializing the neural network graph
tf.set_random_seed(1234)
learning_rate = 1e-4
n_hidden_nodes_1 = F
n_hidden_nodes_2 = 128

x = tf.placeholder(tf.float64, [N, F])
t = tf.placeholder(tf.float64, [N, 1])

# first layer
w1 = tf.Variable(
Exemple #52
0

def func(N):
    sum = 0
    for i in range(N):
        x, y = np.random.rand(2)
        sum += np.exp(5 * abs(x - 5) + 5 * abs(y - 5))
    return sum / N


s = []
for i in range(5):
    print(func(100))
for i in range(50):
    s.append(func(1000))
print(np.var(s))


def func1(N):
    sum = 0
    for i in range(N):
        x, y = np.random.rand(2) * 2 - [1, 1]
        sum += math.cos(math.pi + 5 * x + 5 * y)
    return 2 * 2 * sum / N


b = []
for i in range(5):
    print(func1(100))
for i in range(50):
    b.append(func1(1000))
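func and func1 above are plain Monte Carlo estimators, and taking np.var over many repeated runs shows how the estimator's spread shrinks roughly like 1/N. A short sketch making that comparison explicit for the func1-style integral (vectorized here with numpy; the integrand is the one from func1):

import numpy as np

def mc_estimate(n):
    """Monte Carlo estimate of the integral of cos(pi + 5x + 5y) over [-1, 1]^2."""
    xy = np.random.rand(n, 2) * 2 - 1
    return 2 * 2 * np.mean(np.cos(np.pi + 5 * xy[:, 0] + 5 * xy[:, 1]))

for n in (100, 1000, 10000):
    estimates = [mc_estimate(n) for _ in range(50)]
    print(n, np.mean(estimates), np.var(estimates))   # the variance drops roughly as 1/n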
y_train = data_train[:,0]
y_test = data_test_val[:,0]


# normalize x and y
num_classes = len(np.unique(y_train))
'''
base = np.min(y_train)  #Check if data is 0-based
if base != 0:
    y_train -= base
    y_test -= base
'''

if input_norm:
    mean = np.mean(X_train,axis=0)
    variance = np.var(X_train,axis=0)
    X_train -= mean
    #The 1e-9 avoids dividing by zero
    X_train /= np.sqrt(variance)+1e-9
    X_test -= mean
    X_test /= np.sqrt(variance)+1e-9

#epochs = np.floor(batch_size*max_iterations / N)
#print('Train with approximately %d epochs' %(epochs))

# place for the input variables
x = tf.placeholder("float", shape=[None, D], name = 'Input_data')
y_ = tf.placeholder(tf.int64, shape=[None], name = 'Ground_truth')
keep_prob = tf.placeholder("float")
bn_train = tf.placeholder(tf.bool)  #Boolean value to guide batchnorm
Exemple #54
0
matrix_np = np.array(matrix)
print(matrix_np)
print("sum  ", matrix_np.sum())
print(matrix_np[1, 1])
print("min in array", matrix_np.min())
print("min in row", matrix_np.min(axis=0))
print("min in column", matrix_np.min(axis=1))
print("max in array", matrix_np.max())
print("max in row", matrix_np.max(axis=0))
print("max in column ", matrix_np.max(axis=1))
print("sum row", matrix_np.sum(axis=0))
print("sum column", matrix_np.sum(axis=1))
print("mean column", matrix_np.mean(axis=1))
print("mean row", matrix_np.mean(axis=0))
print("var ", np.var(matrix_np))
print("std ", np.std(matrix_np))
print("median ", np.median(matrix_np))

################ sqrt, sin, log, abs #####################
print("add  ", np.add(matrix_np, 5))
print("sqrt  ", np.sqrt(matrix_np))
print("sin  ", np.sin(matrix_np))
print("log ", np.log(matrix_np))
matrix_abs = [[-15, 35], [9, -36]]
print(np.abs(matrix_abs))
print(np.add(matrix_abs, matrix_np))
print(np.array_equal(matrix_np, matrix_np))
print(np.array_equal(matrix_np, matrix_abs))
print(np.ceil(random_numbers))
print(np.floor(random_numbers))
Exemple #55
0
    # 2. Compute the L2 norm of each timestep (row) of the utterance matrix
    timestep_norms.extend(LA.norm(mat, axis=1))

utt_norms = sorted(utt_norms)
timestep_norms = sorted(timestep_norms)

fit_utt = stats.norm.pdf(utt_norms, np.mean(utt_norms), np.std(utt_norms))
fit_timestep = stats.norm.pdf(timestep_norms, np.mean(timestep_norms),
                              np.std(timestep_norms))

#print(skew(fit_utt), kurtosis(fit_utt))

plt.plot(utt_norms, fit_utt, '-r')
plt.hist(utt_norms, bins=50, normed=True, alpha=0.5)
m, v, s, k = round(float(np.mean(utt_norms)),
                   3), round(float(np.var(utt_norms)),
                             3), round(skew(fit_utt),
                                       3), round(kurtosis(fit_utt), 3)
print(m, v, s, k)
plt.title("Mean {}, Var {}, Skew {}, Kurt {}".format(m, v, s, k))

plt.savefig(utt_norm_dist, dpi=300)

# Clear the figure
plt.clf()

plt.plot(timestep_norms, fit_timestep, '-r')
plt.hist(timestep_norms, bins=50, normed=True, alpha=0.5)
m, v, s, k = round(float(np.mean(timestep_norms)),
                   3), round(float(np.var(timestep_norms)),
                             3), round(skew(timestep_norms),
Exemple #56
0
        input_image = resized_image.transpose((2, 0, 1))

        # Repeat image according to batch size for inference.
        input_image = np.repeat(input_image[np.newaxis, :, :, :],
                                input_shape[0],
                                axis=0)

        # Inference using Bayesian SegNet
        start = time.time()
        out = net.forward_all(data=input_image)
        end = time.time()
        print('%30s' % 'Executed Bayesian SegNet in',
              str((end - start) * 1000), 'ms')

        mean_confidence = np.mean(confidence_output, axis=0, dtype=np.float64)
        var_confidence = np.var(confidence_output, axis=0, dtype=np.float64)

        # Prepare segmented image results
        classes = np.argmax(mean_confidence, axis=0)
        segmentation_bgr = np.asarray(LABEL_COLOURS[classes]).astype(np.uint8)
        segmented_image = overlay_segmentation_results(resized_image,
                                                       segmentation_bgr)

        # Prepare confidence results
        confidence = np.amax(mean_confidence, axis=0)

        # Prepare uncertainty results
        uncertainty = np.mean(var_confidence, axis=0, dtype=np.float64)

        print(np.sqrt(np.mean(uncertainty)))
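The Bayesian SegNet block reduces the stack of stochastic softmax outputs to a mean confidence volume, whose argmax gives the class map, and a variance volume whose per-pixel average serves as the uncertainty estimate. A sketch of that reduction on random arrays, with hypothetical shapes standing in for confidence_output:

import numpy as np

rng = np.random.default_rng(0)
batch, n_classes, h, w = 8, 5, 16, 16                       # hypothetical output shape
confidence_output = rng.random((batch, n_classes, h, w))    # stand-in for the sampled softmax outputs

mean_confidence = np.mean(confidence_output, axis=0, dtype=np.float64)
var_confidence = np.var(confidence_output, axis=0, dtype=np.float64)

classes = np.argmax(mean_confidence, axis=0)                      # (h, w) class map
confidence = np.amax(mean_confidence, axis=0)                     # winning-class confidence per pixel
uncertainty = np.mean(var_confidence, axis=0, dtype=np.float64)   # per-pixel model uncertainty
print(classes.shape, float(np.sqrt(np.mean(uncertainty))))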
Exemple #57
0
    def _partial_fit(self, X, y, classes=None, _refit=False,
                     sample_weight=None):
        """Actual implementation of Gaussian NB fitting.
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like, shape (n_samples,)
            Target values.
        classes : array-like, shape (n_classes,)
            List of all the classes that can possibly appear in the y vector.
            Must be provided at the first call to partial_fit, can be omitted
            in subsequent calls.
        _refit : bool
            If true, act as though this were the first time we called
            _partial_fit (ie, throw away any past fitting and start over).
        sample_weight : array-like, shape (n_samples,), optional
            Weights applied to individual samples (1. for unweighted).
        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_X_y(X, y)

        # If the ratio of data variance between dimensions is too small, it
        # will cause numerical errors. To address this, we artificially
        # boost the variance by epsilon, a small fraction of the standard
        # deviation of the largest dimension.
        epsilon = 1e-9 * np.var(X, axis=0).max()

        if _refit:
            self.classes_ = None

        if _check_partial_fit_first_call(self, classes):
            # This is the first call to partial_fit:
            # initialize various cumulative counters
            n_features = X.shape[1]
            n_classes = len(self.classes_)
            self.theta_ = np.zeros((n_classes, n_features))
            self.sigma_ = np.zeros((n_classes, n_features))
            self.class_prior_ = np.zeros(n_classes)
            self.class_count_ = np.zeros(n_classes)
        else:
            if X.shape[1] != self.theta_.shape[1]:
                msg = "Number of features %d does not match previous data %d."
                raise ValueError(msg % (X.shape[1], self.theta_.shape[1]))
            # Put epsilon back in each time
            self.sigma_[:, :] -= epsilon

        classes = self.classes_

        unique_y = np.unique(y)
        unique_y_in_classes = in1d(unique_y, classes)

        if not np.all(unique_y_in_classes):
            raise ValueError("The target label(s) %s in y do not exist in the "
                             "initial classes %s" %
                             (y[~unique_y_in_classes], classes))

        for y_i in unique_y:
            i = classes.searchsorted(y_i)
            X_i = X[y == y_i, :]

            if sample_weight is not None:
                sw_i = sample_weight[y == y_i]
                N_i = sw_i.sum()
            else:
                sw_i = None
                N_i = X_i.shape[0]

            new_theta, new_sigma = self._update_mean_variance(
                self.class_count_[i], self.theta_[i, :], self.sigma_[i, :],
                X_i, sw_i)

            self.theta_[i, :] = new_theta
            self.sigma_[i, :] = new_sigma
            self.class_count_[i] += N_i

        self.sigma_[:, :] += epsilon
        self.class_prior_[:] = self.class_count_ / np.sum(self.class_count_)
        return self
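For a single full-batch call, the per-class statistics that _partial_fit accumulates reduce to plain per-class np.mean and np.var plus the epsilon variance floor. A numpy-only sketch of that equivalence (the arrays and names here are illustrative, not part of the class above):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(60, 3))
y = np.repeat([0, 1, 2], 20)

# mirror the fit: per-class feature means and variances, plus the epsilon variance floor
epsilon = 1e-9 * np.var(X, axis=0).max()
theta = np.array([np.mean(X[y == c], axis=0) for c in np.unique(y)])
sigma = np.array([np.var(X[y == c], axis=0) for c in np.unique(y)]) + epsilon
class_prior = np.bincount(y) / len(y)

print(theta.shape, sigma.shape, class_prior)   # (3, 3) (3, 3) [0.333... 0.333... 0.333...]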
def versuch_auswerten(versuch_werte, versuch_name, header):

    # Make the values processable (convert to numeric)
    delta_l_values = pd.to_numeric(versuch_werte.delta_l_t).values
    delta_r_values = pd.to_numeric(versuch_werte.delta_r_t).values
    delta_m_values = pd.to_numeric(versuch_werte.delta_m_t).values
    geschwindigkeit_l_values = pd.to_numeric(
        versuch_werte.geschwindigkeit_l).values
    geschwindigkeit_r_values = pd.to_numeric(
        versuch_werte.geschwindigkeit_r).values
    geschwindigkeit_m_values = pd.to_numeric(
        versuch_werte.geschwindigkeit_m).values
    tendenz_l_values = pd.to_numeric(versuch_werte.tendenz_l).values
    tendenz_r_values = pd.to_numeric(versuch_werte.tendenz_r).values
    tendenz_m_values = pd.to_numeric(versuch_werte.tendenz_m).values
    blick_l_x_values = pd.to_numeric(versuch_werte.blick_l_x).values
    blick_l_y_values = pd.to_numeric(versuch_werte.blick_l_y).values
    blick_r_x_values = pd.to_numeric(versuch_werte.blick_r_x).values
    blick_r_y_values = pd.to_numeric(versuch_werte.blick_r_y).values
    sacc_m_values = pd.to_numeric(versuch_werte.sacc_m).values
    sacc_l_values = pd.to_numeric(versuch_werte.sacc_l).values
    sacc_r_values = pd.to_numeric(versuch_werte.sacc_r).values

    # Determine the means
    # No exception handling needed here: an empty array makes np.mean() return nan instead of raising an exception
    if delta_l_values[np.nonzero(delta_l_values)].size == 0:
        mean_delta_l = -1
    else:
        mean_delta_l = np.mean(delta_l_values[np.nonzero(delta_l_values)])
    if delta_r_values[np.nonzero(delta_r_values)].size == 0:
        mean_delta_r = -1
    else:
        mean_delta_r = np.mean(delta_r_values[np.nonzero(delta_r_values)])
    if delta_m_values[np.nonzero(delta_m_values)].size == 0:
        mean_delta_m = -1
    else:
        mean_delta_m = np.mean(delta_m_values[np.nonzero(delta_m_values)])
    if geschwindigkeit_l_values[np.nonzero(
            geschwindigkeit_l_values)].size == 0:
        mean_geschwindigkeit_l = -1
    else:
        mean_geschwindigkeit_l = np.mean(
            geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)])
    if geschwindigkeit_r_values[np.nonzero(
            geschwindigkeit_r_values)].size == 0:
        mean_geschwindigkeit_r = -1
    else:
        mean_geschwindigkeit_r = np.mean(
            geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)])
    if geschwindigkeit_m_values[np.nonzero(
            geschwindigkeit_m_values)].size == 0:
        mean_geschwindigkeit_m = -1
    else:
        mean_geschwindigkeit_m = np.mean(
            geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)])

    header = np.append(header, [
        versuch_name + '_mean_delta_l', versuch_name + '_mean_delta_r',
        versuch_name + '_mean_delta_m', versuch_name +
        '_mean_geschwindigkeit_l', versuch_name + '_mean_geschwindigkeit_r',
        versuch_name + '_mean_geschwindigkeit_m'
    ])

    # Determine the maxima
    try:
        max_delta_l = np.max(delta_l_values[np.nonzero(delta_l_values)])
    except ValueError:
        max_delta_l = -1
    try:
        max_delta_r = np.max(delta_r_values[np.nonzero(delta_r_values)])
    except ValueError:
        max_delta_r = -1
    try:
        max_delta_m = np.max(delta_m_values[np.nonzero(delta_m_values)])
    except ValueError:
        max_delta_m = -1
    try:
        max_geschwindigkeit_l = np.max(
            geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)])
    except ValueError:
        max_geschwindigkeit_l = -1
    try:
        max_geschwindigkeit_r = np.max(
            geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)])
    except ValueError:
        max_geschwindigkeit_r = -1
    try:
        max_geschwindigkeit_m = np.max(
            geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)])
    except ValueError:
        max_geschwindigkeit_m = -1

    header = np.append(header, [
        versuch_name + '_max_delta_l', versuch_name + '_max_delta_r',
        versuch_name + '_max_delta_m', versuch_name + '_max_geschwindigkeit_l',
        versuch_name + '_max_geschwindigkeit_r',
        versuch_name + '_max_geschwindigkeit_m'
    ])

    # Determine the minima
    # Exception handling for the test subjects for whom only one eye was measured
    try:
        min_delta_l = np.min(delta_l_values[np.nonzero(delta_l_values)])
    except ValueError:
        min_delta_l = -1
    try:
        min_delta_r = np.min(delta_r_values[np.nonzero(delta_r_values)])
    except ValueError:
        min_delta_r = -1
    try:
        min_delta_m = np.min(delta_m_values[np.nonzero(delta_m_values)])
    except ValueError:
        min_delta_m = -1
    try:
        min_geschwindigkeit_l = np.min(
            geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)])
    except ValueError:
        min_geschwindigkeit_l = -1
    try:
        min_geschwindigkeit_r = np.min(
            geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)])
    except ValueError:
        min_geschwindigkeit_r = -1
    try:
        min_geschwindigkeit_m = np.min(
            geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)])
    except ValueError:
        min_geschwindigkeit_m = -1

    header = np.append(header, [
        versuch_name + '_min_delta_l', versuch_name + '_min_delta_r',
        versuch_name + '_min_delta_m', versuch_name + '_min_geschwindigkeit_l',
        versuch_name + '_min_geschwindigkeit_r',
        versuch_name + '_min_geschwindigkeit_m'
    ])

    # Compute the standard deviations
    if delta_l_values[np.nonzero(delta_l_values)].size == 0:
        std_delta_l = -1
    else:
        std_delta_l = np.std(delta_l_values[np.nonzero(delta_l_values)])
    if delta_r_values[np.nonzero(delta_r_values)].size == 0:
        std_delta_r = -1
    else:
        std_delta_r = np.std(delta_r_values[np.nonzero(delta_r_values)])
    if delta_m_values[np.nonzero(delta_m_values)].size == 0:
        std_delta_m = -1
    else:
        std_delta_m = np.std(delta_m_values[np.nonzero(delta_m_values)])
    if geschwindigkeit_l_values[np.nonzero(
            geschwindigkeit_l_values)].size == 0:
        std_geschwindigkeit_l = -1
    else:
        std_geschwindigkeit_l = np.std(
            geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)])
    if geschwindigkeit_r_values[np.nonzero(
            geschwindigkeit_r_values)].size == 0:
        std_geschwindigkeit_r = -1
    else:
        std_geschwindigkeit_r = np.std(
            geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)])
    if geschwindigkeit_m_values[np.nonzero(
            geschwindigkeit_m_values)].size == 0:
        std_geschwindigkeit_m = -1
    else:
        std_geschwindigkeit_m = np.std(
            geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)])

    header = np.append(header, [
        versuch_name + '_standardabweichung_delta_l',
        versuch_name + '_standardabweichung_delta_r',
        versuch_name + '_standardabweichung_delta_m',
        versuch_name + '_standardabweichung_geschwindigkeit_l',
        versuch_name + '_standardabweichung_geschwindigkeit_r',
        versuch_name + '_standardabweichung_geschwindigkeit_m'
    ])

    # Compute the variances
    if delta_l_values[np.nonzero(delta_l_values)].size == 0:
        var_delta_l = -1
    else:
        var_delta_l = np.var(delta_l_values[np.nonzero(delta_l_values)])
    if delta_r_values[np.nonzero(delta_r_values)].size == 0:
        var_delta_r = -1
    else:
        var_delta_r = np.var(delta_r_values[np.nonzero(delta_r_values)])
    if delta_m_values[np.nonzero(delta_m_values)].size == 0:
        var_delta_m = -1
    else:
        var_delta_m = np.var(delta_m_values[np.nonzero(delta_m_values)])
    if geschwindigkeit_l_values[np.nonzero(
            geschwindigkeit_l_values)].size == 0:
        var_geschwindigkeit_l = -1
    else:
        var_geschwindigkeit_l = np.var(
            geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)])
    if geschwindigkeit_r_values[np.nonzero(
            geschwindigkeit_r_values)].size == 0:
        var_geschwindigkeit_r = -1
    else:
        var_geschwindigkeit_r = np.var(
            geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)])
    if geschwindigkeit_m_values[np.nonzero(
            geschwindigkeit_m_values)].size == 0:
        var_geschwindigkeit_m = -1
    else:
        var_geschwindigkeit_m = np.var(
            geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)])

    header = np.append(header, [
        versuch_name + '_varianz_delta_l', versuch_name + '_varianz_delta_r',
        versuch_name + '_varianz_delta_m',
        versuch_name + '_varianz_geschwindigkeit_l',
        versuch_name + '_varianz_geschwindigkeit_r',
        versuch_name + '_varianz_geschwindigkeit_m'
    ])

    # Evaluate the tendency
    condition_voraus_l = np.equal(tendenz_l_values, 1)
    num_voraus_l = len(np.extract(condition_voraus_l, tendenz_l_values))
    condition_voraus_r = np.equal(tendenz_r_values, 1)
    num_voraus_r = len(np.extract(condition_voraus_r, tendenz_r_values))
    condition_voraus_m = np.equal(tendenz_m_values, 1)
    num_voraus_m = len(np.extract(condition_voraus_m, tendenz_m_values))
    condition_hinter_l = np.equal(tendenz_l_values, -1)
    num_hinter_l = len(np.extract(condition_hinter_l, tendenz_l_values))
    condition_hinter_r = np.equal(tendenz_r_values, -1)
    num_hinter_r = len(np.extract(condition_hinter_r, tendenz_r_values))
    condition_hinter_m = np.equal(tendenz_m_values, -1)
    num_hinter_m = len(np.extract(condition_hinter_m, tendenz_m_values))

    # -100 does not stand for a computed value but means "not available".
    if num_voraus_l == 0 and num_hinter_l == 0:
        tendenz_l = -100
    elif num_voraus_l > num_hinter_l:
        tendenz_l = 1
    elif num_hinter_l > num_voraus_l:
        tendenz_l = -1
    else:
        tendenz_l = 0

    if num_voraus_r == 0 and num_hinter_r == 0:
        tendenz_r = -100
    elif num_voraus_r > num_hinter_r:
        tendenz_r = 1
    elif num_hinter_r > num_voraus_r:
        tendenz_r = -1
    else:
        tendenz_r = 0

    if num_voraus_m == 0 and num_hinter_m == 0:
        tendenz_m = -100
    elif num_voraus_m > num_hinter_m:
        tendenz_m = 1
    elif num_hinter_m > num_voraus_m:
        tendenz_m = -1
    else:
        tendenz_m = 0

    header = np.append(header, [
        versuch_name + '_tendenz_l', versuch_name + '_tendenz_r',
        versuch_name + '_tendenz_m'
    ])

    # Compute the covariance between the left and the right eye
    cov_x = np.cov(blick_l_x_values, blick_r_x_values)[0][1]
    cov_y = np.cov(blick_l_y_values, blick_r_y_values)[0][1]

    header = np.append(header, [
        versuch_name + '_Kovarianz_blick_x',
        versuch_name + '_Kovarianz_blick_y'
    ])

    verhaeltnis_l_x_da = blick_l_x_values[np.nonzero(
        blick_l_x_values)].size / blick_l_x_values.size
    verhaeltnis_l_y_da = blick_l_y_values[np.nonzero(
        blick_l_y_values)].size / blick_l_y_values.size
    verhaeltnis_r_x_da = blick_r_x_values[np.nonzero(
        blick_r_x_values)].size / blick_r_x_values.size
    verhaeltnis_r_y_da = blick_r_y_values[np.nonzero(
        blick_r_y_values)].size / blick_r_y_values.size

    sacc_m = np.sum(sacc_m_values)
    sacc_l = np.sum(sacc_l_values)
    sacc_r = np.sum(sacc_r_values)
    if versuch_name == 'Horizontal' or versuch_name == 'Liegende_8_schnell':
        sacc_rate_m = sacc_m / (999 * 4)
        sacc_rate_l = sacc_l / (999 * 4)
        sacc_rate_r = sacc_r / (999 * 4)
    else:
        sacc_rate_m = sacc_m / (999 * 5)
        sacc_rate_l = sacc_l / (999 * 5)
        sacc_rate_r = sacc_r / (999 * 5)

    header = np.append(header, [
        versuch_name + '_links_verhaeltnis_x', versuch_name +
        '_links_verhaeltnis_y', versuch_name + '_rechts_verhaeltnis_x',
        versuch_name + '_rechts_verhaeltnis_y', versuch_name + '_sacc_m',
        versuch_name + '_sacc_rate_m', versuch_name + '_sacc_l', versuch_name +
        '_sacc_rate_l', versuch_name + '_sacc_r', versuch_name + '_sacc_rate_r'
    ])

    yield [[
        mean_delta_l, mean_delta_r, mean_delta_m, mean_geschwindigkeit_l,
        mean_geschwindigkeit_r, mean_geschwindigkeit_m, max_delta_l,
        max_delta_r, max_delta_m, max_geschwindigkeit_l, max_geschwindigkeit_r,
        max_geschwindigkeit_m, min_delta_l, min_delta_r, min_delta_m,
        min_geschwindigkeit_l, min_geschwindigkeit_r, min_geschwindigkeit_m,
        std_delta_l, std_delta_r, std_delta_m, std_geschwindigkeit_l,
        std_geschwindigkeit_r, std_geschwindigkeit_m, var_delta_l, var_delta_r,
        var_delta_m, var_geschwindigkeit_l, var_geschwindigkeit_r,
        var_geschwindigkeit_m, tendenz_l, tendenz_r, tendenz_m, cov_x, cov_y,
        verhaeltnis_l_x_da, verhaeltnis_l_y_da, verhaeltnis_r_x_da,
        verhaeltnis_r_y_da, sacc_m, sacc_rate_m, sacc_l, sacc_rate_l, sacc_r,
        sacc_rate_r
    ]]
    yield header
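Throughout versuch_auswerten the same pattern repeats: drop the zero entries (missing measurements), compute a statistic on what is left, and fall back to the sentinel -1 when nothing remains. A small helper sketch of that pattern which could stand in for the repeated if/else and try/except blocks (the helper name is illustrative):

import numpy as np

def stat_or_sentinel(values, stat, sentinel=-1):
    """Apply `stat` to the nonzero entries of `values`, or return `sentinel` if there are none."""
    nonzero = values[np.nonzero(values)]
    return sentinel if nonzero.size == 0 else stat(nonzero)

delta_l_values = np.array([0.0, 0.4, 0.0, 0.9, 1.1])
print(stat_or_sentinel(delta_l_values, np.mean))
print(stat_or_sentinel(delta_l_values, np.var))
print(stat_or_sentinel(np.zeros(5), np.max))   # -1 instead of a ValueError on an empty slice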
Exemple #59
0
    def _update_mean_variance(n_past, mu, var, X, sample_weight=None):
        """Compute online update of Gaussian mean and variance.
        Given starting sample count, mean, and variance, a new set of
        points X, and optionally sample weights, return the updated mean and
        variance. (NB - each dimension (column) in X is treated as independent
        -- you get variance, not covariance).
        Can take scalar mean and variance, or vector mean and variance to
        simultaneously update a number of independent Gaussians.
        See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque:
        http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf
        Parameters
        ----------
        n_past : int
            Number of samples represented in old mean and variance. If sample
            weights were given, this should contain the sum of sample
            weights represented in old mean and variance.
        mu : array-like, shape (number of Gaussians,)
            Means for Gaussians in original set.
        var : array-like, shape (number of Gaussians,)
            Variances for Gaussians in original set.
        sample_weight : array-like, shape (n_samples,), optional
            Weights applied to individual samples (1. for unweighted).
        Returns
        -------
        total_mu : array-like, shape (number of Gaussians,)
            Updated mean for each Gaussian over the combined set.
        total_var : array-like, shape (number of Gaussians,)
            Updated variance for each Gaussian over the combined set.
        """
        if X.shape[0] == 0:
            return mu, var

        # Compute (potentially weighted) mean and variance of new datapoints
        if sample_weight is not None:
            n_new = float(sample_weight.sum())
            new_mu = np.average(X, axis=0, weights=sample_weight / n_new)
            new_var = np.average((X - new_mu) ** 2, axis=0,
                                 weights=sample_weight / n_new)
        else:
            n_new = X.shape[0]
            new_var = np.var(X, axis=0)
            new_mu = np.mean(X, axis=0)

        if n_past == 0:
            return new_mu, new_var

        n_total = float(n_past + n_new)

        # Combine mean of old and new data, taking into consideration
        # (weighted) number of observations
        total_mu = (n_new * new_mu + n_past * mu) / n_total

        # Combine variance of old and new data, taking into consideration
        # (weighted) number of observations. This is achieved by combining
        # the sum-of-squared-differences (ssd)
        old_ssd = n_past * var
        new_ssd = n_new * new_var
        total_ssd = (old_ssd + new_ssd +
                     (n_past / float(n_new * n_total)) *
                     (n_new * mu - n_new * new_mu) ** 2)
        total_var = total_ssd / n_total

        return total_mu, total_var
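_update_mean_variance implements the online update of Chan, Golub, and LeVeque, so folding a second batch into the statistics of a first batch must reproduce np.mean and np.var over the concatenated data. A hedged check, assuming the staticmethod above is available as a plain function:

import numpy as np

rng = np.random.default_rng(0)
X1, X2 = rng.normal(size=(50, 4)), rng.normal(size=(30, 4))

mu, var = np.mean(X1, axis=0), np.var(X1, axis=0)
total_mu, total_var = _update_mean_variance(X1.shape[0], mu, var, X2)

X_all = np.vstack([X1, X2])
print(np.allclose(total_mu, np.mean(X_all, axis=0)))   # True
print(np.allclose(total_var, np.var(X_all, axis=0)))   # True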
    def SNR(self, y):
        return self.P_0 - np.var(y)