Example #1
def test_reductions():
    assert compute(t.a.sum(), b) == 6
    assert compute(t.a.min(), b) == 1
    assert compute(t.a.max(), b) == 3
    assert compute(t.a.mean(), b) == 2.0
    assert abs(compute(t.a.std(), b) - np.std([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.var(), b) - np.var([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.std(unbiased=True), b) - np.std([1, 2, 3],
                                                           ddof=1)) < 1e-5
    assert abs(compute(t.a.var(unbiased=True), b) - np.var([1, 2, 3],
                                                           ddof=1)) < 1e-5
    assert len(list(compute(t.distinct(), b))) == 3
    assert len(list(compute(t.a.distinct(), b))) == 3

    assert compute(t.a.nunique(), b) == 3
    assert isinstance(compute(t.a.nunique(), b), np.integer)

    assert compute(t.a.count(), b) == 3
    assert isinstance(compute(t.date.count(), b), np.integer)

    assert compute(t.date.nunique(), b) == 2
    assert isinstance(compute(t.date.nunique(), b), np.integer)

    assert compute(t.date.count(), b) == 2
    assert isinstance(compute(t.a.count(), b), np.integer)

    assert compute(t.a[0], b) == 1
    assert compute(t.a[-1], b) == 3
    assert compute(t[0], b) == compute(t[0], b)
    assert compute(t[-1], b) == compute(t[-1], b)
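# A short numpy aside on the std/var assertions above: unbiased=True corresponds
# to numpy's ddof=1 (divide by n - 1), while numpy's default is the population
# estimate (divide by n).
import numpy as np

a = [1, 2, 3]
print(np.std(a), np.var(a))                   # population: ddof=0
print(np.std(a, ddof=1), np.var(a, ddof=1))   # sample (unbiased variance): ddof=1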
Example #2
def test_em_gmm_largedim():
    # testing the GMM model in larger dimensions
    
    # generate some data
    dim = 10
    x = nr.randn(100, dim)
    x[:30] += 2
    
    # estimate different GMMs of that data
    maxiter, delta = 100, 1.e-4
    
    for k in range(2, 3):
        lgmm = GMM(k,dim)
        bgmm = lgmm.initialize_and_estimate(x, None, maxiter, delta, ninit=5)
        
    z = bgmm.map_label(x)

    # define the correct labelling
    u = np.zeros(100)
    u[:30] = 1

    #check the correlation between the true labelling
    # and the computed one
    eta = np.absolute(np.dot(z - z.mean(), u - u.mean()) /\
                          (np.std(z) * np.std(u) * 100))
    assert_true(eta > 0.3)
Example #3
def Haffine_from_points(fp, tp):
    '''Find the homography H of an affine transformation such that tp is obtained from fp by that affine map.'''
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # Condition (normalize) the points
    # source points of the mapping
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # destination points of the mapping
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # After conditioning the points have zero mean, so the translation is zero
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)
    # Create matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)), numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))  # decondition
    return H / H[2, 2]  # normalize and return
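# Usage sketch for Haffine_from_points above: build homogeneous 3xN point sets,
# apply a known affine map to obtain tp, and recover it. Assumes the function and
# numpy are importable as in the snippet; should print True for noise-free points.
import numpy

fp = numpy.vstack([numpy.random.rand(2, 20) * 100, numpy.ones((1, 20))])
A_true = numpy.array([[1.2, 0.1, 5.0],
                      [-0.2, 0.9, -3.0],
                      [0.0, 0.0, 1.0]])
tp = numpy.dot(A_true, fp)
print(numpy.allclose(Haffine_from_points(fp, tp), A_true, atol=1e-6))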
Example #4
def _get_mean_std_from_runs(results_for_runs, decider):
    '''
    For a collection of runs (usually from HPs) return the mean and std of the decider error and the test error.
    Usually the decider error will be the validation or train error (from which we then also get the average test error).

    results_for_runs = array with all results for runs (each run usually corresponds to a specific HP) for a specific model
        e.g. [result1, ..., result200]
    decider = namespace holding the appropriate function handle/pointer named get_errors_from (e.g. get_errors_based_on_train_error),
        so decider must be callable as decider.get_errors_from(run).
    '''
    decider_errors_for_runs = [] #
    #train_errors_for_runs = []
    #cv_errors_for_runs = []
    test_errors_for_runs = [] #
    for current_result in results_for_runs:
        decider_error, train_error, cv_error, test_error = decider.get_errors_from(current_result)
        print('decider_error ', decider_error)
        #
        # if np.isnan( decider_error ):
        #     pdb.set_trace()
        decider_errors_for_runs.append(decider_error)
        #train_errors_for_runs.append(train_error)
        #cv_errors_for_runs.append(cv_error)
        test_errors_for_runs.append(test_error)
    decider_mean, decider_std = np.mean(decider_errors_for_runs), np.std(decider_errors_for_runs)
    test_mean, test_std = np.mean(test_errors_for_runs), np.std(test_errors_for_runs)
    #pdb.set_trace()
    return decider_mean, decider_std, test_mean, test_std
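# Usage sketch for _get_mean_std_from_runs above, with a hypothetical decider
# namespace whose get_errors_from returns (decider, train, cv, test) errors; the
# run dictionaries below are made-up stand-ins for real result objects.
import numpy as np
from types import SimpleNamespace

runs = [{'val': 0.20, 'test': 0.25}, {'val': 0.18, 'test': 0.24}, {'val': 0.22, 'test': 0.27}]
decider = SimpleNamespace(get_errors_from=lambda run: (run['val'], None, None, run['test']))
print(_get_mean_std_from_runs(runs, decider))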
Example #5
    def testNormalizeLike(self):
        a = np.empty((10, 3))
        a[:, 0] = np.random.random(10)
        a[:, 1] = np.random.random(10)
        a[:, 2] = np.random.random(10)

        b = np.empty((10, 3))
        b[:, 0] = np.random.random(10)
        b[:, 1] = np.random.random(10)
        b[:, 2] = np.random.random(10)
        b = b * 2

        c = normalizeArrayLike(b, a)

        # Should be normalized like a
        mean = []
        std = []
        mean.append(np.mean(a[:, 0]))
        mean.append(np.mean(a[:, 1]))
        mean.append(np.mean(a[:, 2]))
        std.append(np.std(a[:, 0]))
        std.append(np.std(a[:, 1]))
        std.append(np.std(a[:, 2]))

        # Check all values
        for col in xrange(b.shape[1]):
            for bval, cval in zip(b[:, col].flat, c[:, col].flat):
                print cval, (bval - mean[col]) / std[col]
                print cval, bval
                assert cval == (bval - mean[col]) / std[col]
        print ("TestNormalizeLike success")
Example #6
def summarize_features_mfcc(mfccs, v=False):
    """
    Given an MFCC matrix, return a summary feature vector for a window
    :param mfccs: NxM matrix
        MFCC matrix (N coefficients x M frames)
    :param v: bool
        verbose flag (unused)
    :return: 1xL array
        feature vector
    """

    # Summarize features
    features = np.max(mfccs, axis=1)
    features = np.append(features, np.mean(mfccs, axis=1))
    features = np.append(features, np.std(mfccs, axis=1))
    d_mfccs = np.diff(mfccs, axis=1)
    features = np.append(features, np.mean(d_mfccs, axis=1))
    features = np.append(features, np.std(d_mfccs, axis=1))
    d_d_mfccs = np.diff(d_mfccs, axis=1)
    features = np.append(features, np.mean(d_d_mfccs, axis=1))
    features = np.append(features, np.std(d_d_mfccs, axis=1))

    # print np.shape(d_d_mfccs)
    # print np.shape(features)
    return np.reshape(features, (1, len(features)))
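# Usage sketch for summarize_features_mfcc above with a random stand-in MFCC
# matrix (13 coefficients x 200 frames); real MFCCs would come from an audio
# front end that is not part of this snippet.
import numpy as np

mfccs = np.random.randn(13, 200)
print(summarize_features_mfcc(mfccs).shape)  # (1, 91): 7 summaries x 13 coefficients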
Example #7
 def __init__(self, fndark, nblocksize):
     if os.path.isfile(fndark + '-dark.npz'):
         npzfile = np.load(fndark + '-dark.npz')
         self.dmean = npzfile['dmean']
         self.dstd = npzfile['dstd']
         self.dbpm = npzfile['dbpm']
     else:
         dark = Binary(fndark)
         nframes = dark.nframes
         my = dark.my
         mx = dark.mx
         nblocks = nframes // nblocksize

         bmed = np.zeros((nblocks, my, mx))
         bstd = np.zeros((nblocks, my, mx))
         for iblock in range(nblocks):
             t0 = time.clock()
             a = dark.data[iblock * nblocksize:(iblock + 1) * nblocksize]
             a, idx = dropbadframes(a)
             print('- read block, dropped bad, subtracted dark in ' + str(time.clock() - t0) + 's')
             nfb = a.shape[0]
             bmed[iblock, :, :] = np.median(a, axis=0)
             bstd[iblock, :, :] = np.std(a, axis=0)
         self.dmean = np.mean(bmed, axis=0)
         self.dstd = np.sqrt(np.sum(bstd ** 2, axis=0))
         self.dbpm = self.dstd < (np.median(self.dstd) + 5 * np.std(self.dstd))
         self.dbpm = self.dstd < (np.median(self.dstd * self.dbpm) + 5 * np.std(self.dstd * self.dbpm))

         np.savez(fndark + '-dark', dmean=self.dmean, dstd=self.dstd, dbpm=self.dbpm)
         del dark
Example #8
def meanclip2(xx,yy,slope, clipsig=3.0, maxiter=5, converge_num=0.1, verbose=0):
    from numpy import array
    import numpy
    xx=array(xx)
    yy=array(yy)
    xx0=array(xx[:])
    yy0=array(yy[:])
    ct=len(yy)
    slope=float(slope)
    iter = 0; c1 = 1.0 ; c2 = 0.0
    while (c1 >= c2) and (iter < maxiter):
        lastct = ct
        sig=numpy.std(yy0-xx0*slope)
#        mean=numpy.mean(array(yy0)-array(xx0)*slope)
        mean=numpy.median(array(yy0)-array(xx0)*slope)
        wsm = numpy.where( abs(yy0-xx0*slope) < mean+clipsig*sig )
        ct = len(wsm[0])
        if ct > 0:
            xx0=xx0[wsm]
            yy0=yy0[wsm]
        c1 = abs(ct - lastct)
        c2 = converge_num * lastct
        iter += 1
# End of while loop
#    mean=numpy.mean(array(yy0)-array(xx0)*slope)
    mean=numpy.median(array(yy0)-array(xx0)*slope)
    sig=numpy.std(array(yy0)-array(xx0)*float(slope))
    if verbose: pass
    return mean, sig,yy0,xx0
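# Usage sketch for meanclip2 above: estimate the offset and scatter of y about a
# known slope with iterative sigma clipping, on synthetic data containing a few
# injected outliers. Assumes meanclip2 is defined/imported in the current scope.
import numpy as np

x = np.linspace(0, 10, 200)
y = 2.0 * x + np.random.normal(0.0, 0.1, x.size)
y[:5] += 10.0  # outliers
offset, scatter, y_kept, x_kept = meanclip2(x, y, slope=2.0)
print(offset, scatter, len(y_kept))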
Example #9
def meanclip3(xx,yy,slope, clipsig=3.0, maxiter=5, converge_num=0.1, verbose=0):
    from numpy import array, polyfit
    import numpy
    xx=array(xx)
    yy=array(yy)
    xx0=array(xx[:])
    yy0=array(yy[:])
    ct=len(yy)
    iter = 0; c1 = 1.0 ; c2 = 0.0
    while (c1 >= c2) and (iter < maxiter):
        lastct = ct
        pol = polyfit(xx0,yy0,1,full=True) ###
        mean0=pol[0][1]
        slope=pol[0][0]
        sig=numpy.std(yy0-mean0-slope*xx0)
        wsm = numpy.where( abs(yy0-xx0*slope) < mean0+clipsig*sig )
        ct = len(wsm[0])
        if ct > 0:
            xx0=xx0[wsm]
            yy0=yy0[wsm]
        c1 = abs(ct - lastct)
        c2 = converge_num * lastct
        iter += 1
# End of while loop
    pol = polyfit(xx0,yy0,1,full=True) ###
    mean0=pol[0][1]
    slope=pol[0][0]
    sig=numpy.std(yy0-mean0-slope*xx0)
    if verbose: pass
    return mean0, sig,slope,yy0,xx0
Example #10
    def run(self, inputs, run_id):
        pstore = self.pstore(run_id)
        data, npatients = inputs[0], inputs[1][0]
        corrs = []
        for row in data:
            allrow = row[:npatients]
            amlrow = row[npatients:]

            mean, std = numpy.mean(row), numpy.std(row)
            amlrow = [(val - mean)/std for val in amlrow]
            allrow = [(val - mean)/std for val in allrow]

            amlmean, allmean = numpy.mean(amlrow), numpy.mean(allrow)
            amlstd,  allstd  = numpy.std(amlrow),  numpy.std(allrow)
            corr = (allmean - amlmean) / (amlstd + allstd)
            corrs.append( corr )



        start = time.time()
        rowsize = len(data[0])
        if pstore.takes_pointers():
            for rowidx, b in enumerate(corrs):
                fd = pstore.popen((rowidx,))
                if fd:
                    for colidx in xrange(rowsize):
                        pstore.pwrite(fd, 0, (rowidx, colidx))
                        pstore.pwrite(fd, 1, (0,))
                pstore.pclose(fd)
        end = time.time()
            

        return numpy.array(corrs), {'provoverhead' : end-start}
Example #11
 def _ols(self,x,y):
     lr = LinearRegression()
     coef_xy = lr.fit(y= y.reshape(-1, 1), X= x.reshape(-1, 1)).coef_
     coef_yx = lr.fit(y= x.reshape(-1, 1), X= y.reshape(-1, 1)).coef_
     r_xy = y - coef_xy*x
     r_yx = x - coef_yx*y
     return r_xy/np.std(r_xy), r_yx/np.std(r_yx)
Example #12
 def condBias(H,O):  
     H_ensmean = np.mean(H, axis=1)
     r = np.corrcoef(H_ensmean, O)[0,1]
     std_H = np.std(H_ensmean)
     std_O = np.std(O)            
     cond_bias = r * std_O/std_H
     return cond_bias
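# Usage sketch for condBias above, assuming it is in scope as a plain function:
# H is an (n_times, n_members) ensemble hindcast and O the matching observations;
# the returned factor is the correlation times the ratio of standard deviations.
import numpy as np

rng = np.random.default_rng(0)
O = rng.normal(size=200)
H = O[:, None] + rng.normal(scale=0.5, size=(200, 10))  # ensemble scattered around the truth
print(condBias(H, O))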
Example #13
def test_Moster13SmHm_behavior():
	"""
	"""
	default_model = Moster13SmHm()
	mstar1 = default_model.mean_stellar_mass(prim_haloprop = 1.e12)
	ratio1 = mstar1/3.4275e10
	np.testing.assert_array_almost_equal(ratio1, 1.0, decimal=3)

	default_model.param_dict['n10'] *= 1.1
	mstar2 = default_model.mean_stellar_mass(prim_haloprop = 1.e12)
	assert mstar2 > mstar1

	default_model.param_dict['n11'] *= 1.1
	mstar3 = default_model.mean_stellar_mass(prim_haloprop = 1.e12)
	assert mstar3 == mstar2

	mstar4_z1 = default_model.mean_stellar_mass(prim_haloprop = 1.e12, redshift=1)
	default_model.param_dict['n11'] *= 1.1
	mstar5_z1 = default_model.mean_stellar_mass(prim_haloprop = 1.e12, redshift=1)
	assert mstar5_z1 != mstar4_z1

	mstar_realization1 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=43)
	mstar_realization2 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=43)
	mstar_realization3 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=44)
	assert np.array_equal(mstar_realization1, mstar_realization2)
	assert not np.array_equal(mstar_realization1, mstar_realization3)

	measured_scatter1 = np.std(np.log10(mstar_realization1))
	model_scatter = default_model.param_dict['scatter_model_param1']
	np.testing.assert_allclose(measured_scatter1, model_scatter, rtol=1e-3)

	default_model.param_dict['scatter_model_param1'] = 0.3
	mstar_realization4 = default_model.mc_stellar_mass(prim_haloprop = np.ones(1e4)*1e12, seed=43)
	measured_scatter4 = np.std(np.log10(mstar_realization4))
	np.testing.assert_allclose(measured_scatter4, 0.3, rtol=1e-3)
Example #14
    def cross_validate(self, seg_corpus, dep_corpus, out_folder=None):
        assert seg_corpus.keys() == dep_corpus.keys()
        texts = np.array(sorted(seg_corpus.keys()))
        folds = KFold(len(texts), number_of_folds)

        # extract features for all texts
        all_features = {}
        all_labels = {}
        for text in texts:
            features, labels = self.extract_features_from_text(
                dep_corpus[text], seg_forest=seg_corpus[text])
            all_features[text] = features
            all_labels[text] = labels

        # do the cross-validation
        macro_F1s = []
        micro_F1s = []
        tp = fp = fn = tp_i = fp_i = fn_i = 0
        for i, (train, test) in enumerate(folds):
            print "# FOLD", i
            # train
            train_texts = texts[train]
            train_features = chained([all_features[text] for text in
                                      train_texts])
            train_labels = chained([all_labels[text] for text in train_texts])
            print "  training on %d items..." % len(train_labels)
            self._train(train_features, train_labels)
            print "  extracted %d features using the dict vectorizer." % \
                len(self.pipeline.named_steps[
                    'vectorizer'].get_feature_names())
            # test (predicting textwise)
            test_labels = []
            pred_labels = []
            for text in texts[test]:
                features = all_features[text]
                labels = all_labels[text]
                predictions = self._predict(features)
                test_labels.extend(labels)
                pred_labels.extend(predictions)
                if out_folder is not None:
                    discourse_tree = self._segment_text(predictions,
                                                        dep_corpus[text])
                    with open(out_folder + '/' + text + '.tree', 'w') as fout:
                        fout.write(str(discourse_tree))
            macro_f1, micro_f1 = self._score(test_labels, pred_labels)
            macro_F1s.append(macro_f1)
            micro_F1s.append(micro_f1)
            tp_i, fp_i, fn_i = _cnt_stat(test_labels, pred_labels)
            tp += tp_i
            fp += fp_i
            fn += fn_i

        print "# Average Macro F1 = %3.1f +- %3.2f" % \
            (100 * np.mean(macro_F1s), 100 * np.std(macro_F1s))
        print "# Average Micro F1 = %3.1f +- %3.2f" % \
            (100 * np.mean(micro_F1s), 100 * np.std(micro_F1s))
        if tp or fp or fn:
            print "# F1_{tp,fp} %.2f" % (2. * tp / (2. * tp + fp + fn) * 100)
        else:
            print "# F1_{tp,fp} 0. %"
Example #15
def average_form_factors(qz_lists, F_lists):
    """Average multiple sets of form factors. Need at least two 
    input data sets.
    
    qz_lists : list of lists
    F_lists : list of lists
    
    Each list must be in an ascending order, which is the default format
    in NFIT frm.dat.
    """ 
    if len(qz_lists) < 2:
        raise TypeError('Need more than one form factor set for averaging')
    if len(qz_lists) != len(F_lists):
        raise TypeError('Number of qz and F data sets must agree')
    for qzvalues, Fvalues in zip(qz_lists, F_lists):
        if len(qzvalues) != len(Fvalues):
            raise TypeError('Length of each qz and F data set must agree') 
   
    qz_bin, F_bin = create_binned_data(qz_lists, F_lists)
    normalize_to_each_other(F_bin)
    qz_bin = np.array(qz_bin)
    F_bin = np.array(F_bin)
    avg_qz = np.mean(qz_bin, axis=1)
    err_qz = np.std(qz_bin, axis=1, ddof=1, dtype=np.float64)
    avg_F = np.mean(F_bin, axis=1)    
    err_F = np.std(F_bin, axis=1, ddof=1, dtype=np.float64)   
         
    return avg_qz, err_qz, avg_F, err_F
Example #16
    def compute_data(self):
        breakdown_data = self.exp_config["breakdown"]

        table_concat = []
        for config in breakdown_data["config"]:
            table = prettytable.PrettyTable(["Time", "TotalHLFracAVG", "TotalHLFracSTD", "LLFracAVG", "LLFracSTD"])
            table.set_style(prettytable.PLAIN_COLUMNS)
            experiments = {}
            for job in breakdown_data[self.language]["jobs"]:
                experiments[job["name"]] = [Experiment(os.path.join(self.EXPERIMENT_ROOT, exp_dir,
                                                                    config["strat-path"],
                                                                    job["name"]))
                                            for exp_dir in breakdown_data[self.language]["expdir"][config["expdir"]]]
            for minute in range(self.START_MINUTES, self.END_MINUTES+1, 5):
                time_stamp = minute * 60 * 1000000
                hlfrac_data = np.array(
                    [[float(exp.high_level_tests.countSince(time_stamp))/len(exp.high_level_tests)
                      for exp in exp_list] for exp_list in experiments.itervalues()])
                llfrac_data = np.array(
                    [[float(exp.high_level_tests.countSince(time_stamp))/(exp.low_level_tests.countSince(time_stamp) or 1)
                      for exp in exp_list] for exp_list in experiments.itervalues()])

                table.add_row([minute,
                               np.average(np.average(hlfrac_data, axis=1)),
                               np.std(np.average(hlfrac_data, axis=1)),
                               np.average(np.average(llfrac_data, axis=1)),
                               np.std(np.average(llfrac_data, axis=1))])
            table_concat.append(table.get_string())

        with open(self.data_file, "w") as f:
            print >>f, "\n\n\n".join(table_concat)
Example #17
def ccf(x, y, unbiased=True):
    '''cross-correlation function for 1d

    Parameters
    ----------
    x, y : arrays
       time series data
    unbiased : boolean
       if True, then the denominator for the autocovariance is n-k, otherwise n

    Returns
    -------
    ccf : array
        cross-correlation function of x and y

    Notes
    -----
    This is based on np.correlate, which does full convolution. For very long
    time series it is recommended to use fft convolution instead.

    If unbiased is true, the denominator for the autocovariance is adjusted
    but the autocorrelation is not an unbiased estimator.

    '''
    cvf = ccovf(x, y, unbiased=unbiased, demean=True)
    return cvf / (np.std(x) * np.std(y))
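# An illustrative, self-contained sketch of the cross-correlation that the ccf
# docstring describes, using plain numpy (ccovf from statsmodels is not reproduced
# here). The denominator is n-k when unbiased=True, otherwise n, as documented.
import numpy as np

def ccf_sketch(x, y, unbiased=True):
    n = len(x)
    x = np.asarray(x) - np.mean(x)
    y = np.asarray(y) - np.mean(y)
    ccov = np.correlate(x, y, mode='full')[n - 1:]   # lags k = 0 .. n-1
    d = (n - np.arange(n)) if unbiased else n
    return (ccov / d) / (np.std(x) * np.std(y))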
Example #18
    def performFit(self):
        """
        Fit the distribution with a triple Gaussian function and validate via a Kolmogorov-Smirnov test.
        """
      
        self.fill = False

        self.x0 = [len(self.Data)/2.,0.0,numpy.std(self.Data), \
                   len(self.Data)/3.,0.0,numpy.std(self.Data)*2, \
                   len(self.Data)/4.,0.0,numpy.std(self.Data)*0.5]
        self.popt, pcov = scipy.optimize.curve_fit(self.triple, self.hists.bin_centers, self.hist, p0=self.x0, sigma=None, absolute_sigma=False)
        print("in",self.x0)
        print("out",self.popt)
        sigmaw = self.getSigmaW()
        X = numpy.linspace(self.hists.bin_centers[0],self.hists.bin_centers[-1],1000)
        fithist = numpy.array([self.triple(x,*self.popt) for x in X])

        # Kolmogorov-Smirnov test
        ks = scipy.stats.ks_2samp(self.hist, fithist)
        props = dict(boxstyle='round', edgecolor='gray', facecolor='white', linewidth=0.1, alpha=0.5)
        self.axis.text(0.6, 0.5, r'$KS-test: p='+str(numpy.round(ks[1],3))+'$', fontsize=20, bbox=props
                       ,verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)


        self.axis.text(0.6, 0.6, r'$<\Delta t> ='+str(numpy.round(sigmaw,3))+'ps$', fontsize=20, bbox=props
                       ,verticalalignment='top', horizontalalignment='left', transform=self.axis.transAxes)



        self.set_plot_options( plot_kwargs={ 'marker':' ','linestyle':'-'})
        p = self._plot_datapoints(self.axis, X,fithist, xerr=None , yerr=None)
        self.plots.append(p)
       
        return self
Example #19
    def _computePositionTraditionalControl(self, caseObservations, controlObservations, methylFractionFlag, identifyFlag, testProcedure=_tTest):
        """Summarize the observed ipds at one template position/strand, using a case-control analysis"""
        # Compute stats on the observed ipds
        caseData = caseObservations['data']['ipd']
        controlData = controlObservations['data']['ipd']

        res = dict()
        res['refId'] = self.refId

        # FASTA header name
        res['refName'] = self.refName

        strand = res['strand'] = 1 - caseObservations['strand']
        tpl = res['tpl'] = caseObservations['tpl']
        res['base'] = self.cognateBaseFunc(tpl, strand)

        res['coverage'] = int(round((caseData.size + controlData.size) / 2.0))  # need a coverage annotation

        res['caseCoverage'] = caseData.size
        res['controlCoverage'] = controlData.size

        res['caseMean'] = caseData.mean().item()
        res['caseMedian'] = np.median(caseData).item()
        res['caseStd'] = np.std(caseData).item()

        res['controlMean'] = controlData.mean().item()
        res['controlMedian'] = np.median(controlData).item()
        res['controlStd'] = np.std(controlData).item()

        trim = (0.001, 0.03)
        ctrlMean = mstats.trimmed_mean(controlData, trim).item()
        if abs(ctrlMean) > 1e-3:
            res['ipdRatio'] = (mstats.trimmed_mean(caseData, trim).item() / ctrlMean)
        else:
            res['ipdRatio'] = 1.0

        testResults = testProcedure(caseData, controlData)
        res['testStatistic'] = testResults['testStatistic']
        res['pvalue'] = testResults['pvalue']

        pvalue = max(sys.float_info.min, res['pvalue'])
        res['score'] = round(-10.0 * math.log10(pvalue))

        # If the methylFractionFlag is set, then estimate fraction using just modelPrediction in the detection case.
        if methylFractionFlag and pvalue < self.options.pvalue and not identifyFlag:
            if res['controlCoverage'] > self.options.methylMinCov and res['caseCoverage'] > self.options.methylMinCov:

                # Instantiate mixture estimation methods:
                mixture = MixtureEstimationMethods(self.ipdModel.gbmModel.post, self.ipdModel.gbmModel.pre, res, self.options.methylMinCov)
                x = mixture.detectionMixModelBootstrap(res['controlMean'], caseData)

                res[FRAC] = x[0]
                res[FRAClow] = x[1]
                res[FRACup] = x[2]
            else:
                res[FRAC] = np.nan
                res[FRACup] = np.nan
                res[FRAClow] = np.nan

        return res
Example #20
def plotForce():
    figure(size=3,aspect=0.5)
    subplot(1,2,1)
    from EvalTraj import plotFF
    plotFF(vp=351,t=28,f=900,cm=0.6,foffset=8)
    subplot_annotate()
    
    subplot(1,2,2)
    for i in [1,2,3,4]:
        R=np.squeeze(np.load('Rdpse%d.npy'%i))
        R=stats.nanmedian(R,axis=2)[:,1:,:]
        dps=np.linspace(-1,1,201)[1:]
        plt.plot(dps,R[:,:,2].mean(0));
    plt.legend([0,0.1,0.2,0.3],loc=3) 
    i=2
    R=np.squeeze(np.load('Rdpse%d.npy'%i))
    R=stats.nanmedian(R,axis=2)[:,1:,:]
    mn=np.argmin(R,axis=1)
    y=np.random.randn(mn.shape[0])*0.00002+0.0438
    plt.plot(np.sort(dps[mn[:,2]]),y,'+',mew=1,ms=6,mec=[ 0.39  ,  0.76,  0.64])
    plt.xlabel('Displacement of Force Origin')
    plt.ylabel('Average Net Force Magnitude')
    hh=dps[mn[:,2]]
    err=np.std(hh)/np.sqrt(hh.shape[0])*stats.t.ppf(0.975,hh.shape[0])
    err2=np.std(hh)/np.sqrt(hh.shape[0])*stats.t.ppf(0.75,hh.shape[0])
    m=np.mean(hh)
    print m, m-err,m+err
    np.save('force',[m, m-err,m+err,m-err2,m+err2])
    plt.xlim([-0.5,0.5])
    plt.ylim([0.0435,0.046])
    plt.grid(b=True,axis='x')
    subplot_annotate()
Example #21
def explore_city_data(city_data):
    """Calculate the Boston housing statistics."""

    # Get the labels and features from the housing data
    housing_prices = city_data.target
    housing_features = city_data.data

    ###################################
    ### Step 1. YOUR CODE GOES HERE ###
    ###################################

    # Please calculate the following values using the Numpy library
    print "Size of data (number of houses)"
    print np.size(housing_prices)
    print "Number of features"
    print np.size(housing_features, 1)
    print "Minimum price"
    print np.min(housing_prices)
    print "Maximum price"
    print np.max(housing_prices)
    print "Calculate mean price"
    print np.mean(housing_prices)
    print "Calculate median price"
    print np.median(housing_prices)
    print "Calculate standard deviation"
    print np.std(housing_prices)
Example #22
    def prepare_results(self, initial_pops=[50,100]):
        """
        Analyzes data from a batch run, preparing it for plotting.

        """

        self.initial_pops = initial_pops
        self.result_dict = {}

        for pop in self.initial_pops:
            self.result_dict[pop] = {}

            print('Starting batch for %d.' % pop)

            batch = BatchDriver(self.num_sims)
            results = batch.drive(initial_pop=pop)

            stdevs = []

            for indx, result in enumerate(results):
                adults = result['adults']
                minus_120 = len(adults) - 120
                last_120 = adults[minus_120:]

                stdev = np.std(last_120)
                stdevs.append(stdev)

            stdev_of_stdev = np.std(stdevs)

            self.result_dict[pop]['mean_stdev'] = np.mean(stdevs)
            self.result_dict[pop]['ci'] = (1.96 * stdev_of_stdev) / math.sqrt(self.num_sims)

        print(self.result_dict)
Example #23
def main():
    train = pd.DataFrame.from_csv('train.csv')
    places_index = train['place_id'].values

    places_loc_sqr_wei = []
    for i, place_id in enumerate(train['place_id'].unique()):
        if not i % 100:
            print(i)
        place_df = train.iloc[places_index == place_id]
        place_weights_acc_sqred = 1 / (place_df['accuracy'].values ** 2)

        places_loc_sqr_wei.append([place_id,
                                   np.average(place_df['x'].values, weights=place_weights_acc_sqred),
                                   np.std(place_df['x'].values),
                                   np.average(place_df['y'].values, weights=place_weights_acc_sqred),
                                   np.std(place_df['y'].values),
                                   np.average(np.log(place_df['accuracy'].values)),
                                   np.std(np.log(place_df['accuracy'].values)),
                                   place_df.shape[0]])

        # print(places_loc_sqr_wei[-1])
        # plt.hist2d(place_df['x'].values, place_df['y'].values, bins=100)
        # plt.show()
        plt.hist(np.log(place_df['accuracy'].values), bins=20)
        plt.show()
    places_loc_sqr_wei = np.array(places_loc_sqr_wei)
    column_names = ['x_mean', 'x_sd', 'y_mean', 'y_sd', 'accuracy_mean', 'accuracy_sd', 'n_persons']
    places_loc_sqr_wei = pd.DataFrame(data=places_loc_sqr_wei[:, 1:], index=places_loc_sqr_wei[:, 0],
                                      columns=column_names)

    now = str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
    places_loc_sqr_wei.to_csv('places_loc_sqr_weights_%s.csv' % now)
Example #24
def test_stats(x):
	coords_vals = year_sample_dict_data[x]
		
	x_nodes = []	
	y_nodes = []
	z_nodes = []
	values_list = []
	for item in coords_vals:
		x_nodes.append(item[0])
		y_nodes.append(item[1])
		z_nodes.append(item[2])
		values_list.append(coords_vals[item])
	xs = x_nodes
	ys = y_nodes
	zs = z_nodes
	values_list = np.array(values_list)
	all_data = year_stack[x, :time_len, :lat_end, :lon_end]
	### New and improved and faster!!!
	annual_mean = np.mean(all_data)
	sample_mean = np.mean(values_list)
	annual_stdev = np.std(all_data)
	sample_stdev = np.std(values_list)
	annual_max = np.max(all_data)
	annual_min = np.min(all_data)
	sample_max = np.max(values_list)
	sample_min = np.min(values_list)
	annual_range = np.abs(annual_max - annual_min)
	sample_range = np.abs(sample_max - sample_min)
	fitness = np.abs(annual_mean-sample_mean) + np.abs(annual_stdev - sample_stdev) + np.abs(annual_range - sample_range)
	return fitness, annual_mean, sample_mean, annual_stdev, sample_stdev 
Example #25
def rectif(z_in, contrast=contrast, method=method, verbose=False):
    """
    Transforms an image (can be 1, 2 or 3D) with normal histogram into
    a 0.5 centered image of determined contrast
    method is either 'Michelson' or 'Energy'

    Phase randomization takes any image and turns it into Gaussian-distributed
    noise of the same power (or, equivalently, variance).
    # See: Peter J. Bex J. Opt. Soc. Am. A/Vol. 19, No. 6/June 2002 Spatial
    frequency, phase, and the contrast of natural images
    """
    z = z_in.copy()
    # Final rectification
    if verbose:
        print('Before Rectification of the frames')
        print( 'Mean=', np.mean(z[:]), ', std=', np.std(z[:]), ', Min=', np.min(z[:]), ', Max=', np.max(z[:]), ' Abs(Max)=', np.max(np.abs(z[:])))

    z -= np.mean(z[:]) # this should be true *on average* in MotionClouds

    if (method == 'Michelson'):
        z = (.5* z/np.max(np.abs(z[:]))* contrast + .5)
    else:
        z = (.5* z/np.std(z[:])  * contrast + .5)

    if verbose:
        print('After Rectification of the frames')
        print('Mean=', np.mean(z[:]), ', std=', np.std(z[:]), ', Min=', np.min(z[:]), ', Max=', np.max(z[:]))
        print('percentage pixels clipped=', np.sum(np.abs(z[:])>1.)*100/z.size)
    return z
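# Usage sketch for rectif above on a synthetic Gaussian-noise "image". The
# signature's defaults (contrast=contrast, method=method) refer to module-level
# globals in the original code, so explicit values are passed here.
import numpy as np

z = np.random.randn(128, 128)
z_rect = rectif(z, contrast=0.9, method='Michelson', verbose=True)
print(z_rect.min(), z_rect.max(), z_rect.mean())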
Example #26
    def normalize( self, verbose=False):
        #list_of_points, point_count, stroke_count = self.getListOfXYPoints( self )
        #coords = np.array( list_of_points ).reshape( point_count, 2 )
        coords = self.listCoordinates()
        point_count,ccrd = coords.shape
        stroke_count = len(self.strokes)
        mean = np.mean(coords, 0)
        sdev = np.std(coords, 0)
        coords = coords - mean
        
        if sdev[0] != 0 and sdev[1] != 0:
            coords = coords * (1 / sdev)

        new_sketch = self.constructNormalizedSketch(coords, point_count, stroke_count )
        
        if verbose:
            print(mean, sdev)
            print(np.std(coords, 0))
            print(np.mean(coords, 0))
            for i in range(0, point_count):
                print( coords[i, 0], coords[i, 1] )
            
            plt.figure(1)            
            allpts = new_sketch.listCoordinates()
            plt.plot(allpts[:,0],allpts[:,1])
            plt.xlabel('x')
            plt.ylabel('y')
            plt.title('New Sketch with Normalized Points')
            new_sketch.printContents()
        
        return new_sketch
Example #27
def find_velocity(times, frames, maxshift=10):
	frame_times = np.array(times)
	nframes = len(frames)
	last_idx = np.argmax(frame_times)
	velocities = []
	npairs = nframes*(nframes-1)/2
	denom = 0
	maxshift = 10
	for i in range(nframes):
		for j in range(i+1, nframes):
			dt = frame_times[i] - frame_times[j]
			offset = findshift(frames[i], frames[j], maxshift)
			if abs(max(offset)) > maxshift:
				continue
			denom += 1
			print (i, j, offset, dt)
			velocity = -offset/dt
			velocities.append(velocity)

	denom = min(1, denom-1)
	velocities = np.array(velocities)
	(vx1, vy1) = velocities[:,0].mean(), velocities[:,1].mean()
	vx = np.mean(velocities[:,0])
	vy = np.mean(velocities[:,1])
	sx = np.std(velocities[:,0])/denom + 0.2*abs(vx) + 5e-4
	sy = np.std(velocities[:,1])/denom + 0.2*abs(vy) + 5e-4
	return (vx, vy, sx, sy)
Example #28
def mcnoise(data, noise_std, n, noise_scaling=1.):
    """
    Parameters
    ----------
    data : ndarray
        Array of data.
    noise_std : float
        Standard deviation of the noise
    n : int
        Number of repetition
    noise_scaling: float
        Scaling factor for noise

    Returns
    -------
    variance, variance error, skewness, skewness error, kurtosis, kurtosis error

    """
    noise_arr = np.random.normal(0, noise_std, (n, data.size)) * noise_scaling
    var_sample = np.var(data + noise_arr, axis=1)
    skew_sample = skew(data + noise_arr, axis=1)
    kurt_sample = kurtosis(data + noise_arr, axis=1)
    var_val = np.mean(var_sample)
    skew_val = np.mean(skew_sample)
    kurt_val = np.mean(kurt_sample)
    var_err = np.std(var_sample)
    skew_err = np.std(skew_sample)
    kurt_err = np.std(kurt_sample)
    return var_val, var_err, skew_val, skew_err, kurt_val, kurt_err
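# Usage sketch for mcnoise above with synthetic data; assumes mcnoise and the
# scipy.stats helpers it relies on (skew, kurtosis) are imported as in its module.
import numpy as np

data = np.random.normal(0.0, 1.0, 500)
print(mcnoise(data, noise_std=0.1, n=200))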
Example #29
    def gaussian_kernel(self,xvalues,yvalues,r200,normalization=100,scale=10,xres=200,yres=220,xmax=6.0,ymax=5000.0,adj=20):
        """
        Uses a 2D gaussian kernel to estimate the density of the phase space.
        As of now, the maximum radius extends to 6Mpc and the maximum velocity allowed is 5000km/s
        The "q" parameter is termed "scale" here which we have set to 10 as default, but can go as high as 50.
        "normalization" is simply H0
        "x/yres" can be any value, but are recommended to be above 150
        "adj" is a custom value and changes the size of uniform filters when used (not normally needed)
        """
        self.x_scale = xvalues/xmax*xres
        self.y_scale = ((yvalues+ymax)/(normalization*scale))/((ymax*2.0)/(normalization*scale))*yres

        img = np.zeros((xres+1,yres+1))
        self.x_range = np.linspace(0,xmax,xres+1)
        self.y_range = np.linspace(-ymax,ymax,yres+1) 

        for j in range(xvalues.size):
            img[int(self.x_scale[j]), int(self.y_scale[j])] += 1
        
        #Estimate kernel sizes
        #Uniform
        #self.ksize = 3.12/(xvalues.size)**(1/6.0)*((np.var(self.x_scale[xvalues<r200])+np.var(self.y_scale[xvalues<r200]))/2.0)**0.5/adj
        #if self.ksize < 3.5:
        #    self.ksize = 3.5
        #Gaussian
        self.ksize_x = (4.0/(3.0*xvalues.size))**(1/5.0)*np.std(self.x_scale[xvalues<r200])
        self.ksize_y = (4.0/(3.0*yvalues.size))**(1/5.0)*np.std(self.y_scale[xvalues<r200])
        
        #smooth with estimated kernel sizes
        #img = ndi.uniform_filter(img, (self.ksize,self.ksize))#,mode='reflect')
        self.img = ndi.gaussian_filter(img, (self.ksize_y,self.ksize_x),mode='reflect')
        self.img_grad = ndi.gaussian_gradient_magnitude(img, (self.ksize_y,self.ksize_x))
        self.img_inf = ndi.gaussian_gradient_magnitude(ndi.gaussian_gradient_magnitude(img, (self.ksize_y,self.ksize_x)), (self.ksize_y,self.ksize_x))
Example #30
def getDftBins(data=None, sampleRate=None, low=100, high=8000, chunk=64):
    """Return DFT (discrete Fourier transform) of ``data``, doing so in
    time-domain bins, each of size ``chunk`` samples.

    e.g., for getting FFT magnitudes in a ms-by-ms manner.

    If given a sampleRate, the data are bandpass filtered (low, high).
    """
    # good to reshape & vectorize data rather than use a python loop
    if data is None:
        data = []
    bins = []
    i = chunk
    if sampleRate:
        # just to get freq vector
        _junk, freq = getDft(data[:chunk], sampleRate)
        band = (freq > low) & (freq < high)  # band (frequency range)
    while i <= len(data):
        magn = getDft(data[i - chunk:i])
        if sampleRate:
            bins.append(np.std(magn[band]))  # filtered by frequency
        else:
            bins.append(np.std(magn))  # unfiltered
        i += chunk
    return np.array(bins)
Example #31
def calculateSE(data):
    standardError = np.std(data, ddof=1) / np.sqrt(len(data) - 1)
    return standardError
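# Usage sketch for calculateSE above. Note it combines the sample standard
# deviation (ddof=1) with a further 1/sqrt(len(data) - 1) factor, which is this
# author's convention rather than the usual std(ddof=1)/sqrt(n).
import numpy as np

data = np.random.normal(10.0, 2.0, 50)
print(calculateSE(data))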
Example #32
    def extract(self):
        global LEVEL, RADIUS, ANGLE
        # print '*** AGA - extracting level, radius and angle info ***'
        #         z, x, self.AGA_types_ci = self.extract_parameter_errors(self.AGA_typeProbHistory,
        #                                                      ANGLE)
        self.aga_levels, self.aga_levels_std_dev, self.aga_levels_ci = self.extract_parameter_errors(
            self.AGA_errors, LEVEL)
        # print 'AGA - levels OK'
        self.aga_radius, self.aga_radius_std_dev, self.aga_radius_ci = self.extract_parameter_errors(
            self.AGA_errors, RADIUS)
        # print 'AGA - radius OK'
        self.aga_angles, self.aga_angles_std_dev, self.aga_angles_ci = self.extract_parameter_errors(
            self.AGA_errors, ANGLE)
        # print 'AGA - angles OK'
        self.aga_level_error_mean = np.mean(np.array(self.aga_levels))
        self.aga_angle_error_mean = np.mean(np.array(self.aga_angles))
        self.aga_radius_error_mean = np.mean(np.array(self.aga_radius))
        self.aga_type_probability_mean = np.mean(
            np.array(self.AGA_typeProbHistory))

        self.aga_level_error_ci = np.std(np.array(self.aga_levels))
        self.aga_angle_error_ci = np.std(np.array(self.aga_angles))
        self.aga_radius_error_ci = np.std(np.array(self.aga_radius))
        self.aga_type_probability_ci = np.std(
            np.array(self.AGA_typeProbHistory))

        # print '*** ABU - extracting level, radius and angle info ***'
        self.abu_levels, self.abu_levels_std_dev, self.abu_levels_ci = self.extract_parameter_errors(
            self.ABU_errors, LEVEL)
        # print 'ABU - levels OK'
        self.abu_radius, self.abu_radius_std_dev, self.abu_radius_ci = self.extract_parameter_errors(
            self.ABU_errors, RADIUS)
        # print 'ABU - radius O
        self.abu_angles, self.abu_angles_std_dev, self.abu_angles_ci = self.extract_parameter_errors(
            self.ABU_errors, ANGLE)
        # print 'ABU - angles OK'

        self.abu_level_error_mean = np.mean(np.array(self.abu_levels))
        self.abu_angle_error_mean = np.mean(np.array(self.abu_angles))
        self.abu_radius_error_mean = np.mean(np.array(self.abu_radius))
        self.abu_type_probability_mean = np.mean(
            np.array(self.ABU_typeProbHistory))

        self.abu_level_error_ci = np.std(np.array(self.abu_levels))
        self.abu_angle_error_ci = np.std(np.array(self.abu_angles))
        self.abu_radius_error_ci = np.std(np.array(self.abu_radius))
        self.abu_type_probability_ci = np.std(
            np.array(self.ABU_typeProbHistory))

        # print '*** OGE - extracting level, radius and angle info ***'
        self.OGE_levels, self.OGE_levels_std_dev, self.OGE_levels_ci = self.extract_parameter_errors(
            self.OGE_errors, LEVEL)
        # print 'OGE - levels OK'
        self.OGE_radius, self.OGE_radius_std_dev, self.OGE_radius_ci = self.extract_parameter_errors(
            self.OGE_errors, RADIUS)
        # print 'OGE - radius OK'
        self.OGE_angles, self.OGE_angles_std_dev, self.OGE_angles_ci = self.extract_parameter_errors(
            self.OGE_errors, ANGLE)
        # print 'OGE - angles OK'

        self.oge_level_error_mean = np.mean(np.array(self.OGE_levels))
        self.oge_angle_error_mean = np.mean(np.array(self.OGE_angles))
        self.oge_radius_error_mean = np.mean(np.array(self.OGE_radius))
        self.oge_type_probability_mean = np.mean(
            np.array(self.OGE_typeProbHistory))
        self.oge_level_error_ci = np.std(np.array(self.OGE_levels))
        self.oge_angle_error_ci = np.std(np.array(self.OGE_angles))
        self.oge_radius_error_ci = np.std(np.array(self.OGE_radius))
        self.oge_type_probability_ci = np.std(
            np.array(self.OGE_typeProbHistory))

        if len(self.OGE_me_errors) > 0:
            # print '*** OGE - extracting level, radius and angle info ***'
            self.OGE_me_levels, self.OGE_me_levels_std_dev, self.OGE_me_levels_ci = self.extract_parameter_errors(
                self.OGE_me_errors, LEVEL)
            # print 'OGE - levels OK'
            self.OGE_me_radius, self.OGE_me_radius_std_dev, self.OGE_me_radius_ci = self.extract_parameter_errors(
                self.OGE_me_errors, RADIUS)
            # print 'OGE - radius OK'
            self.OGE_me_angles, self.OGE_me_angles_std_dev, self.OGE_me_angles_ci = self.extract_parameter_errors(
                self.OGE_me_errors, ANGLE)
            # print 'OGE - angles OK'

            self.oge_me_level_error_mean = np.mean(np.array(
                self.OGE_me_levels))
            self.oge_me_angle_error_mean = np.mean(np.array(
                self.OGE_me_angles))
            self.oge_me_radius_error_mean = np.mean(
                np.array(self.OGE_me_radius))
            self.oge_me_type_probability_mean = np.mean(
                np.array(self.OGE_me_typeProbHistory))
            self.oge_me_level_error_ci = np.std(np.array(self.OGE_me_levels))
            self.oge_me_angle_error_ci = np.std(np.array(self.OGE_me_angles))
            self.oge_me_radius_error_ci = np.std(np.array(self.OGE_me_radius))
            self.oge_me_type_probability_ci = np.std(
                np.array(self.OGE_me_typeProbHistory))

        if len(self.OGE_mo_errors) > 0:
            # print '*** OGE - extracting level, radius and angle info ***'
            self.OGE_mo_levels, self.OGE_mo_levels_std_dev, self.OGE_mo_levels_ci = self.extract_parameter_errors(
                self.OGE_mo_errors, LEVEL)
            # print 'OGE - levels OK'
            self.OGE_mo_radius, self.OGE_mo_radius_std_dev, self.OGE_mo_radius_ci = self.extract_parameter_errors(
                self.OGE_mo_errors, RADIUS)
            # print 'OGE - radius OK'
            self.OGE_mo_angles, self.OGE_mo_angles_std_dev, self.OGE_mo_angles_ci = self.extract_parameter_errors(
                self.OGE_mo_errors, ANGLE)
            # print 'OGE - angles OK'

            self.oge_mo_level_error_mean = np.mean(np.array(
                self.OGE_mo_levels))
            self.oge_mo_angle_error_mean = np.mean(np.array(
                self.OGE_mo_angles))
            self.oge_mo_radius_error_mean = np.mean(
                np.array(self.OGE_mo_radius))
            self.oge_mo_type_probability_mean = np.mean(
                np.array(self.OGE_mo_typeProbHistory))
            self.oge_mo_level_error_ci = np.std(np.array(self.OGE_mo_levels))
            self.oge_mo_angle_error_ci = np.std(np.array(self.OGE_mo_angles))
            self.oge_mo_radius_error_ci = np.std(np.array(self.OGE_mo_radius))
            self.oge_mo_type_probability_ci = np.std(
                np.array(self.OGE_mo_typeProbHistory))

        if len(self.pomcp_errors) > 0:
            # print '*** pomcp - extracting level, radius and angle info ***'
            self.pomcp_levels, self.pomcp_levels_std_dev, self.pomcp_levels_ci = self.extract_parameter_errors(
                self.pomcp_errors, LEVEL)
            # print 'pomcp - levels OK'
            self.pomcp_radius, self.pomcp_radius_std_dev, self.pomcp_radius_ci = self.extract_parameter_errors(
                self.pomcp_errors, RADIUS)
            # print 'pomcp - radius OK'
            self.pomcp_angles, self.pomcp_angles_std_dev, self.pomcp_angles_ci = self.extract_parameter_errors(
                self.pomcp_errors, ANGLE)
            # print 'pomcp - angles OK'

            #	z, x, self.pomcp_types_ci = self.extract_parameter_errors(self.pomcp_typeProbHistory,

            self.pomcp_level_error_mean = np.mean(np.array(self.pomcp_levels))
            self.pomcp_angle_error_mean = np.mean(np.array(self.pomcp_angles))
            self.pomcp_radius_error_mean = np.mean(np.array(self.pomcp_radius))
            self.pomcp_type_probability_mean = np.mean(
                np.array(self.pomcp_typeProbHistory))
            self.pomcp_level_error_ci = np.std(np.array(self.pomcp_levels))
            self.pomcp_angle_error_ci = np.std(np.array(self.pomcp_angles))
            self.pomcp_radius_error_ci = np.std(np.array(self.pomcp_radius))
            self.pomcp_type_probability_ci = np.std(
                np.array(self.pomcp_typeProbHistory))
Example #33
    def asteroids_plot(self,
                       image_path=None,
                       ra=None,
                       dec=None,
                       odate=None,
                       time_travel=1,
                       radi=6,
                       max_mag=20.0,
                       circle_color='yellow',
                       arrow_color='red',
                       invert_yaxis="True"):
        """
        Source plot module.
        @param image_path: path to the FITS image
        @type image_path: str
        @param ra: RA coordinate of target area.
        @type ra: str in "HH MM SS"
        @param dec: DEC coordinate of target area
        @type dec: str in "+DD MM SS"
        @param radi: Radius in arcmin.
        @type radi: float
        @param odate: Ephemeris date of observation in date
        @type odate: "2017-08-15T19:50:00.95" format in str
        @param time_travel: Jump into time after given date (in hour).
        @type time_travel: float
        @param max_mag: limiting magnitude for queried object(s)
        @type max_mag: float
        @param circle_color: Color of the asteroids marks
        @type circle_color: str
        @param arrow_color: Color of the asteroids direction marks
        @type arrow_color: str
        @param invert_yaxis: invert y axis or not.
        @type invert_yaxis: bool
        @returns: boolean
        """

        from .catalog import Query

        # filename = get_pkg_data_filename(image_path)
        rcParams['figure.figsize'] = [10., 8.]
        # rcParams.update({'font.size': 10})

        if image_path:
            hdu = fits.open(image_path)[0]
        elif not image_path and ra and dec and odate:
            co = coordinates.SkyCoord('{0} {1}'.format(ra, dec),
                                      unit=(u.hourangle, u.deg),
                                      frame='icrs')
            print('Target Coordinates:', co.to_string(style='hmsdms', sep=':'),
                  'in {0} arcmin'.format(radi))
            try:
                server_img = SkyView.get_images(position=co,
                                                survey=['DSS'],
                                                radius=radi * u.arcmin)
                hdu = server_img[0][0]
            except Exception as e:
                print("SkyView could not get the image from DSS server.")
                print(e)
                raise SystemExit

        wcs = WCS(hdu.header)

        data = hdu.data.astype(float)

        bkg = sep.Background(data)
        # bkg_image = bkg.back()
        # bkg_rms = bkg.rms()
        data_sub = data - bkg
        m, s = np.mean(data_sub), np.std(data_sub)

        ax = plt.subplot(projection=wcs)

        plt.imshow(data_sub,
                   interpolation='nearest',
                   cmap='gray',
                   vmin=m - s,
                   vmax=m + s,
                   origin='lower')
        ax.coords.grid(True, color='white', ls='solid')
        ax.coords[0].set_axislabel('Galactic Longitude')
        ax.coords[1].set_axislabel('Galactic Latitude')

        overlay = ax.get_coords_overlay('icrs')
        overlay.grid(color='white', ls='dotted')
        overlay[0].set_axislabel('Right Ascension (ICRS)')
        overlay[1].set_axislabel('Declination (ICRS)')

        sb = Query()
        ac = AstCalc()
        if image_path:
            fo = FitsOps(image_path)
            if not odate:
                odate = fo.get_header('date-obs')
            else:
                odate = odate
            ra_dec = ac.center_finder(image_path, wcs_ref=True)
        elif not image_path and ra and dec and odate:
            odate = odate
            ra_dec = [co.ra, co.dec]

        request0 = sb.find_skybot_objects(odate,
                                          ra_dec[0].degree,
                                          ra_dec[1].degree,
                                          radius=radi)

        if request0[0]:
            asteroids = request0[1]
        elif request0[0] is False:
            print(request0[1])
            raise SystemExit

        request1 = sb.find_skybot_objects(odate,
                                          ra_dec[0].degree,
                                          ra_dec[1].degree,
                                          radius=float(radi),
                                          time_travel=time_travel)

        if request1[0]:
            asteroids_after = request1[1]
        elif request1[0] is False:
            print(request1[1])
            raise SystemExit

        for i in range(len(asteroids)):
            if float(asteroids['m_v'][i]) <= max_mag:
                c = coordinates.SkyCoord('{0} {1}'.format(
                    asteroids['ra(h)'][i], asteroids['dec(deg)'][i]),
                                         unit=(u.hourangle, u.deg),
                                         frame='icrs')

                c_after = coordinates.SkyCoord('{0} {1}'.format(
                    asteroids_after['ra(h)'][i],
                    asteroids_after['dec(deg)'][i]),
                                               unit=(u.hourangle, u.deg),
                                               frame='icrs')

                r = FancyArrowPatch((c.ra.degree, c.dec.degree),
                                    (c_after.ra.degree, c_after.dec.degree),
                                    arrowstyle='->',
                                    mutation_scale=10,
                                    transform=ax.get_transform('icrs'))

                p = Circle((c.ra.degree, c.dec.degree),
                           0.005,
                           edgecolor=circle_color,
                           facecolor='none',
                           transform=ax.get_transform('icrs'))
                ax.text(c.ra.degree,
                        c.dec.degree - 0.007,
                        asteroids['name'][i],
                        size=12,
                        color='black',
                        ha='center',
                        va='center',
                        transform=ax.get_transform('icrs'))

                r.set_facecolor('none')
                r.set_edgecolor(arrow_color)
                ax.add_patch(p)
                ax.add_patch(r)
        # plt.gca().invert_xaxis()
        if invert_yaxis == "True":
            plt.gca().invert_yaxis()
        plt.show()
        print(asteroids)
        return True
Example #34
def make_local_connectivity_scorr(func_img, clust_mask_img, thresh):
    """
    Constructs a spatially constrained connectivity matrix from an fMRI dataset.
    The weights w_ij of the connectivity matrix W correspond to the
    spatial correlation between the whole brain FC maps generated from the
    time series from voxel i and voxel j. Connectivity is only calculated
    between a voxel and the 27 voxels in its 3D neighborhood
    (face touching and edge touching).

    Parameters
    ----------
    func_img : Nifti1Image
        4D Nifti1Image containing fMRI data.
    clust_mask_img : Nifti1Image
        3D NIFTI file containing a mask, which restricts the voxels used in the analysis.
    thresh : str
        Threshold value, correlation coefficients lower than this value
        will be removed from the matrix (set to zero).

    Returns
    -------
    W : Compressed Sparse Matrix
        A Scipy sparse matrix, with weights corresponding to the spatial correlation between the time series from
        voxel i and voxel j

    References
    ----------
    .. Adapted from PyClusterROI
    """
    from scipy.sparse import csc_matrix
    from scipy import prod
    from itertools import product
    from pynets.fmri.clustools import indx_1dto3d, indx_3dto1d

    neighbors = np.array(sorted(sorted(sorted([list(x) for x in list(set(product({-1, 0, 1}, repeat=3)))],
                                              key=lambda k: (k[0])), key=lambda k: (k[1])), key=lambda k: (k[2])))

    # Read in the mask
    msz = clust_mask_img.shape

    # Convert the 3D mask array into a 1D vector
    mskdat = np.reshape(np.asarray(clust_mask_img.dataobj).astype('bool'), prod(msz))

    # Determine the 1D coordinates of the non-zero
    # elements of the mask
    iv = np.nonzero(mskdat)[0]
    sz = func_img.shape

    # Reshape fmri data to a num_voxels x num_timepoints array
    imdat = np.reshape(np.asarray(func_img.dataobj).astype('float32'), (prod(sz[:3]), sz[3]))

    # Mask the dataset to only the in-mask voxels
    imdat = imdat[iv, :]
    imdat_sz = imdat.shape

    # Z-score fmri time courses, this makes calculation of the
    # correlation coefficient a simple matrix product
    imdat_s = np.tile(np.std(imdat, 1), (imdat_sz[1], 1)).T

    # Replace 0 with really large number to avoid div by zero
    imdat_s[imdat_s == 0] = 1000000
    imdat_m = np.tile(np.mean(imdat, 1), (imdat_sz[1], 1)).T
    imdat = (imdat - imdat_m) / imdat_s

    # Set values with no variance to zero
    imdat[imdat_s == 0] = 0
    imdat[np.isnan(imdat)] = 0

    # Remove voxels with zero variance, do this here
    # so that the mapping will be consistent across
    # subjects
    vndx = np.nonzero(np.var(imdat, 1) != 0)[0]
    iv = iv[vndx]
    m = len(iv)
    print(m, ' # of non-zero valued or non-zero variance voxels in the mask')

    # Construct a sparse matrix from the mask
    msk = csc_matrix((vndx + 1, (iv, np.zeros(m))), shape=(prod(msz), 1), dtype=np.float32)

    sparse_i = []
    sparse_j = []
    sparse_w = [[]]

    for i in range(0, m):
        if i % 1000 == 0:
            print('voxel #', i)

        # Convert index into 3D and calculate neighbors, then convert resulting 3D indices into 1D
        ndx1d = indx_3dto1d(indx_1dto3d(iv[i], sz[:-1]) + neighbors, sz[:-1])

        # Convert 1D indices into masked versions
        ondx1d = msk[ndx1d].todense()

        # Exclude indices not in the mask
        ndx1d = ndx1d[np.nonzero(ondx1d)[0]].flatten()
        ondx1d = np.array(ondx1d[np.nonzero(ondx1d)[0]])
        ondx1d = ondx1d.flatten() - 1

        # Keep track of the index corresponding to the "seed"
        nndx = np.nonzero(ndx1d == iv[i])[0]

        # Extract the time courses corresponding to the "seed"
        # and 3D neighborhood voxels
        tc = np.array(imdat[ondx1d.astype('int'), :])

        # Ensure that the "seed" has variance, if not just skip it
        if np.var(tc[nndx, :]) == 0:
            continue

        # Calculate functional connectivity maps for "seed"
        # and 3D neighborhood voxels
        R = np.corrcoef(np.dot(tc, imdat.T) / (sz[3] - 1))

        if np.linalg.matrix_rank(R) == 1:
            R = np.reshape(R, (1, 1))

        # Set nans to 0
        R[np.isnan(R)] = 0

        # Set values below thresh to 0
        R[R < thresh] = 0

        # Calculate the spatial correlation between FC maps
        if np.linalg.matrix_rank(R) == 0:
            R = np.reshape(R, (1, 1))

        # Keep track of the indices and the correlation weights
        # to construct sparse connectivity matrix
        sparse_i = np.append(sparse_i, ondx1d, 0)
        sparse_j = np.append(sparse_j, (ondx1d[nndx]) * np.ones(len(ondx1d)))
        sparse_w = np.append(sparse_w, R[nndx, :], 1)

    # Ensure that the weight vector is the correct shape
    sparse_w = np.reshape(sparse_w, prod(np.shape(sparse_w)))

    # Concatenate the i, j, and w_ij vectors
    outlist = sparse_i
    outlist = np.append(outlist, sparse_j)
    outlist = np.append(outlist, sparse_w)

    # Calculate the number of non-zero weights in the connectivity matrix
    n = len(outlist) / 3

    # Reshape the 1D vector read in from infile in to a 3xN array
    outlist = np.reshape(outlist, (3, int(n)))

    m = max(max(outlist[0, :]), max(outlist[1, :])) + 1

    # Make the sparse matrix, CSC format is supposedly efficient for matrix arithmetic
    W = csc_matrix((outlist[2, :], (outlist[0, :], outlist[1, :])), shape=(int(m), int(m)), dtype=np.float32)

    del imdat, msk, mskdat, outlist, m, sparse_i, sparse_j, sparse_w

    return W
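
Note: the z-scoring step above is what turns the seed-to-brain correlation computation into a plain matrix product: once each time course has zero mean and unit (population) standard deviation, the dot product of two rows divided by the number of time points equals their Pearson correlation. A minimal sketch of that identity on hypothetical random data (not part of the original pipeline):

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((5, 100))          # 5 voxels, 100 time points

# Z-score each row with the population std (ddof=0)
z = (x - x.mean(axis=1, keepdims=True)) / x.std(axis=1, keepdims=True)

# Correlation via a matrix product, divided by the number of time points
r_fast = np.dot(z, z.T) / x.shape[1]

assert np.allclose(r_fast, np.corrcoef(x))
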
Example #35
0
    def random_forest(self,
                      col_to_predict,
                      cols=None,
                      cols_to_remove=None,
                      num_splits=25,
                      graph=True,
                      num_vars_graph=10):
        cols = [] if cols is None else cols
        cols_to_remove = [] if cols_to_remove is None else cols_to_remove

        if cols:
            cols_input = cols
        else:
            # TODO: Validate if random forest requires all variables to be numeric (no categoric).
            cols_input = list(self.dataset)
        if col_to_predict in cols_input: cols_input.remove(col_to_predict)
        for col in cols_to_remove:
            if col in cols_input: cols_input.remove(col)

        logger.info("*** Training random forest model...")
        # logger.info("*** Input features: ")
        # logger.info(cols_input)
        # X_train, X_test, y_train, y_test = train_test_split(
        #     self.dataset[cols_input],
        #     self.dataset[col_to_predict],
        #     random_state = 777
        # )
        # output_model.fit(X_train, y_train)
        # prediction = output_model.predict(X = X_test)
        # r2 = r2_score(y_true=y_test, y_pred=prediction)
        # logger.info(f"R2 coefficient (Using training data): {r2}")
        # mse = mean_squared_error(
        #     y_true  = y_test,
        #     y_pred  = prediction,
        #     squared = False
        # )

        model = RandomForestRegressor(n_estimators=10,
                                      criterion='mse',
                                      max_depth=None,
                                      max_features='auto',
                                      oob_score=False,
                                      n_jobs=-1,
                                      random_state=777)

        cv = ShuffleSplit(n_splits=num_splits, test_size=0.3, random_state=777)
        output_models = cross_validate(
            model,
            self.dataset[cols_input],
            self.dataset[col_to_predict],
            cv=cv,
            scoring=["r2", "neg_mean_squared_error"
                     ],  # The smallest the number the better
            return_estimator=True,
        )

        # Use abs since the scoring metric neg_mean_squared_error is negative in order
        # to follow the convention: higher return values are better than lower return
        # values when evaluating the models.
        model_mean = abs(np.mean(output_models["test_neg_mean_squared_error"]))
        model_standard_deviation = abs(
            np.std(output_models["test_neg_mean_squared_error"]))
        max_r2 = np.amax(output_models["test_r2"])
        logger.info(f"*** After {num_splits} folds using cross validation:")
        logger.info(f"    The average MSE is: {model_mean}")
        logger.info(
            f"    The standard deviation of the MSE is: {model_standard_deviation}"
        )
        logger.info(f"    The maximum R2 is: {max_r2}")

        # Choose the best model to show variable importance and graphs
        max_estimator = np.amax(output_models["test_neg_mean_squared_error"])
        max_estimator_index = np.where(
            output_models["test_neg_mean_squared_error"] ==
            max_estimator)[0][0]
        best_model = output_models["estimator"][max_estimator_index]

        # Get the variable importance of the best model found.
        importances = best_model.feature_importances_
        # Summarize feature importance.
        cols_importance = list(zip(cols_input, importances))
        cols_importance_ordered = sorted(cols_importance,
                                         key=lambda x: x[1],
                                         reverse=True)

        for col, importance in cols_importance_ordered:
            logger.info(f"Feature: {col}, Score: {importance}")

        if graph:
            x_axis = [
                "\n".join(wrap(x, 20))
                for x in list(zip(
                    *cols_importance_ordered))[0][:num_vars_graph]
            ]
            y_axis = list(zip(*cols_importance_ordered))[1][:num_vars_graph]
            plt.figure(figsize=(9, 4))
            plt.title("Feature Importance - Random Forest")
            plt.bar(x_axis, y_axis)
            plt.xticks(rotation=90)
            plt.margins(x=0, y=0.1)
            plt.show()
            best_model.top_vars_graph = zip(x_axis, y_axis)

        return best_model, output_models
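
Note: as the comments above explain, scikit-learn negates the MSE ("neg_mean_squared_error") so that its higher-is-better convention holds for every scorer; taking np.amax of the negated scores therefore selects the split with the lowest MSE. A small illustration with made-up per-split values:

import numpy as np

mse = np.array([4.2, 3.1, 5.0])   # hypothetical per-split MSE values
neg_mse = -mse                    # what the "neg_mean_squared_error" scorer reports

assert np.argmax(neg_mse) == np.argmin(mse)    # best split under either view
print(abs(np.mean(neg_mse)), np.std(neg_mse))  # average MSE and its spread, as logged above
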
Example #36
0
    def linear_regression_ridge(self,
                                col_to_predict,
                                cols=None,
                                cols_to_remove=None,
                                num_splits=25,
                                graph=True,
                                num_vars_graph=10):
        cols = [] if cols is None else cols
        cols_to_remove = [] if cols_to_remove is None else cols_to_remove

        if cols:
            cols_input = cols
        else:
            cols_input = list(self.dataset)
        if col_to_predict in cols_input: cols_input.remove(col_to_predict)
        for col in cols_to_remove:
            if col in cols_input: cols_input.remove(col)

        logger.info("*** Training linear regression model...")
        reg = Ridge()
        cv = ShuffleSplit(n_splits=num_splits, test_size=0.3, random_state=123)
        output_models = cross_validate(
            reg,
            self.dataset[cols_input],
            self.dataset[col_to_predict],
            cv=cv,
            scoring=["r2", "neg_mean_squared_error"
                     ],  # The higher the number the better
            return_estimator=True,
        )

        # Use abs since the scoring metric neg_mean_squared_error is negative in order
        # to follow the convention: higher return values are better than lower return
        # values when evaluating the models.
        model_mean = abs(np.mean(output_models["test_neg_mean_squared_error"]))
        model_standard_deviation = abs(
            np.std(output_models["test_neg_mean_squared_error"]))
        max_r2 = np.amax(output_models["test_r2"])
        logger.info(f"*** After {num_splits} folds using cross validation:")
        logger.info(f"    The average MSE is: {model_mean}")
        logger.info(
            f"    The standard deviation of the MSE is: {model_standard_deviation}"
        )
        logger.info(f"    The maximum R2 is: {max_r2}")

        # Choose the best model to show variable importance and graphs
        max_estimator = np.amax(output_models["test_neg_mean_squared_error"])
        max_estimator_index = np.where(
            output_models["test_neg_mean_squared_error"] ==
            max_estimator)[0][0]
        best_model = output_models["estimator"][max_estimator_index]

        # Get the variable importance of the best model found. In this model the absolute
        # value measures the importance of each feature.
        importances = tuple(abs(item) for item in best_model.coef_)
        # Summarize feature importance.
        cols_importance = list(zip(cols_input, importances))
        cols_importance_ordered = sorted(cols_importance,
                                         key=lambda x: x[1],
                                         reverse=True)
        for col, importance in cols_importance_ordered:
            logger.info(f"Feature: {col}, Score: {importance}")

        if graph:
            x_axis = [
                "\n".join(wrap(x, 20))
                for x in list(zip(
                    *cols_importance_ordered))[0][:num_vars_graph]
            ]
            y_axis = list(zip(*cols_importance_ordered))[1][:num_vars_graph]
            plt.figure(figsize=(9, 4))
            plt.title("Feature Importance - Linear Regression")
            plt.bar(x_axis, y_axis)
            plt.xticks(rotation=90)
            plt.margins(x=0, y=0.1)
            plt.show()
            best_model.top_vars_graph = zip(x_axis, y_axis)

        return best_model, output_models
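
Note: the ridge model above ranks features by the absolute value of their coefficients; this only reflects importance fairly when the inputs are on comparable scales, so treat it as a rough ordering. A minimal, hypothetical sketch of the idea:

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 3))
y = 0.1 * X[:, 0] + 2.0 * X[:, 1] + 0.01 * rng.standard_normal(200)

reg = Ridge().fit(X, y)
importances = np.abs(reg.coef_)
print(importances.argsort()[::-1])  # feature 1 should rank first
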
def reclassify(accuracy, evidence):
    """
    This function reclassifies correct responses that are likely to be correct by chance as incorrect responses in
    discrimination experiments using some evidence. We've recomputed 140 classification images with accuracies reclassified by
    response time, for example, and increased SNR by more than 10% on average with little bias (Gosselin et al., submitted).

    The function takes 2 inputs: _accuracy_, a vector equal to 0 when the response to a trial was incorrect and to 1 when it
    was correct; and _evidence_, a vector of the same length as _accuracy_ giving the reclassification evidence for each
    corresponding trial, such as the response times. Note that the average of the evidence for incorrect responses can be either
    greater or smaller than the average of the evidence for correct responses, but it must differ. The function has 2
    outputs: _accuracy_reclass_, a vector of the same length as _accuracy_ equal to 0 when the response to a trial was incorrect
    or when a correct response was reclassified as incorrect, and to 1 when the response was correct and
    wasn't reclassified; and _stats_, a dictionary with 5 items: _reclass_criterion_, a reclassification
    evidence criterion such that when _reclass_polarity_ * _evidence_ > _reclass_polarity_ * _reclass_criterion_
    a correct response was reclassified as incorrect; _reclass_polarity_, either 1 or -1, indicating how to interpret
    the criterion; _reclass_index_, the indices of the correct responses reclassified as incorrect; _reclass_efficiency_, the estimated
    proportion of true correct and incorrect responses minus false correct and incorrect responses following reclassification;
    and _reclass_gain_, the ratio between _reclass_efficiency_ and the efficiency prior to reclassification. Note that sqrt(_reclass_gain_)
    approximates the expected SNR gain.

    Gosselin, F., Daigneault, V., Larouche, J.-M. & Caplette, L. (submitted). Reclassifying guesses to increase signal-to-noise ratio
    in psychological experiments.

    Frederic Gosselin, 01/06/2020
    [email protected]

    Adapted to Python by
    Laurent Caplette, 17/08/2020
    [email protected]
    """

    evidence = np.array(evidence).astype(np.float32)  # in case a list is passed
    accuracy = np.array(accuracy).astype(np.float32)  # in case a list is passed

    if not np.array_equal(np.unique(accuracy), [0, 1]):  # check that accuracy contains only zeros and ones
        raise ValueError("'accuracy' must be composed of zeros and ones")

    if not accuracy.shape == evidence.shape:  # check that the variables are the same size
        raise ValueError("'accuracy' and 'evidence' must have the same size")

    polarity = 1  # default evidence polarity
    if (np.mean(evidence[accuracy == 0]) - np.mean(evidence[accuracy == 1])) < 0:  # the evidence is greater for incorrect than correct trials
        polarity = -1  # change evidence polarity
    evidence *= polarity  # the evidence multiplied by its polarity

    nb_std = 2
    outliers = evidence > np.mean(evidence) + nb_std * np.std(evidence)  # temporary outliers to help frame the histogram
    _, bins = np.histogram(evidence[np.logical_not(outliers)], 'fd')  # uses the Freedman-Diaconis rule for bin width
    bin_width = bins[1] - bins[0]  # bin width
    bins = np.arange(bins[0], np.ceil(np.amax(evidence) / bin_width) * bin_width, bin_width)  # complete evidence range, including outliers
    correct_evidence = evidence[accuracy == 1]  # correct response evidences
    n_correct, _ = np.histogram(correct_evidence, bins)  # correct evidences histograms
    incorrect_evidence = evidence[accuracy == 0]  # incorrect response evidences
    n_incorrect, _ = np.histogram(incorrect_evidence, bins)  # incorrect evidences histogram; this is also the false correct evidences histogram

    # calculates frequency distribution
    x = (bins[:-1] + bins[1:]) / 2  # centers of the histogram bins
    s_x = np.linspace(np.amin(x), np.amax(x), ((np.amax(x) - np.amin(x)) // .01).astype(np.int32))  # fine histogram bins for interpolation
    for ii in range(len(bins) - 1):  # replaces histogram bin centers by histogram bin averages whenever possible
        ind = np.where(np.logical_and(evidence >= bins[ii], evidence < bins[ii + 1]))[0]
        if ind.size != 0:
            x[ii] = np.mean(evidence[ind])
    f1 = interp.CubicSpline(x, n_correct)
    s_n_correct = f1(s_x)
    f2 = interp.CubicSpline(x, n_incorrect)
    s_n_incorrect = f2(s_x)
    s_n_true_correct = s_n_correct - s_n_incorrect

    # finds the best evidence criteria
    N = np.sum(s_n_correct) + np.sum(s_n_incorrect)  # number of points in all interpolated frequency distributions; general case
    I_o = np.sum(s_n_incorrect)  # number of points in interpolated n_incorrect frequency distribution
    cCR = np.cumsum(s_n_true_correct)  # cumulative interpolated true correct evidence frequency distribution
    cM = np.cumsum(s_n_incorrect)  # cumulative interpolated false correct evidence frequency distribution
    s_efficiency = (4 * I_o - N + 2 * (cCR - cM)) / N  # interpolated efficiency as a function of evidence reclassification criterion; general case
    s_ind = np.argmax(s_efficiency)
    reclass_criterion = s_x[s_ind]  # chosen evidence criterion

    # reclassifies correct responses as incorrect responses
    accuracy_reclass = accuracy.copy()  # copy so the input accuracy vector is not modified in place
    reclass_index = np.where(np.logical_and(accuracy == 1, (evidence > reclass_criterion)))[0]  # which correct response should be reclassified as an incorrect
    accuracy_reclass[reclass_index] = 0

    # some statistics
    stats = {
        'reclass_polarity': polarity,
        'reclass_criterion': polarity * reclass_criterion,
        'reclass_index': reclass_index,
        'reclass_efficiency': s_efficiency[s_ind],
        'reclass_gain': s_efficiency[s_ind] / s_efficiency[-1]
    }

    return accuracy_reclass, stats
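
A hypothetical usage sketch of reclassify with synthetic accuracies and response times; the simulated effect size is made up for illustration, and the function's module-level imports (numpy as np, scipy.interpolate as interp) are assumed to be in place:

import numpy as np

rng = np.random.default_rng(1)
n_trials = 500
accuracy = (rng.random(n_trials) < 0.75).astype(float)                  # 1 = correct, 0 = incorrect
response_time = rng.normal(0.9, 0.2, n_trials) + 0.3 * (accuracy == 0)  # guesses tend to be slower

accuracy_reclass, stats = reclassify(accuracy, response_time)
print(len(stats['reclass_index']), 'correct responses reclassified as incorrect')
print('estimated efficiency:', stats['reclass_efficiency'], 'gain:', stats['reclass_gain'])
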
Example #38
0
point_num = 12
th = int(area_list[-1] - area_list[0]) / point_num
temp_volume_list = [[] for i in range(point_num)]
temp_deviation_list = []
for item in area_list:
    for i in range(point_num):
        if (area_list[0] + th * i) < item and (area_list[0] + th *
                                               (i + 1)) >= item:
            temp_volume_list[i].append(item**3)
    if (area_list[0] + th * 3) < item and (area_list[0] + th * 7) >= item:
        temp_deviation_list.append(item)

sum_volume_list = []
for i in range(point_num):
    sum_volume_list.append(sum(temp_volume_list[i]))
temp_sum = sum(sum_volume_list)
for i in range(point_num):
    sum_volume_list[i] /= temp_sum
grade = sum(sum_volume_list[4:8])
standard_deviation = np.std(np.array(temp_deviation_list))
print(grade, standard_deviation)

plt.plot(sum_volume_list)
plt.ylabel("Volume Ratio %")
plt.xlabel("Relative Volume")
plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
plt.show()

# plt.bar(rad, y, alpha=0.5, width=0.5, label='Partical Size', lw=3)
# plt.legend()
Example #39
0
def train_PG(exp_name, env_name, n_iter, gamma, min_timesteps_per_batch,
             max_path_length, learning_rate, reward_to_go, animate, logdir,
             normalize_advantages, nn_baseline, seed, n_layers, size):

    start = time.time()

    #========================================================================================#
    # Set Up Logger
    #========================================================================================#
    setup_logger(logdir, locals())

    #========================================================================================#
    # Set Up Env
    #========================================================================================#

    # Make the gym environment
    env = gym.make(env_name)

    # Set random seeds
    tf.set_random_seed(seed)
    np.random.seed(seed)
    env.seed(seed)

    # Maximum length for episodes
    max_path_length = max_path_length or env.spec.max_episode_steps

    # Is this env continuous, or self.discrete?
    discrete = isinstance(env.action_space, gym.spaces.Discrete)

    # Observation and action sizes
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.n if discrete else env.action_space.shape[0]

    #========================================================================================#
    # Initialize Agent
    #========================================================================================#
    computation_graph_args = {
        'n_layers': n_layers,
        'ob_dim': ob_dim,
        'ac_dim': ac_dim,
        'discrete': discrete,
        'size': size,
        'learning_rate': learning_rate,
    }

    sample_trajectory_args = {
        'animate': animate,
        'max_path_length': max_path_length,
        'min_timesteps_per_batch': min_timesteps_per_batch,
    }

    estimate_return_args = {
        'gamma': gamma,
        'reward_to_go': reward_to_go,
        'nn_baseline': nn_baseline,
        'normalize_advantages': normalize_advantages,
    }

    agent = Agent(computation_graph_args, sample_trajectory_args,
                  estimate_return_args)

    # build computation graph
    agent.build_computation_graph()

    # tensorflow: config, session, variable initialization
    agent.init_tf_sess()

    #========================================================================================#
    # Training Loop
    #========================================================================================#

    total_timesteps = 0
    for itr in range(n_iter):
        print("********** Iteration %i ************" % itr)
        paths, timesteps_this_batch = agent.sample_trajectories(itr, env)
        total_timesteps += timesteps_this_batch

        # Build arrays for observation, action for the policy gradient update by concatenating
        # across paths
        ob_no = np.concatenate([path["observation"] for path in paths])
        ac_na = np.concatenate([path["action"] for path in paths])
        re_n = [path["reward"] for path in paths]

        q_n, adv_n = agent.estimate_return(ob_no, re_n)
        agent.update_parameters(ob_no, ac_na, q_n, adv_n)

        # Log diagnostics
        returns = [path["reward"].sum() for path in paths]
        ep_lengths = [pathlength(path) for path in paths]
        logz.log_tabular("Time", time.time() - start)
        logz.log_tabular("Iteration", itr)
        logz.log_tabular("AverageReturn", np.mean(returns))
        logz.log_tabular("StdReturn", np.std(returns))
        logz.log_tabular("MaxReturn", np.max(returns))
        logz.log_tabular("MinReturn", np.min(returns))
        logz.log_tabular("EpLenMean", np.mean(ep_lengths))
        logz.log_tabular("EpLenStd", np.std(ep_lengths))
        logz.log_tabular("TimestepsThisBatch", timesteps_this_batch)
        logz.log_tabular("TimestepsSoFar", total_timesteps)
        logz.dump_tabular()
        logz.pickle_tf_vars()
Example #40
0
    if args.query_url != 'none':
        print('Query url: ' + args.query_url)
        if args.query_url in url_dict and url_dict[args.query_url] in res:
            print('page rank =' + str(round(res[url_dict[args.query_url]], 7)))
        else:
            print('page rank = 0')

    print('pagerank stats: ')
    max_hub = 0
    min_hub = 1
    median_hub = 0
    std_hub = 0
    i = 0
    hubs = np.ones(shape=(500))

    for tup in sorted_pr[:500]:
        if tup[1] > max_hub:
            max_hub = tup[1]
        if tup[1] < min_hub:
            min_hub = tup[1]
        if i == 249 or i == 250:
            median_hub += tup[1]
        hubs[i] = tup[1]
        i += 1

    print('max = ' + str(max_hub))
    print('min = ' + str(min_hub))
    print('median = ' + str(median_hub / 2))
    print('std = ' + str(np.std(hubs)))
Example #41
0
        # plt.legend(loc='upper right')
        plt.xlim(min(vals), max(vals))
        plt.savefig('output/dist.png')
        plt.show()
        plt.close()

    if 'walkers' in args.plot:
        file = 'output/dist.dat'

        vals = []
        with open(file) as f:
            for line in f:
                vals.append(float(line))
        steps = np.arange(0, len(vals))
        avg = round(np.mean(vals), 3)
        std = round(np.std(vals) / math.sqrt(len(vals)), 3)
        avg_array = np.array([avg for i in vals])

        plt.figure(figsize=[16, 5])
        plt.step(steps,
                 vals,
                 color='k',
                 alpha=.7,
                 linewidth=1,
                 label=r'$T=100$')
        plt.plot(steps,
                 avg_array,
                 color='r',
                 label=r'$<E>=%s \pm %s$' % (str(avg), str(std)))
        plt.fill_between(steps,
                         avg_array - std,
Example #42
0
			peak1Array.append(third_list[i][1] * 10**3)
		elif third_list[i][0] == max_index:
			peak3Array.append(third_list[i][1] * 10**3)
		else:
			peak2Array.append(third_list[i][1] * 10**3)

# Take the mean values
peak1_mean = np.mean(peak1Array)
peak2_mean = np.mean(peak2Array)
peak3_mean = np.mean(peak3Array)
print("peak1: " + str(peak1_mean))
print("peak2: " + str(peak2_mean))
print("peak3: " + str(peak3_mean))

# Standard error of the mean (std / sqrt(N)) for each peak
std1 = np.std(peak1Array)/math.sqrt(len(peak1Array))
std2 = np.std(peak2Array)/math.sqrt(len(peak2Array))
std3 = np.std(peak3Array)/math.sqrt(len(peak3Array))
print("std1: " + str(std1))
print("std2: " + str(std2))
print("std3: " + str(std3))
f_open.close()

# Create root file
rootFile = '%speaks_run_%s.root' % (RootFilePath, run)
f_root = TFile(rootFile, "RECREATE")
treeName = 'data'
tree = TTree(treeName, treeName)
run_array = np.zeros(1, dtype=np.dtype('u4'))
timestamp_array = np.zeros(1, dtype=np.float32)
mean1_array = np.zeros(1, dtype=np.float32)
def sort_by_target(mnist):
    reorder_train = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[:60000])]))[:, 1]
    reorder_test = np.array(sorted([(target, i) for i, target in enumerate(mnist.target[60000:])]))[:, 1]
    mnist.data[:60000] = mnist.data[reorder_train]
    mnist.target[:60000] = mnist.target[reorder_train]
    mnist.data[60000:] = mnist.data[reorder_test + 60000]
    mnist.target[60000:] = mnist.target[reorder_test + 60000]

# Get MNIST data, normalize, and divide by level
# mnist = fetch_openml('MNIST original', data_home='./data')
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.target = mnist.target.astype(np.int8)  # fetch_openml() returns targets as strings
sort_by_target(mnist)  # fetch_openml() returns an unsorted dataset

mu = np.mean(mnist.data.astype(np.float32), 0)
sigma = np.std(mnist.data.astype(np.float32), 0)
mnist.data = (mnist.data.astype(np.float32) - mu)/(sigma+0.001)
mnist_data = []
for i in trange(10):
    idx = mnist.target==i
    mnist_data.append(mnist.data[idx])

print([len(v) for v in mnist_data])

###### CREATE USER DATA SPLIT #######
# Assign 10 samples to each user
X = [[] for _ in range(1000)]
y = [[] for _ in range(1000)]
idx = np.zeros(10, dtype=np.int64)
for user in range(1000):
    for j in range(2):
Example #44
0
    def update_parameters(self, ob_no, ac_na, q_n, adv_n):
        """
            Update the parameters of the policy and (possibly) the neural network baseline,
            which is trained to approximate the value function.

            arguments:
                ob_no: shape: (sum_of_path_lengths, ob_dim)
                ac_na: shape: (sum_of_path_lengths).
                q_n: shape: (sum_of_path_lengths). A single vector for the estimated q values
                    whose length is the sum of the lengths of the paths
                adv_n: shape: (sum_of_path_lengths). A single vector for the estimated
                    advantages whose length is the sum of the lengths of the paths

            returns:
                nothing

        """
        #====================================================================================#
        #                           ----------PROBLEM 6----------
        # Optimizing Neural Network Baseline
        #====================================================================================#
        if self.nn_baseline:
            # If a neural network baseline is used, set up the targets and the inputs for the
            # baseline.
            #
            # Fit it to the current batch in order to use for the next iteration. Use the
            # baseline_update_op you defined earlier.
            #
            # Hint #bl2: Instead of trying to target raw Q-values directly, rescale the
            # targets to have mean zero and std=1. (Goes with Hint #bl1 in
            # Agent.compute_advantage.)

            # YOUR_CODE_HERE
            # raise NotImplementedError
            target_n = (q_n - np.mean(q_n)) / np.std(q_n)
            self.sess.run(self.baseline_update_op,
                          feed_dict={
                              self.sy_ob_no: ob_no,
                              self.sy_target_n: target_n
                          })

        #====================================================================================#
        #                           ----------PROBLEM 3----------
        # Performing the Policy Update
        #====================================================================================#

        # Call the update operation necessary to perform the policy gradient update based on
        # the current batch of rollouts.
        #
        # For debug purposes, you may wish to save the value of the loss function before
        # and after an update, and then log them below.


        # YOUR_CODE_HERE
        self.sess.run(self.update_op,
                      feed_dict={
                          self.sy_ob_no: ob_no,
                          self.sy_ac_na: ac_na,
                          self.sy_adv_n: adv_n
                      })
Example #45
0
    'data/glass_data_7.txt': 0.2
}

for test in tests:
    data_instances = []
    data_file = open(test)
    print("Running with %s" % test)
    for line in data_file:
        # Digest read data
        line_split = line.split(',')
        data_instances.append(list(map(float, line_split)))  # list() so np.array gets numeric rows
    data_instances = np.array(data_instances)
    # Normalize continuous attributes
    if 'iris' in test:
        for column in data_instances.T:
            column[:] = (column - np.mean(column)) / (2.0 * np.std(column))  # in-place so the normalization sticks
    # Shuffle data instances
    np.random.shuffle(data_instances)

    data_indices = [idx for idx in range(data_instances.shape[0])]
    # 10-fold cross validation
    fold_size = data_instances.shape[0] // 10  # integer fold size so it can be used for slicing
    total_performance = 0.0
    for holdout_fold_idx in range(10):
        print("Cross validation fold %d" % (holdout_fold_idx + 1))
        # training_indices = data_indices - holdout_fold indices
        training_indices = np.array(
            np.setdiff1d(
                data_indices,
                data_indices[fold_size * holdout_fold_idx : \
                             fold_size * holdout_fold_idx + fold_size]))
Example #46
0
def train_and_predict():
    print('-'*30)
    print('Loading and preprocessing train data...')
    print('-'*30)
    imgs_train, imgs_mask_train = load_train_data()

    imgs_train = preprocess(imgs_train)
    imgs_mask_train = preprocess(imgs_mask_train)

    imgs_train = imgs_train.astype('float32')
    mean = np.mean(imgs_train)  # mean for data centering
    std = np.std(imgs_train)  # std for data normalization

    imgs_train -= mean
    imgs_train /= std

    imgs_mask_train = imgs_mask_train.astype('float32')
    imgs_mask_train /= 255.  # scale masks to [0, 1]

    print('-'*30)
    print('Creating and compiling model...')
    print('-'*30)
    model = get_unet()
    model_checkpoint = ModelCheckpoint('unet.hdf5', monitor='loss', save_best_only=True)

    print('-'*30)
    print('Fitting model...')
    print('-'*30)
    model.fit(imgs_train, imgs_mask_train, batch_size=32, nb_epoch=20, verbose=1, shuffle=True,
              callbacks=[model_checkpoint])

    print('-'*30)
    print('Loading and preprocessing test data...')
    print('-'*30)
    imgs_test, imgs_mask_test_truth = load_test_data()
    imgs_test = preprocess(imgs_test)

    imgs_test = imgs_test.astype('float32')
    imgs_test -= mean
    imgs_test /= std

    print('-'*30)
    print('Loading saved weights...')
    print('-'*30)
    model.load_weights('unet.hdf5')

    print('-'*30)
    print('Predicting masks on test data...')
    print('-'*30)
    imgs_mask_test_result = model.predict(imgs_test, verbose=1)
    
    imgs_mask_test_result = postprocess(imgs_mask_test_result)
    # test results are converted to 0-255 due to resizing
    print(imgs_mask_test_result.max())

    imgs_mask_test_result = imgs_mask_test_result.astype('float32')
    imgs_mask_test_result /= 255
    print(imgs_mask_test_truth.shape)



    imgs_mask_test_truth = imgs_mask_test_truth.astype('float32')
    imgs_mask_test_truth /= 255

    test_truth = imgs_mask_test_truth.flatten()
    test_result = imgs_mask_test_result.flatten()

    print(test_result.shape)
    print(test_truth.shape)
    intersect = test_result * test_truth
    dice_score = (2. * intersect.sum()) / (test_truth.sum() + test_result.sum())
    print('Dice coefficient on testing data is : {0:.3f}.'.format(dice_score))
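
For reference, with binary masks the Dice score computed above reduces to 2*|A∩B| / (|A| + |B|); a tiny hand-checkable example on made-up flattened masks:

import numpy as np

truth = np.array([1., 1., 0., 0., 1.], dtype=np.float32)
result = np.array([1., 0., 0., 1., 1.], dtype=np.float32)

intersect = truth * result
dice = (2. * intersect.sum()) / (truth.sum() + result.sum())
print(dice)  # 2 * 2 / (3 + 3) = 0.666...
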
Example #47
0
def evaluate_recon(model, dataloader, n_samples, cuda):

    # Models in Eval mode
    model.eval()

    # Setup Average Meters
    # Single Modality CMI meters
    single_image_meter = utils.AverageMeter()
    single_trajectory_meter = utils.AverageMeter()
    single_sound_meter = utils.AverageMeter()

    # Double Modality CMI meters
    double_image_sound_meter = utils.AverageMeter()
    double_image_trajectory_meter = utils.AverageMeter()
    double_sound_trajectory_meter = utils.AverageMeter()

    # Triple Modality CMI meters
    all_mods_meter = utils.AverageMeter()

    # Main Evaluation Loop
    with torch.no_grad():

        for batch_idx, data in enumerate(tqdm(dataloader)):

            # Original data
            img_data = data[1]
            trj_data = data[2]
            snd_data = data[3]
            sym_data = torch.nn.functional.one_hot(data[0],
                                                   num_classes=10).float()
            labels = data[0]

            # To generate multiple samples
            labels = labels.repeat_interleave(repeats=n_samples, dim=0)
            img_data = img_data.repeat_interleave(repeats=n_samples, dim=0)
            trj_data = trj_data.repeat_interleave(repeats=n_samples, dim=0)
            snd_data = snd_data.repeat_interleave(repeats=n_samples, dim=0)
            sym_data = sym_data.repeat_interleave(repeats=n_samples, dim=0)

            if cuda:
                labels = labels.cuda()
                img_data = img_data.cuda()
                trj_data = trj_data.cuda()
                snd_data = snd_data.cuda()
                sym_data = sym_data.cuda()

            # Single Modality CMI
            # From image
            _, _, _, cm_sym = model.generate(x_img=img_data)
            sym_acc = compute_accuracy(samples=cm_sym[1],
                                       target=labels,
                                       classifier=None)
            single_image_meter.update(sym_acc.item())

            # From sound
            _, _, _, cm_sym = model.generate(x_snd=snd_data)
            snd_acc = compute_accuracy(samples=cm_sym[1],
                                       target=labels,
                                       classifier=None)
            single_sound_meter.update(snd_acc.item())

            # From trajectory
            _, _, _, cm_sym = model.generate(x_trj=trj_data)
            trj_acc = compute_accuracy(samples=cm_sym[1],
                                       target=labels,
                                       classifier=None)
            single_trajectory_meter.update(trj_acc.item())

            # Double mod CMI encoding
            # From image and sound
            _, _, _, cm_sym = model.generate(x_snd=snd_data, x_img=img_data)
            img_snd_acc = compute_accuracy(samples=cm_sym[1],
                                           target=labels,
                                           classifier=None)
            double_image_sound_meter.update(img_snd_acc.item())

            # From image and trajectory
            _, _, _, cm_sym = model.generate(x_trj=trj_data, x_img=img_data)
            img_trj_acc = compute_accuracy(samples=cm_sym[1],
                                           target=labels,
                                           classifier=None)
            double_image_trajectory_meter.update(img_trj_acc.item())

            # From sound and trajectory
            _, _, _, cm_sym = model.generate(x_snd=snd_data, x_trj=trj_data)
            snd_trj_acc = compute_accuracy(samples=cm_sym[1],
                                           target=labels,
                                           classifier=None)
            double_sound_trajectory_meter.update(snd_trj_acc.item())

            # Triple mod CMI encoding
            _, _, _, cm_sym = model.generate(x_img=img_data,
                                             x_snd=snd_data,
                                             x_trj=trj_data)
            all_mods_acc = compute_accuracy(samples=cm_sym[1],
                                            target=labels,
                                            classifier=None)
            all_mods_meter.update(all_mods_acc.item())

        # Compile Results
        sym_acc_scores_dic = {
            'single_image': single_image_meter.avg,
            'single_sound': single_sound_meter.avg,
            'single_trajectory': single_trajectory_meter.avg,
            'double_image_sound': double_image_sound_meter.avg,
            'double_image_trajectory': double_image_trajectory_meter.avg,
            'double_sound_trajectory': double_sound_trajectory_meter.avg,
            'all_mods': all_mods_meter.avg
        }

        single_sym_acc_results = [
            sym_acc_scores_dic['single_image'],
            sym_acc_scores_dic['single_sound'],
            sym_acc_scores_dic['single_trajectory']
        ]

        print("\n Symbol Accuracy:")
        print("   * Single Modality = " +
              str(np.mean(single_sym_acc_results)) + " +-" +
              str(np.std(single_sym_acc_results)))
        print("   * All Modalities = " + str(sym_acc_scores_dic['all_mods']))
        print("\n")

        return sym_acc_scores_dic
data.sample(20)

X = data["text"]
y = data["sentiment"]

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=123)
num_words = 10000
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(x_train)
x_train_tokens = tokenizer.texts_to_sequences(x_train)
x_test_tokens = tokenizer.texts_to_sequences(x_test)

num_tokens = [len(tokens) for tokens in x_train_tokens + x_test_tokens]
num_tokens = np.array(num_tokens)

max_tokens = np.mean(num_tokens) + 2 * np.std(num_tokens)
max_tokens = int(max_tokens)

pad = 'pre'
x_train_pad = pad_sequences(x_train_tokens, maxlen=max_tokens,
                            padding=pad, truncating=pad)
x_test_pad = pad_sequences(x_test_tokens, maxlen=max_tokens,
                           padding=pad, truncating=pad)

model = Sequential()

embedding_size = 8
model.add(Embedding(input_dim=num_words,
                    output_dim=embedding_size,
                    input_length=max_tokens,
                    name='layer_embedding'))
Example #49
0
def df_stdevp(sub_df):
    """Get the population standard deviation"""
    return np.std(sub_df)
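
Note that np.std defaults to the population formula (ddof=0, dividing by N); passing ddof=1, as the next example does, gives the sample standard deviation. A quick comparison:

import numpy as np

x = np.array([2., 4., 4., 4., 5., 5., 7., 9.])
print(np.std(x))           # population std (divide by N)      -> 2.0
print(np.std(x, ddof=1))   # sample std (divide by N - 1)      -> ~2.14
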
Example #50
0
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 11:27:36 2020

@author: merel
"""
import numpy as np

a = np.array([4.36,3.75,4.10,4.86,4.45,4.45,4.28,3.97,4.30,4.09,\
              3.74,3.79,3.91,3.60,4.51,4.59,4.27,3.74,4.30,4.12,\
              3.81,4.44,4.36,4.44,3.90,4.29,4.35,4.16,4.63,3.92,\
              3.90,4.28,4.42,4.54,3.68,4.43,3.84,4.06,4.20,4.01,4.23])
gemiddelde = a.mean()

standaarddeviatie = np.std(a,ddof=1)
print(gemiddelde, standaarddeviatie) 
Example #51
0
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(-4, 4, 0.01)
def f(x):
    return (np.e) ** (-(x+0.00788786) ** 2 / (2 * 0.9933789 ** 2)) / (2 * np.pi * 0.9933789 ** 2) ** 0.5

def g(x):
    return (np.e) ** (-x ** 2 / (2 * 1 ** 2)) / (2 * np.pi * 1 ** 2) ** 0.5

data1 = np.loadtxt('W_4.txt',delimiter=",")
data = data1[:, 0]
mean = np.mean(data)
std = np.std(data)
print(mean, std)

plt.plot(x, f(x), label='BNN')
plt.plot(x, g(x), label='True')
plt.hist(data, bins=100, density=True, histtype='stepfilled', label='NN')
plt.legend(loc='upper left')

plt.title('w1')
plt.show()







import os
import numpy as np
import matplotlib.pyplot as plt

folder = r'C:\Users\magnu\OneDrive\Dokument\KTH\2020HT\DD2412_-_Deep_Learning_Advanced_Course\Project\Results\resnet'

files = next(os.walk(folder))[2]

for filename in files:
    if os.path.splitext(filename)[0][-3:] == 'iou':
        filepath = os.path.join(folder,filename)
        print(filename)

        f = open(filepath,'r')

        null_ious = 0
        ious = []

        lines = f.readlines()

        for line in lines:
            iou = float(line)

            if iou > 0:
                ious.append(iou)
            else:
                null_ious += 1

        
        print(np.mean(ious))
        print(np.std(ious))
def normalize(X, m, s):
    """normalizes (standardizes) a matrix X using the precomputed column means m and standard deviations s"""
    return (X - m) / s
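
Assuming m and s are the per-column mean and standard deviation of X (as the signature suggests), a short usage sketch:

import numpy as np

X = np.array([[1., 2.], [3., 4.], [5., 6.]])
m = np.mean(X, axis=0)
s = np.std(X, axis=0)

Z = normalize(X, m, s)
print(Z.mean(axis=0), Z.std(axis=0))  # ~[0. 0.] and [1. 1.]
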
Example #54
0
    z0 = [-1.193, -3.876]
    t_grid_train = np.linspace(0, 20, data_size)
    t_grid_true = np.linspace(0, 40, data_size_true)

    y_train = odeint(Damped_pendulum, z0, t_grid_train, args=(alpha, beta))
    idx = np.random.choice(np.arange(data_size - batch_time - 1,
                                     dtype=np.int64),
                           batch_size,
                           replace=False)
    y_train = y_train[idx]
    t_grid_train = t_grid_train[idx]

    y_true = odeint(Damped_pendulum, z0, t_grid_true, args=(alpha, beta))

    sigma_normal1 = np.std(y_train[:, 0:1])
    sigma_normal2 = np.std(y_train[:, 1:2])

    sigma_normal = np.asarray([sigma_normal1, sigma_normal2])

    parameters = np.load("parameters.npy")
    precision = np.load("loggammalist.npy")
    loglikelihood = np.load("loglikelihood.npy")
    precision = np.exp(precision)
    print("precision", precision)
    print(parameters.shape)
    loglikelihood = loglikelihood[-N_total:]
    num_samples = parameters.shape[0]
    length_dict = parameters.shape[1]
    num_dim = 2
plt.plot(xvals, yvals, label="KDE")
plt.axvline(mode, label="Mode", c='r')
plt.legend()
plt.show()

plt.hist(data, bins=100, label="Data", alpha=0.5)
plt.axvline(data.mean(), label="Mean", ls="--", c='#f9ee4a')
plt.axvline(np.median(data), label="Median", ls="-", c='#44d9ff')
plt.axvline(mode, label="Mode", ls=":", c='#f95b4a')
plt.legend()
plt.show()
"""
Approximate with gauss dist.
"""
xs = np.linspace(data.min(), data.max(), 100)
ys = st.norm.pdf(xs, loc=np.mean(data), scale=np.std(data))

plt.hist(data, bins=50, density=True, histtype="step", label="Data")
plt.plot(xs, ys, label="Normal approximation")
plt.legend()
plt.ylabel("Probability")
plt.show()
"""
Approximate with gauss dist + skew.
"""
xs = np.linspace(data.min(), data.max(), 100)
ys1 = st.norm.pdf(xs, loc=np.mean(data), scale=np.std(data))
ys2 = st.skewnorm.pdf(xs, st.skew(data), loc=np.mean(data), scale=np.std(data))

plt.hist(data, bins=50, density=True, histtype="step", label="Data")
plt.plot(xs, ys1, label="Normal approximation")
np.min(a)
np.min(a, 1)
np.max(a, 1)

np.ptp(a)  # peak-to-peak (max - min)
##Range of values (maximum - minimum) along an axis.

np.percentile(a, 50)  # percentile (the 50th is the median)
np.percentile(a, 25)

np.median(a)

np.mean(a, 1)
np.mean(a, 0)

np.std(a, 0)  ## standard deviation
np.var(a)  ## variance

# sorting-related functionality
# get the non-zero elements
np.nonzero(a)  # indices of the non-zero elements
# =============================================================================
# (array([0, 0, 1, 1, 1, 2, 2, 2], dtype=int64),
#  array([1, 2, 0, 1, 2, 0, 1, 2], dtype=int64))
# (0,1),(0,2,)...
# =============================================================================
a = np.ones((3, 3))
a[[0, 2, 1, 1, 0], [0, 2, 0, 1, 2]] = 0
np.nonzero(a)
# equivalently, np.where(a != 0) returns the same indices
a[np.where(a != 0)]
def _get_relevant_channels_over_median_peaks(threshold, template):
    median = np.median(np.nanmin(template, axis=0))
    std = np.std(np.nanmin(template, axis=0))
    points_under_median = np.argwhere(template < (median - threshold * std))
    channels_over_threshold = np.unique(points_under_median[:, 1])
    return channels_over_threshold
Example #58
0
                    item_mask = itens[5:]
                    resultados = resultados[5:]

            resultado[j, item_mask] = resultados

            participante_acertos[j] = sum(resultados)
            j = j + 1

    print(' Finished filling in the responses. ')

    #--------------------------------------------------------------------------
    #  Initialize the theta parameter for all participants
    #--------------------------------------------------------------------------

    participante_theta = (participante_acertos - np.mean(participante_acertos)
                          ) / np.std(participante_acertos)

    #--------------------------------------------------------------------------
    #  INITIALIZATION OF THE LOGISTIC REGRESSION MODELS
    #--------------------------------------------------------------------------

    # model for fitting the item parameters (a and b)

    lr_ab = LogisticRegression(
        penalty=REGRESSAO_LOGISTICA_AJUSTE_AB_PENALIDADE,
        C=REGRESSAO_LOGISTICA_AJUSTE_AB_REGULARIZACAO,
        fit_intercept=True)

    # model for fitting the participants' ability (theta)

    lr_theta = LogisticRegression(
Example #59
0
        'batch_size': 100,
        'learn_rate': 1e-3,
        'max_epochs': 1500,
        'early_stop': 5,
        'check_freq': 5,
    }
     
    for argv in sys.argv:
        if('--' == argv[:2] and '=' in argv):
            eq_ind = argv.index('=')
            setting_feature = argv[2:eq_ind]
            setting_value = argv[eq_ind+1:]
            if(setting_feature in ['save', 'plot']):
                training_settings[setting_feature] = (setting_value=='True')
            if(setting_feature == 'model'):
                model_names = [setting_value]
    
    print(training_settings)

    eval_rmses, eval_lls = run_experiment(
        model_names, 'KEGG', dataset, **training_settings)
    print(eval_rmses, eval_lls)
    
    for model_name in model_names:
        rmse_mu = np.mean(eval_rmses[model_name])
        rmse_std = np.std(eval_rmses[model_name])
        ll_mu = np.mean(eval_lls[model_name])
        ll_std = np.std(eval_lls[model_name])
        print('>>> '+model_name)
        print('>> RMSE = {:.4f} \pm {:.4f}'.format(rmse_mu, 1.96*rmse_std))
        print('>> NLPD = {:.4f} \pm {:.4f}'.format(ll_mu, 1.96*ll_std))
    def AnalyzeScaleVariation(filenames):
        import matplotlib.pyplot as pyplot
        import mpl_utils
        import krebs.quantities as Q
        import collections

        data = []
        for fn in filenames:
            with h5py.File(fn, 'r+') as f:
                sample = ObtainDataOfVesselFile(f)
                del sample['message']
                data.append(sample)

        byScale = collections.defaultdict(list)
        for d in data:
            scale = d['scale']
            byScale[scale].append(d)
        for k, v in byScale.items():
            byScale[k] = myutils.zipListOfDicts(v)

        curves = collections.defaultdict(list)
        for k, v in byScale.items():
            res = ComputeSingleNumberAvgStd(v)
            for name, (std, avg) in res.items():
                curves[name].append((std, avg))
        order = np.argsort(np.asarray(curves['scale'])[:, 0])
        for k, v in curves.items():
            curves[k] = np.asarray(v).transpose()[:, order]

        scales = {
            'mvd': (Q.um**-2).asNumber(Q.mm**-2),
            'rbv': 100.,
            'rbf': 60.,
        }

        with mpl_utils.PageWriter('vessel-calibration-analysis',
                                  fileformats=['pdf']) as pdfwriter:
            fig, axes = pyplot.subplots(3,
                                        1,
                                        figsize=mpl_utils.a4size *
                                        np.asarray([0.4, 0.5]))
            for scale, data in byScale.items():
                bins = np.average(
                    data['bins'], axis=0
                )  # sanity check, all bins arrays have equal size, so just try to average the bin boundaries, even if it makes no real sense
                x = bins
                x_rbv = 0.5 * (x[1:] + x[:-1])  # bin center for rBV
                # plot things
                ax = axes[0]
                ya = data['mvd']
                ax.errorbar(x,
                            scales['mvd'] * np.average(ya, axis=0),
                            yerr=scales['mvd'] * np.std(ya, axis=0),
                            label=('h = %0.f' % scale))
                legend = ax.legend(loc=4, fontsize='xx-small')
                ax = axes[1]
                scale = scales['rbv']
                ya = data['rbv']
                ax.errorbar(x_rbv,
                            scale * np.average(ya, axis=0),
                            yerr=scale * np.std(ya, axis=0))
                ax = axes[2]
                ya = data['rbf']
                scale = scales['rbf']
                ax.errorbar(x,
                            scale * np.average(ya, axis=0),
                            yerr=scale * np.std(ya, axis=0))
                axes[0].set(ylabel='mvd [$mm^{-1}$]')
                axes[1].set(ylabel='rbv [$\%$]')
                axes[2].set(ylabel='rbf [$min^{-1}$]', xlabel='$|x| [\mu m]$')
            pyplot.tight_layout()
            pyplot.legend()
            pdfwriter.savefig(fig)

            fig, axes = pyplot.subplots(3,
                                        1,
                                        figsize=mpl_utils.a4size *
                                        np.asarray([0.4, 0.5]))

            for ax in axes:
                ax.grid(linestyle=':', linewidth=0.5, color='#aaaaaa')

            x = curves['scale'][0, :]
            ax = axes[0]
            y, yerr = scales['mvd'] * curves['mvd']
            ax.errorbar(x, y, yerr=yerr)
            ax = axes[1]
            y, yerr = scales['rbv'] * curves['rbv']
            ax.errorbar(x, y, yerr=yerr)
            ax = axes[2]
            y, yerr = scales['rbf'] * curves['rbf']
            ax.errorbar(x, y, yerr=yerr)

            axes[0].set(ylabel='mvd [$mm^{-1}$]')
            axes[1].set(ylabel='rbv [$\%$]')
            axes[2].set(ylabel='rbf [$min^{-1}$]', xlabel='$h [\mu m]$')

            pyplot.tight_layout()
            pdfwriter.savefig(fig)