def get_bar_data(xx, delta, Tmin, Tmax, step):
    T_seg = []
    mean_seg = []
    std_seg = []
    sampleNums = []
    for i in np.arange(Tmin, Tmax, step):
        idx = np.where(np.logical_and(xx >= i, xx < (i + step)))[0]

        if idx.size > 0:
            DTb_block = delta[idx]
        else:
            continue

        mean1 = mean(DTb_block)
        std1 = std(DTb_block)

        idx1 = np.where((abs(DTb_block - mean1) < std1))[0]  # drop points whose deviation from the mean exceeds one std
        if idx1.size > 0:
            DTb_block = DTb_block[idx1]
            mean_seg.append(mean(DTb_block))
            std_seg.append(std(DTb_block))
            sampleNums.append(len(DTb_block))
        else:
            mean_seg.append(0)
            std_seg.append(0)
            sampleNums.append(0)
        T_seg.append(i + step / 2.)

    return np.array(T_seg), np.array(mean_seg), np.array(std_seg), np.array(sampleNums)
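A hypothetical usage sketch with synthetic data; it assumes numpy is imported as np and that the bare mean/std calls above resolve to numpy's functions (e.g. via a star import in the original module).

import numpy as np
xx = np.random.uniform(200.0, 300.0, 5000)     # quantity to bin on, e.g. temperatures
delta = np.random.normal(0.0, 1.0, 5000)       # values averaged within each bin
T_seg, mean_seg, std_seg, sampleNums = get_bar_data(xx, delta, 200.0, 300.0, 10.0)
print(T_seg, mean_seg, sampleNums)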
Example #2
def G_reg1d(xx, yy, ww=None):
    """
    description needed
    ww: weights
    """
    rtn = []
    ab = polyfit(xx, yy, 1, w=ww)
    rtn.append(ab[0])
    rtn.append(ab[1])
    rtn.append(std(yy) / std(xx))
    rtn.append(mean(yy) - rtn[2] * mean(xx))
    r = corrcoef(xx, yy)
    rr = r[0, 1] * r[0, 1]
    rtn.append(rr)
    return rtn
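A hypothetical check on synthetic data, assuming the original module does a star import from numpy so that the bare polyfit/std/mean/corrcoef above resolve:

import numpy as np
x = np.linspace(0.0, 10.0, 100)
y = 2.0 * x + 1.0 + np.random.normal(0.0, 0.5, 100)
slope, intercept, slope2, intercept2, r_squared = G_reg1d(x, y)
print(slope, intercept, r_squared)   # roughly 2, 1, and close to 1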
    def log_det_estimate_shogun(Q):
        logging.debug("Entering")
        op = RealSparseMatrixOperator(csc_matrix(Q))
        engine = SerialComputationEngine()
        linear_solver = CGMShiftedFamilySolver()
        accuracy = 1e-3
        eigen_solver = LanczosEigenSolver(op)
        eigen_solver.set_min_eigenvalue(OzonePosterior.ridge)
        op_func = LogRationalApproximationCGM(op, engine, eigen_solver, linear_solver, accuracy)

        # limit computation time
        linear_solver.set_iteration_limit(1000)
        eigen_solver.set_max_iteration_limit(1000)
        
        logging.info("Computing Eigenvalues (only largest)")
        eigen_solver.compute()
        
        trace_sampler = ProbingSampler(op)
        log_det_estimator = LogDetEstimator(trace_sampler, op_func, engine)
        n_estimates = 1
        logging.info("Sampling log-determinant with probing vectors and rational approximation")
        estimates = log_det_estimator.sample(n_estimates)
        
        logging.debug("Leaving")
        return mean(estimates)
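For a small dense matrix, the exact log-determinant that this Shogun-based routine approximates can be checked with numpy alone; a minimal sketch, not part of the original code:

import numpy as np
A = np.diag([1.0, 2.0, 3.0]) + 0.1 * np.eye(3)     # small positive definite test matrix
sign, logdet = np.linalg.slogdet(A)
print(logdet)                                       # exact value to compare an estimate against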
Example #5
 def _rescaleInput(self):
   '''If scaled == True : sets self._dataRange equal to a 1-dim numpy.array containing the ranges in each of m 
   feature space dimensions, and divides self.Xi through by these ranges (leaving the data in a 1*1*1 cube); 
   if the bandwidth, h, is not set, this defaults to 0.1 (i.e. 10% of the range). If scaled == False and h is not set, 
   h defaults to 10% of the range in each feature space dimension. In both the scaled and not scaled cases,
   if the segment length, t0, is not set this defaults to mean(h).
   '''
   scaled = self._lpcParameters['scaled']
   h = self._lpcParameters['h']
   if scaled: 
     data_range = numpy.max(self.Xi, axis = 0) - numpy.min(self.Xi, axis = 0) #calculate ranges of each dimension
     if any(data_range == 0):
        raise ValueError, 'Data cannot be scaled because the range in at least 1 direction is zero (i.e. data lies wholly in a plane x/y/z = c)'
     self._dataRange = data_range
     self.Xi = self.Xi / self._dataRange
     if h is None:
       self.resetScaleParameters(0.1, self._lpcParameters['t0'])  
   else:
     if h is None:
       self._dataRange = numpy.max(self.Xi, axis = 0) - numpy.min(self.Xi, axis = 0) #calculate ranges of each dimension
       h = list(0.1 * self._dataRange)
       self.resetScaleParameters(h, self._lpcParameters['t0'])
   #make sure that t0 is set 
   if self._lpcParameters['t0'] is None:
     self._lpcParameters['t0'] = mean(h)  
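A minimal standalone sketch of the same range scaling, assuming a plain (n, m) numpy array in place of the class attribute self.Xi used above:

import numpy
Xi = numpy.random.rand(100, 3) * numpy.array([2.0, 5.0, 10.0])
data_range = numpy.max(Xi, axis=0) - numpy.min(Xi, axis=0)   # per-dimension ranges
Xi_scaled = Xi / data_range                                   # data now lies within a unit cube
h = 0.1                                                       # default bandwidth: 10% of the (unit) range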
Example #6
 def __init__(self, **params):
   '''
   Parameters
   ----------
   
   ms_h : float, sets the bandwidth of the mean shift algorithm, defaults to None, whereby the algorithm automatically
   determines the bandwidth
   
   automatic_ms_h : bool, if True forces the algorithm to determine its own bandwidth, overrides any ms_h setting
   
   ms_sub : float, sets the percentage (0 < ms_sub <= 100) of the data points supplied to self.__call__ that are used to compute the ms seed
   points
   
   rho_threshold : float, ratio of 2nd largest to largest cluster eigenvalues, above which cluster centers are
   removed from the output
   ''' 
   super(lpcMeanShift, self).__init__()
   self._lpcParameters = { 'ms_h': None,
                           'automatic_ms_h': False, 
                           'ms_sub': 30,
                           'rho_threshold': 0.2
                         }
   self._prm_list = [self._lpcParameters] 
   self.user_prm = None #extension of parameter set disallowed
   self._type_check.update({ 'ms_h': lambda x: (x is None) or lpcMeanShift._positivityCheck(x) or (isinstance(x, list) and all(map(lpcMeanShift._positivityCheck, x)) ) ,
                             'automatic_ms_h': (bool,), 
                              'ms_sub': lambda x: lpcMeanShift._positivityCheck(x) and x <= 100,
                              'rho_threshold': lambda x: lpcMeanShift._positivityCheck(x) and x < 1
                           })
   self.set(**params)
   if self._lpcParameters['automatic_ms_h'] or self._lpcParameters['ms_h'] is None :
     self._meanShift = MeanShift()
   else:
     self._meanShift = MeanShift(bandwidth = mean(self._lpcParameters['ms_h']))
Example #7
 def evaluateKernel(self, kernelArgs):
     cvg = crossValidationGen(self._totNumFolds, self._data, self._labels)
             
     errorList = []
     
      # coerce kernelArgs into an iterable: a numpy scalar is unwrapped via tolist()
      # and, if the result is still not iterable, wrapped in a single-element list
      try:
          iter(kernelArgs)
      except TypeError:
          kernelArgs = kernelArgs.tolist()
      try:
          iter(kernelArgs)
      except TypeError:
          kernelArgs = [kernelArgs]
     
     for foldTrainData, foldTrainLabels, foldTestData, foldTestLabels in cvg:
         foldFileIdentifier = '-'.join((self._fileIdentifier, str(cvg.get_cur_fold()), str(cvg.get_num_folds())))
         #get the test and train kernels
         (trainKernel, testKernel) = createAndSaveTestAndTrainKernels(foldTrainData, foldTestData, self._datasetDir, self._kernelFunct, foldFileIdentifier, *kernelArgs)
         #trainKernel = createKernel(foldTrainData, foldTrainData, self._kernelFunct, *kernelArgs)
         #testKernel = createKernel(foldTestData, foldTrainData, self._kernelFunct, *kernelArgs)
         #train the model
         #model = trainSVM(trainKernel, foldTrainLabels)
         foldModelFileLoc = getModelFileLoc(getKernelDir(getKernelFileLoc(self._datasetDir, self._kernelFunct.__name__, foldFileIdentifier, True, kernelArgs)), 'svm', foldFileIdentifier)
         model = trainSVMAndSave(foldModelFileLoc, trainKernel, foldTrainLabels)
         #predict using the model
         predictedLabels = predictSVMAndSave(model, testKernel, getPredictFileLoc(getModelDir(foldModelFileLoc), foldFileIdentifier))
         #evaluate the results
         foldAccuracy = evaluations(foldTestLabels, predictedLabels)[0]
         foldError = 100 - foldAccuracy
         errorList.append(foldError)
     meanError = mean(errorList)
     print kernelArgs, meanError
     return meanError
Example #8
def calcN(classKernels, trainLabels):
    N = zeros((len(trainLabels), len(trainLabels)))
    for i, l in enumerate(unique(trainLabels)):
        numExamplesWithLabel = len(where(trainLabels == l)[0])
        Idiff = identity(numExamplesWithLabel, Float64) - (1.0 / numExamplesWithLabel) * ones(numExamplesWithLabel, Float64)
        firstDot = dot(classKernels[i], Idiff)
        labelTerm = dot(firstDot, transpose(classKernels[i]))
        N += labelTerm
    N = nan_to_num(N)
    #make N more numerically stable
    #if I had more time, I would train this parameter, but I don't
    additionToN = ((mean(diag(N)) + 1) / 100.0) * identity(N.shape[0], Float64) 
    N += additionToN
            
    #make sure N is invertible
    for i in range(1000):
        try:
            inv(N)
            break
        except LinAlgError:
            #keep adding regularization until the matrix is invertible;
            #large value supported by section titled
            #"numerical issues and regularization" in the paper
            N += additionToN

    return N
 def log_likelihood(self, tau, kappa):
     logging.debug("Entering")
     logging.info("Computing %d likelihood estimates" % self.num_estimates)
     estimates = self.precompute_likelihood_estimates(tau, kappa)
     result = mean(estimates)
     std_dev = std(estimates)
     logging.info("Average of %d likelihood estimates is %d +- %f" % 
                  (self.num_estimates, result, std_dev))
     logging.debug("Leaving")
     return result
Example #11
 def log_likelihood(self, tau, kappa):
     estimates = self.precompute_likelihood_estimates(tau, kappa)
     
     if var(estimates) > 0:
         logging.info("Performing exponential Russian Roulette on %d precomputed samples" % 
                      len(estimates))
         rr_ified = self.rr_instance.exponential(estimates)
         return rr_ified
     else:
         logging.warn("Russian Roulette on one estimate not possible. Returning the estimate")
         return mean(estimates)
Example #12
 def setScaleParameters(self, ms_h = None):
   '''This is for initially setting the scale parameters, and only has an effect if 
    self._lpcParameters['automatic_ms_h'] is False.
   
   Parameters
   ----------
   ms_h : float or None, sets the bandwidth of meanshift algorithm, default (None) has no effect
   '''
   if not self._lpcParameters['automatic_ms_h'] and ms_h is not None:
       self.set_in_dict('ms_h', ms_h, self._lpcParameters)  
       bandwidth = mean(self._lpcParameters['ms_h'])
       self._meanShift = MeanShift(bandwidth = bandwidth)
Example #13
 def resetScaleParameters(self, h, t0 = None):
   '''Sets the bandwidth as h and lpc segment length as t0. If t0 is None, t0 is set as mean(h). The scale 
   parameter for the start points generator is also set to h.
   Parameters
   ----------
   h : 1-dim, length m numpy.array or float where m is the dimension of the feature space
   t0 : float
   '''
   self.set_in_dict('h', h, self._lpcParameters)
   self._startPointsGenerator.setScaleParameters(self._lpcParameters['h'])
   if t0 is None:
     t0 = mean(h)  
   self.set_in_dict('t0', t0, self._lpcParameters)
Example #14
def serie_std(serie):
    serie_mean = mean(serie)
    serie_std = []
    std = 0.0
    for value in serie:
        std += pow(value - serie_mean, 2)
    std = sqrt(std/(len(serie)-1))
    for value in serie:
        if std == 0.0:
            serie_std.append(0.0)
        else:
            serie_std.append((value-serie_mean)/std)
    return serie_std
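The function above is a z-score standardisation; a hedged numpy equivalent (ddof=1 matches the n-1 divisor used above):

import numpy as np
serie = np.array([1.0, 2.0, 3.0, 4.0])
standardised = (serie - np.mean(serie)) / np.std(serie, ddof=1)
print(standardised)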
Example #15
def _compute_stats_function(values):
    stats = None
    if len(values)>1:
        stats = {}
        stats['min'] = min(values)
        stats['max'] = max(values)
        stats['mean'] = mean(values)
        stats['median'] = median(values)
        stats['1st-quartile'] = percentile(values,25)
        stats['3rd-quartile'] = percentile(values,75)
        stats['std-error'] = std(values)
        
    return stats
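Hypothetical usage, assuming the bare min/max/mean/median/percentile/std above come from numpy (e.g. via a star import); note the function returns None for fewer than two values:

import numpy as np
values = list(np.random.exponential(1.0, 1000))
stats = _compute_stats_function(values)
if stats is not None:
    print(stats['median'], stats['1st-quartile'], stats['3rd-quartile'])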
Example #16
    def decide(self):  # not tested
        Qtmp = self.agent.x[-1] - self.agent.gamma * min(self.agent.x[-1])
        Qtmp = Qtmp / mean(Qtmp)

        tau = .2
        total = sum(exp(-Qtmp / tau))
        rn = random() * total
        i = 0
        total = exp(-Qtmp[i] / tau)
        while rn > total:
            i += 1
            total += exp(-Qtmp[i] / tau)
        action = i
        return action
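A self-contained sketch of the same Boltzmann (softmax) action selection on a plain numpy vector of Q-values, in place of the agent attributes used above; note the negative sign, which favours small Q as in the method above:

import numpy as np
Q = np.array([1.0, 0.5, 0.2])
tau = 0.2
probs = np.exp(-Q / tau) / np.sum(np.exp(-Q / tau))   # Boltzmann weights
action = np.random.choice(len(Q), p=probs)            # sample an action index
print(action, probs)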
 def get_estimate(self, estimates, index):
     start_idx = index * self.block_size
     stop_idx = index * self.block_size + self.block_size
     
     # if there are enough samples, use them, sub-sample if not
     if stop_idx <= len(estimates):
         logging.debug("Averaging over %d samples from index %d to %d" % 
                      (self.block_size, start_idx, stop_idx))
         indices = arange(start_idx, stop_idx)
     else:
         logging.debug("Averaging over a random subset of %d samples" % 
                      self.block_size)
         
         indices = permutation(len(estimates))[:self.block_size]
     
     return mean(estimates[indices])
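A minimal sketch of the same block-averaging logic on a plain array, assuming a block size of 10 and block index 0 (outside of the class above):

import numpy as np
estimates = np.random.normal(0.0, 1.0, 100)
block_size, index = 10, 0
print(np.mean(estimates[index * block_size:(index + 1) * block_size]))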
 def update(self, mcmc_chain, step_output):
     i = mcmc_chain.iteration
     if i >= self.print_from and i % self.lag == 0:
         self.times.append(time.time() - sum(self.times) - self.start_time)
             
         print "iteration:", i
         print "mean acceptance:", mean(mcmc_chain.accepteds[0:i])
         
         elapsed = int(round(sum(self.times)))
         percent = int(self.get_percent_done(i, mcmc_chain.mcmc_params.num_iterations))
         since_last = int(round(self.times[-1]))
         remaining = self.get_estimated_time_remaining(i, mcmc_chain.mcmc_params.num_iterations)
         total = elapsed + remaining
         
         print percent, "percent done in ", elapsed, "seconds"
         print "Since last update:", since_last, "seconds"
         print "remaining (estimated):", remaining, "seconds"
         print "total (estimated):", total, "seconds"
         
         print ""
Example #20
    def test_log_mean_exp(self):
        X = asarray([-1, 1])
        X = reshape(X, (len(X), 1))
        y = asarray([+1. if x >= 0 else -1. for x in X])
        covariance = SquaredExponentialCovariance(sigma=1, scale=1)
        likelihood = LogitLikelihood()
        gp = GaussianProcess(y, X, covariance, likelihood)
        laplace = LaplaceApproximation(gp, newton_start=asarray([3, 3]))
        proposal = laplace.get_gaussian()

        n = 200
        prior = gp.get_gp_prior()
        samples = proposal.sample(n).samples

        log_likelihood = asarray([gp.log_likelihood(f) for f in samples])
        log_prior = prior.log_pdf(samples)
        log_proposal = proposal.log_pdf(samples)

        X = log_likelihood + log_prior - log_proposal

        a = log(mean(exp(X)))
        b = GPTools.log_mean_exp(X)

        self.assertLessEqual(a - b, 1e-5)
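A small standalone sketch of the log-mean-exp trick being tested above, using only numpy (GPTools.log_mean_exp presumably implements something equivalent):

import numpy as np

def log_mean_exp_sketch(X):
    a = np.max(X)                                 # subtract the max for numerical stability
    return a + np.log(np.mean(np.exp(X - a)))

X = np.array([-1000.0, -999.0])
print(log_mean_exp_sketch(X))                     # finite, whereas log(mean(exp(X))) underflows to -inf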
Example #22
from numpy import linspace, mean
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import scale

bostonDataset = load_boston()
target = bostonDataset.target
data = scale(bostonDataset.data)

maxValue = -100000
maxP = -1
for i in linspace(start=1, stop=10, num=200):
    regressionFun = KNeighborsRegressor(n_neighbors=5,
                                        weights='distance',
                                        metric='minkowski')
    regressionFun.p = i
    score = cross_val_score(estimator=regressionFun,
                            cv=KFold(n_splits=5, random_state=42,
                                     shuffle=True).split(data),
                            X=data,
                            y=target,
                            scoring='neg_mean_squared_error')
    meanScore = mean(score)
    if meanScore > maxValue:
        maxValue = meanScore
        maxP = i
    print(i, meanScore)

print(maxP, maxValue)
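A hedged alternative sketch (not from the original snippet): the same search over the Minkowski exponent p can be expressed with scikit-learn's GridSearchCV:

from numpy import linspace
from sklearn.datasets import load_boston
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import scale

boston = load_boston()
data, target = scale(boston.data), boston.target
grid = GridSearchCV(
    KNeighborsRegressor(n_neighbors=5, weights='distance', metric='minkowski'),
    param_grid={'p': linspace(1, 10, num=200)},
    scoring='neg_mean_squared_error',
    cv=KFold(n_splits=5, random_state=42, shuffle=True))
grid.fit(data, target)
print(grid.best_params_, grid.best_score_)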
Example #23
 def combine(self, key, values):
     return (key, mean(values))
Example #24
def make_binwidth_plot(duration, bin_s, bin_ms, intraburst_bins_ms,
                       interburst_bins_ms, transient, filename, foldername):
    #bins include first and last point (0 and duration, in seconds or ms);
    #numpy's insert/append return new arrays, so the results must be reassigned
    bin_s = insert(bin_s, 0, 0.0)
    bin_s = append(bin_s, duration / second)
    bin_ms = insert(bin_ms, 0, 0.0)
    bin_ms = append(bin_ms, duration / ms)

    #find binwidth
    #    bin_ms_temp=bin_ms[:-1]
    #    bin_ms_shift=bin_ms[1:]
    #    binwidth_ms=[bin_ms_shift-bin_ms_temp for bin_ms_shift,bin_ms_temp in zip(bin_ms_shift,bin_ms_temp)]
    #    binwidth_ms_temp=binwidth_ms

    binwidth_ms = [x - y for x, y in zip(bin_ms[1:], bin_ms[:-1])]
    intrabinwidth_ms = [
        a - b
        for a, b in zip(intraburst_bins_ms[1::2], intraburst_bins_ms[::2])
    ]  #all odd indices - all even indices
    interbinwidth_ms = [
        a - b
        for a, b in zip(interburst_bins_ms[1::2], interburst_bins_ms[::2])
    ]  #all odd indices - all even indices

    #find binwidth avg and std
    avg_binwidth_ms = mean(binwidth_ms)
    std_binwidth_ms = std(binwidth_ms)
    avg_intrabinwidth_ms = mean(intrabinwidth_ms)
    std_intrabinwidth_ms = std(intrabinwidth_ms)
    avg_interbinwidth_ms = mean(interbinwidth_ms)
    std_interbinwidth_ms = std(interbinwidth_ms)

    #write out binwidth info.
    #Format:
    #avg std
    #binwidth1 binwidth2 ....... binwidthn
    f_binwidth_ms = open(foldername + "/" + filename + "_binwidth.txt", "w")
    f_binwidth_ms.write(str(avg_binwidth_ms) + " ")
    f_binwidth_ms.write(str(std_binwidth_ms) + " ")
    f_binwidth_ms.write("\n")
    f_binwidth_ms.write(' '.join(map(str, binwidth_ms)))

    #write out intrabins
    f_intrabinwidth_ms = open(
        foldername + "/" + filename + "_intrabinwidth.txt", "w")
    f_intrabinwidth_ms.write(str(avg_intrabinwidth_ms) + " ")
    f_intrabinwidth_ms.write(str(std_intrabinwidth_ms) + " ")
    f_intrabinwidth_ms.write("\n")
    f_intrabinwidth_ms.write(' '.join(map(str, intrabinwidth_ms)))

    #write out interbins
    f_interbinwidth_ms = open(
        foldername + "/" + filename + "_interbinwidth.txt", "w")
    f_interbinwidth_ms.write(str(avg_interbinwidth_ms) + " ")
    f_interbinwidth_ms.write(str(std_interbinwidth_ms) + " ")
    f_interbinwidth_ms.write("\n")
    f_interbinwidth_ms.write(' '.join(map(str, interbinwidth_ms)))

    #find center of bin in time
    ctr_of_bin_ms = [x - 0.5 * y for x, y in zip(bin_ms[1:], binwidth_ms)]
    ctr_of_intrabin_ms = [
        x - 0.5 * y for x, y in zip(intraburst_bins_ms[1::2], intrabinwidth_ms)
    ]
    ctr_of_interbin_ms = [
        x - 0.5 * y for x, y in zip(interburst_bins_ms[1::2], interbinwidth_ms)
    ]

    #find number of bins
    numbins = len(bin_s) - 1

    ### Plot Binwidths
    plot(ctr_of_bin_ms, binwidth_ms)
    if len(ctr_of_bin_ms) > 1:
        if ctr_of_bin_ms[-2] > (transient):
            xlim([transient, ctr_of_bin_ms[-2]])
    xlabel("Time (ms)")
    ylabel("Bin Width (ms)")
    #suptitle("Bin Width (ms)")
    title("Avg=%0.1f, SD=%0.1f" % (avg_binwidth_ms, std_binwidth_ms))
    #tight_layout(pad=2.5)
    savefig(foldername + "/" + filename + "_binwidth.png")
    close()

    plot(ctr_of_intrabin_ms, intrabinwidth_ms)
    if len(ctr_of_intrabin_ms) > 1:
        if ctr_of_intrabin_ms[-2] > (transient):
            xlim([transient, ctr_of_intrabin_ms[-2]])
    xlabel("Time (ms)")
    ylabel("Intraburst Bin Width (ms)")
    #suptitle("Intraburst Bin Width (ms)")
    title("Avg=%0.1f, SD=%0.1f" % (avg_intrabinwidth_ms, std_intrabinwidth_ms))
    #tight_layout(pad=2.5)
    savefig(foldername + "/" + filename + "_intraburst_binwidth.png")
    close()

    plot(ctr_of_interbin_ms, interbinwidth_ms)
    if len(ctr_of_interbin_ms) > 1:
        if ctr_of_interbin_ms[-2] > transient:
            xlim([transient, ctr_of_interbin_ms[-2]])
    xlabel("Time (ms)")
    ylabel("Interburst Bin Width (ms)")
    #title("Interburst Bin Width (ms)")
    title("Avg=%0.1f, SD=%0.1f" % (avg_interbinwidth_ms, std_interbinwidth_ms))
    tight_layout(pad=2.5)
    savefig(foldername + "/" + filename + "_interburst_binwidth.png")
    close()

    return [
        binwidth_ms, intrabinwidth_ms, interbinwidth_ms, ctr_of_bin_ms,
        ctr_of_intrabin_ms, ctr_of_interbin_ms, numbins, bin_ms, bin_s
    ]
Example #25
sampler_names_short = ["SM","AM-FS","AM-LS","KAMH-LS"]
sampler_names = ["StandardMetropolis","AdaptiveMetropolis","AdaptiveMetropolisLearnScale","KameleonWindowLearnScale"]

colours = ['blue', 'red', 'magenta', 'green']


ii=0
for sampler_name in sampler_names:
    filename = directory+sampler_name+"_mmds.bin"
    f = open(filename,"r")
    upto, mmds, mean_dist = load(f)
    trials=shape(mean_dist)[1]
    figure(1)
    if which_plot == "mean":
        stds = std(mean_dist,1)/sqrt(trials)
        means = mean(mean_dist,1)
    if which_plot == "mmd":
        stds = std(mmds,1)/sqrt(trials)
        means = mean(mmds,1)
    zscore=1.28
    yerr = zscore*stds
    if highlight == "SM":
        condition = sampler_name == "StandardMetropolis"
    elif highlight == "AM":
        condition = sampler_name == "AdaptiveMetropolis" or sampler_name == "AdaptiveMetropolisLearnScale"
    elif highlight == "KAMH":
        condition = sampler_name == "KameleonWindowLearnScale"
    else:
        condition = True
    
    if condition:
    def exponential(self, estimates):
        logging.debug("Entering")
        
        # find a strict lower bound on the estimates and remove it from list
        bound = estimates.min()
        bound_idx = estimates.argmin()
        estimates = delete(estimates, bound_idx)
        estimates = estimates - bound
        

        # find an integer close to the mean of the transformed estimates and divide
        E = max(int(round(abs(mean(estimates)))), 1)
        estimates = estimates / E
        
        logging.info("Using %f as lower bound on estimates" % bound)
        logging.info("Computing product of E=%d RR estimates" % E)
        logging.info("Std-deviation after scaling is %f" % std(estimates))
        
        # index for iterating through the used estimates
        # (might be averaged, so might be lower than the number of available estimates
        # if the block size is greater than one)
        estimate_idx = 0
        
        samples = zeros(E)
        for iteration in range(E):
            weight = 1
            
            # start with x^0 which is 1
            samples[iteration] = 1
            term = 1
            
            # index for computed samples
            series_term_idx = 1

            while weight > 0:
                # update current term of infinite series
                # average over block
                x_inner = self.get_estimate(estimates, estimate_idx)
                term *= (x_inner / series_term_idx)
                
                # if summation has reached threshold, update weights
                if abs(term) < self.threshold:
                    q = term / self.threshold
                    if rand() < q:
                        # continue and update weight
                        weight = weight / q
                    else:
                        # stop summation
                        weight = 0
            
                samples[iteration] += weight * term
                estimate_idx += 1
                series_term_idx += 1
                
            logging.info("RR estimate %d/%d with threshold %.2f is %.4f and took %d series terms" % 
                         (iteration + 1, E, self.threshold, samples[iteration], series_term_idx))
            
        # now put things together. Note that samples contains an unbiased estimate
        # which might be quite small. However, due to the removal of the bound,
        # this will not cause an underflow and we can just take the log.
        logging.debug("Leaving")
        return bound + sum(log(samples))
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return
        
        # burnin is the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
        
        quantiles = zeros((len(self.experiments), len(self.ref_quantiles)))
        norm_of_means = zeros(len(self.experiments))
        acceptance_rates = zeros(len(self.experiments))
#         ess_0 = zeros(len(self.experiments))
#         ess_1 = zeros(len(self.experiments))
#         ess_minima = zeros(len(self.experiments))
#         ess_medians = zeros(len(self.experiments))
#         ess_maxima = zeros(len(self.experiments))
        times = zeros(len(self.experiments))
        
        for i in range(len(self.experiments)):
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]
            
            # use precomputed quantiles if they match with the provided ones
            if hasattr(self.experiments[i], "ref_quantiles") and \
               hasattr(self.experiments[i], "quantiles") and \
               allclose(self.ref_quantiles, self.experiments[i].ref_quantiles):
                quantiles[i, :] = self.experiments[i].quantiles
            else:
                try:
                    quantiles[i, :] = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(\
                                      burned_in, self.ref_quantiles)
                except NotImplementedError:
                    print "skipping quantile computations, distribution does", \
                          "not support it."
            
            # quantiles should be about average error rather than average quantile
            quantiles[i,:]=abs(quantiles[i,:]-self.ref_quantiles)
            
            dim = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.dimension
            norm_of_means[i] = norm(mean(burned_in, 0))
            acceptance_rates[i] = mean(self.experiments[i].mcmc_chain.accepteds[burnin:])
            
            # dump burned in samples to disc
            # sample_filename=self.experiments[0].experiment_dir + self.experiments[0].name + "_burned_in.txt"
            # savetxt(sample_filename, burned_in)
            
            # store minimum ess for every experiment
            #ess_per_covariate = asarray([RCodaTools.ess_coda(burned_in[:, cov_idx]) for cov_idx in range(dim)])
#             ess_per_covariate = asarray([0 for _ in range(dim)])
#             ess_0=ess_per_covariate[0]
#             ess_1=ess_per_covariate[1]
#             ess_minima[i] = min(ess_per_covariate)
#             ess_medians[i] = median(ess_per_covariate)
#             ess_maxima[i] = max(ess_per_covariate)
            
            # save chain time needed
            elapsed = self.experiments[i].mcmc_chain.mcmc_outputs[0].times
            times[i] = int(round(sum(elapsed)))

        mean_quantiles = mean(quantiles, 0)
        std_quantiles = std(quantiles, 0)
        
        sqrt_num_trials=sqrt(len(self.experiments))
        
        # print median kernel width sigma
        #sigma=GaussianKernel.get_sigma_median_heuristic(burned_in.T)
        #lines.append("median kernel sigma: "+str(sigma))
        
        lines.append("quantiles:")
        for i in range(len(self.ref_quantiles)):
            lines.append(str(mean_quantiles[i]) + " +- " + str(std_quantiles[i]/sqrt_num_trials))
        
        lines.append("norm of means:")
        lines.append(str(mean(norm_of_means)) + " +- " + str(std(norm_of_means)/sqrt_num_trials))
        
        lines.append("acceptance rate:")
        lines.append(str(mean(acceptance_rates)) + " +- " + str(std(acceptance_rates)/sqrt_num_trials))
        
#         lines.append("ess dimension 0:")
#         lines.append(str(mean(ess_0)) + " +- " + str(std(ess_0)/sqrt_num_trials))
#         
#         lines.append("ess dimension 1:")
#         lines.append(str(mean(ess_1)) + " +- " + str(std(ess_1)/sqrt_num_trials))
#         
#         lines.append("minimum ess:")
#         lines.append(str(mean(ess_minima)) + " +- " + str(std(ess_minima)/sqrt_num_trials))
#         
#         lines.append("median ess:")
#         lines.append(str(mean(ess_medians)) + " +- " + str(std(ess_medians)/sqrt_num_trials))
#         
#         lines.append("maximum ess:")
#         lines.append(str(mean(ess_maxima)) + " +- " + str(std(ess_maxima)/sqrt_num_trials))
        
        lines.append("times:")
        lines.append(str(mean(times)) + " +- " + str(std(times)/sqrt_num_trials))
        
        # mean as a function of iterations, normalised by time
        step = round((self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin)/5)
        iterations = arange(self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin, step=step)
        
        running_means = zeros(len(iterations))
        running_errors = zeros(len(iterations))
        for i in arange(len(iterations)):
            # norm of mean of chain up 
            norm_of_means_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
                norm_of_means_yet[j] = norm(mean(samples_yet, 0))
            
            running_means[i] = mean(norm_of_means_yet)
            error_level = 1.96
            running_errors[i] = error_level * std(norm_of_means_yet) / sqrt(len(norm_of_means_yet))
        
        ioff()
        figure()
        plot(iterations, running_means*mean(times))
        fill_between(iterations, (running_means - running_errors)*mean(times), \
                     (running_means + running_errors)*mean(times), hold=True, color="gray")
        
        # make sure path to save exists
        try:
            os.makedirs(self.experiments[0].experiment_dir)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
        
        savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean.png")
        close()
        
        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_Y.txt", \
                running_means*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_errors.txt", \
                running_errors*mean(times))
        
        # dont produce quantile convergence plots here for now
        """# quantile convergence of a single one
        desired_quantile=0.5
        running_quantiles=zeros(len(iterations))
        running_quantile_errors=zeros(len(iterations))
        for i in arange(len(iterations)):
            quantiles_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
                
                # just compute one quantile for now
                quantiles_yet[j]=self.experiments[j].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(samples_yet, \
                                                                                          array([desired_quantile]))
                quantiles_yet[j]=abs(quantiles_yet[j]-desired_quantile)
            running_quantiles[i] = mean(quantiles_yet)
            error_level = 1.96
            running_quantile_errors[i] = error_level * std(quantiles_yet) / sqrt(len(quantiles_yet))
        
        
        ioff()
        figure()
        plot(iterations, running_quantiles*mean(times))
        fill_between(iterations, (running_quantiles - running_quantile_errors)*mean(times), \
                     (running_quantiles + running_quantile_errors)*mean(times), hold=True, color="gray")
        
        plot([iterations.min(),iterations.max()], [desired_quantile*mean(times) for _ in range(2)])
        
        title(str(desired_quantile)+"-quantile convergence")
        savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile.png")
        close()
        
        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_Y.txt", \
                running_quantiles*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_errors.txt", \
                running_quantile_errors*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_reference.txt", \
                [desired_quantile*mean(times)])
        """
        # add latex table line
#         latex_lines = []
#         latex_lines.append("Sampler & Acceptance & ESS2 & Norm(mean) & ")
#         for i in range(len(self.ref_quantiles)):
#             latex_lines.append('%.1f' % self.ref_quantiles[i] + "-quantile")
#             if i < len(self.ref_quantiles) - 1:
#                 latex_lines.append(" & ")
#         latex_lines.append("\\\\")
#         lines.append("".join(latex_lines))
#         
#         latex_lines = []
#         latex_lines.append(self.experiments[0].mcmc_chain.mcmc_sampler.__class__.__name__)
#         latex_lines.append('$%.3f' % mean(acceptance_rates) + " \pm " + '%.3f$' % (std(acceptance_rates)/sqrt_num_trials))
#         latex_lines.append('$%.3f' % mean(norm_of_means) + " \pm " + '%.3f$' % (std(norm_of_means)/sqrt_num_trials))
#         for i in range(len(self.ref_quantiles)):
#             latex_lines.append('$%.3f' % mean_quantiles[i] + " \pm " + '%.3f$' % (std_quantiles[i]/sqrt_num_trials))
#         
#         
#         lines.append(" & ".join(latex_lines) + "\\\\")
        
        return lines
Example #28
 def _followxSingleDirection(  self, 
                               x, 
                               direction = Direction.FORWARD,
                               forward_curve = None,
                               last_eigenvector = None, 
                               weights = 1.):
   '''Generates a partial lpc curve dictionary from the start point, x.
   Arguments
   ---------
   x : 1-dim, length m, numpy.array of floats, start point for the algorithm when m is dimension of feature space
   
   direction :  bool, proceeds in Direction.FORWARD or Direction.BACKWARD from this point (just sets sign for first eigenvalue) 
   
   forward_curve : dictionary as returned by this function, is used to detect crossing of the curve under construction with a
       previously constructed curve
       
   last_eigenvector : 1-dim, length m, numpy.array of floats, a unit vector that defines the initial direction, relative to
       which the first eigenvector is biased and initial cos_neu_neu is calculated  
       
   weights : 1-dim, length n numpy.array of observation weights (can also be used to exclude
       individual observations from the computation by setting their weight to zero.),
       where n is the number of feature points 
   '''
   x0 = copy(x)
   N = self.Xi.shape[0]
   d = self.Xi.shape[1]
   it = self._lpcParameters['it']
   h = array(self._lpcParameters['h'])
   t0 = self._lpcParameters['t0']
   rho0 = self._lpcParameters['rho0']
   
   save_xd = empty((it,d))
   eigen_vecd = empty((it,d))
   c0 = ones(it)
   cos_alt_neu = ones(it)
   cos_neu_neu = ones(it)    
   lamb = empty(it) #NOTE this is named 'lambda' in the original R code
   rho = zeros(it)
   high_rho_points = empty((0,d))    
   count_points = 0
   
   for i in range(it):
     kernel_weights = self._kernd(self.Xi, x0, c0[i]*h) * weights
     mu_x = average(self.Xi, axis = 0, weights = kernel_weights)
     sum_weights = sum(kernel_weights)
     mean_sub = self.Xi - mu_x 
     cov_x = dot( dot(transpose(mean_sub), numpy.diag(kernel_weights)), mean_sub) / sum_weights 
     #assert (abs(cov_x.transpose() - cov_x)/abs(cov_x.transpose() + cov_x) < 1e-6).all(), 'Covariance matrix not symmetric, \n cov_x = {0}, mean_sub = {1}'.format(cov_x, mean_sub)
     save_xd[i] = mu_x #save first point of the branch
     count_points += 1
     
     #calculate path length
     if i==0:
       lamb[0] = 0
     else:
       lamb[i] = lamb[i-1] + sqrt(sum((mu_x - save_xd[i-1])**2))
     
     #calculate eigenvalues/vectors
     #(sorted_eigen_cov is a list of tuples containing eigenvalue and associated eigenvector, sorted descending by eigenvalue)
     eigen_cov = eigh(cov_x)
     sorted_eigen_cov = zip(eigen_cov[0],map(ravel,vsplit(eigen_cov[1].transpose(),len(eigen_cov[1]))))
     sorted_eigen_cov.sort(key = lambda elt: elt[0], reverse = True)   
     eigen_norm = sqrt(sum(sorted_eigen_cov[0][1]**2))
     eigen_vecd[i] = direction * sorted_eigen_cov[0][1] / eigen_norm  #Unit eigenvector corresponding to largest eigenvalue
     
     #rho parameters
     rho[i] = sorted_eigen_cov[1][0] / sorted_eigen_cov[0][0] #Ratio of two largest eigenvalues
     if i != 0 and rho[i] > rho0 and rho[i-1] <= rho0:
       high_rho_points = vstack((high_rho_points, x0))
     
     #angle between successive eigenvectors
     if i==0 and last_eigenvector is not None:
       cos_alt_neu[i] = direction * dot(last_eigenvector, eigen_vecd[i])
     if i > 0:
       cos_alt_neu[i] = dot(eigen_vecd[i], eigen_vecd[i-1])
     
     #signum flipping
     if cos_alt_neu[i] < 0:
       eigen_vecd[i] = -eigen_vecd[i]
       cos_neu_neu[i] = -cos_alt_neu[i]
     else:
       cos_neu_neu[i] = cos_alt_neu[i]
    
     #angle penalization
     pen = self._lpcParameters['pen']
     if pen > 0:
       if i == 0 and last_eigenvector is not None:
         a = abs(cos_alt_neu[i])**pen
         eigen_vecd[i] = a * eigen_vecd[i] + (1-a) * last_eigenvector
       if i > 0:
         a = abs(cos_alt_neu[i])**pen
         eigen_vecd[i] = a * eigen_vecd[i] + (1-a) * eigen_vecd[i-1]
             
     #check curve termination criteria
     if i not in (0, it-1):
       #crossing
       cross = self._lpcParameters['cross']
       if forward_curve is None:
         full_curve_points = save_xd[0:i+1]
       else:
         full_curve_points = vstack((forward_curve['save_xd'],save_xd[0:i+1])) #inefficient, initialize then append? 
       if not cross:
         prox = where(ravel(cdist(full_curve_points,[mu_x])) <= mean(h))[0]
         if len(prox) != max(prox) - min(prox) + 1:
           break
         
       #convergence
       convergence_at = self._lpcParameters['convergence_at']
       conv_ratio = abs(lamb[i] - lamb[i-1]) / (2 * (lamb[i] + lamb[i-1]))
       if conv_ratio  < convergence_at:
         break
       
       #boundary
       boundary = self._lpcParameters['boundary']
       if conv_ratio < boundary:
         c0[i+1] = 0.995 * c0[i]
       else:
         c0[i+1] = min(1.01*c0[i], 1)
     
     #step along in direction eigen_vecd[i]
     x0 = mu_x + t0 * eigen_vecd[i]
   
   #trim output in the case where convergence occurs before 'it' iterations    
   curve = { 'save_xd': save_xd[0:count_points],
             'eigen_vecd': eigen_vecd[0:count_points],
             'cos_neu_neu': cos_neu_neu[0:count_points],
             'rho': rho[0:count_points],
             'high_rho_points': high_rho_points,
             'lamb': lamb[0:count_points],
             'c0': c0[0:count_points]
           }
   return curve  
Example #29
def score_tfidf_freq(tfidfs):
    
    return round(float(mean(tfidfs)*100),2)
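Hypothetical usage, assuming the original module imports mean from numpy:

print(score_tfidf_freq([0.12, 0.30, 0.05]))   # mean tf-idf expressed as a percentage, two decimals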
import sys

if __name__ == '__main__':
    # load data
    data, labels = GPData.get_glass_data()

    # throw away some data
    n = 250
    seed(1)
    idx = permutation(len(data))
    idx = idx[:n]
    data = data[idx]
    labels = labels[idx]

    # normalise and whiten dataset
    data -= mean(data, 0)
    L = cholesky(cov(data.T))
    data = solve_triangular(L, data.T, lower=True).T
    dim = shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    target=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=100, prior=theta_prior, \
                                                    ridge=1e-3)

    # create sampler
    burnin = 10000
    num_iterations = burnin + 300000
    kernel = GaussianKernel(sigma=23.0)
    sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
Example #31
test_1 = line2array(list_test, list_label)
print("test:")
print(test_1)
#-------------------- ↑ test data for the first six columns ----------------------
p_0_pre = diff_0 * test_1
print("per-attribute probabilities of the first six columns for class 'no':")
print(p_0_pre)
print("per-attribute probabilities of the first six columns for class 'yes':")
p_1_pre = diff_1 * test_1
print(p_1_pre)
#-------------------- ↑ probability computation for the first six columns ----------------------
print("val0: (density and sugar-content data of the last two columns for the 'no' class)")
print(val0)
print("val1: (density and sugar-content data of the last two columns for the 'yes' class)")
print(val1)
mean0 = np.array([mean(val0[0]), mean(val0[1])])  # means of density and sugar content for class 0 ('no')
mean1 = np.array([mean(val1[0]), mean(val1[1])])  # means of density and sugar content for class 1 ('yes')
var0 = np.array([var(val0[0]), var(val0[1])])  # variances of density and sugar content for class 0 ('no')
var1 = np.array([var(val1[0]), var(val1[1])])  # variances of density and sugar content for class 1 ('yes')


#-------------------- ↑ mean/variance computation for the last two columns ----------------------
def gaussian(mean, var, x):
    res = 1 / sqrt(2 * 3.14 * var) * np.exp(-(mean - x)**2 / (2 * var))
    return res
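A quick hedged sanity check of the density above against scipy.stats.norm (assuming scipy is available); the values should be close, up to the 3.14 approximation of pi used above:

from scipy.stats import norm
import numpy as np
print(gaussian(0.5, 0.04, 0.697), norm.pdf(0.697, loc=0.5, scale=np.sqrt(0.04)))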


p_0_pro = gaussian(mean0[0], var0[0], 0.697) + gaussian(
    mean0[1], var0[1], 0.460)
print(p_0_pro)
p_1_pro = gaussian(mean1[0], var1[0], 0.697) + gaussian(
Example #32
plotting = False
pkernel = PolynomialKernel(degree=3)

samples_long = loadtxt(
    "/nfs/home2/dino/kamh-results/StandardMetropolis_PseudoMarginalHyperparameterDistribution_merged_samples.txt"
)
samples_long = samples_long[:10000]
# f_long=open("/nfs/home2/dino/kamh-results/long_experiment_output.bin")
# experiment_long=load(f_long)
# f_long.close()
# thin_long=100
# mcmc_chain_long=experiment_long.mcmc_chain
# burnin=mcmc_chain_long.mcmc_params.burnin
# indices_long = range(burnin, mcmc_chain_long.iteration,thin_long)
# samples_long=mcmc_chain_long.samples[indices_long]
mu_long = mean(samples_long, 0)

print 'using this many samples for the long chain: ', shape(samples_long)[0]

how_many_chains = 20
stats_granularity = 10

path_above = "/nfs/home2/dino/git/kameleon-mcmc/main/gp/scripts/glass_gaussian_ard/"
#path_above = "/nfs/data3/ucabhst/kameleon_experiments/glass_ard/"
path_below = "output/experiment_output.bin"

#sampler_names = ["KameleonWindowLearnScale", "AdaptiveMetropolisLearnScale","AdaptiveMetropolis"]
sampler_names = ["StandardMetropolis"]
path_temp = "_PseudoMarginalHyperparameterDistribution_#/"

for sampler_name in sampler_names:
Example #33
def detect_insertions( h0_mean, h1_mean, h2_mean, heads_per_base, inputf, reads_per_base, tails_per_base):
    fp_scores, tp_scores = [], []
    not_found_insertions = json.load(open(data_d('subject_genome.fa.alu_positions.json')))

    if not not_found_insertions:
        return

    pers_alu_info = copy.deepcopy(not_found_insertions)
    total_alus = count_alus(not_found_insertions)
    false_positives = []
    skip_until = -1
    window = []

    for col in inputf.pileup():
        if col.pos < skip_until:
            continue

        if len(window) == WIN_LENGTH:
            window = window[1:WIN_LENGTH]

        window.append(site(col))

        #        if col.pos < 36265890:
        #            continue
        if boundary(window) and enough_coverage(window, reads_per_base):
            reason = potential_ALU_insert(window, heads_per_base, tails_per_base)

            if reason:
                spanning = window_stats(window)

                if spanning >= h0_mean:
                    continue
                    #            hyp, _ = min((None, h0_mean), ('heterozygous', h1_mean), ('homozygous', h2_mean),
                    #                             key = lambda (hyp, mean): abs(spanning - mean))
                hyp, _ = min(('heterozygous', h1_mean), ('homozygous', h2_mean),
                                key = lambda (hyp, mean): abs(spanning - mean))

                if hyp:
                    chrom = inputf.getrname(col.tid)
                    window = []
                    skip_until = col.pos + RLEN

                    fp = True
                    for hap_no, haplotype_positions in enumerate(pers_alu_info[chrom]):
                        for inserted in haplotype_positions:
                            if abs(inserted['ref_pos'] - col.pos) < 300:
                                if inserted in not_found_insertions[chrom][hap_no]:
                                    not_found_insertions[chrom][hap_no].remove(inserted)
                                fp = False

                    if fp:
                        false_positives.append([hyp, spanning, chrom, col.pos, reason])
                        fp_scores.append(spanning)
                    else:
                        tp_scores.append(spanning)

                    logm('\t'.join(map(str, [hyp, not fp, spanning, chrom, col.pos, reason])))

    print 'False positives:\n', pformat(sorted(false_positives, key=lambda fp: (fp[-1], fp[-2])))
    print 'False negatives:\n', pformat(not_found_insertions)
    print
    #print bgr_spanning, reads_per_base
    print 'True positives: %d (%.2lf%%)\t' % (len(tp_scores), float(100 * len(tp_scores)) / (len(tp_scores) + len(fp_scores))), mean(tp_scores), var(tp_scores)

    if fp_scores:
        print 'False positives: %d (%.2lf%%)\t' % (
            len(fp_scores), float(100 * len(fp_scores)) / (len(tp_scores) + len(fp_scores))), mean(fp_scores), var(fp_scores)

    print 'False negatives: %d(%.2f%%)\n' % (count_alus(not_found_insertions), float(100 * count_alus(not_found_insertions)) / total_alus)
Example #35
 def reduce(self, key, values):
     return (key, mean(values))
Example #36
        print "python " + str(sys.argv[0]).split(os.sep)[-1] + " /nfs/nhome/live/ucabhst/kameleon_experiments/ 3"
        exit()
    
    experiment_dir_base = str(sys.argv[1])
    n = int(str(sys.argv[2]))
    
    # loop over parameters here
    
    experiment_dir = experiment_dir_base + str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
    print "running experiments", n, "times at base", experiment_dir
   
    # load data
    data,labels=GPData.get_glass_data()

    # normalise and whiten dataset
    data-=mean(data, 0)
    L=cholesky(cov(data.T))
    data=solve_triangular(L, data.T, lower=True).T
    dim=shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior=Gaussian(mu=0*ones(dim), Sigma=eye(dim)*5)
    distribution=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=100, prior=theta_prior, \
                                                    ridge=1e-3)

    sigma = 23.0
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)
    
    for i in range(n):
Example #37
from numpy import ones, array
from pkg.plots import figureAgent2, showQValues2, show3dQValues2
from pkg.agent import Agent
from pkg.simulationCtLbd import simulateCtLbd
from matplotlib.pyplot import show
from numpy.ma.core import mean

#Initial conditions
x0 = array([.5])
#lRates = [0.01,0.11,0.21,0.31,0.41,0.51, 0.61, 0.71, 0.81, 0.91]
lRates = [0.11]
#prob = array([.55, .65, .75, .85, .95])
#vol = array([.001,.005])
prob = array([.85])
vol = array([.001])
pPolicy = 'greedy'
nTrials = 5000  # number of trials of an episode
nEpisodes = 1  # number of episodes
saveOutput = False
path = 'data/ctLbd/'
fixSeed = False

(agent, environment) = simulateCtLbd(vol, prob, x0, lRates, nTrials, nEpisodes,
                                     pPolicy, fixSeed, saveOutput, path)
#import pdb
figureAgent2(environment, agent)

print mean(agent.err**2)

show()
#pdb.set_trace()