def get_bar_data(xx, delta, Tmin, Tmax, step):
    T_seg = []
    mean_seg = []
    std_seg = []
    sampleNums = []
    for i in np.arange(Tmin, Tmax, step):
        idx = np.where(np.logical_and(xx >= i, xx < (i + step)))[0]
        if idx.size > 0:
            DTb_block = delta[idx]
        else:
            continue
        mean1 = mean(DTb_block)
        std1 = std(DTb_block)
        idx1 = np.where(abs(DTb_block - mean1) < std1)[0]  # drop points deviating from the mean by more than one std
        if idx1.size > 0:
            DTb_block = DTb_block[idx1]
            mean_seg.append(mean(DTb_block))
            std_seg.append(std(DTb_block))
            sampleNums.append(len(DTb_block))
        else:
            mean_seg.append(0)
            std_seg.append(0)
            sampleNums.append(0)
        T_seg.append(i + step / 2.)
    return np.array(T_seg), np.array(mean_seg), np.array(std_seg), np.array(sampleNums)
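# Minimal usage sketch for get_bar_data, assuming `np` is numpy and that `mean`/`std`
# inside the function refer to numpy.mean/numpy.std imported in the original module.
# The data below is illustrative only, not from the source.
import numpy as np
from numpy import mean, std

xx = np.random.uniform(200.0, 300.0, 1000)     # e.g. brightness temperatures
delta = np.random.normal(0.0, 0.5, 1000)       # e.g. differences to bin against xx
T_seg, mean_seg, std_seg, sampleNums = get_bar_data(xx, delta, 200.0, 300.0, 10.0)
print(T_seg)
print(mean_seg)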
def G_reg1d(xx, yy, ww=None):
    """
    Compute the slope and intercept of a weighted 1-d linear fit of yy against xx.
    Returns [slope, intercept, std(yy)/std(xx), mean(yy) - std-ratio * mean(xx), r squared].
    ww: weights
    """
    rtn = []
    ab = polyfit(xx, yy, 1, w=ww)
    rtn.append(ab[0])
    rtn.append(ab[1])
    rtn.append(std(yy) / std(xx))
    rtn.append(mean(yy) - rtn[2] * mean(xx))
    r = corrcoef(xx, yy)
    rr = r[0, 1] * r[0, 1]
    rtn.append(rr)
    return rtn
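# Quick check of G_reg1d on synthetic data, assuming polyfit, std, mean and corrcoef
# are the numpy functions imported by the original module (data is illustrative).
import numpy as np
from numpy import polyfit, std, mean, corrcoef

xx = np.linspace(0.0, 10.0, 50)
yy = 2.0 * xx + 1.0 + np.random.normal(0.0, 0.1, 50)
slope, intercept, ratio_slope, ratio_intercept, r_squared = G_reg1d(xx, yy)
print(slope, intercept, r_squared)   # roughly 2, 1, and close to 1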
def log_det_estimate_shogun(Q):
    logging.debug("Entering")
    op = RealSparseMatrixOperator(csc_matrix(Q))
    engine = SerialComputationEngine()
    linear_solver = CGMShiftedFamilySolver()
    accuracy = 1e-3
    eigen_solver = LanczosEigenSolver(op)
    eigen_solver.set_min_eigenvalue(OzonePosterior.ridge)
    op_func = LogRationalApproximationCGM(op, engine, eigen_solver, linear_solver, accuracy)

    # limit computation time
    linear_solver.set_iteration_limit(1000)
    eigen_solver.set_max_iteration_limit(1000)

    logging.info("Computing Eigenvalues (only largest)")
    eigen_solver.compute()

    trace_sampler = ProbingSampler(op)
    log_det_estimator = LogDetEstimator(trace_sampler, op_func, engine)
    n_estimates = 1

    logging.info("Sampling log-determinant with probing vectors and rational approximation")
    estimates = log_det_estimator.sample(n_estimates)

    logging.debug("Leaving")
    return mean(estimates)
def _rescaleInput(self):
    '''If scaled == True : sets self._dataRange equal to a 1-dim numpy.array containing the ranges
    in each of m feature space dimensions, and divides self.Xi through by these ranges (leaving the
    data in a 1*1*1 cube); if the bandwidth, h, is not set, this defaults to 0.1 (i.e. 10% of the range).
    If scaled == False and h is not set, h defaults to 10% of the range in each feature space dimension.
    In both the scaled and not scaled cases, if the segment length, t0, is not set this defaults to mean(h).
    '''
    scaled = self._lpcParameters['scaled']
    h = self._lpcParameters['h']
    if scaled:
        data_range = numpy.max(self.Xi, axis=0) - numpy.min(self.Xi, axis=0)  #calculate ranges of each dimension
        if any(data_range == 0):
            raise ValueError('Data cannot be scaled because the range in at least 1 direction is zero (i.e. data lies wholly in plane x/y/z = c)')
        self._dataRange = data_range
        self.Xi = self.Xi / self._dataRange
        if h is None:
            self.resetScaleParameters(0.1, self._lpcParameters['t0'])
    else:
        if h is None:
            self._dataRange = numpy.max(self.Xi, axis=0) - numpy.min(self.Xi, axis=0)  #calculate ranges of each dimension
            h = list(0.1 * self._dataRange)
            self.resetScaleParameters(h, self._lpcParameters['t0'])
    #make sure that t0 is set
    if self._lpcParameters['t0'] is None:
        self._lpcParameters['t0'] = mean(h)
def __init__(self, **params):
    '''
    Parameters
    ----------
    ms_h : float, sets the bandwidth of the mean shift algorithm, defaults to None, whereby the
    algorithm automatically determines the bandwidth

    automatic_ms_h : bool, if True forces the algorithm to determine its own bandwidth, overrides
    any ms_h setting

    ms_sub : float, sets the percentage (0 < ms_sub <= 100) of the data points supplied to
    self.__call__ that are used to compute the ms seed points

    rho_threshold : float, ratio of 2nd largest to largest cluster eigenvalues, above which
    cluster centers are removed from the output
    '''
    super(lpcMeanShift, self).__init__()
    self._lpcParameters = {'ms_h': None,
                           'automatic_ms_h': False,
                           'ms_sub': 30,
                           'rho_threshold': 0.2
                          }
    self._prm_list = [self._lpcParameters]
    self.user_prm = None  #extension of parameter set disallowed
    self._type_check.update({'ms_h': lambda x: (x is None) or lpcMeanShift._positivityCheck(x) or (isinstance(x, list) and all(map(lpcMeanShift._positivityCheck, x))),
                             'automatic_ms_h': (bool,),
                             'ms_sub': lambda x: lpcMeanShift._positivityCheck(x) and x <= 100,    # call the check (the original omitted '(x)')
                             'rho_threshold': lambda x: lpcMeanShift._positivityCheck(x) and x < 1  # call the check (the original omitted '(x)')
                            })
    self.set(**params)
    if self._lpcParameters['automatic_ms_h'] or self._lpcParameters['ms_h'] is None:
        self._meanShift = MeanShift()
    else:
        self._meanShift = MeanShift(bandwidth=mean(self._lpcParameters['ms_h']))
def evaluateKernel(self, kernelArgs):
    cvg = crossValidationGen(self._totNumFolds, self._data, self._labels)
    errorList = []
    try:
        iter(kernelArgs)
    except TypeError:
        kernelArgs = kernelArgs.tolist()
    try:
        iter(kernelArgs)
    except TypeError:
        kernelArgs = [kernelArgs]
    for foldTrainData, foldTrainLabels, foldTestData, foldTestLabels in cvg:
        foldFileIdentifier = '-'.join((self._fileIdentifier, str(cvg.get_cur_fold()), str(cvg.get_num_folds())))
        #get the test and train kernels
        (trainKernel, testKernel) = createAndSaveTestAndTrainKernels(foldTrainData, foldTestData, self._datasetDir, self._kernelFunct, foldFileIdentifier, *kernelArgs)
        #trainKernel = createKernel(foldTrainData, foldTrainData, self._kernelFunct, *kernelArgs)
        #testKernel = createKernel(foldTestData, foldTrainData, self._kernelFunct, *kernelArgs)
        #train the model
        #model = trainSVM(trainKernel, foldTrainLabels)
        foldModelFileLoc = getModelFileLoc(getKernelDir(getKernelFileLoc(self._datasetDir, self._kernelFunct.__name__, foldFileIdentifier, True, kernelArgs)), 'svm', foldFileIdentifier)
        model = trainSVMAndSave(foldModelFileLoc, trainKernel, foldTrainLabels)
        #predict using the model
        predictedLabels = predictSVMAndSave(model, testKernel, getPredictFileLoc(getModelDir(foldModelFileLoc), foldFileIdentifier))
        #evaluate the results
        foldAccuracy = evaluations(foldTestLabels, predictedLabels)[0]
        foldError = 100 - foldAccuracy
        errorList.append(foldError)
    meanError = mean(errorList)
    print kernelArgs, meanError
    return meanError
def calcN(classKernels, trainLabels):
    N = zeros((len(trainLabels), len(trainLabels)))
    for i, l in enumerate(unique(trainLabels)):
        numExamplesWithLabel = len(where(trainLabels == l)[0])
        Idiff = identity(numExamplesWithLabel, Float64) - (1.0 / numExamplesWithLabel) * ones(numExamplesWithLabel, Float64)
        firstDot = dot(classKernels[i], Idiff)
        labelTerm = dot(firstDot, transpose(classKernels[i]))
        N += labelTerm
    N = nan_to_num(N)  #make N more numerically stable
    #if I had more time, I would train this parameter, but I don't
    additionToN = ((mean(diag(N)) + 1) / 100.0) * identity(N.shape[0], Float64)
    N += additionToN

    #make sure N is invertible
    for i in range(1000):
        try:
            inv(N)
        except LinAlgError:
            #doing this to make sure the matrix is invertible
            #large value supported by section titled
            #"numerical issues and regularization" in the paper
            N += additionToN
        else:
            break  # N is invertible, no need to keep adding regularization
    return N
def log_likelihood(self, tau, kappa):
    logging.debug("Entering")

    logging.info("Computing %d likelihood estimates" % self.num_estimates)
    estimates = self.precompute_likelihood_estimates(tau, kappa)

    result = mean(estimates)
    std_dev = std(estimates)
    logging.info("Average of %d likelihood estimates is %f +- %f" %
                 (self.num_estimates, result, std_dev))

    logging.debug("Leaving")
    return result
def log_likelihood(self, tau, kappa):
    estimates = self.precompute_likelihood_estimates(tau, kappa)

    if var(estimates) > 0:
        logging.info("Performing exponential Russian Roulette on %d precomputed samples" % len(estimates))
        rr_ified = self.rr_instance.exponential(estimates)
        return rr_ified
    else:
        logging.warn("Russian Roulette on one estimate not possible. Returning the estimate")
        return mean(estimates)
def setScaleParameters(self, ms_h=None):
    '''This is for initially setting the scale parameters, and only has an effect if
    self._lpcParameters['automatic_ms_h'] is False

    Parameters
    ----------
    ms_h : float or None, sets the bandwidth of the mean shift algorithm, default (None) has no effect
    '''
    if not self._lpcParameters['automatic_ms_h'] and ms_h is not None:
        self.set_in_dict('ms_h', ms_h, self._lpcParameters)
        bandwidth = mean(self._lpcParameters['ms_h'])
        self._meanShift = MeanShift(bandwidth=bandwidth)
def resetScaleParameters(self, h, t0=None):
    '''Sets the bandwidth as h and lpc segment length as t0. If t0 is None, t0 is set as mean(h).
    The scale parameter for the start points generator is also set to h.

    Parameters
    ----------
    h : 1-dim, length m numpy.array or float where m is the dimension of the feature space

    t0 : float
    '''
    self.set_in_dict('h', h, self._lpcParameters)
    self._startPointsGenerator.setScaleParameters(self._lpcParameters['h'])
    if t0 is None:
        t0 = mean(h)
    self.set_in_dict('t0', t0, self._lpcParameters)
def serie_std(serie):
    serie_mean = mean(serie)
    serie_std = []
    std = 0.0
    for value in serie:
        std += pow(value - serie_mean, 2)
    std = sqrt(std / (len(serie) - 1))
    for value in serie:
        if std == 0.0:
            serie_std.append(0.0)
        else:
            serie_std.append((value - serie_mean) / std)
    return serie_std
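# Minimal sketch of serie_std in use, assuming `mean` and `sqrt` are the numpy
# functions imported by the original module (input values are illustrative).
from numpy import mean, sqrt

print(serie_std([1.0, 2.0, 3.0, 4.0]))   # z-scores using the sample standard deviation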
def _compute_stats_function(values):
    stats = None
    if len(values) > 1:
        stats = {}
        stats['min'] = min(values)
        stats['max'] = max(values)
        stats['mean'] = mean(values)
        stats['median'] = median(values)
        stats['1st-quartile'] = percentile(values, 25)
        stats['3rd-quartile'] = percentile(values, 75)
        stats['std-error'] = std(values)
    return stats
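# Example call, assuming mean/median/percentile/std are the numpy functions
# imported by the original module (data is illustrative).
from numpy import mean, median, percentile, std

print(_compute_stats_function([1.0, 2.0, 3.0, 4.0, 5.0]))
# e.g. {'min': 1.0, 'max': 5.0, 'mean': 3.0, 'median': 3.0, ...}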
def decide(self):  # not tested
    Qtmp = self.agent.x[-1] - self.agent.gamma * min(self.agent.x[-1])
    Qtmp = Qtmp / mean(Qtmp)
    tau = .2
    total = sum(exp(-Qtmp / tau))
    rn = random() * total
    i = 0
    total = exp(-Qtmp[i] / tau)
    while rn > total:
        i += 1
        total += exp(-Qtmp[i] / tau)
    action = i
    return action
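# Standalone sketch of the same Boltzmann (softmax-style) sampling that decide()
# performs: draw an index with probability proportional to exp(-Q[i] / tau).
# Purely illustrative; `sample_boltzmann`, `Q` and `tau` are made-up names/values.
import numpy as np

def sample_boltzmann(Q, tau=0.2, rng=np.random):
    weights = np.exp(-np.asarray(Q) / tau)
    probs = weights / weights.sum()
    return rng.choice(len(Q), p=probs)

print(sample_boltzmann([0.1, 0.5, 0.9]))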
def get_estimate(self, estimates, index):
    start_idx = index * self.block_size
    stop_idx = index * self.block_size + self.block_size

    # if there are enough samples, use them, sub-sample if not
    if stop_idx <= len(estimates):
        logging.debug("Averaging over %d samples from index %d to %d" %
                      (self.block_size, start_idx, stop_idx))
        indices = arange(start_idx, stop_idx)
    else:
        logging.debug("Averaging over a random subset of %d samples" % self.block_size)
        indices = permutation(len(estimates))[:self.block_size]

    return mean(estimates[indices])
def update(self, mcmc_chain, step_output):
    i = mcmc_chain.iteration
    if i >= self.print_from and i % self.lag == 0:
        self.times.append(time.time() - sum(self.times) - self.start_time)

        print "iteration:", i
        print "mean acceptance:", mean(mcmc_chain.accepteds[0:i])

        elapsed = int(round(sum(self.times)))
        percent = int(self.get_percent_done(i, mcmc_chain.mcmc_params.num_iterations))
        since_last = int(round(self.times[-1]))
        remaining = self.get_estimated_time_remaining(i, mcmc_chain.mcmc_params.num_iterations)
        total = elapsed + remaining

        print percent, "percent done in ", elapsed, "seconds"
        print "Since last update:", since_last, "seconds"
        print "remaining (estimated):", remaining, "seconds"
        print "total (estimated):", total, "seconds"
        print ""
def test_log_mean_exp(self):
    X = asarray([-1, 1])
    X = reshape(X, (len(X), 1))
    y = asarray([+1. if x >= 0 else -1. for x in X])
    covariance = SquaredExponentialCovariance(sigma=1, scale=1)
    likelihood = LogitLikelihood()
    gp = GaussianProcess(y, X, covariance, likelihood)
    laplace = LaplaceApproximation(gp, newton_start=asarray([3, 3]))
    proposal = laplace.get_gaussian()

    n = 200
    prior = gp.get_gp_prior()
    samples = proposal.sample(n).samples

    log_likelihood = asarray([gp.log_likelihood(f) for f in samples])
    log_prior = prior.log_pdf(samples)
    log_proposal = proposal.log_pdf(samples)

    X = log_likelihood + log_prior - log_proposal

    a = log(mean(exp(X)))
    b = GPTools.log_mean_exp(X)

    self.assertLessEqual(a - b, 1e-5)
from numpy import linspace, mean
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, cross_val_score  # cross_val_score lives in model_selection; sklearn.cross_validation is deprecated
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import scale

bostonDataset = load_boston()
target = bostonDataset.target
data = scale(bostonDataset.data)

maxValue = -100000
maxP = -1
for i in linspace(start=1, stop=10, num=200):
    regressionFun = KNeighborsRegressor(n_neighbors=5, weights='distance', metric='minkowski', p=i)
    score = cross_val_score(estimator=regressionFun,
                            cv=KFold(n_splits=5, random_state=42, shuffle=True).split(data),
                            X=data, y=target,
                            scoring='neg_mean_squared_error')
    meanScore = mean(score)
    if meanScore > maxValue:
        maxValue = meanScore
        maxP = i
    print(i, meanScore)

print(maxP, maxValue)
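# The same search over the Minkowski power parameter can be expressed with
# GridSearchCV; a sketch under the assumption that a reasonably recent
# scikit-learn is available (grid values mirror the loop above).
from sklearn.model_selection import GridSearchCV

search = GridSearchCV(KNeighborsRegressor(n_neighbors=5, weights='distance', metric='minkowski'),
                      param_grid={'p': linspace(1, 10, 200)},
                      cv=KFold(n_splits=5, random_state=42, shuffle=True),
                      scoring='neg_mean_squared_error')
search.fit(data, target)
print(search.best_params_, search.best_score_)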
def combine(self, key, values):
    return (key, mean(values))
def make_binwidth_plot(duration, bin_s, bin_ms, intraburst_bins_ms, interburst_bins_ms, transient, filename, foldername):
    #bins include first and last point (0 and duration seconds or ms)
    #numpy insert/append return new arrays, so assign the results back
    bin_s = insert(bin_s, 0, 0.0)
    bin_s = append(bin_s, duration / second)
    bin_ms = append(bin_ms, duration / ms)
    bin_ms = insert(bin_ms, 0, 0.0)

    #find binwidth
    # bin_ms_temp=bin_ms[:-1]
    # bin_ms_shift=bin_ms[1:]
    # binwidth_ms=[bin_ms_shift-bin_ms_temp for bin_ms_shift,bin_ms_temp in zip(bin_ms_shift,bin_ms_temp)]
    # binwidth_ms_temp=binwidth_ms
    binwidth_ms = [x - y for x, y in zip(bin_ms[1:], bin_ms[:-1])]
    intrabinwidth_ms = [a - b for a, b in zip(intraburst_bins_ms[1::2], intraburst_bins_ms[::2])]  #all odd indices - all even indices
    interbinwidth_ms = [a - b for a, b in zip(interburst_bins_ms[1::2], interburst_bins_ms[::2])]  #all odd indices - all even indices

    #find binwidth avg and std
    avg_binwidth_ms = mean(binwidth_ms)
    std_binwidth_ms = std(binwidth_ms)
    avg_intrabinwidth_ms = mean(intrabinwidth_ms)
    std_intrabinwidth_ms = std(intrabinwidth_ms)
    avg_interbinwidth_ms = mean(interbinwidth_ms)
    std_interbinwidth_ms = std(interbinwidth_ms)

    #write out binwidth info.
    #Format:
    #avg std
    #binwidth1 binwidth2 ....... binwidthn
    f_binwidth_ms = open(foldername + "/" + filename + "_binwidth.txt", "w")
    f_binwidth_ms.write(str(avg_binwidth_ms) + " ")
    f_binwidth_ms.write(str(std_binwidth_ms) + " ")
    f_binwidth_ms.write("\n")
    f_binwidth_ms.write(' '.join(map(str, binwidth_ms)))

    #write out intrabins
    f_intrabinwidth_ms = open(foldername + "/" + filename + "_intrabinwidth.txt", "w")
    f_intrabinwidth_ms.write(str(avg_intrabinwidth_ms) + " ")
    f_intrabinwidth_ms.write(str(std_intrabinwidth_ms) + " ")
    f_intrabinwidth_ms.write("\n")
    f_intrabinwidth_ms.write(' '.join(map(str, intrabinwidth_ms)))

    #write out interbins
    f_interbinwidth_ms = open(foldername + "/" + filename + "_interbinwidth.txt", "w")
    f_interbinwidth_ms.write(str(avg_interbinwidth_ms) + " ")
    f_interbinwidth_ms.write(str(std_interbinwidth_ms) + " ")
    f_interbinwidth_ms.write("\n")
    f_interbinwidth_ms.write(' '.join(map(str, interbinwidth_ms)))

    #find center of bin in time
    ctr_of_bin_ms = [x - 0.5 * y for x, y in zip(bin_ms[1:], binwidth_ms)]
    ctr_of_intrabin_ms = [x - 0.5 * y for x, y in zip(intraburst_bins_ms[1::2], intrabinwidth_ms)]
    ctr_of_interbin_ms = [x - 0.5 * y for x, y in zip(interburst_bins_ms[1::2], interbinwidth_ms)]

    #find number of bins
    numbins = len(bin_s) - 1

    ### Plot Binwidths
    plot(ctr_of_bin_ms, binwidth_ms)
    if len(ctr_of_bin_ms) > 1:
        if ctr_of_bin_ms[-2] > (transient):
            xlim([transient, ctr_of_bin_ms[-2]])
    xlabel("Time (ms)")
    ylabel("Bin Width (ms)")
    #suptitle("Bin Width (ms)")
    title("Avg=%0.1f, SD=%0.1f" % (avg_binwidth_ms, std_binwidth_ms))
    #tight_layout(pad=2.5)
    savefig(foldername + "/" + filename + "_binwidth.png")
    close()

    plot(ctr_of_intrabin_ms, intrabinwidth_ms)
    if len(ctr_of_intrabin_ms) > 1:
        if ctr_of_intrabin_ms[-2] > (transient):
            xlim([transient, ctr_of_intrabin_ms[-2]])
    xlabel("Time (ms)")
    ylabel("Intraburst Bin Width (ms)")
    #suptitle("Intraburst Bin Width (ms)")
    title("Avg=%0.1f, SD=%0.1f" % (avg_intrabinwidth_ms, std_intrabinwidth_ms))
    #tight_layout(pad=2.5)
    savefig(foldername + "/" + filename + "_intraburst_binwidth.png")
    close()

    plot(ctr_of_interbin_ms, interbinwidth_ms)
    if len(ctr_of_interbin_ms) > 1:
        if ctr_of_interbin_ms[-2] > transient:
            xlim([transient, ctr_of_interbin_ms[-2]])
    xlabel("Time (ms)")
    ylabel("Interburst Bin Width (ms)")
    #title("Interburst Bin Width (ms)")
    title("Avg=%0.1f, SD=%0.1f" % (avg_interbinwidth_ms, std_interbinwidth_ms))
    tight_layout(pad=2.5)
    savefig(foldername + "/" + filename + "_interburst_binwidth.png")
    close()

    return [binwidth_ms, intrabinwidth_ms, interbinwidth_ms, ctr_of_bin_ms, ctr_of_intrabin_ms, ctr_of_interbin_ms, numbins, bin_ms, bin_s]
sampler_names_short = ["SM","AM-FS","AM-LS","KAMH-LS"] sampler_names = ["StandardMetropolis","AdaptiveMetropolis","AdaptiveMetropolisLearnScale","KameleonWindowLearnScale"] colours = ['blue', 'red', 'magenta', 'green'] ii=0 for sampler_name in sampler_names: filename = directory+sampler_name+"_mmds.bin" f = open(filename,"r") upto, mmds, mean_dist = load(f) trials=shape(mean_dist)[1] figure(1) if which_plot == "mean": stds = std(mean_dist,1)/sqrt(trials) means = mean(mean_dist,1) if which_plot == "mmd": stds = std(mmds,1)/sqrt(trials) means = mean(mmds,1) zscore=1.28 yerr = zscore*stds if highlight == "SM": condition = sampler_name == "StandardMetropolis" elif highlight == "AM": condition = sampler_name == "AdaptiveMetropolis" or sampler_name == "AdaptiveMetropolisLearnScale" elif highlight == "KAMH": condition = sampler_name == "KameleonWindowLearnScale" else: condition = True if condition:
def exponential(self, estimates):
    logging.debug("Entering")

    # find a strict lower bound on the estimates and remove it from list
    bound = estimates.min()
    bound_idx = estimates.argmin()
    estimates = delete(estimates, bound_idx)
    estimates = estimates - bound

    # find an integer close to the mean of the transformed estimates and divide
    E = max(int(round(abs(mean(estimates)))), 1)
    estimates = estimates / E

    logging.info("Using %f as lower bound on estimates" % bound)
    logging.info("Computing product of E=%d RR estimates" % E)
    logging.info("Std-deviation after scaling is %f" % std(estimates))

    # index for iterating through the used estimates
    # (might be averaged, so might be lower than the number of available estimates
    # if the block size is greater than one)
    estimate_idx = 0

    samples = zeros(E)
    for iteration in range(E):
        weight = 1

        # start with x^0 which is 1
        samples[iteration] = 1

        term = 1

        # index for computed samples
        series_term_idx = 1

        while weight > 0:
            # update current term of infinite series
            # average over block
            x_inner = self.get_estimate(estimates, estimate_idx)
            term *= (x_inner / series_term_idx)

            # if summation has reached threshold, update weights
            if abs(term) < self.threshold:
                q = term / self.threshold
                if rand() < q:
                    # continue and update weight
                    weight = weight / q
                else:
                    # stop summation
                    weight = 0

            samples[iteration] += weight * term
            estimate_idx += 1
            series_term_idx += 1

        logging.info("RR estimate %d/%d with threshold %.2f is %.4f and took %d series terms" %
                     (iteration + 1, E, self.threshold, samples[iteration], series_term_idx))

    # now put things together. Note that samples contains an unbiased estimate
    # which might be quite small. However, due to the removal of the bound,
    # this will not cause an underflow and we can just take the log.
    logging.debug("Leaving")
    return bound + sum(log(samples))
def __process_results__(self):
    lines = []
    if len(self.experiments) == 0:
        lines.append("no experiments to process")
        return

    # burnin is the same for all chains
    burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin

    quantiles = zeros((len(self.experiments), len(self.ref_quantiles)))
    norm_of_means = zeros(len(self.experiments))
    acceptance_rates = zeros(len(self.experiments))
    # ess_0 = zeros(len(self.experiments))
    # ess_1 = zeros(len(self.experiments))
    # ess_minima = zeros(len(self.experiments))
    # ess_medians = zeros(len(self.experiments))
    # ess_maxima = zeros(len(self.experiments))
    times = zeros(len(self.experiments))

    for i in range(len(self.experiments)):
        burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

        # use precomputed quantiles if they match with the provided ones
        if hasattr(self.experiments[i], "ref_quantiles") and \
           hasattr(self.experiments[i], "quantiles") and \
           allclose(self.ref_quantiles, self.experiments[i].ref_quantiles):
            quantiles[i, :] = self.experiments[i].quantiles
        else:
            try:
                quantiles[i, :] = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(\
                    burned_in, self.ref_quantiles)
            except NotImplementedError:
                print "skipping quantile computations, distribution does", \
                      "not support it."

        # quantiles should be about average error rather than average quantile
        quantiles[i, :] = abs(quantiles[i, :] - self.ref_quantiles)

        dim = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.dimension

        norm_of_means[i] = norm(mean(burned_in, 0))
        acceptance_rates[i] = mean(self.experiments[i].mcmc_chain.accepteds[burnin:])

        # dump burned in samples to disc
        # sample_filename=self.experiments[0].experiment_dir + self.experiments[0].name + "_burned_in.txt"
        # savetxt(sample_filename, burned_in)

        # store minimum ess for every experiment
        #ess_per_covariate = asarray([RCodaTools.ess_coda(burned_in[:, cov_idx]) for cov_idx in range(dim)])
        # ess_per_covariate = asarray([0 for _ in range(dim)])
        # ess_0=ess_per_covariate[0]
        # ess_1=ess_per_covariate[1]
        # ess_minima[i] = min(ess_per_covariate)
        # ess_medians[i] = median(ess_per_covariate)
        # ess_maxima[i] = max(ess_per_covariate)

        # save chain time needed
        ellapsed = self.experiments[i].mcmc_chain.mcmc_outputs[0].times
        times[i] = int(round(sum(ellapsed)))

    mean_quantiles = mean(quantiles, 0)
    std_quantiles = std(quantiles, 0)

    sqrt_num_trials = sqrt(len(self.experiments))

    # print median kernel width sigma
    #sigma=GaussianKernel.get_sigma_median_heuristic(burned_in.T)
    #lines.append("median kernel sigma: "+str(sigma))

    lines.append("quantiles:")
    for i in range(len(self.ref_quantiles)):
        lines.append(str(mean_quantiles[i]) + " +- " + str(std_quantiles[i] / sqrt_num_trials))

    lines.append("norm of means:")
    lines.append(str(mean(norm_of_means)) + " +- " + str(std(norm_of_means) / sqrt_num_trials))

    lines.append("acceptance rate:")
    lines.append(str(mean(acceptance_rates)) + " +- " + str(std(acceptance_rates) / sqrt_num_trials))

    # lines.append("ess dimension 0:")
    # lines.append(str(mean(ess_0)) + " +- " + str(std(ess_0)/sqrt_num_trials))
    #
    # lines.append("ess dimension 1:")
    # lines.append(str(mean(ess_1)) + " +- " + str(std(ess_1)/sqrt_num_trials))
    #
    # lines.append("minimum ess:")
    # lines.append(str(mean(ess_minima)) + " +- " + str(std(ess_minima)/sqrt_num_trials))
    #
    # lines.append("median ess:")
    # lines.append(str(mean(ess_medians)) + " +- " + str(std(ess_medians)/sqrt_num_trials))
    #
    # lines.append("maximum ess:")
    # lines.append(str(mean(ess_maxima)) + " +- " + str(std(ess_maxima)/sqrt_num_trials))

    lines.append("times:")
    lines.append(str(mean(times)) + " +- " + str(std(times) / sqrt_num_trials))

    # mean as a function of iterations, normalised by time
    step = round((self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin) / 5)
    iterations = arange(self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin, step=step)

    running_means = zeros(len(iterations))
    running_errors = zeros(len(iterations))
    for i in arange(len(iterations)):
        # norm of mean of chain up
        norm_of_means_yet = zeros(len(self.experiments))
        for j in range(len(self.experiments)):
            samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
            norm_of_means_yet[j] = norm(mean(samples_yet, 0))

        running_means[i] = mean(norm_of_means_yet)
        error_level = 1.96
        running_errors[i] = error_level * std(norm_of_means_yet) / sqrt(len(norm_of_means_yet))

    ioff()
    figure()
    plot(iterations, running_means * mean(times))
    fill_between(iterations, (running_means - running_errors) * mean(times), \
                 (running_means + running_errors) * mean(times), hold=True, color="gray")

    # make sure path to save exists
    try:
        os.makedirs(self.experiments[0].experiment_dir)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

    savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean.png")
    close()

    # also store plot X and Y
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_X.txt", \
            iterations)
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_Y.txt", \
            running_means * mean(times))
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_errors.txt", \
            running_errors * mean(times))

    # dont produce quantile convergence plots here for now
    """# quantile convergence of a single one
    desired_quantile=0.5
    running_quantiles=zeros(len(iterations))
    running_quantile_errors=zeros(len(iterations))
    for i in arange(len(iterations)):
        quantiles_yet = zeros(len(self.experiments))
        for j in range(len(self.experiments)):
            samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]

            # just compute one quantile for now
            quantiles_yet[j]=self.experiments[j].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(samples_yet, \
                array([desired_quantile]))
            quantiles_yet[j]=abs(quantiles_yet[j]-desired_quantile)

        running_quantiles[i] = mean(quantiles_yet)
        error_level = 1.96
        running_quantile_errors[i] = error_level * std(quantiles_yet) / sqrt(len(quantiles_yet))

    ioff()
    figure()
    plot(iterations, running_quantiles*mean(times))
    fill_between(iterations, (running_quantiles - running_quantile_errors)*mean(times), \
                 (running_quantiles + running_quantile_errors)*mean(times), hold=True, color="gray")

    plot([iterations.min(),iterations.max()], [desired_quantile*mean(times) for _ in range(2)])

    title(str(desired_quantile)+"-quantile convergence")
    savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile.png")
    close()

    # also store plot X and Y
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_X.txt", \
            iterations)
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_Y.txt", \
            running_quantiles*mean(times))
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_errors.txt", \
            running_quantile_errors*mean(times))
    savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_reference.txt", \
            [desired_quantile*mean(times)])
    """

    # add latex table line
    # latex_lines = []
    # latex_lines.append("Sampler & Acceptance & ESS2 & Norm(mean) & ")
    # for i in range(len(self.ref_quantiles)):
    #     latex_lines.append('%.1f' % self.ref_quantiles[i] + "-quantile")
    #     if i < len(self.ref_quantiles) - 1:
    #         latex_lines.append(" & ")
    # latex_lines.append("\\\\")
    # lines.append("".join(latex_lines))
    #
    # latex_lines = []
    # latex_lines.append(self.experiments[0].mcmc_chain.mcmc_sampler.__class__.__name__)
    # latex_lines.append('$%.3f' % mean(acceptance_rates) + " \pm " + '%.3f$' % (std(acceptance_rates)/sqrt_num_trials))
    # latex_lines.append('$%.3f' % mean(norm_of_means) + " \pm " + '%.3f$' % (std(norm_of_means)/sqrt_num_trials))
    # for i in range(len(self.ref_quantiles)):
    #     latex_lines.append('$%.3f' % mean_quantiles[i] + " \pm " + '%.3f$' % (std_quantiles[i]/sqrt_num_trials))
    #
    # lines.append(" & ".join(latex_lines) + "\\\\")

    return lines
def _followxSingleDirection(self, x, direction=Direction.FORWARD, forward_curve=None, last_eigenvector=None, weights=1.):
    '''Generates a partial lpc curve dictionary from the start point, x.

    Arguments
    ---------
    x : 1-dim, length m, numpy.array of floats, start point for the algorithm when m is dimension of feature space

    direction : bool, proceeds in Direction.FORWARD or Direction.BACKWARD from this point (just sets sign for first eigenvalue)

    forward_curve : dictionary as returned by this function, is used to detect crossing of the curve under construction
        with a previously constructed curve

    last_eigenvector : 1-dim, length m, numpy.array of floats, a unit vector that defines the initial direction, relative
        to which the first eigenvector is biased and initial cos_neu_neu is calculated

    weights : 1-dim, length n numpy.array of observation weights (can also be used to exclude individual observations from
        the computation by setting their weight to zero.), where n is the number of feature points
    '''
    x0 = copy(x)
    N = self.Xi.shape[0]
    d = self.Xi.shape[1]
    it = self._lpcParameters['it']
    h = array(self._lpcParameters['h'])
    t0 = self._lpcParameters['t0']
    rho0 = self._lpcParameters['rho0']

    save_xd = empty((it, d))
    eigen_vecd = empty((it, d))
    c0 = ones(it)
    cos_alt_neu = ones(it)
    cos_neu_neu = ones(it)
    lamb = empty(it)  #NOTE this is named 'lambda' in the original R code
    rho = zeros(it)
    high_rho_points = empty((0, d))
    count_points = 0

    for i in range(it):
        kernel_weights = self._kernd(self.Xi, x0, c0[i] * h) * weights
        mu_x = average(self.Xi, axis=0, weights=kernel_weights)
        sum_weights = sum(kernel_weights)
        mean_sub = self.Xi - mu_x
        cov_x = dot(dot(transpose(mean_sub), numpy.diag(kernel_weights)), mean_sub) / sum_weights
        #assert (abs(cov_x.transpose() - cov_x)/abs(cov_x.transpose() + cov_x) < 1e-6).all(), 'Covariance matrix not symmetric, \n cov_x = {0}, mean_sub = {1}'.format(cov_x, mean_sub)
        save_xd[i] = mu_x  #save first point of the branch
        count_points += 1

        #calculate path length
        if i == 0:
            lamb[0] = 0
        else:
            lamb[i] = lamb[i - 1] + sqrt(sum((mu_x - save_xd[i - 1])**2))

        #calculate eigenvalues/vectors
        #(sorted_eigen_cov is a list of tuples containing eigenvalue and associated eigenvector, sorted descending by eigenvalue)
        eigen_cov = eigh(cov_x)
        sorted_eigen_cov = zip(eigen_cov[0], map(ravel, vsplit(eigen_cov[1].transpose(), len(eigen_cov[1]))))
        sorted_eigen_cov.sort(key=lambda elt: elt[0], reverse=True)
        eigen_norm = sqrt(sum(sorted_eigen_cov[0][1]**2))
        eigen_vecd[i] = direction * sorted_eigen_cov[0][1] / eigen_norm  #Unit eigenvector corresponding to largest eigenvalue

        #rho parameters
        rho[i] = sorted_eigen_cov[1][0] / sorted_eigen_cov[0][0]  #Ratio of two largest eigenvalues
        if i != 0 and rho[i] > rho0 and rho[i - 1] <= rho0:
            high_rho_points = vstack((high_rho_points, x0))

        #angle between successive eigenvectors
        if i == 0 and last_eigenvector is not None:
            cos_alt_neu[i] = direction * dot(last_eigenvector, eigen_vecd[i])
        if i > 0:
            cos_alt_neu[i] = dot(eigen_vecd[i], eigen_vecd[i - 1])

        #signum flipping
        if cos_alt_neu[i] < 0:
            eigen_vecd[i] = -eigen_vecd[i]
            cos_neu_neu[i] = -cos_alt_neu[i]
        else:
            cos_neu_neu[i] = cos_alt_neu[i]

        #angle penalization
        pen = self._lpcParameters['pen']
        if pen > 0:
            if i == 0 and last_eigenvector is not None:
                a = abs(cos_alt_neu[i])**pen
                eigen_vecd[i] = a * eigen_vecd[i] + (1 - a) * last_eigenvector
            if i > 0:
                a = abs(cos_alt_neu[i])**pen
                eigen_vecd[i] = a * eigen_vecd[i] + (1 - a) * eigen_vecd[i - 1]

        #check curve termination criteria
        if i not in (0, it - 1):
            #crossing
            cross = self._lpcParameters['cross']
            if forward_curve is None:
                full_curve_points = save_xd[0:i + 1]
            else:
                full_curve_points = vstack((forward_curve['save_xd'], save_xd[0:i + 1]))  #inefficient, initialize then append?
            if not cross:
                prox = where(ravel(cdist(full_curve_points, [mu_x])) <= mean(h))[0]
                if len(prox) != max(prox) - min(prox) + 1:
                    break

            #convergence
            convergence_at = self._lpcParameters['convergence_at']
            conv_ratio = abs(lamb[i] - lamb[i - 1]) / (2 * (lamb[i] + lamb[i - 1]))
            if conv_ratio < convergence_at:
                break

            #boundary
            boundary = self._lpcParameters['boundary']
            if conv_ratio < boundary:
                c0[i + 1] = 0.995 * c0[i]
            else:
                c0[i + 1] = min(1.01 * c0[i], 1)

        #step along in direction eigen_vecd[i]
        x0 = mu_x + t0 * eigen_vecd[i]

    #trim output in the case where convergence occurs before 'it' iterations
    curve = {'save_xd': save_xd[0:count_points],
             'eigen_vecd': eigen_vecd[0:count_points],
             'cos_neu_neu': cos_neu_neu[0:count_points],
             'rho': rho[0:count_points],
             'high_rho_points': high_rho_points,
             'lamb': lamb[0:count_points],
             'c0': c0[0:count_points]
            }
    return curve
def score_tfidf_freq(tfidfs):
    return round(float(mean(tfidfs) * 100), 2)
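# Example, assuming `mean` is numpy.mean as imported by the original module
# (the tf-idf values below are illustrative).
from numpy import mean

print(score_tfidf_freq([0.12, 0.08, 0.25]))   # 15.0: average tf-idf expressed as a percentage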
import sys

if __name__ == '__main__':
    # load data
    data, labels = GPData.get_glass_data()

    # throw away some data
    n = 250
    seed(1)
    idx = permutation(len(data))
    idx = idx[:n]
    data = data[idx]
    labels = labels[idx]

    # normalise and whiten dataset
    data -= mean(data, 0)
    L = cholesky(cov(data.T))
    data = solve_triangular(L, data.T, lower=True).T
    dim = shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    target = PseudoMarginalHyperparameterDistribution(data, labels, \
                                                      n_importance=100, prior=theta_prior, \
                                                      ridge=1e-3)

    # create sampler
    burnin = 10000
    num_iterations = burnin + 300000
    kernel = GaussianKernel(sigma=23.0)
    sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
test_1 = line2array(list_test, list_label)
print("test:")
print(test_1)
# -------------------- test data for the first six columns (above) --------------------
p_0_pre = diff_0 * test_1
print("per-attribute probabilities of class 'no' for the first six columns:")
print(p_0_pre)
print("per-attribute probabilities of class 'yes' for the first six columns:")
p_1_pre = diff_1 * test_1
print(p_1_pre)
# -------------------- probability computation for the first six columns (above) --------------------
print("val0: (density and sugar-content data of the last two columns for the 'no' class)")
print(val0)
print("val1: (density and sugar-content data of the last two columns for the 'yes' class)")
print(val1)
mean0 = np.array([mean(val0[0]), mean(val0[1])])  # means of density and sugar content for class 0 ('no')
mean1 = np.array([mean(val1[0]), mean(val1[1])])  # means of density and sugar content for class 1 ('yes')
var0 = np.array([var(val0[0]), var(val0[1])])     # variances of density and sugar content for class 0 ('no')
var1 = np.array([var(val1[0]), var(val1[1])])     # variances of density and sugar content for class 1 ('yes')
# -------------------- mean/variance computation for the last two columns (above) --------------------


def gaussian(mean, var, x):
    # Gaussian density: divide the squared deviation by (2 * var); the original
    # "/ 2 * var" multiplied by var due to operator precedence, and 3.14 is replaced by np.pi
    res = 1 / sqrt(2 * np.pi * var) * np.exp(-(mean - x)**2 / (2 * var))
    return res


p_0_pro = gaussian(mean0[0], var0[0], 0.697) + gaussian(mean0[1], var0[1], 0.460)
print(p_0_pro)
p_1_pro = gaussian(mean1[0], var1[0], 0.697) + gaussian(mean1[1], var1[1], 0.460)
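# Quick sanity check of gaussian() against scipy (scipy assumed available; the
# values are illustrative, and norm.pdf takes the standard deviation, i.e. the
# square root of the variance).
from scipy.stats import norm

print(gaussian(0.5, 0.04, 0.697))
print(norm.pdf(0.697, loc=0.5, scale=np.sqrt(0.04)))  # should match the line above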
plotting = False
pkernel = PolynomialKernel(degree=3)

samples_long = loadtxt("/nfs/home2/dino/kamh-results/StandardMetropolis_PseudoMarginalHyperparameterDistribution_merged_samples.txt")
samples_long = samples_long[:10000]
# f_long=open("/nfs/home2/dino/kamh-results/long_experiment_output.bin")
# experiment_long=load(f_long)
# f_long.close()
# thin_long=100
# mcmc_chain_long=experiment_long.mcmc_chain
# burnin=mcmc_chain_long.mcmc_params.burnin
# indices_long = range(burnin, mcmc_chain_long.iteration,thin_long)
# samples_long=mcmc_chain_long.samples[indices_long]
mu_long = mean(samples_long, 0)
print 'using this many samples for the long chain: ', shape(samples_long)[0]

how_many_chains = 20
stats_granularity = 10

path_above = "/nfs/home2/dino/git/kameleon-mcmc/main/gp/scripts/glass_gaussian_ard/"
#path_above = "/nfs/data3/ucabhst/kameleon_experiments/glass_ard/"
path_below = "output/experiment_output.bin"

#sampler_names = ["KameleonWindowLearnScale", "AdaptiveMetropolisLearnScale","AdaptiveMetropolis"]
sampler_names = ["StandardMetropolis"]
path_temp = "_PseudoMarginalHyperparameterDistribution_#/"

for sampler_name in sampler_names:
def detect_insertions(h0_mean, h1_mean, h2_mean, heads_per_base, inputf, reads_per_base, tails_per_base):
    fp_scores, tp_scores = [], []
    not_found_insertions = json.load(open(data_d('subject_genome.fa.alu_positions.json')))
    if not not_found_insertions:
        return
    pers_alu_info = copy.deepcopy(not_found_insertions)
    total_alus = count_alus(not_found_insertions)
    false_positives = []
    skip_until = -1
    window = []
    for col in inputf.pileup():
        if col.pos < skip_until:
            continue
        if len(window) == WIN_LENGTH:
            window = window[1:WIN_LENGTH]
        window.append(site(col))

        # if col.pos < 36265890:
        #     continue

        if boundary(window) and enough_coverage(window, reads_per_base):
            reason = potential_ALU_insert(window, heads_per_base, tails_per_base)
            if reason:
                spanning = window_stats(window)
                if spanning >= h0_mean:
                    continue
                # hyp, _ = min((None, h0_mean), ('heterozygous', h1_mean), ('homozygous', h2_mean),
                #              key = lambda (hyp, mean): abs(spanning - mean))
                hyp, _ = min(('heterozygous', h1_mean), ('homozygous', h2_mean),
                             key=lambda (hyp, mean): abs(spanning - mean))
                if hyp:
                    chrom = inputf.getrname(col.tid)
                    window = []
                    skip_until = col.pos + RLEN

                    fp = True
                    for hap_no, haplotype_positions in enumerate(pers_alu_info[chrom]):
                        for inserted in haplotype_positions:
                            if abs(inserted['ref_pos'] - col.pos) < 300:
                                if inserted in not_found_insertions[chrom][hap_no]:
                                    not_found_insertions[chrom][hap_no].remove(inserted)
                                fp = False
                    if fp:
                        false_positives.append([hyp, spanning, chrom, col.pos, reason])
                        fp_scores.append(spanning)
                    else:
                        tp_scores.append(spanning)

                    logm('\t'.join(map(str, [hyp, not fp, spanning, chrom, col.pos, reason])))

    print 'False positives:\n', pformat(sorted(false_positives, key=lambda fp: (fp[-1], fp[-2])))
    print 'False negatives:\n', pformat(not_found_insertions)
    print
    #print bgr_spanning, reads_per_base
    print 'True positives: %d (%.2lf%%)\t' % (len(tp_scores), float(100 * len(tp_scores)) / (len(tp_scores) + len(fp_scores))), mean(tp_scores), var(tp_scores)
    if fp_scores:
        print 'False positives: %d (%.2lf%%)\t' % (len(fp_scores), float(100 * len(fp_scores)) / (len(tp_scores) + len(fp_scores))), mean(fp_scores), var(fp_scores)
    print 'False negatives: %d(%.2f%%)\n' % (count_alus(not_found_insertions), float(100 * count_alus(not_found_insertions)) / total_alus)
def reduce(self, key, values):
    return (key, mean(values))
print "python " + str(sys.argv[0]).split(os.sep)[-1] + " /nfs/nhome/live/ucabhst/kameleon_experiments/ 3" exit() experiment_dir_base = str(sys.argv[1]) n = int(str(sys.argv[2])) # loop over parameters here experiment_dir = experiment_dir_base + str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep print "running experiments", n, "times at base", experiment_dir # load data data,labels=GPData.get_glass_data() # normalise and whiten dataset data-=mean(data, 0) L=cholesky(cov(data.T)) data=solve_triangular(L, data.T, lower=True).T dim=shape(data)[1] # prior on theta and posterior target estimate theta_prior=Gaussian(mu=0*ones(dim), Sigma=eye(dim)*5) distribution=PseudoMarginalHyperparameterDistribution(data, labels, \ n_importance=100, prior=theta_prior, \ ridge=1e-3) sigma = 23.0 print "using sigma", sigma kernel = GaussianKernel(sigma=sigma) for i in range(n):
from numpy import ones, array
from pkg.plots import figureAgent2, showQValues2, show3dQValues2
from pkg.agent import Agent
from pkg.simulationCtLbd import simulateCtLbd
from matplotlib.pyplot import show
from numpy.ma.core import mean

#Initial conditions
x0 = array([.5])
#lRates = [0.01,0.11,0.21,0.31,0.41,0.51, 0.61, 0.71, 0.81, 0.91]
lRates = [0.11]
#prob = array([.55, .65, .75, .85, .95])
#vol = array([.001,.005])
prob = array([.85])
vol = array([.001])
pPolicy = 'greedy'
nTrials = 5000   # number of trials of an episode
nEpisodes = 1    # number of episodes
saveOutput = False
path = 'data/ctLbd/'
fixSeed = False

(agent, environment) = simulateCtLbd(vol, prob, x0, lRates, nTrials, nEpisodes, pPolicy, fixSeed, saveOutput, path)

#import pdb
figureAgent2(environment, agent)
print mean(agent.err**2)
show()
#pdb.set_trace()