def next_steps(self): last_tic = time.time() for t in range(self.T): excluded = t in self.excludedobservations if self.verbose: print "time %i" % t if excluded: print "observations", self.observations[t,:], "set to be excluded" TandWresults = self.modelx.transitionAndWeight(self.xparticles[..., newaxis], \ self.observations[t,:], self.theta[:, newaxis], t + 1) self.xparticles[...] = TandWresults["states"][..., 0] if not(excluded): self.logxweights[...] = TandWresults["weights"][..., 0] self.logxweights[isnan(self.logxweights)] = -(10**150) self.logxweights[isinf(self.logxweights)] = -(10**150) self.constants[t] = numpymax(self.logxweights) self.logxweights[...] -= self.constants[t] else: self.logxweights = zeros(self.Nx) self.xweights[...] = exp(self.logxweights) logLike = log(mean(self.xweights)) self.totalLogLike += logLike self.xresample(t) if ((t+1) in self.savingtimes): self.xhistory[..., self.alreadystored] = self.xparticles self.alreadystored += 1 self.meanpath[t,:] = mean(self.xparticles, axis = 0) new_tic = time.time() self.computingtimes[t] = new_tic - last_tic last_tic = new_tic
def next_steps(self):
    """ Run the particle filter through all T observations.

    Same loop as the plain filter, but the per-step score can come either
    from the log-likelihood or from the model's simple Hyvarinen score,
    depending on ``self.modelx.HScoreInference``.
    """
    last_tic = time.time()
    for t in range(self.T):
        excluded = t in self.excludedobservations
        if self.verbose:
            print "time %i" % t
            if excluded:
                print "observations", self.observations[t, :], "set to be excluded"
        # propagate particles and get incremental log-weights; the trailing
        # newaxis adds the singleton axis transitionAndWeight expects
        TandWresults = self.modelx.transitionAndWeight(self.xparticles[..., newaxis], \
                self.observations[t,:], self.theta[:, newaxis], t + 1)
        self.xparticles[...] = TandWresults["states"][..., 0]
        if not (excluded):
            self.logxweights[...] = TandWresults["weights"][..., 0]
            # clamp NaN / inf log-weights to a huge negative value so those
            # particles get (numerically) zero weight
            self.logxweights[isnan(self.logxweights)] = -(10**150)
            self.logxweights[isinf(self.logxweights)] = -(10**150)
            # subtract the max log-weight before exponentiating (stability)
            self.constants[t] = numpymax(self.logxweights)
            self.logxweights[...] -= self.constants[t]
        else:
            # excluded observation: uniform (zero) log-weights
            self.logxweights = zeros(self.Nx)
        self.xweights[...] = exp(self.logxweights)
        if not (self.modelx.HScoreInference):
            # log-score inference: incremental log-likelihood, with the
            # subtracted normalizing constant added back
            logLike = log(mean(self.xweights)) + self.constants[t]
        else:
            # Hyvarinen-score inference: use the score returned by the model
            logLike = TandWresults["simpleHScore"]
        self.totalLogLike += logLike
        self.xresample(t)
        if ((t + 1) in self.savingtimes):
            self.xhistory[..., self.alreadystored] = self.xparticles
            self.alreadystored += 1
        self.meanpath[t, :] = mean(self.xparticles, axis=0)
        new_tic = time.time()
        self.computingtimes[t] = new_tic - last_tic
        last_tic = new_tic
def next_steps(self):
    """ Run Nx x-particle filters in parallel, one per theta-particle.

    Weights and constants are 2-D here: rows index x-particles, columns
    index theta-particles (axis=0 reductions are per-theta).
    """
    for t in range(self.T):
        excluded = t in self.excludedobservations
        if self.verbose:
            print "time %i" % t
            if excluded:
                print "observations", self.observations[t,:], "set to be excluded"
        TandWresults = self.modelx.transitionAndWeight(self.xparticles, \
                self.observations[t,:], self.thetaparticles, t + 1)
        self.xparticles[...] = TandWresults["states"]
        if not(excluded):
            self.logxweights[...] = TandWresults["weights"]
            # clamp NaN / inf log-weights so those particles get ~zero weight
            self.logxweights[isnan(self.logxweights)] = -(10**150)
            self.logxweights[isinf(self.logxweights)] = -(10**150)
            # per-theta max log-weight, subtracted before exponentiating
            self.constants[t, :] = numpymax(self.logxweights, axis = 0)
            self.logxweights[...] -= self.constants[t, :]
        else:
            # excluded observation: uniform (zero) log-weights
            self.logxweights = zeros((self.Nx, self.Ntheta))
        self.xweights[...] = exp(self.logxweights)
        if self.saveproposals:
            # proposed (pre-resampling) x-particles
            self.allproposals[t, ...] = self.xparticles.copy()
        if self.savepath:
            # weighted filtering mean of each state dimension, per theta
            for xdim in range(self.xparticles.shape[1]):
                self.paths[t, xdim, :] = average(self.xparticles[:, xdim, :], weights = self.xweights, axis = 0)
        logLike = log(mean(self.xweights, axis = 0))
        logLike[isnan(logLike)] = -(10**150)
        logLike[isinf(logLike)] = -(10**150)
        # NOTE(review): totalLogLike accumulates the constant-free increment,
        # while allLL below stores the constant-corrected value — confirm intended
        self.totalLogLike += logLike
        if self.saveLL:
            self.allLL[t, :] = logLike + self.constants[t, :]
        if not(excluded):
            self.xresample()
        if self.saveproposals:
            # resampled x-particles
            self.allxparticles[t, ...] = self.xparticles.copy()
def next_steps(self): """ Perform all the iterations until time T == number of observations. """ for t in range(0, self.T): print "time %i" % t TandWresults = self.modelx.transitionAndWeight(self.xparticles[newaxis, ...], \ self.observations[t], self.thetaparticles, t + 1) self.xparticles[...] = TandWresults["states"][0, ...] self.logxweights[:] = TandWresults["weights"][0, :] self.logxweights[isnan(self.logxweights)] = -(10**150) self.logxweights[isinf(self.logxweights)] = -(10**150) self.constants[t] = numpymax(self.logxweights) self.logxweights[:] -= self.constants[t] self.xweights[:] = exp(self.logxweights) self.resample() if t in self.savingtimes or t == self.T - 1: reducedParticles, counts = self.reduceParticles(self.thetaparticles) self.allReducedParticles.append(reducedParticles) self.allCounts.append(counts)
def next_steps(self):
    """ Perform all the iterations until time T == number of observations.

    Each iteration propagates the x-particles through the model transition,
    reweights them against the current observation, and resamples; at the
    saving times a reduced representation of the theta-particles is stored.
    """
    for t in range(0, self.T):
        print "time %i" % t
        # propagate particles and get incremental log-weights; the leading
        # newaxis supplies the singleton axis transitionAndWeight expects
        TandWresults = self.modelx.transitionAndWeight(self.xparticles[newaxis, ...], \
                self.observations[t], self.thetaparticles, t + 1)
        self.xparticles[...] = TandWresults["states"][0, ...]
        self.logxweights[:] = TandWresults["weights"][0, :]
        # clamp NaN / inf log-weights to a huge negative value so those
        # particles get (numerically) zero weight
        self.logxweights[isnan(self.logxweights)] = -(10**150)
        self.logxweights[isinf(self.logxweights)] = -(10**150)
        # subtract the max log-weight before exponentiating (stability);
        # the subtracted constant is kept in self.constants[t]
        self.constants[t] = numpymax(self.logxweights)
        self.logxweights[:] -= self.constants[t]
        self.xweights[:] = exp(self.logxweights)
        self.resample()
        if t in self.savingtimes or t == self.T - 1:
            # NOTE(review): reduceParticles is defined elsewhere; presumably it
            # deduplicates theta-particles and returns occurrence counts — confirm
            reducedParticles, counts = self.reduceParticles(self.thetaparticles)
            self.allReducedParticles.append(reducedParticles)
            self.allCounts.append(counts)
def next_steps(self):
    """ Run Nx x-particle filters in parallel, one per theta-particle.

    Weights and constants are 2-D here: rows index x-particles, columns
    index theta-particles (axis=0 reductions are per-theta).
    """
    for t in range(self.T):
        excluded = t in self.excludedobservations
        if self.verbose:
            print "time %i" % t
            if excluded:
                print "observations", self.observations[t, :], "set to be excluded"
        TandWresults = self.modelx.transitionAndWeight(self.xparticles, \
                self.observations[t,:], self.thetaparticles, t + 1)
        self.xparticles[...] = TandWresults["states"]
        if not (excluded):
            self.logxweights[...] = TandWresults["weights"]
            # clamp NaN / inf log-weights so those particles get ~zero weight
            self.logxweights[isnan(self.logxweights)] = -(10**150)
            self.logxweights[isinf(self.logxweights)] = -(10**150)
            # per-theta max log-weight, subtracted before exponentiating
            self.constants[t, :] = numpymax(self.logxweights, axis=0)
            self.logxweights[...] -= self.constants[t, :]
        else:
            # excluded observation: uniform (zero) log-weights
            self.logxweights = zeros((self.Nx, self.Ntheta))
        self.xweights[...] = exp(self.logxweights)
        if self.saveproposals:
            # proposed (pre-resampling) x-particles
            self.allproposals[t, ...] = self.xparticles.copy()
        if self.savepath:
            # weighted filtering mean of each state dimension, per theta
            for xdim in range(self.xparticles.shape[1]):
                self.paths[t, xdim, :] = average(self.xparticles[:, xdim, :], weights=self.xweights, axis=0)
        logLike = log(mean(self.xweights, axis=0))
        logLike[isnan(logLike)] = -(10**150)
        logLike[isinf(logLike)] = -(10**150)
        # NOTE(review): totalLogLike accumulates the constant-free increment,
        # while allLL below stores the constant-corrected value — confirm intended
        self.totalLogLike += logLike
        if self.saveLL:
            self.allLL[t, :] = logLike + self.constants[t, :]
        if not (excluded):
            self.xresample()
        if self.saveproposals:
            # resampled x-particles
            self.allxparticles[t, ...] = self.xparticles.copy()
def next_steps(self):
    """ Perform all the iterations until time T == number of observations.

    After each reweighting step the theta-particles are jittered with a
    shrinkage kernel: each transformed theta is pulled towards the weighted
    mean (factor self.shrink) and Gaussian noise with covariance
    self.hsq * cov is added, before resampling.
    """
    for t in range(0, self.T):
        #print "time %i" % t
        progressbar(t / (self.T - 1))
        last_tic = time.time()
        # propagate particles and get incremental log-weights; the leading
        # newaxis supplies the singleton axis transitionAndWeight expects
        TandWresults = self.modelx.transitionAndWeight(self.xparticles[newaxis, ...], \
                self.observations[t], self.thetaparticles, t + 1)
        self.xparticles[...] = TandWresults["states"][0, ...]
        if self.AP["prediction"]:
            # prediction uses the freshly transitioned (still unweighted) particles
            self.prediction(t)
        self.logxweights[:] = TandWresults["weights"][0, :]
        # clamp NaN / inf log-weights so those particles get ~zero weight
        self.logxweights[isnan(self.logxweights)] = -(10**150)
        self.logxweights[isinf(self.logxweights)] = -(10**150)
        # subtract the max log-weight before exponentiating (stability)
        self.constants[t] = numpymax(self.logxweights)
        self.logxweights[:] -= self.constants[t]
        self.xweights[:] = exp(self.logxweights)
        # undo the constant shift when computing the evidence increment
        self.evidences[t] = exp(self.constants[t]) * mean(self.xweights)
        covmean = self.computeCovarianceAndMean()
        # shrink each transformed theta towards the weighted mean ...
        m = (self.shrink) * self.transformedthetaparticles + \
                (1 - self.shrink) * transpose(covmean["mean"][newaxis])
        # ... and add Gaussian jitter scaled by self.hsq
        noise = transpose(random.multivariate_normal(repeat(0, self.modeltheta.parameterdimension), \
                self.hsq * covmean["cov"], size = self.N))
        self.transformedthetaparticles[...] = m + noise
        # map back from the transformed (unconstrained) space to parameter space
        self.thetaparticles[...] = self.modeltheta.untransform(self.transformedthetaparticles)
        self.resample()
        self.resamplingindices.append(t)
        new_tic = time.time()
        self.computingtimes[t] = new_tic - last_tic
        last_tic = new_tic
        if t in self.savingtimes or t == self.T - 1:
            print "\nsaving particles at time %i" % t
            self.thetahistory[self.alreadystored, ...] = self.thetaparticles.copy()
            #self.weighthistory[self.alreadystored, ...] = self.xweights.copy()
            self.alreadystored += 1
print u[0:10],"..." weights = double(weights * Nx / sum(weights, axis = 0)) num_blocks_x = int(math.ceil((Ntheta + THREADS_PER_BLOCK_X - 1)/ THREADS_PER_BLOCK_X)) resampleGF(drv.In(u), \ drv.InOut(newstates), \ drv.In(states), \ drv.In(weights.astype(float32)), \ drv.In(array(Nx, dtype = int32)), \ drv.In(array(Ntheta, dtype = int32)), \ drv.In(array(xdim, dtype = int32)), \ block = (THREADS_PER_BLOCK_X, 1, 1), grid = (num_blocks_x, 1)) return newstates import cProfile cProfile.run(""" random.seed(923) resCUDA = resampleCUDA(states, uw, Nx, xdim, Ntheta) random.seed(923) res = resample2D(states, uw, Nx, xdim, Ntheta) """, "prof") import pstats p = pstats.Stats('prof') p.sort_stats('cumulative').print_stats(10) p.sort_stats('time').print_stats(10) print numpymax(resCUDA - res) print numpymean(resCUDA - res) print sum((resCUDA - res) > 0.1)
drv.In(array(statedim, dtype = int32)), \ block = (THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y, 1), grid = (num_blocks_x, num_blocks_y)) return {"states": states, "weights": noise} print "CUDA:" import cProfile cProfile.run( """ random.seed(820) CPUres = transitionAndWeight(states.copy(), y, parameters, 4) random.seed(820) cudares = transitionCUDA(states, y, parameters, 4) """, "prof") import pstats p = pstats.Stats('prof') p.sort_stats('cumulative').print_stats(10) p.sort_stats('time').print_stats(10) print "diff" print numpymax(CPUres["states"] - cudares["states"]) print numpymax(CPUres["weights"] - cudares["weights"]) print "CPU" print CPUres["states"] print "cuda" print cudares["states"] print "CPU" print CPUres["weights"] print "cuda" print cudares["weights"]
def next_steps(self):
    """ Perform all the iterations until time T == number of observations.

    SMC^2 main loop: for each observation, run one step of all Nx
    x-particle filters (one per theta-particle), update the theta
    log-weights with the per-theta incremental log-likelihood, and
    trigger a resample-move (PMCMC) step whenever the theta ESS drops
    below the configured threshold.
    """
    for t in range(self.T):
        excluded = t in self.excludedobservations
        progressbar(t / (self.T - 1))
        if excluded:
            print "\nobservations", self.observations[t, :], "set to be excluded"
        last_tic = time.time()
        TandWresults = self.modelx.transitionAndWeight(self.xparticles, \
                self.observations[t], self.thetaparticles, t + 1)
        self.xparticles[...] = TandWresults["states"]
        if not (excluded):
            self.logxweights[...] = TandWresults["weights"]
            # in case the measure function returns nans or infs, set the weigths very low
            self.logxweights[isnan(self.logxweights)] = -(10**150)
            self.logxweights[isinf(self.logxweights)] = -(10**150)
            # per-theta max log-weight, subtracted before exponentiating
            self.constants[:] = numpymax(self.logxweights, axis=0)
            self.logxweights[...] -= self.constants[:]
        else:
            # excluded observation: uniform (zero) log-weights
            self.logxweights = zeros((self.Nx, self.Ntheta))
            self.constants[:] = numpymax(self.logxweights, axis=0)
        self.xweights[...] = exp(self.logxweights)
        # per-theta incremental log-likelihood (constant shift added back)
        self.logLike[:] = log(mean(self.xweights, axis=0)) + self.constants[:]
        # prediction: at this point we have the transitioned x-particles and we didn't update
        # the weights of the theta-particles, and the x-particles are not weighted
        if self.AP["prediction"]:
            self.prediction(t)
        if t > 0:
            self.evidences[t] = self.getEvidence(self.thetalogweights[t - 1, :], self.logLike)
            self.totalLogLike[:] += self.logLike[:]
            self.thetalogweights[t, :] = self.thetalogweights[t - 1, :] + self.logLike[:]
        else:
            # first step: no previous column of theta log-weights to carry over
            self.evidences[t] = self.getEvidence(self.thetalogweights[t, :], self.logLike)
            self.totalLogLike[:] += self.logLike[:]
            self.thetalogweights[t, :] = self.thetalogweights[t, :] + self.logLike[:]
        # normalize theta log-weights by their max (numerical stability)
        self.thetalogweights[t, :] -= max(self.thetalogweights[t, :])
        self.xresample()
        self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
        if self.AP["dynamicNx"]:
            progressbar(t / (self.T - 1), text=" ESS: %.3f, Nx: %i" % (self.ESS[t], self.Nx))
        else:
            progressbar(t / (self.T - 1), text=" ESS: %.3f" % self.ESS[t])
        # resample-move: repeat until the theta ESS is back above threshold
        while self.ESS[t] < (self.AP["ESSthreshold"] * self.Ntheta):
            progressbar(t / (self.T - 1), text=\
                    " ESS: %.3f - resample move step at iteration = %i" % (self.ESS[t], t))
            covdict = self.computeCovarianceAndMean(t)
            if self.AP["proposalkernel"] == "randomwalk":
                self.proposalcovmatrix = self.AP["rwvariance"] * covdict["cov"]
                self.proposalmean = None
            elif self.AP["proposalkernel"] == "independent":
                self.proposalcovmatrix = covdict["cov"]
                self.proposalmean = covdict["mean"]
            self.thetaresample(t)
            self.resamplingindices.append(t)
            self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
            for move in range(self.AP["nbmoves"]):
                self.PMCMCstep(t)
                acceptrate = self.acceptratios[-1]
                progressbar(t / (self.T - 1), text=\
                        " \nresample move step at iteration = %i - acceptance rate: %.3f\n" % (t, acceptrate))
                # low acceptance rate + dynamic Nx enabled: double-check we can
                # afford more x-particles, then increase Nx
                if self.acceptratios[-1] < self.AP["dynamicNxThreshold"] \
                        and self.Nx <= (self.AP["NxLimit"] / 2) \
                        and self.AP["dynamicNx"]:
                    self.increaseParticlesNb(t)
                self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
        new_tic = time.time()
        self.computingtimes[t] = new_tic - last_tic
        last_tic = new_tic
        """ filtering and smoothing """
        if self.AP["filtering"]:
            self.filtering(t)
        if self.smoothingEnable and t == self.T - 1:
            self.smoothing(t)
        if t in self.savingtimes or t == self.T - 1:
            print "\nsaving particles at time %i" % t
            self.thetahistory[self.alreadystored, ...] = self.thetaparticles.copy()
            self.weighthistory[self.alreadystored, ...] = exp(self.thetalogweights[t, :])
            self.alreadystored += 1
noise = random.normal(size = (Nx, Ntheta), loc = 0, scale = 1).astype(float32) #print "CUDA noise" #print noise num_blocks_x = int(math.ceil(Nx / THREADS_PER_BLOCK_X)) num_blocks_y = int(math.ceil(Ntheta / THREADS_PER_BLOCK_Y)) comboGF(drv.In(y), drv.InOut(states), \ drv.In(parameters), \ drv.InOut(noise), \ drv.In(array(Nx, dtype = int32)), \ drv.In(array(Ntheta, dtype = int32)), \ drv.In(array(statedim, dtype = int32)), \ block = (THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y, 1), grid = (num_blocks_x, num_blocks_y)) return {"states": states, "weights": noise} print "CUDA:" import cProfile cProfile.run(""" random.seed(820) CPUres = transitionAndWeight(states.copy(), y, parameters, 0) random.seed(820) cudares = combo_CUDA(states, y, parameters, comboGF) """, "prof") import pstats p = pstats.Stats('prof') p.sort_stats('cumulative').print_stats(10) p.sort_stats('time').print_stats(10) print "diff" print numpymax(CPUres["states"] - cudares["states"]) print numpymax(CPUres["weights"] - cudares["weights"])
def next_steps(self):
    """ Perform all the iterations until time T == number of observations.

    SMC^2 main loop, Hyvarinen-score variant: besides the usual per-theta
    incremental log-likelihood, it computes a composite H-score per step
    (via modelx.computeHscore), supporting both continuous observations
    (current theta-particles) and discrete observations (theta-particles
    and weights saved from the previous step).
    """
    for t in range(self.T):
        excluded = t in self.excludedobservations
        progressbar(t / (self.T - 1))
        if excluded:
            print "\nobservations", self.observations[t,:], "set to be excluded"
        last_tic = time.time()
        TandWresults = self.modelx.transitionAndWeight(self.xparticles, \
                self.observations[t], self.thetaparticles, t) # t + 1)
        #if states are not time series as in the case of Kangaroo,
        #then the 0th initial state corresponds to no observation, so I dummied an initial time
        self.xparticles[...] = TandWresults["states"]
        if not(excluded):
            self.logxweights[...] = TandWresults["weights"]
            # in case the measure function returns nans or infs, set the weigths very low
            self.logxweights[isnan(self.logxweights)] = -(10**150)
            self.logxweights[isinf(self.logxweights)] = -(10**150)
            # per-theta max log-weight, subtracted before exponentiating
            self.constants[:] = numpymax(self.logxweights, axis = 0)
            self.logxweights[...] -= self.constants[:]
        else:
            # excluded observation: uniform (zero) log-weights
            self.logxweights = zeros((self.Nx, self.Ntheta))
            self.constants[:] = numpymax(self.logxweights, axis = 0)
        self.xweights[...] = exp(self.logxweights)
        if not(self.modelx.HScoreInference):
            # if use log score for inference
            self.logLike[:] = log(mean(self.xweights, axis = 0)) + self.constants[:]
        else:
            # Hyvarinen-score inference: negated simple H-score plays the
            # role of the incremental log-likelihood
            self.logLike[:] = - TandWresults["simpleHScore"]
        # prediction: at this point we have the transitioned x-particles and we didn't update
        # the weights of the theta-particles, and the x-particles are not weighted (should be "resampled")
        if self.AP["prediction"]:
            self.prediction(t)
        if t > 0:
            self.evidences[t] = self.getEvidence(self.thetalogweights[t-1, :], self.logLike)
            self.totalLogLike[:] += self.logLike[:]
            self.thetalogweights[t, :] = self.thetalogweights[t-1, :] + self.logLike[:]
        else:
            self.evidences[t] = self.getEvidence(self.thetalogweights[t, :], self.logLike)
            self.totalLogLike[:] += self.logLike[:]
            self.thetalogweights[t, :] = self.thetalogweights[t, :] + self.logLike[:]
        # prequential log score of this step
        self.lScore[t] = -log(self.evidences[t])
        # store h score
        #self.hScore[t] = self.getHScore(t)
        if self.modelx.continuousObs:
            # use latest theta particles
            hScoreResults = self.modelx.computeHscore(self.xparticles, self.observations[t], self.thetaparticles, \
                    exp(self.logxweights), exp(self.thetalogweights[t, :]), t)
        else:
            # use theta particles from the last step
            hScoreResults = self.modelx.computeHscore(self.xparticles, self.observations[t], self.thetaparticles_last, \
                    exp(self.logxweights_last), exp(self.thetalogweights_last), t)
        self.hScore[t] = hScoreResults["compositeHScore"]
        # normalize theta log-weights by their max (numerical stability)
        self.thetalogweights[t, :] -= max(self.thetalogweights[t, :])
        self.xresample()
        # resample is to get ancestor index, to prepare for the following state transition;
        # the above resample step contains the line:
        #   self.xparticles[...] = self.xparticles[parentsindices, :]
        # therefore, for the filtering distribution at time t, we should use
        # the xparticles before this step
        self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
        if self.AP["dynamicNx"]:
            progressbar(t / (self.T - 1), text = " \n effective sample size: %.3f, Nx: %i" % (self.ESS[t], self.Nx))
        else:
            progressbar(t / (self.T - 1), text = " \n effective sample size: %.3f" % self.ESS[t])
        # resample-move: repeat until the theta ESS is back above threshold
        while self.ESS[t] < (self.AP["ESSthreshold"] * self.Ntheta):
            progressbar(t / (self.T - 1), text =\
                    " \n effective sample size: %.3f" % (self.ESS[t]))
            #- resample move step at iteration = %i" % (self.ESS[t], t))
            covdict = self.computeCovarianceAndMean(t)
            if self.AP["proposalkernel"] == "randomwalk":
                self.proposalcovmatrix = self.AP["rwvariance"] * covdict["cov"]
                self.proposalmean = None
            elif self.AP["proposalkernel"] == "independent":
                self.proposalcovmatrix = covdict["cov"]
                self.proposalmean = covdict["mean"]
            self.thetaresample(t)
            self.resamplingindices.append(t)
            self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
            for move in range(self.AP["nbmoves"]):
                self.PMCMCstep(t)
                acceptrate = self.acceptratios[-1]
                progressbar(t / (self.T - 1), text = \
                        " \nresample move step at iteration = %i - acceptance rate: %.3f\n" % (t, acceptrate))
                # low acceptance + dynamic Nx enabled: increase Nx if affordable
                if self.acceptratios[-1] < self.AP["dynamicNxThreshold"] \
                        and self.Nx <= (self.AP["NxLimit"] / 2) \
                        and self.AP["dynamicNx"]:
                    self.increaseParticlesNb(t)
                self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
        # update below the quantities for computing H score of discrete-obs.
        # put them here after potential PMCMC steps
        self.thetaparticles_last = self.thetaparticles
        self.logxweights_last = self.logxweights
        self.thetalogweights_last = self.thetalogweights[t,:]
        new_tic = time.time()
        self.computingtimes[t] = new_tic - last_tic
        last_tic = new_tic
        """ filtering and smoothing """
        if self.AP["filtering"]:
            self.filtering(t)
        if self.smoothingEnable and t == self.T - 1:
            self.smoothing(t)
        if t in self.savingtimes or t == self.T - 1:
            print "\nsaving particles at time %i" % t
            self.thetahistory[self.alreadystored, ...] = self.thetaparticles.copy()
            self.weighthistory[self.alreadystored, ...] = exp(self.thetalogweights[t, :])
            self.alreadystored += 1
def next_steps(self):
    """ Perform all the iterations until time T == number of observations.

    SMC^2 main loop: for each observation, run one step of all Nx
    x-particle filters (one per theta-particle), update the theta
    log-weights with the per-theta incremental log-likelihood, and
    trigger a resample-move (PMCMC) step whenever the theta ESS drops
    below the configured threshold.
    """
    for t in range(self.T):
        excluded = t in self.excludedobservations
        progressbar(t / (self.T - 1))
        if excluded:
            print "\nobservations", self.observations[t,:], "set to be excluded"
        last_tic = time.time()
        TandWresults = self.modelx.transitionAndWeight(self.xparticles, \
                self.observations[t], self.thetaparticles, t + 1)
        self.xparticles[...] = TandWresults["states"]
        if not(excluded):
            self.logxweights[...] = TandWresults["weights"]
            # in case the measure function returns nans or infs, set the weigths very low
            self.logxweights[isnan(self.logxweights)] = -(10**150)
            self.logxweights[isinf(self.logxweights)] = -(10**150)
            # per-theta max log-weight, subtracted before exponentiating
            self.constants[:] = numpymax(self.logxweights, axis = 0)
            self.logxweights[...] -= self.constants[:]
        else:
            # excluded observation: uniform (zero) log-weights
            self.logxweights = zeros((self.Nx, self.Ntheta))
            self.constants[:] = numpymax(self.logxweights, axis = 0)
        self.xweights[...] = exp(self.logxweights)
        # per-theta incremental log-likelihood (constant shift added back)
        self.logLike[:] = log(mean(self.xweights, axis = 0)) + self.constants[:]
        # prediction: at this point we have the transitioned x-particles and we didn't update
        # the weights of the theta-particles, and the x-particles are not weighted
        if self.AP["prediction"]:
            self.prediction(t)
        if t > 0:
            self.evidences[t] = self.getEvidence(self.thetalogweights[t-1, :], self.logLike)
            self.totalLogLike[:] += self.logLike[:]
            self.thetalogweights[t, :] = self.thetalogweights[t-1, :] + self.logLike[:]
        else:
            # first step: no previous column of theta log-weights to carry over
            self.evidences[t] = self.getEvidence(self.thetalogweights[t, :], self.logLike)
            self.totalLogLike[:] += self.logLike[:]
            self.thetalogweights[t, :] = self.thetalogweights[t, :] + self.logLike[:]
        # normalize theta log-weights by their max (numerical stability)
        self.thetalogweights[t, :] -= max(self.thetalogweights[t, :])
        self.xresample()
        self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
        if self.AP["dynamicNx"]:
            progressbar(t / (self.T - 1), text = " ESS: %.3f, Nx: %i" % (self.ESS[t], self.Nx))
        else:
            progressbar(t / (self.T - 1), text = " ESS: %.3f" % self.ESS[t])
        # resample-move: repeat until the theta ESS is back above threshold
        while self.ESS[t] < (self.AP["ESSthreshold"] * self.Ntheta):
            progressbar(t / (self.T - 1), text =\
                    " ESS: %.3f - resample move step at iteration = %i" % (self.ESS[t], t))
            covdict = self.computeCovarianceAndMean(t)
            if self.AP["proposalkernel"] == "randomwalk":
                self.proposalcovmatrix = self.AP["rwvariance"] * covdict["cov"]
                self.proposalmean = None
            elif self.AP["proposalkernel"] == "independent":
                self.proposalcovmatrix = covdict["cov"]
                self.proposalmean = covdict["mean"]
            self.thetaresample(t)
            self.resamplingindices.append(t)
            self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
            for move in range(self.AP["nbmoves"]):
                self.PMCMCstep(t)
                acceptrate = self.acceptratios[-1]
                progressbar(t / (self.T - 1), text = \
                        " \nresample move step at iteration = %i - acceptance rate: %.3f\n" % (t, acceptrate))
                # low acceptance + dynamic Nx enabled: increase Nx if affordable
                if self.acceptratios[-1] < self.AP["dynamicNxThreshold"] \
                        and self.Nx <= (self.AP["NxLimit"] / 2) \
                        and self.AP["dynamicNx"]:
                    self.increaseParticlesNb(t)
                self.ESS[t] = ESSfunction(exp(self.thetalogweights[t, :]))
        new_tic = time.time()
        self.computingtimes[t] = new_tic - last_tic
        last_tic = new_tic
        """ filtering and smoothing """
        if self.AP["filtering"]:
            self.filtering(t)
        if self.smoothingEnable and t == self.T - 1:
            self.smoothing(t)
        if t in self.savingtimes or t == self.T - 1:
            print "\nsaving particles at time %i" % t
            self.thetahistory[self.alreadystored, ...] = self.thetaparticles.copy()
            self.weighthistory[self.alreadystored, ...] = exp(self.thetalogweights[t, :])
            self.alreadystored += 1
print u[0:10], "..." weights = double(weights * Nx / sum(weights, axis=0)) num_blocks_x = int( math.ceil((Ntheta + THREADS_PER_BLOCK_X - 1) / THREADS_PER_BLOCK_X)) resampleGF(drv.In(u), \ drv.InOut(newstates), \ drv.In(states), \ drv.In(weights.astype(float32)), \ drv.In(array(Nx, dtype = int32)), \ drv.In(array(Ntheta, dtype = int32)), \ drv.In(array(xdim, dtype = int32)), \ block = (THREADS_PER_BLOCK_X, 1, 1), grid = (num_blocks_x, 1)) return newstates import cProfile cProfile.run( """ random.seed(923) resCUDA = resampleCUDA(states, uw, Nx, xdim, Ntheta) random.seed(923) res = resample2D(states, uw, Nx, xdim, Ntheta) """, "prof") import pstats p = pstats.Stats('prof') p.sort_stats('cumulative').print_stats(10) p.sort_stats('time').print_stats(10) print numpymax(resCUDA - res) print numpymean(resCUDA - res) print sum((resCUDA - res) > 0.1)