def constructIntegrandFunctionObject(marginalPair, eta, k, l, N):
    """Wrap the Robbins emission-probability estimate as a one-variable integrand.

    Input:
        marginalPair -- pair of base marginals, e.g. [0.5, 0.3]
        eta, k, l, N -- parameters forwarded to the emission-probability calculator
    Output:
        fa.functionAlgorithms object mapping t to the estimated emission
        probability of p(marginalPair)(t) from p^eta, carrying the extra
        methods/properties (binary search, etc.) of functionAlgorithms.
    """
    # robbinsEstimate(x, y, t) = estimated emission probability of p(x,y,t) from p^eta
    robbinsEstimate = epc.emissionProbabilityCalculator(
        eta, k, l, N).RobbinsEstimateOfEmissionProbability
    probe_t = 0.001  # only used for the diagnostic log lines below
    logging.info("For marginals %s, Robbins function takes value %s at t=%s" % (
        marginalPair,
        robbinsEstimate(marginalPair[0], marginalPair[1], probe_t),
        probe_t))
    boundEstimate = fa.functionAlgorithms(robbinsEstimate)
    boundEstimate.setFixedArgumentList(marginalPair)
    logging.info("Fixed argument list set to %s" % (str(boundEstimate.fixedArgumentList)))
    # integrand(t) = estimated emission probability of p(marginalPair)(t) from p^eta
    integrand = boundEstimate.functionOfOneVariable
    logging.info("As func. of one variable, takes value %s at t=%s" % (integrand(probe_t), probe_t))
    return fa.functionAlgorithms(integrand)
def setUp(self):
    """Build the Robbins emission-probability function used by the tests."""
    # fixed test fixture parameters
    k, l = 2, 2
    N = 100
    gamma = 1e-3
    eta = 1e-3
    calculator = epc.emissionProbabilityCalculator(eta, k, l, N)
    calculator.setGamma(gamma)
    self.functionProbabilityOfEmissionByP_eta_Robbins = (
        calculator.RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma)
def testSearchArgWhereIncreasingFunctionTakesProportionOfMaxVal(self):
    # Exercises functionAlgorithms.searchArgWhereIncreasingFunctionTakesProportionOfMaxVal
    # on the Robbins integrand, sweeping N over 10..90 (step 10) and 100..900 (step 100).
    # Target proportion: ratio of the standard normal pdf at one sigma to its peak.
    theProportion = stats.norm.pdf(1)/stats.norm.pdf(0)
    N_list = list(10*np.array(range(1,10)))
    N_list.extend((100*np.array(range(1,10))))
    k,l=2,2
    eta=0.01
    gamma = 0.001
    logger.debug("Set eta=%s, gamma=%s"%(eta,gamma))
    #firstMarginalsDist = stats.uniform(loc = .4,scale = .2)
    #secondMarginalsDist = stats.uniform(loc = .4,scale = .2)
    for N in N_list:
        for iteration in range(1):
            #firstMarginal, secondMarginal = [firstMarginalsDist.rvs(), secondMarginalsDist.rvs()]
            firstMarginal, secondMarginal = 1.0/l, 1.0/k #0.5, 0.5 for binary-binary
            logger.debug("Randomly chosen marginals: (%s,%s)"%(firstMarginal, secondMarginal))
            # functionFromTwoMarginalsAndParameterToIntegrand(x, y, t) = Robbins
            # estimate of emission probability of p(x,y,t) from p^eta.
            functionFromTwoMarginalsAndParameterToIntegrand = epc.emissionProbabilityCalculator(eta, k, l, N).RobbinsEstimateOfEmissionProbability
            logger.debug("For marginals (%s,%s), Robbins function takes value %s at t=%s"%(firstMarginal, secondMarginal, functionFromTwoMarginalsAndParameterToIntegrand(firstMarginal, secondMarginal, 0.001), 0.001))
            # Fix the marginals so the estimate becomes a function of t alone.
            functionFromParameterToIntegrandObject = fa.functionAlgorithms(functionFromTwoMarginalsAndParameterToIntegrand)
            functionFromParameterToIntegrandObject.setFixedArgumentList([firstMarginal, secondMarginal])
            logger.debug("Fixed argument list set to %s"%(str(functionFromParameterToIntegrandObject.fixedArgumentList)))
            functionFromParameterToIntegrand = functionFromParameterToIntegrandObject.functionOfOneVariable
            logger.debug("As func. of one variable, takes value %s at t=%s"%(functionFromParameterToIntegrand(0.001), 0.001))
            # Search interval endpoints: largest positive / smallest negative t
            # at which the KL divergence from the base distribution stays below gamma.
            probDistPath = pdpf.probabilityDistributionPathFactory([firstMarginal, secondMarginal], k, l).construct()
            t_gamma_plus = probDistPath.largestPos_t_atWhichKLDivergenceFromBaseIsLessThanEta(gamma)
            t_gamma_minus = probDistPath.smallestNeg_t_atWhichKLDivergenceFromBaseIsLessThanEta(gamma)
            logger.debug("Robbins function takes value %s at t_gamma_plus=%s"%(functionFromParameterToIntegrandObject.theFunction(firstMarginal, secondMarginal, t_gamma_plus), t_gamma_plus))
            logger.debug("Robbins function takes value %s at t_gamma_minus=%s"%(functionFromParameterToIntegrandObject.theFunction(firstMarginal, secondMarginal, t_gamma_minus), t_gamma_minus))
            # Run the search; the result is only logged, not asserted
            # (NOTE(review): this "test" has no assertions — it is a diagnostic sweep).
            integrandFunctionObject = fa.functionAlgorithms(functionFromParameterToIntegrand)
            logger.info("Searching for where the function is proportion %s of the max between %s and %s"%(theProportion, t_gamma_minus, t_gamma_plus))
            computedScale = integrandFunctionObject.searchArgWhereIncreasingFunctionTakesProportionOfMaxVal(theProportion, t_gamma_minus, t_gamma_plus)
            logger.info("For marginals (%s,%s), N=%s, computed scale is %s"%(firstMarginal, secondMarginal, N, computedScale))
def constructIntegrandFunctionObject(marginalPair, eta, k, l, N):
    """Build a functionAlgorithms wrapper for the Robbins integrand at fixed marginals.

    Input:
        marginalPair -- pair of base marginals, e.g. [0.5, 0.3]
        eta, k, l, N -- emission-probability calculator parameters
    Output:
        fa.functionAlgorithms object whose underlying function maps t to the
        estimated emission probability of p(marginalPair)(t) from p^eta, with
        the binary-search helpers that functionAlgorithms provides.
    """
    calculator = epc.emissionProbabilityCalculator(eta, k, l, N)
    # twoMarginalEstimate(x, y, t) = estimated emission probability of p(x,y,t) from p^eta
    twoMarginalEstimate = calculator.RobbinsEstimateOfEmissionProbability
    diagnostic_t = 0.001  # used for logger only
    logging.info(
        "For marginals %s, Robbins function takes value %s at t=%s" % (
            marginalPair,
            twoMarginalEstimate(marginalPair[0], marginalPair[1], diagnostic_t),
            diagnostic_t))
    fixedMarginalWrapper = fa.functionAlgorithms(twoMarginalEstimate)
    fixedMarginalWrapper.setFixedArgumentList(marginalPair)
    logging.info(
        "Fixed argument list set to %s" % (str(fixedMarginalWrapper.fixedArgumentList)))
    # oneVariableIntegrand(t) = estimated emission probability of p(marginalPair)(t) from p^eta
    oneVariableIntegrand = fixedMarginalWrapper.functionOfOneVariable
    logging.info(
        "As func. of one variable, takes value %s at t=%s" % (
            oneVariableIntegrand(diagnostic_t), diagnostic_t))
    return fa.functionAlgorithms(oneVariableIntegrand)
def functionProbabilityOfEmissionByP_eta_Robbins(eta, N, gamma, k, l):
    """Return the Robbins emission-probability-times-characteristic-function
    estimator bound to the given (eta, N, gamma, k, l)."""
    calc = epc.emissionProbabilityCalculator(eta, k, l, N)
    calc.setGamma(gamma)
    return calc.RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma
def functionProbabilityOfEmissionByP_eta_Robbins(eta, N, gamma, k, l):
    """Construct an emission-probability calculator for p^eta, set its gamma,
    and hand back its Robbins-estimate-times-char-function-of-(tau - gamma) member."""
    emissionCalculator = epc.emissionProbabilityCalculator(eta, k, l, N)
    emissionCalculator.setGamma(gamma)
    boundEstimator = emissionCalculator.RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma
    return boundEstimator
def _testIntegratetoFlatFile(self):
    """Compute beta estimates over a sweep of gamma for two benchmark N values
    and write timings/results (against the benchmark file) to CSV and pickle.

    NOTE(review): reads module-level `benchMarkInterval_N` and
    `CentralProbability` — confirm both are defined at import time.
    Fixes in this revision:
      * `statsDistFactory` was referenced while its construction was commented
        out (NameError at runtime); construction restored per-gamma, which the
        per-iteration `set_eta`/`set_N` calls suggest was the intent.
      * `dict.has_key()` (removed in Python 3) and the repeated
        if/else-append pattern replaced with `setdefault`.
      * CSV and pickle file handles now closed deterministically via `with`
        (previously the pickle handle leaked and the CSV leaked on exception).
      * The CSV row no longer mutates the stored results list in place.
    """
    # load in benchmark data; column 0 of each row holds the gamma step size
    benchmarks = genfromtxt('cdf_for_valsets_of_variables_of_size2and2',
                            delimiter=',', skip_header=4)
    benchmarks_without_stepsize = benchmarks[:, 1:]
    lineInBenchMarkFileList = [0, 9]  # which benchmark rows (hence which N) to compare against
    # prepare the csv file; `with` guarantees closure even if a run raises
    with open('/home/eliot/Documents/sontag/benchmarkcomparisons/N@100_N@[email protected]', 'wt') as f:
        writer = csv.writer(f)
        for lineInBenchMarkFile in lineInBenchMarkFileList:
            benchmark_N = (lineInBenchMarkFile + 1) * benchMarkInterval_N
            benchmarksForThisN = benchmarks_without_stepsize[lineInBenchMarkFile]
            stepSizeInBenchmarks = benchmarks[0, 0]
            numStepsInBenchmarks = len(benchmarks[0])
            logging.info("Read in %s lines of benchmarks with stepsize=%s and %s steps" % (
                len(benchmarks_without_stepsize), stepSizeInBenchmarks, numStepsInBenchmarks))
            # constants common to all of the runs
            k, l = 2, 2
            eta = 0.01
            N = benchmark_N
            maxIterations = 10000
            dictionaryFromNameOfMethodToResults = {}
            # the integrand: probability-of-emission calculator from p^eta, common to ALL methods
            probabilityCalculatorAssociatedToEta = epc.emissionProbabilityCalculator(eta, k, l, N)
            for gammaNumber in range(numStepsInBenchmarks - 1):
                gamma = (gammaNumber + 1) * stepSizeInBenchmarks
                benchMarkBeta = benchmarksForThisN[gammaNumber]
                probabilityCalculatorAssociatedToEta.setGamma(gamma)
                functionProbabilityOfEmissionByP_eta_Robbins = (
                    probabilityCalculatorAssociatedToEta
                    .RobbinsEstimateOfEmissionProbabilityTimesCharFunctionOfTauMinusGamma)
                # marginals distribution, also common to ALL methods
                Gaussianscale = it.ChernoffRadius(CentralProbability, N)
                normalDistLocUniformScaleDepOnChernoff = stats.norm(loc=0.5, scale=Gaussianscale)
                # e.g. 5, .95 means "seeking 95 percent certainty answer is
                # within 5 percent of truth"; typically common to all runs
                accuracy_percent = 10
                probability_accuracy_achieved = 0.95
                # BUG FIX: this construction had been commented out although
                # statsDistFactory is used below; gamma changes every iteration,
                # so the factory must be rebuilt here.
                statsDistFactory = sdf.statsDistributionFactory(gamma)
                # method for picking the parameter distribution from the marginals
                dictionaryFromNameOfMethodToMethod = {
                    'GaussianScaleAdaptiveTo_l_gamma':
                        statsDistFactory.GaussianCenteredAttGammaPlusfromMarginals,
                }
                uniformMarginals = [1.0 / k, 1.0 / l]
                for nameOfMethod in dictionaryFromNameOfMethodToMethod:
                    # accumulate results for this method across the gamma sweep
                    resultsDictionary = dictionaryFromNameOfMethodToResults.setdefault(nameOfMethod, {})
                    statsDistFactory.set_eta(eta)
                    statsDistFactory.set_N(N)
                    statsDistFactory.CalculateAndSetScaleRatio(uniformMarginals)
                    functionFromMarginalsTo_t_Distribution = dictionaryFromNameOfMethodToMethod[nameOfMethod]
                    # start the calculation proper
                    with Timer.Timer() as t:
                        iteration_values, rho_hat_values = iwsc.IntegrateWithStoppingCriterion(
                            k, l,
                            functionProbabilityOfEmissionByP_eta_Robbins,
                            normalDistLocUniformScaleDepOnChernoff,
                            functionFromMarginalsTo_t_Distribution,  # the per-method variation point
                            maxIterations,
                            100, 100,  # frequencyOfRecordingResult, frequencyOfApplicationOfStoppingCriterion
                            accuracy_percent, probability_accuracy_achieved)
                    logging.info("Time elapsed is %.03f sec." % t.interval)
                    logging.info("Iteration_values for the method %s for eta=%s, gamma=%s, N=%s, accpercent=%s, prob_achieved=%s" % (
                        nameOfMethod, eta, gamma, N, accuracy_percent, probability_accuracy_achieved))
                    logging.info(iteration_values)
                    logging.info("rho_hat_values for the method %s for eta=%s, gamma=%s, N=%s, accpercent=%s, prob_achieved=%s" % (
                        nameOfMethod, eta, gamma, N, accuracy_percent, probability_accuracy_achieved))
                    logging.info(rho_hat_values)
                    betaResult = rho_hat_values[-1] * N ** 3
                    resultsDictionary.setdefault('time', []).append(t.interval)
                    resultsDictionary.setdefault('CalculatedResult', []).append(betaResult)
                    resultsDictionary.setdefault('BenchmarkBeta', []).append(benchMarkBeta)
                    dictionaryFromNameOfMethodToResults[nameOfMethod] = resultsDictionary
            fileNameForPickleDump = "/home/eliot/Documents/sontag/benchmarkcomparisons/testRun" + str(N)
            with open(fileNameForPickleDump, "wb") as pickleFile:
                pickle.dump(dictionaryFromNameOfMethodToResults, pickleFile)
            # write to the CSV file
            N_header = 'N=%s' % (N)
            writer.writerow([N_header])
            for resultsDictionary in dictionaryFromNameOfMethodToResults.values():
                for key in resultsDictionary.keys():
                    print(key)
                    print(resultsDictionary[key])
                    # build the row without mutating the stored results list
                    rowList = [key] + resultsDictionary[key]
                    print(rowList)
                    writer.writerow(rowList)
def testSearchArgWhereIncreasingFunctionTakesProportionOfMaxVal(self):
    """Sweep N and run the proportion-of-max argument search on the Robbins
    integrand at uniform marginals; results are logged, not asserted."""
    # target proportion: normal pdf at one sigma relative to its peak
    proportionSought = stats.norm.pdf(1) / stats.norm.pdf(0)
    # N values 10..90 (step 10) followed by 100..900 (step 100)
    sampleSizes = list(np.arange(1, 10) * 10) + list(np.arange(1, 10) * 100)
    k, l = 2, 2
    eta = 0.01
    gamma = 0.001
    logger.debug("Set eta=%s, gamma=%s" % (eta, gamma))
    probe_t = 0.001  # parameter value used only for diagnostic logging
    for N in sampleSizes:
        for trial in range(1):
            # uniform marginals (0.5, 0.5 in the binary-binary case)
            rowMarginal, colMarginal = 1.0 / l, 1.0 / k
            logger.debug("Randomly chosen marginals: (%s,%s)" % (rowMarginal, colMarginal))
            # robbinsFn(x, y, t) = Robbins estimate of emission probability of p(x,y,t) from p^eta
            robbinsFn = epc.emissionProbabilityCalculator(eta, k, l, N).RobbinsEstimateOfEmissionProbability
            logger.debug("For marginals (%s,%s), Robbins function takes value %s at t=%s" % (
                rowMarginal, colMarginal, robbinsFn(rowMarginal, colMarginal, probe_t), probe_t))
            # bind the marginals so the estimate becomes a function of t alone
            robbinsWrapper = fa.functionAlgorithms(robbinsFn)
            robbinsWrapper.setFixedArgumentList([rowMarginal, colMarginal])
            logger.debug("Fixed argument list set to %s" % (str(robbinsWrapper.fixedArgumentList)))
            integrand = robbinsWrapper.functionOfOneVariable
            logger.debug("As func. of one variable, takes value %s at t=%s" % (integrand(probe_t), probe_t))
            # search interval: extremal t keeping KL divergence from base below gamma
            path = pdpf.probabilityDistributionPathFactory([rowMarginal, colMarginal], k, l).construct()
            tPlus = path.largestPos_t_atWhichKLDivergenceFromBaseIsLessThanEta(gamma)
            tMinus = path.smallestNeg_t_atWhichKLDivergenceFromBaseIsLessThanEta(gamma)
            logger.debug("Robbins function takes value %s at t_gamma_plus=%s" % (
                robbinsWrapper.theFunction(rowMarginal, colMarginal, tPlus), tPlus))
            logger.debug("Robbins function takes value %s at t_gamma_minus=%s" % (
                robbinsWrapper.theFunction(rowMarginal, colMarginal, tMinus), tMinus))
            searchObject = fa.functionAlgorithms(integrand)
            logger.info("Searching for where the function is proportion %s of the max between %s and %s" % (
                proportionSought, tMinus, tPlus))
            computedScale = searchObject.searchArgWhereIncreasingFunctionTakesProportionOfMaxVal(
                proportionSought, tMinus, tPlus)
            logger.info("For marginals (%s,%s), N=%s, computed scale is %s" % (
                rowMarginal, colMarginal, N, computedScale))