def biasMeasGenerator(generator: MeasurementGenerator, priors: Tuple[float, ...], conds: List[List[float]]) -> None:
    # Updates the generator's measure distribution with a bias towards the
    # measures indicated by the class conditional probabilities.
    # priors: per-class prior probabilities; conds: per-class conditional
    # probabilities over the measurement space.
    # No output. Destructively alters 'generator'.
    # (Fixed: Tuple[float] annotated a 1-tuple; Tuple[float, ...] is the
    # intended variable-length tuple. Redundant bare 'return' removed.)
    newProbs = calcMeasProb(priors, conds)
    generator.updateProbs(newProbs)
def main():
    from random import seed
    from probability import genProbs

    #### Unit test for training function.
    # Initializes the seed for recurrent (reproducible) testing.
    # (Fixed: 'seed' was imported and the comment promised seeding, but the
    # call itself was missing, so runs were not reproducible.)
    seed(0)
    for _ in range(10):
        dimen = (4, 3, 6, 5, 6)
        classValues = 5
        measures = MeasurementGenerator(dimen)
        classes = ClassAssign(dimen, classValues)
        # Random class conditional probabilities over the measurement range.
        conds = [list(genProbs(measures.range)) for _ in range(classValues)]
        egain = [[2, 0, 0, 0, 1],
                 [3, 4, 0, 2, 2],
                 [2, 2, 5, 1, 1],
                 [2, 2, 3, 4, 1],
                 [0, 1, -3, 2, 3]]
        classifier = BayesClassifier(None, conds, eGain=egain)
        # Worries that supplying similar priors is affecting our results,
        # even though vFold updates.
        y = train(classifier, 20, measures, classes, 6000, delta=.0005)
        # Successive differences of the gain curve.
        z = [y[i] - y[i - 1] for i in range(1, len(y))]
        # Trying to figure out average negative error to see if this is
        # floating point.
        print(y)
        print()
        print(z)
        q = [i for i in z if i < 0]
        q = sum(q) / max(len(q), 1)
        print(q)
        print()
        # Smoke test: assignment still runs on fresh measures after training.
        x = measures.genMeas(20)
        classes.assign(x)
        classifier.assign(x)
def train(tagger: BayesClassifier, iter: int, measGen: MeasurementGenerator, classGen: ClassAssign, Z: int, V: int = 10, delta: float = .05) -> List[float]:
    # Runs 'iter' rounds of v-fold validation (default 'V' is ten) of 'tagger'
    # on 'Z' fresh samples drawn from 'measGen'/'classGen' each round.
    # After each round the realized expected gain is recorded, the tagger's
    # priors are refreshed from the round's class frequencies, and the tagger
    # is optimized with step 'delta' (default .05).
    # Returns the list of expected-gain values, one per round.
    gains = []
    for _ in range(iter):
        # Fresh measurement/class sample for this round.
        meas = measGen.genMeas(Z)
        tags = classGen.assign(meas)
        # Randomize ordering before partitioning into folds.
        measSh, tagsSh = shuffle(meas, tags)
        # v-fold validation yields a confusion matrix for this round.
        conf = vFold(measSh, tagsSh, V, tagger)
        gains.append(calcExpGain(conf, tagger.eGain))
        # Class probability over the whole round's data becomes the new prior.
        tagger.priorUpdate(calcClassProb(tagsSh, tagger.range))
        # Iterative improvement of the tagger's conditionals.
        tagger.optimize(delta, measSh, tagsSh)
    return gains
def main():
    from bayes import calcClassProb

    priors = (.6, .4)
    conds = [[.12 / .6, .18 / .6, .3 / .6], [.2 / .4, .16 / .4, .04 / .4]]
    gain = ((1, 0), (0, 2))

    ### Tests for biasCCP
    classifier = BayesClassifier(priors, conds, eGain=gain)
    # Classification should be 1,1,0. The update should alter 0|1, 1|1, 2|0.
    newCCP = biasCCP(classifier, .05)
    # print(newCCP)  # Uncomment to see if the ccp conforms to predictions.
    # Should bias 2|0 and raise 0 and 1 |1.

    ### Tests for biasMeasGenerator
    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)
    before = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Conditionals that bias one measure...
    conds = ((.1, .7, .1, .1), (.25, .25, .25, .25))
    # ...fed together with biased priors.
    priors = (.7, .3)
    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    after = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Probability mass should now lean towards the second value.
    assert after > before

    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)
    before1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    before2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]
    # A second conditional set biasing a different measure.
    conds = ((.1, .5, .1, .3), (.25, .25, .25, .25))
    priors = (.7, .3)
    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    after1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    after2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]
    # Both tracked positions should show increased bias.
    assert after1 > before1, (before1, after1)
    assert after2 > before2, (before2, after2)
def main():
    priors = (.6, .4)
    conds = [[.12 / .6, .18 / .6, .3 / .6], [.2 / .4, .16 / .4, .04 / .4]]
    gain = ((1, 0), (0, 2))

    ### Tests for biasMeasGenerator
    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)  ## For peeking.
    before = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Conditionals that bias one measure...
    conds = ((.1, .7, .1, .1), (.25, .25, .25, .25))
    # ...fed together with biased priors.
    priors = (.7, .3)
    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    after = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Probability mass should now lean towards the second value.
    assert after > before

    generator = MeasurementGenerator((2, 2))
    # print(generator.cmlProbs)
    # Previous probability for the second value.
    before1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    # Previous probability for the final value.
    before2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]
    # Conditionals that bias two measures this time.
    conds = ((0.0, .5, 0.0, .5), (.05, .4, .05, .5))
    priors = (.7, .3)
    biasMeasGenerator(generator, priors, conds)
    # print(generator.cmlProbs)
    after1 = generator.cmlProbs[1] - generator.cmlProbs[0]
    after2 = generator.cmlProbs[-1] - generator.cmlProbs[-2]
    # Both positions should show increased bias. May vary since the original
    # conditional probability is random.
    assert after1 > before1, (before1, after1)
    assert after2 > before2, (before2, after2)
def fitData(classifier: BayesClassifier, generator: MeasurementGenerator, Z: int) -> Tuple[Tuple[int, ...], Tuple[int, ...]]:
    # Uses classifier values to bias the measure generator towards the
    # classifier's own prior/conditional distribution, then generates Z new
    # measure-tag pairs.
    # Returns the measure-tag pairs for another round of testing.
    # (Fixed: return annotation claimed a BayesClassifier was also returned,
    # but only (measures, tags) is.)
    conds = classifier.cond
    priors = classifier.prior
    # Posterior class-given-measure probabilities, normalized per measure.
    posts = calcPosterior(priors, conds)
    postsNorm = normPosterior(posts)
    # Destructively re-weights the generator's measure distribution.
    biasMeasGenerator(generator, priors, conds)
    measures = generator.genMeas(Z)
    tags = genBiasTags(measures, postsNorm)
    return measures, tags
def fitData(classifier: BayesClassifier, generator: MeasurementGenerator, Z: int, delta: float) -> Tuple[BayesClassifier, Tuple[int], Tuple[int]]:
    # Biases the measurement generator towards the classifier's current
    # prior/conditional distribution, then draws Z fresh measures and tags
    # them in line with the (normalized) posterior class distribution.
    # Returns the classifier and the measure-tag pairs for another round of
    # testing.
    # NOTE(review): 'delta' is accepted but never used in this body -- the
    # caller appears to run classifier.optimize(delta, ...) itself before
    # calling here. Confirm whether the parameter can be dropped.
    conds = classifier.cond
    priors = classifier.prior
    postsNorm = normPosterior(calcPosterior(priors, conds))
    # Destructively re-weights the generator's measure distribution.
    biasMeasGenerator(generator, priors, conds)
    measures = generator.genMeas(Z)
    return classifier, measures, genBiasTags(measures, postsNorm)
def main():
    # Command-line driver: generates measure/tag data, builds and v-fold
    # tests a Bayes classifier, optionally iterates a fitting loop, and
    # writes all artifacts to file.
    import ast  # local import: only needed for safe parsing of --dimen

    parser = argparse.ArgumentParser()
    parser.add_argument("samples", help="Number of measurement samples to generate.", type=int)
    parser.add_argument("dimen", help="Measurement space.", type=str)
    parser.add_argument("classes", help="Number of classes.", type=int)
    parser.add_argument("seed", help="Random seed for experiement duplication.", type=int)
    parser.add_argument(
        "--vfolds", "-v", default=10,
        help="Number of v-folds to partition testing data for v-folds testing. Default is 10.",
        type=int)
    parser.add_argument(
        "--optimization", "-o", default=0.0,
        help="Specify if iterative improvement of class conditional probability values should be taken.",
        type=float)
    parser.add_argument(
        "--iteration", "-t", default=10,
        help="Number of iterations for conditional update.",
        type=int)
    parser.add_argument(
        "--identity", "-i", action="store_true", default=False,
        help="Specify if economic gain matrix should be identity.")
    args = parser.parse_args()
    # Checks that our given limits are even feasible memory wise
    # Prompts for reader friendliness
    print("Generating testing data for seed {}".format(args.seed))
    # Sets seed
    seed(args.seed)
    # Parses the measurement-space dimensions, e.g. "(4, 3, 6)".
    # (Fixed: was eval(), which executes arbitrary code from the command
    # line; literal_eval only accepts Python literals.)
    dimen = ast.literal_eval(args.dimen)
    # Calculates size of domain
    M = 1
    for N in dimen:
        M *= N
    K = args.classes
    V = args.vfolds
    Z = args.samples
    print("Dimensions of Measurement Space: {}".format(dimen))
    print("Number of Samples: {}".format(Z))
    print("Classes: {}".format(K))
    # Checks that this is even possible to calculate.
    if config.computeLimit(M, K):
        print("Possible measurements exceed memory capabilities.")
        sys.exit()
    print("Generating {0}x{0} Gain Matrix. \nIdentity Matrix: {1}".format(K, args.identity))
    gain = genGain(K, identity=args.identity)
    print("{}x{} Economic Gain Matrix Generated".format(len(gain), len(gain[0])))
    # Generates measures
    print("Generating {} Measure-Value pairs.".format(Z))
    print("Generating measures.")
    generator = MeasurementGenerator(dimen)
    measures = generator.genMeas(Z)
    assigner = ClassAssign(dimen, K)
    tags = assigner.assign(measures)
    print("{} measures and {} values generated.".format(len(measures), len(tags)))
    ## Generates classifier.
    print("Generating class conditional probabilities for {} classes and {} possible measures."
          .format(K, M))
    conditionals = genCCP(K, dimen)
    print("Class conditional probabilities generated for {} classes and {} possible measures"
          .format(len(conditionals), len(conditionals[0])))
    # No priors given since vFold always assigns.
    classifier = BayesClassifier(None, conditionals, eGain=gain)
    print("Testing classifier. V-fold factor: {}".format(V))
    measures, tags = shuffle(measures, tags)
    results = vFold(measures, tags, V, classifier)
    matrix = genConMatrix(tags, results, K)
    norm = normConMatrix(matrix)
    expGain = calcExpGain(norm, classifier.eGain)
    print("The expected gain for the given data is: {}".format(expGain))
    #### Here we will work on updating
    if args.optimization:
        print("Fitting data for improved performance. Improvement factor {} used over {} iterations."
              .format(args.optimization, args.iteration))
        gains = []
        # Priors generated from this measurement set become permanent priors.
        priors = calcClassProb(tags, K)
        classifier.priorUpdate(priors)
        for i in range(args.iteration):
            classifier.optimize(args.optimization, measures, tags)
            classifier, measures, tags = fitData(classifier, generator, Z, args.optimization)
            measures, tags = shuffle(measures, tags)
            results = vFold(measures, tags, V, classifier)
            matrix = genConMatrix(tags, results, K)
            norm = normConMatrix(matrix)
            expGain = calcExpGain(norm, classifier.eGain)
            gains.append(expGain)
            print("Expected Gain from iteration {} is {}".format(i + 1, expGain))
        print("The expected gain for fitted data after {} iterations is: {}".
              format(args.iteration, gains[-1]))
    # Writes all data to files
    print("Writing to file.")
    reader.writeData(measures, tags, dimen)
    reader.writePriors(classifier.prior)
    reader.writeGain(gain)
    reader.writeCCP(classifier.cond)
    print("Done.")