def train(tagger: BayesClassifier, iter: int, measGen: MeasurementGenerator, classGen: ClassAssign, Z: int, V: int = 10, delta: float = .05) -> List[float]:
    """Run 'iter' rounds of v-fold validation on 'tagger', optimizing it in place.

    Each round draws Z samples from 'measGen' with labels from 'classGen',
    shuffles them, performs V-fold validation (default V is ten), records the
    expected gain (using the gain matrix attached to the tagger), refreshes
    the tagger's class priors from the full shuffled label set, and finally
    optimizes the tagger by the 'delta' parameter (default .05).

    Returns the list of expected-gain values, one per iteration. NOTE: the
    tagger is modified in place; no new tagger object is returned.
    """
    gains: List[float] = []
    for _ in range(iter):
        # Draw a fresh batch of measurements and their class labels.
        batch = measGen.genMeas(Z)
        labels = classGen.assign(batch)
        # Randomize ordering before fold partitioning.
        batch, labels = shuffle(batch, labels)
        # V-fold validation; its result is fed straight to calcExpGain.
        fold_result = vFold(batch, labels, V, tagger)
        gains.append(calcExpGain(fold_result, tagger.eGain))
        # Re-estimate class priors over the whole batch, then optimize.
        tagger.priorUpdate(calcClassProb(labels, tagger.range))
        tagger.optimize(delta, batch, labels)
    return gains
def main():
    """CLI entry point: generate synthetic data, test a Bayes classifier via
    v-fold validation, optionally optimize it iteratively, and write results.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("samples", help="Number of measurement samples to generate.", type=int)
    parser.add_argument("dimen", help="Measurement space.", type=str)
    parser.add_argument("classes", help="Number of classes.", type=int)
    parser.add_argument("seed", help="Random seed for experiment duplication.", type=int)
    parser.add_argument(
        "--vfolds", "-v", default=10,
        help="Number of v-folds to partition testing data for v-folds testing. Default is 10.",
        type=int)
    parser.add_argument(
        "--optimization", "-o", default=0.0,
        help="Specify if iterative improvement of class conditional probability values should be taken.",
        type=float)
    parser.add_argument(
        "--iteration", "-t", default=10,
        help="Number of iterations for conditional update.", type=int)
    parser.add_argument(
        "--identity", "-i", action="store_true", default=False,
        help="Specify if economic gain matrix should be identity.")
    args = parser.parse_args()

    # Prompts for reader friendliness
    print("Generating testing data for seed {}".format(args.seed))
    # Seed the RNG so runs are reproducible.
    seed(args.seed)
    # SECURITY NOTE(review): eval() on a CLI argument executes arbitrary code.
    # Prefer ast.literal_eval for a literal tuple/list of ints — confirm no
    # callers rely on passing arithmetic expressions before switching.
    dimen = eval(args.dimen)
    # Size of the measurement domain is the product of the dimensions.
    M = 1
    for N in dimen:
        M *= N
    K = args.classes
    V = args.vfolds
    Z = args.samples
    print("Dimensions of Measurement Space: {}".format(dimen))
    print("Number of Samples: {}".format(Z))
    print("Classes: {}".format(K))
    # Abort early if the possible measurement count exceeds memory limits.
    if config.computeLimit(M, K):
        print("Possible measurements exceed memory capabilities.")
        sys.exit()

    # Build the economic gain matrix (identity if requested).
    print("Generating {0}x{0} Gain Matrix. \nIdentity Matrix: {1}".format(K, args.identity))
    gain = genGain(K, identity=args.identity)
    print("{}x{} Economic Gain Matrix Generated".format(len(gain), len(gain[0])))

    # Generate measurement samples and their class assignments.
    print("Generating {} Measure-Value pairs.".format(Z))
    print("Generating measures.")
    generator = MeasurementGenerator(dimen)
    measures = generator.genMeas(Z)
    assigner = ClassAssign(dimen, K)
    tags = assigner.assign(measures)
    print("{} measures and {} values generated.".format(len(measures), len(tags)))

    # Build the classifier from generated class-conditional probabilities.
    print("Generating class conditional probabilities for {} classes and {} possible measures."
          .format(K, M))
    conditionals = genCCP(K, dimen)
    print("Class conditional probabilities generated for {} classes and {} possible measures"
          .format(len(conditionals), len(conditionals[0])))
    classifier = BayesClassifier(
        None, conditionals, eGain=gain)  # No priors given since vFold always assigns.

    # Baseline test: shuffle, v-fold validate, and compute expected gain from
    # the normalized confusion matrix.
    print("Testing classifier. V-fold factor: {}".format(V))
    measures, tags = shuffle(measures, tags)
    results = vFold(measures, tags, V, classifier)
    matrix = genConMatrix(tags, results, K)
    norm = normConMatrix(matrix)
    expGain = calcExpGain(norm, classifier.eGain)
    print("The expected gain for the given data is: {}".format(expGain))

    # Optional iterative optimization pass.
    if args.optimization:
        print("Fitting data for improved performance. Improvement factor {} used over {} iterations."
              .format(args.optimization, args.iteration))
        gains = []
        # Priors generated from this measurement set become the permanent priors.
        priors = calcClassProb(tags, K)
        classifier.priorUpdate(priors)
        for i in range(args.iteration):
            # Optimize, refit the data, then re-validate to measure progress.
            classifier.optimize(args.optimization, measures, tags)
            classifier, measures, tags = fitData(classifier, generator, Z, args.optimization)
            measures, tags = shuffle(measures, tags)
            results = vFold(measures, tags, V, classifier)
            matrix = genConMatrix(tags, results, K)
            norm = normConMatrix(matrix)
            expGain = calcExpGain(norm, classifier.eGain)
            gains.append(expGain)
            print("Expected Gain from iteration {} is {}".format(i + 1, expGain))
        print("The expected gain for fitted data after {} iterations is: {}"
              .format(args.iteration, gains[-1]))

    # Persist all artifacts.
    print("Writing to file.")
    reader.writeData(measures, tags, dimen)
    reader.writePriors(classifier.prior)
    reader.writeGain(gain)
    reader.writeCCP(classifier.cond)
    print("Done.")