def main():
    """Run the genetic-algorithm training loop, then plot the results.

    Loads the data set from "train3-std.tsv", prepares an ANN on the training
    set and a SampleTester on the test set, and evolves a population for 5000
    generations.  In every generation the population is evaluated, ranked by
    lift (highest first), the top-ranked individual is scored by the tester,
    the generation is recorded with genplot, and the next population is
    produced by generateGeneration().

    The mutation value starts at 6.0 and is reduced by 0.5 after every 500
    generations.

    If exactly one command-line argument is given, the repr() of the
    top-ranked individual of the final generation is written to that path.
    """
    # Project-local module; imported here (it shadows the builtin ``input``
    # inside this function only).
    import input

    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
    np.set_printoptions(precision=3, edgeitems=3, threshold=20)

    random.seed(5108)  # used by the GA
    randSample = random.Random(input.SAMPLE_SEED)  # used for data set sampling

    inp = input.Input("train3-std.tsv", randSample)

    # NOTE(review): the original used ``print "...",`` (no newline) followed
    # by show(); this assumes show() prints its own text on the same line --
    # confirm against the data-set class.
    sys.stdout.write("Train set: ")
    inp.trainSet.show()
    sys.stdout.write("Test set: ")
    inp.testSet.show()

    # 20% of the training-set size.  Floor division keeps ``n`` an integer
    # (assumes ``trainSet.size`` is an integer count -- TODO confirm).
    n = inp.trainSet.size * 20 // 100

    a = ANN()
    a.prepare(inp.trainSet, POPSIZE)

    tester = SampleTester()
    tester.prepare(inp.testSet, randSample)
    tester.showSampleSets()

    params = []
    generatePop(params)  # fills ``params`` in place

    mutateValue = 6.0
    for genIndex in range(5000):
        print("Generation %d starting." % genIndex)
        logFP("Population", params)

        # evaluate() must run before nlargest()/lift(), which are queried on
        # the same ANN object right after it.
        outputValues = a.evaluate(params, returnOutputs=True)
        logFP("Outputs", outputValues)
        thresholds = a.nlargest(n)
        logFP("Thresholds", thresholds)
        lifts = a.lift(n)
        logFP("Lifts", lifts)

        # Rank individuals by lift, best first.  Sorting on the lift alone
        # (never on the parameter objects) and relying on sort stability
        # keeps tied individuals in their original population order.
        ranked = sorted(zip(lifts, params),
                        key=lambda pair: pair[0],
                        reverse=True)
        sortedParams = [individual for _, individual in ranked]
        logFP("Sorted pop", sortedParams)

        testLift, _ = tester.test(sortedParams[0])
        genplot.addGeneration(lifts, testLift, genIndex)

        params = generateGeneration(sortedParams, mutateValue)

        # Lower the mutation value at the end of every 500-generation block.
        if genIndex % 500 == 499:
            mutateValue -= 0.5

    args = sys.argv[1:]
    if len(args) == 1:
        # Context manager guarantees the file is flushed and closed.
        with open(args[0], "w") as outFile:
            outFile.write(repr(sortedParams[0]))

    genplot.plot()