Beispiel #1
0
def main(argv):
    """Evolve a chromosome on the train file and optionally score a test file.

    Run as: ``python MainGA --input=train.txt --test=test.txt``
    (short forms: ``-i train.txt -t test.txt``).

    The train file is loaded into the module-level ``TrainSet``, a GA is
    evolved over it, and the best chromosome is used to sort the train set
    and write its scores. When a test file is given, it is loaded into the
    module-level ``TestSet``, sorted with the chromosome found on the train
    set, and its scores are written too.

    Args:
        argv: Accepted for backward compatibility but not read; options are
            always parsed from ``sys.argv[1:]``.

    Raises:
        SystemExit: with status 2 when the command line cannot be parsed, or
            with an explanatory message when no train file was supplied.
    """
    global TrainSet
    global TestSet

    # Both short options take a value, hence the trailing colon on each.
    # (Without the colon after "t", "-t test.txt" yields an empty test file
    # name and the test set is silently skipped.)
    short_opts = "i:t:"
    long_opts = ["input=", "test="]
    trainfile = ""
    testfile = ""

    try:
        opts, _extra = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError:
        print("GetoptError: -i <trainfile> [-t <testfile>]")
        sys.exit(2)
    for opt, value in opts:
        if opt in ("-i", "--input"):
            trainfile = value
        elif opt in ("-t", "--test"):
            testfile = value

    if not trainfile:
        sys.exit("GA_Algorithm: A train file is required.")

    # The context manager closes the file even if DataSet() raises.
    with open(trainfile, "r") as train_handle:
        TrainSet = DataSet(False, train_handle, iB, sB)
    TrainSet.set_dataset_filename(str(trainfile))
    TrainSet.set_generations(GENERATIONS)
    TrainSet.set_pop_size(POPULATION)
    TrainSet.set_mutation_rate(MUTATION_RATE)

    genome = G1DList.G1DList(SIZE_OF_CHROMOSOMES)
    # * 2 to allow ascending and descending sorts; otherwise only ascending
    # order would be representable.
    genome.setParams(rangemin=1, rangemax=(TrainSet.FeaturesNum) * 2 * sB)
    genome.evaluator.set(eval_func)  # change here for a different fitness function

    ga = GSimpleGA.GSimpleGA(genome)
    ga.setGenerations(GENERATIONS)  # number of generations (default 100)
    ga.setPopulationSize(POPULATION)  # number of individuals (default 80)
    # NOTE(review): MUTATION_RATE is recorded on the data sets but is not
    # applied to the GA engine, because the call below is disabled.
    # ga.setMutationRate(MUTATION_RATE)   # --> enable to change the mutation rate (default 0.02)
    # ga.setCrossoverRate(CROSS_OVER_RATE) # --> enable to change the crossover rate (default 0.8)
    # ga.setMultiProcessing(True) # --> please read this: http://pyevolve.sourceforge.net/wordpress/?p=843

    ga.evolve(freq_stats=10)

    # The chromosome selected by the GA; print it to inspect what was chosen.
    chromosome = ga.bestIndividual().getInternalList()
    TrainSet.set_best_individual(chromosome)
    TrainSet.sort_dataset_by_chromosome(chromosome)
    TrainSet.write_scores(isTrain=True)

    if testfile:
        with open(testfile, "r") as test_handle:
            TestSet = DataSet(True, test_handle, iB, sB)
        TestSet.set_dataset_filename(str(testfile))
        TestSet.set_generations(GENERATIONS)
        TestSet.set_pop_size(POPULATION)
        TestSet.set_mutation_rate(MUTATION_RATE)
        TestSet.isTest = True
        # Order the test set with the chromosome the GA found on the train set.
        TestSet.sort_dataset_by_chromosome(TrainSet.get_best_individual())
        TestSet.write_scores()