Exemple #1
0
def logMiddleInfo_callback(gp_engine):
    """Log test metrics of the engine's current best individual.

    The best individual is decoded into a matrix plus the list of features
    it uses, and two classifiers are built from it: one evaluated through
    ``FinalTrainAndTest`` (with local improvement) and one through the
    ``*_withoutlocalimp`` methods. Accuracy and f-score of both runs are
    written with ``delog.logMiddle`` under the current generation id.

    :param gp_engine: the GP engine; must provide ``getCurrentGeneration()``
        and ``bestIndividual()``.
    :return: None.
    """
    # Data sets and the selected features come from the shared `gol` store.
    (train_x, train_y,
     valid_x, valid_y,
     test_x, test_y,
     selected_features) = [
        gol.get_val(key)
        for key in ("Train_X", "Train_Y",
                    "validation_X", "validation_Y",
                    "Test_X", "Test_Y",
                    "sel_features")
    ]

    import sys
    from utils import delog

    sys.stdout.write("logMiddleInfo...")
    generation = gp_engine.getCurrentGeneration()

    champion = gp_engine.bestIndividual()
    matrix, used_features = TMConvertor.getMatrixDirectly_and_feature(champion)

    # Result with local improvement.
    improved = CC(used_features, selected_features, matrix)
    improved_result = improved.FinalTrainAndTest(
        train_x, train_y, valid_x, valid_y, test_x, test_y)
    score_improved, accuracy_improved, _infos = improved_result
    delog.logMiddle(generation, accuracy_improved, "AAAAA")
    delog.logMiddle(generation, score_improved, "AAAAAfscore")

    # Result without local improvement: a fresh classifier is trained on the
    # train/validation data first, then evaluated on the test data.
    plain = CC(used_features, selected_features, matrix)
    plain.TrainAndTest_withoutlocalimp(train_x, train_y, valid_x, valid_y)
    score_plain, accuracy_plain = plain.FinalTrainAndTest_withoutlocalimp(
        train_x, train_y, valid_x, valid_y, test_x, test_y)
    delog.logMiddle(generation, accuracy_plain, "BestAcc_no_impro")
    delog.logMiddle(generation, score_plain, "BestFscore_no_impro")

    sys.stdout.write("over\n")
    sys.stdout.flush()
Exemple #2
0
def eval_func_information_gain(chromosome):
    """Return the mean information gain of the matrix encoded by *chromosome*.

    The training labels and the class list are read from the shared ``gol``
    store; the chromosome is decoded with ``TMConvertor`` and the result of
    ``information_gain`` is averaged with ``np.mean``.

    :param chromosome: individual to evaluate.
    :return: mean of the values returned by ``information_gain``.
    """
    train_labels = gol.get_val("Train_Y")
    class_list = gol.get_val("classes")
    # Only the matrix is needed; the list of used features is ignored.
    matrix, _used_features = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    gains = information_gain(train_labels, class_list, matrix)
    return np.mean(gains)
Exemple #3
0
def eval_func_entropy(chromosome):
    """Return the mean information entropy of the matrix encoded by *chromosome*.

    The training labels and the class list are read from the shared ``gol``
    store; the chromosome is decoded with ``TMConvertor`` and the result of
    ``information_entropy`` is averaged with ``np.mean``.

    :param chromosome: individual to evaluate.
    :return: mean of the values returned by ``information_entropy``.
    """
    train_labels = gol.get_val("Train_Y")
    class_list = gol.get_val("classes")
    # Only the matrix is needed; the list of used features is ignored.
    matrix, _used_features = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    return np.mean(information_entropy(train_labels, class_list, matrix))
Exemple #4
0
def eval_func_eucdist(chromosome):
    """Return a normalised Euclidean row-distance score for *chromosome*.

    The matrix decoded from the chromosome is compared with itself via
    ``euclidean_distances``; every entry is divided by the square root of
    the column count. The sum of all entries is then divided by 2 and by
    ``n * (n - 1)``, where ``n`` is the number of classes stored in ``gol``.

    :param chromosome: individual to evaluate.
    :return: the distance score described above.
    """
    matrix, _used_features = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    class_count = len(gol.get_val("classes"))
    column_count = matrix.shape[1]
    # Full pairwise distance matrix (rows against rows), scaled by sqrt(#columns).
    scaled = euclidean_distances(matrix, matrix) / np.sqrt(column_count)
    return np.sum(scaled) / 2 / (class_count * (class_count - 1))
Exemple #5
0
def logResultEveryGen_callback(gp_engine):
    """Print the best raw individual's matrix for the current generation.

    On generation 0 a separator line and a column header are printed first.
    On every call the best raw individual of the population is decoded into
    its matrix and feature list; the feature list is mapped to indices via
    the ``feature_method_index`` table stored in ``gol`` and printed as an
    extra first row above the matrix.

    Side effect: NumPy's global print threshold is raised so that arrays are
    printed in full instead of being summarised.

    :param gp_engine: the GP engine; must provide ``getCurrentGeneration()``
        and ``getPopulation()``.
    :return: None.
    """
    import sys

    if gp_engine.getCurrentGeneration() == 0:
        print("=" * 65)
        header_format = 'Gen' + ' ' * 12 + '%-8s  %-8s  %-8s %-10s   %-10s   %-10s'
        print(header_format % ('Max', 'Min', 'Avg', 'Best-Fscore',
                               'Best-Hamdist', 'Best-Accuracy'))

    # The threshold must be numeric: the string 'nan' is rejected by newer
    # NumPy releases. sys.maxsize disables summarisation in every version.
    np.set_printoptions(threshold=sys.maxsize)

    # Done in every generation.
    best = gp_engine.getPopulation().bestRaw()
    best_matrix, feature_list = TMConvertor.getMatrixDirectly_and_feature(best)
    feature_method_index = gol.get_val("feature_method_index")
    feature_index_row = [feature_method_index[method] for method in feature_list]

    # Prepend the feature indices as a header row above the matrix rows.
    rows = best_matrix.tolist()
    rows.insert(0, feature_index_row)
    print(np.array(rows))
Exemple #6
0
def eval_func_hamdist(chromosome):
    """Return the Hamming-distance score of the matrix encoded by *chromosome*.

    ``distance.hamming`` is summed over every unordered pair of matrix rows
    and the total is divided by ``n * (n - 1) / 2``, where ``n`` is the
    number of classes stored in ``gol``.

    :param chromosome: individual to evaluate.
    :return: summed pairwise Hamming distance divided by the pair count.
    """
    matrix, _used_features = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    class_list = gol.get_val("classes")
    row_count = len(matrix)
    # Visit each unordered row pair (upper < lower) exactly once.
    total = sum(
        distance.hamming(matrix[upper], matrix[lower])
        for upper in range(row_count)
        for lower in range(upper + 1, row_count)
    )
    pair_count = len(class_list) * (len(class_list) - 1) / 2
    return total / pair_count
Exemple #7
0
def eval_func_fscore(chromosome):
    """Train and validate a classifier built from *chromosome*.

    The chromosome is decoded into a matrix and its list of used features.
    A ``CC`` classifier is built from them and run through ``TrainAndTest``
    on the training and validation sets stored in ``gol``. The evaluation
    details returned by the classifier are attached to the chromosome as
    ``infos_evaluation``.

    :param chromosome: individual to evaluate; receives the
        ``infos_evaluation`` attribute as a side effect.
    :return: tuple ``(fscore, accuracy)``.
    """
    matrix, used_features = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    train_x, train_y, valid_x, valid_y, selected_features = [
        gol.get_val(key)
        for key in ("Train_X", "Train_Y",
                    "validation_X", "validation_Y",
                    "sel_features")
    ]

    classifier = CC(used_features, selected_features, matrix)
    outcome = classifier.TrainAndTest(train_x, train_y, valid_x, valid_y)
    fscore, accuracy, details = outcome
    # Keep the per-evaluation details on the individual for later reporting.
    chromosome.infos_evaluation = details
    return fscore, accuracy
Exemple #8
0
def main_run():
    """Configure and run the GP search, then report the best individual.

    Steps:
      1. initialise the shared ``gol`` store and read run parameters and
         data sets from it;
      2. build the tree genome and the GA engine, and register the step
         callbacks;
      3. run the evolution;
      4. decode the best individual, evaluate it with
         ``FinalTrainAndTest`` and print the evaluation report followed by
         the test f-score, accuracy and Euclidean distance score.

    :return: None.
    """
    ##########################################
    # variables preparation
    ##########################################
    Initializator.init_gol()
    gol.set_val("aimFolder", Configs.aimFolder)
    gol.set_val("dataName", Configs.dataName)
    Initializator.init_all()

    class_list = gol.get_val("classes")
    max_depth = gol.get_val("maxDeap")
    grow_method = gol.get_val("growMethod")
    generation_count = gol.get_val("generations")
    crossover_rate = gol.get_val("crossoverRate")
    mutation_rate = gol.get_val("mutationRate")
    population_size = gol.get_val("populationSize")
    stats_frequency = gol.get_val("freq_stats")
    train_x = gol.get_val("Train_X")
    train_y = gol.get_val("Train_Y")
    valid_x = gol.get_val("validation_X")
    valid_y = gol.get_val("validation_Y")
    test_x = gol.get_val("Test_X")
    test_y = gol.get_val("Test_Y")
    selected_features = gol.get_val("sel_features")
    ##########################################

    # Genome: a GP tree scored by the f-score evaluator.
    genome = GTree.GTreeGP()
    genome.setParams(max_depth=max_depth, method=grow_method)
    genome.evaluator += EM.eval_func_fscore

    # Engine configuration.
    ga = GSimpleGA.GSimpleGA(genome)
    ga.setParams(gp_terminals=class_list, gp_function_prefix="Operation")
    ga.setMinimax(Consts.minimaxType["maximize"])
    ga.setGenerations(generation_count)
    ga.setCrossoverRate(crossover_rate)
    ga.setMutationRate(mutation_rate)
    ga.setPopulationSize(population_size)
    ga.setElitismReplacement(1)
    #ga.stepCallback.set(CB.printIndividuals_callback)
    # Callbacks are registered in this exact order.
    for step_callback in (CB.checkAncients_callback,
                          CB.logResultEveryGen_callback,
                          CB.delogPopulation_callback,
                          CB.logMiddleInfo_callback,
                          CB.debug_callback):
        ga.stepCallback += step_callback

    print("------------------------------------------------------")

    # Run the evolution and pick the winner.
    ga(freq_stats=stats_frequency)
    champion = ga.bestIndividual()

    #change the display_flag to display test labels and predict labels
    final_matrix, used_features = TMConvertor.getMatrixDirectly_and_feature(
        champion)
    classifier = ConnectClassifier(used_features, selected_features, final_matrix)
    final_score, final_accuracy, report = classifier.FinalTrainAndTest(
        train_x, train_y, valid_x, valid_y, test_x, test_y)

    # Euclidean distance score: pairwise row distances scaled by
    # sqrt(#columns), summed, then divided by 2 and by n * (n - 1).
    class_count = len(class_list)
    column_count = final_matrix.shape[1]
    scaled = euclidean_distances(final_matrix, final_matrix) / np.sqrt(column_count)
    euclid_score = np.sum(scaled) / 2 / (class_count * (class_count - 1))

    # Append the test summary to the evaluation report and print it all.
    report.append("---------test------------")
    report.append("fscore: %f" % final_score)
    report.append("accuracy: %f" % final_accuracy)
    report.append("dist: %f" % euclid_score)

    for line in report:
        print(line)
Exemple #9
0
def _genome_is_illegal(genome, classes):
    """Return True when *genome* fails the legality checks.

    A genome is illegal when its decoded matrix has too few or too many
    columns (per ``LCheckers``), or when at least one entry of *classes*
    does not appear in the data of any leaf node.
    """
    ecoc_matrix, _feature_list = TMConvertor.getMatrixDirectly_and_feature(genome)

    # 1. The number of columns is out of range.
    if LCheckers.tooLittleColumn(ecoc_matrix) or LCheckers.tooMuchColumn(ecoc_matrix):
        return True

    # 2. Some class is not included in the terminal nodes.
    missing = set(classes)
    for node in genome.nodes_list:
        if node.isLeaf():
            missing -= set(node.getData())
    return len(missing) > 0


def checkAncients_callback(gp_engine):
    """Repair illegal individuals of the first generation.

    Does nothing unless the engine is at generation 0. Each of the first
    ``populationSize`` individuals is checked; while one is illegal, a copy
    of it gets a randomly chosen node replaced by a freshly grown subtree,
    and the copy's root is adopted once the copy is legal. Afterwards the
    whole population is re-evaluated and re-sorted.

    NOTE(review): the repair loop has no retry limit, so it runs until a
    legal replacement is found.

    :param gp_engine: the GP engine; must provide ``getCurrentGeneration()``
        and ``getPopulation()``.
    :return: None.
    :raises ValueError: through ``Util.raiseException`` when a genome has no
        ``max_depth`` parameter or a negative one.
    """
    if gp_engine.getCurrentGeneration() != 0:
        return
    from utils import delog
    delog.decache("check first Gen...")

    population_size = gol.get_val("populationSize")
    classes = gol.get_val("classes")
    population = gp_engine.getPopulation()
    for index in range(population_size):
        genome = population[index]
        max_depth = genome.getParam("max_depth", None)

        illegal = _genome_is_illegal(genome, classes)

        if max_depth is None:
            Util.raiseException("You must specify the max_depth genome parameter !", ValueError)
        # NOTE(review): the message says ">= 1" but the check only rejects
        # negative values; kept as in the original.
        if max_depth < 0:
            Util.raiseException("The max_depth must be >= 1, if you want to use GTreeGPMutatorSubtree crossover !", ValueError)

        while illegal:
            # Always start from the unmodified genome: it is only changed
            # once a legal candidate has been found.
            candidate = copy.deepcopy(genome)
            node = candidate.getRandomNode()
            assert node is not None
            depth = candidate.getNodeDepth(node)
            node_parent = node.getParent()
            # Grow a replacement subtree that respects the remaining depth.
            root_subtree = GTreeNode.buildGTreeGPGrow(gp_engine, 0, max_depth - depth)
            if node_parent is None:
                candidate.setRoot(root_subtree)
            else:
                root_subtree.setParent(node_parent)
                node_parent.replaceChild(node, root_subtree)
            candidate.processNodes()

            illegal = _genome_is_illegal(candidate, classes)

            # Apply the mutation to the real genome only when it is legal.
            if not illegal:
                genome.setRoot(candidate.getRoot())
                genome.processNodes()

    # Update the scores of the population.
    delog.deprint_string( "over.")
    population.evaluate()
    population.sort()