def logMiddleInfo_callback(gp_engine):
    """Log test results of the current best individual for this generation.

    Step callback. The data splits and the selected features are read from
    ``gol``; the engine's best individual is decoded into a coding matrix
    plus the list of features it uses. Two classifiers are then built from
    that matrix and four values are written through ``delog.logMiddle``,
    keyed by the current generation number:

    * "AAAAA" / "AAAAAfscore": accuracy and score returned by
      ``FinalTrainAndTest`` (the variant with local improvement).
    * "BestAcc_no_impro" / "BestFscore_no_impro": accuracy and score
      returned by ``FinalTrainAndTest_withoutlocalimp``.

    Args:
        gp_engine: the running GP engine; only ``getCurrentGeneration()``
            and ``bestIndividual()`` are called on it.
    """
    # Train/validation arguments shared by every training call below.
    fit_args = (
        gol.get_val("Train_X"),
        gol.get_val("Train_Y"),
        gol.get_val("validation_X"),
        gol.get_val("validation_Y"),
    )
    # The "final" evaluations additionally receive the test split.
    final_args = fit_args + (gol.get_val("Test_X"), gol.get_val("Test_Y"))
    selected_features = gol.get_val("sel_features")

    import sys
    from utils import delog

    sys.stdout.write("logMiddleInfo...")
    generation = gp_engine.getCurrentGeneration()
    champion = gp_engine.bestIndividual()
    matrix, used_features = TMConvertor.getMatrixDirectly_and_feature(champion)

    # Result with local improvement.
    improved = CC(used_features, selected_features, matrix)
    improved_score, improved_accuracy, _ = improved.FinalTrainAndTest(*final_args)
    delog.logMiddle(generation, improved_accuracy, "AAAAA")
    delog.logMiddle(generation, improved_score, "AAAAAfscore")

    # Result without local improvement: a fresh classifier is trained on the
    # train/validation data first, then evaluated on the test split.
    plain = CC(used_features, selected_features, matrix)
    plain.TrainAndTest_withoutlocalimp(*fit_args)
    plain_score, plain_accuracy = plain.FinalTrainAndTest_withoutlocalimp(
        *final_args)
    delog.logMiddle(generation, plain_accuracy, "BestAcc_no_impro")
    delog.logMiddle(generation, plain_score, "BestFscore_no_impro")

    sys.stdout.write("over\n")
    sys.stdout.flush()
def eval_func_information_gain(chromosome):
    """Return the mean information gain of a chromosome's coding matrix.

    The training labels ("Train_Y") and the class list ("classes") are read
    from ``gol``; the chromosome is decoded by
    ``TMConvertor.getMatrixDirectly_and_feature`` and the resulting matrix
    is passed to ``information_gain``. The values it returns are averaged
    with ``np.mean``.
    """
    train_labels = gol.get_val("Train_Y")
    class_names = gol.get_val("classes")
    # Only the matrix is needed here; the feature list is discarded.
    matrix, _ = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    gains = information_gain(train_labels, class_names, matrix)
    return np.mean(gains)
def eval_func_entropy(chromosome):
    """Return the mean information entropy of a chromosome's coding matrix.

    The training labels ("Train_Y") and the class list ("classes") are read
    from ``gol``; the chromosome is decoded by
    ``TMConvertor.getMatrixDirectly_and_feature`` and the resulting matrix
    is passed to ``information_entropy``. The values it returns are averaged
    with ``np.mean``.
    """
    train_labels = gol.get_val("Train_Y")
    class_names = gol.get_val("classes")
    # Only the matrix is needed here; the feature list is discarded.
    matrix, _ = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    entropies = information_entropy(train_labels, class_names, matrix)
    return np.mean(entropies)
def eval_func_eucdist(chromosome):
    """Return a normalised Euclidean-distance score for an individual.

    The chromosome is decoded into its coding matrix, the full pairwise
    distance matrix is computed with ``euclidean_distances`` and scaled by
    ``sqrt(number of columns)``. The sum of that matrix is halved and
    divided by ``n * (n - 1)``, where ``n`` is the number of entries in the
    "classes" value stored in ``gol``.
    """
    matrix, _ = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    class_count = len(gol.get_val("classes"))
    column_count = matrix.shape[1]
    scaled = euclidean_distances(matrix, matrix) / np.sqrt(column_count)
    # NOTE(review): the divisor is n*(n-1), not the pair count n*(n-1)/2, so
    # (assuming one matrix row per class) this is half of the mean pairwise
    # distance -- confirm the scaling is intended before changing it.
    return np.sum(scaled) / 2 / (class_count * (class_count - 1))
def logResultEveryGen_callback(gp_engine):
    """Print a header once, then the best raw individual's matrix each call.

    Step callback. In generation 0 a separator line and a column header are
    printed. On every call the population's best raw individual is decoded
    into its coding matrix and feature list; each feature is mapped through
    the "feature_method_index" value stored in ``gol`` and that row of
    indices is printed on top of the matrix rows.

    Args:
        gp_engine: the running GP engine; only ``getCurrentGeneration()``
            and ``getPopulation()`` are called on it.
    """
    import sys

    if gp_engine.getCurrentGeneration() == 0:
        print("=" * 65)
        header_format = 'Gen' + ' ' * 12 + '%-8s %-8s %-8s %-10s %-10s %-10s'
        print(header_format % ('Max', 'Min', 'Avg', 'Best-Fscore',
                               'Best-Hamdist', 'Best-Accuracy'))

    # Never summarise the printed matrix. The original passed the string
    # 'nan' as the threshold; set_printoptions requires a numeric value and
    # the NumPy documentation recommends sys.maxsize for untruncated output.
    np.set_printoptions(threshold=sys.maxsize)

    # Done in every generation.
    best = gp_engine.getPopulation().bestRaw()
    best_matrix, feature_list = TMConvertor.getMatrixDirectly_and_feature(best)
    feature_method_index = gol.get_val("feature_method_index")
    feature_index_row = [feature_method_index[method] for method in feature_list]

    # First row: feature indices; remaining rows: the coding matrix itself.
    rows = [feature_index_row] + best_matrix.tolist()
    print(np.array(rows))
def eval_func_hamdist(chromosome):
    """Return the average pairwise Hamming distance of an individual.

    The chromosome is decoded into its coding matrix; ``distance.hamming``
    is summed over every unordered pair of rows and the total is divided by
    ``n * (n - 1) / 2``, where ``n`` is the number of entries in the
    "classes" value stored in ``gol``.
    """
    matrix, _ = TMConvertor.getMatrixDirectly_and_feature(chromosome)
    class_count = len(gol.get_val("classes"))
    row_count = len(matrix)
    # Pairs are visited in the same (i, j > i) order as a nested index loop,
    # so the floating-point accumulation order is unchanged.
    total = sum(
        distance.hamming(matrix[first], matrix[second])
        for first in range(row_count)
        for second in range(first + 1, row_count)
    )
    pair_count = class_count * (class_count - 1) / 2
    return total / pair_count
def eval_func_fscore(chromosome):
    """Train and validate a classifier for the chromosome; return its scores.

    The chromosome is decoded into a coding matrix and the list of features
    it uses. A ``CC`` classifier is built from them together with the
    "sel_features" value from ``gol`` and run through ``TrainAndTest`` on
    the training and validation splits.

    Side effect: the evaluation information returned by ``TrainAndTest`` is
    stored on the chromosome as ``infos_evaluation``.

    Returns:
        The pair ``(fscore, accuracy)``.
    """
    matrix, used_features = TMConvertor.getMatrixDirectly_and_feature(
        chromosome)
    split_keys = ("Train_X", "Train_Y", "validation_X", "validation_Y")
    data_splits = [gol.get_val(key) for key in split_keys]
    selected_features = gol.get_val("sel_features")

    classifier = CC(used_features, selected_features, matrix)
    fscore, accuracy, evaluation_info = classifier.TrainAndTest(*data_splits)
    chromosome.infos_evaluation = evaluation_info
    return fscore, accuracy
def main_run():
    """Run the whole GP search and print the final report.

    Steps:
      1. Initialise ``gol``, publish the target folder and data name from
         ``Configs`` and let ``Initializator.init_all()`` fill in the rest.
      2. Read the run parameters and the data splits back from ``gol``.
      3. Configure a ``GTreeGP`` genome (evaluated by
         ``EM.eval_func_fscore``) and a ``GSimpleGA`` engine set to
         maximise, register the step callbacks and run the evolution.
      4. Decode the best individual, run ``FinalTrainAndTest`` on it,
         compute the scaled Euclidean-distance figure of its matrix and
         print the evaluation lines followed by the test summary.
    """
    # ---------------- variables preparation ----------------
    Initializator.init_gol()
    gol.set_val("aimFolder", Configs.aimFolder)
    gol.set_val("dataName", Configs.dataName)
    Initializator.init_all()

    classes = gol.get_val("classes")
    tree_max_depth = gol.get_val("maxDeap")
    grow_method = gol.get_val("growMethod")
    generation_count = gol.get_val("generations")
    crossover_rate = gol.get_val("crossoverRate")
    mutation_rate = gol.get_val("mutationRate")
    population_size = gol.get_val("populationSize")
    stats_frequency = gol.get_val("freq_stats")
    Train_X = gol.get_val("Train_X")
    Train_Y = gol.get_val("Train_Y")
    validation_X = gol.get_val("validation_X")
    validation_Y = gol.get_val("validation_Y")
    Test_X = gol.get_val("Test_X")
    Test_Y = gol.get_val("Test_Y")
    sel_features = gol.get_val("sel_features")

    # ---------------- genome and engine set-up ----------------
    genome = GTree.GTreeGP()
    genome.setParams(max_depth=tree_max_depth, method=grow_method)
    genome.evaluator += EM.eval_func_fscore

    ga = GSimpleGA.GSimpleGA(genome)
    ga.setParams(gp_terminals=classes, gp_function_prefix="Operation")
    ga.setMinimax(Consts.minimaxType["maximize"])
    ga.setGenerations(generation_count)
    ga.setCrossoverRate(crossover_rate)
    ga.setMutationRate(mutation_rate)
    ga.setPopulationSize(population_size)
    ga.setElitismReplacement(1)

    # Step callbacks, registered in this order.
    step_callbacks = (
        CB.checkAncients_callback,
        CB.logResultEveryGen_callback,
        CB.delogPopulation_callback,
        CB.logMiddleInfo_callback,
        CB.debug_callback,
    )
    for callback in step_callbacks:
        ga.stepCallback += callback

    print("------------------------------------------------------")

    # ---------------- evolution ----------------
    ga(freq_stats=stats_frequency)
    best = ga.bestIndividual()

    # ---------------- final evaluation of the best individual ----------------
    final_matrix, used_features = TMConvertor.getMatrixDirectly_and_feature(
        best)
    classifier = ConnectClassifier(used_features, sel_features, final_matrix)
    final_score, final_accuracy, report_lines = classifier.FinalTrainAndTest(
        Train_X, Train_Y, validation_X, validation_Y, Test_X, Test_Y)

    # Scaled Euclidean distance between the rows of the final matrix.
    class_count = len(classes)
    column_count = final_matrix.shape[1]
    scaled = euclidean_distances(final_matrix, final_matrix) / np.sqrt(
        column_count)
    dist = np.sum(scaled) / 2 / (class_count * (class_count - 1))

    # The test summary goes after whatever the classifier already reported.
    report_lines.append("---------test------------")
    report_lines.append("fscore: %f" % final_score)
    report_lines.append("accuracy: %f" % final_accuracy)
    report_lines.append("dist: %f" % dist)
    for line in report_lines:
        print(line)
def _genome_is_illegal(genome, classes):
    """Tell whether a genome decodes to an unacceptable coding matrix.

    A genome is illegal when ``LCheckers`` reports too few or too many
    columns for its matrix, or, failing that, when at least one entry of
    ``classes`` appears in none of the genome's leaf nodes.
    """
    matrix, _ = TMConvertor.getMatrixDirectly_and_feature(genome)
    # 1. The number of columns is too small or too large.
    if LCheckers.tooLittleColumn(matrix) or LCheckers.tooMuchColumn(matrix):
        return True
    # 2. Some class is not included in the terminal nodes.
    missing = set(classes)
    for tree_node in genome.nodes_list:
        if tree_node.isLeaf():
            missing = missing - set(tree_node.getData())
    return len(missing) > 0


def checkAncients_callback(gp_engine):
    """Repair illegal individuals of the first generation.

    Step callback that does nothing unless the current generation is 0.
    Each of the first "populationSize" individuals is checked; while one is
    illegal, a deep copy of it gets a randomly chosen node replaced by a
    newly grown subtree (bounded by the genome's ``max_depth`` parameter),
    and the copy's root is adopted as soon as the copy is legal. Afterwards
    the population is re-evaluated and sorted.

    Args:
        gp_engine: the running GP engine.

    Raises:
        ValueError: via ``Util.raiseException`` when a genome has no
            ``max_depth`` parameter or a negative one.
    """
    if gp_engine.getCurrentGeneration() != 0:
        return

    from utils import delog
    delog.decache("check first Gen...")

    population_size = gol.get_val("populationSize")
    classes = gol.get_val("classes")
    population = gp_engine.getPopulation()

    for index in range(0, population_size):
        genome = population[index]
        max_depth = genome.getParam("max_depth", None)

        illegal = _genome_is_illegal(genome, classes)

        if max_depth is None:
            Util.raiseException("You must specify the max_depth genome parameter !", ValueError)
        if max_depth < 0:
            Util.raiseException("The max_depth must be >= 1, if you want to use GTreeGPMutatorSubtree crossover !", ValueError)

        while illegal:
            # Mutate a copy so the original stays intact until the result is
            # known to be legal.
            candidate = copy.deepcopy(genome)
            target = candidate.getRandomNode()
            assert target is not None

            target_depth = candidate.getNodeDepth(target)
            parent = target.getParent()
            subtree = GTreeNode.buildGTreeGPGrow(
                gp_engine, 0, max_depth - target_depth)

            if parent is not None:
                subtree.setParent(parent)
                parent.replaceChild(target, subtree)
            else:
                # The chosen node was the root: the new subtree becomes the tree.
                candidate.setRoot(subtree)
            candidate.processNodes()

            # Illegal again? If so, retry from a fresh copy of the genome.
            illegal = _genome_is_illegal(candidate, classes)

            # Apply the mutation only once it yields a legal individual.
            if not illegal:
                genome.setRoot(candidate.getRoot())
                genome.processNodes()

    # Update the scores of the population.
    delog.deprint_string("over.")
    population.evaluate()
    population.sort()