# Print the simplified model as a plain string.
print(str(symplified_best))

# output the top 3 champs
champs = 3
# Do not index past the end of `hof`: it may hold fewer than `champs` entries.
for i in range(min(champs, len(hof))):
    ind = hof[i]
    symplified_model = gep.simplify(ind)
    print('\nSymplified best individual {}: '.format(i))
    print(symplified_model)
    print("raw indivudal:")
    print(ind)

# we want to use symbol labels instead of words in the tree graph
rename_labels = {'add': '+', 'sub': '-', 'mul': '*', 'protected_div': '/'}
gep.export_expression_tree(best_ind, rename_labels, 'numerical_expression_tree.png')

# As we can see from the above simplified expression, the *truth model* has
# been successfully found.
# Due to the existence of Gaussian noise, the minimum mean absolute error (MAE)
# is still not zero even though the best individual represents the true model.

# ## Visualization
# If you are interested in the expression tree corresponding to the individual,
# i.e., the genotype/phenotype system, *geppy* supports tree visualization by
# the `graph` and the `export_expression_tree` functions:
#
# - `graph` only outputs the nodes and links information to describe the tree
#   topology, with which you can render the tree with tools you like;
# - `export_expression_tree` implements tree visualization with data generated
#   by `graph` internally using the `graphviz` package.
#
# **Note**: even if the linear scaling is applied, here only the raw individual
# in GP (i.e., the one without linear scaling) is visualized.

# show the above image here for convenience
# NOTE: the bare `Image(...)` expression only renders when it is the last
# expression of a notebook/IPython cell; in a plain script it has no effect.
from IPython.display import Image
Image(filename='numerical_expression_tree.png')
# Print the simplified model as a plain string.
print(str(symplified_best))

# output the top 3 champs
champs = 3
# Do not index past the end of `hof`: it may hold fewer than `champs` entries.
for i in range(min(champs, len(hof))):
    ind = hof[i]
    symplified_model = gep.simplify(ind)
    print('\nSymplified best individual {}: '.format(i))
    print(symplified_model)
    print("raw indivudal:")
    print(ind)

# we want to use symbol labels instead of words in the tree graph
rename_labels = {'add': '+', 'sub': '-', 'mul': '*', 'protected_div': '/'}
gep.export_expression_tree(best_ind, rename_labels, 'GEP_MT_ET.png')

# As we can see from the above simplified expression, the *truth model* has
# been successfully found. Due to the existence of Gaussian noise, the minimum
# mean absolute error (MAE) is still not zero even though the best individual
# represents the true model.

# ## Visualization
# If you are interested in the expression tree corresponding to the individual,
# i.e., the genotype/phenotype system, *geppy* supports tree visualization by
# the `graph` and the `export_expression_tree` functions:
#
# - `graph` only outputs the nodes and links information to describe the tree
#   topology, with which you can render the tree with tools you like;
# - `export_expression_tree` implements tree visualization with data generated
#   by `graph` internally using the `graphviz` package.
#
# **Note**: even if the linear scaling is applied, here only the raw individual
# in GP (i.e., the one without linear scaling) is visualized.

# show the above image here for convenience
# NOTE(review): the disabled display code below points at 'expression_tree.png',
# but the tree above is written to 'GEP_MT_ET.png' -- update the filename
# before re-enabling it.
#from IPython.display import Image
#Image(filename='expression_tree.png')
# Close the `file` handle opened earlier in the script (outside this excerpt).
file.close()

# In[68]:

# we want to use symbol labels instead of words in the tree graph
rename_labels = {
    'add': '+',
    'sub': '-',
    'mul': '*',
    'protected_div': '/',
    'sin': 'sin',
    'cos': 'cos',
    'tan': 'tan',
}
# Render the expression tree of `best_ind` to a PDF under results/.
gep.export_expression_tree(best_ind, rename_labels, 'results/heat_tree.pdf')

# ### Let's eyeball predicted vs actual data:

# In[76]:

#from matplotlib import pyplot
#pyplot.rcParams['figure.figsize'] = [20, 5]
#plotlen=200
#pyplot.plot(predPE.head(plotlen))       # predictions are in blue
#pyplot.plot(holdout.ut.head(plotlen))   # actual values are in orange
#pyplot.show()
#
#best_ind = hof[0]
#for gene in best_ind:
#    print(gene.kexpression)
# In this problem, the final solution *best* seems more complicated than the
# true function $f$. Thus, it would be quite helpful if we can get a simplified
# version of the model we found by removing all the redundancies for better
# comparison and verification. *geppy* has provided a convenient function
# `simplify()` to perform symbolic simplification of the individual (solution)
# by leveraging the `sympy` package.

# In[14]:

symplified_best = gep.simplify(best)
print('Symplified best individual: ')
print(symplified_best)

# Clearly, after simplification the best individual evolved by GEP is just the
# ideal function $f$. More importantly, GEP can perform implicit variable
# (feature) selection effectively: though we provide it with four inputs, GEP
# only picks the three useful inputs `a, c, d`.

# ## Visualization
# If you are interested in the expression tree corresponding to the individual,
# i.e., the genotype/phenotype system, *geppy* supports tree visualization by
# the `graph` and the `export_expression_tree` functions:
#
# - `graph` only outputs the nodes and links information to describe the tree
#   topology, with which you can render the tree with tools you like;
# - `export_expression_tree` implements tree visualization with data generated
#   by `graph` internally using the `graphviz` package.

# In[15]:

# we want to use symbol labels instead of words in the tree graph
rename_labels = {'and_': '&', 'or_': '|', 'not_': '~'}
gep.export_expression_tree(best, rename_labels, 'data/bool_tree.png')

# In[17]:

# show the above image here for convenience
# NOTE: the bare `Image(...)` expression only renders when it is the last
# expression of a notebook/IPython cell; in a plain script it has no effect.
from IPython.display import Image
Image(filename='data/bool_tree.png')
str(datetime.datetime.now())) print(hof[0]) #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #Symbolic simplification of the final solution # print the best symbolic regression we found: best_ind = hof[0] symplified_best = gep.simplify(best_ind) from sympy import init_printing init_printing() #use str(symplified_best) to get the string of the symplified model symplified_best #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #Visualization #we want to use symbol labels instead of words in the tree graph rename_labels = {'add': '+', 'sub': '-', 'mul': '*', 'protected_div': '/'} gep.export_expression_tree(best_ind, rename_labels, 'F:/GEP-LR/synthetic-data-modeling-tree.png') #show the above image here for convenience from IPython.display import Image Image(filename='F:/GEP-LR/synthetic-data-modeling-tree.png') #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #Plot maximum fitness values import matplotlib.pyplot as plt max_Fitness_values = log.select("max") fig = plt.figure(figsize=(15, 5)) plt.plot(max_Fitness_values, '-bo') # predictions are in red plt.show() fig.savefig('F:/GEP-LR/synthetic-data-modeling-maxFitness.eps', dpi=300, format='eps') #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #Calculate the final estimated probability
# Print the first (best-ranked) entry of `hof`.
print(hof[0])

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#Symbolic simplification of the final solution

#print the best symbolic regression we found:
best_ind = hof[0]
symplified_best = gep.simplify(best_ind)

from sympy import init_printing
init_printing()

#use str(symplified_best) to get the string of the symplified model
# NOTE: the bare expression below is only echoed when run as an interactive
# cell (IPython/Spyder); in a plain script it has no visible effect.
symplified_best

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#Visualization

#use symbol labels instead of words in the tree graph
rename_labels = {'add': '+', 'sub': '-', 'mul': '*', 'protected_div': '/'}
gep.export_expression_tree(best_ind, rename_labels, 'F:/GEP-LR/MPM-tree.png')

#show the above image here for convenience
from IPython.display import Image
Image(filename='F:/GEP-LR/MPM-tree.png')

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#Plot maximum fitness values
import matplotlib.pyplot as plt

# "max" column of the logbook `log`, one entry per recorded generation.
max_Fitness_values = log.select("max")
fig = plt.figure(figsize=(15, 5))
plt.plot(max_Fitness_values, '-bo')  # max fitness: blue line, circle markers
# Save before showing: once an interactive window has been shown and closed,
# the figure may no longer be in a state that can be reliably written out.
fig.savefig('F:/GEP-LR/MPM-maxFitness.eps', dpi=300, format='eps')
plt.show()

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#Calculate the final estimated probability