def run_AI_all(pathdir, filename, BF_try_time=60, BF_ops_file_type="14ops",
               polyfit_deg=4, NN_epochs=4000, PA=PA):
    """Run the full AI-Feynman search on one data file and return the Pareto set.

    Tries brute force and polynomial fits on the raw output and on a battery
    of transformed outputs, then trains (or loads) a neural network on the
    data and uses it to test for translational symmetries and for additive /
    multiplicative separability.  The best symmetry/separability found is
    applied to produce a reduced problem on which this function recurses.

    Parameters
    ----------
    pathdir, filename : str
        Directory and name of the whitespace-separated data file.
    BF_try_time : int
        Time budget (seconds) for each brute-force run.
    BF_ops_file_type : str
        Operation set used by the brute-force search.
    polyfit_deg : int
        Maximum degree for the polynomial fit.
    NN_epochs : int
        Number of training epochs for the neural network.
    PA : ParetoSet
        Pareto set on which candidate expressions are accumulated.
        NOTE(review): the default is the shared module-level ParetoSet, so
        calls relying on the default accumulate into the same set.

    Returns
    -------
    ParetoSet
        The updated Pareto set of (complexity, error, expression) points.
    """
    # Make sure the output directory exists; it may already have been created
    # by an earlier run, in which case mkdir raises and we simply move on.
    try:
        os.mkdir("results/")
    except OSError:
        pass

    # Load the data once; used below to decide whether the NN / symmetry
    # machinery applies (it needs at least two input variables).
    data = np.loadtxt(pathdir + filename)

    # Run brute force and polyfit on the raw output.
    PA = run_bf_polyfit(pathdir, pathdir, filename, BF_try_time,
                        BF_ops_file_type, PA, polyfit_deg)

    # Run brute force and polyfit on transformed versions of the output.
    # Each get_* helper writes a "mystery world" file holding the transformed
    # output and reuses run_bf_polyfit on it.
    transformed_outputs = [
        (get_acos, "acos"), (get_asin, "asin"), (get_atan, "atan"),
        (get_cos, "cos"), (get_exp, "exp"), (get_inverse, "inverse"),
        (get_log, "log"), (get_sin, "sin"), (get_sqrt, "sqrt"),
        (get_squared, "squared"), (get_tan, "tan"),
    ]
    for get_transform, tag in transformed_outputs:
        PA = get_transform(pathdir, "results/mystery_world_%s/" % tag,
                           filename, BF_try_time, BF_ops_file_type, PA,
                           polyfit_deg)

    # Check if the NN is trained.  If it is not, train it on the data.
    print("Checking for symmetry \n", filename)
    if len(data[0]) < 3:
        # Only one input variable: no variable pair to test, so no NN needed.
        print("Just one variable!")
    elif path.exists("results/NN_trained_models/models/" + filename + ".h5"):
        print("NN already trained \n")
        print("NN loss: ", NN_eval(pathdir, filename), "\n")
    elif path.exists("results/NN_trained_models/models/" + filename + "_pretrained.h5"):
        print("Found pretrained NN \n")
        # Fine-tune the pretrained model for half the epochs.  BUGFIX: use
        # integer division -- NN_epochs/2 is a float in Python 3, and an
        # epoch count must be an integer.
        NN_train(pathdir, filename, NN_epochs // 2, lrs=1e-3, N_red_lr=3,
                 pretrained_path="results/NN_trained_models/models/" + filename + "_pretrained.h5")
        print("NN loss after training: ", NN_eval(pathdir, filename), "\n")
    else:
        print("Training a NN on the data... \n")
        NN_train(pathdir, filename, NN_epochs)
        print("NN loss: ", NN_eval(pathdir, filename), "\n")

    # Check which symmetry/separability is the best.  Each check returns a
    # tuple whose first entry is a score (-1 when not applicable) and whose
    # next two entries identify the variable pair involved.
    symmetry_minus_result = check_translational_symmetry_minus(pathdir, filename)
    symmetry_divide_result = check_translational_symmetry_divide(pathdir, filename)
    symmetry_multiply_result = check_translational_symmetry_multiply(pathdir, filename)
    symmetry_plus_result = check_translational_symmetry_plus(pathdir, filename)
    separability_plus_result = check_separability_plus(pathdir, filename)
    separability_multiply_result = check_separability_multiply(pathdir, filename)

    if symmetry_plus_result[0] == -1:
        # Checks not applicable (e.g. single-variable data): apply nothing.
        idx_min = -1
    else:
        idx_min = np.argmin(np.array([symmetry_plus_result[0],
                                      symmetry_minus_result[0],
                                      symmetry_multiply_result[0],
                                      symmetry_divide_result[0],
                                      separability_plus_result[0],
                                      separability_multiply_result[0]]))

    # Apply the best symmetry/separability and rerun the main function on the
    # reduced file(s).  The dispatch tables mirror the argmin ordering above.
    symmetries = {
        0: (do_translational_symmetry_plus, symmetry_plus_result, "+"),
        1: (do_translational_symmetry_minus, symmetry_minus_result, "-"),
        2: (do_translational_symmetry_multiply, symmetry_multiply_result, "*"),
        3: (do_translational_symmetry_divide, symmetry_divide_result, "/"),
    }
    separabilities = {
        4: (do_separability_plus, separability_plus_result, "+"),
        5: (do_separability_multiply, separability_multiply_result, "*"),
    }

    if idx_min in symmetries:
        do_symmetry, result, op = symmetries[idx_min]
        new_pathdir, new_filename = do_symmetry(pathdir, filename,
                                                result[1], result[2])
        PA1 = run_AI_all(new_pathdir, new_filename, BF_try_time,
                         BF_ops_file_type, polyfit_deg, NN_epochs, ParetoSet())
        return add_sym_on_pareto(pathdir, filename, PA1,
                                 result[1], result[2], PA, op)

    if idx_min in separabilities:
        do_separability, result, op = separabilities[idx_min]
        new_pathdir1, new_filename1, new_pathdir2, new_filename2 = \
            do_separability(pathdir, filename, result[1], result[2])
        PA1 = run_AI_all(new_pathdir1, new_filename1, BF_try_time,
                         BF_ops_file_type, polyfit_deg, NN_epochs, ParetoSet())
        PA2 = run_AI_all(new_pathdir2, new_filename2, BF_try_time,
                         BF_ops_file_type, polyfit_deg, NN_epochs, ParetoSet())
        return combine_pareto(pathdir, filename, PA1, PA2,
                              result[1], result[2], PA, op)

    return PA
from S_change_output import * from S_brute_force import brute_force from S_combine_pareto import combine_pareto from S_get_number_DL import get_number_DL from sympy.parsing.sympy_parser import parse_expr from sympy import preorder_traversal, count_ops from S_polyfit import polyfit from S_get_symbolic_expr_error import get_symbolic_expr_error from S_add_snap_expr_on_pareto import add_snap_expr_on_pareto from S_add_sym_on_pareto import add_sym_on_pareto from S_run_bf_polyfit import run_bf_polyfit from S_final_gd import final_gd from S_add_bf_on_numbers_on_pareto import add_bf_on_numbers_on_pareto from dimensionalAnalysis import dimensionalAnalysis PA = ParetoSet() def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit_deg=4, NN_epochs=4000, PA=PA): try: os.mkdir("results/") except: pass # load the data for different checks data = np.loadtxt(pathdir+filename) # Run bf and polyfit PA = run_bf_polyfit(pathdir,pathdir,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg) # Run bf and polyfit on modified output PA = get_acos(pathdir,"results/mystery_world_acos/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg) PA = get_asin(pathdir,"results/mystery_world_asin/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
def run_aifeynman(pathdir, filename, BF_try_time, BF_ops_file_type,
                  polyfit_deg=4, NN_epochs=4000, vars_name=[], test_percentage=20):
    """Top-level AI-Feynman driver for a single data file.

    Optionally performs dimensional analysis (when variable names/units are
    supplied), splits the data into disjoint train/test sets, runs the
    recursive search on the training set, refines the resulting Pareto
    expressions (brute-force snap, zero/integer/rational snap, gradient
    descent) and writes the solution files under results/.

    Parameters
    ----------
    pathdir, filename : str
        Directory and name of the data file.
    BF_try_time, BF_ops_file_type, polyfit_deg, NN_epochs
        Forwarded to run_AI_all.
    vars_name : list
        Variable names/units for dimensional analysis; empty to skip it.
        (Only read here, never mutated, so the mutable default is harmless.)
    test_percentage : int
        Percentage of rows held out as a test set.
    """
    # If the variable names are passed, do the dimensional analysis first.
    filename_orig = filename
    try:
        if vars_name != []:
            dimensionalAnalysis(pathdir, filename, vars_name)
            DR_file = filename + "_dim_red_variables.txt"
            filename = filename + "_dim_red"
        else:
            DR_file = ""
    except Exception:
        # Best effort: fall back to the raw file if dimensional analysis fails.
        DR_file = ""

    # Split the data into disjoint train and test sets.
    input_data = np.loadtxt(pathdir + filename)
    sep_idx = np.random.permutation(len(input_data))
    n_train = (100 - test_percentage) * len(input_data) // 100
    train_data = input_data[sep_idx[0:n_train]]
    # BUGFIX: the test set previously started at test_percentage*len//100 and
    # therefore overlapped the training set (with the default 20% split, the
    # middle 60% of rows appeared in both).  It must start where train ends.
    test_data = input_data[sep_idx[n_train:]]

    np.savetxt(pathdir + filename + "_train", train_data)
    if test_data.size != 0:
        np.savetxt(pathdir + filename + "_test", test_data)

    # Run the search on the training data.
    PA = ParetoSet()
    PA = run_AI_all(pathdir, filename + "_train", BF_try_time,
                    BF_ops_file_type, polyfit_deg, NN_epochs, PA=PA)
    PA_list = PA.get_pareto_points()

    # Run bf snap on the resulting equations (best effort per expression).
    for i in range(len(PA_list)):
        try:
            PA = add_bf_on_numbers_on_pareto(pathdir, filename, PA, PA_list[i][-1])
        except Exception:
            continue
    PA_list = PA.get_pareto_points()
    np.savetxt("results/solution_before_snap_%s.txt" % filename, PA_list, fmt="%s")

    # Run zero, integer and rational snap on the resulting equations.
    for j in range(len(PA_list)):
        PA = add_snap_expr_on_pareto(pathdir, filename, PA_list[j][-1], PA, "")
    PA_list = PA.get_pareto_points()
    np.savetxt("results/solution_first_snap_%s.txt" % filename, PA_list, fmt="%s")

    # Run gradient descent on the data one more time (best effort).
    for i in range(len(PA_list)):
        try:
            gd_update = final_gd(pathdir, filename, PA_list[i][-1])
            PA.add(Point(x=gd_update[1], y=gd_update[0], data=gd_update[2]))
        except Exception:
            continue
    PA_list = PA.get_pareto_points()
    for j in range(len(PA_list)):
        PA = add_snap_expr_on_pareto(pathdir, filename, PA_list[j][-1], PA, DR_file)

    list_dt = np.array(PA.get_pareto_points())
    data_file_len = len(np.loadtxt(pathdir + filename))
    # Per-expression description length of the fit error, in bits per point
    # and in total bits over the whole data file.
    log_err = np.array([np.log2(float(row[1])) for row in list_dt])
    log_err_all = data_file_len * log_err

    # Try the found expressions on the test data.  Skipped when dimensional
    # analysis renamed the variables (DR_file set) or no test rows exist.
    if DR_file == "" and test_data.size != 0:
        test_errors = np.array(
            [get_symbolic_expr_error(pathdir, filename + "_test", str(row[-1]))
             for row in list_dt])
        # Save all the data to file.
        save_data = np.column_stack((test_errors, log_err, log_err_all, list_dt))
    else:
        save_data = np.column_stack((log_err, log_err_all, list_dt))
    np.savetxt("results/solution_%s" % filename_orig, save_data, fmt="%s")
# Maps each output-transform tag to the (prefix, suffix) pair implementing its
# inverse: if the fitted expression approximates T(f), then
# prefix + expression + suffix recovers f itself.
_INVERSE_WRAPPERS = {
    "acos": ("cos(", ")"),
    "asin": ("sin(", ")"),
    "atan": ("tan(", ")"),
    "cos": ("acos(", ")"),
    "exp": ("log(", ")"),
    "inverse": ("1/(", ")"),
    "log": ("exp(", ")"),
    # BUGFIX: the inverse of sin is asin.  The original wrapped the "sin"
    # case in acos(...), an apparent copy-paste from the "cos" branch.
    "sin": ("asin(", ")"),
    "sqrt": ("(", ")**2"),
    "squared": ("sqrt(", ")"),
    "tan": ("atan(", ")"),
}


def _undo_output_transform(output_type, inner):
    """Wrap *inner* with the inverse of the named output transform.

    An empty or unknown output_type returns *inner* unchanged."""
    prefix, suffix = _INVERSE_WRAPPERS.get(output_type, ("", ""))
    return prefix + inner + suffix


def _constants_complexity(expr):
    """Description length (bits) of the numeric constants in a sympy expr.

    A constant whose DL cannot be computed gets a huge penalty so the
    expression is effectively pushed off the Pareto front."""
    compl = 0
    for sub in preorder_traversal(expr):
        if sub.is_Atom and sub.is_number:
            try:
                compl = compl + get_number_DL(float(sub))
            except Exception:
                compl = compl + 1000000
    return compl


def _structure_complexity(expr):
    """Complexity contribution of the variables and operations in *expr*."""
    n_variables = len(expr.free_symbols)
    n_operations = len(count_ops(expr, visual=True).free_symbols)
    if n_operations != 0 or n_variables != 0:
        return (n_variables + n_operations) * np.log2(n_variables + n_operations)
    return 0


def _add_bf_results(pathdir, filename, PA, output_type, op):
    """Read results.dat left by brute_force and add its candidates to *PA*.

    *op* is the operator ("+" or "*") joining the brute-force prefactor to
    the RPN expression.  Each candidate is also refined by gradient descent.
    Best effort throughout: candidates that fail to parse/evaluate are
    skipped, and a missing/unreadable results.dat adds nothing."""
    try:
        bf_all_output = np.loadtxt("results.dat", dtype="str")
        express = bf_all_output[:, 2]
        prefactors = [str(p) for p in bf_all_output[:, 1]]
    except Exception:
        return PA

    eqns = []
    for prefactor, rpn in zip(prefactors, express):
        try:
            eqn = _undo_output_transform(output_type,
                                         prefactor + op + RPN_to_eq(rpn))
            error = get_symbolic_expr_error(pathdir, filename, eqn)
            expr = parse_expr(eqn)
            compl = _constants_complexity(expr) + _structure_complexity(expr)
        except Exception:
            continue
        # Record only after everything succeeded, keeping equation, error and
        # complexity aligned (the original appended to three parallel lists at
        # different points inside the try and could mis-pair them on failure).
        eqns.append(eqn)
        PA.add(Point(x=compl, y=error, data=eqn))

    # Run gradient descent on the BF output parameters and add the results
    # to the Pareto plot.
    for eqn in eqns:
        try:
            bf_gd_update = RPN_to_pytorch(pathdir + filename, eqn)
            PA.add(Point(x=bf_gd_update[1], y=bf_gd_update[0],
                         data=bf_gd_update[2]))
        except Exception:
            continue
    return PA


def run_bf_polyfit(pathdir, pathdir_transformed, filename, BF_try_time,
                   BF_ops_file_type, PA, polyfit_deg=4, output_type=""):
    """Run brute force ("+" and "*" prefactors) and a polynomial fit on the
    (possibly output-transformed) data, adding every candidate expression to
    the Pareto set *PA*.

    Parameters
    ----------
    pathdir : str
        Directory of the original data file (used for error evaluation).
    pathdir_transformed : str
        Directory of the transformed data file fed to brute force / polyfit.
    filename : str
        Name of the data file.
    BF_try_time, BF_ops_file_type
        Time budget and operation set for the brute-force search.
    PA : ParetoSet
        Pareto set to accumulate (complexity, error, expression) points on.
    polyfit_deg : int
        Maximum polynomial degree.
    output_type : str
        Tag of the transform applied to the output ("" for none); candidate
        expressions are wrapped with its inverse before evaluation.

    Returns
    -------
    ParetoSet
        The updated Pareto set.
    """
    # Run BF on the data with an additive prefactor.
    print("Checking for brute force + \n")
    brute_force(pathdir_transformed, filename, BF_try_time, BF_ops_file_type, "+")
    PA = _add_bf_results(pathdir, filename, PA, output_type, "+")

    # Run BF on the data with a multiplicative prefactor.
    print("Checking for brute force * \n")
    brute_force(pathdir_transformed, filename, BF_try_time, BF_ops_file_type, "*")
    PA = _add_bf_results(pathdir, filename, PA, output_type, "*")

    # Run polyfit on the data.
    print("Checking polyfit \n")
    polyfit_result = polyfit(polyfit_deg, pathdir_transformed + filename)
    eqn = _undo_output_transform(output_type, str(polyfit_result[0]))
    polyfit_err = get_symbolic_expr_error(pathdir, filename, eqn)

    # Complexity of the polyfit expression, computed the same way as for the
    # gradient descent case: constants from the wrapped expression, structure
    # from the raw polynomial (matching the original behaviour).
    complexity = _constants_complexity(parse_expr(eqn))
    try:
        complexity = complexity + _structure_complexity(polyfit_result[0])
    except Exception:
        pass

    # Run zero snap on the polyfit output and merge the result into PA.
    PA_poly = ParetoSet()
    PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn)))
    PA_poly = add_snap_expr_on_pareto_polyfit(pathdir, filename, str(eqn), PA_poly)
    for snapped in PA_poly.get_pareto_points():
        PA.add(Point(snapped[0], snapped[1], snapped[2]))

    print("Complexity RMSE Expression")
    for point in PA.get_pareto_points():
        print(point)
    return PA