# Prefix/suffix pairs wrapped around a fitted expression.  Each key names the
# transformation selected through ``output_type`` and is paired with its
# mathematical inverse (e.g. "exp" -> log(...), "sin" -> asin(...)).
_BF_INVERSE_WRAPPERS = {
    "": ("", ""),
    "acos": ("cos(", ")"),
    "asin": ("sin(", ")"),
    "atan": ("tan(", ")"),
    "cos": ("acos(", ")"),
    "exp": ("log(", ")"),
    "inverse": ("1/(", ")"),
    "log": ("exp(", ")"),
    "sin": ("asin(", ")"),
    "sqrt": ("(", ")**2"),
    "squared": ("sqrt(", ")"),
    "tan": ("atan(", ")"),
}


def _bf_expression_complexity(expr):
    """Return the complexity score of a parsed brute-force expression."""
    total = 0
    for node in preorder_traversal(expr):
        if not (node.is_Atom and node.is_number):
            continue
        try:
            total = total + get_number_DL(float(node))
        except Exception:
            # A constant that cannot be scored gets a very large penalty.
            total = total + 1000000
    # Add the complexity due to symbols
    n_variables = len(expr.free_symbols)
    n_operations = len(count_ops(expr, visual=True).free_symbols)
    if n_operations != 0 or n_variables != 0:
        n_symbols = n_variables + n_operations
        total = total + n_symbols * np.log2(n_symbols)
    return total


def _add_bf_candidates(pathdir, pathdir_transformed, filename, BF_try_time,
                       BF_ops_file_type, PA, output_type, operator):
    """Run brute force with ``operator`` ("+" or "*") and add its results to PA."""
    brute_force(pathdir_transformed, filename, BF_try_time, BF_ops_file_type,
                operator)

    wrapper = _BF_INVERSE_WRAPPERS.get(output_type)
    if wrapper is None:
        # Unknown output_type: no candidate equation can be built.
        return
    prefix, suffix = wrapper

    try:
        # ndmin=2 keeps the column indexing valid when results.dat holds a
        # single row (np.loadtxt would otherwise return a 1-D array).
        bf_all_output = np.loadtxt("results.dat", dtype="str", ndmin=2)
        express = bf_all_output[:, 2]
        prefactors = [str(value) for value in bf_all_output[:, 1]]

        scored = []     # (complexity, error, equation), always aligned
        gd_inputs = []  # every equation that could be assembled
        for prefactor, rpn in zip(prefactors, express):
            try:
                eqn = prefix + prefactor + operator + RPN_to_eq(rpn) + suffix
            except Exception:
                continue
            gd_inputs.append(eqn)
            try:
                error = get_symbolic_expr_error(pathdir, filename, eqn)
                complexity = _bf_expression_complexity(parse_expr(eqn))
            except Exception:
                # Skip the candidate entirely so that complexity, error and
                # equation can never get paired with the wrong entry.
                continue
            scored.append((complexity, error, eqn))

        # add the BF output to the Pareto plot
        for complexity, error, eqn in scored:
            PA.add(Point(x=complexity, y=error, data=eqn))

        # run gradient descent of BF output parameters and add the results
        # to the Pareto plot
        for eqn in gd_inputs:
            try:
                bf_gd_update = RPN_to_pytorch(pathdir + filename, eqn)
                PA.add(Point(x=bf_gd_update[1], y=bf_gd_update[0],
                             data=bf_gd_update[2]))
            except Exception:
                continue
    except Exception as err:
        # Brute force is best effort: report the problem and carry on.
        print("Skipping brute force %s results: %s" % (operator, err))


def run_bf_polyfit(pathdir, pathdir_transformed, filename, BF_try_time,
                   BF_ops_file_type, PA, polyfit_deg=4, output_type=""):
    """Run brute force (+ and *) and a polynomial fit, adding results to ``PA``.

    Brute force and polyfit are run on ``pathdir_transformed + filename``.
    Every candidate is wrapped with the inverse of ``output_type`` and its
    error is evaluated with ``get_symbolic_expr_error(pathdir, filename, ...)``.

    Args:
        pathdir: directory of the data used to evaluate candidate errors.
        pathdir_transformed: directory of the data given to brute force and
            polyfit.
        filename: name of the data file (the same name in both directories).
        BF_try_time, BF_ops_file_type: forwarded unchanged to ``brute_force``.
        PA: Pareto set that receives the candidates; it is updated in place.
        polyfit_deg: degree forwarded to ``polyfit``.
        output_type: key of ``_BF_INVERSE_WRAPPERS``.  An unknown key disables
            the brute-force candidates and leaves the polyfit expression
            unwrapped.

    Returns:
        The same ``PA`` object, after the new points were added.
    """
    for operator in ("+", "*"):
        print("Checking for brute force %s \n" % operator)
        _add_bf_candidates(pathdir, pathdir_transformed, filename,
                           BF_try_time, BF_ops_file_type, PA, output_type,
                           operator)

    # run polyfit on the data
    print("Checking polyfit \n")
    polyfit_result = polyfit(polyfit_deg, pathdir_transformed + filename)
    prefix, suffix = _BF_INVERSE_WRAPPERS.get(output_type, ("", ""))
    eqn = prefix + str(polyfit_result[0]) + suffix

    polyfit_err = get_symbolic_expr_error(pathdir, filename, eqn)
    expr = parse_expr(eqn)
    complexity = 0
    for node in preorder_traversal(expr):
        if node.is_Atom and node.is_number:
            complexity = complexity + get_number_DL(float(node))
    try:
        # Add the complexity due to symbols.
        # NOTE(review): this counts symbols of the raw polyfit result, not of
        # the wrapped expression -- kept as in the original; confirm intent.
        n_variables = len(polyfit_result[0].free_symbols)
        n_operations = len(
            count_ops(polyfit_result[0], visual=True).free_symbols)
        if n_operations != 0 or n_variables != 0:
            n_symbols = n_variables + n_operations
            complexity = complexity + n_symbols * np.log2(n_symbols)
    except Exception:
        # If the symbol count fails, only the numeric part is used.
        pass

    # run zero snap on polyfit output
    PA_poly = ParetoSet()
    PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn)))
    PA_poly = add_snap_expr_on_pareto_polyfit(pathdir, filename, str(eqn),
                                              PA_poly)

    for point in PA_poly.get_pareto_points():
        PA.add(Point(point[0], point[1], point[2]))

    print("Complexity RMSE Expression")
    for point in PA.get_pareto_points():
        print(point)

    return PA
def add_snap_expr_on_pareto_polyfit(pathdir, filename, math_expr, PA):
    """Add integer- and rational-snapped variants of ``math_expr`` to ``PA``.

    Every numeric constant of the expression is replaced by a placeholder
    symbol (p0, p1, ...).  ``integerSnap`` / ``rationalSnap`` are then asked
    to snap 1, 2, ... constants, and each resulting expression is scored
    (error + complexity) and added to the Pareto set.

    NOTE(review): this module defines ``add_snap_expr_on_pareto_polyfit`` a
    second time further down; the later definition replaces this one when the
    module is imported.

    Args:
        pathdir: directory containing the data file.
        filename: name of the data file; it is loaded with ``np.loadtxt``.
        math_expr: expression (anything ``str()`` turns into parseable text).
        PA: Pareto set that receives the snapped candidates (updated in place).

    Returns:
        The same ``PA`` object.
    """
    input_data = np.loadtxt(pathdir + filename)

    def get_next_available_key(iterable, key, midfix="", suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        i = 0
        underscore = "_" if is_underscore else ""
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        if isinstance(expr, (sympy.numbers.Float, sympy.numbers.Integer,
                             sympy.numbers.Rational, sympy.numbers.Pi)):
            used_param_names = list(param_dict.keys()) + list(
                unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(
                used_param_names, "p", is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        return expr.func(*[
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ])

    def is_atomic_number(node):
        return node.is_Atom and node.is_number

    expr = parse_expr(str(math_expr))

    def snap_variants(snap_function):
        """Return the expressions obtained by snapping 1..n constants."""
        # Get the numbers appearing in the expression (fresh list per snap
        # kind, as in the original code).
        eq_numbers = [
            node for node in preorder_traversal(expr) if is_atomic_number(node)
        ]
        variants = []
        for n_snapped in range(1, len(eq_numbers) + 1):
            try:
                # The dummy "p" entry makes the generated names start at p0.
                unsnapped_param_dict = {'p': 1}
                template = unsnap_recur(expr, {}, unsnapped_param_dict)
                del unsnapped_param_dict["p"]
                values = dict(unsnapped_param_dict)
                snapped = snap_function(eq_numbers, n_snapped)
                values.update({"p" + str(k): v for k, v in snapped.items()})
                # Substitute whole placeholder names only.  The previous
                # pattern "(p\d*)" also matched the "p" inside "exp", which
                # made every expression containing exp() fail and get
                # silently skipped.
                new_eq = re.sub(r"\bp\d+\b",
                                lambda match: str(values[match.group(0)]),
                                str(template))
                variants.append(parse_expr(new_eq))
            except Exception:
                # Snapping is best effort: skip counts that cannot be built.
                continue
        return variants

    # Zero snap is not performed by this variant.
    integer_snapped_expr = snap_variants(integerSnap)
    rational_snapped_expr = snap_variants(rationalSnap)

    for candidate in integer_snapped_expr + rational_snapped_expr:
        try:
            # Calculate the error of the new, snapped expression
            # NOTE(review): other call sites in this file pass
            # (pathdir, filename, expr) to get_symbolic_expr_error; confirm
            # that the two-argument (data, expr) form used here is supported.
            snapped_error = get_symbolic_expr_error(input_data, str(candidate))

            # Calculate the complexity of the new, snapped expression.
            # The expression is re-parsed without a local namespace: passing
            # locals() exposed every local variable of this function to the
            # parser, so an input variable named e.g. "i" or "w" was replaced
            # by a Python value.
            # NOTE(review): the original also looped over free_symbols calling
            # symbols(..., real=True); that loop never influenced the parse.
            snapped = intify(parse_expr(str(candidate)))
            snapped_complexity = 0
            for node in preorder_traversal(snapped):
                if is_atomic_number(node):
                    snapped_complexity = (
                        snapped_complexity + get_number_DL_snapped(float(node)))
            # Add the complexity due to symbols
            n_variables = len(snapped.free_symbols)
            n_operations = len(count_ops(snapped, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                n_symbols = n_variables + n_operations
                snapped_complexity = (
                    snapped_complexity + n_symbols * np.log2(n_symbols))

            PA.add(
                Point(x=snapped_complexity, y=snapped_error,
                      data=str(snapped)))
        except Exception:
            continue
    return (PA)
def add_bf_on_numbers_on_pareto(pathdir, filename, PA, math_expr):
    """Replace each constant of ``math_expr`` by brute-force proposals.

    For every numeric constant of the expression, the constant is written to
    ``pathdir + "number_for_bf_<index>.txt"``, ``brute_force_number`` is run
    on that file, and each proposal read back from ``results.dat`` replaces
    the constant.  Every resulting expression is scored and added to ``PA``.

    Args:
        pathdir: directory of the data file; the number files go here too.
        filename: data file used to evaluate the error of each candidate.
        PA: Pareto set that receives the candidates (updated in place).
        math_expr: expression (anything ``str()`` turns into parseable text).

    Returns:
        The same ``PA`` object.

    Errors raised while running brute force or reading its results propagate
    to the caller; errors while scoring a single candidate only skip it.
    """

    def get_next_available_key(iterable, key, midfix="", suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        i = 0
        underscore = "_" if is_underscore else ""
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        if isinstance(expr, (sympy.numbers.Float, sympy.numbers.Integer,
                             sympy.numbers.Rational, sympy.numbers.Pi)):
            used_param_names = list(param_dict.keys()) + list(
                unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(
                used_param_names, "p", is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        return expr.func(*[
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ])

    def is_atomic_number(node):
        return node.is_Atom and node.is_number

    expr = parse_expr(str(math_expr))

    # Get the numbers appearing in the expression
    eq_numbers = [
        node for node in preorder_traversal(expr) if is_atomic_number(node)
    ]

    # The placeholder form of the expression does not depend on which number
    # is being replaced, so it is built once.  The dummy "p" entry makes the
    # generated names start at p0.
    unsnapped_param_dict = {'p': 1}
    template = unsnap_recur(expr, {}, unsnapped_param_dict)
    param_names = [name for name in unsnapped_param_dict if name != "p"]

    # Do bf on one parameter at a time
    bf_on_numbers_expr = []
    for w, number in enumerate(eq_numbers):
        number_file = "number_for_bf_%s.txt" % w
        np.savetxt(pathdir + number_file, [number])
        brute_force_number(pathdir, number_file)
        # Load the predictions made by the bf code.  np.loadtxt returns a 0-d
        # array when results.dat has a single line; atleast_1d keeps that
        # single proposal iterable instead of raising on len().
        bf_numbers = np.atleast_1d(
            np.loadtxt("results.dat", usecols=(1, ), dtype="str"))
        new_numbers = copy.deepcopy(eq_numbers)
        # replace the number under consideration by all the proposed bf numbers
        for rpn in bf_numbers:
            new_numbers[w] = parse_expr(RPN_to_eq(rpn))
            eq = template
            for position, name in enumerate(param_names):
                eq = eq.subs(name, new_numbers[position])
            bf_on_numbers_expr.append(eq)

    for candidate in bf_on_numbers_expr:
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(candidate))
            # Calculate the complexity of the new, snapped expression
            simplified = simplify(powsimp(candidate))
            snapped_complexity = 0
            for node in preorder_traversal(simplified):
                if is_atomic_number(node):
                    snapped_complexity = (
                        snapped_complexity + get_number_DL_snapped(float(node)))
            # Add the complexity due to symbols
            n_variables = len(simplified.free_symbols)
            n_operations = len(
                count_ops(simplified, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                n_symbols = n_variables + n_operations
                snapped_complexity = (
                    snapped_complexity + n_symbols * np.log2(n_symbols))

            PA.add(
                Point(x=snapped_complexity, y=snapped_error,
                      data=str(simplified)))
        except Exception:
            # A candidate that cannot be scored is dropped.
            continue
    return (PA)
def add_snap_expr_on_pareto_polyfit(pathdir, filename, math_expr, PA):
    """Add zero-snapped variants of ``math_expr`` to the Pareto set ``PA``.

    ``zeroSnap`` is asked to snap 1, 2, ... of the numeric constants of the
    expression; each resulting expression is scored (error on
    ``pathdir``/``filename`` plus complexity) and added to ``PA``.

    Args:
        pathdir: directory of the data file.
        filename: data file used to evaluate the error of each candidate.
        math_expr: expression (anything ``str()`` turns into parseable text).
        PA: Pareto set that receives the candidates (updated in place).

    Returns:
        The same ``PA`` object.
    """

    def get_next_available_key(iterable, key, midfix="", suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        i = 0
        underscore = "_" if is_underscore else ""
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        if isinstance(expr, (sympy.numbers.Float, sympy.numbers.Integer,
                             sympy.numbers.Rational, sympy.numbers.Pi)):
            used_param_names = list(param_dict.keys()) + list(
                unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(
                used_param_names, "p", is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        return expr.func(*[
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ])

    def is_atomic_number(node):
        return node.is_Atom and node.is_number

    expr = parse_expr(str(math_expr))

    # Get the numbers appearing in the expression
    eq_numbers = [
        node for node in preorder_traversal(expr) if is_atomic_number(node)
    ]

    # Do zero snap one parameter at a time
    zero_snapped_expr = []
    for n_snapped in range(1, len(eq_numbers) + 1):
        try:
            # The dummy "p" entry makes the generated names start at p0.
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, {}, unsnapped_param_dict)
            new_numbers = zeroSnap(eq_numbers, n_snapped)
            # NOTE(review): snapped values are written back into eq_numbers,
            # so they carry over to the following iterations -- kept as in
            # the original; confirm this accumulation is intended.
            for kk in range(len(new_numbers)):
                eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
            param_names = [
                name for name in unsnapped_param_dict if name != "p"
            ]
            for position, name in enumerate(param_names):
                eq = eq.subs(name, eq_numbers[position])
            zero_snapped_expr.append(eq)
        except Exception:
            continue

    for candidate in zero_snapped_expr:
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(candidate))
            # Calculate the complexity of the new, snapped expression.
            # The expression is re-parsed without a local namespace: passing
            # locals() exposed every local variable of this function to the
            # parser, so an input variable named e.g. "i" or "w" was replaced
            # by a Python value.  The simplify(powsimp(...)) result that was
            # computed and immediately overwritten is no longer computed.
            # NOTE(review): the original also looped over free_symbols calling
            # symbols(..., real=True); that loop never influenced the parse.
            # Passing an explicit {name: Symbol(name, real=True)} dict would
            # change simplification results, so it is left for review.
            snapped = intify(simplify(parse_expr(str(candidate))))
            snapped_complexity = 0
            for node in preorder_traversal(snapped):
                if is_atomic_number(node):
                    snapped_complexity = (
                        snapped_complexity + get_number_DL_snapped(float(node)))
            # Add the complexity due to symbols
            n_variables = len(snapped.free_symbols)
            n_operations = len(count_ops(snapped, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                n_symbols = n_variables + n_operations
                snapped_complexity = (
                    snapped_complexity + n_symbols * np.log2(n_symbols))

            PA.add(
                Point(x=snapped_complexity, y=snapped_error,
                      data=str(snapped)))
        except Exception:
            print("error")
            print("")
            continue
    return (PA)
def add_snap_expr_on_pareto(pathdir, filename, math_expr, PA, DR_file=""):
    """Add integer- and rational-snapped variants of ``math_expr`` to ``PA``.

    ``integerSnap`` and ``rationalSnap`` are asked to snap 1, 2, ... of the
    numeric constants of the expression.  Each resulting expression is scored
    (error on ``pathdir``/``filename`` plus complexity) and added to ``PA``.

    Args:
        pathdir: directory of the data file.
        filename: data file used to evaluate the error of each candidate.
        math_expr: expression (anything ``str()`` turns into parseable text).
        PA: Pareto set that receives the candidates (updated in place).
        DR_file: optional comma-separated file.  When given, the variables
            x0, x1, ... of each candidate are replaced by entries 2, 3, ... of
            that file and the result is multiplied by entry 1 before the
            symbol complexity is computed and the expression is stored.

    Returns:
        The same ``PA`` object.
    """
    import re

    def get_next_available_key(iterable, key, midfix="", suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        i = 0
        underscore = "_" if is_underscore else ""
        while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                  suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        if isinstance(expr, (sympy.numbers.Float, sympy.numbers.Integer,
                             sympy.numbers.Rational, sympy.numbers.Pi)):
            used_param_names = list(param_dict.keys()) + list(
                unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(
                used_param_names, "p", is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        return expr.func(*[
            unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
            for sub_expr in expr.args
        ])

    def is_atomic_number(node):
        return node.is_Atom and node.is_number

    def symbol_complexity(parsed):
        """Return the complexity contribution of variables and operations."""
        n_variables = len(parsed.free_symbols)
        n_operations = len(count_ops(parsed, visual=True).free_symbols)
        if n_operations != 0 or n_variables != 0:
            n_symbols = n_variables + n_operations
            return n_symbols * np.log2(n_symbols)
        return 0

    expr = parse_expr(str(math_expr))

    # Get the numbers appearing in the expression
    eq_numbers = [
        node for node in preorder_traversal(expr) if is_atomic_number(node)
    ]

    # Do integer snap one parameter at a time
    integer_snapped_expr = []
    for n_snapped in range(1, len(eq_numbers) + 1):
        try:
            # The dummy "p" entry makes the generated names start at p0.
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, {}, unsnapped_param_dict)
            new_numbers = integerSnap(eq_numbers, n_snapped)
            # NOTE(review): snapped values are written back into eq_numbers,
            # so they carry over to the following iterations -- kept as in
            # the original; confirm this accumulation is intended.
            for kk in range(len(new_numbers)):
                eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
            param_names = [
                name for name in unsnapped_param_dict if name != "p"
            ]
            for position, name in enumerate(param_names):
                eq = eq.subs(name, eq_numbers[position])
            integer_snapped_expr.append(eq)
        except Exception:
            continue

    # Zero snap is not performed by this function.

    # Get the numbers appearing in the expression (fresh, unsnapped list)
    eq_numbers = [
        node for node in preorder_traversal(expr) if is_atomic_number(node)
    ]

    # Do rational snap one parameter at a time
    rational_snapped_expr = []
    for n_snapped in range(1, len(eq_numbers) + 1):
        try:
            # Work on a copy so that the unsnapped numbers stay untouched.
            eq_numbers_snap = copy.deepcopy(eq_numbers)
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, {}, unsnapped_param_dict)
            new_numbers = rationalSnap(eq_numbers, n_snapped)
            for kk in range(len(new_numbers)):
                eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
            param_names = [
                name for name in unsnapped_param_dict if name != "p"
            ]
            for position, name in enumerate(param_names):
                value = eq_numbers_snap[position]
                try:
                    # Snapped entries are (numerator, denominator) pairs.
                    eq = eq.subs(name, Rational(value[0], value[1]))
                except Exception:
                    # Unsnapped entries are plain numbers.
                    eq = eq.subs(name, value)
            rational_snapped_expr.append(eq)
        except Exception:
            continue

    for candidate in integer_snapped_expr + rational_snapped_expr:
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(candidate))
            # Calculate the complexity of the new, snapped expression.
            # The expression is re-parsed without a local namespace: passing
            # locals() exposed every local variable of this function to the
            # parser, so a variable named e.g. "i", "j" or "w" was replaced by
            # a Python value.  The simplify(powsimp(...)) result that was
            # computed and immediately overwritten is no longer computed.
            # NOTE(review): the original also looped over free_symbols calling
            # symbols(..., real=True); that loop never influenced the parse.
            snapped = intify(simplify(parse_expr(str(candidate))))
            snapped_complexity = 0
            for node in preorder_traversal(snapped):
                if is_atomic_number(node):
                    snapped_complexity = (
                        snapped_complexity + get_number_DL_snapped(float(node)))

            if DR_file != "":
                # If a bf file is provided, replace the variables with the
                # actual ones before calculating the complexity
                dr_data = np.loadtxt(DR_file, dtype="str", delimiter=",")
                n_reduced = len(dr_data) - 3

                def expand_variable(match):
                    index = int(match.group(1))
                    if index < n_reduced:
                        return "(" + dr_data[index + 2] + ")"
                    return match.group(0)

                # One pass over whole identifiers: plain str.replace of "x1"
                # also rewrote the prefix of "x10", "x11", ... and re-scanned
                # text that had already been substituted.
                expanded = re.sub(r"\bx(\d+)\b", expand_variable,
                                  str(snapped))
                expanded = "(" + dr_data[1] + ")*(" + expanded + ")"
                snapped = simplify(parse_expr(expanded))

            snapped_complexity = snapped_complexity + symbol_complexity(
                snapped)

            PA.add(
                Point(x=snapped_complexity, y=snapped_error,
                      data=str(snapped)))
        except Exception:
            continue
    return (PA)
def run_aifeynman(pathdir, filename, BF_try_time, BF_ops_file_type,
                  polyfit_deg=4, NN_epochs=4000, vars_name=[],
                  test_percentage=20):
    """Run the full pipeline on one data file and save the results.

    Steps, in order: optional dimensional analysis, random train/test split,
    ``run_AI_all`` on the training file, brute force on the constants of the
    resulting expressions, a first snap, a final gradient descent, a second
    snap, and writing the solution files under ``results/``.

    Args:
        pathdir: directory of the data file.
        filename: name of the data file.
        BF_try_time, BF_ops_file_type, polyfit_deg, NN_epochs: forwarded
            unchanged to ``run_AI_all``.
        vars_name: variable names; when non-empty, ``dimensionalAnalysis`` is
            run first.  The default list is never modified.
        test_percentage: percentage of rows held out as test data.

    Returns:
        None.  Output goes to ``<file>_train`` / ``<file>_test`` in
        ``pathdir`` and to the ``results/`` directory.
    """
    import os

    # If the variable names are passed, do the dimensional analysis first
    filename_orig = filename
    DR_file = ""
    try:
        if vars_name != []:
            dimensionalAnalysis(pathdir, filename, vars_name)
            DR_file = filename + "_dim_red_variables.txt"
            filename = filename + "_dim_red"
    except Exception as err:
        # Best effort: continue on the original data without reduction.
        print("Dimensional analysis failed, continuing without it: %s" % err)
        DR_file = ""

    # Split the data into train and test set
    input_data = np.loadtxt(pathdir + filename)
    n_rows = len(input_data)
    sep_idx = np.random.permutation(n_rows)
    n_train = (100 - test_percentage) * n_rows // 100
    train_data = input_data[sep_idx[:n_train]]
    # The test rows are exactly the rows not used for training.  The previous
    # slice started at test_percentage * n_rows // 100, which overlapped the
    # training rows (80% of the data at the default of 20).
    test_data = input_data[sep_idx[n_train:]]

    np.savetxt(pathdir + filename + "_train", train_data)
    if test_data.size != 0:
        np.savetxt(pathdir + filename + "_test", test_data)

    # Make sure the output directory used below exists.
    os.makedirs("results", exist_ok=True)

    # Run the code on the train data
    PA = run_AI_all(pathdir, filename + "_train", BF_try_time,
                    BF_ops_file_type, polyfit_deg, NN_epochs)
    PA_list = PA.get_pareto_points()

    # Run bf snap on the resulted equations
    for point in PA_list:
        try:
            PA = add_bf_on_numbers_on_pareto(pathdir, filename, PA, point[-1])
        except Exception:
            continue
    PA_list = PA.get_pareto_points()

    np.savetxt("results/solution_before_snap_%s.txt" % filename, PA_list,
               fmt="%s")

    # Run zero, integer and rational snap on the resulted equations
    PA_snapped_1 = ParetoSet()
    for point in PA_list:
        PA_snapped_1 = add_snap_expr_on_pareto(pathdir, filename, point[-1],
                                               PA_snapped_1, "")
    PA_list = PA_snapped_1.get_pareto_points()

    np.savetxt("results/solution_first_snap_%s.txt" % filename, PA_list,
               fmt="%s")

    # Run gradient descent on the data one more time
    for point in PA_list:
        try:
            gd_update = final_gd(pathdir + filename, point[-1])
            PA_snapped_1.add(
                Point(x=gd_update[1], y=gd_update[0], data=gd_update[2]))
        except Exception:
            continue
    PA_list = PA_snapped_1.get_pareto_points()

    PA_snapped = ParetoSet()
    for point in PA_list:
        PA_snapped = add_snap_expr_on_pareto(pathdir, filename, point[-1],
                                             PA_snapped, DR_file)

    list_dt = np.array(PA_snapped.get_pareto_points())
    # Same file as loaded above, so its row count is reused.
    data_file_len = n_rows
    log_err = np.array([np.log2(float(row[1])) for row in list_dt])
    log_err_all = np.array(
        [data_file_len * np.log2(float(row[1])) for row in list_dt])

    # Try the found expressions on the test data
    if DR_file == "" and test_data.size != 0:
        test_errors = np.array([
            get_symbolic_expr_error(pathdir, filename + "_test", str(row[-1]))
            for row in list_dt
        ])
        # Save all the data to file
        save_data = np.column_stack(
            (test_errors, log_err, log_err_all, list_dt))
    else:
        save_data = np.column_stack((log_err, log_err_all, list_dt))
    np.savetxt("results/solution_%s" % filename_orig, save_data, fmt="%s")