Beispiel #1
0
def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit_deg=4, NN_epochs=4000, PA=PA):
    """Run the full symbolic-regression pipeline on one mystery data file.

    Tries brute force + polyfit on the raw data and on several transformed
    versions of the output, then trains (or loads) a neural network used to
    test for translational symmetries and separability.  When one is found,
    the data is reduced accordingly and this function recurses on the reduced
    file, merging the child results back into the Pareto set.

    Parameters
    ----------
    pathdir, filename : location of the whitespace-separated data file.
    BF_try_time : seconds allotted to each brute-force run.
    BF_ops_file_type : operator set used by the brute-force search.
    polyfit_deg : maximum degree for the polynomial fit.
    NN_epochs : training epochs for the symmetry-checking network.
    PA : ParetoSet accumulating (complexity, error, expression) points.
        NOTE(review): the default binds the module-level ParetoSet at import
        time, so repeated calls without an explicit PA share state.

    Returns
    -------
    The updated ParetoSet.
    """
    # Make sure the results directory exists.  (BUGFIX: the original wrapped
    # os.mkdir in a bare except, which also silenced unrelated OSErrors and
    # KeyboardInterrupt.)
    os.makedirs("results/", exist_ok=True)

    # load the data for different checks
    data = np.loadtxt(pathdir+filename)

    # Run bf and polyfit
    PA = run_bf_polyfit(pathdir,pathdir,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)

    # Run bf and polyfit on modified output
    PA = get_acos(pathdir,"results/mystery_world_acos/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_asin(pathdir,"results/mystery_world_asin/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_atan(pathdir,"results/mystery_world_atan/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_cos(pathdir,"results/mystery_world_cos/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_exp(pathdir,"results/mystery_world_exp/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_inverse(pathdir,"results/mystery_world_inverse/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_log(pathdir,"results/mystery_world_log/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_sin(pathdir,"results/mystery_world_sin/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_sqrt(pathdir,"results/mystery_world_sqrt/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_squared(pathdir,"results/mystery_world_squared/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_tan(pathdir,"results/mystery_world_tan/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)

#############################################################################################################################
    # check if the NN is trained. If it is not, train it on the data.
    print("Checking for symmetry \n", filename)
    # NOTE(review): len(data[0]) assumes a 2-D data file (>=2 columns); a
    # single-column file loads as a 1-D array and this line would raise
    # TypeError -- confirm upstream guarantees at least input+output columns.
    if len(data[0])<3:
        # Only one input variable: no symmetry/separability to exploit.
        print("Just one variable!")
    elif path.exists("results/NN_trained_models/models/" + filename + ".h5"):
        print("NN already trained \n")
        print("NN loss: ", NN_eval(pathdir,filename), "\n")
    elif path.exists("results/NN_trained_models/models/" + filename + "_pretrained.h5"):
        print("Found pretrained NN \n")
        # BUGFIX: floor division -- NN_epochs/2 passed a float epoch count.
        NN_train(pathdir,filename,NN_epochs//2,lrs=1e-3,N_red_lr=3,pretrained_path="results/NN_trained_models/models/" + filename + "_pretrained.h5")
        print("NN loss after training: ", NN_eval(pathdir,filename), "\n")
    else:
        print("Training a NN on the data... \n")
        NN_train(pathdir,filename,NN_epochs)
        print("NN loss: ", NN_eval(pathdir,filename), "\n")

    # Check which symmetry/separability is the best

    # Symmetries
    symmetry_minus_result = check_translational_symmetry_minus(pathdir,filename)
    symmetry_divide_result = check_translational_symmetry_divide(pathdir,filename)
    symmetry_multiply_result = check_translational_symmetry_multiply(pathdir,filename)
    symmetry_plus_result = check_translational_symmetry_plus(pathdir,filename)

    # Separabilities
    separability_plus_result = check_separability_plus(pathdir,filename)
    separability_multiply_result = check_separability_multiply(pathdir,filename)

    # NOTE(review): only the "+" symmetry result is tested for the -1 failure
    # flag; when it is -1 all six candidates are skipped, even if the others
    # succeeded -- confirm this is intended.
    if symmetry_plus_result[0]==-1:
        idx_min = -1
    else:
        idx_min = np.argmin(np.array([symmetry_plus_result[0], symmetry_minus_result[0], symmetry_multiply_result[0], symmetry_divide_result[0], separability_plus_result[0], separability_multiply_result[0]]))

    # Apply the best symmetry/separability, recurse on the reduced file, and
    # fold the child Pareto set back into PA.
    if idx_min == 0:
        new_pathdir, new_filename = do_translational_symmetry_plus(pathdir,filename,symmetry_plus_result[1],symmetry_plus_result[2])
        PA1_ = ParetoSet()
        PA1 = run_AI_all(new_pathdir,new_filename,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA1_)
        PA = add_sym_on_pareto(pathdir,filename,PA1,symmetry_plus_result[1],symmetry_plus_result[2],PA,"+")
        return PA

    elif idx_min == 1:
        new_pathdir, new_filename = do_translational_symmetry_minus(pathdir,filename,symmetry_minus_result[1],symmetry_minus_result[2])
        PA1_ = ParetoSet()
        PA1 = run_AI_all(new_pathdir,new_filename,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA1_)
        PA = add_sym_on_pareto(pathdir,filename,PA1,symmetry_minus_result[1],symmetry_minus_result[2],PA,"-")
        return PA

    elif idx_min == 2:
        new_pathdir, new_filename = do_translational_symmetry_multiply(pathdir,filename,symmetry_multiply_result[1],symmetry_multiply_result[2])
        PA1_ = ParetoSet()
        PA1 = run_AI_all(new_pathdir,new_filename,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA1_)
        PA = add_sym_on_pareto(pathdir,filename,PA1,symmetry_multiply_result[1],symmetry_multiply_result[2],PA,"*")
        return PA

    elif idx_min == 3:
        new_pathdir, new_filename = do_translational_symmetry_divide(pathdir,filename,symmetry_divide_result[1],symmetry_divide_result[2])
        PA1_ = ParetoSet()
        PA1 = run_AI_all(new_pathdir,new_filename,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA1_)
        PA = add_sym_on_pareto(pathdir,filename,PA1,symmetry_divide_result[1],symmetry_divide_result[2],PA,"/")
        return PA

    elif idx_min == 4:
        # Separability splits the data into two files; solve each and combine.
        new_pathdir1, new_filename1, new_pathdir2, new_filename2 = do_separability_plus(pathdir,filename,separability_plus_result[1],separability_plus_result[2])
        PA1_ = ParetoSet()
        PA1 = run_AI_all(new_pathdir1,new_filename1,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA1_)
        PA2_ = ParetoSet()
        PA2 = run_AI_all(new_pathdir2,new_filename2,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA2_)
        PA = combine_pareto(pathdir,filename,PA1,PA2,separability_plus_result[1],separability_plus_result[2],PA,"+")
        return PA

    elif idx_min == 5:
        new_pathdir1, new_filename1, new_pathdir2, new_filename2 = do_separability_multiply(pathdir,filename,separability_multiply_result[1],separability_multiply_result[2])
        PA1_ = ParetoSet()
        PA1 = run_AI_all(new_pathdir1,new_filename1,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA1_)
        PA2_ = ParetoSet()
        PA2 = run_AI_all(new_pathdir2,new_filename2,BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA2_)
        PA = combine_pareto(pathdir,filename,PA1,PA2,separability_multiply_result[1],separability_multiply_result[2],PA,"*")
        return PA
    else:
        # No usable symmetry/separability: return what we have so far.
        return PA
Beispiel #2
0
from S_change_output import *
from S_brute_force import brute_force
from S_combine_pareto import combine_pareto
from S_get_number_DL import get_number_DL
from sympy.parsing.sympy_parser import parse_expr
from sympy import preorder_traversal, count_ops
from S_polyfit import polyfit
from S_get_symbolic_expr_error import get_symbolic_expr_error
from S_add_snap_expr_on_pareto import add_snap_expr_on_pareto
from S_add_sym_on_pareto import add_sym_on_pareto
from S_run_bf_polyfit import run_bf_polyfit
from S_final_gd import final_gd
from S_add_bf_on_numbers_on_pareto import add_bf_on_numbers_on_pareto
from dimensionalAnalysis import dimensionalAnalysis

PA = ParetoSet()
def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit_deg=4, NN_epochs=4000, PA=PA):
    try:
        os.mkdir("results/")
    except:
        pass
    
    # load the data for different checks
    data = np.loadtxt(pathdir+filename)

    # Run bf and polyfit
    PA = run_bf_polyfit(pathdir,pathdir,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)

    # Run bf and polyfit on modified output
    PA = get_acos(pathdir,"results/mystery_world_acos/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
    PA = get_asin(pathdir,"results/mystery_world_asin/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
Beispiel #3
0
def run_aifeynman(pathdir,filename,BF_try_time,BF_ops_file_type, polyfit_deg=4, NN_epochs=4000, vars_name=[],test_percentage=20):
    """Top-level driver: optional dimensional analysis, train/test split,
    the main search (run_AI_all) and several rounds of snapping/refinement,
    writing the solution files under results/.

    Parameters
    ----------
    pathdir, filename : location of the data file.
    BF_try_time, BF_ops_file_type, polyfit_deg, NN_epochs : forwarded to
        run_AI_all.
    vars_name : optional list of variable names with units; when given,
        dimensional analysis runs first and the search operates on the
        dimensionally-reduced file.  (Read-only here, so the mutable
        default list is harmless.)
    test_percentage : percentage of rows held out for testing.
    """
    # If the variable names are passed, do the dimensional analysis first
    filename_orig = filename
    try:
        if vars_name!=[]:
            dimensionalAnalysis(pathdir,filename,vars_name)
            DR_file = filename + "_dim_red_variables.txt"
            filename = filename + "_dim_red"
        else:
            DR_file = ""
    except Exception:
        # Best effort: fall back to the raw file if dimensional analysis fails.
        DR_file = ""

    # Split the data into train and test set
    input_data = np.loadtxt(pathdir+filename)
    sep_idx = np.random.permutation(len(input_data))

    n_train = (100-test_percentage)*len(input_data)//100
    train_data = input_data[sep_idx[:n_train]]
    # BUGFIX: the original test slice started at test_percentage*len//100,
    # which overlaps the training rows; take the complement of the train
    # split so train and test are disjoint.
    test_data = input_data[sep_idx[n_train:]]

    np.savetxt(pathdir+filename+"_train",train_data)
    if test_data.size != 0:
        np.savetxt(pathdir+filename+"_test",test_data)

    PA = ParetoSet()
    # Run the code on the train data
    PA = run_AI_all(pathdir,filename+"_train",BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA=PA)
    PA_list = PA.get_pareto_points()

    # Run bf snap on the resulted equations (best effort per expression)
    for i in range(len(PA_list)):
        try:
            PA = add_bf_on_numbers_on_pareto(pathdir,filename,PA,PA_list[i][-1])
        except Exception:
            continue
    PA_list = PA.get_pareto_points()

    np.savetxt("results/solution_before_snap_%s.txt" %filename,PA_list,fmt="%s")

    # Run zero, integer and rational snap on the resulted equations
    for j in range(len(PA_list)):
        PA = add_snap_expr_on_pareto(pathdir,filename,PA_list[j][-1],PA, "")

    PA_list = PA.get_pareto_points()
    np.savetxt("results/solution_first_snap_%s.txt" %filename,PA_list,fmt="%s")

    # Run gradient descent on the data one more time (best effort)
    for i in range(len(PA_list)):
        try:
            gd_update = final_gd(pathdir,filename,PA_list[i][-1])
            PA.add(Point(x=gd_update[1],y=gd_update[0],data=gd_update[2]))
        except Exception:
            continue

    PA_list = PA.get_pareto_points()
    for j in range(len(PA_list)):
        PA = add_snap_expr_on_pareto(pathdir,filename,PA_list[j][-1],PA, DR_file)

    # Description-length bookkeeping: per-point and whole-dataset log error.
    list_dt = np.array(PA.get_pareto_points())
    data_file_len = len(np.loadtxt(pathdir+filename))
    log_err = []
    log_err_all = []
    for i in range(len(list_dt)):
        log_err = log_err + [np.log2(float(list_dt[i][1]))]
        log_err_all = log_err_all + [data_file_len*np.log2(float(list_dt[i][1]))]
    log_err = np.array(log_err)
    log_err_all = np.array(log_err_all)

    # Try the found expressions on the test data (only when no dimensional
    # reduction happened and a non-empty test split exists)
    if DR_file=="" and test_data.size != 0:
        test_errors = []
        for i in range(len(list_dt)):
            test_errors = test_errors + [get_symbolic_expr_error(pathdir,filename+"_test",str(list_dt[i][-1]))]
        test_errors = np.array(test_errors)
        # Save all the data to file
        save_data = np.column_stack((test_errors,log_err,log_err_all,list_dt))
    else:
        save_data = np.column_stack((log_err,log_err_all,list_dt))
    np.savetxt("results/solution_%s" %filename_orig,save_data,fmt="%s")
def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg=4, output_type=""):
    """Run brute force (with "+" and "*" prefactor combination) and polyfit
    on one data file and add every scored candidate to the Pareto set.

    Parameters
    ----------
    pathdir : directory of the original (untransformed) data, used when
        evaluating candidate errors.
    pathdir_transformed : directory of the (possibly output-transformed)
        data the search actually runs on.
    filename : data file name.
    BF_try_time, BF_ops_file_type : brute-force time budget / operator set.
    PA : ParetoSet to add (complexity, error, expression) points to.
    output_type : transformation that was applied to the output before the
        search ("", "acos", ..., "tan"); candidates are wrapped in the
        inverse transformation before scoring.
    polyfit_deg : maximum polynomial degree for polyfit.

    Returns
    -------
    The updated ParetoSet.
    """
#############################################################################################################################
    # run BF on the data, combining prefactor and expression with "+" then "*"
    PA = _run_bf_one_operator(pathdir, pathdir_transformed, filename,
                              BF_try_time, BF_ops_file_type, "+", output_type, PA)
    PA = _run_bf_one_operator(pathdir, pathdir_transformed, filename,
                              BF_try_time, BF_ops_file_type, "*", output_type, PA)

#############################################################################################################################
    # run polyfit on the data
    print("Checking polyfit \n")
    polyfit_result = polyfit(polyfit_deg, pathdir_transformed+filename)
    eqn = _undo_output_transform(str(polyfit_result[0]), output_type)

    polyfit_err = get_symbolic_expr_error(pathdir,filename,eqn)
    expr = parse_expr(eqn)

    # Description length of the numeric constants of the (wrapped) expression.
    complexity = 0
    for sub in preorder_traversal(expr):
        if sub.is_Atom and sub.is_number:
            complexity = complexity + get_number_DL(float(sub))
    try:
        # Add the complexity due to symbols.  NOTE(review): computed from the
        # raw polynomial rather than the wrapped expression, as in the
        # original -- confirm this asymmetry with the BF branch is intended.
        n_variables = len(polyfit_result[0].free_symbols)
        n_operations = len(count_ops(polyfit_result[0],visual=True).free_symbols)
        if n_operations!=0 or n_variables!=0:
            complexity = complexity + (n_variables+n_operations)*np.log2((n_variables+n_operations))
    except Exception:
        pass

    # run zero snap on polyfit output
    PA_poly = ParetoSet()
    PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn)))
    PA_poly = add_snap_expr_on_pareto_polyfit(pathdir, filename, str(eqn), PA_poly)

    for snapped in PA_poly.get_pareto_points():
        PA.add(Point(snapped[0], snapped[1], snapped[2]))

    print("Complexity  RMSE  Expression")
    for point in PA.get_pareto_points():
        print(point)

    return PA


# Inverse of each output transformation: when the search ran on f(y), a
# candidate expression g must be reported as f_inverse(g) so it predicts y.
_OUTPUT_INVERSE_TEMPLATES = {
    "": "%s",
    "acos": "cos(%s)",
    "asin": "sin(%s)",
    "atan": "tan(%s)",
    "cos": "acos(%s)",
    "exp": "log(%s)",
    "inverse": "1/(%s)",
    "log": "exp(%s)",
    # BUGFIX: the original wrapped the "sin" output with acos(); the inverse
    # of the sin transformation is asin() (mirroring "asin" -> sin above).
    "sin": "asin(%s)",
    "sqrt": "(%s)**2",
    "squared": "sqrt(%s)",
    "tan": "atan(%s)",
}


def _undo_output_transform(inner, output_type):
    """Wrap expression string `inner` in the inverse of `output_type`."""
    # Unknown output types fall back to the identity, matching the original
    # if/elif chain which left the expression unwrapped.
    return _OUTPUT_INVERSE_TEMPLATES.get(output_type, "%s") % inner


def _expression_complexity(expr):
    """Description-length complexity of a sympy expression: DL of each numeric
    constant plus (n_symbols+n_ops)*log2(n_symbols+n_ops)."""
    compl = 0
    for sub in preorder_traversal(expr):
        if sub.is_Atom and sub.is_number:
            try:
                compl = compl + get_number_DL(float(sub))
            except Exception:
                # Unrepresentable constant: heavily penalize rather than fail.
                compl = compl + 1000000
    # Add the complexity due to symbols
    n_variables = len(expr.free_symbols)
    n_operations = len(count_ops(expr,visual=True).free_symbols)
    if n_operations!=0 or n_variables!=0:
        compl = compl + (n_variables+n_operations)*np.log2((n_variables+n_operations))
    return compl


def _run_bf_one_operator(pathdir, pathdir_transformed, filename, BF_try_time,
                         BF_ops_file_type, sep, output_type, PA):
    """Run one brute-force pass (prefactor combined with `sep`, "+" or "*"),
    score every returned candidate and add it to the Pareto set PA.

    The original duplicated this ~150-line body verbatim for "+" and "*".
    """
    print("Checking for brute force %s \n" % sep)
    brute_force(pathdir_transformed,filename,BF_try_time,BF_ops_file_type,sep)

    try:
        # load the BF output data
        bf_all_output = np.loadtxt("results.dat", dtype="str")
        express = bf_all_output[:,2]
        prefactors = [str(p) for p in bf_all_output[:,1]]

        # Score each candidate the same way as for the gradient descent case.
        eqns = []
        errors = []
        complexity = []
        for pref, rpn in zip(prefactors, express):
            try:
                eqn = _undo_output_transform(pref + sep + RPN_to_eq(rpn), output_type)
                err = get_symbolic_expr_error(pathdir,filename,eqn)
                compl = _expression_complexity(parse_expr(eqn))
                # BUGFIX: append all three only after everything succeeded;
                # the original appended eqns/errors before computing the
                # complexity, so a late failure misaligned the three lists.
                eqns.append(eqn)
                errors.append(err)
                complexity.append(compl)
            except Exception:
                continue

        # add the BF output to the Pareto plot
        for compl, err, eqn in zip(complexity, errors, eqns):
            PA.add(Point(x=compl, y=err, data=eqn))

        # run gradient descent on BF output parameters and add the results to
        # the Pareto plot.  (BUGFIX: iterate the successfully scored
        # equations, not range(len(express)), which could index past eqns.)
        for eqn in eqns:
            try:
                bf_gd_update = RPN_to_pytorch(pathdir+filename, eqn)
                PA.add(Point(x=bf_gd_update[1],y=bf_gd_update[0],data=bf_gd_update[2]))
            except Exception:
                continue
    except Exception:
        # Best effort: a failed BF pass (e.g. missing results.dat) is skipped.
        pass

    return PA