# Example 1
# 0
def final_gd(data_file, math_expr, lr=1e-2, N_epochs=5000):
    """Fine-tune the numeric constants of a symbolic expression by gradient
    descent against the data in ``data_file``.

    Parameters
    ----------
    data_file : str
        Path to a whitespace-separated table loadable by ``np.loadtxt``;
        the last column is the target, preceding columns are inputs x0, x1, ...
    math_expr : str
        Sympy-parsable expression whose float constants will be optimized.
    lr : float
        Learning rate for the first training phase (a second phase reruns
        with ``lr / 10`` to refine).
    N_epochs : int
        Number of epochs per training phase.

    Returns
    -------
    (error, complexity, eq)
        Mean-squared error on the data, description length of the fitted
        constants, and the updated sympy expression.
    """
    param_dict = {}
    # 'p' is a placeholder so generated parameter names start at "p0".
    unsnapped_param_dict = {'p': 1}

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively replace each float in ``expr`` with a fresh symbol,
        recording the original value in ``unsnapped_param_dict``."""
        import sympy
        from sympy import Symbol
        if isinstance(expr, sympy.numbers.Float):
            used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
            name = get_next_available_key(used_param_names, "p",
                                          is_underscore=False)
            unsnapped_param_dict[name] = float(expr)
            return Symbol(name)
        if isinstance(expr, sympy.symbol.Symbol):
            return expr
        # Composite node: rebuild it from its unsnapped children.
        return expr.func(*[unsnap_recur(sub, param_dict, unsnapped_param_dict)
                           for sub in expr.args])

    def get_next_available_key(iterable, key, midfix="", suffix="",
                               is_underscore=True):
        """Return the first name of the form key[_]<midfix><i><suffix> that
        does not collide with the names already in ``iterable``."""
        if key + suffix not in iterable:
            return key + suffix
        underscore = "_" if is_underscore else ""
        i = 0
        while "{}{}{}{}{}".format(key, underscore, midfix, i, suffix) in iterable:
            i += 1
        return "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)

    # Load the actual data.
    data = np.loadtxt(data_file)

    # Replace the numeric constants of the expression with symbols p0, p1, ...
    eq = parse_expr(math_expr)
    eq = unsnap_recur(eq, param_dict, unsnapped_param_dict)

    N_vars = len(data[0]) - 1
    N_params = len(unsnapped_param_dict)  # includes the 'p' placeholder

    variables = ["x%s" % i for i in range(N_vars)]
    params = ["p%s" % i for i in range(N_params - 1)]

    # lambdify argument order is fixed: parameters first, then variables.
    f = lambdify(params + variables, N(eq), torch)

    # One leaf tensor per learnable parameter.  (The original assigned
    # through vars()[name]; mutating the locals() mapping in function scope
    # is unsupported and breaks under PEP 667 / Python 3.13 — a plain list
    # is the reliable equivalent and preserves insertion order.)
    trainable_parameters = []
    for name, value in unsnapped_param_dict.items():
        if name != "p":
            t = torch.tensor(value)
            t.requires_grad = True
            trainable_parameters.append(t)

    # Data columns as torch tensors; last column is the target.
    real_variables = [torch.from_numpy(data[:, i]).float()
                      for i in range(N_vars)]
    # This order is fixed: parameters first, matching the lambdify signature.
    f_args = trainable_parameters + real_variables
    y = torch.from_numpy(data[:, -1]).float()

    def train(step_size):
        """Plain gradient descent on the mean-squared error."""
        for _ in range(N_epochs):
            loss = torch.mean((f(*f_args) - y) ** 2)
            loss.backward()
            with torch.no_grad():
                for p in trainable_parameters:
                    p -= step_size * p.grad
                    p.grad.zero_()

    # Two phases: full learning rate, then a 10x smaller one to refine.
    train(lr)
    train(lr / 10)

    # Substitute the fitted values back into the symbolic expression and
    # accumulate the description length of each fitted constant.
    complexity = 0
    fitted_names = [n for n in unsnapped_param_dict if n != "p"]
    for name, p in zip(fitted_names, trainable_parameters):
        eq = eq.subs(name, p)
        complexity = complexity + get_number_DL(p.detach().numpy())

    error = torch.mean((f(*f_args) - y) ** 2).data.numpy() * 1
    return error, complexity, eq
# Example 2
# 0
# Inverse transformation (as a format string) applied to a candidate
# expression to undo the transformation named by ``output_type`` that was
# applied to the data.  E.g. data transformed by cos is undone with acos.
_OUTPUT_UNWRAP = {
    "": "{}",
    "acos": "cos({})",
    "asin": "sin({})",
    "atan": "tan({})",
    "cos": "acos({})",
    "exp": "log({})",
    "inverse": "1/({})",
    "log": "exp({})",
    # The inverse of sin is asin (the previous code wrapped with acos,
    # breaking the pattern every other entry follows).
    "sin": "asin({})",
    "sqrt": "({})**2",
    "squared": "sqrt({})",
    "tan": "atan({})",
}


def _expr_complexity(expr):
    """Description-length complexity of a sympy expression: the DL of each
    numeric constant plus a log2 term for the symbol/operation count."""
    compl = 0
    for sub in preorder_traversal(expr):
        if sub.is_Atom and sub.is_number:
            try:
                compl = compl + get_number_DL(float(sub))
            except Exception:
                compl = compl + 1000000  # un-floatable constant: huge penalty
    n_variables = len(expr.free_symbols)
    n_operations = len(count_ops(expr, visual=True).free_symbols)
    if n_operations != 0 or n_variables != 0:
        compl = compl + (n_variables + n_operations) * np.log2(n_variables + n_operations)
    return compl


def _process_bf_output(pathdir, filename, PA, output_type, op):
    """Score the brute-force candidates in results.dat and add them (plus a
    gradient-descent-refined version of each) to the Pareto set ``PA``.

    ``op`` is the separator ("+" or "*") between the prefactor and the RPN
    expression, matching the mode brute_force was run in.
    """
    bf_all_output = np.loadtxt("results.dat", dtype="str")
    express = bf_all_output[:, 2]
    prefactors = [str(p) for p in bf_all_output[:, 1]]

    complexity = []
    errors = []
    eqns = []
    for prefactor, rpn in zip(prefactors, express):
        try:
            eqn = _OUTPUT_UNWRAP[output_type].format(prefactor + op + RPN_to_eq(rpn))
            err = get_symbolic_expr_error(pathdir, filename, eqn)
            compl = _expr_complexity(parse_expr(eqn))
        except Exception:
            continue
        # Append only after everything succeeded so the three lists stay
        # aligned (the old code appended mid-try and could desynchronize).
        eqns.append(eqn)
        errors.append(err)
        complexity.append(compl)

    # Add the BF output to the Pareto plot.
    for compl, err, eqn in zip(complexity, errors, eqns):
        PA.add(Point(x=compl, y=err, data=eqn))

    # Run gradient descent on the BF output parameters and add the results.
    # Iterate eqns directly: indexing eqns by express' index (as before)
    # paired the wrong strings whenever a candidate above had failed.
    for eqn in eqns:
        try:
            err_gd, compl_gd, expr_gd = RPN_to_pytorch(pathdir + filename, eqn)
            PA.add(Point(x=compl_gd, y=err_gd, data=expr_gd))
        except Exception:
            continue


def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg=4, output_type=""):
    """Run brute-force symbolic search (with additive and multiplicative
    prefactors) and a polynomial fit on the transformed data, adding every
    candidate expression to the Pareto set.

    Parameters
    ----------
    pathdir, pathdir_transformed : str
        Directories holding the original and the transformed data files.
    filename : str
        Name of the data file.
    BF_try_time, BF_ops_file_type :
        Passed through to ``brute_force``.
    PA : ParetoSet
        Accumulates (complexity, error, expression) points; returned updated.
    polyfit_deg : int
        Maximum degree for the polynomial fit.
    output_type : str
        Name of the transformation that produced the transformed data
        ("", "sin", "log", ...); candidates are wrapped with its inverse.

    Returns
    -------
    ParetoSet
        The updated Pareto set ``PA``.
    """
    # ---- brute force with additive prefactor ------------------------------
    print("Checking for brute force + \n")
    brute_force(pathdir_transformed, filename, BF_try_time, BF_ops_file_type, "+")
    try:
        _process_bf_output(pathdir, filename, PA, output_type, "+")
    except Exception:
        # Best effort: a missing or malformed results.dat skips this stage.
        pass

    # ---- brute force with multiplicative prefactor ------------------------
    print("Checking for brute force * \n")
    brute_force(pathdir_transformed, filename, BF_try_time, BF_ops_file_type, "*")
    try:
        _process_bf_output(pathdir, filename, PA, output_type, "*")
    except Exception:
        pass

    # ---- polynomial fit ---------------------------------------------------
    print("Checking polyfit \n")
    polyfit_result = polyfit(polyfit_deg, pathdir_transformed + filename)
    eqn = _OUTPUT_UNWRAP.get(output_type, "{}").format(str(polyfit_result[0]))

    polyfit_err = get_symbolic_expr_error(pathdir, filename, eqn)
    expr = parse_expr(eqn)

    # Complexity of the polyfit expression, computed the same way as for the
    # gradient descent case: DL of the numeric constants of the wrapped
    # expression, plus a symbol/operation term taken from the raw polyfit
    # result (before output_type wrapping).
    complexity = 0
    for sub in preorder_traversal(expr):
        if sub.is_Atom and sub.is_number:
            complexity = complexity + get_number_DL(float(sub))
    try:
        n_variables = len(polyfit_result[0].free_symbols)
        n_operations = len(count_ops(polyfit_result[0], visual=True).free_symbols)
        if n_operations != 0 or n_variables != 0:
            complexity = complexity + (n_variables + n_operations) * np.log2(n_variables + n_operations)
    except Exception:
        pass

    # Zero-snap the polyfit constants and merge its Pareto points into PA.
    PA_poly = ParetoSet()
    PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn)))
    PA_poly = add_snap_expr_on_pareto_polyfit(pathdir, filename, str(eqn), PA_poly)
    for point in PA_poly.get_pareto_points():
        PA.add(Point(point[0], point[1], point[2]))

    print("Complexity  RMSE  Expression")
    for point in PA.get_pareto_points():
        print(point)

    return PA