# Each supported ``output_type`` names a function; the value is the
# (prefix, suffix) pair that wraps an expression string in the inverse of that
# function, e.g. "log" -> "exp(" + expression + ")".
_INVERSE_TRANSFORM_WRAPPERS = {
    "": ("", ""),
    "acos": ("cos(", ")"),
    "asin": ("sin(", ")"),
    "atan": ("tan(", ")"),
    "cos": ("acos(", ")"),
    "exp": ("log(", ")"),
    "inverse": ("1/(", ")"),
    "log": ("exp(", ")"),
    # This entry used to emit "acos(", duplicating the "cos" entry and
    # breaking the function/inverse pairing every other entry follows.
    "sin": ("asin(", ")"),
    "sqrt": ("(", ")**2"),
    "squared": ("sqrt(", ")"),
    "tan": ("atan(", ")"),
}


def _bf_expression_complexity(expr):
    """Return the complexity score of a parsed brute-force expression.

    The score is the sum of ``get_number_DL`` over every atomic number in
    ``expr`` (a number that cannot be scored costs 1000000) plus
    ``k * log2(k)``, where ``k`` is the number of free symbols plus the number
    of distinct operation kinds reported by ``count_ops``.
    """
    compl = 0
    for node in preorder_traversal(expr):
        if not (node.is_Atom and node.is_number):
            continue
        try:
            compl = compl + get_number_DL(float(node))
        except Exception:
            compl = compl + 1000000

    # Add the complexity due to symbols
    n_variables = len(expr.free_symbols)
    n_operations = len(count_ops(expr, visual=True).free_symbols)
    if n_operations != 0 or n_variables != 0:
        compl = compl + (n_variables + n_operations) * np.log2(n_variables + n_operations)
    return compl


def _add_bf_results_to_pareto(pathdir, filename, PA, operator, wrapper):
    """Read ``results.dat`` and add every usable brute-force candidate to PA.

    ``operator`` is the string ("+" or "*") placed between the prefactor and
    the decoded expression; ``wrapper`` is a (prefix, suffix) pair from
    ``_INVERSE_TRANSFORM_WRAPPERS``. Any exception from loading the file or
    from ``PA.add`` in the first pass propagates to the caller.
    """
    prefix, suffix = wrapper

    # ndmin=2 keeps a results file holding a single row two-dimensional, so
    # the column slices below work instead of raising IndexError.
    bf_all_output = np.loadtxt("results.dat", dtype="str", ndmin=2)
    express = bf_all_output[:, 2]
    prefactors = [str(value) for value in bf_all_output[:, 1]]

    eqns = []     # every equation that could be built (input to gradient descent)
    scored = []   # (complexity, error, equation) for equations that could be scored
    for prefactor, rpn in zip(prefactors, express):
        try:
            eqn = prefix + prefactor + operator + RPN_to_eq(rpn) + suffix
        except Exception:
            continue
        eqns.append(eqn)
        try:
            error = get_symbolic_expr_error(pathdir, filename, eqn)
            compl = _bf_expression_complexity(parse_expr(eqn))
        except Exception:
            # Record nothing for this candidate; keeping the three values in
            # one tuple guarantees an equation is never paired with another
            # candidate's error or complexity.
            continue
        scored.append((compl, error, eqn))

    # add the BF output to the Pareto plot
    for compl, error, eqn in scored:
        PA.add(Point(x=compl, y=error, data=eqn))

    # run gradient descent of BF output parameters and add the results to the Pareto plot
    for eqn in eqns:
        try:
            bf_gd_update = RPN_to_pytorch(pathdir + filename, eqn)
            PA.add(Point(x=bf_gd_update[1], y=bf_gd_update[0], data=bf_gd_update[2]))
        except Exception:
            continue


def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg=4, output_type=""):
    """Run brute force ("+" and "*") and a polynomial fit, adding results to PA.

    For each brute-force mode, ``brute_force`` is run on
    ``pathdir_transformed``/``filename`` and its candidates are read back from
    ``results.dat``. Each candidate is wrapped according to ``output_type``,
    scored with ``get_symbolic_expr_error(pathdir, filename, ...)`` and added
    to ``PA``. Each candidate is also passed to ``RPN_to_pytorch`` and that
    result added too. Finally ``polyfit`` is run, its expression is scored the
    same way, snapped variants are generated with
    ``add_snap_expr_on_pareto_polyfit``, and the resulting Pareto points are
    merged into ``PA``.

    Args:
        pathdir: Directory prefix of the data file used for scoring.
        pathdir_transformed: Directory prefix of the data file handed to
            ``brute_force`` and ``polyfit``.
        filename: Name of the data file (appended to either prefix).
        BF_try_time: Forwarded unchanged to ``brute_force``.
        BF_ops_file_type: Forwarded unchanged to ``brute_force``.
        PA: Pareto set that receives the new points.
        polyfit_deg: Degree forwarded to ``polyfit``.
        output_type: Key of ``_INVERSE_TRANSFORM_WRAPPERS``. With an
            unrecognised value no brute-force candidate is added and the
            polyfit expression is left unwrapped.

    Returns:
        ``PA``, after printing its Pareto points.
    """
    wrapper = _INVERSE_TRANSFORM_WRAPPERS.get(output_type)

    for operator in ("+", "*"):
        print("Checking for brute force %s \n" % operator)
        brute_force(pathdir_transformed,filename,BF_try_time,BF_ops_file_type,operator)
        if wrapper is None:
            # Unknown output_type: there is no way to build an equation.
            continue
        try:
            _add_bf_results_to_pareto(pathdir, filename, PA, operator, wrapper)
        except Exception as exc:
            # Best effort: a missing or malformed results file must not stop
            # the remaining searches, but say so instead of hiding it.
            print("Skipping brute force %s results: %s" % (operator, exc))

    # run polyfit on the data
    print("Checking polyfit \n")
    polyfit_result = polyfit(polyfit_deg, pathdir_transformed+filename)
    eqn = str(polyfit_result[0])
    if wrapper is not None:
        eqn = wrapper[0] + eqn + wrapper[1]

    # Calculate the complexity of the polyfit expression the same way as for gradient descent case
    polyfit_err = get_symbolic_expr_error(pathdir,filename,eqn)
    expr = parse_expr(eqn)
    complexity = 0
    for node in preorder_traversal(expr):
        if node.is_Atom and node.is_number:
            complexity = complexity + get_number_DL(float(node))
    try:
        # Add the complexity due to symbols.
        # NOTE(review): this counts symbols/operations of the raw polyfit
        # result, not of the wrapped `expr` used for the numbers above --
        # confirm that this is intended.
        n_variables = len(polyfit_result[0].free_symbols)
        n_operations = len(count_ops(polyfit_result[0],visual=True).free_symbols)
        if n_operations!=0 or n_variables!=0:
            complexity = complexity + (n_variables+n_operations)*np.log2((n_variables+n_operations))
    except Exception:
        pass

    # run zero snap on polyfit output
    PA_poly = ParetoSet()
    PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn)))
    PA_poly = add_snap_expr_on_pareto_polyfit(pathdir, filename, str(eqn), PA_poly)

    for point in PA_poly.get_pareto_points():
        PA.add(Point(point[0], point[1], point[2]))

    print("Complexity  RMSE  Expression")
    for point in PA.get_pareto_points():
        print(point)

    return PA
def add_snap_expr_on_pareto_polyfit(pathdir, filename, math_expr, PA):
    """Add integer- and rational-snapped variants of ``math_expr`` to ``PA``.

    Every number in the parsed expression is turned into a placeholder
    parameter (p0, p1, ...). For ``w = 1 .. n`` the values returned by
    ``integerSnap(numbers, w)`` / ``rationalSnap(numbers, w)`` replace the
    matching placeholders, the remaining placeholders get their original float
    values, and the resulting expression is scored and added to ``PA``.
    Candidates that raise at any step are skipped.

    Args:
        pathdir: Directory prefix of the data file.
        filename: Name of the data file; ``pathdir + filename`` is loaded with
            ``np.loadtxt`` and passed to ``get_symbolic_expr_error``.
        math_expr: Expression (anything ``str()`` turns into parseable text).
        PA: Pareto set that receives the new points.

    Returns:
        ``PA``.
    """
    input_data = np.loadtxt(pathdir + filename)

    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        # Use the canonical sympy.core classes; top-level aliases such as
        # `sympy.numbers` / `sympy.symbol` are not guaranteed to exist.
        number_types = (sympy.core.numbers.Float, sympy.core.numbers.Integer,
                        sympy.core.numbers.Rational, sympy.core.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        elif isinstance(expr, Symbol):
            return expr
        else:
            unsnapped_sub_expr_list = [
                unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
                for sub_expr in expr.args
            ]
            return expr.func(*unsnapped_sub_expr_list)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        else:
            i = 0
            underscore = "_" if is_underscore else ""
            while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                      suffix) in iterable:
                i += 1
            new_key = "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)
            return new_key

    # NOTE(review): local names and statement order in the rest of this
    # function are deliberately left as they were, because locals() is handed
    # to parse_expr further down and therefore exposes them to the parser.
    eq = parse_expr(str(math_expr))
    expr = eq

    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [
        subexpression for subexpression in preorder_traversal(expr)
        if is_atomic_number(subexpression)
    ]

    # Do integer snap one parameter at a time
    integer_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            param_dict = {}
            # Seeding with "p" makes the generated names start at p0.
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            del unsnapped_param_dict["p"]
            temp_unsnapped_param_dict = copy.deepcopy(unsnapped_param_dict)
            new_numbers = integerSnap(eq_numbers, w + 1)
            new_numbers = {"p" + str(k): v for k, v in new_numbers.items()}
            temp_unsnapped_param_dict.update(new_numbers)
            # Turn each placeholder into a format field. The word boundaries
            # and the mandatory digit keep the pattern from matching the "p"
            # inside names such as "exp", which used to produce a "{p}" field
            # and make format_map raise KeyError for the whole candidate.
            new_eq = re.sub(r"\b(p\d+)\b", r"{\1}", str(eq))
            new_eq = new_eq.format_map(temp_unsnapped_param_dict)
            integer_snapped_expr.append(parse_expr(new_eq))
        except Exception:
            continue

    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [
        subexpression for subexpression in preorder_traversal(expr)
        if is_atomic_number(subexpression)
    ]

    # Do rational snap one parameter at a time
    rational_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            param_dict = {}
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            del unsnapped_param_dict["p"]
            temp_unsnapped_param_dict = copy.deepcopy(unsnapped_param_dict)
            new_numbers = rationalSnap(eq_numbers, w + 1)
            new_numbers = {"p" + str(k): v for k, v in new_numbers.items()}
            temp_unsnapped_param_dict.update(new_numbers)
            # Same placeholder-to-field rewrite as in the integer snap above.
            new_eq = re.sub(r"\b(p\d+)\b", r"{\1}", str(eq))
            new_eq = new_eq.format_map(temp_unsnapped_param_dict)
            rational_snapped_expr.append(parse_expr(new_eq))
        except Exception:
            continue

    snapped_expr = np.append(integer_snapped_expr, rational_snapped_expr)

    for i in range(len(snapped_expr)):
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(input_data,
                                                    str(snapped_expr[i]))
            # Calculate the complexity of the new, snapped expression
            expr = snapped_expr[i]
            # NOTE(review): this loop only rebinds `s`; it does not register
            # one real-valued symbol per free symbol. parse_expr then receives
            # locals() as its local_dict, so a name in the expression that
            # equals any local variable here resolves to that variable.
            for s in (expr.free_symbols):
                s = symbols(str(s), real=True)
            expr = parse_expr(str(snapped_expr[i]), locals())
            expr = intify(expr)
            is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
            numbers_expr = [
                subexpression for subexpression in preorder_traversal(expr)
                if is_atomic_number(subexpression)
            ]
            snapped_complexity = 0
            for j in numbers_expr:
                snapped_complexity = snapped_complexity + get_number_DL_snapped(
                    float(j))
            # Add the complexity due to symbols
            n_variables = len(expr.free_symbols)
            n_operations = len(count_ops(expr, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                snapped_complexity = snapped_complexity + (
                    n_variables + n_operations) * np.log2(
                        (n_variables + n_operations))
            PA.add(Point(x=snapped_complexity, y=snapped_error,
                         data=str(expr)))
        except Exception:
            continue

    return (PA)
# Example #3
# 0
def add_bf_on_numbers_on_pareto(pathdir, filename, PA, math_expr):
    """Replace each number of ``math_expr`` by brute-force proposals and add the results to ``PA``.

    For every atomic number in the parsed expression, the number is written to
    ``pathdir + "number_for_bf_<w>.txt"``, ``brute_force_number`` is run on
    that file, and the proposals are read from column 1 of ``results.dat``.
    Each proposal is substituted for that one number (all other numbers keep
    their original values). Every resulting expression is then scored with
    ``get_symbolic_expr_error`` and added to ``PA``; expressions whose scoring
    raises are skipped.

    Args:
        pathdir: Directory prefix for the data file and the scratch files.
        filename: Name of the data file used for scoring.
        PA: Pareto set that receives the new points.
        math_expr: Expression (anything ``str()`` turns into parseable text).

    Returns:
        ``PA``.
    """
    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        # Use the canonical sympy.core classes; top-level aliases such as
        # `sympy.numbers` / `sympy.symbol` are not guaranteed to exist.
        number_types = (sympy.core.numbers.Float, sympy.core.numbers.Integer,
                        sympy.core.numbers.Rational, sympy.core.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        elif isinstance(expr, Symbol):
            return expr
        else:
            unsnapped_sub_expr_list = [
                unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
                for sub_expr in expr.args
            ]
            return expr.func(*unsnapped_sub_expr_list)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        else:
            i = 0
            underscore = "_" if is_underscore else ""
            while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                      suffix) in iterable:
                i += 1
            new_key = "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)
            return new_key

    expr = parse_expr(str(math_expr))
    # Get the numbers appearing in the expression
    eq_numbers = [
        node for node in preorder_traversal(expr)
        if node.is_Atom and node.is_number
    ]

    # Do bf on one parameter at a time
    bf_on_numbers_expr = []
    for w, number in enumerate(eq_numbers):
        # Seeding with "p" makes the generated placeholder names start at p0.
        unsnapped_param_dict = {'p': 1}
        eq_ = unsnap_recur(expr, {}, unsnapped_param_dict)
        param_names = [name for name in unsnapped_param_dict if name != "p"]

        number_file = "number_for_bf_%s.txt" % w
        np.savetxt(pathdir + number_file, [number])
        brute_force_number(pathdir, number_file)
        # Load the predictions made by the bf code. ndmin=1 keeps a results
        # file with a single row iterable; without it loadtxt returns a 0-d
        # array and the loop below cannot run.
        bf_numbers = np.loadtxt("results.dat",
                                usecols=(1, ),
                                dtype="str",
                                ndmin=1)
        new_numbers = copy.deepcopy(eq_numbers)

        # replace the number under consideration by all the proposed bf numbers
        for bf_number in bf_numbers:
            new_numbers[w] = parse_expr(RPN_to_eq(bf_number))
            eq = eq_
            # Placeholders are paired with the numbers by position.
            # NOTE(review): this assumes unsnap_recur parameterises exactly
            # the atoms collected in eq_numbers, in the same order -- confirm.
            for name, value in zip(param_names, new_numbers):
                eq = eq.subs(name, value)
            bf_on_numbers_expr.append(eq)

    for candidate in bf_on_numbers_expr:
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(candidate))
            # Calculate the complexity of the new, snapped expression
            expr = simplify(powsimp(candidate))

            snapped_complexity = 0
            for node in preorder_traversal(expr):
                if node.is_Atom and node.is_number:
                    snapped_complexity = (snapped_complexity +
                                          get_number_DL_snapped(float(node)))
            # Add the complexity due to symbols
            n_variables = len(expr.free_symbols)
            n_operations = len(count_ops(expr, visual=True).free_symbols)
            if n_operations != 0 or n_variables != 0:
                snapped_complexity = snapped_complexity + (
                    n_variables + n_operations) * np.log2(
                        (n_variables + n_operations))

            PA.add(Point(x=snapped_complexity, y=snapped_error,
                         data=str(expr)))
        except Exception:
            continue

    return (PA)
def add_snap_expr_on_pareto_polyfit(pathdir, filename, math_expr, PA): 
    """Add zero-snapped variants of ``math_expr`` to the Pareto set ``PA``.

    Every number in the parsed expression is turned into a placeholder
    parameter (p0, p1, ...). For ``w = 1 .. n`` the (index, value) pairs
    returned by ``zeroSnap(numbers, w)`` are written into the list of numbers,
    the placeholders are substituted back by position, and the resulting
    expression is scored with ``get_symbolic_expr_error`` and added to ``PA``.
    Candidates that raise while being built are skipped silently; candidates
    that raise while being scored print "error" and are skipped.

    Args:
        pathdir: Directory prefix of the data file used for scoring.
        filename: Name of the data file used for scoring.
        math_expr: Expression (anything ``str()`` turns into parseable text).
        PA: Pareto set that receives the new points.

    Returns:
        ``PA``.
    """
    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        # Use the canonical sympy.core classes; top-level aliases such as
        # `sympy.numbers` / `sympy.symbol` are not guaranteed to exist.
        number_types = (sympy.core.numbers.Float, sympy.core.numbers.Integer,
                        sympy.core.numbers.Rational, sympy.core.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        elif isinstance(expr, Symbol):
            return expr
        else:
            unsnapped_sub_expr_list = [
                unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
                for sub_expr in expr.args
            ]
            return expr.func(*unsnapped_sub_expr_list)


    def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        else:
            i = 0
            underscore = "_" if is_underscore else ""
            while "{}{}{}{}{}".format(key, underscore, midfix, i, suffix) in iterable:
                i += 1
            new_key = "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)
            return new_key

    # NOTE(review): local names and statement order in the rest of this
    # function are deliberately left as they were, because locals() is handed
    # to parse_expr further down and therefore exposes them to the parser.
    eq = parse_expr(str(math_expr))
    expr = eq
    
    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [subexpression for subexpression in preorder_traversal(expr) if is_atomic_number(subexpression)]
       
    # Do zero snap one parameter at a time
    zero_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            param_dict = {}
            # Seeding with "p" makes the generated names start at p0.
            unsnapped_param_dict = {'p':1}
            eq = unsnap_recur(expr,param_dict,unsnapped_param_dict)
            new_numbers = zeroSnap(eq_numbers,w+1)
            # NOTE(review): eq_numbers is modified in place, so snaps made in
            # one iteration are still present in the next -- confirm intended.
            for kk in range(len(new_numbers)):
                eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
            # Placeholders are paired with the numbers by position.
            jj = 0
            for parm in unsnapped_param_dict:
                if parm!="p":
                    eq = eq.subs(parm, eq_numbers[jj])
                    jj = jj + 1
            zero_snapped_expr.append(eq)
        except Exception:
            continue

    for i in range(len(zero_snapped_expr)):
        try:
        
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir,filename,str(zero_snapped_expr[i]))
            # Calculate the complexity of the new, snapped expression
            expr = simplify(powsimp(zero_snapped_expr[i]))
            # NOTE(review): this loop only rebinds `s`; it does not register
            # one real-valued symbol per free symbol. parse_expr then receives
            # locals() as its local_dict, so a name in the expression that
            # equals any local variable here resolves to that variable.
            for s in (expr.free_symbols):
                s = symbols(str(s), real = True)
            expr =  simplify(parse_expr(str(zero_snapped_expr[i]),locals()))
            expr = intify(expr)

            is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
            numbers_expr = [subexpression for subexpression in preorder_traversal(expr) if is_atomic_number(subexpression)]

            snapped_complexity = 0
            for j in numbers_expr:
                snapped_complexity = snapped_complexity + get_number_DL_snapped(float(j))

            # Add the complexity due to symbols
            n_variables = len(expr.free_symbols)
            n_operations = len(count_ops(expr,visual=True).free_symbols)
            if n_operations!=0 or n_variables!=0:
                snapped_complexity = snapped_complexity + (n_variables+n_operations)*np.log2((n_variables+n_operations))

            PA.add(Point(x=snapped_complexity, y=snapped_error, data=str(expr)))
        except Exception:
            print("error")
            print("")
            continue
    return(PA)
        
        
        
            
def add_snap_expr_on_pareto(pathdir, filename, math_expr, PA, DR_file=""):
    """Add integer- and rational-snapped variants of ``math_expr`` to ``PA``.

    Every number in the parsed expression is turned into a placeholder
    parameter (p0, p1, ...). For ``w = 1 .. n`` the (index, value) pairs
    returned by ``integerSnap(numbers, w)`` / ``rationalSnap(numbers, w)`` are
    applied, the placeholders are substituted back by position, and each
    resulting expression is scored with ``get_symbolic_expr_error`` and added
    to ``PA``. Candidates that raise at any step are skipped.

    Args:
        pathdir: Directory prefix of the data file used for scoring.
        filename: Name of the data file used for scoring.
        math_expr: Expression (anything ``str()`` turns into parseable text).
        PA: Pareto set that receives the new points.
        DR_file: Optional path of a comma-separated file. When given, the
            names x0, x1, ... in each snapped expression are replaced by
            entries 2, 3, ... of that file and the result is multiplied by
            entry 1 before the symbol complexity is computed and the
            expression is stored.

    Returns:
        ``PA``.
    """
    def unsnap_recur(expr, param_dict, unsnapped_param_dict):
        """Recursively transform each numerical value into a learnable parameter."""
        import sympy
        from sympy import Symbol
        # Use the canonical sympy.core classes; top-level aliases such as
        # `sympy.numbers` / `sympy.symbol` are not guaranteed to exist.
        number_types = (sympy.core.numbers.Float, sympy.core.numbers.Integer,
                        sympy.core.numbers.Rational, sympy.core.numbers.Pi)
        if isinstance(expr, number_types):
            used_param_names = list(
                param_dict.keys()) + list(unsnapped_param_dict)
            unsnapped_param_name = get_next_available_key(used_param_names,
                                                          "p",
                                                          is_underscore=False)
            unsnapped_param_dict[unsnapped_param_name] = float(expr)
            return Symbol(unsnapped_param_name)
        elif isinstance(expr, Symbol):
            return expr
        else:
            unsnapped_sub_expr_list = [
                unsnap_recur(sub_expr, param_dict, unsnapped_param_dict)
                for sub_expr in expr.args
            ]
            return expr.func(*unsnapped_sub_expr_list)

    def get_next_available_key(iterable,
                               key,
                               midfix="",
                               suffix="",
                               is_underscore=True):
        """Get the next available key that does not collide with the keys in the dictionary."""
        if key + suffix not in iterable:
            return key + suffix
        else:
            i = 0
            underscore = "_" if is_underscore else ""
            while "{}{}{}{}{}".format(key, underscore, midfix, i,
                                      suffix) in iterable:
                i += 1
            new_key = "{}{}{}{}{}".format(key, underscore, midfix, i, suffix)
            return new_key

    # NOTE(review): local names and statement order in the rest of this
    # function are deliberately left as they were, because locals() is handed
    # to parse_expr further down and therefore exposes them to the parser.
    eq = parse_expr(str(math_expr))
    expr = eq

    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [
        subexpression for subexpression in preorder_traversal(expr)
        if is_atomic_number(subexpression)
    ]

    # Do integer snap one parameter at a time
    integer_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            param_dict = {}
            # Seeding with "p" makes the generated names start at p0.
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            new_numbers = integerSnap(eq_numbers, w + 1)
            # NOTE(review): eq_numbers is modified in place, so snaps made in
            # one iteration are still present in the next -- confirm intended.
            for kk in range(len(new_numbers)):
                eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
            # Placeholders are paired with the numbers by position.
            jj = 0
            for parm in unsnapped_param_dict:
                if parm != "p":
                    eq = eq.subs(parm, eq_numbers[jj])
                    jj = jj + 1
            integer_snapped_expr.append(eq)
        except Exception:
            continue

    # Get the numbers appearing in the expression
    is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
    eq_numbers = [
        subexpression for subexpression in preorder_traversal(expr)
        if is_atomic_number(subexpression)
    ]

    # Do rational snap one parameter at a time
    rational_snapped_expr = []
    for w in range(len(eq_numbers)):
        try:
            eq_numbers_snap = copy.deepcopy(eq_numbers)
            param_dict = {}
            unsnapped_param_dict = {'p': 1}
            eq = unsnap_recur(expr, param_dict, unsnapped_param_dict)
            new_numbers = rationalSnap(eq_numbers, w + 1)
            for kk in range(len(new_numbers)):
                eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
            jj = 0
            for parm in unsnapped_param_dict:
                if parm != "p":
                    # Snapped entries are two-element slices and become a
                    # Rational; entries that were not snapped fail that
                    # conversion and are substituted unchanged.
                    try:
                        eq = eq.subs(
                            parm,
                            Rational(eq_numbers_snap[jj][0],
                                     eq_numbers_snap[jj][1]))
                    except Exception:
                        eq = eq.subs(parm, eq_numbers_snap[jj])
                    jj = jj + 1
            rational_snapped_expr.append(eq)
        except Exception:
            continue

    snapped_expr = np.append(integer_snapped_expr, rational_snapped_expr)

    for i in range(len(snapped_expr)):
        try:
            # Calculate the error of the new, snapped expression
            snapped_error = get_symbolic_expr_error(pathdir, filename,
                                                    str(snapped_expr[i]))
            # Calculate the complexity of the new, snapped expression
            expr = simplify(powsimp(snapped_expr[i]))
            # NOTE(review): this loop only rebinds `s`; it does not register
            # one real-valued symbol per free symbol. parse_expr then receives
            # locals() as its local_dict, so a name in the expression that
            # equals any local variable here resolves to that variable.
            for s in (expr.free_symbols):
                s = symbols(str(s), real=True)
            expr = simplify(parse_expr(str(snapped_expr[i]), locals()))
            expr = intify(expr)
            is_atomic_number = lambda expr: expr.is_Atom and expr.is_number
            numbers_expr = [
                subexpression for subexpression in preorder_traversal(expr)
                if is_atomic_number(subexpression)
            ]

            if DR_file == "":
                snapped_complexity = 0
                for j in numbers_expr:
                    snapped_complexity = snapped_complexity + get_number_DL_snapped(
                        float(j))

                n_variables = len(expr.free_symbols)
                n_operations = len(count_ops(expr, visual=True).free_symbols)
                if n_operations != 0 or n_variables != 0:
                    snapped_complexity = snapped_complexity + (
                        n_variables + n_operations) * np.log2(
                            (n_variables + n_operations))

            # If a bf file is provided, replace the variables with the actual ones before calculating the complexity
            else:
                dr_data = np.loadtxt(DR_file, dtype="str", delimiter=",")

                expr = str(expr)
                old_vars = ["x%s" % k for k in range(len(dr_data) - 3)]
                # Replace the highest index first: plain string replacement
                # of "x1" would otherwise also rewrite the prefix of "x10",
                # "x11", ... and corrupt those names.
                for i_dr in reversed(range(len(old_vars))):
                    expr = expr.replace(old_vars[i_dr],
                                        "(" + dr_data[i_dr + 2] + ")")
                expr = "(" + dr_data[1] + ")*(" + expr + ")"

                expr = parse_expr(expr)
                for s in (expr.free_symbols):
                    s = symbols(str(s), real=True)
                expr = simplify(parse_expr(str(expr), locals()))
                # NOTE(review): numbers_expr was collected before the
                # variable replacement above, so the number complexity does
                # not include numbers introduced by DR_file -- confirm.
                snapped_complexity = 0
                for j in numbers_expr:
                    snapped_complexity = snapped_complexity + get_number_DL_snapped(
                        float(j))

                n_variables = len(expr.free_symbols)
                n_operations = len(count_ops(expr, visual=True).free_symbols)
                if n_operations != 0 or n_variables != 0:
                    snapped_complexity = snapped_complexity + (
                        n_variables + n_operations) * np.log2(
                            (n_variables + n_operations))

            PA.add(Point(x=snapped_complexity, y=snapped_error,
                         data=str(expr)))
        except Exception:
            continue
    return (PA)
# Example #6
# 0
def run_aifeynman(pathdir,
                  filename,
                  BF_try_time,
                  BF_ops_file_type,
                  polyfit_deg=4,
                  NN_epochs=4000,
                  vars_name=[],
                  test_percentage=20):
    """Run the whole equation-discovery pipeline on one data file.

    Steps, in order:
      1. If ``vars_name`` is non-empty, run ``dimensionalAnalysis`` and switch
         to the ``<filename>_dim_red`` data file it is expected to produce.
      2. Randomly split the rows into disjoint train and test sets and save
         them next to the input as ``<filename>_train`` / ``<filename>_test``.
      3. Run ``run_AI_all`` on the training file to get a Pareto set.
      4. Refine each Pareto expression with ``add_bf_on_numbers_on_pareto``,
         ``add_snap_expr_on_pareto``, ``final_gd`` and a second snapping pass.
      5. Write intermediate and final results under ``results/``.

    Args:
        pathdir: Path prefix; it is concatenated directly with ``filename``
            (``pathdir + filename``), so it must already end with a separator.
        filename: Name of the data file, readable by ``np.loadtxt``.
        BF_try_time: Forwarded unchanged to ``run_AI_all``.
        BF_ops_file_type: Forwarded unchanged to ``run_AI_all``.
        polyfit_deg: Forwarded unchanged to ``run_AI_all``.
        NN_epochs: Forwarded unchanged to ``run_AI_all``.
        vars_name: Variable names for dimensional analysis; an empty list
            skips that step. The default list is never mutated here.
        test_percentage: Percentage of rows held out as the test set.

    Returns:
        None. Results are written to ``results/solution_before_snap_<f>.txt``,
        ``results/solution_first_snap_<f>.txt`` and
        ``results/solution_<original filename>``.
    """
    # Keep the caller's name: the final results file is named after it even
    # when dimensional analysis switches us to the "_dim_red" file.
    filename_orig = filename

    # If the variable names are passed, do the dimensional analysis first.
    # This is best-effort: on failure we fall back to the untransformed data.
    try:
        if vars_name != []:
            dimensionalAnalysis(pathdir, filename, vars_name)
            DR_file = filename + "_dim_red_variables.txt"
            filename = filename + "_dim_red"
        else:
            DR_file = ""
    except Exception:
        DR_file = ""

    # Split the data into train and test set
    input_data = np.loadtxt(pathdir + filename)
    n_rows = len(input_data)
    sep_idx = np.random.permutation(n_rows)

    # The test slice must start where the training slice ends; starting it at
    # test_percentage% (as before) made the two sets overlap.
    n_train = (100 - test_percentage) * n_rows // 100
    train_data = input_data[sep_idx[:n_train]]
    test_data = input_data[sep_idx[n_train:]]

    np.savetxt(pathdir + filename + "_train", train_data)
    if test_data.size != 0:
        np.savetxt(pathdir + filename + "_test", test_data)

    # Run the code on the train data
    PA = run_AI_all(pathdir, filename + "_train", BF_try_time,
                    BF_ops_file_type, polyfit_deg, NN_epochs)
    PA_list = PA.get_pareto_points()

    # Run bf snap on the resulted equations (last entry of each point is the
    # expression). A failure on one expression must not abort the others.
    for point in PA_list:
        try:
            PA = add_bf_on_numbers_on_pareto(pathdir, filename, PA,
                                             point[-1])
        except Exception:
            continue
    PA_list = PA.get_pareto_points()

    np.savetxt("results/solution_before_snap_%s.txt" % filename,
               PA_list,
               fmt="%s")

    # Run zero, integer and rational snap on the resulted equations
    PA_snapped_1 = ParetoSet()
    for point in PA_list:
        PA_snapped_1 = add_snap_expr_on_pareto(pathdir, filename, point[-1],
                                               PA_snapped_1, "")

    PA_list = PA_snapped_1.get_pareto_points()
    np.savetxt("results/solution_first_snap_%s.txt" % filename,
               PA_list,
               fmt="%s")

    # Run gradient descent on the data one more time
    for point in PA_list:
        try:
            gd_update = final_gd(pathdir + filename, point[-1])
            PA_snapped_1.add(
                Point(x=gd_update[1], y=gd_update[0], data=gd_update[2]))
        except Exception:
            continue

    PA_list = PA_snapped_1.get_pareto_points()

    # Second snapping pass; this time the dimensional-reduction file (if any)
    # is handed to the snapper.
    PA_snapped = ParetoSet()
    for point in PA_list:
        PA_snapped = add_snap_expr_on_pareto(pathdir, filename, point[-1],
                                             PA_snapped, DR_file)

    list_dt = np.array(PA_snapped.get_pareto_points())
    data_file_len = len(np.loadtxt(pathdir + filename))

    # Column 1 of each point is used as the error: log2 of it, and the same
    # value scaled by the number of rows in the data file.
    log_err = np.array([np.log2(float(row[1])) for row in list_dt])
    log_err_all = np.array(
        [data_file_len * np.log2(float(row[1])) for row in list_dt])

    # Try the found expressions on the test data
    if DR_file == "" and test_data.size != 0:
        test_errors = np.array([
            get_symbolic_expr_error(pathdir, filename + "_test",
                                    str(row[-1])) for row in list_dt
        ])
        # Save all the data to file
        save_data = np.column_stack(
            (test_errors, log_err, log_err_all, list_dt))
    else:
        save_data = np.column_stack((log_err, log_err_all, list_dt))
    np.savetxt("results/solution_%s" % filename_orig, save_data, fmt="%s")