def test_multioutput_weighted_with_callable_temp_equation(self):
    """Weighted two-output fit whose result is checked through best_callable().

    The targets are the squares of the first two columns of ``self.X``.
    Every entry whose weight is 0 is doubled, so the plain squares are only
    recovered when the search honours the weights.
    """
    targets = self.X[:, [0, 1]] ** 2

    # Random binary mask: 0.0 where the uniform draw is below 0.5, else 1.0.
    draws = np.random.rand(*targets.shape)
    w = np.where(draws < 0.5, 0.0, 1.0)

    # Double equation when weights are 0:
    targets = (2 - w) * targets
    # Thus, pysr needs to use the weights to find the right equation!

    pysr(
        self.X,
        targets,
        weights=w,
        unary_operators=["sq(x) = x^2"],
        binary_operators=["plus"],
        extra_sympy_mappings={"sq": lambda x: x**2},
        **self.default_test_kwargs,
        procs=0,
        temp_equation_file=True,
        delete_tempfiles=False,
    )

    # One callable per output column; each must reproduce the clean square.
    for column in (0, 1):
        np.testing.assert_almost_equal(
            best_callable()[column](self.X),
            self.X[:, column] ** 2,
            decimal=4,
        )
def test_multiprocessing(self):
    """Fit ``y = X[:, 0]`` with ``procs=2`` and ``multithreading=False``.

    Passes when the last row of the returned table has an MSE of at most 1e-4.
    """
    target = self.X[:, 0]
    results = pysr(
        self.X,
        target,
        **self.default_test_kwargs,
        procs=2,
        multithreading=False,
    )
    print(results)

    final_mse = results.iloc[-1]["MSE"]
    self.assertLessEqual(final_mse, 1e-4)
def test_empty_operators_single_input(self):
    """Fit ``y = x + 3`` on a single-column input with no unary operators.

    Only the binary operator ``plus`` is offered; the last row of the
    returned table must reach an MSE of at most 1e-4.
    """
    features = np.random.randn(100, 1)
    target = features[:, 0] + 3.0

    results = pysr(
        features,
        target,
        unary_operators=[],
        binary_operators=["plus"],
        **self.default_test_kwargs,
    )

    final_mse = results.iloc[-1]["MSE"]
    self.assertLessEqual(final_mse, 1e-4)
def test_multioutput_custom_operator(self):
    """Two-output fit of squared columns using the custom ``sq`` operator.

    The result is indexed per output; the last row of each output's table
    must have an MSE of at most 1e-4.
    """
    targets = self.X[:, [0, 1]] ** 2
    per_output = pysr(
        self.X,
        targets,
        unary_operators=["sq(x) = x^2"],
        binary_operators=["plus"],
        extra_sympy_mappings={"sq": lambda x: x**2},
        **self.default_test_kwargs,
        procs=0,
    )
    print(per_output)

    for output_index in (0, 1):
        self.assertLessEqual(per_output[output_index].iloc[-1]["MSE"], 1e-4)
def test_noisy(self):
    """Two-output fit on noisy squared columns with ``denoise=True``.

    Operators are passed as bare strings rather than lists. The best row
    for each output must have an MSE of at most 1e-2.
    """
    np.random.seed(1)
    row_count = self.X.shape[0]
    # One noise value per row, broadcast across both output columns.
    noise = np.random.randn(row_count, 1) * 0.05
    targets = self.X[:, [0, 1]] ** 2 + noise

    results = pysr(
        self.X,
        targets,
        # Test that passing a single operator works:
        unary_operators="sq(x) = x^2",
        binary_operators="plus",
        extra_sympy_mappings={"sq": lambda x: x**2},
        **self.default_test_kwargs,
        procs=0,
        denoise=True,
    )

    for output_index in (0, 1):
        self.assertLessEqual(
            best_row(equations=results)[output_index]["MSE"], 1e-2
        )
def test_pandas_resample(self):
    """DataFrame input with ``Xresampled``, denoising and feature selection.

    The target depends on columns ``T`` and ``x`` only. The resampling frame
    and the evaluation frame list the columns in different orders than the
    training frame. The test checks that the best TeX expression omits
    ``unused_feature``, that the selected feature indices are ``[0, 1]``,
    and that the fitted callable stays close to the true function on a
    fresh frame.
    """
    np.random.seed(1)
    train = pd.DataFrame({
        "T": np.random.randn(500),
        "x": np.random.randn(500),
        "unused_feature": np.random.randn(500),
    })

    def true_fn(frame):
        return np.array(frame["T"] + frame["x"] ** 2 + 1.323837)

    target = true_fn(train) + np.random.randn(500) * 0.01

    # Resampled array is a different order of features:
    Xresampled = pd.DataFrame({
        "unused_feature": np.random.randn(100),
        "x": np.random.randn(100),
        "T": np.random.randn(100),
    })

    results = pysr(
        train,
        target,
        unary_operators=[],
        binary_operators=["+", "*", "/", "-"],
        **self.default_test_kwargs,
        Xresampled=Xresampled,
        denoise=True,
        select_k_features=2,
    )

    self.assertNotIn("unused_feature", best_tex())
    for expected_name in ("T", "x"):
        self.assertIn(expected_name, best_tex())
    self.assertLessEqual(results.iloc[-1]["MSE"], 1e-2)

    fitted = best_callable()
    self.assertListEqual(sorted(fitted._selection), [0, 1])

    # Evaluation frame uses yet another column order.
    holdout = pd.DataFrame({
        "T": np.random.randn(100),
        "unused_feature": np.random.randn(100),
        "x": np.random.randn(100),
    })
    squared_error = (fitted(holdout) - true_fn(holdout)) ** 2
    self.assertLess(np.average(squared_error), 1e-2)
import numpy as np
from pysr import pysr, best

# Dataset: 100 samples of 5 features; the target uses columns 3 and 0 only.
X = 2 * np.random.randn(100, 5)
periodic_term = 2 * np.cos(X[:, 3])
quadratic_term = X[:, 0] ** 2
y = periodic_term + quadratic_term - 2

# Search configuration, collected in one place.
search_options = dict(
    niterations=5,
    binary_operators=["plus", "mult"],
    unary_operators=[
        # Pre-defined library of operators
        # (see https://pysr.readthedocs.io/en/latest/docs/operators/)
        "cos",
        "exp",
        "sin",
        # Define your own operator! (Julia syntax)
        "inv(x) = 1/x",
    ],
    # Custom loss function
    loss="loss(x, y) = abs(x - y)",
)

# Learn equations
equations = pysr(X, y, **search_options)

...  # (you can use ctl-c to exit early)

print(best(equations))
def run_trial(args):
    """Score one hyperparameter configuration.

    :args: A dictionary containing all hyperparameters. It is modified in
        place: ``niterations`` and ``npop`` are cast to int and several
        fixed settings are written into it.
    :returns: Dict with ``status`` (always ``'ok'``) and ``loss``. The loss
        is the average over all trials of ``sqrt(min MSE) / std(target)``,
        or ``np.inf`` when the parameters are rejected or a trial raises
        ``ValueError``.
    """
    print("Running on", args)
    for int_key in ('niterations', 'npop'):
        args[int_key] = int(args[int_key])

    step_budget = 10 * 100 * 1000
    niterations, npop = args['niterations'], args['npop']
    # A zero here would divide by zero when the budget is split below.
    if 0 in (niterations, npop):
        print("Bad parameters")
        return {'status': 'ok', 'loss': np.inf}

    # Spread the fixed step budget over iterations and population members.
    args.update(
        ncyclesperiteration=int(step_budget / (niterations * npop)),
        topn=10,
        parsimony=1e-3,
        annealing=True,
    )

    if npop < 20 or args['ncyclesperiteration'] < 3:
        print("Bad parameters")
        return {'status': 'ok', 'loss': np.inf}

    args['weightDoNothing'] = 1.0

    maxTime = 30
    ntrials = 2
    equation_file = f'.hall_of_fame_{np.random.rand():f}.csv'

    # NOTE: the expressions below are evaluated with eval() and refer to
    # this local by name, so it must stay called ``X``.
    with temp_seed(0):
        X = np.random.randn(100, 5)*3

    eval_str = [
        "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
        "np.sign(X[:, 2])*np.abs(X[:, 2])**3.5 + 1/(np.abs(X[:, 0])+1)",
        "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
        "1.0 + 3*X[:, 0]**2 - 0.5*X[:, 0]**3 + 0.1*X[:, 0]**4",
        "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
    ]

    print(f"Starting", str(args))
    scores = []
    try:
        # Test ids are 1-based; only tests 3, 4 and 5 are run.
        for i in (3, 4, 5):
            print(f"Starting test {i}")
            for j in range(ntrials):
                print(f"Starting trial {j}")
                trial = pysr.pysr(
                    test=f"simple{i}",
                    threads=4,
                    binary_operators=["plus", "mult", "pow", "div"],
                    unary_operators=["cos", "exp", "sin", "loga", "abs"],
                    equation_file=equation_file,
                    timeout=maxTime,
                    maxsize=25,
                    verbosity=0,
                    **args)
                if len(trial) == 0:
                    raise ValueError
                # RMSE of the best equation, scaled by the target's spread.
                target_std = np.std(eval(eval_str[i-1]))
                scores.append(np.min(trial['MSE'])**0.5 / target_std)
                print(f"Test {i} trial {j} with", str(args), f"got {scores[-1]}")
    except ValueError:
        print(f"Broken", str(args))
        return {
            'status': 'ok',  # or 'fail' if nan loss
            'loss': np.inf,
        }

    loss = np.average(scores)
    print(f"Finished with {loss}", str(args))
    return {
        'status': 'ok',  # or 'fail' if nan loss
        'loss': loss,
    }
import numpy as np
from pysr import pysr
import sympy

# Smoke tests: each section fits a known relation and asserts that the last
# row of the returned table reaches an MSE below 1e-4.

X = np.random.randn(100, 5)

print("Test 1 - defaults; simple linear relation")
y = X[:, 0]
equations = pysr(X, y,
                 niterations=10, user_input=False)
print(equations)
assert equations.iloc[-1]['MSE'] < 1e-4

print("Test 2 - test custom operator")
y = X[:, 0]**2
equations = pysr(X, y,
                 unary_operators=["sq(x) = x^2"], binary_operators=["plus"],
                 # The mapping key must match the custom operator's name
                 # ("sq"); it was previously registered as 'square'.
                 extra_sympy_mappings={'sq': lambda x: x**2},
                 niterations=10, user_input=False)
print(equations)
assert equations.iloc[-1]['MSE'] < 1e-4

X = np.random.randn(100, 1)
y = X[:, 0] + 3.0
print("Test 3 - empty operator list, and single dimension input")
equations = pysr(X, y,
                 unary_operators=[], binary_operators=["plus"],
                 niterations=10, user_input=False)
print(equations)
# Same acceptance threshold as Tests 1 and 2, so this case is checked and
# not just printed.
assert equations.iloc[-1]['MSE'] < 1e-4
def test_linear_relation(self):
    """Fit ``y = X[:, 0]`` with the default test settings.

    Passes when the last row of the returned table has an MSE of at most 1e-4.
    """
    target = self.X[:, 0]
    results = pysr(self.X, target, **self.default_test_kwargs)
    print(results)

    final_mse = results.iloc[-1]["MSE"]
    self.assertLessEqual(final_mse, 1e-4)
def run_trial(args):
    """Evaluate the model loss using the hyperparams in args

    Each target expression is fitted ``ntrials`` times. A trial's score is
    the RMSE of the best equation found divided by the standard deviation
    of the target it was fitted on; the returned loss is the average score.

    :args: A dictionary containing all hyperparameters. It is modified in
        place: several fixed settings are written into it before the search.
    :returns: Dict with ``status`` (always ``'ok'``) and ``loss``; the loss
        is ``np.inf`` when the parameters are rejected or a trial raises
        ``ValueError`` (including an empty result table).
    """
    print("Running on", args)
    args['niterations'] = 100
    args['npop'] = 100
    args['ncyclesperiteration'] = 1000
    args['topn'] = 10
    args['parsimony'] = 0.0
    args['useFrequency'] = True
    args['annealing'] = True

    # With the fixed values assigned above this guard cannot trigger; it
    # only matters if npop / ncyclesperiteration are taken from args instead.
    if args['npop'] < 20 or args['ncyclesperiteration'] < 3:
        print("Bad parameters")
        return {'status': 'ok', 'loss': np.inf}

    args['weightDoNothing'] = 1.0
    ntrials = 3

    # NOTE: the expressions below are evaluated with eval() and refer to
    # this local by name, so it must stay called ``X``.
    with temp_seed(0):
        X = np.random.randn(100, 10) * 3

    # Hard-coded target expressions (not external input), evaluated on X.
    eval_str = [
        "np.sign(X[:, 2])*np.abs(X[:, 2])**2.5 + 5*np.cos(X[:, 3]) - 5",
        "np.exp(X[:, 0]/2) + 12.0 + np.log(np.abs(X[:, 0])*10 + 1)",
        "(np.exp(X[:, 3]) + 3)/(np.abs(X[:, 1]) + np.cos(X[:, 0]) + 1.1)",
        "X[:, 0] * np.sin(2*np.pi * (X[:, 1] * X[:, 2] - X[:, 3] / X[:, 4])) + 3.0"
    ]

    print("Starting", str(args))
    try:
        trials = []
        for i, expression in enumerate(eval_str):
            print(f"Starting test {i}")
            for j in range(ntrials):
                print(f"Starting trial {j}")
                y = eval(expression)
                trial = pysr.pysr(
                    X,
                    y,
                    procs=4,
                    populations=20,
                    binary_operators=["plus", "mult", "pow", "div"],
                    unary_operators=["cos", "exp", "sin", "logm", "abs"],
                    maxsize=25,
                    constraints={'pow': (-1, 1)},
                    **args)
                if len(trial) == 0:
                    raise ValueError
                # Normalise by the spread of the target that was actually
                # fitted. The previous code used eval_str[i - 1], i.e. the
                # preceding expression (the last one when i == 0).
                trials.append(np.min(trial['MSE'])**0.5 / np.std(y))
                print(f"Test {i} trial {j} with", str(args),
                      f"got {trials[-1]}")

    except ValueError:
        print("Broken", str(args))
        return {
            'status': 'ok',  # or 'fail' if nan loss
            'loss': np.inf
        }

    loss = np.average(trials)
    print(f"Finished with {loss}", str(args))

    return {
        'status': 'ok',  # or 'fail' if nan loss
        'loss': loss
    }
import numpy as np
from pysr import pysr, best

# Dataset: 100 samples of 5 features; the target uses columns 3 and 0 only.
X = 2 * np.random.randn(100, 5)
periodic_term = 2 * np.cos(X[:, 3])
quadratic_term = X[:, 0] ** 2
y = periodic_term + quadratic_term - 2

# Search configuration, collected in one place.
search_options = dict(
    niterations=5,
    binary_operators=["plus", "mult"],
    unary_operators=[
        # Pre-defined library of operators
        # (see https://pysr.readthedocs.io/en/latest/docs/operators/)
        "cos",
        "exp",
        "sin",
        # Define your own operator! (Julia syntax)
        "inv(x) = 1/x",
    ],
    # Custom loss function
    loss='loss(x, y) = abs(x - y)',
    julia_project="../SymbolicRegression.jl",
)

# Learn equations
equations = pysr(X, y, **search_options)

...  # (you can use ctl-c to exit early)

print(best(equations))