def setUp(self):
    # 2D test fixture: f[i, j] = x_i ** y_j on a unit-step grid,
    # plus symbolic counterparts for the SymVariable tests.
    self.domain = Domain(lower_limits_dict={"x": 0, "y": 0},
                         upper_limits_dict={"x": 5, "y": 6},
                         step_width_dict={"x": 1, "y": 1})
    nx = self.domain.get_shape("x")["x"]
    ny = self.domain.get_shape("y")["y"]
    self.f = np.zeros((nx, ny))
    for ix, xv in enumerate(self.domain.get_range("x")["x"]):
        for iy, yv in enumerate(self.domain.get_range("y")["y"]):
            self.f[ix, iy] = xv ** yv
    # g mirrors f (independent array, equal values) for binary-op tests
    self.g = self.f.copy()
    self.v = Variable(self.f, self.domain, domain2axis={"x": 0, "y": 1}, variable_name="v")
    self.w = Variable(self.g, self.domain, domain2axis={"x": 0, "y": 1}, variable_name="w")
    # 1D coordinate variables live on the corresponding subdomains
    self.x = Variable(self.domain.get_range("x")["x"], self.domain.get_subdomain("x"),
                      domain2axis={"x": 0}, variable_name="x")
    self.y = Variable(self.domain.get_range("y")["y"], self.domain.get_subdomain("y"),
                      domain2axis={"y": 0}, variable_name="y")
    self.sym_v = SymVariable(*SymVariable.get_init_info_from_variable(self.v))
    self.sym_w = SymVariable(*SymVariable.get_init_info_from_variable(self.w))
    self.sym_x = SymVariable(*SymVariable.get_init_info_from_variable(self.x))
def setUp(self):
    # 1D fixture: v = sin(x) sampled on x in [-10, 10] with step 0.001,
    # plus its symbolic counterpart.
    self.domain = Domain(lower_limits_dict={"x": -10},
                         upper_limits_dict={"x": 10},
                         step_width_dict={"x": 0.001})
    self.v = Variable(np.sin(self.domain.get_range("x")["x"]), self.domain,
                      domain2axis={"x": 0}, variable_name="v")
    self.x = Variable(self.domain.get_range("x")["x"], self.domain,
                      domain2axis={"x": 0}, variable_name="x")
    self.sym_v = SymVariable(*SymVariable.get_init_info_from_variable(self.v))
def prepare_data(self, X):
    """
    Load raw series into the data manager as Variables over a "t" axis.

    :param X: indexed data (Series/DataFrame-like); the index is used as the
        time axis.  NOTE(review): assumes a uniformly spaced index — only the
        first index difference is used as the step width; confirm upstream.
    """
    # ---------- Prepare data ----------
    # The time domain comes from the input index itself: limits from min/max,
    # step from the first index difference (a default RangeIndex therefore
    # yields t starting at 0 with dt=1).
    domain = Domain(lower_limits_dict={"t": X.index.min()},
                    upper_limits_dict={"t": X.index.max()},
                    step_width_dict={"t": np.diff(X.index)[0]})

    # define variables: one per column, all sharing the "t" axis
    X = pd.DataFrame(X)
    variables = [
        Variable(X[series_name].values.ravel(), domain, domain2axis={"t": 0},
                 variable_name=series_name)
        for series_name in X.columns  # fixed: index from enumerate was unused
    ]

    self.data_manager.add_variables(variables)
    self.data_manager.add_regressors(self.get_regressors(domain, variables))
    self.data_manager.set_domain()
    self.data_manager.set_X_operator(self.get_x_operator_func())
    self.data_manager.set_y_operator(self.get_y_operator_func())
    self.var_names = [var.get_full_name() for var in self.data_manager.field.data]
def get_test_time(self, data_manager, type='Variable'):
    """
    Return the test-split portion of the 't' axis.

    :param data_manager: manager whose domain provides the 't' range.
    :param type: output format, 'Variable' or 'numpy'.
    :return: the masked time Variable, or its data as a numpy array.
    :raises ValueError: for any other ``type``.  (ValueError subclasses the
        bare Exception raised before, so existing handlers still match.)
    """
    t = Variable(data_manager.domain.get_range('t')['t'], data_manager.domain,
                 domain2axis={'t': 0}, variable_name='t')
    # keep only the test portion of the series
    t = self.testSplit * t
    if type == 'Variable':
        return t
    if type == 'numpy':
        return np.array(t.data)
    # fixed: name the offending value instead of a generic bare Exception
    raise ValueError('Not implemented return type {!r}: only Variable and numpy'.format(type))
def get_variables(self):
    """Wrap the stored field data as a single Variable "u" over axes (t, x)."""
    domain = self.get_domain()
    u = Variable(self.data, domain, domain2axis={"t": 0, "x": 1}, variable_name="u")
    return [u]
def get_regressors(self):
    """Fit each regressor builder on every variable and return the resulting
    regressor Variables (one per builder/variable pair)."""
    # TODO: only works with time variable regressor
    domain = self.get_domain()
    variables = self.get_variables()
    regressors = []
    for reg_builder in self.regressors_builders:
        axis_name = reg_builder.domain_axes_name
        for variable in variables:
            # fit on the training portion of both the domain and the series
            reg_builder.fit(self.trainSplit * variable.domain, self.trainSplit * variable)
            axis_range = domain.get_range(axis_names=[axis_name])[axis_name]
            serie = reg_builder.transform(axis_range)
            regressors.append(Variable(serie, domain,
                                       domain2axis={axis_name: 0},
                                       variable_name='{}_{}'.format(variable.get_name(), reg_builder.name)))
    return regressors
def var_operator_func(self, var):
    """
    Shift ``var`` by ``self.delay`` samples along ``self.axis_name`` and
    rename it symbolically (axis -> axis - delay).

    :type var: Variable
    """
    # TODO: roll should put nan or something in the border
    axis = var.get_axis(self.axis_name)
    shifted_data = np.roll(var.data, shift=self.delay, axis=axis)
    axis_symbol = sympy.Symbol(self.axis_name)
    shifted_name = var.name.subs(axis_symbol, axis_symbol - self.delay)
    return Variable(data=shifted_data,
                    domain=var.domain,
                    domain2axis=var.domain2axis,
                    variable_name=shifted_name)
def difference(self, var):
    """
    Central-difference gradient of ``var`` along ``self.axis_name``.

    :type var: Variable
    :param var: variable to differentiate numerically.
    :return: Variable whose data is ``np.gradient`` along the chosen axis.
    """
    # TODO: it is not the derivative in the name, but for now is easier this way
    axis = var.get_axis(self.axis_name)
    grad_data = np.gradient(var.data, axis=axis)
    derivative_name = var.name.diff(sympy.Symbol(self.axis_name), 1) * (var.domain.step_width[self.axis_name])
    return Variable(data=grad_data,
                    domain=var.domain,
                    domain2axis=var.domain2axis,
                    variable_name=derivative_name)
class TestVariables(unittest.TestCase):
    """Unit tests for Variable arithmetic, evaluation and slicing."""

    def setUp(self):
        # 2D grid with f[i, j] = x_i ** y_j; g mirrors f for binary-op tests.
        self.domain = Domain(lower_limits_dict={"x": 0, "y": 0},
                             upper_limits_dict={"x": 5, "y": 6},
                             step_width_dict={"x": 1, "y": 1})
        grid_shape = (self.domain.get_shape("x")["x"], self.domain.get_shape("y")["y"])
        self.f = np.zeros(grid_shape)
        self.g = np.zeros(grid_shape)
        for ix, xv in enumerate(self.domain.get_range("x")["x"]):
            for iy, yv in enumerate(self.domain.get_range("y")["y"]):
                self.f[ix, iy] = xv ** yv
                self.g[ix, iy] = self.f[ix, iy]
        self.v = Variable(self.f, self.domain, domain2axis={"x": 0, "y": 1}, variable_name="v")
        self.w = Variable(self.g, self.domain, domain2axis={"x": 0, "y": 1}, variable_name="w")
        self.x = Variable(self.domain.get_range("x")["x"], self.domain.get_subdomain("x"),
                          domain2axis={"x": 0}, variable_name="x")
        self.y = Variable(self.domain.get_range("y")["y"], self.domain.get_subdomain("y"),
                          domain2axis={"y": 0}, variable_name="y")

    def test_Variable(self):
        # construction keeps the domain shape on both axes
        assert self.v.domain.shape["x"] == self.domain.shape["x"]
        assert self.v.domain.shape["y"] == self.domain.shape["y"]

    def test_Variable_mul_1(self):
        product = self.x * self.y
        product.reorder_axis(self.domain)
        product.reorder_axis({"x": 0, "y": 1})
        expected = [[xv * yv for yv in self.y.data] for xv in self.x.data]
        assert np.all(product.data == expected)

    def test_Variable_mul_2(self):
        # multiplication commutes once axes are reordered consistently
        left = self.v * self.x
        left.reorder_axis(self.domain)
        right = self.x * self.v
        right.reorder_axis(self.domain)
        assert np.all(left.data == right.data)

    def test_add(self):
        total = self.v + self.w
        total.reorder_axis(self.domain)
        self.v.reorder_axis(self.domain)
        self.w.reorder_axis(self.domain)
        assert np.all(total.data == (self.v.data + self.w.data))
        assert np.all((self.v + 2).data == self.v.data + 2)

    def test_sub(self):
        # v and w hold identical data, so the difference is all zeros
        delta = self.v - self.w
        delta.reorder_axis(self.domain)
        assert np.all(delta.data == 0)
        assert np.all((self.v - 2).data == self.v.data - 2)

    def test_eval(self):
        # f(0, 0) = 0 ** 0 == 1 under numpy semantics
        assert self.v.eval({"x": 0, "y": 0}) == 1

    def test_index_eval(self):
        assert self.v.index_eval({"x": 0, "y": 0}) == 1
        # last grid point: x = 4, y = 5
        assert self.v.index_eval({"x": -1, "y": -1}) == 4 ** 5

    def test_pow(self):
        powered = self.v ** self.w
        powered.reorder_axis(self.domain)
        self.v.reorder_axis(self.domain)
        self.w.reorder_axis(self.domain)
        assert np.all(powered.data == (self.v.data ** self.w.data))
        powered = self.v ** 2
        powered.reorder_axis(self.domain)
        self.v.reorder_axis(self.domain)
        assert np.all(powered.data == (self.v.data ** 2))
        powered = 2 ** self.v
        powered.reorder_axis(self.domain)
        self.v.reorder_axis(self.domain)
        assert np.all(powered.data == (2 ** self.v.data))

    def test_get_subset_from_index_range(self):
        subset = self.v.get_subset_from_index_limits({"x": [0, 2], "y": [0, 2]})
        assert subset.shape == (2, 2)


if __name__ == '__main__':
    unittest.main()
def get_variables(self):
    """Wrap every measured series in ``self.data`` as a Variable on axis "t"."""
    domain = self.get_domain()
    variables = []
    for var_name, measurements in self.data.items():
        variables.append(Variable(measurements, domain,
                                  domain2axis={"t": 0},
                                  variable_name=var_name))
    return variables
def explore_noise_discretization(self, noise_range, discretization_range, derivative_in_y, derivatives_in_x,
                                 poly_degree, std_of_discrete_grad=False):
    """
    Grid-sweep the fit quality (r-squared) over noise level and time
    sub-sampling factor, saving diagnostic plots and a CSV along the way.

    :param noise_range: iterable of noise multipliers k; noise added is
        N(0, k * std) per series.  Becomes the row index of the result.
    :param discretization_range: iterable of integer sub-sampling steps
        (used as slice strides, so they must be ints).  Becomes the columns.
    :param derivative_in_y: passed through to ``get_derivative_in_y``; also
        used to label output files.  TODO confirm exact semantics upstream.
    :param derivatives_in_x: passed to ``get_x_operator_func`` /
        ``get_derivative_in_y``.
    :param poly_degree: polynomial degree for the X operator.
    :param std_of_discrete_grad: True if we want to calculate the std of the
        gradient of the series using the discretized version. Otherwise the
        std from the original (full-resolution) series is used.
    :return: DataFrame of r-squared values indexed by noise (rows) and
        discretization step (columns).
    """
    # ---------- save params of experiment ----------
    self.experiments.append({'explore_noise_discretization': {
        'date': datetime.now(),
        'noise_range': noise_range,
        'discretization_range': discretization_range,
        'derivative_in_y': derivative_in_y,
        'derivatives_in_x': derivatives_in_x,
        'poly_degree': poly_degree}
    })
    # ----------------------------------------
    # result grid, filled cell-by-cell inside the double loop below
    rsquares = pd.DataFrame(np.nan, index=noise_range, columns=discretization_range)
    rsquares.index.name = "Noise"
    rsquares.columns.name = "Discretization"
    # ----------------------------------------
    data_manager = self.get_data_manager()
    # per-series std of the absolute gradient: the reference scale for the
    # injected noise (unless std_of_discrete_grad recomputes it later)
    std_of_vars = []
    for var in data_manager.field.data:
        series_grad = np.abs(np.gradient(var.data))
        std_of_vars.append(np.std(series_grad))
        with savefig('Distribution_series_differences_{}'.format(var.get_full_name()), self.experiment_name,
                     subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y)]):
            sns.distplot(series_grad, bins=int(np.sqrt(len(var.data))))
            plt.axvline(x=std_of_vars[-1])
    # ---------- Noise evaluation ----------
    for measure_dt in discretization_range:
        print("\n---------------------")
        print("meassure dt: {}".format(measure_dt))
        print("Noise: ", end='')
        for noise in noise_range:
            print(noise, end='')
            # choose steps with bigger dt; and sum normal noise.
            # sub-sample the time axis with stride measure_dt and rebuild a
            # coarser domain with a proportionally larger step width
            new_t = data_manager.domain.get_range("t")['t'][::measure_dt]
            domain_temp = Domain(lower_limits_dict={"t": np.min(new_t)},
                                 upper_limits_dict={"t": np.max(new_t)},
                                 step_width_dict={"t": data_manager.domain.step_width['t'] * measure_dt})
            # two managers: noisy data to fit on, clean data to evaluate against
            data_manager_temp = DataManager()
            data_manager_original_temp = DataManager()
            for std, var in zip(std_of_vars, data_manager.field.data):
                data_original = var.data[::measure_dt]
                if std_of_discrete_grad:
                    # recompute the noise scale from the sub-sampled series
                    series_grad = np.abs(np.gradient(data_original))
                    std = np.std(series_grad)
                data = data_original + np.random.normal(loc=0, scale=std * noise, size=len(data_original))
                data_manager_temp.add_variables(
                    Variable(data, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
                data_manager_original_temp.add_variables(
                    Variable(data_original, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
            data_manager_temp.add_regressors([])
            data_manager_temp.set_domain()
            data_manager_original_temp.add_regressors([])
            data_manager_original_temp.set_domain()
            # same X/y operators on both managers so predictions are comparable
            data_manager_temp.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
            data_manager_temp.set_y_operator(
                get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))
            data_manager_original_temp.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
            data_manager_original_temp.set_y_operator(
                get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))
            # fit on noisy data, score predictions against the clean targets
            pde_finder = self.fit_eqdifff(data_manager_temp)
            y = data_manager_original_temp.get_y_dframe(self.testSplit)
            yhat = pd.DataFrame(pde_finder.transform(data_manager_temp.get_X_dframe(self.testSplit)),
                                columns=y.columns)
            rsquares.loc[noise, measure_dt] = evaluator.rsquare(yhat=yhat, y=y).values
            # rsquares.loc[noise, measure_dt] = self.get_rsquare_of_eqdiff_fit(pde_finder, data_manager_temp).values
            # diagnostic plots for this (noise, dt) cell
            with savefig('fit_vs_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, str(noise).replace('.', ''),
                                                                   measure_dt), self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_vs_real']):
                self.plot_fitted_vs_real(pde_finder, data_manager_temp)
            with savefig('fit_and_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, str(noise).replace('.', ''),
                                                                    measure_dt), self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real']):
                self.plot_fitted_and_real(pde_finder, data_manager_temp)
            with savefig('zoom_fit_and_real_der_y{}_dt{}_noise{}'.format(derivative_in_y, measure_dt,
                                                                         str(noise).replace('.', '')),
                         self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real_zoom']):
                self.plot_fitted_and_real(pde_finder, data_manager_temp, subinit=self.sub_set_init,
                                          sublen=self.sub_set_len)
    save_csv(rsquares, 'noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name)
    # plt.pcolor(rsquares * (rsquares > 0), cmap='autumn')
    # plt.yticks(np.arange(0.5, len(rsquares.index), 1), np.round(rsquares.index, decimals=2))
    # plt.xticks(np.arange(0.5, len(rsquares.columns), 1), rsquares.columns)
    # ---------- plot heatmap of rsquares ----------
    with savefig('noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name):
        rsquares.index = np.round(rsquares.index, decimals=2)
        plt.close('all')
        # negative r-squared values are masked to 0 in the heatmap
        sns.heatmap(rsquares * (rsquares > 0), annot=True)
        plt.xlabel("Discretization (dt)")
        plt.ylabel("Noise (k*std)")
        plt.title("Noise and discretization for derivative in y {}".format(derivative_in_y))
    return rsquares