def setUp(self):
    """Build the shared fixtures: a sorted random DataFrame, a Graph,
    four Model instances, and three nested model dicts."""
    xs = np.random.randint(-20, 20, size=400)
    ys = np.random.randint(-1000, 1000, size=400)
    self.df = pd.DataFrame({'x': xs})
    # All four y-columns deliberately share the same random series.
    for column in ('y1', 'y2', 'y3', 'y4'):
        self.df[column] = ys
    self.df.sort_values(by=['x', 'y1', 'y2', 'y3', 'y4'], inplace=True)
    self.testGraph = Graph('train', df=self.df)
    self.subdir = './tests'
    self.m1 = Model(self.df['x'], self.df['y1'], 1, rss=0.02345, order=1)
    self.m2 = Model(self.df['x'], self.df['y2'], 1, rss=0.02345, order=2)
    self.m3 = Model(self.df['x'], self.df['y3'], 1, rss=0.12345, order=3)
    self.m4 = Model(self.df['x'], self.df['y4'], 1, rss=0.12345, order=4)
    # Each comparison dict gets its own copy of the y -> Model mapping.
    bundle = {'y1': self.m1, 'y2': self.m2, 'y3': self.m3, 'y4': self.m4}
    self.m1d = {'m1': dict(bundle)}
    self.m2d = {'m2': dict(bundle)}
    self.m3d = {'m3': dict(bundle)}
def test_plot_model(self):
    """plot_model must raise without a plot type and return truthy for
    both 'best fit' and 'ideal' plots."""
    # Missing plt_type is an error.
    with self.assertRaises(PlotTypeException):
        self.testGraph.plot_model(self.m1)
    self.assertTrue(
        self.testGraph.plot_model(self.m1, plt_type='best fit',
                                  subdir='./tests'))
    # Fresh data for the 'ideal' plot variant.
    xs = np.random.randint(-20, 20, size=400)
    ys = np.random.randint(-1000, 1000, size=400)
    df = pd.DataFrame({'x': xs, 'y': ys})
    fit_model = Model(df['x'], df['y'], 1)
    ideal_graph = Graph('idealtest', df=df)
    self.assertTrue(
        ideal_graph.plot_model(fit_model, plt_type='ideal',
                               subdir='./tests', fit_model=fit_model))
def compare_errors(self, test_map, train_df):
    """Identify errors and plot processed data.

    For each key in *test_map*, rebuilds the test and ideal point sets,
    plots test vs. ideal, and records the training-derived max error and
    best-fit on a Model.

    # assumes test_map[k] is a dict holding 'ideal_fun' (a column name
    # like 'yNN'), k -> test (x, y) pairs, and ideal_fun -> ideal
    # (x, y) pairs — TODO confirm against caller.
    """
    models = {}
    for k, v in test_map.items():
        ideal_ = test_map[k]['ideal_fun']
        test_arr = test_map[k][k]
        ideal_arr = test_map[k][ideal_]
        # Unzip the (x, y) pairs into separate coordinate lists.
        test_axes = list(starmap(lambda x, y: [x, y], test_arr))
        ideal_axes = list(starmap(lambda x, y: [x, y], ideal_arr))
        t_x = [ax[0] for ax in test_axes]
        t_y = [ax[1] for ax in test_axes]
        id_x = [ax[0] for ax in ideal_axes]
        id_y = [ax[1] for ax in ideal_axes]
        # Graph test vs ideal funs
        t_df = pd.DataFrame()
        t_df['x'] = t_x
        t_df['y'] = t_y
        plot_ = Graph(f'Ideal vs. Test', df=t_df)
        # ideal_[1:] strips the leading 'y' from the column name —
        # presumably the numeric function id; verify against Model.
        model = Model(id_x, id_y, ideal_[1:])
        # identify max error of training function
        c = f'{k}_max_err'
        tr_err = round(train_df[c][0], 6)
        # match ideal function with training function
        bf = f'{k}_best_fit'
        fit = train_df[bf][0]
        model.set_best_fit(fit)
        model.set_max_dev(tr_err)
        # NOTE(review): `model.__bf` is name-mangled to
        # `model._<ThisClass>__bf` here, NOT Model's private `__bf`;
        # this likely raises AttributeError at runtime — confirm that
        # Model exposes a public accessor and use it instead.
        models[k] = model.__bf
        # plot graph
        plot_.plot_model(model, plt_type='test_vs_ideal',
                         fit_model=model.__bf)
    return self.df, models
def test_make_subplots(self):
    """make_subplots must return truthy and write the expected PDFs."""
    made = self.testGraph.make_subplots(self.testGraph.title,
                                        subdir=self.subdir)
    self.assertTrue(made)
    with open(f'{self.subdir}/{self.testGraph.title}.pdf', 'r') as file:
        self.assertTrue(file,
                        'file should have been created in (subdir) dir')
    # test model comparison
    comparison_graph = Graph('model', df=self.df)
    comparison = comparison_graph.make_subplots(
        'model',
        models={'m1': self.m1d, 'm2': self.m2d, 'm3': self.m3d},
        subdir=self.subdir)
    self.assertTrue(comparison)
    # One PDF per y-column should now exist on disk.
    for i in (1, 2, 3):
        pdf_path = f'{self.subdir}/y{i}_{comparison_graph.title}.pdf'
        with open(pdf_path, 'r') as file:
            self.assertTrue(
                file, 'file should have been created in (subdir) dir')
def fit_model(self, *args, **kwargs):
    """Fit a regression model with training data function.

    Positional args (in order): column index (int, builds 'y{col}'),
    an ideal Data object (may be empty), and the regression type
    ('linear' or 'poly.fit').

    :keyword order: maximum polynomial order to try (poly.fit only)
    :keyword subplot: plot per-order subplots when multiple fits found
    :keyword print_table: save fit statistics via df_to_html
    :keyword table_name: optional 'dir/name' override for the table
    :returns: the fitted Model (for poly.fit, the last order fitted)
    :raises RegressionException: if r_type is not a supported value
    """
    col = args[0]
    _ideal = args[1]
    r_type = args[2]
    order = kwargs.get('order', 1)
    subplot = kwargs.get('subplot', False)
    print_table = kwargs.get('print_table', False)
    table_name = kwargs.get('table_name', '')
    col_name = f'y{col}'
    # NOTE(review): _var is initialized but never used below.
    subplot_array, _n, _rss, _rmse, _max_e, _var = [], [], [], [], [], []
    if r_type == "linear":
        x = self.df['x'].values
        lr = stats.linregress(self.df['x'], self.df[col_name])
        fun = lr.slope * x + lr.intercept
        model = Model(x, fun, col)
        try:
            # match ideal function if ideal Data object is passed
            if not _ideal.is_empty():
                model.find_ideal_function(_ideal)
                _rmse.append(model.rmse)
                _max_e.append(model.max_dev)
            if print_table:
                model_df = pd.DataFrame()
                model_df['RMSE'] = [round(float(i), 5) for i in _rmse]
                model_df['MRE'] = [round(float(i), 5) for i in _max_e]
                if not table_name:
                    self.df_to_html(model_df, f'{col_name}_linear')
                else:
                    # table_name is 'dir/name'; split once from the right.
                    out = table_name.rsplit('/', 1)
                    self.df_to_html(model_df, f'{out[1]}',
                                    output_dir=out[0])
            return model
        except AttributeError:
            # raised if _ideal df is empty
            # NOTE(review): re-raising a bare Exception loses the chain;
            # consider a project exception with `raise ... from err`.
            raise Exception("ideal df empty or None")
    if r_type == "poly.fit":
        model = Model([], [], col)
        rss_max = 1000
        # Iterates through orders and returns fit with
        # minimum residual error, with weight=1/y
        for i in range(1, order + 1):
            # weight 1/y: zero y-values would divide by zero — TODO confirm
            # the training data cannot contain y == 0.
            weight = 1 / self.df[col_name]
            fn = P.fit(self.df['x'], self.df[col_name], i,
                       full=True, w=weight)
            # full=True returns (series, [resid, rank, sv, rcond]).
            coeff, det = fn
            fn_x, fn_y = coeff.linspace(n=400)
            model = Model(fn_x, fn_y, col, rss=det[0], order=i)
            # match ideal function if ideal Data object is passed
            if not _ideal.is_empty():
                model.find_ideal_function(_ideal)
            _n.append(i)
            _rss.append(model.rss)
            _rmse.append(model.rmse)
            _max_e.append(model.max_dev)
            # Keep only reasonably good fits for the optional subplots.
            if det[0] < rss_max:
                subplot_array.append(model)
        if print_table:
            model_df = pd.DataFrame()
            model_df['Order'] = _n
            model_df['RSS'] = [i[0].round(5) for i in _rss]
            model_df['RMSE'] = [i.round(5) for i in _rmse]
            model_df['MRE'] = [i.round(5) for i in _max_e]
            self.df_to_html(model_df, f'{col_name}_order-{order}')
            return model
        elif not subplot or len(subplot_array) <= 1:
            return model
        else:
            subplot_graph = Graph('Polynomial order and weighted NLR',
                                  self.df)
            subplot_graph.make_subplots(subplot_array)
            return model
    else:
        raise RegressionException("You must provide a valid type "
                                  "of regression via keyword arg.")
def __init__(self, **kwargs):
    """
    :keyword map_train: run Model functions entirely and plot matched
     ideal vs test data
    :keyword to_db: create SQLite db table for created Data obj
    :keyword plt_type: which type of fit algorithm to run, 'linear'
     or 'best fit'
    :keyword with_rmse: include rmse values in graph
    :keyword print_table: save stats from model comparisons as a
     .pdf table
    :keyword plot: plot data and save
    :keyword plot_training_subplots: display and save training data
     as subplots
    :keyword compare_models: shortcut to just plot comparison of fitted
     models, values is dict of fitted model dicts
    """
    to_db = kwargs.get('to_db', True)
    # NOTE(review): _create is read from kwargs but never used below.
    _create = kwargs.get('create_tables', True)
    self.train = Data('training_data', to_db=to_db)
    self.ideal = Data('ideal_functions', to_db=to_db)
    self.test = Data('test_data')
    self.train_master = self.train.csv_to_df()
    # csv_to_df() is called twice; presumably it re-reads the CSV each
    # time — TODO confirm this is intentional rather than reusing
    # self.train_master.
    self.train_graph = Graph("Training Data", df=self.train.csv_to_df())
    self.ideal_fn_dict = {'x': self.train.df['x']}
    self._n = kwargs.get('_n', {})
    self.plt_type = kwargs.get('plt_type', 'best fit')
    self.with_rmse = kwargs.get('with_rmse', True)
    self.print_table = kwargs.get('print_table', True)
    self.plot = kwargs.get('plot', True)
    map_train = kwargs.get('map_train', True)
    continue_matching = kwargs.get('continue_matching', True)
    self.models = dict()
    self.models_master_1 = dict()
    self.models_master_2 = dict()
    self.models_master_3 = dict()
    self.result = tuple()
    # NOTE(review): `model` is declared global but never assigned in
    # this method — verify it is needed here at all.
    global model
    if 'compare_models' in kwargs.keys():
        models = kwargs.get('compare_models')
        self.train_graph.make_subplots('Model Comparison',
                                       models={
                                           'm1': models['m1'],
                                           'm2': models['m2'],
                                           'm3': models['m3']
                                       })
    if 'plot_training_subplots' in kwargs.keys():
        self.train_graph.make_subplots(self.train_graph.title)
    if continue_matching:
        check_n_size(self._n)
        idx = 1
        # Consume the per-column order lists in lockstep until y1 is
        # exhausted.
        while self._n['y1']:
            # NOTE(review): 'y3' is never popped here although the
            # fixtures elsewhere carry y1..y4 — confirm whether y3 is
            # intentionally skipped.
            n = {
                'y1': self._n['y1'].pop(0),
                'y2': self._n['y2'].pop(0),
                'y4': self._n['y4'].pop(0)
            }
            self._fit(n, idx)
            idx += 1
        self.ideal_fn_df = pd.DataFrame(data=self.ideal_fn_dict)
        self.ideal_fn_df = self.ideal_fn_df.set_index('x')
        self.test_df = self.test.csv_to_df()
        test_model = Model(self.test_df['x'], self.test_df['y'], 1,
                           df=self.test_df)
        finals = test_model.match_ideal_functions(self.ideal_fn_df,
                                                  self.train_master,
                                                  self.models,
                                                  map_train=map_train)
        if 'run_complete' in kwargs.keys():
            # Persist the matched test rows to the database.
            self.test.df_to_db(finals[0])
        else:
            self.result = finals
class VisualizeTest(unittest.TestCase):
    """Exercise Graph.make_subplots and Graph.plot_model against random
    fixture data, verifying both return values and emitted PDF files."""

    def setUp(self):
        """Create a sorted random DataFrame, a Graph, four Models, and
        three nested comparison dicts shared by the tests."""
        xs = np.random.randint(-20, 20, size=400)
        ys = np.random.randint(-1000, 1000, size=400)
        self.df = pd.DataFrame({'x': xs})
        # All four y-columns deliberately share one random series.
        for column in ('y1', 'y2', 'y3', 'y4'):
            self.df[column] = ys
        self.df.sort_values(by=['x', 'y1', 'y2', 'y3', 'y4'],
                            inplace=True)
        self.testGraph = Graph('train', df=self.df)
        self.subdir = './tests'
        self.m1 = Model(self.df['x'], self.df['y1'], 1,
                        rss=0.02345, order=1)
        self.m2 = Model(self.df['x'], self.df['y2'], 1,
                        rss=0.02345, order=2)
        self.m3 = Model(self.df['x'], self.df['y3'], 1,
                        rss=0.12345, order=3)
        self.m4 = Model(self.df['x'], self.df['y4'], 1,
                        rss=0.12345, order=4)
        bundle = {'y1': self.m1, 'y2': self.m2,
                  'y3': self.m3, 'y4': self.m4}
        self.m1d = {'m1': dict(bundle)}
        self.m2d = {'m2': dict(bundle)}
        self.m3d = {'m3': dict(bundle)}

    def test_make_subplots(self):
        """make_subplots returns truthy and writes the expected PDFs."""
        made = self.testGraph.make_subplots(self.testGraph.title,
                                            subdir=self.subdir)
        self.assertTrue(made)
        with open(f'{self.subdir}/{self.testGraph.title}.pdf',
                  'r') as file:
            self.assertTrue(
                file, 'file should have been created in (subdir) dir')
        # test model comparison
        comparison_graph = Graph('model', df=self.df)
        comparison = comparison_graph.make_subplots(
            'model',
            models={'m1': self.m1d, 'm2': self.m2d, 'm3': self.m3d},
            subdir=self.subdir)
        self.assertTrue(comparison)
        # One PDF per y-column should now exist on disk.
        for i in (1, 2, 3):
            pdf_path = f'{self.subdir}/y{i}_{comparison_graph.title}.pdf'
            with open(pdf_path, 'r') as file:
                self.assertTrue(
                    file,
                    'file should have been created in (subdir) dir')

    def test_plot_model(self):
        """plot_model raises without a type and succeeds for valid
        'best fit' and 'ideal' plot types."""
        with self.assertRaises(PlotTypeException):
            self.testGraph.plot_model(self.m1)
        self.assertTrue(
            self.testGraph.plot_model(self.m1, plt_type='best fit',
                                      subdir='./tests'))
        # Fresh data for the 'ideal' plot variant.
        xs = np.random.randint(-20, 20, size=400)
        ys = np.random.randint(-1000, 1000, size=400)
        df = pd.DataFrame({'x': xs, 'y': ys})
        fit_model = Model(df['x'], df['y'], 1)
        ideal_graph = Graph('idealtest', df=df)
        self.assertTrue(
            ideal_graph.plot_model(fit_model, plt_type='ideal',
                                   subdir='./tests',
                                   fit_model=fit_model))