Esempio n. 1
0
    def setUp(self):
        """Create the fixtures shared by the tests in this case.

        Builds a 400-row frame of random integers (``x`` in [-20, 20),
        ``y1``..``y4`` all holding the *same* random series in [-1000, 1000)),
        sorts it, wraps it in a ``Graph`` titled ``'train'`` and creates four
        ``Model`` objects (orders 1-4).  The models are then grouped into
        three single-key dicts (``m1d``, ``m2d``, ``m3d``) that each map every
        y-column name to its model.
        """
        # x is drawn first, then y, so the random stream is consumed in the
        # same order regardless of how the frame is assembled afterwards.
        x_values = np.random.randint(-20, 20, size=400)
        y_values = np.random.randint(-1000, 1000, size=400)

        y_columns = ['y1', 'y2', 'y3', 'y4']
        self.df = pd.DataFrame()
        self.df['x'] = x_values
        for column in y_columns:
            # every y-column intentionally shares one random series
            self.df[column] = y_values
        self.df.sort_values(by=['x'] + y_columns, inplace=True)
        self.testGraph = Graph('train', df=self.df)
        self.subdir = './tests'

        def model_for(column, rss, order):
            # All models use the sorted x column and function number 1.
            return Model(self.df['x'], self.df[column], 1, rss=rss, order=order)

        self.m1 = model_for('y1', 0.02345, 1)
        self.m2 = model_for('y2', 0.02345, 2)
        self.m3 = model_for('y3', 0.12345, 3)
        self.m4 = model_for('y4', 0.12345, 4)

        def per_column():
            # A fresh dict per call so the three groups never share state.
            return {
                'y1': self.m1,
                'y2': self.m2,
                'y3': self.m3,
                'y4': self.m4,
            }

        self.m1d = {'m1': per_column()}
        self.m2d = {'m2': per_column()}
        self.m3d = {'m3': per_column()}
Esempio n. 2
0
    def test_plot_model(self):
        """Check ``Graph.plot_model`` with a missing, a 'best fit' and an 'ideal' plot type."""
        # Without a plt_type the call must raise PlotTypeException.
        self.assertRaises(PlotTypeException,
                          self.testGraph.plot_model,
                          self.m1)

        best_fit_outcome = self.testGraph.plot_model(self.m1,
                                                     plt_type='best fit',
                                                     subdir='./tests')
        self.assertTrue(best_fit_outcome)

        # Independent random frame for the 'ideal' plot; x is drawn before y.
        x_values = np.random.randint(-20, 20, size=400)
        y_values = np.random.randint(-1000, 1000, size=400)
        frame = pd.DataFrame()
        frame['x'] = x_values
        frame['y'] = y_values

        fit_model = Model(frame['x'], frame['y'], 1)
        ideal_graph = Graph('idealtest', df=frame)
        # The same model is passed both as the plotted model and as fit_model.
        ideal_outcome = ideal_graph.plot_model(fit_model,
                                               plt_type='ideal',
                                               subdir='./tests',
                                               fit_model=fit_model)
        self.assertTrue(ideal_outcome)
Esempio n. 3
0
    def compare_errors(self, test_map, train_df):
        """Build one ideal-function model per ``test_map`` entry and plot it.

        For every key ``k`` in ``test_map`` the entry ``test_map[k]`` is read
        for three items: ``'ideal_fun'`` (name of the matched ideal function),
        ``k`` (iterable of ``(x, y)`` test pairs) and the ideal function name
        itself (iterable of ``(x, y)`` ideal pairs).  A ``Model`` is built
        from the ideal pairs, given the best fit and max error found in row
        label ``0`` of ``train_df`` (columns ``'<k>_best_fit'`` and
        ``'<k>_max_err'``), and plotted against the test points.

        :param test_map: mapping of training-column key to the entry
            described above
        :param train_df: frame holding the ``<k>_max_err`` / ``<k>_best_fit``
            columns
        :return: tuple ``(self.df, models)`` where ``models`` maps each key
            to the model's ``__bf`` attribute
        """

        def as_pair(x, y):
            # Each element must unpack into exactly two values.
            return [x, y]

        models = {}

        for key, entry in test_map.items():
            ideal_name = entry['ideal_fun']
            raw_test = entry[key]
            raw_ideal = entry[ideal_name]
            test_pairs = list(starmap(as_pair, raw_test))
            ideal_pairs = list(starmap(as_pair, raw_ideal))
            test_xs = [pair[0] for pair in test_pairs]
            test_ys = [pair[1] for pair in test_pairs]
            ideal_xs = [pair[0] for pair in ideal_pairs]
            ideal_ys = [pair[1] for pair in ideal_pairs]

            # Graph holds the test points; the Model holds the ideal points.
            test_frame = pd.DataFrame()
            test_frame['x'] = test_xs
            test_frame['y'] = test_ys
            graph = Graph('Ideal vs. Test', df=test_frame)
            # ideal_name[1:] drops the first character of the name
            # (presumably a leading 'y' - TODO confirm against the caller).
            ideal_model = Model(ideal_xs, ideal_ys, ideal_name[1:])

            # Max error of the training function, rounded to 6 decimals.
            max_err = round(train_df[f'{key}_max_err'][0], 6)

            # Best fit recorded for the training function.
            best_fit = train_df[f'{key}_best_fit'][0]

            ideal_model.set_best_fit(best_fit)
            ideal_model.set_max_dev(max_err)
            # NOTE: ``__bf`` is name-mangled by Python relative to the class
            # this method is defined in, so it must stay inside the method.
            models[key] = ideal_model.__bf

            graph.plot_model(ideal_model,
                             plt_type='test_vs_ideal',
                             fit_model=ideal_model.__bf)

        return self.df, models
Esempio n. 4
0
    def test_make_subplots(self):
        """Check ``Graph.make_subplots`` for plain data and for model comparison.

        Each existence check opens the expected PDF; a missing file makes
        ``open`` raise, which fails the test.  Only the ``y1``..``y3``
        comparison files are checked.
        """
        missing_msg = 'file should have been created in (subdir) dir'

        plotted = self.testGraph.make_subplots(self.testGraph.title,
                                               subdir=self.subdir)
        self.assertTrue(plotted)
        training_pdf = f'{self.subdir}/{self.testGraph.title}.pdf'
        with open(training_pdf, 'r') as handle:
            self.assertTrue(handle, missing_msg)

        # Model comparison: one output file per y-column is expected.
        comparison_graph = Graph('model', df=self.df)
        grouped_models = {
            'm1': self.m1d,
            'm2': self.m2d,
            'm3': self.m3d,
        }
        compared = comparison_graph.make_subplots('model',
                                                  models=grouped_models,
                                                  subdir=self.subdir)
        self.assertTrue(compared)
        for column_no in (1, 2, 3):
            comparison_pdf = (
                f'{self.subdir}/y{column_no}_{comparison_graph.title}.pdf')
            with open(comparison_pdf, 'r') as handle:
                self.assertTrue(handle, missing_msg)
Esempio n. 5
0
    def fit_model(self, *args, **kwargs):
        """Fit a regression model to one training-data column.

        Positional arguments (all three are required, extras are ignored):

        * ``args[0]`` (col) - column number; the fitted column is
          ``self.df[f'y{col}']`` and ``col`` is passed on to ``Model``.
        * ``args[1]`` (_ideal) - object exposing ``is_empty()``; when it is
          not empty the fitted model is matched with
          ``Model.find_ideal_function(_ideal)``.
        * ``args[2]`` (r_type) - ``"linear"`` or ``"poly.fit"``.

        :keyword order: highest polynomial degree tried by ``"poly.fit"``;
            degrees ``1..order`` are fitted in turn (default 1)
        :keyword subplot: ``"poly.fit"`` only - plot the collected models as
            subplots when more than one was collected (default False)
        :keyword print_table: write the collected statistics with
            ``self.df_to_html`` (default False)
        :keyword table_name: ``"linear"`` only - ``'<dir>/<name>'`` target
            for the table; a value without ``'/'`` is used as the name with
            ``df_to_html``'s default output directory

        :return: the fitted ``Model``.  For ``"poly.fit"`` this is the model
            of the *last* order tried (not the one with the smallest
            residual), or an empty ``Model`` when ``order < 1``.
        :raises RegressionException: if ``r_type`` is not a supported type
        :raises Exception: for ``"linear"``, when an ``AttributeError`` occurs
            while matching or writing the table (e.g. ``_ideal`` is None);
            the original error is chained as ``__cause__``
        """

        col = args[0]
        _ideal = args[1]
        r_type = args[2]

        order = kwargs.get('order', 1)
        subplot = kwargs.get('subplot', False)
        print_table = kwargs.get('print_table', False)
        table_name = kwargs.get('table_name', '')

        col_name = f'y{col}'

        if r_type == "linear":
            _rmse, _max_e = [], []
            x = self.df['x'].values
            lr = stats.linregress(self.df['x'], self.df[col_name])
            fun = lr.slope * x + lr.intercept
            model = Model(x, fun, col)
            try:
                # match ideal function if ideal Data object is passed
                if not _ideal.is_empty():
                    model.find_ideal_function(_ideal)
                    _rmse.append(model.rmse)
                    _max_e.append(model.max_dev)
                if print_table:
                    model_df = pd.DataFrame()
                    model_df['RMSE'] = [round(float(i), 5) for i in _rmse]
                    model_df['MRE'] = [round(float(i), 5) for i in _max_e]
                    if not table_name:
                        self.df_to_html(model_df, f'{col_name}_linear')
                    else:
                        # rpartition never raises: a name without '/' yields
                        # an empty separator instead of a one-element list.
                        out_dir, sep, out_name = table_name.rpartition('/')
                        if sep:
                            self.df_to_html(model_df,
                                            out_name,
                                            output_dir=out_dir)
                        else:
                            self.df_to_html(model_df, table_name)
                return model
            except AttributeError as err:
                # Same exception type and message as before; chaining keeps
                # the real cause visible in the traceback.
                raise Exception("ideal df empty or None") from err

        if r_type != "poly.fit":
            raise RegressionException("You must provide a valid type "
                                      "of regression via keyword arg.")

        subplot_array, _n, _rss, _rmse, _max_e = [], [], [], [], []
        model = Model([], [], col)
        rss_max = 1000
        # Weight 1/y is the same for every order, so compute it once.
        weight = 1 / self.df[col_name]
        # Fit every order from 1 to `order`; models whose residual is below
        # rss_max are collected for the optional subplot.
        for i in range(1, order + 1):
            # With full=True the fit returns the series plus diagnostics,
            # whose first element is the residual
            # (assuming P is numpy.polynomial.Polynomial - TODO confirm).
            coeff, det = P.fit(self.df['x'],
                               self.df[col_name],
                               i,
                               full=True,
                               w=weight)
            fn_x, fn_y = coeff.linspace(n=400)
            model = Model(fn_x, fn_y, col, rss=det[0], order=i)
            # match ideal function if ideal Data object is passed
            if not _ideal.is_empty():
                model.find_ideal_function(_ideal)
                _n.append(i)
                _rss.append(model.rss)
                _rmse.append(model.rmse)
                _max_e.append(model.max_dev)
            if det[0] < rss_max:
                subplot_array.append(model)

        if print_table:
            # Writing the table takes precedence over plotting subplots.
            model_df = pd.DataFrame()
            model_df['Order'] = _n
            model_df['RSS'] = [i[0].round(5) for i in _rss]
            model_df['RMSE'] = [i.round(5) for i in _rmse]
            model_df['MRE'] = [i.round(5) for i in _max_e]
            self.df_to_html(model_df, f'{col_name}_order-{order}')
            return model

        if not subplot or len(subplot_array) <= 1:
            return model

        subplot_graph = Graph('Polynomial order and weighted NLR',
                              self.df)
        # NOTE(review): the list of models is passed as the first positional
        # argument here - confirm this matches Graph.make_subplots' signature.
        subplot_graph.make_subplots(subplot_array)
        return model
Esempio n. 6
0
    def __init__(self, **kwargs):
        """
        Load the training, ideal and test data and, unless disabled, run the
        fit / match pipeline.

        :keyword map_train: forwarded to ``Model.match_ideal_functions``
        (default True); per the original docs: run Model functions entirely
        and plot matched ideal vs test data

        :keyword to_db: forwarded to the training and ideal ``Data`` objects
        (default True); per the original docs: create SQLite db table for
        created Data obj

        :keyword plt_type: which type of fit algorithm to run, 'linear' or
        'best fit' (default 'best fit'); stored on the instance

        :keyword with_rmse: include rmse values in graph (default True);
        stored on the instance

        :keyword print_table: save stats from model comparisons as a table
        (default True); stored on the instance

        :keyword plot: plot data and save (default True); stored on the
        instance

        :keyword plot_training_subplots: when present (any value), display
        and save training data as subplots

        :keyword compare_models: when present, plot a comparison of fitted
        models; the value is a dict with keys 'm1', 'm2' and 'm3'

        :keyword _n: dict of lists keyed by 'y1', 'y2', 'y4' (default {});
        it is checked with ``check_n_size`` and its lists are consumed
        (emptied in place) while fitting

        :keyword continue_matching: run the fit and ideal-function matching
        steps (default True)

        :keyword run_complete: when present (any value), the first element of
        the matching result is written with ``self.test.df_to_db``; otherwise
        the whole result is stored in ``self.result``

        :keyword create_tables: accepted for backward compatibility; it is
        not used by this constructor
        """

        to_db = kwargs.get('to_db', True)

        self.train = Data('training_data', to_db=to_db)
        self.ideal = Data('ideal_functions', to_db=to_db)
        self.test = Data('test_data')

        # The training CSV is read twice on purpose so that the master copy
        # and the graph each hold their own frame.
        self.train_master = self.train.csv_to_df()
        self.train_graph = Graph("Training Data", df=self.train.csv_to_df())
        self.ideal_fn_dict = {'x': self.train.df['x']}

        self._n = kwargs.get('_n', {})
        self.plt_type = kwargs.get('plt_type', 'best fit')
        self.with_rmse = kwargs.get('with_rmse', True)
        self.print_table = kwargs.get('print_table', True)
        self.plot = kwargs.get('plot', True)

        map_train = kwargs.get('map_train', True)
        continue_matching = kwargs.get('continue_matching', True)

        self.models = {}
        self.models_master_1 = {}
        self.models_master_2 = {}
        self.models_master_3 = {}
        self.result = ()

        if 'compare_models' in kwargs:
            models = kwargs.get('compare_models')
            self.train_graph.make_subplots('Model Comparison',
                                           models={
                                               'm1': models['m1'],
                                               'm2': models['m2'],
                                               'm3': models['m3']
                                           })

        if 'plot_training_subplots' in kwargs:
            self.train_graph.make_subplots(self.train_graph.title)

        if continue_matching:

            check_n_size(self._n)
            idx = 1
            # One fit pass per remaining entry in the 'y1' list.  pop(0)
            # consumes the lists in place, so the dict passed as `_n` is
            # emptied by the time the loop ends.
            # NOTE(review): 'y3' is not part of the per-pass dict - confirm
            # whether that column is skipped intentionally.
            while self._n['y1']:
                n = {
                    'y1': self._n['y1'].pop(0),
                    'y2': self._n['y2'].pop(0),
                    'y4': self._n['y4'].pop(0)
                }
                self._fit(n, idx)
                idx += 1

            self.ideal_fn_df = pd.DataFrame(data=self.ideal_fn_dict)
            self.ideal_fn_df = self.ideal_fn_df.set_index('x')

            self.test_df = self.test.csv_to_df()
            test_model = Model(self.test_df['x'],
                               self.test_df['y'],
                               1,
                               df=self.test_df)

            finals = test_model.match_ideal_functions(self.ideal_fn_df,
                                                      self.train_master,
                                                      self.models,
                                                      map_train=map_train)

            # Only the presence of 'run_complete' matters, not its value.
            if 'run_complete' in kwargs:
                self.test.df_to_db(finals[0])
            else:
                self.result = finals
Esempio n. 7
0
class VisualizeTest(unittest.TestCase):
    """Tests for ``Graph.make_subplots`` and ``Graph.plot_model`` on random data."""

    def setUp(self):
        """Create the fixtures shared by the tests in this case.

        Builds a 400-row frame of random integers (``x`` in [-20, 20),
        ``y1``..``y4`` all holding the *same* random series in [-1000, 1000)),
        sorts it, wraps it in a ``Graph`` titled ``'train'`` and creates four
        ``Model`` objects (orders 1-4).  The models are then grouped into
        three single-key dicts (``m1d``, ``m2d``, ``m3d``) that each map every
        y-column name to its model.
        """
        # x is drawn first, then y, so the random stream is consumed in the
        # same order regardless of how the frame is assembled afterwards.
        x_values = np.random.randint(-20, 20, size=400)
        y_values = np.random.randint(-1000, 1000, size=400)

        y_columns = ['y1', 'y2', 'y3', 'y4']
        self.df = pd.DataFrame()
        self.df['x'] = x_values
        for column in y_columns:
            # every y-column intentionally shares one random series
            self.df[column] = y_values
        self.df.sort_values(by=['x'] + y_columns, inplace=True)
        self.testGraph = Graph('train', df=self.df)
        self.subdir = './tests'

        def model_for(column, rss, order):
            # All models use the sorted x column and function number 1.
            return Model(self.df['x'], self.df[column], 1, rss=rss, order=order)

        self.m1 = model_for('y1', 0.02345, 1)
        self.m2 = model_for('y2', 0.02345, 2)
        self.m3 = model_for('y3', 0.12345, 3)
        self.m4 = model_for('y4', 0.12345, 4)

        def per_column():
            # A fresh dict per call so the three groups never share state.
            return {
                'y1': self.m1,
                'y2': self.m2,
                'y3': self.m3,
                'y4': self.m4,
            }

        self.m1d = {'m1': per_column()}
        self.m2d = {'m2': per_column()}
        self.m3d = {'m3': per_column()}

    def test_make_subplots(self):
        """Check ``Graph.make_subplots`` for plain data and for model comparison.

        Each existence check opens the expected PDF; a missing file makes
        ``open`` raise, which fails the test.  Only the ``y1``..``y3``
        comparison files are checked.
        """
        missing_msg = 'file should have been created in (subdir) dir'

        plotted = self.testGraph.make_subplots(self.testGraph.title,
                                               subdir=self.subdir)
        self.assertTrue(plotted)
        training_pdf = f'{self.subdir}/{self.testGraph.title}.pdf'
        with open(training_pdf, 'r') as handle:
            self.assertTrue(handle, missing_msg)

        # Model comparison: one output file per y-column is expected.
        comparison_graph = Graph('model', df=self.df)
        grouped_models = {
            'm1': self.m1d,
            'm2': self.m2d,
            'm3': self.m3d,
        }
        compared = comparison_graph.make_subplots('model',
                                                  models=grouped_models,
                                                  subdir=self.subdir)
        self.assertTrue(compared)
        for column_no in (1, 2, 3):
            comparison_pdf = (
                f'{self.subdir}/y{column_no}_{comparison_graph.title}.pdf')
            with open(comparison_pdf, 'r') as handle:
                self.assertTrue(handle, missing_msg)

    def test_plot_model(self):
        """Check ``Graph.plot_model`` with a missing, a 'best fit' and an 'ideal' plot type."""
        # Without a plt_type the call must raise PlotTypeException.
        self.assertRaises(PlotTypeException,
                          self.testGraph.plot_model,
                          self.m1)

        best_fit_outcome = self.testGraph.plot_model(self.m1,
                                                     plt_type='best fit',
                                                     subdir='./tests')
        self.assertTrue(best_fit_outcome)

        # Independent random frame for the 'ideal' plot; x is drawn before y.
        x_values = np.random.randint(-20, 20, size=400)
        y_values = np.random.randint(-1000, 1000, size=400)
        frame = pd.DataFrame()
        frame['x'] = x_values
        frame['y'] = y_values

        fit_model = Model(frame['x'], frame['y'], 1)
        ideal_graph = Graph('idealtest', df=frame)
        # The same model is passed both as the plotted model and as fit_model.
        ideal_outcome = ideal_graph.plot_model(fit_model,
                                               plt_type='ideal',
                                               subdir='./tests',
                                               fit_model=fit_model)
        self.assertTrue(ideal_outcome)