Ejemplo n.º 1
0
    def test_pipeline_first_fit_stage(self):
        """Check the fit event stored at index 0 of the pipeline's fitStages.

        Compares the event's model, df, spec and featureColumns against
        hand-built expected values for a 'PCA' stage.
        """
        stage_fit = self.pipeline_event.fitStages[0].fe

        # Model attached to the first fit stage.
        utils.is_equal_transformer(
            stage_fit.model,
            modeldb_types.Transformer(-1, 'PCA', 'decomposition PCA'),
            self)

        # Dataframe attached to the first fit stage.
        actual_df = stage_fit.df
        schema = [
            modeldb_types.DataFrameColumn(label, 'int64')
            for label in ('A', 'B')
        ]
        utils.is_equal_dataframe(
            actual_df,
            modeldb_types.DataFrame(-1, schema, 100, 'digits-dataset'),
            self)

        # Spec: every hyperparameter is built with the same FMIN/FMAX pair.
        actual_spec = stage_fit.spec
        hyperparams = [
            modeldb_types.HyperParameter(label, value, kind, FMIN, FMAX)
            for label, value, kind in (
                ('copy', 'True', 'bool'),
                ('n_components', 'None', 'NoneType'),
                ('whiten', 'False', 'bool'),
            )
        ]
        utils.is_equal_transformer_spec(
            actual_spec,
            modeldb_types.TransformerSpec(
                -1, 'PCA', hyperparams, 'decomposition PCA'),
            self)

        self.assertEqual(stage_fit.featureColumns, ['A', 'B'])
Ejemplo n.º 2
0
    def test_pipeline_first_transform_stage(self):
        """Check the transform event stored at index 0 of transformStages.

        Compares the event's transformer, oldDataFrame and newDataFrame
        against hand-built expected values.
        """
        stage_transform = self.pipeline_event.transformStages[0].te

        utils.is_equal_transformer(
            stage_transform.transformer,
            modeldb_types.Transformer(-1, 'PCA', 'decomposition PCA'),
            self)

        # Expected old dataframe: columns 'A' and 'B' declared as 'int64'.
        actual_old = stage_transform.oldDataFrame
        old_schema = [
            modeldb_types.DataFrameColumn(label, 'int64')
            for label in ('A', 'B')
        ]
        wanted_old = modeldb_types.DataFrame(
            -1, old_schema, 100, 'digits-dataset')
        utils.is_equal_dataframe(wanted_old, actual_old, self)

        # Expected new dataframe: columns '0' and '1' declared as 'float64'.
        actual_new = stage_transform.newDataFrame
        new_schema = [
            modeldb_types.DataFrameColumn(label, 'float64')
            for label in ('0', '1')
        ]
        wanted_new = modeldb_types.DataFrame(-1, new_schema, 100, '')
        utils.is_equal_dataframe(wanted_new, actual_new, self)
Ejemplo n.º 3
0
    def test_pipeline_second_fit_stage(self):
        """Check the fit event stored at index 1 of the pipeline's fitStages.

        Compares the event's model, df and spec against hand-built expected
        values for a 'LinearRegression' stage.
        """
        stage_fit = self.pipeline_event.fitStages[1].fe

        # Model attached to the second fit stage.
        utils.is_equal_transformer(
            stage_fit.model,
            modeldb_types.Transformer(
                -1, 'LinearRegression', 'basic linear reg'),
            self)

        # The expected dataframe is built with an empty column list.
        utils.is_equal_dataframe(
            stage_fit.df,
            modeldb_types.DataFrame(-1, [], 100, ''),
            self)

        # Spec: every hyperparameter is built with the same FMIN/FMAX pair.
        actual_spec = stage_fit.spec
        hyperparams = [
            modeldb_types.HyperParameter(label, value, kind, FMIN, FMAX)
            for label, value, kind in (
                ('copy_X', 'True', 'bool'),
                ('normalize', 'False', 'bool'),
                ('n_jobs', '1', 'int'),
                ('fit_intercept', 'True', 'bool'),
            )
        ]
        utils.is_equal_transformer_spec(
            actual_spec,
            modeldb_types.TransformerSpec(
                -1, 'LinearRegression', hyperparams, 'basic linear reg'),
            self)
Ejemplo n.º 4
0
    def test_gridcv_event(self):
        """Check the grid-search event and the fit event in its bestFit.

        Validates the event, asserts numFolds is 3, then compares the best
        fit's df, model and featureColumns against expected values.
        """
        utils.validate_grid_search_cv_event(self.grid_search_event, self)
        self.assertEqual(self.grid_search_event.numFolds, 3)

        best_fit = self.grid_search_event.bestFit

        # Dataframe attached to the best fit event.
        actual_df = best_fit.df
        schema = [
            modeldb_types.DataFrameColumn(label, 'int64')
            for label in ('A', 'B', 'C', 'D')
        ]
        utils.is_equal_dataframe(
            actual_df,
            modeldb_types.DataFrame(-1, schema, 2000, 'digits-dataset'),
            self)

        # Model attached to the best fit event: validated, then compared.
        actual_model = best_fit.model
        utils.validate_transformer_struct(actual_model, self)
        utils.is_equal_transformer(
            actual_model, modeldb_types.Transformer(-1, 'SVC', ''), self)

        self.assertEqual(best_fit.featureColumns, ['A', 'B', 'C', 'D'])
Ejemplo n.º 5
0
    def test_new_dataframe(self):
        """Validate the transform event's newDataFrame and compare it.

        The expected dataframe has a single column '0' declared as 'int64'.
        """
        actual_df = self.transform_event.newDataFrame
        utils.validate_dataframe_struct(actual_df, self)

        # Not used below; kept because the lookup itself is evaluated by the
        # original test (presumably it raises when the schema is empty).
        _first_column = actual_df.schema[0]

        # TODO: fix columns (note carried over from the original test).
        wanted_schema = [modeldb_types.DataFrameColumn('0', 'int64')]
        wanted_df = modeldb_types.DataFrame(-1, wanted_schema, 100, '')
        utils.is_equal_dataframe(wanted_df, actual_df, self)
Ejemplo n.º 6
0
 def test_dataframe_fit_event(self):
     """Validate the fit event's df and compare it to the expected one.

     The expected dataframe is built with an empty column list.
     """
     actual_df = self.fit_event.df
     utils.validate_dataframe_struct(actual_df, self)

     wanted_df = modeldb_types.DataFrame(-1, [], 100, '')
     utils.is_equal_dataframe(wanted_df, actual_df, self)
Ejemplo n.º 7
0
 def test_old_dataframe(self):
     """Validate the transform event's oldDataFrame and compare it.

     The expected dataframe is built with an empty column list.
     """
     actual_df = self.transform_event.oldDataFrame
     utils.validate_dataframe_struct(actual_df, self)

     wanted_df = modeldb_types.DataFrame(-1, [], 100, '')
     utils.is_equal_dataframe(wanted_df, actual_df, self)
Ejemplo n.º 8
0
 def test_old_dataframe(self):
     """Compare the random split event's oldDataFrame to the expected one.

     The expected dataframe has columns 'A' to 'D' declared as 'int64'.
     """
     actual_df = self.random_split_event.oldDataFrame

     schema = [
         modeldb_types.DataFrameColumn(label, 'int64')
         for label in ('A', 'B', 'C', 'D')
     ]
     wanted_df = modeldb_types.DataFrame(-1, schema, 100, 'digits-dataset')
     utils.is_equal_dataframe(actual_df, wanted_df, self)
Ejemplo n.º 9
0
 def test_dataframe(self):
     """Compare the fit event's df to the expected dataframe.

     The expected dataframe has columns 'A' to 'D' declared as 'int64'.
     """
     actual_df = self.fit_event.df

     schema = [
         modeldb_types.DataFrameColumn(label, 'int64')
         for label in ('A', 'B', 'C', 'D')
     ]
     wanted_df = modeldb_types.DataFrame(-1, schema, 100, 'digits-dataset')
     utils.is_equal_dataframe(actual_df, wanted_df, self)
Ejemplo n.º 10
0
    def test_overall_pipeline_fit_event(self):
        """Check the pipeline-level fit event (pipeline_event.pipelineFit).

        Validates the event's structure, then compares its model, df, spec
        and featureColumns against hand-built expected values.
        """
        overall_fit = self.pipeline_event.pipelineFit
        utils.validate_fit_event_struct(overall_fit, self)

        # Model attached to the pipeline-level fit event.
        utils.is_equal_transformer(
            overall_fit.model,
            modeldb_types.Transformer(
                -1, 'Pipeline', 'pipeline with pca + logistic'),
            self)

        # Dataframe attached to the pipeline-level fit event.
        actual_df = overall_fit.df
        schema = [
            modeldb_types.DataFrameColumn(label, 'int64')
            for label in ('A', 'B')
        ]
        utils.is_equal_dataframe(
            actual_df,
            modeldb_types.DataFrame(-1, schema, 100, 'digits-dataset'),
            self)

        # Spec: every hyperparameter is built with the same FMIN/FMAX pair.
        actual_spec = overall_fit.spec
        hyperparams = [
            modeldb_types.HyperParameter(label, value, kind, FMIN, FMAX)
            for label, value, kind in (
                ('logistic__n_jobs', '1', 'int'),
                ('pca__copy', 'True', 'bool'),
                ('pca__n_components', 'None', 'NoneType'),
                ('logistic__fit_intercept', 'True', 'bool'),
                ('pca__whiten', 'False', 'bool'),
                ('steps', "[('pca', PCA(copy=True, n_components=None, whiten=False)), ('logistic', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False))]", 'list'),
                ('logistic', 'LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)', 'LinearRegression'),
                ('pca', 'PCA(copy=True, n_components=None, whiten=False)', 'PCA'),
                ('logistic__normalize', 'False', 'bool'),
                ('logistic__copy_X', 'True', 'bool'),
            )
        ]
        utils.is_equal_transformer_spec(
            actual_spec,
            modeldb_types.TransformerSpec(
                -1, 'Pipeline', hyperparams, 'pipeline with pca + logistic'),
            self)

        # Order-insensitive comparison. Written with sorted() instead of
        # assertItemsEqual, which exists only in Python 2's unittest (it was
        # renamed assertCountEqual in Python 3), so this runs on both.
        self.assertEqual(sorted(overall_fit.featureColumns), ['A', 'B'])