def test_pipeline_first_transform_stage(self):
    """Check the first transform stage stored on ``self.pipeline_event``.

    The stage's transformer, its ``oldDataFrame`` and its ``newDataFrame``
    are each handed to the matching ``utils.is_equal_*`` helper together
    with a hand-built expected ``modeldb_types`` struct.
    """
    stage_event = self.pipeline_event.transformStages[0].te

    # Transformer recorded for the stage.
    utils.is_equal_transformer(
        stage_event.transformer,
        modeldb_types.Transformer(-1, 'PCA', 'decomposition PCA'),
        self)

    # DataFrame the stage started from: two int64 columns.
    actual_input = stage_event.oldDataFrame
    input_columns = [
        modeldb_types.DataFrameColumn(column_name, 'int64')
        for column_name in ('A', 'B')
    ]
    expected_input = modeldb_types.DataFrame(
        -1, input_columns, 100, 'digits-dataset')
    utils.is_equal_dataframe(expected_input, actual_input, self)

    # DataFrame the stage produced: two float64 columns, empty tag.
    actual_output = stage_event.newDataFrame
    output_columns = [
        modeldb_types.DataFrameColumn(column_name, 'float64')
        for column_name in ('0', '1')
    ]
    expected_output = modeldb_types.DataFrame(-1, output_columns, 100, '')
    utils.is_equal_dataframe(expected_output, actual_output, self)
def test_pipeline_first_fit_stage(self):
    """Check the first fit stage stored on ``self.pipeline_event``.

    Compares the stage's model, DataFrame and spec against expected
    ``modeldb_types`` structs through the ``utils.is_equal_*`` helpers,
    then asserts that the feature columns are exactly ``['A', 'B']``.
    """
    stage_fit = self.pipeline_event.fitStages[0].fe  # First Stage

    # Model produced by the stage.
    utils.is_equal_transformer(
        stage_fit.model,
        modeldb_types.Transformer(-1, 'PCA', 'decomposition PCA'),
        self)

    # DataFrame the stage was fitted on.
    actual_df = stage_fit.df
    columns = [
        modeldb_types.DataFrameColumn(column_name, 'int64')
        for column_name in ('A', 'B')
    ]
    utils.is_equal_dataframe(
        actual_df,
        modeldb_types.DataFrame(-1, columns, 100, 'digits-dataset'),
        self)

    # Spec: every hyperparameter shares the FMIN/FMAX bounds.
    actual_spec = stage_fit.spec
    hyperparameter_rows = (
        ('copy', 'True', 'bool'),
        ('n_components', 'None', 'NoneType'),
        ('whiten', 'False', 'bool'),
    )
    hyperparameters = [
        modeldb_types.HyperParameter(name, value, type_name, FMIN, FMAX)
        for name, value, type_name in hyperparameter_rows
    ]
    utils.is_equal_transformer_spec(
        actual_spec,
        modeldb_types.TransformerSpec(
            -1, 'PCA', hyperparameters, 'decomposition PCA'),
        self)

    self.assertEqual(stage_fit.featureColumns, ['A', 'B'])
def test_gridcv_event(self):
    """Check ``self.grid_search_event`` and its best fit event.

    Runs ``utils.validate_grid_search_cv_event`` on the event, asserts
    ``numFolds`` is 3, then compares the best fit's DataFrame, model and
    feature columns against expected values.
    """
    utils.validate_grid_search_cv_event(self.grid_search_event, self)
    self.assertEqual(self.grid_search_event.numFolds, 3)

    winning_fit = self.grid_search_event.bestFit
    column_names = ('A', 'B', 'C', 'D')

    # DataFrame used by the best fit: four int64 columns.
    actual_df = winning_fit.df
    columns = [
        modeldb_types.DataFrameColumn(column_name, 'int64')
        for column_name in column_names
    ]
    utils.is_equal_dataframe(
        actual_df,
        modeldb_types.DataFrame(-1, columns, 2000, 'digits-dataset'),
        self)

    # Model of the best fit: structure is validated before comparison.
    actual_model = winning_fit.model
    utils.validate_transformer_struct(actual_model, self)
    utils.is_equal_transformer(
        actual_model,
        modeldb_types.Transformer(-1, 'SVC', ''),
        self)

    self.assertEqual(winning_fit.featureColumns, list(column_names))
def test_old_dataframe(self):
    """Compare the random split event's ``oldDataFrame`` with the expected one.

    The expected DataFrame has four int64 columns ('A' through 'D'),
    the value 100 and the tag 'digits-dataset'.
    """
    actual = self.random_split_event.oldDataFrame
    columns = [
        modeldb_types.DataFrameColumn(column_name, 'int64')
        for column_name in ('A', 'B', 'C', 'D')
    ]
    expected = modeldb_types.DataFrame(-1, columns, 100, 'digits-dataset')
    utils.is_equal_dataframe(actual, expected, self)
def test_dataframe(self):
    """Compare the fit event's ``df`` with the expected DataFrame struct."""
    observed = self.fit_event.df

    # Expected schema: one int64 column per name, in this order.
    schema = []
    for column_name in ('A', 'B', 'C', 'D'):
        schema.append(modeldb_types.DataFrameColumn(column_name, 'int64'))

    utils.is_equal_dataframe(
        observed,
        modeldb_types.DataFrame(-1, schema, 100, 'digits-dataset'),
        self)
def test_overall_pipeline_fit_event(self):
    """Check the pipeline-level fit event on ``self.pipeline_event``.

    Validates the fit event struct, then compares its model, DataFrame
    and spec against expected ``modeldb_types`` structs. Finally asserts
    that the feature columns contain 'A' and 'B' regardless of order.
    """
    pipeline_fit = self.pipeline_event.pipelineFit
    utils.validate_fit_event_struct(pipeline_fit, self)

    # Model recorded for the whole pipeline.
    utils.is_equal_transformer(
        pipeline_fit.model,
        modeldb_types.Transformer(
            -1, 'Pipeline', 'pipeline with pca + logistic'),
        self)

    # DataFrame the pipeline was fitted on.
    actual_df = pipeline_fit.df
    columns = [
        modeldb_types.DataFrameColumn(column_name, 'int64')
        for column_name in ('A', 'B')
    ]
    utils.is_equal_dataframe(
        actual_df,
        modeldb_types.DataFrame(-1, columns, 100, 'digits-dataset'),
        self)

    # Spec: (name, value, type) rows; every row shares the FMIN/FMAX bounds.
    actual_spec = pipeline_fit.spec
    hyperparameter_rows = (
        ('logistic__n_jobs', '1', 'int'),
        ('pca__copy', 'True', 'bool'),
        ('pca__n_components', 'None', 'NoneType'),
        ('logistic__fit_intercept', 'True', 'bool'),
        ('pca__whiten', 'False', 'bool'),
        ('steps',
         "[('pca', PCA(copy=True, n_components=None, whiten=False)), ('logistic', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False))]",
         'list'),
        ('logistic',
         'LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)',
         'LinearRegression'),
        ('pca',
         'PCA(copy=True, n_components=None, whiten=False)',
         'PCA'),
        ('logistic__normalize', 'False', 'bool'),
        ('logistic__copy_X', 'True', 'bool'),
    )
    hyperparameters = [
        modeldb_types.HyperParameter(name, value, type_name, FMIN, FMAX)
        for name, value, type_name in hyperparameter_rows
    ]
    utils.is_equal_transformer_spec(
        actual_spec,
        modeldb_types.TransformerSpec(
            -1, 'Pipeline', hyperparameters, 'pipeline with pca + logistic'),
        self)

    # NOTE(review): assertItemsEqual is the Python 2 unittest name; Python 3
    # calls it assertCountEqual. Confirm the interpreter this suite targets.
    self.assertItemsEqual(pipeline_fit.featureColumns, ['A', 'B'])
def test_new_dataframe(self):
    """Check the transform event's ``newDataFrame``.

    Validates the DataFrame struct, then compares it with an expected
    DataFrame holding a single int64 column named '0'.
    """
    produced = self.transform_event.newDataFrame
    utils.validate_dataframe_struct(produced, self)

    # NOTE(review): this value is never used below; only the index lookup
    # itself runs. Confirm whether a per-column comparison was intended.
    first_column = produced.schema[0]

    expected_schema = [modeldb_types.DataFrameColumn('0', 'int64')]
    expected = modeldb_types.DataFrame(-1, expected_schema, 100, '')
    # fix columns
    utils.is_equal_dataframe(expected, produced, self)