def test_dataset_object_by_name_queries_with_name_returns_model_object(
        self, mocked):
    """dataset_object_by_name should return what the model query yields.

    The expected value is the object produced by the
    ``query.filter(...).one_or_none()`` chain on ``mocked``
    (presumably the patched model class -- the patch decorator is not
    visible here).
    """
    actual = DatasetService().dataset_object_by_name(
        DATABASE_NAME, DATASET_NAME)

    filtered_query = mocked.query.filter.return_value
    expected = filtered_query.one_or_none.return_value
    self.assertEqual(actual, expected)
def test_rename_dataset_by_name(self, mock_func):
    """rename_dataset_by_name should look the dataset up, then update it.

    ``mock_func`` replaces the lookup; the object it returns must receive
    exactly one ``update`` call carrying the new name.
    """
    database, dataset = "database_name", "dataset_name"

    service = DatasetService()
    service.rename_dataset_by_name(DATASET_NEW_NAME, database, dataset)

    # The lookup is keyed by the *current* database/dataset names.
    mock_func.assert_called_once_with(database, dataset)
    looked_up = mock_func.return_value
    looked_up.update.assert_called_once_with(_name=DATASET_NEW_NAME)
def test_create_dataset_should_create_model(self, mocked):
    """create_dataset should build the model with the given fields and save it.

    ``is_video`` is not passed by the caller, so the model is expected to be
    constructed with ``is_video=False``.
    """
    service = DatasetService()
    service.create_dataset(
        DATASET_NAME, DATASET_URL, identifier_id=IDENTIFIER)

    expected_kwargs = {
        "name": DATASET_NAME,
        "file_url": DATASET_URL,
        "identifier_id": IDENTIFIER,
        "is_video": False,
    }
    mocked.assert_called_with(**expected_kwargs)

    created_model = mocked.return_value
    created_model.save.assert_called_once()
def test_dataset_by_name_queries_model_with_name_and_return_id(
        self, mocked):
    """dataset_by_name should select the id column, filter by name and
    return the first element of the single matching row.
    """
    service = DatasetService()
    expected_output = 1

    # Stub the with_entities(...).filter(...).one() chain to yield one row.
    with_entities = mocked.query.with_entities
    name_filter = with_entities.return_value.filter
    name_filter.return_value.one.return_value = [expected_output]

    result = service.dataset_by_name(DATASET_NAME)

    with_entities.assert_called_with(mocked._id)
    name_filter.assert_called_with(mocked._name == DATASET_NAME)
    self.assertEqual(result, expected_output)
def test_drop_dataset_by_name_should_raise_exception(self):
    """drop_dataset_by_name should wrap a lookup failure in a descriptive error.

    The dataset lookup is patched to raise; the service must propagate an
    exception whose message names the dataset and the underlying error, and
    it must not attempt to delete anything.
    """
    with patch.object(DatasetService,
                      "dataset_object_by_name") as mock_func:
        ERR_MSG = "err_message"
        mock_func.side_effect = Exception(ERR_MSG)
        service = DatasetService()

        with self.assertRaises(Exception) as cm:
            service.drop_dataset_by_name(DATABASE_NAME, DATASET_NAME)

        # The message check has to live *after* the assertRaises block:
        # statements following the raising call inside the block never run,
        # and ``cm.exception`` is only populated once the block has exited.
        self.assertEqual(
            "Delete dataset failed for name {} with error {}".format(
                DATASET_NAME, ERR_MSG),
            str(cm.exception),
        )

        mock_func.assert_called_once_with(DATABASE_NAME, DATASET_NAME)
        mock_func.return_value.delete.assert_not_called()
def test_dataset_by_id_should_query_model_with_id(self, mocked):
    """dataset_by_id should filter the model query by id and fetch one row."""
    service = DatasetService()
    service.dataset_by_id(DATASET_ID)

    id_filter = mocked.query.filter
    id_filter.assert_called_with(mocked._id == DATASET_ID)
    id_filter.return_value.one.assert_called_once()
def test_drop_dataset_by_name(self, mock_func):
    """drop_dataset_by_name should look the dataset up and delete it once.

    ``mock_func`` replaces the lookup; the object it returns is the one
    expected to receive the ``delete`` call.
    """
    DatasetService().drop_dataset_by_name(DATABASE_NAME, DATASET_NAME)

    mock_func.assert_called_once_with(DATABASE_NAME, DATASET_NAME)
    looked_up = mock_func.return_value
    looked_up.delete.assert_called_once()
def __init__(self):
    """Create one instance of each catalog service used by this object."""
    # Dataset-level services.
    self._dataset_service = DatasetService()
    self._column_service = DatasetColumnService()

    # UDF-level services.
    self._udf_service = UdfService()
    self._udf_io_service = UdfIOService()
class CatalogManager(object):
    """Process-wide entry point to the catalog.

    Only one instance is ever created (see ``__new__``); it delegates every
    operation to the dataset, column, UDF and UDF-IO services it owns.
    """

    _instance = None

    def __new__(cls):
        """Return the shared instance, creating and bootstrapping it once."""
        if cls._instance is not None:
            return cls._instance

        cls._instance = super().__new__(cls)
        cls._instance._bootstrap_catalog()
        return cls._instance

    def __init__(self):
        """(Re)create the service objects.

        Runs on every ``CatalogManager()`` call, not only the first one,
        because Python invokes ``__init__`` on whatever ``__new__`` returns.
        """
        self._dataset_service = DatasetService()
        self._column_service = DatasetColumnService()
        self._udf_service = UdfService()
        self._udf_io_service = UdfIOService()

    def reset(self):
        """Reset the singleton's state.

        Shuts the catalog down, bootstraps it again and then re-runs
        ``__init__`` so every service member is rebuilt.
        """
        self._shutdown_catalog()
        self._bootstrap_catalog()
        self.__init__()

    def _bootstrap_catalog(self):
        """Run the tasks required before the catalog can be used.

        Currently the only task is ``init_db()``, which initializes the
        catalog database.
        """
        logger.info("Bootstrapping catalog")
        init_db()

    def _shutdown_catalog(self):
        """Shut the catalog manager down.

        Currently this only calls ``drop_db()`` to drop the catalog database.
        """
        logger.info("Shutting catalog")
        drop_db()

    def create_metadata(
        self,
        name: str,
        file_url: str,
        column_list: List[DataFrameColumn],
        identifier_column="id",
        is_video=False,
    ) -> DataFrameMetadata:
        """Create and persist a dataset metadata object with its columns.

        Args:
            name: name of the dataset/video this metadata corresponds to
            file_url: forwarded unchanged to the dataset service
            column_list: list of columns; each one is tagged with the new
                metadata id before being handed to the column service
            identifier_column (str): a unique identifier column for each row
            is_video (bool): True if the table is a video

        Returns:
            The metadata object returned by the dataset service, with its
            ``schema`` set to the result of the column service.
        """
        metadata = self._dataset_service.create_dataset(
            name,
            file_url,
            identifier_id=identifier_column,
            is_video=is_video,
        )

        # Link every column to the freshly created dataset entry.
        for column in column_list:
            column.metadata_id = metadata.id

        metadata.schema = self._column_service.create_column(column_list)
        return metadata

    def create_column_metadata(
        self,
        column_name: str,
        data_type: ColumnType,
        array_type: NdArrayType,
        dimensions: List[int],
    ) -> DataFrameColumn:
        """Build an in-memory dataframe column object.

        Nothing is committed to the catalog database here; pass the result
        to ``create_metadata`` to persist it.

        Arguments:
            column_name {str} -- column name to be created
            data_type {ColumnType} -- type of column created
            array_type {NdArrayType} -- type of ndarray
            dimensions {List[int]} -- dimensions of the column created
        """
        column = DataFrameColumn(
            column_name,
            data_type,
            array_type=array_type,
            array_dimensions=dimensions,
        )
        return column

    def get_dataset_metadata(self, database_name: str,
                             dataset_name: str) -> DataFrameMetadata:
        """Fetch the metadata of a dataset, with its schema attached.

        Arguments:
            database_name (str): name of the database holding the dataset
            dataset_name (str): name of the dataset

        Returns:
            DataFrameMetadata, or None when the dataset service finds nothing.
        """
        metadata = self._dataset_service.dataset_object_by_name(
            database_name, dataset_name)
        if metadata is None:
            return None

        # The schema is not part of the fetched object, so it has to be
        # attached on every lookup.
        # ToDo: maybe keep schema as a part of persistent metadata object
        metadata.schema = self._column_service.columns_by_id_and_dataset_id(
            metadata.id, None)
        return metadata

    def get_column_object(self, table_obj: DataFrameMetadata,
                          col_name: str) -> DataFrameColumn:
        """Return the column named ``col_name`` of ``table_obj``.

        Returns the first match reported by the column service, or None when
        the service returns nothing.
        """
        matches = self._column_service.columns_by_dataset_id_and_names(
            table_obj.id, column_names=[col_name])
        return matches[0] if matches else None

    def get_all_column_objects(self, table_obj: DataFrameMetadata):
        """Return every column the column service reports for ``table_obj``."""
        return self._column_service.get_dataset_columns(table_obj)

    def udf_io(
        self,
        io_name: str,
        data_type: ColumnType,
        array_type: NdArrayType,
        dimensions: List[int],
        is_input: bool,
    ):
        """Build an in-memory udf_io object from the given info.

        Nothing is committed to the catalog database here; pass the result
        to ``create_udf`` (inside its io list) to persist it.

        Arguments:
            io_name(str): io name to be created
            data_type(ColumnType): type of io created
            array_type(NdArrayType): type of array content
            dimensions(List[int]): dimensions of the io created
            is_input(bool): True for an input, False for an output
        """
        io_object = UdfIO(
            io_name,
            data_type,
            array_type=array_type,
            array_dimensions=dimensions,
            is_input=is_input,
        )
        return io_object

    def create_udf(
        self,
        name: str,
        impl_file_path: str,
        type: str,
        udf_io_list: List[UdfIO],
    ) -> UdfMetadata:
        """Create and persist a udf metadata object with its udf_io objects.

        Arguments:
            name(str): name of the udf this metadata corresponds to
            impl_file_path(str): implementation path of the udf,
                                 relative to eva/udf
            type(str): what kind of udf operator like classification,
                       detection etc
            udf_io_list(List[UdfIO]): input/output info of this udf

        Returns:
            The UdfMetadata object returned by the udf service.
        """
        metadata = self._udf_service.create_udf(name, impl_file_path, type)

        # Link every io entry to the freshly created udf.
        for io_entry in udf_io_list:
            io_entry.udf_id = metadata.id

        self._udf_io_service.add_udf_io(udf_io_list)
        return metadata

    def get_udf_by_name(self, name: str) -> UdfMetadata:
        """Look a UDF up by name.

        Arguments:
            name (str): name of the UDF

        Returns:
            Whatever the udf service returns for that name.
        """
        return self._udf_service.udf_by_name(name)

    @staticmethod
    def _require_udf_metadata(udf_obj):
        """Raise ValueError unless ``udf_obj`` is a UdfMetadata instance."""
        if not isinstance(udf_obj, UdfMetadata):
            raise ValueError(
                "Expected UdfMetadata object, got {}".format(type(udf_obj)))

    def get_udf_inputs(self, udf_obj: UdfMetadata) -> List[UdfIO]:
        """Return the input io entries of ``udf_obj``.

        Raises:
            ValueError: if ``udf_obj`` is not a UdfMetadata instance.
        """
        self._require_udf_metadata(udf_obj)
        return self._udf_io_service.get_inputs_by_udf_id(udf_obj.id)

    def get_udf_outputs(self, udf_obj: UdfMetadata) -> List[UdfIO]:
        """Return the output io entries of ``udf_obj``.

        Raises:
            ValueError: if ``udf_obj`` is not a UdfMetadata instance.
        """
        self._require_udf_metadata(udf_obj)
        return self._udf_io_service.get_outputs_by_udf_id(udf_obj.id)

    def drop_dataset_metadata(self, database_name: str,
                              table_name: str) -> bool:
        """Drop a table from the catalog via the dataset service.

        Arguments:
            database_name: database the table belongs to
            table_name: table name to be deleted

        Returns:
            The result of ``drop_dataset_by_name`` for that table.
        """
        return self._dataset_service.drop_dataset_by_name(
            database_name, table_name)

    def drop_udf(self, udf_name: str) -> bool:
        """Drop a udf from the catalog via the udf service.

        Arguments:
            udf_name: udf name to be dropped

        Returns:
            The result of ``drop_udf_by_name`` for that udf.
        """
        return self._udf_service.drop_udf_by_name(udf_name)

    def rename_table(self, new_name: TableInfo, curr_table: TableInfo):
        """Rename ``curr_table`` to the table name carried by ``new_name``."""
        target_name = new_name.table_name
        return self._dataset_service.rename_dataset_by_name(
            target_name,
            curr_table.database_name,
            curr_table.table_name,
        )

    def check_table_exists(self, database_name: str, table_name: str):
        """Return True when the dataset service finds the table, else False."""
        metadata = self._dataset_service.dataset_object_by_name(
            database_name, table_name)
        return metadata is not None

    def get_all_udf_entries(self):
        """Return every udf entry known to the udf service."""
        return self._udf_service.get_all_udfs()