예제 #1
0
def create_video_metadata(name: str) -> DataFrameMetadata:
    """Register a video table in the catalog and return its metadata.

    Video tables always get the same two predefined columns:
        id:   the frame id (integer, with a unique constraint)
        data: the frame data (uint8 ndarray)

    Arguments:
        name (str): name of the metadata to be added to the catalog

    Returns:
        DataFrameMetadata: the object returned by the catalog's
        ``create_metadata`` call
    """
    catalog = CatalogManager()
    frame_id_column = ColumnDefinition(
        'id', ColumnType.INTEGER, None, [], ColConstraintInfo(unique=True))
    # The ndarray dimensions are left as None because nothing here knows
    # them. This needs fixing: either the user supplies them with LOAD or
    # they get inferred from the provided video.
    frame_data_column = ColumnDefinition(
        'data', ColumnType.NDARRAY, NdArrayType.UINT8, [None, None, None])

    column_metadata = create_column_metadata(
        [frame_id_column, frame_data_column])
    file_uri = str(generate_file_path(name))
    return catalog.create_metadata(
        name, file_uri, column_metadata,
        identifier_column='id', is_video=True)
예제 #2
0
    def exec(self):
        """Drop table executor

        Drops the stored data of the first referenced table through the
        storage engine matching its kind, removes its catalog entry and
        yields a single batch holding the confirmation message.

        NOTE(review): the failure paths below only log; unless the logger
        call itself raises, execution carries on to the next step.
        """
        catalog = CatalogManager()
        requested_refs = self.node.table_refs
        if len(requested_refs) > 1:
            logger.exception('Drop supports only single table')
        target_ref = requested_refs[0]
        target_info = target_ref.table

        table_found = catalog.check_table_exists(
            target_info.database_name, target_info.table_name)
        if not table_found:
            err_msg = "Table: {} does not exsits".format(target_ref)
            # IF EXISTS downgrades a missing table to a warning
            report = logger.warn if self.node.if_exists else logger.exception
            report(err_msg)

        # video tables and regular tables live in different storage engines
        table_obj = target_info.table_obj
        engine = VideoStorageEngine if table_obj.is_video else StorageEngine
        engine.drop(table=table_obj)

        dropped = catalog.drop_dataset_metadata(
            target_info.database_name, target_info.table_name)
        if not dropped:
            logger.exception("Failed to drop {}".format(target_ref))

        message = "Table Successfully dropped: {}".format(
            target_info.table_name)
        yield Batch(pd.DataFrame({message}, index=[0]))
예제 #3
0
 def test_catalog_manager_reset(self, mock_bootstrap, mock_shutdown):
     """reset() must call __init__, bootstrap and shutdown once, no args"""
     catalog = CatalogManager()
     init_mock = MagicMock()
     with mock.patch.object(CatalogManager, '__init__', init_mock):
         catalog.reset()
         for expected_call in (init_mock, mock_bootstrap, mock_shutdown):
             expected_call.assert_called_once_with()
예제 #4
0
 def test_create_udf(self, udfio_mock, udf_mock):
     """create_udf must create the udf, add its io list and return it"""
     catalog = CatalogManager()
     io_objects = [MagicMock()]
     udf_args = ('udf', 'sample.py', 'classification')

     created = catalog.create_udf(*udf_args, io_objects)

     udfio_mock.return_value.add_udf_io.assert_called_with(io_objects)
     create_udf_mock = udf_mock.return_value.create_udf
     create_udf_mock.assert_called_with(*udf_args)
     self.assertEqual(created, create_udf_mock.return_value)
예제 #5
0
    def test_get_udf_inputs(self, udf_mock):
        """get_udf_inputs looks inputs up by udf id, else raises ValueError"""
        lookup_mock = udf_mock.return_value.get_inputs_by_udf_id
        udf_metadata = MagicMock(spec=UdfMetadata)

        CatalogManager().get_udf_inputs(udf_metadata)
        lookup_mock.assert_called_once_with(udf_metadata.id)

        # a mock that is not spec'ed as UdfMetadata must be rejected
        with self.assertRaises(ValueError):
            CatalogManager().get_udf_inputs(MagicMock())
예제 #6
0
 def test_create_udf_io_object(self, udfio_mock):
     """udf_io must forward its arguments to the patched io class"""
     expected_kwargs = dict(array_type=NdArrayType.UINT8,
                            array_dimensions=[2, 3, 4],
                            is_input=True)

     io_object = CatalogManager().udf_io(
         'name', ColumnType.NDARRAY, NdArrayType.UINT8, [2, 3, 4], True)

     udfio_mock.assert_called_with(
         'name', ColumnType.NDARRAY, **expected_kwargs)
     self.assertEqual(io_object, udfio_mock.return_value)
예제 #7
0
    def exec(self):
        """Show executor

        Yields one batch. When the show type is UDFS the batch holds the
        display format of every udf entry returned by the catalog; for any
        other show type it is built from an empty list.
        """
        catalog = CatalogManager()
        if self.node.show_type is ShowType.UDFS:
            rows = [entry.display_format()
                    for entry in catalog.get_all_udf_entries()]
        else:
            rows = []

        yield Batch(pd.DataFrame(rows))
예제 #8
0
    def test_get_dataset_metadata_when_table_doesnot_exists(
            self, dcs_mock, ds_mock, initdb_mock):
        """A missing dataset yields None and triggers no column lookup"""
        catalog = CatalogManager()
        database_name, dataset_name = "database", "name"
        missing_metadata = None

        dataset_lookup = ds_mock.return_value.dataset_object_by_name
        dataset_lookup.return_value = missing_metadata

        actual = catalog.get_dataset_metadata(database_name, dataset_name)

        dataset_lookup.assert_called_with(database_name, dataset_name)
        column_lookup = dcs_mock.return_value.columns_by_id_and_dataset_id
        column_lookup.assert_not_called()
        self.assertEqual(actual, missing_metadata)
예제 #9
0
    def exec(self):
        """Rename table executor

        Calls the catalog to modify the metadata corresponding to the table.
        """
        catalog_manager = CatalogManager()
        new_name = self.node.new_name
        old_table_info = self.node.old_table.table
        catalog_manager.rename_table(new_name, old_table_info)
예제 #10
0
 def setUpClass(cls):
     """Reset the catalog, load 'ua_detrac.mp4' into MyVideo, load udfs"""
     CatalogManager().reset()
     copy_sample_video_to_prefix()
     # the embedded newline and indentation are part of the query text
     load_video_query = ("LOAD DATA INFILE 'ua_detrac.mp4'\n"
                         "                INTO MyVideo;")
     execute_query_fetch_all(load_video_query)
     load_inbuilt_udfs()
예제 #11
0
    def exec(self):
        """Create udf executor

        Calls the catalog to create udf metadata. When the node carries
        ``if_not_exists`` and the catalog already returns a udf for the
        name, nothing is created.
        """
        catalog = CatalogManager()
        node = self.node
        # honour IF NOT EXISTS: skip creation when the udf is already there
        if node.if_not_exists and catalog.get_udf_by_name(node.name):
            return

        # inputs first, then outputs, in one io list
        udf_io_list = [*node.inputs, *node.outputs]
        impl_path = node.impl_path.absolute().as_posix()
        catalog.create_udf(node.name, impl_path, node.udf_type, udf_io_list)
예제 #12
0
 def setUpClass(cls):
     """Reset the catalog, load the dummy video and udfs, create tables"""
     CatalogManager().reset()
     create_sample_video(NUM_FRAMES)
     execute_query_fetch_all(
         """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")
     load_inbuilt_udfs()
     # each table is stored on the class under its own name
     table_specs = (("table1", 100), ("table2", 500), ("table3", 1000))
     for table_name, second_arg in table_specs:
         setattr(cls, table_name, create_table(table_name, second_arg, 3))
예제 #13
0
    def test_drop_plan(self):
        """DropPlan reports the DROP operator type and keeps its table ref"""
        table_ref = TableRef(TableInfo('dummy'))

        CatalogManager().reset()
        plan = DropPlan([table_ref], False)

        self.assertEqual(plan.opr_type, PlanOprType.DROP)
        first_table = plan.table_refs[0].table
        self.assertEqual(first_table.table_name, "dummy")
예제 #14
0
    def test_rename_plan(self):
        """RenamePlan reports the RENAME operator type and both names"""
        old_ref = TableRef(TableInfo("old"))
        new_info = TableInfo("new")

        CatalogManager().reset()
        plan = RenamePlan(old_ref, new_info)

        self.assertEqual(plan.opr_type, PlanOprType.RENAME)
        old_table = plan.old_table.table
        self.assertEqual(old_table.table_name, "old")
        self.assertEqual(plan.new_name.table_name, "new")
예제 #15
0
def create_table_metadata(
        table_ref: TableRef,
        columns: List[ColumnDefinition]) -> DataFrameMetadata:
    """Create catalog metadata for the table named by ``table_ref``.

    Arguments:
        table_ref (TableRef): reference whose ``table.table_name`` is used
        columns (List[ColumnDefinition]): parsed column definitions

    Returns:
        DataFrameMetadata: the object returned by the catalog's
        ``create_metadata`` call
    """
    name = table_ref.table.table_name
    column_metadata = create_column_metadata(columns)
    storage_url = str(generate_file_path(name))
    return CatalogManager().create_metadata(
        name, storage_url, column_metadata)
예제 #16
0
def bind_table_info(table_info: TableInfo) -> DataFrameMetadata:
    """
    Uses catalog to bind the dataset information for the given table.

    On success the catalog metadata is stored on ``table_info.table_obj``
    and also returned, so the function honours its declared return type.

    Arguments:
         table_info (TableInfo): table information obtained in SQL query

    Returns:
        DataFrameMetadata  -  corresponding metadata for the input table info

    Raises:
        RuntimeError: if the catalog returns no metadata for the table
    """
    catalog = CatalogManager()
    obj = catalog.get_dataset_metadata(table_info.database_name,
                                       table_info.table_name)
    if not obj:
        # Adjacent literals instead of a backslash continuation inside the
        # string, which used to embed the source indentation in the message.
        error = ('{} does not exists. Create the table using '
                 'CREATE TABLE.'.format(table_info.table_name))
        logger.error(error)
        raise RuntimeError(error)

    table_info.table_obj = obj
    return obj
예제 #17
0
    def test_get_dataset_metadata_when_table_exists(self, dcs_mock, ds_mock,
                                                    initdb_mock):
        """An existing dataset is returned with the looked-up schema set"""
        catalog = CatalogManager()
        database_name, dataset_name = "database", "name"
        dataset_id = 1
        expected_schema = [1, 2, 3]

        dataset_lookup = ds_mock.return_value.dataset_object_by_name
        column_lookup = dcs_mock.return_value.columns_by_id_and_dataset_id
        dataset_lookup.return_value = MagicMock(id=dataset_id, schema=None)
        column_lookup.return_value = expected_schema

        actual = catalog.get_dataset_metadata(database_name, dataset_name)

        dataset_lookup.assert_called_with(database_name, dataset_name)
        column_lookup.assert_called_with(dataset_id, None)
        self.assertEqual(actual.id, dataset_id)
        self.assertEqual(actual.schema, expected_schema)
예제 #18
0
 def setUpClass(cls):
     """Reset the catalog, load both sample videos and the inbuilt udfs"""
     # start from a clean catalog
     CatalogManager().reset()
     create_sample_video()
     copy_sample_video_to_prefix()
     # the second query keeps its embedded newline and indentation
     load_queries = (
         """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""",
         "LOAD DATA INFILE 'ua_detrac.mp4'\n"
         "                INTO UATRAC;",
     )
     for load_query in load_queries:
         execute_query_fetch_all(load_query)
     load_inbuilt_udfs()
예제 #19
0
def handle_if_not_exists(table_ref: TableRef, if_not_exist=False):
    """Report whether the referenced table already exists in the catalog.

    Arguments:
        table_ref (TableRef): reference to the table being checked
        if_not_exist (bool): when truthy, an existing table only produces
            a warning instead of an error

    Returns:
        bool: False when the table is absent; True when it exists and
        ``if_not_exist`` is truthy

    Raises:
        RuntimeError: the table exists and ``if_not_exist`` is falsy
    """
    catalog = CatalogManager()
    table_info = table_ref.table
    already_present = catalog.check_table_exists(
        table_info.database_name, table_info.table_name)
    if not already_present:
        return False

    err_msg = 'Table: {} already exsits'.format(table_ref)
    if not if_not_exist:
        logger.error(err_msg)
        raise RuntimeError(err_msg)

    logger.warn(err_msg)
    return True
예제 #20
0
    def test_create_metadata_should_create_dataset_and_columns(
            self, dcs_mock, ds_mock, initdb_mock):
        """create_metadata must create the dataset, then its columns"""
        catalog = CatalogManager()
        file_url, dataset_name = "file1", "name"
        columns = [DataFrameColumn("c1", ColumnType.INTEGER)]

        actual = catalog.create_metadata(dataset_name, file_url, columns)

        create_dataset = ds_mock.return_value.create_dataset
        create_column = dcs_mock.return_value.create_column
        create_dataset.assert_called_with(
            dataset_name, file_url, identifier_id='id', is_video=False)
        # the columns are expected to carry the id of the created dataset
        for column in columns:
            column.metadata_id = create_dataset.return_value.id
        create_column.assert_called_with(columns)

        expected = create_dataset.return_value
        expected.schema = create_column.return_value
        self.assertEqual(actual, expected)
예제 #21
0
    def setUp(self):
        """Reset the catalog, load 'dummy.avi' and create DummyObjectDetector"""
        CatalogManager().reset()
        create_sample_video(NUM_FRAMES)
        execute_query_fetch_all(
            """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

        # udf definition used by the tests; implementation is test/util.py
        udf_ddl = """CREATE UDF DummyObjectDetector
                  INPUT  (Frame_Array NDARRAY UINT8(3, 256, 256))
                  OUTPUT (label NDARRAY STR(10))
                  TYPE  Classification
                  IMPL  'test/util.py';
        """
        execute_query_fetch_all(udf_ddl)
예제 #22
0
    def test_create_plan(self):
        """CreatePlan keeps its table ref, column list and if_not_exists"""
        table_ref = TableRef(TableInfo('dummy'))

        CatalogManager().reset()
        id_column = DataFrameColumn('id', ColumnType.INTEGER)
        name_column = DataFrameColumn(
            'name', ColumnType.TEXT, array_dimensions=[50])
        plan = CreatePlan(table_ref, [id_column, name_column], False)

        self.assertEqual(plan.opr_type, PlanOprType.CREATE)
        self.assertEqual(plan.if_not_exists, False)
        self.assertEqual(plan.table_ref.table.table_name, "dummy")
        for position, expected_name in enumerate(("id", "name")):
            self.assertEqual(plan.column_list[position].name, expected_name)
예제 #23
0
def column_definition_to_udf_io(col_list: List[ColumnDefinition],
                                is_input: bool):
    """Create the UdfIO object for each column definition provided

    A single ColumnDefinition is accepted and treated as a one-element
    list. A ``None`` entry is logged as an error and kept as ``None`` in
    the result at the same position.

    Arguments:
        col_list(List[ColumnDefinition]): parsed input/output definitions
        is_input(bool): true if input else false

    Returns:
        list: one ``CatalogManager().udf_io(...)`` result (or ``None``) per
        entry of ``col_list``, in the same order
    """
    if isinstance(col_list, ColumnDefinition):
        col_list = [col_list]

    result_list = []
    for col in col_list:
        if col is None:
            logger.error("Empty column definition while creating udf io")
            result_list.append(col)
            # Skip the conversion: falling through would dereference None
            # and raise AttributeError right after logging.
            continue
        result_list.append(CatalogManager().udf_io(col.name,
                                                   col.type,
                                                   array_type=col.array_type,
                                                   dimensions=col.dimension,
                                                   is_input=is_input))
    return result_list
예제 #24
0
    def test_should_drop_table(self):
        """DROP TABLE removes the catalog entry, columns and stored path"""
        catalog_manager = CatalogManager()
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        execute_query_fetch_all(query)

        metadata_obj = catalog_manager.get_dataset_metadata(None, "MyVideo")
        # Check the load worked before touching the metadata; otherwise a
        # missing table shows up as an AttributeError, not a test failure.
        self.assertIsNotNone(metadata_obj)
        video_dir = metadata_obj.file_url
        column_objects = catalog_manager.get_all_column_objects(metadata_obj)
        self.assertEqual(len(column_objects), 2)
        self.assertTrue(Path(video_dir).exists())

        drop_query = """DROP TABLE MyVideo;"""
        execute_query_fetch_all(drop_query)

        # after the drop neither metadata, columns nor the path may remain
        self.assertIsNone(
            catalog_manager.get_dataset_metadata(None, "MyVideo"))
        column_objects = catalog_manager.get_all_column_objects(metadata_obj)
        self.assertEqual(len(column_objects), 0)
        self.assertFalse(Path(video_dir).exists())
예제 #25
0
def create_column_metadata(col_list: List[ColumnDefinition]):
    """Create column metadata for the input parsed column list. This function
    will not commit the provided column into catalog table.
    Will only return in memory list of ColumnDataframe objects

    A single ColumnDefinition is accepted and treated as a one-element
    list. A ``None`` entry is logged as an error and kept as ``None`` in
    the result at the same position.

    Arguments:
        col_list {List[ColumnDefinition]} -- parsed col list to be created

    Returns:
        list -- one ``CatalogManager().create_column_metadata(...)`` result
        (or ``None``) per entry of ``col_list``, in the same order
    """
    if isinstance(col_list, ColumnDefinition):
        col_list = [col_list]

    result_list = []
    for col in col_list:
        if col is None:
            logger.error(
                "Empty column while creating column metadata")
            result_list.append(col)
            # Skip the conversion: falling through would dereference None
            # and raise AttributeError right after logging.
            continue
        result_list.append(
            CatalogManager().create_column_metadata(
                col.name, col.type, col.array_type, col.dimension
            )
        )

    return result_list
예제 #26
0
    def test_should_rename_table(self):
        """RENAME TABLE moves the catalog entry from the old to the new name"""
        catalog_manager = CatalogManager()
        execute_query_fetch_all(
            """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;""")

        def has_metadata(table_name):
            # True when the catalog returns metadata for the table name
            found = catalog_manager.get_dataset_metadata(None, table_name)
            return found is not None

        self.assertTrue(has_metadata("MyVideo"))
        self.assertFalse(has_metadata("MyVideo1"))

        execute_query_fetch_all("""RENAME TABLE MyVideo TO MyVideo1;""")

        self.assertFalse(has_metadata("MyVideo"))
        self.assertTrue(has_metadata("MyVideo1"))
예제 #27
0
 def setUp(self):
     """Reset the catalog, then run the Fastrcnn and ArrayCount udf queries"""
     CatalogManager().reset()
     for udf_query in (Fastrcnn_udf_query, ArrayCount_udf_query):
         execute_query_fetch_all(udf_query)
예제 #28
0
 def __init__(self, binder_context: StatementBinderContext):
     """Keep the binder context and obtain a catalog manager.

     Arguments:
         binder_context (StatementBinderContext): context stored on the
             instance as ``_binder_context``
     """
     self._catalog = CatalogManager()
     self._binder_context = binder_context
예제 #29
0
class StatementBinder:
    """Binds parsed statements and expressions against the catalog.

    ``bind`` dispatches on the type of the node it receives (via
    ``singledispatchmethod``); each ``_bind_*`` method below handles one
    node type and annotates the node in place. Table and column aliases
    seen while binding are tracked in a ``StatementBinderContext``.
    """

    def __init__(self, binder_context: StatementBinderContext):
        """Keep the binder context and obtain a catalog manager."""
        self._binder_context = binder_context
        self._catalog = CatalogManager()

    @singledispatchmethod
    def bind(self, node):
        """Bind ``node``; fallback for types without a registered handler.

        Raises:
            NotImplementedError: no handler is registered for the node type
        """
        raise NotImplementedError(f'Cannot bind {type(node)}')

    @bind.register(AbstractStatement)
    def _bind_abstract_statement(self, node: AbstractStatement):
        """Statements without a more specific handler need no binding."""
        pass

    @bind.register(AbstractExpression)
    def _bind_abstract_expr(self, node: AbstractExpression):
        """Bind every child of a generic expression."""
        for child in node.children:
            self.bind(child)

    @bind.register(SelectStatement)
    def _bind_select_statement(self, node: SelectStatement):
        """Bind the parts of a SELECT statement.

        The FROM table is bound first, then the WHERE clause, the target
        list, the ORDER BY expressions and finally a UNION branch, if those
        are present.
        """
        self.bind(node.from_table)
        if node.where_clause:
            self.bind(node.where_clause)
        if node.target_list:
            # SELECT * support: a lone '*' column is replaced by the result
            # of extend_star on the current context
            if len(node.target_list) == 1 and \
                    isinstance(node.target_list[0], TupleValueExpression) and \
                    node.target_list[0].col_name == '*':
                node.target_list = extend_star(self._binder_context)
            for expr in node.target_list:
                self.bind(expr)
        if node.orderby_list:
            # only the first element of each order-by entry is bound
            for expr in node.orderby_list:
                self.bind(expr[0])
        if node.union_link:
            # the UNION branch is bound in a fresh context; the current
            # context is restored afterwards
            current_context = self._binder_context
            self._binder_context = StatementBinderContext()
            self.bind(node.union_link)
            self._binder_context = current_context

    @bind.register(CreateMaterializedViewStatement)
    def _bind_create_mat_statement(self,
                                   node: CreateMaterializedViewStatement):
        """Bind the query that defines the materialized view."""
        self.bind(node.query)
        # TODO: verify that the number of projected columns matches the table

    @bind.register(LoadDataStatement)
    def _bind_load_data_statement(self, node: LoadDataStatement):
        """Bind the target table and the column list of a LOAD DATA.

        For the VIDEO file format the video metadata is created first. The
        column list comes from the query when given; otherwise it is built
        from the columns of the bound table object.

        Raises:
            RuntimeError: the table object is still None after binding
        """
        table_ref = node.table_ref
        if node.file_options['file_format'] == FileFormatType.VIDEO:
            # Create a new metadata object
            create_video_metadata(table_ref.table.table_name)

        self.bind(table_ref)

        table_ref_obj = table_ref.table.table_obj
        if table_ref_obj is None:
            # NOTE(review): the backslash continuation sits inside the
            # literal, so the next line's indentation is part of the message
            error = '{} does not exists. Create the table using \
                            CREATE TABLE.'.format(table_ref.table.table_name)
            logger.error(error)
            raise RuntimeError(error)

        # if query had columns specified, we just copy them
        if node.column_list is not None:
            column_list = node.column_list

        # else we curate the column list from the metadata
        else:
            column_list = []
            for column in table_ref_obj.columns:
                column_list.append(
                    TupleValueExpression(
                        col_name=column.name,
                        table_alias=table_ref_obj.name.lower(),
                        col_object=column))

        # bind the columns
        for expr in column_list:
            self.bind(expr)

        node.column_list = column_list

    @bind.register(DropTableStatement)
    def _bind_drop_table_statement(self, node: DropTableStatement):
        """Bind every table reference of a DROP TABLE statement."""
        for table in node.table_refs:
            self.bind(table)

    @bind.register(TableRef)
    def _bind_tableref(self, node: TableRef):
        """Bind a table reference according to its kind.

        Handles a plain table, a nested SELECT, a join and a function
        expression.

        Raises:
            ValueError: the reference is none of the supported kinds
        """
        if node.is_table_atom():
            # Table: register the alias, then bind the table info
            self._binder_context.add_table_alias(node.alias,
                                                 node.table.table_name)
            bind_table_info(node.table)
        elif node.is_select():
            # the nested SELECT is bound in a fresh context; afterwards its
            # target list is registered as a derived table in the outer one
            current_context = self._binder_context
            self._binder_context = StatementBinderContext()
            self.bind(node.select_statement)
            self._binder_context = current_context
            self._binder_context.add_derived_table_alias(
                node.alias, node.select_statement.target_list)
        elif node.is_join():
            # left side, right side, then the optional join predicate
            self.bind(node.join_node.left)
            self.bind(node.join_node.right)
            if node.join_node.predicate:
                self.bind(node.join_node.predicate)
        elif node.is_func_expr():
            # the function expression is registered as a derived table
            # under its own alias
            self.bind(node.func_expr)
            self._binder_context.add_derived_table_alias(
                node.func_expr.alias, [node.func_expr])
        else:
            raise ValueError(f'Unsupported node {type(node)}')

    @bind.register(TupleValueExpression)
    def _bind_tuple_expr(self, node: TupleValueExpression):
        """Resolve a column reference through the binder context.

        Sets ``col_alias`` to ``<table alias>.<lower-cased column name>``
        and ``col_object`` to the column object the context returns.
        """
        table_alias, col_obj = self._binder_context.get_binded_column(
            node.col_name, node.table_alias)
        node.col_alias = '{}.{}'.format(table_alias, node.col_name.lower())
        node.col_object = col_obj

    @bind.register(FunctionExpression)
    def _bind_func_expr(self, node: FunctionExpression):
        """Bind a UDF call: its children, outputs and implementation.

        Looks the UDF up in the catalog by name, fills in the output
        aliases and output objects, and sets ``node.function`` to an
        instance of the class loaded from the UDF's implementation file.

        NOTE(review): the checks below are ``assert`` statements, so they
        raise AssertionError and are skipped when Python runs with -O.
        """
        # bind all the children
        for child in node.children:
            self.bind(child)

        # the alias defaults to the lower-cased function name
        node.alias = node.alias or node.name.lower()
        udf_obj = self._catalog.get_udf_by_name(node.name)
        assert udf_obj is not None, (
            'UDF with name {} does not exist in the catalog. Please '
            'create the UDF using CREATE UDF command'.format(node.name))

        output_objs = self._catalog.get_udf_outputs(udf_obj)
        if node.output:
            # a specific output was requested: keep only the matching one
            for obj in output_objs:
                if obj.name.lower() == node.output:
                    node.output_col_aliases.append('{}.{}'.format(
                        node.alias, obj.name.lower()))
                    node.output_objs = [obj]
            # NOTE(review): presumably output_col_aliases starts out empty;
            # if so this also fires when no output matched, although the
            # message only mentions duplicates - confirm
            assert len(node.output_col_aliases) == 1, (
                'Duplicate columns {} in UDF {}'.format(
                    node.output, udf_obj.name))
        else:
            # no specific output requested: expose every output of the UDF
            node.output_col_aliases = [
                '{}.{}'.format(node.alias, obj.name.lower())
                for obj in output_objs
            ]
            node.output_objs = output_objs

        # load the implementing class and instantiate it
        node.function = path_to_class(udf_obj.impl_file_path, udf_obj.name)()
예제 #30
0
 def setUp(self):
     # reset the catalog manager before running each test
     CatalogManager().reset()
     create_sample_video()