Ejemplo n.º 1
0
    def test_should_load_and_select_using_udf_video(self):
        # Equality test
        select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
            WHERE DummyObjectDetector(data).label = ['person'] ORDER BY id;"

        actual_batch = execute_query_fetch_all(select_query)
        expected = [{
            'myvideo.id': i * 2,
            'dummyobjectdetector.label': ['person']
        } for i in range(NUM_FRAMES // 2)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)

        # Contain test
        select_query = "SELECT id, DummyObjectDetector(data) FROM MyVideo \
            WHERE DummyObjectDetector(data).label <@ ['person'] ORDER BY id;"

        actual_batch = execute_query_fetch_all(select_query)
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT id FROM MyVideo WHERE \
            DummyMultiObjectDetector(data).labels @> ['person'] ORDER BY id;"

        actual_batch = execute_query_fetch_all(select_query)
        expected = [{'myvideo.id': i} for i in range(0, NUM_FRAMES, 3)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 2
0
    def test_select_and_where_video_in_table(self):
        select_query = "SELECT id,data FROM MyVideo WHERE id = 5;"
        actual_batch = execute_query_fetch_all(select_query)
        expected_batch = list(create_dummy_batches(filters=[5]))[0]
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT data FROM MyVideo WHERE id = 5;"
        actual_batch = execute_query_fetch_all(select_query)
        expected_rows = [{
            "myvideo.data":
            np.array(np.ones((2, 2, 3)) * float(5 + 1) * 25, dtype=np.uint8)
        }]
        expected_batch = Batch(frames=pd.DataFrame(expected_rows))
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT id, data FROM MyVideo WHERE id >= 2;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(
            create_dummy_batches(filters=range(2, NUM_FRAMES)))[0]
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches(filters=range(2, 5)))[0]

        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 3
0
    def test_should_mat_view_to_the_same_table(self):
        materialized_query = """CREATE MATERIALIZED VIEW IF NOT EXISTS
            dummy_view2 (id, label)
            AS SELECT id, DummyObjectDetector(data).label FROM MyVideo
            WHERE id < 5;
        """
        execute_query_fetch_all(materialized_query)

        materialized_query = """CREATE MATERIALIZED VIEW IF NOT EXISTS
            dummy_view2 (id, label)
            AS SELECT id, DummyObjectDetector(data).label FROM MyVideo
            WHERE id >= 5;
        """
        execute_query_fetch_all(materialized_query)

        select_query = 'SELECT id, label FROM dummy_view2;'
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()

        labels = DummyObjectDetector().labels
        expected = [{
            'dummy_view2.id': i,
            'dummy_view2.label': labels[1 + i % 2]
        } for i in range(5)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 4
0
 def setUpClass(cls):
     CatalogManager().reset()
     copy_sample_video_to_prefix()
     query = """LOAD DATA INFILE 'ua_detrac.mp4'
                INTO MyVideo;"""
     execute_query_fetch_all(query)
     load_inbuilt_udfs()
Ejemplo n.º 5
0
 def test_should_upload_video_to_location(self):
     query = """UPLOAD PATH 'dummy.avi' BLOB "b'AAAA'";"""
     execute_query_fetch_all(query)
     expected_blob = "b'AAAA'"
     with open(os.path.join(PATH_PREFIX, 'dummy.avi'), 'rb') as f:
         bytes_read = f.read()
         actual_blob = str(base64.b64encode(bytes_read))
     self.assertEqual(actual_blob, expected_blob)
Ejemplo n.º 6
0
 def setUpClass(cls):
     CatalogManager().reset()
     create_sample_video(NUM_FRAMES)
     load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
     execute_query_fetch_all(load_query)
     load_inbuilt_udfs()
     cls.table1 = create_table("table1", 100, 3)
     cls.table2 = create_table("table2", 500, 3)
     cls.table3 = create_table("table3", 1000, 3)
Ejemplo n.º 7
0
    def test_should_select_star_in_table(self):
        select_query = "SELECT * FROM MyVideo;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches())[0]
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT * FROM MyVideo WHERE id = 5;"
        actual_batch = execute_query_fetch_all(select_query)
        expected_batch = list(create_dummy_batches(filters=[5]))[0]
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 8
0
 def setUpClass(cls):
     # reset the catalog manager before running each test
     CatalogManager().reset()
     create_sample_video()
     copy_sample_video_to_prefix()
     load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
     execute_query_fetch_all(load_query)
     query = """LOAD DATA INFILE 'ua_detrac.mp4'
                INTO UATRAC;"""
     execute_query_fetch_all(query)
     load_inbuilt_udfs()
Ejemplo n.º 9
0
    def test_should_load_video_in_table(self):
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo
                   WITH FORMAT VIDEO;"""
        execute_query_fetch_all(query)

        select_query = """SELECT id, data FROM MyVideo;"""

        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches())[0]
        expected_batch.modify_column_alias('myvideo')
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 10
0
    def test_should_load_and_select_using_udf_video(self):
        # Equality test
        select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
            WHERE DummyObjectDetector(data).label = ['person'] ORDER BY id;"

        actual_batch = execute_query_fetch_all(select_query)
        expected = [{
            'myvideo.id': i * 2,
            'dummyobjectdetector.label': np.array(['person'])
        } for i in range(NUM_FRAMES // 2)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)

        # Contain test
        select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
            WHERE DummyObjectDetector(data).label @> ['person'] ORDER BY id;"

        actual_batch = execute_query_fetch_all(select_query)
        self.assertEqual(actual_batch, expected_batch)

        # Multi element contain test

        select_query = "SELECT id,DummyObjectDetector(data) FROM MyVideo \
            WHERE DummyObjectDetector(data).label <@ ['person', 'bicycle'] \
            ORDER BY id;"

        actual_batch = execute_query_fetch_all(select_query)
        expected = [{
            'myvideo.id': i * 2,
            'dummyobjectdetector.label': np.array(['person'])
        } for i in range(NUM_FRAMES // 2)]
        expected += [{
            'myvideo.id': i,
            'dummyobjectdetector.label': np.array(['bicycle'])
        } for i in range(NUM_FRAMES) if i % 2 + 1 == 2]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        expected_batch.sort()
        self.assertEqual(actual_batch, expected_batch)

        nested_select_query = """SELECT id, data FROM
            (SELECT id, data, DummyObjectDetector(data) FROM MyVideo
                WHERE id >= 2
            ) AS T
            WHERE ['person'] <@ label;
            """
        actual_batch = execute_query_fetch_all(nested_select_query)
        actual_batch.sort()
        expected_batch = list(
            create_dummy_batches(
                filters=[i for i in range(2, NUM_FRAMES) if i % 2 == 0]))[0]
        expected_batch.modify_column_alias('T')
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 11
0
    def test_sort_on_nonprojected_column(self):
        """This tests doing an order by on a column
        that is not projected. The orderby_executor currently
        catches the KeyError, passes, and returns the untouched
        data
        """
        select_query = "SELECT data FROM MyVideo ORDER BY id;"
        actual_batch = execute_query_fetch_all(select_query)

        select_query = "SELECT data FROM MyVideo"
        expected_batch = execute_query_fetch_all(select_query)

        self.assertEqual(actual_batch.batch_size, expected_batch.batch_size)
Ejemplo n.º 12
0
    def setUp(self):
        CatalogManager().reset()
        create_sample_video(NUM_FRAMES)
        load_query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        execute_query_fetch_all(load_query)

        create_udf_query = """CREATE UDF DummyObjectDetector
                  INPUT  (Frame_Array NDARRAY UINT8(3, 256, 256))
                  OUTPUT (label NDARRAY STR(10))
                  TYPE  Classification
                  IMPL  'test/util.py';
        """
        execute_query_fetch_all(create_udf_query)
Ejemplo n.º 13
0
    def test_should_load_and_select_in_table(self):
        select_query = "SELECT id FROM MyVideo;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_rows = [{"myvideo.id": i} for i in range(NUM_FRAMES)]
        expected_batch = Batch(frames=pd.DataFrame(expected_rows))
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT id,data FROM MyVideo;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches())
        self.assertEqual([actual_batch], expected_batch)
Ejemplo n.º 14
0
    def test_should_select_star_in_nested_query(self):
        select_query = """SELECT * FROM (SELECT * FROM MyVideo) AS T;"""
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches())[0]
        expected_batch.modify_column_alias("T")
        self.assertEqual(actual_batch, expected_batch)

        select_query = """SELECT * FROM (SELECT id FROM MyVideo) AS T;"""
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_rows = [{"T.id": i} for i in range(NUM_FRAMES)]
        expected_batch = Batch(frames=pd.DataFrame(expected_rows))
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 15
0
    def test_should_load_video_in_table(self):
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        execute_query_fetch_all(query)

        insert_query = """ INSERT INTO MyVideo (id, data) VALUES (40,
                            [[[40, 40, 40] , [40, 40, 40]],
                            [[40, 40, 40], [40, 40, 40]]]);"""
        execute_query_fetch_all(insert_query)

        insert_query_2 = """ INSERT INTO MyVideo (id, data) VALUES (41,
                            [[[41, 41, 41] , [41, 41, 41]],
                            [[41, 41, 41], [41, 41, 41]]]);"""
        execute_query_fetch_all(insert_query_2)

        query = 'SELECT id, data FROM MyVideo WHERE id = 40'
        batch = execute_query_fetch_all(query)
        self.assertIsNone(np.testing.assert_array_equal(
            batch.frames['data'][0],
            np.array([[[40, 40, 40], [40, 40, 40]],
                      [[40, 40, 40], [40, 40, 40]]])))

        query = 'SELECT id, data FROM MyVideo WHERE id = 41;'
        batch = execute_query_fetch_all(query)
        self.assertIsNone(np.testing.assert_array_equal(
            batch.frames['data'][0],
            np.array([[[41, 41, 41], [41, 41, 41]],
                      [[41, 41, 41], [41, 41, 41]]])))
Ejemplo n.º 16
0
    def test_should_load_and_select_real_video_in_table(self):
        query = """LOAD DATA INFILE 'data/ua_detrac/ua_detrac.mp4'
                   INTO UADETRAC;"""
        execute_query_fetch_all(query)

        select_query = "SELECT * FROM UADETRAC;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        video_reader = OpenCVReader("data/ua_detrac/ua_detrac.mp4",
                                    batch_mem_size=30000000)
        expected_batch = Batch(frames=pd.DataFrame())
        for batch in video_reader.read():
            expected_batch += batch
        expected_batch.modify_column_alias("uadetrac")
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 17
0
    def test_should_load_and_sort_in_table(self):
        select_query = "SELECT data, id FROM MyVideo ORDER BY id;"
        actual_batch = execute_query_fetch_all(select_query)
        expected_rows = [{
            "myvideo.id":
            i,
            "myvideo.data":
            np.array(np.ones((2, 2, 3)) * float(i + 1) * 25, dtype=np.uint8),
        } for i in range(NUM_FRAMES)]
        expected_batch = Batch(frames=pd.DataFrame(expected_rows))
        self.assertEqual(actual_batch, expected_batch)

        select_query = "SELECT data, id FROM MyVideo ORDER BY id DESC;"
        actual_batch = execute_query_fetch_all(select_query)
        expected_batch.reverse()
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 18
0
 def test_hash_join_with_multiple_tables(self):
     select_query = """SELECT * FROM table1 JOIN table2
                       ON table1.a0 = table2.a0 JOIN table3
                       ON table3.a1 = table1.a1 WHERE table1.a2 > 50;"""
     actual_batch = execute_query_fetch_all(select_query)
     tmp = pd.merge(
         self.table1,
         self.table2,
         left_on=["table1.a0"],
         right_on=["table2.a0"],
         how="inner",
     )
     expected = pd.merge(
         tmp,
         self.table3,
         left_on=["table1.a1"],
         right_on=["table3.a1"],
         how="inner",
     )
     expected = expected.where(expected["table1.a2"] > 50)
     if len(expected):
         expected_batch = Batch(expected)
         self.assertEqual(
             expected_batch.sort_orderby(["table1.a0"]),
             actual_batch.sort_orderby(["table1.a0"]),
         )
Ejemplo n.º 19
0
    def test_array_count(self):
        select_query = """SELECT id FROM MyVideo WHERE
            Array_Count(DummyMultiObjectDetector(data).labels, 'person') = 2
            ORDER BY id;"""
        actual_batch = execute_query_fetch_all(select_query)
        expected = [{'myvideo.id': i} for i in range(0, NUM_FRAMES, 3)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)

        select_query = """SELECT id FROM MyVideo
            WHERE Array_Count(DummyObjectDetector(data).label, 'bicycle') = 1
            ORDER BY id;"""
        actual_batch = execute_query_fetch_all(select_query)
        expected = [{'myvideo.id': i} for i in range(1, NUM_FRAMES, 2)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 20
0
    def test_show_udfs(self):
        result = execute_query_fetch_all("SHOW UDFS;")
        self.assertEqual(len(result.frames.columns), 5)

        expected = {
            "name": ["FastRCNNObjectDetector", "Array_Count"],
            "inputs": [
                ["Frame_Array NDARRAY UINT8 [3, None, None]"],
                ["Input NDARRAY ANYTYPE []", "Key ANY"],
            ],
            "outputs": [
                [
                    "labels NDARRAY STR [None]",
                    "bboxes NDARRAY FLOAT32 [None, 4]",
                    "scores NDARRAY FLOAT32 [None]",
                ],
                ["count INTEGER"],
            ],
            "type": ["Classification", "Ndarray"],
        }
        expected_df = pd.DataFrame(expected)
        self.assertTrue(all(expected_df.inputs == result.frames.inputs))
        self.assertTrue(all(expected_df.outputs == result.frames.outputs))
        self.assertTrue(all(expected_df.name == result.frames.name))
        self.assertTrue(all(expected_df.type == result.frames.type))
Ejemplo n.º 21
0
def init_builtin_udfs(mode='debug'):
    """
    Loads the builtin udfs into the system.
    This should be called when the system bootstraps.
    In debug mode, it also loads udfs used in the test suite.
    Arguments:
        mode (str): 'debug' or 'release'
    """
    queries = [Fastrcnn_udf_query, Unnest_udf_query, ArrayCount_udf_query]
    if mode == 'debug':
        queries.extend([
            DummyObjectDetector_udf_query, DummyMultiObjectDetector_udf_query
        ])

    for query in queries:
        execute_query_fetch_all(query)
Ejemplo n.º 22
0
def create_table(table_name, num_rows, num_columns):
    # creates a table with num_rows tuples and columns = [a1, a2, a3, ...]
    columns = ''.join('a{} INTEGER, '.format(i)
                      for i in range(num_columns - 1))
    columns += 'a{} INTEGER'.format(num_columns - 1)
    create_table_query = 'CREATE TABLE IF NOT EXISTS {} ( {} );'.format(
        table_name, columns)
    execute_query_fetch_all(create_table_query)
    columns = ['a{}'.format(i) for i in range(num_columns)]
    df = create_csv(num_rows, columns)
    # load the CSV
    load_query = """LOAD DATA INFILE 'dummy.csv' INTO {}
                   WITH FORMAT CSV;""".format(table_name)
    execute_query_fetch_all(load_query)
    df.columns = [f'{table_name}.{col}' for col in df.columns]
    return df
Ejemplo n.º 23
0
    def test_should_mat_view_with_fastrcnn(self):
        select_query = """SELECT id, FastRCNNObjectDetector(data).labels
                            FROM UATRAC WHERE id < 5;"""
        query = '''CREATE MATERIALIZED VIEW IF NOT EXISTS uadtrac_fastRCNN (id, labels) \
        AS {}'''.format(select_query)
        execute_query_fetch_all(query)

        select_view_query = 'SELECT id, labels FROM uadtrac_fastRCNN'
        actual_batch = execute_query_fetch_all(select_view_query)
        actual_batch.sort()

        self.assertEqual(actual_batch.batch_size, 5)
        # non-trivial test case
        res = actual_batch.frames
        for idx in res.index:
            self.assertTrue('car' in res['uadtrac_fastrcnn.labels'][idx])
Ejemplo n.º 24
0
    def test_should_mat_view_with_dummy(self):
        materialized_query = """CREATE MATERIALIZED VIEW dummy_view (id, label)
            AS SELECT id, DummyObjectDetector(data).label FROM MyVideo;
        """
        execute_query_fetch_all(materialized_query)

        select_query = 'SELECT id, label FROM dummy_view;'
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()

        labels = DummyObjectDetector().labels
        expected = [{
            'dummy_view.id': i,
            'dummy_view.label': labels[1 + i % 2]
        } for i in range(NUM_FRAMES)]
        expected_batch = Batch(frames=pd.DataFrame(expected))
        self.assertEqual(actual_batch, expected_batch)
Ejemplo n.º 25
0
 def test_lateral_join_with_multiple_projects(self):
     select_query = """SELECT id, labels FROM MyVideo JOIN LATERAL
                     FastRCNNObjectDetector(data) WHERE id < 5;"""
     actual_batch = execute_query_fetch_all(select_query)
     self.assertTrue(
         all(actual_batch.frames.columns ==
             ["myvideo.id", "fastrcnnobjectdetector.labels"]))
     self.assertEqual(actual_batch.batch_size, 5)
Ejemplo n.º 26
0
    def test_select_and_sample(self):
        select_query = "SELECT id,data FROM MyVideo SAMPLE 7 ORDER BY id;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()

        expected_batch = list(
            create_dummy_batches(filters=range(0, NUM_FRAMES, 7)))

        self.assertEqual(actual_batch.batch_size, expected_batch[0].batch_size)
Ejemplo n.º 27
0
    def test_should_drop_table(self):
        catalog_manager = CatalogManager()
        query = """LOAD DATA INFILE 'dummy.avi' INTO MyVideo;"""
        execute_query_fetch_all(query)

        metadata_obj = catalog_manager.get_dataset_metadata(None, "MyVideo")
        video_dir = metadata_obj.file_url
        self.assertFalse(metadata_obj is None)
        column_objects = catalog_manager.get_all_column_objects(metadata_obj)
        self.assertEqual(len(column_objects), 2)
        self.assertTrue(Path(video_dir).exists())
        drop_query = """DROP TABLE MyVideo;"""
        execute_query_fetch_all(drop_query)
        self.assertTrue(
            catalog_manager.get_dataset_metadata(None, "MyVideo") is None)
        column_objects = catalog_manager.get_all_column_objects(metadata_obj)
        self.assertEqual(len(column_objects), 0)
        self.assertFalse(Path(video_dir).exists())
Ejemplo n.º 28
0
    def test_select_and_limit(self):
        select_query = "SELECT id,data FROM MyVideo ORDER BY id LIMIT 5;"
        actual_batch = execute_query_fetch_all(select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches(num_frames=10,
                                                   batch_size=5))

        self.assertEqual(actual_batch.batch_size, expected_batch[0].batch_size)
        self.assertEqual(actual_batch, expected_batch[0])
Ejemplo n.º 29
0
    def test_should_run_pytorch_and_ssd(self):
        create_udf_query = """CREATE UDF SSDObjectDetector
                  INPUT  (Frame_Array NDARRAY UINT8(3, 256, 256))
                  OUTPUT (label NDARRAY STR(10))
                  TYPE  Classification
                  IMPL  'eva/udfs/ssd_object_detector.py';
        """
        execute_query_fetch_all(create_udf_query)

        select_query = """SELECT SSDObjectDetector(data) FROM MyVideo
                        WHERE id < 5;"""
        actual_batch = execute_query_fetch_all(select_query)
        self.assertEqual(actual_batch.batch_size, 5)

        # non-trivial test case
        res = actual_batch.frames
        for idx in res.index:
            self.assertTrue('car' in res['ssdobjectdetector.label'][idx])
Ejemplo n.º 30
0
    def test_nested_select_video_in_table(self):
        nested_select_query = """SELECT id, data FROM
            (SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5) AS T
            WHERE id >= 3;"""
        actual_batch = execute_query_fetch_all(nested_select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches(filters=range(3, 5)))[0]
        expected_batch.modify_column_alias("T")
        self.assertEqual(actual_batch, expected_batch)

        nested_select_query = """SELECT T.id, T.data FROM
            (SELECT id, data FROM MyVideo WHERE id >= 2 AND id < 5) AS T
            WHERE id >= 3;"""
        actual_batch = execute_query_fetch_all(nested_select_query)
        actual_batch.sort()
        expected_batch = list(create_dummy_batches(filters=range(3, 5)))[0]
        expected_batch.modify_column_alias("T")
        self.assertEqual(actual_batch, expected_batch)