Example #1
 # assumes: from tests.util.event_processor_utils import EventProcessorUtils
 @staticmethod
 def __get_self_event_metrics():
     """
     Gets the tables-refreshed, partitions-refreshed and events-skipped metric
     values from the Metastore EventsProcessor.
     """
     tbls_refreshed_count = EventProcessorUtils.get_int_metric(
         'tables-refreshed', 0)
     partitions_refreshed_count = EventProcessorUtils.get_int_metric(
         'partitions-refreshed', 0)
     events_skipped_count = EventProcessorUtils.get_int_metric(
         'events-skipped', 0)
     return int(tbls_refreshed_count), int(partitions_refreshed_count), \
       int(events_skipped_count)
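
This helper is typically consumed in a snapshot/act/compare pattern: read the three counters, run a statement from Impala, wait for the event processor to catch up, and read them again. The sketch below illustrates that pattern; the method name __exec_and_check_self_event is hypothetical, but it uses only the helper above and the same EventProcessorUtils calls.

 def __exec_and_check_self_event(self, stmt):
     # hypothetical wrapper: snapshot the counters, run the statement, then
     # verify the resulting events were recognized as self-events
     tbls_before, parts_before, skipped_before = self.__get_self_event_metrics()
     self.client.execute(stmt)
     EventProcessorUtils.wait_for_event_processing(self)
     tbls_after, parts_after, skipped_after = self.__get_self_event_metrics()
     # a self-event must not refresh any tables or partitions; it should only
     # bump the events-skipped counter
     assert tbls_after == tbls_before
     assert parts_after == parts_before
     assert skipped_after > skipped_before
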
Example #2
 def test_event_batching(self, unique_database):
     """Runs queries which generate multiple ALTER_PARTITION events which must be
 batched by events processor. Runs as a custom cluster test to isolate the metric
 values from other tests."""
     testtbl = "test_event_batching"
     test_acid_tbl = "test_event_batching_acid"
     acid_props = self.__get_transactional_tblproperties(True)
     # create test tables
     self.client.execute(
         "create table {}.{} like functional.alltypes".format(
             unique_database, testtbl))
     self.client.execute(
         "insert into {}.{} partition (year,month) select * from functional.alltypes"
         .format(unique_database, testtbl))
     self.client.execute(
         "create table {}.{} (id int) partitioned by (year int, month int) {}"
         .format(unique_database, test_acid_tbl, acid_props))
     self.client.execute(
         "insert into {}.{} partition (year, month) "
         "select id, year, month from functional.alltypes".format(
             unique_database, test_acid_tbl))
     # run compute stats from Impala; this should generate 24 ALTER_PARTITION events
     # which should be batched together into one or more batch events.
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_metric = "batch-events-created"
     batch_events_1 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     self.client.execute("compute stats {}.{}".format(
         unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_2 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     assert batch_events_2 > batch_events_1
     # run analyze table from Hive, which generates an ALTER_PARTITION event on
     # each partition of the table
     self.run_stmt_in_hive("analyze table {}.{} compute statistics".format(
         unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_3 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     assert batch_events_3 > batch_events_2
     # for a transactional table, since the events are batched together, the number
     # of tables refreshed must be far lower than the number of events generated
     num_table_refreshes_1 = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     self.client.execute("compute stats {}.{}".format(
         unique_database, test_acid_tbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_4 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     num_table_refreshes_2 = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     # we should generate at least 1 batch event, if not more, due to the 24
     # consecutive ALTER_PARTITION events
     assert batch_events_4 > batch_events_3
     # table should not be refreshed since this is a self-event
     assert num_table_refreshes_2 == num_table_refreshes_1
     self.run_stmt_in_hive("analyze table {}.{} compute statistics".format(
         unique_database, test_acid_tbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_5 = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     assert batch_events_5 > batch_events_4
     num_table_refreshes_2 = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     # the analyze table statement from Hive generates 24 ALTER_PARTITION events
     # which should be batched into 1-2 batches (depending on the timing of the
     # event poll thread).
     assert num_table_refreshes_2 > num_table_refreshes_1
     assert int(num_table_refreshes_2) - int(num_table_refreshes_1) < 24
     EventProcessorUtils.wait_for_event_processing(self)
     # test for batching of insert events
     batch_events_insert = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     tables_refreshed_insert = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     partitions_refreshed_insert = EventProcessorUtils.get_int_metric(
         "partitions-refreshed")
     self.client.execute(
         "insert into {}.{} partition (year,month) select * from functional.alltypes"
         .format(unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_after_insert = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     tables_refreshed_after_insert = EventProcessorUtils.get_int_metric(
         "tables-refreshed")
     partitions_refreshed_after_insert = EventProcessorUtils.get_int_metric(
         "partitions-refreshed")
     # this is a self-event, so tables or partitions should not be refreshed
     assert batch_events_after_insert > batch_events_insert
     assert tables_refreshed_after_insert == tables_refreshed_insert
     assert partitions_refreshed_after_insert == partitions_refreshed_insert
     # run the insert from Hive to make sure that the batch event refreshes all of
     # the partitions
     self.run_stmt_in_hive(
         "SET hive.exec.dynamic.partition.mode=nonstrict; insert into {}.{} partition"
         " (year,month) select * from functional.alltypes".format(
             unique_database, testtbl))
     EventProcessorUtils.wait_for_event_processing(self)
     batch_events_after_hive = EventProcessorUtils.get_int_metric(
         batch_events_metric)
     partitions_refreshed_after_hive = EventProcessorUtils.get_int_metric(
         "partitions-refreshed")
     assert batch_events_after_hive > batch_events_insert
     # 24 partitions were inserted, hence 24 partitions must be refreshed exactly once.
     assert int(partitions_refreshed_after_hive
                ) == int(partitions_refreshed_insert) + 24
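
Each assertion block above repeats the same snapshot/act/wait/compare sequence against one metric. If more of these checks are needed, the sequence factors out cleanly; metric_delta below is a hypothetical helper built only from the EventProcessorUtils calls already used in this test.

 def metric_delta(self, metric_name, action):
     """Runs action() and returns how much metric_name increased once the
     events processor has caught up."""
     before = EventProcessorUtils.get_int_metric(metric_name, 0)
     action()
     EventProcessorUtils.wait_for_event_processing(self)
     return EventProcessorUtils.get_int_metric(metric_name, 0) - before

A call site would then read, for example, assert self.metric_delta("batch-events-created", lambda: self.client.execute(stmt)) > 0.
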
Example #3
    def __run_create_drop_test(self, db, type, rename=False, rename_db=False):
        if type == "table":
            if not rename:
                queries = [
                    "create table {0}.test_{1} (i int)".format(db, 1),
                    "drop table {0}.test_{1}".format(db, 1)
                ]
            else:
                db_1 = "{}_1".format(db)
                if rename_db:
                    self.execute_query_expect_success(
                        self.create_impala_client(),
                        "drop database if exists {0} cascade".format(db_1))
                    self.execute_query_expect_success(
                        self.create_impala_client(),
                        "create database {0}".format(db_1))
                self.execute_query_expect_success(
                    self.create_impala_client(),
                    "create table if not exists {0}.rename_test_1 (i int)".
                    format(db))
                if rename_db:
                    queries = [
                        "alter table {0}.rename_test_1 rename to {1}.rename_test_1"
                        .format(db, db_1),
                        "alter table {0}.rename_test_1 rename to {1}.rename_test_1"
                        .format(db_1, db)
                    ]
                else:
                    queries = [
                        "alter table {0}.rename_test_1 rename to {0}.rename_test_2"
                        .format(db),
                        "alter table {0}.rename_test_2 rename to {0}.rename_test_1"
                        .format(db)
                    ]
            create_metric_name = "tables-added"
            removed_metric_name = "tables-removed"
        elif type == "database":
            self.execute_query_expect_success(
                self.create_impala_client(),
                "drop database if exists {0}".format("test_create_drop_db"))
            queries = [
                "create database {db}".format(db="test_create_drop_db"),
                "drop database {db}".format(db="test_create_drop_db")
            ]
            create_metric_name = "databases-added"
            removed_metric_name = "databases-removed"
        else:
            tbl_name = "test_create_drop_partition"
            self.execute_query_expect_success(
                self.create_impala_client(),
                "create table {db}.{tbl} (c int) partitioned by (p int)".
                format(db=db, tbl=tbl_name))
            queries = [
                "alter table {db}.{tbl} add partition (p=1)".format(
                    db=db, tbl=tbl_name),
                "alter table {db}.{tbl} drop partition (p=1)".format(
                    db=db, tbl=tbl_name)
            ]
            create_metric_name = "partitions-added"
            removed_metric_name = "partitions-removed"

        # get the metric values before running the queries
        EventProcessorUtils.wait_for_event_processing(self)
        create_metric_val_before = EventProcessorUtils.get_int_metric(
            create_metric_name, 0)
        removed_metric_val_before = EventProcessorUtils.get_int_metric(
            removed_metric_name, 0)
        events_skipped_before = EventProcessorUtils.get_int_metric(
            'events-skipped', 0)
        num_iters = 100
        for i in range(num_iters):
            for q in queries:
                try:
                    self.execute_query_expect_success(
                        self.create_impala_client(), q)
                except Exception as e:
                    print("Failed in iteration {}. Error: {}".format(i, str(e)))
                    raise
        EventProcessorUtils.wait_for_event_processing(self)
        create_metric_val_after = EventProcessorUtils.get_int_metric(
            create_metric_name, 0)
        removed_metric_val_after = EventProcessorUtils.get_int_metric(
            removed_metric_name, 0)
        events_skipped_after = EventProcessorUtils.get_int_metric(
            'events-skipped', 0)
        num_delete_event_entries = EventProcessorUtils.get_int_metric(
            'delete-event-log-size', 0)
        assert EventProcessorUtils.get_event_processor_status() == "ACTIVE"
        # None of the queries above should actually trigger an add/remove of an
        # object via events
        assert create_metric_val_after == create_metric_val_before
        assert removed_metric_val_after == removed_metric_val_before
        # each query set generates 2 events and both of them should be skipped
        assert events_skipped_after == num_iters * 2 + events_skipped_before
        # make sure that there are no more entries in the delete event log
        assert num_delete_event_entries == 0
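
Note that the type argument only matches "table" and "database" explicitly; any other value falls through to the partition branch. A driver that exercises every branch might look like the following (the test method name and the exact call list are illustrative, not taken from the snippet):

    def test_create_drop_events(self, unique_database):
        # plain create/drop cycles for each object type; "partition" lands in
        # the else branch of __run_create_drop_test
        self.__run_create_drop_test(unique_database, "table")
        self.__run_create_drop_test(unique_database, "database")
        self.__run_create_drop_test(unique_database, "partition")
        # rename within the same database, then across databases
        self.__run_create_drop_test(unique_database, "table", rename=True)
        self.__run_create_drop_test(unique_database, "table", rename=True,
                                    rename_db=True)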