def test_vcpus_by_all_second_stage(self, usage_manager, setter_manager,
                                       insert_manager, data_driven_specs_repo):
        """Verify pre-hourly (second stage) aggregation of 'vcpus_agg'.

        Feeds mocked Monasca metrics through
        PreHourlyProcessor.do_transform and asserts that the resulting
        'vcpus_agg' metric for project_id == 'all' carries the expected
        value, dimensions and value_meta fields.
        """
        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json_by_all(),
                self.get_transform_specs_json_by_all())

        # Create an RDD out of the mocked Monasca metrics.
        # NOTE: eval() is acceptable here only because the fixture file is
        # trusted, repo-local test data; never use it on external input.
        with open(SecondStageDataProvider.kafka_data_path_by_all) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
        vm_cpu_rdd = self.spark_context.parallelize(raw_tuple_list)
        sql_context = SQLContext(self.spark_context)
        vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
        PreHourlyProcessor.do_transform(vm_cpu_df)

        metrics = DummyAdapter.adapter_impl.metric_list
        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'vcpus_agg' and value.get(
                'metric').get('dimensions').get('project_id') == 'all'
        ][0]

        # assertIsNotNone gives a clearer failure message than
        # assertTrue(... is not None)
        self.assertIsNotNone(vcpus_agg_metric)

        self.assertEqual(7.0, vcpus_agg_metric.get('metric').get('value'))
        self.assertEqual('useast', vcpus_agg_metric.get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            vcpus_agg_metric.get('metric').get('dimensions').get('host'))
        self.assertEqual(
            'prehourly',
            vcpus_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            14.0,
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-01-20 16:40:00',
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-01-20 16:40:46',
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))
def invoke():
    """Run the monasca-transform Spark Streaming driver.

    Loads oslo configuration, builds a Spark streaming context reading
    the configured Kafka topic at the configured interval, wires the
    record-store transformation into the stream, and blocks until the
    driver terminates.  On failure, saved Kafka offsets are deleted so
    the next run can start from a clean slate.
    """
    # Load oslo configuration (also backs Kafka offset bookkeeping).
    ConfigInitializer.basic_config()

    app_name = "mon_metrics_kafka"

    # Build the streaming context; batches arrive at the configured
    # stream interval.
    streaming_context = StreamingContext(
        SparkContext(conf=SparkConf().setAppName(app_name)),
        cfg.CONF.service.stream_interval)

    # Wire the Kafka topic into the record-store transformation.
    stream = MonMetricsKafkaProcessor.get_kafka_stream(
        cfg.CONF.messaging.topic,
        streaming_context)
    MonMetricsKafkaProcessor.transform_to_recordstore(stream)

    # catch interrupt, stop streaming context gracefully
    # signal.signal(signal.SIGINT, signal_handler)

    # FIXME: stop spark context to relinquish resources
    # FIXME: specify cores, so as not to use all the resources on the cluster.
    # FIXME: HA deploy multiple masters, may be one on each control node

    # Begin consuming batches.
    streaming_context.start()

    try:
        # Block until the Spark driver "finishes".
        streaming_context.awaitTermination()
    except Exception as err:
        MonMetricsKafkaProcessor.log_debug(
            "Exception raised during Spark execution : " + str(err))
        # A known failure mode: previously-saved Kafka offsets are
        # obsolete/out of range.  Deleting them improves the chance of
        # success on the next execution.

        # TODO(someone) prevent deleting all offsets for an application,
        # but just the latest revision
        MonMetricsKafkaProcessor.log_debug(
            "Deleting saved offsets for chance of success on next execution")

        MonMetricsKafkaProcessor.reset_kafka_offsets(app_name)

        # Also clear the pre-hourly processor's offsets, if enabled.
        if cfg.CONF.stage_processors.pre_hourly_processor_enabled:
            PreHourlyProcessor.reset_kafka_offsets()
# Beispiel #3
# 0
def invoke():
    """Start the Spark Streaming driver for the "mon_metrics_kafka" job.

    NOTE(review): this appears to be a byte-for-byte duplicate of the
    ``invoke`` defined earlier in this file (likely a scrape/concat
    artifact); at import time this definition shadows the earlier one.
    """
    # object to keep track of offsets
    ConfigInitializer.basic_config()

    # app name
    application_name = "mon_metrics_kafka"

    my_spark_conf = SparkConf().setAppName(application_name)

    spark_context = SparkContext(conf=my_spark_conf)

    # read at the configured interval
    spark_streaming_context = \
        StreamingContext(spark_context, cfg.CONF.service.stream_interval)

    kafka_stream = MonMetricsKafkaProcessor.get_kafka_stream(
        cfg.CONF.messaging.topic,
        spark_streaming_context)

    # transform to recordstore
    MonMetricsKafkaProcessor.transform_to_recordstore(kafka_stream)

    # catch interrupt, stop streaming context gracefully
    # signal.signal(signal.SIGINT, signal_handler)

    # start processing
    spark_streaming_context.start()

    # FIXME: stop spark context to relinquish resources

    # FIXME: specify cores, so as not to use all the resources on the cluster.

    # FIXME: HA deploy multiple masters, may be one on each control node

    try:
        # Wait for the Spark driver to "finish"
        spark_streaming_context.awaitTermination()
    except Exception as e:
        MonMetricsKafkaProcessor.log_debug(
            "Exception raised during Spark execution : " + str(e))
        # One exception that can occur here is the result of the saved
        # kafka offsets being obsolete/out of range.  Delete the saved
        # offsets to improve the chance of success on the next execution.

        # TODO(someone) prevent deleting all offsets for an application,
        # but just the latest revision
        MonMetricsKafkaProcessor.log_debug(
            "Deleting saved offsets for chance of success on next execution")

        MonMetricsKafkaProcessor.reset_kafka_offsets(application_name)

        # delete pre hourly processor offsets
        if cfg.CONF.stage_processors.pre_hourly_processor_enabled:
            PreHourlyProcessor.reset_kafka_offsets()
# Beispiel #4
# 0
    def rdd_to_recordstore(rdd_transform_context_rdd):
        """Transform a batch of raw Kafka metric messages into record-store rows.

        Pipeline: join raw metrics against the pre-transform specs, validate
        required fields, fan out one row per intermediate metric id, project
        everything into the common record-store schema, then hand the result
        to MonMetricsKafkaProcessor.process_metrics.  Finally the batch's
        Kafka offsets are saved and, when due, the pre-hourly processor runs.

        NOTE(review): defined without ``self`` -- presumably decorated as a
        staticmethod on the enclosing class; confirm in the full source.
        """

        if rdd_transform_context_rdd.isEmpty():
            MonMetricsKafkaProcessor.log_debug(
                "rdd_to_recordstore: nothing to process...")
        else:

            sql_context = SQLContext.getOrCreate(
                rdd_transform_context_rdd.context)
            data_driven_specs_repo = DataDrivenSpecsRepoFactory.\
                get_data_driven_specs_repo()
            pre_transform_specs_df = data_driven_specs_repo.\
                get_data_driven_specs(
                    sql_context=sql_context,
                    data_driven_spec_type=DataDrivenSpecsRepo.
                    pre_transform_specs_type)

            #
            # extract second column containing raw metric data
            #
            raw_mon_metrics = rdd_transform_context_rdd.map(
                lambda nt: nt.rdd_info[1])

            #
            # convert raw metric data rdd to dataframe rdd
            #
            raw_mon_metrics_df = \
                MonMetricUtils.create_mon_metrics_df_from_json_rdd(
                    sql_context,
                    raw_mon_metrics)

            #
            # filter out unwanted metrics and keep metrics we are interested in
            # (inner join on metric name == spec event_type drops anything
            # without a matching pre-transform spec)
            #
            cond = [
                raw_mon_metrics_df.metric["name"] ==
                pre_transform_specs_df.event_type]
            filtered_metrics_df = raw_mon_metrics_df.join(
                pre_transform_specs_df, cond)

            #
            # validate filtered metrics to check if required fields
            # are present and not empty
            # In order to be able to apply filter function had to convert
            # data frame rdd to normal rdd. After validation the rdd is
            # converted back to dataframe rdd
            #
            # FIXME: find a way to apply filter function on dataframe rdd data
            validated_mon_metrics_rdd = filtered_metrics_df.rdd.filter(
                MonMetricsKafkaProcessor._validate_raw_mon_metrics)
            validated_mon_metrics_df = sql_context.createDataFrame(
                validated_mon_metrics_rdd, filtered_metrics_df.schema)

            #
            # record generator
            # generate a new intermediate metric record if a given metric
            # metric_id_list, in pre_transform_specs table has several
            # intermediate metrics defined.
            # intermediate metrics are used as a convenient way to
            # process (aggregated) metric in mutiple ways by making a copy
            # of the source data for each processing
            # (explode() emits one row per element of metric_id_list)
            #
            gen_mon_metrics_df = validated_mon_metrics_df.select(
                validated_mon_metrics_df.meta,
                validated_mon_metrics_df.metric,
                validated_mon_metrics_df.event_processing_params,
                validated_mon_metrics_df.event_type,
                explode(validated_mon_metrics_df.metric_id_list).alias(
                    "this_metric_id"),
                validated_mon_metrics_df.service_id)

            #
            # transform metrics data to record_store format
            # record store format is the common format which will serve as
            # source to aggregation processing.
            # converting the metric to common standard format helps in writing
            # generic aggregation routines driven by configuration parameters
            #  and can be reused
            #
            # The when(...).when(...).otherwise('NA') chains below coalesce
            # alternative source fields, defaulting missing/empty values
            # to the literal 'NA'.
            #
            record_store_df = gen_mon_metrics_df.select(
                (gen_mon_metrics_df.metric.timestamp / 1000).alias(
                    "event_timestamp_unix"),
                from_unixtime(
                    gen_mon_metrics_df.metric.timestamp / 1000).alias(
                    "event_timestamp_string"),
                gen_mon_metrics_df.event_type.alias("event_type"),
                gen_mon_metrics_df.event_type.alias("event_quantity_name"),
                (gen_mon_metrics_df.metric.value / 1.0).alias(
                    "event_quantity"),
                when(gen_mon_metrics_df.metric.dimensions.state != '',
                     gen_mon_metrics_df.metric.dimensions.state).otherwise(
                    'NA').alias("event_status"),
                lit('1.0').alias('event_version'),
                lit('metrics').alias("record_type"),

                # resource_uuid
                when(gen_mon_metrics_df.metric.dimensions.instanceId != '',
                     gen_mon_metrics_df.metric.dimensions.instanceId).when(
                    gen_mon_metrics_df.metric.dimensions.resource_id != '',
                    gen_mon_metrics_df.metric.dimensions.resource_id).
                otherwise('NA').alias("resource_uuid"),

                when(gen_mon_metrics_df.metric.dimensions.tenantId != '',
                     gen_mon_metrics_df.metric.dimensions.tenantId).when(
                    gen_mon_metrics_df.metric.dimensions.tenant_id != '',
                    gen_mon_metrics_df.metric.dimensions.tenant_id).when(
                    gen_mon_metrics_df.metric.dimensions.project_id != '',
                    gen_mon_metrics_df.metric.dimensions.project_id).otherwise(
                    'NA').alias("tenant_id"),

                when(gen_mon_metrics_df.metric.dimensions.mount != '',
                     gen_mon_metrics_df.metric.dimensions.mount).otherwise(
                    'NA').alias("mount"),

                when(gen_mon_metrics_df.metric.dimensions.device != '',
                     gen_mon_metrics_df.metric.dimensions.device).otherwise(
                    'NA').alias("device"),

                when(gen_mon_metrics_df.metric.dimensions.namespace != '',
                     gen_mon_metrics_df.metric.dimensions.namespace).otherwise(
                    'NA').alias("namespace"),

                when(gen_mon_metrics_df.metric.dimensions.pod_name != '',
                     gen_mon_metrics_df.metric.dimensions.pod_name).otherwise(
                    'NA').alias("pod_name"),

                when(gen_mon_metrics_df.metric.dimensions.container_name != '',
                     gen_mon_metrics_df.metric.dimensions
                     .container_name).otherwise('NA').alias("container_name"),

                when(gen_mon_metrics_df.metric.dimensions.app != '',
                     gen_mon_metrics_df.metric.dimensions.app).otherwise(
                    'NA').alias("app"),

                when(gen_mon_metrics_df.metric.dimensions.interface != '',
                     gen_mon_metrics_df.metric.dimensions.interface).otherwise(
                    'NA').alias("interface"),

                when(gen_mon_metrics_df.metric.dimensions.deployment != '',
                     gen_mon_metrics_df.metric.dimensions
                     .deployment).otherwise('NA').alias("deployment"),

                when(gen_mon_metrics_df.metric.dimensions.daemon_set != '',
                     gen_mon_metrics_df.metric.dimensions
                     .daemon_set).otherwise('NA').alias("daemon_set"),

                when(gen_mon_metrics_df.meta.userId != '',
                     gen_mon_metrics_df.meta.userId).otherwise('NA').alias(
                    "user_id"),

                # region/zone fall back to spec-configured defaults
                when(gen_mon_metrics_df.meta.region != '',
                     gen_mon_metrics_df.meta.region).when(
                    gen_mon_metrics_df.event_processing_params
                    .set_default_region_to != '',
                    gen_mon_metrics_df.event_processing_params
                    .set_default_region_to).otherwise(
                    'NA').alias("region"),

                when(gen_mon_metrics_df.meta.zone != '',
                     gen_mon_metrics_df.meta.zone).when(
                    gen_mon_metrics_df.event_processing_params
                    .set_default_zone_to != '',
                    gen_mon_metrics_df.event_processing_params
                    .set_default_zone_to).otherwise(
                    'NA').alias("zone"),

                when(gen_mon_metrics_df.metric.dimensions.hostname != '',
                     gen_mon_metrics_df.metric.dimensions.hostname).when(
                    gen_mon_metrics_df.metric.value_meta.host != '',
                    gen_mon_metrics_df.metric.value_meta.host).otherwise(
                    'NA').alias("host"),

                when(gen_mon_metrics_df.service_id != '',
                     gen_mon_metrics_df.service_id).otherwise(
                    'NA').alias("service_group"),

                when(gen_mon_metrics_df.service_id != '',
                     gen_mon_metrics_df.service_id).otherwise(
                    'NA').alias("service_id"),

                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'yyyy-MM-dd').alias("event_date"),
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'HH').alias("event_hour"),
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'mm').alias("event_minute"),
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'ss').alias("event_second"),
                gen_mon_metrics_df.this_metric_id.alias("metric_group"),
                gen_mon_metrics_df.this_metric_id.alias("metric_id"))

            #
            # get transform context
            # (every row carries the same context; first() is sufficient)
            #
            rdd_transform_context = rdd_transform_context_rdd.first()
            transform_context = rdd_transform_context.transform_context_info

            #
            # cache record store rdd
            #
            if cfg.CONF.service.enable_record_store_df_cache:
                storage_level_prop = \
                    cfg.CONF.service.record_store_df_cache_storage_level
                try:
                    storage_level = StorageUtils.get_storage_level(
                        storage_level_prop)
                except InvalidCacheStorageLevelException as storage_error:
                    # annotate the error with the offending config option
                    # before propagating it
                    storage_error.value += \
                        " (as specified in " \
                        "service.record_store_df_cache_storage_level)"
                    raise
                record_store_df.persist(storage_level)

            #
            # start processing metrics available in record_store data
            #
            MonMetricsKafkaProcessor.process_metrics(transform_context,
                                                     record_store_df)

            # remove df from cache
            if cfg.CONF.service.enable_record_store_df_cache:
                record_store_df.unpersist()

            #
            # extract kafka offsets and batch processing time
            # stored in transform_context and save offsets
            #
            offsets = transform_context.offset_info

            # batch time
            batch_time_info = \
                transform_context.batch_time_info

            MonMetricsKafkaProcessor.save_kafka_offsets(
                offsets, rdd_transform_context_rdd.context.appName,
                batch_time_info)

            # call pre hourly processor, if its time to run
            if (cfg.CONF.stage_processors.pre_hourly_processor_enabled and
                    PreHourlyProcessor.is_time_to_run(batch_time_info)):
                PreHourlyProcessor.run_processor(
                    record_store_df.rdd.context,
                    batch_time_info)
    def test_fetch_quantity_sum(self,
                                usage_manager,
                                setter_manager,
                                insert_manager,
                                data_driven_specs_repo):
        """Verify hourly 'sum' aggregation of mem.total_mb for host mini-mon.

        Runs mocked Kafka metrics through rdd_to_recordstore and then
        PreHourlyProcessor.do_transform, asserting the resulting
        'mem.total_mb_agg' metric's value, dimensions and value_meta.
        """

        # test operation
        test_operation = "sum"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        instance_usage_list = DummyAdapter.adapter_impl.metric_list
        # materialize with list() -- under Python 3 map() returns an
        # iterator, which parallelize cannot consume directly
        instance_usage_list = list(map(dump_as_ascii_string,
                                       instance_usage_list))
        DummyAdapter.adapter_impl.metric_list = []
        instance_usage_rdd = self.spark_context.parallelize(
            instance_usage_list)
        sql_context = SQLContext(self.spark_context)
        instance_usage_df = sql_context.read.json(instance_usage_rdd)
        PreHourlyProcessor.do_transform(instance_usage_df)

        metrics = DummyAdapter.adapter_impl.metric_list
        mem_total_mb_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'mem.total_mb_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]

        # assertIsNotNone gives a clearer failure message than
        # assertTrue(... is not None)
        self.assertIsNotNone(mem_total_mb_agg_metric)

        self.assertEqual('mem.total_mb_agg',
                         mem_total_mb_agg_metric
                         .get('metric').get('name'))

        self.assertEqual(15360.0,
                         mem_total_mb_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         mem_total_mb_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         mem_total_mb_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('mini-mon',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('all',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions').get('project_id'))
        self.assertEqual('hourly',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))
        self.assertEqual(4.0,
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:00',
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:46',
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))
    def test_fetch_quantity_sum_second_stage(self,
                                             usage_manager,
                                             setter_manager,
                                             insert_manager,
                                             data_driven_specs_repo):
        """Verify pre-hourly 'sum' aggregation of mem.total_mb for mini-mon.

        Feeds second-stage instance-usage fixture data directly into
        PreHourlyProcessor.do_transform and asserts the resulting
        'mem.total_mb_agg' metric's value, dimensions and value_meta.
        """

        # test operation
        test_operation = "sum"
        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).

        # Create an RDD out of the mocked Monasca metrics
        with open(SecondStageDataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
        # materialize with list() -- under Python 3 map() returns an
        # iterator, which parallelize cannot consume directly
        instance_usage_list = list(map(dump_as_ascii_string,
                                       raw_tuple_list))

        # create a json RDD from instance_usage_list
        instance_usage_rdd = self.spark_context.parallelize(
            instance_usage_list)

        sql_context = SQLContext(self.spark_context)
        instance_usage_df = sql_context.read.json(
            instance_usage_rdd)

        # call pre hourly processor
        PreHourlyProcessor.do_transform(instance_usage_df)

        metrics = DummyAdapter.adapter_impl.metric_list
        mem_total_mb_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'mem.total_mb_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]
        # assertIsNotNone gives a clearer failure message than
        # assertTrue(... is not None)
        self.assertIsNotNone(mem_total_mb_agg_metric)

        self.assertEqual('mem.total_mb_agg',
                         mem_total_mb_agg_metric
                         .get('metric').get('name'))

        self.assertEqual(8679.0,
                         mem_total_mb_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         mem_total_mb_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         mem_total_mb_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('mini-mon',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('all',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions').get('project_id'))
        self.assertEqual('prehourly',
                         mem_total_mb_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(39.0,
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:00',
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 17:40:00',
                         mem_total_mb_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))
    def test_rdd_to_recordstore(self,
                                usage_manager,
                                setter_manager,
                                insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # Create an RDD out of the mocked Monasca metrics
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        host_usage_list = DummyAdapter.adapter_impl.metric_list
        host_usage_list = map(dump_as_ascii_string,
                              host_usage_list)
        DummyAdapter.adapter_impl.metric_list = []
        host_usage_rdd = self.spark_context.parallelize(host_usage_list)
        sql_context = SQLContext(self.spark_context)
        host_usage_df = sql_context.read.json(host_usage_rdd)
        PreHourlyProcessor.do_transform(host_usage_df)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # Verify cpu.total_logical_cores_agg for all hosts
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(15.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for mini-mon host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0333-mgmt'][0]

        self.assertEqual(9.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for devstack host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0027-mgmt'][0]

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for all hosts
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(7.134214285714285,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the mini-mon host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0333-mgmt'][0]

        self.assertEqual(4.9665,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the devstack host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'test-cp1-comp0027-mgmt'][0]

        self.assertEqual(2.1677142857142853,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('hourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))
    def rdd_to_recordstore(rdd_transform_context_rdd):
        """Convert raw Monasca metrics into record-store format and process them.

        Pipeline: filter the raw metrics against the data-driven
        pre-transform specs, validate required fields, fan each metric out
        to its intermediate metric ids, project everything into the common
        record-store schema, run the configured aggregations, then save the
        kafka offsets.  Finally triggers the pre-hourly processor if it is
        scheduled to run.

        :param rdd_transform_context_rdd: RDD of RddTransformContext
            elements; ``rdd_info[1]`` carries the raw metric JSON payload
            and ``transform_context_info`` carries offset/batch-time info.

        NOTE(review): no ``self``/``cls`` parameter — presumably decorated
        ``@staticmethod`` in the full source; confirm against the class.
        """

        if rdd_transform_context_rdd.isEmpty():
            MonMetricsKafkaProcessor.log_debug(
                "rdd_to_recordstore: nothing to process...")
        else:

            # build SQLContext from the same SparkContext that owns the RDD
            sql_context = SQLContext(rdd_transform_context_rdd.context)
            data_driven_specs_repo = DataDrivenSpecsRepoFactory.\
                get_data_driven_specs_repo()
            pre_transform_specs_df = data_driven_specs_repo.\
                get_data_driven_specs(
                    sql_context=sql_context,
                    data_driven_spec_type=DataDrivenSpecsRepo.
                    pre_transform_specs_type)

            #
            # extract second column containing raw metric data
            #
            raw_mon_metrics = rdd_transform_context_rdd.map(
                lambda nt: nt.rdd_info[1])

            #
            # convert raw metric data rdd to dataframe rdd
            #
            raw_mon_metrics_df = \
                MonMetricUtils.create_mon_metrics_df_from_json_rdd(
                    sql_context,
                    raw_mon_metrics)

            #
            # filter out unwanted metrics and keep metrics we are interested in
            # (inner join: metrics whose name has no matching event_type in
            # the pre-transform specs are dropped)
            #
            cond = [
                raw_mon_metrics_df.metric.name ==
                pre_transform_specs_df.event_type]
            filtered_metrics_df = raw_mon_metrics_df.join(
                pre_transform_specs_df, cond)

            #
            # validate filtered metrics to check if required fields
            # are present and not empty
            # In order to be able to apply filter function had to convert
            # data frame rdd to normal rdd. After validation the rdd is
            # converted back to dataframe rdd
            #
            # FIXME: find a way to apply filter function on dataframe rdd data
            validated_mon_metrics_rdd = filtered_metrics_df.rdd.filter(
                MonMetricsKafkaProcessor._validate_raw_mon_metrics)
            validated_mon_metrics_df = sql_context.createDataFrame(
                validated_mon_metrics_rdd, filtered_metrics_df.schema)

            #
            # record generator
            # generate a new intermediate metric record if a given metric
            # metric_id_list, in pre_transform_specs table has several
            # intermediate metrics defined.
            # intermediate metrics are used as a convenient way to
            # process (aggregated) metric in mutiple ways by making a copy
            # of the source data for each processing
            # (explode() emits one row per entry in metric_id_list)
            #
            gen_mon_metrics_df = validated_mon_metrics_df.select(
                validated_mon_metrics_df.meta,
                validated_mon_metrics_df.metric,
                validated_mon_metrics_df.event_processing_params,
                validated_mon_metrics_df.event_type,
                explode(validated_mon_metrics_df.metric_id_list).alias(
                    "this_metric_id"),
                validated_mon_metrics_df.service_id)

            #
            # transform metrics data to record_store format
            # record store format is the common format which will serve as
            # source to aggregation processing.
            # converting the metric to common standard format helps in writing
            # generic aggregation routines driven by configuration parameters
            #  and can be reused
            # (metric.timestamp is in milliseconds; / 1000 converts to
            #  unix seconds for from_unixtime)
            #
            record_store_df = gen_mon_metrics_df.select(
                (gen_mon_metrics_df.metric.timestamp / 1000).alias(
                    "event_timestamp_unix"),
                from_unixtime(
                    gen_mon_metrics_df.metric.timestamp / 1000).alias(
                    "event_timestamp_string"),
                gen_mon_metrics_df.event_type.alias("event_type"),
                gen_mon_metrics_df.event_type.alias("event_quantity_name"),
                (gen_mon_metrics_df.metric.value / 1.0).alias(
                    "event_quantity"),
                when(gen_mon_metrics_df.metric.dimensions.state != '',
                     gen_mon_metrics_df.metric.dimensions.state).otherwise(
                    'NA').alias("event_status"),
                lit('1.0').alias('event_version'),
                lit('metrics').alias("record_type"),

                # resource_uuid: first non-empty of instanceId / resource_id
                when(gen_mon_metrics_df.metric.dimensions.instanceId != '',
                     gen_mon_metrics_df.metric.dimensions.instanceId).when(
                    gen_mon_metrics_df.metric.dimensions.resource_id != '',
                    gen_mon_metrics_df.metric.dimensions.resource_id).
                otherwise('NA').alias("resource_uuid"),

                # tenant_id: first non-empty of tenantId / tenant_id /
                # project_id dimensions
                when(gen_mon_metrics_df.metric.dimensions.tenantId != '',
                     gen_mon_metrics_df.metric.dimensions.tenantId).when(
                    gen_mon_metrics_df.metric.dimensions.tenant_id != '',
                    gen_mon_metrics_df.metric.dimensions.tenant_id).when(
                    gen_mon_metrics_df.metric.dimensions.project_id != '',
                    gen_mon_metrics_df.metric.dimensions.project_id).otherwise(
                    'NA').alias("tenant_id"),

                when(gen_mon_metrics_df.metric.dimensions.mount != '',
                     gen_mon_metrics_df.metric.dimensions.mount).otherwise(
                    'NA').alias("mount"),

                when(gen_mon_metrics_df.metric.dimensions.device != '',
                     gen_mon_metrics_df.metric.dimensions.device).otherwise(
                    'NA').alias("device"),

                when(gen_mon_metrics_df.meta.userId != '',
                     gen_mon_metrics_df.meta.userId).otherwise('NA').alias(
                    "user_id"),

                # region/zone: fall back to the per-event-type defaults from
                # the pre-transform spec when meta does not provide one
                when(gen_mon_metrics_df.meta.region != '',
                     gen_mon_metrics_df.meta.region).when(
                    gen_mon_metrics_df.event_processing_params
                    .set_default_region_to != '',
                    gen_mon_metrics_df.event_processing_params
                    .set_default_region_to).otherwise(
                    'NA').alias("region"),

                when(gen_mon_metrics_df.meta.zone != '',
                     gen_mon_metrics_df.meta.zone).when(
                    gen_mon_metrics_df.event_processing_params
                    .set_default_zone_to != '',
                    gen_mon_metrics_df.event_processing_params
                    .set_default_zone_to).otherwise(
                    'NA').alias("zone"),

                when(gen_mon_metrics_df.metric.dimensions.hostname != '',
                     gen_mon_metrics_df.metric.dimensions.hostname).when(
                    gen_mon_metrics_df.metric.value_meta.host != '',
                    gen_mon_metrics_df.metric.value_meta.host).otherwise(
                    'NA').alias("host"),

                when(gen_mon_metrics_df.service_id != '',
                     gen_mon_metrics_df.service_id).otherwise(
                    'NA').alias("service_group"),

                when(gen_mon_metrics_df.service_id != '',
                     gen_mon_metrics_df.service_id).otherwise(
                    'NA').alias("service_id"),

                # timestamp broken into date/hour/minute/second components
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'yyyy-MM-dd').alias("event_date"),
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'HH').alias("event_hour"),
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'mm').alias("event_minute"),
                from_unixtime(gen_mon_metrics_df.metric.timestamp / 1000,
                              'ss').alias("event_second"),
                gen_mon_metrics_df.this_metric_id.alias("metric_group"),
                gen_mon_metrics_df.this_metric_id.alias("metric_id"))

            #
            # get transform context
            # NOTE(review): .first() assumes every element of the RDD carries
            # the same transform_context_info — confirm against the producer
            #
            rdd_transform_context = rdd_transform_context_rdd.first()
            transform_context = rdd_transform_context.transform_context_info

            #
            # cache record store rdd (record_store_df is consumed by multiple
            # aggregation pipelines, so caching avoids recomputation)
            #
            if cfg.CONF.service.enable_record_store_df_cache:
                storage_level_prop = \
                    cfg.CONF.service.record_store_df_cache_storage_level
                storage_level = StorageUtils.get_storage_level(
                    storage_level_prop)
                record_store_df.persist(storage_level)

            #
            # start processing metrics available in record_store data
            #
            MonMetricsKafkaProcessor.process_metrics(transform_context,
                                                     record_store_df)

            # remove df from cache
            if cfg.CONF.service.enable_record_store_df_cache:
                record_store_df.unpersist()

            #
            # extract kafka offsets and batch processing time
            # stored in transform_context and save offsets
            #
            offsets = transform_context.offset_info

            # batch time
            batch_time_info = \
                transform_context.batch_time_info

            MonMetricsKafkaProcessor.save_kafka_offsets(
                offsets, rdd_transform_context_rdd.context.appName,
                batch_time_info)

            # call pre hourly processor, if its time to run
            if (cfg.CONF.stage_processors.pre_hourly_processor_enabled
                    is True and PreHourlyProcessor.is_time_to_run(
                        batch_time_info)):
                PreHourlyProcessor.run_processor(
                    record_store_df.rdd.context,
                    batch_time_info)
    def test_pre_hourly_processor(self,
                                  offset_range_list,
                                  pre_hourly_data):
        """Run the pre-hourly processor over canned instance-usage data.

        Mocks the kafka offset ranges and the fetched pre-hourly RDD,
        invokes PreHourlyProcessor.run_processor, and verifies the
        mem.total_mb_agg and mem.usable_mb_agg metrics published for
        host 'all'.
        """
        # mocked kafka offset ranges
        offset_range_list.return_value = [
            OffsetRange("metrics_pre_hourly", 1, 10, 20)]

        # Build an RDD out of the canned instance-usage fixture.
        # NOTE: eval() is acceptable here only because the fixture is
        # repo-controlled test data, never untrusted input.
        with open(DataProvider.metrics_pre_hourly_data_path) as data_file:
            usage_tuples = [eval(line)
                            for line in data_file.read().splitlines()]
        instance_usage_rdd = self.spark_context.parallelize(usage_tuples)
        pre_hourly_data.return_value = instance_usage_rdd

        # simple sanity transform over the mocked RDD
        usage_count = self.simple_count_transform(instance_usage_rdd)

        # run pre hourly processor
        PreHourlyProcessor.run_processor(
            self.spark_context, self.get_dummy_batch_time())

        # metrics submitted to the dummy message adapter
        published = DummyAdapter.adapter_impl.metric_list

        # Verify count of instance usage data
        self.assertEqual(usage_count, 6)

        def first_for_host_all(metric_name):
            # first published metric with the given name for host 'all'
            return [m for m in published
                    if m.get('metric').get('name') == metric_name and
                    m.get('metric').get('dimensions').get('host') ==
                    'all'][0]

        # check mem.total_mb_agg aggregation result
        mem_total_mb_agg_metric = first_for_host_all('mem.total_mb_agg')
        self.assertTrue(mem_total_mb_agg_metric is not None)
        self.assertEqual(16049.0,
                         mem_total_mb_agg_metric.get('metric').get('value'))
        # agg meta
        total_value_meta = mem_total_mb_agg_metric.get(
            'metric').get('value_meta')
        self.assertEqual("2016-06-20 11:49:44",
                         total_value_meta.get('lastrecord_timestamp'))
        self.assertEqual("2016-06-20 11:24:59",
                         total_value_meta.get('firstrecord_timestamp'))
        self.assertEqual(60.0, total_value_meta.get('record_count'))

        # check mem.usable_mb_agg aggregation result
        mem_usable_mb_agg_metric = first_for_host_all('mem.usable_mb_agg')
        self.assertTrue(mem_usable_mb_agg_metric is not None)
        self.assertEqual(10283.1,
                         mem_usable_mb_agg_metric.get('metric').get('value'))
        # agg meta
        usable_value_meta = mem_usable_mb_agg_metric.get(
            'metric').get('value_meta')
        self.assertEqual("2016-06-20 11:49:44",
                         usable_value_meta.get('lastrecord_timestamp'))
        self.assertEqual("2016-06-20 11:24:59",
                         usable_value_meta.get('firstrecord_timestamp'))
        self.assertEqual(60.0, usable_value_meta.get('record_count'))
    def test_vcpus_by_all_second_stage(self,
                                       usage_manager,
                                       setter_manager,
                                       insert_manager,
                                       data_driven_specs_repo):
        """Verify the second-stage vcpus_agg rollup for project_id 'all'.

        Feeds canned Monasca metrics through
        PreHourlyProcessor.do_transform with mocked components and spec
        tables, then checks the published 'vcpus_agg' metric.
        """
        # wire up mocked processing components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # mocked data-driven spec tables
        data_driven_specs_repo.return_value = MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json_by_all(),
            self.get_transform_specs_json_by_all())

        # Build a dataframe from the canned kafka metric payloads.
        # NOTE: eval() is acceptable here only because the fixture is
        # repo-controlled test data, never untrusted input.
        with open(SecondStageDataProvider.kafka_data_path_by_all) as data_file:
            kafka_tuples = [eval(line)
                            for line in data_file.read().splitlines()]
        vm_cpu_rdd = self.spark_context.parallelize(kafka_tuples)
        vm_cpu_df = SQLContext(self.spark_context).read.json(vm_cpu_rdd)
        PreHourlyProcessor.do_transform(vm_cpu_df)

        published = DummyAdapter.adapter_impl.metric_list
        vcpus_agg_metric = [
            m for m in published
            if m.get('metric').get('name') == 'vcpus_agg' and
            m.get('metric').get('dimensions').get('project_id') == 'all'][0]

        self.assertTrue(vcpus_agg_metric is not None)

        metric = vcpus_agg_metric.get('metric')
        meta = vcpus_agg_metric.get('meta')
        dimensions = metric.get('dimensions')
        value_meta = metric.get('value_meta')

        self.assertEqual(7.0, metric.get('value'))
        self.assertEqual('useast', meta.get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         meta.get('tenantId'))
        self.assertEqual('all', dimensions.get('host'))
        self.assertEqual('prehourly', dimensions.get('aggregation_period'))

        self.assertEqual(14.0, value_meta.get('record_count'))
        self.assertEqual('2016-01-20 16:40:00',
                         value_meta.get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:46',
                         value_meta.get('lastrecord_timestamp_string'))
    def test_fetch_quantity_avg(self,
                                usage_manager,
                                setter_manager,
                                insert_manager,
                                data_driven_specs_repo):
        """Validate the 'avg' fetch-quantity operation across both stages.

        First runs canned kafka metrics through rdd_to_recordstore
        (hourly aggregation), then replays the published intermediate
        metrics through the pre-hourly transform and checks the final
        cpu.utilized_logical_cores_agg metric.
        """
        # operation under test
        test_operation = "avg"

        # wire up mocked processing components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # mocked data-driven spec tables for the chosen operation
        data_driven_specs_repo.return_value = MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(
                test_operation, 'hourly'))

        # Build an RDD from the canned Monasca metric payloads.
        # NOTE: eval() is acceptable here only because the fixture is
        # repo-controlled test data, never untrusted input.
        with open(DataProvider.kafka_data_path) as data_file:
            kafka_tuples = [eval(line)
                            for line in data_file.read().splitlines()]

        rdd_monasca = self.spark_context.parallelize(kafka_tuples)

        # decorate the mocked RDD with dummy kafka offsets
        # (mimics rdd.offsetRanges())
        transform_context = TransformContextUtils.get_context(
            offset_info=[OffsetRange("metrics", 1, 10, 20)],
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # first stage: raw metrics -> record store -> hourly aggregates
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # replay the published hourly metrics through the second stage
        intermediate_metrics = DummyAdapter.adapter_impl.metric_list
        quantity_util_list = map(dump_as_ascii_string, intermediate_metrics)

        DummyAdapter.adapter_impl.metric_list = []
        quantity_util_rdd = self.spark_context.parallelize(quantity_util_list)
        quantity_util_df = SQLContext(self.spark_context).read.json(
            quantity_util_rdd)
        PreHourlyProcessor.do_transform(quantity_util_df)
        metrics = DummyAdapter.adapter_impl.metric_list

        utilized_cpu_logical_agg_metric = [
            m for m in metrics
            if m.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg'][0]

        metric = utilized_cpu_logical_agg_metric.get('metric')
        meta = utilized_cpu_logical_agg_metric.get('meta')
        dimensions = metric.get('dimensions')
        value_meta = metric.get('value_meta')

        self.assertEqual(7.134214285714285, metric.get('value'))
        self.assertEqual('useast', meta.get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         meta.get('tenantId'))
        self.assertEqual('all', dimensions.get('host'))
        self.assertEqual('all', dimensions.get('project_id'))
        self.assertEqual('hourly', dimensions.get('aggregation_period'))

        self.assertEqual(13.0, value_meta.get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         value_meta.get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         value_meta.get('lastrecord_timestamp_string'))
    def test_fetch_quantity_avg_second_stage(self,
                                             usage_manager,
                                             setter_manager,
                                             insert_manager,
                                             data_driven_specs_repo):
        """Validate the 'avg' fetch-quantity operation in the second stage.

        Feeds canned intermediate metrics straight through the
        pre-hourly transform (no first-stage run) and checks the
        published cpu.utilized_logical_cores_agg metric.
        """
        # operation under test
        test_operation = "avg"

        # wire up mocked processing components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # mocked data-driven spec tables ('prehourly' period)
        data_driven_specs_repo.return_value = MockDataDrivenSpecsRepo(
            self.spark_context,
            self.get_pre_transform_specs_json(),
            self.get_transform_specs_json_by_operation(
                test_operation, 'prehourly'))

        # Build a dataframe from the canned second-stage payloads.
        # NOTE: eval() is acceptable here only because the fixture is
        # repo-controlled test data, never untrusted input.
        with open(SecondStageDataProvider.kafka_data_path) as data_file:
            raw_tuples = [eval(line)
                          for line in data_file.read().splitlines()]
        util_list = map(dump_as_ascii_string, raw_tuples)

        quantity_util_rdd = self.spark_context.parallelize(util_list)
        quantity_util_df = SQLContext(self.spark_context).read.json(
            quantity_util_rdd)
        PreHourlyProcessor.do_transform(quantity_util_df)
        metrics = DummyAdapter.adapter_impl.metric_list

        utilized_cpu_logical_agg_metric = [
            m for m in metrics
            if m.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg'][0]

        metric = utilized_cpu_logical_agg_metric.get('metric')
        meta = utilized_cpu_logical_agg_metric.get('meta')
        dimensions = metric.get('dimensions')
        value_meta = metric.get('value_meta')

        self.assertEqual(7.134214285714285, metric.get('value'))
        self.assertEqual('useast', meta.get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         meta.get('tenantId'))
        self.assertEqual('all', dimensions.get('host'))
        self.assertEqual('all', dimensions.get('project_id'))
        self.assertEqual('hourly', dimensions.get('aggregation_period'))

        self.assertEqual(13.0, value_meta.get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         value_meta.get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         value_meta.get('lastrecord_timestamp_string'))
# Beispiel #13 ("Example #13" — scraper artifact: snippet separator)
# 0 (scraper artifact: vote count accompanying the separator)
    def test_rdd_to_recordstore(self, usage_manager, setter_manager,
                                insert_manager):
        """End-to-end check of rdd_to_recordstore plus the pre-hourly stage.

        Mocked Monasca metrics are pushed through
        MonMetricsKafkaProcessor.rdd_to_recordstore, the intermediate
        host-usage records are re-fed through PreHourlyProcessor, and the
        resulting cpu.total_logical_cores_agg and
        cpu.utilized_logical_cores_agg metrics are verified per host.
        """
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # Create an RDD out of the mocked Monasca metrics.
        # NOTE(review): eval() is acceptable here only because the data file
        # is a local, trusted test fixture; prefer ast.literal_eval if the
        # fixture format permits it.
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

        host_usage_list = DummyAdapter.adapter_impl.metric_list
        # list() is required on Python 3, where map() returns an iterator
        # and SparkContext.parallelize needs a sized sequence.
        host_usage_list = list(map(dump_as_ascii_string, host_usage_list))
        DummyAdapter.adapter_impl.metric_list = []
        host_usage_rdd = self.spark_context.parallelize(host_usage_list)
        sql_context = SQLContext(self.spark_context)
        host_usage_df = sql_context.read.json(host_usage_rdd)
        PreHourlyProcessor.do_transform(host_usage_df)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        def find_agg_metric(name, host):
            # First submitted metric matching metric name and host dimension.
            return [
                value for value in metrics
                if value.get('metric').get('name') == name
                and value.get('metric').get('dimensions').get('host') == host
            ][0]

        def verify_agg_metric(agg_metric, expected_value,
                              expected_record_count):
            # Assertions shared by every aggregate in this test: region and
            # tenant metadata, 'all' project, hourly aggregation period and
            # the fixture's first/last record timestamps.
            self.assertEqual(expected_value,
                             agg_metric.get('metric').get('value'))
            self.assertEqual('useast', agg_metric.get('meta').get('region'))
            self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                             agg_metric.get('meta').get('tenantId'))
            dimensions = agg_metric.get('metric').get('dimensions')
            self.assertEqual('all', dimensions.get('project_id'))
            self.assertEqual('hourly', dimensions.get('aggregation_period'))
            value_meta = agg_metric.get('metric').get('value_meta')
            self.assertEqual(expected_record_count,
                             value_meta.get('record_count'))
            self.assertEqual('2016-03-07 16:09:23',
                             value_meta.get('firstrecord_timestamp_string'))
            self.assertEqual('2016-03-07 16:10:38',
                             value_meta.get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for all hosts, the mini-mon
        # host and the devstack host
        verify_agg_metric(
            find_agg_metric('cpu.total_logical_cores_agg', 'all'),
            15.0, 13.0)
        verify_agg_metric(
            find_agg_metric('cpu.total_logical_cores_agg',
                            'test-cp1-comp0333-mgmt'),
            9.0, 6.0)
        verify_agg_metric(
            find_agg_metric('cpu.total_logical_cores_agg',
                            'test-cp1-comp0027-mgmt'),
            6.0, 7.0)

        # Verify cpu.utilized_logical_cores_agg for all hosts, the mini-mon
        # host and the devstack host
        verify_agg_metric(
            find_agg_metric('cpu.utilized_logical_cores_agg', 'all'),
            7.134214285714285, 13.0)
        verify_agg_metric(
            find_agg_metric('cpu.utilized_logical_cores_agg',
                            'test-cp1-comp0333-mgmt'),
            4.9665, 6.0)
        verify_agg_metric(
            find_agg_metric('cpu.utilized_logical_cores_agg',
                            'test-cp1-comp0027-mgmt'),
            2.1677142857142853, 7.0)
# Beispiel #14
# 0
    def test_rdd_to_recordstore_second_stage(self, usage_manager,
                                             setter_manager, insert_manager):
        """Check the pre-hourly (second-stage) transform in isolation.

        Pre-recorded host-usage records are fed straight into
        PreHourlyProcessor.do_transform and the resulting
        cpu.total_logical_cores_agg and cpu.utilized_logical_cores_agg
        metrics are verified per host with the 'prehourly' period.
        """
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # Create an RDD out of the mocked Monasca metrics.
        # NOTE(review): eval() is acceptable here only because the data file
        # is a local, trusted test fixture.
        with open(SecondStageDataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
        # list() is required on Python 3, where map() returns an iterator
        # and SparkContext.parallelize needs a sized sequence.
        host_usage_list = list(map(dump_as_ascii_string, raw_tuple_list))
        sql_context = SQLContext(self.spark_context)
        host_usage_rdd = self.spark_context.parallelize(host_usage_list)
        host_usage_df = sql_context.read.json(host_usage_rdd)
        PreHourlyProcessor.do_transform(host_usage_df)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        def find_agg_metric(name, host):
            # First submitted metric matching metric name and host dimension.
            return [
                value for value in metrics
                if value.get('metric').get('name') == name
                and value.get('metric').get('dimensions').get('host') == host
            ][0]

        def verify_agg_metric(agg_metric, expected_value,
                              expected_record_count):
            # Assertions shared by every aggregate in this test: region and
            # tenant metadata, 'all' project, 'prehourly' aggregation period
            # and the fixture's first/last record timestamps.
            self.assertEqual(expected_value,
                             agg_metric.get('metric').get('value'))
            self.assertEqual('useast', agg_metric.get('meta').get('region'))
            self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                             agg_metric.get('meta').get('tenantId'))
            dimensions = agg_metric.get('metric').get('dimensions')
            self.assertEqual('all', dimensions.get('project_id'))
            self.assertEqual('prehourly',
                             dimensions.get('aggregation_period'))
            value_meta = agg_metric.get('metric').get('value_meta')
            self.assertEqual(expected_record_count,
                             value_meta.get('record_count'))
            self.assertEqual('2016-03-07 16:09:23',
                             value_meta.get('firstrecord_timestamp_string'))
            self.assertEqual('2016-03-07 16:10:38',
                             value_meta.get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for all hosts, the mini-mon
        # host and the devstack host
        verify_agg_metric(
            find_agg_metric('cpu.total_logical_cores_agg', 'all'),
            15.0, 13.0)
        verify_agg_metric(
            find_agg_metric('cpu.total_logical_cores_agg', 'mini-mon'),
            9.0, 6.0)
        verify_agg_metric(
            find_agg_metric('cpu.total_logical_cores_agg', 'devstack'),
            6.0, 7.0)

        # Verify cpu.utilized_logical_cores_agg for all hosts, the mini-mon
        # host and the devstack host
        verify_agg_metric(
            find_agg_metric('cpu.utilized_logical_cores_agg', 'all'),
            8.0, 13.0)
        verify_agg_metric(
            find_agg_metric('cpu.utilized_logical_cores_agg', 'mini-mon'),
            5.0, 6.0)
        verify_agg_metric(
            find_agg_metric('cpu.utilized_logical_cores_agg', 'devstack'),
            3.0, 7.0)
    def test_fetch_quantity_avg_second_stage(self, usage_manager,
                                             setter_manager, insert_manager,
                                             data_driven_specs_repo):
        """Check the 'avg' fetch-quantity operation in the pre-hourly stage.

        Feeds pre-recorded utilization records through
        PreHourlyProcessor.do_transform with an 'avg' transform spec and
        verifies the cpu.utilized_logical_cores_agg metric it emits.
        """
        # test operation
        test_operation = "avg"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation, 'prehourly'))

        # Create an RDD out of the mocked Monasca metrics.
        # NOTE(review): eval() is acceptable here only because the data file
        # is a local, trusted test fixture.
        with open(SecondStageDataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
        # list() is required on Python 3, where map() returns an iterator
        # and SparkContext.parallelize needs a sized sequence.
        util_list = list(map(dump_as_ascii_string, raw_tuple_list))

        quantity_util_rdd = self.spark_context.parallelize(util_list)
        sql_context = SQLContext(self.spark_context)
        quantity_util_df = sql_context.read.json(quantity_util_rdd)
        PreHourlyProcessor.do_transform(quantity_util_df)
        metrics = DummyAdapter.adapter_impl.metric_list

        utilized_cpu_logical_agg_metric = [
            value for value in metrics if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg'
        ][0]

        # Averaged value plus standard metadata emitted by the transform.
        self.assertEqual(
            7.134214285714285,
            utilized_cpu_logical_agg_metric.get('metric').get('value'))
        self.assertEqual(
            'useast',
            utilized_cpu_logical_agg_metric.get('meta').get('region'))

        self.assertEqual(
            cfg.CONF.messaging.publish_kafka_project_id,
            utilized_cpu_logical_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('host'))
        self.assertEqual(
            'all',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('project_id'))
        self.assertEqual(
            'hourly',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'dimensions').get('aggregation_period'))

        # value_meta carries the record count and first/last record
        # timestamps of the aggregated window.
        self.assertEqual(
            13.0,
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('record_count'))
        self.assertEqual(
            '2016-03-07 16:09:23',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-03-07 16:10:38',
            utilized_cpu_logical_agg_metric.get('metric').get(
                'value_meta').get('lastrecord_timestamp_string'))
    def test_vcpus_by_all(self, usage_manager, setter_manager, insert_manager,
                          data_driven_specs_repo):
        """End-to-end check of the vcpus_agg metric aggregated over all
        projects.

        Runs mocked Monasca metrics through
        MonMetricsKafkaProcessor.rdd_to_recordstore, re-feeds the output
        through PreHourlyProcessor and verifies the resulting vcpus_agg
        metric for project_id 'all'.
        """
        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(
                self.spark_context,
                self.get_pre_transform_specs_json_by_all(),
                self.get_transform_specs_json_by_all())

        # Create an RDD out of the mocked Monasca metrics.
        # NOTE(review): eval() is acceptable here only because the data file
        # is a local, trusted test fixture.
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [OffsetRange("metrics", 1, 10,
                                      20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message
        # adapter
        metrics = DummyAdapter.adapter_impl.metric_list
        # list() is required on Python 3, where map() returns an iterator
        # and SparkContext.parallelize needs a sized sequence.
        vm_cpu_list = list(map(dump_as_ascii_string, metrics))
        DummyAdapter.adapter_impl.metric_list = []

        vm_cpu_rdd = self.spark_context.parallelize(vm_cpu_list)
        sql_context = SQLContext(self.spark_context)
        vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
        PreHourlyProcessor.do_transform(vm_cpu_df)

        metrics = DummyAdapter.adapter_impl.metric_list
        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') == 'vcpus_agg' and value.get(
                'metric').get('dimensions').get('project_id') == 'all'
        ][0]

        self.assertIsNotNone(vcpus_agg_metric)

        self.assertEqual(7.0, vcpus_agg_metric.get('metric').get('value'))
        self.assertEqual('useast', vcpus_agg_metric.get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'all',
            vcpus_agg_metric.get('metric').get('dimensions').get('host'))
        self.assertEqual(
            'hourly',
            vcpus_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        # value_meta carries the record count and first/last record
        # timestamps of the aggregated window.
        self.assertEqual(
            14.0,
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-01-20 16:40:00',
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-01-20 16:40:46',
            vcpus_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))
    def test_rdd_to_recordstore_second_stage(self,
                                             usage_manager,
                                             setter_manager,
                                             insert_manager):

        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # Create an RDD out of the mocked Monasca metrics
        with open(SecondStageDataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
        host_usage_list = map(dump_as_ascii_string,
                              raw_tuple_list)
        sql_context = SQLContext(self.spark_context)
        host_usage_rdd = self.spark_context.parallelize(host_usage_list)
        host_usage_df = sql_context.read.json(host_usage_rdd)
        PreHourlyProcessor.do_transform(host_usage_df)

        # get the metrics that have been submitted to the dummy message adapter
        metrics = DummyAdapter.adapter_impl.metric_list

        # Verify cpu.total_logical_cores_agg for all hosts
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(15.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('prehourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for mini-mon host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]

        self.assertEqual(9.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('prehourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.total_logical_cores_agg for devstack host
        total_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.total_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'devstack'][0]

        self.assertEqual(6.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         total_cpu_logical_agg_metric.get(
                             'meta').get('region'))
        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         total_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('prehourly',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         total_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for all hosts
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'all'][0]

        self.assertEqual(8.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('prehourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(13.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the mini-mon host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'mini-mon'][0]

        self.assertEqual(5.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('prehourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # Verify cpu.utilized_logical_cores_agg for the devstack host
        utilized_cpu_logical_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'cpu.utilized_logical_cores_agg' and
            value.get('metric').get('dimensions').get('host') ==
            'devstack'][0]

        self.assertEqual(3.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value'))
        self.assertEqual('useast',
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         utilized_cpu_logical_agg_metric.get(
                             'meta').get('tenantId'))
        self.assertEqual('all',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('project_id'))
        self.assertEqual('prehourly',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(7.0,
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('record_count'))
        self.assertEqual('2016-03-07 16:09:23',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-03-07 16:10:38',
                         utilized_cpu_logical_agg_metric.get(
                             'metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))
# Beispiel #18
# 0
    def test_fetch_quantity_sum_second_stage(self, usage_manager,
                                             setter_manager, insert_manager,
                                             data_driven_specs_repo):
        """Verify the pre-hourly 'sum' operation produces mem.total_mb_agg.

        Mocks the usage/setter/insert component managers and the
        data-driven specs repo, replays recorded instance-usage data
        through PreHourlyProcessor.do_transform, and checks the
        aggregated metric published to the dummy message adapter.
        """
        # test operation
        test_operation = "sum"

        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.get_pre_transform_specs_json(),
                                    self.get_transform_specs_json_by_operation(
                                        test_operation))

        # Create an emulated set of Kafka messages (these were gathered
        # by extracting Monasca messages from the Metrics queue on mini-mon).
        # NOTE: eval() is acceptable here only because the fixture file is
        # trusted test data checked into the tree — never use it on
        # untrusted input.
        with open(SecondStageDataProvider.fetch_quantity_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]
        # Materialize as a list: map() returns a lazy iterator on Python 3
        # and SparkContext.parallelize expects a sized collection.
        instance_usage_list = list(map(dump_as_ascii_string, raw_tuple_list))

        # create a json RDD from instance_usage_list
        instance_usage_rdd = self.spark_context.parallelize(
            instance_usage_list)

        sql_context = SQLContext(self.spark_context)
        instance_usage_df = sql_context.read.json(instance_usage_rdd)

        # call pre hourly processor
        PreHourlyProcessor.do_transform(instance_usage_df)

        # Find the aggregated metric for the 'mini-mon' host.  next() with
        # a None default turns a missing metric into a clear assertion
        # failure instead of an IndexError from [...][0].
        metrics = DummyAdapter.adapter_impl.metric_list
        mem_total_mb_agg_metric = next(
            (value for value in metrics
             if value.get('metric').get('name') == 'mem.total_mb_agg' and
             value.get('metric').get('dimensions').get('host') ==
             'mini-mon'), None)
        self.assertIsNotNone(mem_total_mb_agg_metric)

        self.assertEqual('mem.total_mb_agg',
                         mem_total_mb_agg_metric.get('metric').get('name'))

        self.assertEqual(8679.0,
                         mem_total_mb_agg_metric.get('metric').get('value'))
        self.assertEqual('useast',
                         mem_total_mb_agg_metric.get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         mem_total_mb_agg_metric.get('meta').get('tenantId'))
        self.assertEqual(
            'mini-mon',
            mem_total_mb_agg_metric.get('metric').get('dimensions').get(
                'host'))
        self.assertEqual(
            'all',
            mem_total_mb_agg_metric.get('metric').get('dimensions').get(
                'project_id'))
        self.assertEqual(
            'prehourly',
            mem_total_mb_agg_metric.get('metric').get('dimensions').get(
                'aggregation_period'))

        self.assertEqual(
            39.0,
            mem_total_mb_agg_metric.get('metric').get('value_meta').get(
                'record_count'))
        self.assertEqual(
            '2016-01-20 16:40:00',
            mem_total_mb_agg_metric.get('metric').get('value_meta').get(
                'firstrecord_timestamp_string'))
        self.assertEqual(
            '2016-01-20 17:40:00',
            mem_total_mb_agg_metric.get('metric').get('value_meta').get(
                'lastrecord_timestamp_string'))
    def test_vcpus_by_project(self,
                              usage_manager,
                              setter_manager,
                              insert_manager,
                              data_driven_specs_repo):
        """Verify per-project vcpus_agg metrics through both stages.

        Replays recorded Kafka metrics through the first-stage
        MonMetricsKafkaProcessor, feeds the resulting intermediate metrics
        into PreHourlyProcessor.do_transform, and checks the hourly
        vcpus_agg metric emitted for each of two project ids.
        """
        # load components
        usage_manager.return_value = MockComponentManager.get_usage_cmpt_mgr()
        setter_manager.return_value = \
            MockComponentManager.get_setter_cmpt_mgr()
        insert_manager.return_value = \
            MockComponentManager.get_insert_pre_hourly_cmpt_mgr()

        # init mock driver tables
        data_driven_specs_repo.return_value = \
            MockDataDrivenSpecsRepo(self.spark_context,
                                    self.
                                    get_pre_transform_specs_json_by_project(),
                                    self.get_transform_specs_json_by_project())

        # Create an RDD out of the mocked Monasca metrics
        # NOTE(review): eval() on each line — safe only because the fixture
        # file is trusted test data checked into the tree.
        with open(DataProvider.kafka_data_path) as f:
            raw_lines = f.read().splitlines()
        raw_tuple_list = [eval(raw_line) for raw_line in raw_lines]

        rdd_monasca = self.spark_context.parallelize(raw_tuple_list)

        # decorate mocked RDD with dummy kafka offsets
        myOffsetRanges = [
            OffsetRange("metrics", 1, 10, 20)]  # mimic rdd.offsetRanges()

        transform_context = TransformContextUtils.get_context(
            offset_info=myOffsetRanges,
            batch_time_info=self.get_dummy_batch_time())

        rdd_monasca_with_offsets = rdd_monasca.map(
            lambda x: RddTransformContext(x, transform_context))

        # Call the primary method in mon_metrics_kafka
        MonMetricsKafkaProcessor.rdd_to_recordstore(
            rdd_monasca_with_offsets)

        # get the metrics that have been submitted to the dummy message adapter
        vm_cpu_list = DummyAdapter.adapter_impl.metric_list
        vm_cpu_list = map(dump_as_ascii_string, vm_cpu_list)
        # clear the adapter so the list captured after do_transform below
        # contains only second-stage output
        DummyAdapter.adapter_impl.metric_list = []

        # feed the first-stage output through the pre-hourly processor
        vm_cpu_rdd = self.spark_context.parallelize(vm_cpu_list)
        sql_context = SQLContext(self.spark_context)
        vm_cpu_df = sql_context.read.json(vm_cpu_rdd)
        PreHourlyProcessor.do_transform(vm_cpu_df)

        metrics = DummyAdapter.adapter_impl.metric_list

        # vcpus_agg for the first project id
        # NOTE(review): [0] raises IndexError if no match, so the
        # assertTrue below can never actually fail
        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'vcpus_agg' and
            value.get('metric').get('dimensions').get('project_id') ==
            '9647fd5030b04a799b0411cc38c4102d'][0]

        self.assertTrue(vcpus_agg_metric is not None)

        self.assertEqual(6.0,
                         vcpus_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         vcpus_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('all',
                         vcpus_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('hourly',
                         vcpus_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(8.0,
                         vcpus_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:05',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:46',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))

        # vcpus_agg for the second project id
        vcpus_agg_metric = [
            value for value in metrics
            if value.get('metric').get('name') ==
            'vcpus_agg' and
            value.get('metric').get('dimensions').get('project_id') ==
            '8647fd5030b04a799b0411cc38c4102d'][0]

        self.assertTrue(vcpus_agg_metric is not None)

        self.assertEqual(1.0,
                         vcpus_agg_metric
                         .get('metric').get('value'))
        self.assertEqual('useast',
                         vcpus_agg_metric
                         .get('meta').get('region'))

        self.assertEqual(cfg.CONF.messaging.publish_kafka_project_id,
                         vcpus_agg_metric
                         .get('meta').get('tenantId'))
        self.assertEqual('all',
                         vcpus_agg_metric
                         .get('metric').get('dimensions').get('host'))
        self.assertEqual('hourly',
                         vcpus_agg_metric
                         .get('metric').get('dimensions')
                         .get('aggregation_period'))

        self.assertEqual(6.0,
                         vcpus_agg_metric
                         .get('metric').get('value_meta').get('record_count'))
        self.assertEqual('2016-01-20 16:40:00',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('firstrecord_timestamp_string'))
        self.assertEqual('2016-01-20 16:40:42',
                         vcpus_agg_metric
                         .get('metric').get('value_meta')
                         .get('lastrecord_timestamp_string'))