Example 1
    def __init__(self, app_id, info_plugin, collect_period=2, retries=10):
        Plugin.__init__(self, app_id, info_plugin,
                        collect_period, retries=retries)
        self.enable_visualizer = info_plugin['enable_visualizer']
        self.submission_url = info_plugin['count_jobs_url']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = datetime.strptime(info_plugin['submission_time'],
                                                 '%Y-%m-%dT%H:%M:%S.%fGMT')
        self.dimensions = {'application_id': self.app_id,
                           'service': 'kubejobs'}
        self.rds = redis.StrictRedis(host=info_plugin['redis_ip'],
                                     port=info_plugin['redis_port'])
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0
        
        kubernetes.config.load_kube_config()
        self.b_v1 = kubernetes.client.BatchV1Api()

        if self.enable_visualizer:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                self.datasource = MonascaConnector()
            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                self.datasource = InfluxConnector(influx_url, influx_port,
                                                  database_name)
            else:
                print("Unknown datasource type...!")
Example 2
    def setup_datasource(self, info_plugin):
        if self.enable_detailed_report:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                return MonascaConnector()
            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                return InfluxConnector(influx_url, influx_port, database_name)
            else:
                raise ex.BadRequestException("Unknown datasource type...!")
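A sketch of the call site this helper implies, assuming a constructor like the ones in the other examples and that `enable_detailed_report` plays the role `enable_visualizer` plays there:

        # Hypothetical __init__ fragment: delegate to setup_datasource
        # instead of inlining the if/elif chain.
        self.enable_detailed_report = info_plugin['enable_visualizer']
        self.datasource = self.setup_datasource(info_plugin)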
Example 3
class KubeJobProgress(Plugin):
    def __init__(self, app_id, info_plugin, collect_period=2, retries=10):
        Plugin.__init__(self,
                        app_id,
                        info_plugin,
                        collect_period,
                        retries=retries)
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.enable_visualizer = info_plugin['enable_visualizer']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = datetime.strptime(
            info_plugin['submission_time'], '%Y-%m-%dT%H:%M:%S.%fGMT')
        self.dimensions = {
            'application_id': self.app_id,
            'service': 'kubejobs'
        }
        self.rds = redis.StrictRedis(host=info_plugin['redis_ip'],
                                     port=info_plugin['redis_port'])
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0

        kubernetes.config.load_kube_config(api.k8s_manifest)
        self.b_v1 = kubernetes.client.BatchV1Api()

        if self.enable_visualizer:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                self.datasource = MonascaConnector()
            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                self.datasource = InfluxConnector(influx_url, influx_port,
                                                  database_name)
            else:
                self.LOG.log("Unknown datasource type...!")

    def _publish_measurement(self, jobs_completed):

        application_progress_error = {}
        job_progress_error = {}
        time_progress_error = {}
        parallelism = {}

        # Init
        self.LOG.log("Jobs Completed: %i" % jobs_completed)

        # Job Progress

        job_progress = min(1.0, (float(jobs_completed) / self.number_of_jobs))
        # Elapsed Time
        elapsed_time = float(self._get_elapsed_time())

        # Reference Value
        ref_value = (elapsed_time / self.expected_time)
        replicas = self._get_num_replicas()
        # Error
        self.LOG.log("Job progress: %s\nTime Progress: %s\nReplicas: %s"
                     "\n========================" %
                     (job_progress, ref_value, replicas))

        error = job_progress - ref_value
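        # Worked example with illustrative numbers: for number_of_jobs=100,
        # jobs_completed=30, elapsed_time=60 and expected_time=300,
        # job_progress = 0.3 and ref_value = 0.2, so error = 0.1; a positive
        # error means the job is ahead of the time-based reference.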

        application_progress_error['name'] = 'application-progress.error'

        application_progress_error['value'] = error
        application_progress_error['timestamp'] = time.time() * 1000
        application_progress_error['dimensions'] = self.dimensions

        job_progress_error['name'] = 'job-progress'
        job_progress_error['value'] = job_progress
        job_progress_error['timestamp'] = time.time() * 1000
        job_progress_error['dimensions'] = self.dimensions

        time_progress_error['name'] = 'time-progress'
        time_progress_error['value'] = ref_value
        time_progress_error['timestamp'] = time.time() * 1000
        time_progress_error['dimensions'] = self.dimensions

        parallelism['name'] = "job-parallelism"
        parallelism['value'] = replicas
        parallelism['timestamp'] = time.time() * 1000
        parallelism['dimensions'] = self.dimensions

        self.LOG.log("Error: %s " % application_progress_error['value'])

        self.rds.rpush(self.metric_queue, str(application_progress_error))

        if self.enable_visualizer:
            self.datasource.send_metrics([application_progress_error])
            self.datasource.send_metrics([job_progress_error])
            self.datasource.send_metrics([time_progress_error])
            self.datasource.send_metrics([parallelism])

        time.sleep(MONITORING_INTERVAL)

    def _get_num_replicas(self):
        job = self.b_v1.read_namespaced_job(name=self.app_id,
                                            namespace="default")
        # status.active is None when no pods are currently running
        return job.status.active

    def _get_elapsed_time(self):
        datetime_now = datetime.now()
        elapsed_time = datetime_now - self.submission_time
        # total_seconds() also counts whole days; .seconds alone wraps at 24h
        self.LOG.log("Elapsed Time: %.2f" % elapsed_time.total_seconds())

        return elapsed_time.total_seconds()

    def monitoring_application(self):
        try:
            num_queued_jobs = self.rds.llen('job')
            num_processing_jobs = self.rds.llen('job:processing')

            job_progress = self.number_of_jobs - \
                (num_queued_jobs + num_processing_jobs)
            self._publish_measurement(jobs_completed=job_progress)
            return job_progress

        except Exception as ex:
            self.LOG.log("Error: No application found for %s. "
                         "%s remaining attempts"
                         % (self.app_id, self.attempts))

            self.LOG.log(str(ex))
            raise
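
A minimal driving sketch for the class above; the real scheduling presumably lives in the `Plugin` base class (`collect_period`, `retries`), so the loop below is only an illustration, reusing the hypothetical `info_plugin` from Example 1:

# Illustrative driver: poll until every job has left the Redis queues.
monitor = KubeJobProgress('my-app-id', info_plugin)
while monitor.monitoring_application() < monitor.number_of_jobs:
    pass  # _publish_measurement already sleeps for MONITORING_INTERVAL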
Example 4
class VerticalProgress(Plugin):
    def __init__(self, app_id, info_plugin, collect_period=2, retries=10):
        Plugin.__init__(self,
                        app_id,
                        info_plugin,
                        collect_period,
                        retries=retries)
        self.cpu_threshold = info_plugin['threshold']
        self.metric_source = info_plugin['metric_source']
        self.get_metric_endpoint = info_plugin['get_metric_endpoint']
        self.k8s_manifest = info_plugin['k8s_manifest']
        self.enable_visualizer = info_plugin['enable_visualizer']
        self.submission_url = info_plugin['count_jobs_url']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = datetime.strptime(
            info_plugin['submission_time'], '%Y-%m-%dT%H:%M:%S.%fGMT')
        self.dimensions = {
            'application_id': self.app_id,
            'service': 'kubejobs'
        }
        self.rds = redis.StrictRedis(host=info_plugin['redis_ip'],
                                     port=info_plugin['redis_port'])
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0
        if self.enable_visualizer:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                self.datasource = MonascaConnector()

            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                self.datasource = InfluxConnector(influx_url, influx_port,
                                                  database_name)
            else:
                print("Unknown datasource type...!")

    def _publish_measurement(self, cpu_usage):

        application_progress_error = {}
        cpu_usage_metric = {}
        cpu_quota_metric = {}

        # Reference Value
        ref_value = float(self.cpu_threshold)

        # Error

        print("CPU_USAGE: " + str(cpu_usage))
        print("REF_VALUE: " + str(ref_value))

        error = (float(cpu_usage) / 100) - ref_value
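        # Worked example with illustrative numbers: if the endpoint reports
        # cpu_usage = "75" (a percentage) and cpu_threshold = 0.5, then
        # error = 0.75 - 0.5 = 0.25, i.e. usage sits above the target share.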

        application_progress_error['name'] = 'application-progress.error'
        application_progress_error['value'] = error
        application_progress_error['timestamp'] = time.time() * 1000
        application_progress_error['dimensions'] = self.dimensions

        cpu_usage_metric['name'] = 'cpu-usage'
        cpu_usage_metric['value'] = float(cpu_usage)
        cpu_usage_metric['timestamp'] = time.time() * 1000
        cpu_usage_metric['dimensions'] = self.dimensions

        cpu_quota = self.get_cpu_quota()

        cpu_quota_metric['name'] = 'cpu-quota'
        cpu_quota_metric['value'] = float(cpu_quota)
        cpu_quota_metric['timestamp'] = time.time() * 1000
        cpu_quota_metric['dimensions'] = self.dimensions

        print "Error: %s " % application_progress_error['value']

        self.rds.rpush(self.metric_queue, str(application_progress_error))

        if self.enable_visualizer:
            self.datasource.send_metrics([application_progress_error])
            self.datasource.send_metrics([cpu_usage_metric])
            self.datasource.send_metrics([cpu_quota_metric])

        time.sleep(MONITORING_INTERVAL)

    def _get_elapsed_time(self):
        datetime_now = datetime.now()
        elapsed_time = datetime_now - self.submission_time
        # total_seconds() also counts whole days; .seconds alone wraps at 24h
        print("Elapsed Time: %.2f" % elapsed_time.total_seconds())

        return elapsed_time.total_seconds()

    def monitoring_application(self):
        try:
            cpu_usage = requests.get('http://%s:5000' %
                                     (self.get_api_address())).text

            print("Publishing metric %s value %s: " %
                  (self.metric_source, cpu_usage))

            self._publish_measurement(cpu_usage=cpu_usage)

        except Exception as ex:
            print("Error: No application found for %s. %s remaining attempts" %
                  (self.app_id, self.attempts))

            print(str(ex))
            raise

    def get_cpu_quota(self):
        try:
            cpu_quota = requests.get(
                'http://%s:5000/%s' %
                (self.get_api_address(), self.get_metric_endpoint)).text
            return cpu_quota

        except Exception as ex:
            print("Error while getting %s metric" % self.metric_source)
            print(str(ex))
            raise

    def get_api_address(self):
        # TODO Search a better way to get the internal ip of a node using k8s api
        nodes_ips = os.popen(
            "kubectl --kubeconfig=%s get nodes -o wide | awk '{print $6}'" %
            (self.k8s_manifest)).readlines()
        api_address = nodes_ips.pop(1).replace('\n', '')

        return api_address
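
Addressing the TODO above, a sketch that resolves a node's InternalIP through the official Kubernetes Python client instead of shelling out to kubectl; the client calls (`load_kube_config`, `CoreV1Api.list_node`) are standard, but returning the first node that advertises an InternalIP is an assumption carried over from the original `pop(1)`:

    def get_api_address(self):
        # Load the same kubeconfig the plugin already carries, then list
        # nodes through the API instead of parsing `kubectl get nodes`.
        kubernetes.config.load_kube_config(self.k8s_manifest)
        core_v1 = kubernetes.client.CoreV1Api()
        for node in core_v1.list_node().items:
            for address in node.status.addresses:
                if address.type == 'InternalIP':
                    return address.address
        raise RuntimeError("No node with an InternalIP was found")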