Example 1
    def __init__(self,
                 app_id,
                 info_plugin,
                 collect_period=2,
                 retries=10,
                 last_replicas=None):

        Plugin.__init__(self,
                        app_id,
                        info_plugin,
                        collect_period,
                        retries=retries)
        self.validate(info_plugin)
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.enable_detailed_report = info_plugin['enable_detailed_report']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = self.get_submission_time(info_plugin)
        self.dimensions = self.get_dimensions()
        self.rds = self.setup_redis(info_plugin)
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0
        self.job_report = JobReport(info_plugin)
        self.report_flag = True
        self.enable_generate_job_report = False
        self.last_replicas = last_replicas
        self.last_error = 0.0
        self.last_progress = 0.0
        kubernetes.config.load_kube_config(api.k8s_manifest)
        self.b_v1 = kubernetes.client.BatchV1Api()
        self.datasource = self.setup_datasource(info_plugin)
Example 2
    def __init__(self):
        self.monasca_username = api.monasca_username
        self.monasca_password = api.monasca_password
        self.monasca_auth_url = api.monasca_auth_url
        self.monasca_project_name = api.monasca_project_name
        self.monasca_api_version = api.monasca_api_version
        self._get_monasca_client()
        self.LOG = Log('monasca_log', 'monasca.log')
Example 3
    def __init__(self, app_id, info_plugin):

        KubeJobProgress.__init__(self,
                                 app_id,
                                 info_plugin,
                                 collect_period=2,
                                 retries=20)
        self.cluster_info_url = info_plugin.get('cluster_info_url')
        self.desired_cost = info_plugin.get('desired_cost')
        self.last_error = None
        self.last_rep = None
        self.last_cost = None
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.job_report = JobReport(info_plugin)
Example 4
class Plugin(threading.Thread):

    def __init__(self, app_id, info_plugin, collect_period, retries=30):
        threading.Thread.__init__(self)

        # Contains all the plugin-specific information
        self.info_plugin = info_plugin

        # Flag that enables or disables the monitoring logic execution
        self.running = False
        self.LOG = Log(LOG_NAME, LOG_FILE)
        # By default, dimensions contains only the application_id, but
        # each plugin can change it and add other relevant
        # information
        self.dimensions = {'application_id': app_id}

        # Time interval between each metric collection
        self.collect_period = collect_period

        # How many times monitoring_application is retried when
        # something breaks during its execution
        self.attempts = retries

        # The identifier for the submitted application
        self.app_id = app_id

    def stop(self):
        self.LOG.log("The {} is stopping for {}...".format(type(self).__name__,
                                                           self.app_id))
        self.running = False

    # This method must be overridden by each plugin that
    # extends this base class
    def monitoring_application(self):
        pass

    def run(self):
        self.running = True
        while self.running:
            if self.attempts == 0:
                self.stop()
                break
            try:
                time.sleep(self.collect_period)
                self.monitoring_application()

            except Exception as ex:
                self.attempts -= 1
                self.LOG.log(ex)
                time.sleep(self.collect_period)
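
A minimal sketch of how a concrete plugin extends this base class: override monitoring_application with the collection logic and let run() drive the loop. The CountingPlugin name and its body are illustrative, not part of the project:

class CountingPlugin(Plugin):

    def __init__(self, app_id, info_plugin):
        Plugin.__init__(self, app_id, info_plugin,
                        collect_period=2, retries=10)
        self.samples = 0

    # Called once per collect_period by the run() loop above
    def monitoring_application(self):
        self.samples += 1
        self.LOG.log("sample %d for %s" % (self.samples, self.app_id))
        if self.samples >= 10:
            # Stops the thread loop after ten samples
            self.stop()

# Usage: CountingPlugin('app-00', {}).start()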
Example 5
    def __init__(self, app_id, info_plugin, collect_period, retries=30):
        threading.Thread.__init__(self)

        # Contains all the plugin-specific information
        self.info_plugin = info_plugin

        # Flag that enables or disables the monitoring logic execution
        self.running = False
        self.LOG = Log(LOG_NAME, LOG_FILE)
        # By default, dimensions contains only the application_id, but
        # each plugin can change it and add other relevant
        # information
        self.dimensions = {'application_id': app_id}

        # Time interval between each metric collection
        self.collect_period = collect_period

        # How many times monitoring_application is retried when
        # something breaks during its execution
        self.attempts = retries

        # The identifier for the submitted application
        self.app_id = app_id
Example 6
    def __init__(self, app_id, info_plugin, collect_period=2, retries=10):
        Plugin.__init__(self,
                        app_id,
                        info_plugin,
                        collect_period,
                        retries=retries)
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.enable_visualizer = info_plugin['enable_visualizer']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = datetime.\
            strptime(info_plugin['submission_time'],
                     '%Y-%m-%dT%H:%M:%S.%fGMT')
        self.dimensions = {
            'application_id': self.app_id,
            'service': 'kubejobs'
        }
        self.rds = redis.StrictRedis(host=info_plugin['redis_ip'],
                                     port=info_plugin['redis_port'])
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0

        kubernetes.config.load_kube_config(api.k8s_manifest)
        self.b_v1 = kubernetes.client.BatchV1Api()

        if self.enable_visualizer:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                self.datasource = MonascaConnector()
            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                self.datasource = InfluxConnector(influx_url, influx_port,
                                                  database_name)
            else:
                self.LOG.log("Unknown datasource type...!")
Example 7
class KubeJobCost(KubeJobProgress):
    def __init__(self, app_id, info_plugin):

        KubeJobProgress.__init__(self,
                                 app_id,
                                 info_plugin,
                                 collect_period=2,
                                 retries=20)
        self.cluster_info_url = info_plugin.get('cluster_info_url')
        self.desired_cost = info_plugin.get('desired_cost')
        self.last_error = None
        self.last_rep = None
        self.last_cost = None
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.job_report = JobReport(info_plugin)

    def monitoring_application(self):
        try:
            if self.report_flag:

                self.calculate_error()
                self.LOG.log("Calculated error")
                timestamp = time.time() * 1000
                err_manifest = \
                    self.get_application_cost_error_manifest(self.last_error,
                                                             timestamp)
                self.LOG.log(err_manifest)
                self.LOG.log("Publishing error")
                self.rds.rpush(self.metric_queue, str(err_manifest))

                self.LOG.log("Getting replicas")
                replicas_manifest = \
                    self.get_parallelism_manifest(self.last_replicas,
                                                  timestamp)
                self.LOG.log(replicas_manifest)

                reference_manifest = self.get_reference_manifest(timestamp)

                self.LOG.log("Getting cost")
                current_cost_manifest = \
                    self.get_current_cost_manifest(timestamp)
                self.LOG.log(current_cost_manifest)

                self.publish_persistent_measurement(err_manifest,
                                                    reference_manifest,
                                                    current_cost_manifest,
                                                    replicas_manifest)

                self.report_job(timestamp)

        except Exception as ex:
            self.LOG.log(ex)

    def report_job(self, timestamp):
        if self.report_flag:
            self.LOG.log("report_flag-cost")
            self.job_report.set_start_timestamp(timestamp)
            current_time = datetime.fromtimestamp(timestamp/1000)\
                                   .strftime('%Y-%m-%dT%H:%M:%SZ')
            if self.last_progress == 1:
                self.job_report.calculate_execution_time(timestamp)
            self.job_report.\
                verify_and_set_max_error(self.last_error, current_time)
            self.job_report.\
                verify_and_set_min_error(self.last_error, current_time)

            if self.job_is_completed():
                self.report_flag = False
                self.job_report.calculate_execution_time(timestamp)
                self.generate_report(current_time)

    # TODO: We need to think of a better design solution
    # for this
    def get_detailed_report(self):
        if not self.report_flag:
            return self.datasource.get_cost_measurements()
        return {'message': 'Job is still running...'}

    def get_reference_manifest(self, timestamp):
        reference_manifest = {
            'name': 'desired_cost',
            'value': self.desired_cost,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }
        return reference_manifest

    def get_current_cost_manifest(self, timestamp):
        current_cost_manifest = {
            'name': 'current_spent',
            'value': self.last_cost,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }
        return current_cost_manifest

    def calculate_error(self):
        rep = self._get_num_replicas()
        cpu_cost, memory_cost = self.get_current_cost()
        cpu_usage, memory_usage = \
            k8s.get_current_job_resources_usage(self.app_id)
        job_cpu_cost = cpu_cost * cpu_usage
        job_memory_cost = memory_cost * memory_usage
        job_total_cost = job_cpu_cost + job_memory_cost

        err = job_total_cost - self.desired_cost

        self.pretty_print(cpu_cost, memory_cost, cpu_usage, memory_usage,
                          job_total_cost, err)
        self.last_error = err
        self.last_cost = job_total_cost
        self.last_rep = rep
        return err

    def pretty_print(self, cpu_cost, memory_cost, cpu_usage, memory_usage,
                     job_total_cost, err):

        self.LOG.log('Cpu usage: {}\nCpu cost: {}\nMemory usage:'
                     ' {}\nMemory cost: {}\nJob cost: {}\nError: {}'.format(
                         cpu_usage, cpu_cost, memory_usage, memory_cost,
                         job_total_cost, err))

    def get_application_cost_error_manifest(self, error, timestamp):
        application_progress_error = {
            'name': 'application_cost_error',
            'value': error,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }
        return application_progress_error

    def get_current_cost(self):

        # The cluster info endpoint exposes the current prices;
        # returned as a (cpu_price, memory_price) tuple
        cost = json.loads(requests.get(self.cluster_info_url.strip()).text)
        return (float(cost.get('cpu_price')),
                float(cost.get('memory_price')))

    def get_dimensions(self):
        return {'application_id': self.app_id, 'service': 'kubejobs_cost'}
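
To make calculate_error concrete: get_current_cost assumes the endpoint at cluster_info_url answers with a JSON document containing cpu_price and memory_price. A worked sketch with illustrative numbers:

# Assumed payload served by cluster_info_url:
#   {"cpu_price": 0.04, "memory_price": 0.01}
cpu_cost, memory_cost = 0.04, 0.01   # prices returned by get_current_cost()
cpu_usage, memory_usage = 2.0, 4.0   # usage reported for the job's pods

job_total_cost = cpu_cost * cpu_usage + memory_cost * memory_usage  # 0.12
err = job_total_cost - 0.10          # with desired_cost = 0.10 -> err = 0.02
# A positive error means the job is spending above the desired cost.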
Example 8
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from monitor import exceptions as ex
from monitor.utils.logger import Log
from monitor.service import plugin_service

API_LOG = Log("APIv10", "APIv10.log")

monitored_apps = {}


def start_monitoring(data, app_id):
    """ These conditional cases choose the class executor's constructor of the
    application submitted
    Note: some executors need the keypair to access remotely some machine and
    execute the monitoring logic, but this attribute is not mandatory for all
    the executors."""

    if 'plugin' not in data or 'plugin_info' not in data:
        API_LOG.log("Missing parameters in request")
        raise ex.BadRequestException()
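
For reference, a request body that passes this check carries both keys. A hypothetical payload (the exact plugin_info fields depend on the chosen plugin; compare the validate method of KubeJobProgress further down):

data = {
    'plugin': 'kubejobs',
    'plugin_info': {
        'enable_detailed_report': False,
        'expected_time': 500,
        'number_of_jobs': 1000,
        'redis_ip': '10.0.0.1',
        'redis_port': 6379,
        'submission_time': '2019-01-01T00:00:00.000000GMT',
        'scaling_strategy': 'default'
    }
}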
Example 9
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import configparser
from monitor.utils.logger import Log

LOG_FILE = "progress.log"
LOG_NAME = "kubejobs-progress"
LOG = Log(LOG_NAME, LOG_FILE)

CONFIG_PATH = "./data/conf"

try:
    # Conf reading
    config = configparser.RawConfigParser()
    config.read('./monitor.cfg')
    """ General configuration """
    address = config.get('general', 'host')
    port = config.getint('general', 'port')
    plugins = config.get('general', 'plugins').split(',')
    use_debug = config.get('general', 'debug')
    retries = config.getint('general', 'retries')
    """ Validate if really exists a section to listed plugins """
    for plugin in plugins:
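
The reading above implies a monitor.cfg roughly like the following; every value is an illustrative assumption, and the truncated loop goes on to verify that each plugin listed in plugins has its own section:

[general]
host = 0.0.0.0
port = 5000
plugins = kubejobs
debug = True
retries = 5

[kubejobs]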
Example 10
class KubeJobProgress(Plugin):
    def __init__(self,
                 app_id,
                 info_plugin,
                 collect_period=2,
                 retries=10,
                 last_replicas=None):

        Plugin.__init__(self,
                        app_id,
                        info_plugin,
                        collect_period,
                        retries=retries)
        self.validate(info_plugin)
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.enable_detailed_report = info_plugin['enable_detailed_report']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = self.get_submission_time(info_plugin)
        self.dimensions = self.get_dimensions()
        self.rds = self.setup_redis(info_plugin)
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0
        self.job_report = JobReport(info_plugin)
        self.report_flag = True
        self.enable_generate_job_report = False
        self.last_replicas = last_replicas
        self.last_error = 0.0
        self.last_progress = 0.0
        kubernetes.config.load_kube_config(api.k8s_manifest)
        self.b_v1 = kubernetes.client.BatchV1Api()
        self.datasource = self.setup_datasource(info_plugin)

    def get_dimensions(self):
        return {'application_id': self.app_id, 'service': 'kubejobs'}

    def get_submission_time(self, info_plugin):
        return datetime.strptime(info_plugin['submission_time'],
                                 '%Y-%m-%dT%H:%M:%S.%fGMT')

    def setup_redis(self, info_plugin):
        return redis.StrictRedis(host=info_plugin['redis_ip'],
                                 port=info_plugin['redis_port'])

    def setup_datasource(self, info_plugin):
        if self.enable_detailed_report:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                return MonascaConnector()
            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                return InfluxConnector(influx_url, influx_port, database_name)
            else:
                raise ex.BadRequestException("Unknown datasource type...!")

    def calculate_measurement(self, jobs_completed):
        job_progress = \
            self.get_job_progress(jobs_completed) or self.last_progress
        ref_value = self.get_ref_value()
        replicas = self._get_num_replicas() or self.last_replicas
        error = self.get_error(job_progress, ref_value) or self.last_error

        return job_progress, ref_value, replicas, error

    def get_error(self, job_progress, ref_value):
        error = job_progress - ref_value
        self.last_error = error
        return error

    def get_ref_value(self):
        elapsed_time = float(self._get_elapsed_time())
        ref_value = (elapsed_time / self.expected_time)
        return ref_value

    def get_job_progress(self, jobs_completed):
        job_progress = min(1.0, (float(jobs_completed) / self.number_of_jobs))
        return job_progress

    def get_parallelism_manifest(self, replicas, timestamp):
        parallelism = {
            'name': 'job_parallelism',
            'value': replicas,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }
        return parallelism

    def get_time_progress_error_manifest(self, ref_value, timestamp):
        time_progress_error = {
            'name': 'time_progress',
            'value': ref_value,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }

        return time_progress_error

    def get_job_progress_error_manifest(self, job_progress, timestamp):
        job_progress_error = {
            'name': 'job_progress',
            'value': job_progress,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }
        return job_progress_error

    def get_application_progress_error_manifest(self, error, timestamp):
        application_progress_error = {
            'name': 'application_progress_error',
            'value': error,
            'timestamp': timestamp,
            'dimensions': self.dimensions
        }
        return application_progress_error

    def get_detailed_report(self):
        if not self.report_flag:
            return self.datasource.get_measurements()
        return {'message': 'Job is still running...'}

    def _publish_measurement(self, jobs_completed):

        if self.report_flag:
            self.LOG.log("Jobs Completed: %i" % jobs_completed)
            job_progress, ref_value, replicas, error = \
                self.calculate_measurement(jobs_completed)

            self.last_progress = job_progress
            timestamp = time.time() * 1000
            application_progress_error = \
                self.get_application_progress_error_manifest(error, timestamp)

            self.rds.rpush(self.metric_queue, str(application_progress_error))

            if self.enable_detailed_report:
                job_progress_error = \
                    self.get_job_progress_error_manifest(job_progress,
                                                         timestamp)
                time_progress_error = \
                    self.get_time_progress_error_manifest(ref_value,
                                                          timestamp)
                parallelism = \
                    self.get_parallelism_manifest(replicas, timestamp)

                self.LOG.log("Error: %s " %
                             application_progress_error['value'])
                self.publish_persistent_measurement(application_progress_error,
                                                    job_progress_error,
                                                    time_progress_error,
                                                    parallelism)
            self.report_job(timestamp)

    def publish_persistent_measurement(self, application_progress_error,
                                       job_progress_error, time_progress_error,
                                       parallelism):
        self.datasource.send_metrics([application_progress_error])
        self.datasource.send_metrics([job_progress_error])
        self.datasource.send_metrics([time_progress_error])
        self.datasource.send_metrics([parallelism])

    def report_job(self, timestamp):
        if self.report_flag:
            self.job_report.set_start_timestamp(timestamp)
            current_time = datetime.fromtimestamp(timestamp/1000)\
                                   .strftime('%Y-%m-%dT%H:%M:%SZ')
            if self.last_progress == 1:
                self.job_report.calculate_execution_time(timestamp)
            self.job_report.\
                verify_and_set_max_error(self.last_error, current_time)
            self.job_report.\
                verify_and_set_min_error(self.last_error, current_time)

            if self.job_is_completed():
                if self.last_progress != 1 \
                        and not self.enable_generate_job_report:
                    self.enable_generate_job_report = True
                    self.monitoring_application()
                else:
                    self.report_flag = False
                    self.job_report.calculate_execution_time(timestamp)
                    self.generate_report(current_time)

    def generate_report(self, current_time):
        self.job_report.set_final_error(self.last_error, current_time)
        self.job_report.set_final_replicas(self.last_replicas)
        self.job_report.generate_report(self.app_id)

    def _get_num_replicas(self):
        job = self.b_v1.read_namespaced_job(name=self.app_id,
                                            namespace="default")
        replicas = job.status.active
        if replicas is not None:
            self.last_replicas = replicas
        return replicas

    def job_is_completed(self):

        job = self.b_v1.read_namespaced_job(name=self.app_id,
                                            namespace="default")

        return job.status.active is None

    def _get_elapsed_time(self):
        datetime_now = datetime.now()
        elapsed_time = datetime_now - self.submission_time
        self.LOG.log("Elapsed Time: %.2f" % elapsed_time.seconds)

        return elapsed_time.seconds

    def monitoring_application(self):
        try:
            num_queued_jobs = self.rds.llen('job')
            num_processing_jobs = self.rds.llen('job:processing')

            jobs_completed = self.number_of_jobs - \
                (num_queued_jobs + num_processing_jobs)
            self._publish_measurement(jobs_completed=jobs_completed)
            return jobs_completed

        except Exception as ex:
            self.LOG.log(("Error: No application found for %s.\
                 %s remaining attempts") % (self.app_id, self.attempts))
            self.LOG.log(ex)
            raise

    def run(self):
        self.running = True
        while self.running:
            if self.attempts == 0:
                timestamp = time.time() * 1000
                self.report_job(timestamp)
                current_time = datetime.\
                    fromtimestamp(timestamp/1000).\
                    strftime('%Y-%m-%dT%H:%M:%SZ')
                self.generate_report(current_time)
                self.stop()
                break
            try:
                time.sleep(self.collect_period)
                self.monitoring_application()

            except Exception:
                self.attempts -= 1

    def validate(self, data):
        data_model = {
            "enable_detailed_report": bool,
            "expected_time": int,
            "number_of_jobs": int,
            "redis_ip": six.string_types,
            "redis_port": int,
            "submission_time": six.string_types,
            "scaling_strategy": six.string_types
        }

        if 'enable_detailed_report' in data and data['enable_detailed_report']:
            data_model.update({
                "datasource_type": six.string_types,
                "database_data": dict
            })

        if 'scaling_strategy' in data and data['scaling_strategy'] == 'pid':
            data_model.update({"heuristic_options": dict})

        for key in data_model:
            if (key not in data):
                raise ex.BadRequestException(
                    "Variable \"{}\" is missing".format(key))

            if (not isinstance(data[key], data_model[key])):
                raise ex.BadRequestException(
                    "\"{}\" has unexpected variable type: {}. Was expecting {}"
                    .format(key, type(data[key]), data_model[key]))
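
A worked sketch of the progress-error computation carried out by get_job_progress, get_ref_value and get_error above, with illustrative numbers:

number_of_jobs, expected_time = 1000, 500
jobs_completed, elapsed_time = 400, 300.0

job_progress = min(1.0, float(jobs_completed) / number_of_jobs)  # 0.4
ref_value = elapsed_time / expected_time                         # 0.6
error = job_progress - ref_value                                 # -0.2
# Negative error: actual progress trails the expected time progress,
# which a scaling controller would typically answer with more replicas.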
Example 11
import time
from datetime import datetime

import pytz
import requests
import tzlocal
from monitor.utils.monasca.connector import MonascaConnector
from monitor.plugins.base import Plugin
from monitor.utils.logger import Log, configure_logging

LOG_FILE = "progress.log"
TIME_PROGRESS_FILE = "time_progress.log"
MONITORING_INTERVAL = 1

plugin_log = Log("Spark_Progress", "monitor.log")
configure_logging()

class SparkProgress(Plugin):

    def __init__(self, app_id, info_plugin, collect_period, retries=60):
        Plugin.__init__(self, app_id, info_plugin,
                        collect_period, retries=retries)

        self.monasca = MonascaConnector()

        self.submission_url = info_plugin['spark_submisson_url']
        self.expected_time = info_plugin['expected_time']


        self.number_of_jobs = int(info_plugin['number_of_jobs'])
Example 12
class MonascaConnector:
    def __init__(self):
        self.monasca_username = api.monasca_username
        self.monasca_password = api.monasca_password
        self.monasca_auth_url = api.monasca_auth_url
        self.monasca_project_name = api.monasca_project_name
        self.monasca_api_version = api.monasca_api_version
        self._get_monasca_client()
        self.LOG = Log('monasca_log', 'monasca.log')

    def get_measurements(self,
                         metric_name,
                         dimensions,
                         start_time='2014-01-01T00:00:00Z'):

        measurements = []
        try:
            monasca_client = self._get_monasca_client()
            dimensions = {
                'application_id': dimensions['application_id'],
                'service': dimensions['service']
            }
            measurements = monasca_client.metrics.list_measurements(
                name=metric_name,
                dimensions=dimensions,
                start_time=start_time,
                debug=False)
        except exc.HTTPException as httpex:
            self.LOG.log(httpex)
        except Exception as ex:
            self.LOG.log(ex)
        if len(measurements) > 0:
            return measurements[0]['measurements']
        else:
            return None

    def first_measurement(self, name, dimensions):
        return ([None, None, None]
                if self.get_measurements(name, dimensions) is None else
                self.get_measurements(name, dimensions)[0])

    def last_measurement(self, name, dimensions):
        return ([None, None, None]
                if self.get_measurements(name, dimensions) is None else
                self.get_measurements(name, dimensions)[-1])

    def _get_monasca_client(self):

        # Authenticate to Keystone
        ks = ksclient.KSClient(auth_url=self.monasca_auth_url,
                               username=self.monasca_username,
                               password=self.monasca_password,
                               project_name=self.monasca_project_name,
                               debug=False)

        # Monasca Client
        monasca_client = monclient.Client(self.monasca_api_version,
                                          ks.monasca_url,
                                          token=ks.token,
                                          debug=False)

        return monasca_client

    def send_metrics(self, measurements):

        batch_metrics = {'jsonbody': measurements}
        try:
            monasca_client = self._get_monasca_client()
            monasca_client.metrics.create(**batch_metrics)

        except exc.HTTPException as httpex:
            self.LOG.log(httpex)
        except Exception as ex:
            self.LOG.log(ex)
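
A hedged usage sketch: send_metrics expects a list of measurement dictionaries shaped like the manifests built by the plugins above. Names and values here are illustrative, and a reachable Keystone/Monasca deployment with valid api.* credentials is assumed:

import time

measurement = {
    'name': 'application_progress_error',
    'value': -0.2,
    'timestamp': time.time() * 1000,
    'dimensions': {'application_id': 'app-00', 'service': 'kubejobs'}
}

connector = MonascaConnector()
connector.send_metrics([measurement])
last = connector.last_measurement('application_progress_error',
                                  measurement['dimensions'])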
Example 13
class KubeJobProgress(Plugin):
    def __init__(self, app_id, info_plugin, collect_period=2, retries=10):
        Plugin.__init__(self,
                        app_id,
                        info_plugin,
                        collect_period,
                        retries=retries)
        self.LOG = Log(LOG_NAME, LOG_FILE)
        self.enable_visualizer = info_plugin['enable_visualizer']
        self.expected_time = int(info_plugin['expected_time'])
        self.number_of_jobs = int(info_plugin['number_of_jobs'])
        self.submission_time = datetime.\
            strptime(info_plugin['submission_time'],
                     '%Y-%m-%dT%H:%M:%S.%fGMT')
        self.dimensions = {
            'application_id': self.app_id,
            'service': 'kubejobs'
        }
        self.rds = redis.StrictRedis(host=info_plugin['redis_ip'],
                                     port=info_plugin['redis_port'])
        self.metric_queue = "%s:metrics" % self.app_id
        self.current_job_id = 0

        kubernetes.config.load_kube_config(api.k8s_manifest)
        self.b_v1 = kubernetes.client.BatchV1Api()

        if self.enable_visualizer:
            datasource_type = info_plugin['datasource_type']
            if datasource_type == "monasca":
                self.datasource = MonascaConnector()
            elif datasource_type == "influxdb":
                influx_url = info_plugin['database_data']['url']
                influx_port = info_plugin['database_data']['port']
                database_name = info_plugin['database_data']['name']
                self.datasource = InfluxConnector(influx_url, influx_port,
                                                  database_name)
            else:
                self.LOG.log("Unknown datasource type...!")

    def _publish_measurement(self, jobs_completed):

        application_progress_error = {}
        job_progress_error = {}
        time_progress_error = {}
        parallelism = {}

        # Init
        self.LOG.log("Jobs Completed: %i" % jobs_completed)

        # Job Progress

        job_progress = min(1.0, (float(jobs_completed) / self.number_of_jobs))
        # Elapsed Time
        elapsed_time = float(self._get_elapsed_time())

        # Reference Value
        ref_value = (elapsed_time / self.expected_time)
        replicas = self._get_num_replicas()
        # Error
        self.LOG.log("Job progress: %s\nTime Progress: %s\nReplicas: %s"
                     "\n========================" %
                     (job_progress, ref_value, replicas))

        error = job_progress - ref_value

        application_progress_error['name'] = 'application-progress.error'

        application_progress_error['value'] = error
        application_progress_error['timestamp'] = time.time() * 1000
        application_progress_error['dimensions'] = self.dimensions

        job_progress_error['name'] = 'job-progress'
        job_progress_error['value'] = job_progress
        job_progress_error['timestamp'] = time.time() * 1000
        job_progress_error['dimensions'] = self.dimensions

        time_progress_error['name'] = 'time-progress'
        time_progress_error['value'] = ref_value
        time_progress_error['timestamp'] = time.time() * 1000
        time_progress_error['dimensions'] = self.dimensions

        parallelism['name'] = "job-parallelism"
        parallelism['value'] = replicas
        parallelism['timestamp'] = time.time() * 1000
        parallelism['dimensions'] = self.dimensions

        self.LOG.log("Error: %s " % application_progress_error['value'])

        self.rds.rpush(self.metric_queue, str(application_progress_error))

        if self.enable_visualizer:
            self.datasource.send_metrics([application_progress_error])
            self.datasource.send_metrics([job_progress_error])
            self.datasource.send_metrics([time_progress_error])
            self.datasource.send_metrics([parallelism])

        time.sleep(MONITORING_INTERVAL)

    def _get_num_replicas(self):
        job = self.b_v1.read_namespaced_job(name=self.app_id,
                                            namespace="default")
        return job.status.active

    def _get_elapsed_time(self):
        datetime_now = datetime.now()
        elapsed_time = datetime_now - self.submission_time
        self.LOG.log("Elapsed Time: %.2f" % elapsed_time.seconds)

        return elapsed_time.seconds

    def monitoring_application(self):
        try:
            num_queued_jobs = self.rds.llen('job')
            num_processing_jobs = self.rds.llen('job:processing')

            jobs_completed = self.number_of_jobs - \
                (num_queued_jobs + num_processing_jobs)
            self._publish_measurement(jobs_completed=jobs_completed)
            return jobs_completed

        except Exception as ex:
            self.LOG.log(("Error: No application found for %s.\
                 %s remaining attempts") % (self.app_id, self.attempts))

            self.LOG.log(ex.message)
            raise
Example 14
    def __init__(self):
        self.map = {}
        self.logger = Log('redis_mock_log', 'redis_mock.log')
Example 15
class MockRedis():

    """ Mock of a redis client object
    Returns:
        MockRedis: The simulation of a redis object
    """

    def __init__(self):
        self.map = {}
        self.logger = Log('redis_mock_log', 'redis_mock.log')

    def rpush(self, metric_queue, metric):
        """ Simulates the push of a job into the
            redis queue
        Args:
            metric_queue (string): Representing the metric queue
            metric (Object): Representing the metric to be pushed into
                             the queue.
        Returns:
            None
        """
        if self.map.get(metric_queue) is None:
            self.map[metric_queue] = []

        self.map[metric_queue].append(metric)

    def rpop(self, metric_queue):
        """ Simulates the pop of a job from the
            redis queue
        Args:
            metric_queue (string): Representing the metric queue
        Returns:
            Object: The metric popped from the queue
        """
        try:
            return self.map.get(metric_queue).pop(0)
        except Exception as e:
            self.logger.log(e)

    def delete(self, queue_name):
        """ Simulates the deletion of a
            redis queue
        Args:
            queue_name (string): Representing the name of the queue to
                                 be deleted.
        Returns:
            None
        """
        self.map.pop(queue_name)

    def llen(self, queue_name):
        """ Simulates getting the length of a
            redis queue
        Args:
            queue_name (string): Representing the name of the queue to
                                 be queried.
        Returns:
            Int: The length of the queue
        """
        queue = self.map.get(queue_name)
        return len(queue) if queue else 0
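
A short usage sketch of the mock, mirroring how the plugins use the real client; queue name and payloads are illustrative:

rds = MockRedis()
rds.rpush('app-00:metrics', {'name': 'job_progress', 'value': 0.4})
assert rds.llen('app-00:metrics') == 1
assert rds.rpop('app-00:metrics')['value'] == 0.4

rds.rpush('app-00:metrics', {'name': 'job_progress', 'value': 0.5})
rds.delete('app-00:metrics')
assert rds.llen('app-00:metrics') == 0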