Esempio n. 1
0
class RedisMetricSource(MetricSource):
    """Metric source that reads application measurements from Redis.

    Measurements are taken with ``RPOP`` from the ``<app_id>:metrics`` list.
    The newest timestamp/value pair seen so far is cached and returned
    whenever no fresher measurement is available.
    """

    def __init__(self, parameters):
        """Connect to Redis and initialise the cached measurement.

        :param parameters: mapping providing ``redis_ip`` and ``redis_port``.
        """
        self.rds = redis.StrictRedis(host=parameters['redis_ip'],
                                     port=parameters['redis_port'])
        self.LOG = Log('redis_log', 'redis.log')
        # Until a real measurement arrives, report 0.0 stamped with the
        # moment this object was created.
        self.last_metric = 0.0
        self.last_timestamp = datetime.datetime.now()

    def get_most_recent_value(self, app_id):
        """Pop one measurement for ``app_id`` and return the freshest pair.

        :param app_id: application identifier used to build the list key.
        :returns: ``(timestamp, value)``; the cached pair is returned when
            the list is empty or the popped entry is not newer than it.
        """
        raw_entry = self.rds.rpop("%s:metrics" % app_id)
        self.LOG.log("\n%s\n%s\n\n" % (raw_entry, app_id))

        # Nothing queued: fall back to what we already know.
        if raw_entry is None:
            return self.last_timestamp, self.last_metric

        # The entry is UTF-8 bytes holding a Python-literal mapping with
        # 'timestamp' and 'value' keys.
        record = ast.literal_eval(str(raw_entry, 'utf-8'))
        # 'timestamp' is divided by 1000 before conversion
        # (presumably milliseconds since the epoch -- TODO confirm).
        taken_at = datetime.datetime.fromtimestamp(record['timestamp'] / 1000)
        reading = float(record['value'])

        # Only a strictly newer entry replaces the cached pair.
        if taken_at > self.last_timestamp:
            self.last_timestamp = taken_at
            self.last_metric = reading

        return self.last_timestamp, self.last_metric
Esempio n. 2
0
class K8sActuator:
    """Actuator that scales a Kubernetes Job through its parallelism."""

    def __init__(self, app_id, k8s_manifest):
        """Load the kube config and prepare a batch API client.

        :param app_id: name of the Job this actuator manages.
        :param k8s_manifest: value handed to ``config.load_kube_config``.
        :raises Exception: when the kube config cannot be loaded.
        """
        try:
            config.load_kube_config(k8s_manifest)
        except Exception:
            raise Exception("Couldn't load kube config")

        self.app_id = app_id
        self.k8s_api = client.BatchV1Api()
        self.logger = Log("basic.controller.log", "controller.log")

    # TODO: validation
    def adjust_resources(self, replicas, namespace="default"):
        """Patch the managed Job so that its parallelism is ``replicas``.

        Failures are logged and not propagated (best effort).

        :param replicas: desired parallelism.
        :param namespace: namespace that holds the Job.
        """
        body = {"spec": {"parallelism": replicas}}
        try:
            self.k8s_api.patch_namespaced_job(self.app_id, namespace, body)
        except Exception as err:
            self.logger.log(str(err))

    # TODO: validation
    def get_number_of_replicas(self, namespace="default"):
        """Return the parallelism of the managed Job.

        :param namespace: namespace to search.
        :returns: ``spec.parallelism`` of the first Job whose name equals
            ``app_id``, or ``None`` when no such Job is listed.
        """
        listing = self.k8s_api.list_namespaced_job(namespace)
        matching = (job.spec.parallelism
                    for job in listing.items
                    if job.metadata.name == self.app_id)
        return next(matching, None)
Esempio n. 3
0
class OpenstackGenericMetricSource(MetricSource):
    """Metric source that derives a progress error from a remote log file.

    The last line of ``log_path`` on ``host_ip`` is fetched through
    ``SSHUtils``. A line containing ``[Progress]`` carries the measured
    progress after a ``#`` marker; that value is compared with the expected
    progress (elapsed time divided by the reference time).
    """

    def __init__(self, parameters):
        """Store connection details and timing references.

        :param parameters: mapping providing ``keypair_path``, ``host_ip``,
            ``log_path``, ``start_time`` and ``reference_value``.
        """
        self.keypair_path = parameters['keypair_path']
        self.host_ip = parameters['host_ip']
        self.log_path = parameters['log_path']
        self.start_time = parameters['start_time']
        self.expected_time = parameters['reference_value']
        self.host_username = '******'
        self.last_checked = ''
        # Cleared by _extract_metric_from_log once an '[END]' line is seen;
        # initialised here so the attribute always exists.
        self.running = True
        self.logger = Log("metrics.log", "metrics.log")
        configure_logging()

    def _get_metric_value_from_log_line(self, log):
        """Return the number that follows the last '#' in ``log``.

        The final character of the line is dropped before conversion
        (presumably the trailing newline of the ``tail`` output -- TODO
        confirm).

        :param log: log line to parse.
        :returns: the value as ``float``, or ``None`` without a '#'.
        :raises ValueError: if the text after the marker is not a number.
        """
        # rfind stops at the LAST marker, so earlier '#' characters in the
        # line can no longer leak into the text given to float().
        marker = log.rfind('#')
        if marker == -1:
            return None
        return float(log[marker + 1:-1])

    def _get_elapsed_time(self):
        """Return the seconds elapsed since ``start_time``."""
        return time.time() - self.start_time

    # This is an auxiliary function to prepare and publish the metric.
    # The point is to keep
    # monitoring_application as simple as possible.
    def _extract_metric_from_log(self, last_log):
        """Turn a log line into a progress error, if it carries a new one.

        :param last_log: most recent line of the monitored log.
        :returns: ``100 * (measured - expected)`` for a new ``[Progress]``
            line; ``None`` for every other line, including a ``[Progress]``
            line that has no '#' marker.
        """
        # Check if this log line contains a new metric measurement.
        if '[Progress]' in last_log and self.last_checked != last_log:
            self.last_checked = last_log
            measurement_value = self._get_metric_value_from_log_line(last_log)
            if measurement_value is None:
                # Same "no metric" result as any other non-metric line,
                # instead of an obscure TypeError on the subtraction below.
                self.logger.log("No metric value found in log line: %s" %
                                last_log)
                return None

            ref_value = self._get_elapsed_time() / self.expected_time
            error = measurement_value - ref_value
            self.logger.log("ref-value:%f|measurement-value:%f|error:%f" %
                            (ref_value, measurement_value, error))
            return 100 * error

        # Flag that checks if the log capture is ended
        if '[END]' in last_log:
            self.running = False
        return None

    def _monitoring_application(self):
        """Fetch the last log line and convert it into a measurement.

        :returns: ``(timestamp, value)`` where ``timestamp`` is the local
            time of the read and ``value`` comes from
            ``_extract_metric_from_log`` (may be ``None``).
        :raises Exception: any failure is printed and re-raised unchanged.
        """
        try:
            result = SSHUtils().run_and_get_result(
                "sudo tail -1 %s" % self.log_path, self.host_username,
                self.host_ip, self.keypair_path)
            timestamp = datetime.datetime.fromtimestamp(time.time())
            return timestamp, self._extract_metric_from_log(result)

        except Exception as ex:
            # Single-argument print() behaves the same on Python 2 and 3;
            # formatting the exception itself avoids the Python 2-only
            # ``message`` attribute.
            print("Monitoring is not possible. \nError: %s" % (ex,))
            # Bare raise keeps the original traceback.
            raise

    def get_most_recent_value(self, metric_name, options):
        """Return the latest measurement read from the remote log.

        ``metric_name`` and ``options`` are accepted but not used by this
        implementation.
        """
        return self._monitoring_application()
Esempio n. 4
0
class SingleApplicationController(Controller):
    """Controller that periodically checks a single application.

    The decision to scale is delegated to a ``BasicAlarm`` built from the
    metric source and actuator named in the plugin configuration.
    """

    def __init__(self, application_id, plugin_info):
        """Read the scaling settings and assemble the alarm.

        :param application_id: identifier of the application to monitor.
        :param plugin_info: mapping whose ``"plugin_info"`` entry holds the
            scaling settings read below.
        """
        self.logger = Log("single.controller.log", "controller.log")
        configure_logging()

        settings = plugin_info["plugin_info"]

        self.application_id = application_id
        self.instances = settings["instances"]
        self.check_interval = settings["check_interval"]
        self.trigger_down = settings["trigger_down"]
        self.trigger_up = settings["trigger_up"]
        self.min_cap = settings["min_cap"]
        self.max_cap = settings["max_cap"]
        self.actuation_size = settings["actuation_size"]
        self.metric_rounding = settings["metric_rounding"]
        self.actuator_type = settings["actuator"]
        self.metric_source_type = settings["metric_source"]

        # Flag read by the monitoring loop; guarded by the lock because
        # stop_application_scaling writes it under the same lock.
        self.running = True
        self.running_lock = threading.RLock()

        source = MetricSourceBuilder().get_metric_source(
            self.metric_source_type, settings)
        scaler = ActuatorBuilder().get_actuator(self.actuator_type, settings)

        self.alarm = BasicAlarm(
            scaler, source,
            self.trigger_down, self.trigger_up,
            self.min_cap, self.max_cap,
            self.actuation_size, self.metric_rounding)

    def start_application_scaling(self):
        """Check the application every ``check_interval`` seconds.

        The loop always runs at least once and ends after the first sleep
        that finds ``running`` cleared.
        """
        while True:
            self.logger.log("Monitoring application: %s" %
                            (self.application_id))
            self.alarm.check_application_state(self.application_id,
                                               self.instances)
            time.sleep(float(self.check_interval))

            with self.running_lock:
                if not self.running:
                    break

    def stop_application_scaling(self):
        """Ask the monitoring loop to finish after its current cycle."""
        with self.running_lock:
            self.running = False

    def status(self):
        """Return the controller status (always an empty string)."""
        return ""
Esempio n. 5
0
class _BasicControllerThread():
    """Worker loop that checks every registered application at intervals."""

    def __init__(self, applications, applications_lock, alarm, check_interval):
        """Keep references to the shared state used by the loop.

        :param applications: mapping of application id to a mapping that
            contains an ``"instances"`` entry.
        :param applications_lock: lock held while ``applications`` is read.
        :param alarm: object providing ``check_application_state``.
        :param check_interval: pause between two rounds, in seconds.
        """
        self.logger = Log("basic.controller_thread.log", "controller.log")
        configure_logging()

        self.alarm = alarm
        self.check_interval = check_interval
        self.applications = applications
        self.applications_lock = applications_lock
        self.running = True

    def start(self):
        """Run check rounds until ``running`` becomes false."""
        self.logger.log("Starting controller thread")

        while self.running:
            # The lock is held for the whole round, so the set of
            # applications cannot change while it is being walked.
            with self.applications_lock:
                self.logger.log("Monitoring applications: %s" %
                                (str(self.applications.keys())))

                for app_id, app_info in self.applications.items():
                    vms = app_info["instances"]

                    self.logger.log("Checking application:%s|instances:%s" %
                                    (app_id, vms))
                    self.alarm.check_application_state(app_id, vms)

            # Sleep outside the lock so other threads can update the mapping.
            time.sleep(float(self.check_interval))
Esempio n. 6
0
class BasicController(Controller):
    """Controller that monitors many applications from one worker thread."""

    def __init__(self, metric_source, actuator, plugin_info):
        """Build the alarm and start the monitoring thread.

        :param metric_source: metric source handed to the alarm.
        :param actuator: actuator handed to the alarm.
        :param plugin_info: mapping with ``check_interval`` and the alarm
            settings read below.
        """
        # Set up logging
        self.logger = Log("basic.controller.log", "controller.log")
        configure_logging()

        check_interval = plugin_info["check_interval"]

        # Start alarm
        self.alarm = BasicAlarm(
            actuator, metric_source,
            plugin_info["trigger_down"], plugin_info["trigger_up"],
            plugin_info["min_cap"], plugin_info["max_cap"],
            plugin_info["actuation_size"], plugin_info["metric_rounding"])

        # The application mapping is shared with the worker, so every
        # access goes through this lock.
        self.applications_lock = threading.RLock()
        self.applications = {}

        # Start up controller thread
        self.controller = _BasicControllerThread(
            self.applications, self.applications_lock,
            self.alarm, check_interval)
        self.controller_thread = threading.Thread(target=self.controller.start)
        self.controller_thread.start()

    def start_application_scaling(self, app_id, plugin_info):
        """Register ``app_id`` so the worker starts checking it."""
        self.logger.log("Adding application id: %s" % app_id)
        with self.applications_lock:
            self.applications[app_id] = plugin_info

    def stop_application_scaling(self, app_id):
        """Unregister ``app_id``; an unknown id is only logged."""
        with self.applications_lock:
            if app_id not in self.applications:
                self.logger.log("Application %s not found" % app_id)
                return

            self.logger.log("Removing application id: %s" % app_id)
            del self.applications[app_id]

    def stop_controller(self):
        """Tell the worker loop to stop after its current round."""
        self.controller.running = False

    def status(self):
        """Return the controller status (always an empty string)."""
        return ""
Esempio n. 7
0
class TendencyAwareProportionalController(Controller):
    """Controller that checks one application with a tendency-aware alarm."""

    def __init__(self, application_id, plugin_info):
        """Read the scaling settings and assemble the alarm.

        :param application_id: identifier of the application to monitor.
        :param plugin_info: mapping whose ``"plugin_info"`` entry holds the
            scaling settings read below.
        """
        self.logger = Log("tendency.proportional.controller.log",
                          "controller.log")
        configure_logging()

        settings = plugin_info["plugin_info"]

        self.application_id = application_id
        self.instances = settings["instances"]
        self.check_interval = settings["check_interval"]
        self.trigger_down = settings["trigger_down"]
        self.trigger_up = settings["trigger_up"]
        self.min_cap = settings["min_cap"]
        self.max_cap = settings["max_cap"]
        self.metric_rounding = settings["metric_rounding"]
        self.actuation_size = settings["actuation_size"]
        self.actuator_type = settings["actuator"]
        self.metric_source_type = settings["metric_source"]

        # Flag read by the monitoring loop; guarded by the lock because
        # stop_application_scaling writes it under the same lock.
        self.running = True
        self.running_lock = threading.RLock()

        # Gets a new metric source plugin using the given name
        source = MetricSourceBuilder().get_metric_source(
            self.metric_source_type, settings)

        # Gets a new actuator plugin using the given name
        scaler = ActuatorBuilder().get_actuator(self.actuator_type, settings)

        # The alarm here is responsible for deciding whether to scale up or
        # down, or even do nothing
        self.alarm = TendencyAwareProportionalAlarm(
            scaler, source,
            self.trigger_down, self.trigger_up,
            self.min_cap, self.max_cap,
            self.actuation_size, self.metric_rounding)

    def start_application_scaling(self):
        """Check the application every ``check_interval`` seconds.

        Errors raised by the alarm are logged and the loop carries on. The
        loop always runs at least once and ends after the first sleep that
        finds ``running`` cleared.
        """
        while True:
            self.logger.log("Monitoring application: %s" %
                            (self.application_id))

            # Call the alarm to check the application
            try:
                self.alarm.check_application_state(self.application_id,
                                                   self.instances)
            except MetricNotFoundException:
                self.logger.log("No metrics available")
            except Exception as err:
                self.logger.log(str(err))

            # Wait some time
            time.sleep(float(self.check_interval))

            with self.running_lock:
                if not self.running:
                    break

    def stop_application_scaling(self):
        """Ask the monitoring loop to finish after its current cycle."""
        with self.running_lock:
            self.running = False

    def status(self):
        """Return whatever the alarm reports through ``status()``."""
        return self.alarm.status()
class BasicAlarm:
    """Alarm that compares job progress with time progress and scales.

    When the job is ahead of time by at least ``trigger_down`` the cap is
    reduced; when it is behind by at least ``trigger_up`` the cap is raised.
    """

    # TODO: Think about these constants placements
    PROGRESS_METRIC_NAME = "spark.job_progress"
    ELAPSED_TIME_METRIC_NAME = 'spark.elapsed_time'

    def __init__(self, actuator, metric_source, trigger_down, trigger_up,
                 min_cap, max_cap, actuation_size, metric_rounding):
        """Store the collaborators and the scaling thresholds.

        :param actuator: provides ``get_allocated_resources`` and
            ``adjust_resources``.
        :param metric_source: provides ``get_most_recent_value``.
        :param trigger_down: minimum positive difference that scales down.
        :param trigger_up: minimum negative difference (absolute value)
            that scales up.
        :param min_cap: lower bound applied when scaling down.
        :param max_cap: upper bound applied when scaling up.
        :param actuation_size: amount added to or removed from the cap.
        :param metric_rounding: digits kept when rounding metric values.
        """
        # TODO: Check parameters
        self.metric_source = metric_source
        self.actuator = actuator
        self.trigger_down = trigger_down
        self.trigger_up = trigger_up
        self.min_cap = min_cap
        self.max_cap = max_cap
        self.actuation_size = actuation_size
        self.metric_rounding = metric_rounding

        self.logger = Log("basic.alarm.log", "controller.log")
        configure_logging()

        # Earliest representable timestamp (0001-01-01T00:00:00), so the
        # first real measurement always counts as new.
        self.last_time_progress_timestamp = datetime.datetime.min
        self.last_job_progress_timestamp = datetime.datetime.min

    def _get_rounded_measurement(self, metric_name, application_id):
        """Fetch ``metric_name`` and return ``(timestamp, rounded value)``."""
        measurement = self.metric_source.get_most_recent_value(
            metric_name, {"application_id": application_id})
        return measurement[0], round(measurement[1], self.metric_rounding)

    def get_job_progress(self, application_id):
        """Return ``(timestamp, value)`` of the latest job progress."""
        # The class is named BasicAlarm; the former ``Basic_Alarm``
        # reference was an undefined name and raised NameError here.
        return self._get_rounded_measurement(
            BasicAlarm.PROGRESS_METRIC_NAME, application_id)

    def get_time_progress(self, application_id):
        """Return ``(timestamp, value)`` of the latest time progress."""
        return self._get_rounded_measurement(
            BasicAlarm.ELAPSED_TIME_METRIC_NAME, application_id)

    def check_measurements_are_new(self, job_progress_timestamp,
                                   time_progress_timestamp):
        """Return True when both timestamps are newer than the last used."""
        return (self.last_job_progress_timestamp < job_progress_timestamp and
                self.last_time_progress_timestamp < time_progress_timestamp)

    def check_application_state(self, application_id, instances):
        """Read both metrics and scale the instances when required.

        Any exception is logged and swallowed, so one failed check never
        stops the caller's monitoring loop.

        :param application_id: application whose metrics are read.
        :param instances: instances whose cap is adjusted.
        """
        # TODO: Check parameters
        try:
            self.logger.log("Getting progress")
            job_progress_timestamp, job_progress = self.get_job_progress(
                application_id)

            self.logger.log("Getting time progress")
            time_progress_timestamp, time_progress = self.get_time_progress(
                application_id)

            self.logger.log("Progress-[%s]-%f|Time progress-[%s]-%f" %
                            (str(job_progress_timestamp), job_progress,
                             str(time_progress_timestamp), time_progress))

            if self.check_measurements_are_new(job_progress_timestamp,
                                               time_progress_timestamp):

                # Positive: job ahead of schedule; negative: job is late.
                diff = job_progress - time_progress

                self.scale_down(diff, instances)
                self.scale_up(diff, instances)

                self.last_job_progress_timestamp = job_progress_timestamp
                self.last_time_progress_timestamp = time_progress_timestamp

            else:
                self.logger.log("Could not acquire more recent metrics")

        except Exception as ex:
            # TODO: Check exception type
            self.logger.log("Could not get metrics")
            # Keep the cause in the log; the generic message alone made
            # programming errors in this path invisible.
            self.logger.log(str(ex))
            return

    def scale_down(self, diff, instances):
        """Lower the cap of all instances when ``diff`` >= trigger_down."""
        if diff > 0 and diff >= self.trigger_down:
            self.logger.log("Scaling down")
            # The first instance's cap is used as the current cap for all.
            cap = self.actuator.get_allocated_resources(instances[0])
            new_cap = max(cap - self.actuation_size, self.min_cap)
            cap_instances = {instance: new_cap for instance in instances}

            self.actuator.adjust_resources(cap_instances)

    def scale_up(self, diff, instances):
        """Raise the cap of all instances when ``-diff`` >= trigger_up."""
        if diff < 0 and abs(diff) >= self.trigger_up:
            self.logger.log("Scaling up")
            # The first instance's cap is used as the current cap for all.
            cap = self.actuator.get_allocated_resources(instances[0])
            new_cap = min(cap + self.actuation_size, self.max_cap)
            cap_instances = {instance: new_cap for instance in instances}

            self.actuator.adjust_resources(cap_instances)
Esempio n. 9
0
    """ Validate if really exists a section to listed plugins """
    for plugin in actuator_plugins:
        if plugin != '' and plugin not in config.sections():
            raise Exception("plugin '%s' section missing" % plugin)

    for plugin in metric_source_plugins:
        if plugin != '' and plugin not in config.sections():
            raise Exception("plugin '%s' section missing" % plugin)

    if 'monasca' in metric_source_plugins:
        monasca_endpoint = config.get('monasca', 'monasca_endpoint')
        monasca_username = config.get('monasca', 'username')
        monasca_password = config.get('monasca', 'password')
        monasca_auth_url = config.get('monasca', 'auth_url')
        monasca_project_name = config.get('monasca', 'project_name')
        monasca_api_version = config.get('monasca', 'api_version')

    if 'k8s_replicas' in actuator_plugins:

        # Setting default value
        k8s_manifest = "./data/conf"

        # If explicitly stated in the cfg file, overwrite the variable
        if (config.has_section('k8s_replicas')):
            if (config.has_option('k8s_replicas', 'k8s_manifest')):
                k8s_manifest = config.get("k8s_replicas", "k8s_manifest")

except Exception as e:
    LOG.log("Error: %s" % e)
    quit()
Esempio n. 10
0
class MonascaClient:
    """Thin wrapper around the Monasca client for reading/sending metrics.

    A fresh, newly authenticated client is built for every request.
    """

    def __init__(self):
        """Copy the Monasca credentials from ``api`` and set up logging."""
        self.monasca_username = api.monasca_username
        self.monasca_password = api.monasca_password
        self.monasca_auth_url = api.monasca_auth_url
        self.monasca_project_name = api.monasca_project_name
        self.monasca_api_version = api.monasca_api_version
        # The returned client is discarded.
        # NOTE(review): looks like an early credentials check -- confirm.
        self._get_monasca_client()
        self.LOG = Log('monasca_client_log', 'monasca_client.log')

    def get_measurements(self, metric_name, dimensions,
                         start_time='2014-01-01T00:00:00Z'):
        """Return the measurements of ``metric_name`` for one application.

        Errors while querying are logged and treated as "no data".

        :param metric_name: name of the metric to query.
        :param dimensions: mapping; only its ``application_id`` entry is
            forwarded to Monasca.
        :param start_time: earliest measurement time to request.
        :returns: the ``'measurements'`` list of the first result, or
            ``None`` when nothing was returned or the query failed.
        """
        measurements = []
        try:
            monasca_client = self._get_monasca_client()
            dimensions = {'application_id': dimensions['application_id']}
            measurements = monasca_client.metrics.list_measurements(
                name=metric_name, dimensions=dimensions,
                start_time=start_time, debug=False)
        except Exception as ex:
            # HTTP errors and every other failure were handled identically,
            # so a single handler covers both.
            self.LOG.log(ex)

        if measurements:
            return measurements[0]['measurements']
        return None

    def first_measurement(self, name, dimensions):
        """Return the oldest measurement, or a placeholder without data.

        :returns: the first measurement, or ``[None, None, None]`` when no
            measurement is available.
        """
        # Query once: every call authenticates and performs an HTTP
        # request, and two calls could disagree with each other.
        measurements = self.get_measurements(name, dimensions)
        if not measurements:
            return [None, None, None]
        return measurements[0]

    def last_measurement(self, name, dimensions):
        """Return the newest measurement.

        :raises MetricNotFoundException: when no measurement is available,
            including an empty measurement list.
        """
        measurements = self.get_measurements(name, dimensions)
        if not measurements:
            raise MetricNotFoundException()
        return measurements[-1]

    def _get_monasca_client(self):
        """Authenticate against Keystone and build a Monasca client."""
        # Authenticate to Keystone
        ks = ksclient.KSClient(
            auth_url=self.monasca_auth_url,
            username=self.monasca_username,
            password=self.monasca_password,
            project_name=self.monasca_project_name,
            debug=False
        )

        # Monasca Client
        monasca_client = monclient.Client(self.monasca_api_version,
                                          ks.monasca_url,
                                          token=ks.token,
                                          debug=False)

        return monasca_client

    def send_metrics(self, measurements):
        """Publish ``measurements`` to Monasca; failures are only logged.

        :param measurements: payload sent as the ``jsonbody`` argument.
        """
        batch_metrics = {'jsonbody': measurements}
        try:
            monasca_client = self._get_monasca_client()
            monasca_client.metrics.create(**batch_metrics)
        except Exception as ex:
            self.LOG.log(ex)
Esempio n. 11
0
class TendencyAwareProportionalAlarm:
    """Alarm that scales on the progress error and on its tendency.

    A large error triggers scaling directly. A small error triggers scaling
    according to whether it grew or shrank since the previous measurement.
    """

    ERROR_METRIC_NAME = "application-progress.error"

    def __init__(self, actuator, metric_source, trigger_down, trigger_up,
                 min_cap, max_cap, actuation_size, metric_rounding):
        """Store the collaborators and the scaling thresholds.

        :param actuator: provides ``get_allocated_resources_to_cluster``
            and ``adjust_resources``.
        :param metric_source: provides ``get_most_recent_value``.
        :param trigger_down: minimum positive error that scales down.
        :param trigger_up: minimum negative error (absolute value) that
            scales up.
        :param min_cap: lower bound applied when lowering the cap.
        :param max_cap: upper bound applied when raising the cap.
        :param actuation_size: amount added to or removed from the cap.
        :param metric_rounding: digits kept when rounding the error.
        """
        self.metric_source = metric_source
        self.actuator = actuator
        self.trigger_down = trigger_down
        self.trigger_up = trigger_up
        self.min_cap = min_cap
        self.max_cap = max_cap
        self.metric_rounding = metric_rounding
        self.actuation_size = actuation_size

        self.logger = Log("proportional.alarm.log", "controller.log")
        self.cap_logger = Log("cap.log", "cap.log")

        configure_logging()

        # Earliest representable timestamp (0001-01-01T00:00:00), so the
        # first real measurement always counts as new.
        self.last_progress_error_timestamp = datetime.datetime.min
        self.last_progress_error = None
        # -1 means that this alarm has not applied any cap yet.
        self.cap = -1
        self.last_action = ""

    def check_application_state(self, application_id, instances):
        """
            Checks the application progress by getting progress metrics from a
            metric source, checks if the metrics are new and tries to modify
            the amount of allocated resources if necessary.
        """

        self.logger.log("Getting progress error")
        self.last_action = "getting progress error"

        # Get the progress error value and timestamp
        progress_error_timestamp, progress_error = self._get_progress_error(
            application_id)
        summary = "Progress error-[%s]-%f" % (
            str(progress_error_timestamp), progress_error)
        self.logger.log(summary)
        self.last_action = summary

        # Check if the metric is new by comparing the timestamps of the
        # current metric and most recent metric
        if self._check_measurements_are_new(progress_error_timestamp):
            self._scale(progress_error, instances)

            if self.cap != -1:
                self.cap_logger.log("%.0f|%s|%s" % (
                    time.time(), str(application_id), str(self.cap)))

            self.last_progress_error = progress_error
            self.last_progress_error_timestamp = progress_error_timestamp
        else:
            self.logger.log("Could not acquire more recent metrics")

    def status(self):
        """Return the description of the most recent action.

        The controller reports its status through this method.
        """
        return self.last_action

    def _scale(self, progress_error, instances):
        """Choose between threshold scaling and tendency scaling."""
        # If error is positive and its absolute value is too high, scale down
        if progress_error > 0 and progress_error >= self.trigger_down:
            self._scale_down(instances)

        # If the error is negative and its absolute value is too high, scale up
        elif progress_error < 0 and abs(progress_error) >= self.trigger_up:
            self._scale_up(instances)
        else:
            self._tendency_scale(progress_error, instances)

    def _change_cap(self, instances, increase):
        """Move the cap one ``actuation_size`` step and apply it.

        :param instances: instances that receive the new cap.
        :param increase: True to raise the cap (bounded by ``max_cap``),
            False to lower it (bounded by ``min_cap``).
        """
        # Get current CPU cap
        cap = self.actuator.get_allocated_resources_to_cluster(instances)
        if increase:
            new_cap = min(cap + self.actuation_size, self.max_cap)
        else:
            new_cap = max(cap - self.actuation_size, self.min_cap)

        action = "Scaling from %d to %d" % (cap, new_cap)
        self.logger.log(action)
        self.last_action = action

        # Currently, we use the same cap for all the vms
        cap_instances = {instance: new_cap for instance in instances}

        # Set the new cap
        self.actuator.adjust_resources(cap_instances)

        self.cap = new_cap

    def _scale_down(self, instances):
        """Lower the cap because the error passed ``trigger_down``."""
        self.logger.log("Scaling down")
        self.last_action = "Getting allocated resources"
        self._change_cap(instances, increase=False)

    def _scale_up(self, instances):
        """Raise the cap because the error passed ``trigger_up``."""
        self.logger.log("Scaling up")
        self.last_action = "Getting allocated resources"
        self._change_cap(instances, increase=True)

    def _tendency_scale(self, progress_error, instances):
        """Scale according to the change of the error since the last check.

        A decreasing error raises the cap, an increasing error lowers it,
        and an unchanged error (or no previous error) does nothing.
        """
        if self.last_progress_error is not None:
            difference = progress_error - self.last_progress_error
        else:
            difference = 0.0

        if difference < 0.0:
            self._change_cap(instances, increase=True)
        elif difference > 0.0:
            self._change_cap(instances, increase=False)

    def _get_progress_error(self, application_id):
        """Return ``(timestamp, rounded error)`` from the metric source."""
        progress_error_measurement = \
            self.metric_source.get_most_recent_value(
                TendencyAwareProportionalAlarm.ERROR_METRIC_NAME,
                {"application_id": application_id}
            )

        progress_error_timestamp = progress_error_measurement[0]
        progress_error = progress_error_measurement[1]
        progress_error = round(progress_error, self.metric_rounding)
        return progress_error_timestamp, progress_error

    def _check_measurements_are_new(self, progress_error_timestamp):
        """Return True when the timestamp is newer than the last one used."""
        return self.last_progress_error_timestamp < progress_error_timestamp