Example #1
def do_request_for_scaling(alert_id):
    alert = alert_db.get_alert(alert_id)
    ns_id = alert['ns_id']
    ns_status = ns_db.get_ns_status(ns_id)
    current_il = ns_db.get_ns_il(alert['ns_id'])
    rule_actions = alert['ruleActions']
    for rule_action in rule_actions:
        if rule_action['scaleNsToLevelData'][
                'nsInstantiationLevel'] == current_il:
            log_queue.put([
                "DEBUG", "Current nsInstantiationLevel for nsId: " + ns_id +
                " and the alert nsInstantiationLevel are the same"
            ])
            continue
        if ns_status in ["FAILED", "TERMINATED", "INSTANTIATING"]:
            log_queue.put([
                "DEBUG",
                "Current Status is " + ns_status + " for nsId: " + ns_id
            ])
            log_queue.put(
                ["DEBUG", "This status is not suitable for scaling actions"])
            continue

        log_queue.put(["DEBUG", "Do scaling request for alert: " + alert_id])
        request_to_so_scale_ns(alert)
Example #2
def do_request_for_scaling(alert_id):
    alert = alert_db.get_alert(alert_id)
    ns_id = alert['ns_id']
    ns_status = ns_db.get_ns_status(ns_id)
    # adding code to allow auto-scaling of nested NSs
    # the alert and the autoscaling rules are defined in the nested descriptors
    nested_info = ns_db.get_nested_service_info(ns_id)
    if nested_info:
        # we need to look for the corresponding nested
        nsdId = alert['nsd_id']
        nsId_tmp = ns_id + '_' + nsdId
        particular_nested_info = ns_db.get_particular_nested_service_info(ns_id, nsId_tmp)
        current_il = particular_nested_info['nested_il']
    else:
        current_il = ns_db.get_ns_il(alert['ns_id'])
    rule_actions = alert['ruleActions']
    for rule_action in rule_actions:
        if rule_action['scaleNsToLevelData']['nsInstantiationLevel'] == current_il:
            log_queue.put(["DEBUG", "Current nsInstantiationLevel for nsId: " + ns_id + 'and Alert nsInstantiationLevel is the same'])
            continue
        if ns_status in ["FAILED", "TERMINATED", "INSTANTIATING", "SCALING"]:
            log_queue.put(["DEBUG","Current Status is " + ns_status + " for nsId: " + ns_id ])
            log_queue.put(["DEBUG", "This status is not fit to scaling actions"])
            continue

        log_queue.put(["DEBUG", "Do scaling request for alert: " + alert_id])
        request_to_so_scale_ns(alert)
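
Both variants above assume an alert record containing the fields accessed in the code; a minimal, hypothetical example of such a record (the exact schema stored by alert_db is an assumption) could look like this:

# Hypothetical alert record consumed by do_request_for_scaling; only the fields
# read above are shown, and the concrete values are placeholders.
example_alert = {
    "ns_id": "fgt-123456",
    "nsd_id": "vCDN_nsd",          # used to locate the nested NS instance
    "ruleActions": [
        {
            "scaleNsToLevelData": {
                "nsInstantiationLevel": "il_big"
            }
        }
    ]
}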
Example #3
def update_ns_aiml_scale_work(nsId, aiml_scaling_info):
    """
    After the scaling produced by the AIML notification, the 
    spark job has to be resubmitted and the new IL published in the kafka topic
    Parameters
    ----------
    nsId:
        String with the Network Service Id 
    aiml_scaling_info: dict
        Dictionary with the information generated when creating the scaling aiml work
    Returns
    -------
    """
    # steps:
    log_queue.put([
        "DEBUG",
        "Updating the AIML info after scaling for nsId: %s and info:" % nsId
    ])
    log_queue.put(["DEBUG", json.dumps(aiml_scaling_info, indent=4)])
    kafka_topic = aiml_scaling_info["topicId"]
    streaming_class = aiml_scaling_info["streamingClass"]
    model_name = aiml_scaling_info["model"]
    collectionPeriod = aiml_scaling_info["collectionPeriod"]
    # 1 - restart spark job
    # spark_job_id = start_spark_streaming_job(nsId, kafka_topic, streaming_class, model_name)
    status_file = spark_folder + "/" + kafka_topic + ".txt"
    spark_job_id = start_spark_streaming_job(nsId, kafka_topic, streaming_class, model_name, \
                   collectionPeriod, kafka_ip + ":" + kafka_port, alert_target, status_file)
    aiml_scaling_info["streamingJobId"] = spark_job_id
    # 2 - publish the IL in the kafka topic
    currentIL = ns_db.get_ns_il(nsId)
    #current_IL = { "key": "currentIL",
    #               "value": currentIL}
    current_IL = {
        "type_message": "nsStatusMetrics",
        "metric": {
            "__name__": "nsInstantiationLevel",
            "nsId": nsId,
        },
        "value": currentIL
    }
    # publish the constructed status message (current_IL), not just the raw IL value
    monitoring.publish_json_kafka(kafka_topic, current_IL)
    # 3 - update the db
    log_queue.put(["DEBUG", "New scaling info: "])
    log_queue.put(["DEBUG", json.dumps(aiml_scaling_info, indent=4)])
    ns_db.set_aiml_info(nsId, "scaling", aiml_scaling_info)
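
The aiml_scaling_info argument is the dictionary created by configure_ns_aiml_scale_work (Example #8); a hypothetical instance, with illustrative values only, is:

# Illustrative aiml_scaling_info dictionary; the keys match the accesses above,
# the values are placeholders.
aiml_scaling_info_example = {
    "topicId": "fgt-123456_scaling",
    "streamingClass": "5growth_vCDN_2.11-0.1.jar",
    "model": "spark-random-forest-model-vCDN",
    "collectionPeriod": 15,
    "streamingJobId": "driver-20210101120000-0001",   # overwritten after the restart
    "scrapperJobs": {}
}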
Example #4
def scale_ns_process(nsId, body):
    """
    Performs the scaling of the service identified by "nsId" according to the info in the body
    Parameters
    ----------
    nsId: string
        Identifier of the Network Service Instance.
    body: request body including scaling operation
    Returns
    -------
    """
    log_queue.put(
        ["INFO",
         "scale_ns_process with nsId %s, body %s" % (nsId, body)])
    # get the nsdId that corresponds to nsId
    nsdId = ns_db.get_nsdId(nsId)
    # get current instantiation level
    current_df = ns_db.get_ns_df(nsId)
    current_il = ns_db.get_ns_il(nsId)
    # first get the ns and vnfs descriptors
    nsd_json = nsd_db.get_nsd_json(nsdId, None)
    # for each vnf in the NSD, get its json descriptor
    vnfdIds = nsd_json["nsd"]["vnfdId"]
    vnfds_json = {}
    for vnfdId in vnfdIds:
        vnfds_json[vnfdId] = vnfd_db.get_vnfd_json(vnfdId, None)
    #request RO
    sap_info_pre_scaling = ns_db.get_ns_sap_info(nsId)
    rooe.scale_ns(nsId, nsd_json, vnfds_json, body, current_df, current_il)
    # maybe we have to update the monitoring jobs: we assume that new performance monitoring jobs
    # will be similar to one already present
    sap_info = ns_db.get_ns_sap_info(nsId)
    log_queue.put(["INFO", "new sapInfo after scaling: %s" % (sap_info)])
    monitoring.update_ns_monitoring(nsId, nsd_json, vnfds_json, sap_info)
    log_queue.put([
        "DEBUG",
        "monitoring exporters updated after scaling for nsId %s" % (nsId)
    ])
    # update alerts: it is not needed
    log_queue.put(["INFO", "scale_ns_process finished for nsId %s" % (nsId)])
Example #5
    def post(self):
        #data_json = request.data
        # data = json.loads(data_json)
        data = request.get_json(force=True)
        if "alerts" in data:
            alerts = data['alerts']
            for alert in alerts:

                labels = (alert['labels'])
                str_starts_at = str(alert['startsAt'])
                alertname = labels["alertname"]
                log_massage = "Received alert: " + alertname + " startsAt: " + str_starts_at + " status: " + alert['status']
                log_queue.put(["INFO", log_massage])

                if alert['status'] == 'resolved':
                    alert_db.set_timestamp(alertname, "")
                    continue

                if alert_db.exists_alert_id(alertname):
                    if not is_problem_resolved(alert):
                        log_queue.put(["DEBUG", "Alert is not resolved: " + alertname + " start date = " + str_starts_at])
                        do_request_for_scaling(alertname)
                    continue
                else:
                    continue

        # check whether this alert comes from elastalert
        if "alertname" in data:
            str_starts_at = str(data['startsAt'])
            date_time_obj = datetime.strptime(str_starts_at, "%a %b %d %H:%M:%S %Z %Y")
            str_starts_at = date_time_obj.isoformat()
            alertname = data["alertname"]
            log_massage = "Received log alert: " + alertname + " startsAt: " + str_starts_at
            log_queue.put(["INFO", log_massage])
            if alert_db.exists_alert_id(alertname):
                alert = {'startsAt': str_starts_at}
                alert.update({"labels":{"alertname": alertname}})
                if not is_problem_resolved(alert):
                    log_queue.put(["DEBUG", "Alert is not resolved: " + alertname + " start date = " + str_starts_at])
                    do_request_for_scaling(alertname)

        if "aiml" in data: # added to manage the notifications from the execution of the aiml model
            current_time = datetime.now(pytz.utc)
            notification = data["aiml"]
            log_queue.put(["DEBUG", "Notification from Spark Job: %s" % notification])
            ns_id = notification["nsID"]
            nsInstantiationLevel = notification["nsInstantiationLevel"]
            # cpu_measurement = notification["cpu_measurement"]
            aiml_scaling_info = ns_db.get_aiml_info(ns_id, "scaling")
            currentIL = ns_db.get_ns_il(ns_id)
            if (aiml_scaling_info and (nsInstantiationLevel != currentIL)):
                current_time2 = datetime.now(pytz.utc)
                timeout = current_time2 - current_time
                log_queue.put(["INFO", "*****Time measure: SLAManager SLAManager webhook processing scaling: %s" % timeout])
                log_queue.put(["DEBUG", "Generating a scaling operation for nsId: %s from currentIL: %s to newIL: %s" % (ns_id, currentIL, nsInstantiationLevel)])
                # 1 - stop the spark job
                alert_configure.delete_spark_streaming_job(aiml_scaling_info["streamingJobId"])             
                log_queue.put(["INFO", "*****Time measure: SLAManager SLAManager webhook stopped spark job"])
                # 1.5 - remove the kafka topic
                monitoring.delete_kafka_topic(aiml_scaling_info["topicId"])
                log_queue.put(["INFO", "*****Time measure: SLAManager SLAManager webhook deleted kafka topic"])
                # 2 - generate the scaling request
                scale_request = {
                      "scaleType": "SCALE_NS",
                      "scaleNsData": {
                        "scaleNsToLevelData": {
                          "nsInstantiationLevel": nsInstantiationLevel
                        }
                      },
                      "scaleTime": "0"
                   }
                log_queue.put(["DEBUG", "AIML makes an scaling request for nsId: %s"% ns_id])
                log_queue.put(["DEBUG", "AIML scale request:" ])
                #log_queue.put(["DEBUG", json.dumps(scale_request, indent=4)])
                make_request_to_so_nbi(ns_id, scale_request)
                log_queue.put(["INFO", "*****Time measure: SLAManager SLAManager webhook made scaling request"])

            else:
                log_queue.put(["DEBUG", "Not generating a scaling operation for nsId: %s" % (ns_id)])

        return "OK", 200
Example #6
def scale_ns_process(nsId, body, nestedInfo=None):
    """
    Performs the scaling of the service identified by "nsId" according to the info in the body
    Parameters
    ----------
    nsId: string
        Identifier of the Network Service Instance.
    body: request body including scaling operation
    nestedInfo: dict, optional
        Mapping of the nested NSD id to its [deployment flavour, instantiation level],
        used when scaling a nested NS
    Returns
    -------
    """
    log_queue.put([
        "INFO",
        "*****Time measure for nsId: %s: SOEc SOEc scaling a nested/regular NS"
        % nsId
    ])
    log_queue.put(
        ["INFO",
         "scale_ns_process with nsId %s, body %s" % (nsId, body)])
    # get the nsdId that corresponds to nsId
    if nestedInfo:
        nsdId = next(iter(nestedInfo))
        current_df = nestedInfo[nsdId][0]
        current_il = nestedInfo[nsdId][1]
    else:
        nsdId = ns_db.get_nsdId(nsId)
        # get current instantiation level
        current_df = ns_db.get_ns_df(nsId)
        current_il = ns_db.get_ns_il(nsId)
    # first get the ns and vnfs descriptors
    nsd_json = nsd_db.get_nsd_json(nsdId, None)
    # for each vnf in the NSD, get its json descriptor
    vnfdIds = nsd_json["nsd"]["vnfdId"]
    vnfds_json = {}
    for vnfdId in vnfdIds:
        vnfds_json[vnfdId] = vnfd_db.get_vnfd_json(vnfdId, None)
    #request RO
    sap_info_pre_scaling = ns_db.get_ns_sap_info(nsId)
    log_queue.put([
        "INFO",
        "*****Time measure for nsId: %s: SOEc SOEc-ROE prepared info for scaling"
        % (nsId)
    ])
    rooe.scale_ns(nsId, nsd_json, vnfds_json, body, current_df, current_il,
                  nestedInfo)
    log_queue.put([
        "INFO",
        "*****Time measure for nsId: %s: SOEc SOEc-ROE updated DBs scaling a NS"
        % (nsId)
    ])
    # check the result of the scaling; it may not have been performed due to lack of resources
    operationId = operation_db.get_operationId(nsId, "INSTANTIATION")
    if ((operation_db.get_operation_status(operationId) == "SUCCESSFULLY_DONE")
            and ns_db.get_ns_status(nsId) == "INSTANTIATED"):
        # maybe we have to update the monitoring jobs: we assume that new performance monitoring jobs
        # will be similar to one already present
        sap_info = ns_db.get_ns_sap_info(nsId)
        log_queue.put(["INFO", "new sapInfo after scaling: %s" % (sap_info)])
        monitoring.update_ns_monitoring(nsId, nsd_json, vnfds_json, sap_info)
        log_queue.put([
            "INFO",
            "*****Time measure for nsId: %s: SOEc SOEc updated monitoring info"
            % nsId
        ])
        log_queue.put([
            "DEBUG",
            "monitoring exporters updated after scaling for nsId %s" % (nsId)
        ])
        # updating the alerts is not needed
    # however, in the case of aiml_scaling the spark job has to be restarted
    else:
        if ns_db.get_ns_status(nsId) == "INSTANTIATED":
            log_queue.put(
                ["DEBUG", "Scaling operation failed due to lack of resources"])
        elif ns_db.get_ns_status(nsId) == "FAILED":
            log_queue.put(
                ["DEBUG", "Scaling operation failed at the MANO platform"])
    aiml_scaling_info = ns_db.get_aiml_info(nsId, "scaling")
    if (aiml_scaling_info and (ns_db.get_ns_status(nsId) == "INSTANTIATED")):
        log_queue.put(
            ["DEBUG", "The AIML platform is triggering the scaling operation"])
        alert_configure.update_ns_aiml_scale_work(nsId, aiml_scaling_info)
        log_queue.put([
            "INFO",
            "*****Time measure for nsId: %s: SOEc SOEc updated AIML alert job"
            % nsId
        ])
    log_queue.put(["INFO", "scale_ns_process finished for nsId %s" % (nsId)])
    log_queue.put([
        "INFO",
        "*****Time measure for nsId: %s: SOEc SOEc finished scaling a nested/regular NS"
        % (nsId)
    ])
    notification_db.create_notification_record({
        "nsId": nsId,
        "type": "fa-gears",
        "text": nsId + " SCALED",
        "time": datetime.now().strftime("%d/%m/%Y %H:%M:%S.%f")
    })
Example #7
    def post(self):
        #data_json = request.data
        # data = json.loads(data_json)
        data = request.get_json(force=True)
        if "alerts" in data:
            alerts = data['alerts']
            for alert in alerts:

                labels = (alert['labels'])
                str_starts_at = str(alert['startsAt'])
                alertname = labels["alertname"]
                log_massage = "Received alert: " + alertname + " startsAt: " + str_starts_at + " status: " + alert[
                    'status']
                log_queue.put(["INFO", log_massage])

                if alert['status'] == 'resolved':
                    alert_db.set_timestamp(alertname, "")
                    continue

                if alert_db.exists_alert_id(alertname):
                    if not is_problem_resolved(alert):
                        log_queue.put([
                            "DEBUG", "Alert is not resolved: " + alertname +
                            " start date = " + str_starts_at
                        ])
                        do_request_for_scaling(alertname)
                    continue
                else:
                    continue
        if "aiml" in data:  # added to manage the notifications from the execution of the aiml model
            notification = data["aiml"]
            ns_id = notification["nsID"]
            nsInstantiationLevel = notification["nsInstantiationLevel"]
            aiml_scaling_info = ns_db.get_aiml_info(ns_id, "scaling")
            currentIL = ns_db.get_ns_il(ns_id)
            if (aiml_scaling_info and (nsInstantiationLevel != currentIL)):
                # 1 - stop the spark job
                alert_configure.delete_spark_streaming_job(
                    aiml_scaling_info["streamingJobId"])
                # 2 - generate the scaling request
                scale_request = {
                    "scaleType": "SCALE_NS",
                    "scaleNsData": {
                        "scaleNsToLevelData": {
                            "nsInstantiationLevel": nsInstantiationLevel
                        }
                    },
                    "scaleTime": "0"
                }
                log_queue.put([
                    "DEBUG",
                    "AIML makes a scaling request for nsId: %s" % ns_id
                ])
                log_queue.put(["DEBUG", "AIML scale request:"])
                log_queue.put(["DEBUG", json.dumps(scale_request, indent=4)])
                make_request_to_so_nbi(ns_id, scale_request)
            else:
                log_queue.put([
                    "DEBUG",
                    "Not generating a scaling operation for nsId: %s" % (ns_id)
                ])

        return "OK", 200
Example #8
def configure_ns_aiml_scale_work(nsId, nsdId, nsd_json, vnfds_json, sap_info):
    """
    Parses the nsd to find possible aiml scale work
    Parameters
    ----------
    nsId:
        String with the Network Service Id 
    nsdId: string
        String with the NSD id (the kind of NS) associated with the nsId
    nsd_json: json 
        Network service descriptor
    vnfds_json: dict
        Dict with json of the virtual network functions
    sap_info: dict
        Information about the service access points associated with the deployed VNFs
    Returns
    -------
    """
    aiml_scale_dict = {}
    aiml_scaling = False
    # steps:
    # 1 - check whether there is a scaling aiml work. Assuming there is one:
    if "aimlRules" in nsd_json["nsd"].keys():
        for rule in nsd_json["nsd"]["aimlRules"]:
            if (rule["problem"] == "scaling"):
                aiml_scaling = True
                aiml_element = rule
                log_queue.put(["DEBUG", "Scaling operation driven by AIML"])
                break
    if (aiml_scaling):
        # 2 - create kafka topic
        problem = aiml_element["problem"]
        kafka_topic = monitoring.create_kafka_topic(nsId, problem)
        log_queue.put(
            ["DEBUG",
             "The created kafka_topic is: %s" % (kafka_topic)])
        if (kafka_topic):
            # 3 - make a call to config manager to create association between monitoring
            #     parameters and kafka topic, so Prometheus publish the info in kafka topic
            scrape_jobs = get_performance_metric_for_aiml_rule(
                nsId, aiml_element, nsd_json)
            log_queue.put(["DEBUG", "Scraper jobs: "])
            log_queue.put(["DEBUG", json.dumps(scrape_jobs, indent=4)])

            scrapes_dict = {}
            collectionPeriod = 1  # we will choose the biggest one among those used
            for scrape_job in scrape_jobs:
                scraper = monitoring.create_prometheus_scraper(
                    nsId, kafka_topic, scrape_job['vnf'], scrape_job['metric'],
                    scrape_job['expression'], scrape_job['collectionPeriod'])
                scrapes_dict.update({scraper['scraperId']: scraper})
                if (scrape_job["collectionPeriod"] > collectionPeriod):
                    collectionPeriod = scrape_job["collectionPeriod"]
            # 4 - download the model and the streaming class, save the files in the spark_folder
            # 4.1 - for the streaming class (jar file), we need a common folder and rename the file as class+kafka_topic
            # 4.2 - for the model, we will create a new folder in the spark folder, named after the kafka_topic (static for the moment)
            # streaming_class = "5growth_polito_2.11-0.1.jar"
            # model_name = "spark-random-forest-model"
            streaming_class = "5growth_vCDN_2.11-0.1.jar"
            model_name = "spark-random-forest-model-vCDN"
            status_file = spark_folder + "/" + kafka_topic + ".txt"
            log_queue.put(["DEBUG", "Status file: %s" % status_file])
            # 5 - start the spark job
            # spark_job_id = start_spark_streaming_job(nsId, kafka_topic, streaming_class, model_name)
            spark_job_id = start_spark_streaming_job(nsId, kafka_topic, streaming_class, model_name, collectionPeriod, \
                          kafka_ip + ":" + kafka_port, alert_target, status_file)
            if spark_job_id is None:
                log_queue.put([
                    "DEBUG",
                    "Failure in the creation of the spark streaming job"
                ])
                return
            log_queue.put(
                ["DEBUG",
                 "The created spark_job_id is: %s" % (spark_job_id)])
            # 6 - publish the currentIL in kafka topic
            currentIL = ns_db.get_ns_il(nsId)
            current_IL = [{
                "type_message": "nsStatusMetrics",
                "metric": {
                    "__name__": "nsInstantiationLevel",
                    "nsId": nsId,
                },
                "value": currentIL
            }]
            monitoring.publish_json_kafka(kafka_topic, current_IL)
            # 7.1 - create the element to be saved in the database
            aiml_scale_dict["topicId"] = kafka_topic
            aiml_scale_dict["streamingClass"] = streaming_class
            aiml_scale_dict["model"] = model_name
            aiml_scale_dict["streamingJobId"] = spark_job_id
            aiml_scale_dict["collectionPeriod"] = collectionPeriod
            # identifiers returned in step 3
            aiml_scale_dict["scrapperJobs"] = scrapes_dict
    # 7.2 - save the info in ns_db. Since there may be other aiml jobs, we save this info as another element
    # save the list of alerts in the database
    ns_db.set_aiml_info(nsId, "scaling", aiml_scale_dict)
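
The AIML-driven scaling path is only taken when the NSD carries an aimlRules entry whose problem is "scaling"; a minimal, hypothetical NSD fragment (any further rule fields consumed by get_performance_metric_for_aiml_rule are omitted here) would be:

# Minimal, hypothetical NSD fragment enabling the AIML scaling work; only the
# "problem" field is read by this function, other rule fields are left out.
nsd_fragment_example = {
    "nsd": {
        "aimlRules": [
            {"problem": "scaling"}
        ]
    }
}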