Example #1
    async def check_RO_version(self):
        tries = 14
        last_error = None
        while True:
            try:
                if self.config["ro_config"].get("ng"):
                    ro_server = NgRoClient(self.loop,
                                           **self.config["ro_config"])
                else:
                    ro_server = ROClient(self.loop, **self.config["ro_config"])
                ro_version = await ro_server.get_version()
                if versiontuple(ro_version) < versiontuple(min_RO_version):
                    raise LcmException(
                        "Incompatible osm/RO version '{}'. Needed '{}' or higher"
                        .format(ro_version, min_RO_version))
                self.logger.info(
                    "Connected to RO version {}".format(ro_version))
                return
            except (ROClientException, NgRoException) as e:
                tries -= 1
                error_text = "Error while connecting to RO on {}: {}".format(
                    self.config["ro_config"]["uri"], e)
                if tries <= 0:
                    self.logger.critical(error_text)
                    raise LcmException(error_text)
                if last_error != error_text:
                    last_error = error_text
                    self.logger.error(
                        error_text +
                        ". Waiting up to {} more seconds".format(5 * tries))
                await asyncio.sleep(5)
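
The versiontuple helper used above is not part of the listing. A minimal sketch of what such a helper could look like, assuming its only job is to make dotted version strings compare numerically rather than lexicographically (hypothetical, not the actual OSM implementation):

def versiontuple(version_text):
    # Hypothetical sketch: "1.10.2" -> (1, 10, 2), so 1.10 sorts after 1.9
    parts = []
    for point in version_text.split("."):
        # keep only the digits of each component ("0rc1" -> "01" -> 1)
        digits = "".join(ch for ch in point if ch.isdigit()) or "0"
        parts.append(int(digits))
    return tuple(parts)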
Example #2
    async def send_data(self, new_config):
        restore_backup = False
        del new_config["_id"]
        del new_config["_admin"]
        new_scrape_configs = []

        # generate a list with the values of scrape_configs
        for scrape_config in new_config["scrape_configs"].values():
            scrape_config = scrape_config.copy()
            # remove nsr_id metadata from scrape_configs
            scrape_config.pop("nsr_id", None)
            new_scrape_configs.append(scrape_config)
        new_config["scrape_configs"] = new_scrape_configs

        try:
            if os.path.exists(self.cfg_file):
                os.rename(self.cfg_file, self.cfg_file_backup)
                restore_backup = True
            with open(self.cfg_file, "w+") as f:
                yaml.safe_dump(new_config,
                               f,
                               indent=4,
                               default_flow_style=False)
            # self.logger.debug("new configuration: {}".format(yaml.safe_dump(new_config, indent=4,
            #                                                                 default_flow_style=False)))
            async with aiohttp.ClientSession() as session:
                async with session.post(self.server + "-/reload") as resp:
                    if resp.status > 204:
                        raise LcmException(await resp.text())
                await asyncio.sleep(5, loop=self.loop)
                # If Prometheus does not accept this configuration, it keeps the old one.
                # To check whether the configuration has been accepted, fetch the active
                # configuration from Prometheus and compare it with the one just sent
                async with session.get(self.server +
                                       "api/v1/status/config") as resp:
                    if resp.status > 204:
                        raise LcmException(await resp.text())
                    current_config = await resp.json()
                    if not self._check_configuration_equal(
                            current_config, new_config):
                        return False
                    else:
                        restore_backup = False
            return True
        except Exception as e:
            self.logger.error("Error updating configuration url={}: {}".format(
                self.server, e))
            return False
        finally:
            if restore_backup:
                try:
                    os.rename(self.cfg_file_backup, self.cfg_file)
                except Exception as e:
                    self.logger.critical(
                        "Exception while rolling back: {}".format(e))
Example #3
def parse_job(job_data: str, variables: dict) -> dict:
    try:
        template = Template(job_data)
        job_parsed = template.render(variables or {})
        return yaml.safe_load(job_parsed)
    except (TemplateError, TemplateNotFound, TemplateSyntaxError) as e:
        # TODO yaml exceptions
        raise LcmException(
            "Error parsing Jinja2 template into a Prometheus job. job_data={}, variables={}. Error={}"
            .format(job_data, variables, e))
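
For illustration, a usage sketch of parse_job; the template text and variable values are made up:

# Render a Jinja2 job template into a Prometheus scrape-config dictionary
job_template = """
job_name: {{ job_name }}
static_configs:
    - targets: ["{{ target }}"]
"""
job = parse_job(job_template, {"job_name": "vnf_metrics", "target": "10.0.0.5:9100"})
# job == {"job_name": "vnf_metrics", "static_configs": [{"targets": ["10.0.0.5:9100"]}]}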
Example #4
        def vim_account_2_RO(vim_account):
            """
            Translate an OSM vim_account id into the RO vim id
            :param vim_account: OSM vim_account id
            :return: The RO vim id
            """
            if vim_account in vim_2_RO:
                return vim_2_RO[vim_account]

            db_vim = self.db.get_one("vim_accounts", {"_id": vim_account})
            if db_vim["_admin"]["operationalState"] != "ENABLED":
                raise LcmException(
                    "VIM={} is not available. operationalState={}".format(
                        vim_account, db_vim["_admin"]["operationalState"]))
            RO_vim_id = db_vim["_admin"]["deployed"]["RO"]
            vim_2_RO[vim_account] = RO_vim_id
            return RO_vim_id
Example #5
    async def kafka_ping(self):
        self.logger.debug("Task kafka_ping Enter")
        consecutive_errors = 0
        first_start = True
        kafka_has_received = False
        self.pings_not_received = 1
        while True:
            try:
                await self.msg_admin.aiowrite(
                    "admin", "ping", {
                        "from": "lcm",
                        "to": "lcm",
                        "worker_id": self.worker_id,
                        "version": lcm_version
                    }, self.loop)
                # the time between pings is short at startup and while pings are not received
                wait_time = self.ping_interval_boot if not kafka_has_received else self.ping_interval_pace
                if not self.pings_not_received:
                    kafka_has_received = True
                self.pings_not_received += 1
                await asyncio.sleep(wait_time, loop=self.loop)
                if self.pings_not_received > 10:
                    raise LcmException(
                        "Not receiving pings from Kafka bus")
                consecutive_errors = 0
                first_start = False
            except LcmException:
                raise
            except Exception as e:
                # first_start means the first attempts after starting, so tolerate
                # more errors and wait longer to let Kafka come up
                if consecutive_errors == (8 if not first_start else 30):
                    self.logger.error(
                        "Task kafka_ping exiting after too many consecutive errors. Exception: {}"
                        .format(e))
                    raise
                consecutive_errors += 1
                self.logger.error(
                    "Task kafka_ping retrying after exception: {}".format(e))
                wait_time = 2 if not first_start else 5
                await asyncio.sleep(wait_time, loop=self.loop)
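
The parenthesized threshold in the except handler above is deliberate. Comparison operators bind tighter than conditional expressions, so without the parentheses the condition would group as (consecutive_errors == 8) if not first_start else 30, which is always truthy while first_start is set and would abort the task on the very first error:

# Precedence sketch: without parentheses, 30 (always truthy) becomes the
# whole "else" branch of the condition instead of the comparison operand.
consecutive_errors, first_start = 1, True
ungrouped = consecutive_errors == 8 if not first_start else 30   # -> 30 (truthy)
grouped = consecutive_errors == (8 if not first_start else 30)   # -> False
print(ungrouped, grouped)  # prints: 30 False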
Example #6
    async def start(self):
        for retry in range(4):
            try:
                # self.logger("Starting prometheus ")
                # read from database
                prometheus_data = self.db.get_one("admin",
                                                  {"_id": "prometheus"},
                                                  fail_on_empty=False)
                if not prometheus_data:
                    self.logger.info("Init db.admin.prometheus content")
                    self.db.create("admin", initial_prometheus_data)
                # send the database config to Prometheus. Ignore loading errors, as
                # Prometheus may still be starting; at least an initial configuration is set
                await self.update()
                return
            except DbException as e:
                if retry == 3:
                    raise LcmException(
                        "Max retries trying to init prometheus configuration: {}"
                        .format(e))
                await asyncio.sleep(5, loop=self.loop)
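
initial_prometheus_data is a module-level constant that is not part of the listing. A plausible minimal shape, inferred from the surrounding examples (send_data() strips "_id" and "_admin", update() locks on "_admin.locked_at"/"_admin.locked_by", and "scrape_configs" is a dict keyed by job id); the exact fields are assumptions:

initial_prometheus_data = {
    "_id": "prometheus",
    "_admin": {"locked_at": 0, "locked_by": None},
    "global": {"scrape_interval": "15s", "evaluation_interval": "15s"},
    "scrape_configs": {},  # one entry per job id; values may carry "nsr_id" metadata
}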
Example #7
    def nsi_update_nsir(self, nsi_update_nsir, db_nsir, nsir_desc_RO):
        """
        Updates database nsir with the RO info for the created vld
        :param nsi_update_nsir: dictionary to be filled with the updated info
        :param db_nsir: content of db_nsir. This is also modified
        :param nsir_desc_RO: nsir descriptor from RO
        :return: Nothing, LcmException is raised on errors
        """

        for vld_index, vld in enumerate(get_iterable(db_nsir, "vld")):
            for net_RO in get_iterable(nsir_desc_RO, "nets"):
                if vld["id"] != net_RO.get("ns_net_osm_id"):
                    continue
                vld["vim-id"] = net_RO.get("vim_net_id")
                vld["name"] = net_RO.get("vim_name")
                vld["status"] = net_RO.get("status")
                vld["status-detailed"] = net_RO.get("error_msg")
                nsi_update_nsir["vld.{}".format(vld_index)] = vld
                break
            else:
                raise LcmException(
                    "nsi_update_nsir: vld={} not found in RO info".format(
                        vld["id"]))
Example #8
    def __init__(self, config_file, loop=None):
        """
        Init, Connect to database, filesystem storage, and messaging
        :param config: two level dictionary with configuration. Top level should contain 'database', 'storage',
        :return: None
        """
        self.db = None
        self.msg = None
        self.msg_admin = None
        self.fs = None
        self.pings_not_received = 1
        self.consecutive_errors = 0
        self.first_start = False

        # logging
        self.logger = logging.getLogger('lcm')
        # get id
        self.worker_id = self.get_process_id()
        # load configuration
        config = self.read_config_file(config_file)
        self.config = config
        self.config["ro_config"] = {
            "ng": config["RO"].get("ng", False),
            "uri": config["RO"].get("uri"),
            "tenant": config.get("tenant", "osm"),
            "logger_name": "lcm.roclient",
            "loglevel": config["RO"].get("loglevel", "ERROR"),
        }
        if not self.config["ro_config"]["uri"]:
            if not self.config["ro_config"]["ng"]:
                self.config["ro_config"][
                    "uri"] = "http://{}:{}/openmano".format(
                        config["RO"]["host"], config["RO"]["port"])
            else:
                self.config["ro_config"]["uri"] = "http://{}:{}/ro".format(
                    config["RO"]["host"], config["RO"]["port"])

        self.loop = loop or asyncio.get_event_loop()

        # logging
        log_format_simple = "%(asctime)s %(levelname)s %(name)s %(filename)s:%(lineno)s %(message)s"
        log_formatter_simple = logging.Formatter(log_format_simple,
                                                 datefmt='%Y-%m-%dT%H:%M:%S')
        config["database"]["logger_name"] = "lcm.db"
        config["storage"]["logger_name"] = "lcm.fs"
        config["message"]["logger_name"] = "lcm.msg"
        if config["global"].get("logfile"):
            file_handler = logging.handlers.RotatingFileHandler(
                config["global"]["logfile"],
                maxBytes=100e6,
                backupCount=9,
                delay=0)
            file_handler.setFormatter(log_formatter_simple)
            self.logger.addHandler(file_handler)
        if not config["global"].get("nologging"):
            str_handler = logging.StreamHandler()
            str_handler.setFormatter(log_formatter_simple)
            self.logger.addHandler(str_handler)

        if config["global"].get("loglevel"):
            self.logger.setLevel(config["global"]["loglevel"])

        # logging other modules
        for k1, logname in self.cfg_logger_name.items():
            config[k1]["logger_name"] = logname
            logger_module = logging.getLogger(logname)
            if config[k1].get("logfile"):
                file_handler = logging.handlers.RotatingFileHandler(
                    config[k1]["logfile"],
                    maxBytes=100e6,
                    backupCount=9,
                    delay=0)
                file_handler.setFormatter(log_formatter_simple)
                logger_module.addHandler(file_handler)
            if config[k1].get("loglevel"):
                logger_module.setLevel(config[k1]["loglevel"])
        self.logger.critical("starting osm/lcm version {} {}".format(
            lcm_version, lcm_version_date))

        # check version of N2VC
        # TODO enhance with int conversion or from distutils.version import LooseVersion
        # or with list(map(int, version.split(".")))
        if versiontuple(n2vc_version) < versiontuple(min_n2vc_version):
            raise LcmException(
                "Incompatible osm/N2VC version '{}'. Needed '{}' or higher".
                format(n2vc_version, min_n2vc_version))
        # check version of common
        if versiontuple(common_version) < versiontuple(min_common_version):
            raise LcmException(
                "Incompatible osm/common version '{}'. Needed '{}' or higher"
                .format(common_version, min_common_version))

        try:
            # TODO check database version
            if config["database"]["driver"] == "mongo":
                self.db = dbmongo.DbMongo()
                self.db.db_connect(config["database"])
            elif config["database"]["driver"] == "memory":
                self.db = dbmemory.DbMemory()
                self.db.db_connect(config["database"])
            else:
                raise LcmException(
                    "Invalid configuration param '{}' at '[database]':'driver'"
                    .format(config["database"]["driver"]))

            if config["storage"]["driver"] == "local":
                self.fs = fslocal.FsLocal()
                self.fs.fs_connect(config["storage"])
            elif config["storage"]["driver"] == "mongo":
                self.fs = fsmongo.FsMongo()
                self.fs.fs_connect(config["storage"])
            else:
                raise LcmException(
                    "Invalid configuration param '{}' at '[storage]':'driver'".
                    format(config["storage"]["driver"]))

            # copy message configuration in order to remove 'group_id' for msg_admin
            config_message = config["message"].copy()
            config_message["loop"] = self.loop
            if config_message["driver"] == "local":
                self.msg = msglocal.MsgLocal()
                self.msg.connect(config_message)
                self.msg_admin = msglocal.MsgLocal()
                config_message.pop("group_id", None)
                self.msg_admin.connect(config_message)
            elif config_message["driver"] == "kafka":
                self.msg = msgkafka.MsgKafka()
                self.msg.connect(config_message)
                self.msg_admin = msgkafka.MsgKafka()
                config_message.pop("group_id", None)
                self.msg_admin.connect(config_message)
            else:
                raise LcmException(
                    "Invalid configuration param '{}' at '[message]':'driver'".
                    format(config["message"]["driver"]))
        except (DbException, FsException, MsgException) as e:
            self.logger.critical(str(e), exc_info=True)
            raise LcmException(str(e))

        # contains created tasks/futures to be able to cancel
        self.lcm_tasks = TaskRegistry(self.worker_id, self.db, self.logger)

        if self.config.get("tsdb") and self.config["tsdb"].get("driver"):
            if self.config["tsdb"]["driver"] == "prometheus":
                self.prometheus = prometheus.Prometheus(
                    self.config["tsdb"], self.worker_id, self.db, self.loop)
            else:
                raise LcmException(
                    "Invalid configuration param '{}' at '[tsdb]':'driver'".
                    format(config["tsdb"]["driver"]))
        else:
            self.prometheus = None
        self.ns = ns.NsLcm(self.db, self.msg, self.fs, self.lcm_tasks,
                           self.config, self.loop, self.prometheus)
        self.netslice = netslice.NetsliceLcm(self.db, self.msg, self.fs,
                                             self.lcm_tasks, self.config,
                                             self.loop, self.ns)
        self.vim = vim_sdn.VimLcm(self.db, self.msg, self.fs, self.lcm_tasks,
                                  self.config, self.loop)
        self.wim = vim_sdn.WimLcm(self.db, self.msg, self.fs, self.lcm_tasks,
                                  self.config, self.loop)
        self.sdn = vim_sdn.SdnLcm(self.db, self.msg, self.fs, self.lcm_tasks,
                                  self.config, self.loop)
        self.k8scluster = vim_sdn.K8sClusterLcm(self.db, self.msg, self.fs,
                                                self.lcm_tasks, self.config,
                                                self.loop)
        self.k8srepo = vim_sdn.K8sRepoLcm(self.db, self.msg, self.fs,
                                          self.lcm_tasks, self.config,
                                          self.loop)
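
get_process_id, called in the constructor above to derive worker_id, is not shown either. A plausible sketch, assuming the id should be stable per container (reading the container id from /proc/self/cgroup and the random fallback are both assumptions, not the confirmed OSM implementation):

import random

def get_process_id(self):
    # Return a unique id for this LCM worker: the docker container id when
    # available, otherwise a random 12-hex-digit string
    try:
        with open("/proc/self/cgroup") as cgroup_file:
            text_id = cgroup_file.readline().rpartition("/")[-1].strip()[:12]
            if text_id:
                return text_id
    except Exception:
        pass
    return "".join(random.choice("0123456789abcdef") for _ in range(12))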
Example #9
    async def instantiate(self, nsir_id, nsilcmop_id):

        # Try to lock HA task here
        task_is_locked_by_me = self.lcm_tasks.lock_HA('nsi', 'nsilcmops',
                                                      nsilcmop_id)
        if not task_is_locked_by_me:
            return

        logging_text = "Task netslice={} instantiate={} ".format(
            nsir_id, nsilcmop_id)
        self.logger.debug(logging_text + "Enter")
        # get all needed from database
        exc = None
        db_nsir = None
        db_nsilcmop = None
        db_nsir_update = {"_admin.nsilcmop": nsilcmop_id}
        db_nsilcmop_update = {}
        nsilcmop_operation_state = None
        vim_2_RO = {}
        RO = ROclient.ROClient(self.loop, **self.ro_config)

        def ip_profile_2_RO(ip_profile):
            RO_ip_profile = deepcopy(ip_profile)
            if "dns-server" in RO_ip_profile:
                if isinstance(RO_ip_profile["dns-server"], list):
                    RO_ip_profile["dns-address"] = []
                    for ds in RO_ip_profile.pop("dns-server"):
                        RO_ip_profile["dns-address"].append(ds['address'])
                else:
                    RO_ip_profile["dns-address"] = RO_ip_profile.pop(
                        "dns-server")
            if RO_ip_profile.get("ip-version") == "ipv4":
                RO_ip_profile["ip-version"] = "IPv4"
            if RO_ip_profile.get("ip-version") == "ipv6":
                RO_ip_profile["ip-version"] = "IPv6"
            if "dhcp-params" in RO_ip_profile:
                RO_ip_profile["dhcp"] = RO_ip_profile.pop("dhcp-params")
            return RO_ip_profile

        def vim_account_2_RO(vim_account):
            """
            Translate an OSM vim_account id into the RO vim id
            :param vim_account: OSM vim_account id
            :return: The RO vim id
            """
            if vim_account in vim_2_RO:
                return vim_2_RO[vim_account]

            db_vim = self.db.get_one("vim_accounts", {"_id": vim_account})
            if db_vim["_admin"]["operationalState"] != "ENABLED":
                raise LcmException(
                    "VIM={} is not available. operationalState={}".format(
                        vim_account, db_vim["_admin"]["operationalState"]))
            RO_vim_id = db_vim["_admin"]["deployed"]["RO"]
            vim_2_RO[vim_account] = RO_vim_id
            return RO_vim_id

        async def netslice_scenario_create(self, vld_item, nsir_id, db_nsir,
                                           db_nsir_admin, db_nsir_update):
            """
            Create a network slice VLD through RO Scenario
            :param vld_id The VLD id inside nsir to be created
            :param nsir_id The nsir id
            """
            ip_vld = None
            mgmt_network = False
            RO_vld_sites = []
            vld_id = vld_item["id"]
            netslice_vld = vld_item
            # logging_text = "Task netslice={} instantiate_vld={} ".format(nsir_id, vld_id)
            # self.logger.debug(logging_text + "Enter")

            vld_shared = None
            for shared_nsrs_item in get_iterable(vld_item, "shared-nsrs-list"):
                _filter = {
                    "_id.ne": nsir_id,
                    "_admin.nsrs-detailed-list.ANYINDEX.nsrId":
                    shared_nsrs_item
                }
                shared_nsi = self.db.get_one("nsis",
                                             _filter,
                                             fail_on_empty=False,
                                             fail_on_more=False)
                if shared_nsi:
                    for vlds in get_iterable(shared_nsi["_admin"]["deployed"],
                                             "RO"):
                        if vld_id == vlds["vld_id"]:
                            vld_shared = {
                                "instance_scenario_id":
                                vlds["netslice_scenario_id"],
                                "osm_id":
                                vld_id
                            }
                            break
                    break

            # Creating netslice-vld at RO
            RO_nsir = deep_get(db_nsir, ("_admin", "deployed", "RO"), [])

            if vld_id in RO_nsir:
                db_nsir_update["_admin.deployed.RO"] = RO_nsir

            # If the netslice-vld doesn't exist, create it
            else:
                # TODO: Check VDU type in all descriptors finding SRIOV / PT
                # Updating network names and datacenters from instantiation parameters for each VLD
                RO_ns_params = {}
                RO_ns_params["name"] = netslice_vld["name"]
                RO_ns_params["datacenter"] = vim_account_2_RO(
                    db_nsir["instantiation_parameters"]["vimAccountId"])
                for instantiation_params_vld in get_iterable(
                        db_nsir["instantiation_parameters"], "netslice-vld"):
                    if instantiation_params_vld.get(
                            "name") == netslice_vld["name"]:
                        ip_vld = deepcopy(instantiation_params_vld)

                if netslice_vld.get("mgmt-network"):
                    mgmt_network = True

                # Creating scenario if vim-network-name / vim-network-id are present as instantiation parameter
                # Use vim-network-id instantiation parameter
                vim_network_option = None
                if ip_vld:
                    if ip_vld.get("vim-network-id"):
                        vim_network_option = "vim-network-id"
                    elif ip_vld.get("vim-network-name"):
                        vim_network_option = "vim-network-name"
                    if ip_vld.get("ip-profile"):
                        populate_dict(
                            RO_ns_params,
                            ("networks", netslice_vld["name"], "ip-profile"),
                            ip_profile_2_RO(ip_vld["ip-profile"]))

                if vim_network_option:
                    if ip_vld.get(vim_network_option):
                        if isinstance(ip_vld.get(vim_network_option), list):
                            for vim_net_id in ip_vld.get(vim_network_option):
                                for vim_account, vim_net in vim_net_id.items():
                                    RO_vld_sites.append({
                                        "netmap-use":
                                        vim_net,
                                        "datacenter":
                                        vim_account_2_RO(vim_account)
                                    })
                        elif isinstance(ip_vld.get(vim_network_option), dict):
                            for vim_account, vim_net in ip_vld.get(
                                    vim_network_option).items():
                                RO_vld_sites.append({
                                    "netmap-use":
                                    vim_net,
                                    "datacenter":
                                    vim_account_2_RO(vim_account)
                                })
                        else:
                            RO_vld_sites.append({
                                "netmap-use":
                                ip_vld[vim_network_option],
                                "datacenter":
                                vim_account_2_RO(netslice_vld["vimAccountId"])
                            })

                # Use default netslice vim-network-name from template
                else:
                    for nss_conn_point_ref in get_iterable(
                            netslice_vld, "nss-connection-point-ref"):
                        if nss_conn_point_ref.get("vimAccountId"):
                            if nss_conn_point_ref[
                                    "vimAccountId"] != netslice_vld[
                                        "vimAccountId"]:
                                RO_vld_sites.append({
                                    "netmap-create":
                                    None,
                                    "datacenter":
                                    vim_account_2_RO(
                                        nss_conn_point_ref["vimAccountId"])
                                })

                if vld_shared:
                    populate_dict(
                        RO_ns_params,
                        ("networks", netslice_vld["name"], "use-network"),
                        vld_shared)

                if RO_vld_sites:
                    populate_dict(RO_ns_params,
                                  ("networks", netslice_vld["name"], "sites"),
                                  RO_vld_sites)

                RO_ns_params["scenario"] = {
                    "nets": [{
                        "name": netslice_vld["name"],
                        "external": mgmt_network,
                        "type": "bridge"
                    }]
                }

                # self.logger.debug(logging_text + step)
                desc = await RO.create("ns", descriptor=RO_ns_params)
                db_nsir_update_RO = {}
                db_nsir_update_RO["netslice_scenario_id"] = desc["uuid"]
                db_nsir_update_RO["vld_id"] = RO_ns_params["name"]
                db_nsir_update["_admin.deployed.RO"].append(db_nsir_update_RO)

        def overwrite_nsd_params(self, db_nsir, nslcmop):
            RO_list = []
            vld_op_list = []
            vld = None
            nsr_id = nslcmop.get("nsInstanceId")
            # Overwrite instantiation parameters in netslice runtime
            if db_nsir.get("_admin"):
                if db_nsir["_admin"].get("deployed"):
                    db_admin_deployed_nsir = db_nsir["_admin"].get("deployed")
                    if db_admin_deployed_nsir.get("RO"):
                        RO_list = db_admin_deployed_nsir["RO"]

            for RO_item in RO_list:
                for netslice_vld in get_iterable(db_nsir["_admin"],
                                                 "netslice-vld"):
                    # if the vld in _admin matches the netslice-vld, go through its CPs
                    if RO_item.get("vld_id") == netslice_vld.get("id"):
                        # Search the CPs of the netslice-vld that match the nst:netslice-subnet
                        for nss_cp_item in get_iterable(
                                netslice_vld, "nss-connection-point-ref"):
                            # Search the nst netslice-subnet that matches
                            for nss in get_iterable(db_nsir["_admin"],
                                                    "netslice-subnet"):
                                # Check whether nss-ref matches the nst nss-id
                                if nss_cp_item["nss-ref"] == nss["nss-id"]:
                                    db_nsds = self.db.get_one(
                                        "nsds", {"_id": nss["nsdId"]})
                                    # Go to the nsd and search the CP that matches the nst CP to get vld-id-ref
                                    for cp_nsd in db_nsds.get(
                                            "connection-point", ()):
                                        if cp_nsd["name"] == nss_cp_item[
                                                "nsd-connection-point-ref"]:
                                            if nslcmop.get("operationParams"):
                                                if nslcmop[
                                                        "operationParams"].get(
                                                            "nsName"
                                                        ) == nss["nsName"]:
                                                    vld_id = RO_item["vld_id"]
                                                    netslice_scenario_id = RO_item[
                                                        "netslice_scenario_id"]
                                                    nslcmop_vld = {}
                                                    nslcmop_vld["ns-net"] = {
                                                        vld_id:
                                                        netslice_scenario_id
                                                    }
                                                    nslcmop_vld[
                                                        "name"] = cp_nsd[
                                                            "vld-id-ref"]
                                                    for vld in get_iterable(
                                                            nslcmop[
                                                                "operationParams"],
                                                            "vld"):
                                                        if vld["name"] == cp_nsd[
                                                                "vld-id-ref"]:
                                                            nslcmop_vld.update(
                                                                vld)
                                                    vld_op_list.append(
                                                        nslcmop_vld)
            nslcmop["operationParams"]["vld"] = vld_op_list
            self.update_db_2("nslcmops", nslcmop["_id"],
                             {"operationParams.vld": vld_op_list})
            return nsr_id, nslcmop

        try:
            # wait for any previous tasks in progress
            await self.lcm_tasks.waitfor_related_HA('nsi', 'nsilcmops',
                                                    nsilcmop_id)

            step = "Getting nsir={} from db".format(nsir_id)
            db_nsir = self.db.get_one("nsis", {"_id": nsir_id})
            step = "Getting nsilcmop={} from db".format(nsilcmop_id)
            db_nsilcmop = self.db.get_one("nsilcmops", {"_id": nsilcmop_id})

            start_deploy = time()
            nsi_params = db_nsilcmop.get("operationParams")
            if nsi_params and nsi_params.get("timeout_nsi_deploy"):
                timeout_nsi_deploy = nsi_params["timeout_nsi_deploy"]
            else:
                timeout_nsi_deploy = self.timeout.get("nsi_deploy",
                                                      self.timeout_nsi_deploy)

            # Keep the _admin section at hand to track network service records status in the netslice
            nsir_admin = db_nsir_admin = db_nsir.get("_admin")

            step = "Creating slice operational-status init"
            # Slice status Creating
            db_nsir_update["detailed-status"] = "creating"
            db_nsir_update["operational-status"] = "init"
            db_nsir_update["_admin.nsiState"] = "INSTANTIATED"

            step = "Instantiating netslice VLDs before NS instantiation"
            # Creating netslice VLDs networking before NS instantiation
            db_nsir_update["detailed-status"] = step
            self.update_db_2("nsis", nsir_id, db_nsir_update)
            db_nsir_update["_admin.deployed.RO"] = db_nsir_admin["deployed"][
                "RO"]
            for vld_item in get_iterable(nsir_admin, "netslice-vld"):
                await netslice_scenario_create(self, vld_item, nsir_id,
                                               db_nsir, db_nsir_admin,
                                               db_nsir_update)
            self.update_db_2("nsis", nsir_id, db_nsir_update)

            step = "Instantiating netslice subnets"
            db_nsir_update["detailed-status"] = step
            self.update_db_2("nsis", nsir_id, db_nsir_update)

            db_nsir = self.db.get_one("nsis", {"_id": nsir_id})

            # Check status of the VLDs and wait for creation
            # netslice_scenarios = db_nsir["_admin"]["deployed"]["RO"]
            # db_nsir_update_RO = deepcopy(netslice_scenarios)
            # for netslice_scenario in netslice_scenarios:
            #    await netslice_scenario_check(self, netslice_scenario["netslice_scenario_id"],
            #                                  nsir_id, db_nsir_update_RO)

            # db_nsir_update["_admin.deployed.RO"] = db_nsir_update_RO
            # self.update_db_2("nsis", nsir_id, db_nsir_update)

            # Iterate over the network services operation ids to instantiate NSs
            step = "Instantiating Netslice Subnets"
            db_nsir = self.db.get_one("nsis", {"_id": nsir_id})
            nslcmop_ids = db_nsilcmop["operationParams"].get("nslcmops_ids")
            for nslcmop_id in nslcmop_ids:
                nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
                # Overwrite the netslice-vld vim-net-id into the ns operation params
                nsr_id, nslcmop = overwrite_nsd_params(self, db_nsir, nslcmop)
                step = "Launching ns={} instantiate={} task".format(
                    nsr_id, nslcmop_id)
                task = asyncio.ensure_future(
                    self.ns.instantiate(nsr_id, nslcmop_id))
                self.lcm_tasks.register("ns", nsr_id, nslcmop_id,
                                        "ns_instantiate", task)

            # Wait until Network Slice is ready
            step = " Waiting nsi ready."
            nsrs_detailed_list_old = None
            self.logger.debug(logging_text + step)

            # For HA, status is checked from the database, as the ns operation may be managed by another LCM worker
            while time() <= start_deploy + timeout_nsi_deploy:
                # Check ns instantiation status
                nsi_ready = True
                nsir = self.db.get_one("nsis", {"_id": nsir_id})
                nsrs_detailed_list = nsir["_admin"]["nsrs-detailed-list"]
                nsrs_detailed_list_new = []
                for nslcmop_item in nslcmop_ids:
                    nslcmop = self.db.get_one("nslcmops",
                                              {"_id": nslcmop_item})
                    status = nslcmop.get("operationState")
                    # TODO: (future improvement) other possible status: ROLLING_BACK,ROLLED_BACK
                    for nss in nsrs_detailed_list:
                        if nss["nsrId"] == nslcmop["nsInstanceId"]:
                            nss.update({
                                "nsrId":
                                nslcmop["nsInstanceId"],
                                "status":
                                nslcmop["operationState"],
                                "detailed-status":
                                nslcmop.get("detailed-status"),
                                "instantiated":
                                True
                            })
                            nsrs_detailed_list_new.append(nss)
                    if status not in [
                            "COMPLETED", "PARTIALLY_COMPLETED", "FAILED",
                            "FAILED_TEMP"
                    ]:
                        nsi_ready = False

                if nsrs_detailed_list_new != nsrs_detailed_list_old:
                    nsrs_detailed_list_old = nsrs_detailed_list_new
                    self.update_db_2(
                        "nsis", nsir_id,
                        {"_admin.nsrs-detailed-list": nsrs_detailed_list_new})

                if nsi_ready:
                    error_list = []
                    step = "Network Slice Instance instantiated"
                    for nss in nsrs_detailed_list:
                        if nss["status"] in ("FAILED", "FAILED_TEMP"):
                            error_list.append("NS {} {}: {}".format(
                                nss["nsrId"], nss["status"],
                                nss["detailed-status"]))
                    if error_list:
                        step = "instantiating"
                        raise LcmException("; ".join(error_list))
                    break

                # TODO: future improvement due to synchronism -> await asyncio.wait(vca_task_list, timeout=300)
                await asyncio.sleep(5, loop=self.loop)

            else:  # timeout_nsi_deploy reached:
                raise LcmException("Timeout waiting nsi to be ready.")

            db_nsir_update["operational-status"] = "running"
            db_nsir_update["detailed-status"] = "done"
            db_nsir_update["config-status"] = "configured"
            db_nsilcmop_update[
                "operationState"] = nsilcmop_operation_state = "COMPLETED"
            db_nsilcmop_update["statusEnteredTime"] = time()
            db_nsilcmop_update["detailed-status"] = "done"
            return

        except (LcmException, DbException) as e:
            self.logger.error(logging_text +
                              "Exit Exception while '{}': {}".format(step, e))
            exc = e
        except asyncio.CancelledError:
            self.logger.error(logging_text +
                              "Cancelled Exception while '{}'".format(step))
            exc = "Operation was cancelled"
        except Exception as e:
            exc = traceback.format_exc()
            self.logger.critical(logging_text +
                                 "Exit Exception {} while '{}': {}".format(
                                     type(e).__name__, step, e),
                                 exc_info=True)
        finally:
            if exc:
                if db_nsir:
                    db_nsir_update["detailed-status"] = "ERROR {}: {}".format(
                        step, exc)
                    db_nsir_update["operational-status"] = "failed"
                    db_nsir_update["config-status"] = "configured"
                if db_nsilcmop:
                    db_nsilcmop_update[
                        "detailed-status"] = "FAILED {}: {}".format(step, exc)
                    db_nsilcmop_update[
                        "operationState"] = nsilcmop_operation_state = "FAILED"
                    db_nsilcmop_update["statusEnteredTime"] = time()
            try:
                if db_nsir:
                    db_nsir_update["_admin.nsilcmop"] = None
                    self.update_db_2("nsis", nsir_id, db_nsir_update)
                if db_nsilcmop:
                    self.update_db_2("nsilcmops", nsilcmop_id,
                                     db_nsilcmop_update)
            except DbException as e:
                self.logger.error(logging_text +
                                  "Cannot update database: {}".format(e))
            if nsilcmop_operation_state:
                try:
                    await self.msg.aiowrite(
                        "nsi", "instantiated", {
                            "nsir_id": nsir_id,
                            "nsilcmop_id": nsilcmop_id,
                            "operationState": nsilcmop_operation_state
                        })
                except Exception as e:
                    self.logger.error(
                        logging_text +
                        "kafka_write notification Exception {}".format(e))
            self.logger.debug(logging_text + "Exit")
            self.lcm_tasks.remove("nsi", nsir_id, nsilcmop_id,
                                  "nsi_instantiate")
Example #10
    async def terminate(self, nsir_id, nsilcmop_id):

        # Try to lock HA task here
        task_is_locked_by_me = self.lcm_tasks.lock_HA('nsi', 'nsilcmops',
                                                      nsilcmop_id)
        if not task_is_locked_by_me:
            return

        logging_text = "Task nsi={} terminate={} ".format(nsir_id, nsilcmop_id)
        self.logger.debug(logging_text + "Enter")
        exc = None
        db_nsir = None
        db_nsilcmop = None
        db_nsir_update = {"_admin.nsilcmop": nsilcmop_id}
        db_nsilcmop_update = {}
        RO = ROclient.ROClient(self.loop, **self.ro_config)
        nsir_deployed = None
        failed_detail = []  # annotates all failed error messages
        nsilcmop_operation_state = None
        autoremove = False  # autoremove after terminated
        try:
            # wait for any previous tasks in progress
            await self.lcm_tasks.waitfor_related_HA('nsi', 'nsilcmops',
                                                    nsilcmop_id)

            step = "Getting nsir={} from db".format(nsir_id)
            db_nsir = self.db.get_one("nsis", {"_id": nsir_id})
            nsir_deployed = deepcopy(db_nsir["_admin"].get("deployed"))
            step = "Getting nsilcmop={} from db".format(nsilcmop_id)
            db_nsilcmop = self.db.get_one("nsilcmops", {"_id": nsilcmop_id})

            # TODO: Check whether it makes sense to check nsiState=NOT_INSTANTIATED on terminate
            # CASE: Instance was terminated but there is a second request to terminate the instance
            if db_nsir["_admin"]["nsiState"] == "NOT_INSTANTIATED":
                return

            # Slice status Terminating
            db_nsir_update["operational-status"] = "terminating"
            db_nsir_update["config-status"] = "terminating"
            db_nsir_update["detailed-status"] = "Terminating Netslice subnets"
            self.update_db_2("nsis", nsir_id, db_nsir_update)

            # Initialize the list that keeps track of network service records status in the netslice
            nsrs_detailed_list = []

            # Iterate over the network services operation ids to terminate NSs
            # TODO: (future improvement) find another way to check the tasks instead of polling
            # -> https://docs.python.org/3/library/asyncio-task.html#waiting-primitives
            # steps: declare ns_tasks, add task when terminate is called, await asyncio.wait(vca_task_list, timeout=300)
            step = "Terminating Netslice Subnets"
            nslcmop_ids = db_nsilcmop["operationParams"].get("nslcmops_ids")
            nslcmop_new = []
            for nslcmop_id in nslcmop_ids:
                nslcmop = self.db.get_one("nslcmops", {"_id": nslcmop_id})
                nsr_id = nslcmop["operationParams"].get("nsInstanceId")
                nss_in_use = self.db.get_list(
                    "nsis", {
                        "_admin.netslice-vld.ANYINDEX.shared-nsrs-list":
                        nsr_id,
                        "operational-status": {
                            "$nin": ["terminated", "failed"]
                        }
                    })
                if len(nss_in_use) < 2:
                    task = asyncio.ensure_future(
                        self.ns.terminate(nsr_id, nslcmop_id))
                    self.lcm_tasks.register("ns", nsr_id, nslcmop_id,
                                            "ns_instantiate", task)
                    nslcmop_new.append(nslcmop_id)
                else:
                    # Update the nslcmop that is shared with an active nsi
                    netsliceInstanceId = db_nsir["_id"]
                    for nsis_item in nss_in_use:
                        if db_nsir["_id"] != nsis_item["_id"]:
                            netsliceInstanceId = nsis_item["_id"]
                            break
                    self.db.set_one("nslcmops", {"_id": nslcmop_id}, {
                        "operationParams.netsliceInstanceId":
                        netsliceInstanceId
                    })
            self.db.set_one("nsilcmops", {"_id": nsilcmop_id},
                            {"operationParams.nslcmops_ids": nslcmop_new})

            # Wait until Network Slice is terminated
            step = nsir_status_detailed = "Waiting for nsi to be terminated. nsi_id={}".format(
                nsir_id)
            nsrs_detailed_list_old = None
            self.logger.debug(logging_text + step)

            termination_timeout = 2 * 3600  # Two hours
            while termination_timeout > 0:
                # Check ns termination status
                nsi_ready = True
                db_nsir = self.db.get_one("nsis", {"_id": nsir_id})
                nsrs_detailed_list = db_nsir["_admin"].get(
                    "nsrs-detailed-list")
                nsrs_detailed_list_new = []
                for nslcmop_item in nslcmop_ids:
                    nslcmop = self.db.get_one("nslcmops",
                                              {"_id": nslcmop_item})
                    status = nslcmop["operationState"]
                    # TODO: (future improvement) other possible status: ROLLING_BACK,ROLLED_BACK
                    for nss in nsrs_detailed_list:
                        if nss["nsrId"] == nslcmop["nsInstanceId"]:
                            nss.update({
                                "nsrId":
                                nslcmop["nsInstanceId"],
                                "status":
                                nslcmop["operationState"],
                                "detailed-status":
                                nsir_status_detailed +
                                "; {}".format(nslcmop.get("detailed-status"))
                            })
                            nsrs_detailed_list_new.append(nss)
                    if status not in [
                            "COMPLETED", "PARTIALLY_COMPLETED", "FAILED",
                            "FAILED_TEMP"
                    ]:
                        nsi_ready = False

                if nsrs_detailed_list_new != nsrs_detailed_list_old:
                    nsrs_detailed_list_old = nsrs_detailed_list_new
                    self.update_db_2(
                        "nsis", nsir_id,
                        {"_admin.nsrs-detailed-list": nsrs_detailed_list_new})

                if nsi_ready:
                    # Check if it is the last nss in use and mark instantiated: False
                    db_nsir = self.db.get_one("nsis", {"_id": nsir_id})
                    nsrs_detailed_list = db_nsir["_admin"].get(
                        "nsrs-detailed-list")
                    for nss in nsrs_detailed_list:
                        _filter = {
                            "_admin.nsrs-detailed-list.ANYINDEX.nsrId":
                            nss["nsrId"],
                            "operational-status.ne":
                            "terminated",
                            "_id.ne":
                            nsir_id
                        }
                        nsis_list = self.db.get_one("nsis",
                                                    _filter,
                                                    fail_on_empty=False,
                                                    fail_on_more=False)
                        if not nsis_list:
                            nss.update({"instantiated": False})

                    step = "Network Slice Instance is terminated. nsi_id={}".format(
                        nsir_id)
                    for items in nsrs_detailed_list:
                        if "FAILED" in items.values():
                            raise LcmException(
                                "Error terminating NSI: {}".format(nsir_id))
                    break

                await asyncio.sleep(5, loop=self.loop)
                termination_timeout -= 5

            if termination_timeout <= 0:
                raise LcmException(
                    "Timeout waiting for nsi to be terminated. nsi_id={}".format(
                        nsir_id))

            # Delete netslice-vlds
            RO_nsir_id = RO_delete_action = None
            for nsir_deployed_RO in get_iterable(nsir_deployed, "RO"):
                RO_nsir_id = nsir_deployed_RO.get("netslice_scenario_id")
                try:
                    step = db_nsir_update[
                        "detailed-status"] = "Deleting netslice-vld at RO"
                    db_nsilcmop_update[
                        "detailed-status"] = "Deleting netslice-vld at RO"
                    self.logger.debug(logging_text + step)
                    desc = await RO.delete("ns", RO_nsir_id)
                    RO_delete_action = desc["action_id"]
                    nsir_deployed_RO["vld_delete_action_id"] = RO_delete_action
                    nsir_deployed_RO["vld_status"] = "DELETING"
                    db_nsir_update["_admin.deployed"] = nsir_deployed
                    self.update_db_2("nsis", nsir_id, db_nsir_update)
                    if RO_delete_action:
                        # wait until NS is deleted from VIM
                        step = "Waiting ns deleted from VIM. RO_id={}".format(
                            RO_nsir_id)
                        self.logger.debug(logging_text + step)
                except ROclient.ROClientException as e:
                    if e.http_code == 404:  # not found
                        nsir_deployed_RO["vld_id"] = None
                        nsir_deployed_RO["vld_status"] = "DELETED"
                        self.logger.debug(
                            logging_text +
                            "RO_ns_id={} already deleted".format(RO_nsir_id))
                    elif e.http_code == 409:  # conflict
                        failed_detail.append(
                            "RO_ns_id={} delete conflict: {}".format(
                                RO_nsir_id, e))
                        self.logger.debug(logging_text + failed_detail[-1])
                    else:
                        failed_detail.append(
                            "RO_ns_id={} delete error: {}".format(
                                RO_nsir_id, e))
                        self.logger.error(logging_text + failed_detail[-1])

                if failed_detail:
                    self.logger.error(logging_text + " ;".join(failed_detail))
                    db_nsir_update["operational-status"] = "failed"
                    db_nsir_update[
                        "detailed-status"] = "Deletion errors " + "; ".join(
                            failed_detail)
                    db_nsilcmop_update["detailed-status"] = "; ".join(
                        failed_detail)
                    db_nsilcmop_update[
                        "operationState"] = nsilcmop_operation_state = "FAILED"
                    db_nsilcmop_update["statusEnteredTime"] = time()
                else:
                    db_nsir_update["operational-status"] = "terminating"
                    db_nsir_update["config-status"] = "terminating"
                    db_nsir_update["_admin.nsiState"] = "NOT_INSTANTIATED"
                    db_nsilcmop_update[
                        "operationState"] = nsilcmop_operation_state = "COMPLETED"
                    db_nsilcmop_update["statusEnteredTime"] = time()
                    if db_nsilcmop["operationParams"].get("autoremove"):
                        autoremove = True

            db_nsir_update["detailed-status"] = "done"
            db_nsir_update["operational-status"] = "terminated"
            db_nsir_update["config-status"] = "terminated"
            db_nsilcmop_update["statusEnteredTime"] = time()
            db_nsilcmop_update["detailed-status"] = "done"
            return

        except (LcmException, DbException) as e:
            self.logger.error(logging_text +
                              "Exit Exception while '{}': {}".format(step, e))
            exc = e
        except asyncio.CancelledError:
            self.logger.error(logging_text +
                              "Cancelled Exception while '{}'".format(step))
            exc = "Operation was cancelled"
        except Exception as e:
            exc = traceback.format_exc()
            self.logger.critical(logging_text +
                                 "Exit Exception {} while '{}': {}".format(
                                     type(e).__name__, step, e),
                                 exc_info=True)
        finally:
            if exc:
                if db_nsir:
                    db_nsir_update["_admin.deployed"] = nsir_deployed
                    db_nsir_update["detailed-status"] = "ERROR {}: {}".format(
                        step, exc)
                    db_nsir_update["operational-status"] = "failed"
                if db_nsilcmop:
                    db_nsilcmop_update[
                        "detailed-status"] = "FAILED {}: {}".format(step, exc)
                    db_nsilcmop_update[
                        "operationState"] = nsilcmop_operation_state = "FAILED"
                    db_nsilcmop_update["statusEnteredTime"] = time()
            try:
                if db_nsir:
                    db_nsir_update["_admin.deployed"] = nsir_deployed
                    db_nsir_update["_admin.nsilcmop"] = None
                    self.update_db_2("nsis", nsir_id, db_nsir_update)
                if db_nsilcmop:
                    self.update_db_2("nsilcmops", nsilcmop_id,
                                     db_nsilcmop_update)
            except DbException as e:
                self.logger.error(logging_text +
                                  "Cannot update database: {}".format(e))

            if nsilcmop_operation_state:
                try:
                    await self.msg.aiowrite(
                        "nsi",
                        "terminated", {
                            "nsir_id": nsir_id,
                            "nsilcmop_id": nsilcmop_id,
                            "operationState": nsilcmop_operation_state,
                            "autoremove": autoremove
                        },
                        loop=self.loop)
                except Exception as e:
                    self.logger.error(
                        logging_text +
                        "kafka_write notification Exception {}".format(e))
            self.logger.debug(logging_text + "Exit")
            self.lcm_tasks.remove("nsi", nsir_id, nsilcmop_id, "nsi_terminate")
Example #11
    async def update(self,
                     add_jobs: dict = None,
                     remove_jobs: list = None) -> bool:
        """

        :param add_jobs: dictionary with {job_id_1: job_content, job_id_2: job_content}
        :param remove_jobs: list with jobs to remove [job_id_1, job_id_2]
        :return: result. False if Prometheus rejects the configuration; an exception is raised on other errors
        """
        for retry in range(4):
            result = True
            if retry:  # do not wait on the first attempt
                await asyncio.sleep(self.PROMETHEUS_LOCKED_TIME / 2,
                                    loop=self.loop)

            # lock database
            now = time()
            if not self.db.set_one("admin",
                                   q_filter={
                                       "_id":
                                       "prometheus",
                                       "_admin.locked_at.lt":
                                       now - self.PROMETHEUS_LOCKED_TIME
                                   },
                                   update_dict={
                                       "_admin.locked_at": now,
                                       "_admin.locked_by": self.worker_id
                                   },
                                   fail_on_empty=False):
                continue
            # read database
            prometheus_data = self.db.get_one("admin", {"_id": "prometheus"})
            update_dict = {"_admin.locked_at": 0, "_admin.locked_by": None}

            # Make changes from prometheus_incremental
            push_dict = pull_dict = None
            if add_jobs or remove_jobs:
                log_text_list = []
                if add_jobs:
                    log_text_list.append("adding jobs: {}".format(
                        list(add_jobs.keys())))
                    prometheus_data["scrape_configs"].update(add_jobs)
                    push_dict = {
                        "scrape_configs." + job_id: job_data
                        for job_id, job_data in add_jobs.items()
                    }
                elif remove_jobs:
                    log_text_list.append("removing jobs: {}".format(
                        list(remove_jobs)))
                    for job_id in remove_jobs:
                        prometheus_data["scrape_configs"].pop(job_id, None)
                    pull_dict = {
                        "scrape_configs." + job_id: None
                        for job_id in remove_jobs
                    }
                self.logger.debug("Updating. " + ". ".join(log_text_list))

            if not await self.send_data(prometheus_data):
                self.logger.error(
                    "Cannot update add_jobs: {}. remove_jobs: {}".format(
                        add_jobs, remove_jobs))
                push_dict = pull_dict = None
                result = False

            # unlock database
            if push_dict:
                update_dict.update(push_dict)
            if push_dict or pull_dict:
                update_dict["_admin.modified_at"] = now
            if not self.db.set_one("admin", {
                    "_id": "prometheus",
                    "_admin.locked_at": now,
                    "_admin.locked_by": self.worker_id
            },
                                   update_dict=update_dict,
                                   unset=pull_dict,
                                   fail_on_empty=False):
                continue
            return result
        raise LcmException(
            "Cannot update prometheus database. Reached max retries")