Esempio n. 1
0
    def run(self):
        try:
            # Lock nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            sds_name = self.parameters['DetectedCluster.sds_pkg_name']

            if not self.parameters.get('import_after_expand', False) and \
                not self.parameters.get('import_after_create', False):

                # check if gdeploy in already provisioned in this cluster
                # if no it has to be provisioned here
                if sds_name.find("gluster") > -1 and \
                    not self.parameters.get("gdeploy_provisioned", False) and \
                    not self._probe_and_mark_provisioner(
                        self.parameters["Node[]"], integration_id
                    ):
                    create_cluster_utils.install_gdeploy()
                    create_cluster_utils.install_python_gdeploy()
                    ssh_job_ids = create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters)

                    while True:
                        gevent.sleep(3)
                        all_status = {}
                        for job_id in ssh_job_ids:
                            all_status[job_id] = NS._int.client.read(
                                "/queue/%s/status" % job_id).value

                        _failed = {
                            _jid: status
                            for _jid, status in all_status.iteritems()
                            if status == "failed"
                        }
                        if _failed:
                            raise AtomExecutionFailedError(
                                "SSH setup failed for jobs %s cluster %s" %
                                (str(_failed), integration_id))
                        if all([
                                status == "finished"
                                for status in all_status.values()
                        ]):
                            Event(
                                Message(
                                    job_id=self.parameters['job_id'],
                                    flow_id=self.parameters['flow_id'],
                                    priority="info",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message":
                                        "SSH setup completed for all nodes in cluster %s"
                                        % integration_id
                                    }))
                            # set this node as gluster provisioner
                            tags = ["provisioner/%s" % integration_id]
                            NS.node_context = NS.node_context.load()
                            tags += NS.node_context.tags
                            NS.node_context.tags = list(set(tags))
                            NS.node_context.save()

                            # set gdeploy_provisioned to true so that no other nodes
                            # tries to configure gdeploy
                            self.parameters['gdeploy_provisioned'] = True
                            break

            NS.tendrl_context = NS.tendrl_context.load()
            NS.tendrl_context.integration_id = integration_id
            _detected_cluster = NS.tendrl.objects.DetectedCluster().load()
            NS.tendrl_context.cluster_id = _detected_cluster.detected_cluster_id
            NS.tendrl_context.cluster_name = _detected_cluster.detected_cluster_name
            NS.tendrl_context.sds_name = _detected_cluster.sds_pkg_name
            NS.tendrl_context.sds_version = _detected_cluster.sds_pkg_version
            NS.tendrl_context.save()
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Registered Node %s with cluster %s" %
                            (NS.node_context.node_id,
                             NS.tendrl_context.integration_id)
                        }))

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except $this
                        payload = {
                            "tags": ["tendrl/node_%s" % node],
                            "run": "tendrl.flows.ImportCluster",
                            "status": "new",
                            "parameters": new_params,
                            "parent": self.parameters['job_id'],
                            "type": "node"
                        }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        Job(job_id=_job_id, status="new",
                            payload=payload).save()
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Importing (job: %s) Node %s to cluster %s"
                                    % (_job_id, node, integration_id)
                                }))

            if "ceph" in sds_name.lower():
                node_context = NS.node_context.load()
                is_mon = False
                for tag in node_context.tags:
                    mon_tag = NS.compiled_definitions.get_parsed_defs(
                    )['namespace.tendrl']['tags']['ceph-mon']
                    if mon_tag in tag:
                        is_mon = True
                if is_mon:
                    # Check if minimum required version of underlying ceph
                    # cluster met. If not fail the import task
                    detected_cluster = NS.tendrl.objects.DetectedCluster(
                    ).load()
                    detected_cluster_ver = detected_cluster.sds_pkg_version.split(
                        '.')
                    maj_ver = detected_cluster_ver[0]
                    min_ver = detected_cluster_ver[1]
                    reqd_ceph_ver = NS.compiled_definitions.get_parsed_defs(
                    )['namespace.tendrl']['min_reqd_ceph_ver']
                    req_maj_ver, req_min_ver, req_rel = reqd_ceph_ver.split(
                        '.')
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Check: Minimum required version (%s.%s.%s) of Ceph Storage"
                                % (req_maj_ver, req_min_ver, req_rel)
                            }))
                    if int(maj_ver) < int(req_maj_ver) or \
                        int(min_ver) < int(req_min_ver):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="error",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Error: Minimum required version (%s.%s.%s) "
                                    "doesnt match that of detected Ceph Storage (%s.%s.%s)"
                                    % (req_maj_ver, req_min_ver, req_rel,
                                       maj_ver, min_ver, 0)
                                }))

                        raise FlowExecutionFailedError(
                            "Detected ceph version: %s"
                            " is lesser than required version: %s" %
                            (detected_cluster.sds_pkg_version, reqd_ceph_ver))
                    import_ceph(self.parameters)
            else:
                # Check if minimum required version of underlying gluster
                # cluster met. If not fail the import task
                detected_cluster = NS.tendrl.objects.DetectedCluster().load()
                detected_cluster_ver = detected_cluster.sds_pkg_version.split(
                    '.')
                maj_ver = detected_cluster_ver[0]
                min_ver = detected_cluster_ver[1]
                reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['min_reqd_gluster_ver']
                req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Check: Minimum required version (%s.%s.%s) of Gluster Storage"
                            % (req_maj_ver, req_min_ver, req_rel)
                        }))
                if int(maj_ver) < int(req_maj_ver) or \
                    int(min_ver) < int(req_min_ver):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Error: Minimum required version (%s.%s.%s) "
                                "doesnt match that of detected Gluster Storage (%s.%s.%s)"
                                % (req_maj_ver, req_min_ver, req_rel, maj_ver,
                                   min_ver, 0)
                            }))

                    raise FlowExecutionFailedError(
                        "Detected gluster version: %s"
                        " is lesser than required version: %s" %
                        (detected_cluster.sds_pkg_version, reqd_gluster_ver))
                import_gluster(self.parameters)

            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Waiting for participant nodes %s to be "
                            "imported %s" % (node_list, integration_id)
                        }))

            # An import is sucessfull once all Node[] register to
            # /clusters/:integration_id/nodes/:node_id
            while True:
                _all_node_status = []
                gevent.sleep(3)
                for node_id in self.parameters['Node[]']:
                    _status = NS.tendrl.objects.ClusterNodeContext(node_id=node_id).exists() \
                        and NS.tendrl.objects.ClusterTendrlContext(
                            integration_id=integration_id
                        ).exists()
                    _all_node_status.append(_status)
                if _all_node_status:
                    if all(_all_node_status):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Import Cluster completed for all nodes "
                                    "in cluster %s" % integration_id
                                }))

                        break

            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Sucessfully imported cluster %s" % integration_id
                        }))
        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            create_cluster_utils.release_node_lock(self.parameters)

        return True
Esempio n. 2
0
    def run(self):
        try:
            # Locking nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            sds_name = self.parameters["TendrlContext.sds_name"]

            ssh_job_ids = []
            if "ceph" in sds_name:
                ssh_job_ids = create_cluster_utils.ceph_create_ssh_setup_jobs(
                    self.parameters)
            else:
                ssh_job_ids = \
                    create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters
                    )

            while True:
                gevent.sleep(3)
                all_status = {}
                for job_id in ssh_job_ids:
                    # noinspection PyUnresolvedReferences
                    all_status[job_id] = NS._int.client.read(
                        "/queue/%s/status" % job_id).value

                _failed = {
                    _jid: status
                    for _jid, status in all_status.iteritems()
                    if status == "failed"
                }
                if _failed:
                    raise AtomExecutionFailedError(
                        "SSH setup failed for jobs %s cluster %s" %
                        (str(_failed), integration_id))
                if all(
                    [status == "finished" for status in all_status.values()]):
                    Event(
                        Message(job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "SSH setup completed for all "
                                    "nodes in cluster %s" % integration_id
                                }))
                    # set this node as gluster provisioner
                    if "gluster" in self.parameters["TendrlContext.sds_name"]:
                        tags = ["provisioner/%s" % integration_id]
                        NS.node_context = NS.node_context.load()
                        tags += NS.node_context.tags
                        NS.node_context.tags = list(set(tags))
                        NS.node_context.save()
                    break

            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Starting SDS install and config %s" %
                            integration_id
                        }))

            # SSH setup jobs finished above, now install sds bits and create
            #  cluster
            if "ceph" in sds_name:
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Creating Ceph Storage Cluster "
                                "%s" % integration_id
                            }))

                self.parameters.update({'create_mon_secret': True})
                create_ceph(self.parameters)
            else:
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Creating Gluster Storage "
                                "Cluster %s" % integration_id
                            }))

                create_gluster(self.parameters)
        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # releasing nodes if any exception came
            create_cluster_utils.release_node_lock(self.parameters)

        return True
Esempio n. 3
0
    def run(self):
        try:
            integration_id = self.parameters['TendrlContext.integration_id']

            # Lock nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            NS.tendrl_context = NS.tendrl_context.load()

            # TODO(team) when Tendrl supports create/expand/shrink cluster
            # setup passwordless ssh for all gluster nodes with given
            # integration_id (check
            # /indexes/tags/tendrl/integration/$integration_id for list of
            # nodes in cluster

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except
                        #  $this
                        payload = {"tags": ["tendrl/node_%s" % node],
                                   "run": "tendrl.flows.ImportCluster",
                                   "status": "new",
                                   "parameters": new_params,
                                   "parent": self.parameters['job_id'],
                                   "type": "node"
                                   }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        Job(job_id=_job_id,
                            status="new",
                            payload=payload).save()
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message": "Importing (job: %s) Node %s "
                                               "to cluster %s" %
                                    (_job_id, node, integration_id)
                                }
                            )
                        )
            # Check if minimum required version of underlying gluster
            # cluster met. If not fail the import task
            cluster_ver = \
                NS.tendrl_context.sds_version.split('.')
            maj_ver = cluster_ver[0]
            min_ver = re.findall(r'\d+', cluster_ver[1])[0]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
                'namespace.tendrl'
            ]['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Check: Minimum required version ("
                                   "%s.%s.%s) of Gluster Storage" %
                        (req_maj_ver, req_min_ver, req_rel)
                    }
                )
            )
            ver_check_failed = False
            if int(maj_ver) < int(req_maj_ver):
                ver_check_failed = True
            else:
                if int(maj_ver) == int(req_maj_ver) and \
                        int(min_ver) < int(req_min_ver):
                        ver_check_failed = True

            if ver_check_failed:
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Error: Minimum required version "
                                       "(%s.%s.%s) "
                            "doesnt match that of detected Gluster "
                                       "Storage (%s.%s.%s)" %
                            (req_maj_ver, req_min_ver, req_rel,
                             maj_ver, min_ver, 0)
                        }
                    )
                )

                raise FlowExecutionFailedError(
                    "Detected gluster version: %s"
                    " is lesser than required version: %s" %
                    (
                        NS.tendrl_context.sds_version,
                        reqd_gluster_ver
                    )
                )
            if not import_gluster(self.parameters):
                return False

            if len(node_list) > 1:
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Waiting for participant nodes %s to "
                                       "be "
                            "imported %s" % (node_list, integration_id)
                        }
                    )
                )
                loop_count = 0
                # Wait for (no of nodes) * 6 minutes for import to complete
                wait_count = (len(node_list) - 1) * 36
                while True:
                    parent_job = Job(job_id=self.parameters['job_id']).load()
                    if loop_count >= wait_count:
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message": "Import jobs not yet complete "
                                    "on all nodes. Timing out. (%s, %s)" %
                                    (str(node_list), integration_id)
                                }
                            )
                        )
                        return False
                    time.sleep(10)
                    finished = True
                    for child_job_id in parent_job.children:
                        child_job = Job(job_id=child_job_id).load()
                        if child_job.status != "finished":
                            finished = False
                            break
                    if finished:
                        break
                    else:
                        loop_count += 1
                        continue

        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": ex.message,
                        "exception": ex
                    }
                )
            )
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            create_cluster_utils.release_node_lock(self.parameters)

        return True
Esempio n. 4
0
    def run(self):
        try:
            # Lock nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            if integration_id is None:
                raise FlowExecutionFailedError(
                    "TendrlContext.integration_id cannot be empty")

            supported_sds = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['supported_sds']
            sds_name = self.parameters["TendrlContext.sds_name"]
            if sds_name not in supported_sds:
                raise FlowExecutionFailedError("SDS (%s) not supported" %
                                               sds_name)

            ssh_job_ids = []
            if "ceph" in sds_name:
                ssh_job_ids = create_cluster_utils.ceph_create_ssh_setup_jobs(
                    self.parameters)
            else:
                ssh_job_ids = \
                    create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters,
                        skip_current_node=True
                    )

            while True:
                time.sleep(3)
                all_status = {}
                for job_id in ssh_job_ids:
                    all_status[job_id] = NS._int.client.read(
                        "/queue/%s/status" % job_id).value

                _failed = {
                    _jid: status
                    for _jid, status in all_status.iteritems()
                    if status == "failed"
                }
                if _failed:
                    raise FlowExecutionFailedError(
                        "SSH setup failed for jobs %s cluster %s" %
                        (str(_failed), integration_id))
                if all(
                    [status == "finished" for status in all_status.values()]):
                    Event(
                        Message(job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "SSH setup completed for all "
                                    "nodes in cluster %s" % integration_id
                                }))

                    break

            # SSH setup jobs finished above, now install sds
            # bits and create cluster
            if "ceph" in sds_name:
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Expanding ceph cluster %s" % integration_id
                            }))
                ceph_help.expand_cluster(self.parameters)
            else:
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Expanding Gluster Storage"
                                " Cluster %s" % integration_id
                            }))
                gluster_help.expand_gluster(self.parameters)
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "SDS install/config completed on newly "
                        "expanded nodes, Please wait while "
                        "tendrl-node-agents detect sds details on the newly "
                        "expanded nodes %s" % self.parameters['Node[]']
                    }))

            # Wait till detected cluster in populated for nodes
            while True:
                time.sleep(3)
                all_status = []
                detected_cluster = ""
                different_cluster_id = False
                dc = ""
                for node in self.parameters['Node[]']:
                    try:
                        dc = NS._int.client.read(
                            "/nodes/%s/DetectedCluster/detected_cluster_id" %
                            node).value
                        if not detected_cluster:
                            detected_cluster = dc
                        else:
                            if detected_cluster != dc:
                                all_status.append(False)
                                different_cluster_id = True
                                break
                        all_status.append(True)
                    except etcd.EtcdKeyNotFound:
                        all_status.append(False)
                if different_cluster_id:
                    raise FlowExecutionFailedError(
                        "Seeing different detected cluster id in"
                        " different nodes. %s and %s" % (detected_cluster, dc))

                if all_status:
                    if all(all_status):
                        break

            # Create the params list for import cluster flow
            new_params = dict()
            new_params['Node[]'] = self.parameters['Node[]']
            new_params['TendrlContext.integration_id'] = integration_id

            # Get node context for one of the nodes from list
            sds_pkg_name = NS._int.client.read(
                "nodes/%s/DetectedCluster/"
                "sds_pkg_name" % self.parameters['Node[]'][0]).value
            new_params['import_after_expand'] = True
            sds_pkg_version = NS._int.client.read(
                "nodes/%s/DetectedCluster/sds_pkg_"
                "version" % self.parameters['Node[]'][0]).value
            new_params['DetectedCluster.sds_pkg_name'] = \
                sds_pkg_name
            new_params['DetectedCluster.sds_pkg_version'] = \
                sds_pkg_version

            tags = []
            for node in self.parameters['Node[]']:
                tags.append("tendrl/node_%s" % node)
            payload = {
                "tags": tags,
                "run": "tendrl.flows.ImportCluster",
                "status": "new",
                "parameters": new_params,
                "parent": self.parameters['job_id'],
                "type": "node"
            }
            _job_id = str(uuid.uuid4())
            # release lock before import cluster
            create_cluster_utils.release_node_lock(self.parameters)

            Job(job_id=_job_id, status="new", payload=payload).save()
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Please wait while Tendrl imports ("
                            "job_id: %s) newly expanded "
                            "%s storage nodes %s" %
                            (_job_id, sds_pkg_name, integration_id)
                        }))
        except Exception as ex:
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock if any exception came
            create_cluster_utils.release_node_lock(self.parameters)