예제 #1
0
def test_acquire_node_lock():
    """Exercise utils.acquire_node_lock() against mocked NodeContext and
    Job combinations: an acquirable node, two failing NodeContext
    variants, and a job without a parent.
    """
    # Minimal flow parameters expected by acquire_node_lock.
    params = maps.NamedDict()
    params["flow_id"] = "test_id"
    params["job_id"] = "1"
    params['Node[]'] = [0]
    # Publish a fresh global namespace (NS) populated with mocks.
    __builtin__.NS = maps.NamedDict()
    NS._int = maps.NamedDict()
    NS.tendrl = maps.NamedDict()
    NS.tendrl.objects = maps.NamedDict()
    NS.publisher_id = 0
    NS.tendrl.objects.Job = MockJob
    NS.tendrl.objects.NodeContext = MockNodeContextExists
    # Existing, free node: lock acquisition should not raise.
    utils.acquire_node_lock(params)
    # Second NodeContext variant: acquisition is expected to fail.
    NS.tendrl.objects.NodeContext = MockNodeContextExists2
    with pytest.raises(FlowExecutionFailedError):
        utils.acquire_node_lock(params)
    # Missing node context: acquisition is expected to fail.
    NS.tendrl.objects.NodeContext = MockNodeContextDoesNotExist
    with pytest.raises(FlowExecutionFailedError):
        utils.acquire_node_lock(params)
    # Parent-less job against an existing node acquires the lock again.
    NS.tendrl.objects.NodeContext = MockNodeContextExists
    NS.tendrl.objects.Job = MockJobNoParent
    utils.acquire_node_lock(params)
예제 #2
0
def test_acquire_node_lock():
    """Verify node-lock acquisition succeeds or fails for the relevant
    mocked NodeContext/Job permutations.
    """
    test_params = maps.NamedDict()
    test_params["job_id"] = "1"
    test_params["flow_id"] = "test_id"
    test_params['Node[]'] = [0]
    # Install the mocked global namespace used by the util under test.
    setattr(__builtin__, "NS", maps.NamedDict())
    setattr(NS, "_int", maps.NamedDict())
    NS.publisher_id = 0
    NS.tendrl = maps.NamedDict()
    NS.tendrl.objects = maps.NamedDict()
    NS.tendrl.objects.Job = MockJob
    NS.tendrl.objects.NodeContext = MockNodeContextExists
    # Baseline: lock should be acquired without raising.
    utils.acquire_node_lock(test_params)
    # Each of these NodeContext variants must make acquisition fail.
    for failing_ctx in (MockNodeContextExists2, MockNodeContextDoesNotExist):
        NS.tendrl.objects.NodeContext = failing_ctx
        with pytest.raises(FlowExecutionFailedError):
            utils.acquire_node_lock(test_params)
    # A job with no parent should still be able to take the lock.
    NS.tendrl.objects.NodeContext = MockNodeContextExists
    NS.tendrl.objects.Job = MockJobNoParent
    utils.acquire_node_lock(test_params)
예제 #3
0
    def run(self):
        """Expand an existing Gluster cluster with new nodes.

        Sequence: lock the target nodes, validate parameters, create
        SSH-setup jobs for the new nodes and wait for them, install the
        SDS bits via gluster_help.expand_gluster, wait until
        tendrl-node-agents detect the expanded peers, then queue an
        ImportCluster job covering the new nodes.

        Raises:
            FlowExecutionFailedError: when the integration id is
                missing, the sds name is unsupported, an SSH-setup job
                fails, or different nodes report different detected
                cluster ids.
        """
        try:
            # Lock nodes so no concurrent flow operates on them.
            flow_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            if integration_id is None:
                raise FlowExecutionFailedError(
                    "TendrlContext.integration_id cannot be empty")

            supported_sds = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['supported_sds']
            sds_name = self.parameters["TendrlContext.sds_name"]
            if sds_name not in supported_sds:
                raise FlowExecutionFailedError("SDS (%s) not supported" %
                                               sds_name)

            # One SSH-setup child job per new node; the current node is
            # skipped because it already runs the flow.  (A redundant
            # "ssh_job_ids = []" pre-initialization was removed.)
            ssh_job_ids = \
                flow_utils.gluster_create_ssh_setup_jobs(
                    self.parameters,
                    skip_current_node=True
                )

            # Poll the SSH-setup jobs until all finish; fail fast when
            # any of them reports "failed".
            while True:
                time.sleep(3)
                all_status = {}
                for job_id in ssh_job_ids:
                    job = NS.tendrl.objects.Job(job_id=job_id).load()
                    all_status[job_id] = job.status

                _failed = {
                    _jid: status
                    for _jid, status in all_status.iteritems()
                    if status == "failed"
                }
                if _failed:
                    raise FlowExecutionFailedError(
                        "SSH setup failed for jobs %s cluster %s" %
                        (str(_failed), integration_id))
                if all(
                    [status == "finished" for status in all_status.values()]):
                    logger.log("info",
                               NS.publisher_id, {
                                   "message":
                                   "SSH setup completed for all "
                                   "nodes in cluster %s" % integration_id
                               },
                               job_id=self.parameters['job_id'],
                               flow_id=self.parameters['flow_id'])

                    break

            # SSH setup jobs finished above, now install sds
            # bits and create cluster
            logger.log("info",
                       NS.publisher_id, {
                           "message":
                           "Expanding Gluster Storage"
                           " Cluster %s" % integration_id
                       },
                       job_id=self.parameters['job_id'],
                       flow_id=self.parameters['flow_id'])
            gluster_help.expand_gluster(self.parameters)
            logger.log(
                "info",
                NS.publisher_id, {
                    "message":
                    "SDS install/config completed on newly "
                    "expanded nodes, Please wait while "
                    "tendrl-node-agents detect sds details on the newly "
                    "expanded nodes %s" % self.parameters['Node[]']
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'])

            # Wait till detected cluster in populated for nodes
            while True:
                time.sleep(3)
                all_status = []
                detected_cluster = ""
                different_cluster_id = False
                dc = ""
                for node in self.parameters['Node[]']:
                    try:
                        dc = NS.tendrl.objects.DetectedCluster(
                            node_id=node).load()
                        if not detected_cluster:
                            # First node seen defines the reference id.
                            detected_cluster = dc.detected_cluster_id
                        else:
                            if detected_cluster != dc.detected_cluster_id:
                                all_status.append(False)
                                different_cluster_id = True
                                break
                        all_status.append(True)
                    except etcd.EtcdKeyNotFound:
                        # Node not detected yet; keep waiting.
                        all_status.append(False)
                if different_cluster_id:
                    raise FlowExecutionFailedError(
                        "Seeing different detected cluster id in"
                        " different nodes. %s and %s" %
                        (detected_cluster, dc.detected_cluster_id))

                if all_status:
                    if all(all_status):
                        break

            # Create the params list for import cluster flow
            new_params = dict()
            new_params['Node[]'] = self.parameters['Node[]']
            new_params['TendrlContext.integration_id'] = integration_id

            # Get node context for one of the nodes from list
            dc = NS.tendrl.objects.DetectedCluster(
                node_id=self.parameters['Node[]'][0]).load()
            sds_pkg_name = dc.sds_pkg_name
            new_params['import_after_expand'] = True
            sds_pkg_version = dc.sds_pkg_version
            new_params['DetectedCluster.sds_pkg_name'] = \
                sds_pkg_name
            new_params['DetectedCluster.sds_pkg_version'] = \
                sds_pkg_version

            tags = []
            for node in self.parameters['Node[]']:
                tags.append("tendrl/node_%s" % node)
            payload = {
                "tags": tags,
                "run": "tendrl.flows.ImportCluster",
                "status": "new",
                "parameters": new_params,
                "parent": self.parameters['job_id'],
                "type": "node"
            }
            _job_id = str(uuid.uuid4())
            # release lock before import cluster
            flow_utils.release_node_lock(self.parameters)

            NS.tendrl.objects.Job(job_id=_job_id,
                                  status="new",
                                  payload=payload).save()
            logger.log(
                "info",
                NS.publisher_id, {
                    "message":
                    "Please wait while Tendrl imports ("
                    "job_id: %s) newly expanded "
                    "%s storage nodes in cluster %s" %
                    (_job_id, sds_pkg_name,
                     NS.tendrl.objects.Cluster(
                         integration_id=integration_id).load().short_name)
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'])
        except Exception as ex:
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock if any exception came
            # NOTE(review): on the success path the lock was already
            # released above; this relies on release_node_lock tolerating
            # a second call -- confirm.
            flow_utils.release_node_lock(self.parameters)
예제 #4
0
    def run(self):
        """Import an existing Gluster cluster into Tendrl.

        On the flow's master node (more than one entry in 'Node[]') one
        ImportCluster child job is created per other node.  The
        installed glusterfs-server version is then checked against the
        configured minimum, the import itself is executed, and finally
        the master waits -- with a timeout -- for all child jobs.

        Returns:
            bool: True on success, False on version-check failure,
            import error, or child-job failure/timeout.

        Raises:
            Exception: any unexpected error is logged and re-raised so
            the job is marked failed.
        """
        try:
            integration_id = self.parameters['TendrlContext.integration_id']
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=integration_id
            ).load()

            # Lock nodes
            flow_utils.acquire_node_lock(self.parameters)
            NS.tendrl_context = NS.tendrl_context.load()

            # TODO(team) when Tendrl supports create/expand/shrink cluster
            # setup passwordless ssh for all gluster nodes with given
            # integration_id (check
            # /indexes/tags/tendrl/integration/$integration_id for list of
            # nodes in cluster

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except
                        #  $this
                        payload = {"tags": ["tendrl/node_%s" % node],
                                   "run": "tendrl.flows.ImportCluster",
                                   "status": "new",
                                   "parameters": new_params,
                                   "parent": self.parameters['job_id'],
                                   "type": "node"
                                   }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        NS.tendrl.objects.Job(
                            job_id=_job_id,
                            status="new",
                            payload=payload
                        ).save()
                        logger.log(
                            "info",
                            NS.publisher_id,
                            {"message": "ImportCluster %s (jobID: %s) :"
                                        "importing host %s" %
                             (_cluster.short_name, _job_id, node)},
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id']
                        )
            # Check if minimum required version of underlying gluster
            # cluster met. If not fail the import task
            # A sample output from "rpm -qa | grep glusterfs-server"
            # looks as below
            # `glusterfs-server-3.8.4-54.4.el7rhgs.x86_64`
            # In case of upstream build the format could be as below
            # `glusterfs-server-4.1dev-0.203.gitc3e1a2e.el7.centos.x86_64`
            # `glusterfs-server-3.12.8-0.0.el7.centos.x86_64.rpm`
            cmd = subprocess.Popen(
                'rpm -q glusterfs-server',
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            out, err = cmd.communicate()
            if out in [None, ""] or err:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Failed to detect underlying cluster version"},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False
            lines = out.split('\n')
            build_no = None
            req_build_no = None
            # e.g. "3.8.4-54..." -> maj=3, min=8, rel=4, build=54
            ver_det = lines[0].split('glusterfs-server-')[-1].split('.')
            maj_ver = ver_det[0]
            min_ver = ver_det[1]
            if 'dev' in min_ver:
                # Upstream dev builds look like "4.1dev": strip the "dev"
                # marker but keep the whole numeric minor part (the old
                # "min_ver[0]" truncated multi-digit minors like "12dev").
                min_ver = min_ver.split('dev')[0]
            rel = ver_det[2]
            if '-' in rel:
                build_no = rel.split('-')[-1]
                rel = rel.split('-')[0]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
                'namespace.tendrl'
            ]['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            if '-' in req_rel:
                req_build_no = req_rel.split('-')[-1]
                req_rel = req_rel.split('-')[0]
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Checking minimum required version ("
                            "%s.%s.%s) of Gluster Storage" %
                 (req_maj_ver, req_min_ver, req_rel)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            # Component-wise comparison: major, then minor, then release,
            # then build number (only when both build numbers are known).
            ver_check_failed = False
            if int(maj_ver) < int(req_maj_ver):
                ver_check_failed = True
            else:
                if int(maj_ver) == int(req_maj_ver):
                    if int(min_ver) < int(req_min_ver):
                        ver_check_failed = True
                    else:
                        if int(min_ver) == int(req_min_ver):
                            if int(rel) < int(req_rel):
                                ver_check_failed = True
                            else:
                                if int(rel) == int(req_rel):
                                    if build_no is not None and \
                                        req_build_no is not None and \
                                        int(build_no) < int(req_build_no):
                                        ver_check_failed = True
            if ver_check_failed:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Error: Minimum required version "
                                "(%s.%s.%s) "
                     "doesnt match that of detected Gluster "
                                "Storage (%s.%s.%s)" %
                     (req_maj_ver, req_min_ver, req_rel,
                      maj_ver, min_ver, rel)},  # was a hard-coded 0
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False

            ret_val, err = import_gluster(self.parameters)
            if not ret_val:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Error importing the cluster (integration_id:"
                                " %s). Error: %s" % (integration_id, err)
                     },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False

            if len(node_list) > 1:
                logger.log(
                    "info",
                    NS.publisher_id,
                    {"message": "ImportCluster %s waiting for hosts %s "
                        "to be imported" % (_cluster.short_name, node_list)},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                loop_count = 0
                # Wait for (no of nodes) * 6 minutes for import to complete
                wait_count = (len(node_list) - 1) * 36
                while True:
                    child_jobs_failed = []
                    parent_job = NS.tendrl.objects.Job(
                        job_id=self.parameters['job_id']
                    ).load()
                    if loop_count >= wait_count:
                        logger.log(
                            "error",
                            NS.publisher_id,
                            {"message": "Import jobs on cluster(%s) not yet "
                             "complete on all nodes(%s). Timing out." %
                             (_cluster.short_name, str(node_list))},
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id']
                        )
                        # Marking child jobs as failed which did not complete
                        # as the parent job has timed out. This has to be done
                        # explicitly because these jobs will still be processed
                        # by the node-agent, and will keep it busy, which might
                        # defer the new jobs or lead to their timeout.
                        for child_job_id in parent_job.children:
                            child_job = NS.tendrl.objects.Job(
                                job_id=child_job_id
                            ).load()
                            if child_job.status not in ["finished", "failed"]:
                                child_job.status = "failed"
                                child_job.save()
                        return False
                    time.sleep(10)
                    completed = True
                    for child_job_id in parent_job.children:
                        child_job = NS.tendrl.objects.Job(
                            job_id=child_job_id
                        ).load()
                        if child_job.status not in ["finished", "failed"]:
                            completed = False
                        elif child_job.status == "failed":
                            child_jobs_failed.append(child_job.job_id)
                    if completed:
                        break
                    else:
                        loop_count += 1
                        continue
                if len(child_jobs_failed) > 0:
                    _msg = "Child jobs failed are %s" % child_jobs_failed
                    logger.log(
                        "error",
                        NS.publisher_id,
                        {"message": _msg},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )
                    return False
        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": ex.message,
                        "exception": ex
                    }
                )
            )
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            flow_utils.release_node_lock(self.parameters)

        return True
예제 #5
0
    def run(self):
        """Import an existing Gluster cluster into Tendrl.

        Like the plain import flow, but the master node additionally
        toggles volume profiling (best-effort) before fanning out one
        ImportCluster child job per other node.  The glusterfs-server
        version is validated against the configured minimum, the import
        is executed, and the master waits -- with a volume-count-scaled
        timeout -- for all child jobs.

        Returns:
            bool: True on success, False on version-check failure,
            profiling-toggle failure, import error, or child-job
            failure/timeout.

        Raises:
            Exception: any unexpected error is logged and re-raised so
            the job is marked failed.
        """
        try:
            integration_id = self.parameters['TendrlContext.integration_id']
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=integration_id
            ).load()

            # Lock nodes
            flow_utils.acquire_node_lock(self.parameters)
            NS.tendrl_context = NS.tendrl_context.load()

            # TODO(team) when Tendrl supports create/expand/shrink cluster
            # setup passwordless ssh for all gluster nodes with given
            # integration_id (check
            # /indexes/tags/tendrl/integration/$integration_id for list of
            # nodes in cluster

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow
                # Find number of volumes in the cluster to run profiling job
                cmd = cmd_utils.Command('gluster volume list')
                out, err, rc = cmd.run()
                if not err:
                    volumes = filter(None, out.split("\n"))
                    ret_val, err = enable_disable_volume_profiling(
                        volumes, self.parameters)
                    if not ret_val:
                        logger.log(
                            "error",
                            NS.publisher_id,
                            {"message": "Failed to %s profiling. Error: %s"
                                        % (_cluster.volume_profiling_flag, err)
                             },
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id']
                        )
                        return False
                else:
                    logger.log(
                        "warning",
                        NS.publisher_id,
                        {"message": "Unable to find list of volumes in a "
                                    "cluster, Skipping volume enable/disable "
                                    "profiling flow. error: %s" % err},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )
                    # continue the import flow
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except
                        #  $this
                        payload = {"tags": ["tendrl/node_%s" % node],
                                   "run": "tendrl.flows.ImportCluster",
                                   "status": "new",
                                   "parameters": new_params,
                                   "parent": self.parameters['job_id'],
                                   "type": "node"
                                   }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        NS.tendrl.objects.Job(
                            job_id=_job_id,
                            status="new",
                            payload=payload
                        ).save()
                        logger.log(
                            "info",
                            NS.publisher_id,
                            {"message": "ImportCluster %s (jobID: %s) :"
                                        "importing host %s" %
                             (_cluster.short_name, _job_id, node)},
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id']
                        )
            # Check if minimum required version of underlying gluster
            # cluster met. If not fail the import task
            # A sample output from "rpm -qa | grep glusterfs-server"
            # looks as below
            # `glusterfs-server-3.8.4-54.4.el7rhgs.x86_64`
            # In case of upstream build the format could be as below
            # `glusterfs-server-4.1dev-0.203.gitc3e1a2e.el7.centos.x86_64`
            # `glusterfs-server-3.12.8-0.0.el7.centos.x86_64.rpm`
            cmd = subprocess.Popen(
                'rpm -q glusterfs-server',
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            out, err = cmd.communicate()
            if out in [None, ""] or err:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Failed to detect underlying cluster version"},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False
            lines = out.split('\n')
            build_no = None
            req_build_no = None
            # e.g. "3.8.4-54..." -> maj=3, min=8, rel=4, build=54
            ver_det = lines[0].split('glusterfs-server-')[-1].split('.')
            maj_ver = ver_det[0]
            min_ver = ver_det[1]
            if 'dev' in min_ver:
                # Upstream dev builds look like "4.1dev": strip the "dev"
                # marker but keep the whole numeric minor part (the old
                # "min_ver[0]" truncated multi-digit minors like "12dev").
                min_ver = min_ver.split('dev')[0]
            rel = ver_det[2]
            if '-' in rel:
                build_no = rel.split('-')[-1]
                rel = rel.split('-')[0]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
                'namespace.tendrl'
            ]['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            if '-' in req_rel:
                req_build_no = req_rel.split('-')[-1]
                req_rel = req_rel.split('-')[0]
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Checking minimum required version ("
                            "%s.%s.%s) of Gluster Storage" %
                 (req_maj_ver, req_min_ver, req_rel)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            # Component-wise comparison: major, then minor, then release,
            # then build number (only when both build numbers are known).
            ver_check_failed = False
            if int(maj_ver) < int(req_maj_ver):
                ver_check_failed = True
            else:
                if int(maj_ver) == int(req_maj_ver):
                    if int(min_ver) < int(req_min_ver):
                        ver_check_failed = True
                    else:
                        if int(min_ver) == int(req_min_ver):
                            if int(rel) < int(req_rel):
                                ver_check_failed = True
                            else:
                                if int(rel) == int(req_rel):
                                    if build_no is not None and \
                                        req_build_no is not None and \
                                        int(build_no) < int(req_build_no):
                                        ver_check_failed = True
            if ver_check_failed:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Error: Minimum required version "
                                "(%s.%s.%s) "
                     "doesnt match that of detected Gluster "
                                "Storage (%s.%s.%s)" %
                     (req_maj_ver, req_min_ver, req_rel,
                      maj_ver, min_ver, rel)},  # was a hard-coded 0
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False

            ret_val, err = import_gluster(self.parameters)
            if not ret_val:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {"message": "Error importing the cluster (integration_id:"
                                " %s). Error: %s" % (integration_id, err)
                     },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                return False

            if len(node_list) > 1:
                # find number of volumes in a cluster
                cmd = cmd_utils.Command('gluster volume list')
                out, err, rc = cmd.run()
                # default intervel is 8 min
                # 5 sec sleep for one count increment (480 / 5)
                wait_count = 96
                if not err:
                    volumes = out.split("\n")
                    # 15 sec for each volume
                    wait_count = wait_count + (len(volumes) * 3)
                logger.log(
                    "info",
                    NS.publisher_id,
                    {"message": "ImportCluster %s waiting for hosts %s "
                        "to be imported" % (_cluster.short_name, node_list)},
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id']
                )
                loop_count = 0
                # Wait for (no of nodes) * 6 minutes for import to complete
                while True:
                    child_jobs_failed = []
                    parent_job = NS.tendrl.objects.Job(
                        job_id=self.parameters['job_id']
                    ).load()
                    if loop_count >= wait_count:
                        logger.log(
                            "error",
                            NS.publisher_id,
                            {"message": "Import jobs on cluster(%s) not yet "
                             "complete on all nodes(%s). Timing out." %
                             (_cluster.short_name, str(node_list))},
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id']
                        )
                        # Marking child jobs as failed which did not complete
                        # as the parent job has timed out. This has to be done
                        # explicitly because these jobs will still be processed
                        # by the node-agent, and will keep it busy, which might
                        # defer the new jobs or lead to their timeout.
                        for child_job_id in parent_job.children:
                            child_job = NS.tendrl.objects.Job(
                                job_id=child_job_id
                            ).load()
                            if child_job.status not in ["finished", "failed"]:
                                child_job.status = "failed"
                                child_job.save()
                        return False
                    time.sleep(10)
                    completed = True
                    for child_job_id in parent_job.children:
                        child_job = NS.tendrl.objects.Job(
                            job_id=child_job_id
                        ).load()
                        if child_job.status not in ["finished", "failed"]:
                            completed = False
                        elif child_job.status == "failed":
                            child_jobs_failed.append(child_job.job_id)
                    if completed:
                        break
                    else:
                        loop_count += 1
                        continue
                if len(child_jobs_failed) > 0:
                    _msg = "Child jobs failed are %s" % child_jobs_failed
                    logger.log(
                        "error",
                        NS.publisher_id,
                        {"message": _msg},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )
                    return False
        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": ex.message,
                        "exception": ex
                    }
                )
            )
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            flow_utils.release_node_lock(self.parameters)

        return True
예제 #6
0
    def run(self):
        """Expand an existing gluster cluster onto the nodes in Node[].

        High-level steps (all state is read/written through etcd-backed
        NS objects):
          1. Lock the participating nodes and validate the target SDS.
          2. Fan out passwordless-SSH setup jobs to the peer nodes and
             poll until all finish (fail fast if any job fails).
          3. Install/configure gluster bits on the new nodes.
          4. Wait until node-agents populate a DetectedCluster entry for
             every new node and verify they agree on the cluster id.
          5. Queue a follow-up ImportCluster job covering the new nodes.

        Raises FlowExecutionFailedError on validation/SSH/detection
        failures; any exception is re-raised so the job is marked failed.
        """
        try:
            # Lock nodes so no other flow operates on them concurrently.
            flow_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            if integration_id is None:
                raise FlowExecutionFailedError(
                    "TendrlContext.integration_id cannot be empty"
                )

            # Only SDS types listed in the compiled definitions may be
            # expanded through this flow.
            supported_sds = NS.compiled_definitions.get_parsed_defs()[
                'namespace.tendrl']['supported_sds']
            sds_name = self.parameters["TendrlContext.sds_name"]
            if sds_name not in supported_sds:
                raise FlowExecutionFailedError("SDS (%s) not supported" %
                                               sds_name)

            ssh_job_ids = []
            # One SSH-setup job per peer node; the current node is
            # skipped since it is the one running this flow.
            ssh_job_ids = \
                flow_utils.gluster_create_ssh_setup_jobs(
                    self.parameters,
                    skip_current_node=True
                )

            # Poll the SSH setup jobs every 3s until all are "finished".
            # NOTE(review): no upper bound here — if a job never reaches
            # "finished"/"failed" this loop spins forever; confirm a
            # watchdog exists elsewhere.
            while True:
                time.sleep(3)
                all_status = {}
                for job_id in ssh_job_ids:
                    job = NS.tendrl.objects.Job(job_id=job_id).load()
                    all_status[job_id] = job.status

                # Python 2 dict iteration (iteritems); any failed SSH
                # job aborts the whole expand.
                _failed = {_jid: status for _jid, status in
                           all_status.iteritems() if status == "failed"}
                if _failed:
                    raise FlowExecutionFailedError(
                        "SSH setup failed for jobs %s cluster %s" % (str(
                            _failed), integration_id))
                if all([status == "finished" for status in
                        all_status.values()]):
                    logger.log(
                        "info",
                        NS.publisher_id,
                        {"message": "SSH setup completed for all "
                         "nodes in cluster %s" % integration_id},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id']
                    )

                    break

            # SSH setup jobs finished above, now install sds
            # bits and create cluster
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Expanding Gluster Storage"
                 " Cluster %s" % integration_id},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
            gluster_help.expand_gluster(self.parameters)
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "SDS install/config completed on newly "
                 "expanded nodes, Please wait while "
                 "tendrl-node-agents detect sds details on the newly "
                 "expanded nodes %s" % self.parameters['Node[]']},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )

            # Wait till detected cluster is populated for all new nodes.
            # NOTE(review): also unbounded, like the SSH wait loop above.
            while True:
                time.sleep(3)
                all_status = []
                detected_cluster = ""
                different_cluster_id = False
                dc = ""
                for node in self.parameters['Node[]']:
                    try:
                        dc = NS.tendrl.objects.DetectedCluster(
                            node_id=node
                        ).load()
                        # First node seeds the expected cluster id; every
                        # subsequent node must report the same id.
                        if not detected_cluster:
                            detected_cluster = dc.detected_cluster_id
                        else:
                            if detected_cluster != dc.detected_cluster_id:
                                all_status.append(False)
                                different_cluster_id = True
                                break
                        all_status.append(True)
                    except etcd.EtcdKeyNotFound:
                        # Node-agent has not written DetectedCluster yet;
                        # keep waiting.
                        all_status.append(False)
                if different_cluster_id:
                    raise FlowExecutionFailedError(
                        "Seeing different detected cluster id in"
                        " different nodes. %s and %s" % (
                            detected_cluster, dc.detected_cluster_id)
                    )

                if all_status:
                    if all(all_status):
                        break

            # Create the params list for import cluster flow
            new_params = dict()
            new_params['Node[]'] = self.parameters['Node[]']
            new_params['TendrlContext.integration_id'] = integration_id

            # Get node context for one of the nodes from list
            dc = NS.tendrl.objects.DetectedCluster(
                node_id=self.parameters['Node[]'][0]
            ).load()
            sds_pkg_name = dc.sds_pkg_name
            # Flag tells the ImportCluster flow it runs as part of expand.
            new_params['import_after_expand'] = True
            sds_pkg_version = dc.sds_pkg_version
            new_params['DetectedCluster.sds_pkg_name'] = \
                sds_pkg_name
            new_params['DetectedCluster.sds_pkg_version'] = \
                sds_pkg_version

            # Target the import job at every new node via its agent tag.
            tags = []
            for node in self.parameters['Node[]']:
                tags.append("tendrl/node_%s" % node)
            payload = {
                "tags": tags,
                "run": "tendrl.flows.ImportCluster",
                "status": "new",
                "parameters": new_params,
                "parent": self.parameters['job_id'],
                "type": "node"
            }
            _job_id = str(uuid.uuid4())
            # release lock before import cluster
            # NOTE(review): the finally block below releases the lock a
            # second time — presumably release_node_lock is idempotent;
            # verify against its implementation.
            flow_utils.release_node_lock(self.parameters)

            NS.tendrl.objects.Job(
                job_id=_job_id,
                status="new",
                payload=payload
            ).save()
            logger.log(
                "info",
                NS.publisher_id,
                {"message": "Please wait while Tendrl imports ("
                            "job_id: %s) newly expanded "
                 "%s storage nodes in cluster %s" % (
                     _job_id,
                     sds_pkg_name,
                     NS.tendrl.objects.Cluster(
                            integration_id=integration_id
                     ).load().short_name)},
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id']
            )
        except Exception as ex:
            # Emit a traceback event (ex.message is Python 2 only).
            Event(
                ExceptionMessage(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={"message": ex.message,
                             "exception": ex
                             }
                )
            )
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock if any exception came
            flow_utils.release_node_lock(self.parameters)
Example #7
0
    def run(self):
        """Import an existing gluster cluster into Tendrl.

        When invoked with more than one node in Node[], this instance acts
        as the master for the flow: it spawns one child ImportCluster job
        per peer node, validates the detected gluster version against the
        minimum required version from the compiled definitions, imports
        the local node, then waits (bounded) for all child jobs to finish.

        Returns True on success, False if child jobs time out. Raises
        AtomExecutionFailedError on version mismatch or local import
        failure; any exception is re-raised so the job is marked failed.
        """
        try:
            integration_id = self.parameters['TendrlContext.integration_id']

            # Lock nodes so no other flow operates on them concurrently.
            flow_utils.acquire_node_lock(self.parameters)
            NS.tendrl_context = NS.tendrl_context.load()

            # TODO(team) when Tendrl supports create/expand/shrink cluster
            # setup passwordless ssh for all gluster nodes with given
            # integration_id (check
            # /indexes/tags/tendrl/integration/$integration_id for list of
            # nodes in cluster

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow: fan out one
                # child ImportCluster job per peer node.
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except
                        #  $this
                        payload = {
                            "tags": ["tendrl/node_%s" % node],
                            "run": "tendrl.flows.ImportCluster",
                            "status": "new",
                            "parameters": new_params,
                            "parent": self.parameters['job_id'],
                            "type": "node"
                        }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        NS.tendrl.objects.Job(job_id=_job_id,
                                              status="new",
                                              payload=payload).save()
                        logger.log("info",
                                   NS.publisher_id, {
                                       "message":
                                       "Importing (job: %s) Node %s "
                                       "to cluster %s" %
                                       (_job_id, node, integration_id)
                                   },
                                   job_id=self.parameters['job_id'],
                                   flow_id=self.parameters['flow_id'])
            # Check if minimum required version of underlying gluster
            # cluster met. If not fail the import task
            # Version string is split as "<maj>.<min>..."; the regex
            # strips any non-digit suffix from the minor component.
            cluster_ver = \
                NS.tendrl_context.sds_version.split('.')
            maj_ver = cluster_ver[0]
            min_ver = re.findall(r'\d+', cluster_ver[1])[0]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            logger.log("info",
                       NS.publisher_id, {
                           "message":
                           "Check: Minimum required version ("
                           "%s.%s.%s) of Gluster Storage" %
                           (req_maj_ver, req_min_ver, req_rel)
                       },
                       job_id=self.parameters['job_id'],
                       flow_id=self.parameters['flow_id'])
            # Compare (major, minor) only; the release component is not
            # part of the comparison.
            ver_check_failed = False
            if int(maj_ver) < int(req_maj_ver):
                ver_check_failed = True
            else:
                if int(maj_ver) == int(req_maj_ver) and \
                        int(min_ver) < int(req_min_ver):
                    ver_check_failed = True

            if ver_check_failed:
                logger.log("error",
                           NS.publisher_id, {
                               "message":
                               "Error: Minimum required version "
                               "(%s.%s.%s) "
                               "doesnt match that of detected Gluster "
                               "Storage (%s.%s.%s)" %
                               (req_maj_ver, req_min_ver, req_rel, maj_ver,
                                min_ver, 0)
                           },
                           job_id=self.parameters['job_id'],
                           flow_id=self.parameters['flow_id'])

                raise AtomExecutionFailedError(
                    "Detected gluster version: %s"
                    " is lesser than required version: %s" %
                    (NS.tendrl_context.sds_version, reqd_gluster_ver))
            # Import the local node's gluster details into Tendrl.
            ret_val, err = import_gluster(self.parameters)
            if not ret_val:
                raise AtomExecutionFailedError(
                    "Error importing the cluster (integration_id: %s). "
                    "Error: %s" % (integration_id, err))

            if len(node_list) > 1:
                logger.log("info",
                           NS.publisher_id, {
                               "message":
                               "Waiting for participant nodes %s to "
                               "be "
                               "imported %s" % (node_list, integration_id)
                           },
                           job_id=self.parameters['job_id'],
                           flow_id=self.parameters['flow_id'])
                loop_count = 0
                # Wait for (no of nodes) * 6 minutes for import to complete
                # (36 iterations * 10s sleep = 6 min per peer node).
                wait_count = (len(node_list) - 1) * 36
                while True:
                    parent_job = NS.tendrl.objects.Job(
                        job_id=self.parameters['job_id']).load()
                    if loop_count >= wait_count:
                        logger.log("info",
                                   NS.publisher_id, {
                                       "message":
                                       "Import jobs not yet complete "
                                       "on all nodes. Timing out. (%s, %s)" %
                                       (str(node_list), integration_id)
                                   },
                                   job_id=self.parameters['job_id'],
                                   flow_id=self.parameters['flow_id'])
                        # Timed out — report flow failure to the caller.
                        return False
                    time.sleep(10)
                    finished = True
                    for child_job_id in parent_job.children:
                        child_job = NS.tendrl.objects.Job(
                            job_id=child_job_id).load()
                        # NOTE(review): unlike the expand flow, a child in
                        # "failed" state is not detected here — it only
                        # keeps finished False until the timeout; confirm
                        # this is intentional.
                        if child_job.status != "finished":
                            finished = False
                            break
                    if finished:
                        break
                    else:
                        loop_count += 1
                        continue

        except Exception as ex:
            # For traceback (ex.message is Python 2 only).
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            flow_utils.release_node_lock(self.parameters)

        return True