Example #1
def load(*args):
    parent = Job(job_id="Test Parent Job Id", status="Active",
                 payload=maps.NamedDict())
    if args[0]:
        parent.errors = "Error Message"
        parent.children = ["Test_child"]
    return parent
Example #2
    def run(self):
        # Generate the journal mapping for the nodes
        mapping = utils.generate_journal_mapping(
            self.parameters['Cluster.node_configuration'],
            integration_id=self.parameters.get("TendrlContext.integration_id"))

        # Update output dict
        job = Job(job_id=self.job_id).load()
        job.output[self.__class__.__name__] = json.dumps(mapping)
        job.save()
Example #3
    def run(self):
        integration_id = self.parameters['TendrlContext.integration_id']
        if "provisioner/%s" % integration_id not in NS.node_context.tags:
            return True

        _job_id = str(uuid.uuid4())
        payload = {
            "tags": ["tendrl/integration/monitoring"],
            "run": "monitoring.flows.SetupClusterAlias",
            "status": "new",
            "parameters": self.parameters,
            "type": "monitoring",
            "parent": self.parameters['job_id']
        }
        Job(job_id=_job_id, status="new", payload=payload).save()

        # Wait for 2 mins for the job to complete
        loop_count = 0
        wait_count = 24
        while True:
            child_job_failed = False
            if loop_count >= wait_count:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {
                        "message":
                        "Setting up cluster alias"
                        "not yet complete. Timing out. (%s)" % integration_id
                    },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                )
                return False
            time.sleep(5)
            finished = True
            job = Job(job_id=_job_id).load()
            if job.status not in ["finished", "failed"]:
                finished = False
            elif job.status == "failed":
                child_job_failed = True
            if finished:
                break
            else:
                loop_count += 1
                continue
        if child_job_failed:
            _msg = "Child job setting up cluster alias " \
                   "failed %s" % _job_id
            logger.log("error",
                       NS.publisher_id, {"message": _msg},
                       job_id=self.parameters['job_id'],
                       flow_id=self.parameters['flow_id'])
            return False
        return True
Example #4
def test_constructor():
    '''Testing the constructor involves checking that all needed
    variables are declared and initialized.
    '''
    job = Job()
    assert job.job_id is None
    # Passing Dummy Values
    job = Job(job_id="Test job id", payload="Test Payload",
              status=True, errors=None, children=None,
              locked_by=None, output="Job Done")
    assert job.output == "Job Done"
Example #5
def initiate_config_generation(node_det):
    try:
        job_params = {
            'node_ids': [node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name':
                node_det['plugin'],
                'plugin_conf_params':
                json.dumps(node_det['plugin_conf']).encode('utf-8'),
                'Node.fqdn':
                node_det['fqdn'],
                'Service.name':
                'collectd',
            },
        }
        Job(
            job_id=str(uuid.uuid4()),
            status='new',
            payload=job_params,
        ).save()
    except (EtcdException, EtcdConnectionFailed, Exception) as ex:
        raise TendrlPerformanceMonitoringException(
            'Failed to initiate monitoring configuration for plugin '
            '%s on %s with parameters %s. Error: %s' %
            (node_det['plugin'], node_det['fqdn'],
             json.dumps(node_det['plugin_conf']), str(ex)))
Example #6
def test_gluster_create_ssh_setup_jobs_fails2():
    testParams = maps.NamedDict()
    testParams['Node[]'] = []
    testParams["job_id"] = "test_id"
    testParams["flow_id"] = "test_id"
    with patch.object(etcd, "Client", return_value=etcd.Client()) as client:
        obj = importlib.import_module("tendrl.commons.tests.fixtures.client")
        NS._int.wclient = obj.Client()
        NS._int.client = client
        NS._int.watchers = dict()
        NS.tendrl = maps.NamedDict()
        NS.tendrl.objects = maps.NamedDict()
        NS.tendrl.objects.Job = Job(job_id=1,
                                    status="",
                                    payload=maps.NamedDict()).save()
        NS.gluster_provisioner = importlib.import_module(
            "tendrl.commons.tests.fixtures.plugin").Plugin()
        with patch.object(NS.gluster_provisioner,
                          'setup',
                          return_value=["", ""]):
            with patch.object(authorize_key,
                              'AuthorizeKey',
                              return_value=MockKey()):
                with pytest.raises(FlowExecutionFailedError):
                    utils.gluster_create_ssh_setup_jobs(
                        testParams, skip_current_node=False)
Example #7
def acquire_node_lock(parameters):
    # check node_id is present
    for node in parameters['Node[]']:
        try:
            NS._int.client.read("/nodes/%s" % node)
        except EtcdKeyNotFound:
            raise FlowExecutionFailedError(
                "Unknown Node %s, cannot lock" %
                node)
    # check job is parent or child
    job = Job(job_id=parameters['job_id']).load()
    p_job_id = None
    if "parent" in job.payload:
        p_job_id = job.payload['parent']

    for node in parameters['Node[]']:
        key = "/nodes/%s/locked_by" % node
        try:
            lock_owner_job = NS._int.client.read(key).value
            # If the parent job has acquired the lock on participating nodes,
            # the child job need not worry :)
            if p_job_id == lock_owner_job:
                continue
            else:
                raise FlowExecutionFailedError("Cannot proceed further, "
                                               "Node (%s) is already locked "
                                               "by Job (%s)" % (node,
                                                                lock_owner_job)
                                               )
        except EtcdKeyNotFound:
            # Node is not locked yet; continue checking the remaining nodes
            continue

    for node in parameters['Node[]']:
        key = "/nodes/%s/locked_by" % node
        try:
            # If the node is already locked by the parent job, skip it
            lock_owner_job = NS._int.client.read(key).value
            if p_job_id == lock_owner_job:
                continue
        except EtcdKeyNotFound:
            # Otherwise acquire the lock for the current job
            lock_owner_job = str(parameters["job_id"])
            NS._int.client.write(key, lock_owner_job)
            Event(
                Message(
                    job_id=parameters['job_id'],
                    flow_id=parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Acquired lock (%s) for Node (%s)" % (
                            lock_owner_job, node)
                    }
                )
            )
Example #8
    def update_brick_status(self, fqdn, integration_id, status):
        _job_id = str(uuid.uuid4())
        _params = {
            "TendrlContext.integration_id": integration_id,
            "Node.fqdn": fqdn,
            "Brick.status": status
        }
        _job_payload = {
            "tags": ["tendrl/integration/{0}".format(integration_id)],
            "run": "gluster.flows.UpdateBrickStatus",
            "status": "new",
            "parameters": _params,
            "type": "sds"
        }
        Job(job_id=_job_id, status="new", payload=_job_payload).save()
Example #9
def run():
    try:
        nodes = NS._int.client.read("/nodes")
    except etcd.EtcdKeyNotFound:
        return

    for node in nodes.leaves:
        node_id = node.key.split('/')[-1]
        try:
            NS._int.wclient.write(
                "/nodes/{0}/NodeContext/status".format(node_id),
                "DOWN",
                prevExist=False)
            _node_context = NS.tendrl.objects.NodeContext(
                node_id=node_id).load()
            _tc = NS.tendrl.objects.TendrlContext(node_id=node_id).load()
            _cluster = NS.tendrl.objects.Cluster(
                integration_id=_tc.integration_id).load()

            # Remove stale provisioner tag
            if _cluster.is_managed == "yes":
                _tag = "provisioner/%s" % _cluster.integration_id
                if _tag in _node_context.tags:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_context.tags.remove(_tag)
                    _node_context.save()
                    etcd_utils.delete(_index_key)
                    _msg = "node_sync, STALE provisioner node found! re-configuring monitoring (job-id: %s) on this node"
                    payload = {
                        "tags": ["tendrl/node_%s" % node_id],
                        "run": "tendrl.flows.ConfigureMonitoring",
                        "status": "new",
                        "parameters": {
                            'TendrlContext.integration_id': _tc.integration_id
                        },
                        "type": "node"
                    }
                    _job_id = str(uuid.uuid4())
                    Job(job_id=_job_id, status="new", payload=payload).save()
                    Event(
                        Message(priority="debug",
                                publisher=NS.publisher_id,
                                payload={"message": _msg % _job_id}))

        except etcd.EtcdAlreadyExist:
            pass
    return
Example #10
def initiate_config_generation(node_det):
    try:
        plugin = NodeMonitoringPlugin(plugin_name=node_det['plugin'],
                                      node_id=node_det.get('node_id'))
        if plugin.exists():
            # More capabilities, such as a fixed number of retries, can be
            # added here. This is the common point through which all monitoring
            # plugin configuration jobs land in etcd, so any action here is
            # reflected in all of them.
            return
        job_params = {
            'tags': ["tendrl/node_%s" % node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name':
                node_det['plugin'],
                'plugin_conf_params':
                json.dumps(node_det['plugin_conf']).encode('utf-8'),
                'Node.fqdn':
                node_det['fqdn'],
                'Service.name':
                'collectd',
            },
        }
        job_id = str(uuid.uuid4())
        Job(
            job_id=job_id,
            status='new',
            payload=job_params,
        ).save()
        NodeMonitoringPlugin(plugin_name=node_det['plugin'],
                             node_id=node_det.get('node_id'),
                             job_id=job_id).save(update=False)
    except (EtcdException, AttributeError) as ex:
        Event(
            ExceptionMessage(
                priority="debug",
                publisher=NS.publisher_id,
                payload={
                    "message":
                    'Failed to initiate monitoring configuration for'
                    ' plugin %s on %s with parameters %s.' %
                    (node_det['plugin'], node_det['fqdn'],
                     json.dumps(node_det['plugin_conf'])),
                    "exception":
                    ex
                }))
Example #11
def initiate_config_generation(node_det):
    try:
        plugin = NodeMonitoringPlugin(
            plugin_name=node_det['plugin'],
            node_id=node_det.get('node_id')
        )
        if plugin.exists():
            # More capabilities, such as a fixed number of retries, can be
            # added here. This is the common point through which all monitoring
            # plugin configuration jobs land in etcd, so any action here is
            # reflected in all of them.
            return
        job_params = {
            'node_ids': [node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name': node_det['plugin'],
                'plugin_conf_params': json.dumps(
                    node_det['plugin_conf']
                ).encode('utf-8'),
                'Node.fqdn': node_det['fqdn'],
                'Service.name': 'collectd',
            },
        }
        job_id = str(uuid.uuid4())
        Job(
            job_id=job_id,
            status='new',
            payload=job_params,
        ).save()
        NodeMonitoringPlugin(
            plugin_name=node_det['plugin'],
            node_id=node_det.get('node_id'),
            job_id=job_id
        ).save(update=False)
    except (EtcdException, EtcdConnectionFailed, Exception) as ex:
        raise TendrlPerformanceMonitoringException(
            'Failed to initiate monitoring configuration for plugin '
            '%s on %s with parameters %s. Error: %s' % (
                node_det['plugin'],
                node_det['fqdn'],
                json.dumps(node_det['plugin_conf']),
                str(ex)
            )
        )
Example #12
def update_dashboard(res_name, res_type, integration_id, action):
    _job_id = str(uuid.uuid4())
    _params = {
        "TendrlContext.integration_id": NS.tendrl_context.integration_id,
        "TendrlContext.cluster_name": NS.tendrl_context.cluster_name,
        "Trigger.resource_name": res_name,
        "Trigger.resource_type": res_type,
        "Trigger.action": action
    }
    _job_payload = {
        "tags": ["tendrl/integration/monitoring"],
        "run": "monitoring.flows.UpdateDashboard",
        "status": "new",
        "parameters": _params,
        "type": "monitoring"
    }
    Job(job_id=_job_id, status="new", payload=_job_payload).save()

    return _job_id
Example #13
def ceph_create_ssh_setup_jobs(parameters):
    node_list = parameters['Node[]']
    ssh_job_ids = []
    ssh_setup_script = NS.ceph_provisioner.get_plugin().setup()
    if len(node_list) > 0:
        for node in node_list:
            if NS.node_context.node_id != node:
                new_params = parameters.copy()
                new_params['Node[]'] = [node]
                new_params['ssh_setup_script'] = ssh_setup_script
                # create same flow for each node in node list except $this
                payload = {
                    "tags": ["tendrl/node_%s" % node],
                    "run": "tendrl.flows.SetupSsh",
                    "status": "new",
                    "parameters": new_params,
                    "parent": parameters['job_id'],
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                Job(job_id=_job_id,
                    status="new",
                    payload=payload).save()
                ssh_job_ids.append(_job_id)
                Event(
                    Message(
                        job_id=parameters['job_id'],
                        flow_id=parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Created SSH setup job %s for node"
                                            " %s" % (_job_id, node)
                                 }
                    )
                )
    return ssh_job_ids
Example #14
def test_save(mock_save):
    job = Job()
    payload = maps.NamedDict()
    payload['parent'] = "Test Parent Job Id"
    job.payload = payload
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "true"
        job.save()
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "failed"
        job.save()
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(True)
        job.status = "failed"
        job.save()
Example #15
def test_render():
    job = Job()
    assert job.render() is not None
Example #16
def test_save(mock_save):
    job = Job()
    payload = maps.NamedDict()
    payload['parent'] = "Test Parent Job Id"
    job.payload = payload
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "true"
        job.save()
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "failed"
        job.save()
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(True)
        job.status = "failed"
        job.save()
Example #17
def test_render():
    job = Job()
    assert job.render() is not None
Example #18
def process_job(job):
    jid = job.key.split('/')[-1]
    job_status_key = "/queue/%s/status" % jid
    job_lock_key = "/queue/%s/locked_by" % jid
    NS.node_context = NS.node_context.load()
    # Check job not already locked by some agent
    try:
        _locked_by = NS._int.client.read(job_lock_key).value
        if _locked_by:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # Check job not already "finished", or "processing"
    try:
        _status = NS._int.client.read(job_status_key).value
        if _status in ["finished", "processing"]:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # tendrl-node-agent tagged as tendrl/monitor will ensure
    # >10 min old "new" jobs are timed out and marked as
    # "failed" (the parent job of these jobs will also be
    # marked as "failed")
    if "tendrl/monitor" in NS.node_context.tags:
        _job_valid_until_key = "/queue/%s/valid_until" % jid
        _valid_until = None
        try:
            _valid_until = NS._int.client.read(_job_valid_until_key).value
        except etcd.EtcdKeyNotFound:
            pass

        if _valid_until:
            _now_epoch = (time_utils.now() - datetime.datetime(
                1970, 1, 1).replace(tzinfo=utc)).total_seconds()
            if int(_now_epoch) >= int(_valid_until):
                # Job has "new" status since 10 minutes,
                # mark status as "failed" and Job.error =
                # "Timed out"
                try:
                    NS._int.wclient.write(job_status_key,
                                          "failed",
                                          prevValue="new")
                except etcd.EtcdCompareFailed:
                    pass
                else:
                    job = Job(job_id=jid).load()
                    _msg = str("Timed-out (>10min as 'new')")
                    job.errors = _msg
                    job.save()
                    return
        else:
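            # No deadline recorded yet; give this "new" job a 10-minute
            # window before it can be timed out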
            _now_plus_10 = time_utils.now() + datetime.timedelta(minutes=10)
            _epoch_start = datetime.datetime(1970, 1, 1).replace(tzinfo=utc)

            # noinspection PyTypeChecker
            _now_plus_10_epoch = (_now_plus_10 - _epoch_start).total_seconds()
            NS._int.wclient.write(_job_valid_until_key,
                                  int(_now_plus_10_epoch))

    job = Job(job_id=jid).load()
    if job.payload["type"] == NS.type and \
            job.status == "new":
        # Job routing
        # Flows created by tendrl-api use 'tags' from flow
        # definition to target jobs
        _tag_match = False
        if job.payload.get("tags", []):
            for flow_tag in job.payload['tags']:
                if flow_tag in NS.node_context.tags:
                    _tag_match = True

        if not _tag_match:
            _job_tags = ", ".join(job.payload.get("tags", []))
            _msg = "Node (%s)(type: %s)(tags: %s) will not " \
                   "process job-%s (tags: %s)" % \
                   (NS.node_context.node_id, NS.type,
                    NS.node_context.tags, jid,
                    _job_tags)
            Event(
                Message(priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": _msg}))
            return

        job_status_key = "/queue/%s/status" % job.job_id
        job_lock_key = "/queue/%s/locked_by" % job.job_id
        try:
            lock_info = dict(node_id=NS.node_context.node_id,
                             fqdn=NS.node_context.fqdn,
                             tags=NS.node_context.tags,
                             type=NS.type)
            NS._int.wclient.write(job_lock_key, json.dumps(lock_info))
            NS._int.wclient.write(job_status_key,
                                  "processing",
                                  prevValue="new")
        except etcd.EtcdCompareFailed:
            # job is already being processed by some tendrl
            # agent
            return

        the_flow = None
        try:
            current_ns, flow_name, obj_name = \
                _extract_fqdn(job.payload['run'])

            if obj_name:
                runnable_flow = current_ns.ns.get_obj_flow(obj_name, flow_name)
            else:
                runnable_flow = current_ns.ns.get_flow(flow_name)

            the_flow = runnable_flow(parameters=job.payload['parameters'],
                                     job_id=job.job_id)
            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Processing Job %s" % job.job_id}))

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Running Flow %s" % job.payload['run']
                        }))
            the_flow.run()
            try:
                NS._int.wclient.write(job_status_key,
                                      "finished",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'finished', " \
                       "current job status invalid"
                raise FlowExecutionFailedError(_msg)

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Job (%s):  Finished "
                            "Flow %s" % (job.job_id, job.payload['run'])
                        }))
        except (FlowExecutionFailedError, AtomExecutionFailedError,
                Exception) as e:
            _trace = traceback.format_exc()
            _msg = "Failure in Job %s Flow %s with error:" % \
                   (job.job_id, job.payload['run'])
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": _msg + _trace,
                                     "exception": e
                                 }))
            if the_flow:
                Event(
                    Message(job_id=job.job_id,
                            flow_id=the_flow.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))
            else:
                Event(
                    Message(priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))

            try:
                NS._int.wclient.write(job_status_key,
                                      "failed",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'failed', current" \
                       "job status invalid"
                raise FlowExecutionFailedError(_msg)
            else:
                job = job.load()
                job.errors = _trace
                job.save()
Example #19
def load_job_new(*args):
    return Job(job_id="uuid", status='new')
Example #20
def gluster_create_ssh_setup_jobs(parameters, skip_current_node=False):
    node_list = copy.deepcopy(parameters['Node[]'])

    ssh_job_ids = []
    ssh_key, err = NS.gluster_provisioner.get_plugin().setup()
    if err != "":
        _msg = "Error generating ssh key on node %s" % NS.node_context.node_id
        Event(
            Message(
                job_id=parameters['job_id'],
                flow_id=parameters['flow_id'],
                priority="error",
                publisher=NS.publisher_id,
                payload={"message": _msg
                         }
            )
        )
        raise FlowExecutionFailedError(_msg)

    if not skip_current_node:
        ret_val, err = authorize_key.AuthorizeKey(ssh_key).run()
        if ret_val is not True or err != "":
            _msg = "Error adding authorized key for node %s" % \
                   NS.node_context.node_id
            Event(
                Message(
                    job_id=parameters['job_id'],
                    flow_id=parameters['flow_id'],
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": _msg
                    }
                )
            )
            raise FlowExecutionFailedError(_msg)
        node_list.remove(NS.node_context.node_id)

    for node in node_list:
        if node == NS.node_context.node_id:
            continue
        new_params = parameters.copy()
        new_params['Node[]'] = [node]
        new_params['ssh_key'] = ssh_key
        # Create same flow for each node from list except this one
        payload = {
            "tags": ["tendrl/node_%s" % node],
            "run": "tendrl.flows.AuthorizeSshKey",
            "status": "new",
            "parameters": new_params,
            "parent": parameters['job_id'],
            "type": "node"
        }
        _job_id = str(uuid.uuid4())
        Job(
            job_id=_job_id,
            status="new",
            payload=payload
        ).save()
        ssh_job_ids.append(_job_id)
        Event(
            Message(
                job_id=parameters['job_id'],
                flow_id=parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={"message": "Created SSH setup job %s for node %s" %
                                    (_job_id, node)
                         }
            )
        )
    return ssh_job_ids
Example #21
    def run(self):
        integration_id = self.parameters['TendrlContext.integration_id']
        if integration_id is None:
            _msg = "TendrlContext.integration_id cannot be empty"
            raise FlowExecutionFailedError(_msg)
        if "Cluster.node_configuration" not in self.parameters.keys():
            _msg = "Cluster.node_configuration cannot be empty"
            raise FlowExecutionFailedError(_msg)

        ssh_job_ids = []
        ssh_setup_script = NS.ceph_provisioner.get_plugin().setup()
        for node_id in self.parameters["Cluster.node_configuration"].keys():
            new_params = dict()
            new_params['Node[]'] = [node_id]
            new_params['ssh_setup_script'] = ssh_setup_script
            payload = {
                "tags": ["tendrl/node_%s" % node_id],
                "run": "tendrl.flows.SetupSsh",
                "status": "new",
                "parameters": new_params,
                "parent": self.parameters['job_id'],
                "type": "node"
            }
            _job_id = str(uuid.uuid4())
            Job(job_id=_job_id, status="new", payload=payload).save()
            ssh_job_ids.append(_job_id)
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Created SSH setup job %s for node"
                            " %s" % (_job_id, node_id)
                        }))
        while True:
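            # Poll the SSH setup child jobs until they all finish or any fails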
            time.sleep(3)
            all_status = {}
            for job_id in ssh_job_ids:
                # noinspection PyUnresolvedReferences
                all_status[job_id] = NS._int.client.read("/queue/%s/status" %
                                                         job_id).value

            _failed = {
                _jid: status
                for _jid, status in all_status.iteritems()
                if status == "failed"
            }
            if _failed:
                raise FlowExecutionFailedError(
                    "SSH setup failed for jobs %s cluster %s" %
                    (str(_failed), integration_id))
            if all([status == "finished" for status in all_status.values()]):
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message": "SSH setup completed for all nodes"
                            }))
                break
        Event(
            Message(job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Adding OSDs to ceph cluster %s" % integration_id
                    }))
        add_osds(self.parameters)
Example #22
    def run(self):
        self.pre_run = []
        self.atoms = []
        self.post_run = []

        integration_id = self.parameters['TendrlContext.integration_id']
        tendrl_ns.tendrl_context.integration_id = integration_id
        tendrl_ns.tendrl_context.save()
        node_list = self.parameters['Node[]']
        if len(node_list) > 1:
            # This is the master node for this flow
            for node in node_list:
                if tendrl_ns.node_context.node_id != node:
                    new_params = self.parameters.copy()
                    new_params['Node[]'] = [node]
                    # create same flow for each node in node list except $this
                    # TODO(team) The .save() below needs to save the job exactly as the API does
                    Job(job_id=str(uuid.uuid4()),
                        integration_id=integration_id,
                        run="tendrl.node_agent.flows.ImportCluster",
                        status="new",
                        parameters=new_params,
                        type="node",
                        parent=self.parameters['request_id'],
                        node_ids=[node]).save()

                    Event(
                        Message(
                            priority="info",
                            publisher=tendrl_ns.publisher_id,
                            payload={
                                "message": "Import cluster job created on node"
                                " %s" % node
                            },
                            request_id=self.parameters['request_id'],
                            flow_id=self.uuid,
                            cluster_id=tendrl_ns.tendrl_context.integration_id,
                        )
                    )

        Event(
            Message(
                priority="info",
                publisher=tendrl_ns.publisher_id,
                payload={
                    "message": "Import cluster job started on node %s" %
                    tendrl_ns.node_context.fqdn
                },
                request_id=self.parameters['request_id'],
                flow_id=self.uuid,
                cluster_id=tendrl_ns.tendrl_context.integration_id,
            )
        )
        sds_name = self.parameters['DetectedCluster.sds_pkg_name']
        if "ceph" in sds_name.lower():
            node_context = tendrl_ns.node_context.load()
            if "mon" in node_context.tags:
                import_ceph(
                    tendrl_ns.tendrl_context.integration_id,
                    self.parameters['request_id'],
                    self.uuid
                )
        else:
            import_gluster(
                tendrl_ns.tendrl_context.integration_id,
                self.parameters['request_id'],
                self.uuid
            )

        # ImportCluster's run() should not return until the new cluster entry
        # is updated in etcd, because the job is marked as finished once this
        # function returns. Returning early might lead to inconsistency in the
        # API functionality. The loop below waits for the cluster details
        # to be updated in etcd.
        while True:
            gevent.sleep(2)
            try:
                tendrl_ns.etcd_orm.client.read("/clusters/%s" % integration_id)
                break
            except etcd.EtcdKeyNotFound:
                continue
Example #23
def load_job_failed(*args):
    return Job(job_id="uuid", status='failed')
Example #24
def load_unfinished_job(*args):
    return Job(job_id="uuid", status='in_progress')
Example #25
    def run(self):
        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Generating brick mapping for gluster volume"
                },
                job_id=self.parameters["job_id"],
                flow_id=self.parameters["flow_id"],
                cluster_id=NS.tendrl_context.integration_id,
            )
        )
        brick_count = self.parameters.get('Volume.brick_count')
        subvol_size = self.parameters.get('Volume.subvol_size')
        message = ""
        # get brick_count number of bricks from all the selected nodes

        nodes = {}
        for node in self.parameters.get('Cluster.node_configuration'):
            key = "nodes/%s/NodeContext/fqdn" % node
            host = NS._int.client.read(key).value
            nodes[host] = []

        hosts = NS._int.client.read(
            '/clusters/%s/Bricks/free/' % NS.tendrl_context.integration_id
        )
        for host in hosts.leaves:
            host = host.key.split("/")[-1]
            bricks = NS._int.client.read(
                '/clusters/%s/Bricks/free/%s' % (
                    NS.tendrl_context.integration_id,
                    host
                )
            )
            for brick in bricks.leaves:
                brick = brick.key.split("/")[-1]
                if host in nodes:
                    if len(nodes[host]) < brick_count:
                        nodes[host].append(brick)

        # Form a brick list such that when you fill sub volumes with
        # bricks from this list, it should honour the failure domains

        brick_list = []
        total_bricks = len(nodes) * brick_count
        for iterator in range(total_bricks):
            brick_list.append("")

        counter = 0
        node_count = len(nodes)
        for key, value in nodes.iteritems():
            if len(value) < brick_count:
                message = "Host %s has %s bricks which is less than" + \
                          " bricks per host %s" % (
                              key,
                              len(value),
                              brick_count
                          )
                job = Job(job_id=self.parameters["job_id"]).load()
                res = {"message": message, "result": [[]], "optimal": False}
                job.output["GenerateBrickMapping"] = json.dumps(res)
                job.save()
                return False

            # Interleave bricks across hosts: slot (node_count * i + counter)
            # holds this host's i-th brick, so consecutive entries in
            # brick_list come from different hosts
            for i in range(brick_count):
                brick_list[node_count * i + counter] = value[i]
            counter += 1

        # Check if total number of bricks available is less than the
        # sub volume size. If its less, then return accordingly

        if len(brick_list) < subvol_size:
            message = "Total bricks available %s less than subvol_size %s" % (
                len(brick_list), subvol_size
            )
            job = Job(job_id=self.parameters["job_id"]).load()
            res = {"message": message, "result": [[]], "optimal": False}
            job.output["GenerateBrickMapping"] = json.dumps(res)
            job.save()
            return False

        # Fill the result list with bricks from brick_list; keep filling
        # until the brick list is exhausted so that the number of
        # sub-volumes is the maximum possible for the available bricks

        result = []
        lower_bound = 0
        upper_bound = subvol_size
        while True:
            if upper_bound > len(brick_list):
                break
            subvol = brick_list[lower_bound:upper_bound]
            result.append(subvol)
            lower_bound = upper_bound
            upper_bound += subvol_size

        # check if the mapping provided is optimal as per expected
        # failure domain or not

        optimal = True
        if node_count < subvol_size:
            optimal = False

        # Write the result back to the job

        job = Job(job_id=self.parameters["job_id"]).load()
        res = {"message": message, "result": result, "optimal": optimal}
        job.output["GenerateBrickMapping"] = json.dumps(res)
        job.save()

        return True
Example #26
    def run(self):
        try:
            # Lock nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            sds_name = self.parameters['DetectedCluster.sds_pkg_name']

            if not self.parameters.get('import_after_expand', False) and \
                not self.parameters.get('import_after_create', False):

                # check if gdeploy is already provisioned in this cluster;
                # if not, it has to be provisioned here
                if sds_name.find("gluster") > -1 and \
                    not self.parameters.get("gdeploy_provisioned", False) and \
                    not self._probe_and_mark_provisioner(
                        self.parameters["Node[]"], integration_id
                    ):
                    create_cluster_utils.install_gdeploy()
                    create_cluster_utils.install_python_gdeploy()
                    ssh_job_ids = create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters)

                    while True:
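                        # Poll the SSH setup child jobs until all finish
                        # or any one fails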
                        gevent.sleep(3)
                        all_status = {}
                        for job_id in ssh_job_ids:
                            all_status[job_id] = NS._int.client.read(
                                "/queue/%s/status" % job_id).value

                        _failed = {
                            _jid: status
                            for _jid, status in all_status.iteritems()
                            if status == "failed"
                        }
                        if _failed:
                            raise AtomExecutionFailedError(
                                "SSH setup failed for jobs %s cluster %s" %
                                (str(_failed), integration_id))
                        if all([
                                status == "finished"
                                for status in all_status.values()
                        ]):
                            Event(
                                Message(
                                    job_id=self.parameters['job_id'],
                                    flow_id=self.parameters['flow_id'],
                                    priority="info",
                                    publisher=NS.publisher_id,
                                    payload={
                                        "message":
                                        "SSH setup completed for all nodes in cluster %s"
                                        % integration_id
                                    }))
                            # set this node as gluster provisioner
                            tags = ["provisioner/%s" % integration_id]
                            NS.node_context = NS.node_context.load()
                            tags += NS.node_context.tags
                            NS.node_context.tags = list(set(tags))
                            NS.node_context.save()

                            # set gdeploy_provisioned to true so that no other
                            # node tries to configure gdeploy
                            self.parameters['gdeploy_provisioned'] = True
                            break

            NS.tendrl_context = NS.tendrl_context.load()
            NS.tendrl_context.integration_id = integration_id
            _detected_cluster = NS.tendrl.objects.DetectedCluster().load()
            NS.tendrl_context.cluster_id = _detected_cluster.detected_cluster_id
            NS.tendrl_context.cluster_name = _detected_cluster.detected_cluster_name
            NS.tendrl_context.sds_name = _detected_cluster.sds_pkg_name
            NS.tendrl_context.sds_version = _detected_cluster.sds_pkg_version
            NS.tendrl_context.save()
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Registered Node %s with cluster %s" %
                            (NS.node_context.node_id,
                             NS.tendrl_context.integration_id)
                        }))

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except $this
                        payload = {
                            "tags": ["tendrl/node_%s" % node],
                            "run": "tendrl.flows.ImportCluster",
                            "status": "new",
                            "parameters": new_params,
                            "parent": self.parameters['job_id'],
                            "type": "node"
                        }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        Job(job_id=_job_id, status="new",
                            payload=payload).save()
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Importing (job: %s) Node %s to cluster %s"
                                    % (_job_id, node, integration_id)
                                }))

            if "ceph" in sds_name.lower():
                node_context = NS.node_context.load()
                is_mon = False
                for tag in node_context.tags:
                    mon_tag = NS.compiled_definitions.get_parsed_defs(
                    )['namespace.tendrl']['tags']['ceph-mon']
                    if mon_tag in tag:
                        is_mon = True
                if is_mon:
                    # Check if minimum required version of underlying ceph
                    # cluster met. If not fail the import task
                    detected_cluster = NS.tendrl.objects.DetectedCluster(
                    ).load()
                    detected_cluster_ver = detected_cluster.sds_pkg_version.split(
                        '.')
                    maj_ver = detected_cluster_ver[0]
                    min_ver = detected_cluster_ver[1]
                    reqd_ceph_ver = NS.compiled_definitions.get_parsed_defs(
                    )['namespace.tendrl']['min_reqd_ceph_ver']
                    req_maj_ver, req_min_ver, req_rel = reqd_ceph_ver.split(
                        '.')
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Check: Minimum required version (%s.%s.%s) of Ceph Storage"
                                % (req_maj_ver, req_min_ver, req_rel)
                            }))
                    if int(maj_ver) < int(req_maj_ver) or \
                        int(min_ver) < int(req_min_ver):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="error",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Error: Minimum required version (%s.%s.%s) "
                                    "doesnt match that of detected Ceph Storage (%s.%s.%s)"
                                    % (req_maj_ver, req_min_ver, req_rel,
                                       maj_ver, min_ver, 0)
                                }))

                        raise FlowExecutionFailedError(
                            "Detected ceph version: %s"
                            " is lesser than required version: %s" %
                            (detected_cluster.sds_pkg_version, reqd_ceph_ver))
                    import_ceph(self.parameters)
            else:
                # Check if minimum required version of underlying gluster
                # cluster met. If not fail the import task
                detected_cluster = NS.tendrl.objects.DetectedCluster().load()
                detected_cluster_ver = detected_cluster.sds_pkg_version.split(
                    '.')
                maj_ver = detected_cluster_ver[0]
                min_ver = detected_cluster_ver[1]
                reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['min_reqd_gluster_ver']
                req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Check: Minimum required version (%s.%s.%s) of Gluster Storage"
                            % (req_maj_ver, req_min_ver, req_rel)
                        }))
                if int(maj_ver) < int(req_maj_ver) or \
                    int(min_ver) < int(req_min_ver):
                    Event(
                        Message(
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Error: Minimum required version (%s.%s.%s) "
                                "doesnt match that of detected Gluster Storage (%s.%s.%s)"
                                % (req_maj_ver, req_min_ver, req_rel, maj_ver,
                                   min_ver, 0)
                            }))

                    raise FlowExecutionFailedError(
                        "Detected gluster version: %s"
                        " is lesser than required version: %s" %
                        (detected_cluster.sds_pkg_version, reqd_gluster_ver))
                import_gluster(self.parameters)

            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Waiting for participant nodes %s to be "
                            "imported %s" % (node_list, integration_id)
                        }))

            # An import is successful once all Node[] register to
            # /clusters/:integration_id/nodes/:node_id
            while True:
                _all_node_status = []
                gevent.sleep(3)
                for node_id in self.parameters['Node[]']:
                    _status = NS.tendrl.objects.ClusterNodeContext(node_id=node_id).exists() \
                        and NS.tendrl.objects.ClusterTendrlContext(
                            integration_id=integration_id
                        ).exists()
                    _all_node_status.append(_status)
                if _all_node_status:
                    if all(_all_node_status):
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Import Cluster completed for all nodes "
                                    "in cluster %s" % integration_id
                                }))

                        break

            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Sucessfully imported cluster %s" % integration_id
                        }))
        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            create_cluster_utils.release_node_lock(self.parameters)

        return True
Example #27
    def run(self):
        integration_id = self.parameters['TendrlContext.integration_id']
        # Wait till DetectedCluster is populated for the nodes
        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "SDS install and config completed, "
                               "Waiting for tendrl-node-agent to "
                               "detect newly installed sds details %s %s" % (
                                   integration_id,
                                   self.parameters['Node[]']
                               )
                }
            )
        )

        while True:
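            # Poll until DetectedCluster is populated on every node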
            time.sleep(3)
            all_status = []
            for node in self.parameters['Node[]']:
                try:
                    NS._int.client.read(
                        "/nodes/%s/DetectedCluster/detected_cluster_id" %
                        node
                    )
                    all_status.append(True)
                except etcd.EtcdKeyNotFound:
                    all_status.append(False)
            if all_status:
                if all(all_status):
                    break

        # Create the params list for import cluster flow
        new_params = dict()
        new_params['Node[]'] = self.parameters['Node[]']
        new_params['TendrlContext.integration_id'] = integration_id

        # Get node context for one of the nodes from list
        detected_cluster_id = NS._int.client.read(
            "nodes/%s/DetectedCluster/detected_cluster_id" %
            self.parameters['Node[]'][0]
        ).value
        sds_pkg_name = NS._int.client.read(
            "nodes/%s/DetectedCluster/sds_pkg_name" % self.parameters['Node['
                                                                      ']'][0]
        ).value
        sds_pkg_version = NS._int.client.read(
            "nodes/%s/DetectedCluster/sds_pkg_version" % self.parameters[
                'Node[]'][0]
        ).value
        new_params['DetectedCluster.sds_pkg_name'] = \
            sds_pkg_name
        new_params['DetectedCluster.sds_pkg_version'] = \
            sds_pkg_version
        new_params['import_after_create'] = True
        payload = {"tags": ["detected_cluster/%s" % detected_cluster_id],
                   "run": "tendrl.flows.ImportCluster",
                   "status": "new",
                   "parameters": new_params,
                   "parent": self.parameters['job_id'],
                   "type": "node"
                   }
        _job_id = str(uuid.uuid4())

        Job(job_id=_job_id,
            status="new",
            payload=payload).save()
        Event(
            Message(
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                priority="info",
                publisher=NS.publisher_id,
                payload={"message": "Please wait while Tendrl imports newly "
                                    "created %s SDS Cluster %s"
                         " Import job id :%s" % (sds_pkg_name, integration_id,
                                                 _job_id)
                         }
            )
        )

        return True
Example #28
    def run(self):
        try:
            integration_id = self.parameters['TendrlContext.integration_id']

            # Lock nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            NS.tendrl_context = NS.tendrl_context.load()

            # TODO(team) when Tendrl supports create/expand/shrink cluster
            # setup passwordless ssh for all gluster nodes with given
            # integration_id (check
            # /indexes/tags/tendrl/integration/$integration_id for list of
            # nodes in the cluster)

            node_list = self.parameters['Node[]']
            cluster_nodes = []
            if len(node_list) > 1:
                # This is the master node for this flow
                for node in node_list:
                    if NS.node_context.node_id != node:
                        new_params = self.parameters.copy()
                        new_params['Node[]'] = [node]
                        # create same flow for each node in node list except
                        #  $this
                        payload = {"tags": ["tendrl/node_%s" % node],
                                   "run": "tendrl.flows.ImportCluster",
                                   "status": "new",
                                   "parameters": new_params,
                                   "parent": self.parameters['job_id'],
                                   "type": "node"
                                   }
                        _job_id = str(uuid.uuid4())
                        cluster_nodes.append(_job_id)
                        Job(job_id=_job_id,
                            status="new",
                            payload=payload).save()
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message": "Importing (job: %s) Node %s "
                                               "to cluster %s" %
                                    (_job_id, node, integration_id)
                                }
                            )
                        )
            # Check if the minimum required version of the underlying
            # gluster cluster is met. If not, fail the import task
            cluster_ver = \
                NS.tendrl_context.sds_version.split('.')
            maj_ver = cluster_ver[0]
            min_ver = re.findall(r'\d+', cluster_ver[1])[0]
            reqd_gluster_ver = NS.compiled_definitions.get_parsed_defs()[
                'namespace.tendrl'
            ]['min_reqd_gluster_ver']
            req_maj_ver, req_min_ver, req_rel = reqd_gluster_ver.split('.')
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "Check: Minimum required version ("
                                   "%s.%s.%s) of Gluster Storage" %
                        (req_maj_ver, req_min_ver, req_rel)
                    }
                )
            )
            ver_check_failed = False
            if int(maj_ver) < int(req_maj_ver):
                ver_check_failed = True
            elif int(maj_ver) == int(req_maj_ver) and \
                    int(min_ver) < int(req_min_ver):
                ver_check_failed = True

            if ver_check_failed:
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Error: Minimum required version "
                                       "(%s.%s.%s) "
                            "doesnt match that of detected Gluster "
                                       "Storage (%s.%s.%s)" %
                            (req_maj_ver, req_min_ver, req_rel,
                             maj_ver, min_ver, 0)
                        }
                    )
                )

                raise FlowExecutionFailedError(
                    "Detected gluster version: %s"
                    " is lesser than required version: %s" %
                    (
                        NS.tendrl_context.sds_version,
                        reqd_gluster_ver
                    )
                )
            if not import_gluster(self.parameters):
                return False

            if len(node_list) > 1:
                Event(
                    Message(
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Waiting for participant nodes %s to "
                                       "be "
                            "imported %s" % (node_list, integration_id)
                        }
                    )
                )
                loop_count = 0
                # Wait (number of participant nodes) * 6 minutes
                # for the imports to complete
                wait_count = (len(node_list) - 1) * 36
                while True:
                    parent_job = Job(job_id=self.parameters['job_id']).load()
                    if loop_count >= wait_count:
                        Event(
                            Message(
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message": "Import jobs not yet complete "
                                    "on all nodes. Timing out. (%s, %s)" %
                                    (str(node_list), integration_id)
                                }
                            )
                        )
                        return False
                    time.sleep(10)
                    finished = True
                    for child_job_id in parent_job.children:
                        child_job = Job(job_id=child_job_id).load()
                        if child_job.status != "finished":
                            finished = False
                            break
                    if finished:
                        break
                    else:
                        loop_count += 1
                        continue

        except Exception as ex:
            # For traceback
            Event(
                ExceptionMessage(
                    priority="error",
                    publisher=NS.publisher_id,
                    payload={
                        "message": ex.message,
                        "exception": ex
                    }
                )
            )
            # raising exception to mark job as failed
            raise ex
        finally:
            # release lock
            create_cluster_utils.release_node_lock(self.parameters)

        return True
Example #29
def load_job_finished(*args):
    return Job(job_id="uuid", status='finished')
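Stubs like load_job_finished are typically patched over Job.load in unit tests so that child-job wait loops, such as the one in Example #28, exit on their first pass. A minimal sketch follows, assuming the mock library and that Job is importable from the module under test; the test name and assertion are illustrative, not taken from the Tendrl test suite.

import mock  # use unittest.mock on Python 3


@mock.patch.object(Job, 'load', load_job_finished)
def test_child_job_reports_finished():
    # With Job.load patched to the stub above, any poll of a child job
    # immediately sees a "finished" status, so the parent flow's wait
    # loop terminates without sleeping.
    job = Job(job_id="any-child-id").load()
    assert job.status == "finished"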
Example #30
    def run(self):
        try:
            # Lock nodes
            create_cluster_utils.acquire_node_lock(self.parameters)
            integration_id = self.parameters['TendrlContext.integration_id']
            if integration_id is None:
                raise FlowExecutionFailedError(
                    "TendrlContext.integration_id cannot be empty")

            supported_sds = NS.compiled_definitions.get_parsed_defs(
            )['namespace.tendrl']['supported_sds']
            sds_name = self.parameters["TendrlContext.sds_name"]
            if sds_name not in supported_sds:
                raise FlowExecutionFailedError("SDS (%s) not supported" %
                                               sds_name)

            ssh_job_ids = []
            if "ceph" in sds_name:
                ssh_job_ids = create_cluster_utils.ceph_create_ssh_setup_jobs(
                    self.parameters)
            else:
                ssh_job_ids = \
                    create_cluster_utils.gluster_create_ssh_setup_jobs(
                        self.parameters,
                        skip_current_node=True
                    )

            while True:
                time.sleep(3)
                all_status = {}
                for job_id in ssh_job_ids:
                    all_status[job_id] = NS._int.client.read(
                        "/queue/%s/status" % job_id).value

                _failed = {
                    _jid: status
                    for _jid, status in all_status.iteritems()
                    if status == "failed"
                }
                if _failed:
                    raise FlowExecutionFailedError(
                        "SSH setup failed for jobs %s cluster %s" %
                        (str(_failed), integration_id))
                if all(
                    [status == "finished" for status in all_status.values()]):
                    Event(
                        Message(job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                priority="info",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "SSH setup completed for all "
                                    "nodes in cluster %s" % integration_id
                                }))

                    break

            # SSH setup jobs finished above, now install sds
            # bits and expand the cluster
            if "ceph" in sds_name:
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Expanding ceph cluster %s" % integration_id
                            }))
                ceph_help.expand_cluster(self.parameters)
            else:
                Event(
                    Message(job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            priority="info",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Expanding Gluster Storage"
                                " Cluster %s" % integration_id
                            }))
                gluster_help.expand_gluster(self.parameters)
            Event(
                Message(
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "SDS install/config completed on newly "
                        "expanded nodes, Please wait while "
                        "tendrl-node-agents detect sds details on the newly "
                        "expanded nodes %s" % self.parameters['Node[]']
                    }))

            # Wait till DetectedCluster info is populated for the nodes
            while True:
                time.sleep(3)
                all_status = []
                detected_cluster = ""
                different_cluster_id = False
                dc = ""
                for node in self.parameters['Node[]']:
                    try:
                        dc = NS._int.client.read(
                            "/nodes/%s/DetectedCluster/detected_cluster_id" %
                            node).value
                        if not detected_cluster:
                            detected_cluster = dc
                        else:
                            if detected_cluster != dc:
                                all_status.append(False)
                                different_cluster_id = True
                                break
                        all_status.append(True)
                    except etcd.EtcdKeyNotFound:
                        all_status.append(False)
                if different_cluster_id:
                    raise FlowExecutionFailedError(
                        "Seeing different detected cluster id in"
                        " different nodes. %s and %s" % (detected_cluster, dc))

                if all_status:
                    if all(all_status):
                        break

            # Create the params list for import cluster flow
            new_params = dict()
            new_params['Node[]'] = self.parameters['Node[]']
            new_params['TendrlContext.integration_id'] = integration_id

            # Get DetectedCluster details from one of the nodes in the list
            sds_pkg_name = NS._int.client.read(
                "nodes/%s/DetectedCluster/sds_pkg_name" %
                self.parameters['Node[]'][0]).value
            new_params['import_after_expand'] = True
            sds_pkg_version = NS._int.client.read(
                "nodes/%s/DetectedCluster/sds_pkg_version" %
                self.parameters['Node[]'][0]).value
            new_params['DetectedCluster.sds_pkg_name'] = \
                sds_pkg_name
            new_params['DetectedCluster.sds_pkg_version'] = \
                sds_pkg_version

            tags = []
            for node in self.parameters['Node[]']:
                tags.append("tendrl/node_%s" % node)
            payload = {
                "tags": tags,
                "run": "tendrl.flows.ImportCluster",
                "status": "new",
                "parameters": new_params,
                "parent": self.parameters['job_id'],
                "type": "node"
            }
            _job_id = str(uuid.uuid4())
            # release lock before import cluster
            create_cluster_utils.release_node_lock(self.parameters)

            Job(job_id=_job_id, status="new", payload=payload).save()
            Event(
                Message(job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Please wait while Tendrl imports ("
                            "job_id: %s) newly expanded "
                            "%s storage nodes %s" %
                            (_job_id, sds_pkg_name, integration_id)
                        }))
        except Exception as ex:
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": ex.message,
                                     "exception": ex
                                 }))
            # raising exception to mark job as failed
            raise ex
        finally:
            # ensure the node lock is released even if an exception occurred
            create_cluster_utils.release_node_lock(self.parameters)
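Both this flow and the import flow above build child jobs with the same payload shape: tags (which agents may pick the job up), run (the flow class path), status, parameters, parent, and type. The sketch below factors that shared structure into one place; the helper name queue_flow_job is illustrative and assumes Job is in scope as in the snippets above, so this is a sketch rather than an actual Tendrl utility.

import uuid


def queue_flow_job(run, parameters, tags, parent_job_id, job_type="node"):
    # Illustrative helper (not part of Tendrl): build the payload every
    # child job in these flows uses and persist it so a node-agent with a
    # matching tag can pick it up from the queue.
    payload = {
        "tags": tags,                      # e.g. ["tendrl/node_<node_id>"]
        "run": run,                        # e.g. "tendrl.flows.ImportCluster"
        "status": "new",
        "parameters": parameters,
        "parent": parent_job_id,           # lets the parent poll its children
        "type": job_type,
    }
    _job_id = str(uuid.uuid4())
    Job(job_id=_job_id, status="new", payload=payload).save()
    return _job_id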
Example #31
    def run(self):
        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message":
                    "Checking if a new  pool has to be created or "
                    "not for rbd creation"
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                cluster_id=NS.tendrl_context.integration_id,
            ))
        if not self.parameters.get('Rbd.pool_id'):
            # Checking if mandatory parameters for pool creation are present
            mandatory_pool_params = Set([
                "Rbd.pool_poolname", "Rbd.pool_pg_num", "Rbd.pool_size",
                "Rbd.pool_min_size"
            ])
            missing_params = list(
                mandatory_pool_params.difference(Set(self.parameters.keys())))
            if not missing_params:
                # Mapping the passed pool parameters into required keys
                pool_parameters = {}
                for key, value in self.parameters.items():
                    if "Rbd.pool_" in key:
                        pool_parameters[key.replace("Rbd.pool_", "Pool.")] =\
                            value
                payload = {
                    "integration_id": NS.tendrl_context.integration_id,
                    "run": "ceph.flows.CreatePool",
                    "status": "new",
                    "parameters": pool_parameters,
                    "parent": self.parameters['job_id'],
                    "type": "sds",
                    "tags":
                    ["tendrl/integration/$TendrlContext."
                     "integration_id"]
                }
                Event(
                    Message(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={"message": "Creating job for pool creation"},
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        cluster_id=NS.tendrl_context.integration_id,
                    ))
                _job_id = str(uuid.uuid4())
                Job(job_id=_job_id, status="new", payload=payload).save()
                Event(
                    Message(
                        priority="error",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Checking for successful pool creation"
                        },
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        cluster_id=NS.tendrl_context.integration_id,
                    ))
                pool_created = False
                job_status = "new"
                while not pool_created:
                    try:
                        job_status = NS._int.client.read("/queue/%s/status" %
                                                         _job_id).value
                    except etcd.EtcdKeyNotFound:
                        Event(
                            Message(
                                priority="error",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Failed to fetch pool "
                                    "creation status for rbd "
                                    "creation"
                                },
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                cluster_id=NS.tendrl_context.integration_id,
                            ))
                        break
                    if job_status == "finished":
                        pool_created = True
                    elif job_status == "failed":
                        break
                if pool_created:
                    # Setting pool_id for rbd creation
                    pool_id = self._get_pool_id(
                        self.parameters['Rbd.pool_poolname'])
                    if pool_id:
                        self.parameters['Rbd.pool_id'] = pool_id
                    else:
                        Event(
                            Message(
                                priority="error",
                                publisher=NS.publisher_id,
                                payload={
                                    "message":
                                    "Failed to fetch pool_id %s ."
                                    "Cannot create rbd without "
                                    "pool_id." % pool_id
                                },
                                job_id=self.parameters['job_id'],
                                flow_id=self.parameters['flow_id'],
                                cluster_id=NS.tendrl_context.integration_id,
                            ))
                        return False
                else:
                    Event(
                        Message(
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={
                                "message":
                                "Failed to create pool. "
                                "Cannot proceed with rbd creation."
                            },
                            job_id=self.parameters['job_id'],
                            flow_id=self.parameters['flow_id'],
                            cluster_id=NS.tendrl_context.integration_id,
                        ))
                    return False
            else:
                Event(
                    Message(
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Mandatory parameters %s for pool "
                            "creation not present. Cannot continue"
                            " with rbd creation." % ', '.join(missing_params)
                        },
                        job_id=self.parameters['job_id'],
                        flow_id=self.parameters['flow_id'],
                        cluster_id=NS.tendrl_context.integration_id,
                    ))
                return False

        attrs = dict(name=self.parameters['Rbd.name'],
                     size=str(self.parameters['Rbd.size']),
                     pool_id=self.parameters.get('Rbd.pool_id'))
        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message":
                    "Creating rbd %s on pool %s" %
                    (self.parameters['Rbd.name'],
                     self.parameters['Rbd.pool_id'])
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                cluster_id=NS.tendrl_context.integration_id,
            ))

        crud = Crud()
        resp = crud.create("rbd", attrs)
        try:
            crud.sync_request_status(resp['request'])
        except RequestStateError as ex:
            Event(
                Message(
                    priority="info",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "Failed to create rbd %s."
                        " Error: %s" % (self.parameters['Rbd.name'], ex)
                    },
                    job_id=self.parameters['job_id'],
                    flow_id=self.parameters['flow_id'],
                    cluster_id=NS.tendrl_context.integration_id,
                ))
            return False

        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message":
                    "Successfully created rbd %s on pool %s" %
                    (self.parameters['Rbd.name'],
                     self.parameters['Rbd.pool_id'])
                },
                job_id=self.parameters['job_id'],
                flow_id=self.parameters['flow_id'],
                cluster_id=NS.tendrl_context.integration_id,
            ))

        pool_name = NS._int.client.read("clusters/%s/Pools/%s/pool_name" %
                                        (NS.tendrl_context.integration_id,
                                         self.parameters['Rbd.pool_id'])).value
        rbd_details = NS.state_sync_thread._get_rbds(pool_name)
        for k, v in rbd_details.iteritems():
            NS.ceph.objects.Rbd(
                name=k,
                size=v['size'],
                pool_id=self.parameters['Rbd.pool_id'],
                flags=v['flags'],
                provisioned=NS.state_sync_thread._to_bytes(v['provisioned']),
                used=NS.state_sync_thread._to_bytes(v['used'])).save()

        return True
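The flow above relies on self._get_pool_id to translate the created pool's name back into a pool id, but that helper is not shown in this snippet. The sketch below is a hypothetical standalone equivalent, written under the assumption that pools live under clusters/<integration_id>/Pools/<pool_id>/pool_name (matching the pool_name read performed after rbd creation); it is not the actual Tendrl implementation.

import etcd


def get_pool_id(pool_name):
    # Hypothetical equivalent of self._get_pool_id (assumed etcd layout:
    # clusters/<integration_id>/Pools/<pool_id>/pool_name).
    pools_key = "clusters/%s/Pools" % NS.tendrl_context.integration_id
    try:
        pools = NS._int.client.read(pools_key)
    except etcd.EtcdKeyNotFound:
        return None
    for entry in pools.leaves:
        pool_id = entry.key.split("/")[-1]
        try:
            name = NS._int.client.read(
                "%s/%s/pool_name" % (pools_key, pool_id)).value
        except etcd.EtcdKeyNotFound:
            continue
        if name == pool_name:
            return pool_id
    return None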
Example #32
def sync(sync_ttl=None):
    try:
        tags = []
        # update node agent service details

        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={"message": "node_sync, Updating Service data"}))
        for service in TENDRL_SERVICES:
            s = NS.tendrl.objects.Service(service=service)
            if s.running:
                service_tag = NS.compiled_definitions.get_parsed_defs(
                )['namespace.tendrl']['tags'][service.strip("@*")]
                tags.append(service_tag)

                if service_tag == "tendrl/server":
                    tags.append("tendrl/monitor")
            s.save()

        _cluster = NS.tendrl.objects.Cluster(
            integration_id=NS.tendrl_context.integration_id).load()
        if _cluster.is_managed == "yes":
            # Try to claim the orphan "provisioner/%integration_id" tag
            _tag = "provisioner/%s" % _cluster.integration_id
            _is_new_provisioner = False
            NS.node_context = NS.tendrl.objects.NodeContext().load()
            if _tag not in NS.node_context.tags:
                try:
                    _index_key = "/indexes/tags/%s" % _tag
                    _node_id = json.dumps([NS.node_context.node_id])
                    NS._int.wclient.write(_index_key,
                                          _node_id,
                                          prevExist=False)
                    etcd_utils.refresh(_index_key, sync_ttl)
                    tags.append(_tag)
                    _is_new_provisioner = True
                except etcd.EtcdAlreadyExist:
                    pass

        # updating node context with latest tags
        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message":
                        "node_sync, updating node context "
                        "data with tags"
                    }))
        NS.node_context = NS.tendrl.objects.NodeContext().load()
        current_tags = list(NS.node_context.tags)
        tags += current_tags
        NS.node_context.tags = list(set(tags))
        NS.node_context.tags.sort()
        current_tags.sort()
        if NS.node_context.tags != current_tags:
            NS.node_context.save()

        if _cluster.is_managed == "yes":
            if _is_new_provisioner:
                _msg = "node_sync, NEW provisioner node found! re-configuring monitoring (job-id: %s) on this node"
                payload = {
                    "tags": ["tendrl/node_%s" % NS.node_context.node_id],
                    "run": "tendrl.flows.ConfigureMonitoring",
                    "status": "new",
                    "parameters": {
                        'TendrlContext.integration_id':
                        NS.tendrl_context.integration_id
                    },
                    "type": "node"
                }
                _job_id = str(uuid.uuid4())
                Job(job_id=_job_id, status="new", payload=payload).save()
                Event(
                    Message(priority="debug",
                            publisher=NS.publisher_id,
                            payload={"message": _msg % _job_id}))

        # Update /indexes/tags/:tag = [node_ids]
        for tag in NS.node_context.tags:

            index_key = "/indexes/tags/%s" % tag
            _node_ids = []
            try:
                _node_ids = NS._int.client.read(index_key).value
                _node_ids = json.loads(_node_ids)
            except etcd.EtcdKeyNotFound:
                pass

            if _node_ids:
                if "provisioner" in tag:
                    # Check if this is a stale provisioner
                    if NS.node_context.node_id != _node_ids[0]:
                        NS.node_context.tags.remove(tag)
                        NS.node_context.save()
                        continue
                if NS.node_context.node_id in _node_ids:
                    continue
                else:
                    _node_ids += [NS.node_context.node_id]
            else:
                _node_ids = [NS.node_context.node_id]
            _node_ids = list(set(_node_ids))

            etcd_utils.write(index_key, json.dumps(_node_ids))
            if sync_ttl and len(_node_ids) == 1:
                etcd_utils.refresh(index_key, sync_ttl)

        Event(
            Message(priority="debug",
                    publisher=NS.publisher_id,
                    payload={
                        "message": "node_sync, Updating detected "
                        "platform"
                    }))
    except Exception as ex:
        Event(
            ExceptionMessage(priority="error",
                             publisher=NS.publisher_id,
                             payload={
                                 "message":
                                 "node_sync service and indexes "
                                 "sync failed: " + ex.message,
                                 "exception":
                                 ex
                             }))
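sync() accepts an optional sync_ttl so the tag indexes it writes can expire when a node stops reporting. A minimal driver loop is sketched below; the interval and TTL values are assumptions, and the real scheduling is handled elsewhere in the node agent.

import time

SYNC_INTERVAL = 60              # assumed period between sync runs
SYNC_TTL = SYNC_INTERVAL * 2    # let index entries outlive one missed run


def run_node_sync_forever():
    # Illustrative driver (not from the Tendrl codebase): call sync()
    # periodically, refreshing tag indexes with a TTL longer than the
    # interval so stale entries eventually expire.
    while True:
        sync(sync_ttl=SYNC_TTL)
        time.sleep(SYNC_INTERVAL)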