Example #1
    def testUpdatePriorityService(self):
        """
        _testUpdatePriorityService_

        Check that we can update the priority correctly and also
        check the available workflows feature
        """
        specName = "RerecoSpec"
        specUrl = self.specGenerator.createReRecoSpec(
            specName, "file", assignKwargs={'SiteWhitelist': ["T2_XX_SiteA"]})
        globalQ = globalQueue(DbName='workqueue_t',
                              QueueURL=self.testInit.couchUrl,
                              UnittestFlag=True,
                              **self.queueParams)
        localQ = localQueue(DbName='local_workqueue_t',
                            QueueURL=self.testInit.couchUrl,
                            CacheDir=self.testInit.testDir,
                            ParentQueueCouchUrl='%s/workqueue_t' %
                            self.testInit.couchUrl,
                            ParentQueueInboxCouchDBName='workqueue_t_inbox',
                            **self.queueParams)
        # Try a full chain of priority update and propagation
        self.assertTrue(globalQ.queueWork(specUrl, "RerecoSpec", "teamA") > 0)
        globalApi = WorkQueueDS(self.testInit.couchUrl, 'workqueue_t')
        # overwrite default - can't test with stale view
        globalApi.defaultOptions = {'reduce': True, 'group': True}
        globalApi.updatePriority(specName, 100)
        self.assertEqual(globalQ.backend.getWMSpec(specName).priority(), 100)
        storedElements = globalQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        numWorks = localQ.pullWork({'T2_XX_SiteA': 10})
        self.assertTrue(numWorks > 0)
        # replicate from GQ to LQ manually
        localQ.backend.pullFromParent(continuous=False)
        # wait until replication is done
        time.sleep(2)

        localQ.processInboundWork(continuous=False)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        localApi = WorkQueueDS(self.testInit.couchUrl, 'local_workqueue_t')
        # overwrite default - can't test with stale view
        localApi.defaultOptions = {'reduce': True, 'group': True}
        localApi.updatePriority(specName, 500)
        self.assertEqual(localQ.backend.getWMSpec(specName).priority(), 500)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 500)
        availableWF = localApi.getAvailableWorkflows()
        self.assertEqual(availableWF, set([(specName, 500)]))
        # Attempt to update a nonexistent workflow in the queue
        try:
            globalApi.updatePriority('NotExistent', 2)
        except Exception as ex:
            self.fail('No exception should be raised: %s' % str(ex))
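
Note: the fixed time.sleep(2) above is only a heuristic wait for the CouchDB replication to finish. A hedged alternative, reusing the localQ.backend.getElementsForWorkflow() call already exercised by this test, is a small polling helper (hypothetical, not part of the original test):

import time

def waitForWorkflowElements(backend, specName, timeout=10, interval=0.5):
    """Poll the work queue backend until elements for specName appear or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if backend.getElementsForWorkflow(specName):
            return True
        time.sleep(interval)
    return False

# usage inside the test, replacing the fixed sleep (assumes the same fixtures):
#     self.assertTrue(waitForWorkflowElements(localQ.backend, specName))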
Example #2
    def testUpdatePriorityService(self):
        """
        _testUpdatePriorityService_

        Check that we can update the priority correctly and also
        check the available workflows feature
        """
        specName = "RerecoSpec"
        specUrl = self.specGenerator.createReRecoSpec(specName, "file",
                                                      assignKwargs={'SiteWhitelist':["T2_XX_SiteA"]})
        globalQ = globalQueue(DbName='workqueue_t',
                              QueueURL=self.testInit.couchUrl,
                              UnittestFlag=True)
        localQ = localQueue(DbName='local_workqueue_t',
                            QueueURL=self.testInit.couchUrl,
                            CacheDir=self.testInit.testDir,
                            ParentQueueCouchUrl='%s/workqueue_t' % self.testInit.couchUrl,
                            ParentQueueInboxCouchDBName='workqueue_t_inbox'
                            )
        # Try a full chain of priority update and propagation
        self.assertTrue(globalQ.queueWork(specUrl, "RerecoSpec", "teamA") > 0)
        globalApi = WorkQueueDS(self.testInit.couchUrl, 'workqueue_t')
        # overwrite default - can't test with stale view
        globalApi.defaultOptions = {'reduce': True, 'group': True}
        globalApi.updatePriority(specName, 100)
        self.assertEqual(globalQ.backend.getWMSpec(specName).priority(), 100)
        storedElements = globalQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        numWorks = localQ.pullWork({'T2_XX_SiteA': 10})
        self.assertTrue(numWorks > 0)
        # replicate from GQ to LQ manually
        localQ.backend.pullFromParent(continuous=False)
        # wait until replication is done
        time.sleep(2)

        localQ.processInboundWork(continuous=False)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        localApi = WorkQueueDS(self.testInit.couchUrl, 'local_workqueue_t')
        # overwrite default - can't test with stale view
        localApi.defaultOptions = {'reduce': True, 'group': True}
        localApi.updatePriority(specName, 500)
        self.assertEqual(localQ.backend.getWMSpec(specName).priority(), 500)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 500)
        availableWF = localApi.getAvailableWorkflows()
        self.assertEqual(availableWF, set([(specName, 500)]))
        # Attempt to update a nonexistent workflow in the queue
        try:
            globalApi.updatePriority('NotExistent', 2)
        except Exception as ex:
            self.fail('No exception should be raised: %s' % str(ex))
Example #3
    def testUpdatePriorityService(self):
        """
        _testUpdatePriorityService_

        Check that we can update the priority correctly and also
        check the available workflows feature
        """
        specName = "RerecoSpec"
        specUrl = self.specGenerator.createReRecoSpec(specName, "file")
        globalQ = globalQueue(DbName='workqueue_t',
                              QueueURL=self.testInit.couchUrl)
        localQ = localQueue(DbName='local_workqueue_t',
                            QueueURL=self.testInit.couchUrl,
                            CacheDir=self.testInit.testDir,
                            ParentQueueCouchUrl='%s/workqueue_t' %
                            self.testInit.couchUrl,
                            ParentQueueInboxCouchDBName='workqueue_t_inbox')
        # Try a full chain of priority update and propagation
        self.assertTrue(globalQ.queueWork(specUrl, "RerecoSpec", "teamA") > 0)
        globalApi = WorkQueueDS(self.testInit.couchUrl, 'workqueue_t')
        # overwrite default - can't test with stale view
        globalApi.defaultOptions = {'reduce': True, 'group': True}
        globalApi.updatePriority(specName, 100)
        self.assertEqual(globalQ.backend.getWMSpec(specName).priority(), 100)
        storedElements = globalQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        self.assertTrue(localQ.pullWork({'T2_XX_SiteA': 10}, continuousReplication=False) > 0)
        localQ.processInboundWork(continuous=False)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        localApi = WorkQueueDS(self.testInit.couchUrl, 'local_workqueue_t')
        # overwrite default - can't test with stale view
        localApi.defaultOptions = {'reduce': True, 'group': True}
        localApi.updatePriority(specName, 500)
        self.assertEqual(localQ.backend.getWMSpec(specName).priority(), 500)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 500)
        self.assertEqual(localApi.getAvailableWorkflows(),
                         set([(specName, 500)]))
        # Attempt to update a nonexistent workflow in the queue
        try:
            globalApi.updatePriority('NotExistent', 2)
        except Exception as ex:
            self.fail('No exception should be raised: %s' % str(ex))
Example #4
    def testUpdatePriorityService(self):
        """
        _testUpdatePriorityService_

        Check that we can update the priority correctly and also
        check the available workflows feature
        """
        specName = "RerecoSpec"
        specUrl = self.specGenerator.createReRecoSpec(specName, "file")
        globalQ = globalQueue(DbName = 'workqueue_t',
                              QueueURL = self.testInit.couchUrl)
        localQ = localQueue(DbName = 'local_workqueue_t',
                            QueueURL = self.testInit.couchUrl,
                            CacheDir = self.testInit.testDir,
                            ParentQueueCouchUrl = '%s/workqueue_t' % self.testInit.couchUrl,
                            ParentQueueInboxCouchDBName = 'workqueue_t_inbox'
                            )
        # Try a full chain of priority update and propagation
        self.assertTrue(globalQ.queueWork(specUrl, "RerecoSpec", "teamA") > 0)
        globalApi = WorkQueueDS(self.testInit.couchUrl, 'workqueue_t')
        # overwrite default - can't test with stale view
        globalApi.defaultOptions = {'reduce': True, 'group': True}
        globalApi.updatePriority(specName, 100)
        self.assertEqual(globalQ.backend.getWMSpec(specName).priority(), 100)
        storedElements = globalQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        self.assertTrue(localQ.pullWork({'T2_XX_SiteA' : 10}) > 0)
        localQ.processInboundWork(continuous = False)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 100)
        localApi = WorkQueueDS(self.testInit.couchUrl, 'local_workqueue_t')
        # overwrite default - can't test with stale view
        localApi.defaultOptions = {'reduce': True, 'group': True}
        localApi.updatePriority(specName, 500)
        self.assertEqual(localQ.backend.getWMSpec(specName).priority(), 500)
        storedElements = localQ.backend.getElementsForWorkflow(specName)
        for element in storedElements:
            self.assertEqual(element['Priority'], 500)
        self.assertEqual(localApi.getAvailableWorkflows(), set([(specName, 500)]))
        # Attempt to update a nonexistent workflow in the queue
        try:
            globalApi.updatePriority('NotExistent', 2)
        except Exception as ex:
            self.fail('No exception should be raised: %s' % str(ex))
Example #5
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
        # this is needed for the post validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is a mapping function specific to data coming from the browser

        TODO: give a keyword so it doesn't have to loop through everything in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return
        
        no_multi_key = ["detail", "_nostale", "date_range", "common_dict"]
        for key, value in param.kwargs.items():
            # convert string to list
            if key not in no_multi_key and isinstance(value, basestring):
                param.kwargs[key] = [value]
        
        detail = param.kwargs.get('detail', True)
        if detail in (False, "false", "False", "FALSE"):
            detail = False
            
        if "status" in param.kwargs and detail:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        """Can't retrieve bulk archived status requests with detail option True, 
                           set detail=false or use other search arguments""")
                    
        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):
        data = cherrypy.request.body.read()
        if data:
            request_args = json.loads(data)
            if requestName:
                request_args["RequestName"] = requestName

        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs
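    # Illustration (not part of the original code): form-style ids such as
    #   ['request-foo_RVtest_120101', 'request-all']
    # become {'request-foo_RVtest_120101': 'on', 'request-all': 'on'} above, and the
    # returned list is ['foo_RVtest_120101'] because the 'request-all' entry is skipped.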

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(json.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            # remove this
            # tmp = []
            # for prop in param.kwargs:
            #    tmp.append(prop)
            # for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = json.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validation successful:
        # move the validated arguments to safe and
        # make param empty,
        # otherwise raise the error
        try:
            if method == 'GET':
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe, validate_request_update_args, requestName)
                # TODO: handle multiple clones
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe, validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for fetching multiple requests by names.
                    param.args.pop()
                    self._getRequestNamesFromBody(param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe, validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)
    
    def _maskTaskStepChain(self, masked_dict, req_dict, chain_name, mask_key):
        
        mask_exist = False
        num_loop = req_dict["%sChain" % chain_name]
        for i in range(num_loop):
            if mask_key in req_dict["%s%s" % (chain_name, i+1)]:
                mask_exist = True
                break
        if mask_exist:
            defaultValue = masked_dict[mask_key]
            masked_dict[mask_key] = []
            # assume mask_key holds a list if the condition isn't met.
            chain_key = "%sName" % chain_name
            for i in range(num_loop):
                chain = req_dict["%s%s" % (chain_name, i+1)]
                if mask_key in chain:
                    masked_dict[mask_key].append({chain_key: chain[chain_key], mask_key: chain[mask_key]})
                else:
                    # fall back to the top-level (default) value when this chain
                    # dictionary does not carry the masked key
                    if isinstance(defaultValue, dict):
                        value = defaultValue.get(chain_key, None)
                    else:
                        value = defaultValue
                    masked_dict[mask_key].append({chain_key: chain[chain_key], mask_key: value})
        return
                 
    def _mask_result(self, mask, result):
        
        if len(mask) == 1 and mask[0] == "DAS":
            mask = ReqMgrConfigDataCache.getConfig("DAS_RESULT_FILTER")["filter_list"]
        
        if len(mask) > 0:
            masked_result = {}
            for req_name, req_info in result.items():
                masked_result.setdefault(req_name, {})
                for mask_key in mask:
                    masked_result[req_name].update({mask_key: req_info.get(mask_key, None)})
                    if "TaskChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name], req_info, "Task", mask_key)
                    elif "StepChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name], req_info, "Step", mask_key)
                        
            return masked_result
        else:
            return result
    
    @restcall(formats=[('text/plain', PrettyJSONFormat()), ('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be masked out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        # list of status
        status = kwargs.get("status", [])
        # list of request names
        name = kwargs.get("name", [])
        request_type = kwargs.get("request_type", [])
        prep_id = kwargs.get("prep_id", [])
        inputdataset = kwargs.get("inputdataset", [])
        outputdataset = kwargs.get("outputdataset", [])
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", [])
        workqueue = kwargs.get("workqueue", [])
        team = kwargs.get("team", [])
        mc_pileup = kwargs.get("mc_pileup", [])
        data_pileup = kwargs.get("data_pileup", [])
        requestor = kwargs.get("requestor", [])
        mask = kwargs.get("mask", [])
        detail = kwargs.get("detail", True)
        # set the return format. The default format has the request name as a key;
        # if common_dict is set to one, it returns a list of dictionaries with a RequestName field.
        common_dict = int(kwargs.get("common_dict", 0))
        if detail in (False, "false", "False", "FALSE"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use the stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []
        
        if len(status) == 1 and status[0] == "ACTIVE":
            status = ACTIVE_STATUS
        if status and not team and not request_type and not requestor:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatus", option, status))
        if status and team:
            query_keys = [[t, s] for t in team for s in status] 
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView("byteamandstatus", option, query_keys))
        if status and request_type:
            query_keys = [[s, rt] for rt in request_type for s in status]
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("requestsbystatusandtype", 
                                                                             option, query_keys))
        if status and requestor:
            query_keys = [[s, r] for r in requestor for s in status] 
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView("bystatusandrequestor", option, query_keys))        
            
        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byprepid", option, prep_id))
        if inputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydate", option, date_range))
        if campaign:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bycampaign", option, campaign))
        if workqueue:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)
        
        if len(result) == 0:
            return []
        
        result = self._mask_result(mask, result)
        # If detail is set to False, return just the list of request names
        if not option["include_docs"]:
            return result.keys()
        
        if common_dict == 1:
            response_list = result.values()
        else:
            response_list = [result] 
        return rows(response_list)

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests
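    # Illustration (not part of the original code): with
    #   request_info = [{'req1': docA, 'req2': docB}, {'req1': docA}]
    # only 'req1' survives the key intersection and its value is taken from the
    # first query result, so the method returns {'req1': docA}.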

        # TODO move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys[request]:
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList
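    # Illustration (not part of the original code): with couch view rows such as
    #   requestAgentUrl = {"rows": [{"key": ["req1", "agentA"]}, {"key": ["req1", "agentB"]}]}
    # and request_info containing "req1", the method returns
    #   [["req1", "agentA"], ["req1", "agentB"]].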

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr', 'childresubmissionrequests', keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames

    def _handleNoStatusUpdate(self, workload, request_args):
        """
        Only a few values can be updated without a state transition involved:
        currently 'RequestPriority', 'total_jobs', 'input_lumis', 'input_events' and 'input_num_files'
        """
        if 'RequestPriority' in request_args:
            # must update three places: GQ elements, workload_cache and workload spec
            self.gq_service.updatePriority(workload.name(), request_args['RequestPriority'])
            report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args)
            workload.setPriority(request_args['RequestPriority'])
            workload.saveCouchUrl(workload.specUrl())
        elif "total_jobs" in request_args:
            # only GQ update this stats
            # request_args should contain only the 4 keys: 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(workload.name(), request_args)
        else:
            raise InvalidSpecParameterValue("can't update value without state transition: %s" % request_args)
        
        return report
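    # Illustration (not part of the original code): a priority-only update arrives as
    #   {'RequestPriority': 100000}
    # while a global workqueue statistics update carries exactly the four keys, e.g.
    #   {'total_jobs': 100, 'input_lumis': 500, 'input_events': 5000, 'input_num_files': 10}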

    def _handleAssignmentApprovedTransition(self, workload, request_args, dn):
        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
        return report
        
    def _handleAssignmentStateTransition(self, workload, request_args, dn):
        
        req_status = request_args["RequestStatus"]
        if req_status == "assigned" and not request_args.get('Team', '').strip():
            raise InvalidSpecParameterValue("Team must be set during workflow assignment: %s" % request_args)
            
        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['SoftTimeout'] = int(request_args['SoftTimeout'])
            # TODO: not sure why GracePeriod needs conversion when passed from the web interface, but convert it here
            request_args['GracePeriod'] = int(request_args['GracePeriod'])
            request_args['HardTimeout'] = request_args['SoftTimeout'] + request_args['GracePeriod']
        
        # Only allow extra value updates for the assigned status
        cherrypy.log("INFO: Assign request, input args: %s ..." % request_args)
        try:
            workload.updateArguments(request_args)
        except Exception as ex:
            msg = traceback.format_exc()
            cherrypy.log("Error for request args %s: %s" % (request_args, msg))
            raise InvalidSpecParameterValue(str(ex))
        
        # validate/update OutputDatasets after ProcessingString and AcquisitionEra are updated
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        validateOutputDatasets(request_args['OutputDatasets'], workload.getDbsUrl())

        # legacy update schema to support ops script
        loadRequestSchema(workload, request_args)

        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
        workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
        return report

    def _handleCascadeUpdate(self, workload, request_args, dn):
        """
        Only the closed-out and announced statuses have this option.
        """
        req_status = request_args["RequestStatus"]
        # check whether the cascade option is set
        if request_args["cascade"]:
            cascade_list = self._retrieveResubmissionChildren(workload.name())
            for req_name in cascade_list:
                self.reqmgr_db_service.updateRequestStatus(req_name, req_status, dn)
        # update original workflow status
        report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status, dn)
        return report
    
    def _handleOnlyStateTransition(self, workload, req_status, dn):
        """
        It handles only the state transition. Special handling needed if a
        request is aborted or force completed.
        """
        if req_status in ["aborted", "force-complete"]:
            # cancel the workflow first
            self.gq_service.cancelWorkflow(workload.name())
        # update the request status in couchdb
        report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status, dn)
        return report
    
    def _updateRequest(self, workload, request_args):
        dn = cherrypy.request.user.get("dn", "unknown")

        if workload is None:
            (workload, request_args) = self.initialize_clone(request_args["OriginalRequestName"])
            return self.post([workload, request_args])

        if "RequestStatus" not in request_args:
            report = self._handleNoStatusUpdate(workload, request_args)
            
        else:
            req_status = request_args["RequestStatus"]
            # assignment-approved only allows a priority update
            if len(request_args) == 2 and req_status == "assignment-approved":
                report = self._handleAssignmentApprovedTransition(workload, request_args, dn)
            elif len(request_args) > 1 and req_status == "assigned":
                report = self._handleAssignmentStateTransition(workload, request_args, dn)
            elif len(request_args) == 2 and req_status in ["closed-out", "announced"] and \
                "cascade" in request_args:
                report = self._handleCascadeUpdate(workload, request_args, dn)
            elif len(request_args) == 1:
                # If the status change is to aborted, force-complete or rejected, ignore other arguments
                report = self._handleOnlyStateTransition(workload, req_status, dn)
            else:
                raise InvalidSpecParameterValue(
                    "can't update value except transition to assigned status: %s" % request_args)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        """workloadPairList is a list of tuple containing (workload, requeat_args)"""
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        Add to request_args properties which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to be put into couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL("%s/%s/%s/spec" % (request_args["CouchURL"],
                                                                         request_args["CouchWorkloadDBName"],
                                                                         workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # Add the initial priority only at the creation of the request
        request_args['InitialPriority'] = request_args["RequestPriority"]

        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, workload_pair_list, multi_update_flag=False, multi_names_flag=False):
        """
        Create and update CouchDB with a new request.
        The request argument is passed from validation
        (validation converts cherrypy.request.body data to arguments).

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(json.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)
            
            # legacy update schema to support ops script
            loadRequestSchema(workload, request_args)
            
            cherrypy.log("INFO: Create request, input args: %s ..." % request_args)
            workload.saveCouch(request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
Example #6
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
        # this is needed for the post validation
        self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        no_multi_key = ["detail", "_nostale", "date_range", "common_dict"]
        for key, value in param.kwargs.items():
            # convert string to list
            if key not in no_multi_key and isinstance(value, basestring):
                param.kwargs[key] = [value]

        detail = param.kwargs.get('detail', True)
        if detail in (False, "false", "False", "FALSE"):
            detail = False

        if "status" in param.kwargs and detail:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        """Can't retrieve bulk archived status requests with detail option True,
                           set detail=false or use other search arguments""")

        for prop in param.kwargs.keys():
            safe.kwargs[prop] = param.kwargs.pop(prop)
        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):
        data = cherrypy.request.body.read()
        if data:
            request_args = json.loads(data)
        else:
            request_args = {}
        cherrypy.log('Updating request "%s" with these user-provided args: %s' % (requestName, request_args))

        # In case key args are also passed and request body also exists.
        # If the request.body is dictionary update the key args value as well
        if isinstance(request_args, dict):
            for prop in param.kwargs.keys():
                request_args[prop] = param.kwargs.pop(prop)

            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        for args in request_args:
            workload, r_args = valFunc(args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(json.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, safe):

        request_names = json.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validation successful:
        # move the validated arguments to safe and
        # make param empty,
        # otherwise raise the error
        try:
            if method == 'GET':
                self._validateGET(param, safe)

            elif method == 'PUT':
                args_length = len(param.args)

                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe, validate_request_update_args, requestName)

            elif method == 'POST':
                args_length = len(param.args)
                if args_length == 2 and param.args[0] == "clone":
                    # handles the clone workflow - don't validate args here
                    param.kwargs['OriginalRequestName'] = param.args[1]
                    param.args.pop()
                    param.args.pop()
                    self._validateRequestBase(param, safe, validate_clone_create_args)
                elif args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe, validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for fetching multiple requests by names.
                    param.args.pop()
                    self._getRequestNamesFromBody(safe)
                else:
                    self._validateRequestBase(param, safe, validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def _maskResult(self, mask, result):
        """
        If a mask of parameters was provided in the query string, then filter
        the request key/values accordingly.
        :param mask: a list of strings (keys of the request dictionary)
        :param result: a dict key'ed by the request name, with the whole
            request dictionary as a value
        :return: updates the result object in place and returns it (dict)
        """

        if len(mask) == 1 and mask[0] == "DAS":
            mask = ReqMgrConfigDataCache.getConfig("DAS_RESULT_FILTER")["filter_list"]

        if len(mask) > 0:
            maskedResult = {}
            for reqName, reqDict in result.items():
                reqInfo = RequestInfo(reqDict)
                maskedResult.setdefault(reqName, {})
                for maskKey in mask:
                    foundValue = reqInfo.get(maskKey, None)
                    maskedResult[reqName].update({maskKey: foundValue})

            return maskedResult
        else:
            return result
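    # Illustration (not part of the original code): with mask = ["RequestStatus"] and
    #   result = {"req1": {"RequestStatus": "assigned", "RequestPriority": 100000}}
    # the masked output is {"req1": {"RequestStatus": "assigned"}}; keys missing from a
    # request are reported as None.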

    @restcall(formats=[('text/plain', PrettyJSONFormat()), ('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be masked out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        ### pop arguments unrelated to the user query
        mask = kwargs.pop("mask", [])
        detail = kwargs.pop("detail", True)
        common_dict = int(kwargs.pop("common_dict", 0))  # modifies the response format
        nostale = kwargs.pop("_nostale", False)

        ### these are the query strings supported by this API
        status = kwargs.get("status", [])
        name = kwargs.get("name", [])
        request_type = kwargs.get("request_type", [])
        prep_id = kwargs.get("prep_id", [])
        inputdataset = kwargs.get("inputdataset", [])
        outputdataset = kwargs.get("outputdataset", [])
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", [])
        team = kwargs.get("team", [])
        mc_pileup = kwargs.get("mc_pileup", [])
        data_pileup = kwargs.get("data_pileup", [])
        requestor = kwargs.get("requestor", [])

        # further tweaks to the couch queries
        if len(status) == 1 and status[0] == "ACTIVE":
            status = ACTIVE_STATUS
        if detail in (False, "false", "False", "FALSE"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use the stale view; this is only needed for tests
        if nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []
        queryMatched = False  # flag to avoid calling the same view twice
        if len(kwargs) == 2:
            if status and team:
                query_keys = [[t, s] for t in team for s in status]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byteamandstatus",
                                                                                 option, query_keys))
                queryMatched = True
            elif status and request_type:
                query_keys = [[s, rt] for rt in request_type for s in status]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("requestsbystatusandtype",
                                                                                 option, query_keys))
                queryMatched = True
            elif status and requestor:
                query_keys = [[s, r] for r in requestor for s in status]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatusandrequestor",
                                                                                 option, query_keys))
                queryMatched = True
        elif len(kwargs) == 3:
            if status and request_type and requestor:
                query_keys = [[s, rt, req] for s in status for rt in request_type for req in requestor]
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatusandtypeandrequestor",
                                                                                 option, query_keys))
                queryMatched = True

        # anything else that hasn't matched the query combination above
        if not queryMatched:
            if status:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatus",
                                                                                 option, status))
            if name:
                request_info.append(self.reqmgr_db_service.getRequestByNames(name))
            if request_type:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bytype",
                                                                                 option, request_type))
            if prep_id:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byprepid",
                                                                                 option, prep_id))
            if inputdataset:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byinputdataset",
                                                                                 option, inputdataset))
            if outputdataset:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("byoutputdataset",
                                                                                 option, outputdataset))
            if date_range:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydate",
                                                                                 option, date_range))
            if campaign:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bycampaign",
                                                                                 option, campaign))
            if mc_pileup:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bymcpileup",
                                                                                 option, mc_pileup))
            if data_pileup:
                request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydatapileup",
                                                                                 option, data_pileup))

        # get the intersection of the request data
        result = self._intersection_of_request_info(request_info)

        if not result:
            return []

        result = self._maskResult(mask, result)

        if not option["include_docs"]:
            return result.keys()

        # set the return format. The default format has the request name as a key;
        # if common_dict is set to one, it returns a list of dictionaries with a RequestName field.
        if common_dict == 1:
            response_list = result.values()
        else:
            response_list = [result]
        return rows(response_list)

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests

    def _retrieveResubmissionChildren(self, request_name):
        """
        Fetches all the direct children requests from CouchDB.
        Response from CouchDB view is in the following format:
            [{u'id': u'child_workflow_name',
              u'key': u'parent_workflow_name',
              u'value': 'current_request_status'}]
        :param request_name: string with the parent workflow name
        :return: a list of dictionaries with the parent and child workflow and the child status
        """
        result = self.reqmgr_db.loadView('ReqMgr', 'childresubmissionrequests', keys=[request_name])['rows']
        childrenRequestAndStatus = []
        for childInfo in result:
            childrenRequestAndStatus.append(childInfo)
            childrenRequestAndStatus.extend(self._retrieveResubmissionChildren(childInfo['id']))
        return childrenRequestAndStatus

    def _handleNoStatusUpdate(self, workload, request_args, dn):
        """
        For no-status update, we only support the following parameters:
         1. RequestPriority
         2. Global workqueue statistics, while acquiring a workflow
        """
        if 'RequestPriority' in request_args:
            # Yes, we completely ignore any other arguments posted by the user (web UI case)
            request_args = {'RequestPriority': request_args['RequestPriority']}
            validate_request_priority(request_args)
            # must update three places: GQ elements, workload_cache and workload spec
            self.gq_service.updatePriority(workload.name(), request_args['RequestPriority'])
            report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
            workload.setPriority(request_args['RequestPriority'])
            workload.saveCouchUrl(workload.specUrl())
            cherrypy.log('Updated priority of "{}" to: {}'.format(workload.name(), request_args['RequestPriority']))
        elif workqueue_stat_validation(request_args):
            report = self.reqmgr_db_service.updateRequestStats(workload.name(), request_args)
            cherrypy.log('Updated workqueue statistics of "{}", with:  {}'.format(workload.name(), request_args))
        else:
            msg = "There are invalid arguments for no-status update: %s" % request_args
            raise InvalidSpecParameterValue(msg)

        return report

    def _handleAssignmentApprovedTransition(self, workload, request_args, dn):
        """
        Allows only two arguments: RequestStatus and RequestPriority
        """
        if "RequestPriority" not in request_args:
            msg = "There are invalid arguments for assignment-approved transition: %s" % request_args
            raise InvalidSpecParameterValue(msg)

        validate_request_priority(request_args)
        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)
        return report

    def _handleAssignmentStateTransition(self, workload, request_args, dn):
        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['HardTimeout'] = request_args['SoftTimeout'] + request_args['GracePeriod']

        # Only allow extra value update for assigned status
        cherrypy.log("Assign request %s, input args: %s ..." % (workload.name(), request_args))
        try:
            workload.updateArguments(request_args)
        except Exception as ex:
            msg = traceback.format_exc()
            cherrypy.log("Error for request args %s: %s" % (request_args, msg))
            raise InvalidSpecParameterValue(str(ex))

        # validate/update OutputDatasets after ProcessingString and AcquisitionEra are updated
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        validateOutputDatasets(request_args['OutputDatasets'], workload.getDbsUrl())

        # by default, it contains all unmerged LFNs (used by sites to protect the unmerged area)
        request_args['OutputModulesLFNBases'] = workload.listAllOutputModulesLFNBases()

        # Add parentage relation for step chain, task chain:
        chainMap = workload.getChainParentageSimpleMapping()
        if chainMap:
            request_args["ChainParentageMap"] = chainMap

        # save the spec first, before updating the reqmgr request status, to prevent a race
        # condition where the workflow is pulled to GQ before the site whitelist is updated
        workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
        report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)

        return report

    def _handleOnlyStateTransition(self, workload, request_args, dn):
        """
        It handles only the state transition.
        Special handling needed if a request is aborted or force completed.
        """
        # if we got here, then the main workflow has been already validated
        # and the status transition is allowed
        req_status = request_args["RequestStatus"]
        cascade = request_args.get("cascade", False)

        if req_status in ["aborted", "force-complete"]:
            # cancel the workflow first
            self.gq_service.cancelWorkflow(workload.name())

        # cascade option is only supported for these 3 statuses. If set, we need to
        # find all the children requests and perform the same status transition
        if req_status in ["rejected", "closed-out", "announced"] and cascade:
            childrenNamesAndStatus = self._retrieveResubmissionChildren(workload.name())
            msg = "Workflow {} has {} ".format(workload.name(), len(childrenNamesAndStatus))
            msg += "children workflows to have a status transition to: {}".format(req_status)
            cherrypy.log(msg)
            for childInfo in childrenNamesAndStatus:
                if check_allowed_transition(childInfo['value'], req_status):
                    cherrypy.log('Updating request status for {} to {}.'.format(childInfo['id'], req_status))
                    self.reqmgr_db_service.updateRequestStatus(childInfo['id'], req_status, dn)
                else:
                    msg = "Status transition from {} to {} ".format(childInfo['value'], req_status)
                    msg += "not allowed for workflow: {}, skipping it!".format(childInfo['id'])
                    cherrypy.log(msg)
        # then update the original/parent workflow status in couchdb
        cherrypy.log('Updating request status for {} to {}.'.format(workload.name(), req_status))
        report = self.reqmgr_db_service.updateRequestStatus(workload.name(), req_status, dn)
        return report

    def _updateRequest(self, workload, request_args):
        dn = get_user_info().get("dn", "unknown")

        if "RequestStatus" not in request_args:
            report = self._handleNoStatusUpdate(workload, request_args, dn)
        else:
            req_status = request_args["RequestStatus"]
            if len(request_args) == 2 and req_status == "assignment-approved":
                report = self._handleAssignmentApprovedTransition(workload, request_args, dn)
            elif len(request_args) > 1 and req_status == "assigned":
                report = self._handleAssignmentStateTransition(workload, request_args, dn)
            elif len(request_args) == 1 or (len(request_args) == 2 and "cascade" in request_args):
                report = self._handleOnlyStateTransition(workload, request_args, dn)
            else:
                msg = "There are invalid arguments with this status transition: %s" % request_args
                raise InvalidSpecParameterValue(msg)

        if report == 'OK':
            return {workload.name(): "OK"}
        return {workload.name(): "ERROR"}
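
The branching above can be summarised as a small dispatch on the shape of request_args. A standalone sketch follows; the handler labels and payloads are illustrative only.

def pick_handler(request_args):
    """Mirror of the dispatch rules in _updateRequest above."""
    if "RequestStatus" not in request_args:
        return "no-status-update"
    status = request_args["RequestStatus"]
    if len(request_args) == 2 and status == "assignment-approved":
        return "assignment-approved-transition"
    if len(request_args) > 1 and status == "assigned":
        return "assigned-state-transition"
    if len(request_args) == 1 or (len(request_args) == 2 and "cascade" in request_args):
        return "status-only-transition"
    return "invalid"

# pick_handler({'RequestPriority': 90000})                          -> "no-status-update"
# pick_handler({'RequestStatus': 'rejected', 'cascade': True})      -> "status-only-transition"
# pick_handler({'RequestStatus': 'assigned', 'Team': 'production'}) -> "assigned-state-transition"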

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        """workloadPairList is a list of tuple containing (workload, request_args)"""
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        add to request_args properties which are not initially set by the user.
        This data will be put into CouchDB.
        Update request_args here if additional information needs to be put into CouchDB
        """
        request_args['RequestWorkflow'] = sanitizeURL("%s/%s/%s/spec" % (request_args["CouchURL"],
                                                                         request_args["CouchWorkloadDBName"],
                                                                         workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # Add initial priority only for the creation of the request
        request_args['InitialPriority'] = request_args["RequestPriority"]

        return
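
For reference, a minimal sketch of the RequestWorkflow value assembled above; the Couch host, database name and request name are made up, and sanitizeURL additionally strips any credentials embedded in the URL.

couch_url = "https://cmsweb.example.com/couchdb"
db_name = "reqmgr_workload_cache"
request_name = "user_TaskChain_190101_120000_1234"
request_workflow = "%s/%s/%s/spec" % (couch_url, db_name, request_name)
# -> "https://cmsweb.example.com/couchdb/reqmgr_workload_cache/user_TaskChain_190101_120000_1234/spec"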

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, workload_pair_list, multi_update_flag=False, multi_names_flag=False):
        """
        Create and update CouchDB with a new request.
        The request argument is passed from validation
        (validation converts cherrypy.request.body data into arguments)

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(json.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            cherrypy.log("Create request, input args: %s ..." % request_args)
            try:
                workload.saveCouch(request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                                   metadata=request_args)
                out.append({'request': workload.name()})
            except Exception as ex:
                # then it failed to add the spec file as attachment
                # we better delete the original request to avoid confusion in wmstats
                cherrypy.log("Error saving request spec to couch: %s " % str(ex))
                self.delete(request_args['RequestName'])

        return out
Example #7
class JobUpdaterPoller(BaseWorkerThread):
    """
    _JobUpdaterPoller_

    Poller class for the JobUpdater
    """

    def __init__(self, config):
        """
        __init__
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        self.bossAir = BossAirAPI(config=self.config)
        self.reqmgr2 = ReqMgr(self.config.JobUpdater.reqMgr2Url)
        self.workqueue = WorkQueue(self.config.WorkQueueManager.couchurl,
                                   self.config.WorkQueueManager.dbname)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.listWorkflowsDAO = self.daoFactory(classname="Workflow.ListForJobUpdater")
        self.updateWorkflowPrioDAO = self.daoFactory(classname="Workflow.UpdatePriority")
        self.executingJobsDAO = self.daoFactory(classname="Jobs.GetNumberOfJobsForWorkflowTaskStatus")

    def setup(self, parameters=None):
        """
        _setup_
        """
        pass

    def terminate(self, parameters=None):
        """
        _terminate_

        Terminate gracefully.
        """
        pass

    def algorithm(self, parameters=None):
        """
        _algorithm_
        """
        try:
            logging.info("Synchronizing priorities with ReqMgr...")
            self.synchronizeJobPriority()
            logging.info("Priorities were synchronized, wait until the next cycle")
        except CouchConnectionError as ex:
            msg = "Caught CouchConnectionError exception in JobUpdater\n"
            msg += "transactions postponed until the next polling cycle\n"
            msg += str(ex)
            logging.exception(msg)
        except CouchConflictError as ex:
            msg = "Caught CouchConflictError exception in JobUpdater\n"
            msg += "transactions postponed until the next polling cycle\n"
            msg += str(ex)
            logging.exception(msg)
        except Exception as ex:
            if 'Connection refused' in str(ex):
                logging.warn("Failed to sync priorities. Trying in the next cycle")
            else:
                msg = "Caught unexpected exception in JobUpdater: %s\n" % str(ex)
                logging.exception(msg)
                raise JobUpdaterException(msg)

    def synchronizeJobPriority(self):
        """
        _synchronizeJobPriority_

        Check WMBS and WorkQueue for active workflows and compare with the
        ReqMgr for priority changes. If a priority change occurs
        then update the job priority in the batch system and
        the elements in the local queue that have not been injected yet.
        """
        # Update the priority of workflows that are not in WMBS and just in local queue
        priorityCache = {}
        workflowsToUpdate = {}
        workflowsToCheck = [x for x in self.workqueue.getAvailableWorkflows()]
        for workflow, priority in workflowsToCheck:
            if workflow not in priorityCache:
                try:
                    priorityCache[workflow] = self.reqmgr2.getRequestByNames(workflow)[workflow]['RequestPriority']
                except Exception as ex:
                    logging.error("Couldn't retrieve the priority of request %s", workflow)
                    logging.error("Error: %s", str(ex))
                    continue
            if priority != priorityCache[workflow]:
                workflowsToUpdate[workflow] = priorityCache[workflow]
        logging.info("Found %d workflows to update in workqueue", len(workflowsToUpdate))
        for workflow in workflowsToUpdate:
            self.workqueue.updatePriority(workflow, workflowsToUpdate[workflow])

        # Check the workflows in WMBS
        priorityCache = {}
        workflowsToUpdateWMBS = {}
        workflowsToCheck = self.listWorkflowsDAO.execute()
        for workflowEntry in workflowsToCheck:
            workflow = workflowEntry['name']
            if workflow not in priorityCache:
                try:
                    priorityCache[workflow] = self.reqmgr2.getRequestByNames(workflow)[workflow]['RequestPriority']
                except Exception as ex:
                    logging.error("Couldn't retrieve the priority of request %s", workflow)
                    logging.error("Error: %s", str(ex))
                    continue
            requestPriority = int(priorityCache[workflow])
            if requestPriority != int(workflowEntry['workflow_priority']):
                # Update the workqueue priority for the Available elements
                self.workqueue.updatePriority(workflow, requestPriority)
                # Check if there are executing jobs for this particular task
                if self.executingJobsDAO.execute(workflow, workflowEntry['task']) > 0:
                    self.bossAir.updateJobInformation(workflow, workflowEntry['task'],
                                                      requestPriority=priorityCache[workflow],
                                                      taskPriority=workflowEntry['task_priority'])
                workflowsToUpdateWMBS[workflow] = priorityCache[workflow]
        if workflowsToUpdateWMBS:
            logging.info("Updating %d workflows in WMBS.", len(workflowsToUpdateWMBS))
            self.updateWorkflowPrioDAO.execute(workflowsToUpdateWMBS)
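
A standalone sketch of the comparison loop above; the workflow names and priorities are invented. Only workflows whose locally known priority differs from what ReqMgr reports trigger an updatePriority() call.

reqmgr_priorities = {'wf_A': 110000, 'wf_B': 85000}    # what ReqMgr currently reports
local_elements = [('wf_A', 110000), ('wf_B', 63000)]   # (workflow, priority) in the local queue
to_update = {wf: reqmgr_priorities[wf] for wf, prio in local_elements
             if prio != reqmgr_priorities.get(wf, prio)}
# to_update == {'wf_B': 85000}
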
Example #8
class JobUpdaterPoller(BaseWorkerThread):
    """
    _JobUpdaterPoller_

    Poller class for the JobUpdater
    """
    def __init__(self, config):
        """
        __init__
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        self.bossAir = BossAirAPI(config=self.config)
        self.reqmgr2 = ReqMgr(self.config.General.ReqMgr2ServiceURL)
        self.workqueue = WorkQueue(self.config.WorkQueueManager.couchurl,
                                   self.config.WorkQueueManager.dbname)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package="WMCore.WMBS",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.listWorkflowsDAO = self.daoFactory(
            classname="Workflow.ListForJobUpdater")
        self.updateWorkflowPrioDAO = self.daoFactory(
            classname="Workflow.UpdatePriority")
        self.executingJobsDAO = self.daoFactory(
            classname="Jobs.GetNumberOfJobsForWorkflowTaskStatus")

    def setup(self, parameters=None):
        """
        _setup_
        """
        pass

    def terminate(self, parameters=None):
        """
        _terminate_

        Terminate gracefully.
        """
        pass

    @timeFunction
    def algorithm(self, parameters=None):
        """
        _algorithm_
        """
        try:
            logging.info("Synchronizing priorities with ReqMgr...")
            self.synchronizeJobPriority()
            logging.info(
                "Priorities were synchronized, wait until the next cycle")
        except CouchConnectionError as ex:
            msg = "Caught CouchConnectionError exception in JobUpdater\n"
            msg += "transactions postponed until the next polling cycle\n"
            msg += str(ex)
            logging.exception(msg)
        except CouchConflictError as ex:
            msg = "Caught CouchConflictError exception in JobUpdater\n"
            msg += "transactions postponed until the next polling cycle\n"
            msg += str(ex)
            logging.exception(msg)
        except Exception as ex:
            if 'Connection refused' in str(ex):
                logging.warn(
                    "Failed to sync priorities. Trying in the next cycle")
            else:
                msg = "Caught unexpected exception in JobUpdater: %s\n" % str(
                    ex)
                logging.exception(msg)
                raise JobUpdaterException(msg)

    def synchronizeJobPriority(self):
        """
        _synchronizeJobPriority_

        Check WMBS and WorkQueue for active workflows and compare with the
        ReqMgr for priority changes. If a priority change occurs
        then update the job priority in the batch system and
        the elements in the local queue that have not been injected yet.
        """
        # Update the priority of workflows that are not in WMBS and just in local queue
        priorityCache = {}
        workflowsToUpdate = {}
        workflowsToCheck = [x for x in self.workqueue.getAvailableWorkflows()]
        for workflow, priority in workflowsToCheck:
            if workflow not in priorityCache:
                try:
                    result = self.reqmgr2.getRequestByNames(workflow)[0]
                    priorityCache[workflow] = result[workflow][
                        'RequestPriority']
                except Exception as ex:
                    logging.error(
                        "Couldn't retrieve the priority of request %s",
                        workflow)
                    logging.error("Error: %s", str(ex))
                    continue
            if priority != priorityCache[workflow]:
                workflowsToUpdate[workflow] = priorityCache[workflow]
        logging.info("Found %d workflows to update in workqueue",
                     len(workflowsToUpdate))
        for workflow in workflowsToUpdate:
            self.workqueue.updatePriority(workflow,
                                          workflowsToUpdate[workflow])

        # Check the workflows in WMBS
        priorityCache = {}
        workflowsToUpdateWMBS = {}
        workflowsToCheck = self.listWorkflowsDAO.execute()
        for workflowEntry in workflowsToCheck:
            workflow = workflowEntry['name']
            if workflow not in priorityCache:
                try:
                    result = self.reqmgr2.getRequestByNames(workflow)[0]
                    priorityCache[workflow] = result[workflow][
                        'RequestPriority']
                except Exception as ex:
                    logging.error(
                        "Couldn't retrieve the priority of request %s",
                        workflow)
                    logging.error("Error: %s", str(ex))
                    continue
            requestPriority = int(priorityCache[workflow])
            if requestPriority != int(workflowEntry['workflow_priority']):
                # Update the workqueue priority for the Available elements
                self.workqueue.updatePriority(workflow, requestPriority)
                # Check if there are executing jobs for this particular task
                if self.executingJobsDAO.execute(workflow,
                                                 workflowEntry['task']) > 0:
                    self.bossAir.updateJobInformation(
                        workflow,
                        workflowEntry['task'],
                        requestPriority=priorityCache[workflow],
                        taskPriority=workflowEntry['task_priority'])
                workflowsToUpdateWMBS[workflow] = priorityCache[workflow]
        if workflowsToUpdateWMBS:
            logging.info("Updating %d workflows in WMBS.",
                         len(workflowsToUpdateWMBS))
            self.updateWorkflowPrioDAO.execute(workflowsToUpdateWMBS)
Example #9
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db, couchapp="ReqMgr")
        # this is needed for the post validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host, config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is a specific mapping function for data coming from the browser

        TODO: give a keyword so it doesn't have to loop through everything in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        if "status" in param.kwargs and isinstance(param.kwargs["status"], basestring):
            param.kwargs["status"] = [param.kwargs["status"]]
        if "status" in param.kwargs:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        "Can't retrieve bulk archived status requests, use other search arguments")

        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):

        data = cherrypy.request.body.read()
        if data:
            request_args = JsonWrapper.loads(data)
            if requestName:
                request_args["RequestName"] = requestName
            if isinstance(request_args, dict):
                request_args = [request_args]

        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs
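
A quick standalone mimic of the key filtering performed above; the ids are hypothetical browser form keys, and the special 'request-all' entry is skipped.

def extract_request_names(ids):
    names = []
    for key in ([ids] if isinstance(ids, str) else ids):
        if key.startswith('request'):
            rid = key.split('request-')[-1]
            if rid != 'all':
                names.append(rid)
    return names

# extract_request_names(['request-wf_A_190101_1', 'request-all', 'other']) -> ['wf_A_190101_1']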

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        request_names = []
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(JsonWrapper.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            # remove this
            # tmp = []
            # for prop in param.kwargs:
            #    tmp.append(prop)
            # for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config, self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = JsonWrapper.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validate successful
        # move the validated argument to safe
        # make param empty
        # otherwise raise the error
        try:
            if method in ['GET']:
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe, validate_request_update_args, requestName)
                # TODO: handle multiple clones
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe, validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._getRequestNamesFromBody(param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe, validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)

    @restcall(formats=[('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, it returns job information about the request as well.

        TODO:
            stuff like this has to be filtered out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        if len(kwargs) == 0:
            kwargs['status'] = "running"
            options = {"descending": True, 'include_docs': True, 'limit': 200}
            request_docs = self.reqmgr_db.loadView("ReqMgr", "bystatus", options)
            return rows([request_docs])

        # list of status
        status = kwargs.get("status", False)
        # list of request names
        name = kwargs.get("name", False)
        request_type = kwargs.get("request_type", False)
        prep_id = kwargs.get("prep_id", False)
        inputdataset = kwargs.get("inputdataset", False)
        outputdataset = kwargs.get("outputdataset", False)
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", False)
        workqueue = kwargs.get("workqueue", False)
        team = kwargs.get("team", False)
        mc_pileup = kwargs.get("mc_pileup", False)
        data_pileup = kwargs.get("data_pileup", False)
        detail = kwargs.get("detail", True)
        if detail in (False, "false", "False"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use a stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []

        if status and not team and not request_type:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bystatus", option, status))
        if status and team:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView("byteamandstatus", option, [[team, status]]))
        if status and request_type:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("requestsbystatusandtype", option,
                                                                             [[status, request_type]]))
        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byprepid", option, prep_id))
        if inputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydate", option, date_range))
        if campaign:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bycampaign", option, campaign))
        if workqueue:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(self.reqmgr_db_service.getRequestByCouchView("bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)
        if len(result) == 0:
            return []
        return rows([result])

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests
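
A minimal illustration of the intersection semantics above: only requests present in every per-filter result survive, and the returned document is taken from the first result set. The request names and payloads are invented.

info_by_status = {'wf_A': {'RequestStatus': 'running-open'}, 'wf_B': {'RequestStatus': 'running-open'}}
info_by_campaign = {'wf_B': {'Campaign': 'Run2018X'}, 'wf_C': {'Campaign': 'Run2018X'}}
common = set(info_by_status) & set(info_by_campaign)
result = {name: info_by_status[name] for name in common}
# result == {'wf_B': {'RequestStatus': 'running-open'}}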

        # TODO move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys[request]:
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr', 'childresubmissionrequests', keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames

    def _updateRequest(self, workload, request_args):

        if workload is None:
            (workload, request_args) = self.initialize_clone(request_args["OriginalRequestName"])
            return self.post(workload, request_args)

        dn = cherrypy.request.user.get("dn", "unknown")

        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['HardTimeout'] = request_args['SoftTimeout'] + request_args['GracePeriod']

        if 'RequestPriority' in request_args:
            self.gq_service.updatePriority(workload.name(), request_args['RequestPriority'])

        if "total_jobs" in request_args:
            # only the global workqueue updates these stats
            # request_args should contain only the 4 keys: 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(workload.name(), request_args)
        # if is not just updating status
        else:
            req_status = request_args.get("RequestStatus", None)

            if len(request_args) >= 1 and req_status is None:
                try:
                    workload.updateArguments(request_args)
                except Exception as ex:
                    msg = traceback.format_exc()
                    cherrypy.log("Error for request args %s: %s" % (request_args, msg))
                    raise InvalidSpecParameterValue(str(ex))

                # trailing / is needed for the savecouchUrl function
                workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)

            elif (req_status in ["closed-out", "announced"]) and request_args.get("cascade", False):
                cascade_list = self._retrieveResubmissionChildren(workload.name())
                for req_name in cascade_list:
                    report = self.reqmgr_db_service.updateRequestStatus(req_name, req_status)

            # If it is aborted or force-complete transition call workqueue to cancel the request
            else:
                if req_status == "aborted" or req_status == "force-complete":
                    self.gq_service.cancelWorkflow(workload.name())
                report = self.reqmgr_db_service.updateRequestProperty(workload.name(), request_args, dn)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        "workloadPairList is a list of tuple containing (workload, requeat_args)"
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        add to request_args properties which are not initially set by the user.
        This data will be put into CouchDB.
        Update request_args here if additional information needs to be put into CouchDB
        """
        request_args['RequestWorkflow'] = sanitizeURL("%s/%s/%s/spec" % (request_args["CouchURL"],
                                                                         request_args["CouchWorkloadDBName"],
                                                                         workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self, workload_pair_list, multi_update_flag=False, multi_names_flag=False):
        """
        Create and update CouchDB with a new request.
        The request argument is passed from validation
        (validation converts cherrypy.request.body data into arguments)

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(JsonWrapper.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            cherrypy.log("INFO: Create request, input args: %s ..." % request_args)
            workload.saveCouch(request_args["CouchURL"], request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
Example #10
File: Request.py Project: menglu21/WMCore
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db,
                                                 couchapp="ReqMgr")
        # this is needed for the post validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host,
                                    config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is a specific mapping function for data coming from the browser

        TODO: give a keyword so it doesn't have to loop through everything in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        no_multi_key = ["detail", "_nostale", "date_range", "common_dict"]
        for key, value in param.kwargs.items():
            # convert string to list
            if key not in no_multi_key and isinstance(value, basestring):
                param.kwargs[key] = [value]

        detail = param.kwargs.get('detail', True)
        if detail in (False, "false", "False", "FALSE"):
            detail = False

        if "status" in param.kwargs and detail:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        """Can't retrieve bulk archived status requests with detail option True, 
                           set detail=false or use other search arguments""")

        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):
        data = cherrypy.request.body.read()
        if data:
            request_args = json.loads(data)
            if requestName:
                request_args["RequestName"] = requestName

        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        request_names = []
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(
                    multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(
                json.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(
                param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            # remove this
            # tmp = []
            # for prop in param.kwargs:
            #    tmp.append(prop)
            # for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = json.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validate successful
        # move the validated argument to safe
        # make param empty
        # otherwise raise the error
        try:
            if method == 'GET':
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe,
                                          validate_request_update_args,
                                          requestName)
                # TODO: handle multiple clones
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe,
                                                validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._getRequestNamesFromBody(
                        param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe,
                                              validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)

    def _maskTaskStepChain(self, masked_dict, req_dict, chain_name, mask_key):

        mask_exist = False
        num_loop = req_dict["%sChain" % chain_name]
        for i in range(num_loop):
            if mask_key in req_dict["%s%s" % (chain_name, i + 1)]:
                mask_exist = True
                break
        if mask_exist:
            defaultValue = masked_dict[mask_key]
            masked_dict[mask_key] = []
            # each Task/Step contributes its own value; chains that don't define the
            # key fall back to the top-level default value
            chain_key = "%sName" % chain_name
            for i in range(num_loop):
                chain = req_dict["%s%s" % (chain_name, i + 1)]
                if mask_key in chain:
                    masked_dict[mask_key].append({
                        chain_key: chain[chain_key],
                        mask_key: chain[mask_key]
                    })
                else:
                    if isinstance(defaultValue, dict):
                        value = defaultValue.get(chain_key, None)
                    else:
                        value = defaultValue
                    masked_dict[mask_key].append({
                        chain_key: chain[chain_key],
                        mask_key: value
                    })
        return
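
A simplified standalone mimic of the per-step masking above (ignoring the dict-valued default case); the request dictionary is hypothetical. Each chained step contributes its own value, and steps without the key fall back to the top-level default.

def mask_step_chain(default_value, req_dict, mask_key):
    masked = []
    for i in range(req_dict["StepChain"]):
        step = req_dict["Step%s" % (i + 1)]
        masked.append({"StepName": step["StepName"],
                       mask_key: step.get(mask_key, default_value)})
    return masked

req = {"StepChain": 2,
       "Step1": {"StepName": "GENSIM", "Memory": 8000},
       "Step2": {"StepName": "DIGI"}}
# mask_step_chain(16000, req, "Memory") ->
# [{'StepName': 'GENSIM', 'Memory': 8000}, {'StepName': 'DIGI', 'Memory': 16000}]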

    def _mask_result(self, mask, result):

        if len(mask) == 1 and mask[0] == "DAS":
            mask = ReqMgrConfigDataCache.getConfig(
                "DAS_RESULT_FILTER")["filter_list"]

        if len(mask) > 0:
            masked_result = {}
            for req_name, req_info in result.items():
                masked_result.setdefault(req_name, {})
                for mask_key in mask:
                    masked_result[req_name].update(
                        {mask_key: req_info.get(mask_key, None)})
                    if "TaskChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name],
                                                req_info, "Task", mask_key)
                    elif "StepChain" in req_info:
                        self._maskTaskStepChain(masked_result[req_name],
                                                req_info, "Step", mask_key)

            return masked_result
        else:
            return result

    @restcall(formats=[('text/plain', PrettyJSONFormat()),
                       ('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs
        Currently defined kwargs are the following:
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, it returns job information about the request as well.

        TODO:
            stuff like this has to be masked out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        # list of status
        status = kwargs.get("status", [])
        # list of request names
        name = kwargs.get("name", [])
        request_type = kwargs.get("request_type", [])
        prep_id = kwargs.get("prep_id", [])
        inputdataset = kwargs.get("inputdataset", [])
        outputdataset = kwargs.get("outputdataset", [])
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", [])
        workqueue = kwargs.get("workqueue", [])
        team = kwargs.get("team", [])
        mc_pileup = kwargs.get("mc_pileup", [])
        data_pileup = kwargs.get("data_pileup", [])
        requestor = kwargs.get("requestor", [])
        mask = kwargs.get("mask", [])
        detail = kwargs.get("detail", True)
        # set the return format. The default format has the request name as a key;
        # if common_dict is set to 1, it returns a list of dictionaries with a RequestName field.
        common_dict = int(kwargs.get("common_dict", 0))
        if detail in (False, "false", "False", "FALSE"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should use a stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []

        if len(status) == 1 and status[0] == "ACTIVE":
            status = ACTIVE_STATUS
        if status and not team and not request_type and not requestor:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bystatus", option, status))
        if status and team:
            query_keys = [[t, s] for t in team for s in status]
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byteamandstatus", option, query_keys))
        if status and request_type:
            query_keys = [[s, rt] for rt in request_type for s in status]
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "requestsbystatusandtype", option, query_keys))
        if status and requestor:
            query_keys = [[s, r] for r in requestor for s in status]
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bystatusandrequestor", option, query_keys))

        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byprepid", option, prep_id))
        if inputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydate", option, date_range))
        if campaign:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bycampaign", option, campaign))
        if workqueue:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)

        if len(result) == 0:
            return []

        result = self._mask_result(mask, result)
        # If detail is set to False return just list of request name
        if not option["include_docs"]:
            return result.keys()

        if common_dict == 1:
            response_list = result.values()
        else:
            response_list = [result]
        return rows(response_list)

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests

        # TODO move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys[request]:
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr',
                                         'childresubmissionrequests',
                                         keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(
                self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames

    def _handleNoStatusUpdate(self, workload, request_args):
        """
        Only a few values can be updated without a state transition being involved:
        currently 'RequestPriority', 'total_jobs', 'input_lumis', 'input_events' and 'input_num_files'.
        """
        if 'RequestPriority' in request_args:
            # must update three places: GQ elements, workload_cache and workload spec
            self.gq_service.updatePriority(workload.name(),
                                           request_args['RequestPriority'])
            report = self.reqmgr_db_service.updateRequestProperty(
                workload.name(), request_args)
            workload.setPriority(request_args['RequestPriority'])
            workload.saveCouchUrl(workload.specUrl())
        elif "total_jobs" in request_args:
            # only the global queue updates these stats
            # request_args should contain only the 4 keys: 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(
                workload.name(), request_args)
        else:
            raise InvalidSpecParameterValue(
                "can't update value without state transition: %s" %
                request_args)

        return report

    def _handleAssignmentStateTransition(self, workload, request_args, dn):

        req_status = request_args["RequestStatus"]
        if req_status == "assigned" and not request_args.get('Team',
                                                             '').strip():
            raise InvalidSpecParameterValue(
                "Team must be set during workflow assignment: %s" %
                request_args)

        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['SoftTimeout'] = int(request_args['SoftTimeout'])
            # TODO: not sure why GracePeriod needs conversion when passed from the web interface, but convert it here
            request_args['GracePeriod'] = int(request_args['GracePeriod'])
            request_args['HardTimeout'] = request_args[
                'SoftTimeout'] + request_args['GracePeriod']
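            # Illustrative arithmetic (values are made up): SoftTimeout=159600
            # and GracePeriod=300 give HardTimeout=159600+300=159900 seconds.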

        # Only allow extra value updates for the assigned status
        try:
            workload.updateArguments(request_args)
        except Exception as ex:
            msg = traceback.format_exc()
            cherrypy.log("Error for request args %s: %s" % (request_args, msg))
            raise InvalidSpecParameterValue(str(ex))

        # legacy update schema to support ops script
        loadRequestSchema(workload, request_args)
        # update OutputDatasets after ProcessingString and AcquisitionEra are updated
        request_args['OutputDatasets'] = workload.listOutputDatasets()
        report = self.reqmgr_db_service.updateRequestProperty(
            workload.name(), request_args, dn)
        workload.saveCouch(self.config.couch_host, self.config.couch_reqmgr_db)
        return report

    def _handleCascadeUpdate(self, workload, request_args, dn):
        """
        only the closed-out and announced statuses have this option
        """
        req_status = request_args["RequestStatus"]
        # check whether the cascade option is set
        if request_args["cascade"]:
            cascade_list = self._retrieveResubmissionChildren(workload.name())
            for req_name in cascade_list:
                self.reqmgr_db_service.updateRequestStatus(
                    req_name, req_status, dn)
        # update original workflow status
        report = self.reqmgr_db_service.updateRequestStatus(
            workload.name(), req_status, dn)
        return report

    def _handleOnlyStateTransition(self, workload, req_status, dn):
        """
        It handles only the state transition. Special handling needed if a
        request is aborted or force completed.
        """
        if req_status in ["aborted", "force-complete"]:
            # cancel the workflow first
            self.gq_service.cancelWorkflow(workload.name())
        #update the request status in couchdb
        report = self.reqmgr_db_service.updateRequestStatus(
            workload.name(), req_status, dn)
        return report

    def _updateRequest(self, workload, request_args):
        dn = cherrypy.request.user.get("dn", "unknown")

        if workload is None:
            (workload, request_args) = self.initialize_clone(
                request_args["OriginalRequestName"])
            return self.post([(workload, request_args)])

        if "RequestStatus" not in request_args:
            report = self._handleNoStatusUpdate(workload, request_args)

        else:
            req_status = request_args["RequestStatus"]
            if len(request_args) > 1 and req_status == "assigned":
                report = self._handleAssignmentStateTransition(
                    workload, request_args, dn)
            elif len(request_args) == 2 and req_status in ["closed-out", "announced"] and \
                "cascade" in request_args:
                report = self._handleCascadeUpdate(workload, request_args, dn)

            elif len(request_args) == 1:
                report = self._handleOnlyStateTransition(
                    workload, req_status, dn)
            else:
                raise InvalidSpecParameterValue(
                    "can't update value except transition to assigned status: %s"
                    % request_args)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        """workloadPairList is a list of tuple containing (workload, requeat_args)"""
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        Add to request_args properties which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to be stored in couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL(
            "%s/%s/%s/spec" %
            (request_args["CouchURL"], request_args["CouchWorkloadDBName"],
             workload.name()))['url']
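        # Illustrative only (not from the original source): the stored value has
        # the form "<CouchURL>/<CouchWorkloadDBName>/<RequestName>/spec", taken
        # from the 'url' field returned by sanitizeURL.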

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # Add the initial priority only at request creation
        request_args['InitialPriority'] = request_args["RequestPriority"]

        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self,
             workload_pair_list,
             multi_update_flag=False,
             multi_names_flag=False):
        """
        Create and update couchDB with a new request.
        The request arguments are passed from validation
        (validation converts the cherrypy.request.body data into arguments).

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(json.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            # legacy update schema to support ops script
            loadRequestSchema(workload, request_args)

            cherrypy.log("INFO: Create request, input args: %s ..." %
                         request_args)
            workload.saveCouch(request_args["CouchURL"],
                               request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
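
The _updateRequest method above routes a PUT purely by the shape of request_args: no RequestStatus means a no-transition update, a status plus extra fields is only honoured for "assigned", a status plus "cascade" only for "closed-out"/"announced", and a lone status is a plain state transition. A minimal, self-contained sketch of that routing (the function and handler names here are placeholders for illustration, not WMCore APIs):

def route_update(request_args):
    """Toy dispatcher mirroring the branching in _updateRequest (illustrative only)."""
    if "RequestStatus" not in request_args:
        return "no_status_update"  # e.g. {'RequestPriority': 90000}
    status = request_args["RequestStatus"]
    if len(request_args) > 1 and status == "assigned":
        return "assignment_transition"
    if len(request_args) == 2 and status in ("closed-out", "announced") \
            and "cascade" in request_args:
        return "cascade_update"
    if len(request_args) == 1:
        return "state_transition_only"
    raise ValueError("can't update values except during the transition "
                     "to assigned status: %s" % request_args)

# e.g. route_update({'RequestStatus': 'aborted'}) returns "state_transition_only"
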
Example #11
0
File: Request.py Project: prozober/WMCore
class Request(RESTEntity):
    def __init__(self, app, api, config, mount):
        # main CouchDB database where requests/workloads are stored
        RESTEntity.__init__(self, app, api, config, mount)
        self.reqmgr_db = api.db_handler.get_db(config.couch_reqmgr_db)
        self.reqmgr_db_service = RequestDBWriter(self.reqmgr_db,
                                                 couchapp="ReqMgr")
        # this is needed for the post validation
        self.reqmgr_aux_db = api.db_handler.get_db(config.couch_reqmgr_aux_db)
        self.gq_service = WorkQueue(config.couch_host,
                                    config.couch_workqueue_db)

    def _requestArgMapFromBrowser(self, request_args):
        """
        This is a mapping function specific to data coming from the browser

        TODO: give a keyword so it doesn't have to loop through everything in general
        """
        docs = []
        for doc in request_args:
            for key in doc.keys():
                if key.startswith('request'):
                    rid = key.split('request-')[-1]
                    if rid != 'all':
                        docs.append(rid)
                    del doc[key]
        return docs

    def _validateGET(self, param, safe):
        # TODO: need proper validation but for now pass everything
        args_length = len(param.args)
        if args_length == 1:
            safe.kwargs["name"] = param.args[0]
            param.args.pop()
            return

        if "status" in param.kwargs and isinstance(param.kwargs["status"],
                                                   basestring):
            param.kwargs["status"] = [param.kwargs["status"]]
        if "status" in param.kwargs:
            for status in param.kwargs["status"]:
                if status.endswith("-archived"):
                    raise InvalidSpecParameterValue(
                        "Can't retrieve bulk archived status requests, use other search arguments"
                    )
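        # Illustrative only (not in the original): any status value ending in
        # "-archived", e.g. status=['aborted-archived'], is rejected here;
        # all other values fall through to the generic copy into safe.kwargs.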

        for prop in param.kwargs:
            safe.kwargs[prop] = param.kwargs[prop]

        for prop in safe.kwargs:
            del param.kwargs[prop]

        return

    def _validateRequestBase(self, param, safe, valFunc, requestName=None):

        data = cherrypy.request.body.read()
        if data:
            request_args = JsonWrapper.loads(data)
            if requestName:
                request_args["RequestName"] = requestName
            if isinstance(request_args, dict):
                request_args = [request_args]

        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_args = {}
            for prop in param.kwargs:
                request_args[prop] = param.kwargs[prop]

            for prop in request_args:
                del param.kwargs[prop]
            if requestName:
                request_args["RequestName"] = requestName
            request_args = [request_args]

        safe.kwargs['workload_pair_list'] = []
        if isinstance(request_args, dict):
            request_args = [request_args]
        for args in request_args:
            workload, r_args = valFunc(args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

    def _get_request_names(self, ids):
        "Extract request names from given documents"
        # cherrypy.log("request names %s" % ids)
        doc = {}
        if isinstance(ids, list):
            for rid in ids:
                doc[rid] = 'on'
        elif isinstance(ids, basestring):
            doc[ids] = 'on'

        docs = []
        for key in doc.keys():
            if key.startswith('request'):
                rid = key.split('request-')[-1]
                if rid != 'all':
                    docs.append(rid)
                del doc[key]
        return docs

    def _getMultiRequestArgs(self, multiRequestForm):
        request_args = {}
        for prop in multiRequestForm:
            if prop == "ids":
                request_names = self._get_request_names(
                    multiRequestForm["ids"])
            elif prop == "new_status":
                request_args["RequestStatus"] = multiRequestForm[prop]
            # remove this
            # elif prop in ["CustodialSites", "AutoApproveSubscriptionSites"]:
            #    request_args[prop] = [multiRequestForm[prop]]
            else:
                request_args[prop] = multiRequestForm[prop]
        return request_names, request_args

    def _validateMultiRequests(self, param, safe, valFunc):

        data = cherrypy.request.body.read()
        if data:
            request_names, request_args = self._getMultiRequestArgs(
                JsonWrapper.loads(data))
        else:
            # actually this is an error case
            # cherrypy.log(str(param.kwargs))
            request_names, request_args = self._getMultiRequestArgs(
                param.kwargs)

            for prop in request_args:
                if prop == "RequestStatus":
                    del param.kwargs["new_status"]
                else:
                    del param.kwargs[prop]

            del param.kwargs["ids"]

            # remove this
            # tmp = []
            # for prop in param.kwargs:
            #    tmp.append(prop)
            # for prop in tmp:
            #    del param.kwargs[prop]

        safe.kwargs['workload_pair_list'] = []

        for request_name in request_names:
            request_args["RequestName"] = request_name
            workload, r_args = valFunc(request_args, self.config,
                                       self.reqmgr_db_service, param)
            safe.kwargs['workload_pair_list'].append((workload, r_args))

        safe.kwargs["multi_update_flag"] = True

    def _getRequestNamesFromBody(self, param, safe, valFunc):

        request_names = JsonWrapper.loads(cherrypy.request.body.read())
        safe.kwargs['workload_pair_list'] = request_names
        safe.kwargs["multi_names_flag"] = True

    def validate(self, apiobj, method, api, param, safe):
        # to make validation successful:
        # move the validated arguments to safe,
        # make param empty,
        # otherwise raise the error
        try:
            if method in ['GET']:
                self._validateGET(param, safe)

            if method == 'PUT':
                args_length = len(param.args)
                if args_length == 1:
                    requestName = param.args[0]
                    param.args.pop()
                else:
                    requestName = None
                self._validateRequestBase(param, safe,
                                          validate_request_update_args,
                                          requestName)
                # TODO: handle multiple clones
            #                 if len(param.args) == 2:
            #                     #validate clone case
            #                     if param.args[0] == "clone":
            #                         param.args.pop()
            #                         return None, request_args

            if method == 'POST':
                args_length = len(param.args)
                if args_length == 1 and param.args[0] == "multi_update":
                    # special case for multi update from browser.
                    param.args.pop()
                    self._validateMultiRequests(param, safe,
                                                validate_request_update_args)
                elif args_length == 1 and param.args[0] == "bynames":
                    # special case for fetching requests by names from the browser.
                    param.args.pop()
                    self._getRequestNamesFromBody(
                        param, safe, validate_request_update_args)
                else:
                    self._validateRequestBase(param, safe,
                                              validate_request_create_args)
        except InvalidSpecParameterValue as ex:
            raise ex
        except Exception as ex:
            # TODO add proper error message instead of trace back
            msg = traceback.format_exc()
            cherrypy.log("Error: %s" % msg)
            if hasattr(ex, "message"):
                if hasattr(ex.message, '__call__'):
                    msg = ex.message()
                else:
                    msg = str(ex)
            else:
                msg = str(ex)
            raise InvalidSpecParameterValue(msg)

    def initialize_clone(self, request_name):
        requests = self.reqmgr_db_service.getRequestByNames(request_name)
        clone_args = requests.values()[0]
        # overwrite the name and time stamp.
        initialize_request_args(clone_args, self.config, clone=True)
        # timestamp status update

        spec = loadSpecByType(clone_args["RequestType"])
        workload = spec.factoryWorkloadConstruction(clone_args["RequestName"],
                                                    clone_args)
        return (workload, clone_args)

    @restcall(formats=[('application/json', JSONFormat())])
    def get(self, **kwargs):
        """
        Returns request info depending on the conditions set by kwargs
        Currently defined kwargs are following.
        statusList, requestNames, requestType, prepID, inputDataset, outputDataset, dateRange
        If jobInfo is True, returns job information about the request as well.

        TODO:
        stuff like this has to be filtered out from the result of this call:
            _attachments: {u'spec': {u'stub': True, u'length': 51712, u'revpos': 2, u'content_type': u'application/json'}}
            _id: maxa_RequestString-OVERRIDE-ME_130621_174227_9225
            _rev: 4-c6ceb2737793aaeac3f1cdf591593da4

        """
        if len(kwargs) == 0:
            kwargs['status'] = "running"
            options = {"descending": True, 'include_docs': True, 'limit': 200}
            request_docs = self.reqmgr_db.loadView("ReqMgr", "bystatus",
                                                   options)
            return rows([request_docs])

        # list of status
        status = kwargs.get("status", False)
        # list of request names
        name = kwargs.get("name", False)
        request_type = kwargs.get("request_type", False)
        prep_id = kwargs.get("prep_id", False)
        inputdataset = kwargs.get("inputdataset", False)
        outputdataset = kwargs.get("outputdataset", False)
        date_range = kwargs.get("date_range", False)
        campaign = kwargs.get("campaign", False)
        workqueue = kwargs.get("workqueue", False)
        team = kwargs.get("team", False)
        mc_pileup = kwargs.get("mc_pileup", False)
        data_pileup = kwargs.get("data_pileup", False)
        detail = kwargs.get("detail", True)
        if detail in (False, "false", "False"):
            option = {"include_docs": False}
        else:
            option = {"include_docs": True}
        # everything should be a stale view; this is only needed for tests
        _nostale = kwargs.get("_nostale", False)
        if _nostale:
            self.reqmgr_db_service._setNoStale()

        request_info = []

        if status and not team and not request_type:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bystatus", option, status))
        if status and team:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byteamandstatus", option, [[team, status]]))
        if status and request_type:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "requestsbystatusandtype", option,
                    [[status, request_type]]))
        if name:
            request_info.append(self.reqmgr_db_service.getRequestByNames(name))
        if prep_id:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byprepid", option, prep_id))
        if inputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byinputdataset", option, inputdataset))
        if outputdataset:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byoutputdataset", option, outputdataset))
        if date_range:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydate", option, date_range))
        if campaign:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bycampaign", option, campaign))
        if workqueue:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "byworkqueue", option, workqueue))
        if mc_pileup:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bymcpileup", option, mc_pileup))
        if data_pileup:
            request_info.append(
                self.reqmgr_db_service.getRequestByCouchView(
                    "bydatapileup", option, data_pileup))
        # get the intersection of the request info
        result = self._intersection_of_request_info(request_info)
        if len(result) == 0:
            return []
        return rows([result])

    def _intersection_of_request_info(self, request_info):
        requests = {}
        if len(request_info) < 1:
            return requests

        request_key_set = set(request_info[0].keys())
        for info in request_info:
            request_key_set = set(request_key_set) & set(info.keys())
        # TODO: need to assume some data might not contain include_docs
        for request_name in request_key_set:
            requests[request_name] = request_info[0][request_name]
        return requests

        # TODO move this out of this class

    def filterCouchInfo(self, couchInfo):
        for key in ['_rev', '_attachments']:
            if key in couchInfo:
                del couchInfo[key]

    def _combine_request(self, request_info, requestAgentUrl, cache):
        keys = {}
        requestAgentUrlList = []
        for row in requestAgentUrl["rows"]:
            request = row["key"][0]
            if request not in keys:
                keys[request] = []
            keys[request].append(row["key"][1])

        for request in request_info:
            for agentUrl in keys[request]:
                requestAgentUrlList.append([request, agentUrl])

        return requestAgentUrlList

    def _retrieveResubmissionChildren(self, request_name):

        result = self.reqmgr_db.loadView('ReqMgr',
                                         'childresubmissionrequests',
                                         keys=[request_name])['rows']
        childrenRequestNames = []
        for child in result:
            childrenRequestNames.append(child['id'])
            childrenRequestNames.extend(
                self._retrieveResubmissionChildren(child['id']))
        return childrenRequestNames

    def _updateRequest(self, workload, request_args):

        if workload is None:
            (workload, request_args) = self.initialize_clone(
                request_args["OriginalRequestName"])
            return self.post([(workload, request_args)])

        dn = cherrypy.request.user.get("dn", "unknown")

        if ('SoftTimeout' in request_args) and ('GracePeriod' in request_args):
            request_args['HardTimeout'] = request_args[
                'SoftTimeout'] + request_args['GracePeriod']

        if 'RequestPriority' in request_args:
            self.gq_service.updatePriority(workload.name(),
                                           request_args['RequestPriority'])

        if "total_jobs" in request_args:
            # only the global queue updates these stats
            # request_args should contain only the 4 keys: 'total_jobs', 'input_lumis', 'input_events', 'input_num_files'
            report = self.reqmgr_db_service.updateRequestStats(
                workload.name(), request_args)
        # if it is not just updating the status
        else:
            req_status = request_args.get("RequestStatus", None)

            if len(request_args) >= 1 and req_status is None:
                try:
                    workload.updateArguments(request_args)
                except Exception as ex:
                    msg = traceback.format_exc()
                    cherrypy.log("Error for request args %s: %s" %
                                 (request_args, msg))
                    raise InvalidSpecParameterValue(str(ex))

                # trailing / is needed for the saveCouchUrl function
                workload.saveCouch(self.config.couch_host,
                                   self.config.couch_reqmgr_db)

            elif (req_status in ["closed-out"
                                 "announced"]) and request_args.get(
                                     "cascade", False):
                cascade_list = self._retrieveResubmissionChildren(
                    workload.name())
                for req_name in cascade_list:
                    report = self.reqmgr_db_service.updateRequestStatus(
                        req_name, req_status)

            # If it is an aborted or force-complete transition, call workqueue to cancel the request
            else:
                if req_status == "aborted" or req_status == "force-complete":
                    self.gq_service.cancelWorkflow(workload.name())
                report = self.reqmgr_db_service.updateRequestProperty(
                    workload.name(), request_args, dn)

        if report == 'OK':
            return {workload.name(): "OK"}
        else:
            return {workload.name(): "ERROR"}

    @restcall(formats=[('application/json', JSONFormat())])
    def put(self, workload_pair_list):
        "workloadPairList is a list of tuple containing (workload, requeat_args)"
        report = []
        for workload, request_args in workload_pair_list:
            result = self._updateRequest(workload, request_args)
            report.append(result)
        return report

    @restcall(formats=[('application/json', JSONFormat())])
    def delete(self, request_name):
        cherrypy.log("INFO: Deleting request document '%s' ..." % request_name)
        try:
            self.reqmgr_db.delete_doc(request_name)
        except CouchError as ex:
            msg = "ERROR: Delete failed."
            cherrypy.log(msg + " Reason: %s" % ex)
            raise cherrypy.HTTPError(404, msg)
            # TODO
        # delete should also happen on WMStats
        cherrypy.log("INFO: Delete '%s' done." % request_name)

    def _update_additional_request_args(self, workload, request_args):
        """
        Add to request_args properties which are not initially set by the user.
        This data will be put into couchdb.
        Update request_args here if additional information needs to be stored in couchdb.
        """
        request_args['RequestWorkflow'] = sanitizeURL(
            "%s/%s/%s/spec" %
            (request_args["CouchURL"], request_args["CouchWorkloadDBName"],
             workload.name()))['url']

        # Add the output datasets if necessary
        # for some bizarre reason OutputDatasets is a list of lists
        request_args['OutputDatasets'] = workload.listOutputDatasets()

        # TODO: remove this after reqmgr2 replaces reqmgr (reqmgr2Only)
        request_args['ReqMgr2Only'] = True
        return

    @restcall(formats=[('application/json', JSONFormat())])
    def post(self,
             workload_pair_list,
             multi_update_flag=False,
             multi_names_flag=False):
        """
        Create and update couchDB with a new request.
        The request arguments are passed from validation
        (validation converts the cherrypy.request.body data into arguments).

        TODO:
        this method will have some parts factored out so that e.g. clone call
        can share functionality.

        NOTES:
        1) do not strip spaces, #4705 will fail upon injection with spaces;
            currently the chain relies on a number of things coming in #4705
        2) reqInputArgs = Utilities.unidecode(JsonWrapper.loads(body))
            (from ReqMgrRESTModel.putRequest)
        """

        # storing the request document into Couch

        if multi_update_flag:
            return self.put(workload_pair_list)
        if multi_names_flag:
            return self.get(name=workload_pair_list)

        out = []
        for workload, request_args in workload_pair_list:
            self._update_additional_request_args(workload, request_args)

            cherrypy.log("INFO: Create request, input args: %s ..." %
                         request_args)
            workload.saveCouch(request_args["CouchURL"],
                               request_args["CouchWorkloadDBName"],
                               metadata=request_args)
            out.append({'request': workload.name()})
        return out
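
Both versions of the class combine the per-view lookups with _intersection_of_request_info, which intersects the sets of request names and keeps the documents from the first lookup. A minimal standalone sketch of that pattern, with plain dicts standing in for the couch view results (intersect_requests is a made-up name for illustration):

def intersect_requests(lookups):
    """Keep only requests present in every lookup; documents come from the first one."""
    if not lookups:
        return {}
    common = set(lookups[0])
    for info in lookups[1:]:
        common &= set(info)
    return dict((name, lookups[0][name]) for name in common)

# e.g. intersect_requests([{'req_A': 1, 'req_B': 2}, {'req_B': 5}]) == {'req_B': 2}
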
Example #12
0
class JobUpdaterPoller(BaseWorkerThread):
    """
    _JobUpdaterPoller_

    Poller class for the JobUpdater
    """

    def __init__(self, config):
        """
        __init__
        """
        BaseWorkerThread.__init__(self)
        self.config = config

        self.bossAir = BossAirAPI(config = self.config)
        self.reqmgr = RequestManager({'endpoint' : self.config.JobUpdater.reqMgrUrl})
        self.workqueue = WorkQueue(self.config.WorkQueueManager.couchurl,
                                   self.config.WorkQueueManager.dbname)

        myThread = threading.currentThread()

        self.daoFactory = DAOFactory(package = "WMCore.WMBS",
                                     logger = myThread.logger,
                                     dbinterface = myThread.dbi)

        self.listWorkflowsDAO = self.daoFactory(classname = "Workflow.ListForJobUpdater")
        self.updateWorkflowPrioDAO = self.daoFactory(classname = "Workflow.UpdatePriority")
        self.executingJobsDAO = self.daoFactory(classname = "Jobs.GetNumberOfJobsForWorkflowTaskStatus")


    def setup(self, parameters = None):
        """
        _setup_
        """
        pass

    def terminate(self, parameters = None):
        """
        _terminate_

        Terminate gracefully.
        """
        pass

    def algorithm(self, parameters = None):
        """
        _algorithm_
        """
        logging.info("Synchronizing priorities with ReqMgr...")
        self.synchronizeJobPriority()

    def synchronizeJobPriority(self):
        """
        _synchronizeJobPriority_

        Check WMBS and WorkQueue for active workflows and compare with the
        ReqMgr for priority changes. If a priority change occurs
        then update the job priority in the batch system and
        the elements in the local queue that have not been injected yet.
        """
        # Update the priority of workflows that are not in WMBS and just in local queue
        priorityCache = {}
        workflowsToUpdate = {}
        workflowsToCheck = [x for x in self.workqueue.getAvailableWorkflows()]
        for workflow, priority in workflowsToCheck:
            if workflow not in priorityCache:
                priorityCache[workflow] = self.reqmgr.getRequest(workflow)['RequestPriority']
            if priority != priorityCache[workflow]:
                workflowsToUpdate[workflow] = priorityCache[workflow]
        for workflow in workflowsToUpdate:
            self.workqueue.updatePriority(workflow, workflowsToUpdate[workflow])

        # Check the workflows in WMBS
        priorityCache = {}
        workflowsToUpdateWMBS = {}
        workflowsToCheck = self.listWorkflowsDAO.execute()
        for workflowEntry in workflowsToCheck:
            workflow = workflowEntry['name']
            if workflow not in priorityCache:
                priorityCache[workflow] = self.reqmgr.getRequest(workflow)['RequestPriority']
            requestPriority = priorityCache[workflow]
            if requestPriority != workflowEntry['workflow_priority']:
                # Update the workqueue priority for the Available elements
                self.workqueue.updatePriority(workflow, priorityCache[workflow])
                # Check if there are executing jobs for this particular task
                if self.executingJobsDAO.execute(workflow, workflowEntry['task']) > 0:
                    self.bossAir.updateJobInformation(workflow, workflowEntry['task'],
                                                      requestPriority = priorityCache[workflow],
                                                      taskPriority = workflowEntry['task_priority'])
                workflowsToUpdateWMBS[workflow] = priorityCache[workflow]
        if workflowsToUpdateWMBS:
            self.updateWorkflowPrioDAO.execute(workflowsToUpdateWMBS)
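
The pattern in synchronizeJobPriority is a per-cycle priority cache: each workflow's ReqMgr priority is fetched at most once, compared against the locally known value, and pushed out only when they differ. A minimal sketch of that caching-and-compare loop (sync_priorities, fetch_priority and push_priority are placeholders for illustration, not the ReqMgr or WorkQueue APIs):

def sync_priorities(entries, fetch_priority, push_priority):
    """entries: iterable of (workflow, local_priority) pairs, possibly listing a
    workflow more than once (e.g. one row per task)."""
    cache = {}    # remote priority looked up at most once per workflow
    updated = {}  # workflows whose priority was pushed out
    for workflow, local_prio in entries:
        if workflow not in cache:
            cache[workflow] = fetch_priority(workflow)
        if cache[workflow] != local_prio:
            push_priority(workflow, cache[workflow])
            updated[workflow] = cache[workflow]
    return updated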