Example #1
0
def mark_failed(ids, failures_reasons):
    """
    Mark the given files as FAILED on the Oracle server.

    :param ids: list of Oracle file ids to update
    :param failures_reasons: list of strings with transfer failure messages
    :return: 0 on success, 1 on failure
    """
    try:
        oracleDB = HTTPRequests(rest_filetransfers, proxy, proxy)
        # Build the whole update request in one literal; one state/retry
        # entry per file id, reasons passed through as given.
        payload = {
            'asoworker': 'asoless',
            'subresource': 'updateTransfers',
            'list_of_ids': ids,
            'list_of_transfer_state': ['FAILED' for _ in ids],
            'list_of_failure_reason': failures_reasons,
            'list_of_retry_value': [0 for _ in ids],
        }
        oracleDB.post('/filetransfers', data=encodeRequest(payload))
        logging.debug("Marked failed %s", ids)
    except Exception:
        logging.exception("Error updating documents")
        return 1
    return 0
Example #2
0
def mark_transferred(ids):
    """
    Mark the given files as transferred (DONE) on the Oracle server.

    :param ids: list of Oracle file ids to update
    :return: 0 on success, 1 on failure
    """
    try:
        oracleDB = HTTPRequests(rest_filetransfers, proxy, proxy)
        logging.debug("Marking done %s", ids)

        # One DONE state entry per file id, assembled as a single literal.
        payload = {
            'asoworker': 'asoless',
            'subresource': 'updateTransfers',
            'list_of_ids': ids,
            'list_of_transfer_state': ['DONE' for _ in ids],
        }
        oracleDB.post('/filetransfers', data=encodeRequest(payload))
        logging.debug("Marked good %s", ids)
    except Exception:
        logging.exception("Error updating documents")
        return 1
    return 0
def getProxiedWebDir(task, host, uri, cert, logFunction=print):
    """Query the given REST interface for the proxied webdir of *task*.

    Returns the webdir url, or None when the API has no result for the task
    (either an error or the schedd is not configured).
    """
    # Local import on purpose: ServerUtilities also runs on worker nodes and
    # we do not want a hard pycurl dependency there yet. Once cmscp code using
    # WMCore Requests.py migrates to RESTInteractions this can move to the top.
    from RESTInteractions import HTTPRequests

    res = None
    try:
        server = HTTPRequests(host, cert, cert, retry=2)
        # second and third returned values are deprecated and ignored
        dictresult, _, _ = server.get(uri, data={'subresource': 'webdirprx', 'workflow': task})
        if dictresult.get('result'):
            res = dictresult['result'][0]
    except HTTPException as hte:
        logFunction(traceback.format_exc())
        logFunction(hte.headers)
        logFunction(hte.result)

    return res
 def get_backendurls(self):
     """Populate self.pool and self.schedds from the REST 'backendurls' subresource."""
     self.logger.info("Querying server %s for HTCondor schedds and pool names." % self.resturi)
     rest = HTTPRequests(self.resthost, self.config.TaskWorker.cmscert, self.config.TaskWorker.cmskey, retry=2)
     backends = rest.get(self.resturi, data={'subresource': 'backendurls'})[0]['result'][0]
     self.pool = str(backends['htcondorPool'])
     self.schedds = [str(name) for name in backends['htcondorSchedds']]
     self.logger.info("Resulting pool %s; schedds %s" % (self.pool, ",".join(self.schedds)))
Example #5
0
    def kill(self, job):
        """Kill all the jobs on the task.

        :param job: job object carrying the CRAB3 backend info (server name,
                    user proxy, api resource, task name)
        :return: True when the server accepted the kill request
        :raises HTTPException: re-raised after logging server error details
        """
        try:
            server = HTTPRequests(job.backend.server_name, job.backend.userproxy)
            resource = job.backend.apiresource+'workflow'
            dictresult, status, reason = server.delete(resource, data = urllib.urlencode({ 'workflow' : job.backend.taskname}))
            logger.info("Kill answer: %s" % status)
            logger.info("Kill dictresult: %s" % dictresult)
            return True
        except HTTPException as e:  # was py2-only "except HTTPException, e" syntax
            # Log every diagnostic attribute of the server exception before re-raising.
            logger.error(type(e))
            logger.error(e.req_headers)
            logger.error(e.req_data)
            logger.error(e.reason)
            logger.error(e.message)
            logger.error(e.headers)
            logger.error(e.result)
            logger.error(e.status)
            logger.error(e.url)
            logger.error(e.args)
            raise e
Example #6
0
    def __call__(self):
        """Fetch and display the detailed postmortem (job failure report) of the cached task.

        :return: CommandResult(0, None) on success; CommandResult with a
                 non-zero code on missing option or retrieval failure.
        """
        if self.options.task is None:
            return CommandResult(2001, 'ERROR: Task option is required')

        server = HTTPRequests(self.cachedinfo['Server'] + ':' + str(self.cachedinfo['Port']))

        self.logger.debug('Looking up detailed postmortem of task %s' % self.cachedinfo['RequestName'])
        dictresult, postmortem, reason = server.get(self.uri + self.cachedinfo['RequestName'])

        # 'postmortem' holds the HTTP status code of the GET
        if postmortem != 200:
            msg = "Problem retrieving postmortem:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            return CommandResult(1, msg)

        for workflow in dictresult['errors']:
            self.logger.info("#%i %s" % (workflow['subOrder'], workflow['request']))
            # Verbose/outputfile mode dumps everything; otherwise failures are
            # aggregated per error message.
            if self.options.verbose or self.options.outputfile:
                self.printVerbose(workflow['details'], self.options.outputfile, os.path.join(self.requestarea, 'results', 'jobFailures.log'))
            else:
                self.logger.debug("   Aggregating job failures")
                groupederrs = self.aggregateFailures(workflow['details'])
                if not groupederrs:
                    self.logger.info("   No failures")
                    continue
                self.logger.info("   List of failures and jobs per each failure: (one job could have more then one failure, one per each step)")
                for hkey in groupederrs:
                    ## removing duplicates and sort
                    joberrs = list(set(groupederrs[hkey]['jobs']))
                    joberrs.sort()
                    self.logger.info('     %s jobs failed with error "%s"' %(len(joberrs), groupederrs[hkey]['error']))
                    self.logger.info('       (%s)'  %(', '.join([ str(jobid[0]) for jobid in joberrs])) )

        return CommandResult(0, None)
Example #7
0
    def __call__(self):
        """Fetch and print the detailed status of the cached task.

        Verbosity is 1 when --summary/--long/--json are used, forced to 2
        for --idle. Raises RESTCommunicationException on non-200 replies.
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

        self.logger.debug('Looking up detailed status of task %s' % self.cachedinfo['RequestName'])
        # NOTE(review): assumes the task name embeds the username as the third
        # '_'-separated field (possibly with a host:port prefix) — confirm.
        user = self.cachedinfo['RequestName'].split("_")[2].split(":")[-1]
        verbose = int(self.summary or self.long or self.json)
        if self.idle:
            verbose = 2
        dictresult, status, reason = server.get(self.uri, data = { 'workflow' : self.cachedinfo['RequestName'], 'verbose': verbose })
        dictresult = dictresult['result'][0] #take just the significant part

        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.printShort(dictresult, user)
        self.printPublication(dictresult)

        if 'jobs' not in dictresult:
            self.logger.info("\nNo jobs created yet!")
        else:
            # Note several options could be combined
            if self.summary:
                self.printSummary(dictresult)
            if self.long:
               self.printLong(dictresult)
            if self.idle:
               self.printIdle(dictresult, user)
            if self.json:
               self.logger.info(dictresult['jobs'])
Example #8
0
 def setUp(self):
     """
     Setup for unit tests
     """
     # REST client pointed at the server under test; host and proxy come
     # from the environment so the suite can target any deployment.
     self.server = HTTPRequests(os.environ['SERVER_HOST'],
                                os.environ['X509_USER_PROXY'],
                                os.environ['X509_USER_PROXY'])
     # LFN template with four slots — presumably user, dataset and two file
     # identifiers (see usage in the tests — TODO confirm).
     self.lfnBase = '/store/temp/user/%s/my_cool_dataset-%s/file-%s-%s.root'
     # Template transfer document; 'OVERWRITE' fields are filled per test.
     self.fileDoc = {
         'id': 'OVERWRITE',
         'username': '******',
         'taskname': 'OVERWRITE',
         'start_time': 0,
         'destination': 'T2_CH_CERN',
         'destination_lfn': 'OVERWRITE',
         'source': 'T2_US_Caltech',
         'source_lfn': 'OVERWRITE',
         'filesize': random.randint(1, 9999),
         'publish': 1,
         'transfer_state': 'OVERWRITE',
         'publication_state': 'OVERWRITE',
         'job_id': 1,
         'job_retry_count': 0,
         'type': 'log',
         'rest_host': 'cmsweb.cern.ch',
         'rest_uri': '/crabserver/prod/'
     }
     # Accumulators shared by the tests.
     self.ids = []
     self.users = [
         'jbalcas', 'mmascher', 'dciangot', 'riahi', 'erupeika', 'sbelforte'
     ]  # just random users for tests
     self.tasks = {}
     self.totalFiles = 10
 def get_backendurls(self):
     """Query the REST server for the HTCondor pool name and schedd list.

     Fills self.pool and self.schedds from the 'backendurls' subresource.
     """
     self.logger.info("Querying server %s for HTCondor schedds and pool names." % self.resturi)
     server = HTTPRequests(self.resthost, self.config.TaskWorker.cmscert, self.config.TaskWorker.cmskey, retry = 2)
     # take the first entry of the 'result' list from the reply tuple
     result = server.get(self.resturi, data={'subresource':'backendurls'})[0]['result'][0]
     self.pool = str(result['htcondorPool'])
     self.schedds = [str(i) for i in result['htcondorSchedds']]
     self.logger.info("Resulting pool %s; schedds %s" % (self.pool, ",".join(self.schedds)))
Example #10
0
 def uploadWarning(self, warning, userProxy, taskname):
     """Best-effort upload of a warning message for *taskname* to the REST task API.

     HTTP failures are logged and swallowed so the caller is never interrupted.
     """
     try:
         userServer = HTTPRequests(self.server["host"], userProxy, userProxy, retry=2)
         payload = urllib.urlencode({"subresource": "addwarning", "workflow": taskname, "warning": b64encode(warning)})
         userServer.post(self.restURInoAPI + "/task", data=payload)
     except HTTPException as hte:
         self.logger.error(hte.headers)
         self.logger.warning("Cannot add a warning to REST interface. Warning message: %s" % warning)
Example #11
0
def server_info(subresource, server, proxyfilename, baseurl):
    """
    Get relevant information about the server.

    :param subresource: REST subresource to query
    :param server: server hostname
    :param proxyfilename: proxy file, used as both client cert and key
    :param baseurl: base REST url to query
    :return: first entry of the 'result' list in the server reply
    """
    connection = HTTPRequests(server, proxyfilename, proxyfilename, version=__version__)
    dictresult, status, reason = connection.get(baseurl, {'subresource': subresource})
    return dictresult['result'][0]
Example #12
0
 def deleteWarnings(self, userProxy, taskname):
     """Remove all warnings for *taskname* through the REST task API (best effort)."""
     userServer = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2,
                               logger = self.logger)
     payload = urllib.urlencode({'subresource': 'deletewarnings', 'workflow': taskname})
     try:
         userServer.post(self.restURInoAPI + '/task', data=payload)
     except HTTPException as hte:
         self.logger.error("Error deleting warnings: %s", str(hte))
         self.logger.warning("Can not delete warnings from REST interface.")
Example #13
0
 def getCountTasksByStatusAbs(self):
     """Return a {status: count} dict of all tasks ever submitted.

     Queries the 'counttasksbystatus' subresource with a very large
     'minutes' window. Returns an empty list on any error (legacy behaviour).
     """
     try:
         resturi = "/crabserver/prod/task"
         configreq = { 'minutes': "1000000000", 'subresource': "counttasksbystatus" }
         server = HTTPRequests(self.resthost, "/data/certs/servicecert.pem", "/data/certs/servicekey.pem", retry = 2)
         result = server.get(resturi, data = configreq)
         return dict(result[0]['result'])
     except Exception as e:  # was py2-only "except Exception, e" syntax
         self.logger.debug("Error in getCountTasksByStatusAbs: %s"%str(e))
         return []
Example #14
0
 def uploadWarning(self, warning, userProxy, taskname):
     """Attach a warning message to *taskname* via the REST 'addwarning' subresource.

     Best effort: HTTP failures are logged and swallowed so callers are not
     interrupted.
     :param warning: warning text, b64-encoded before upload
     :param userProxy: user proxy file, used as both client cert and key
     :param taskname: workflow name the warning belongs to
     """
     try:
         userServer = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2)
         configreq = {'subresource': 'addwarning',
                      'workflow': taskname,
                      'warning': b64encode(warning)}
         userServer.post(self.restURInoAPI + '/task', data = urllib.urlencode(configreq))
     except HTTPException as hte:
         self.logger.error(hte.headers)
         self.logger.warning("Cannot add a warning to REST interface. Warning message: %s" % warning)
Example #15
0
    def __call__(self):
        """Return the type of the cached task as reported by the server."""
        proxyfile = self.options.proxyfile if self.options.proxyfile else self.proxyfilename
        server = HTTPRequests(self.serverurl, proxyfile, proxyfile, version=__version__)

        self.logger.debug('Looking type for task %s' % self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'subresource': 'type'})
        tasktype = dictresult['result'][0]
        self.logger.debug('Task type %s' % tasktype)
        return tasktype
Example #16
0
    def execute(self, *args, **kwargs):
        """Split the task's input fileset into jobs.

        :param args: args[0] is the input fileset to split
        :param kwargs: kwargs['task'] carries the task parameters (splitting
                       algorithm, job type, total units, ...)
        :return: Result wrapping the task and the job factory
        :raises TaskWorkerException: when splitting yields zero jobs or more
                                     than the configured maximum
        """
        wmwork = Workflow(name=kwargs['task']['tm_taskname'])

        wmsubs = Subscription(fileset=args[0], workflow=wmwork,
                               split_algo=kwargs['task']['tm_split_algo'],
                               type=self.jobtypeMapper[kwargs['task']['tm_job_type']])
        splitter = SplitterFactory()
        jobfactory = splitter(subscription=wmsubs)
        splitparam = kwargs['task']['tm_split_args']
        splitparam['algorithm'] = kwargs['task']['tm_split_algo']
        # The meaning of tm_totalunits depends on the chosen splitting algorithm.
        if kwargs['task']['tm_job_type'] == 'Analysis':
            if kwargs['task']['tm_split_algo'] == 'FileBased':
                splitparam['total_files'] = kwargs['task']['tm_totalunits']
            elif kwargs['task']['tm_split_algo'] == 'LumiBased':
                splitparam['total_lumis'] = kwargs['task']['tm_totalunits']
            elif kwargs['task']['tm_split_algo'] == 'EventAwareLumiBased':
                splitparam['total_events'] = kwargs['task']['tm_totalunits']
        elif kwargs['task']['tm_job_type'] == 'PrivateMC':
            if 'tm_events_per_lumi' in kwargs['task'] and kwargs['task']['tm_events_per_lumi']:
                splitparam['events_per_lumi'] = kwargs['task']['tm_events_per_lumi']
            if 'tm_generator' in kwargs['task'] and kwargs['task']['tm_generator'] == 'lhe':
                splitparam['lheInputFiles'] = True
        splitparam['applyLumiCorrection'] = True
        factory = jobfactory(**splitparam)
        numJobs = sum([len(jobgroup.getJobs()) for jobgroup in factory])
        maxJobs = getattr(self.config.TaskWorker, 'maxJobsPerTask', 10000)
        if numJobs == 0:
            msg  = "The CRAB3 server backend could not submit any job to the Grid scheduler:"
            msg += " Splitting task %s" % (kwargs['task']['tm_taskname'])
            if kwargs['task']['tm_input_dataset']:
                msg += " on dataset %s" % (kwargs['task']['tm_input_dataset'])
            msg += " with %s method does not generate any job" % (kwargs['task']['tm_split_algo'])
            raise TaskWorkerException(msg)
        elif numJobs > maxJobs:
            raise TaskWorkerException("The splitting on your task generated %s jobs. The maximum number of jobs in each task is %s" %
                                        (numJobs, maxJobs))
        #printing duplicated lumis if any
        lumiChecker = getattr(jobfactory, 'lumiChecker', None)
        if lumiChecker and lumiChecker.splitLumiFiles:
            self.logger.warning("The input dataset contains the following duplicated lumis %s" % lumiChecker.splitLumiFiles.keys())
            #TODO use self.uploadWarning
            # Best effort: tell the user via a task warning; HTTP failures only logged.
            try:
                userServer = HTTPRequests(self.server['host'], kwargs['task']['user_proxy'], kwargs['task']['user_proxy'], retry = 2,
                                          logger = self.logger)
                configreq = {'subresource': 'addwarning',
                             'workflow': kwargs['task']['tm_taskname'],
                             'warning': b64encode('The CRAB3 server backend detected lumis split across files in the input dataset.'
                                        ' Will apply the necessary corrections in the splitting algorithms. You can ignore this message.')}
                userServer.post(self.restURInoAPI + '/task', data = urllib.urlencode(configreq))
            except HTTPException as hte:
                self.logger.error(hte.headers)
                self.logger.warning("Cannot add warning to REST after finding duplicates")

        return Result(task = kwargs['task'], result = factory)
Example #17
0
 def uploadWarning(self, warning, userProxy, taskname):
     """Upload a warning message for *taskname* to the REST 'addwarning' subresource.

     Best effort: an HTTPException is logged and swallowed so the caller is
     never interrupted.
     :param warning: warning text, b64-encoded before posting
     :param userProxy: proxy file used as both client cert and key
     :param taskname: workflow the warning is attached to
     """
     try:
         userServer = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2,
                                   logger = self.logger)
         configreq = {'subresource': 'addwarning',
                      'workflow': taskname,
                      'warning': b64encode(warning)}
         userServer.post(self.restURInoAPI + '/task', data = urllib.urlencode(configreq))
     except HTTPException as hte:
         self.logger.error(hte.headers)
         self.logger.warning("Cannot add a warning to REST interface. Warning message: %s" % warning)
Example #18
0
    def getOutput(self, job):
        """Retrieve the output (log tarball) of the job and unpack it into job.outputdir.

        :param job: job object carrying the CRAB3 backend info (task name,
                    crab id, server name, user proxy, api resource)
        """
        logger.info('getting Output for job %s:%s' % (job.backend.taskname, job.backend.crabid))
        # Query parameters for the 'logs' subresource of this task/job.
        inputlist = [('workflow', job.backend.taskname),
                     ('subresource', 'logs'),
                     ('jobids', job.backend.crabid)]

        server = HTTPRequests(job.backend.server_name, job.backend.userproxy)
        resource = job.backend.apiresource+'workflow'

        try:
            dictresult, status, reason = server.get(resource, data = inputlist)
            filesToCopy = dictresult['result']
            rcopy = remoteCopy(filesToCopy, job.outputdir, logger)
            rcopy()
            logger.info("Task: %s - subjob: %s output copied" % (job.backend.taskname, job.backend.crabid))
            # Close the tarball after extraction (was leaked before).
            with tarfile.open(os.path.join(job.outputdir, "cmsRun_%s.log.tar.gz" % job.backend.crabid)) as tfile:
                tfile.extractall(job.outputdir)

        except HTTPException as e:  # was py2-only "except HTTPException, e" syntax
            # Build one diagnostic string; str() every attribute because they
            # may be non-string objects (the old '+' concatenation of type(e)
            # and dir(e) raised TypeError, masking the real error).
            details = [str(type(e)), str(dir(e)), str(e.req_headers),
                       str(e.req_data), str(e.reason), str(e.message),
                       str(e.headers), str(e.result), str(e.status),
                       str(e.url), str(e.args)]
            logger.error(" ".join(details))
Example #19
0
    def _execute(self, resthost, resturi, config, task):
        self.logger.info('Cleaning filemetadata older than 30 days..')
        server = HTTPRequests(resthost, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry = 2)
        ONE_MONTH = 24 * 30
        try:
            instance = resturi.split('/')[2]
            server.delete('/crabserver/%s/filemetadata' % instance, data=urllib.urlencode({'hours': ONE_MONTH}))
#TODO return fro the server a value (e.g.: ["ok"]) to see if everything is ok
#            result = server.delete('/crabserver/dev/filemetadata', data=urllib.urlencode({'hours': ONE_MONTH}))[0]['result'][0]
#            self.logger.info('FMDCleaner, got %s' % result)
        except HTTPException, hte:
            self.logger.error(hte.headers)
Example #20
0
def server_info(subresource, server, proxyfilename, baseurl):
    """
    Get relevant information about the CRAB REST server
    :param subresource: REST subresource to query
    :param server: hostname of the CRAB REST server
    :param proxyfilename: proxy file, used as both client cert and key
    :param baseurl: base REST url to query
    :return: first entry of the 'result' list from the server reply
    """
    # NOTE: the local name 'server' rebinds the hostname parameter with the client object.
    server = HTTPRequests(url=server,
                          localcert=proxyfilename,
                          localkey=proxyfilename,
                          version='HC')

    # status and reason are ignored; errors surface as exceptions from get()
    dictresult, status, reason = server.get(baseurl,
                                            {'subresource': subresource})

    return dictresult['result'][0]
Example #21
0
 def deleteWarnings(self, userProxy, taskname):
     """Delete all warnings of *taskname* via the REST 'deletewarnings' subresource.

     Best effort: HTTP failures are logged and swallowed.
     :param userProxy: user proxy file, used as both client cert and key
     :param taskname: workflow whose warnings are removed
     """
     userServer = HTTPRequests(self.server['host'],
                               userProxy,
                               userProxy,
                               retry=2,
                               logger=self.logger)
     configreq = {'subresource': 'deletewarnings', 'workflow': taskname}
     try:
         userServer.post(self.restURInoAPI + '/task',
                         data=urllib.urlencode(configreq))
     except HTTPException as hte:
         self.logger.error("Error deleting warnings: %s", str(hte))
         self.logger.warning("Can not delete warnings from REST interface.")
Example #22
0
    def __init__(self, logger, config):
        """
        Initialize connection to the db and logging/config

        :param logger: pass the logging
        :param config: refer to the configuration file
        """
        # REST client to the Oracle-backed server; the operations proxy is
        # used as both client certificate and key.
        self.oracleDB = HTTPRequests(config.oracleDB,
                                     config.opsProxy,
                                     config.opsProxy)

        self.config = config
        self.logger = logger
Example #23
0
    def __call__(self):
        """List the user's tasks submitted since self.date, optionally filtered by status.

        Prints a NAME/STATUS table and returns a list of [name, status] pairs.
        :raises RESTCommunicationException: on non-200 server replies
        """
        server = HTTPRequests(self.serverurl,
                              self.proxyfilename,
                              self.proxyfilename,
                              version=__version__)
        dictresult, status, reason = server.get(self.uri,
                                                data={'timestamp': self.date})
        dictresult = dictresult['result']  #take just the significant part

        if status != 200:
            msg = "Problem retrieving tasks:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.date), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        # sort descending (entries start with the task name)
        dictresult.sort()
        dictresult.reverse()

        if self.options.status:
            dictresult = [
                item for item in dictresult if item[1] == self.options.status
            ]

        # keep only [name, status] pairs for the return value
        result = [item[0:2] for item in dictresult]

        today = date.today()

        if not dictresult:
            msg = "No tasks found from %s until %s" % (self.date, today)
            if self.options.status:
                msg += " with status %s" % (self.options.status)
            self.logger.info(msg)
            return result

        msg = "\nList of tasks from %s until %s" % (self.date, today)
        if self.options.status:
            msg += " with status %s" % (self.options.status)
        self.logger.info(msg)
        msg = "Beware that STATUS here does not include information from grid jobs"
        self.logger.info(msg)
        self.logger.info('=' * 80)
        self.logger.info('NAME\t\t\t\t\t\t\t\tSTATUS')
        self.logger.info('=' * 80)
        for item in dictresult:
            name, status = item[0:2]
            self.logger.info('%s\n\t\t\t\t\t\t\t\t%s' % (name, status))
            self.logger.info('-' * 80)
        self.logger.info('\n')

        return result
Example #24
0
 def getCountTasksByStatusAbs(self):
     """Return a {status: count} dict of all tasks ever submitted.

     On error, logs details (including HTTP headers when available) and
     returns an empty list (legacy behaviour).
     """
     try:
         resturi = "/crabserver/prod/task"
         configreq = {'minutes': "1000000000", 'subresource': "counttasksbystatus"}
         server = HTTPRequests(self.resthost, "/data/certs/servicecert.pem", "/data/certs/servicekey.pem", retry=10)
         result = server.get(resturi, data=configreq)
         return dict(result[0]['result'])
     except Exception:
         e = sys.exc_info()
         # Bug fix: hasattr() was previously checked on the whole
         # (type, value, traceback) tuple, so the exception's 'headers'
         # were never logged; inspect the exception instance e[1] instead.
         if hasattr(e[1], "headers"):
             self.logger.error(str(e[1].headers))
         self.logger.exception("Error in getCountTasksByStatusAbs:")
         pprint(e[1])
         traceback.print_tb(e[2])
         return []
Example #25
0
 def sendScheddToREST(self, task, schedd):
     """Record the chosen schedd for *task* in the Oracle database via the REST interface.

     :raises TaskWorkerException: when the REST server cannot be reached
     """
     task['tm_schedd'] = schedd
     userServer = HTTPRequests(self.server['host'], task['user_proxy'], task['user_proxy'], retry=20, logger=self.logger)
     configreq = {'workflow': task['tm_taskname'],
                  'subresource': 'updateschedd',
                  'scheddname': schedd}
     try:
         userServer.post(self.restURInoAPI + '/task', data=urllib.urlencode(configreq))
     except HTTPException as hte:
         msg = "Unable to contact cmsweb and update scheduler on which task will be submitted. Error msg: %s" % hte.headers
         self.logger.warning(msg)
         time.sleep(20)
         # retry=20 already exhausted all attempts: give up and surface the failure
         raise TaskWorkerException(msg)
Example #26
0
    def __call__(self):
        """Send a kill request for the cached task, restricted to self.jobids when given.

        :raises RESTCommunicationException: when the server does not answer 200
        """
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

        self.logger.debug('Killing task %s' % self.cachedinfo['RequestName'])
        # jobids are appended as extra querystring parameters
        dictresult, status, reason = server.delete(self.uri, data = urlencode({ 'workflow' : self.cachedinfo['RequestName']}) + '&' + urlencode(self.jobids))
        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem killing task %s:\ninput:%s\noutput:%s\nreason:%s" % \
                    (self.cachedinfo['RequestName'], str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info("Kill request successfully sent")
        # The HTTP call can succeed while the kill itself fails; report the server's verdict.
        if dictresult['result'][0]['result'] != 'ok':
            self.logger.info(dictresult['result'][0]['result'])
Example #27
0
 def sendScheddToREST(self, task, schedd):
     """ Try to set the schedd to the oracle database in the REST interface
         Raises TaskWorkerException in case of failure
     """
     task['tm_schedd'] = schedd
     # retry=20: the REST call itself is retried many times before the except below fires
     userServer = HTTPRequests(self.server['host'], task['user_proxy'], task['user_proxy'], retry=20, logger=self.logger)
     configreq = {'workflow':task['tm_taskname'], 'subresource':'updateschedd',
         'scheddname':schedd}
     try:
         userServer.post(self.restURInoAPI + '/task', data=urllib.urlencode(configreq))
     except HTTPException as hte:
         msg = "Unable to contact cmsweb and update scheduler on which task will be submitted. Error msg: %s" % hte.headers
         self.logger.warning(msg)
         time.sleep(20)
         raise TaskWorkerException(msg) #we already tried 20 times, give up
Example #28
0
def handleNewTask(resthost, resturi, config, task, procnum, *args, **kwargs):
    """Performs the injection of a new task

    :arg str resthost: the hostname where the rest interface is running
    :arg str resturi: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :arg int procnum: the process number taking care of the work
    :*args and *kwargs: extra parameters currently not defined
    :return: the handler."""
    server = HTTPRequests(resthost, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry=20, logger=logging.getLogger(str(procnum)))
    handler = TaskHandler(task, procnum, server, config, 'handleNewTask', createTempDir=True)
    # Common preparation steps for every task.
    handler.addWork(MyProxyLogon(config=config, server=server, resturi=resturi, procnum=procnum, myproxylen=60 * 60 * 24))
    handler.addWork(StageoutCheck(config=config, server=server, resturi=resturi, procnum=procnum))
    # Data discovery: user-supplied files or DBS for Analysis, fake fileset for PrivateMC.
    if task['tm_job_type'] == 'Analysis':
        if task.get('tm_user_files'):
            handler.addWork(UserDataDiscovery(config=config, server=server, resturi=resturi, procnum=procnum))
        else:
            handler.addWork(DBSDataDiscovery(config=config, server=server, resturi=resturi, procnum=procnum))
    elif task['tm_job_type'] == 'PrivateMC':
        handler.addWork(MakeFakeFileSet(config=config, server=server, resturi=resturi, procnum=procnum))
    handler.addWork(Splitter(config=config, server=server, resturi=resturi, procnum=procnum))
    handler.addWork(DagmanCreator(config=config, server=server, resturi=resturi, procnum=procnum))
    # Dry-run tasks are uploaded for inspection instead of being submitted.
    if task['tm_dry_run'] == 'T':
        handler.addWork(DryRunUploader(config=config, server=server, resturi=resturi, procnum=procnum))
    else:
        handler.addWork(DagmanSubmitter(config=config, server=server, resturi=resturi, procnum=procnum))

    return handler.actionWork(args, kwargs)
Example #29
0
    def __init__(self, config):
        """
        Initialise class members

        :param config: daemon configuration; isOracle selects the backend and
                       opsProxy is used as both cert and key
        :raises RetryManagerException: when the retry-algorithm plugin cannot be loaded
        """
        BaseDaemon.__init__(self, config, 'RetryManager')

        # Oracle deployments go through the REST interface; otherwise connect
        # directly to the CouchDB files database.
        if self.config.isOracle:
            self.oracleDB = HTTPRequests(self.config.oracleDB,
                                         self.config.opsProxy,
                                         self.config.opsProxy)
        else:
            try:
                server = CouchServer(dburl=self.config.couch_instance,
                                     ckey=self.config.opsProxy,
                                     cert=self.config.opsProxy)
                self.db = server.connectDatabase(self.config.files_database)
            except Exception as e:
                self.logger.exception('A problem occured when connecting to couchDB: %s' % e)
                raise
            self.logger.debug('Connected to files DB')

            # Set up a factory for loading plugins
        self.factory = WMFactory(self.config.retryAlgoDir, namespace=self.config.retryAlgoDir)
        try:
            self.plugin = self.factory.loadObject(self.config.algoName, self.config,
                                                  getFromCache=False, listFlag=True)
        except Exception as ex:
            msg = "Error loading plugin %s on path %s\n" % (self.config.algoName,
                                                            self.config.retryAlgoDir)
            msg += str(ex)
            self.logger.error(msg)
            raise RetryManagerException(msg)
        self.cooloffTime = self.config.cooloffTime
Example #30
0
    def submit(self, job):
        """Submit a new task to CRAB3.

        :param job: job object carrying the CRAB3 backend info and input data
        :raises CRABServerError: when uploading the user sandbox to the cache fails
        """
        logger.info('userproxy: %s' % job.backend.userproxy)
        logger.info('server_name: %s' % job.backend.server_name)
        logger.info('apiresource: %s' % job.backend.apiresource)

        server = HTTPRequests(job.backend.server_name, job.backend.userproxy)
        resource = job.backend.apiresource + 'workflow'

        try:
            cachefilename = self.uploadArchive(job.inputdata.pset,
                                               job.inputdata.cacheurl)[1]
        except HTTPException as e:  # was py2-only "except HTTPException, e" syntax
            # Dump all diagnostic attributes of the server exception before failing.
            logger.error(type(e))
            logger.error(dir(e))
            logger.error(e.req_headers)
            logger.error(e.req_data)
            logger.error(e.reason)
            logger.error(e.message)
            logger.error(e.headers)
            logger.error(e.result)
            logger.error(e.status)
            logger.error(e.url)
            logger.error(e.args)
            raise CRABServerError("Error uploading cache")
Example #31
0
def handleNewTask(resthost, resturi, config, task, *args, **kwargs):
    """Build and run the handler chain that injects a new task.

    :arg str resthost: the hostname where the rest interface is running
    :arg str resturi: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :*args and *kwargs: extra parameters currently not defined
    :return: the handler."""
    server = HTTPRequests(resthost, config.TaskWorker.cmscert, config.TaskWorker.cmskey)
    handler = TaskHandler(task)
    # The injection sequence may take a while: request a 24-hour proxy.
    handler.addWork(MyProxyLogon(config=config, server=server, resturi=resturi, myproxylen=60 * 60 * 24))
    jobtype = task['tm_job_type']
    if jobtype == 'Analysis':
        # User-supplied files take precedence over DBS-based discovery.
        if task.get('tm_arguments', {}).get('userfiles'):
            handler.addWork(UserDataDiscovery(config=config, server=server, resturi=resturi))
        else:
            handler.addWork(DBSDataDiscovery(config=config, server=server, resturi=resturi))
    elif jobtype == 'PrivateMC':
        handler.addWork(MakeFakeFileSet(config=config, server=server, resturi=resturi))
    handler.addWork(Splitter(config=config, server=server, resturi=resturi))

    def glidein(config):
        """Add the Glidein-specific creation and submission steps.
        :arg WMCore.Configuration config: input configuration"""
        handler.addWork(DagmanCreator(config=config, server=server, resturi=resturi))
        handler.addWork(DagmanSubmitter(config=config, server=server, resturi=resturi))

    def panda(config):
        """Add the PanDA-specific brokerage and injection steps.
        :arg WMCore.Configuration config: input configuration"""
        handler.addWork(PanDABrokerage(pandaconfig=config, server=server, resturi=resturi))
        handler.addWork(PanDAInjection(pandaconfig=config, server=server, resturi=resturi))

    # Dispatch to the backend-specific setup defined just above.
    backend = getattr(config.TaskWorker, 'backend', DEFAULT_BACKEND).lower()
    {'glidein': glidein, 'panda': panda}[backend](config)
    return handler.actionWork(args)
Example #32
0
    def uploadWarning(self, warning, userProxy, taskname):
        """Attach a (truncated, base64-encoded) warning message to the task
        via the REST interface; best-effort, failures are only logged."""
        if not self.server:
            # When testing, the server can be None: just log locally.
            self.logger.warning(warning)
            return

        truncWarning = truncateError(warning)
        userServer = HTTPRequests(self.server['host'], userProxy, userProxy, retry=2,
                                  logger=self.logger)
        configreq = {'subresource': 'addwarning',
                     'workflow': taskname,
                     'warning': b64encode(truncWarning)}
        try:
            userServer.post(self.restURInoAPI + '/task', data=urllib.urlencode(configreq))
        except HTTPException as hte:
            self.logger.error("Error uploading warning: %s", str(hte))
            self.logger.warning("Cannot add a warning to REST interface. Warning message: %s", warning)
    def testExecute(self):
        """Exercise LumiMaskBuilder.execute on DataDiscovery-formatted output."""
        # Recycle DataDiscoveryTest code to create the input of this test.
        ddObj, task, requestname, datasetfiles, locations = DataDiscoveryTest.prepareObjects()
        res = ddObj.formatOutput(task=task,
                                 requestname=requestname,
                                 datasetfiles=datasetfiles,
                                 locations=locations)

        # Test the case where the lumimask is empty. That's the most interesting case.
        cert, key = Requests().getKeyCert()
        server = HTTPRequests(os.environ['REST_URL'], cert, key, version="0.debug")
        lmb = LumiMaskBuilder(None, server, "/crabserver/dev/workflowdb")

        # This is a wf name I had in the REST db. Used to check by hand if the db was updated.
        # We should create a RESTMock for unit tests.
        task = {'tm_taskname': "130719_090932_mmascher_crab_tmp",
                'tm_split_args': {'lumis': {}, 'runs': {}}}
        lmb.execute(res.result, task=task)

        self.assertEqual(lmb.runs, ['1', '2', '3', '4'])
        # First run too long to check in a unit test.
        self.assertEqual(lmb.lumis[1:],
                         ['1,5,8,9,20,22', '11,13', '1,2,5,7,100,100'])
Example #34
0
def handleKill(instance, resturl, config, task, *args, **kwargs):
    """Asks to kill jobs

    :arg str instance: the hostname where the rest interface is running
    :arg str resturl: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :*args and *kwargs: extra parameters currently not defined
    :return: the result of the handler operation."""
    server = HTTPRequests(instance, config.TaskWorker.cmscert,
                          config.TaskWorker.cmskey, version=__version__)
    handler = TaskHandler(task)
    # A short-lived (5-minute) proxy is enough for a kill operation.
    handler.addWork(MyProxyLogon(config=config, server=server, resturl=resturl,
                                 myproxylen=60 * 5))

    def glidein(config):
        """Performs kill of jobs sent through Glidein
        :arg WMCore.Configuration config: input configuration"""
        raise NotImplementedError
        #handler.addWork( DagmanKiller(glideinconfig=config, server=server, resturl=resturl) )

    def panda(config):
        """Performs the re-injection into PanDA
        :arg WMCore.Configuration config: input configuration"""
        handler.addWork(PanDAKill(pandaconfig=config, server=server, resturl=resturl))

    # Dispatch to the backend-specific kill setup defined just above.
    backend = getattr(config.TaskWorker, 'backend', DEFAULT_BACKEND).lower()
    {'glidein': glidein, 'panda': panda}[backend](config)
    return handler.actionWork(args, kwargs)
Example #35
0
def handleKill(resthost, resturi, config, task, procnum, *args, **kwargs):
    """Asks to kill jobs

    :arg str resthost: the hostname where the rest interface is running
    :arg str resturi: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :arg int procnum: the process number taking care of the work
    :*args and *kwargs: extra parameters currently not defined
    :return: the result of the handler operation."""
    server = HTTPRequests(resthost, config.TaskWorker.cmscert,
                          config.TaskWorker.cmskey, retry=2)
    handler = TaskHandler(task, procnum, server, 'handleKill')
    # A short-lived (5-minute) proxy is enough for a kill operation.
    handler.addWork(MyProxyLogon(config=config, server=server, resturi=resturi,
                                 procnum=procnum, myproxylen=60 * 5))

    def glidein(config):
        """Performs kill of jobs sent through Glidein
        :arg WMCore.Configuration config: input configuration"""
        handler.addWork(DagmanKiller(config=config, server=server,
                                     resturi=resturi, procnum=procnum))

    # Dispatch to the backend-specific kill setup defined just above.
    backend = getattr(config.TaskWorker, 'backend', DEFAULT_BACKEND).lower()
    {'glidein': glidein}[backend](config)
    return handler.actionWork(args, kwargs)
Example #36
0
def handleKill(resthost, resturi, config, task, procnum, *args, **kwargs):
    """Asks to kill jobs

    :arg str resthost: the hostname where the rest interface is running
    :arg str resturi: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :arg int procnum: the process number taking care of the work
    :*args and *kwargs: extra parameters currently not defined
    :return: the result of the handler operation."""
    server = HTTPRequests(resthost, config.TaskWorker.cmscert,
                          config.TaskWorker.cmskey, retry=20,
                          logger=logging.getLogger(str(procnum)))
    handler = TaskHandler(task, procnum, server, config, 'handleKill')
    # Logon with a short (5-minute) proxy, then schedule the actual kill.
    actions = (MyProxyLogon(config=config, server=server, resturi=resturi,
                            procnum=procnum, myproxylen=60 * 5),
               DagmanKiller(config=config, server=server, resturi=resturi,
                            procnum=procnum))
    for action in actions:
        handler.addWork(action)

    return handler.actionWork(args, kwargs)
Example #37
0
def handleResubmit(resthost, resturi, config, task, *args, **kwargs):
    """Performs the re-injection of failed jobs

    :arg str resthost: the hostname where the rest interface is running
    :arg str resturi: the rest base url to contact
    :arg WMCore.Configuration config: input configuration
    :arg TaskWorker.DataObjects.Task task: the task to work on
    :*args and *kwargs: extra parameters currently not defined
    :return: the result of the handler operation."""
    server = HTTPRequests(resthost, config.TaskWorker.cmscert, config.TaskWorker.cmskey)
    handler = TaskHandler(task)
    # Resubmission may take a while: request a 24-hour proxy.
    handler.addWork(MyProxyLogon(config=config, server=server, resturi=resturi, myproxylen=60 * 60 * 24))

    def glidein(config):
        """Performs the re-injection into Glidein
        :arg WMCore.Configuration config: input configuration"""
        handler.addWork(DagmanResubmitter(config=config, server=server, resturi=resturi))

    def panda(config):
        """Performs the re-injection into PanDA
        :arg WMCore.Configuration config: input configuration"""
        for step in (PanDAgetSpecs, PanDASpecs2Jobs, PanDABrokerage, PanDAInjection):
            handler.addWork(step(pandaconfig=config, server=server, resturi=resturi))

    # Dispatch to the backend-specific resubmit setup defined just above.
    backend = getattr(config.TaskWorker, 'backend', DEFAULT_BACKEND).lower()
    {'glidein': glidein, 'panda': panda}[backend](config)
    return handler.actionWork(args)
 def setUp(self):
     """
     Build the REST client and the template transfer document used by the tests.
     """
     # REST client authenticated with the user proxy (used as both cert and key).
     self.server = HTTPRequests(os.environ['SERVER_HOST'], os.environ['X509_USER_PROXY'], os.environ['X509_USER_PROXY'])
     # Template for destination LFNs: username, dataset tag, file tag, counter.
     self.lfnBase = '/store/temp/user/%s/my_cool_dataset-%s/file-%s-%s.root'
     # Template transfer document; 'OVERWRITE' placeholders are filled per test.
     self.fileDoc = {'id': 'OVERWRITE',
                     'username': '******',
                     'taskname': 'OVERWRITE',
                     'start_time': 0,
                     'destination': 'T2_CH_CERN',
                     'destination_lfn': 'OVERWRITE',
                     'source': 'T2_US_Caltech',
                     'source_lfn': 'OVERWRITE',
                     'filesize': random.randint(1, 9999),
                     'publish': 1,
                     'transfer_state': 'OVERWRITE',
                     'publication_state': 'OVERWRITE',
                     'job_id': 1,
                     'job_retry_count': 0,
                     'type': 'log',
                     'rest_host': 'cmsweb.cern.ch',
                     'rest_uri': '/crabserver/prod/'}
     # Accumulators filled as documents/tasks are created during a test run.
     self.ids = []
     self.users = ['jbalcas', 'mmascher', 'dciangot', 'riahi', 'erupeika', 'sbelforte']  # just random users for tests
     self.tasks = {}
     # Number of transfer documents generated per task.
     self.totalFiles = 10
Example #39
0
    def __call__(self):
        """Ask the server to resubmit the failed jobs of the cached task.

        :raise RESTCommunicationException: on a non-200 server reply."""
        ## retrieving output files location from the server
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

        self.logger.debug('Requesting resubmission for failed jobs in task %s' % self.cachedinfo['RequestName'])
        #inputdict = { "TaskResubmit": "Analysis", "ForceResubmit" : force }
        inputdata = urlencode({'workflow': self.cachedinfo['RequestName']}) + \
                    self.sitewhitelist + self.siteblacklist + '&' + urlencode(self.jobids)
        dictresult, status, reason = server.post(self.uri, data=inputdata)
        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            # FIX: the original referenced an undefined name 'inputdict' here,
            # raising NameError instead of the intended exception; report the
            # data actually posted.
            msg = "Problem retrieving resubmitting the task to the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputdata), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info("Resubmit request successfully sent")
        if dictresult['result'][0]['result'] != 'ok':
            self.logger.info(dictresult['result'][0]['result'])
Example #40
0
def updatewebdir(ad):
    """Register the task's user web directory with the CRAB REST interface.

    :arg dict ad: job classad with CRAB_RestHost, CRAB_RestURInoAPI,
                  CRAB_ReqName, CRAB_UserWebDir and X509UserProxy entries
    :return: 0 on success, 1 on any failure (best-effort contract)."""
    data = {'subresource': 'addwebdir'}
    host = ad['CRAB_RestHost']
    uri = ad['CRAB_RestURInoAPI'] + '/task'
    data['workflow'] = ad['CRAB_ReqName']
    data['webdirurl'] = ad['CRAB_UserWebDir']
    cert = ad['X509UserProxy']
    try:
        from RESTInteractions import HTTPRequests
        import urllib
        # Proxy is used both as client certificate and key.
        server = HTTPRequests(host, cert, cert)
        server.post(uri, data=urllib.urlencode(data))
        return 0
    except Exception:
        # FIX: the bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
        # catch Exception instead, keeping the best-effort 'return 1' contract.
        # print() form works on both Python 2 and 3.
        print(traceback.format_exc())
        return 1
Example #41
0
def serverCall(ddmServer, cert, key, verbose, call, api, data):
    """Invoke one DDM registry API with the given HTTP verb and return its payload.

    :arg str call: name of the HTTPRequests method to use (e.g. 'post')
    :arg str api: registry API name appended to the common prefix
    :return: first element of the server reply tuple."""
    server = HTTPRequests(url=ddmServer,
                          localcert=cert,
                          localkey=key,
                          verbose=verbose)
    endpoint = '/registry/request' + '/' + api
    method = getattr(server, call)
    return method(endpoint, data=data)[0]
Example #42
0
    def getOutput(self, job):
        """Retrieve the logs of the given CRAB3 job via the REST interface.

        :arg job: Ganga job whose ``backend`` holds taskname, crabid and
                  server connection settings.
        Failures are only printed; no value is returned either way."""
        # FIX: typo in the log message ('geting ... jon' -> 'getting ... job');
        # the dead commented-out legacy implementation was removed.
        logger.info('getting Output for job %s:%s' %
                    (job.backend.taskname, job.backend.crabid))
        inputlist = [('workflow', job.backend.taskname),
                     ('subresource', 'logs'),
                     ('jobids', job.backend.crabid)]

        # Proxy is used both as client certificate and key.
        server = HTTPRequests(job.backend.server_name, job.backend.userproxy)

        resource = job.backend.apiresource + 'workflow'

        try:
            dictresult, status, reason = server.get(resource, data=inputlist)

        except HTTPException as e:
            # FIX: Python-2-only 'except X, e' replaced with the 'as' form and
            # print statements with single-argument print() calls -- both work
            # on Python 2 and 3. NOTE(review): the error is swallowed and no
            # result is returned; confirm callers expect this best-effort mode.
            print(type(e))
            print(dir(e))
            print(e.req_headers)
            print(e.req_data)
            print(e.reason)
            print(e.message)
            print(e.headers)
            print(e.result)
            print(e.status)
            print(e.url)
            print(e.args)
Example #43
0
def updatewebdir(ad):
    """Register the task's user web directory with the CRAB REST interface.

    :arg dict ad: job classad with CRAB_RestHost, CRAB_RestURInoAPI,
                  CRAB_ReqName, CRAB_UserWebDir and X509UserProxy entries
    :return: 0 on success, 1 on any failure (best-effort contract)."""
    data = {'subresource' : 'addwebdir'}
    host = ad['CRAB_RestHost']
    uri = ad['CRAB_RestURInoAPI'] + '/task'
    data['workflow'] = ad['CRAB_ReqName']
    data['webdirurl'] = ad['CRAB_UserWebDir']
    cert = ad['X509UserProxy']
    try:
        from RESTInteractions import HTTPRequests
        import urllib
        # Proxy is used both as client certificate and key.
        server = HTTPRequests(host, cert, cert)
        server.post(uri, data=urllib.urlencode(data))
        return 0
    except Exception:
        # FIX: the bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
        # catch Exception instead, keeping the best-effort 'return 1' contract.
        # print() form works on both Python 2 and 3.
        print(traceback.format_exc())
        return 1
Example #44
0
    def __call__(self, **argv):
        #Setting default destination if -o is not provided
        if not self.dest:
            self.dest = os.path.join(self.requestarea, 'results')

        #Creating the destination directory if necessary
        if not os.path.exists( self.dest ):
            self.logger.debug("Creating directory %s " % self.dest)
            os.makedirs( self.dest )
        elif not os.path.isdir( self.dest ):
            raise ConfigurationException('Destination directory is a file')

        self.logger.info("Setting the destination directory to %s " % self.dest )

        #Retrieving output files location from the server
        self.logger.debug('Retrieving locations for task %s' % self.cachedinfo['RequestName'] )
        inputlist =  [ ('workflow', self.cachedinfo['RequestName']) ]
        inputlist.extend(list(argv.iteritems()))
        if getattr(self.options, 'quantity', None):
            self.logger.debug('Retrieving %s file locations' % self.options.quantity )
            inputlist.append( ('limit',self.options.quantity) )
        if getattr(self.options, 'jobids', None):
            self.logger.debug('Retrieving jobs %s' % self.options.jobids )
            inputlist.extend( self.options.jobids )
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data = inputlist)
        self.logger.debug('Server result: %s' % dictresult )
        dictresult = self.processServerResult(dictresult)

        if status != 200:
            msg = "Problem retrieving information from the server:\ninput:%s\noutput:%s\nreason:%s" % (str(inputlist), str(dictresult), str(reason))
            raise ConfigurationException(msg)

        totalfiles = len( dictresult['result'] )
        cpresults = []
#        for workflow in dictresult['result']: TODO re-enable this when we will have resubmissions
        workflow = dictresult['result']        #TODO assigning workflow to dictresult. for the moment we have only one wf
        arglist = ['-d', self.dest, '-i', workflow, '-t', self.options.task, '-p', self.proxyfilename]
        if len(workflow) > 0:
            self.logger.info("Retrieving %s files" % totalfiles )
            copyoutput = remote_copy( self.logger, arglist )
            copyoutput()

        if totalfiles == 0:
            self.logger.info("No files to retrieve")
Example #45
0
    def getOutput(self, job):
        """Retrieve the logs of the given CRAB3 job via the REST interface.

        :arg job: Ganga job whose ``backend`` holds taskname, crabid and
                  server connection settings.
        Failures are only printed; no value is returned either way."""
        # FIX: typo in the log message ('geting ... jon' -> 'getting ... job');
        # the dead commented-out legacy implementation was removed.
        logger.info('getting Output for job %s:%s' % (job.backend.taskname, job.backend.crabid))
        inputlist = [('workflow', job.backend.taskname),
                     ('subresource', 'logs'),
                     ('jobids', job.backend.crabid)]

        # Proxy is used both as client certificate and key.
        server = HTTPRequests(job.backend.server_name, job.backend.userproxy)

        resource = job.backend.apiresource + 'workflow'

        try:
            dictresult, status, reason = server.get(resource, data=inputlist)

        except HTTPException as e:
            # FIX: Python-2-only 'except X, e' replaced with the 'as' form and
            # print statements with single-argument print() calls -- both work
            # on Python 2 and 3. NOTE(review): the error is swallowed and no
            # result is returned; confirm callers expect this best-effort mode.
            print(type(e))
            print(dir(e))
            print(e.req_headers)
            print(e.req_data)
            print(e.reason)
            print(e.message)
            print(e.headers)
            print(e.result)
            print(e.status)
            print(e.url)
            print(e.args)
Example #46
0
    def _execute(self, resthost, resturi, config, task):
        self.logger.info('Cleaning filemetadata older than 30 days..')
        server = HTTPRequests(resthost,
                              config.TaskWorker.cmscert,
                              config.TaskWorker.cmskey,
                              retry=2)
        ONE_MONTH = 24 * 30
        try:
            instance = resturi.split('/')[2]
            server.delete('/crabserver/%s/filemetadata' % instance,
                          data=urllib.urlencode({'hours': ONE_MONTH}))


#TODO return fro the server a value (e.g.: ["ok"]) to see if everything is ok
#            result = server.delete('/crabserver/dev/filemetadata', data=urllib.urlencode({'hours': ONE_MONTH}))[0]['result'][0]
#            self.logger.info('FMDCleaner, got %s' % result)
        except HTTPException as hte:
            self.logger.error(hte.headers)
Example #47
0
    def execute(self, *args):
        """Report overall DAG failure to the REST interface and return the status.

        :arg args[0]: dag_status (int); 3 means aborted on max failures
        :arg args[1]: failed_count (int); number of failed jobs
        :arg args[2]: restinstance; REST hostname to contact
        :arg args[3]: resturl; REST url the status update is PUT to
        :return: the integer dag_status, unchanged."""
        dag_status = int(args[0])
        failed_count = int(args[1])
        restinstance = args[2]
        resturl = args[3]
        if dag_status in [1, 2, 3]:
            if dag_status == 3:
                msg = "Task aborted because the maximum number of failures was hit; %d total failed jobs." % failed_count
            else:
                msg = "Task failed overall; %d failed jobs" % failed_count
            # NOTE(review): 'kw' and 'e' are not defined anywhere in this scope,
            # so entering this branch raises NameError at runtime; 'msg' built
            # above is never used -- presumably it was meant to be the
            # 'subfailure' payload. TODO confirm intent before fixing.
            configreq = {'workflow': kw['task']['tm_taskname'],
                         'substatus': "FAILED",
                         'subfailure': base64.b64encode(str(e)),}
            data = urllib.urlencode(configreq)
            # User proxy used both as client certificate and key.
            server = HTTPRequests(restinstance, os.environ['X509_USER_PROXY'], os.environ['X509_USER_PROXY'])
            server.put(resturl, data=data)

        return dag_status
Example #48
0
    def __call__(self):
        """Retrieve the post-execution report for the cached task and write the
        analyzed/diff/double lumi JSON files into the output directory.

        :raise RESTCommunicationException: on a non-200 server reply."""
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

        self.logger.debug('Looking up report for task %s' % self.cachedinfo['RequestName'])
        dictresult, status, reason = server.get(self.uri, data = {'workflow': self.cachedinfo['RequestName'], 'subresource': 'report'})

        self.logger.debug("Result: %s" % dictresult)

        if status != 200:
            msg = "Problem retrieving report:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)
        if not dictresult['result'][0]['runsAndLumis'] :
            self.logger.info('No jobs finished yet. Report is available when jobs complete')
            return

        runlumiLists = map(lambda x: literal_eval(x['runlumi']), dictresult['result'][0]['runsAndLumis'].values())
        #convert lumi lists from strings to integers
        for runlumi in runlumiLists:
            for run in runlumi:
                runlumi[run] = map(int, runlumi[run])
        analyzed, diff, doublelumis = BasicJobType.mergeLumis(runlumiLists, dictresult['result'][0]['lumiMask'])
        numFiles = len(reduce(set().union, map(lambda x: literal_eval(x['parents']), dictresult['result'][0]['runsAndLumis'].values())))
        self.logger.info("%d files have been read" % numFiles)
        self.logger.info("%d events have been read" % sum(map(lambda x: x['events'], dictresult['result'][0]['runsAndLumis'].values())))

        if self.outdir:
            jsonFileDir = self.outdir
        else:
            jsonFileDir = os.path.join(self.requestarea, 'results')
        if analyzed:
            with open(os.path.join(jsonFileDir, 'analyzed.json'), 'w') as jsonFile:
                # FIX: the original called json.dump(analyzed, os.path.join(jsonFile)),
                # which raised a TypeError; dump straight to the file object.
                json.dump(analyzed, jsonFile)
                jsonFile.write("\n")
                self.logger.info("Analyzed lumi written to %s/analyzed.json" % jsonFileDir)
        if diff:
            with open(os.path.join(jsonFileDir, 'diff.json'), 'w') as jsonFile:
                json.dump(diff, jsonFile)
                jsonFile.write("\n")
                self.logger.info("%sNot Analyzed lumi written to %s/diff.json%s" % (colors.RED, jsonFileDir, colors.NORMAL))
        if doublelumis:
            with open(os.path.join(jsonFileDir, 'double.json'), 'w') as jsonFile:
                json.dump(doublelumis, jsonFile)
                jsonFile.write("\n")
                self.logger.info("%sDouble lumis written to %s/double.json%s" % (colors.RED, jsonFileDir, colors.NORMAL))
Example #49
0
    def __call__(self):
        """List the tasks submitted since self.date, optionally filtered by status.

        :return: list of [name, status] pairs, newest first.
        :raise RESTCommunicationException: on a non-200 server reply."""
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data={"timestamp": self.date})

        # FIX: check the HTTP status *before* digging into the payload; a
        # non-200 reply may not carry a "result" key at all.
        if status != 200:
            msg = "Problem retrieving tasks:\ninput:%s\noutput:%s\nreason:%s" % (
                str(self.date),
                str(dictresult),
                str(reason),
            )
            raise RESTCommunicationException(msg)

        dictresult = dictresult["result"]  # take just the significant part

        # Newest first.
        dictresult.sort()
        dictresult.reverse()

        if self.options.status:
            dictresult = [item for item in dictresult if item[1] == self.options.status]

        result = [item[0:2] for item in dictresult]

        today = date.today()

        if not dictresult:
            msg = "No tasks found from %s until %s" % (self.date, today)
            if self.options.status:
                msg += " with status %s" % (self.options.status)
            self.logger.info(msg)
            return result

        msg = "\nList of tasks from %s until %s" % (self.date, today)
        if self.options.status:
            msg += " with status %s" % (self.options.status)
        self.logger.info(msg)
        self.logger.info("=" * 80)
        self.logger.info("NAME\t\t\t\t\t\t\t\tSTATUS")
        self.logger.info("=" * 80)
        for item in dictresult:
            # Renamed from 'status' to avoid shadowing the HTTP status above.
            name, taskstatus = item[0:2]
            self.logger.info("%s\n\t\t\t\t\t\t\t\t%s" % (name, taskstatus))
            self.logger.info("-" * 80)
        self.logger.info("\n")

        return result
Example #50
0
def updateWebDir(ad):
    """Post the task's user webdir URL to the REST 'task' resource.

    :arg ad: classad with CRAB_RestHost, CRAB_RestURInoAPI, CRAB_ReqName,
             CRAB_UserWebDir and X509UserProxy attributes
    :return: 0 on success, 1 on HTTP failure."""
    host = ad['CRAB_RestHost']
    uri = ad['CRAB_RestURInoAPI'] + '/task'
    cert = ad['X509UserProxy']
    data = {'subresource' : 'addwebdir',
            'workflow' : ad['CRAB_ReqName'],
            'webdirurl' : ad['CRAB_UserWebDir']}
    try:
        # Proxy is used both as client certificate and key.
        server = HTTPRequests(host, cert, cert)
        server.post(uri, data = urllib.urlencode(data))
        return 0
    except HTTPException as hte:
        printLog(traceback.format_exc())
        printLog(hte.headers)
        printLog(hte.result)
        return 1
Example #51
0
def updateWebDir(ad):
    """Post the task's user webdir URL to the REST 'task' resource.

    :arg ad: classad with CRAB_RestHost, CRAB_RestURInoAPI, CRAB_ReqName,
             CRAB_UserWebDir and X509UserProxy attributes
    :return: 0 on success, 1 on HTTP failure."""
    host = ad['CRAB_RestHost']
    uri = ad['CRAB_RestURInoAPI'] + '/task'
    cert = ad['X509UserProxy']
    data = {'subresource': 'addwebdir',
            'workflow': ad['CRAB_ReqName'],
            'webdirurl': ad['CRAB_UserWebDir']}
    try:
        # Proxy is used both as client certificate and key.
        server = HTTPRequests(host, cert, cert)
        server.post(uri, data=urllib.urlencode(data))
        return 0
    except HTTPException as hte:
        printLog(traceback.format_exc())
        printLog(hte.headers)
        printLog(hte.result)
        return 1
Example #52
0
    def __call__(self):
        """Query and print the detailed status of the cached task."""
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)

        self.logger.debug('Looking up detailed status of task %s' % self.cachedinfo['RequestName'])
        statusdict, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName']})
        statusdict = statusdict['result'][0] #take just the significant part

        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(statusdict), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.debug(statusdict) #should be something like {u'result': [[123, u'ciao'], [456, u'ciao']]}

        self.logger.info("Task name:\t\t\t%s" % self.cachedinfo['RequestName'])
        self.logger.info("Task status:\t\t\t%s" % statusdict['status'])

        def logJDefErr(jdef):
            """Printing job def failures if any"""
            if jdef['jobdefErrors']:
                self.logger.error("%sFailed to inject %s\t%s out of %s:" % (colors.RED, colors.NORMAL,
                                                                            jdef['failedJobdefs'], jdef['totalJobdefs']))
                for error in jdef['jobdefErrors']:
                    self.logger.info("\t%s" % error)

        # Print the url of the panda monitor.
        if statusdict['taskFailureMsg']:
            self.logger.error("%sError during task injection:%s\t%s" % (colors.RED, colors.NORMAL, statusdict['taskFailureMsg']))
            # We might also have more information in the job def errors.
            logJDefErr(jdef=statusdict)
        elif statusdict['jobSetID']:
            username = urllib.quote(getUserName(self.logger))
            self.logger.info("Panda url:\t\t\thttp://pandamon-cms-dev.cern.ch/jobinfo?jobtype=*&jobsetID=%s&prodUserName=%s" % (statusdict['jobSetID'], username))
            # We have cases where the job def errors are there but we have a job def id.
            logJDefErr(jdef=statusdict)

        # Print information about jobs.
        states = statusdict['jobsPerStatus']
        total = sum(states.values())
        details = ''.join(st + ' %s\t' % self._percentageString(states[st], total) for st in states)
        if details:
            self.logger.info('Details:\t\t\t%s' % details)
Example #53
0
def serverCall(ddmServer, cert, key, verbose, call, api, data):
    """Invoke one HTTP verb (*call*, e.g. 'get'/'post') on the DDM registry API.

    :param ddmServer: DDM server URL.
    :param cert: path to the client certificate.
    :param key: path to the client key.
    :param verbose: forwarded to HTTPRequests.
    :param call: name of the HTTPRequests method to use.
    :param api: API name appended to the common '/registry/request' path.
    :param data: payload forwarded to the HTTP call.
    :return: first element of the HTTPRequests result tuple.
    :raises TaskWorkerException: (with retry=True) on any HTTPException.
    """
    connection = HTTPRequests(url=ddmServer, localcert=cert, localkey=key, verbose=verbose)
    endpoint = '/registry/request' + '/' + api
    verb = getattr(connection, call)
    try:
        response = verb(endpoint, data=data)
    except HTTPException as hte:
        msg = "HTTP Error while contacting the DDM server %s:\n%s" % (ddmServer, str(hte))
        msg += "\nHTTP Headers are: %s" % hte.headers
        raise TaskWorkerException(msg, retry=True)

    return response[0]
Example #54
0
    def execute(self, *args):
        """Record an overall-FAILED task status in the REST DB after the DAG ends.

        Positional args:
            args[0]: DAG exit status (int); values 1, 2 or 3 mean failure.
            args[1]: number of failed jobs (int).
            args[2]: REST host instance to contact.
            args[3]: REST URL to PUT the status update to.

        Returns the integer DAG status unchanged.

        NOTE(review): `kw` and `e` are not defined anywhere in this scope; as
        written, the failure branch raises NameError unless they are globals.
        Presumably `kw['task']` is the task dict and `e` a previously caught
        exception — verify against the original caller. Also note `msg` is
        built but never used; it looks like it was meant to be the
        'subfailure' payload instead of `str(e)`.
        """
        dag_status = int(args[0])
        failed_count = int(args[1])
        restinstance = args[2]
        resturl = args[3]
        # 1/2/3 are the DAG failure codes; 3 means the failure threshold was hit.
        if dag_status in [1, 2, 3]:
            if dag_status == 3:
                msg = "Task aborted because the maximum number of failures was hit; %d total failed jobs." % failed_count
            else:
                msg = "Task failed overall; %d failed jobs" % failed_count
            configreq = {
                'workflow': kw['task']['tm_taskname'],
                'substatus': "FAILED",
                'subfailure': base64.b64encode(str(e)),
            }
            data = urllib.urlencode(configreq)
            # Authenticate with the user's proxy for both cert and key.
            server = HTTPRequests(restinstance, os.environ['X509_USER_PROXY'],
                                  os.environ['X509_USER_PROXY'])
            server.put(resturl, data=data)

        return dag_status
Example #55
0
    def status(self, job):
        """Get the status of a jobset."""
        """
        if not os.path.exists(job.inputdata.ui_working_dir):
            raise CRABServerError('Workdir "%s" not found.' %
                                  job.inputdata.ui_working_dir)

        cmd = 'crab -status -c %s' % job.inputdata.ui_working_dir
        self._send_with_retry(cmd, 'status', job.backend.crab_env)
        return True
        """
        #from RESTInteractions import HTTPRequests

        logger.info('checkin status')

        try:
            server = HTTPRequests(job.backend.server_name,
                                  job.backend.userproxy)
            resource = job.backend.apiresource + 'workflow'
            dictresult, status, reason = server.get(
                resource, data={'workflow': job.backend.taskname})
            logger.info("status %s, reason %s" % (status, reason))
            return dictresult, status, reason

        except HTTPException, e:
            print type(e)
            print dir(e)
            print e.req_headers
            print e.req_data
            print e.reason
            print e.message
            print e.headers
            print e.result
            print e.status
            print e.url
            print e.args

            raise e
Example #56
0
    def kill(self, job):
        """Kill all the jobs on the task."""
        """
        if not os.path.exists(job.inputdata.ui_working_dir):
            raise CRABServerError('Workdir "%s" not found.' %
                                  job.inputdata.ui_working_dir)

        if not job.master:
            cmd = 'crab -kill all -c %s' % job.inputdata.ui_working_dir
        else:
            cmd = 'crab -kill %d -c %s' % (int(job.id) + 1,
                                           job.inputdata.ui_working_dir)
        self._send_with_retry(cmd, 'kill', job.backend.crab_env)
        return True
        """
        try:
            server = HTTPRequests(job.backend.server_name,
                                  job.backend.userproxy)
            resource = job.backend.apiresource + 'workflow'
            dictresult, status, reason = server.delete(
                resource,
                data=urllib.urlencode({'workflow': job.backend.taskname}))
            logger.info("Kill answer: %s" % status)
            logger.info("Kill dictresult: %s" % dictresult)
            return True
        except HTTPException, e:
            logger.error(type(e))
            logger.error(e.req_headers)
            logger.error(e.req_data)
            logger.error(e.reason)
            logger.error(e.message)
            logger.error(e.headers)
            logger.error(e.result)
            logger.error(e.status)
            logger.error(e.url)
            logger.error(e.args)
            raise e
Example #57
0
    def _execute(self, resthost, resturi, config, task):
        """Poll DDM for pending tape-recall requests and re-activate finished ones.

        For every task currently in status TAPERECALL, query the DDM server
        for the state of its recall request; when DDM reports 'completed',
        set the task back to status NEW and delete the warnings attached to
        it (the tape-recall warning is no longer valid).

        :param resthost: part of the action interface; not used in this body.
        :param resturi: part of the action interface; not used in this body.
        :param config: TaskWorker configuration; DDMServer, cmscert, cmskey,
                       resturl and restURInoAPI are read from config.TaskWorker.
        :param task: part of the action interface; not used in this body.
        """
        mw = MasterWorker(config, quiet=False, debug=True, test=False)

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks",
                             len(recallingTasks), tapeRecallStatus)
            self.logger.debug("Retrieved the following %s tasks: \n%s",
                              tapeRecallStatus, str(recallingTasks))
            for recallingTask in recallingTasks:
                # Without a DDM request id there is nothing to poll.
                if not recallingTask['tm_DDM_reqid']:
                    self.logger.debug(
                        "tm_DDM_reqid' is not defined for task %s, skipping such task",
                        recallingTask['tm_taskname'])
                    continue
                # Ask DDM for the current state of this recall request.
                ddmRequest = statusRequest(recallingTask['tm_DDM_reqid'],
                                           config.TaskWorker.DDMServer,
                                           config.TaskWorker.cmscert,
                                           config.TaskWorker.cmskey,
                                           verbose=False)
                self.logger.info("Contacted %s using %s and %s, got:\n%s",
                                 config.TaskWorker.DDMServer,
                                 config.TaskWorker.cmscert,
                                 config.TaskWorker.cmskey, ddmRequest)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
                if ddmRequest["data"][0][
                        "status"] == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info(
                        "Request %d is completed, setting status of task %s to NEW",
                        recallingTask['tm_DDM_reqid'],
                        recallingTask['tm_taskname'])
                    mw.updateWork(recallingTask['tm_taskname'],
                                  recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    server = HTTPRequests(config.TaskWorker.resturl,
                                          config.TaskWorker.cmscert,
                                          config.TaskWorker.cmskey,
                                          retry=20,
                                          logger=self.logger)
                    mpl = MyProxyLogon(config=config,
                                       server=server,
                                       resturi=config.TaskWorker.restURInoAPI,
                                       myproxylen=self.pollingTime)
                    mpl.execute(task=recallingTask
                                )  # this adds 'user_proxy' to recallingTask
                    mpl.deleteWarnings(recallingTask['user_proxy'],
                                       recallingTask['tm_taskname'])
Example #58
0
    def uploadWarning(self, warning, userProxy, taskname):
        """Attach *warning* to the task in the REST DB, best effort.

        When no server is configured (unit tests) the warning is only logged.
        Upload failures are logged and swallowed, never raised.
        """
        if not self.server:  # When testing, the server can be None
            self.logger.warning(warning)
            return

        proxiedServer = HTTPRequests(self.server['host'], userProxy, userProxy,
                                     retry=2, logger=self.logger)
        try:
            # Truncate over-long messages and base64-encode for transport.
            payload = {
                'subresource': 'addwarning',
                'workflow': taskname,
                'warning': b64encode(truncateError(warning)),
            }
            proxiedServer.post(self.restURInoAPI + '/task',
                               data=urllib.urlencode(payload))
        except HTTPException as hte:
            self.logger.error("Error uploading warning: %s", str(hte))
            self.logger.warning(
                "Cannot add a warning to REST interface. Warning message: %s",
                warning)
Example #59
0
    def __call__(self):
        """Fetch the user's tasks since self.date and log a NAME/STATUS table."""
        server = HTTPRequests(self.serverurl, self.proxyfilename,
                              self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data={'timestamp': self.date})
        # Only the 'result' payload is relevant.
        rows = dictresult['result']

        today = date.today()
        separator = '=' * 80

        self.logger.info('\n')
        self.logger.info('The list of tasks from %s until %s' % (self.date, today))
        self.logger.info(separator)
        self.logger.info('NAME\t\t\t\t\t\t\t\tSTATUS')
        self.logger.info(separator)

        for row in rows:
            taskName, taskStatus = row[0:2]
            self.logger.info('%s\n\t\t\t\t\t\t\t\t%s' % (taskName, taskStatus))
            self.logger.info('-' * 80)

        self.logger.info('\n')