Beispiel #1
0
    def pubFailed(self, task, files, failure_reasons=None, force_failure=False):
        """
        Mark a list of files as FAILED for publication in the Oracle backend.

        :param task: task name (currently unused; kept for interface compatibility)
        :param files: iterable of tuples whose first element is the source LFN
        :param failure_reasons: list(str) of failure reasons for the files
        :param force_failure: flag kept for interface compatibility (unused here)
        :return: None
        """
        # Avoid the shared mutable default-argument pitfall (was `list()`).
        if failure_reasons is None:
            failure_reasons = []

        id_list = []
        for lfn_tuple in files:
            source_lfn = lfn_tuple[0]
            docId = getHashLfn(source_lfn)
            id_list.append(docId)
            self.logger.debug("Marking failed %s" % docId)

        fileDoc = dict()
        # NOTE(review): hard-coded asoworker differs from the sibling update
        # methods, which use self.config.asoworker — confirm this is intended.
        fileDoc['asoworker'] = 'asodciangot1'
        fileDoc['subresource'] = 'updatePublication'
        fileDoc['list_of_ids'] = id_list
        fileDoc['list_of_publication_state'] = ['FAILED' for x in id_list]

        # TODO: implement retry, publish_retry_count missing from input?
        fileDoc['list_of_retry_value'] = [1 for x in id_list]
        fileDoc['list_of_failure_reason'] = failure_reasons

        try:
            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(fileDoc))
            self.logger.debug("updated failed: %s " % id_list)
        except Exception:
            # Best-effort update: log and continue, matching sibling methods.
            msg = "Error updating failed documents"
            self.logger.exception(msg)
Beispiel #2
0
    def pubDone(self, workflow, files):
        """
        Mark a list of source LFNs as successfully published (DONE).

        :param workflow: workflow name, used only to prefix log messages
        :param files: iterable of source LFNs
        :return: None
        """
        log_prefix = "%s: " % workflow
        doc_ids = []
        for source_lfn in files:
            doc_id = getHashLfn(source_lfn)
            doc_ids.append(doc_id)
            detail = "Marking file %s as published." % source_lfn
            detail += " Document id: %s (source LFN: %s)." % (doc_id, source_lfn)
            self.logger.info(log_prefix + detail)

        payload = {
            'asoworker': self.config.asoworker,
            'subresource': 'updatePublication',
            'list_of_ids': doc_ids,
            'list_of_publication_state': ['DONE' for _ in doc_ids],
        }
        try:
            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(payload))
            self.logger.debug("updated done: %s " % doc_ids)
        except Exception as ex:
            # Best-effort update: log the error and keep going.
            self.logger.error("Error during status update for published docs: %s" % ex)
Beispiel #3
0
    def transferred(self, files):
        """
        Mark the given files as transferred (DONE) in the Oracle backend.

        Only LFNs with the substring 'temp' at offset 7 are updated.

        :param files: iterable of tuples whose first element is the source LFN
        :return: 0 on success, 1 on failure
        """
        doc_ids = []
        marked_lfns = []
        try:
            for entry in files:
                source_lfn = entry[0]
                # Only temp-area LFNs ('temp' at position 7) are marked done.
                if source_lfn.find('temp') == 7:
                    doc_id = getHashLfn(source_lfn)
                    doc_ids.append(doc_id)
                    marked_lfns.append(source_lfn)
                    self.logger.debug("Marking done %s" % source_lfn)
                    self.logger.debug("Marking done %s" % doc_id)

            payload = {
                'asoworker': self.config.asoworker,
                'subresource': 'updateTransfers',
                'list_of_ids': doc_ids,
                'list_of_transfer_state': ["DONE" for _ in doc_ids],
            }
            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(payload))
            self.logger.debug("Marked good %s" % doc_ids)
        except Exception:
            self.logger.exception("Error updating documents")
            return 1
        return 0
Beispiel #4
0
    def failed(self, files, failures_reasons=None, max_retry=3, force_fail=False, submission_error=False):
        """
        Mark transfers as FAILED or RETRY in the Oracle backend.

        :param files: tuple (source_lfn, dest_lfn)
        :param failures_reasons: list(str) with reasons of failure, one per file
        :param max_retry: number of retry before giving up
        :param force_fail: flag for triggering failure without retry
        :param submission_error: error during fts submission
        :return: 0 on success, 1 on failure
        """
        # Avoid the shared mutable default-argument pitfall (was `[]`).
        if failures_reasons is None:
            failures_reasons = []

        updated_lfn = []
        for file_tuple in files:
            lfn = file_tuple[0]
            # Load the document to get the current transfer_retry_count.
            docId = getHashLfn(lfn)
            self.logger.debug("Marking failed %s" % docId)
            try:
                docbyId = self.oracleDB.get(self.config.oracleUserFileTrans.replace('filetransfer', 'fileusertransfers'),
                                            data=encodeRequest({'subresource': 'getById', 'id': docId}))
                document = oracleOutputMapping(docbyId, None)[0]
                self.logger.debug("Document: %s" % document)
            except Exception as ex:
                self.logger.error("Error updating failed docs: %s" % ex)
                return 1

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = docId
            if failures_reasons:
                try:
                    # Pick the reason matching this file's position in the input.
                    fileDoc['list_of_failure_reason'] = failures_reasons[files.index(file_tuple)]
                except Exception:
                    # Was a bare `except:`; narrow to Exception and fix the typo
                    # in the fallback message ("unexcpected" -> "unexpected").
                    fileDoc['list_of_failure_reason'] = "unexpected error, missing reasons"
                    self.logger.exception("missing reasons")

            # Give up (FAILED) when forced or when the retry budget is exhausted;
            # otherwise schedule another attempt (RETRY).
            if force_fail or document['transfer_retry_count'] + 1 > max_retry:
                fileDoc['list_of_transfer_state'] = 'FAILED'
            else:
                fileDoc['list_of_transfer_state'] = 'RETRY'

            if submission_error:
                fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
            # The original set list_of_retry_value to 1 in every branch;
            # consolidated here into a single assignment.
            fileDoc['list_of_retry_value'] = 1

            self.logger.debug("update: %s" % fileDoc)
            try:
                updated_lfn.append(docId)
                self.oracleDB.post(self.config.oracleFileTrans,
                                   data=encodeRequest(fileDoc))
            except Exception:
                self.logger.exception('ERROR updating failed documents')
                return 1
        self.logger.debug("failed file updated")
        return 0
Beispiel #5
0
    def submitted(self, files):
        """
        Mark the list of files as submitted once the FTS submission succeeded
        (ACQUIRED -> SUBMITTED).

        :param files: tuple (source_lfn, dest_lfn)
        :return: (list of LFNs in transfer, dashboard report tuple)
        """
        lfn_in_transfer = []
        dash_rep = ()
        id_list = []
        docId = ''
        for entry in files:
            lfn = entry[0]
            # Only temp-area LFNs ('temp' at position 7) get a status update.
            if lfn.find('temp') == 7:
                self.logger.debug("Marking acquired %s" % lfn)
                docId = getHashLfn(lfn)
                self.logger.debug("Marking acquired %s" % docId)
                id_list.append(docId)
            # BUG FIX: the original appended temp LFNs twice (once inside the
            # 'temp' branch and once unconditionally), producing duplicates in
            # the returned list. Append each LFN exactly once.
            lfn_in_transfer.append(lfn)
            # TODO: add dashboard stuff
            # dash_rep = (document['jobid'], document['job_retry_count'], document['taskname'])
        try:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = id_list
            fileDoc['list_of_transfer_state'] = ["SUBMITTED" for x in id_list]

            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(fileDoc))
            self.logger.debug("Marked acquired %s" % (id_list))
        except Exception as ex:
            # Best-effort update: log the error and keep going.
            self.logger.error("Error during status update: %s" % ex)
        return lfn_in_transfer, dash_rep
Beispiel #6
0
    def worker(self, i, inputs):
        """
        Consume work items from the input queue and submit FTS transfer jobs.

        For each item this loop:
        - retrieves the user DN and user proxy (delegating via myproxy if needed),
        - creates an FTS context and delegates the proxy to FTS,
        - submits the FTS job (faked in test mode),
        - updates the document states (submitted/failed) in the backend,
        - dumps the submitted LFNs to a per-user monitor file.

        Runs until self.STOP is set; any per-item failure is routed through
        self.critical_failure() and the loop continues with the next item.

        :param i: thread number
        :param inputs: queue of tuples (lfns, _user, source, dest, tfc_map)
        :return: 0 when the worker exits cleanly
        """
        # TODO: differentiate log messages per USER!
        logger = self.logger
        logger.info("Process %s is starting. PID %s", i, os.getpid())
        lock = Lock()
        Update = update(logger, self.config)

        while not self.STOP:
            # Poll the queue; sleep briefly when there is nothing to do.
            if inputs.empty():
                time.sleep(10)
                continue
            try:
                lfns, _user, source, dest, tfc_map = inputs.get()
                [user, group, role] = _user
            except (EOFError, IOError):
                crashMessage = "Hit EOF/IO in getting new work\n"
                crashMessage += "Assuming this is a graceful break attempt.\n"
                logger.error(crashMessage)
                continue

            start = time.time()

            if not self.config.TEST:
                # Resolve the user's DN; without it we cannot obtain a proxy.
                try:
                    userDN = getDNFromUserName(user,
                                               logger,
                                               ckey=self.config.opsProxy,
                                               cert=self.config.opsProxy)
                except Exception as ex:
                    logger.exception('Cannot retrieve user DN')
                    self.critical_failure(lfns, lock, inputs)
                    continue

                # Parameters used to retrieve/delegate the user proxy.
                defaultDelegation = {
                    'logger':
                    logger,
                    'credServerPath':
                    self.config.credentialDir,
                    'myProxySvr':
                    'myproxy.cern.ch',
                    'min_time_left':
                    getattr(self.config, 'minTimeLeft', 36000),
                    'serverDN':
                    self.config.serverDN,
                    'uisource':
                    '',
                    'cleanEnvironment':
                    getattr(self.config, 'cleanEnvironment', False)
                }

                cache_area = self.config.cache_area

                # The myproxy account is extracted as the host part of the
                # cache-area URL.
                try:
                    defaultDelegation['myproxyAccount'] = re.compile(
                        'https?://([^/]*)/.*').findall(cache_area)[0]
                except IndexError:
                    logger.error(
                        'MyproxyAccount parameter cannot be retrieved from %s . '
                        % self.config.cache_area)
                    self.critical_failure(lfns, lock, inputs)
                    continue

                if getattr(self.config, 'serviceCert', None):
                    defaultDelegation['server_cert'] = self.config.serviceCert
                if getattr(self.config, 'serviceKey', None):
                    defaultDelegation['server_key'] = self.config.serviceKey

                try:
                    defaultDelegation['userDN'] = userDN
                    defaultDelegation['group'] = group
                    defaultDelegation['role'] = role
                    logger.debug('delegation: %s' % defaultDelegation)
                    valid_proxy, user_proxy = getProxy(defaultDelegation,
                                                       logger)
                    if not valid_proxy:
                        # No valid proxy: flag the docs for retry and move on.
                        logger.error(
                            'Failed to retrieve user proxy... putting docs on retry'
                        )
                        logger.error(
                            'docs on retry: %s' %
                            Update.failed(lfns, submission_error=True))
                        continue
                except Exception:
                    logger.exception('Error retrieving proxy')
                    self.critical_failure(lfns, lock, inputs)
                    continue
            else:
                # Test mode: use the operations proxy instead of a user proxy.
                user_proxy = self.config.opsProxy
                self.logger.debug("Using opsProxy for testmode")

            # Create the FTS context and delegate the proxy (48h lifetime).
            context = dict()
            try:
                if self.config.TEST:
                    logger.debug("Running in test mode, submitting fake jobs")
                else:
                    context = fts3.Context(self.config.serverFTS,
                                           user_proxy,
                                           user_proxy,
                                           verify=True)
                    logger.debug(
                        fts3.delegate(context,
                                      lifetime=timedelta(hours=48),
                                      force=False))
            except Exception:
                logger.exception("Error submitting to FTS")
                self.critical_failure(lfns, lock, inputs)
                continue

            # Submit the transfer job (or fake one in test mode).
            failed_lfn = list()
            try:
                if self.config.TEST:
                    submitted_lfn = lfns
                    jobid = getHashLfn(lfns[0][0])
                    self.logger.debug('Fake job id: ' + jobid)
                else:
                    failed_lfn, submitted_lfn, jobid = Submission(
                        lfns, source, dest, i, self.logger, fts3, context,
                        tfc_map)
                    # jobid == -1 signals a failed submission.
                    if jobid == -1:
                        self.critical_failure(lfns, lock, inputs)
                        continue
                    logger.info('Submitted %s files' % len(submitted_lfn))
            except Exception:
                logger.exception("Unexpected error during FTS job submission!")
                self.critical_failure(lfns, lock, inputs)
                continue

            # TODO: add file FTS id and job id columns for kill command
            try:
                Update.submitted(lfns)
            except Exception:
                logger.exception("Error updating document status")
                self.critical_failure(lfns, lock, inputs)
                continue

            # Files rejected at submission time are marked failed/retried.
            try:
                Update.failed(failed_lfn)
            except Exception:
                logger.exception(
                    "Error updating document status, job submission will be retried later..."
                )
                self.critical_failure(lfns, lock, inputs)
                continue

            # Dump the submitted LFNs to a per-user monitor file.
            try:
                createLogdir('Monitor/' + user)
                with open('Monitor/' + user + '/' + str(jobid) + '.txt',
                          'w') as outfile:
                    json.dump(lfns, outfile)
                logger.info('Monitor files created')
            except Exception:
                logger.exception("Error creating file for monitor")
                self.critical_failure(lfns, lock, inputs)
                continue

            end = time.time()
            self.logger.info('Input processed in %s', str(end - start))
            time.sleep(0.5)

        logger.debug("Worker %s exiting.", i)
        return 0