Example #1
0
    def failed(self, files, failures_reasons=[], max_retry=3, force_fail=False, submission_error=False):
        """

        :param files: tuple (source_lfn, dest_lfn)
        :param failures_reasons: list(str) with reasons of failure
        :param max_retry: number of retry before giving up
        :param force_fail: flag for triggering failure without retry
        :param submission_error: error during fts submission
        :return:
        """
        updated_lfn = []
        for Lfn in files:
            lfn = Lfn[0]
            # Load document and get the retry_count
            docId = getHashLfn(lfn)
            self.logger.debug("Marking failed %s" % docId)
            try:
                docbyId = self.oracleDB.get(self.config.oracleUserFileTrans.replace('filetransfer','fileusertransfers'),
                                            data=encodeRequest({'subresource': 'getById', 'id': docId}))
                document = oracleOutputMapping(docbyId, None)[0]
                self.logger.debug("Document: %s" % document)
            except Exception as ex:
                self.logger.error("Error updating failed docs: %s" % ex)
                return 1

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = docId
            if failures_reasons:
                try:
                    fileDoc['list_of_failure_reason'] = failures_reasons[files.index(Lfn)]
                except Exception:
                    fileDoc['list_of_failure_reason'] = "unexpected error, missing reasons"
                    self.logger.exception("missing reasons")

            if force_fail or document['transfer_retry_count'] + 1 > max_retry:
                fileDoc['list_of_transfer_state'] = 'FAILED'
                fileDoc['list_of_retry_value'] = 1
            else:
                fileDoc['list_of_transfer_state'] = 'RETRY'

            if submission_error:
                fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
            fileDoc['list_of_retry_value'] = 1

            self.logger.debug("update: %s" % fileDoc)
            try:
                updated_lfn.append(docId)
                self.oracleDB.post(self.config.oracleFileTrans,
                                   data=encodeRequest(fileDoc))
            except Exception:
                self.logger.exception('ERROR updating failed documents')
                return 1
        self.logger.debug("failed file updated")
        return 0
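
A minimal standalone sketch of the FAILED/RETRY decision used above; the helper name and defaults are illustrative, not part of the original class:

def next_transfer_state(transfer_retry_count, max_retry=3, force_fail=False):
    """Return 'FAILED' when retries are exhausted or failure is forced, 'RETRY' otherwise."""
    if force_fail or transfer_retry_count + 1 > max_retry:
        return 'FAILED'
    return 'RETRY'

# e.g. next_transfer_state(2) -> 'RETRY', next_transfer_state(3) -> 'FAILED',
#      next_transfer_state(0, force_fail=True) -> 'FAILED'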
Example #2
0
    def algorithm(self, parameters=None):
        """
        Performs the doRetries method, loading the appropriate
        plugin for each job and handling it.
        """
        logging.debug("Running retryManager algorithm")
        if self.config.isOracle:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'retryTransfers'
            fileDoc['time_to'] = self.cooloffTime
            self.logger.debug('fileDoc: %s' % fileDoc)
            try:
                results = self.oracleDB.post(self.config.oracleFileTrans,
                                             data=encodeRequest(fileDoc))
            except Exception:
                self.logger.exception("Failed to get retry transfers in oracleDB: %s")
                return
            logging.info("Retried files in cooloff: %s,\n now getting transfers to kill" % str(results))

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'getTransfersToKill'
            fileDoc['grouping'] = 0
            try:
                results = self.oracleDB.get(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                result = oracleOutputMapping(results)
            except Exception as ex:
                self.logger.error("Failed to get killed transfers \
                                   from oracleDB: %s" % ex)
                return

            usersToKill = list(set([(x['username'], x['user_group'], x['user_role']) for x in result]))

            self.logger.debug("Users with transfers to kill: %s" % usersToKill)
            transfers = Queue()

            for i in range(self.config.kill_threads):
                worker = Thread(target=self.killThread, args=(i, transfers,))
                worker.setDaemon(True)
                worker.start()

            for user in usersToKill:
                user_trans = [x for x in result if (x['username'], x['user_group'], x['user_role']) == user]
                self.logger.info("Inserting %s transfers of user %s in the killing queue" % (len(user_trans), user))
                transfers.put(user_trans)

            transfers.join()
            self.logger.info("Transfers killed.")
        else:
            self.doRetries()
Example #3
0
    def active_tasks(self, db):

        fileDoc = {}
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquirePublication'

        self.logger.debug("Retrieving publications from oracleDB")

        results = ''
        try:
            results = db.post(self.config.oracleFileTrans,
                              data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                                from oracleDB: %s" %ex)
            return []

        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquiredPublication'
        fileDoc['grouping'] = 0
        fileDoc['limit'] = 100000

        self.logger.debug("Retrieving max.100000 acquired publications from oracleDB")

        result = []

        try:
            results = db.get(self.config.oracleFileTrans,
                             data=encodeRequest(fileDoc))
            result.extend(oracleOutputMapping(results))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                                from oracleDB: %s" %ex)
            return []

        self.logger.debug("publen: %s" % len(result))

        self.logger.debug("%s acquired publications retrieved" % len(result))
        #TODO: join query for publisher (same of submitter)
        unique_tasks = [list(i) for i in
                        set(tuple([x['username'], x['user_group'], x['user_role'], x['taskname']])
                            for x in result if x['transfer_state'] == 3)]

        info = []
        for task in unique_tasks:
            info.append([x for x in result if x['taskname'] == task[3]])
        return zip(unique_tasks, info)
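
The task grouping at the end of active_tasks can also be written with a plain dictionary; this is an illustrative sketch (not the original code) that assumes each row is a dict with the same keys used above:

def group_rows_by_task(rows):
    """Group acquired-publication rows by (username, user_group, user_role, taskname)."""
    grouped = {}
    for row in rows:
        if row['transfer_state'] != 3:          # same filter as in active_tasks above
            continue
        key = (row['username'], row['user_group'], row['user_role'], row['taskname'])
        grouped.setdefault(key, []).append(row)
    # return (task_key, rows_of_that_task) pairs, mirroring zip(unique_tasks, info)
    return [(list(key), task_rows) for key, task_rows in grouped.items()]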
Example #4
0
def mark_failed(files, oracleDB, logger, failure_reason=""):
    """
    Something failed for these files so increment the retry count
    """
    h = 0
    for lfn in files:
        h += 1
        logger.debug("Marking failed %s" % h)
        source_lfn = lfn
        docId = getHashLfn(source_lfn)
        logger.debug("Marking failed %s" % docId)
        try:
            docbyId = oracleDB.get(config.General.oracleUserTrans,
                                   data=encodeRequest({
                                       'subresource': 'getById',
                                       'id': docId
                                   }))
        except Exception:
            logger.exception("Error updating failed docs.")
            continue
        document = oracleOutputMapping(docbyId, None)[0]
        logger.debug("Document: %s" % document)

        try:
            fileDoc = dict()
            fileDoc['asoworker'] = 'asodciangot1'
            fileDoc['subresource'] = 'updatePublication'
            fileDoc['list_of_ids'] = docId

            fileDoc['list_of_publication_state'] = 'FAILED'
            #if force_failure or document['publish_retry_count'] > self.max_retry:
            #    fileDoc['list_of_publication_state'] = 'FAILED'
            #else:
            #    fileDoc['list_of_publication_state'] = 'RETRY'
            # TODO: implement retry
            fileDoc['list_of_retry_value'] = 1
            fileDoc['list_of_failure_reason'] = failure_reason

            logger.debug("fileDoc: %s " % fileDoc)

            _ = oracleDB.post(config.General.oracleFileTrans,
                              data=encodeRequest(fileDoc))
            logger.debug("updated: %s " % docId)
        except Exception as ex:
            msg = "Error updating document: %s" % fileDoc
            msg += str(ex)
            msg += str(traceback.format_exc())
            logger.error(msg)
            continue
Example #5
0
def mark_failed(ids, failures_reasons, oracleDB):
    """
    Mark the list of files as failed
    :param ids: list of Oracle file ids to update
    :param failures_reasons: list of strings with transfer failure messages
    :return: the list of ids on success, None on failure
    """
    os.environ["X509_CERT_DIR"] = os.getcwd()

    if len(ids) > 0:

        try:
            data = dict()
            data['asoworker'] = 'rucio'
            data['subresource'] = 'updateTransfers'
            data['list_of_ids'] = ids
            data['list_of_transfer_state'] = ["FAILED" for _ in ids]
            data['list_of_failure_reason'] = failures_reasons
            data['list_of_retry_value'] = [0 for _ in ids]

            oracleDB.post('/filetransfers', data=encodeRequest(data))
            logging.info("Marked failed %s", ids)
        except Exception:
            logging.exception("Error updating documents")
            return None
    else:
        logging.info("Nothing to update (Failed)")

    return ids
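
The updateTransfers payload above is a set of parallel lists with one entry per file id. A small sketch of the payload construction alone, with the REST call left out (the helper name is illustrative; 'rucio' is the asoworker value used in the example):

def build_failed_update(ids, failures_reasons, asoworker='rucio'):
    """Return the updateTransfers payload that marks every id as FAILED."""
    assert len(ids) == len(failures_reasons), "one failure reason per file id"
    return {
        'asoworker': asoworker,
        'subresource': 'updateTransfers',
        'list_of_ids': ids,
        'list_of_transfer_state': ['FAILED'] * len(ids),
        'list_of_failure_reason': failures_reasons,
        'list_of_retry_value': [0] * len(ids),
    }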
Example #6
0
def mark_good(workflow, files, oracleDB, logger):
    """
    Mark the list of files as published
    """
    wfnamemsg = "%s: " % workflow
    for lfn in files:
        data = {}
        source_lfn = lfn
        docId = getHashLfn(source_lfn)
        msg = "Marking file %s as published." % lfn
        msg += " Document id: %s (source LFN: %s)." % (docId, source_lfn)
        logger.info(wfnamemsg+msg)
        data['asoworker'] = 'asodciangot1'
        data['subresource'] = 'updatePublication'
        data['list_of_ids'] = docId
        data['list_of_publication_state'] = 'DONE'
        data['list_of_retry_value'] = 1
        data['list_of_failure_reason'] = ''

        try:
            result = oracleDB.post(config.General.oracleFileTrans,
                                   data=encodeRequest(data))
            logger.debug("updated: %s %s " % (docId, result))
        except Exception as ex:
            logger.error("Error during status update: %s" %ex)
Example #7
0
def mark_failed(ids, failures_reasons):
    """
    Mark the list of files as failed
    :param ids: list of Oracle file ids to update
    :param failures_reasons: list of strings with transfer failure messages
    :return: 0 success, 1 failure
    """
    try:
        oracleDB = HTTPRequests(rest_filetransfers,
                                proxy,
                                proxy)
        data = dict()
        data['asoworker'] = 'asoless'
        data['subresource'] = 'updateTransfers'
        data['list_of_ids'] = ids
        data['list_of_transfer_state'] = ["FAILED" for _ in ids]
        data['list_of_failure_reason'] = failures_reasons
        data['list_of_retry_value'] = [0 for _ in ids]

        oracleDB.post('/filetransfers',
                      data=encodeRequest(data))
        logging.debug("Marked failed %s", ids)
    except Exception:
        logging.exception("Error updating documents")
        return 1
    return 0
Example #8
0
def mark_transferred(ids, server):
    """
    Mark the list of files as transferred
    :param ids: list of Oracle file ids to update
    :return: elapsed time of the POST call in seconds
    """
    t1 = time.time()
    try:
        print("Marking done %d files" % len(ids))

        data = dict()
        data['asoworker'] = asoworker
        data['subresource'] = 'updateTransfers'
        data['list_of_ids'] = ids
        data['list_of_transfer_state'] = ["DONE" for _ in ids]

        server.post(api='filetransfers', data=encodeRequest(data))
        t2 = time.time()
        print("Marked good")
        elapsed = int(t2 - t1)
    except Exception as ex:
        t2 = time.time()
        elapsed = int(t2 - t1)
        print("Error updating documents:\n %s" % str(ex))
    return elapsed
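
Starting the timer before the try block keeps elapsed defined even when the call raises. A compact sketch of the same timing pattern (the server object and payload are assumed to exist):

import time

def timed_post(server, payload):
    """POST a filetransfers update and return the elapsed seconds, also on failure."""
    start = time.time()
    try:
        server.post(api='filetransfers', data=payload)
        print("Marked good")
    except Exception as ex:        # broad catch mirrors the example above
        print("Error updating documents:\n %s" % str(ex))
    return int(time.time() - start)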
Example #9
0
    def getPublDescFiles(self, workflow, lfn_ready, logger):
        """
        Download and read the files describing what needs to be published
        CRAB REST does not have any good way to select from the DB only what we need;
        the most efficient way is to get the full list for the task and then trim it here,
        see: https://github.com/dmwm/CRABServer/issues/6124
        """
        out = []

        dataDict = {}
        dataDict['taskname'] = workflow
        dataDict['filetype'] = 'EDM'
        data = encodeRequest(dataDict)
        try:
            res = self.crabServer.get(api='filemetadata', data=data)
            # res is a 3-tuple: (result, exit code, status)
            res = res[0]
        except Exception as ex:
            logger.error(
                "Error during metadata retrieving from crabserver:\n%s", ex)
            return out

        metadataList = [json.loads(md) for md in res['result']
                        ]  # CRAB REST returns a list of JSON objects
        for md in metadataList:
            # pick only the metadata we need
            if md['lfn'] in lfn_ready:
                out.append(md)

        logger.info('Got filemetadata for %d LFNs', len(out))
        return out
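
When a task has many files, the membership test against lfn_ready dominates the trimming loop; converting it to a set once keeps the loop linear. A hedged sketch of just that trimming step (the function name is illustrative):

import json

def trim_metadata(raw_metadata, lfn_ready):
    """Keep only the file metadata whose 'lfn' is in lfn_ready."""
    wanted = set(lfn_ready)                          # O(1) lookups instead of list scans
    metadata = [json.loads(md) for md in raw_metadata]
    return [md for md in metadata if md['lfn'] in wanted]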
Example #10
0
    def getAcquired(self, users):
        """
        Get a number of documents to be submitted (in ACQUIRED status) and return results of the query for logs
        :return:
        """
        documents = list()

        for user in users:
            username = user[0]
            group = user[1]
            role = user[2]

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquiredTransfers'
            fileDoc['grouping'] = 1
            fileDoc['username'] = username
            if group == '':
                group = None
            if role == '':
                role = None
            fileDoc['vogroup'] = group
            fileDoc['vorole'] = role

            self.logger.debug("Retrieving users from oracleDB")

            try:
                results = self.oracleDB.get(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                documents += oracleOutputMapping(results)
            except Exception as ex:
                self.logger.error("Failed to get acquired transfers \
                                  from oracleDB: %s" % ex)

        return documents
Example #11
0
    def mark_good(files, crabServer, logger):
        """
        Mark the list of files as published
        """

        msg = "Marking %s file(s) as published." % len(files)
        logger.info(msg)
        if dryRun:
            logger.info("DryRun: skip marking good file")
            return

        nMarked = 0
        for lfn in files:
            data = {}
            source_lfn = lfn
            docId = getHashLfn(source_lfn)
            data['asoworker'] = config.General.asoworker
            data['subresource'] = 'updatePublication'
            data['list_of_ids'] = [docId]
            data['list_of_publication_state'] = ['DONE']
            data['list_of_retry_value'] = [1]
            data['list_of_failure_reason'] = ['']

            try:
                result = crabServer.post(api='filetransfers', data=encodeRequest(data))
                logger.debug("updated DocumentId: %s lfn: %s Result %s", docId, source_lfn, result)
            except Exception as ex:
                logger.error("Error updating status for DocumentId: %s lfn: %s", docId, source_lfn)
                logger.error("Error reason: %s", ex)

            nMarked += 1
            if nMarked % 10 == 0:
                logger.info('marked %d files', nMarked)
Example #12
0
def mark_transferred(ids):
    """
    Mark the list of files as transferred
    :param ids: list of Oracle file ids to update
    :return: 0 success, 1 failure
    """
    try:
        oracleDB = HTTPRequests(rest_filetransfers,
                                proxy,
                                proxy)
        logging.debug("Marking done %s", ids)

        data = dict()
        data['asoworker'] = 'asoless'
        data['subresource'] = 'updateTransfers'
        data['list_of_ids'] = ids
        data['list_of_transfer_state'] = ["DONE" for _ in ids]

        oracleDB.post('/filetransfers',
                      data=encodeRequest(data))
        logging.debug("Marked good %s", ids)
    except Exception:
        logging.exception("Error updating documents")
        return 1
    return 0
Example #13
0
    def mark_acquired(self, files=[]):
        """
        Mark the list of files as acquired
        """
        lfn_in_transfer = []
        dash_rep = ()
        if self.config.isOracle:
            toUpdate = list()
            for lfn in files:
                if lfn['value'][0].find('temp') == 7:
                    docId = lfn['key'][5]
                    toUpdate.append(docId)
                    try:
                        docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers','fileusertransfers'),
                                                    data=encodeRequest({'subresource': 'getById', 'id': docId}))
                        document = oracleOutputMapping(docbyId, None)[0]
                        dash_rep = (document['jobid'], document['job_retry_count'], document['taskname'])
                        lfn_in_transfer.append(lfn)
                    except Exception as ex:
                        self.logger.error("Error during dashboard report update: %s" %ex)
                        return [],()

            return lfn_in_transfer, dash_rep
        else:
            for lfn in files:
                if lfn['value'][0].find('temp') == 7:
                    docId = getHashLfn(lfn['value'][0])
                    self.logger.debug("Marking acquired %s" % docId)
                    # Load document to get the retry_count
                    try:
                        document = self.db.document(docId)
                    except Exception as ex:
                        msg = "Error loading document from couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    if document['state'] == 'new' or document['state'] == 'retry':
                        data = dict()
                        data['state'] = 'acquired'
                        data['last_update'] = time.time()
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        try:
                            self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        except Exception as ex:
                            msg = "Error updating document in couch"
                            msg += str(ex)
                            msg += str(traceback.format_exc())
                            self.logger.error(msg)
                            continue
                        self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                        lfn_in_transfer.append(lfn)
                        dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
                    else:
                        continue
                else:
                    good_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                    self.mark_good([good_lfn])
            return lfn_in_transfer, dash_rep
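
Both branches above rely on lfn['value'][0].find('temp') == 7, which is an index test for LFNs that live under /store/temp. A tiny illustration of what the check matches:

def is_temp_lfn(lfn):
    """True for LFNs in the temporary area, i.e. paths starting with /store/temp."""
    return lfn.find('temp') == 7          # 'temp' starts at index 7 in '/store/temp/...'

# e.g. is_temp_lfn('/store/temp/user/file.root') -> True
#      is_temp_lfn('/store/user/file.root')      -> False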
Example #14
0
    def mark_failed(files, crabServer, logger, failure_reason=""):
        """
        Something failed for these files so increment the retry count
        """
        msg = "Marking %s file(s) as failed" % len(files)
        logger.info(msg)
        if dryRun:
            logger.debug("DryRun: skip marking failes files")
            return

        nMarked = 0
        for lfn in files:
            source_lfn = lfn
            docId = getHashLfn(source_lfn)
            data = dict()
            data['asoworker'] = config.General.asoworker
            data['subresource'] = 'updatePublication'
            data['list_of_ids'] = [docId]
            data['list_of_publication_state'] = ['FAILED']
            data['list_of_retry_value'] = [1]
            data['list_of_failure_reason'] = [failure_reason]

            logger.debug("data: %s ", data)
            try:
                result = crabServer.post(api='filetransfers', data=encodeRequest(data))
                logger.debug("updated DocumentId: %s lfn: %s Result %s", docId, source_lfn, result)
            except Exception as ex:
                logger.error("Error updating status for DocumentId: %s lfn: %s", docId, source_lfn)
                logger.error("Error reason: %s", ex)

            nMarked += 1
            if nMarked % 10 == 0:
                logger.info('marked %d files', nMarked)
Example #15
0
    def pubFailed(self, task, files, failure_reasons=list(), force_failure=False):
        """

        :param files:
        :param failure_reasons:
        :return:
        """
        id_list = list()
        for Lfn in files:
            source_lfn = Lfn[0]
            docId = getHashLfn(source_lfn)
            id_list.append(docId)
            self.logger.debug("Marking failed %s" % docId)

        fileDoc = dict()
        fileDoc['asoworker'] = 'asodciangot1'
        fileDoc['subresource'] = 'updatePublication'
        fileDoc['list_of_ids'] = id_list
        fileDoc['list_of_publication_state'] = ['FAILED' for x in id_list]


        # TODO: implement retry, publish_retry_count missing from input?

        fileDoc['list_of_retry_value'] = [1 for x in id_list]
        fileDoc['list_of_failure_reason'] = failure_reasons

        try:
            self.oracleDB.post(self.config.oracleFileTrans,
                                data=encodeRequest(fileDoc))
            self.logger.debug("updated failed: %s " % id_list)
        except Exception:
            msg = "Error updating failed documents"
            self.logger.exception(msg)
Example #16
0
    def pubDone(self, workflow, files):
        """

        :param files:
        :param workflow:
        :return:
        """
        wfnamemsg = "%s: " % workflow
        data = dict()
        id_list = list()
        for lfn in files:
            source_lfn = lfn
            docId = getHashLfn(source_lfn)
            id_list.append(docId)
            msg = "Marking file %s as published." % lfn
            msg += " Document id: %s (source LFN: %s)." % (docId, source_lfn)
            self.logger.info(wfnamemsg + msg)
        data['asoworker'] = self.config.asoworker
        data['subresource'] = 'updatePublication'
        data['list_of_ids'] = id_list
        data['list_of_publication_state'] = ['DONE' for x in id_list]
        try:
            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(data))
            self.logger.debug("updated done: %s " % id_list)
        except Exception as ex:
            self.logger.error("Error during status update for published docs: %s" % ex)
Example #17
0
def mark_good(workflow, files, oracleDB, logger):
    """
    Mark the list of files as published
    """
    wfnamemsg = "%s: " % workflow
    for lfn in files:
        data = {}
        source_lfn = lfn
        docId = getHashLfn(source_lfn)
        msg = "Marking file %s as published." % lfn
        msg += " Document id: %s (source LFN: %s)." % (docId, source_lfn)
        logger.info(wfnamemsg+msg)
        data['asoworker'] = config.General.asoworker 
        data['subresource'] = 'updatePublication'
        data['list_of_ids'] = docId
        data['list_of_publication_state'] = 'DONE'
        data['list_of_retry_value'] = 1
        data['list_of_failure_reason'] = ''

        try:
            result = oracleDB.post(config.General.oracleFileTrans,
                                   data=encodeRequest(data))
            logger.debug("updated: %s %s " % (docId, result))
        except Exception as ex:
            logger.error("Error during status update: %s" %ex)
Example #18
0
    def transferred(self, files):
        """
        Mark the list of files as transferred
        """
        good_ids = list()
        updated_lfn = list()
        try:
            for lfn in files:
                lfn = lfn[0]
                if lfn.find('temp') == 7:
                    docId = getHashLfn(lfn)
                    good_ids.append(docId)
                    updated_lfn.append(lfn)
                    self.logger.debug("Marking done %s" % lfn)
                    self.logger.debug("Marking done %s" % docId)

            data = dict()
            data['asoworker'] = self.config.asoworker
            data['subresource'] = 'updateTransfers'
            data['list_of_ids'] = good_ids
            data['list_of_transfer_state'] = ["DONE" for x in good_ids]
            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(data))
            self.logger.debug("Marked good %s" % good_ids)
        except Exception:
            self.logger.exception("Error updating documents")
            return 1
        return 0
Example #19
0
def mark_failed(files, oracleDB, logger, failure_reason=""):
    """
    Something failed for these files so increment the retry count
    """
    h = 0
    for lfn in files:
        h += 1
        logger.debug("Marking failed %s" % h)
        source_lfn = lfn
        docId = getHashLfn(source_lfn)
        logger.debug("Marking failed %s" % docId)
        try:
            docbyId = oracleDB.get(config.General.oracleUserTrans,
                                   data=encodeRequest({'subresource': 'getById', 'id': docId}))
        except Exception:
            logger.exception("Error updating failed docs.")
            continue
        document = oracleOutputMapping(docbyId, None)[0]
        logger.debug("Document: %s" % document)

        try:
            fileDoc = dict()
            fileDoc['asoworker'] = config.General.asoworker
            fileDoc['subresource'] = 'updatePublication'
            fileDoc['list_of_ids'] = docId

            fileDoc['list_of_publication_state'] = 'FAILED'
            #if force_failure or document['publish_retry_count'] > self.max_retry:
            #    fileDoc['list_of_publication_state'] = 'FAILED'
            #else:
            #    fileDoc['list_of_publication_state'] = 'RETRY'
            # TODO: implement retry
            fileDoc['list_of_retry_value'] = 1
            fileDoc['list_of_failure_reason'] = failure_reason

            logger.debug("fileDoc: %s " % fileDoc)

            _ = oracleDB.post(config.General.oracleFileTrans,
                              data=encodeRequest(fileDoc))
            logger.debug("updated: %s " % docId)
        except Exception as ex:
            msg = "Error updating document: %s" % fileDoc
            msg += str(ex)
            msg += str(traceback.format_exc())
            logger.error(msg)
            continue
Example #20
0
    def acquire(self):
        """
        Get a number (1k for current oracle rest) of documents and bind them to this aso
        NEW -> ACQUIRED (asoworker NULL -> config.asoworker)
        :return:
        """

        self.logger.info('Retrieving users...')
        fileDoc = dict()
        fileDoc['subresource'] = 'activeUsers'
        fileDoc['grouping'] = 0
        fileDoc['asoworker'] = self.config.asoworker

        try:
            result = self.oracleDB.get(self.config.oracleFileTrans,
                                       data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire transfers from oracleDB: %s" % ex)
            return 1

        users = list()
        try:
            docs = oracleOutputMapping(result)
            users = [[x['username'], x['user_group'], x['user_role']] for x in docs]
            self.logger.info('Users to process: %s' % str(users))
        except Exception:
            self.logger.exception('User data malformed')

        for user in users:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquireTransfers'
            fileDoc['username'] = user[0]

            self.logger.debug("Retrieving transfers from oracleDB for user: %s " % user)

            try:
                self.oracleDB.post(self.config.oracleFileTrans,
                                   data=encodeRequest(fileDoc))
            except Exception as ex:
                self.logger.error("Failed to acquire transfers \
                                  from oracleDB: %s" % ex)

        return users
Example #21
0
    def getPublDescFiles(self, workflow, lfn_ready, logger):
        """
        Download and read the files describing
        what needs to be published
        """
        dataDict = {}
        dataDict['taskname'] = workflow
        dataDict['filetype'] = 'EDM'

        out = []

        # divide lfn per chunks, avoiding URI-too long exception
        def chunks(l, n):
            """
            Yield successive n-sized chunks from l.
            :param l: list to split in chunks
            :param n: chunk size
            :return: yield the next list chunk
            """
            for i in range(0, len(l), n):
                yield l[i:i + n]

        chunkSize = 10
        nIter = 0
        if len(lfn_ready) > chunkSize:
            logger.info(
                "retrieving input file metadata for %s files in chunks of %s",
                len(lfn_ready), chunkSize)
        for lfn_ in chunks(lfn_ready, chunkSize):
            nIter += 1
            dataDict['lfn'] = lfn_
            data = encodeRequest(dataDict, listParams=["lfn"])
            uri = self.REST_filemetadata

            try:
                #res = self.crabServer.get(uri=uri, data=encodeRequest(data, listParams=["lfn"]))
                res = self.crabServer.get(uri=uri, data=data)
                res = res[0]
            except Exception as ex:
                logger.error("Error during metadata retrieving from %s: %s",
                             uri, ex)
                continue

            # print(len(res['result']))
            for obj in res['result']:
                if isinstance(obj, dict):
                    out.append(obj)
                else:
                    # print type(obj)
                    out.append(json.loads(str(obj)))
            if nIter % 10 == 0:
                logger.info("... retrieved %s metadata", len(out))

        return out
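
The chunks() helper above is what keeps each filemetadata query URL short; it is a generic generator and works on any list. A quick usage note:

def chunks(l, n):
    """Yield successive n-sized chunks from l (same helper as in the example above)."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

# e.g. list(chunks(['a', 'b', 'c', 'd', 'e'], 2)) -> [['a', 'b'], ['c', 'd'], ['e']]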
Example #22
0
def getData(subresource):
    """This function will fetch data from Oracle table"""

    crabserver = CRABRest(hostname=CMSWEB,
                          localcert=CERTIFICATE,
                          localkey=KEY,
                          retry=3,
                          userAgent='CRABTaskWorker')
    crabserver.setDbInstance(dbInstance=DBINSTANCE)
    result = crabserver.get(api='filetransfers',
                            data=encodeRequest({
                                'subresource': subresource,
                                'grouping': 0
                            }))

    return oracleOutputMapping(result)
Example #23
0
    def lastPubTime(self, workflow):
        """

        :param workflow:
        :return:
        """
        data = dict()
        data['workflow'] = workflow
        data['subresource'] = 'updatepublicationtime'
        try:
            result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'),
                                       data=encodeRequest(data))
            self.logger.debug("%s last publication type update: %s " % (workflow, str(result)))
        except Exception:
            msg = "Error updating last publication time"
            self.logger.exception(msg)
Example #24
0
    def acquirePub(self):
        """

        :return:
        """
        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquirePublication'

        self.logger.debug("Retrieving publications from oracleDB")

        try:
            self.oracleDB.post(self.config.oracleFileTrans,
                    data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                              from oracleDB: %s" % ex)
Example #25
0
    def searchTask(self, workflow):
        """

        :param workflow:
        :return:
        """
        data = dict()
        data['workflow'] = workflow
        data['subresource'] = 'search'
        try:
            result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'),
                                       data=encodeRequest(data))
            self.logger.debug("task: %s " % str(result[0]))
            self.logger.debug("task: %s " % getColumn(result[0], 'tm_last_publication'))
        except Exception as ex:
            self.logger.error("Error during task doc retrieving: %s" % ex)
            return {}

        return oracleOutputMapping(result)
Example #26
0
    def source_destinations_by_user(self):
        """
        Get all the destinations for a user
        """
        if self.config.isOracle:
            self.logger.debug('Running acquiredTransfers query... ' + self.user)
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquiredTransfers'
            fileDoc['grouping'] = 1
            fileDoc['username'] = self.user
            group = self.group
            role = self.role
            if self.group == '':
                group = None
            if self.role == '':
                role = None
            fileDoc['vogroup'] = group
            fileDoc['vorole'] = role
            fileDoc['limit'] = self.config.max_files_per_transfer
            result = []

            self.logger.debug('Request: ' + str(fileDoc))
            try:
                results = self.oracleDB.get(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                result = oracleOutputMapping(results)
                res = [[x['source'], x['destination']] for x in result]
                res.sort()
                res = list(k for k, _ in itertools.groupby(res))
            except Exception as ex:
                self.logger.error("Failed to get acquired transfers \
                                  from oracleDB: %s" %ex)
                return [], {}
            return res, result
        else:
            query = {'group': True,
                     'startkey':[self.user, self.group, self.role], 'endkey':[self.user, self.group, self.role, {}, {}]}
            try:
                sites = self.db.loadView(self.config.ftscp_design, 'ftscp_all', query)
            except Exception:
                return []
            return [[x[4], x[3]] for x in sites['rows']]
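
The sort-then-groupby idiom above is how duplicate [source, destination] pairs are removed; isolated, it looks like this (the helper name is illustrative):

import itertools

def unique_links(pairs):
    """Return the sorted [source, destination] pairs with duplicates removed."""
    ordered = sorted(pairs)
    return [k for k, _ in itertools.groupby(ordered)]

# e.g. unique_links([['A', 'B'], ['A', 'B'], ['C', 'D']]) -> [['A', 'B'], ['C', 'D']]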
Example #27
0
    def retry(self):
        """
        Retry documents older than self.config.cooloffTime
        :return:
        """
        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'retryTransfers'
        fileDoc['time_to'] = self.config.cooloffTime
        self.logger.debug('fileDoc: %s' % fileDoc)

        results = dict()
        try:
            results = self.oracleDB.post(self.config.oracleFileTrans,
                                         data=encodeRequest(fileDoc))
        except Exception:
            self.logger.exception("Failed to get retry transfers in oracleDB: %s")
        self.logger.info("Retried files in cooloff: %s" % str(results))

        return 0
Example #28
0
 def algorithm(self, parameters=None):
     """
     Performs the doRetries method, loading the appropriate
     plugin for each job and handling it.
     """
     logging.debug("Running retryManager algorithm")
     if self.config.isOracle:
         fileDoc = dict()
         fileDoc['asoworker'] = self.config.asoworker
         fileDoc['subresource'] = 'retryTransfers'
         fileDoc['time_to'] = self.cooloffTime
         self.logger.debug('fileDoc: %s' % fileDoc)
         try:
             results = self.oracleDB.post(self.config.oracleFileTrans,
                                          data=encodeRequest(fileDoc))
         except Exception:
             self.logger.exception("Failed to get retry transfers in oracleDB")
             return
         logging.info("Retried files in cooloff: %s" % str(results))
     else:
         self.doRetries()
Example #29
0
    def submitted(self, files):
        """
        Mark the list of files as submitted once the FTS submission succeeded
        ACQUIRED -> SUBMITTED
        Return the lfns updated successfully and report data for dashboard
        :param files: tuple (source_lfn, dest_lfn)
        :return:
        """
        lfn_in_transfer = []
        dash_rep = ()
        id_list = list()
        docId = ''
        for lfn in files:
            lfn = lfn[0]
            if lfn.find('temp') == 7:
                self.logger.debug("Marking acquired %s" % lfn)
                docId = getHashLfn(lfn)
                self.logger.debug("Marking acquired %s" % docId)
                try:
                    id_list.append(docId)
                    lfn_in_transfer.append(lfn)
                except Exception as ex:
                    self.logger.error("Error getting id: %s" % ex)
                    raise

            lfn_in_transfer.append(lfn)
            # TODO: add dashboard stuff
            # dash_rep = (document['jobid'], document['job_retry_count'], document['taskname'])
        try:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = id_list
            fileDoc['list_of_transfer_state'] = ["SUBMITTED" for x in id_list]

            self.oracleDB.post(self.config.oracleFileTrans,
                               data=encodeRequest(fileDoc))
            self.logger.debug("Marked acquired %s" % (id_list))
        except Exception as ex:
            self.logger.error("Error during status update: %s" % ex)
        return lfn_in_transfer, dash_rep
Example #30
0
    def getPublDescFiles(self, workflow, lfn_ready):
        """
        Download and read the files describing
        what needs to be published
        """
        data = {}
        data['taskname'] = workflow
        data['filetype'] = 'EDM'

        out = []
        # divide lfn per chunks, avoiding URI-too long exception
        def chunks(l, n):
            """
            Yield successive n-sized chunks from l.
            :param l: list to split in chunks
            :param n: chunk size
            :return: yield the next list chunk
            """
            for i in range(0, len(l), n):
                yield l[i:i + n]

        for  lfn_ in chunks(lfn_ready, 50):
            data['lfn'] = lfn_

            try:
                res = self.oracleDB.get('/crabserver/preprod/filemetadata',
                                        data=encodeRequest(data, listParams=["lfn"]))
                res = res[0]
            except Exception as ex:
                self.logger.error("Error during metadata retrieving: %s" %ex)
                continue

            print(len(res['result']))
            for obj in res['result']:
                if isinstance(obj, dict):
                    out.append(obj)
                else:
                    #print type(obj)
                    out.append(json.loads(str(obj)))

        return out
Example #31
0
    def getPub(self):
        """

        :return:
        """
        to_pub_docs = list()
        filedoc = dict()
        filedoc['asoworker'] = self.config.asoworker
        filedoc['subresource'] = 'acquiredPublication'
        filedoc['grouping'] = 0

        try:
            results = self.oracleDB.get(self.config.oracleFileTrans,
                                        data=encodeRequest(filedoc))
            to_pub_docs = oracleOutputMapping(results)
        except Exception as ex:
            self.logger.error("Failed to get acquired publications \
                              from oracleDB: %s" % ex)
            return to_pub_docs

        return to_pub_docs
Example #32
0
    def update_FTSJobID(self, jobReport):
        """
        """
        for job in jobReport:
            try:
                fileDoc = dict()
                fileDoc['asoworker'] = self.config.asoworker
                fileDoc['subresource'] = 'updateTransfers'
                fileDoc['list_of_ids'] = [getHashLfn(x) for x in job['LFNs']]
                fileDoc['list_of_transfer_state'] = ["SUBMITTED" for x in job['LFNs']]
                fileDoc['list_of_fts_instance'] = [self.fts_server_for_transfer for x in job['LFNs']]
                fileDoc['list_of_fts_id'] = [job['FTSJobid'] for x in job['LFNs']]

                self.logger.debug("Marking submitted %s files " % (len(fileDoc['list_of_ids'])))
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                self.logger.debug("Marked submitted %s" % (fileDoc['list_of_ids']))
            except Exception as ex:
                self.logger.error("Error during status update: %s" %ex)
                time.sleep(10)
                return False
        return True       
Example #33
0
def mark_transferred(ids, oracleDB):
    """
    Mark the list of files as transferred
    :param ids: list of Oracle file ids to update
    :return: 0 success, 1 failure
    """
    os.environ["X509_CERT_DIR"] = os.getcwd()

    already_list = []
    if os.path.exists("task_process/transfers/transferred_files.txt"):
        with open("task_process/transfers/transferred_files.txt",
                  "r") as list_file:
            for _data in list_file.readlines():
                already_list.append(_data.split("\n")[0])

    ids = [x for x in ids if x not in already_list]

    if len(ids) > 0:
        try:
            logging.debug("Marking done %s", ids)

            data = dict()
            data['asoworker'] = 'rucio'
            data['subresource'] = 'updateTransfers'
            data['list_of_ids'] = ids
            data['list_of_transfer_state'] = ["DONE" for _ in ids]

            oracleDB.post('/filetransfers', data=encodeRequest(data))
            logging.info("Marked good %s", ids)
            with open("task_process/transfers/transferred_files.txt",
                      "a+") as list_file:
                for id_ in ids:
                    list_file.write("%s\n" % id_)
        except Exception:
            logging.exception("Error updating documents")
            return 1
    else:
        logging.info("Nothing to update (Done)")
    return 0
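
The transferred_files.txt bookkeeping in this example can be factored into two small helpers; the path is the one used above, everything else is an illustrative sketch:

import os

BOOKKEEPING = "task_process/transfers/transferred_files.txt"

def already_transferred():
    """Return the set of ids already recorded as transferred."""
    if not os.path.exists(BOOKKEEPING):
        return set()
    with open(BOOKKEEPING, "r") as list_file:
        return set(line.strip() for line in list_file if line.strip())

def record_transferred(ids):
    """Append newly transferred ids to the bookkeeping file."""
    with open(BOOKKEEPING, "a+") as list_file:
        for id_ in ids:
            list_file.write("%s\n" % id_)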
Example #34
0
def mark_transferred(ids, crabserver):
    """
    Mark the list of files as transferred
    :param ids: list of Oracle file ids to update
    :param crabserver: a CRABRest object for doing POST to CRAB server REST
    :return: True success, False failure
    """
    try:
        logging.debug("Marking done %s", ids)

        data = dict()
        data['asoworker'] = asoworker
        data['subresource'] = 'updateTransfers'
        data['list_of_ids'] = ids
        data['list_of_transfer_state'] = ["DONE" for _ in ids]

        crabserver.post('/filetransfers', data=encodeRequest(data))
        logging.info("Marked good %s", ids)
    except Exception:
        logging.exception("Error updating documents")
        return False
    return True
Example #35
0
def mark_failed(ids, failures_reasons, crabserver):
    """
    Mark the list of files as failed
    :param ids: list of Oracle file ids to update
    :param failures_reasons: list of strings with transfer failure messages
    :param crabserver: a CRABRest object for doing POST to CRAB server REST
    :return: True success, False failure
    """
    try:
        data = dict()
        data['asoworker'] = asoworker
        data['subresource'] = 'updateTransfers'
        data['list_of_ids'] = ids
        data['list_of_transfer_state'] = ["FAILED" for _ in ids]
        data['list_of_failure_reason'] = failures_reasons
        data['list_of_retry_value'] = [0 for _ in ids]

        crabserver.post('/filetransfers', data=encodeRequest(data))
        logging.info("Marked failed %s", ids)
    except Exception:
        logging.exception("Error updating documents")
        return False
    return True
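
A minimal driver showing how mark_transferred and mark_failed from the last two examples could be wired together after polling FTS; the done/failed id lists, the reasons list and the crabserver object are assumptions, not part of the original code:

def push_transfer_updates(crabserver, done_ids, failed_ids, failed_reasons):
    """Push DONE and FAILED updates, returning True only if every call succeeded."""
    ok = True
    if done_ids:
        ok = mark_transferred(done_ids, crabserver) and ok
    if failed_ids:
        ok = mark_failed(failed_ids, failed_reasons, crabserver) and ok
    return ok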
Example #36
0
def publishInDBS3(taskname):
    """

    """
    def createLogdir(dirname):
        """
        Create the directory dirname ignoring errors in case it exists. Exit if
        the directory cannot be created.
        """
        try:
            os.mkdir(dirname)
        except OSError as ose:
            if ose.errno != 17: #ignore the "Directory already exists error"
                print(str(ose))
                print("The task worker need to access the '%s' directory" % dirname)
                sys.exit(1)
    createLogdir('taskLogs')

    logger = logging.getLogger(taskname)
    logging.basicConfig(filename='taskLogs/'+taskname+'.log', level=logging.INFO, format=config.General.logMsgFormat)

    logger.info("Getting files to publish")

    toPublish = []
    # TODO move from new to done when processed
    with open("/tmp/"+taskname+".json") as f:
        toPublish = json.load(f)

    workflow = taskname

    if len(toPublish) == 0:
        return "EMPTY"

    if not workflow:
        logger.info("NO TASKNAME: %s" % toPublish[0])
    for k, v in toPublish[0].items():
        if k == 'taskname':
            logger.info("Starting: %s: %s" % (k, v))
    wfnamemsg = "%s: " % (workflow)

    user = toPublish[0]["User"]
    try:
        group = toPublish[0]["Group"]
        role = toPublish[0]["Role"]
    except Exception:
        group = ""
        role = ""

    if not group or group in ['null']:
        group = ""
    if not role or role in ['null']:
        role = ""

    userDN = toPublish[0]["UserDN"]
    pnn = toPublish[0]["Destination"]
    logger.info(wfnamemsg+" "+user)

    READ_PATH = "/DBSReader"
    READ_PATH_1 = "/DBSReader/"


    # TODO: get user role and group
    try:
        proxy = Proxy(userDN, group, role, logger)
    except Exception:
        logger.exception("Failed to retrieve user proxy")
        return "FAILED"

    oracleInstance = config.General.oracleDB
    oracleDB = HTTPRequests(oracleInstance,
                            proxy,
                            proxy)

    fileDoc = dict()
    fileDoc['subresource'] = 'search'
    fileDoc['workflow'] = workflow

    try:
        results = oracleDB.get(task_path,
                               data=encodeRequest(fileDoc))
    except Exception as ex:
        logger.error("Failed to get acquired publications from oracleDB for %s: %s" % (workflow, ex))
        return "FAILED"

    logger.info(results[0]['desc']['columns'])

    try:
        inputDatasetIndex = results[0]['desc']['columns'].index("tm_input_dataset")
        inputDataset = results[0]['result'][inputDatasetIndex]
        sourceURLIndex = results[0]['desc']['columns'].index("tm_dbs_url")
        sourceURL = results[0]['result'][sourceURLIndex]
        publish_dbs_urlIndex = results[0]['desc']['columns'].index("tm_publish_dbs_url")
        publish_dbs_url = results[0]['result'][publish_dbs_urlIndex]

        #sourceURL = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        if not sourceURL.endswith(READ_PATH) and not sourceURL.endswith(READ_PATH_1):
            sourceURL += READ_PATH


    except Exception:
        logger.exception("ERROR")
    ## When looking up parents may need to look in global DBS as well.
    globalURL = sourceURL
    globalURL = globalURL.replace('phys01', 'global')
    globalURL = globalURL.replace('phys02', 'global')
    globalURL = globalURL.replace('phys03', 'global')
    globalURL = globalURL.replace('caf', 'global')

    pr = os.environ.get("SOCKS5_PROXY")
    logger.info(wfnamemsg+"Source API URL: %s" % sourceURL)
    sourceApi = dbsClient.DbsApi(url=sourceURL, proxy=pr)
    logger.info(wfnamemsg+"Global API URL: %s" % globalURL)
    globalApi = dbsClient.DbsApi(url=globalURL, proxy=pr)

    WRITE_PATH = "/DBSWriter"
    MIGRATE_PATH = "/DBSMigrate"
    READ_PATH = "/DBSReader"

    if publish_dbs_url.endswith(WRITE_PATH):
        publish_read_url = publish_dbs_url[:-len(WRITE_PATH)] + READ_PATH
        publish_migrate_url = publish_dbs_url[:-len(WRITE_PATH)] + MIGRATE_PATH
    else:
        publish_migrate_url = publish_dbs_url + MIGRATE_PATH
        publish_read_url = publish_dbs_url + READ_PATH
        publish_dbs_url += WRITE_PATH

    try:
        logger.debug(wfnamemsg+"Destination API URL: %s" % publish_dbs_url)
        destApi = dbsClient.DbsApi(url=publish_dbs_url, proxy=pr)
        logger.debug(wfnamemsg+"Destination read API URL: %s" % publish_read_url)
        destReadApi = dbsClient.DbsApi(url=publish_read_url, proxy=pr)
        logger.debug(wfnamemsg+"Migration API URL: %s" % publish_migrate_url)
        migrateApi = dbsClient.DbsApi(url=publish_migrate_url, proxy=pr)
    except Exception:
        logger.exception('Wrong DBS URL %s' % publish_dbs_url)
        return "FAILED"

    logger.info("inputDataset: %s" % inputDataset)
    noInput = len(inputDataset.split("/")) <= 3

    # TODO: fix dbs dep
    if not noInput:
        try:
            existing_datasets = sourceApi.listDatasets(dataset=inputDataset, detail=True, dataset_access_type='*')
            primary_ds_type = existing_datasets[0]['primary_ds_type']
            # There's little chance this is correct, but it's our best guess for now.
            # CRAB2 uses 'crab2_tag' for all cases
            existing_output = destReadApi.listOutputConfigs(dataset=inputDataset)
        except Exception:
            logger.exception('Wrong DBS URL %s' % publish_dbs_url)
            return "FAILED"
        if not existing_output:
            msg = "Unable to list output config for input dataset %s." % (inputDataset)
            logger.error(wfnamemsg+msg)
            global_tag = 'crab3_tag'
        else:
            global_tag = existing_output[0]['global_tag']
    else:
        msg = "This publication appears to be for private MC."
        logger.info(wfnamemsg+msg)
        primary_ds_type = 'mc'
        global_tag = 'crab3_tag'

    acquisition_era_name = "CRAB"
    processing_era_config = {'processing_version': 1, 'description': 'CRAB3_processing_era'}

    appName = 'cmsRun'
    appVer = toPublish[0]["swversion"]
    pset_hash = toPublish[0]['publishname'].split("-")[-1]
    gtag = str(toPublish[0]['globaltag'])
    if gtag == "None":
        gtag = global_tag
    try:
        if toPublish[0]['acquisitionera'] and not toPublish[0]['acquisitionera'] in ["null"]:
            acquisitionera = str(toPublish[0]['acquisitionera'])
        else:
            acquisitionera = acquisition_era_name
    except Exception:
        acquisitionera = acquisition_era_name

    _, primName, procName, tier = toPublish[0]['outdataset'].split('/')

    primds_config = {'primary_ds_name': primName, 'primary_ds_type': primary_ds_type}
    msg = "About to insert primary dataset: %s" % (str(primds_config))
    logger.debug(wfnamemsg+msg)
    destApi.insertPrimaryDataset(primds_config)
    msg = "Successfully inserted primary dataset %s." % (primName)
    logger.debug(wfnamemsg+msg)

    final = {}
    failed = []
    publish_in_next_iteration = []
    published = []

    dataset = toPublish[0]['outdataset']
    # Find all (valid) files already published in this dataset.
    try:
        existingDBSFiles = destReadApi.listFiles(dataset=dataset, detail=True)
        existingFiles = [f['logical_file_name'] for f in existingDBSFiles]
        existingFilesValid = [f['logical_file_name'] for f in existingDBSFiles if f['is_file_valid']]
        msg = "Dataset %s already contains %d files" % (dataset, len(existingFiles))
        msg += " (%d valid, %d invalid)." % (len(existingFilesValid), len(existingFiles) - len(existingFilesValid))
        logger.info(wfnamemsg+msg)
        final['existingFiles'] = len(existingFiles)
    except Exception as ex:
        msg = "Error when listing files in DBS: %s" % (str(ex))
        msg += "\n%s" % (str(traceback.format_exc()))
        logger.error(wfnamemsg+msg)
        return "FAILED"

    # check if actions are needed
    workToDo = False

    for fileTo in toPublish:
        if fileTo['lfn'] not in existingFilesValid:
            workToDo = True

    if not workToDo:
        msg = "Nothing uploaded, %s has these files already or not enough files." % (dataset)
        logger.info(wfnamemsg+msg)
        return "NOTHING TO DO"

    acquisition_era_config = {'acquisition_era_name': acquisitionera, 'start_date': 0}

    output_config = {'release_version': appVer,
                     'pset_hash': pset_hash,
                     'app_name': appName,
                     'output_module_label': 'o',
                     'global_tag': global_tag,
                    }
    msg = "Published output config."
    logger.debug(wfnamemsg+msg)

    dataset_config = {'dataset': dataset,
                      'processed_ds_name': procName,
                      'data_tier_name': tier,
                      'acquisition_era_name': acquisitionera,
                      'dataset_access_type': 'VALID',
                      'physics_group_name': 'CRAB3',
                      'last_modification_date': int(time.time()),
                     }
    msg = "About to insert dataset: %s" % (str(dataset_config))
    logger.info(wfnamemsg+msg)
    del dataset_config['acquisition_era_name']

    # List of all files that must (and can) be published.
    dbsFiles = []
    dbsFiles_f = []
    # Set of all the parent files from all the files requested to be published.
    parentFiles = set()
    # Set of parent files for which the migration to the destination DBS instance
    # should be skipped (because they were not found in DBS).
    parentsToSkip = set()
    # Set of parent files to migrate from the source DBS instance
    # to the destination DBS instance.
    localParentBlocks = set()
    # Set of parent files to migrate from the global DBS instance
    # to the destination DBS instance.
    globalParentBlocks = set()

    # Loop over all files to publish.
    for file_ in toPublish:
        logger.info(file_)
        # Check if this file was already published and if it is valid.
        if file_['lfn'] not in existingFilesValid:
            # We have a file to publish.
            # Get the parent files and for each parent file do the following:
            # 1) Add it to the list of parent files.
            # 2) Find the block to which it belongs and insert that block name in
            #    (one of) the set of blocks to be migrated to the destination DBS.
            for parentFile in list(file_['parents']):
                if parentFile not in parentFiles:
                    parentFiles.add(parentFile)
                    # Is this parent file already in the destination DBS instance?
                    # (If yes, then we don't have to migrate this block.)
                    blocksDict = destReadApi.listBlocks(logical_file_name=parentFile)
                    if not blocksDict:
                        # No, this parent file is not in the destination DBS instance.
                        # Maybe it is in the same DBS instance as the input dataset?
                        blocksDict = sourceApi.listBlocks(logical_file_name=parentFile)
                        if blocksDict:
                            # Yes, this parent file is in the same DBS instance as the input dataset.
                            # Add the corresponding block to the set of blocks from the source DBS
                            # instance that have to be migrated to the destination DBS.
                            localParentBlocks.add(blocksDict[0]['block_name'])
                        else:
                            # No, this parent file is not in the same DBS instance as input dataset.
                            # Maybe it is in global DBS instance?
                            blocksDict = globalApi.listBlocks(logical_file_name=parentFile)
                            if blocksDict:
                                # Yes, this parent file is in global DBS instance.
                                # Add the corresponding block to the set of blocks from global DBS
                                # instance that have to be migrated to the destination DBS.
                                globalParentBlocks.add(blocksDict[0]['block_name'])
                    # If this parent file is not in the destination DBS instance, is not in
                    # the source DBS instance, and is not in the global DBS instance, then it
                    # means it is not known to DBS and therefore we cannot migrate it.
                    # Put it in the set of parent files for which migration should be skipped.
                    if not blocksDict:
                        parentsToSkip.add(parentFile)
                # If this parent file should not be migrated because it is not known to DBS,
                # we remove it from the list of parents in the file-to-publish info dictionary
                # (so that when publishing, this "parent" file will not appear as a parent).
                if parentFile in parentsToSkip:
                    msg = "Skipping parent file %s, as it doesn't seem to be known to DBS." % (parentFile)
                    logger.info(wfnamemsg+msg)
                    if parentFile in file_['parents']:
                        file_['parents'].remove(parentFile)
            # Add this file to the list of files to be published.
            dbsFiles.append(format_file_3(file_))
            dbsFiles_f.append(file_)
        #print file
        published.append(file_['SourceLFN'])
    # Print a message with the number of files to publish.
    msg = "Found %d files not already present in DBS which will be published." % (len(dbsFiles))
    logger.info(wfnamemsg+msg)

    # If there are no files to publish, continue with the next dataset.
    if len(dbsFiles_f) == 0:
        msg = "Nothing to do for this dataset."
        logger.info(wfnamemsg+msg)
        return "NOTHING TO DO"

    # Migrate parent blocks before publishing.
    # First migrate the parent blocks that are in the same DBS instance
    # as the input dataset.
    if localParentBlocks:
        msg = "List of parent blocks that need to be migrated from %s:\n%s" % (sourceApi.url, localParentBlocks)
        logger.info(wfnamemsg+msg)
        statusCode, failureMsg = migrateByBlockDBS3(workflow,
                                                    migrateApi,
                                                    destReadApi,
                                                    sourceApi,
                                                    inputDataset,
                                                    localParentBlocks
                                                   )
        if statusCode:
            failureMsg += " Not publishing any files."
            logger.info(wfnamemsg+failureMsg)
            failed.extend([f['SourceLFN'] for f in dbsFiles_f])
            failure_reason = failureMsg
            published = [x for x in published if x not in failed]
            return "NOTHING TO DO"
    # Then migrate the parent blocks that are in the global DBS instance.
    if globalParentBlocks:
        msg = "List of parent blocks that need to be migrated from %s:\n%s" % (globalApi.url, globalParentBlocks)
        logger.info(wfnamemsg+msg)
        statusCode, failureMsg = migrateByBlockDBS3(workflow, migrateApi, destReadApi, globalApi, inputDataset, globalParentBlocks)
        if statusCode:
            failureMsg += " Not publishing any files."
            logger.info(wfnamemsg+failureMsg)
            failed.extend([f['SourceLFN'] for f in dbsFiles_f])
            failure_reason = failureMsg
            published = [x for x in published if x not in failed]
            return "NOTHING TO DO"
    # Publish the files in blocks. The blocks must have exactly max_files_per_block
    # files, unless there are fewer than max_files_per_block files to publish to
    # begin with. If there are more than max_files_per_block files to publish,
    # publish as many blocks as possible and leave the tail of files for the next
    # PublisherWorker call, unless forced to publish.
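    # Illustrative example (not part of the original logic): with 7 files and
    # max_files_per_block = 3, the loop below publishes blocks dbsFiles[0:3] and
    # dbsFiles[3:6], while the tail dbsFiles[6:7] (fewer than a full block) is
    # deferred via publish_in_next_iteration.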
    block_count = 0
    count = 0
    max_files_per_block = config.General.max_files_per_block
    while True:
        block_name = "%s#%s" % (dataset, str(uuid.uuid4()))
        files_to_publish = dbsFiles[count:count+max_files_per_block]
        try:
            block_config = {'block_name': block_name, 'origin_site_name': pnn, 'open_for_writing': 0}
            msg = "Inserting files %s into block %s." % ([f['logical_file_name']
                                                          for f in files_to_publish], block_name)
            logger.info(wfnamemsg+msg)
            blockDump = createBulkBlock(output_config, processing_era_config,
                                        primds_config, dataset_config,
                                        acquisition_era_config, block_config, files_to_publish)
            #logger.debug(wfnamemsg+"Block to insert: %s\n %s" % (blockDump, destApi.__dict__ ))

            destApi.insertBulkBlock(blockDump)
            block_count += 1
        except Exception as ex:
            logger.error("Error for files: %s" % [f['SourceLFN'] for f in toPublish])
            failed.extend([f['SourceLFN'] for f in toPublish])
            msg = "Error when publishing (%s) " % ", ".join(failed)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logger.error(wfnamemsg+msg)
            failure_reason = str(ex)
        count += max_files_per_block
        files_to_publish_next = dbsFiles_f[count:count+max_files_per_block]
        if len(files_to_publish_next) < max_files_per_block:
            publish_in_next_iteration.extend([f["SourceLFN"] for f in files_to_publish_next])
            break
    published = [x for x in published if x not in failed + publish_in_next_iteration]
    # Fill number of files/blocks published for this dataset.
    final['files'] = len(dbsFiles) - len(failed) - len(publish_in_next_iteration)
    final['blocks'] = block_count
    # Print a publication status summary for this dataset.
    msg = "End of publication status for dataset %s:" % (dataset)
    msg += " failed (%s) %s" % (len(failed), failed)
    msg += ", published (%s) %s" % (len(published), published)
    msg += ", publish_in_next_iteration (%s) %s" % (len(publish_in_next_iteration),
                                                    publish_in_next_iteration)
    msg += ", results %s" % (final)
    logger.info(wfnamemsg+msg)

    try:
        if published:
            mark_good(workflow, published, oracleDB, logger)
        if failed:
            logger.debug("Failed files: %s " % failed)
            mark_failed(failed, oracleDB, logger, failure_reason)
    except:
        logger.exception("Status update failed")

    return 0
Example #37
0
    def testFileTransferPUT(self):
        """
        _testFileTransferPUT_

        A simple test of testFileTransferPUT with fake data
        """
        # We just send fake data which is not monitored by dashboard.
        # Only the first time a user is seen do we decide whether publication is ON or NOT.
        for user in self.users:
            timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
            for i in range(self.totalFiles):
                now = int(time.time())
                # Generate a taskname
                workflowName = ""
                taskname = ""
                if user not in self.tasks:
                    workflowName = "".join([
                        random.choice(string.ascii_lowercase)
                        for _ in range(20)
                    ]) + "_" + str(now)
                    publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
                else:
                    workflowName = self.tasks[user]['workflowName']
                    publicationState = self.tasks[user]['publication']
                transferState = random.choice(['NEW', 'DONE'])
                taskname = generateTaskName(user, workflowName, timestamp)
                finalLfn = self.lfnBase % (user, workflowName, i,
                                           random.randint(1, 9999))
                idHash = getHashLfn(finalLfn)
                self.fileDoc['id'] = idHash
                self.fileDoc['job_id'] = i
                self.fileDoc['username'] = user
                self.fileDoc['taskname'] = taskname
                self.fileDoc['start_time'] = int(time.time())
                self.fileDoc['source_lfn'] = finalLfn
                self.fileDoc['destination_lfn'] = finalLfn
                self.fileDoc['transfer_state'] = transferState
                self.fileDoc['publication_state'] = publicationState
                print(self.fileDoc)
                self.server.put('/crabserver/dev/fileusertransfers',
                                data=encodeRequest(self.fileDoc))
                # If we put the same doc twice, it should raise an error.
                # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
                # These tasks are kept for future calls
                if user not in self.tasks:
                    self.tasks[user] = {
                        'workflowName': workflowName,
                        'taskname': taskname,
                        'listOfIds': [],
                        'publication': publicationState,
                        'toTransfer': 0,
                        'toPublish': 0,
                        'total': self.totalFiles
                    }
                if self.tasks[user]['publication'] == 'NEW':
                    self.tasks[user]['toPublish'] += 1
                if transferState == 'NEW':
                    self.tasks[user]['toTransfer'] += 1
                self.tasks[user]['listOfIds'].append(idHash)
        # Now check the transfer and publication status for every task.
        for username in self.tasks:
            taskname = self.tasks[username]['taskname']
            for query in ['getTransferStatus', 'getPublicationStatus']:
                result = self.server.get('/crabserver/dev/fileusertransfers',
                                         data=encodeRequest({
                                             'subresource':
                                             query,
                                             'username':
                                             username,
                                             'taskname':
                                             taskname
                                         }))
                print(result)
                print(result[0]['result'])
                taskInfoDict = oracleOutputMapping(result, 'id')
                print(taskInfoDict)
                for key, docDict in taskInfoDict.items():
                    result = self.server.get(
                        '/crabserver/dev/fileusertransfers',
                        data=encodeRequest({
                            'subresource': 'getById',
                            'id': key
                        }))

        randomUsers = random.sample(
            set(self.users), 3
        )  # Take half of the users and kill their transfers for specific task
        for username in randomUsers:
            taskname = self.tasks[username]['taskname']
            result = self.server.post('/crabserver/dev/fileusertransfers',
                                      data=encodeRequest({
                                          'subresource': 'killTransfers',
                                          'username': username,
                                          'taskname': taskname
                                      }))
            print(result)
        # oneUser is left for killing a list of IDs
        # leftUsers will be killing transfers one by one for specific id.
        leftUsers = list(set(self.users) - set(randomUsers))
        oneUser = random.sample(set(leftUsers), 1)
        leftUsers = list(set(leftUsers) - set(oneUser))
        for username in leftUsers:
            # First get all remaining ids for this user
            result = self.server.get('/crabserver/dev/fileusertransfers',
                                     data=encodeRequest({
                                         'subresource':
                                         'getTransferStatus',
                                         'username':
                                         username,
                                         'taskname':
                                         self.tasks[username]['taskname']
                                     }))
            resultOut = oracleOutputMapping(result, None)
            print("**" * 50)
            for outDict in resultOut:
                print(outDict)
                result = self.server.post('/crabserver/dev/fileusertransfers',
                                          data=encodeRequest({
                                              'subresource':
                                              'killTransfersById',
                                              'username':
                                              username,
                                              'listOfIds':
                                              outDict['id']
                                          }))
                print(result)
            print(resultOut)
            print(result)
        for username in oneUser:
            result = self.server.post(
                '/crabserver/dev/fileusertransfers',
                data=encodeRequest(
                    {
                        'subresource': 'killTransfersById',
                        'username': username,
                        'listOfIds': self.tasks[username]['listOfIds']
                    }, ['listOfIds']))
            # As this asks to kill all transfers in NEW state, we need to double check what
            # we submitted before and whether the kill output is correct.
            print(result)
            print(self.tasks[username])
Example #38
0
def submit(phedex, ftsContext, toTrans):
    """
    submit transfer jobs

    - group files to be transferred by source site
    - prepare job chunks of max 200 transfers
    - submit fts jobs

    :param phedex: PhEDEx data service client used to map LFNs to PFNs
    :param ftsContext: fts client ftsContext
    :param toTrans: list of [source lfn, destination lfn, oracle file id, source site,
                    destination site, username, taskname]
    :return: list of jobids submitted
    """
    threadLock = threading.Lock()
    threads = []
    jobids = []
    to_update = []

    oracleDB = HTTPRequests(rest_filetransfers,
                            proxy,
                            proxy)

    sources = list(set([x[3] for x in toTrans]))

    for source in sources:

        ids = [x[2] for x in toTrans if x[3] == source]
        username = toTrans[0][5]
        taskname = toTrans[0][6]
        src_lfns = [x[0] for x in toTrans if x[3] == source]
        dst_lfns = [x[1] for x in toTrans if x[3] == source]

        sorted_source_pfns = []
        sorted_dest_pfns = []

        try:
            for chunk in chunks(src_lfns, 10):
                unsorted_source_pfns = [[k[1], str(x)] for k, x in phedex.getPFN(source, chunk).items()]
                for order_lfn in chunk:
                    for lfn, pfn in unsorted_source_pfns:
                        if order_lfn == lfn:
                            sorted_source_pfns.append(pfn)
                            break

            for chunk in chunks(dst_lfns, 10):
                unsorted_dest_pfns = [[k[1], str(x)] for k, x in phedex.getPFN(toTrans[0][4], chunk).items()]
                for order_lfn in chunk:
                    for lfn, pfn in unsorted_dest_pfns:
                        if order_lfn == lfn:
                            sorted_dest_pfns.append(pfn)
                            break
        except Exception as ex:
            logging.error("Failed to map lfns to pfns: %s", ex)
            mark_failed(ids, ["Failed to map lfn to pfn: " + str(ex) for _ in ids])

        source_pfns = sorted_source_pfns
        dest_pfns = sorted_dest_pfns

        tx_from_source = [[x[0], x[1], x[2], source, username, taskname] for x in zip(source_pfns, dest_pfns, ids)] 

        for files in chunks(tx_from_source, 200):
            thread = submit_thread(threadLock, logging, ftsContext, files, source, jobids, to_update)
            thread.start()
            threads.append(thread)

    for t in threads:
        t.join()

    for fileDoc in to_update:
        _ = oracleDB.post('/filetransfers',
                          data=encodeRequest(fileDoc))
        logging.info("Marked submitted %s files", fileDoc['list_of_ids'])

    return jobids
Example #39
0
from __future__ import division


from RESTInteractions import HTTPRequests
from ServerUtilities import encodeRequest, oracleOutputMapping

server = HTTPRequests('cmsweb-testbed.cern.ch',
                      '/data/srv/asyncstageout/state/asyncstageout/creds/OpsProxy',
                      '/data/srv/asyncstageout/state/asyncstageout/creds/OpsProxy')

fileDoc = {}
fileDoc['asoworker'] = 'asodciangot1'
fileDoc['subresource'] = 'acquireTransfers'

result = server.post('/crabserver/dev/filetransfers',
                     data=encodeRequest(fileDoc))


print(result)
"""
fileDoc = {}
fileDoc['asoworker'] = 'asodciangot1'
fileDoc['subresource'] = 'acquiredTransfers'
fileDoc['grouping'] = 0

result = server.get('/crabserver/dev/filetransfers',
                    data=encodeRequest(fileDoc))

#print(oracleOutputMapping(result))

ids = [str(x['id']) for x in oracleOutputMapping(result)]
"""
Example #40
0
    def oracleSiteUser(self, db):
        """
        1. Acquire transfers from DB
        2. Get acquired users and destination sites
        """

        self.logger.info('Retrieving users...')
        fileDoc = dict()
        fileDoc['subresource'] = 'activeUsers'
        fileDoc['grouping'] = 0
        fileDoc['asoworker'] = self.config.asoworker

        result = dict()
        try:
            result = db.get(self.config.oracleFileTrans,
                             data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire transfers \
                              from oracleDB: %s" % ex)
            return []
        
        self.logger.debug(oracleOutputMapping(result))
        # TODO: translate result into list((user,group,role),...)
        if len(oracleOutputMapping(result)) != 0:
            self.logger.debug(type( [[x['username'].encode('ascii','ignore'), x['user_group'], x['user_role']] for x in oracleOutputMapping(result)]))
            try:
                docs =  oracleOutputMapping(result)
                users = [[x['username'], x['user_group'], x['user_role']] for x in docs]
                self.logger.info('Users to process: %s' % str(users))
            except:
                self.logger.exception('User data malformed. ')
        else:
            self.logger.info('No new user to acquire')
            return []

        actives = list()
        for user in users:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquireTransfers'
            fileDoc['username'] = user[0]

            self.logger.debug("Retrieving transfers from oracleDB for user: %s " % user[0])

            try:
                result = db.post(self.config.oracleFileTrans,
                                 data=encodeRequest(fileDoc))
            except Exception as ex:
                self.logger.error("Failed to acquire transfers \
                                  from oracleDB: %s" %ex)
                continue

            self.doc_acq = str(result)
            for i in range(len(user)):
                if not user[i] or user[i] in ['None', 'NULL']:
                    user[i] = ''
                user[i] = str(user[i])
            actives.append(user)


            self.logger.debug("Transfers retrieved from oracleDB. %s " % users)

        return actives
Example #41
0
def monitor(user, taskname, log):
    """ function monitoring the Rucio replica locks of a rule 
    and updating db statuses accordingly 

    :param user: user HN name
    :type user: str
    :param taskname: CRAB taskname
    :type taskname: str
    :param log: log object
    :type log: logging
    """
    os.environ["X509_CERT_DIR"] = os.getcwd()

    proxy = None
    if os.path.exists('task_process/rest_filetransfers.txt'):
        with open("task_process/rest_filetransfers.txt", "r") as _rest:
            rest_filetransfers = _rest.readline().split('\n')[0]
            proxy = os.getcwd() + "/" + _rest.readline()
            log.info("Proxy: %s", proxy)
            os.environ["X509_USER_PROXY"] = proxy

    if not proxy:
        log.info('No proxy available yet - waiting for first post-job')
        return None

    # Prepare user and task info for monitoring
    scope = "user." + user
    name = taskname
    log.info("Initializing Monitor Rucio client for %s", taskname)
    crabInj = CRABDataInjector("",
                               "",
                               scope=scope,
                               account=user,
                               auth_type='x509_proxy')

    id_map = {}
    lfn_map = {}
    source_rse = {}

    # create maps for lfn --> oracle id, source rse
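    # Each line of task_process/transfers.txt is expected to be a JSON document with
    # at least 'destination_lfn', 'id' and 'source' keys, e.g. (illustrative values):
    #   {"destination_lfn": "/store/user/...", "id": "<oracle id>", "source": "T2_XX_Site"}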
    if os.path.exists('task_process/transfers.txt'):
        with open('task_process/transfers.txt', 'r') as _list:
            for _data in _list.readlines():
                try:
                    doc = json.loads(_data)
                    id_map.update({doc['destination_lfn']: doc['id']})
                    lfn_map.update({doc['id']: doc['destination_lfn']})
                    source_rse.update(
                        {doc['destination_lfn']: doc['source'] + "_Temp"})
                except Exception:
                    continue
    if os.path.exists('task_process/transfers_direct.txt'):
        with open('task_process/transfers_direct.txt', 'r') as _list:
            for _data in _list.readlines():
                try:
                    doc = json.loads(_data)
                    id_map.update({doc['destination_lfn']: doc['id']})
                    lfn_map.update({doc['id']: doc['destination_lfn']})
                except Exception:
                    continue

    # get the rule for this rucio dataset
    try:
        rules_ = crabInj.cli.list_did_rules(scope, name)
        # {u'name': u'/store/user/dciangot/DStarToD0Pi_D0KPi_DStarFilter_TuneCP5_13TeV-pythia8-evtgen/crab_DStar_rucio_rucio_198_7/190129_085050/0000/DS2b_17_1.root', u'rse': u'T2_IT_Pisa', u'state': u'OK', u'scope': u'user.dciangot', u'rse_id': u'200b6830ca424d87a2e0ae855341b084', u'rule_id': u'4bc56a77ac6743e791dfedaa11db1e1c'}
        list_good = []
        list_failed = []
        list_failed_tmp = []
        list_stuck = []
        list_update = []

        rules = next(rules_)
        log.debug("RULES %s", rules)

    except Exception:
        log.exception("Failed to retrieve rule information")
        return

    locks_generator = None

    # get replica locks and monitor status
    try:
        locks_generator = crabInj.cli.list_replica_locks(rules['id'])
    except Exception:
        if rules['state'] == 'STUCK':
            transfers = crabInj.cli.examine_replication_rule(
                rules['id'])['transfers']
            for lfn in transfers:
                list_stuck.append((lfn['name'], 'Rule STUCK.'))
            # Avoid iterating over None below when the rule is stuck.
            locks_generator = []
        else:
            log.exception('Unable to get replica locks')
            return

    # analyze replica locks info for each file
    sitename = None
    # TODO: should we split in threads ?
    for file_ in locks_generator:
        log.debug("LOCK %s", file_)
        filename = file_['name']
        status = file_['state']
        log.info("state %s", status)
        sitename = file_['rse']

        if status == "OK":
            list_good.append(filename)
        if status == "STUCK":
            list_failed_tmp.append((filename, "Transfer Stuck", sitename))
        if status == "REPLICATING":
            try:
                ftsJobID = crabInj.cli.list_request_by_did(
                    filename, sitename, scope)["external_id"]
                if ftsJobID:
                    list_update.append((filename, ftsJobID))
            except Exception:
                log.exception("Replica lock not found")

    # Expose FTS job ID in case of failure (if available)
    for name_ in [x[0] for x in list_failed_tmp]:
        try:
            ftsJobID = crabInj.cli.list_request_by_did(name_, sitename,
                                                       scope)["external_id"]
            if ftsJobID:
                list_failed.append((name_, "FTS job ID: %s" % ftsJobID))
            else:
                list_failed.append((
                    name_,
                    "No FTS job ID available for stuck transfers. Rucio could have failed to submit FTS job."
                ))
        except Exception:
            log.error(
                "No FTS job ID available for stuck transfer %s. Rucio could have failed to submit FTS job."
                % name_)
            list_failed.append((
                name_,
                "No FTS job ID available for stuck transfers. Rucio could have failed to submit FTS job."
            ))

    # Filter out files already staged directly from the wn
    direct_files = []
    if os.path.exists('task_process/transfers/registered_direct_files.txt'):
        with open("task_process/transfers/registered_direct_files.txt",
                  "r") as list_file:
            direct_files = [x.split('\n')[0] for x in list_file.readlines()]
            log.debug(
                "Checking if some failed files were directly staged from wn: {0}"
                .format(str(direct_files)))
            list_failed = [x for x in list_failed if x[0] not in direct_files]
            log.debug("{0} files to be marked as failed.".format(
                str(len(list_failed))))

    try:
        oracleDB = HTTPRequests(rest_filetransfers, proxy, proxy)
    except Exception:
        log.exception("Failed to set connection to oracleDB")
        return

    # Mark FAILED files on the DB and remove them from dataset and rucio replicas
    try:
        if len(list_failed) > 0:
            list_failed_name = [{
                'scope': scope,
                'name': x[0]
            } for x in list_failed]
            log.debug("Detaching %s" % list_failed_name)
            crabInj.cli.detach_dids(scope, name, list_failed_name)
            sources = list(
                set([source_rse[x['name']] for x in list_failed_name]))
            for source in sources:
                to_delete = [
                    x for x in list_failed_name
                    if source_rse[x['name']] == source
                ]
                log.debug("Deleting %s from %s" % (to_delete, source))
                crabInj.delete_replicas(source, to_delete)
            mark_failed([id_map[x[0]] for x in list_failed],
                        [x[1] for x in list_failed], oracleDB)
    except ReplicaNotFound:
        try:
            mark_failed([id_map[x[0]] for x in list_failed],
                        [x[1] for x in list_failed], oracleDB)
        except Exception:
            log.exception("Failed to update status for failed files")
    except Exception:
        log.exception("Failed to update status for failed files")

    # Mark files of STUCK rules on the DB and remove them from dataset and rucio replicas
    try:
        if len(list_stuck) > 0:
            list_stuck_name = [{
                'scope': scope,
                'name': x[0]
            } for x in list_stuck]
            log.debug("Detaching %s" % list_stuck_name)
            crabInj.cli.detach_dids(scope, name, list_stuck_name)
            sources = list(
                set([source_rse[x['name']] for x in list_stuck_name]))
            for source in sources:
                to_delete = [
                    x for x in list_stuck_name
                    if source_rse[x['name']] == source
                ]
                log.debug("Deleting %s from %s" % (to_delete, source))
                crabInj.delete_replicas(source, to_delete)
            mark_failed([id_map[x[0]] for x in list_stuck],
                        [x[1] for x in list_stuck], oracleDB)
    except ReplicaNotFound:
        try:
            mark_failed([id_map[x[0]] for x in list_stuck],
                        [x[1] for x in list_stuck], oracleDB)
        except Exception:
            log.exception("Failed to update status for failed files")
    except Exception:
        log.exception("Failed to update status for stuck rule")

    # Mark successful transfers as done on oracle DB
    try:
        mark_transferred([id_map[x] for x in list_good], oracleDB)
    except Exception:
        log.exception("Failed to update status for transferred files")

    try:
        already_list = []
        list_update_filt = []

        # Keep track of what has been already marked. Avoiding double updates at next iteration
        if os.path.exists("task_process/transfers/submitted_files.txt"):
            with open("task_process/transfers/submitted_files.txt",
                      "r") as list_file:
                for _data in list_file.readlines():
                    already_list.append(_data.split("\n")[0])

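        # already_list now holds the str((filename, ftsJobID)) entries appended to
        # submitted_files.txt at the end of previous iterations (see the write below).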
        list_update_filt = [
            x for x in list_update
            if str(x) not in already_list and x[0] not in direct_files
        ]

        # Insert FTS job ID in oracle DB for all the available transfers
        if len(list_update_filt) > 0:
            list_update = list_update_filt
            fileDoc = dict()
            fileDoc['asoworker'] = 'rucio'
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = [id_map[x[0]] for x in list_update]
            fileDoc['list_of_transfer_state'] = [
                "SUBMITTED" for _ in list_update
            ]
            fileDoc['list_of_fts_instance'] = [
                'https://fts3.cern.ch:8446/' for _ in list_update
            ]
            fileDoc['list_of_fts_id'] = [x[1] for x in list_update]
            oracleDB.post('/filetransfers', data=encodeRequest(fileDoc))
            log.debug("Marked submitted %s" %
                      [id_map[x[0]] for x in list_update])

            with open("task_process/transfers/submitted_files.txt",
                      "a+") as list_file:
                for update in list_update:
                    log.debug("{0}\n".format(str(update)))
                    list_file.write("{0}\n".format(str(update)))
        else:
            log.info("Nothing to update (fts job ID)")
    except Exception:
        log.exception('Failed to update file status for FTSJobID inclusion.')
Example #42
0
    def mark_failed(self, files=[], failures_reasons=[], force_fail=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            self.logger.debug("Document: %s" % lfn)
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value']:
                    temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value']
            docId = getHashLfn(temp_lfn)
            # Load document to get the retry_count
            if self.config.isOracle:
                try:
                    self.logger.debug("Document: %s" %docId)
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                    document = oracleOutputMapping(docbyId)[0]
                    data = dict()
                    data['asoworker'] = self.config.asoworker
                    data['subresource'] = 'updateTransfers'
                    data['list_of_ids'] = docId

                    if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                        data['list_of_transfer_state'] = 'FAILED'
                        data['list_of_retry_value'] = 0
                    else:
                        data['list_of_transfer_state'] = 'RETRY'
                        fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                        if fatal_error:
                            data['list_of_transfer_state'] = 'FAILED'
                        
                    data['list_of_failure_reason'] = failures_reasons[files.index(lfn)]
                    data['list_of_retry_value'] = 0

                    self.logger.debug("update: %s" % data)
                    result = self.oracleDB.post(self.config.oracleFileTrans,
                                                data=encodeRequest(data))
                    if not data['list_of_transfer_state'] == 'RETRY':  
                        updated_lfn.append(lfn)
                    self.logger.debug("Marked failed %s" % lfn)
                except Exception as ex:
                    self.logger.error("Error updating document status: %s" %ex)
                    continue
            else:
                try:
                    document = self.db.document( docId )
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    # Prepare data to update the document in couch
                    if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                        data['state'] = 'failed'
                        data['end_time'] = now
                    else:
                        data['state'] = 'retry'
                        fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                        if fatal_error:
                            data['state'] = 'failed'
                            data['end_time'] = now

                    self.logger.debug("Failure list: %s" % failures_reasons)
                    self.logger.debug("Files: %s" % files)
                    self.logger.debug("LFN %s" % lfn)

                    data['failure_reason'] = failures_reasons[files.index(lfn)]
                    data['last_update'] = last_update
                    data['retry'] = now
                    # Update the document in couch
                    self.logger.debug("Marking failed %s" % docId)
                    try:
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri = updateUri, type = "PUT", decode = False)
                        updated_lfn.append(docId)
                        self.logger.debug("Marked failed %s" % docId)
                    except Exception as ex:
                        msg = "Error in updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                else: updated_lfn.append(docId)
        self.logger.debug("failed file updated")
        return updated_lfn
Example #43
0
    def active_users(self, db):
        """
        Query a view for users with files to transfer. Get this from the
        following view:
            publish?group=true&group_level=1
        """
        if self.config.isOracle:
            active_users = []

            fileDoc = {}
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquirePublication'

            self.logger.debug("Retrieving publications from oracleDB")

            results = ''
            try:
                results = db.post(self.config.oracleFileTrans,
                                  data=encodeRequest(fileDoc))
            except Exception as ex:
                self.logger.error("Failed to acquire publications \
                                  from oracleDB: %s" %ex)
                
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquiredPublication'
            fileDoc['grouping'] = 0

            self.logger.debug("Retrieving acquired puclications from oracleDB")

            try:
                results = db.get(self.config.oracleFileTrans,
                                 data=encodeRequest(fileDoc))
                result = oracleOutputMapping(results)
            except Exception as ex:
                self.logger.error("Failed to acquire publications \
                                  from oracleDB: %s" %ex)

            self.logger.debug("%s acquired puclications retrieved" % len(result))
            #TODO: join query for publisher (same of submitter)
            unique_users = [list(i) for i in set(tuple([x['username'], x['user_group'], x['user_role']]) for x in result 
                                                 if x['transfer_state'] == 3)]
            return unique_users
        else:
            # TODO: Remove stale=ok for now until tested
            # query = {'group': True, 'group_level': 3, 'stale': 'ok'}
            query = {'group': True, 'group_level': 3}
            try:
                users = db.loadView('DBSPublisher', 'publish', query)
            except Exception as e:
                self.logger.exception('A problem occurred when contacting couchDB: %s' % e)
                return []

            if len(users['rows']) <= self.config.publication_pool_size:
                active_users = users['rows']
                active_users = [x['key'] for x in active_users]
            else:
                pool_size=self.config.publication_pool_size
                sorted_users = self.factory.loadObject(self.config.algoName,
                                                       args=[self.config,
                                                             self.logger,
                                                             users['rows'],
                                                             pool_size],
                                                       getFromCache=False,
                                                       listFlag = True)
                active_users = sorted_users()[:self.config.publication_pool_size]
            self.logger.info('%s active users' % len(active_users))
            self.logger.debug('Active users are: %s' % active_users)

            return active_users
Example #44
0
def publishInDBS3(taskname):
    """

    """
    def createLogdir(dirname):
        """
        Create the directory dirname, ignoring errors in case it already exists. Exit if
        the directory cannot be created.
        """
        try:
            os.mkdir(dirname)
        except OSError as ose:
            if ose.errno != 17: #ignore the "Directory already exists error"
                print(str(ose))
                print("The task worker need to access the '%s' directory" % dirname)
                sys.exit(1)

    createLogdir('taskLogs')
    logger = logging.getLogger(taskname)
    logging.basicConfig(filename='taskLogs/'+taskname+'.log', level=logging.INFO, format=config.General.logMsgFormat)

    logger.info("Getting files to publish")

    toPublish = []
    # TODO move from new to done when processed
    with open("/tmp/publisher_files/"+taskname+".json") as f:
        toPublish = json.load(f)

    workflow = taskname

    if len(toPublish) == 0:
        return "EMPTY"

    if not workflow:
        logger.info("NO TASKNAME: %s" % toPublish[0])
    for k, v in toPublish[0].iteritems():
        if k == 'taskname':
            logger.info("Starting: %s: %s" % (k, v))
    wfnamemsg = "%s: " % (workflow)

    user = toPublish[0]["User"]
    try:
        group = toPublish[0]["Group"]
        role = toPublish[0]["Role"]
    except:
        group = ""
        role = ""

    if not group or group in ['null']:
        group = ""
    if not role or role in ['null']:
        role = ""

    userDN = toPublish[0]["UserDN"]
    pnn = toPublish[0]["Destination"]
    logger.info(wfnamemsg+" "+user)

    READ_PATH = "/DBSReader"
    READ_PATH_1 = "/DBSReader/"

    # TODO: get user role and group
    try:
        proxy = Proxy(userDN, group, role, logger)
    except:
        logger.exception("Failed to retrieve user proxy")
        return "FAILED"

    oracleInstance = config.General.oracleDB
    oracleDB = HTTPRequests(oracleInstance,
                            proxy,
                            proxy)

    fileDoc = dict()
    fileDoc['subresource'] = 'search'
    fileDoc['workflow'] = workflow

    try:
        results = oracleDB.get(task_path,
                               data=encodeRequest(fileDoc))
    except Exception as ex:
        logger.error("Failed to get acquired publications from oracleDB for %s: %s" % (workflow, ex))
        return "FAILED"

    logger.info(results[0]['desc']['columns'])

    try:
        inputDatasetIndex = results[0]['desc']['columns'].index("tm_input_dataset")
        inputDataset = results[0]['result'][inputDatasetIndex]
        sourceURLIndex = results[0]['desc']['columns'].index("tm_dbs_url")
        sourceURL = results[0]['result'][sourceURLIndex]
        publish_dbs_urlIndex = results[0]['desc']['columns'].index("tm_publish_dbs_url")
        publish_dbs_url = results[0]['result'][publish_dbs_urlIndex]

        #sourceURL = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
        if not sourceURL.endswith(READ_PATH) and not sourceURL.endswith(READ_PATH_1):
            sourceURL += READ_PATH
    except Exception:
        logger.exception("ERROR")
    # When looking up parents may need to look in global DBS as well.
    globalURL = sourceURL
    globalURL = globalURL.replace('phys01', 'global')
    globalURL = globalURL.replace('phys02', 'global')
    globalURL = globalURL.replace('phys03', 'global')
    globalURL = globalURL.replace('caf', 'global')
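    # For example (assumed URL shape, cf. the commented sourceURL above): a source such as
    # https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader maps to
    # https://cmsweb.cern.ch/dbs/prod/global/DBSReader for parent lookups.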

    pr = os.environ.get("SOCKS5_PROXY")
    logger.info(wfnamemsg+"Source API URL: %s" % sourceURL)
    sourceApi = dbsClient.DbsApi(url=sourceURL, proxy=pr)
    logger.info(wfnamemsg+"Global API URL: %s" % globalURL)
    globalApi = dbsClient.DbsApi(url=globalURL, proxy=pr)

    WRITE_PATH = "/DBSWriter"
    MIGRATE_PATH = "/DBSMigrate"
    READ_PATH = "/DBSReader"

    if publish_dbs_url.endswith(WRITE_PATH):
        publish_read_url = publish_dbs_url[:-len(WRITE_PATH)] + READ_PATH
        publish_migrate_url = publish_dbs_url[:-len(WRITE_PATH)] + MIGRATE_PATH
    else:
        publish_migrate_url = publish_dbs_url + MIGRATE_PATH
        publish_read_url = publish_dbs_url + READ_PATH
        publish_dbs_url += WRITE_PATH

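    # In short: a publish_dbs_url ending in /DBSWriter is reused to derive the companion
    # /DBSReader and /DBSMigrate endpoints on the same DBS instance; otherwise the three
    # suffixes are appended to the configured base URL.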
    try:
        logger.debug(wfnamemsg+"Destination API URL: %s" % publish_dbs_url)
        destApi = dbsClient.DbsApi(url=publish_dbs_url, proxy=pr)
        logger.debug(wfnamemsg+"Destination read API URL: %s" % publish_read_url)
        destReadApi = dbsClient.DbsApi(url=publish_read_url, proxy=pr)
        logger.debug(wfnamemsg+"Migration API URL: %s" % publish_migrate_url)
        migrateApi = dbsClient.DbsApi(url=publish_migrate_url, proxy=pr)
    except:
        logger.exception('Wrong DBS URL %s' % publish_dbs_url)
        return "FAILED"

    logger.info("inputDataset: %s" % inputDataset)
    noInput = len(inputDataset.split("/")) <= 3
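    # A fully-specified dataset name like '/Primary/Processed/TIER' splits on '/' into
    # 4 fields (the leading '/' yields an empty first element), so 3 or fewer fields
    # means there is no usable input dataset.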

    # TODO: fix dbs dep
    if not noInput:
        try:
            existing_datasets = sourceApi.listDatasets(dataset=inputDataset, detail=True, dataset_access_type='*')
            primary_ds_type = existing_datasets[0]['primary_ds_type']
            # There's little chance this is correct, but it's our best guess for now.
            # CRAB2 uses 'crab2_tag' for all cases
            existing_output = destReadApi.listOutputConfigs(dataset=inputDataset)
        except:
            logger.exception('Wrong DBS URL %s' % publish_dbs_url)
            return "FAILED"
        if not existing_output:
            msg = "Unable to list output config for input dataset %s." % (inputDataset)
            logger.error(wfnamemsg+msg)
            global_tag = 'crab3_tag'
        else:
            global_tag = existing_output[0]['global_tag']
    else:
        msg = "This publication appears to be for private MC."
        logger.info(wfnamemsg+msg)
        primary_ds_type = 'mc'
        global_tag = 'crab3_tag'

    acquisition_era_name = "CRAB"
    processing_era_config = {'processing_version': 1, 'description': 'CRAB3_processing_era'}

    appName = 'cmsRun'
    appVer = toPublish[0]["swversion"]
    pset_hash = toPublish[0]['publishname'].split("-")[-1]
    gtag = str(toPublish[0]['globaltag'])
    if gtag == "None":
        gtag = global_tag
    try:
        if toPublish[0]['acquisitionera'] and not toPublish[0]['acquisitionera'] in ["null"]:
            acquisitionera = str(toPublish[0]['acquisitionera'])
        else:
            acquisitionera = acquisition_era_name
    except:
        acquisitionera = acquisition_era_name

    _, primName, procName, tier = toPublish[0]['outdataset'].split('/')

    primds_config = {'primary_ds_name': primName, 'primary_ds_type': primary_ds_type}
    msg = "About to insert primary dataset: %s" % (str(primds_config))
    logger.debug(wfnamemsg+msg)
    destApi.insertPrimaryDataset(primds_config)
    msg = "Successfully inserted primary dataset %s." % (primName)
    logger.debug(wfnamemsg+msg)

    final = {}
    failed = []
    publish_in_next_iteration = []
    published = []

    dataset = toPublish[0]['outdataset']
    # Find all (valid) files already published in this dataset.
    try:
        existingDBSFiles = destReadApi.listFiles(dataset=dataset, detail=True)
        existingFiles = [f['logical_file_name'] for f in existingDBSFiles]
        existingFilesValid = [f['logical_file_name'] for f in existingDBSFiles if f['is_file_valid']]
        msg = "Dataset %s already contains %d files" % (dataset, len(existingFiles))
        msg += " (%d valid, %d invalid)." % (len(existingFilesValid), len(existingFiles) - len(existingFilesValid))
        logger.info(wfnamemsg+msg)
        final['existingFiles'] = len(existingFiles)
    except Exception as ex:
        msg = "Error when listing files in DBS: %s" % (str(ex))
        msg += "\n%s" % (str(traceback.format_exc()))
        logger.error(wfnamemsg+msg)
        return "FAILED"

    # check if actions are needed
    workToDo = False

    for fileTo in toPublish:
        #print(existingFilesValid)
        if fileTo['lfn'] not in existingFilesValid:
            workToDo = True
            break

    if not workToDo:
        msg = "Nothing uploaded, %s has these files already or not enough files." % (dataset)
        logger.info(wfnamemsg+msg)
        return "NOTHING TO DO"

    acquisition_era_config = {'acquisition_era_name': acquisitionera, 'start_date': 0}

    output_config = {'release_version': appVer,
                     'pset_hash': pset_hash,
                     'app_name': appName,
                     'output_module_label': 'o',
                     'global_tag': global_tag,
                    }
    msg = "Published output config."
    logger.debug(wfnamemsg+msg)

    dataset_config = {'dataset': dataset,
                      'processed_ds_name': procName,
                      'data_tier_name': tier,
                      'acquisition_era_name': acquisitionera,
                      'dataset_access_type': 'VALID',
                      'physics_group_name': 'CRAB3',
                      'last_modification_date': int(time.time()),
                     }
    msg = "About to insert dataset: %s" % (str(dataset_config))
    logger.info(wfnamemsg+msg)
    del dataset_config['acquisition_era_name']

    # List of all files that must (and can) be published.
    dbsFiles = []
    dbsFiles_f = []
    # Set of all the parent files from all the files requested to be published.
    parentFiles = set()
    # Set of parent files for which the migration to the destination DBS instance
    # should be skipped (because they were not found in DBS).
    parentsToSkip = set()
    # Set of parent files to migrate from the source DBS instance
    # to the destination DBS instance.
    localParentBlocks = set()
    # Set of parent files to migrate from the global DBS instance
    # to the destination DBS instance.
    globalParentBlocks = set()

    # Loop over all files to publish.
    for file_ in toPublish:
        logger.info(file_)
        # Check if this file was already published and if it is valid.
        if file_['lfn'] not in existingFilesValid:
            # We have a file to publish.
            # Get the parent files and for each parent file do the following:
            # 1) Add it to the list of parent files.
            # 2) Find the block to which it belongs and insert that block name in
            #    (one of) the set of blocks to be migrated to the destination DBS.
            for parentFile in list(file_['parents']):
                if parentFile not in parentFiles:
                    parentFiles.add(parentFile)
                    # Is this parent file already in the destination DBS instance?
                    # (If yes, then we don't have to migrate this block.)
                    blocksDict = destReadApi.listBlocks(logical_file_name=parentFile)
                    if not blocksDict:
                        # No, this parent file is not in the destination DBS instance.
                        # Maybe it is in the same DBS instance as the input dataset?
                        blocksDict = sourceApi.listBlocks(logical_file_name=parentFile)
                        if blocksDict:
                            # Yes, this parent file is in the same DBS instance as the input dataset.
                            # Add the corresponding block to the set of blocks from the source DBS
                            # instance that have to be migrated to the destination DBS.
                            localParentBlocks.add(blocksDict[0]['block_name'])
                        else:
                            # No, this parent file is not in the same DBS instance as input dataset.
                            # Maybe it is in global DBS instance?
                            blocksDict = globalApi.listBlocks(logical_file_name=parentFile)
                            if blocksDict:
                                # Yes, this parent file is in global DBS instance.
                                # Add the corresponding block to the set of blocks from global DBS
                                # instance that have to be migrated to the destination DBS.
                                globalParentBlocks.add(blocksDict[0]['block_name'])
                    # If this parent file is not in the destination DBS instance, is not in
                    # the source DBS instance, and is not in the global DBS instance, then it
                    # means it is not known to DBS and therefore we cannot migrate it.
                    # Put it in the set of parent files for which migration should be skipped.
                    if not blocksDict:
                        parentsToSkip.add(parentFile)
                # If this parent file should not be migrated because it is not known to DBS,
                # we remove it from the list of parents in the file-to-publish info dictionary
                # (so that when publishing, this "parent" file will not appear as a parent).
                if parentFile in parentsToSkip:
                    msg = "Skipping parent file %s, as it doesn't seem to be known to DBS." % (parentFile)
                    logger.info(wfnamemsg+msg)
                    if parentFile in file_['parents']:
                        file_['parents'].remove(parentFile)
            # Add this file to the list of files to be published.
            dbsFiles.append(format_file_3(file_))
            dbsFiles_f.append(file_)
        #print file
        published.append(file_['SourceLFN'])
        #published.append(file_['lfn'].replace("/store","/store/temp"))

    # Print a message with the number of files to publish.
    msg = "Found %d files not already present in DBS which will be published." % (len(dbsFiles))
    logger.info(wfnamemsg+msg)

    # If there are no files to publish, continue with the next dataset.
    if len(dbsFiles_f) == 0:
        msg = "Nothing to do for this dataset."
        logger.info(wfnamemsg+msg)
        return "NOTHING TO DO"

    # Migrate parent blocks before publishing.
    # First migrate the parent blocks that are in the same DBS instance
    # as the input dataset.
    if localParentBlocks:
        msg = "List of parent blocks that need to be migrated from %s:\n%s" % (sourceApi.url, localParentBlocks)
        logger.info(wfnamemsg+msg)
        statusCode, failureMsg = migrateByBlockDBS3(workflow,
                                                    migrateApi,
                                                    destReadApi,
                                                    sourceApi,
                                                    inputDataset,
                                                    localParentBlocks
                                                   )
        if statusCode:
            failureMsg += " Not publishing any files."
            logger.info(wfnamemsg+failureMsg)
            failed.extend([f['SourceLFN'] for f in dbsFiles_f])
            #failed.extend([f['lfn'].replace("/store","/store/temp") for f in dbsFiles_f])
            failure_reason = failureMsg
            published = [x for x in published if x not in failed]
            return "NOTHING TO DO"
    # Then migrate the parent blocks that are in the global DBS instance.
    if globalParentBlocks:
        msg = "List of parent blocks that need to be migrated from %s:\n%s" % (globalApi.url, globalParentBlocks)
        logger.info(wfnamemsg+msg)
        statusCode, failureMsg = migrateByBlockDBS3(workflow, migrateApi, destReadApi, globalApi, inputDataset, globalParentBlocks)
        if statusCode:
            failureMsg += " Not publishing any files."
            logger.info(wfnamemsg+failureMsg)
            failed.extend([f['SourceLFN'] for f in dbsFiles_f])
            #failed.extend([f['lfn'].replace("/store","/store/temp") for f in dbsFiles_f])
            failure_reason = failureMsg
            published = [x for x in published if x not in failed]
            return "NOTHING TO DO"
    # Publish the files in blocks. The blocks must have exactly max_files_per_block
    # files, unless there are fewer than max_files_per_block files to publish to
    # begin with. If there are more than max_files_per_block files to publish,
    # publish as many blocks as possible and leave the tail of files for the next
    # PublisherWorker call, unless forced to publish.
    block_count = 0
    count = 0
    max_files_per_block = config.General.max_files_per_block
    while True:
        block_name = "%s#%s" % (dataset, str(uuid.uuid4()))
        files_to_publish = dbsFiles[count:count+max_files_per_block]
        try:
            block_config = {'block_name': block_name, 'origin_site_name': pnn, 'open_for_writing': 0}
            msg = "Inserting files %s into block %s." % ([f['logical_file_name']
                                                          for f in files_to_publish], block_name)
            logger.info(wfnamemsg+msg)
            blockDump = createBulkBlock(output_config, processing_era_config,
                                        primds_config, dataset_config,
                                        acquisition_era_config, block_config, files_to_publish)
            #logger.debug(wfnamemsg+"Block to insert: %s\n %s" % (blockDump, destApi.__dict__ ))

            destApi.insertBulkBlock(blockDump)
            block_count += 1
        except Exception as ex:
            #logger.error("Error for files: %s" % [f['SourceLFN'] for f in toPublish])
            logger.error("Error for files: %s" % [f['lfn'] for f in toPublish])
            failed.extend([f['SourceLFN'] for f in toPublish])
            #failed.extend([f['lfn'].replace("/store","/store/temp") for f in toPublish])
            msg = "Error when publishing (%s) " % ", ".join(failed)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logger.error(wfnamemsg+msg)
            failure_reason = str(ex)
            dump_file = '/tmp/failed-block-at-%s.txt' % time.time()
            with open(dump_file, 'w') as fd:
                fd.write(str(blockDump))
            logger.error("FAILING BLOCK SAVED AS %s" % dump_file)
        count += max_files_per_block
        files_to_publish_next = dbsFiles_f[count:count+max_files_per_block]
        if len(files_to_publish_next) < max_files_per_block:
            publish_in_next_iteration.extend([f["SourceLFN"] for f in files_to_publish_next])
            #publish_in_next_iteration.extend([f["lfn"].replace("/store","/store/temp") for f in files_to_publish_next])
            break
    published = [x for x in published if x not in failed + publish_in_next_iteration]
    # Fill number of files/blocks published for this dataset.
    final['files'] = len(dbsFiles) - len(failed) - len(publish_in_next_iteration)
    final['blocks'] = block_count
    # Print a publication status summary for this dataset.
    msg = "End of publication status for dataset %s:" % (dataset)
    msg += " failed (%s) %s" % (len(failed), failed)
    msg += ", published (%s) %s" % (len(published), published)
    msg += ", publish_in_next_iteration (%s) %s" % (len(publish_in_next_iteration),
                                                    publish_in_next_iteration)
    msg += ", results %s" % (final)
    logger.info(wfnamemsg+msg)

    try:
        if published:
            mark_good(workflow, published, oracleDB, logger)
        if failed:
            logger.debug("Failed files: %s " % failed)
            mark_failed(failed, oracleDB, logger, failure_reason)
    except Exception:
        logger.exception("Status update failed")

    return 0
Example #45
0
    def mark_failed(self, files=None, force_fail=False, submission_error=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files or []:
            data = {}
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value'][0]:
                    temp_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value'][0]

            # Load document and get the retry_count
            if self.config.isOracle:
                docId = getHashLfn(temp_lfn)
                self.logger.debug("Marking failed %s" % docId)
                try:
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                except Exception as ex:
                    self.logger.error("Error updating failed docs: %s" %ex)
                    continue
                document = oracleOutputMapping(docbyId, None)[0]
                self.logger.debug("Document: %s" % document)

                fileDoc = dict()
                fileDoc['asoworker'] = self.config.asoworker
                fileDoc['subresource'] = 'updateTransfers'
                fileDoc['list_of_ids'] = docId 

                if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                    fileDoc['list_of_transfer_state'] = 'FAILED'
                    fileDoc['list_of_retry_value'] = 1
                else:
                    fileDoc['list_of_transfer_state'] = 'RETRY'
                if submission_error:
                    fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                    fileDoc['list_of_retry_value'] = 1
                elif not self.valid_proxy:
                    fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                    fileDoc['list_of_retry_value'] = 1
                else:
                    fileDoc['list_of_failure_reason'] = "Site config problem."
                    fileDoc['list_of_retry_value'] = 1

                self.logger.debug("update: %s" % fileDoc)
                try:
                    updated_lfn.append(docId)
                    result = self.oracleDB.post(self.config.oracleFileTrans,
                                         data=encodeRequest(fileDoc))
                except Exception as ex:
                    msg = "Error updating document"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue

            else:
                docId = getHashLfn(temp_lfn)
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    # Prepare data to update the document in couch
                    if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                        data['state'] = 'failed'
                    else:
                        data['state'] = 'retry'
                    if submission_error:
                        data['failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                    elif not self.valid_proxy:
                        data['failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                    else:
                        data['failure_reason'] = "Site config problem."
                    data['last_update'] = last_update
                    data['retry'] = now

                    # Update the document in couch
                    self.logger.debug("Marking failed %s" % docId)
                    try:
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        updated_lfn.append(docId)
                        self.logger.debug("Marked failed %s" % docId)
                    except Exception as ex:
                        msg = "Error in updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
            self.logger.debug("failed file updated")
            return updated_lfn
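# A minimal standalone sketch (illustrative names, not part of the production code)
# of the retry-versus-fail decision used in mark_failed above: given the current
# transfer_retry_count, the max_retry limit and the force_fail flag, the next
# transfer state is either 'RETRY' or 'FAILED'.
def next_transfer_state(transfer_retry_count, max_retry=3, force_fail=False):
    """Mirror the state decision made in mark_failed above."""
    if force_fail or transfer_retry_count + 1 > max_retry:
        return 'FAILED'
    return 'RETRY'

# e.g. next_transfer_state(0) == 'RETRY'; next_transfer_state(3) == 'FAILED'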
Example #46
0
    def killThread(self, thread_id, transfers):
        """This is the worker thread function for kill command.
        """
        while True:
            transfer_list = transfers.get()
            self.logger.info("Starting thread %s" % (thread_id))
            user = transfer_list[0]['username']
            group = transfer_list[0]['user_group']
            role = transfer_list[0]['user_role']

            uiSetupScript = getattr(self.config, 'UISetupScript', None)

            self.logger.debug("Trying to get DN for %s %s %s %s" % (user, self.logger, self.config.opsProxy, self.config.opsProxy))
            try:
                userDN = getDNFromUserName(user, self.logger, ckey=self.config.opsProxy, cert=self.config.opsProxy)
            except Exception as ex:
                msg = "Error retrieving the user DN"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                transfers.task_done()
                time.sleep(1)
                continue
            if not userDN:
                transfers.task_done()
                time.sleep(1)
                continue
            self.logger.debug("user DN: %s" % userDN)

            try:
                defaultDelegation = {'logger': self.logger,
                                     'credServerPath': self.config.credentialDir,
                                     'myProxySvr': 'myproxy.cern.ch',
                                     'min_time_left': getattr(self.config, 'minTimeLeft', 36000),
                                     'serverDN': self.config.serverDN,
                                     'uisource': uiSetupScript,
                                     'cleanEnvironment': getattr(self.config, 'cleanEnvironment', False)}
                if hasattr(self.config, "cache_area"):
                    cache_area = self.config.cache_area
                    defaultDelegation['myproxyAccount'] = re.compile('https?://([^/]*)/.*').findall(cache_area)[0]
            except IndexError:
                self.logger.error('MyproxyAccount parameter cannot be retrieved from %s . ' % self.config.cache_area)
                transfers.task_done()
                time.sleep(1)
                continue
            if getattr(self.config, 'serviceCert', None):
                defaultDelegation['server_cert'] = self.config.serviceCert
            if getattr(self.config, 'serviceKey', None):
                defaultDelegation['server_key'] = self.config.serviceKey
            try:
                defaultDelegation['userDN'] = userDN
                defaultDelegation['group'] = group if group else ''
                defaultDelegation['role'] = role if role else ''
                self.logger.debug('delegation: %s' % defaultDelegation)
                valid_proxy, user_proxy = getProxy(defaultDelegation, self.logger)
            except Exception as ex:
                msg = "Error getting the user proxy"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                transfers.task_done()
                time.sleep(1)
                continue

            # TODO: take server from db, right now, take only the first of the list and assuming it valid for all
            try:
                # TODO: debug u added during info upload. To be fixed soon! For now worked around
                fts_server = transfer_list[0]['fts_instance'].split('u')[1]
                self.logger.info("Delegating proxy to %s" % fts_server)
                context = fts3.Context(fts_server, user_proxy, user_proxy, verify=True)
                self.logger.debug(fts3.delegate(context, lifetime=timedelta(hours=48), force=False))

                self.logger.info("Proxy delegated. Grouping files by jobId")
                jobs = {}
                for fileToKill in transfer_list:
                    # TODO: debug u added during info upload. To be fixed soon! For now worked around
                    jid = str(fileToKill['fts_id']).split('u')[1]
                    if jid not in jobs:
                        jobs[jid] = []
                    jobs[jid].append(fileToKill)

                self.logger.info("Found %s jobIds", len(jobs.keys()))
                self.logger.debug("jobIds: %s", jobs.keys)

                # lists for files that were killed and for files it was too late to kill
                killed = []
                too_late = []

                for ftsJobId, files in jobs.iteritems():
                    self.logger.info("Cancelling tranfers in %s" % ftsJobId)

                    ref_lfns = [str(x['destination_lfn'].split('/store/')[1]) for x in files]
                    source_lfns = [x['source_lfn'] for x in files]

                    job_list = fts3.get_job_status(context, ftsJobId, list_files=True)
                    tx = job_list['files']

                    # TODO: this workaround is needed to get FTS file id, we may want to add a column in the db?
                    idListToKill = [x['file_id'] for x in tx
                                    if x['dest_surl'].split('/cms/store/')[1] in ref_lfns]

                    # indexes into ref_lfns/source_lfns, needed for the state update
                    indexesToKill = [ref_lfns.index(str(x['dest_surl'].split('/cms/store/')[1])) for x in tx
                                     if x['dest_surl'].split('/cms/store/')[1] in ref_lfns]

                    self.logger.debug("List of ids to cancel for job %s: %s" % (ftsJobId, idListToKill))
                    res = fts3.cancel(context, ftsJobId, idListToKill)
                    self.logger.debug('Kill command result: %s' % json.dumps(res))

                    if not isinstance(res, list):
                        res = [res]

                    # Verify if the kill command succeeded
                    for k, kill_res in enumerate(res):
                        indexToUpdate = indexesToKill[k]
                        if kill_res in ("FINISHEDDIRTY", "FINISHED", "FAILED"):
                            self.logger.debug(source_lfns[indexToUpdate])
                            too_late.append(getHashLfn(source_lfns[indexToUpdate]))
                        else:
                            killed.append(getHashLfn(source_lfns[indexToUpdate]))

                # TODO: decide how to update status for too_late files
                killed += too_late
                self.logger.debug('Updating status of killed files: %s' % killed)

                if len(killed) > 0:
                    data = dict()
                    data['asoworker'] = self.config.asoworker
                    data['subresource'] = 'updateTransfers'
                    data['list_of_ids'] = killed
                    data['list_of_transfer_state'] = ["KILLED" for _ in killed]
                    self.oracleDB.post(self.config.oracleFileTrans,
                                       data=encodeRequest(data))
                    self.logger.debug("Marked killed %s" % killed)
            except:
                # TODO: split and improve try/except
                self.logger.exception('Kill command failed')

            transfers.task_done()
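# The per-jobId grouping done in killThread above can be isolated into a small
# helper; a minimal sketch assuming the same 'fts_id' field and the same
# temporary 'u'-prefix workaround as the code above.
from collections import defaultdict

def group_by_fts_job(transfer_list):
    """Group file documents by their FTS job id."""
    jobs = defaultdict(list)
    for fileToKill in transfer_list:
        # strip the spurious 'u' prefix added during info upload (see TODO above)
        jid = str(fileToKill['fts_id']).split('u')[1]
        jobs[jid].append(fileToKill)
    return dict(jobs)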
Example #47
0
    def mark_good(self, files):
        """
        Mark the list of files as transferred
        """
        updated_lfn = []
        good_ids = []
        if len(files) == 0:
            return updated_lfn
        for it, lfn in enumerate(files):
            hash_lfn = getHashLfn(lfn)
            self.logger.info("Marking good %s" % hash_lfn)
            self.logger.debug("Marking good %s" % lfn)
            if not self.config.isOracle:
                try:
                    document = self.db.document(hash_lfn)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
            self.logger.info("Doc %s Loaded" % hash_lfn)
            try:
                now = str(datetime.datetime.now())
                last_update = time.time()
                if self.config.isOracle:
                    docId = getHashLfn(lfn)
                    good_ids.append(docId)
                    updated_lfn.append(lfn)
                else:
                    if document['state'] != 'killed' and document['state'] != 'done' and document['state'] != 'failed':
                        outputLfn = document['lfn'].replace('store/temp', 'store', 1)
                        data = dict()
                        data['end_time'] = now
                        data['state'] = 'done'
                        data['lfn'] = outputLfn
                        data['last_update'] = last_update
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri = updateUri, type = "PUT", decode = False)
                        updated_lfn.append(lfn)
                        self.logger.debug("Marked good %s" % lfn)
                    else: 
                        updated_lfn.append(lfn)
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue   
            except Exception as ex:
                msg = "Error updating document"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
        if self.config.isOracle:
            try:
                data = dict()
                data['asoworker'] = self.config.asoworker
                data['subresource'] = 'updateTransfers'
                data['list_of_ids'] = good_ids
                data['list_of_transfer_state'] = ["DONE" for x in good_ids]
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(data))
                self.logger.debug("Marked good %s" % good_ids)
            except Exception:
                self.logger.exception('Error updating document')
                return {}
        
        self.logger.info("Transferred file %s updated, removing now source file" %docId)
        try:
            docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers','fileusertransfers'),
                                        data=encodeRequest({'subresource': 'getById', 'id': docId}))
            document = oracleOutputMapping(docbyId, None)[0]
        except Exception:
            msg = "Error getting file from source"
            self.logger.exception(msg)
            return {}

        if document["source"] not in self.site_tfc_map:
            self.logger.debug("site not found... gathering info from phedex")
            self.site_tfc_map[document["source"]] = self.get_tfc_rules(document["source"])
        pfn = self.apply_tfc_to_lfn('%s:%s' % (document["source"], lfn))
        self.logger.debug("File has to be removed now from source site: %s" % pfn)
        self.remove_files(self.userProxy, pfn)
        self.logger.debug("Transferred file removed from source")
        return updated_lfn
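# For reference, the bulk status update used in mark_good above reduces to a small
# payload; a minimal sketch assuming encodeRequest, the oracleDB client and the
# file-transfer API behave as in the surrounding examples.
def mark_ids_done(oracleDB, api, asoworker, good_ids):
    """POST a bulk 'updateTransfers' request marking the given doc ids as DONE."""
    data = {'asoworker': asoworker,
            'subresource': 'updateTransfers',
            'list_of_ids': good_ids,
            'list_of_transfer_state': ['DONE' for _ in good_ids]}
    return oracleDB.post(api, data=encodeRequest(data))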
Example #48
0
    def testFileTransferPUT(self):
        """
        _testFileTransferPUT_

        Just test simple testFileTransferPUT with fake data
        """
        # We just send fake data which is not monitored by dashboard.
        # Whether publication is required is decided only the first time a task is seen.
        for user in self.users:
            timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
            for i in range(self.totalFiles):
                now = int(time.time())
                # Generate a taskname
                workflowName = ""
                taskname = ""
                if user not in self.tasks:
                    workflowName = "".join([random.choice(string.ascii_lowercase) for _ in range(20)]) + "_" + str(now)
                    publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
                else:
                    workflowName = self.tasks[user]['workflowName']
                    publicationState = self.tasks[user]['publication']
                transferState = random.choice(['NEW', 'DONE'])
                taskname = generateTaskName(user, workflowName, timestamp)
                finalLfn = self.lfnBase % (user, workflowName, i, random.randint(1, 9999))
                idHash = getHashLfn(finalLfn)
                self.fileDoc['id'] = idHash
                self.fileDoc['job_id'] = i
                self.fileDoc['username'] = user
                self.fileDoc['taskname'] = taskname
                self.fileDoc['start_time'] = int(time.time())
                self.fileDoc['source_lfn'] = finalLfn
                self.fileDoc['destination_lfn'] = finalLfn
                self.fileDoc['transfer_state'] = transferState
                self.fileDoc['publication_state'] = publicationState
                print(self.fileDoc)
                self.server.put('/crabserver/dev/fileusertransfers', data=encodeRequest(self.fileDoc))
                # Putting the same doc twice should raise an error.
                # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
                # These tasks are kept for future calls.
                if user not in self.tasks:
                    self.tasks[user] = {'workflowName': workflowName, 'taskname': taskname, 'listOfIds': [],
                                        'publication': publicationState, 'toTransfer': 0, 'toPublish': 0, 'total': self.totalFiles}
                if self.tasks[user]['publication'] == 'NEW':
                    self.tasks[user]['toPublish'] += 1
                if transferState == 'NEW':
                    self.tasks[user]['toTransfer'] += 1
                self.tasks[user]['listOfIds'].append(idHash)
        # This should raise an error
        for username in self.tasks:
            taskname = self.tasks[username]['taskname']
            for query in ['getTransferStatus', 'getPublicationStatus']:
                result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': query,
                                                                                    'username': username,
                                                                                    'taskname': taskname}))
                print(result)
                print(result[0]['result'])
                taskInfoDict = oracleOutputMapping(result, 'id')
                print(taskInfoDict)
                for key, docDict in taskInfoDict.items():
                    result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'getById', 'id': key}))

        randomUsers = random.sample(set(self.users), 3)  # Take half of the users and kill their transfers for a specific task
        for username in randomUsers:
            taskname = self.tasks[username]['taskname']
            result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfers',
                                                                                                  'username': username,
                                                                                                  'taskname': taskname}))
            print(result)
        # oneUser is left for killing a whole list of IDs at once.
        # leftUsers will kill transfers one by one, each by its specific id.
        leftUsers = list(set(self.users) - set(randomUsers))
        oneUser = random.sample(set(leftUsers), 1)
        leftUsers = list(set(leftUsers) - set(oneUser))
        for username in leftUsers:
            # First get all remaining ids for this user
            result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'getTransferStatus',
                                                                                                 'username': username,
                                                                                                 'taskname': self.tasks[username]['taskname']}))
            resultOut = oracleOutputMapping(result, None)
            print("**"*50)
            for outDict in resultOut:
                print(outDict)
                result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfersById',
                                                                                                      'username': username,
                                                                                                      'listOfIds': outDict['id']}))
                print(result)
            print(resultOut)
            print(result)
        for username in oneUser:
            result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfersById',
                                                                                               'username': username,
                                                                                               'listOfIds': self.tasks[username]['listOfIds']}, ['listOfIds']))
            # As it asks to kill all which are in new, need to double check what we submitted before and if the output of killed is correct
            print(result)
            print(self.tasks[username])
Example #49
0
def submit(trans_tuple, job_data, log, direct=False):
    """Manage threads for transfers submission through Rucio

    :param trans_tuple: ordered list of needed xfer info (transfers, to_submit_columns)
    :type trans_tuple: tuple
    :param job_data: general CRAB job metadata
    :type job_data: dict
    :param log: log object
    :type log: logging
    :param direct: job output stored on temp or directly, defaults to False
    :type direct: bool, optional
    """
    threadLock = threading.Lock()
    threads = []
    to_update = []

    toTrans = trans_tuple[0]
    columns = trans_tuple[1]
    proxy = job_data['proxy']
    #rest_filetransfers = job_data['rest']
    crabserver = job_data['crabserver']
    user = job_data['username']
    destination = job_data['destination']
    taskname = job_data['taskname']
    scope = 'user.' + user

    try:
        os.environ["X509_USER_PROXY"] = proxy
        log.info("Initializing Rucio client for %s", taskname)
        crabInj = CRABDataInjector(taskname,
                                    destination,
                                    account=user,
                                    scope=scope,
                                    auth_type='x509_proxy')
    except Exception as ex:
        log.error("Failed to load RUCIO client: %s", ex)
        raise ex

    # Split threads by source RSEs
    sources = list(set([x[columns.index('source')] for x in toTrans]))

    os.environ["X509_CERT_DIR"] = os.getcwd()

    # mapping lfn <--> pfn
    for source in sources:

        ids = [x[columns.index('id')] for x in toTrans if x[columns.index('source')] == source]
        src_lfns = [x[columns.index('source_lfn')] for x in toTrans if x[columns.index('source')] == source]
        dst_lfns = [x[columns.index('destination_lfn')] for x in toTrans if x[columns.index('source')] == source]

        sorted_source_pfns = []
        sorted_dest_lfns = []
        sorted_dest_pfns = []

        # workaround for phedex.getPFN issue --> shuffling output order w.r.t. the list in input
        try:
            for chunk in chunks(src_lfns, 10):
                unsorted_source_pfns = [[k.split(scope+":")[1], str(x)] for k, x in crabInj.cli.lfns2pfns(source, [scope + ":" + y for y in chunk]).items()]
                #log.info(unsorted_source_pfns)
                for order_lfn in chunk:
                    for lfn, pfn in unsorted_source_pfns:
                        if order_lfn == lfn:
                            sorted_source_pfns.append(pfn)
                            break

            for chunk in chunks(dst_lfns, 10):
                unsorted_dest_pfns = [[k.split(scope+":")[1], str(x)] for k, x in  crabInj.cli.lfns2pfns(toTrans[0][4],  [scope + ":" + y for y in chunk]).items()]
                #log.info(unsorted_dest_pfns)
                for order_lfn in chunk:
                    for lfn, pfn in unsorted_dest_pfns:
                        if order_lfn == lfn:
                            sorted_dest_pfns.append(pfn)
                            sorted_dest_lfns.append(lfn)
                            break

        except Exception as ex:
            log.error("Failed to map lfns to pfns: %s", ex)
            mark_failed(ids, ["Failed to map lfn to pfn: " + str(ex) for _ in ids], crabserver)

        source_pfns = sorted_source_pfns
        dest_lfns = sorted_dest_lfns

        # saving file sizes and checksums
        filesizes = [x[columns.index('filesize')] for x in toTrans if x[columns.index('source')] == source]
        checksums = [x[columns.index('checksums')] for x in toTrans if x[columns.index('source')] == source]
        pubnames = [x[columns.index('publishname')] for x in toTrans if x[columns.index('source')] == source]

        # ordered list of replicas information
        try:
            jobs = zip(source_pfns, dest_lfns, ids, checksums, filesizes, pubnames)
        except Exception as ex:
            log.error("Failed to gather all job information: %s", ex)
        
        job_columns = ['source_pfns', 'dest_lfns', 'ids', 'checksums', 'filesizes', 'pubnames']
        # ordered list of transfers details
        tx_from_source = [[job, source, taskname, user, destination] for job in jobs]
        tx_columns = ['job', 'source', 'taskname', 'user', 'destination']

        # split submission process in chunks of max 200 files
        for files in chunks(tx_from_source, 200):
            if not direct:
                log.info("Submitting: %s", files)
                thread = submit_thread(threadLock,
                                       log,
                                       (files, tx_columns),
                                       job_columns,
                                       proxy,
                                       to_update,
                                       crabInj)
                thread.start()
                threads.append(thread)
            elif direct:
                log.info("Registering direct stageout: %s", files)
                thread = submit_thread(threadLock,
                                       log,
                                       (files, tx_columns),
                                       job_columns,
                                       proxy,
                                       to_update,
                                       crabInj,
                                       direct=True)
                thread.start()
                threads.append(thread)

    for t in threads:
        t.join()

    if len(to_update) == 0:
        return False
    # update statuses in oracle table as per threads result
    for fileDoc in to_update:
        try:
            #TODO: split submitted from submitted failed!
            log.debug("POSTing to crabserver 'filetransfer' api:\n%s", encodeRequest(fileDoc))
            crabserver.post('filetransfers', data=encodeRequest(fileDoc))
            log.info("Marked submitted %s files" % (fileDoc['list_of_ids']))
        except Exception:
            log.exception('Failed to mark files as submitted on DBs')

    return True
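# The ordering workaround for lfns2pfns in submit above (re-sorting the returned
# pfns to match the input lfn order) can be expressed as a small helper; a minimal
# sketch assuming the same [lfn, pfn] pair structure built above.
def sort_pfns_by_lfn_order(ordered_lfns, unsorted_pairs):
    """Return pfns following the order of ordered_lfns."""
    pfn_by_lfn = dict((lfn, pfn) for lfn, pfn in unsorted_pairs)
    return [pfn_by_lfn[lfn] for lfn in ordered_lfns if lfn in pfn_by_lfn]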
Example #50
0
        cmd = "curl -i -F file=@%s xsls.cern.ch"%xmllocation
        try:
            pu = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            break
        except Exception as e:
            logger.debug(str(e))
            maxi = maxi + 1
            continue

if __name__ == "__main__":
    server = HTTPRequests('cmsweb-testbed.cern.ch',
                          '/data/srv/asyncstageout/state/asyncstageout/creds/OpsProxy',
                          '/data/srv/asyncstageout/state/asyncstageout/creds/OpsProxy')

    result = server.get('/crabserver/preprod/filetransfers', 
                        data=encodeRequest({'subresource': 'groupedTransferStatistics', 'grouping': 0}))

    results = oracleOutputMapping(result)


    status = {'transfers':{}, 'publications':{}}
    tmp = {'transfers':{ 'DONE':0, 'ACQUIRED':0, 'SUBMITTED':0, 'FAILED':0, 'RETRY':0 }, 
           'publications':{'DONE':0, 'ACQUIRED':0, 'NEW':0, 'FAILED':0, 'RETRY':0}}

    #past = open("tmp_transfer")
    #tmp = json.load(past)

    for doc in results:
        if doc['aso_worker']=="asodciangot1":
            if 'transfers' in tmp and TRANSFERDB_STATES[doc['transfer_state']] in tmp['transfers']:
                status['transfers'][TRANSFERDB_STATES[doc['transfer_state']]] = - tmp['transfers'][TRANSFERDB_STATES[doc['transfer_state']]] + doc['nt']