Example #1
    def getAcquired(self, users):
        """
        Get a number of documents to be submitted (in ACQUIRED status) and return results of the query for logs
        :return:
        """
        documents = list()

        for user in users:
            username, group, role = user

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquiredTransfers'
            fileDoc['grouping'] = 1
            fileDoc['username'] = username
            if group == '':
                group = None
            if role == '':
                role = None
            fileDoc['vogroup'] = group
            fileDoc['vorole'] = role

            self.logger.debug("Retrieving users from oracleDB")

            try:
                results = self.oracleDB.get(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                documents += oracleOutputMapping(results)
            except Exception as ex:
                self.logger.error("Failed to get acquired transfers \
                                  from oracleDB: %s" % ex)

        return documents
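A hedged usage sketch (illustrative only: worker stands for an instance of the class above, and the tuples mirror the (username, group, role) triples built by the activeUsers queries in the later examples; the printed keys come from the file-document layout described in Example #13):

# Hypothetical driver code: fetch acquired transfer documents for two users.
users = [('jdoe', '', ''),            # empty VO group/role are turned into None
         ('asmith', 'cms', 'prod')]   # explicit VO group and role
documents = worker.getAcquired(users)
for doc in documents:
    print("%s -> %s" % (doc['source_lfn'], doc['destination']))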
Example #2
    def mark_acquired(self, files=[]):
        """
        Mark the list of files as acquired
        """
        lfn_in_transfer = []
        dash_rep = ()
        if self.config.isOracle:
            toUpdate = list()
            for lfn in files:
                # an LFN like /store/temp/... has 'temp' at offset 7
                if lfn['value'][0].find('temp') == 7:
                    docId = lfn['key'][5]
                    toUpdate.append(docId)
                    try:
                        docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers','fileusertransfers'),
                                                    data=encodeRequest({'subresource': 'getById', 'id': docId}))
                        document = oracleOutputMapping(docbyId, None)[0]
                        dash_rep = (document['jobid'], document['job_retry_count'], document['taskname'])
                        lfn_in_transfer.append(lfn)
                    except Exception as ex:
                        self.logger.error("Error during dashboard report update: %s" %ex)
                        return [],()

            return lfn_in_transfer, dash_rep
        else:
            for lfn in files:
                if lfn['value'][0].find('temp') == 7:
                    docId = getHashLfn(lfn['value'][0])
                    self.logger.debug("Marking acquired %s" % docId)
                    # Load document to get the retry_count
                    try:
                        document = self.db.document(docId)
                    except Exception as ex:
                        msg = "Error loading document from couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    if document['state'] in ('new', 'retry'):
                        data = dict()
                        data['state'] = 'acquired'
                        data['last_update'] = time.time()
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        try:
                            self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        except Exception as ex:
                            msg = "Error updating document in couch"
                            msg += str(ex)
                            msg += str(traceback.format_exc())
                            self.logger.error(msg)
                            continue
                        self.logger.debug("Marked acquired %s of %s" % (docId, lfn))
                        lfn_in_transfer.append(lfn)
                        dash_rep = (document['jobid'], document['job_retry_count'], document['workflow'])
                    else:
                        continue
                else:
                    good_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                    self.mark_good([good_lfn])
            return lfn_in_transfer, dash_rep
Example #3
    def failed(self, files, failures_reasons=[], max_retry=3, force_fail=False, submission_error=False):
        """

        :param files: tuple (source_lfn, dest_lfn)
        :param failures_reasons: list(str) with reasons of failure
        :param max_retry: number of retry before giving up
        :param force_fail: flag for triggering failure without retry
        :param submission_error: error during fts submission
        :return:
        """
        updated_lfn = []
        for Lfn in files:
            lfn = Lfn[0]
            # Load document and get the retry_count
            docId = getHashLfn(lfn)
            self.logger.debug("Marking failed %s" % docId)
            try:
                docbyId = self.oracleDB.get(self.config.oracleUserFileTrans.replace('filetransfers', 'fileusertransfers'),
                                            data=encodeRequest({'subresource': 'getById', 'id': docId}))
                document = oracleOutputMapping(docbyId, None)[0]
                self.logger.debug("Document: %s" % document)
            except Exception as ex:
                self.logger.error("Error updating failed docs: %s" % ex)
                return 1

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'updateTransfers'
            fileDoc['list_of_ids'] = docId
            if failures_reasons:
                try:
                    fileDoc['list_of_failure_reason'] = failures_reasons[files.index(Lfn)]
                except Exception:
                    fileDoc['list_of_failure_reason'] = "unexpected error, missing reasons"
                    self.logger.exception("missing reasons")

            if force_fail or document['transfer_retry_count'] + 1 > max_retry:
                fileDoc['list_of_transfer_state'] = 'FAILED'
                fileDoc['list_of_retry_value'] = 1
            else:
                fileDoc['list_of_transfer_state'] = 'RETRY'

            if submission_error:
                fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
            fileDoc['list_of_retry_value'] = 1

            self.logger.debug("update: %s" % fileDoc)
            try:
                updated_lfn.append(docId)
                self.oracleDB.post(self.config.oracleFileTrans,
                                   data=encodeRequest(fileDoc))
            except Exception:
                self.logger.exception('ERROR updating failed documents')
                return 1
        self.logger.debug("failed file updated")
        return 0
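A hypothetical invocation following the docstring's contract (LFNs and reasons invented for illustration; worker stands for an instance of the class above):

# Two transfers with one failure reason each; failed() returns 0 on success, 1 on error.
files = [('/store/temp/user/jdoe/file1.root', '/store/user/jdoe/file1.root'),
         ('/store/temp/user/jdoe/file2.root', '/store/user/jdoe/file2.root')]
rc = worker.failed(files, failures_reasons=['Checksum mismatch', 'Connection timed out'])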
Example #4
    def algorithm(self, parameters=None):
        """
        Performs the doRetries method, loading the appropriate
        plugin for each job and handling it.
        """
        logging.debug("Running retryManager algorithm")
        if self.config.isOracle:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'retryTransfers'
            fileDoc['time_to'] = self.cooloffTime
            self.logger.debug('fileDoc: %s' % fileDoc)
            try:
                results = self.oracleDB.post(self.config.oracleFileTrans,
                                             data=encodeRequest(fileDoc))
            except Exception:
                self.logger.exception("Failed to get retry transfers in oracleDB: %s")
                return
            logging.info("Retried files in cooloff: %s,\n now getting transfers to kill" % str(results))

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'getTransfersToKill'
            fileDoc['grouping'] = 0
            try:
                results = self.oracleDB.get(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                result = oracleOutputMapping(results)
            except Exception as ex:
                self.logger.error("Failed to get killed transfers \
                                   from oracleDB: %s" % ex)
                return

            usersToKill = list(set([(x['username'], x['user_group'], x['user_role']) for x in result]))

            self.logger.debug("Users with transfers to kill: %s" % usersToKill)
            transfers = Queue()

            for i in range(self.config.kill_threads):
                worker = Thread(target=self.killThread, args=(i, transfers,))
                worker.setDaemon(True)
                worker.start()

            for user in usersToKill:
                user_trans = [x for x in result if (x['username'], x['user_group'], x['user_role']) == user]
                self.logger.info("Inserting %s transfers of user %s in the killing queue" % (len(user_trans), user))
                transfers.put(user_trans)

            transfers.join()
            self.logger.info("Transfers killed.")
        else:
            self.doRetries()
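killThread is referenced but not shown in this example. A hedged sketch of such a consumer, assuming each queued item is the list of transfer rows for one user and that task_done() must be called so transfers.join() in algorithm() can return:

    def killThread(self, thread_id, transfers):
        """Hypothetical sketch, not the project's actual implementation."""
        while True:
            user_trans = transfers.get()  # blocks until algorithm() queues a batch
            try:
                self.logger.info("Thread %s: killing %s transfers" % (thread_id, len(user_trans)))
                # ... mark each transfer as KILLED via the REST interface ...
            finally:
                transfers.task_done()  # lets transfers.join() unblock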
Example #5
    def active_tasks(self, db):

        fileDoc = {}
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquirePublication'

        self.logger.debug("Retrieving publications from oracleDB")

        results = ''
        try:
            results = db.post(self.config.oracleFileTrans,
                              data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                                from oracleDB: %s" %ex)
            return []

        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquiredPublication'
        fileDoc['grouping'] = 0
        fileDoc['limit'] = 100000

        self.logger.debug("Retrieving max.100000 acquired publications from oracleDB")

        result = []

        try:
            results = db.get(self.config.oracleFileTrans,
                             data=encodeRequest(fileDoc))
            result.extend(oracleOutputMapping(results))
        except Exception as ex:
            self.logger.error("Failed to acquire publications \
                                from oracleDB: %s" %ex)
            return []

        self.logger.debug("publen: %s" % len(result))

        self.logger.debug("%s acquired publications retrieved" % len(result))
        #TODO: join query for publisher (same of submitter)
        unique_tasks = [list(i) for i in
                        set(tuple([x['username'], x['user_group'], x['user_role'], x['taskname']])
                            for x in result if x['transfer_state'] == 3)]

        info = []
        for task in unique_tasks:
            info.append([x for x in result if x['taskname'] == task[3]])
        return zip(unique_tasks, info)
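For reference, the returned structure looks like this (hypothetical data):

# [(['jdoe', None, None, '180101_120000:jdoe_crab_mytask'],   # task: [user, group, role, taskname]
#   [{'taskname': '180101_120000:jdoe_crab_mytask',           # info: one dict per file
#     'transfer_state': 3, 'username': 'jdoe', ...},
#    ...]),
#  ...]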
Example #6
def mark_failed(files, oracleDB, logger, failure_reason=""):
    """
    Something failed for these files so increment the retry count
    """
    h = 0
    for lfn in files:
        h += 1
        logger.debug("Marking failed %s" % h)
        source_lfn = lfn
        docId = getHashLfn(source_lfn)
        logger.debug("Marking failed %s" % docId)
        try:
            docbyId = oracleDB.get(config.General.oracleUserTrans,
                                   data=encodeRequest({
                                       'subresource': 'getById',
                                       'id': docId
                                   }))
        except Exception:
            logger.exception("Error updating failed docs.")
            continue
        document = oracleOutputMapping(docbyId, None)[0]
        logger.debug("Document: %s" % document)

        try:
            fileDoc = dict()
            fileDoc['asoworker'] = 'asodciangot1'
            fileDoc['subresource'] = 'updatePublication'
            fileDoc['list_of_ids'] = docId

            fileDoc['list_of_publication_state'] = 'FAILED'
            #if force_failure or document['publish_retry_count'] > self.max_retry:
            #    fileDoc['list_of_publication_state'] = 'FAILED'
            #else:
            #    fileDoc['list_of_publication_state'] = 'RETRY'
            # TODO: implement retry
            fileDoc['list_of_retry_value'] = 1
            fileDoc['list_of_failure_reason'] = failure_reason

            logger.debug("fileDoc: %s " % fileDoc)

            _ = oracleDB.post(config.General.oracleFileTrans,
                              data=encodeRequest(fileDoc))
            logger.debug("updated: %s " % docId)
        except Exception as ex:
            msg = "Error updating document: %s" % fileDoc
            msg += str(ex)
            msg += str(traceback.format_exc())
            logger.error(msg)
            continue
Example #7
def getData(subresource):
    """This function will fetch data from Oracle table"""

    crabserver = CRABRest(hostname=CMSWEB,
                          localcert=CERTIFICATE,
                          localkey=KEY,
                          retry=3,
                          userAgent='CRABTaskWorker')
    crabserver.setDbInstance(dbInstance=DBINSTANCE)
    result = crabserver.get(api='filetransfers',
                            data=encodeRequest({
                                'subresource': subresource,
                                'grouping': 0
                            }))

    return oracleOutputMapping(result)
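A hedged usage sketch. CMSWEB, CERTIFICATE, KEY and DBINSTANCE are assumed to be module-level constants (REST hostname, X.509 credentials, DB instance); 'acquiredPublication' is one of the subresources used with grouping=0 in the other examples, though the REST may require extra parameters (e.g. asoworker) that this helper does not pass:

if __name__ == '__main__':
    # Hypothetical call: list the acquired publications known to the REST.
    rows = getData('acquiredPublication')
    print("%d rows fetched" % len(rows))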
Example #8
def mark_failed(files, oracleDB, logger, failure_reason=""):
    """
    Something failed for these files so increment the retry count
    """
    h = 0
    for lfn in files:
        h += 1
        logger.debug("Marking failed %s" % h)
        source_lfn = lfn
        docId = getHashLfn(source_lfn)
        logger.debug("Marking failed %s" % docId)
        try:
            docbyId = oracleDB.get(config.General.oracleUserTrans,
                                   data=encodeRequest({'subresource': 'getById', 'id': docId}))
        except Exception:
            logger.exception("Error updating failed docs.")
            continue
        document = oracleOutputMapping(docbyId, None)[0]
        logger.debug("Document: %s" % document)

        try:
            fileDoc = dict()
            fileDoc['asoworker'] = config.General.asoworker
            fileDoc['subresource'] = 'updatePublication'
            fileDoc['list_of_ids'] = docId

            fileDoc['list_of_publication_state'] = 'FAILED'
            #if force_failure or document['publish_retry_count'] > self.max_retry:
            #    fileDoc['list_of_publication_state'] = 'FAILED'
            #else:
            #    fileDoc['list_of_publication_state'] = 'RETRY'
            # TODO: implement retry
            fileDoc['list_of_retry_value'] = 1
            fileDoc['list_of_failure_reason'] = failure_reason

            logger.debug("fileDoc: %s " % fileDoc)

            _ = oracleDB.post(config.General.oracleFileTrans,
                              data=encodeRequest(fileDoc))
            logger.debug("updated: %s " % docId)
        except Exception as ex:
            msg = "Error updating document: %s" % fileDoc
            msg += str(ex)
            msg += str(traceback.format_exc())
            logger.error(msg)
            continue
Example #9
    def acquire(self):
        """
        Get a batch of documents (1k for the current Oracle REST) and bind them to this ASO worker
        NEW -> ACQUIRED (asoworker NULL -> config.asoworker)
        :return:
        """

        self.logger.info('Retrieving users...')
        fileDoc = dict()
        fileDoc['subresource'] = 'activeUsers'
        fileDoc['grouping'] = 0
        fileDoc['asoworker'] = self.config.asoworker

        try:
            result = self.oracleDB.get(self.config.oracleFileTrans,
                                       data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to retrieve active users from oracleDB: %s" % ex)
            return 1

        users = list()
        try:
            docs = oracleOutputMapping(result)
            users = [[x['username'], x['user_group'], x['user_role']] for x in docs]
            self.logger.info('Users to process: %s' % str(users))
        except Exception:
            self.logger.exception('User data malformed. ')

        for user in users:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquireTransfers'
            fileDoc['username'] = user[0]

            self.logger.debug("Retrieving transfers from oracleDB for user: %s " % user)

            try:
                self.oracleDB.post(self.config.oracleFileTrans,
                                   data=encodeRequest(fileDoc))
            except Exception as ex:
                self.logger.error("Failed to acquire transfers \
                                  from oracleDB: %s" % ex)

        return users
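A hedged sketch chaining this with getAcquired() from Example #1 (worker stands for an instance providing both methods):

# Hypothetical pipeline: bind NEW documents to this worker, then fetch them.
users = worker.acquire()   # NEW -> ACQUIRED; returns [username, group, role] lists
if users != 1:             # acquire() returns 1 when the initial query fails
    documents = worker.getAcquired(users)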
Example #10
    def searchTask(self, workflow):
        """

        :param workflow:
        :return:
        """
        data = dict()
        data['workflow'] = workflow
        data['subresource'] = 'search'
        try:
            result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'),
                                       data=encodeRequest(data))
            self.logger.debug("task: %s " % str(result[0]))
            self.logger.debug("task: %s " % getColumn(result[0], 'tm_last_publication'))
        except Exception as ex:
            self.logger.error("Error during task doc retrieving: %s" % ex)
            return {}

        return oracleOutputMapping(result)
Example #11
    def source_destinations_by_user(self):
        """
        Get all the source/destination pairs for a user
        """
        if self.config.isOracle:
            self.logger.debug('Running acquiredTransfers query... ' + self.user)
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquiredTransfers'
            fileDoc['grouping'] = 1
            fileDoc['username'] = self.user
            group = self.group
            role = self.role
            if group == '':
                group = None
            if role == '':
                role = None
            fileDoc['vogroup'] = group
            fileDoc['vorole'] = role
            fileDoc['limit'] = self.config.max_files_per_transfer
            result = []

            self.logger.debug('Request: ' + str(fileDoc))
            try:
                results = self.oracleDB.get(self.config.oracleFileTrans,
                                            data=encodeRequest(fileDoc))
                result = oracleOutputMapping(results)
                res = [[x['source'], x['destination']] for x in result]
                res.sort()
                # collapse duplicate (source, destination) pairs (res is sorted)
                res = list(k for k, _ in itertools.groupby(res))
            except Exception as ex:
                self.logger.error("Failed to get acquired transfers \
                                  from oracleDB: %s" %ex)
                return [], {}
            return res, result
        else:
            query = {'group': True,
                     'startkey':[self.user, self.group, self.role], 'endkey':[self.user, self.group, self.role, {}, {}]}
            try:
                sites = self.db.loadView(self.config.ftscp_design, 'ftscp_all', query)
            except Exception:
                return []
            return [[x[4], x[3]] for x in sites['rows']]
Example #12
    def getPub(self):
        """

        :return:
        """
        to_pub_docs = list()
        filedoc = dict()
        filedoc['asoworker'] = self.config.asoworker
        filedoc['subresource'] = 'acquiredPublication'
        filedoc['grouping'] = 0

        try:
            results = self.oracleDB.get(self.config.oracleFileTrans,
                                        data=encodeRequest(filedoc))
            to_pub_docs = oracleOutputMapping(results)
        except Exception as ex:
            self.logger.error("Failed to get acquired publications \
                              from oracleDB: %s" % ex)
            return to_pub_docs

        return to_pub_docs
Example #13
    def active_tasks(self, crabserver):
        """
        :param crabserver: CRABRest object to access proper REST as createdin __init__ method
        TODO  detail here the strucutre it returns
        :return: a list tuples [(task,info)]. One element for each task which has jobs to be published
                 each list element has the format (task, info) where task and info are lists
                 task=[] a set fo 4 attributes formatted as a list of strings:
                    [username, usergroup, userrole, taskname]
                    this is a subset of the information present in each element of thee next list !
                 info=[filedic1, filedic2, filedic3...] a list of dictionaries,one per file with keys:
                   u'username', u'cache_url', u'source_lfn', u'publication_state', u'destination',
                   u'user_role', u'last_update', u'input_dataset', u'dbs_url', u'aso_worker',
                   u'user_group', u'taskname', u'transfer_state', u'destination_lfn'
        """

        self.logger.debug("Retrieving publications from oracleDB")
        filesToPublish = []
        asoworkers = self.config.asoworker
        # asoworkers can be a string or a list of strings
        # but if it is a string, do not turn it into a list of chars !
        asoworkers = [asoworkers] if isinstance(asoworkers,
                                                basestring) else asoworkers
        for asoworker in asoworkers:
            self.logger.info(
                "Processing publication requests for asoworker: %s", asoworker)
            fileDoc = {}
            fileDoc['asoworker'] = asoworker
            fileDoc['subresource'] = 'acquirePublication'
            data = encodeRequest(fileDoc)
            try:
                result = crabserver.post(api='filetransfers', data=data)  # pylint: disable=unused-variable
            except Exception as ex:
                self.logger.error(
                    "Failed to acquire publications from crabserver: %s", ex)
                return []

            self.logger.debug(
                "Retrieving max.100000 acquired publications from oracleDB")
            fileDoc = dict()
            fileDoc['asoworker'] = asoworker
            fileDoc['subresource'] = 'acquiredPublication'
            fileDoc['grouping'] = 0
            fileDoc['limit'] = 100000
            data = encodeRequest(fileDoc)
            try:
                results = crabserver.get(api='filetransfers', data=data)
            except Exception as ex:
                self.logger.error(
                    "Failed to acquire publications from crabserver: %s", ex)
                return []
            files = oracleOutputMapping(results)
            self.logger.info(
                "%s acquired publications retrieved for asoworker %s",
                len(files), asoworker)
            filesToPublish.extend(files)

        # TODO: join query for publisher (same of submitter)
        unique_tasks = [
            list(i) for i in set(
                tuple([
                    x['username'], x['user_group'], x['user_role'],
                    x['taskname']
                ]) for x in filesToPublish if x['transfer_state'] == 3)
        ]

        info = []
        for task in unique_tasks:
            info.append(
                [x for x in filesToPublish if x['taskname'] == task[3]])
        return zip(unique_tasks, info)
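A hedged sketch of a consumer loop over the returned pairs (publisher stands for an instance of the class above; crabserver as in the docstring):

for task, info in publisher.active_tasks(crabserver):
    print("task %s: %d files ready for publication" % (task[3], len(info)))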
Example #14
    def testFileTransferPUT(self):
        """
        _testFileTransferPUT_

        Just test simple testFileTransferPUT with fake data
        """
        # We just send fake data which is not monitored by dashboard.
        # Whether publication is ON or NOT is decided only the first time a task is seen.
        for user in self.users:
            timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
            for i in range(self.totalFiles):
                now = int(time.time())
                # Generate a taskname
                workflowName = ""
                taskname = ""
                if user not in self.tasks:
                    workflowName = "".join([
                        random.choice(string.ascii_lowercase)
                        for _ in range(20)
                    ]) + "_" + str(now)
                    publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
                else:
                    workflowName = self.tasks[user]['workflowName']
                    publicationState = self.tasks[user]['publication']
                transferState = random.choice(['NEW', 'DONE'])
                taskname = generateTaskName(user, workflowName, timestamp)
                finalLfn = self.lfnBase % (user, workflowName, i,
                                           random.randint(1, 9999))
                idHash = getHashLfn(finalLfn)
                self.fileDoc['id'] = idHash
                self.fileDoc['job_id'] = i
                self.fileDoc['username'] = user
                self.fileDoc['taskname'] = taskname
                self.fileDoc['start_time'] = int(time.time())
                self.fileDoc['source_lfn'] = finalLfn
                self.fileDoc['destination_lfn'] = finalLfn
                self.fileDoc['transfer_state'] = transferState
                self.fileDoc['publication_state'] = publicationState
                print(self.fileDoc)
                self.server.put('/crabserver/dev/fileusertransfers',
                                data=encodeRequest(self.fileDoc))
                # Putting the same doc twice should raise an error:
                # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
                # These tasks are kept for later calls
                if user not in self.tasks:
                    self.tasks[user] = {
                        'workflowName': workflowName,
                        'taskname': taskname,
                        'listOfIds': [],
                        'publication': publicationState,
                        'toTransfer': 0,
                        'toPublish': 0,
                        'total': self.totalFiles
                    }
                if self.tasks[user]['publication'] == 'NEW':
                    self.tasks[user]['toPublish'] += 1
                if transferState == 'NEW':
                    self.tasks[user]['toTransfer'] += 1
                self.tasks[user]['listOfIds'].append(idHash)
        # This should raise an error
        for username in self.tasks:
            taskname = self.tasks[username]['taskname']
            for query in ['getTransferStatus', 'getPublicationStatus']:
                result = self.server.get('/crabserver/dev/fileusertransfers',
                                         data=encodeRequest({'subresource': query,
                                                             'username': username,
                                                             'taskname': taskname}))
                print(result)
                print(result[0]['result'])
                taskInfoDict = oracleOutputMapping(result, 'id')
                print(taskInfoDict)
                for key, docDict in taskInfoDict.items():
                    result = self.server.get(
                        '/crabserver/dev/fileusertransfers',
                        data=encodeRequest({
                            'subresource': 'getById',
                            'id': key
                        }))

        # Take 3 of the users and kill their transfers for their specific task
        randomUsers = random.sample(set(self.users), 3)
        for username in randomUsers:
            taskname = self.tasks[username]['taskname']
            result = self.server.post('/crabserver/dev/fileusertransfers',
                                      data=encodeRequest({
                                          'subresource': 'killTransfers',
                                          'username': username,
                                          'taskname': taskname
                                      }))
            print(result)
        # oneUser is left for killing a list of IDs
        # leftUsers will be killing transfers one by one for specific id.
        leftUsers = list(set(self.users) - set(randomUsers))
        oneUser = random.sample(set(leftUsers), 1)
        leftUsers = list(set(leftUsers) - set(oneUser))
        for username in leftUsers:
            # First get all remaining ids for this user
            result = self.server.get('/crabserver/dev/fileusertransfers',
                                     data=encodeRequest({'subresource': 'getTransferStatus',
                                                         'username': username,
                                                         'taskname': self.tasks[username]['taskname']}))
            resultOut = oracleOutputMapping(result, None)
            print("**" * 50)
            for outDict in resultOut:
                print(outDict)
                result = self.server.post('/crabserver/dev/fileusertransfers',
                                          data=encodeRequest({'subresource': 'killTransfersById',
                                                              'username': username,
                                                              'listOfIds': outDict['id']}))
                print(result)
            print(resultOut)
            print(result)
        for username in oneUser:
            result = self.server.post(
                '/crabserver/dev/fileusertransfers',
                data=encodeRequest(
                    {
                        'subresource': 'killTransfersById',
                        'username': username,
                        'listOfIds': self.tasks[username]['listOfIds']
                    }, ['listOfIds']))
            # Since this kills everything still in NEW state, double-check against what was submitted that the kill output is correct
            print(result)
            print(self.tasks[username])
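The last call above passes ['listOfIds'] as a second argument to encodeRequest. A hedged sketch of the assumed behavior, inferred only from how the function is called in these examples (encodeRequestSketch is a hypothetical stand-in, not the project's implementation):

import urllib

def encodeRequestSketch(data, listParams=None):
    """Hypothetical: URL-encode a dict, expanding each key named in
    listParams so a list value becomes repeated key=value pairs."""
    listParams = listParams or []
    data = dict(data)  # do not mutate the caller's dict
    pairs = []
    for param in listParams:
        for item in data.pop(param, []):
            pairs.append('%s=%s' % (param, urllib.quote(str(item))))
    pairs.append(urllib.urlencode(data))
    return '&'.join(pairs)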
Example #15
    def oracleSiteUser(self, db):
        """
        1. Acquire transfers from DB
        2. Get acquired users and destination sites
        """

        self.logger.info('Retrieving users...')
        fileDoc = dict()
        fileDoc['subresource'] = 'activeUsers'
        fileDoc['grouping'] = 0
        fileDoc['asoworker'] = self.config.asoworker

        result = dict()
        try:
            result = db.get(self.config.oracleFileTrans,
                            data=encodeRequest(fileDoc))
        except Exception as ex:
            self.logger.error("Failed to acquire transfers \
                              from oracleDB: %s" % ex)
            return []
        
        self.logger.debug(oracleOutputMapping(result))
        # TODO: translate result into list((user,group,role),...)
        if len(oracleOutputMapping(result)) != 0:
            self.logger.debug(type( [[x['username'].encode('ascii','ignore'), x['user_group'], x['user_role']] for x in oracleOutputMapping(result)]))
            try:
                docs =  oracleOutputMapping(result)
                users = [[x['username'], x['user_group'], x['user_role']] for x in docs]
                self.logger.info('Users to process: %s' % str(users))
            except Exception:
                self.logger.exception('User data malformed. ')
        else:
            self.logger.info('No new user to acquire')
            return []

        actives = list()
        for user in users:
            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquireTransfers'
            fileDoc['username'] = user[0]

            self.logger.debug("Retrieving transfers from oracleDB for user: %s " % user[0])

            try:
                result = db.post(self.config.oracleFileTrans,
                                 data=encodeRequest(fileDoc))
            except Exception as ex:
                self.logger.error("Failed to acquire transfers \
                                  from oracleDB: %s" %ex)
                continue

            self.doc_acq = str(result)
            for i in range(len(user)):
                if not user[i] or user[i] in ['None', 'NULL']:
                    user[i] = ''
                user[i] = str(user[i])
            actives.append(user)


            self.logger.debug("Transfers retrieved from oracleDB. %s " % users)

        return users
Example #16
    def mark_failed(self, files=[], failures_reasons=[], force_fail=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            self.logger.debug("Document: %s" % lfn)
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value']:
                    temp_lfn = lfn['value'].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value']
            docId = getHashLfn(temp_lfn)
            # Load document to get the retry_count
            if self.config.isOracle:
                try:
                    self.logger.debug("Document: %s" %docId)
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                    document = oracleOutputMapping(docbyId)[0]
                    data = dict()
                    data['asoworker'] = self.config.asoworker
                    data['subresource'] = 'updateTransfers'
                    data['list_of_ids'] = docId

                    if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                        data['list_of_transfer_state'] = 'FAILED'
                        data['list_of_retry_value'] = 0
                    else:
                        data['list_of_transfer_state'] = 'RETRY'
                        fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                        if fatal_error:
                            data['list_of_transfer_state'] = 'FAILED'
                        
                    data['list_of_failure_reason'] = failures_reasons[files.index(lfn)]
                    data['list_of_retry_value'] = 0

                    self.logger.debug("update: %s" % data)
                    result = self.oracleDB.post(self.config.oracleFileTrans,
                                                data=encodeRequest(data))
                    if data['list_of_transfer_state'] != 'RETRY':
                        updated_lfn.append(lfn)
                    self.logger.debug("Marked failed %s" % lfn)
                except Exception as ex:
                    self.logger.error("Error updating document status: %s" %ex)
                    continue
            else:
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] not in ('killed', 'done', 'failed'):
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    # Prepare data to update the document in couch
                    if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                        data['state'] = 'failed'
                        data['end_time'] = now
                    else:
                        data['state'] = 'retry'
                        fatal_error = self.determine_fatal_error(failures_reasons[files.index(lfn)])
                        if fatal_error:
                            data['state'] = 'failed'
                            data['end_time'] = now

                    self.logger.debug("Failure list: %s" % failures_reasons)
                    self.logger.debug("Files: %s" % files)
                    self.logger.debug("LFN %s" % lfn)

                    data['failure_reason'] = failures_reasons[files.index(lfn)]
                    data['last_update'] = last_update
                    data['retry'] = now
                    # Update the document in couch
                    self.logger.debug("Marking failed %s" % docId)
                    try:
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        updated_lfn.append(docId)
                        self.logger.debug("Marked failed %s" % docId)
                    except Exception as ex:
                        msg = "Error in updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                else: updated_lfn.append(docId)
        self.logger.debug("failed file updated")
        return updated_lfn
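determine_fatal_error is referenced above but not shown. A hedged sketch of the assumed contract (the patterns are invented for illustration, not the project's list):

    def determine_fatal_error(self, failure_reason):
        """Hypothetical sketch: classify a failure reason as fatal (not worth
        retrying) or transient."""
        fatal_patterns = ('No such file', 'Permission denied', 'Disk quota exceeded')
        return any(p in str(failure_reason) for p in fatal_patterns)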
Example #17
    def mark_good(self, files):
        """
        Mark the list of files as transferred
        """
        updated_lfn = []
        good_ids = []
        if len(files) == 0:
            return updated_lfn
        for it, lfn in enumerate(files):
            hash_lfn = getHashLfn(lfn)
            self.logger.info("Marking good %s" % hash_lfn)
            self.logger.debug("Marking good %s" % lfn)
            if not self.config.isOracle:
                try:
                    document = self.db.document(hash_lfn)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
            self.logger.info("Doc %s Loaded" % hash_lfn)
            try:
                now = str(datetime.datetime.now())
                last_update = time.time()
                if self.config.isOracle:
                    docId = getHashLfn(lfn)
                    good_ids.append(docId)
                    updated_lfn.append(lfn)
                else:
                    if document['state'] not in ('killed', 'done', 'failed'):
                        outputLfn = document['lfn'].replace('store/temp', 'store', 1)
                        data = dict()
                        data['end_time'] = now
                        data['state'] = 'done'
                        data['lfn'] = outputLfn
                        data['last_update'] = last_update
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + getHashLfn(lfn)
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        updated_lfn.append(lfn)
                        self.logger.debug("Marked good %s" % lfn)
                    else: 
                        updated_lfn.append(lfn)
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue   
            except Exception as ex:
                msg = "Error updating document"
                msg += str(ex)
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                continue
        if self.config.isOracle:
            try:
                data = dict()
                data['asoworker'] = self.config.asoworker
                data['subresource'] = 'updateTransfers'
                data['list_of_ids'] = good_ids
                data['list_of_transfer_state'] = ["DONE" for x in good_ids]
                result = self.oracleDB.post(self.config.oracleFileTrans,
                                            data=encodeRequest(data))
                self.logger.debug("Marked good %s" % good_ids)
            except Exception:
                self.logger.exception('Error updating document')
                return {}
        
        self.logger.info("Transferred file %s updated, removing now source file" %docId)
        try:
            docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers','fileusertransfers'),
                                        data=encodeRequest({'subresource': 'getById', 'id': docId}))
            document = oracleOutputMapping(docbyId, None)[0]
        except Exception:
            msg = "Error getting file from source"
            self.logger.exception(msg)
            return {}

        if document["source"] not in self.site_tfc_map:
            self.logger.debug("site not found... gathering info from phedex")
            self.site_tfc_map[document["source"]] = self.get_tfc_rules(document["source"])
        pfn = self.apply_tfc_to_lfn('%s:%s' % (document["source"], lfn))
        self.logger.debug("File has to be removed now from source site: %s" % pfn)
        self.remove_files(self.userProxy, pfn)
        self.logger.debug("Transferred file removed from source")
        return updated_lfn
Example #18
    def startSlave(self, task):
        # TODO: lock task!
        # - process logger
        logger = setProcessLogger(str(task[0][3]))
        logger.info("Process %s is starting. PID %s", task[0][3], os.getpid())

        self.force_publication = False
        workflow = str(task[0][3])
        wfnamemsg = "%s: " % (workflow)

        if int(workflow[0:2]) < 18:
            msg = "Skipped. Ignore tasks created before 2018."
            logger.info(wfnamemsg+msg)
            return 0

        if len(task[1]) > self.max_files_per_block:
            self.force_publication = True
            msg = "All datasets have more than %s ready files." % (self.max_files_per_block)
            msg += " No need to retrieve task status nor last publication time."
            logger.info(wfnamemsg+msg)
        else:
            msg = "At least one dataset has less than %s ready files." % (self.max_files_per_block)
            logger.info(wfnamemsg+msg)
            # Retrieve the workflow status. If the status can not be retrieved, continue
            # with the next workflow.
            workflow_status = ''
            url = '/'.join(self.cache_area.split('/')[:-1]) #+ '/workflow'
            msg = "Retrieving status from %s" % (url)
            logger.info(wfnamemsg+msg)
            header = {"Content-Type":"application/json"}
            data = {'workflow': workflow, 'subresource': 'taskads'}
            url = 'cmsweb-testbed.cern.ch'
            connection = HTTPRequests(url,
                                      self.config.opsCert,
                                      self.config.opsKey)
            
            try:
                res = connection.get('/crabserver/preprod/workflow', data=encodeRequest(data))
            except Exception as ex:
                #logger.info(wfnamemsg+encodeRequest(data))
                logger.warn('Error retrieving status from cache for %s: %s' % (workflow, ex))
                return 0

            try:
                workflow_status = res[0]['result'][0]['status']
                msg = "Task status is %s." % workflow_status
                logger.info(wfnamemsg+msg)
            except ValueError:
                msg = "Workflow removed from WM."
                logger.error(wfnamemsg+msg)
                workflow_status = 'REMOVED'
            except Exception as ex:
                msg = "Error loading task status!"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logger.error(wfnamemsg+msg)
            # If the workflow status is terminal, go ahead and publish all the ready files
            # in the workflow.
            if workflow_status in ['COMPLETED', 'FAILED', 'KILLED', 'REMOVED']:
                self.force_publication = True
                if workflow_status in ['KILLED', 'REMOVED']:
                    self.force_failure = True
                msg = "Considering task status as terminal. Will force publication."
                logger.info(wfnamemsg+msg)
            # Otherwise...
            else:
                msg = "Task status is not considered terminal."
                logger.info(wfnamemsg+msg)
                msg = "Getting last publication time."
                logger.info(wfnamemsg+msg)
                # Get when was the last time a publication was done for this workflow (this
                # should be more or less independent of the output dataset in case there are
                # more than one).
                last_publication_time = None
                data = {}
                data['workflow'] = workflow
                data['subresource'] = 'search'
                try:
                    result = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers', 'task'),
                                               data=encodeRequest(data))
                    logger.debug("task: %s " % str(result[0]))
                    last_publication_time = getColumn(result[0], 'tm_last_publication')
                    logger.debug("task: %s " % last_publication_time)
                except Exception as ex:
                    logger.error("Error during task doc retrieving: %s" % ex)
                if last_publication_time:
                    seconds = datetime.strptime(last_publication_time, "%Y-%m-%d %H:%M:%S.%f").timetuple()
                    last_publication_time = time.mktime(seconds)

                msg = "Last publication time: %s." % str(last_publication_time)
                logger.debug(wfnamemsg+msg)
                # If this is the first time a publication would be done for this workflow, go
                # ahead and publish.
                if not last_publication_time:
                    self.force_publication = True
                    msg = "There was no previous publication. Will force publication."
                    logger.info(wfnamemsg+msg)
                # Otherwise...
                else:
                    last = last_publication_time
                    msg = "Last published block: %s" % (last)
                    logger.debug(wfnamemsg+msg)
                    # If the last publication was long time ago (> our block publication timeout),
                    # go ahead and publish.
                    now = int(time.time()) - time.timezone
                    time_since_last_publication = now - last
                    hours = int(time_since_last_publication/60/60)
                    minutes = int((time_since_last_publication - hours*60*60)/60)
                    timeout_hours = int(self.block_publication_timeout/60/60)
                    timeout_minutes = int((self.block_publication_timeout - timeout_hours*60*60)/60)
                    msg = "Last publication was %sh:%sm ago" % (hours, minutes)
                    if time_since_last_publication > self.block_publication_timeout:
                        self.force_publication = True
                        msg += " (more than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes)
                        msg += " Will force publication."
                    else:
                        msg += " (less than the timeout of %sh:%sm)." % (timeout_hours, timeout_minutes)
                        msg += " Not enough to force publication."
                    logger.info(wfnamemsg+msg)

        #logger.info(task[1])
        try:
            if self.force_publication:
                # - get info
                active_ = [{'key': [x['username'],
                                    x['user_group'],
                                    x['user_role'],
                                    x['taskname']],
                            'value': [x['destination'],
                                      x['source_lfn'],
                                      x['destination_lfn'],
                                      x['input_dataset'],
                                      x['dbs_url'],
                                      x['last_update']
                                     ]}
                           for x in task[1] if x['transfer_state'] == 3 and x['publication_state'] not in [2, 3, 5]]

                lfn_ready = []
                wf_jobs_endtime = []
                pnn, input_dataset, input_dbs_url = "", "", ""
                for active_file in active_:
                    job_end_time = active_file['value'][5]
                    if job_end_time and self.config.isOracle:
                        wf_jobs_endtime.append(int(job_end_time) - time.timezone)
                    elif job_end_time:
                        wf_jobs_endtime.append(int(time.mktime(time.strptime(str(job_end_time), '%Y-%m-%d %H:%M:%S'))) - time.timezone)
                    source_lfn = active_file['value'][1]
                    dest_lfn = active_file['value'][2]
                    self.lfn_map[dest_lfn] = source_lfn
                    if not pnn or not input_dataset or not input_dbs_url:
                        pnn = str(active_file['value'][0])
                        input_dataset = str(active_file['value'][3])
                        input_dbs_url = str(active_file['value'][4])
                    lfn_ready.append(dest_lfn)

                userDN = ''
                username = task[0][0]
                user_group = ""
                if task[0][1]:
                    user_group = task[0][1]
                user_role = ""
                if task[0][2]:
                    user_role = task[0][2]
                logger.debug("Trying to get DN %s %s %s" % (username, user_group, user_role))

                try:
                    userDN = getDNFromUserName(username, logger)
                except Exception as ex:
                    msg = "Error retrieving the user DN"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logger.error(msg)
                    return 1

                # Get metadata
                toPublish = []
                publDescFiles_list = self.getPublDescFiles(workflow, lfn_ready)
                for file_ in active_:
                    for _, doc in enumerate(publDescFiles_list):
                        #logger.info(type(doc))
                        #logger.info(doc)
                        if doc["lfn"] == file_["value"][2]:
                            doc["User"] = username
                            doc["Group"] = file_["key"][1]
                            doc["Role"] = file_["key"][2]
                            doc["UserDN"] = userDN
                            doc["Destination"] = file_["value"][0]
                            doc["SourceLFN"] = file_["value"][1]
                            toPublish.append(doc)
                with open("/tmp/publisher_files/"+workflow+'.json', 'w') as outfile:
                    json.dump(toPublish, outfile)
                logger.info(". publisher.py %s" % (workflow))

                # find the location in the current environment of the script we want to run
                import Publisher.TaskPublish as tp
                taskPublishScript = tp.__file__
                subprocess.call(["python", taskPublishScript, workflow])

        except Exception:
            logger.exception("Exception!")


        return 0
Example #19
    def active_tasks(self, db):
        """
        :param db: HTTPRequest object to access proper REST as createdin __init__ method
        TODO  detail here the strucutre it returns
        :return: a list tuples [(task,info)]. One element for each task which has jobs to be published
                 each list element has the format (task, info) where task and info are lists
                 task=[] a set fo 4 attributes formatted as a list of strings:
                    [username, usergroup, userrole, taskname]
                    this is a subset of the information present in each element of thee next list !
                 info=[filedic1, filedic2, filedic3...] a list of dictionaries,one per file with keys:
                   u'username', u'cache_url', u'source_lfn', u'publication_state', u'destination',
                   u'user_role', u'last_update', u'input_dataset', u'dbs_url', u'aso_worker',
                   u'user_group', u'taskname', u'transfer_state', u'destination_lfn'
        """

        self.logger.debug("Retrieving publications from oracleDB")
        uri = self.REST_filetransfers
        fileDoc = {}
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquirePublication'
        data = encodeRequest(fileDoc)
        results = ''
        try:
            results = db.post(uri=uri, data=data)
        except Exception as ex:
            self.logger.error("Failed to acquire publications from %s: %s",
                              uri, ex)
            return []

        self.logger.debug(
            "Retrieving max.100000 acquired publications from oracleDB")
        fileDoc = dict()
        fileDoc['asoworker'] = self.config.asoworker
        fileDoc['subresource'] = 'acquiredPublication'
        fileDoc['grouping'] = 0
        fileDoc['limit'] = 100000
        result = []
        uri = self.REST_filetransfers
        data = encodeRequest(fileDoc)

        try:
            results = db.get(uri=uri, data=data)
            result.extend(oracleOutputMapping(results))
        except Exception as ex:
            self.logger.error("Failed to acquire publications from %s: %s",
                              uri, ex)
            return []

        self.logger.debug("publen: %s", len(result))

        self.logger.debug("%s acquired publications retrieved", len(result))

        # TODO: join query for publisher (same of submitter)
        unique_tasks = [
            list(i) for i in set(
                tuple([
                    x['username'], x['user_group'], x['user_role'],
                    x['taskname']
                ]) for x in result if x['transfer_state'] == 3)
        ]

        info = []
        for task in unique_tasks:
            info.append([x for x in result if x['taskname'] == task[3]])
        return zip(unique_tasks, info)
Example #20
    def mark_failed(self, files=[], force_fail=False, submission_error=False):
        """
        Something failed for these files so increment the retry count
        """
        updated_lfn = []
        for lfn in files:
            data = {}
            if not isinstance(lfn, dict):
                if 'temp' not in lfn:
                    temp_lfn = lfn.replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn
            else:
                if 'temp' not in lfn['value'][0]:
                    temp_lfn = lfn['value'][0].replace('store', 'store/temp', 1)
                else:
                    temp_lfn = lfn['value'][0]

            # Load document and get the retry_count
            if self.config.isOracle:
                docId = getHashLfn(temp_lfn)
                self.logger.debug("Marking failed %s" % docId)
                try:
                    docbyId = self.oracleDB.get(self.config.oracleFileTrans.replace('filetransfers',
                                                                                    'fileusertransfers'),
                                                data=encodeRequest({'subresource': 'getById', 'id': docId}))
                except Exception as ex:
                    self.logger.error("Error updating failed docs: %s" %ex)
                    continue
                document = oracleOutputMapping(docbyId, None)[0]
                self.logger.debug("Document: %s" % document)

                fileDoc = dict()
                fileDoc['asoworker'] = self.config.asoworker
                fileDoc['subresource'] = 'updateTransfers'
                fileDoc['list_of_ids'] = docId

                if force_fail or document['transfer_retry_count'] + 1 > self.max_retry:
                    fileDoc['list_of_transfer_state'] = 'FAILED'
                else:
                    fileDoc['list_of_transfer_state'] = 'RETRY'
                # every failure mode below resets the retry value to 1
                fileDoc['list_of_retry_value'] = 1
                if submission_error:
                    fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                elif not self.valid_proxy:
                    fileDoc['list_of_failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                else:
                    fileDoc['list_of_failure_reason'] = "Site config problem."

                self.logger.debug("update: %s" % fileDoc)
                try:
                    result = self.oracleDB.post(self.config.oracleFileTrans,
                                                data=encodeRequest(fileDoc))
                    # record the docId only once the update succeeded
                    updated_lfn.append(docId)
                except Exception as ex:
                    msg = "Error updating document"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue

            else:
                docId = getHashLfn(temp_lfn)
                try:
                    document = self.db.document(docId)
                except Exception as ex:
                    msg = "Error loading document from couch"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    self.logger.error(msg)
                    continue
                if document['state'] not in ('killed', 'done', 'failed'):
                    now = str(datetime.datetime.now())
                    last_update = time.time()
                    # Prepare data to update the document in couch
                    if force_fail or len(document['retry_count']) + 1 > self.max_retry:
                        data['state'] = 'failed'
                    else:
                        data['state'] = 'retry'
                    if submission_error:
                        data['failure_reason'] = "Job could not be submitted to FTS: temporary problem of FTS"
                    elif not self.valid_proxy:
                        data['failure_reason'] = "Job could not be submitted to FTS: user's proxy expired"
                    else:
                        data['failure_reason'] = "Site config problem."
                    data['last_update'] = last_update
                    data['retry'] = now

                    # Update the document in couch
                    self.logger.debug("Marking failed %s" % docId)
                    try:
                        updateUri = "/" + self.db.name + "/_design/AsyncTransfer/_update/updateJobs/" + docId
                        updateUri += "?" + urllib.urlencode(data)
                        self.db.makeRequest(uri=updateUri, type="PUT", decode=False)
                        updated_lfn.append(docId)
                        self.logger.debug("Marked failed %s" % docId)
                    except Exception as ex:
                        msg = "Error in updating document in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
                    try:
                        self.db.commit()
                    except Exception as ex:
                        msg = "Error commiting documents in couch"
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        self.logger.error(msg)
                        continue
            self.logger.debug("failed file updated")
            return updated_lfn
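
A minimal usage sketch for mark_failed; the 'worker' object and the LFN are illustrative, while the keyword semantics (force_fail marks files FAILED regardless of the retry count, submission_error records an FTS submission problem) follow the code above.

# 'worker' is assumed to be an instance of the class defining mark_failed,
# configured for Oracle; the LFN below is illustrative.
failed_lfns = ['/store/temp/user/jdoe/dataset/file_001.root']

# ordinary failure: retried until max_retry is exceeded, then marked FAILED
updated = worker.mark_failed(files=failed_lfns)

# FTS submission problem: failure reason recorded, file retried later
updated = worker.mark_failed(files=failed_lfns, submission_error=True)

# give up immediately, regardless of the retry count
updated = worker.mark_failed(files=failed_lfns, force_fail=True)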
Example no. 21
    def active_users(self, db):
        """
        Query a view for users with files to transfer. Get this from the
        following view:
            publish?group=true&group_level=1
        """
        if self.config.isOracle:
            active_users = []

            fileDoc = {}
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquirePublication'

            self.logger.debug("Retrieving publications from oracleDB")

            results = ''
            try:
                results = db.post(self.config.oracleFileTrans,
                                  data=encodeRequest(fileDoc))
            except Exception as ex:
                self.logger.error("Failed to acquire publications "
                                  "from oracleDB: %s" % ex)

            fileDoc = dict()
            fileDoc['asoworker'] = self.config.asoworker
            fileDoc['subresource'] = 'acquiredPublication'
            fileDoc['grouping'] = 0

            self.logger.debug("Retrieving acquired puclications from oracleDB")

            result = []
            try:
                results = db.get(self.config.oracleFileTrans,
                                 data=encodeRequest(fileDoc))
                result = oracleOutputMapping(results)
            except Exception as ex:
                self.logger.error("Failed to retrieve acquired publications "
                                  "from oracleDB: %s" % ex)

            self.logger.debug("%s acquired puclications retrieved" % len(result))
            # TODO: join query for publisher (same as the submitter)
            unique_users = [list(i) for i in
                            set((x['username'], x['user_group'], x['user_role'])
                                for x in result if x['transfer_state'] == 3)]
            return unique_users
        else:
            # TODO: Remove stale=ok for now until tested
            # query = {'group': True, 'group_level': 3, 'stale': 'ok'}
            query = {'group': True, 'group_level': 3}
            try:
                users = db.loadView('DBSPublisher', 'publish', query)
            except Exception as e:
                self.logger.exception('A problem occurred '
                                      'when contacting couchDB: %s' % e)
                return []

            if len(users['rows']) <= self.config.publication_pool_size:
                active_users = users['rows']
                active_users = [x['key'] for x in active_users]
            else:
                pool_size = self.config.publication_pool_size
                sorted_users = self.factory.loadObject(self.config.algoName,
                                                       args=[self.config,
                                                             self.logger,
                                                             users['rows'],
                                                             pool_size],
                                                       getFromCache=False,
                                                       listFlag=True)
                active_users = sorted_users()[:self.config.publication_pool_size]
            self.logger.info('%s active users' % len(active_users))
            self.logger.debug('Active users are: %s' % active_users)

            return active_users
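
For reference, a minimal sketch of how active_users might be driven; the 'worker' object is illustrative, and the [username, group, role] triplets follow the Oracle branch above.

# 'worker' and 'db' are assumed to be configured as in the example above.
for username, group, role in worker.active_users(db):
    # each triplet identifies one user with publications to process
    print("user %s (group=%s, role=%s) has files to publish"
          % (username, group, role))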
Example no. 22
    def testFileTransferPUT(self):
        """
        _testFileTransferPUT_

        Just test simple testFileTransferPUT with fake data
        """
        # We just send fake data which is not monitored by dashboard.
        # Publication ON/OFF is decided only the first time a user is seen.
        for user in self.users:
            timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
            for i in range(self.totalFiles):
                now = int(time.time())
                # Generate a taskname
                workflowName = ""
                taskname = ""
                if user not in self.tasks:
                    workflowName = "".join([random.choice(string.ascii_lowercase) for _ in range(20)]) + "_" + str(now)
                    publicationState = random.choice(['NEW', 'NOT_REQUIRED'])
                else:
                    workflowName = self.tasks[user]['workflowName']
                    publicationState = self.tasks[user]['publication']
                transferState = random.choice(['NEW', 'DONE'])
                taskname = generateTaskName(user, workflowName, timestamp)
                finalLfn = self.lfnBase % (user, workflowName, i, random.randint(1, 9999))
                idHash = getHashLfn(finalLfn)
                self.fileDoc['id'] = idHash
                self.fileDoc['job_id'] = i
                self.fileDoc['username'] = user
                self.fileDoc['taskname'] = taskname
                self.fileDoc['start_time'] = int(time.time())
                self.fileDoc['source_lfn'] = finalLfn
                self.fileDoc['destination_lfn'] = finalLfn
                self.fileDoc['transfer_state'] = transferState
                self.fileDoc['publication_state'] = publicationState
                print(self.fileDoc)
                self.server.put('/crabserver/dev/fileusertransfers', data=encodeRequest(self.fileDoc))
                # Putting the same document twice should raise an error:
                # self.server.put('/crabserver/dev/fileusertransfers', data=urllib.urlencode(self.fileDoc))
                # These tasks are kept for the subsequent calls below
                if user not in self.tasks:
                    self.tasks[user] = {'workflowName': workflowName, 'taskname': taskname, 'listOfIds': [],
                                        'publication': publicationState, 'toTransfer': 0, 'toPublish': 0, 'total': self.totalFiles}
                if self.tasks[user]['publication'] == 'NEW':
                    self.tasks[user]['toPublish'] += 1
                if transferState == 'NEW':
                    self.tasks[user]['toTransfer'] += 1
                self.tasks[user]['listOfIds'].append(idHash)
        # This should raise an error
        for username in self.tasks:
            taskname = self.tasks[username]['taskname']
            for query in ['getTransferStatus', 'getPublicationStatus']:
                result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': query,
                                                                                    'username': username,
                                                                                    'taskname': taskname}))
                print(result)
                print(result[0]['result'])
                taskInfoDict = oracleOutputMapping(result, 'id')
                print(taskInfoDict)
                for key, docDict in taskInfoDict.items():
                    result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'getById', 'id': key}))

        randomUsers = random.sample(set(self.users), 3)  # Take half of the users and kill their transfers for specific task
        for username in randomUsers:
            taskname = self.tasks[username]['taskname']
            result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfers',
                                                                                                  'username': username,
                                                                                                  'taskname': taskname}))
            print(result)
        # oneUser is left for killing a list of IDs
        # leftUsers will be killing transfers one by one for specific id.
        leftUsers = list(set(self.users) - set(randomUsers))
        oneUser = random.sample(set(leftUsers), 1)
        leftUsers = list(set(leftUsers) - set(oneUser))
        for username in leftUsers:
            # First get all remaining ids for this user
            result = self.server.get('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'getTransferStatus',
                                                                                                 'username': username,
                                                                                                 'taskname': self.tasks[username]['taskname']}))
            resultOut = oracleOutputMapping(result, None)
            print("**"*50)
            for outDict in resultOut:
                print(outDict)
                result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfersById',
                                                                                                      'username': username,
                                                                                                      'listOfIds': outDict['id']}))
                print(result)
            print(resultOut)
            print(result)
        for username in oneUser:
            result = self.server.post('/crabserver/dev/fileusertransfers', data=encodeRequest({'subresource': 'killTransfersById',
                                                                                               'username': username,
                                                                                               'listOfIds': self.tasks[username]['listOfIds']}, ['listOfIds']))
            # Since this asks to kill everything still in NEW, double-check against what was submitted and that the kill output is correct
            print(result)
            print(self.tasks[username])
Example no. 23
            pu = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            break
        except Exception as e:
            logger.debug(str(e))
            maxi = maxi + 1
            continue

if __name__ == "__main__":
    server = HTTPRequests('cmsweb-testbed.cern.ch',
                          '/data/srv/asyncstageout/state/asyncstageout/creds/OpsProxy',
                          '/data/srv/asyncstageout/state/asyncstageout/creds/OpsProxy')

    result = server.get('/crabserver/preprod/filetransfers', 
                        data=encodeRequest({'subresource': 'groupedTransferStatistics', 'grouping': 0}))

    results = oracleOutputMapping(result)


    status = {'transfers':{}, 'publications':{}}
    tmp = {'transfers':{ 'DONE':0, 'ACQUIRED':0, 'SUBMITTED':0, 'FAILED':0, 'RETRY':0 }, 
           'publications':{'DONE':0, 'ACQUIRED':0, 'NEW':0, 'FAILED':0, 'RETRY':0}}

    #past = open("tmp_transfer")
    #tmp = json.load(past)

    for doc in results:
        if doc['aso_worker']=="asodciangot1":
            if 'transfers' in tmp and TRANSFERDB_STATES[doc['transfer_state']] in tmp['transfers']:
                status['transfers'][TRANSFERDB_STATES[doc['transfer_state']]] = - tmp['transfers'][TRANSFERDB_STATES[doc['transfer_state']]] + doc['nt']
                tmp['transfers'][TRANSFERDB_STATES[doc['transfer_state']]] = doc['nt']
            else: