Example 1
class FTSFileOperation(FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.server_url = config.fts_server
        self.server_id = 0 # server id in the DB

        # Parameter "retry" for fts3.new_job. 0 = server default
        self.fts_retry = config.get('fts_retry', 0)

        # String passed to fts3.new_*_job(metadata = _)
        self.metadata_string = config.get('metadata_string', 'Dynamo')

        # Proxy to be forwarded to FTS. x509proxy is overwritten per task in start_transfers;
        # x509proxy_orig keeps the configured value
        self.x509proxy = config.get('x509proxy', None)
        self.x509proxy_orig = config.get('x509proxy', None)

        # Bookkeeping device
        self.db = MySQL(config.db_params)

        # Reuse the context object
        self.keep_context = config.get('keep_context', False)
        self._context = None

    def num_pending_transfers(self): #override
        # Check the number of files in queue
        # We first thought about counting files with /files, but FTS appears to return at most 1000 entries even when "limit" is set much larger
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_transfers)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE', 'STAGING', 'STARTED']

        jobs = self._ftscall('list_jobs', state_in = ['SUBMITTED', 'ACTIVE', 'STAGING'])

        # Randomize the job order so that repeated counting does not always scan the same jobs first
        from random import shuffle
        shuffle(jobs)

        for job in jobs:
            job_info = self._ftscall('get_job_status', job['job_id'], list_files = True)
            for file_info in job_info['files']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_transfers + 1:
                        # no need to query further
                        return num_pending

        return num_pending

    def num_pending_deletions(self): #override
        # See above
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_deletions)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE']

        jobs = self._ftscall('list_jobs', state_in = ['SUBMITTED', 'ACTIVE'])

        # Randomize the job order as above
        from random import shuffle
        shuffle(jobs)

        for job in jobs:
            job_info = self._ftscall('get_job_status', job['job_id'], list_files = True)
            for file_info in job_info['dm']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_deletions + 1:
                        # no need to query further
                        return num_pending

        return num_pending

    def form_batches(self, tasks): #override
        if len(tasks) == 0:
            return []

        # FTS3 has no restriction on how to group the transfers, but apparently cannot take
        # thousands of tasks at once
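        # e.g. with batch_size = 3, tasks [t1, ..., t7] are grouped as [[t1, t2, t3], [t4, t5, t6], [t7]]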
        batches = [[]]
        for task in tasks:
            if len(batches[-1]) == self.batch_size:
                batches.append([])
            batches[-1].append(task)

        return batches

    def start_transfers(self, batch_id, batch_tasks): #override
        result = {}

        stage_files = []
        transfers = []

        s_pfn_to_task = {}
        t_pfn_to_task = {}

        for task in batch_tasks:
            sub = task.subscription
            lfn = sub.file.lfn
            dest_pfn = sub.destination.to_pfn(lfn, 'gfal2')
            source_pfn = task.source.to_pfn(lfn, 'gfal2')

            # Use the destination proxy by default; staging out of tape (MSS) needs the source proxy
            self.x509proxy = sub.destination.x509proxy
            if task.source.storage_type == Site.TYPE_MSS:
                self.x509proxy = task.source.x509proxy

            if dest_pfn is None or source_pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                LOG.warning('Could not obtain PFN for %s at %s or %s', lfn, sub.destination.name, task.source.name)
                result[task] = False
                continue

            if self.checksum_algorithm:
                checksum = '%s:%s' % (self.checksum_algorithm, str(sub.file.checksum[self.checksum_index]))
                verify_checksum = 'target'
            else:
                checksum = None
                verify_checksum = False
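            # e.g. checksum = 'adler32:4f92a318' (hypothetical algorithm:value pair)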

            if task.source.storage_type == Site.TYPE_MSS:
                LOG.debug('Staging %s at %s', lfn, task.source.name)

                # need to stage first
                stage_files.append((source_pfn, dest_pfn, checksum, sub.file.size))

                # task identified by the source PFN
                s_pfn_to_task[source_pfn] = task
            else:
                LOG.info("Here we are")
                LOG.info('Submitting transfer of %s from %s to %s to FTS', lfn, task.source.name, sub.destination.name)

                transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum = checksum, filesize = sub.file.size))

                # there should be only one task per destination pfn
                t_pfn_to_task[dest_pfn] = task

        if len(stage_files) != 0:
            LOG.debug('Submit new staging job for %d files', len(stage_files))
            job = fts3.new_staging_job([ff[0] for ff in stage_files], bring_online = 36000, metadata = self.metadata_string)
            # All files staged here come from MSS sources; reuse the source proxy of the last task seen
            success = self._submit_job(job, 'staging', batch_id, dict((pfn, task.id) for pfn, task in s_pfn_to_task.iteritems()), x509 = task.source.x509proxy)

            for source_pfn, _, _, _ in stage_files:
                result[s_pfn_to_task[source_pfn]] = success

            if success and not self._read_only:
                LOG.debug('Recording staging queue')
                fields = ('id', 'source', 'destination', 'checksum', 'size')
                mapping = lambda ff: (s_pfn_to_task[ff[0]].id,) + ff
                self.db.insert_many('fts_staging_queue', fields, mapping, stage_files)

        if len(transfers) != 0:
            LOG.debug('Submit new transfer job for %d files', len(transfers))
            # sub still points to the last subscription of the loop above; all transfers in the
            # batch are assumed to share the destination proxy
            LOG.info('Submitting transfer job from disk to site %s with proxy %s', sub.destination.name, sub.destination.x509proxy)

            job = fts3.new_job(transfers, retry = self.fts_retry, overwrite = True,
                               verify_checksum = verify_checksum, metadata = self.metadata_string)
            success = self._submit_job(job, 'transfer', batch_id, dict((pfn, task.id) for pfn, task in t_pfn_to_task.iteritems()), x509 = sub.destination.x509proxy)

            for transfer in transfers:
                dest_pfn = transfer['destinations'][0]
                result[t_pfn_to_task[dest_pfn]] = success

        return result

    def start_deletions(self, batch_id, batch_tasks): #override
        result = {}

        pfn_to_task = {}

        for task in batch_tasks:
            desub = task.desubscription
            lfn = desub.file.lfn
            pfn = desub.site.to_pfn(lfn, 'gfal2')

            if pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                result[task] = False
                continue

            # there should be only one task per destination pfn
            pfn_to_task[pfn] = task

        job = fts3.new_delete_job(pfn_to_task.keys(), metadata = self.metadata_string)

        success = self._submit_job(job, 'deletion', batch_id, dict((pfn, task.id) for pfn, task in pfn_to_task.iteritems()))

        for task in pfn_to_task.itervalues():
            result[task] = success

        return result

    def cancel_transfers(self, task_ids): #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids): #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self): #override
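        # Each statement below removes orphaned bookkeeping rows: LEFT JOIN against the
        # owning table, then delete where no matching row was found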
        sql = 'DELETE FROM f USING `fts_transfer_tasks` AS f'
        sql += ' LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_transfer_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_staging_queue` AS f'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_tasks` AS f'
        sql += ' LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_deletion_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_transfer_batches` AS f'
        sql += ' LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_batches` AS f'
        sql += ' LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `fts_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id): #override
        if self.server_id == 0:
            self._set_server_id()

        results = self._get_status(batch_id, 'transfer')

        staged_tasks = []

        for task_id, status, exitcode, msg, start_time, finish_time in self._get_status(batch_id, 'staging'):
            if status == FileQuery.STAT_DONE:
                staged_tasks.append(task_id)
                results.append((task_id, FileQuery.STAT_QUEUED, -1, None, None, None))
            else:
                # these tasks won't appear in results from _get_status('transfer')
                # because no transfer jobs have been submitted yet
                results.append((task_id, status, exitcode, None, start_time, finish_time))

        if len(staged_tasks) != 0:
            if self.checksum_algorithm:
                verify_checksum = 'target'
            else:
                verify_checksum = None

            which_dest = ""
            transfers = []
            pfn_to_tid = {}

            for task_id, source_pfn, dest_pfn, checksum, filesize in self.db.select_many('fts_staging_queue', ('id', 'source', 'destination', 'checksum', 'size'), 'id', staged_tasks):
                for s in inventory.sites.itervalues():
                    if s.backend in dest_pfn and s.backend != "":
                        which_dest = s
                #transfers = []
                #pfn_to_tid = {}
                transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum = checksum, filesize = filesize))
                pfn_to_tid[dest_pfn] = task_id

            job = fts3.new_job(transfers, retry = self.fts_retry, overwrite = True, verify_checksum = verify_checksum, metadata = self.metadata_string)
                
                #which_dest = ""
                #for s in inventory.sites.itervalues():
                #    if s.backend in dest_pfn and s.backend != "":
                #        which_dest = s
            LOG.info("Submitting job from buffer to site %s with proxy %s." % (which_dest.name, which_dest.x509proxy))
            #LOG.info("dest_pfn: %s" % dest_pfn) 
            success = self._submit_job(job, 'transfer', batch_id, pfn_to_tid, x509=which_dest.x509proxy)

            if success and not self._read_only:
                self.db.delete_many('fts_staging_queue', 'id', pfn_to_tid.values())

        return results

    def get_deletion_status(self, batch_id): #override
        if self.server_id == 0:
            self._set_server_id()

        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id, history_id): #override
        self._write_history(history_db, task_id, history_id, 'transfer')

    def write_deletion_history(self, history_db, task_id, history_id): #override
        self._write_history(history_db, task_id, history_id, 'deletion')

    def forget_transfer_status(self, task_id): #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id): #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, task_id): #override
        return self._forget_batch(task_id, 'transfer')

    def forget_deletion_batch(self, task_id): #override
        return self._forget_batch(task_id, 'deletion')

    def _ftscall(self, method, *args, **kwd):
        return self._do_ftscall(binding = (method, args, kwd))

    def _ftscallurl(self, url):
        # Call to FTS URLs that don't have python bindings
        return self._do_ftscall(url = url)

    def _do_ftscall(self, binding = None, url = None):
        proxy = self.x509proxy

        if binding is not None:
            method, args, kwd = binding
            # A per-call proxy can be passed via the ucert keyword argument
            if 'ucert' in kwd:
                LOG.info('Setting proxy to %s', kwd['ucert'])
                proxy = kwd['ucert']

        if self._context is None:
            # request_class = Request -> use "requests"-based https call (instead of default PyCURL,
            # which may not be able to handle proxy certificates depending on the cURL installation)
            # verify = False -> do not verify the server certificate
            LOG.info("Setting context with proxy %s" % proxy)
            context = fts3.Context(self.server_url, ucert = proxy, ukey = proxy,
                                   request_class = Request, verify = False)

            if self.keep_context:
                self._context = context
        else:
            context = self._context

        if binding is not None:
            reqstring = binding[0]
        else:
            reqstring = url

        LOG.debug('FTS: %s', reqstring)

        wait_time = 1.

        for attempt in xrange(10):
            try:
                if binding is not None:
                    method, args, kwd = binding

                    if "ucert" in kwd:
                        del kwd["ucert"]
                    if "ukey" in kwd:
                        del kwd["ukey"]

                    return getattr(fts3, method)(context, *args, **kwd)
                else:
                    return json.loads(context.get(url))
            except fts_exceptions.ServerError as exc:
                if str(exc.reason) == '500':
                    # Internal server error - let's try again
                    pass
            except fts_exceptions.TryAgain:
                pass

            time.sleep(wait_time)
            wait_time *= 1.5

        LOG.error('Failed to communicate with FTS server: %s', reqstring)
        raise RuntimeError('Failed to communicate with FTS server: %s' % reqstring)

    def _submit_job(self, job, optype, batch_id, pfn_to_tid, x509=None):
        if self._read_only:
            job_id = 'test'
        else:
            try:
                if x509 is not None:
                    job_id = self._ftscall('submit', job, ucert=x509, ukey=x509)
                else:
                    job_id = self._ftscall('submit', job)
            except:
                exc_type, exc, tb = sys.exc_info()
                LOG.error('Failed to submit %s to FTS: Exception %s (%s)', optype, exc_type.__name__, str(exc))
                return False

        # list of file-level operations (one-to-one with pfn)
        try:
            if optype == 'transfer' or optype == 'staging':
                key = 'files'
            else:
                key = 'dm'

            #LOG.info("List_files call 3")
            fts_files = self._ftscall('get_job_status', job_id = job_id, list_files = True)[key]
        except:
            exc_type, exc, tb = sys.exc_info()
            LOG.error('Failed to get status of job %s from FTS: Exception %s (%s)', job_id, exc_type.__name__, str(exc))
            return False

        if self.server_id == 0:
            self._set_server_id()

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_batches'
            columns = ('batch_id', 'task_type', 'fts_server_id', 'job_id')
            values = (batch_id, optype, self.server_id, job_id)
        else:
            table_name = 'fts_deletion_batches'
            columns = ('batch_id', 'fts_server_id', 'job_id')
            values = (batch_id, self.server_id, job_id)

        if not self._read_only:
            fts_batch_id = self.db.insert_get_id(table_name, columns = columns, values = values)

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_tasks'
            pfn_key = 'dest_surl'
        else:
            table_name = 'fts_deletion_tasks'
            pfn_key = 'source_surl'

        fields = ('id', 'fts_batch_id', 'fts_file_id')
        mapping = lambda f: (pfn_to_tid[f[pfn_key]], fts_batch_id, f['file_id'])

        if not self._read_only:
            self.db.insert_many(table_name, fields, mapping, fts_files, do_update = True, update_columns = ('fts_batch_id', 'fts_file_id'))

        return True

    def _cancel(self, task_ids, optype):
        sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
        result = self.db.execute_many(sql.format(op = optype), MySQL.bare('f.`id`'), task_ids)

        by_job = collections.defaultdict(list)

        for job_id, file_id in result:
            by_job[job_id].append(file_id)

        if not self._read_only:
            for job_id, ids in by_job.iteritems():
                try:
                    self._ftscall('cancel', job_id, file_ids = ids)
                except:
                    LOG.error('Failed to cancel FTS job %s', job_id)
    
    def _get_status(self, batch_id, optype):
        if optype == 'transfer' or optype == 'staging':
            sql = 'SELECT `id`, `job_id` FROM `fts_transfer_batches`'
            sql += ' WHERE `task_type` = %s AND `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, optype, self.server_id, batch_id)
            task_table_name = 'fts_transfer_tasks'
        else:
            sql = 'SELECT `id`, `job_id` FROM `fts_deletion_batches`'
            sql += ' WHERE `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, self.server_id, batch_id)
            task_table_name = 'fts_deletion_tasks'

        message_pattern = re.compile(r'(?:DESTINATION|SOURCE|TRANSFER|DELETION) \[([0-9]+)\] (.*)')
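        # e.g. reason 'DESTINATION [2] No such file or directory' yields exitcode 2 (hypothetical message)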

        results = []

        for fts_batch_id, job_id in batch_data:
            LOG.debug('Checking status of FTS %s batch %s', optype, job_id)

            sql = 'SELECT `fts_file_id`, `id` FROM `{table}` WHERE `fts_batch_id` = %s'.format(table = task_table_name)
            fts_to_task = dict(self.db.xquery(sql, fts_batch_id))

            try:
                #LOG.info("List_files call 4")
                result = self._ftscall('get_job_status', job_id = job_id, list_files = True)
            except:
                LOG.error('Failed to get %s job status for FTS job %s', optype, job_id)
                continue
    
            if optype == 'transfer' or optype == 'staging':
                fts_files = result['files']
            else:
                fts_files = result['dm']

            for fts_file in fts_files:
                try:
                    task_id = fts_to_task[fts_file['file_id']]
                except KeyError:
                    continue
    
                state = fts_file['file_state']
                exitcode = -1
                start_time = None
                finish_time = None
                get_time = False

                try:
                    message = fts_file['reason']
                except KeyError:
                    message = None

                if message is not None:
                    # Check if reason follows a known format (from which we can get the exit code)
                    matches = message_pattern.match(message)
                    if matches is not None:
                        exitcode = int(matches.group(1))
                        message = matches.group(2)
                    # Additionally, if the message is a known one, convert the exit code
                    c = find_msg_code(message)
                    if c is not None:
                        exitcode = c

                    # HDFS site with gridftp-hdfs gives a I/O error (500) when the file is not there
                    if optype == 'deletion' and 'Input/output error' in message:
                        exitcode = errno.ENOENT

                if state == 'FINISHED':
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    get_time = True

                elif state == 'FAILED':
                    status = FileQuery.STAT_FAILED
                    get_time = True

                elif state == 'CANCELED':
                    status = FileQuery.STAT_CANCELLED
                    get_time = True

                elif state == 'SUBMITTED':
                    status = FileQuery.STAT_NEW

                else:
                    status = FileQuery.STAT_QUEUED

                if optype == 'transfer' and exitcode == errno.EEXIST:
                    # Transfer + destination exists -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                elif optype == 'deletion' and exitcode == errno.ENOENT:
                    # Deletion + destination does not exist -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    
                if get_time:
                    try:
                        start_time = calendar.timegm(time.strptime(fts_file['start_time'], '%Y-%m-%dT%H:%M:%S'))
                    except TypeError: # start time is NULL (can happen when the job is cancelled)
                        start_time = None
                    try:
                        finish_time = calendar.timegm(time.strptime(fts_file['finish_time'], '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:
                        finish_time = None

                LOG.debug('%s %d: %s, %d, %s, %s, %s', optype, task_id, FileQuery.status_name(status), exitcode, message, start_time, finish_time)
    
                results.append((task_id, status, exitcode, message, start_time, finish_time))

        return results

    def _write_history(self, history_db, task_id, history_id, optype):
        if not self._read_only:
            history_db.db.insert_update('fts_servers', ('url',), self.server_url)

        try:
            server_id = history_db.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)[0]
        except IndexError:
            server_id = 0

        sql = 'SELECT b.`job_id`, t.`fts_file_id` FROM `fts_{op}_tasks` AS t'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = t.`fts_batch_id`'
        sql += ' WHERE t.`id` = %s'

        try:
            fts_job_id, fts_file_id = self.db.query(sql.format(op = optype), task_id)[0]
        except IndexError:
            return

        if not self._read_only:
            history_db.db.insert_update('fts_batches', ('fts_server_id', 'job_id'), server_id, fts_job_id)
            batch_id = history_db.db.query('SELECT `id` FROM `fts_batches` WHERE `fts_server_id` = %s AND `job_id` = %s', server_id, fts_job_id)[0]

            history_db.db.insert_update('fts_file_{op}s'.format(op = optype), ('id', 'fts_batch_id', 'fts_file_id'), history_id, batch_id, fts_file_id)

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_tasks` WHERE `id` = %s'.format(optype = optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_batches` WHERE `batch_id` = %s'.format(optype = optype)
        self.db.query(sql, batch_id)

    def _set_server_id(self):
        if not self._read_only:
            self.db.query('INSERT INTO `fts_servers` (`url`) VALUES (%s) ON DUPLICATE KEY UPDATE `url`=VALUES(`url`)', self.server_url)

        result = self.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)
        if len(result) == 0:
            self.server_id = 0
        else:
            self.server_id = result[0]
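
A minimal usage sketch (not part of the original source): the configuration keys follow what __init__ above reads; the endpoint, proxy path and database name are hypothetical, and Configuration is assumed to accept keyword arguments as in create_appmanager in Example 2.

config = Configuration(
    fts_server = 'https://fts3.example.org:8446',  # hypothetical FTS endpoint
    fts_retry = 0,                                 # 0 = FTS server default
    metadata_string = 'Dynamo',
    x509proxy = '/tmp/x509up_u1000',               # hypothetical proxy path
    keep_context = False,
    db_params = Configuration(db = 'dynamo'))      # hypothetical bookkeeping DB

ops = FTSFileOperation(config)

# form_batches caps each batch at self.batch_size (defined by the parent classes)
for i, batch in enumerate(ops.form_batches(tasks)):
    ops.start_transfers(batch_id = i, batch_tasks = batch)
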
Example 2
class MySQLAppManager(AppManager):
    def __init__(self, config):
        AppManager.__init__(self, config)

        if not hasattr(self, '_mysql'):
            db_params = Configuration(config.db_params)
            db_params.reuse_connection = True  # we use locks

            self._mysql = MySQL(db_params)

        # make sure applications row with id 0 exists
        count = self._mysql.query(
            'SELECT COUNT(*) FROM `applications` WHERE `id` = 0')[0]

        if count == 0:
            # Cannot insert with id = 0 (it would be interpreted as the next auto_increment id unless a server-wide setting is changed)
            # Insert with an implicit id first and update afterwards
            columns = ('auth_level', 'title', 'path', 'status', 'user_id',
                       'user_host')
            values = (AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')
            insert_id = self._mysql.insert_get_id('applications',
                                                  columns=columns,
                                                  values=values)

            self._mysql.query(
                'UPDATE `applications` SET `id` = 0 WHERE `id` = %s',
                insert_id)

    def get_applications(self,
                         older_than=0,
                         status=None,
                         app_id=None,
                         path=None):  #override
        sql = 'SELECT `applications`.`id`, 0+`applications`.`auth_level`, `applications`.`title`, `applications`.`path`, `applications`.`args`,'
        sql += ' `applications`.`timeout`, 0+`applications`.`status`, `applications`.`server`, `applications`.`exit_code`, `users`.`name`, `applications`.`user_host`'
        sql += ' FROM `applications` INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'

        constraints = []
        args = []
        if older_than > 0:
            constraints.append(
                'UNIX_TIMESTAMP(`applications`.`timestamp`) < %s')
            args.append(older_than)
        if status is not None:
            constraints.append('`applications`.`status` = %s')
            args.append(status)
        if app_id is not None:
            constraints.append('`applications`.`id` = %s')
            args.append(app_id)
        if path is not None:
            constraints.append('`applications`.`path` = %s')
            args.append(path)

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        applications = []
        # the unpacked `args` column is renamed to app_args to avoid shadowing the query arguments
        for aid, auth_level, title, path, app_args, timeout, status, server, exit_code, uname, uhost in self._mysql.xquery(
                sql, *args):
            applications.append({
                'appid': aid,
                'auth_level': auth_level,
                'user_name': uname,
                'user_host': uhost,
                'title': title,
                'path': path,
                'args': app_args,
                'timeout': timeout,
                'status': int(status),
                'server': server,
                'exit_code': exit_code
            })

        return applications

    def get_writing_process_id(self):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `applications` WHERE `auth_level` = \'write\' AND `status` IN (\'assigned\', \'run\')'
        )
        if len(result) == 0:
            return None
        else:
            return result[0]

    def get_writing_process_host(self):  #override
        result = self._mysql.query(
            'SELECT `server` FROM `applications` WHERE `auth_level` = \'write\' AND `status` IN (\'assigned\', \'run\')'
        )
        if len(result) == 0:
            return None
        else:
            return result[0]

    def get_web_write_process_id(self):  #override
        # user_id is repurposed for the web server subprocess PID
        return self._mysql.query(
            'SELECT `user_id` FROM `applications` WHERE `id` = 0')[0]

    def get_running_processes(self):  #override
        sql = 'SELECT `title`, 0+`auth_level`, `server`, UNIX_TIMESTAMP(`timestamp`) FROM `applications` WHERE `status` = \'run\''

        result = []
        for title, auth_level, server, timestamp in self._mysql.xquery(sql):
            result.append((title, auth_level, server, timestamp))

        return result

    def schedule_application(self, title, path, args, user_id, host,
                             auth_level, timeout):  #override
        columns = ('auth_level', 'title', 'path', 'args', 'timeout', 'user_id',
                   'user_host')
        values = (auth_level, title, path, args, timeout, user_id, host)
        return self._mysql.insert_get_id('applications',
                                         columns=columns,
                                         values=values)

    def _do_get_next_application(self, read_only, blocked_apps):  #override
        sql = 'SELECT `applications`.`id`, 0+`auth_level`, `title`, `path`, `args`, `timeout`, `users`.`name`, `user_host` FROM `applications`'
        sql += ' INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'
        sql += ' WHERE `status` = \'new\''
        if read_only:
            sql += ' AND `auth_level` != \'write\''
        if len(blocked_apps) != 0:
            sql += ' AND `title` NOT IN %s' % MySQL.stringify_sequence(
                blocked_apps)
        sql += ' ORDER BY `applications`.`id` LIMIT 1'

        result = self._mysql.query(sql)

        if len(result) == 0:
            return None
        else:
            appid, auth_level, title, path, args, timeout, uname, uhost = result[
                0]
            return {
                'appid': appid,
                'auth_level': auth_level,
                'user_name': uname,
                'user_host': uhost,
                'title': title,
                'path': path,
                'args': args,
                'timeout': timeout
            }

    def update_application(self, app_id, **kwd):  #override
        sql = 'UPDATE `applications` SET '

        args = []
        updates = []

        if 'status' in kwd:
            updates.append('`status` = %s')
            args.append(AppManager.status_name(kwd['status']))

        if 'hostname' in kwd:
            updates.append('`server` = %s')
            args.append(kwd['hostname'])

        if 'exit_code' in kwd:
            updates.append('`exit_code` = %s')
            args.append(kwd['exit_code'])

        if 'path' in kwd:
            updates.append('`path` = %s')
            args.append(kwd['path'])

        if len(updates) == 0:
            return

        sql += ', '.join(updates)

        sql += ' WHERE `id` = %s'
        args.append(app_id)

        self._mysql.query(sql, *tuple(args))

    def delete_application(self, app_id):  #override
        self._mysql.query('DELETE FROM `applications` WHERE `id` = %s', app_id)

    def start_write_web(self, host, pid):  #override
        # repurposing user_id for pid
        sql = 'UPDATE `applications` SET `status` = \'run\', `server` = %s, `user_host` = %s, `user_id` = %s, `timestamp` = NOW() WHERE `id` = 0'
        self._mysql.query(sql, host, host, pid)

    def stop_write_web(self):  #override
        # We don't actually use the host name because there is only one slot for web write anyway
        sql = 'UPDATE `applications` SET `status` = \'done\', `server` = \'\', `user_host` = \'\', `user_id` = 0 WHERE `id` = 0'
        self._mysql.query(sql)

    def check_application_auth(self, title, user, checksum):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `users` WHERE `name` = %s', user)
        if len(result) == 0:
            return False

        user_id = result[0]

        sql = 'SELECT `user_id` FROM `authorized_applications` WHERE `title` = %s AND `checksum` = UNHEX(%s)'
        for auth_user_id in self._mysql.query(sql, title, checksum):
            if auth_user_id == 0 or auth_user_id == user_id:
                return True

        return False

    def list_authorized_applications(self,
                                     titles=None,
                                     users=None,
                                     checksums=None):  #override
        sql = 'SELECT a.`title`, u.`name`, HEX(a.`checksum`) FROM `authorized_applications` AS a'
        sql += ' LEFT JOIN `users` AS u ON u.`id` = a.`user_id`'

        constraints = []
        args = []
        if type(titles) is list:
            constraints.append('a.`title` IN (%s)' %
                               ','.join(['%s'] * len(titles)))
            args.extend(titles)

        if type(users) is list:
            constraints.append('u.`name` IN (%s)' %
                               ','.join(['%s'] * len(users)))
            args.extend(users)

        if type(checksums) is list:
            constraints.append('a.`checksum` IN (%s)' %
                               ','.join(['UNHEX(%s)'] * len(checksums)))
            args.extend(checksums)

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        return self._mysql.query(sql, *tuple(args))

    def authorize_application(self, title, checksum, user=None):  #override
        sql = 'INSERT INTO `authorized_applications` (`user_id`, `title`, `checksum`)'
        if user is None:
            sql += ' VALUES (0, %s, UNHEX(%s))'
            args = (title, checksum)
        else:
            sql += ' SELECT u.`id`, %s, UNHEX(%s) FROM `users` AS u WHERE u.`name` = %s'
            args = (title, checksum, user)

        inserted = self._mysql.query(sql, *args)
        return inserted != 0

    def revoke_application_authorization(self, title, user=None):  #override
        sql = 'DELETE FROM `authorized_applications` WHERE (`user_id`, `title`) ='
        if user is None:
            sql += ' (0, %s)'
            args = (title, )
        else:
            sql += ' (SELECT u.`id`, %s FROM `users` AS u WHERE u.`name` = %s)'
            args = (title, user)

        deleted = self._mysql.query(sql, *args)
        return deleted != 0

    def register_sequence(self, name, user, restart=False):  #override
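        # INSERT ... SELECT resolves the user name to its id in a single statement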
        sql = 'INSERT INTO `application_sequences` (`name`, `user_id`, `restart`) SELECT %s, `id`, %s FROM `users` WHERE `name` = %s'
        inserted = self._mysql.query(sql, name, 1 if restart else 0, user)
        return inserted != 0

    def find_sequence(self, name):  #override
        sql = 'SELECT u.`name`, s.`restart`, s.`status` FROM `application_sequences` AS s'
        sql += ' INNER JOIN `users` AS u ON u.`id` = s.`user_id`'
        sql += ' WHERE s.`name` = %s'

        try:
            user, restart, status = self._mysql.query(sql, name)[0]
        except IndexError:
            return None

        return (name, user, (restart != 0), status == 'enabled')

    def update_sequence(self, name, restart=None, enabled=None):  #override
        if restart is None and enabled is None:
            return True

        changes = []
        args = []

        if restart is not None:
            changes.append('`restart` = %s')
            args.append(1 if restart else 0)
        if enabled is not None:
            changes.append('`status` = %s')
            args.append('enabled' if enabled else 'disabled')

        args.append(name)

        sql = 'UPDATE `application_sequences` SET ' + ', '.join(
            changes) + ' WHERE `name` = %s'

        updated = self._mysql.query(sql, *tuple(args))
        return updated != 0

    def delete_sequence(self, name):  #override
        sql = 'DELETE FROM `application_sequences` WHERE `name` = %s'
        deleted = self._mysql.query(sql, name)
        return deleted != 0

    def get_sequences(self, enabled_only=True):  #override
        sql = 'SELECT `name` FROM `application_sequences`'
        if enabled_only:
            sql += ' WHERE `status` = \'enabled\''

        return self._mysql.query(sql)

    def create_appmanager(self):  #override
        if self.readonly_config is None:
            db_params = self._mysql.config()
        else:
            db_params = self.readonly_config.db_params

        config = Configuration(db_params=db_params)
        return MySQLAppManager(config)
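
A usage sketch for the manager above (hypothetical titles, paths and hosts; AppManager.LV_WRITE is the auth-level constant used in __init__ above):

app_manager = MySQLAppManager(Configuration(db_params = Configuration(db = 'dynamoserver')))

app_id = app_manager.schedule_application(
    title = 'daily_update',            # hypothetical application title
    path = '/opt/dynamo/apps/update',  # hypothetical executable path
    args = '--dry-run',
    user_id = 1,
    host = 'worker01.example.org',
    auth_level = AppManager.LV_WRITE,
    timeout = 3600)

# A scheduler would then pick the application up and record where it runs
next_app = app_manager._do_get_next_application(read_only = False, blocked_apps = [])
if next_app is not None:
    app_manager.update_application(next_app['appid'], hostname = 'server01.example.org')
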
Example 3
class FTSFileOperation(FileTransferOperation, FileTransferQuery,
                       FileDeletionOperation, FileDeletionQuery):
    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.server_url = config.fts_server
        self.server_id = 0  # server id in the DB

        # Parameter "retry" for fts3.new_job. 0 = server default
        self.fts_retry = config.get('fts_retry', 0)

        # String passed to fts3.new_*_job(metadata = _)
        self.metadata_string = config.get('metadata_string', 'Dynamo')

        # Proxy to be forwarded to FTS
        self.x509proxy = config.get('x509proxy', None)

        # Bookkeeping device
        self.db = MySQL(config.db_params)

        # Reuse the context object
        self.keep_context = config.get('keep_context', True)
        self._context = None

    def num_pending_transfers(self):  #override
        # Check the number of files in queue
        # We first thought about counting files with /files, but FTS appears to return at most 1000 entries even when "limit" is set much larger
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_transfers)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE', 'STAGING', 'STARTED']

        jobs = self._ftscall('list_jobs',
                             state_in=['SUBMITTED', 'ACTIVE', 'STAGING'])
        for job in jobs:
            job_info = self._ftscall('get_job_status',
                                     job['job_id'],
                                     list_files=True)
            for file_info in job_info['files']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_transfers + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def num_pending_deletions(self):  #override
        # See above
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_deletions)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE']

        jobs = self._ftscall('list_jobs', state_in=['SUBMITTED', 'ACTIVE'])
        for job in jobs:
            job_info = self._ftscall('get_job_status',
                                     job['job_id'],
                                     list_files=True)
            for file_info in job_info['dm']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_deletions + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def form_batches(self, tasks):  #override
        if len(tasks) == 0:
            return []

        # FTS3 has no restriction on how to group the transfers, but apparently cannot take
        # thousands of tasks at once
        batches = [[]]
        for task in tasks:
            if len(batches[-1]) == self.batch_size:
                batches.append([])
            batches[-1].append(task)

        return batches

    def start_transfers(self, batch_id, batch_tasks):  #override
        result = {}

        stage_files = []
        transfers = []

        s_pfn_to_task = {}
        t_pfn_to_task = {}

        for task in batch_tasks:
            sub = task.subscription
            lfn = sub.file.lfn
            dest_pfn = sub.destination.to_pfn(lfn, 'gfal2')
            source_pfn = task.source.to_pfn(lfn, 'gfal2')

            if dest_pfn is None or source_pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                LOG.warning('Could not obtain PFN for %s at %s or %s', lfn,
                            sub.destination.name, task.source.name)
                result[task] = False
                continue

            if self.checksum_algorithm:
                checksum = '%s:%s' % (
                    self.checksum_algorithm,
                    str(sub.file.checksum[self.checksum_index]))
                verify_checksum = 'target'
            else:
                checksum = None
                verify_checksum = False

            if task.source.storage_type == Site.TYPE_MSS:
                LOG.debug('Staging %s at %s', lfn, task.source.name)

                # need to stage first
                stage_files.append(
                    (source_pfn, dest_pfn, checksum, sub.file.size))

                # task identified by the source PFN
                s_pfn_to_task[source_pfn] = task
            else:
                LOG.debug('Submitting transfer of %s from %s to %s to FTS',
                          lfn, task.source.name, sub.destination.name)

                transfers.append(
                    fts3.new_transfer(source_pfn,
                                      dest_pfn,
                                      checksum=checksum,
                                      filesize=sub.file.size))

                # there should be only one task per destination pfn
                t_pfn_to_task[dest_pfn] = task

        if len(stage_files) != 0:
            LOG.debug('Submit new staging job for %d files', len(stage_files))
            job = fts3.new_staging_job([ff[0] for ff in stage_files],
                                       bring_online=36000,
                                       metadata=self.metadata_string)
            success = self._submit_job(
                job, 'staging', batch_id,
                dict(
                    (pfn, task.id) for pfn, task in s_pfn_to_task.iteritems()))

            for source_pfn, _, _, _ in stage_files:
                result[s_pfn_to_task[source_pfn]] = success

            if success and not self._read_only:
                LOG.debug('Recording staging queue')
                fields = ('id', 'source', 'destination', 'checksum', 'size')
                mapping = lambda ff: (s_pfn_to_task[ff[0]].id, ) + ff
                self.db.insert_many('fts_staging_queue', fields, mapping,
                                    stage_files)

        if len(transfers) != 0:
            LOG.debug('Submit new transfer job for %d files', len(transfers))
            job = fts3.new_job(transfers,
                               retry=self.fts_retry,
                               overwrite=True,
                               verify_checksum=verify_checksum,
                               metadata=self.metadata_string)
            success = self._submit_job(
                job, 'transfer', batch_id,
                dict(
                    (pfn, task.id) for pfn, task in t_pfn_to_task.iteritems()))

            for transfer in transfers:
                dest_pfn = transfer['destinations'][0]
                result[t_pfn_to_task[dest_pfn]] = success

        return result

    def start_deletions(self, batch_id, batch_tasks):  #override
        result = {}

        pfn_to_task = {}

        for task in batch_tasks:
            desub = task.desubscription
            lfn = desub.file.lfn
            pfn = desub.site.to_pfn(lfn, 'gfal2')

            if pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                result[task] = False
                continue

            # there should be only one task per destination pfn
            pfn_to_task[pfn] = task

        job = fts3.new_delete_job(pfn_to_task.keys(),
                                  metadata=self.metadata_string)

        success = self._submit_job(
            job, 'deletion', batch_id,
            dict((pfn, task.id) for pfn, task in pfn_to_task.iteritems()))

        for task in pfn_to_task.itervalues():
            result[task] = success

        return result

    def cancel_transfers(self, task_ids):  #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids):  #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self):  #override
        sql = 'DELETE FROM f USING `fts_transfer_tasks` AS f'
        sql += ' LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_transfer_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_staging_queue` AS f'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_tasks` AS f'
        sql += ' LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_deletion_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_transfer_batches` AS f'
        sql += ' LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_batches` AS f'
        sql += ' LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `fts_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id):  #override
        if self.server_id == 0:
            self._set_server_id()

        results = self._get_status(batch_id, 'transfer')

        staged_tasks = []

        for task_id, status, exitcode, msg, start_time, finish_time in self._get_status(
                batch_id, 'staging'):
            if status == FileQuery.STAT_DONE:
                staged_tasks.append(task_id)
                results.append(
                    (task_id, FileQuery.STAT_QUEUED, -1, None, None, None))
            else:
                # these tasks won't appear in results from _get_status('transfer')
                # because no transfer jobs have been submitted yet
                results.append(
                    (task_id, status, exitcode, None, start_time, finish_time))

        if len(staged_tasks) != 0:
            transfers = []
            pfn_to_tid = {}
            for task_id, source_pfn, dest_pfn, checksum, filesize in self.db.select_many(
                    'fts_staging_queue',
                ('id', 'source', 'destination', 'checksum', 'size'), 'id',
                    staged_tasks):
                transfers.append(
                    fts3.new_transfer(source_pfn,
                                      dest_pfn,
                                      checksum=checksum,
                                      filesize=filesize))
                pfn_to_tid[dest_pfn] = task_id

            if self.checksum_algorithm:
                verify_checksum = 'target'
            else:
                verify_checksum = None

            job = fts3.new_job(transfers,
                               retry=self.fts_retry,
                               overwrite=True,
                               verify_checksum=verify_checksum,
                               metadata=self.metadata_string)
            success = self._submit_job(job, 'transfer', batch_id, pfn_to_tid)
            if success and not self._read_only:
                self.db.delete_many('fts_staging_queue', 'id',
                                    pfn_to_tid.values())

        return results

    def get_deletion_status(self, batch_id):  #override
        if self.server_id == 0:
            self._set_server_id()

        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id,
                               history_id):  #override
        self._write_history(history_db, task_id, history_id, 'transfer')

    def write_deletion_history(self, history_db, task_id,
                               history_id):  #override
        self._write_history(history_db, task_id, history_id, 'deletion')

    def forget_transfer_status(self, task_id):  #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id):  #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, task_id):  #override
        return self._forget_batch(task_id, 'transfer')

    def forget_deletion_batch(self, task_id):  #override
        return self._forget_batch(task_id, 'deletion')

    def _ftscall(self, method, *args, **kwd):
        return self._do_ftscall(binding=(method, args, kwd))

    def _ftscallurl(self, url):
        # Call to FTS URLs that don't have python bindings
        return self._do_ftscall(url=url)

    def _do_ftscall(self, binding=None, url=None):
        if self._context is None:
            # request_class = Request -> use "requests"-based https call (instead of default PyCURL,
            # which may not be able to handle proxy certificates depending on the cURL installation)
            # verify = False -> do not verify the server certificate
            context = fts3.Context(self.server_url,
                                   ucert=self.x509proxy,
                                   ukey=self.x509proxy,
                                   request_class=Request,
                                   verify=False)

            if self.keep_context:
                self._context = context
        else:
            context = self._context

        if binding is not None:
            reqstring = binding[0]
        else:
            reqstring = url

        LOG.debug('FTS: %s', reqstring)

        wait_time = 1.
        for attempt in xrange(10):
            try:
                if binding is not None:
                    method, args, kwd = binding
                    return getattr(fts3, method)(context, *args, **kwd)
                else:
                    return json.loads(context.get(url))
            except fts_exceptions.ServerError as exc:
                if str(exc.reason) == '500':
                    # Internal server error - let's try again
                    pass
            except fts_exceptions.TryAgain:
                pass

            time.sleep(wait_time)
            wait_time *= 1.5

        LOG.error('Failed to communicate with FTS server: %s', reqstring)
        raise RuntimeError('Failed to communicate with FTS server: %s' %
                           reqstring)

    def _submit_job(self, job, optype, batch_id, pfn_to_tid):
        if self._read_only:
            job_id = 'test'
        else:
            try:
                job_id = self._ftscall('submit', job)
            except:
                exc_type, exc, tb = sys.exc_info()
                LOG.error('Failed to submit %s to FTS: Exception %s (%s)',
                          optype, exc_type.__name__, str(exc))
                return False

        LOG.debug('FTS job id: %s', job_id)

        # list of file-level operations (one-to-one with pfn)
        try:
            if optype == 'transfer' or optype == 'staging':
                key = 'files'
            else:
                key = 'dm'

            fts_files = self._ftscall('get_job_status',
                                      job_id=job_id,
                                      list_files=True)[key]
        except:
            exc_type, exc, tb = sys.exc_info()
            LOG.error(
                'Failed to get status of job %s from FTS: Exception %s (%s)',
                job_id, exc_type.__name__, str(exc))
            return False

        if self.server_id == 0:
            self._set_server_id()

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_batches'
            columns = ('batch_id', 'task_type', 'fts_server_id', 'job_id')
            values = (batch_id, optype, self.server_id, job_id)
        else:
            table_name = 'fts_deletion_batches'
            columns = ('batch_id', 'fts_server_id', 'job_id')
            values = (batch_id, self.server_id, job_id)

        if not self._read_only:
            fts_batch_id = self.db.insert_get_id(table_name,
                                                 columns=columns,
                                                 values=values)

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_tasks'
            pfn_key = 'dest_surl'
        else:
            table_name = 'fts_deletion_tasks'
            pfn_key = 'source_surl'

        fields = ('id', 'fts_batch_id', 'fts_file_id')
        mapping = lambda f: (pfn_to_tid[f[pfn_key]], fts_batch_id, f['file_id'])

        if not self._read_only:
            self.db.insert_many(table_name,
                                fields,
                                mapping,
                                fts_files,
                                do_update=True,
                                update_columns=('fts_batch_id', 'fts_file_id'))

        return True

    def _cancel(self, task_ids, optype):
        sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
        result = self.db.execute_many(sql.format(op=optype),
                                      MySQL.bare('f.`id`'), task_ids)

        by_job = collections.defaultdict(list)

        for job_id, file_id in result:
            by_job[job_id].append(file_id)

        if not self._read_only:
            for job_id, ids in by_job.iteritems():
                try:
                    self._ftscall('cancel', job_id, file_ids=ids)
                except:
                    LOG.error('Failed to cancel FTS job %s', job_id)

    def _get_status(self, batch_id, optype):
        if optype == 'transfer' or optype == 'staging':
            sql = 'SELECT `id`, `job_id` FROM `fts_transfer_batches`'
            sql += ' WHERE `task_type` = %s AND `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, optype, self.server_id, batch_id)
            task_table_name = 'fts_transfer_tasks'
        else:
            sql = 'SELECT `id`, `job_id` FROM `fts_deletion_batches`'
            sql += ' WHERE `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, self.server_id, batch_id)
            task_table_name = 'fts_deletion_tasks'

        message_pattern = re.compile(
            r'(?:DESTINATION|SOURCE|TRANSFER|DELETION) \[([0-9]+)\] (.*)')

        results = []

        for fts_batch_id, job_id in batch_data:
            LOG.debug('Checking status of FTS %s batch %s', optype, job_id)

            sql = 'SELECT `fts_file_id`, `id` FROM `{table}` WHERE `fts_batch_id` = %s'.format(
                table=task_table_name)
            fts_to_task = dict(self.db.xquery(sql, fts_batch_id))

            try:
                result = self._ftscall('get_job_status',
                                       job_id=job_id,
                                       list_files=True)
            except:
                LOG.error('Failed to get job status for FTS job %s', job_id)
                continue

            if optype == 'transfer' or optype == 'staging':
                fts_files = result['files']
            else:
                fts_files = result['dm']

            for fts_file in fts_files:
                try:
                    task_id = fts_to_task[fts_file['file_id']]
                except KeyError:
                    continue

                state = fts_file['file_state']
                exitcode = -1
                start_time = None
                finish_time = None
                get_time = False

                try:
                    message = fts_file['reason']
                except KeyError:
                    message = None

                if message is not None:
                    # Check if reason follows a known format (from which we can get the exit code)
                    matches = message_pattern.match(message)
                    if matches is not None:
                        exitcode = int(matches.group(1))
                        message = matches.group(2)
                    # Additionally, if the message is a known one, convert the exit code
                    c = find_msg_code(message)
                    if c is not None:
                        exitcode = c

                    # HDFS site with gridftp-hdfs gives a I/O error (500) when the file is not there
                    if optype == 'deletion' and 'Input/output error' in message:
                        exitcode = errno.ENOENT

                if state == 'FINISHED':
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    get_time = True

                elif state == 'FAILED':
                    status = FileQuery.STAT_FAILED
                    get_time = True

                elif state == 'CANCELED':
                    status = FileQuery.STAT_CANCELLED
                    get_time = True

                elif state == 'SUBMITTED':
                    status = FileQuery.STAT_NEW

                else:
                    status = FileQuery.STAT_QUEUED

                if optype == 'transfer' and exitcode == errno.EEXIST:
                    # Transfer + destination exists -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                elif optype == 'deletion' and exitcode == errno.ENOENT:
                    # Deletion + destination does not exist -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0

                if get_time:
                    try:
                        start_time = calendar.timegm(
                            time.strptime(fts_file['start_time'],
                                          '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:  # start time is NULL (can happen when the job is cancelled)
                        start_time = None
                    try:
                        finish_time = calendar.timegm(
                            time.strptime(fts_file['finish_time'],
                                          '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:
                        finish_time = None

                LOG.debug('%s %d: %s, %d, %s, %s, %s', optype, task_id,
                          FileQuery.status_name(status), exitcode, message,
                          start_time, finish_time)

                results.append((task_id, status, exitcode, message, start_time,
                                finish_time))

        return results

    def _write_history(self, history_db, task_id, history_id, optype):
        if not self._read_only:
            history_db.db.insert_update('fts_servers', ('url', ),
                                        self.server_url)

        try:
            server_id = history_db.db.query(
                'SELECT `id` FROM `fts_servers` WHERE `url` = %s',
                self.server_url)[0]
        except IndexError:
            server_id = 0

        sql = 'SELECT b.`job_id`, t.`fts_file_id` FROM `fts_{op}_tasks` AS t'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = t.`fts_batch_id`'
        sql += ' WHERE t.`id` = %s'

        try:
            fts_job_id, fts_file_id = self.db.query(sql.format(op=optype),
                                                    task_id)[0]
        except IndexError:
            return

        if not self._read_only:
            history_db.db.insert_update('fts_batches',
                                        ('fts_server_id', 'job_id'), server_id,
                                        fts_job_id)
            batch_id = history_db.db.query(
                'SELECT `id` FROM `fts_batches` WHERE `fts_server_id` = %s AND `job_id` = %s',
                server_id, fts_job_id)[0]

            history_db.db.insert_update('fts_file_{op}s'.format(op=optype),
                                        ('id', 'fts_batch_id', 'fts_file_id'),
                                        history_id, batch_id, fts_file_id)

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_tasks` WHERE `id` = %s'.format(
            optype=optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_batches` WHERE `batch_id` = %s'.format(
            optype=optype)
        self.db.query(sql, batch_id)

    def _set_server_id(self):
        if not self._read_only:
            self.db.query(
                'INSERT INTO `fts_servers` (`url`) VALUES (%s) ON DUPLICATE KEY UPDATE `url`=VALUES(`url`)',
                self.server_url)

        result = self.db.query(
            'SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)
        if len(result) == 0:
            self.server_id = 0
        else:
            self.server_id = result[0]
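
Both versions of _do_ftscall retry with a capped exponential backoff (10 attempts, waits growing by a factor of 1.5). A standalone sketch of the same pattern, with TransientError standing in for fts_exceptions.ServerError / TryAgain:

import time

class TransientError(Exception):
    pass

def call_with_backoff(func, attempts = 10, first_wait = 1., factor = 1.5):
    # Retry func() on transient errors, sleeping a little longer after each failure
    wait_time = first_wait
    for _ in xrange(attempts):
        try:
            return func()
        except TransientError:
            pass
        time.sleep(wait_time)
        wait_time *= factor

    raise RuntimeError('Giving up after %d attempts' % attempts)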