예제 #1
0
class MySQLUpdateBoard(UpdateBoard):
    def __init__(self, config):
        UpdateBoard.__init__(self, config)

        db_params = Configuration(config.db_params)
        db_params.reuse_connection = True # we use locks

        self._mysql = MySQL(db_params)

    def lock(self): #override
        self._mysql.lock_tables(write = ['inventory_updates'])

    def unlock(self): #override
        self._mysql.unlock_tables()

    def get_updates(self): #override
        for cmd, obj in self._mysql.xquery('SELECT `cmd`, `obj` FROM `inventory_updates` ORDER BY `id`'):
            if cmd == 'update':
                yield DynamoInventory.CMD_UPDATE, obj
            elif cmd == 'delete':
                yield DynamoInventory.CMD_DELETE, obj

    def flush(self): #override
        self._mysql.query('DELETE FROM `inventory_updates`')
        self._mysql.query('ALTER TABLE `inventory_updates` AUTO_INCREMENT = 1')

    def write_updates(self, update_commands): #override
        self._mysql.lock_tables(write = ['inventory_updates'])

        try:
            sql = 'INSERT INTO `inventory_updates` (`cmd`, `obj`) VALUES (%s, %s)'

            for cmd, sobj in update_commands:
                if cmd == DynamoInventory.CMD_UPDATE:
                    self._mysql.query(sql, 'update', sobj)
                elif cmd == DynamoInventory.CMD_DELETE:
                    self._mysql.query(sql, 'delete', sobj)

        finally:
            self._mysql.unlock_tables()

    def disconnect(self):
        self._mysql.close()
예제 #2
0
class MySQLAppManager(AppManager):
    def __init__(self, config):
        AppManager.__init__(self, config)

        if not hasattr(self, '_mysql'):
            db_params = Configuration(config.db_params)
            db_params.reuse_connection = True  # we use locks

            self._mysql = MySQL(db_params)

        # make sure applications row with id 0 exists
        count = self._mysql.query(
            'SELECT COUNT(*) FROM `applications` WHERE `id` = 0')[0]

        if count == 0:
            # Cannot insert with id = 0 (will be interpreted as next auto_increment id unless server-wide setting is changed)
            # Inesrt with an implicit id first and update later
            columns = ('auth_level', 'title', 'path', 'status', 'user_id',
                       'user_host')
            values = (AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')
            insert_id = self._mysql.insert_get_id('applications',
                                                  columns=columns,
                                                  values=values)

            self._mysql.query(
                'UPDATE `applications` SET `id` = 0 WHERE `id` = %s',
                insert_id)

    def get_applications(self,
                         older_than=0,
                         status=None,
                         app_id=None,
                         path=None):  #override
        sql = 'SELECT `applications`.`id`, 0+`applications`.`auth_level`, `applications`.`title`, `applications`.`path`, `applications`.`args`,'
        sql += ' `applications`.`timeout`, 0+`applications`.`status`, `applications`.`server`, `applications`.`exit_code`, `users`.`name`, `applications`.`user_host`'
        sql += ' FROM `applications` INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'

        constraints = []
        args = []
        if older_than > 0:
            constraints.append(
                'UNIX_TIMESTAMP(`applications`.`timestamp`) < %s')
            args.append(older_than)
        if status is not None:
            constraints.append('`applications`.`status` = %s')
            args.append(status)
        if app_id is not None:
            constraints.append('`applications`.`id` = %s')
            args.append(app_id)
        if path is not None:
            constraints.append('`applications`.`path` = %s')
            args.append(path)

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        args = tuple(args)

        applications = []
        for aid, auth_level, title, path, args, timeout, status, server, exit_code, uname, uhost in self._mysql.xquery(
                sql, *args):
            applications.append({
                'appid': aid,
                'auth_level': auth_level,
                'user_name': uname,
                'user_host': uhost,
                'title': title,
                'path': path,
                'args': args,
                'timeout': timeout,
                'status': int(status),
                'server': server,
                'exit_code': exit_code
            })

        return applications

    def get_writing_process_id(self):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `applications` WHERE `auth_level` = \'write\' AND `status` IN (\'assigned\', \'run\')'
        )
        if len(result) == 0:
            return None
        else:
            return result[0]

    def get_writing_process_host(self):  #override
        result = self._mysql.query(
            'SELECT `server` FROM `applications` WHERE `auth_level` = \'write\' AND `status` IN (\'assigned\', \'run\')'
        )
        if len(result) == 0:
            return None
        else:
            return result[0]

    def get_web_write_process_id(self):  #override
        # user_id is repurposed for web server suprocess PID
        return self._mysql.query(
            'SELECT `user_id` FROM `applications` WHERE `id` = 0')[0]

    def get_running_processes(self):  #override
        sql = 'SELECT `title`, 0+`auth_level`, `server`, UNIX_TIMESTAMP(`timestamp`) FROM `applications` WHERE `status` = \'run\''

        result = []
        for title, auth_level, server, timestamp in self._mysql.xquery(sql):
            result.append((title, auth_level, server, timestamp))

        return result

    def schedule_application(self, title, path, args, user_id, host,
                             auth_level, timeout):  #override
        columns = ('auth_level', 'title', 'path', 'args', 'timeout', 'user_id',
                   'user_host')
        values = (auth_level, title, path, args, timeout, user_id, host)
        return self._mysql.insert_get_id('applications',
                                         columns=columns,
                                         values=values)

    def _do_get_next_application(self, read_only, blocked_apps):  #override
        sql = 'SELECT `applications`.`id`, 0+`auth_level`, `title`, `path`, `args`, `timeout`, `users`.`name`, `user_host` FROM `applications`'
        sql += ' INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'
        sql += ' WHERE `status` = \'new\''
        if read_only:
            sql += ' AND `auth_level` != \'write\''
        if len(blocked_apps) != 0:
            sql += ' AND `title` NOT IN %s' % MySQL.stringify_sequence(
                blocked_apps)
        sql += ' ORDER BY `applications`.`id` LIMIT 1'

        result = self._mysql.query(sql)

        if len(result) == 0:
            return None
        else:
            appid, auth_level, title, path, args, timeout, uname, uhost = result[
                0]
            return {
                'appid': appid,
                'auth_level': auth_level,
                'user_name': uname,
                'user_host': uhost,
                'title': title,
                'path': path,
                'args': args,
                'timeout': timeout
            }

    def update_application(self, app_id, **kwd):  #override
        sql = 'UPDATE `applications` SET '

        args = []
        updates = []

        if 'status' in kwd:
            updates.append('`status` = %s')
            args.append(AppManager.status_name(kwd['status']))

        if 'hostname' in kwd:
            updates.append('`server` = %s')
            args.append(kwd['hostname'])

        if 'exit_code' in kwd:
            updates.append('`exit_code` = %s')
            args.append(kwd['exit_code'])

        if 'path' in kwd:
            updates.append('`path` = %s')
            args.append(kwd['path'])

        sql += ', '.join(updates)

        sql += ' WHERE `id` = %s'
        args.append(app_id)

        self._mysql.query(sql, *tuple(args))

    def delete_application(self, app_id):  #override
        self._mysql.query('DELETE FROM `applications` WHERE `id` = %s', app_id)

    def start_write_web(self, host, pid):  #override
        # repurposing user_id for pid
        sql = 'UPDATE `applications` SET `status` = \'run\', `server` = %s, `user_host` = %s, `user_id` = %s, `timestamp` = NOW() WHERE `id` = 0'
        self._mysql.query(sql, host, host, pid)

    def stop_write_web(self):  #override
        # We don't actually use the host name because there is only one slot for web write anyway
        sql = 'UPDATE `applications` SET `status` = \'done\', `server` = \'\', `user_host` = \'\', `user_id` = 0 WHERE `id` = 0'
        self._mysql.query(sql)

    def check_application_auth(self, title, user, checksum):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `users` WHERE `name` = %s', user)
        if len(result) == 0:
            return False

        user_id = result[0]

        sql = 'SELECT `user_id` FROM `authorized_applications` WHERE `title` = %s AND `checksum` = UNHEX(%s)'
        for auth_user_id in self._mysql.query(sql, title, checksum):
            if auth_user_id == 0 or auth_user_id == user_id:
                return True

        return False

    def list_authorized_applications(self,
                                     titles=None,
                                     users=None,
                                     checksums=None):  #override
        sql = 'SELECT a.`title`, u.`name`, HEX(a.`checksum`) FROM `authorized_applications` AS a'
        sql += ' LEFT JOIN `users` AS u ON u.`id` = a.`user_id`'

        constraints = []
        args = []
        if type(titles) is list:
            constraints.append('a.`title` IN (%s)' %
                               ','.join(['%s'] * len(titles)))
            args.extend(titles)

        if type(users) is list:
            constraints.append('u.`name` IN (%s)' %
                               ','.join(['%s'] * len(users)))
            args.extend(users)

        if type(checksums) is list:
            constraints.append('a.`checksum` IN (%s)' %
                               ','.join(['UNHEX(%s)'] * len(checksums)))
            args.extend(checksums)

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        return self._mysql.query(sql, *tuple(args))

    def authorize_application(self, title, checksum, user=None):  #override
        sql = 'INSERT INTO `authorized_applications` (`user_id`, `title`, `checksum`)'
        if user is None:
            sql += ' VALUES (0, %s, UNHEX(%s))'
            args = (title, checksum)
        else:
            sql += ' SELECT u.`id`, %s, UNHEX(%s) FROM `users` AS u WHERE u.`name` = %s'
            args = (title, checksum, user)

        inserted = self._mysql.query(sql, *args)
        return inserted != 0

    def revoke_application_authorization(self, title, user=None):  #override
        sql = 'DELETE FROM `authorized_applications` WHERE (`user_id`, `title`) ='
        if user is None:
            sql += ' (0, %s)'
            args = (title, )
        else:
            sql += ' (SELECT u.`id`, %s FROM `users` AS u WHERE u.`name` = %s)'
            args = (title, user)

        deleted = self._mysql.query(sql, *args)
        return deleted != 0

    def register_sequence(self, name, user, restart=False):  #override
        sql = 'INSERT INTO `application_sequences` (`name`, `user_id`, `restart`) SELECT %s, `id`, %s FROM `users` WHERE `name` = %s'
        inserted = self._mysql.query(sql, name, 1 if restart else 0, user)
        return inserted != 0

    def find_sequence(self, name):  #override
        sql = 'SELECT u.`name`, s.`restart`, s.`status` FROM `application_sequences` AS s'
        sql += ' INNER JOIN `users` AS u ON u.`id` = s.`user_id`'
        sql += ' WHERE s.`name` = %s'

        try:
            user, restart, status = self._mysql.query(sql, name)[0]
        except IndexError:
            return None

        return (name, user, (restart != 0), status == 'enabled')

    def update_sequence(self, name, restart=None, enabled=None):  #override
        if restart is None and enabled is None:
            return True

        changes = []
        args = []

        if restart is not None:
            changes.append('`restart` = %s')
            args.append(1 if restart else 0)
        if enabled is not None:
            changes.append('`status` = %s')
            args.append('enabled' if enabled else 'disabled')

        args.append(name)

        sql = 'UPDATE `application_sequences` SET ' + ', '.join(
            changes) + ' WHERE `name` = %s'

        updated = self._mysql.query(sql, *tuple(args))
        return updated != 0

    def delete_sequence(self, name):  #override
        sql = 'DELETE FROM `application_sequences` WHERE `name` = %s'
        deleted = self._mysql.query(sql, name)
        return deleted != 0

    def get_sequences(self, enabled_only=True):  #override
        sql = 'SELECT `name` FROM `application_sequences`'
        if enabled_only:
            sql += ' WHERE `status` = \'enabled\''

        return self._mysql.query(sql)

    def create_appmanager(self):  #override
        if self.readonly_config is None:
            db_params = self._mysql.config()
        else:
            db_params = self.readonly_config.db_params

        config = Configuration(db_params=db_params)
        return MySQLAppManager(config)
예제 #3
0
class FTSFileOperation(FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.server_url = config.fts_server
        self.server_id = 0 # server id in the DB

        # Parameter "retry" for fts3.new_job. 0 = server default
        self.fts_retry = config.get('fts_retry', 0)

        # String passed to fts3.new_*_job(metadata = _)
        self.metadata_string = config.get('metadata_string', 'Dynamo')

        # Proxy to be forwarded to FTS
        self.x509proxy = config.get('x509proxy', None)
        self.x509proxy_orig = config.get('x509proxy', None)

        # Bookkeeping device
        self.db = MySQL(config.db_params)

        # Reuse the context object
        self.keep_context = config.get('keep_context', False)
        self._context = None

    def num_pending_transfers(self): #override
        # Check the number of files in queue
        # We first thought about counting files with /files, but FTS seems to return only 1000 maximum even when "limit" is set much larger
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_transfers)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE', 'STAGING', 'STARTED']

        jobs = self._ftscall('list_jobs', state_in = ['SUBMITTED', 'ACTIVE', 'STAGING'])
        from random import shuffle
        shuffle(jobs)

        total_count = 0
        for job in jobs:
            total_count = total_count + 1
            #LOG.info("List_files call 1")
            job_info = self._ftscall('get_job_status', job['job_id'], list_files = True)
            for file_info in job_info['files']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_transfers + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def num_pending_deletions(self): #override
        # See above
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_deletions)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE']

        jobs = self._ftscall('list_jobs', state_in = ['SUBMITTED', 'ACTIVE'])
        from random import shuffle
        shuffle(jobs)

        total_count = 0
        for job in jobs:
            total_count = total_count + 1
            #LOG.info("List_files call 2")
            job_info = self._ftscall('get_job_status', job['job_id'], list_files = True)
            for file_info in job_info['dm']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_deletions + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def form_batches(self, tasks): #override
        if len(tasks) == 0:
            return []

        # FTS3 has no restriction on how to group the transfers, but cannot apparently take thousands
        # of tasks at once
        batches = [[]]
        for task in tasks:
            batches[-1].append(task)
            if len(batches[-1]) == self.batch_size:
                batches.append([])

        return batches

    def start_transfers(self, batch_id, batch_tasks): #override
        result = {}

        stage_files = []
        transfers = []

        s_pfn_to_task = {}
        t_pfn_to_task = {}

        for task in batch_tasks:
            sub = task.subscription
            lfn = sub.file.lfn
            dest_pfn = sub.destination.to_pfn(lfn, 'gfal2')
            source_pfn = task.source.to_pfn(lfn, 'gfal2')

            self.x509proxy = sub.destination.x509proxy

            if task.source.storage_type == Site.TYPE_MSS:
                self.x509proxy = task.source.x509proxy

            if dest_pfn is None or source_pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                LOG.warning('Could not obtain PFN for %s at %s or %s', lfn, sub.destination.name, task.source.name)
                result[task] = False
                continue

            if self.checksum_algorithm:
                checksum = '%s:%s' % (self.checksum_algorithm, str(sub.file.checksum[self.checksum_index]))
                verify_checksum = 'target'
            else:
                checksum = None
                verify_checksum = False

            if task.source.storage_type == Site.TYPE_MSS:
                LOG.debug('Staging %s at %s', lfn, task.source.name)

                # need to stage first
                stage_files.append((source_pfn, dest_pfn, checksum, sub.file.size))

                # task identified by the source PFN
                s_pfn_to_task[source_pfn] = task
            else:
                LOG.info("Here we are")
                LOG.info('Submitting transfer of %s from %s to %s to FTS', lfn, task.source.name, sub.destination.name)

                transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum = checksum, filesize = sub.file.size))

                # there should be only one task per destination pfn
                t_pfn_to_task[dest_pfn] = task

        if len(stage_files) != 0:
            LOG.debug('Submit new staging job for %d files', len(stage_files))
            job = fts3.new_staging_job([ff[0] for ff in stage_files], bring_online = 36000, metadata = self.metadata_string)
            success = self._submit_job(job, 'staging', batch_id, dict((pfn, task.id) for pfn, task in s_pfn_to_task.iteritems()), x509=task.source.x509proxy)

            for source_pfn, _, _, _ in stage_files:
                result[s_pfn_to_task[source_pfn]] = success

            if success and not self._read_only:
                LOG.debug('Recording staging queue')
                fields = ('id', 'source', 'destination', 'checksum', 'size')
                mapping = lambda ff: (s_pfn_to_task[ff[0]].id,) + ff
                if not self._read_only:
                    self.db.insert_many('fts_staging_queue', fields, mapping, stage_files)

        if len(transfers) != 0:
            LOG.debug('Submit new transfer job for %d files', len(transfers))
            LOG.info("Submitting transfer job from disk to site %s with proxy %s." % (sub.destination.name, sub.destination.x509proxy))

            job = fts3.new_job(transfers, retry = self.fts_retry, overwrite = True, 
                               verify_checksum = verify_checksum, metadata = self.metadata_string)
            success = self._submit_job(job, 'transfer', batch_id, dict((pfn, task.id) for pfn, task in t_pfn_to_task.iteritems()), x509=sub.destination.x509proxy)

            for transfer in transfers:
                dest_pfn = transfer['destinations'][0]
                result[t_pfn_to_task[dest_pfn]] = success

        return result

    def start_deletions(self, batch_id, batch_tasks): #override
        result = {}

        pfn_to_task = {}

        for task in batch_tasks:
            desub = task.desubscription
            lfn = desub.file.lfn
            pfn = desub.site.to_pfn(lfn, 'gfal2')

            if pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                result[task] = False
                continue

            # there should be only one task per destination pfn
            pfn_to_task[pfn] = task

        job = fts3.new_delete_job(pfn_to_task.keys(), metadata = self.metadata_string)

        success = self._submit_job(job, 'deletion', batch_id, dict((pfn, task.id) for pfn, task in pfn_to_task.iteritems()))

        for task in pfn_to_task.itervalues():
            result[task] = success

        return result

    def cancel_transfers(self, task_ids): #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids): #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self): #override
        sql = 'DELETE FROM f USING `fts_transfer_tasks` AS f'
        sql += ' LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_transfer_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_staging_queue` AS f'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_tasks` AS f'
        sql += ' LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_deletion_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_transfer_batches` AS f'
        sql += ' LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_batches` AS f'
        sql += ' LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `fts_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id): #override
        if self.server_id == 0:
            self._set_server_id()

        results = self._get_status(batch_id, 'transfer')

        staged_tasks = []

        for task_id, status, exitcode, msg, start_time, finish_time in self._get_status(batch_id, 'staging'):
            if status == FileQuery.STAT_DONE:
                staged_tasks.append(task_id)
                results.append((task_id, FileQuery.STAT_QUEUED, -1, None, None, None))
            else:
                # these tasks won't appear in results from _get_status('transfer')
                # because no transfer jobs have been submitted yet
                results.append((task_id, status, exitcode, None, start_time, finish_time))

        if len(staged_tasks) != 0:
            if self.checksum_algorithm:
                verify_checksum = 'target'
            else:
                verify_checksum = None

            which_dest = ""
            transfers = []
            pfn_to_tid = {}

            for task_id, source_pfn, dest_pfn, checksum, filesize in self.db.select_many('fts_staging_queue', ('id', 'source', 'destination', 'checksum', 'size'), 'id', staged_tasks):
                for s in inventory.sites.itervalues():
                    if s.backend in dest_pfn and s.backend != "":
                        which_dest = s
                #transfers = []
                #pfn_to_tid = {}
                transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum = checksum, filesize = filesize))
                pfn_to_tid[dest_pfn] = task_id

            job = fts3.new_job(transfers, retry = self.fts_retry, overwrite = True, verify_checksum = verify_checksum, metadata = self.metadata_string)
                
                #which_dest = ""
                #for s in inventory.sites.itervalues():
                #    if s.backend in dest_pfn and s.backend != "":
                #        which_dest = s
            LOG.info("Submitting job from buffer to site %s with proxy %s." % (which_dest.name, which_dest.x509proxy))
            #LOG.info("dest_pfn: %s" % dest_pfn) 
            success = self._submit_job(job, 'transfer', batch_id, pfn_to_tid, x509=which_dest.x509proxy)

            if success and not self._read_only:
                self.db.delete_many('fts_staging_queue', 'id', pfn_to_tid.values())

        return results

    def get_deletion_status(self, batch_id): #override
        if self.server_id == 0:
            self._set_server_id()

        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id, history_id): #override
        self._write_history(history_db, task_id, history_id, 'transfer')

    def write_deletion_history(self, history_db, task_id, history_id): #override
        self._write_history(history_db, task_id, history_id, 'deletion')

    def forget_transfer_status(self, task_id): #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id): #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, task_id): #override
        return self._forget_batch(task_id, 'transfer')

    def forget_deletion_batch(self, task_id): #override
        return self._forget_batch(task_id, 'deletion')

    def _ftscall(self, method, *args, **kwd):
        return self._do_ftscall(binding = (method, args, kwd))

    def _ftscallurl(self, url):
        # Call to FTS URLs that don't have python bindings
        return self._do_ftscall(url = url)

    def _do_ftscall(self, binding = None, url = None):
        
        proxy = self.x509proxy

        if binding is not None:
            method, args, kwd = binding
            for k in kwd.iteritems():
                LOG.info(k)
                if k[0] == "ucert":
                    LOG.info("Setting proxy to %s" % k[1])
                    proxy = k[1]

        if self._context is None:
            # request_class = Request -> use "requests"-based https call (instead of default PyCURL,
            # which may not be able to handle proxy certificates depending on the cURL installation)
            # verify = False -> do not verify the server certificate
            LOG.info("Setting context with proxy %s" % proxy)
            context = fts3.Context(self.server_url, ucert = proxy, ukey = proxy,
                                   request_class = Request, verify = False)

            if self.keep_context:
                self._context = context
        else:
            context = self._context

        if binding is not None:
            reqstring = binding[0]
        else:
            reqstring = url

        LOG.debug('FTS: %s', reqstring)

        wait_time = 1.

        for attempt in xrange(10):
            try:
                if binding is not None:
                    method, args, kwd = binding

                    if "ucert" in kwd:
                        del kwd["ucert"]
                    if "ukey" in kwd:
                        del kwd["ukey"]

                    return getattr(fts3, method)(context, *args, **kwd)
                else:
                    return json.loads(context.get(url))
            except fts_exceptions.ServerError as exc:
                if str(exc.reason) == '500':
                    # Internal server error - let's try again
                    pass
            except fts_exceptions.TryAgain:
                pass

            time.sleep(wait_time)
            wait_time *= 1.5

        LOG.error('Failed to communicate with FTS server: %s', reqstring)
        raise RuntimeError('Failed to communicate with FTS server: %s' % reqstring)

    def _submit_job(self, job, optype, batch_id, pfn_to_tid, x509=None):
        if self._read_only:
            job_id = 'test'
        else:
            try:
                if x509 is not None:
                    job_id = self._ftscall('submit', job, ucert=x509, ukey=x509)
                else:
                    job_id = self._ftscall('submit', job)
            except:
                exc_type, exc, tb = sys.exc_info()
                LOG.error('Failed to submit %s to FTS: Exception %s (%s)', optype, exc_type.__name__, str(exc))
                return False

        # list of file-level operations (one-to-one with pfn)
        try:
            if optype == 'transfer' or optype == 'staging':
                key = 'files'
            else:
                key = 'dm'

            #LOG.info("List_files call 3")
            fts_files = self._ftscall('get_job_status', job_id = job_id, list_files = True)[key]
        except:
            exc_type, exc, tb = sys.exc_info()
            LOG.error('Failed to get status of job %s from FTS: Exception %s (%s)', job_id, exc_type.__name__, str(exc))
            return False

        if self.server_id == 0:
            self._set_server_id()

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_batches'
            columns = ('batch_id', 'task_type', 'fts_server_id', 'job_id')
            values = (batch_id, optype, self.server_id, job_id)
        else:
            table_name = 'fts_deletion_batches'
            columns = ('batch_id', 'fts_server_id', 'job_id')
            values = (batch_id, self.server_id, job_id)

        if not self._read_only:
            fts_batch_id = self.db.insert_get_id(table_name, columns = columns, values = values)

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_tasks'
            pfn_key = 'dest_surl'
        else:
            table_name = 'fts_deletion_tasks'
            pfn_key = 'source_surl'

        fields = ('id', 'fts_batch_id', 'fts_file_id')
        mapping = lambda f: (pfn_to_tid[f[pfn_key]], fts_batch_id, f['file_id'])

        if not self._read_only:
            self.db.insert_many(table_name, fields, mapping, fts_files, do_update = True, update_columns = ('fts_batch_id', 'fts_file_id'))

        return True

    def _cancel(self, task_ids, optype):
        sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
        result = self.db.execute_many(sql.format(op = optype), MySQL.bare('f.`id`'), task_ids)

        by_job = collections.defaultdict(list)

        for job_id, file_id in result:
            by_job[job_id].append(file_id)

        if not self._read_only:
            for job_id, ids in by_job.iteritems():
                try:
                    self._ftscall('cancel', job_id, file_ids = ids)
                except:
                    LOG.error('Failed to cancel FTS job %s', job_id)
    
    def _get_status(self, batch_id, optype):
        if optype == 'transfer' or optype == 'staging':
            sql = 'SELECT `id`, `job_id` FROM `fts_transfer_batches`'
            sql += ' WHERE `task_type` = %s AND `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, optype, self.server_id, batch_id)
            task_table_name = 'fts_transfer_tasks'
        else:
            sql = 'SELECT `id`, `job_id` FROM `fts_deletion_batches`'
            sql += ' WHERE `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, self.server_id, batch_id)
            task_table_name = 'fts_deletion_tasks'

        message_pattern = re.compile('(?:DESTINATION|SOURCE|TRANSFER|DELETION) \[([0-9]+)\] (.*)')

        results = []

        for fts_batch_id, job_id in batch_data:
            LOG.debug('Checking status of FTS %s batch %s', optype, job_id)

            sql = 'SELECT `fts_file_id`, `id` FROM `{table}` WHERE `fts_batch_id` = %s'.format(table = task_table_name)
            fts_to_task = dict(self.db.xquery(sql, fts_batch_id))

            try:
                #LOG.info("List_files call 4")
                result = self._ftscall('get_job_status', job_id = job_id, list_files = True)
            except:
                LOG.error('Failed to get job status for FTS job %s', job_id)
                LOG.error(optype)
                continue
    
            if optype == 'transfer' or optype == 'staging':
                fts_files = result['files']
            else:
                fts_files = result['dm']

            for fts_file in fts_files:
                try:
                    task_id = fts_to_task[fts_file['file_id']]
                except KeyError:
                    continue
    
                state = fts_file['file_state']
                exitcode = -1
                start_time = None
                finish_time = None
                get_time = False

                try:
                    message = fts_file['reason']
                except KeyError:
                    message = None

                if message is not None:
                    # Check if reason follows a known format (from which we can get the exit code)
                    matches = message_pattern.match(message)
                    if matches is not None:
                        exitcode = int(matches.group(1))
                        message = matches.group(2)
                    # Additionally, if the message is a known one, convert the exit code
                    c = find_msg_code(message)
                    if c is not None:
                        exitcode = c

                    # HDFS site with gridftp-hdfs gives a I/O error (500) when the file is not there
                    if optype == 'deletion' and 'Input/output error' in message:
                        exitcode = errno.ENOENT

                if state == 'FINISHED':
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    get_time = True

                elif state == 'FAILED':
                    status = FileQuery.STAT_FAILED
                    get_time = True

                elif state == 'CANCELED':
                    status = FileQuery.STAT_CANCELLED
                    get_time = True

                elif state == 'SUBMITTED':
                    status = FileQuery.STAT_NEW

                else:
                    status = FileQuery.STAT_QUEUED

                if optype == 'transfer' and exitcode == errno.EEXIST:
                    # Transfer + destination exists -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                elif optype == 'deletion' and exitcode == errno.ENOENT:
                    # Deletion + destination does not exist -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    
                if get_time:
                    try:
                        start_time = calendar.timegm(time.strptime(fts_file['start_time'], '%Y-%m-%dT%H:%M:%S'))
                    except TypeError: # start time is NULL (can happen when the job is cancelled)
                        start_time = None
                    try:
                        finish_time = calendar.timegm(time.strptime(fts_file['finish_time'], '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:
                        start_time = None

                LOG.debug('%s %d: %s, %d, %s, %s, %s', optype, task_id, FileQuery.status_name(status), exitcode, message, start_time, finish_time)
    
                results.append((task_id, status, exitcode, message, start_time, finish_time))

        return results

    def _write_history(self, history_db, task_id, history_id, optype):
        if not self._read_only:
            history_db.db.insert_update('fts_servers', ('url',), self.server_url)

        try:
            server_id = history_db.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)[0]
        except IndexError:
            server_id = 0

        sql = 'SELECT b.`job_id`, t.`fts_file_id` FROM `fts_{op}_tasks` AS t'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = t.`fts_batch_id`'
        sql += ' WHERE t.`id` = %s'

        try:
            fts_job_id, fts_file_id = self.db.query(sql.format(op = optype), task_id)[0]
        except IndexError:
            return

        if not self._read_only:
            history_db.db.insert_update('fts_batches', ('fts_server_id', 'job_id'), server_id, fts_job_id)
            batch_id = history_db.db.query('SELECT `id` FROM `fts_batches` WHERE `fts_server_id` = %s AND `job_id` = %s', server_id, fts_job_id)[0]

            history_db.db.insert_update('fts_file_{op}s'.format(op = optype), ('id', 'fts_batch_id', 'fts_file_id'), history_id, batch_id, fts_file_id)

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_tasks` WHERE `id` = %s'.format(optype = optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_batches` WHERE `batch_id` = %s'.format(optype = optype)
        self.db.query(sql, batch_id)

    def _set_server_id(self):
        if not self._read_only:
            self.db.query('INSERT INTO `fts_servers` (`url`) VALUES (%s) ON DUPLICATE KEY UPDATE `url`=VALUES(`url`)', self.server_url)

        result = self.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)
        if len(result) == 0:
            self.server_id = 0
        else:
            self.server_id = result[0]
예제 #4
0
파일: fts.py 프로젝트: cpausmit/dynamo
class FTSFileOperation(FileTransferOperation, FileTransferQuery,
                       FileDeletionOperation, FileDeletionQuery):
    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.server_url = config.fts_server
        self.server_id = 0  # server id in the DB

        # Parameter "retry" for fts3.new_job. 0 = server default
        self.fts_retry = config.get('fts_retry', 0)

        # String passed to fts3.new_*_job(metadata = _)
        self.metadata_string = config.get('metadata_string', 'Dynamo')

        # Proxy to be forwarded to FTS
        self.x509proxy = config.get('x509proxy', None)

        # Bookkeeping device
        self.db = MySQL(config.db_params)

        # Reuse the context object
        self.keep_context = config.get('keep_context', True)
        self._context = None

    def num_pending_transfers(self):  #override
        # Check the number of files in queue
        # We first thought about counting files with /files, but FTS seems to return only 1000 maximum even when "limit" is set much larger
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_transfers)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE', 'STAGING', 'STARTED']

        jobs = self._ftscall('list_jobs',
                             state_in=['SUBMITTED', 'ACTIVE', 'STAGING'])
        for job in jobs:
            job_info = self._ftscall('get_job_status',
                                     job['job_id'],
                                     list_files=True)
            for file_info in job_info['files']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_transfers + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def num_pending_deletions(self):  #override
        # See above
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_deletions)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE']

        jobs = self._ftscall('list_jobs', state_in=['SUBMITTED', 'ACTIVE'])
        for job in jobs:
            job_info = self._ftscall('get_job_status',
                                     job['job_id'],
                                     list_files=True)
            for file_info in job_info['dm']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_deletions + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def form_batches(self, tasks):  #override
        if len(tasks) == 0:
            return []

        # FTS3 has no restriction on how to group the transfers, but cannot apparently take thousands
        # of tasks at once
        batches = [[]]
        for task in tasks:
            batches[-1].append(task)
            if len(batches[-1]) == self.batch_size:
                batches.append([])

        return batches

    def start_transfers(self, batch_id, batch_tasks):  #override
        result = {}

        stage_files = []
        transfers = []

        s_pfn_to_task = {}
        t_pfn_to_task = {}

        for task in batch_tasks:
            sub = task.subscription
            lfn = sub.file.lfn
            dest_pfn = sub.destination.to_pfn(lfn, 'gfal2')
            source_pfn = task.source.to_pfn(lfn, 'gfal2')

            if dest_pfn is None or source_pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                LOG.warning('Could not obtain PFN for %s at %s or %s', lfn,
                            sub.destination.name, task.source.name)
                result[task] = False
                continue

            if self.checksum_algorithm:
                checksum = '%s:%s' % (
                    self.checksum_algorithm,
                    str(sub.file.checksum[self.checksum_index]))
                verify_checksum = 'target'
            else:
                checksum = None
                verify_checksum = False

            if task.source.storage_type == Site.TYPE_MSS:
                LOG.debug('Staging %s at %s', lfn, task.source.name)

                # need to stage first
                stage_files.append(
                    (source_pfn, dest_pfn, checksum, sub.file.size))

                # task identified by the source PFN
                s_pfn_to_task[source_pfn] = task
            else:
                LOG.debug('Submitting transfer of %s from %s to %s to FTS',
                          lfn, task.source.name, sub.destination.name)

                transfers.append(
                    fts3.new_transfer(source_pfn,
                                      dest_pfn,
                                      checksum=checksum,
                                      filesize=sub.file.size))

                # there should be only one task per destination pfn
                t_pfn_to_task[dest_pfn] = task

        if len(stage_files) != 0:
            LOG.debug('Submit new staging job for %d files', len(stage_files))
            job = fts3.new_staging_job([ff[0] for ff in stage_files],
                                       bring_online=36000,
                                       metadata=self.metadata_string)
            success = self._submit_job(
                job, 'staging', batch_id,
                dict(
                    (pfn, task.id) for pfn, task in s_pfn_to_task.iteritems()))

            for source_pfn, _, _, _ in stage_files:
                result[s_pfn_to_task[source_pfn]] = success

            if success and not self._read_only:
                LOG.debug('Recording staging queue')
                fields = ('id', 'source', 'destination', 'checksum', 'size')
                mapping = lambda ff: (s_pfn_to_task[ff[0]].id, ) + ff
                if not self._read_only:
                    self.db.insert_many('fts_staging_queue', fields, mapping,
                                        stage_files)

        if len(transfers) != 0:
            LOG.debug('Submit new transfer job for %d files', len(transfers))
            job = fts3.new_job(transfers,
                               retry=self.fts_retry,
                               overwrite=True,
                               verify_checksum=verify_checksum,
                               metadata=self.metadata_string)
            success = self._submit_job(
                job, 'transfer', batch_id,
                dict(
                    (pfn, task.id) for pfn, task in t_pfn_to_task.iteritems()))

            for transfer in transfers:
                dest_pfn = transfer['destinations'][0]
                result[t_pfn_to_task[dest_pfn]] = success

        return result

    def start_deletions(self, batch_id, batch_tasks):  #override
        result = {}

        pfn_to_task = {}

        for task in batch_tasks:
            desub = task.desubscription
            lfn = desub.file.lfn
            pfn = desub.site.to_pfn(lfn, 'gfal2')

            if pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                result[task] = False
                continue

            # there should be only one task per destination pfn
            pfn_to_task[pfn] = task

        job = fts3.new_delete_job(pfn_to_task.keys(),
                                  metadata=self.metadata_string)

        success = self._submit_job(
            job, 'deletion', batch_id,
            dict((pfn, task.id) for pfn, task in pfn_to_task.iteritems()))

        for task in pfn_to_task.itervalues():
            result[task] = success

        return result

    def cancel_transfers(self, task_ids):  #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids):  #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self):  #override
        sql = 'DELETE FROM f USING `fts_transfer_tasks` AS f'
        sql += ' LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_transfer_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_staging_queue` AS f'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_tasks` AS f'
        sql += ' LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_deletion_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_transfer_batches` AS f'
        sql += ' LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_batches` AS f'
        sql += ' LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `fts_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id):  #override
        if self.server_id == 0:
            self._set_server_id()

        results = self._get_status(batch_id, 'transfer')

        staged_tasks = []

        for task_id, status, exitcode, msg, start_time, finish_time in self._get_status(
                batch_id, 'staging'):
            if status == FileQuery.STAT_DONE:
                staged_tasks.append(task_id)
                results.append(
                    (task_id, FileQuery.STAT_QUEUED, -1, None, None, None))
            else:
                # these tasks won't appear in results from _get_status('transfer')
                # because no transfer jobs have been submitted yet
                results.append(
                    (task_id, status, exitcode, None, start_time, finish_time))

        if len(staged_tasks) != 0:
            transfers = []
            pfn_to_tid = {}
            for task_id, source_pfn, dest_pfn, checksum, filesize in self.db.select_many(
                    'fts_staging_queue',
                ('id', 'source', 'destination', 'checksum', 'size'), 'id',
                    staged_tasks):
                transfers.append(
                    fts3.new_transfer(source_pfn,
                                      dest_pfn,
                                      checksum=checksum,
                                      filesize=filesize))
                pfn_to_tid[dest_pfn] = task_id

            if self.checksum_algorithm:
                verify_checksum = 'target'
            else:
                verify_checksum = None

            job = fts3.new_job(transfers,
                               retry=self.fts_retry,
                               overwrite=True,
                               verify_checksum=verify_checksum,
                               metadata=self.metadata_string)
            success = self._submit_job(job, 'transfer', batch_id, pfn_to_tid)
            if success and not self._read_only:
                self.db.delete_many('fts_staging_queue', 'id',
                                    pfn_to_tid.values())

        return results

    def get_deletion_status(self, batch_id):  #override
        if self.server_id == 0:
            self._set_server_id()

        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id,
                               history_id):  #override
        self._write_history(history_db, task_id, history_id, 'transfer')

    def write_deletion_history(self, history_db, task_id,
                               history_id):  #override
        self._write_history(history_db, task_id, history_id, 'deletion')

    def forget_transfer_status(self, task_id):  #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id):  #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, task_id):  #override
        return self._forget_batch(task_id, 'transfer')

    def forget_deletion_batch(self, task_id):  #override
        return self._forget_batch(task_id, 'deletion')

    def _ftscall(self, method, *args, **kwd):
        return self._do_ftscall(binding=(method, args, kwd))

    def _ftscallurl(self, url):
        # Call to FTS URLs that don't have python bindings
        return self._do_ftscall(url=url)

    def _do_ftscall(self, binding=None, url=None):
        if self._context is None:
            # request_class = Request -> use "requests"-based https call (instead of default PyCURL,
            # which may not be able to handle proxy certificates depending on the cURL installation)
            # verify = False -> do not verify the server certificate
            context = fts3.Context(self.server_url,
                                   ucert=self.x509proxy,
                                   ukey=self.x509proxy,
                                   request_class=Request,
                                   verify=False)

            if self.keep_context:
                self._context = context
        else:
            context = self._context

        if binding is not None:
            reqstring = binding[0]
        else:
            reqstring = url

        LOG.debug('FTS: %s', reqstring)

        wait_time = 1.
        for attempt in xrange(10):
            try:
                if binding is not None:
                    method, args, kwd = binding
                    return getattr(fts3, method)(context, *args, **kwd)
                else:
                    return json.loads(context.get(url))
            except fts_exceptions.ServerError as exc:
                if str(exc.reason) == '500':
                    # Internal server error - let's try again
                    pass
            except fts_exceptions.TryAgain:
                pass

            time.sleep(wait_time)
            wait_time *= 1.5

        LOG.error('Failed to communicate with FTS server: %s', reqstring)
        raise RuntimeError('Failed to communicate with FTS server: %s' %
                           reqstring)

    def _submit_job(self, job, optype, batch_id, pfn_to_tid):
        if self._read_only:
            job_id = 'test'
        else:
            try:
                job_id = self._ftscall('submit', job)
            except:
                exc_type, exc, tb = sys.exc_info()
                LOG.error('Failed to submit %s to FTS: Exception %s (%s)',
                          optype, exc_type.__name__, str(exc))
                return False

        LOG.debug('FTS job id: %s', job_id)

        # list of file-level operations (one-to-one with pfn)
        try:
            if optype == 'transfer' or optype == 'staging':
                key = 'files'
            else:
                key = 'dm'

            fts_files = self._ftscall('get_job_status',
                                      job_id=job_id,
                                      list_files=True)[key]
        except:
            exc_type, exc, tb = sys.exc_info()
            LOG.error(
                'Failed to get status of job %s from FTS: Exception %s (%s)',
                job_id, exc_type.__name__, str(exc))
            return False

        if self.server_id == 0:
            self._set_server_id()

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_batches'
            columns = ('batch_id', 'task_type', 'fts_server_id', 'job_id')
            values = (batch_id, optype, self.server_id, job_id)
        else:
            table_name = 'fts_deletion_batches'
            columns = ('batch_id', 'fts_server_id', 'job_id')
            values = (batch_id, self.server_id, job_id)

        if not self._read_only:
            fts_batch_id = self.db.insert_get_id(table_name,
                                                 columns=columns,
                                                 values=values)

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_tasks'
            pfn_key = 'dest_surl'
        else:
            table_name = 'fts_deletion_tasks'
            pfn_key = 'source_surl'

        fields = ('id', 'fts_batch_id', 'fts_file_id')
        mapping = lambda f: (pfn_to_tid[f[pfn_key]], fts_batch_id, f['file_id']
                             )

        if not self._read_only:
            self.db.insert_many(table_name,
                                fields,
                                mapping,
                                fts_files,
                                do_update=True,
                                update_columns=('fts_batch_id', 'fts_file_id'))

        return True

    def _cancel(self, task_ids, optype):
        sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
        result = self.db.execute_many(sql.format(op=optype),
                                      MySQL.bare('f.`id`'), task_ids)

        by_job = collections.defaultdict(list)

        for job_id, file_id in result:
            by_job[job_id].append(file_id)

        if not self._read_only:
            for job_id, ids in by_job.iteritems():
                try:
                    self._ftscall('cancel', job_id, file_ids=ids)
                except:
                    LOG.error('Failed to cancel FTS job %s', job_id)

    def _get_status(self, batch_id, optype):
        if optype == 'transfer' or optype == 'staging':
            sql = 'SELECT `id`, `job_id` FROM `fts_transfer_batches`'
            sql += ' WHERE `task_type` = %s AND `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, optype, self.server_id, batch_id)
            task_table_name = 'fts_transfer_tasks'
        else:
            sql = 'SELECT `id`, `job_id` FROM `fts_deletion_batches`'
            sql += ' WHERE `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, self.server_id, batch_id)
            task_table_name = 'fts_deletion_tasks'

        message_pattern = re.compile(
            '(?:DESTINATION|SOURCE|TRANSFER|DELETION) \[([0-9]+)\] (.*)')

        results = []

        for fts_batch_id, job_id in batch_data:
            LOG.debug('Checking status of FTS %s batch %s', optype, job_id)

            sql = 'SELECT `fts_file_id`, `id` FROM `{table}` WHERE `fts_batch_id` = %s'.format(
                table=task_table_name)
            fts_to_task = dict(self.db.xquery(sql, fts_batch_id))

            try:
                result = self._ftscall('get_job_status',
                                       job_id=job_id,
                                       list_files=True)
            except:
                LOG.error('Failed to get job status for FTS job %s', job_id)
                continue

            if optype == 'transfer' or optype == 'staging':
                fts_files = result['files']
            else:
                fts_files = result['dm']

            for fts_file in fts_files:
                try:
                    task_id = fts_to_task[fts_file['file_id']]
                except KeyError:
                    continue

                state = fts_file['file_state']
                exitcode = -1
                start_time = None
                finish_time = None
                get_time = False

                try:
                    message = fts_file['reason']
                except KeyError:
                    message = None

                if message is not None:
                    # Check if reason follows a known format (from which we can get the exit code)
                    matches = message_pattern.match(message)
                    if matches is not None:
                        exitcode = int(matches.group(1))
                        message = matches.group(2)
                    # Additionally, if the message is a known one, convert the exit code
                    c = find_msg_code(message)
                    if c is not None:
                        exitcode = c

                    # HDFS site with gridftp-hdfs gives a I/O error (500) when the file is not there
                    if optype == 'deletion' and 'Input/output error' in message:
                        exitcode = errno.ENOENT

                if state == 'FINISHED':
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    get_time = True

                elif state == 'FAILED':
                    status = FileQuery.STAT_FAILED
                    get_time = True

                elif state == 'CANCELED':
                    status = FileQuery.STAT_CANCELLED
                    get_time = True

                elif state == 'SUBMITTED':
                    status = FileQuery.STAT_NEW

                else:
                    status = FileQuery.STAT_QUEUED

                if optype == 'transfer' and exitcode == errno.EEXIST:
                    # Transfer + destination exists -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                elif optype == 'deletion' and exitcode == errno.ENOENT:
                    # Deletion + destination does not exist -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0

                if get_time:
                    try:
                        start_time = calendar.timegm(
                            time.strptime(fts_file['start_time'],
                                          '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:  # start time is NULL (can happen when the job is cancelled)
                        start_time = None
                    try:
                        finish_time = calendar.timegm(
                            time.strptime(fts_file['finish_time'],
                                          '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:
                        start_time = None

                LOG.debug('%s %d: %s, %d, %s, %s, %s', optype, task_id,
                          FileQuery.status_name(status), exitcode, message,
                          start_time, finish_time)

                results.append((task_id, status, exitcode, message, start_time,
                                finish_time))

        return results

    def _write_history(self, history_db, task_id, history_id, optype):
        if not self._read_only:
            history_db.db.insert_update('fts_servers', ('url', ),
                                        self.server_url)

        try:
            server_id = history_db.db.query(
                'SELECT `id` FROM `fts_servers` WHERE `url` = %s',
                self.server_url)[0]
        except IndexError:
            server_id = 0

        sql = 'SELECT b.`job_id`, t.`fts_file_id` FROM `fts_{op}_tasks` AS t'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = t.`fts_batch_id`'
        sql += ' WHERE t.`id` = %s'

        try:
            fts_job_id, fts_file_id = self.db.query(sql.format(op=optype),
                                                    task_id)[0]
        except IndexError:
            return

        if not self._read_only:
            history_db.db.insert_update('fts_batches',
                                        ('fts_server_id', 'job_id'), server_id,
                                        fts_job_id)
            batch_id = history_db.db.query(
                'SELECT `id` FROM `fts_batches` WHERE `fts_server_id` = %s AND `job_id` = %s',
                server_id, fts_job_id)[0]

            history_db.db.insert_update('fts_file_{op}s'.format(op=optype),
                                        ('id', 'fts_batch_id', 'fts_file_id'),
                                        history_id, batch_id, fts_file_id)

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_tasks` WHERE `id` = %s'.format(
            optype=optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_batches` WHERE `batch_id` = %s'.format(
            optype=optype)
        self.db.query(sql, batch_id)

    def _set_server_id(self):
        if not self._read_only:
            self.db.query(
                'INSERT INTO `fts_servers` (`url`) VALUES (%s) ON DUPLICATE KEY UPDATE `url`=VALUES(`url`)',
                self.server_url)

        result = self.db.query(
            'SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)
        if len(result) == 0:
            self.server_id = 0
        else:
            self.server_id = result[0]
예제 #5
0
class CRABAccessHistory(object):
    """
    Sets two attrs:
      global_usage_rank:  float value
      local_usage:        {site: ReplicaAccess}
    """

    produces = ['global_usage_rank', 'local_usage']

    def __init__(self, config):
        self._store = MySQL(config.store.db_params)

    def load(self, inventory):
        records = self._get_stored_records(inventory)
        self._compute(inventory, records)

    def _get_stored_records(self, inventory):
        """
        Get the replica access data from DB.
        @param inventory  DynamoInventory
        @return  {replica: {date: (number of access, total cpu time)}}
        """

        # pick up all accesses that are less than 2 years old
        # old accesses will be removed automatically next time the access information is saved from memory
        sql = 'SELECT s.`name`, d.`name`, YEAR(a.`date`), MONTH(a.`date`), DAY(a.`date`), a.`access_type`+0, a.`num_accesses`, a.`cputime` FROM `dataset_accesses` AS a'
        sql += ' INNER JOIN `sites` AS s ON s.`id` = a.`site_id`'
        sql += ' INNER JOIN `datasets` AS d ON d.`id` = a.`dataset_id`'
        sql += ' WHERE a.`date` > DATE_SUB(NOW(), INTERVAL 2 YEAR) ORDER BY s.`id`, d.`id`, a.`date`'

        all_accesses = {}
        num_records = 0

        # little speedup by not repeating lookups for the same replica
        current_site_name = ''
        site_exists = True
        current_dataset_name = ''
        dataset_exists = True
        replica = None
        for site_name, dataset_name, year, month, day, access_type, num_accesses, cputime in self._store.xquery(
                sql):
            num_records += 1

            if site_name == current_site_name:
                if not site_exists:
                    continue
            else:
                current_site_name = site_name

                current_dataset_name = ''
                dataset_exists = True
                replica = None

                try:
                    site = inventory.sites[site_name]
                except KeyError:
                    site_exists = False
                    continue
                else:
                    site_exists = True

            if dataset_name == current_dataset_name:
                if not dataset_exists:
                    continue
            else:
                current_dataset_name = dataset_name

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    dataset_exists = False
                    continue
                else:
                    dataset_exists = True

                replica = site.find_dataset_replica(dataset)
                if replica is not None:
                    accesses = all_accesses[replica] = {}

            if replica is None:
                continue

            date = datetime.date(year, month, day)
            accesses[date] = (num_accesses, cputime)

        last_update = self._store.query(
            'SELECT UNIX_TIMESTAMP(`dataset_accesses_last_update`) FROM `system`'
        )[0]

        LOG.info('Loaded %d replica access data. Last update on %s UTC',
                 num_records,
                 time.strftime('%Y-%m-%d', time.gmtime(last_update)))

        return all_accesses

    def _compute(self, inventory, all_accesses):
        """
        Set the dataset usage rank based on access list.
        Following the IntelROCCS implementation for local rank:
        datasetRank = (1-used)*(now-creationDate)/(60*60*24) + \
            used*( (now-lastAccessed)/(60*60*24)-nAccessed) - size/1000
        nAccessed is NACC normalized by size (in GB).
        @param inventory   DynamoInventory
        @param all_accesses {replica: {date: (number of access, cpu time)}}
        """

        now = time.time()
        today = datetime.datetime.utcfromtimestamp(now).date()

        for dataset in inventory.datasets.itervalues():
            local_usage = dataset.attr['local_usage'] = {
            }  # {site: ReplicaAccess}

            n_disk_nodes = 0
            for replica in dataset.replicas:
                if replica.site.storage_type == Site.TYPE_DISK:
                    n_disk_nodes += 1

            for replica in dataset.replicas:
                size = replica.size(physical=False) * 1.e-9

                try:
                    accesses = all_accesses[replica]
                except KeyError:
                    accesses = {}
                    last_used = 0
                    num_access = 0
                    tot_cpu = 0.
                else:
                    last_access = max(accesses.iterkeys())
                    # mktime returns expects the local time but the timetuple we pass is for UTC. subtracting time.timezone
                    last_used = time.mktime(
                        last_access.timetuple()) - time.timezone
                    num_access = sum(e[0] for e in accesses.itervalues())
                    tot_cpu = sum(e[1] for e in accesses.itervalues())

                if num_access == 0:
                    local_rank = (now - replica.last_block_created()) / (24. *
                                                                         3600.)
                elif size > 0.:
                    local_rank = (today - last_access).days - num_access / size
                else:
                    local_rank = (today - last_access).days

                local_rank -= size * 1.e-3

                local_usage[replica.site] = ReplicaAccess(
                    local_rank, num_access, tot_cpu, last_used)

            global_rank = sum(usage.rank for usage in local_usage.values())

            if n_disk_nodes != 0:
                global_rank /= n_disk_nodes

            dataset.attr['global_usage_rank'] = global_rank

    @staticmethod
    def update(config, inventory):
        popdb = PopDB(config.popdb.config)
        store = MySQL(config.store.db_params)

        last_update = store.query(
            'SELECT UNIX_TIMESTAMP(`dataset_accesses_last_update`) FROM `system`'
        )[0]
        try:
            store.query(
                'UPDATE `system` SET `dataset_accesses_last_update` = NOW()',
                retries=0,
                silent=True)
        except MySQLdb.OperationalError:
            # We have a read-only config
            read_only = True
            LOG.info('Running update() in read-only mode.')
        else:
            read_only = False

        start_time = max(last_update,
                         (time.time() - 3600 * 24 * config.max_back_query))
        start_date = datetime.date(*time.gmtime(start_time)[:3])

        included_sites = list(config.included_sites)
        excluded_sites = list(config.excluded_sites)

        source_records = CRABAccessHistory._get_source_records(
            popdb, inventory, included_sites, excluded_sites, start_date)

        if not read_only:
            CRABAccessHistory._save_records(source_records, store)
            # remove old entries
            store.query(
                'DELETE FROM `dataset_accesses` WHERE `date` < DATE_SUB(NOW(), INTERVAL 2 YEAR)'
            )
            store.query(
                'UPDATE `system` SET `dataset_accesses_last_update` = NOW()')

    @staticmethod
    def _get_source_records(popdb, inventory, included_sites, excluded_sites,
                            start_date):
        """
        Get the replica access data from PopDB from start_date to today.
        @param popdb          PopDB interface
        @param inventory      DynamoInventory
        @param included_sites List of site name patterns to include
        @param excluded_sites List of site name patterns to exclude
        @param start_date     Query start date (datetime.datetime)
        @return  {replica: {date: (number of access, total cpu time)}}
        """

        days_to_query = []

        utctoday = datetime.date(*time.gmtime()[:3])
        date = start_date
        while date <= utctoday:  # get records up to today
            days_to_query.append(date)
            date += datetime.timedelta(1)  # one day

        LOG.info('Updating dataset access info from %s to %s',
                 start_date.strftime('%Y-%m-%d'),
                 utctoday.strftime('%Y-%m-%d'))

        all_accesses = {}

        arg_pool = []
        for site in inventory.sites.itervalues():
            matched = False
            for pattern in included_sites:
                if fnmatch.fnmatch(site.name, pattern):
                    matched = True
                    break
            for pattern in excluded_sites:
                if fnmatch.fnmatch(site.name, pattern):
                    matched = False
                    break

            if matched:
                for date in days_to_query:
                    arg_pool.append((popdb, site, inventory, date))

        mapper = Map()
        mapper.logger = LOG

        records = mapper.execute(CRABAccessHistory._get_site_record, arg_pool)

        for site_record in records:
            for replica, date, naccess, cputime in site_record:
                if replica not in all_accesses:
                    all_accesses[replica] = {}

                all_accesses[replica][date] = (naccess, cputime)

        return all_accesses

    @staticmethod
    def _get_site_record(popdb, site, inventory, date):
        """
        Get the replica access data on a single site from PopDB.
        @param popdb      PopDB interface
        @param site       Site
        @param inventory  Inventory
        @param date       datetime.date
        @return [(replica, number of access, total cpu time)]
        """

        if site.name.startswith('T0'):
            return []
        elif site.name.startswith('T1') and site.name.count('_') > 2:
            nameparts = site.name.split('_')
            sitename = '_'.join(nameparts[:3])
            service = 'popularity/DSStatInTimeWindow/'  # the trailing slash is apparently important
        elif site.name == 'T2_CH_CERN':
            sitename = site.name
            service = 'xrdpopularity/DSStatInTimeWindow'
        else:
            sitename = site.name
            service = 'popularity/DSStatInTimeWindow/'

        datestr = date.strftime('%Y-%m-%d')
        result = popdb.make_request(
            service,
            ['sitename=' + sitename, 'tstart=' + datestr, 'tstop=' + datestr])

        records = []

        for ds_entry in result:
            try:
                dataset = inventory.datasets[ds_entry['COLLNAME']]
            except KeyError:
                continue

            replica = site.find_dataset_replica(dataset)
            if replica is None:
                continue

            records.append((replica, date, int(ds_entry['NACC']),
                            float(ds_entry['TOTCPU'])))

        return records

    @staticmethod
    def _save_records(records, store):
        """
        Save the newly fetched access records.
        @param records  {replica: {date: (number of access, total cpu time)}}
        @param store    Write-allowed MySQL interface
        """

        site_id_map = {}
        store.make_map('sites', set(r.site for r in records.iterkeys()),
                       site_id_map, None)
        dataset_id_map = {}
        store.make_map('datasets', set(r.dataset for r in records.iterkeys()),
                       dataset_id_map, None)

        fields = ('dataset_id', 'site_id', 'date', 'access_type',
                  'num_accesses', 'cputime')

        data = []
        for replica, entries in records.iteritems():
            dataset_id = dataset_id_map[replica.dataset]
            site_id = site_id_map[replica.site]

            for date, (num_accesses, cputime) in entries.iteritems():
                data.append((dataset_id, site_id, date.strftime('%Y-%m-%d'),
                             'local', num_accesses, cputime))

        store.insert_many('dataset_accesses',
                          fields,
                          None,
                          data,
                          do_update=True)
예제 #6
0
class StandaloneFileOperation(FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
    """
    Interface to in-house transfer & deletion daemon using MySQL for bookkeeping.
    """

    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.db = MySQL(config.db_params)

    def num_pending_transfers(self): #override
        # FOD can throttle itself.
        return 0

    def num_pending_deletions(self): #override
        # FOD can throttle itself.
        return 0

    def form_batches(self, tasks): #override
        if len(tasks) == 0:
            return []

        if hasattr(tasks[0], 'source'):
            # These are transfer tasks
            by_endpoints = collections.defaultdict(list)
            for task in tasks:
                endpoints = (task.source, task.subscription.destination)
                by_endpoints[endpoints].append(task)

            return by_endpoints.values()
        else:
            by_endpoint = collections.defaultdict(list)
            for task in tasks:
                by_endpoint[task.desubscription.site].append(task)

            return by_endpoint.values()

    def start_transfers(self, batch_id, batch_tasks): #override
        if len(batch_tasks) == 0:
            return {}

        result = {}

        # tasks should all have the same source and destination
        source = batch_tasks[0].source
        destination = batch_tasks[0].subscription.destination

        fields = ('id', 'source', 'destination', 'checksum_algo', 'checksum')

        def yield_task_entry():
            for task in batch_tasks:
                lfile = task.subscription.file
                lfn = lfile.lfn
                source_pfn = source.to_pfn(lfn, 'gfal2')
                dest_pfn = destination.to_pfn(lfn, 'gfal2')

                if source_pfn is None or dest_pfn is None:
                    # either gfal2 is not supported or lfn could not be mapped
                    result[task] = False
                    continue

                if self.checksum_algorithm:
                    checksum = (self.checksum_algorithm, str(lfile.checksum[self.checksum_index]))
                else:
                    checksum = (None, None)

                result[task] = True
                yield (task.id, source_pfn, dest_pfn) + checksum

        if not self._read_only:
            sql = 'INSERT INTO `standalone_transfer_batches` (`batch_id`, `source_site`, `destination_site`) VALUES (%s, %s, %s)'
            self.db.query(sql, batch_id, source.name, destination.name)
            self.db.insert_many('standalone_transfer_tasks', fields, None, yield_task_entry())

        LOG.debug('Inserted %d entries to standalone_transfer_tasks for batch %d.', len(batch_tasks), batch_id)

        return result

    def start_deletions(self, batch_id, batch_tasks): #override
        if len(batch_tasks) == 0:
            return {}

        result = {}

        # tasks should all have the same target site
        site = batch_tasks[0].desubscription.site

        fields = ('id', 'file')

        def yield_task_entry():
            for task in batch_tasks:
                lfn = task.desubscription.file.lfn
                pfn = site.to_pfn(lfn, 'gfal2')

                if pfn is None:
                    # either gfal2 is not supported or lfn could not be mapped
                    result[task] = False
                    continue

                result[task] = True
                yield (task.id, pfn)

        if not self._read_only:
            sql = 'INSERT INTO `standalone_deletion_batches` (`batch_id`, `site`) VALUES (%s, %s)'
            self.db.query(sql, batch_id, site.name)
            self.db.insert_many('standalone_deletion_tasks', fields, None, yield_task_entry())

        LOG.debug('Inserted %d entries to standalone_deletion_tasks for batch %d.', len(batch_tasks), batch_id)

        return result

    def cancel_transfers(self, task_ids): #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids): #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self): #override
        sql = 'DELETE FROM f USING `standalone_transfer_tasks` AS f LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_deletion_tasks` AS f LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_transfer_batches` AS f LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_deletion_batches` AS f LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id` WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `standalone_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `standalone_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id): #override
        return self._get_status(batch_id, 'transfer')

    def get_deletion_status(self, batch_id): #override
        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id, history_id): #override
        pass

    def write_deletion_history(self, history_db, task_id, history_id): #override
        pass

    def forget_transfer_status(self, task_id): #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id): #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, batch_id): #override
        return self._forget_batch(batch_id, 'transfer')

    def forget_deletion_batch(self, batch_id): #override
        return self._forget_batch(batch_id, 'deletion')

    def _cancel(self, task_ids, optype):
        sql = 'UPDATE `standalone_{op}_tasks` SET `status` = \'cancelled\''.format(op = optype)
        self.db.execute_many(sql, 'id', task_ids, ['`status` IN (\'new\', \'queued\')'])

    def _get_status(self, batch_id, optype):
        sql = 'SELECT q.`id`, a.`status`, a.`exitcode`, a.`message`, UNIX_TIMESTAMP(a.`start_time`), UNIX_TIMESTAMP(a.`finish_time`) FROM `standalone_{op}_tasks` AS a'
        sql += ' INNER JOIN `{op}_tasks` AS q ON q.`id` = a.`id`'
        sql += ' WHERE q.`batch_id` = %s'
        sql = sql.format(op = optype)

        return [(i, FileQuery.status_val(s), c, m, t, f) for (i, s, c, m, t, f) in self.db.xquery(sql, batch_id)]

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `standalone_{op}_tasks` WHERE `id` = %s'.format(op = optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `standalone_{op}_batches` WHERE `batch_id` = %s'
        self.db.query(sql.format(op = optype), batch_id)
예제 #7
0
파일: standalone.py 프로젝트: kpark1/dynamo
class StandaloneFileOperation(FileTransferOperation, FileTransferQuery,
                              FileDeletionOperation, FileDeletionQuery):
    """
    Interface to in-house transfer & deletion daemon using MySQL for bookkeeping.
    """
    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.db = MySQL(config.db_params)

    def num_pending_transfers(self):  #override
        # FOD can throttle itself.
        return 0

    def num_pending_deletions(self):  #override
        # FOD can throttle itself.
        return 0

    def form_batches(self, tasks):  #override
        if len(tasks) == 0:
            return []

        if hasattr(tasks[0], 'source'):
            # These are transfer tasks
            by_endpoints = collections.defaultdict(list)
            for task in tasks:
                endpoints = (task.source, task.subscription.destination)
                by_endpoints[endpoints].append(task)

            return by_endpoints.values()
        else:
            by_endpoint = collections.defaultdict(list)
            for task in tasks:
                by_endpoint[task.desubscription.site].append(task)

            return by_endpoint.values()

    def start_transfers(self, batch_id, batch_tasks):  #override
        if len(batch_tasks) == 0:
            return {}

        result = {}

        # tasks should all have the same source and destination
        source = batch_tasks[0].source
        destination = batch_tasks[0].subscription.destination

        fields = ('id', 'source', 'destination', 'checksum_algo', 'checksum')

        def yield_task_entry():
            for task in batch_tasks:
                lfile = task.subscription.file
                lfn = lfile.lfn
                source_pfn = source.to_pfn(lfn, 'gfal2')
                dest_pfn = destination.to_pfn(lfn, 'gfal2')

                if source_pfn is None or dest_pfn is None:
                    # either gfal2 is not supported or lfn could not be mapped
                    result[task] = False
                    continue

                if self.checksum_algorithm:
                    checksum = (self.checksum_algorithm,
                                str(lfile.checksum[self.checksum_index]))
                else:
                    checksum = (None, None)

                result[task] = True
                yield (task.id, source_pfn, dest_pfn) + checksum

        if not self._read_only:
            sql = 'INSERT INTO `standalone_transfer_batches` (`batch_id`, `source_site`, `destination_site`) VALUES (%s, %s, %s)'
            self.db.query(sql, batch_id, source.name, destination.name)
            self.db.insert_many('standalone_transfer_tasks', fields, None,
                                yield_task_entry())

        LOG.debug(
            'Inserted %d entries to standalone_transfer_tasks for batch %d.',
            len(batch_tasks), batch_id)

        return result

    def start_deletions(self, batch_id, batch_tasks):  #override
        if len(batch_tasks) == 0:
            return {}

        result = {}

        # tasks should all have the same target site
        site = batch_tasks[0].desubscription.site

        fields = ('id', 'file')

        def yield_task_entry():
            for task in batch_tasks:
                lfn = task.desubscription.file.lfn
                pfn = site.to_pfn(lfn, 'gfal2')

                if pfn is None:
                    # either gfal2 is not supported or lfn could not be mapped
                    result[task] = False
                    continue

                result[task] = True
                yield (task.id, pfn)

        if not self._read_only:
            sql = 'INSERT INTO `standalone_deletion_batches` (`batch_id`, `site`) VALUES (%s, %s)'
            self.db.query(sql, batch_id, site.name)
            self.db.insert_many('standalone_deletion_tasks', fields, None,
                                yield_task_entry())

        LOG.debug(
            'Inserted %d entries to standalone_deletion_tasks for batch %d.',
            len(batch_tasks), batch_id)

        return result

    def cancel_transfers(self, task_ids):  #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids):  #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self):  #override
        sql = 'DELETE FROM f USING `standalone_transfer_tasks` AS f LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_deletion_tasks` AS f LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_transfer_batches` AS f LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_deletion_batches` AS f LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id` WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `standalone_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `standalone_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id):  #override
        return self._get_status(batch_id, 'transfer')

    def get_deletion_status(self, batch_id):  #override
        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id,
                               history_id):  #override
        pass

    def write_deletion_history(self, history_db, task_id,
                               history_id):  #override
        pass

    def forget_transfer_status(self, task_id):  #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id):  #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, batch_id):  #override
        return self._forget_batch(batch_id, 'transfer')

    def forget_deletion_batch(self, batch_id):  #override
        return self._forget_batch(batch_id, 'deletion')

    def _cancel(self, task_ids, optype):
        sql = 'UPDATE `standalone_{op}_tasks` SET `status` = \'cancelled\''.format(
            op=optype)
        self.db.execute_many(sql, 'id', task_ids,
                             ['`status` IN (\'new\', \'queued\')'])

    def _get_status(self, batch_id, optype):
        sql = 'SELECT q.`id`, a.`status`, a.`exitcode`, a.`message`, UNIX_TIMESTAMP(a.`start_time`), UNIX_TIMESTAMP(a.`finish_time`) FROM `standalone_{op}_tasks` AS a'
        sql += ' INNER JOIN `{op}_tasks` AS q ON q.`id` = a.`id`'
        sql += ' WHERE q.`batch_id` = %s'
        sql = sql.format(op=optype)

        return [(i, FileQuery.status_val(s), c, m, t, f)
                for (i, s, c, m, t, f) in self.db.xquery(sql, batch_id)]

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `standalone_{op}_tasks` WHERE `id` = %s'.format(
            op=optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `standalone_{op}_batches` WHERE `batch_id` = %s'
        self.db.query(sql.format(op=optype), batch_id)
예제 #8
0
class RLFSM(object):
    """
    File operations manager using MySQL tables for queue bookkeeping. Also implies the
    inventory backend is MySQL.
    """

    class Subscription(object):
        __slots__ = ['id', 'status', 'file', 'destination', 'disk_sources', 'tape_sources', 'failed_sources', 'hold_reason']

        def __init__(self, id, status, file, destination, disk_sources, tape_sources, failed_sources = None, hold_reason = None):
            self.id = id
            self.status = status
            self.file = file
            self.destination = destination
            self.disk_sources = disk_sources
            self.tape_sources = tape_sources
            self.failed_sources = failed_sources
            self.hold_reason = hold_reason

    class TransferTask(object):
        __slots__ = ['id', 'subscription', 'source']

        def __init__(self, subscription, source):
            self.id = None
            self.subscription = subscription
            self.source = source

    class Desubscription(object):
        __slots__ = ['id', 'status', 'file', 'site']

        def __init__(self, id, status, file, site):
            self.id = id
            self.status = status
            self.file = file
            self.site = site

    class DeletionTask(object):
        __slots__ = ['id', 'desubscription']

        def __init__(self, desubscription):
            self.id = None
            self.desubscription = desubscription

    # default config
    _config = ''

    @staticmethod
    def set_default(config):
        RLFSM._config = Configuration(config)

    def __init__(self, config = None):
        if config is None:
            config = RLFSM._config

        # Handle to the inventory DB
        self.db = MySQL(config.db.db_params)

        # Handle to the history DB
        self.history_db = HistoryDatabase(config.get('history', None))

        # FileTransferOperation backend (can make it a map from (source, dest) to operator)
        self.transfer_operations = []
        if 'transfer' in config:
            for condition_text, module, conf in config.transfer:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.transfer_operations.append((condition, FileTransferOperation.get_instance(module, conf)))

        if 'transfer_query' in config:
            self.transfer_queries = []
            for condition_text, module, conf in config.transfer_query:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.transfer_queries.append(condition, FileTransferQuery.get_instance(module, conf))
        else:
            self.transfer_queries = self.transfer_operations

        if 'deletion' in config:
            self.deletion_operations = []
            for condition_text, module, conf in config.deletion:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.deletion_operations.append(condition, FileDeletionOperation.get_instance(module, conf))
        else:
            self.deletion_operations = self.transfer_operations

        if 'deletion_query' in config:
            self.deletion_queries = []
            for condition_text, module, conf in config.deletion_query:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.deletion_queries.append(condition, FileDeletionQuery.get_instance(module, conf))
        else:
            self.deletion_queries = self.deletion_operations

        self.sites_in_downtime = []

        # Cycle thread
        self.main_cycle = None
        self.cycle_stop = threading.Event()

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value = True):
        self._read_only = value
        self.history_db.set_read_only(value)
        for _, op in self.transfer_operations:
            op.set_read_only(value)
        if self.transfer_queries is not self.transfer_operations:
            for _, qry in self.transfer_queries:
                qry.set_read_only(value)
        if self.deletion_operations is not self.transfer_operations:
            for _, op in self.deletion_operations:
                op.set_read_only(value)
        if self.deletion_queries is not self.deletion_operations:
            for _, qry in self.deletion_queries:
                qry.set_read_only(value)

    def start(self, inventory):
        """
        Start the file operations management cycle. Issue transfer and deletion tasks to the backend.
        """

        if self.main_cycle is not None:
            return

        LOG.info('Starting file operations manager')

        self.main_cycle = threading.Thread(target = self._run_cycle, name = 'FOM', args = (inventory,))
        self.main_cycle.start()

        LOG.info('Started file operations manager.')

    def stop(self):
        """
        Stop the file operations management cycle.
        """

        LOG.info('Stopping file operations manager.')

        self.cycle_stop.set()
        self.main_cycle.join()

        self.main_cycle = None
        self.cycle_stop.clear()
        
    def transfer_files(self, inventory):
        """
        Routine for managing file transfers.
        1. Query the file transfer agent and update the status of known subscriptions.
        2. Pick up new subscriptions.
        3. Select the source for each transfer.
        4. Organize the transfers into batches.
        5. Start the transfers.
        The routine can be stopped almost at any point without corrupting the state machine.
        The only cases where stopping is problematic are within self._update_status and self._start_transfers.

        @param inventory   The inventory.
        """

        self._cleanup()

        LOG.debug('Clearing cancelled transfer tasks.')
        task_ids = self._get_cancelled_tasks('transfer')
        for _, op in self.transfer_operations:
            op.cancel_transfers(task_ids)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Fetching subscription status from the file operation agent.')
        self._update_status('transfer')

        if self.cycle_stop.is_set():
            return

        LOG.debug('Filtering out transfers to unavailable destinations.')
        if not self._read_only:
            for site in self.sites_in_downtime:
                self.db.query('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'site_unavailable\' WHERE `site_id` = (SELECT `id` FROM `sites` WHERE `name` = %s)', site.name)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Collecting new transfer subscriptions.')
        subscriptions = self.get_subscriptions(inventory, op = 'transfer', status = ['new', 'retry'])

        if self.cycle_stop.is_set():
            return

        # We check the operators here because get_subscriptions does some state update and we want that to happen
        pending_count = {}
        n_available = 0
        for _, op in self.transfer_operations:
            pending_count[op] = op.num_pending_transfers()
            if pending_count[op] <= op.max_pending_transfers:
                n_available += 1

        if n_available == 0:
            LOG.info('No transfer operators are available at the moment.')
            return

        LOG.debug('Identifying source sites for %d transfers.', len(subscriptions))
        tasks = self._select_source(subscriptions)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Organizing %d transfers into batches.', len(tasks))

        by_dest = {}
        for task in tasks:
            try:
                by_dest[task.subscription.destination].append(task)
            except KeyError:
                by_dest[task.subscription.destination] = [task]

        def issue_tasks(op, my_tasks):
            if len(my_tasks) == 0:
                return 0, 0, 0

            batches = op.form_batches(my_tasks)
    
            if self.cycle_stop.is_set():
                return 0, 0, 0

            nb = 0
            ns = 0
            nf = 0
   
            LOG.debug('Issuing transfer tasks.')
            for batch_tasks in batches:
                s, f = self._start_transfers(op, batch_tasks)
                nb += 1
                ns += s
                nf += f

                pending_count[op] += s
                if pending_count[op] > op.max_pending_transfers:
                    break

                if self.cycle_stop.is_set():
                    break

            return nb, ns, nf

        num_success = 0
        num_failure = 0
        num_batches = 0
        
        for condition, op in self.transfer_operations:
            if condition is None:
                default_op = op
                continue
            
            my_tasks = []
            for site in by_dest.keys():
                if condition.match(site):
                    my_tasks.extend(by_dest.pop(site))

            if pending_count[op] > op.max_pending_transfers:
                continue

            nb, ns, nf = issue_tasks(op, my_tasks)
            num_batches += nb
            num_success += ns
            num_failure += nf

            if self.cycle_stop.is_set():
                break

        else:
            # default condition
            if pending_count[default_op] <= default_op.max_pending_transfers:
                my_tasks = sum(by_dest.itervalues(), [])
                nb, ns, nf = issue_tasks(default_op, my_tasks)
                num_batches += nb
                num_success += ns
                num_failure += nf

        if num_success + num_failure != 0:
            LOG.info('Issued transfer tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)
        else:
            LOG.debug('Issued transfer tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)

    def delete_files(self, inventory):
        """
        Routine for managing file deletions.
        1. Query the file deletion agent and update the status of known subscriptions.
        2. Register the paths for completed deletions as candidates of empty directories.
        3. Pick up new subscriptions.
        4. Organize the deletions into batches.
        5. Start the deletions.
        The routine can be stopped almost at any point without corrupting the state machine.
        The only cases where stopping is problematic are within self._update_status and self._start_deletions.
        @param inventory   The inventory.
        """

        self._cleanup()

        LOG.debug('Clearing cancelled deletion tasks.')
        task_ids = self._get_cancelled_tasks('deletion')
        for _, op in self.deletion_operations:
            op.cancel_deletions(task_ids)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Fetching deletion status from the file operation agent.')
        completed = self._update_status('deletion')

        LOG.debug('Recording candidates for empty directories.')
        self._set_dirclean_candidates(completed, inventory)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Filtering out transfers to unavailable destinations.')
        if not self._read_only:
            for site in self.sites_in_downtime:
                self.db.query('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'site_unavailable\' WHERE `site_id` = (SELECT `id` FROM `sites` WHERE `name` = %s)', site.name)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Collecting new deletion subscriptions.')
        desubscriptions = self.get_subscriptions(inventory, op = 'deletion', status = ['new', 'retry'])

        if self.cycle_stop.is_set():
            return

        # See transfer_files
        pending_count = {}
        n_available = 0
        for _, op in self.deletion_operations:
            pending_count[op] = op.num_pending_deletions()
            if pending_count[op] <= op.max_pending_deletions:
                n_available += 1

        if n_available == 0:
            LOG.info('No deletion operators are available at the moment.')
            return

        tasks = [RLFSM.DeletionTask(d) for d in desubscriptions]

        by_site = {}
        for task in tasks:
            try:
                by_site[task.desubscription.site].append(task)
            except KeyError:
                by_site[task.desubscription.site] = [task]

        LOG.debug('Organizing the deletions into batches.')

        def issue_tasks(op, my_tasks):
            if len(my_tasks) == 0:
                return 0, 0, 0

            batches = op.form_batches(my_tasks)
    
            if self.cycle_stop.is_set():
                return 0, 0, 0

            nb = 0
            ns = 0
            nf = 0
    
            LOG.debug('Issuing deletion tasks for %d batches.', len(batches))    
            for batch_tasks in batches:
                LOG.debug('Batch with %d tasks.', len(batch_tasks))
                s, f = self._start_deletions(op, batch_tasks)
                nb += 1
                ns += s
                nf += f

                pending_count[op] += s
                if pending_count[op] > op.max_pending_deletions:
                    break

                if self.cycle_stop.is_set():
                    break

            return nb, ns, nf

        num_success = 0
        num_failure = 0
        num_batches = 0

        for condition, op in self.deletion_operations:
            if condition is None:
                default_op = op
                continue

            my_tasks = []
            for site in by_site.keys():
                if condition.match(site):
                    my_tasks.extend(by_site.pop(site))

            if pending_count[op] > op.max_pending_deletions:
                continue

            nb, ns, nf = issue_tasks(op, my_tasks)
            num_batches += nb;
            num_success += ns;
            num_failure += nf;

            if self.cycle_stop.is_set():
                break

        else:
            # default condition
            if pending_count[default_op] <= default_op.max_pending_deletions:
                my_tasks = sum(by_site.itervalues(), [])
                nb, ns, nf = issue_tasks(default_op, my_tasks)
                num_batches += nb;
                num_success += ns;
                num_failure += nf;

        if num_success + num_failure != 0:
            LOG.info('Issued deletion tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)
        else:
            LOG.debug('Issued deletion tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)

    def subscribe_file(self, site, lfile):
        """
        Make a file subscription at a site.
        @param site  Site object
        @param lfile File object
        """
        LOG.debug('Subscribing %s to %s', lfile.lfn, site.name)

        self._subscribe(site, lfile, 0)

    def desubscribe_file(self, site, lfile):
        """
        Book deletion of a file at a site.
        @param site  Site object
        @param lfile File object
        """
        LOG.debug('Desubscribing %s from %s', lfile.lfn, site.name)

        self._subscribe(site, lfile, 1)

    def cancel_subscription(self, site = None, lfile = None, sub_id = None):
        sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\' WHERE '

        if sub_id is None:
            if site is None or lfile is None:
                raise OperationalError('site and lfile must be non-None.')

            sql += '`file_id` = %s AND `site_id` = %s'
            if not self._read_only:
                self.db.query(sql, lfile.id, site.id)
        else:
            sql += '`id` = %s'
            if not self._read_only:
                self.db.query(sql, sub_id)

    def cancel_desubscription(self, site = None, lfile = None, sub_id = None):
        self.cancel_subscription(site = site, lfile = lfile, sub_id = sub_id)

    def convert_pre_subscriptions(self, inventory):
        sql = 'SELECT `id`, `file_name`, `site_name`, UNIX_TIMESTAMP(`created`), `delete` FROM `file_pre_subscriptions`'

        sids = []

        for sid, lfn, site_name, created, delete in self.db.query(sql):
            lfile = inventory.find_file(lfn)
            if lfile is None or lfile.id == 0:
                continue

            try:
                site = inventory.sites[site_name]
            except KeyError:
                continue

            if site.id == 0:
                continue

            sids.append(sid)

            self._subscribe(site, lfile, delete, created = created)

        if not self._read_only:
            self.db.lock_tables(write = ['file_pre_subscriptions'])
            self.db.delete_many('file_pre_subscriptions', 'id', sids)
            if self.db.query('SELECT COUNT(*) FROM `file_pre_subscriptions`')[0] == 0:
                self.db.query('ALTER TABLE `file_pre_subscriptions` AUTO_INCREMENT = 1')
            self.db.unlock_tables()

    def get_subscriptions(self, inventory, op = None, status = None):
        """
        Return a list containing Subscription and Desubscription objects ordered by the id.
        @param inventory   Dynamo inventory
        @param op          If set to 'transfer' or 'deletion', limit to the operation type.
        @param status      If not None, set to list of status strings to limit the query.
        """

        # First convert all pre-subscriptions
        self.convert_pre_subscriptions(inventory)

        subscriptions = []

        get_all = 'SELECT u.`id`, u.`status`, u.`delete`, f.`block_id`, f.`name`, s.`name`, u.`hold_reason` FROM `file_subscriptions` AS u'
        get_all += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        get_all += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        constraints = []
        if op == 'transfer':
            constraints.append('u.`delete` = 0')
        elif op == 'deletion':
            constraints.append('u.`delete` = 1')
        if status is not None:
            constraints.append('u.`status` IN ' + MySQL.stringify_sequence(status))

        if len(constraints) != 0:
            get_all += ' WHERE ' + ' AND '.join(constraints)

        get_all += ' ORDER BY s.`id`, f.`block_id`'

        get_tried_sites = 'SELECT s.`name`, f.`exitcode` FROM `failed_transfers` AS f'
        get_tried_sites += ' INNER JOIN `sites` AS s ON s.`id` = f.`source_id`'
        get_tried_sites += ' WHERE f.`subscription_id` = %s'

        _destination_name = ''
        _block_id = -1

        no_source = []
        all_failed = []
        to_done = []

        COPY = 0
        DELETE = 1

        for row in self.db.query(get_all):
            sub_id, st, optype, block_id, file_name, site_name, hold_reason = row

            if site_name != _destination_name:
                _destination_name = site_name
                try:
                    destination = inventory.sites[site_name]
                except KeyError:
                    # Site disappeared from the inventory - weird but can happen!
                    destination = None

                _block_id = -1

            if destination is None:
                continue

            if block_id != _block_id:
                lfile = inventory.find_file(file_name)
                if lfile is None:
                    # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                    continue

                _block_id = block_id
                block = lfile.block
                dest_replica = block.find_replica(destination)

            else:
                lfile = block.find_file(file_name)
                if lfile is None:
                    # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                    continue

            if dest_replica is None and st != 'cancelled':
                LOG.debug('Destination replica for %s does not exist. Canceling the subscription.', file_name)
                # Replica was invalidated
                sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\''
                sql += ' WHERE `id` = %s'
                if not self._read_only:
                    self.db.query(sql, sub_id)

                if status is not None and 'cancelled' not in status:
                    # We are not asked to return cancelled subscriptions
                    continue

                st = 'cancelled'

            if optype == COPY:
                disk_sources = None
                tape_sources = None
                failed_sources = None

                if st not in ('done', 'held', 'cancelled'):
                    if dest_replica.has_file(lfile):
                        LOG.debug('%s already exists at %s', file_name, site_name)
                        to_done.append(sub_id)

                        st = 'done'

                    else:
                        disk_sources = []
                        tape_sources = []
                        for replica in block.replicas:
                            if replica.site == destination or replica.site.status != Site.STAT_READY:
                                continue
            
                            if replica.has_file(lfile):
                                if replica.site.storage_type == Site.TYPE_DISK:
                                    disk_sources.append(replica.site)
                                elif replica.site.storage_type == Site.TYPE_MSS:
                                    tape_sources.append(replica.site)
            
                        if len(disk_sources) + len(tape_sources) == 0:
                            LOG.warning('Transfer of %s to %s has no source.', file_name, site_name)
                            no_source.append(sub_id)

                            st = 'held'

                if st == 'retry':
                    failed_sources = {}
                    for source_name, exitcode in self.db.query(get_tried_sites, sub_id):
                        try:
                            source = inventory.sites[source_name]
                        except KeyError:
                            # this site may have been deleted in this process
                            continue

                        try:
                            failed_sources[source].append(exitcode)
                        except KeyError:
                            if source not in disk_sources and source not in tape_sources:
                                # this is not a source site any more
                                continue

                            failed_sources[source] = [exitcode]
    
                    if len(failed_sources) == len(disk_sources) + len(tape_sources):
                        # transfers from all sites failed at least once
                        for codes in failed_sources.itervalues():
                            if codes[-1] not in irrecoverable_errors:
                                # This site failed for a recoverable reason
                                break
                        else:
                            # last failure from all sites due to irrecoverable errors
                            LOG.warning('Transfer of %s to %s failed from all sites.', file_name, site_name)
                            all_failed.append(sub_id)

                            st = 'held'

                # st value may have changed - filter again
                if status is None or st in status:
                    subscription = RLFSM.Subscription(sub_id, st, lfile, destination, disk_sources, tape_sources, failed_sources, hold_reason)
                    subscriptions.append(subscription)

            elif optype == DELETE:
                if st not in ('done', 'held', 'cancelled') and not dest_replica.has_file(lfile):
                    LOG.debug('%s is already gone from %s', file_name, site_name)
                    to_done.append(sub_id)

                    st = 'done'

                if status is None or st in status:
                    desubscription = RLFSM.Desubscription(sub_id, st, lfile, destination)
                    subscriptions.append(desubscription)

        if len(to_done) + len(no_source) + len(all_failed) != 0:
            msg = 'Subscriptions terminated directly: %d done' % len(to_done)
            if len(no_source) != 0:
                msg += ', %d held with reason "no_source"' % len(no_source)
            if len(all_failed) != 0:
                msg += ', %d held with reason "all_failed"' % len(all_failed)

            LOG.info(msg)

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'done\', `last_update` = NOW()', 'id', to_done)
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'no_source\', `last_update` = NOW()', 'id', no_source)
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'all_failed\', `last_update` = NOW()', 'id', all_failed)

            # Clean up subscriptions for deleted files / sites
            sql = 'DELETE FROM u USING `file_subscriptions` AS u'
            sql += ' LEFT JOIN `files` AS f ON f.`id` = u.`file_id`'
            sql += ' LEFT JOIN `sites` AS s ON s.`id` = u.`site_id`'
            sql += ' WHERE f.`name` IS NULL OR s.`name` IS NULL'
            self.db.query(sql)

            sql = 'DELETE FROM f USING `failed_transfers` AS f'
            sql += ' LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id`'
            sql += ' WHERE u.`id` IS NULL'
            self.db.query(sql)

        return subscriptions

    def close_subscriptions(self, done_ids):
        """
        Get subscription completion acknowledgments.
        """

        if not self._read_only:
            self.db.delete_many('file_subscriptions', 'id', done_ids)

    def release_subscription(self, subscription):
        """
        Clear failed transfers list and set the subscription status to retry.
        """

        if subscription.status != 'held':
            return

        if self._read_only:
            return

        self.db.query('DELETE FROM `failed_transfers` WHERE `subscription_id` = %s', subscription.id)
        self.db.query('UPDATE `file_subscriptions` SET `status` = \'retry\' WHERE `id` = %s', subscription.id)

    def _run_cycle(self, inventory):
        while True:
            if self.cycle_stop.is_set():
                break
    
            LOG.debug('Checking and executing new file transfer subscriptions.')
            self.transfer_files(inventory)
    
            if self.cycle_stop.is_set():
                break
    
            LOG.debug('Checking and executing new file deletion subscriptions.')
            self.delete_files(inventory)

            is_set = self.cycle_stop.wait(30)
            if is_set: # is true if in Python 2.7 and the flag is set
                break

    def _cleanup(self):
        if self._read_only:
            return

        # Make the tables consistent in case the previous cycles was terminated prematurely

        # There should not be tasks with subscription status new
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = t.`subscription_id`'
        sql += ' WHERE u.`status` IN (\'new\', \'retry\')'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = t.`subscription_id`'
        sql += ' WHERE u.`status` IN (\'new\', \'retry\')'
        self.db.query(sql)

        # There should not be batches with no tasks
        sql = 'DELETE FROM b USING `transfer_batches` AS b LEFT JOIN `transfer_tasks` AS t ON t.`batch_id` = b.`id` WHERE t.`batch_id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM b USING `deletion_batches` AS b LEFT JOIN `deletion_tasks` AS t ON t.`batch_id` = b.`id` WHERE t.`batch_id` IS NULL'
        self.db.query(sql)

        # and tasks with no batches
        sql = 'DELETE FROM t USING `transfer_tasks` AS t LEFT JOIN `transfer_batches` AS b ON b.`id` = t.`batch_id` WHERE b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t LEFT JOIN `deletion_batches` AS b ON b.`id` = t.`batch_id` WHERE b.`id` IS NULL'
        self.db.query(sql)

        # Cleanup the plugins (might delete tasks)
        for _, op in self.transfer_operations:
            op.cleanup()
        if self.deletion_operations is not self.transfer_operations:
            for _, op in self.deletion_operations:
                op.cleanup()

        # Reset inbatch subscriptions with no task to new state
        sql = 'UPDATE `file_subscriptions` SET `status` = \'new\' WHERE `status` = \'inbatch\' AND `id` NOT IN (SELECT `subscription_id` FROM `transfer_tasks`) AND `id` NOT IN (SELECT `subscription_id` FROM `deletion_tasks`)'
        self.db.query(sql)

        # Delete canceled subscriptions with no task (ones with task need to be archived in update_status)
        sql = 'DELETE FROM u USING `file_subscriptions` AS u LEFT JOIN `transfer_tasks` AS t ON t.`subscription_id` = u.`id` WHERE u.`delete` = 0 AND u.`status` = \'cancelled\' AND t.`id` IS NULL'
        self.db.query(sql)

        sql = 'DELETE FROM u USING `file_subscriptions` AS u LEFT JOIN `deletion_tasks` AS t ON t.`subscription_id` = u.`id` WHERE u.`delete` = 1 AND u.`status` = \'cancelled\' AND t.`id` IS NULL'
        self.db.query(sql)

        # Delete failed transfers with no subscription
        sql = 'DELETE FROM f USING `failed_transfers` AS f LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id` WHERE u.`id` IS NULL'
        self.db.query(sql)

    def _subscribe(self, site, lfile, delete, created = None):
        opp_op = 0 if delete == 1 else 1
        now = time.strftime('%Y-%m-%d %H:%M:%S')

        if created is None:
            created = now
        else:
            created = datetime.datetime(*time.localtime(created)[:6])

        if lfile.id == 0 or site.id == 0:
            # file is not registered in inventory store yet; update the presubscription
            if not self._read_only:
                fields = ('file_name', 'site_name', 'created', 'delete')
                self.db.insert_update('file_pre_subscriptions', fields, lfile.lfn, site.name, now, delete, update_columns = ('delete',))
            return

        if not self._read_only:
            self.db.lock_tables(write = ['file_subscriptions'])

        try:
            sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\''
            sql += ' WHERE `file_id` = %s AND `site_id` = %s AND `delete` = %s'
            sql += ' AND `status` IN (\'new\', \'inbatch\', \'retry\', \'held\')'
            if not self._read_only:
                self.db.query(sql, lfile.id, site.id, opp_op)
    
            fields = ('file_id', 'site_id', 'status', 'delete', 'created', 'last_update')

            if not self._read_only:
                self.db.insert_update('file_subscriptions', fields, lfile.id, site.id, 'new', delete, now, now, update_columns = ('status', 'last_update'))

        finally:
            if not self._read_only:
                self.db.unlock_tables()

    def _get_cancelled_tasks(self, optype):
        if optype == 'transfer':
            delete = 0
        else:
            delete = 1

        sql = 'SELECT q.`id` FROM `{op}_tasks` AS q'.format(op = optype)
        sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`'
        sql += ' WHERE u.`status` = \'cancelled\' AND u.`delete` = %d' % delete
        return self.db.query(sql)

    def _update_status(self, optype):
        if optype == 'transfer':
            site_columns = 'ss.`name`, sd.`name`'
            site_joins = ' INNER JOIN `sites` AS ss ON ss.`id` = q.`source_id`'
            site_joins += ' INNER JOIN `sites` AS sd ON sd.`id` = u.`site_id`'
        else:
            site_columns = 's.`name`'
            site_joins = ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        get_task_data = 'SELECT u.`id`, f.`name`, f.`size`, UNIX_TIMESTAMP(q.`created`), ' + site_columns + ' FROM `{op}_tasks` AS q'
        get_task_data += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`'
        get_task_data += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        get_task_data += site_joins
        get_task_data += ' WHERE q.`id` = %s'

        get_task_data = get_task_data.format(op = optype)

        if optype == 'transfer':
            history_table_name = 'file_transfers'
            history_site_fields = ('source_id', 'destination_id')
        else:
            history_table_name = 'file_deletions'
            history_site_fields = ('site_id',)

        history_fields = ('file_id', 'exitcode', 'message', 'batch_id', 'created', 'started', 'finished', 'completed') + history_site_fields

        if optype == 'transfer':
            insert_failure = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)'
            insert_failure += ' SELECT `id`, `subscription_id`, `source_id`, %s FROM `transfer_tasks` WHERE `id` = %s'
            insert_failure += ' ON DUPLICATE KEY UPDATE `id`=VALUES(`id`)'
            delete_failures = 'DELETE FROM `failed_transfers` WHERE `subscription_id` = %s'

        get_subscription_status = 'SELECT `status` FROM `file_subscriptions` WHERE `id` = %s'

        update_subscription = 'UPDATE `file_subscriptions` SET `status` = %s, `last_update` = NOW() WHERE `id` = %s'
        delete_subscription = 'DELETE FROM `file_subscriptions` WHERE `id` = %s'

        delete_task = 'DELETE FROM `{op}_tasks` WHERE `id` = %s'.format(op = optype)

        delete_batch = 'DELETE FROM `{op}_batches` WHERE `id` = %s'.format(op = optype)

        done_subscriptions = []
        num_success = 0
        num_failure = 0
        num_cancelled = 0

        # Collect completed tasks

        for batch_id in self.db.query('SELECT `id` FROM `{op}_batches`'.format(op = optype)):
            results = []

            if optype == 'transfer':
                for _, query in self.transfer_queries:
                    results = query.get_transfer_status(batch_id)
                    if len(results) != 0:
                        break

            else:
                for _, query in self.deletion_queries:
                    results = query.get_deletion_status(batch_id)
                    if len(results) != 0:
                        break

            batch_complete = True

            for task_id, status, exitcode, message, start_time, finish_time in results:
                # start_time and finish_time can be None
                LOG.debug('%s result: %d %s %d %s %s', optype, task_id, FileQuery.status_name(status), exitcode, start_time, finish_time)

                if status == FileQuery.STAT_DONE:
                    num_success += 1
                elif status == FileQuery.STAT_FAILED:
                    num_failure += 1
                elif status == FileQuery.STAT_CANCELLED:
                    num_cancelled += 1
                else:
                    batch_complete = False
                    continue

                try:
                    task_data = self.db.query(get_task_data, task_id)[0]
                except IndexError:
                    LOG.warning('%s task %d got lost.', optype, task_id)
                    if optype == 'transfer':
                        query.forget_transfer_status(task_id)
                    else:
                        query.forget_deletion_status(task_id)

                    if not self._read_only:
                        self.db.query(delete_task, task_id)

                    continue

                subscription_id, lfn, size, create_time = task_data[:4]

                if optype == 'transfer':
                    source_name, dest_name = task_data[4:]
                    history_site_ids = (
                        self.history_db.save_sites([source_name], get_ids = True)[0],
                        self.history_db.save_sites([dest_name], get_ids = True)[0]
                    )
                else:
                    site_name = task_data[4]
                    history_site_ids = (self.history_db.save_sites([site_name], get_ids = True)[0],)

                file_id = self.history_db.save_files([(lfn, size)], get_ids = True)[0]

                if start_time is None:
                    sql_start_time = None
                else:
                    sql_start_time = datetime.datetime(*time.localtime(start_time)[:6])

                if finish_time is None:
                    sql_finish_time = None
                else:
                    sql_finish_time = datetime.datetime(*time.localtime(finish_time)[:6])

                values = (file_id, exitcode, message, batch_id, datetime.datetime(*time.localtime(create_time)[:6]),
                    sql_start_time, sql_finish_time, MySQL.bare('NOW()')) + history_site_ids

                if optype == 'transfer':
                    LOG.debug('Archiving transfer of %s from %s to %s (exitcode %d)', lfn, source_name, dest_name, exitcode)
                else:
                    LOG.debug('Archiving deletion of %s at %s (exitcode %d)', lfn, site_name, exitcode)

                if self._read_only:
                    history_id = 0
                else:
                    history_id = self.history_db.db.insert_get_id(history_table_name, history_fields, values)

                if optype == 'transfer':
                    query.write_transfer_history(self.history_db, task_id, history_id)
                else:
                    query.write_deletion_history(self.history_db, task_id, history_id)

                # We check the subscription status and update accordingly. Need to lock the tables.
                if not self._read_only:
                    self.db.lock_tables(write = ['file_subscriptions'])

                try:
                    subscription_status = self.db.query(get_subscription_status, subscription_id)[0]

                    if subscription_status == 'inbatch':
                        if status == FileQuery.STAT_DONE:
                            LOG.debug('Subscription %d done.', subscription_id)
                            if not self._read_only:
                                self.db.query(update_subscription, 'done', subscription_id)
        
                        elif status == FileQuery.STAT_FAILED:
                            LOG.debug('Subscription %d failed (exit code %d). Flagging retry.', subscription_id, exitcode)
                            if not self._read_only:
                                self.db.query(update_subscription, 'retry', subscription_id)
        
                    elif subscription_status == 'cancelled':
                        # subscription is cancelled and task terminated -> delete the subscription now, irrespective of the task status
                        LOG.debug('Subscription %d is cancelled.', subscription_id)
                        if not self._read_only:
                            self.db.query(delete_subscription, subscription_id)
                finally:
                    if not self._read_only:
                        self.db.unlock_tables()

                if not self._read_only:
                    if optype == 'transfer':
                        if subscription_status == 'cancelled' or (subscription_status == 'inbatch' and status == FileQuery.STAT_DONE):
                            # Delete entries from failed_transfers table
                            self.db.query(delete_failures, subscription_id)
    
                        elif subscription_status == 'inbatch' and status == FileQuery.STAT_FAILED:
                            # Insert entry to failed_transfers table
                            self.db.query(insert_failure, exitcode, task_id)
        
                    self.db.query(delete_task, task_id)

                if status == FileQuery.STAT_DONE:
                    done_subscriptions.append(subscription_id)

                if optype == 'transfer':
                    query.forget_transfer_status(task_id)
                else:
                    query.forget_deletion_status(task_id)

                if self.cycle_stop.is_set():
                    break

            if batch_complete:
                if not self._read_only:
                    self.db.query(delete_batch, batch_id)

                if optype == 'transfer':
                    query.forget_transfer_batch(batch_id)
                else:
                    query.forget_deletion_batch(batch_id)

        if num_success + num_failure + num_cancelled != 0:
            LOG.info('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled)
        else:
            LOG.debug('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled)

        return done_subscriptions

    def _select_source(self, subscriptions):
        """
        Intelligently select the best source for each subscription.
        @param subscriptions  List of Subscription objects

        @return  List of TransferTask objects
        """

        def find_site_to_try(sources, failed_sources):
            not_tried = set(sources)
            if failed_sources is not None:
                not_tried -= set(failed_sources.iterkeys())

            LOG.debug('%d sites not tried', len(not_tried))

            if len(not_tried) == 0:
                if failed_sources is None:
                    return None

                # we've tried all sites. Did any of them fail with a recoverable error?
                sites_to_retry = []
                for site, codes in failed_sources.iteritems():
                    if site not in sources:
                        continue

                    if codes[-1] not in irrecoverable_errors:
                        sites_to_retry.append(site)

                if len(sites_to_retry) == 0:
                    return None
                else:
                    # select the least failed site
                    by_failure = sorted(sites_to_retry, key = lambda s: len(failed_sources[s]))
                    LOG.debug('%s has the least failures', by_failure[0].name)
                    return by_failure[0]

            else:
                LOG.debug('Selecting randomly')
                return random.choice(list(not_tried))

        tasks = []

        for subscription in subscriptions:
            LOG.debug('Selecting a disk source for subscription %d (%s to %s)', subscription.id, subscription.file.lfn, subscription.destination.name)
            source = find_site_to_try(subscription.disk_sources, subscription.failed_sources)
            if source is None:
                LOG.debug('Selecting a tape source for subscription %d', subscription.id)
                source = find_site_to_try(subscription.tape_sources, subscription.failed_sources)

            if source is None:
                # If both disk and tape failed irrecoveably, the subscription must be placed in held queue in get_subscriptions.
                # Reaching this line means something is wrong.
                LOG.warning('Could not find a source for transfer of %s to %s from %d disk and %d tape candidates.',
                    subscription.file.lfn, subscription.destination.name, len(subscription.disk_sources), len(subscription.tape_sources))
                continue
            
            tasks.append(RLFSM.TransferTask(subscription, source))

        return tasks

    def _start_transfers(self, transfer_operation, tasks):
        # start the transfer of tasks. If batch submission fails, make progressively smaller batches until failing tasks are identified.
        if self._read_only:
            batch_id = 0
        else:
            self.db.query('INSERT INTO `transfer_batches` (`id`) VALUES (0)')
            batch_id = self.db.last_insert_id

        LOG.debug('New transfer batch %d for %d files.', batch_id, len(tasks))

        # local time
        now = time.strftime('%Y-%m-%d %H:%M:%S')

        # need to create the transfer tasks first to have ids assigned
        fields = ('subscription_id', 'source_id', 'batch_id', 'created')
        mapping = lambda t: (t.subscription.id, t.source.id, batch_id, now)

        if not self._read_only:
            self.db.insert_many('transfer_tasks', fields, mapping, tasks)
        
        # set the task ids
        tasks_by_sub = dict((t.subscription.id, t) for t in tasks)
        for task_id, subscription_id in self.db.xquery('SELECT `id`, `subscription_id` FROM `transfer_tasks` WHERE `batch_id` = %s', batch_id):
            tasks_by_sub[subscription_id].id = task_id

        result = transfer_operation.start_transfers(batch_id, tasks)

        successful = [task for task, success in result.iteritems() if success]

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'inbatch\', `last_update` = NOW()', 'id', [t.subscription.id for t in successful])

            if len(successful) != len(result):
                failed = [task for task, success in result.iteritems() if not success]
                for task in failed:
                    LOG.error('Cannot issue transfer of %s from %s to %s',
                              task.subscription.file.lfn, task.source.name, task.subscription.destination.name)

                failed_ids = [t.id for t in failed]

                sql = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)'
                sql += ' SELECT `id`, `subscription_id`, `source_id`, -1 FROM `transfer_tasks`'
                self.db.execute_many(sql, 'id', failed_ids)

                self.db.delete_many('transfer_tasks', 'id', failed_ids)

                self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'retry\', `last_update` = NOW()', 'id', [t.subscription.id for t in failed])

        return len(successful), len(result) - len(successful)

    def _start_deletions(self, deletion_operation, tasks):
        if self._read_only:
            batch_id = 0
        else:
            self.db.query('INSERT INTO `deletion_batches` (`id`) VALUES (0)')
            batch_id = self.db.last_insert_id

        # local time
        now = time.strftime('%Y-%m-%d %H:%M:%S')
        
        fields = ('subscription_id', 'batch_id', 'created')
        mapping = lambda t: (t.desubscription.id, batch_id, now)

        if not self._read_only:
            self.db.insert_many('deletion_tasks', fields, mapping, tasks)

        # set the task ids
        tasks_by_sub = dict((t.desubscription.id, t) for t in tasks)
        for task_id, desubscription_id in self.db.xquery('SELECT `id`, `subscription_id` FROM `deletion_tasks` WHERE `batch_id` = %s', batch_id):
            tasks_by_sub[desubscription_id].id = task_id
        
        result = deletion_operation.start_deletions(batch_id, tasks)

        successful = [task for task, success in result.iteritems() if success]

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'inbatch\', `last_update` = NOW()', 'id', [t.desubscription.id for t in successful])

            if len(successful) != len(result):
                failed = [task for task, success in result.iteritems() if not success]

                for task in failed:
                    LOG.error('Cannot delete %s at %s',
                              task.desubscription.file.lfn, task.desubscription.site.name)

                self.db.delete_many('deletion_tasks', 'id', [t.id for t in failed])

                self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `last_update` = NOW()', 'id', [t.desubscription.id for t in failed])

        return len(successful), len(result) - len(successful)
    
    def _set_dirclean_candidates(self, subscription_ids, inventory):
        site_dirs = {}

        # Clean up directories of completed subscriptions
        sql = 'SELECT s.`name`, f.`name` FROM `file_subscriptions` AS u'
        sql += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        sql += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        for site_name, file_name in self.db.execute_many(sql, 'u.`id`', subscription_ids):
            try:
                site = inventory.sites[site_name]
            except KeyError:
                continue

            try:
                dirs = site_dirs[site]
            except KeyError:
                dirs = site_dirs[site] = set()

            dirs.add(os.path.dirname(file_name))

        def get_entry():
            for site, dirs in site_dirs.iteritems():
                for directory in dirs:
                    yield site.id, directory

        fields = ('site_id', 'directory')
        if not self._read_only:
            self.db.insert_many('directory_cleaning_tasks', fields, None, get_entry(), do_update = True)
예제 #9
0
import pandas as pd

from dynamo.dataformat import Configuration
from dynamo.utils.interface.mysql import MySQL

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# # # # # # # #
# Query part  #
# # # # # # # #

nowtime = datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')

# Dynamo database: inbatch
dynamo = MySQL(Configuration(db = 'dynamo', user = '******', passwd = 'putpasswordhere'))
sites = dynamo.xquery('SELECT `id`, `name` FROM `sites`')
sitesdict = {}
for ide, name in sites:
    sitesdict[ide] = name
transfers = dynamo.xquery('SELECT tt.`source_id`, fs.`site_id`, f.`size` FROM `transfer_tasks` AS tt INNER JOIN file_subscriptions AS fs ON fs.`id` = tt.`subscription_id` INNER JOIN files as f on f.`id` = fs.`file_id` WHERE fs.`status`="inbatch" and fs.`delete`=0')


# Dynamo history database: exitcode
dynamohistory = MySQL(Configuration(db = 'dynamohistory', user = '******', passwd = 'putpasswordhere'))
historysites = dynamohistory.xquery('SELECT `id`, `name` FROM `sites`')
historysitesdict = {}
for ide, name in historysites:
    historysitesdict[ide] = name
historyxfers = dynamohistory.xquery('SELECT `source_id`, `destination_id`, `exitcode` FROM `file_transfers` WHERE `source_id` != 0 AND `completed` > NOW() - INTERVAL 7 DAY')

예제 #10
0
class MySQLHistory(TransactionHistoryInterface):
    """
    Transaction history interface implementation using MySQL as the backend.
    """
    def __init__(self, config):
        TransactionHistoryInterface.__init__(self, config)

        self._mysql = MySQL(config.db_params)
        self._cache_db = MySQL(config.cache_db_params)

        self._site_id_map = {}
        self._dataset_id_map = {}

    def _do_acquire_lock(self, blocking):  #override
        while True:
            # Use the system table to "software-lock" the database
            self._mysql.query('LOCK TABLES `lock` WRITE')
            self._mysql.query(
                'UPDATE `lock` SET `lock_host` = %s, `lock_process` = %s WHERE `lock_host` LIKE \'\' AND `lock_process` = 0',
                socket.gethostname(), os.getpid())

            # Did the update go through?
            host, pid = self._mysql.query(
                'SELECT `lock_host`, `lock_process` FROM `lock`')[0]
            self._mysql.query('UNLOCK TABLES')

            if host == socket.gethostname() and pid == os.getpid():
                # The database is locked.
                break

            if blocking:
                LOG.warning('Failed to lock database. Waiting 30 seconds..')
                time.sleep(30)
            else:
                LOG.warning('Failed to lock database.')
                return False

        return True

    def _do_release_lock(self, force):  #override
        self._mysql.query('LOCK TABLES `lock` WRITE')
        if force:
            self._mysql.query(
                'UPDATE `lock` SET `lock_host` = \'\', `lock_process` = 0')
        else:
            self._mysql.query(
                'UPDATE `lock` SET `lock_host` = \'\', `lock_process` = 0 WHERE `lock_host` LIKE %s AND `lock_process` = %s',
                socket.gethostname(), os.getpid())

        # Did the update go through?
        host, pid = self._mysql.query(
            'SELECT `lock_host`, `lock_process` FROM `lock`')[0]
        self._mysql.query('UNLOCK TABLES')

        if host != '' or pid != 0:
            raise RuntimeError('Failed to release lock from ' +
                               socket.gethostname() + ':' + str(os.getpid()))

    def _do_new_run(self, operation, partition, policy_version,
                    comment):  #override
        part_ids = self._mysql.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(part_ids) == 0:
            part_id = self._mysql.query(
                'INSERT INTO `partitions` (`name`) VALUES (%s)', partition)
        else:
            part_id = part_ids[0]

        if operation == HistoryRecord.OP_COPY:
            if self.config.get('test', False):
                operation_str = 'copy_test'
            else:
                operation_str = 'copy'
        else:
            if self.config.get('test', False):
                operation_str = 'deletion_test'
            else:
                operation_str = 'deletion'

        return self._mysql.query(
            'INSERT INTO `runs` (`operation`, `partition_id`, `policy_version`, `comment`, `time_start`) VALUES (%s, %s, %s, %s, NOW())',
            operation_str, part_id, policy_version, comment)

    def _do_close_run(self, operation, run_number):  #override
        self._mysql.query(
            'UPDATE `runs` SET `time_end` = FROM_UNIXTIME(%s) WHERE `id` = %s',
            time.time(), run_number)

    def _do_make_copy_entry(self, run_number, site, operation_id, approved,
                            dataset_list, size):  #override
        """
        Site and datasets are expected to be already in the database.
        """

        if len(self._site_id_map) == 0:
            self._make_site_id_map()
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        self._mysql.query(
            'INSERT INTO `copy_requests` (`id`, `run_id`, `timestamp`, `approved`, `site_id`, `size`) VALUES (%s, %s, NOW(), %s, %s, %s)',
            operation_id, run_number, approved, self._site_id_map[site.name],
            size)

        self._mysql.insert_many(
            'copied_replicas', ('copy_id', 'dataset_id'), lambda d:
            (operation_id, self._dataset_id_map[d.name]), dataset_list)

    def _do_make_deletion_entry(self, run_number, site, operation_id, approved,
                                datasets, size):  #override
        """
        site and dataset are expected to be already in the database (save_deletion_decisions should be called first).
        """

        site_id = self._mysql.query(
            'SELECT `id` FROM `sites` WHERE `name` LIKE %s', site.name)[0]

        dataset_ids = self._mysql.select_many('datasets', ('id', ), 'name',
                                              (d.name for d in datasets))

        self._mysql.query(
            'INSERT INTO `deletion_requests` (`id`, `run_id`, `timestamp`, `approved`, `site_id`, `size`) VALUES (%s, %s, NOW(), %s, %s, %s)',
            operation_id, run_number, approved, site_id, size)

        self._mysql.insert_many('deleted_replicas',
                                ('deletion_id', 'dataset_id'), lambda did:
                                (operation_id, did), dataset_ids)

    def _do_update_copy_entry(self, copy_record):  #override
        self._mysql.query(
            'UPDATE `copy_requests` SET `approved` = %s, `size` = %s, `completed` = %s WHERE `id` = %s',
            copy_record.approved, copy_record.size, copy_record.completed,
            copy_record.operation_id)

    def _do_update_deletion_entry(self, deletion_record):  #override
        self._mysql.query(
            'UPDATE `deletion_requests` SET `approved` = %s, `size` = %s WHERE `id` = %s',
            deletion_record.approved, deletion_record.size,
            deletion_record.operation_id)

    def _do_save_sites(self, sites):  #override
        if len(self._site_id_map) == 0:
            self._make_site_id_map()

        names_to_sites = dict((s.name, s) for s in sites)

        sites_to_insert = []
        for site_name in names_to_sites.iterkeys():
            if site_name not in self._site_id_map:
                sites_to_insert.append(site_name)

        if len(sites_to_insert) != 0:
            self._mysql.insert_many('sites', ('name', ), None, sites_to_insert)
            self._make_site_id_map()

    def _do_get_sites(self, run_number):  #override
        self._fill_snapshot_cache('sites', run_number)

        table_name = 'sites_%d' % run_number

        sql = 'SELECT s.`name`, n.`status`, n.`quota` FROM `%s`.`%s` AS n' % (
            self._cache_db.db_name(), table_name)
        sql += ' INNER JOIN `%s`.`sites` AS s ON s.`id` = n.`site_id`' % self._mysql.db_name(
        )

        sites_dict = {}

        for site_name, status, quota in self._mysql.xquery(sql):
            sites_dict[site_name] = (status, quota)

        return sites_dict

    def _do_save_datasets(self, datasets):  #override
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        datasets_to_insert = set(d.name for d in datasets) - set(
            self._dataset_id_map.iterkeys())
        if len(datasets_to_insert) == 0:
            return

        self._mysql.insert_many('datasets', ('name', ), None,
                                datasets_to_insert)
        self._make_dataset_id_map()

    def _do_save_conditions(self, policy_lines):  #ovrride
        for line in policy_lines:
            text = re.sub('\s+', ' ', line.condition.text)
            ids = self._mysql.query(
                'SELECT `id` FROM `policy_conditions` WHERE `text` = %s', text)
            if len(ids) == 0:
                line.condition_id = self._mysql.query(
                    'INSERT INTO `policy_conditions` (`text`) VALUES (%s)',
                    text)
            else:
                line.condition_id = ids[0]

    def _do_save_copy_decisions(self, run_number, copies):  #override
        pass

    def _do_save_deletion_decisions(self, run_number, deleted_list, kept_list,
                                    protected_list):  #override
        if len(self._site_id_map) == 0:
            self._make_site_id_map()
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        if type(run_number) is int:
            # Saving deletion decisions of a cycle
            db_file_name = '%s/snapshot_%09d.db' % (
                self.config.snapshots_spool_dir, run_number)
        else:
            # run_number is actually the partition name
            db_file_name = '%s/snapshot_%s.db' % (
                self.config.snapshots_spool_dir, run_number)

        try:
            os.makedirs(self.config.snapshots_spool_dir)
            os.chmod(self.config.snapshots_spool_dir, 0777)
        except OSError:
            pass

        if os.path.exists(db_file_name):
            os.unlink(db_file_name)

        LOG.info('Creating snapshot SQLite3 DB %s', db_file_name)

        # hardcoded!!
        replica_delete = 1
        replica_keep = 2
        replica_protect = 3

        snapshot_db = sqlite3.connect(db_file_name)
        snapshot_cursor = snapshot_db.cursor()

        sql = 'CREATE TABLE `decisions` ('
        sql += '`id` TINYINT PRIMARY KEY NOT NULL,'
        sql += '`value` TEXT NOT NULL'
        sql += ')'
        snapshot_db.execute(sql)
        snapshot_db.execute('INSERT INTO `decisions` VALUES (%d, \'delete\')' %
                            replica_delete)
        snapshot_db.execute('INSERT INTO `decisions` VALUES (%d, \'keep\')' %
                            replica_keep)
        snapshot_db.execute(
            'INSERT INTO `decisions` VALUES (%d, \'protect\')' %
            replica_protect)

        sql = 'CREATE TABLE `replicas` ('
        sql += '`site_id` SMALLINT NOT NULL,'
        sql += '`dataset_id` INT NOT NULL,'
        sql += '`size` BIGINT NOT NULL,'
        sql += '`decision_id` TINYINT NOT NULL REFERENCES `decisions`(`id`),'
        sql += '`condition` MEDIUMINT NOT NULL'
        sql += ')'
        snapshot_db.execute(sql)
        snapshot_db.execute(
            'CREATE INDEX `site_dataset` ON `replicas` (`site_id`, `dataset_id`)'
        )

        sql = 'INSERT INTO `replicas` VALUES (?, ?, ?, ?, ?)'

        def do_insert(entries, decision):
            for replica, matches in entries.iteritems():
                site_id = self._site_id_map[replica.site.name]
                dataset_id = self._dataset_id_map[replica.dataset.name]

                for condition_id, block_replicas in matches.iteritems():
                    size = sum(r.size for r in block_replicas)
                    snapshot_cursor.execute(
                        sql,
                        (site_id, dataset_id, size, decision, condition_id))

            snapshot_db.commit()

        do_insert(deleted_list, replica_delete)
        do_insert(kept_list, replica_keep)
        do_insert(protected_list, replica_protect)

        snapshot_cursor.close()
        snapshot_db.close()

        os.chmod(db_file_name, 0666)

        self._fill_snapshot_cache('replicas', run_number, overwrite=True)

    def _do_save_quotas(self, run_number, quotas):  #override
        # Will save quotas and statuses

        if len(self._site_id_map) == 0:
            self._make_site_id_map()

        if type(run_number) is int:
            db_file_name = '%s/snapshot_%09d.db' % (
                self.config.snapshots_spool_dir, run_number)
        else:
            # run_number is actually the partition name
            db_file_name = '%s/snapshot_%s.db' % (
                self.config.snapshots_spool_dir, run_number)

        # DB file should exist already - this function is called after save_deletion_decisions

        snapshot_db = sqlite3.connect(db_file_name)
        snapshot_cursor = snapshot_db.cursor()

        sql = 'CREATE TABLE `statuses` ('
        sql += '`id` TINYINT PRIMARY KEY NOT NULL,'
        sql += '`value` TEXT NOT NULL'
        sql += ')'
        snapshot_db.execute(sql)
        snapshot_db.execute('INSERT INTO `statuses` VALUES (%d, \'ready\')' %
                            Site.STAT_READY)
        snapshot_db.execute(
            'INSERT INTO `statuses` VALUES (%d, \'waitroom\')' %
            Site.STAT_WAITROOM)
        snapshot_db.execute('INSERT INTO `statuses` VALUES (%d, \'morgue\')' %
                            Site.STAT_MORGUE)
        snapshot_db.execute('INSERT INTO `statuses` VALUES (%d, \'unknown\')' %
                            Site.STAT_UNKNOWN)

        sql = 'CREATE TABLE `sites` ('
        sql += '`site_id` SMALLINT PRIMARY KEY NOT NULL,'
        sql += '`status_id` TINYINT NOT NULL REFERENCES `statuses`(`id`),'
        sql += '`quota` INT NOT NULL'
        sql += ')'
        snapshot_db.execute(sql)

        sql = 'INSERT INTO `sites` VALUES (?, ?, ?)'

        for site, quota in quotas.iteritems():
            snapshot_cursor.execute(
                sql, (self._site_id_map[site.name], site.status, quota))

        snapshot_db.commit()

        snapshot_cursor.close()
        snapshot_db.close()

        self._fill_snapshot_cache('sites', run_number, overwrite=True)

        if type(run_number) is int:
            # This was a numbered cycle
            # Archive the sqlite3 file
            # Relying on the fact save_quotas is called after save_deletion_decisions

            srun = '%09d' % run_number
            archive_dir_name = '%s/%s/%s' % (self.config.snapshots_archive_dir,
                                             srun[:3], srun[3:6])
            xz_file_name = '%s/snapshot_%09d.db.xz' % (archive_dir_name,
                                                       run_number)

            try:
                os.makedirs(archive_dir_name)
            except OSError:
                pass

            with open(db_file_name, 'rb') as db_file:
                with open(xz_file_name, 'wb') as xz_file:
                    xz_file.write(lzma.compress(db_file.read()))

    def _do_get_deletion_decisions(self, run_number, size_only):  #override
        self._fill_snapshot_cache('replicas', run_number)

        table_name = 'replicas_%d' % run_number

        if size_only:
            # return {site_name: (protect_size, delete_size, keep_size)}
            volumes = {}
            sites = set()

            query = 'SELECT s.`name`, SUM(r.`size`) * 1.e-12 FROM `%s`.`%s` AS r' % (
                self._cache_db.db_name(), table_name)
            query += ' INNER JOIN `%s`.`sites` AS s ON s.`id` = r.`site_id`' % self._mysql.db_name(
            )
            query += ' WHERE r.`decision` LIKE %s'
            query += ' GROUP BY r.`site_id`'

            for decision in ['protect', 'delete', 'keep']:
                volumes[decision] = dict(self._mysql.xquery(query, decision))
                sites.update(set(volumes[decision].iterkeys()))

            product = {}
            for site_name in sites:
                v = {}
                for decision in ['protect', 'delete', 'keep']:
                    try:
                        v[decision] = volumes[decision][site_name]
                    except:
                        v[decision] = 0

                product[site_name] = (v['protect'], v['delete'], v['keep'])

            return product

        else:
            # return {site_name: [(dataset_name, size, decision, reason)]}

            query = 'SELECT s.`name`, d.`name`, r.`size`, r.`decision`, p.`text` FROM `%s`.`%s` AS r' % (
                self._cache_db.db_name(), table_name)
            query += ' INNER JOIN `%s`.`sites` AS s ON s.`id` = r.`site_id`' % self._mysql.db_name(
            )
            query += ' INNER JOIN `%s`.`datasets` AS d ON d.`id` = r.`dataset_id`' % self._mysql.db_name(
            )
            query += ' INNER JOIN `%s`.`policy_conditions` AS p ON p.`id` = r.`condition`' % self._mysql.db_name(
            )
            query += ' ORDER BY s.`name` ASC, r.`size` DESC'

            product = {}

            _site_name = ''

            for site_name, dataset_name, size, decision, reason in self._cache_db.xquery(
                    query):
                if site_name != _site_name:
                    product[site_name] = []
                    current = product[site_name]
                    _site_name = site_name

                current.append((dataset_name, size, decision, reason))

            return product

    def _do_save_dataset_popularity(self, run_number, datasets):  #override
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        fields = ('run_id', 'dataset_id', 'popularity')
        mapping = lambda dataset: (run_number, self._dataset_id_map[
            dataset.name], dataset.attr['request_weight']
                                   if 'request_weight' in dataset.attr else 0.)
        self._mysql.insert_many('dataset_popularity_snapshots', fields,
                                mapping, datasets)

    def _do_get_incomplete_copies(self, partition):  #override
        query = 'SELECT h.`id`, UNIX_TIMESTAMP(h.`timestamp`), h.`approved`, s.`name`, h.`size`'
        query += ' FROM `copy_requests` AS h'
        query += ' INNER JOIN `runs` AS r ON r.`id` = h.`run_id`'
        query += ' INNER JOIN `partitions` AS p ON p.`id` = r.`partition_id`'
        query += ' INNER JOIN `sites` AS s ON s.`id` = h.`site_id`'
        query += ' WHERE h.`id` > 0 AND p.`name` LIKE \'%s\' AND h.`completed` = 0 AND h.`run_id` > 0' % partition
        history_entries = self._mysql.xquery(query)

        id_to_record = {}
        for eid, timestamp, approved, site_name, size in history_entries:
            id_to_record[eid] = HistoryRecord(HistoryRecord.OP_COPY,
                                              eid,
                                              site_name,
                                              timestamp=timestamp,
                                              approved=approved,
                                              size=size)

        id_to_dataset = dict(
            self._mysql.xquery('SELECT `id`, `name` FROM `datasets`'))
        id_to_site = dict(
            self._mysql.xquery('SELECT `id`, `name` FROM `sites`'))

        replicas = self._mysql.select_many('copied_replicas',
                                           ('copy_id', 'dataset_id'),
                                           'copy_id', id_to_record.iterkeys())

        current_copy_id = 0
        for copy_id, dataset_id in replicas:
            if copy_id != current_copy_id:
                record = id_to_record[copy_id]
                current_copy_id = copy_id

            record.replicas.append(
                HistoryRecord.CopiedReplica(
                    dataset_name=id_to_dataset[dataset_id]))

        return id_to_record.values()

    def _do_get_copied_replicas(self, run_number):  #override
        query = 'SELECT s.`name`, d.`name` FROM `copied_replicas` AS p'
        query += ' INNER JOIN `copy_requests` AS r ON r.`id` = p.`copy_id`'
        query += ' INNER JOIN `datasets` AS d ON d.`id` = p.`dataset_id`'
        query += ' INNER JOIN `sites` AS s ON s.`id` = r.`site_id`'
        query += ' WHERE r.`run_id` = %d' % run_number

        return self._mysql.query(query)

    def _do_get_site_name(self, operation_id):  #override
        result = self._mysql.query(
            'SELECT s.name FROM `sites` AS s INNER JOIN `copy_requests` AS h ON h.`site_id` = s.`id` WHERE h.`id` = %s',
            operation_id)
        if len(result) != 0:
            return result[0]

        result = self._mysql.query(
            'SELECT s.name FROM `sites` AS s INNER JOIN `deletion_requests` AS h ON h.`site_id` = s.`id` WHERE h.`id` = %s',
            operation_id)
        if len(result) != 0:
            return result[0]

        return ''

    def _do_get_deletion_runs(self, partition, first, last):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(result) == 0:
            return []

        partition_id = result[0]

        sql = 'SELECT `id` FROM `runs` WHERE `partition_id` = %d AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'deletion\', \'deletion_test\')' % partition_id

        if first >= 0:
            sql += ' AND `id` >= %d' % first
        if last >= 0:
            sql += ' AND `id` <= %d' % last

        sql += ' ORDER BY `id` ASC'

        result = self._mysql.query(sql)

        if first < 0 and len(result) > 1:
            result = result[-1:]

        return result

    def _do_get_copy_runs(self, partition, first, last):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(result) == 0:
            return []

        partition_id = result[0]

        sql = 'SELECT `id` FROM `runs` WHERE `partition_id` = %d AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'copy\', \'copy_test\')' % partition_id

        if first >= 0:
            sql += ' AND `id` >= %d' % first
        if last >= 0:
            sql += ' AND `id` <= %d' % last

        sql += ' ORDER BY `id` ASC'

        if first < 0 and len(result) > 1:
            result = result[-1:]

        return result

    def _do_get_run_timestamp(self, run_number):  #override
        result = self._mysql.query(
            'SELECT UNIX_TIMESTAMP(`time_start`) FROM `runs` WHERE `id` = %s',
            run_number)
        if len(result) == 0:
            return 0

        return result[0]

    def _do_get_next_test_id(self):  #override
        copy_result = self._mysql.query(
            'SELECT MIN(`id`) FROM `copy_requests`')[0]
        if copy_result == None:
            copy_result = 0

        deletion_result = self._mysql.query(
            'SELECT MIN(`id`) FROM `deletion_requests`')[0]
        if deletion_result == None:
            deletion_result = 0

        return min(copy_result, deletion_result) - 1

    def _make_site_id_map(self):
        self._site_id_map = {}
        for name, site_id in self._mysql.xquery(
                'SELECT `name`, `id` FROM `sites`'):
            self._site_id_map[name] = int(site_id)

    def _make_dataset_id_map(self):
        self._dataset_id_map = {}
        for name, dataset_id in self._mysql.xquery(
                'SELECT `name`, `id` FROM `datasets`'):
            self._dataset_id_map[name] = int(dataset_id)

    def _fill_snapshot_cache(self, template, run_number, overwrite=False):
        # run_number is either a cycle number or a partition name. %s works for both
        table_name = '%s_%s' % (template, run_number)

        sql = 'SELECT COUNT(*) FROM `information_schema`.`TABLES` WHERE `TABLE_SCHEMA` = %s AND `TABLE_NAME` = %s'
        table_exists = (self._mysql.query(sql, self._cache_db.db_name(),
                                          table_name)[0] != 0)

        if overwrite or not table_exists:
            # fill from sqlite
            if table_exists:
                self._cache_db.query('TRUNCATE TABLE `%s`' % table_name)
            else:
                self._cache_db.query('CREATE TABLE `%s` LIKE `%s`' %
                                     (table_name, template))

            if type(run_number) is int:
                db_file_name = '%s/snapshot_%09d.db' % (
                    self.config.snapshots_spool_dir, run_number)

                if not os.path.exists(db_file_name):
                    srun = '%09d' % run_number
                    xz_file_name = '%s/%s/%s/snapshot_%09d.db.xz' % (
                        self.config.snapshots_archive_dir, srun[:3], srun[3:6],
                        run_number)
                    if not os.path.exists(xz_file_name):
                        raise RuntimeError('Snapshot DB ' + db_file_name +
                                           ' does not exist')

                    with open(xz_file_name, 'rb') as xz_file:
                        with open(db_file_name, 'wb') as db_file:
                            db_file.write(lzma.decompress(xz_file.read()))

            else:
                db_file_name = '%s/snapshot_%s.db' % (
                    self.config.snapshots_spool_dir, run_number)

                if not os.path.exists(db_file_name):
                    return

            snapshot_db = sqlite3.connect(db_file_name)
            snapshot_db.text_factory = str  # otherwise we'll get unicode and MySQLdb cannot convert that
            snapshot_cursor = snapshot_db.cursor()

            def make_snapshot_reader():
                if template == 'replicas':
                    sql = 'SELECT r.`site_id`, r.`dataset_id`, r.`size`, d.`value`, r.`condition` FROM `replicas` AS r'
                    sql += ' INNER JOIN `decisions` AS d ON d.`id` = r.`decision_id`'
                elif template == 'sites':
                    sql = 'SELECT s.`site_id`, t.`value`, s.`quota` FROM `sites` AS s'
                    sql += ' INNER JOIN `statuses` AS t ON t.`id` = s.`status_id`'

                snapshot_cursor.execute(sql)

                while True:
                    row = snapshot_cursor.fetchone()
                    if row is None:
                        return

                    yield row

            snapshot_reader = make_snapshot_reader()

            if template == 'replicas':
                fields = ('site_id', 'dataset_id', 'size', 'decision',
                          'condition')
            elif template == 'sites':
                fields = ('site_id', 'status', 'quota')

            self._cache_db.insert_many(table_name,
                                       fields,
                                       None,
                                       snapshot_reader,
                                       do_update=False)

            snapshot_cursor.close()
            snapshot_db.close()

        if type(run_number) is int:
            self._cache_db.query(
                'INSERT INTO `{template}_snapshot_usage` VALUES (%s, NOW())'.
                format(template=template), run_number)

            # also save into the main cache table
            sql = 'SELECT p.`name` FROM `partitions` AS p INNER JOIN `runs` AS r ON r.`partition_id` = p.`id` WHERE r.`id` = %s'
            partition = self._mysql.query(sql, run_number)[0]

            self._fill_snapshot_cache(template, partition, overwrite)

        self._clean_old_cache()

    def _clean_old_cache(self):
        sql = 'SELECT `run_id` FROM (SELECT `run_id`, MAX(`timestamp`) AS m FROM `replicas_snapshot_usage` GROUP BY `run_id`) AS t WHERE m < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        old_replica_runs = self._cache_db.query(sql)
        for old_run in old_replica_runs:
            table_name = 'replicas_%d' % old_run
            self._cache_db.query('DROP TABLE IF EXISTS `%s`' % table_name)

        sql = 'SELECT `run_id` FROM (SELECT `run_id`, MAX(`timestamp`) AS m FROM `sites_snapshot_usage` GROUP BY `run_id`) AS t WHERE m < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        old_site_runs = self._cache_db.query(sql)
        for old_run in old_site_runs:
            table_name = 'sites_%d' % old_run
            self._cache_db.query('DROP TABLE IF EXISTS `%s`' % table_name)

        for old_run in set(old_replica_runs) & set(old_site_runs):
            srun = '%09d' % old_run
            db_file_name = '%s/snapshot_%09d.db' % (
                self.config.snapshots_spool_dir, old_run)
            if os.path.exists(db_file_name):
                try:
                    os.unlink(db_file_name)
                except:
                    LOG.error('Failed to delete %s' % db_file_name)
                    pass

        self._cache_db.query(
            'DELETE FROM `replicas_snapshot_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        )
        self._cache_db.query('OPTIMIZE TABLE `replicas_snapshot_usage`')
        self._cache_db.query(
            'DELETE FROM `sites_snapshot_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        )
        self._cache_db.query('OPTIMIZE TABLE `sites_snapshot_usage`')
예제 #11
0
class MySQLInventoryStore(InventoryStore):
    """InventoryStore with a MySQL backend."""
    def __init__(self, config):
        InventoryStore.__init__(self, config)

        self._mysql = MySQL(config.db_params)

    def get_partition_names(self):
        return self._mysql.query('SELECT `name` FROM `partitions`')

    def get_group_names(self, include=['*'], exclude=[]):  #override
        # Load groups
        group_names = []

        names = self._mysql.xquery('SELECT `name` FROM `groups`')

        for name in names:
            for filt in include:
                if fnmatch.fnmatch(name, filt):
                    break
            else:
                # no match
                continue

            for filt in exclude:
                if fnmatch.fnmatch(name, filt):
                    break
            else:
                # no match
                group_names.append(name)

        return group_names

    def get_site_names(self, include=['*'], exclude=[]):  #override
        # Load sites
        site_names = []

        names = self._mysql.xquery('SELECT `name` FROM `sites`')

        for name in names:
            for filt in include:
                if fnmatch.fnmatch(name, filt):
                    break
            else:
                # no match
                continue

            for filt in exclude:
                if fnmatch.fnmatch(name, filt):
                    break
            else:
                # no match
                site_names.append(name)

        return site_names

    def get_dataset_names(self, include=['*'], exclude=[]):  #override
        dataset_names = []

        include_patterns = []
        for pattern in include:
            sql = 'SELECT `name` FROM `datasets` WHERE `name` LIKE %s'
            names = self._mysql.xquery(
                sql,
                pattern.replace('*', '%').replace('?', '_'))

            for name in names:
                for filt in exclude:
                    if fnmatch.fnmatch(name, filt):
                        break
                else:
                    # no match
                    dataset_names.append(name)

        return dataset_names

    def get_files(self, block):
        if LOG.getEffectiveLevel() == logging.DEBUG:
            LOG.debug('Loading files for block %s', block.full_name())

        files = set()

        # assuming unique block names
        sql = 'SELECT f.`size`, f.`name` FROM `files` AS f'
        sql += ' INNER JOIN `blocks` AS b ON b.`id` = f.`block_id`'
        sql += ' WHERE b.`name` = %s'

        for size, name in self._mysql.xquery(sql, block.real_name()):
            files.add(File(name, block, size))

        return files

    def load_data(self,
                  inventory,
                  group_names=None,
                  site_names=None,
                  dataset_names=None):  #override
        ## Load groups
        LOG.info('Loading groups.')

        # name constraints communicated between _load_* functions via load_tmp tables
        if self._mysql.table_exists('groups_load_tmp'):
            self._mysql.query('DROP TABLE `groups_load_tmp`')

        id_group_map = {0: inventory.groups[None]}
        self._load_groups(inventory, group_names, id_group_map)

        LOG.info('Loaded %d groups.', len(inventory.groups))

        ## Load sites
        LOG.info('Loading sites.')

        if self._mysql.table_exists('sites_load_tmp'):
            self._mysql.query('DROP TABLE `sites_load_tmp`')

        id_site_map = {}
        self._load_sites(inventory, site_names, id_site_map)

        LOG.info('Loaded %d sites.', len(inventory.sites))

        ## Load datasets
        LOG.info('Loading datasets.')
        start = time.time()

        if self._mysql.table_exists('datasets_load_tmp'):
            self._mysql.query('DROP TABLE `datasets_load_tmp`')

        id_dataset_map = {}
        self._load_datasets(inventory, dataset_names, id_dataset_map)

        LOG.info('Loaded %d datasets in %.1f seconds.',
                 len(inventory.datasets),
                 time.time() - start)

        ## Load blocks
        LOG.info('Loading blocks.')
        start = time.time()

        id_block_maps = {}  # {dataset_id: {block_id: block}}
        self._load_blocks(inventory, id_dataset_map, id_block_maps)

        num_blocks = sum(len(m) for m in id_block_maps.itervalues())

        LOG.info('Loaded %d blocks in %.1f seconds.', num_blocks,
                 time.time() - start)

        ## Load replicas (dataset and block in one go)
        LOG.info('Loading replicas.')
        start = time.time()

        self._load_replicas(inventory, id_group_map, id_site_map,
                            id_dataset_map, id_block_maps)

        num_dataset_replicas = 0
        num_block_replicas = 0
        for dataset in id_dataset_map.itervalues():
            num_dataset_replicas += len(dataset.replicas)
            num_block_replicas += sum(
                len(r.block_replicas) for r in dataset.replicas)

        LOG.info(
            'Loaded %d dataset replicas and %d block replicas in %.1f seconds.',
            num_dataset_replicas, num_block_replicas,
            time.time() - start)

        ## cleanup
        if self._mysql.table_exists('blocks_load_tmp'):
            self._mysql.query('DROP TABLE `blocks_load_tmp`')
        if self._mysql.table_exists('sites_load_tmp'):
            self._mysql.query('DROP TABLE `sites_load_tmp`')
        if self._mysql.table_exists('datasets_load_tmp'):
            self._mysql.query('DROP TABLE `datasets_load_tmp`')

    def _load_groups(self, inventory, group_names, id_group_map):
        sql = 'SELECT g.`id`, g.`name`, g.`olevel` FROM `groups` AS g'

        if group_names is not None:
            # first dump the group ids into a temporary table, then constrain the original table
            self._mysql.query(
                'CREATE TABLE `groups_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))'
            )
            sqlbase = 'INSERT INTO `groups_load_tmp` SELECT `id` FROM `groups`'
            self._mysql.execute_many(sqlbase, 'name', group_names)

            sql += ' INNER JOIN `groups_load_tmp` AS t ON t.`id` = g.`id`'

        for group_id, name, olname in self._mysql.xquery(sql):
            if olname == 'Dataset':
                olevel = Dataset
            else:
                olevel = Block

            group = Group(name, olevel)

            inventory.groups[name] = group
            id_group_map[group_id] = group

    def _load_sites(self, inventory, site_names, id_site_map):
        sql = 'SELECT s.`id`, s.`name`, s.`host`, s.`storage_type`+0, s.`backend`, s.`storage`, s.`cpu`, `status`+0 FROM `sites` AS s'

        if site_names is not None:
            # first dump the site ids into a temporary table, then constrain the original table
            self._mysql.query(
                'CREATE TABLE `sites_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))'
            )
            sqlbase = 'INSERT INTO `sites_load_tmp` SELECT `id` FROM `sites`'
            self._mysql.execute_many(sqlbase, 'name', site_names)

            sql += ' INNER JOIN `sites_load_tmp` AS t ON t.`id` = s.`id`'

        for site_id, name, host, storage_type, backend, storage, cpu, status in self._mysql.xquery(
                sql):
            site = Site(name,
                        host=host,
                        storage_type=storage_type,
                        backend=backend,
                        storage=storage,
                        cpu=cpu,
                        status=status)

            inventory.sites[name] = site
            id_site_map[site_id] = site

            for partition in inventory.partitions.itervalues():
                site.partitions[partition] = SitePartition(site, partition)

        # Load site quotas
        sql = 'SELECT q.`site_id`, p.`name`, q.`storage` FROM `quotas` AS q INNER JOIN `partitions` AS p ON p.`id` = q.`partition_id`'
        if site_names is not None:
            sql += ' INNER JOIN `sites_load_tmp` AS t ON t.`id` = q.`site_id`'

        for site_id, partition_name, storage in self._mysql.xquery(sql):
            try:
                site = id_site_map[site_id]
            except KeyError:
                continue

            partition = inventory.partitions[partition_name]
            site.partitions[partition].set_quota(storage * 1.e+12)

    def _load_datasets(self, inventory, dataset_names, id_dataset_map):
        sql = 'SELECT d.`id`, d.`name`, d.`size`, d.`num_files`, d.`status`+0, d.`data_type`+0, s.`cycle`, s.`major`, s.`minor`, s.`suffix`, UNIX_TIMESTAMP(d.`last_update`), d.`is_open`'
        sql += ' FROM `datasets` AS d'
        sql += ' LEFT JOIN `software_versions` AS s ON s.`id` = d.`software_version_id`'

        if dataset_names is not None:
            # first dump the dataset ids into a temporary table, then constrain the original table
            self._mysql.query(
                'CREATE TABLE `datasets_load_tmp` (`id` int(11) unsigned NOT NULL, PRIMARY KEY (`id`))'
            )
            sqlbase = 'INSERT INTO `datasets_load_tmp` SELECT `id` FROM `datasets`'
            self._mysql.execute_many(sqlbase, 'name', dataset_names)

            sql += ' INNER JOIN `datasets_load_tmp` AS t ON t.`id` = d.`id`'

        for dataset_id, name, size, num_files, status, data_type, sw_cycle, sw_major, sw_minor, sw_suffix, last_update, is_open in self._mysql.xquery(
                sql):
            # size and num_files are reset when loading blocks
            dataset = Dataset(name,
                              size=size,
                              num_files=num_files,
                              status=int(status),
                              data_type=int(data_type),
                              last_update=last_update,
                              is_open=(is_open == 1))
            if sw_cycle is None:
                dataset.software_version = None
            else:
                dataset.software_version = (sw_cycle, sw_major, sw_minor,
                                            sw_suffix)

            inventory.datasets[name] = dataset
            id_dataset_map[dataset_id] = dataset

    def _load_blocks(self, inventory, id_dataset_map, id_block_maps):
        sql = 'SELECT b.`id`, b.`dataset_id`, b.`name`, b.`size`, b.`num_files`, b.`is_open`, UNIX_TIMESTAMP(b.`last_update`) FROM `blocks` AS b'

        if self._mysql.table_exists('datasets_load_tmp'):
            sql += ' INNER JOIN `datasets_load_tmp` AS t ON t.`id` = b.`dataset_id`'

        sql += ' ORDER BY b.`dataset_id`'

        _dataset_id = 0
        dataset = None
        for block_id, dataset_id, name, size, num_files, is_open, last_update in self._mysql.xquery(
                sql):
            if dataset_id != _dataset_id:
                _dataset_id = dataset_id

                dataset = id_dataset_map[dataset_id]
                dataset.blocks.clear()
                dataset.size = 0
                dataset.num_files = 0

                id_block_map = id_block_maps[dataset_id] = {}

            block = Block(Block.to_internal_name(name), dataset, size,
                          num_files, (is_open == 1), last_update)

            dataset.blocks.add(block)
            dataset.size += block.size
            dataset.num_files += block.num_files

            id_block_map[block_id] = block

    def _load_replicas(self, inventory, id_group_map, id_site_map,
                       id_dataset_map, id_block_maps):
        sql = 'SELECT dr.`dataset_id`, dr.`site_id`,'
        sql += ' br.`block_id`, br.`group_id`, br.`is_complete`, br.`is_custodial`, brs.`size`, UNIX_TIMESTAMP(br.`last_update`)'
        sql += ' FROM `dataset_replicas` AS dr'
        sql += ' INNER JOIN `blocks` AS b ON b.`dataset_id` = dr.`dataset_id`'
        sql += ' LEFT JOIN `block_replicas` AS br ON (br.`block_id`, br.`site_id`) = (b.`id`, dr.`site_id`)'
        sql += ' LEFT JOIN `block_replica_sizes` AS brs ON (brs.`block_id`, brs.`site_id`) = (b.`id`, dr.`site_id`)'

        if self._mysql.table_exists('groups_load_tmp'):
            sql += ' INNER JOIN `groups_load_tmp` AS gt ON gt.`id` = br.`group_id`'

        if self._mysql.table_exists('sites_load_tmp'):
            sql += ' INNER JOIN `sites_load_tmp` AS st ON st.`id` = dr.`site_id`'

        if self._mysql.table_exists('datasets_load_tmp'):
            sql += ' INNER JOIN `datasets_load_tmp` AS dt ON dt.`id` = dr.`dataset_id`'

        sql += ' ORDER BY dr.`dataset_id`, dr.`site_id`'

        # Blocks are left joined -> there will be (# sites) x (# blocks) entries per dataset

        _dataset_id = 0
        _site_id = 0
        dataset_replica = None
        for dataset_id, site_id, block_id, group_id, is_complete, b_is_custodial, b_size, b_last_update in self._mysql.xquery(
                sql):
            if dataset_id != _dataset_id:
                _dataset_id = dataset_id

                dataset = id_dataset_map[_dataset_id]
                dataset.replicas.clear()

                id_block_map = id_block_maps[dataset_id]

            if site_id != _site_id:
                _site_id = site_id
                site = id_site_map[site_id]

            if dataset_replica is None or dataset is not dataset_replica.dataset or site is not dataset_replica.site:
                if dataset_replica is not None:
                    # this is the last dataset_replica
                    # add to dataset and site after filling all block replicas
                    # this does not matter for the dataset, but for the site there is some heavy
                    # computation needed when a replica is added
                    dataset_replica.dataset.replicas.add(dataset_replica)
                    dataset_replica.site.add_dataset_replica(
                        dataset_replica, add_block_replicas=True)

                dataset_replica = DatasetReplica(dataset, site)

            if block_id is None:
                # this block replica does not exist
                continue

            block = id_block_map[block_id]
            group = id_group_map[group_id]

            block_replica = BlockReplica(
                block,
                site,
                group=group,
                is_complete=(is_complete == 1),
                is_custodial=(b_is_custodial == 1),
                size=block.size if b_size is None else b_size,
                last_update=b_last_update)

            dataset_replica.block_replicas.add(block_replica)
            block.replicas.add(block_replica)

        if dataset_replica is not None:
            # one last bit
            dataset_replica.dataset.replicas.add(dataset_replica)
            dataset_replica.site.add_dataset_replica(dataset_replica,
                                                     add_block_replicas=True)

    def save_block(self, block):  #override
        dataset_id = self._get_dataset_id(block.dataset)
        if dataset_id == 0:
            return

        fields = ('dataset_id', 'name', 'size', 'num_files', 'is_open',
                  'last_update')
        self._insert_update(
            'blocks', fields, dataset_id, block.real_name(), block.size,
            block.num_files, block.is_open,
            time.strftime('%Y-%m-%d %H:%M:%S',
                          time.localtime(block.last_update)))

    def delete_block(self, block):  #override
        # Here we don't assume block name is unique..
        dataset_id = self._get_dataset_id(block.dataset)
        if dataset_id == 0:
            return

        sql = 'DELETE FROM `blocks` WHERE `dataset_id` = %s AND `name` = %s'
        self._mysql.query(sql, dataset_id, block.real_name())

    def save_file(self, lfile):  #override
        dataset_id = self._get_dataset_id(lfile.block.dataset)
        if dataset_id == 0:
            return

        block_id = self._get_block_id(lfile.block)
        if block_id == 0:
            return

        fields = ('block_id', 'dataset_id', 'size', 'name')
        self._insert_update('files', fields, block_id, dataset_id, lfile.size,
                            lfile.lfn)

    def delete_file(self, lfile):  #override
        sql = 'DELETE FROM `files` WHERE `name` = %s'
        self._mysql.query(sql, lfile.lfn)

    def save_blockreplica(self, block_replica):  #override
        block_id = self._get_block_id(block_replica.block)
        if block_id == 0:
            return

        site_id = self._get_site_id(block_replica.site)
        if site_id == 0:
            return

        group_id = self._get_group_id(block_replica.group)
        if group_id == 0:
            return

        fields = ('block_id', 'site_id', 'group_id', 'is_complete',
                  'is_custodial', 'last_update')
        self._insert_update(
            'block_replicas', fields, block_id, site_id, group_id,
            block_replica.is_complete, block_replica.is_custodial,
            time.strftime('%Y-%m-%d %H:%M:%S',
                          time.localtime(block_replica.last_update)))

        if block_replica.size != block_replica.block.size:
            fields = ('block_id', 'site_id', 'size')
            self._insert_update('block_replica_sizes', fields, block_id,
                                site_id, block_replica.size)
        else:
            sql = 'DELETE FROM `block_replica_sizes` WHERE `block_id` = %s AND `site_id` = %s'
            self._mysql.query(sql, block_id, site_id)

    def delete_blockreplica(self, block_replica):  #override
        block_id = self._get_block_id(block_replica.block)
        if block_id == 0:
            return

        site_id = self._get_site_id(block_replica.site)
        if site_id == 0:
            return

        sql = 'DELETE FROM `block_replicas` WHERE `block_id` = %s AND `site_id` = %s'
        self._mysql.query(sql, block_id, site_id)

        sql = 'DELETE FROM `block_replica_sizes` WHERE `block_id` = %s AND `site_id` = %s'
        self._mysql.query(sql, block_id, site_id)

    def save_dataset(self, dataset):  #override
        if dataset.software_version is None:
            software_version_id = 0
        else:
            sql = 'SELECT `id` FROM `software_versions` WHERE (`cycle`, `major`, `minor`, `suffix`) = (%s, %s, %s, %s)'

            result = self._mysql.query(sql, *dataset.software_version)
            if len(result) == 0:
                sql = 'INSERT INTO `software_versions` (`cycle`, `major`, `minor`, `suffix`) VALUES (%s, %s, %s, %s)'
                software_version_id = self._mysql.query(
                    sql, *dataset.software_version)
            else:
                software_version_id = result[0]

        fields = ('name', 'size', 'num_files', 'status', 'data_type',
                  'software_version_id', 'last_update', 'is_open')
        self._insert_update('datasets', fields, dataset.name, dataset.size, dataset.num_files, \
            dataset.status, dataset.data_type, software_version_id,
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(dataset.last_update)), dataset.is_open)

    def delete_dataset(self, dataset):  #override
        sql = 'DELETE FROM `datasets` WHERE `name` = %s'
        self._mysql.query(sql, dataset.name)

    def save_datasetreplica(self, dataset_replica):  #override
        dataset_id = self._get_dataset_id(dataset_replica.dataset)
        if dataset_id == 0:
            return

        site_id = self._get_site_id(dataset_replica.site)
        if site_id == 0:
            return

        fields = ('dataset_id', 'site_id')
        self._insert_update('dataset_replicas', fields, dataset_id, site_id)

    def delete_datasetreplica(self, dataset_replica):  #override
        dataset_id = self._get_dataset_id(dataset_replica.dataset)
        if dataset_id == 0:
            return

        site_id = self._get_site_id(dataset_replica.site)
        if site_id == 0:
            return

        sql = 'DELETE FROM `dataset_replicas` WHERE `dataset_id` = %s AND `site_id` = %s'
        self._mysql.query(sql, dataset_id, site_id)

    def save_group(self, group):  #override
        fields = ('name', 'olevel')
        self._insert_update('groups', fields, group.name,
                            group.olevel.__name__)

    def delete_group(self, group):  #override
        sql = 'DELETE FROM `groups` WHERE `name` = %s'
        self._mysql.query(sql, group.name)

    def save_partition(self, partition):  #override
        fields = ('name', )
        self._insert_update('partitions', fields, partition.name)

    def delete_partition(self, partition):  #override
        sql = 'DELETE FROM `partitions` WHERE `name` = %s'
        self._mysql.query(sql, partition.name)

    def save_site(self, site):  #override
        fields = ('name', 'host', 'storage_type', 'backend', 'storage', 'cpu',
                  'status')
        self._insert_update('sites', fields, site.name, site.host,
                            site.storage_type, site.backend, site.storage,
                            site.cpu, site.status)

    def delete_site(self, site):  #override
        sql = 'DELETE FROM `sites` WHERE `name` = %s'
        self._mysql.query(sql, site.name)

    def save_sitepartition(self, site_partition):  #override
        # We are only saving quotas. For superpartitions, there is nothing to do.
        if site_partition.partition.subpartitions is not None:
            return

        site_id = self._get_site_id(site_partition.site)
        if site_id == 0:
            return

        partition_id = self._get_partition_id(site_partition.partition)
        if partition_id == 0:
            return

        fields = ('site_id', 'partition_id', 'storage')
        self._insert_update('quotas', fields, site_id, partition_id,
                            site_partition.quota * 1.e-12)

    def delete_sitepartition(self, site_partition):  #override
        # We are only saving quotas. For superpartitions, there is nothing to do.
        if site_partition.partition.subpartitions is not None:
            return

        site_id = self._get_site_id(site_partition.site)
        if site_id == 0:
            return

        partition_id = self._get_partition_id(site_partition.partition)
        if partition_id == 0:
            return

        sql = 'DELETE FROM `quotas` WHERE `site_id` = %s AND `partition_id` = %s'
        self._mysql.query(sql, site_id, partition_id)

    def _insert_update(self, table, fields, *values):
        placeholders = ', '.join(['%s'] * len(fields))

        sql = 'INSERT INTO `%s` (' % table
        sql += ', '.join('`%s`' % f for f in fields)
        sql += ') VALUES (' + placeholders + ')'
        sql += ' ON DUPLICATE KEY UPDATE '
        sql += ', '.join(['`%s`=VALUES(`%s`)' % (f, f) for f in fields])

        self._mysql.query(sql, *values)

    def _get_dataset_id(self, dataset):
        sql = 'SELECT `id` FROM `datasets` WHERE `name` = %s'

        result = self._mysql.query(sql, dataset.name)
        if len(result) == 0:
            # should I raise?
            return 0

        return result[0]

    def _get_block_id(self, block):
        sql = 'SELECT b.`id` FROM `blocks` AS b'
        sql += ' INNER JOIN `datasets` AS d ON d.`id` = b.`dataset_id`'
        sql += ' WHERE d.`name` = %s AND b.`name` = %s'

        result = self._mysql.query(sql, block.dataset.name, block.real_name())
        if len(result) == 0:
            return 0

        return result[0]

    def _get_site_id(self, site):
        sql = 'SELECT `id` FROM `sites` WHERE `name` = %s'

        result = self._mysql.query(sql, site.name)
        if len(result) == 0:
            return 0

        return result[0]

    def _get_group_id(self, group):
        if group.name is None:
            return 0

        sql = 'SELECT `id` FROM `groups` WHERE `name` = %s'

        result = self._mysql.query(sql, group.name)
        if len(result) == 0:
            return 0

        return result[0]

    def _get_partition_id(self, partition):
        sql = 'SELECT `id` FROM `partitions` WHERE `name` = %s'

        result = self._mysql.query(sql, partition.name)
        if len(result) == 0:
            return 0

        return result[0]
예제 #12
0
class GlobalQueueRequestHistory(object):
    """
    Sets one attr:
      request_weight:  float value
    """

    produces = ['request_weight']

    def __init__(self, config):
        self._store = MySQL(config.store.db_params)

        # Weight computation halflife constant (given in days in config)
        self.weight_halflife = config.weight_halflife * 3600. * 24.

    def load(self, inventory):
        records = self._get_stored_records(inventory)
        self._compute(inventory, records)

    def _get_stored_records(self, inventory):
        """
        Get the dataset request data from DB.
        @param inventory  DynamoInventory
        @return  {dataset: {jobid: GlobalQueueJob}}
        """

        # pick up requests that are less than 1 year old
        # old requests will be removed automatically next time the access information is saved from memory
        sql = 'SELECT d.`name`, r.`id`, UNIX_TIMESTAMP(r.`queue_time`), UNIX_TIMESTAMP(r.`completion_time`),'
        sql += ' r.`nodes_total`, r.`nodes_done`, r.`nodes_failed`, r.`nodes_queued` FROM `dataset_requests` AS r'
        sql += ' INNER JOIN `datasets` AS d ON d.`id` = r.`dataset_id`'
        sql += ' WHERE r.`queue_time` > DATE_SUB(NOW(), INTERVAL 1 YEAR) ORDER BY d.`id`, r.`queue_time`'

        all_requests = {}
        num_records = 0

        # little speedup by not repeating lookups for the same dataset
        current_dataset_name = ''
        dataset_exists = True
        for dataset_name, job_id, queue_time, completion_time, nodes_total, nodes_done, nodes_failed, nodes_queued in self._store.xquery(
                sql):
            num_records += 1

            if dataset_name == current_dataset_name:
                if not dataset_exists:
                    continue
            else:
                current_dataset_name = dataset_name

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    dataset_exists = False
                    continue
                else:
                    dataset_exists = True

                requests = all_requests[dataset] = {}

            requests[job_id] = GlobalQueueJob(queue_time, completion_time,
                                              nodes_total, nodes_done,
                                              nodes_failed, nodes_queued)

        last_update = self._store.query(
            'SELECT UNIX_TIMESTAMP(`dataset_requests_last_update`) FROM `system`'
        )[0]

        LOG.info('Loaded %d dataset request data. Last update at %s UTC',
                 num_records,
                 time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(last_update)))

        return all_requests

    def _compute(self, inventory, all_requests):
        """
        Set the dataset request weight based on request list. Formula:
          w = Sum(exp(-t_i/T))
        where t_i is the time distance of the ith request from now. T is defined in the configuration.
        @param inventory     DynamoInventory
        @param all_requests  {dataset: {jobid: GlobalQueueJob}}
        """

        now = time.time()
        decay_constant = self.weight_halflife / math.log(2.)

        for dataset in inventory.datasets.itervalues():
            try:
                requests = all_requests[dataset]
            except KeyError:
                dataset.attr['request_weight'] = 0.
                continue

            weight = 0.
            for job in requests.itervalues():
                # first element of reqdata tuple is the queue time
                weight += math.exp((job.queue_time - now) / decay_constant)

            dataset.attr['request_weight'] = weight

    @staticmethod
    def update(config, inventory):
        htcondor = HTCondor(config.htcondor.config)
        store = MySQL(config.store.db_params)

        last_update = store.query(
            'SELECT UNIX_TIMESTAMP(`dataset_requests_last_update`) FROM `system`'
        )[0]
        try:
            store.query(
                'UPDATE `system` SET `dataset_requests_last_update` = NOW()',
                retries=0,
                silent=True)
        except MySQLdb.OperationalError:
            # We have a read-only config
            read_only = True
        else:
            read_only = False

        source_records = GlobalQueueRequestHistory._get_source_records(
            htcondor, inventory, last_update)

        if not read_only:
            GlobalQueueRequestHistory._save_records(source_records, store)
            # remove old entries
            store.query(
                'DELETE FROM `dataset_requests` WHERE `queue_time` < DATE_SUB(NOW(), INTERVAL 1 YEAR)'
            )
            store.query(
                'UPDATE `system` SET `dataset_requests_last_update` = NOW()')

    @staticmethod
    def _get_source_records(htcondor, inventory, last_update):
        """
        Get the dataset request data from Global Queue schedd.
        @param htcondor     HTCondor interface
        @param inventory    DynamoInventory
        @param last_update  UNIX timestamp
        @return {dataset: {jobid: GlobalQueueJob}}
        """

        constraint = 'TaskType=?="ROOT" && !isUndefined(DESIRED_CMSDataset) && (QDate > {last_update} || CompletionDate > {last_update})'.format(
            last_update=last_update)

        attributes = [
            'DESIRED_CMSDataset', 'GlobalJobId', 'QDate', 'CompletionDate',
            'DAG_NodesTotal', 'DAG_NodesDone', 'DAG_NodesFailed',
            'DAG_NodesQueued'
        ]

        job_ads = htcondor.find_jobs(constraint=constraint,
                                     attributes=attributes)

        job_ads.sort(key=lambda a: a['DESIRED_CMSDataset'])

        all_requests = {}

        for ad in job_ads:
            dataset_name = ad['DESIRED_CMSDataset']

            try:
                dataset = inventory.datasets[dataset_name]
            except KeyError:
                continue

            if dataset not in all_requests:
                all_requests[dataset] = {}

            try:
                nodes_total = ad['DAG_NodesTotal']
                nodes_done = ad['DAG_NodesDone']
                nodes_failed = ad['DAG_NodesFailed']
                nodes_queued = ad['DAG_NodesQueued']
            except KeyError:
                nodes_total = 0
                nodes_done = 0
                nodes_failed = 0
                nodes_queued = 0

            all_requests[dataset][ad['GlobalJobId']] = GlobalQueueJob(
                ad['QDate'], ad['CompletionDate'], nodes_total, nodes_done,
                nodes_failed, nodes_queued)

        return all_requests

    @staticmethod
    def _save_records(records, store):
        """
        Save the newly fetched request records.
        @param records  {dataset: {jobid: GlobalQueueJob}}
        @param store    Write-allowed MySQL interface
        """

        dataset_id_map = {}
        store.make_map('datasets', records.iterkeys(), dataset_id_map, None)

        fields = ('id', 'dataset_id', 'queue_time', 'completion_time',
                  'nodes_total', 'nodes_done', 'nodes_failed', 'nodes_queued')

        data = []
        for dataset, dataset_request_list in records.iteritems():
            dataset_id = dataset_id_map[dataset]

            for job_id, (queue_time, completion_time, nodes_total, nodes_done,
                         nodes_failed,
                         nodes_queued) in dataset_request_list.iteritems():
                data.append(
                    (job_id, dataset_id,
                     time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(queue_time)),
                     time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(completion_time))
                     if completion_time > 0 else '0000-00-00 00:00:00',
                     nodes_total, nodes_done, nodes_failed, nodes_queued))

        store.insert_many('dataset_requests',
                          fields,
                          None,
                          data,
                          do_update=True)