def __init__(self, config=None):
        if config is None:
            config = GlobalQueueRequestHistory._default_config

        self._history = HistoryDatabase(config.get('history', None))
        self._htcondor = HTCondor(config.get('htcondor', None))

        # Weight computation halflife constant (given in days in config)
        self.weight_halflife = config.get('weight_halflife', 4) * 3600. * 24.

        self.set_read_only(config.get('read_only', False))
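The halflife is converted from days (as given in the configuration) to seconds at construction time. How the constant is consumed is not shown in this section; a minimal sketch of the exponential-decay weighting that a halflife constant of this kind typically implies (a hypothetical helper, not part of the class above) could look like this:

import math
import time

def request_weight(request_time, halflife=4 * 3600. * 24., now=None):
    # Hypothetical illustration only: the weight halves every `halflife` seconds.
    if now is None:
        now = time.time()
    return math.exp(-math.log(2.) * (now - request_time) / halflife)

A request made exactly one halflife ago (four days with the default) would thus carry a weight of 0.5.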
Example #2
    def __init__(self, config = None):
        if config is None:
            config = CRABAccessHistory._default_config

        self._history = HistoryDatabase(config.get('history', None))
        self._popdb = PopDB(config.get('popdb', None))

        self.max_back_query = config.get('max_back_query', 7)

        self.included_sites = list(config.get('include_sites', []))
        self.excluded_sites = list(config.get('exclude_sites', []))

        self.set_read_only(config.get('read_only', False))
Example #3
    def __init__(self, optype, config=None):
        """
        @param optype  'copy' or 'deletion'.
        """
        if config is None:
            config = RequestManager._config

        self.registry = RegistryDatabase(config.get('registry', None))
        self.history = HistoryDatabase(config.get('history', None))

        # we'll be using temporary tables
        self.registry.db.reuse_connection = True
        self.history.db.reuse_connection = True

        self.optype = optype

        self.set_read_only(config.get('read_only', False))
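A brief usage sketch, assuming RequestManager can be instantiated directly; Example #4 below suggests it is normally used through subclasses such as CopyRequestManager and DeletionRequestManager:

# Hypothetical direct use; 'copy' and 'deletion' are the two documented optype values.
copy_requests = RequestManager('copy')
copy_requests.set_read_only()   # e.g. for a read-only web view, as in Example #4
deletion_requests = RequestManager('deletion')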
Example #4
    def __init__(self, config):
        WebModule.__init__(self, config)

        self.copy_manager = CopyRequestManager()
        self.copy_manager.set_read_only()
        self.dele_manager = DeletionRequestManager()
        self.dele_manager.set_read_only()

        self.mysql_hist = HistoryDatabase(config.get('history', None))
Example #5
    def __init__(self):

        self.list = []         # list of all operations
        self.n_sources = {}    # dictionary n operations per sources
        self.time_bins = []
        self.volume_bins = []
        self.history_db = HistoryDatabase()   # usage: result = self.history_db.db.query('SELECT ...')
        self.sites = Sites()   # we need to have a translation from site_id to site_name
        self.sites.read_db(self.history_db)   # - get the current list of all sites by id
Example #6
    def __init__(self, config=None):
        config = Configuration(config)

        CopyInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.subscription_chunk_size = config.get('chunk_size', 50.) * 1.e+12  # given in TB in config; stored in bytes
Example #7
    def __init__(self, config=None):
        config = Configuration(config)

        DeletionInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.auto_approval = config.get('auto_approval', True)
        self.allow_tape_deletion = config.get('allow_tape_deletion', True)
        self.tape_auto_approval = config.get('tape_auto_approval', False)

        self.deletion_chunk_size = config.get('chunk_size', 50.) * 1.e+12  # given in TB in config; stored in bytes
Example #8
    def __init__(self, config):
        WebModule.__init__(self, config)
        HTMLMixin.__init__(self, 'Detox deletion results',
                           'detox/monitor.html')

        self.history = HistoryDatabase()

        self.stylesheets = ['/css/detox/monitor.css']
        self.scripts = ['/js/utils.js', '/js/detox/monitor.js']

        with open(HTMLMixin.contents_path +
                  '/html/detox/monitor_titleblock.html') as source:
            self.titleblock = source.read()

        self.default_partition = config.detox.default_partition
        self.test_cycle = False
Example #10
    def __init__(self, config):
        WebModule.__init__(self, config)
        self.history = HistoryDatabase()
Example #11
class RLFSM(object):
    """
    File operations manager using MySQL tables for queue bookkeeping. Also implies the
    inventory backend is MySQL.
    """

    class Subscription(object):
        __slots__ = ['id', 'status', 'file', 'destination', 'disk_sources', 'tape_sources', 'failed_sources', 'hold_reason']

        def __init__(self, id, status, file, destination, disk_sources, tape_sources, failed_sources = None, hold_reason = None):
            self.id = id
            self.status = status
            self.file = file
            self.destination = destination
            self.disk_sources = disk_sources
            self.tape_sources = tape_sources
            self.failed_sources = failed_sources
            self.hold_reason = hold_reason

    class TransferTask(object):
        __slots__ = ['id', 'subscription', 'source']

        def __init__(self, subscription, source):
            self.id = None
            self.subscription = subscription
            self.source = source

    class Desubscription(object):
        __slots__ = ['id', 'status', 'file', 'site']

        def __init__(self, id, status, file, site):
            self.id = id
            self.status = status
            self.file = file
            self.site = site

    class DeletionTask(object):
        __slots__ = ['id', 'desubscription']

        def __init__(self, desubscription):
            self.id = None
            self.desubscription = desubscription

    # default config
    _config = ''

    @staticmethod
    def set_default(config):
        RLFSM._config = Configuration(config)

    def __init__(self, config = None):
        if config is None:
            config = RLFSM._config

        # Handle to the inventory DB
        self.db = MySQL(config.db.db_params)

        # Handle to the history DB
        self.history_db = HistoryDatabase(config.get('history', None))

        # FileTransferOperation backend (can make it a map from (source, dest) to operator)
        self.transfer_operations = []
        if 'transfer' in config:
            for condition_text, module, conf in config.transfer:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.transfer_operations.append((condition, FileTransferOperation.get_instance(module, conf)))

        if 'transfer_query' in config:
            self.transfer_queries = []
            for condition_text, module, conf in config.transfer_query:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.transfer_queries.append((condition, FileTransferQuery.get_instance(module, conf)))
        else:
            self.transfer_queries = self.transfer_operations

        if 'deletion' in config:
            self.deletion_operations = []
            for condition_text, module, conf in config.deletion:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.deletion_operations.append((condition, FileDeletionOperation.get_instance(module, conf)))
        else:
            self.deletion_operations = self.transfer_operations

        if 'deletion_query' in config:
            self.deletion_queries = []
            for condition_text, module, conf in config.deletion_query:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.deletion_queries.append((condition, FileDeletionQuery.get_instance(module, conf)))
        else:
            self.deletion_queries = self.deletion_operations

        self.sites_in_downtime = []

        # Cycle thread
        self.main_cycle = None
        self.cycle_stop = threading.Event()

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value = True):
        self._read_only = value
        self.history_db.set_read_only(value)
        for _, op in self.transfer_operations:
            op.set_read_only(value)
        if self.transfer_queries is not self.transfer_operations:
            for _, qry in self.transfer_queries:
                qry.set_read_only(value)
        if self.deletion_operations is not self.transfer_operations:
            for _, op in self.deletion_operations:
                op.set_read_only(value)
        if self.deletion_queries is not self.deletion_operations:
            for _, qry in self.deletion_queries:
                qry.set_read_only(value)

    def start(self, inventory):
        """
        Start the file operations management cycle. Issue transfer and deletion tasks to the backend.
        """

        if self.main_cycle is not None:
            return

        LOG.info('Starting file operations manager')

        self.main_cycle = threading.Thread(target = self._run_cycle, name = 'FOM', args = (inventory,))
        self.main_cycle.start()

        LOG.info('Started file operations manager.')

    def stop(self):
        """
        Stop the file operations management cycle.
        """

        LOG.info('Stopping file operations manager.')

        self.cycle_stop.set()
        self.main_cycle.join()

        self.main_cycle = None
        self.cycle_stop.clear()
        
    def transfer_files(self, inventory):
        """
        Routine for managing file transfers.
        1. Query the file transfer agent and update the status of known subscriptions.
        2. Pick up new subscriptions.
        3. Select the source for each transfer.
        4. Organize the transfers into batches.
        5. Start the transfers.
        The routine can be stopped almost at any point without corrupting the state machine.
        The only cases where stopping is problematic are within self._update_status and self._start_transfers.

        @param inventory   The inventory.
        """

        self._cleanup()

        LOG.debug('Clearing cancelled transfer tasks.')
        task_ids = self._get_cancelled_tasks('transfer')
        for _, op in self.transfer_operations:
            op.cancel_transfers(task_ids)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Fetching subscription status from the file operation agent.')
        self._update_status('transfer')

        if self.cycle_stop.is_set():
            return

        LOG.debug('Filtering out transfers to unavailable destinations.')
        if not self._read_only:
            for site in self.sites_in_downtime:
                self.db.query('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'site_unavailable\' WHERE `site_id` = (SELECT `id` FROM `sites` WHERE `name` = %s)', site.name)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Collecting new transfer subscriptions.')
        subscriptions = self.get_subscriptions(inventory, op = 'transfer', status = ['new', 'retry'])

        if self.cycle_stop.is_set():
            return

        # Check operator availability only after get_subscriptions, because it performs state updates that should happen regardless
        pending_count = {}
        n_available = 0
        for _, op in self.transfer_operations:
            pending_count[op] = op.num_pending_transfers()
            if pending_count[op] <= op.max_pending_transfers:
                n_available += 1

        if n_available == 0:
            LOG.info('No transfer operators are available at the moment.')
            return

        LOG.debug('Identifying source sites for %d transfers.', len(subscriptions))
        tasks = self._select_source(subscriptions)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Organizing %d transfers into batches.', len(tasks))

        by_dest = {}
        for task in tasks:
            try:
                by_dest[task.subscription.destination].append(task)
            except KeyError:
                by_dest[task.subscription.destination] = [task]

        def issue_tasks(op, my_tasks):
            if len(my_tasks) == 0:
                return 0, 0, 0

            batches = op.form_batches(my_tasks)
    
            if self.cycle_stop.is_set():
                return 0, 0, 0

            nb = 0
            ns = 0
            nf = 0
   
            LOG.debug('Issuing transfer tasks.')
            for batch_tasks in batches:
                s, f = self._start_transfers(op, batch_tasks)
                nb += 1
                ns += s
                nf += f

                pending_count[op] += s
                if pending_count[op] > op.max_pending_transfers:
                    break

                if self.cycle_stop.is_set():
                    break

            return nb, ns, nf

        num_success = 0
        num_failure = 0
        num_batches = 0
        
        for condition, op in self.transfer_operations:
            if condition is None:
                default_op = op
                continue
            
            my_tasks = []
            for site in by_dest.keys():
                if condition.match(site):
                    my_tasks.extend(by_dest.pop(site))

            if pending_count[op] > op.max_pending_transfers:
                continue

            nb, ns, nf = issue_tasks(op, my_tasks)
            num_batches += nb
            num_success += ns
            num_failure += nf

            if self.cycle_stop.is_set():
                break

        else:
            # default condition
            if pending_count[default_op] <= default_op.max_pending_transfers:
                my_tasks = sum(by_dest.itervalues(), [])
                nb, ns, nf = issue_tasks(default_op, my_tasks)
                num_batches += nb
                num_success += ns
                num_failure += nf

        if num_success + num_failure != 0:
            LOG.info('Issued transfer tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)
        else:
            LOG.debug('Issued transfer tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)

    def delete_files(self, inventory):
        """
        Routine for managing file deletions.
        1. Query the file deletion agent and update the status of known subscriptions.
        2. Register the paths for completed deletions as candidates of empty directories.
        3. Pick up new subscriptions.
        4. Organize the deletions into batches.
        5. Start the deletions.
        The routine can be stopped almost at any point without corrupting the state machine.
        The only cases where stopping is problematic are within self._update_status and self._start_deletions.
        @param inventory   The inventory.
        """

        self._cleanup()

        LOG.debug('Clearing cancelled deletion tasks.')
        task_ids = self._get_cancelled_tasks('deletion')
        for _, op in self.deletion_operations:
            op.cancel_deletions(task_ids)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Fetching deletion status from the file operation agent.')
        completed = self._update_status('deletion')

        LOG.debug('Recording candidates for empty directories.')
        self._set_dirclean_candidates(completed, inventory)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Filtering out deletions at unavailable sites.')
        if not self._read_only:
            for site in self.sites_in_downtime:
                self.db.query('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'site_unavailable\' WHERE `site_id` = (SELECT `id` FROM `sites` WHERE `name` = %s)', site.name)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Collecting new deletion subscriptions.')
        desubscriptions = self.get_subscriptions(inventory, op = 'deletion', status = ['new', 'retry'])

        if self.cycle_stop.is_set():
            return

        # See transfer_files
        pending_count = {}
        n_available = 0
        for _, op in self.deletion_operations:
            pending_count[op] = op.num_pending_deletions()
            if pending_count[op] <= op.max_pending_deletions:
                n_available += 1

        if n_available == 0:
            LOG.info('No deletion operators are available at the moment.')
            return

        tasks = [RLFSM.DeletionTask(d) for d in desubscriptions]

        by_site = {}
        for task in tasks:
            try:
                by_site[task.desubscription.site].append(task)
            except KeyError:
                by_site[task.desubscription.site] = [task]

        LOG.debug('Organizing the deletions into batches.')

        def issue_tasks(op, my_tasks):
            if len(my_tasks) == 0:
                return 0, 0, 0

            batches = op.form_batches(my_tasks)
    
            if self.cycle_stop.is_set():
                return 0, 0, 0

            nb = 0
            ns = 0
            nf = 0
    
            LOG.debug('Issuing deletion tasks for %d batches.', len(batches))    
            for batch_tasks in batches:
                LOG.debug('Batch with %d tasks.', len(batch_tasks))
                s, f = self._start_deletions(op, batch_tasks)
                nb += 1
                ns += s
                nf += f

                pending_count[op] += s
                if pending_count[op] > op.max_pending_deletions:
                    break

                if self.cycle_stop.is_set():
                    break

            return nb, ns, nf

        num_success = 0
        num_failure = 0
        num_batches = 0

        for condition, op in self.deletion_operations:
            if condition is None:
                default_op = op
                continue

            my_tasks = []
            for site in by_site.keys():
                if condition.match(site):
                    my_tasks.extend(by_site.pop(site))

            if pending_count[op] > op.max_pending_deletions:
                continue

            nb, ns, nf = issue_tasks(op, my_tasks)
            num_batches += nb
            num_success += ns
            num_failure += nf

            if self.cycle_stop.is_set():
                break

        else:
            # default condition
            if pending_count[default_op] <= default_op.max_pending_deletions:
                my_tasks = sum(by_site.itervalues(), [])
                nb, ns, nf = issue_tasks(default_op, my_tasks)
                num_batches += nb
                num_success += ns
                num_failure += nf

        if num_success + num_failure != 0:
            LOG.info('Issued deletion tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)
        else:
            LOG.debug('Issued deletion tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)

    def subscribe_file(self, site, lfile):
        """
        Make a file subscription at a site.
        @param site  Site object
        @param lfile File object
        """
        LOG.debug('Subscribing %s to %s', lfile.lfn, site.name)

        self._subscribe(site, lfile, 0)

    def desubscribe_file(self, site, lfile):
        """
        Book deletion of a file at a site.
        @param site  Site object
        @param lfile File object
        """
        LOG.debug('Desubscribing %s from %s', lfile.lfn, site.name)

        self._subscribe(site, lfile, 1)

    def cancel_subscription(self, site = None, lfile = None, sub_id = None):
        sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\' WHERE '

        if sub_id is None:
            if site is None or lfile is None:
                raise OperationalError('site and lfile must be non-None.')

            sql += '`file_id` = %s AND `site_id` = %s'
            if not self._read_only:
                self.db.query(sql, lfile.id, site.id)
        else:
            sql += '`id` = %s'
            if not self._read_only:
                self.db.query(sql, sub_id)

    def cancel_desubscription(self, site = None, lfile = None, sub_id = None):
        self.cancel_subscription(site = site, lfile = lfile, sub_id = sub_id)

    def convert_pre_subscriptions(self, inventory):
        sql = 'SELECT `id`, `file_name`, `site_name`, UNIX_TIMESTAMP(`created`), `delete` FROM `file_pre_subscriptions`'

        sids = []

        for sid, lfn, site_name, created, delete in self.db.query(sql):
            lfile = inventory.find_file(lfn)
            if lfile is None or lfile.id == 0:
                continue

            try:
                site = inventory.sites[site_name]
            except KeyError:
                continue

            if site.id == 0:
                continue

            sids.append(sid)

            self._subscribe(site, lfile, delete, created = created)

        if not self._read_only:
            self.db.lock_tables(write = ['file_pre_subscriptions'])
            self.db.delete_many('file_pre_subscriptions', 'id', sids)
            if self.db.query('SELECT COUNT(*) FROM `file_pre_subscriptions`')[0] == 0:
                self.db.query('ALTER TABLE `file_pre_subscriptions` AUTO_INCREMENT = 1')
            self.db.unlock_tables()

    def get_subscriptions(self, inventory, op = None, status = None):
        """
        Return a list containing Subscription and Desubscription objects ordered by the id.
        @param inventory   Dynamo inventory
        @param op          If set to 'transfer' or 'deletion', limit to the operation type.
        @param status      If not None, set to list of status strings to limit the query.
        """

        # First convert all pre-subscriptions
        self.convert_pre_subscriptions(inventory)

        subscriptions = []

        get_all = 'SELECT u.`id`, u.`status`, u.`delete`, f.`block_id`, f.`name`, s.`name`, u.`hold_reason` FROM `file_subscriptions` AS u'
        get_all += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        get_all += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        constraints = []
        if op == 'transfer':
            constraints.append('u.`delete` = 0')
        elif op == 'deletion':
            constraints.append('u.`delete` = 1')
        if status is not None:
            constraints.append('u.`status` IN ' + MySQL.stringify_sequence(status))

        if len(constraints) != 0:
            get_all += ' WHERE ' + ' AND '.join(constraints)

        get_all += ' ORDER BY s.`id`, f.`block_id`'

        get_tried_sites = 'SELECT s.`name`, f.`exitcode` FROM `failed_transfers` AS f'
        get_tried_sites += ' INNER JOIN `sites` AS s ON s.`id` = f.`source_id`'
        get_tried_sites += ' WHERE f.`subscription_id` = %s'

        _destination_name = ''
        _block_id = -1

        no_source = []
        all_failed = []
        to_done = []

        COPY = 0
        DELETE = 1

        for row in self.db.query(get_all):
            sub_id, st, optype, block_id, file_name, site_name, hold_reason = row

            if site_name != _destination_name:
                _destination_name = site_name
                try:
                    destination = inventory.sites[site_name]
                except KeyError:
                    # Site disappeared from the inventory - weird but can happen!
                    destination = None

                _block_id = -1

            if destination is None:
                continue

            if block_id != _block_id:
                lfile = inventory.find_file(file_name)
                if lfile is None:
                    # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                    continue

                _block_id = block_id
                block = lfile.block
                dest_replica = block.find_replica(destination)

            else:
                lfile = block.find_file(file_name)
                if lfile is None:
                    # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                    continue

            if dest_replica is None and st != 'cancelled':
                LOG.debug('Destination replica for %s does not exist. Canceling the subscription.', file_name)
                # Replica was invalidated
                sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\''
                sql += ' WHERE `id` = %s'
                if not self._read_only:
                    self.db.query(sql, sub_id)

                if status is not None and 'cancelled' not in status:
                    # We are not asked to return cancelled subscriptions
                    continue

                st = 'cancelled'

            if optype == COPY:
                disk_sources = None
                tape_sources = None
                failed_sources = None

                if st not in ('done', 'held', 'cancelled'):
                    if dest_replica.has_file(lfile):
                        LOG.debug('%s already exists at %s', file_name, site_name)
                        to_done.append(sub_id)

                        st = 'done'

                    else:
                        disk_sources = []
                        tape_sources = []
                        for replica in block.replicas:
                            if replica.site == destination or replica.site.status != Site.STAT_READY:
                                continue
            
                            if replica.has_file(lfile):
                                if replica.site.storage_type == Site.TYPE_DISK:
                                    disk_sources.append(replica.site)
                                elif replica.site.storage_type == Site.TYPE_MSS:
                                    tape_sources.append(replica.site)
            
                        if len(disk_sources) + len(tape_sources) == 0:
                            LOG.warning('Transfer of %s to %s has no source.', file_name, site_name)
                            no_source.append(sub_id)

                            st = 'held'

                if st == 'retry':
                    failed_sources = {}
                    for source_name, exitcode in self.db.query(get_tried_sites, sub_id):
                        try:
                            source = inventory.sites[source_name]
                        except KeyError:
                            # this site may have been deleted in this process
                            continue

                        try:
                            failed_sources[source].append(exitcode)
                        except KeyError:
                            if source not in disk_sources and source not in tape_sources:
                                # this is not a source site any more
                                continue

                            failed_sources[source] = [exitcode]
    
                    if len(failed_sources) == len(disk_sources) + len(tape_sources):
                        # transfers from all sites failed at least once
                        for codes in failed_sources.itervalues():
                            if codes[-1] not in irrecoverable_errors:
                                # This site failed for a recoverable reason
                                break
                        else:
                            # last failure from all sites due to irrecoverable errors
                            LOG.warning('Transfer of %s to %s failed from all sites.', file_name, site_name)
                            all_failed.append(sub_id)

                            st = 'held'

                # st value may have changed - filter again
                if status is None or st in status:
                    subscription = RLFSM.Subscription(sub_id, st, lfile, destination, disk_sources, tape_sources, failed_sources, hold_reason)
                    subscriptions.append(subscription)

            elif optype == DELETE:
                if st not in ('done', 'held', 'cancelled') and not dest_replica.has_file(lfile):
                    LOG.debug('%s is already gone from %s', file_name, site_name)
                    to_done.append(sub_id)

                    st = 'done'

                if status is None or st in status:
                    desubscription = RLFSM.Desubscription(sub_id, st, lfile, destination)
                    subscriptions.append(desubscription)

        if len(to_done) + len(no_source) + len(all_failed) != 0:
            msg = 'Subscriptions terminated directly: %d done' % len(to_done)
            if len(no_source) != 0:
                msg += ', %d held with reason "no_source"' % len(no_source)
            if len(all_failed) != 0:
                msg += ', %d held with reason "all_failed"' % len(all_failed)

            LOG.info(msg)

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'done\', `last_update` = NOW()', 'id', to_done)
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'no_source\', `last_update` = NOW()', 'id', no_source)
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'all_failed\', `last_update` = NOW()', 'id', all_failed)

            # Clean up subscriptions for deleted files / sites
            sql = 'DELETE FROM u USING `file_subscriptions` AS u'
            sql += ' LEFT JOIN `files` AS f ON f.`id` = u.`file_id`'
            sql += ' LEFT JOIN `sites` AS s ON s.`id` = u.`site_id`'
            sql += ' WHERE f.`name` IS NULL OR s.`name` IS NULL'
            self.db.query(sql)

            sql = 'DELETE FROM f USING `failed_transfers` AS f'
            sql += ' LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id`'
            sql += ' WHERE u.`id` IS NULL'
            self.db.query(sql)

        return subscriptions

    def close_subscriptions(self, done_ids):
        """
        Get subscription completion acknowledgments.
        """

        if not self._read_only:
            self.db.delete_many('file_subscriptions', 'id', done_ids)

    def release_subscription(self, subscription):
        """
        Clear failed transfers list and set the subscription status to retry.
        """

        if subscription.status != 'held':
            return

        if self._read_only:
            return

        self.db.query('DELETE FROM `failed_transfers` WHERE `subscription_id` = %s', subscription.id)
        self.db.query('UPDATE `file_subscriptions` SET `status` = \'retry\' WHERE `id` = %s', subscription.id)

    def _run_cycle(self, inventory):
        while True:
            if self.cycle_stop.is_set():
                break
    
            LOG.debug('Checking and executing new file transfer subscriptions.')
            self.transfer_files(inventory)
    
            if self.cycle_stop.is_set():
                break
    
            LOG.debug('Checking and executing new file deletion subscriptions.')
            self.delete_files(inventory)

            is_set = self.cycle_stop.wait(30)
            if is_set: # wait() returns True when the stop flag is set (Python 2.7+)
                break

    def _cleanup(self):
        if self._read_only:
            return

        # Make the tables consistent in case the previous cycle was terminated prematurely

        # There should not be tasks with subscription status new
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = t.`subscription_id`'
        sql += ' WHERE u.`status` IN (\'new\', \'retry\')'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = t.`subscription_id`'
        sql += ' WHERE u.`status` IN (\'new\', \'retry\')'
        self.db.query(sql)

        # There should not be batches with no tasks
        sql = 'DELETE FROM b USING `transfer_batches` AS b LEFT JOIN `transfer_tasks` AS t ON t.`batch_id` = b.`id` WHERE t.`batch_id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM b USING `deletion_batches` AS b LEFT JOIN `deletion_tasks` AS t ON t.`batch_id` = b.`id` WHERE t.`batch_id` IS NULL'
        self.db.query(sql)

        # and tasks with no batches
        sql = 'DELETE FROM t USING `transfer_tasks` AS t LEFT JOIN `transfer_batches` AS b ON b.`id` = t.`batch_id` WHERE b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t LEFT JOIN `deletion_batches` AS b ON b.`id` = t.`batch_id` WHERE b.`id` IS NULL'
        self.db.query(sql)

        # Cleanup the plugins (might delete tasks)
        for _, op in self.transfer_operations:
            op.cleanup()
        if self.deletion_operations is not self.transfer_operations:
            for _, op in self.deletion_operations:
                op.cleanup()

        # Reset inbatch subscriptions with no task to new state
        sql = 'UPDATE `file_subscriptions` SET `status` = \'new\' WHERE `status` = \'inbatch\' AND `id` NOT IN (SELECT `subscription_id` FROM `transfer_tasks`) AND `id` NOT IN (SELECT `subscription_id` FROM `deletion_tasks`)'
        self.db.query(sql)

        # Delete canceled subscriptions with no task (ones with task need to be archived in update_status)
        sql = 'DELETE FROM u USING `file_subscriptions` AS u LEFT JOIN `transfer_tasks` AS t ON t.`subscription_id` = u.`id` WHERE u.`delete` = 0 AND u.`status` = \'cancelled\' AND t.`id` IS NULL'
        self.db.query(sql)

        sql = 'DELETE FROM u USING `file_subscriptions` AS u LEFT JOIN `deletion_tasks` AS t ON t.`subscription_id` = u.`id` WHERE u.`delete` = 1 AND u.`status` = \'cancelled\' AND t.`id` IS NULL'
        self.db.query(sql)

        # Delete failed transfers with no subscription
        sql = 'DELETE FROM f USING `failed_transfers` AS f LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id` WHERE u.`id` IS NULL'
        self.db.query(sql)

    def _subscribe(self, site, lfile, delete, created = None):
        opp_op = 0 if delete == 1 else 1
        now = time.strftime('%Y-%m-%d %H:%M:%S')

        if created is None:
            created = now
        else:
            created = datetime.datetime(*time.localtime(created)[:6])

        if lfile.id == 0 or site.id == 0:
            # file is not registered in inventory store yet; update the presubscription
            if not self._read_only:
                fields = ('file_name', 'site_name', 'created', 'delete')
                self.db.insert_update('file_pre_subscriptions', fields, lfile.lfn, site.name, now, delete, update_columns = ('delete',))
            return

        if not self._read_only:
            self.db.lock_tables(write = ['file_subscriptions'])

        try:
            sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\''
            sql += ' WHERE `file_id` = %s AND `site_id` = %s AND `delete` = %s'
            sql += ' AND `status` IN (\'new\', \'inbatch\', \'retry\', \'held\')'
            if not self._read_only:
                self.db.query(sql, lfile.id, site.id, opp_op)
    
            fields = ('file_id', 'site_id', 'status', 'delete', 'created', 'last_update')

            if not self._read_only:
                self.db.insert_update('file_subscriptions', fields, lfile.id, site.id, 'new', delete, now, now, update_columns = ('status', 'last_update'))

        finally:
            if not self._read_only:
                self.db.unlock_tables()

    def _get_cancelled_tasks(self, optype):
        if optype == 'transfer':
            delete = 0
        else:
            delete = 1

        sql = 'SELECT q.`id` FROM `{op}_tasks` AS q'.format(op = optype)
        sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`'
        sql += ' WHERE u.`status` = \'cancelled\' AND u.`delete` = %d' % delete
        return self.db.query(sql)

    def _update_status(self, optype):
        if optype == 'transfer':
            site_columns = 'ss.`name`, sd.`name`'
            site_joins = ' INNER JOIN `sites` AS ss ON ss.`id` = q.`source_id`'
            site_joins += ' INNER JOIN `sites` AS sd ON sd.`id` = u.`site_id`'
        else:
            site_columns = 's.`name`'
            site_joins = ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        get_task_data = 'SELECT u.`id`, f.`name`, f.`size`, UNIX_TIMESTAMP(q.`created`), ' + site_columns + ' FROM `{op}_tasks` AS q'
        get_task_data += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`'
        get_task_data += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        get_task_data += site_joins
        get_task_data += ' WHERE q.`id` = %s'

        get_task_data = get_task_data.format(op = optype)

        if optype == 'transfer':
            history_table_name = 'file_transfers'
            history_site_fields = ('source_id', 'destination_id')
        else:
            history_table_name = 'file_deletions'
            history_site_fields = ('site_id',)

        history_fields = ('file_id', 'exitcode', 'message', 'batch_id', 'created', 'started', 'finished', 'completed') + history_site_fields

        if optype == 'transfer':
            insert_failure = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)'
            insert_failure += ' SELECT `id`, `subscription_id`, `source_id`, %s FROM `transfer_tasks` WHERE `id` = %s'
            insert_failure += ' ON DUPLICATE KEY UPDATE `id`=VALUES(`id`)'
            delete_failures = 'DELETE FROM `failed_transfers` WHERE `subscription_id` = %s'

        get_subscription_status = 'SELECT `status` FROM `file_subscriptions` WHERE `id` = %s'

        update_subscription = 'UPDATE `file_subscriptions` SET `status` = %s, `last_update` = NOW() WHERE `id` = %s'
        delete_subscription = 'DELETE FROM `file_subscriptions` WHERE `id` = %s'

        delete_task = 'DELETE FROM `{op}_tasks` WHERE `id` = %s'.format(op = optype)

        delete_batch = 'DELETE FROM `{op}_batches` WHERE `id` = %s'.format(op = optype)

        done_subscriptions = []
        num_success = 0
        num_failure = 0
        num_cancelled = 0

        # Collect completed tasks

        for batch_id in self.db.query('SELECT `id` FROM `{op}_batches`'.format(op = optype)):
            results = []

            if optype == 'transfer':
                for _, query in self.transfer_queries:
                    results = query.get_transfer_status(batch_id)
                    if len(results) != 0:
                        break

            else:
                for _, query in self.deletion_queries:
                    results = query.get_deletion_status(batch_id)
                    if len(results) != 0:
                        break

            batch_complete = True

            for task_id, status, exitcode, message, start_time, finish_time in results:
                # start_time and finish_time can be None
                LOG.debug('%s result: %d %s %d %s %s', optype, task_id, FileQuery.status_name(status), exitcode, start_time, finish_time)

                if status == FileQuery.STAT_DONE:
                    num_success += 1
                elif status == FileQuery.STAT_FAILED:
                    num_failure += 1
                elif status == FileQuery.STAT_CANCELLED:
                    num_cancelled += 1
                else:
                    batch_complete = False
                    continue

                try:
                    task_data = self.db.query(get_task_data, task_id)[0]
                except IndexError:
                    LOG.warning('%s task %d got lost.', optype, task_id)
                    if optype == 'transfer':
                        query.forget_transfer_status(task_id)
                    else:
                        query.forget_deletion_status(task_id)

                    if not self._read_only:
                        self.db.query(delete_task, task_id)

                    continue

                subscription_id, lfn, size, create_time = task_data[:4]

                if optype == 'transfer':
                    source_name, dest_name = task_data[4:]
                    history_site_ids = (
                        self.history_db.save_sites([source_name], get_ids = True)[0],
                        self.history_db.save_sites([dest_name], get_ids = True)[0]
                    )
                else:
                    site_name = task_data[4]
                    history_site_ids = (self.history_db.save_sites([site_name], get_ids = True)[0],)

                file_id = self.history_db.save_files([(lfn, size)], get_ids = True)[0]

                if start_time is None:
                    sql_start_time = None
                else:
                    sql_start_time = datetime.datetime(*time.localtime(start_time)[:6])

                if finish_time is None:
                    sql_finish_time = None
                else:
                    sql_finish_time = datetime.datetime(*time.localtime(finish_time)[:6])

                values = (file_id, exitcode, message, batch_id, datetime.datetime(*time.localtime(create_time)[:6]),
                    sql_start_time, sql_finish_time, MySQL.bare('NOW()')) + history_site_ids

                if optype == 'transfer':
                    LOG.debug('Archiving transfer of %s from %s to %s (exitcode %d)', lfn, source_name, dest_name, exitcode)
                else:
                    LOG.debug('Archiving deletion of %s at %s (exitcode %d)', lfn, site_name, exitcode)

                if self._read_only:
                    history_id = 0
                else:
                    history_id = self.history_db.db.insert_get_id(history_table_name, history_fields, values)

                if optype == 'transfer':
                    query.write_transfer_history(self.history_db, task_id, history_id)
                else:
                    query.write_deletion_history(self.history_db, task_id, history_id)

                # We check the subscription status and update accordingly. Need to lock the tables.
                if not self._read_only:
                    self.db.lock_tables(write = ['file_subscriptions'])

                try:
                    subscription_status = self.db.query(get_subscription_status, subscription_id)[0]

                    if subscription_status == 'inbatch':
                        if status == FileQuery.STAT_DONE:
                            LOG.debug('Subscription %d done.', subscription_id)
                            if not self._read_only:
                                self.db.query(update_subscription, 'done', subscription_id)
        
                        elif status == FileQuery.STAT_FAILED:
                            LOG.debug('Subscription %d failed (exit code %d). Flagging retry.', subscription_id, exitcode)
                            if not self._read_only:
                                self.db.query(update_subscription, 'retry', subscription_id)
        
                    elif subscription_status == 'cancelled':
                        # subscription is cancelled and task terminated -> delete the subscription now, irrespective of the task status
                        LOG.debug('Subscription %d is cancelled.', subscription_id)
                        if not self._read_only:
                            self.db.query(delete_subscription, subscription_id)
                finally:
                    if not self._read_only:
                        self.db.unlock_tables()

                if not self._read_only:
                    if optype == 'transfer':
                        if subscription_status == 'cancelled' or (subscription_status == 'inbatch' and status == FileQuery.STAT_DONE):
                            # Delete entries from failed_transfers table
                            self.db.query(delete_failures, subscription_id)
    
                        elif subscription_status == 'inbatch' and status == FileQuery.STAT_FAILED:
                            # Insert entry to failed_transfers table
                            self.db.query(insert_failure, exitcode, task_id)
        
                    self.db.query(delete_task, task_id)

                if status == FileQuery.STAT_DONE:
                    done_subscriptions.append(subscription_id)

                if optype == 'transfer':
                    query.forget_transfer_status(task_id)
                else:
                    query.forget_deletion_status(task_id)

                if self.cycle_stop.is_set():
                    break

            if batch_complete:
                if not self._read_only:
                    self.db.query(delete_batch, batch_id)

                if optype == 'transfer':
                    query.forget_transfer_batch(batch_id)
                else:
                    query.forget_deletion_batch(batch_id)

        if num_success + num_failure + num_cancelled != 0:
            LOG.info('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled)
        else:
            LOG.debug('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled)

        return done_subscriptions

    def _select_source(self, subscriptions):
        """
        Intelligently select the best source for each subscription.
        @param subscriptions  List of Subscription objects

        @return  List of TransferTask objects
        """

        def find_site_to_try(sources, failed_sources):
            not_tried = set(sources)
            if failed_sources is not None:
                not_tried -= set(failed_sources.iterkeys())

            LOG.debug('%d sites not tried', len(not_tried))

            if len(not_tried) == 0:
                if failed_sources is None:
                    return None

                # we've tried all sites. Did any of them fail with a recoverable error?
                sites_to_retry = []
                for site, codes in failed_sources.iteritems():
                    if site not in sources:
                        continue

                    if codes[-1] not in irrecoverable_errors:
                        sites_to_retry.append(site)

                if len(sites_to_retry) == 0:
                    return None
                else:
                    # select the least failed site
                    by_failure = sorted(sites_to_retry, key = lambda s: len(failed_sources[s]))
                    LOG.debug('%s has the least failures', by_failure[0].name)
                    return by_failure[0]

            else:
                LOG.debug('Selecting randomly')
                return random.choice(list(not_tried))

        tasks = []

        for subscription in subscriptions:
            LOG.debug('Selecting a disk source for subscription %d (%s to %s)', subscription.id, subscription.file.lfn, subscription.destination.name)
            source = find_site_to_try(subscription.disk_sources, subscription.failed_sources)
            if source is None:
                LOG.debug('Selecting a tape source for subscription %d', subscription.id)
                source = find_site_to_try(subscription.tape_sources, subscription.failed_sources)

            if source is None:
                # If both disk and tape failed irrecoverably, the subscription should have been placed in the held queue by get_subscriptions.
                # Reaching this line means something is wrong.
                LOG.warning('Could not find a source for transfer of %s to %s from %d disk and %d tape candidates.',
                    subscription.file.lfn, subscription.destination.name, len(subscription.disk_sources), len(subscription.tape_sources))
                continue
            
            tasks.append(RLFSM.TransferTask(subscription, source))

        return tasks

    def _start_transfers(self, transfer_operation, tasks):
        # start the transfer of tasks. If batch submission fails, make progressively smaller batches until failing tasks are identified.
        if self._read_only:
            batch_id = 0
        else:
            self.db.query('INSERT INTO `transfer_batches` (`id`) VALUES (0)')
            batch_id = self.db.last_insert_id

        LOG.debug('New transfer batch %d for %d files.', batch_id, len(tasks))

        # local time
        now = time.strftime('%Y-%m-%d %H:%M:%S')

        # need to create the transfer tasks first to have ids assigned
        fields = ('subscription_id', 'source_id', 'batch_id', 'created')
        mapping = lambda t: (t.subscription.id, t.source.id, batch_id, now)

        if not self._read_only:
            self.db.insert_many('transfer_tasks', fields, mapping, tasks)
        
        # set the task ids
        tasks_by_sub = dict((t.subscription.id, t) for t in tasks)
        for task_id, subscription_id in self.db.xquery('SELECT `id`, `subscription_id` FROM `transfer_tasks` WHERE `batch_id` = %s', batch_id):
            tasks_by_sub[subscription_id].id = task_id

        result = transfer_operation.start_transfers(batch_id, tasks)

        successful = [task for task, success in result.iteritems() if success]

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'inbatch\', `last_update` = NOW()', 'id', [t.subscription.id for t in successful])

            if len(successful) != len(result):
                failed = [task for task, success in result.iteritems() if not success]
                for task in failed:
                    LOG.error('Cannot issue transfer of %s from %s to %s',
                              task.subscription.file.lfn, task.source.name, task.subscription.destination.name)

                failed_ids = [t.id for t in failed]

                sql = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)'
                sql += ' SELECT `id`, `subscription_id`, `source_id`, -1 FROM `transfer_tasks`'
                self.db.execute_many(sql, 'id', failed_ids)

                self.db.delete_many('transfer_tasks', 'id', failed_ids)

                self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'retry\', `last_update` = NOW()', 'id', [t.subscription.id for t in failed])

        return len(successful), len(result) - len(successful)

    def _start_deletions(self, deletion_operation, tasks):
        if self._read_only:
            batch_id = 0
        else:
            self.db.query('INSERT INTO `deletion_batches` (`id`) VALUES (0)')
            batch_id = self.db.last_insert_id

        # local time
        now = time.strftime('%Y-%m-%d %H:%M:%S')
        
        fields = ('subscription_id', 'batch_id', 'created')
        mapping = lambda t: (t.desubscription.id, batch_id, now)

        if not self._read_only:
            self.db.insert_many('deletion_tasks', fields, mapping, tasks)

        # set the task ids
        tasks_by_sub = dict((t.desubscription.id, t) for t in tasks)
        for task_id, desubscription_id in self.db.xquery('SELECT `id`, `subscription_id` FROM `deletion_tasks` WHERE `batch_id` = %s', batch_id):
            tasks_by_sub[desubscription_id].id = task_id
        
        result = deletion_operation.start_deletions(batch_id, tasks)

        successful = [task for task, success in result.iteritems() if success]

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'inbatch\', `last_update` = NOW()', 'id', [t.desubscription.id for t in successful])

            if len(successful) != len(result):
                failed = [task for task, success in result.iteritems() if not success]

                for task in failed:
                    LOG.error('Cannot delete %s at %s',
                              task.desubscription.file.lfn, task.desubscription.site.name)

                self.db.delete_many('deletion_tasks', 'id', [t.id for t in failed])

                self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `last_update` = NOW()', 'id', [t.desubscription.id for t in failed])

        return len(successful), len(result) - len(successful)
    
    def _set_dirclean_candidates(self, subscription_ids, inventory):
        site_dirs = {}

        # Clean up directories of completed subscriptions
        sql = 'SELECT s.`name`, f.`name` FROM `file_subscriptions` AS u'
        sql += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        sql += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        for site_name, file_name in self.db.execute_many(sql, 'u.`id`', subscription_ids):
            try:
                site = inventory.sites[site_name]
            except KeyError:
                continue

            try:
                dirs = site_dirs[site]
            except KeyError:
                dirs = site_dirs[site] = set()

            dirs.add(os.path.dirname(file_name))

        def get_entry():
            for site, dirs in site_dirs.iteritems():
                for directory in dirs:
                    yield site.id, directory

        fields = ('site_id', 'directory')
        if not self._read_only:
            self.db.insert_many('directory_cleaning_tasks', fields, None, get_entry(), do_update = True)
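A minimal driving sketch for the RLFSM class above; config, inventory, site and lfile are placeholders for objects provided by the surrounding Dynamo application, and the configuration must at least supply db.db_params (the other sections are read with config.get and are optional):

# Sketch only: all names below except RLFSM are assumptions.
RLFSM.set_default(config)          # register the default Configuration
fsm = RLFSM()                      # picks up the default configuration set above

fsm.subscribe_file(site, lfile)    # book a transfer of lfile to site
fsm.desubscribe_file(site, lfile)  # or book its deletion instead

fsm.start(inventory)               # launch the background transfer/deletion cycle
# ... run for a while ...
fsm.stop()                         # sets cycle_stop and joins the cycle thread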
Beispiel #12
0
    def __init__(self, config = None):
        if config is None:
            config = RLFSM._config

        # Handle to the inventory DB
        self.db = MySQL(config.db.db_params)

        # Handle to the history DB
        self.history_db = HistoryDatabase(config.get('history', None))

        # FileTransferOperation backend (can make it a map from (source, dest) to operator)
        self.transfer_operations = []
        if 'transfer' in config:
            for condition_text, module, conf in config.transfer:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.transfer_operations.append((condition, FileTransferOperation.get_instance(module, conf)))

        if 'transfer_query' in config:
            self.transfer_queries = []
            for condition_text, module, conf in config.transfer_query:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.transfer_queries.append(condition, FileTransferQuery.get_instance(module, conf))
        else:
            self.transfer_queries = self.transfer_operations

        if 'deletion' in config:
            self.deletion_operations = []
            for condition_text, module, conf in config.deletion:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.deletion_operations.append(condition, FileDeletionOperation.get_instance(module, conf))
        else:
            self.deletion_operations = self.transfer_operations

        if 'deletion_query' in config:
            self.deletion_queries = []
            for condition_text, module, conf in config.deletion_query:
                if condition_text is None: # default
                    condition = None
                else:
                    condition = Condition(condition_text, site_variables)

                self.deletion_queries.append(condition, FileDeletionQuery.get_instance(module, conf))
        else:
            self.deletion_queries = self.deletion_operations

        self.sites_in_downtime = []

        # Cycle thread
        self.main_cycle = None
        self.cycle_stop = threading.Event()

        self.set_read_only(config.get('read_only', False))
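
A minimal sketch (not from the source) of the configuration shape this constructor appears to expect, judging only from the attribute and key accesses above. It assumes Configuration can wrap a plain dictionary; the condition expression and the module names 'fts' and 'gfal' are hypothetical placeholders.

rlfsm_config = Configuration({
    'db': {'db_params': {'host': 'localhost', 'db': 'dynamo'}},  # hypothetical connection parameters
    'history': None,                                             # fall back to the HistoryDatabase defaults
    # 'transfer' entries are (condition_text, module, conf) triplets; a None condition acts as the default
    'transfer': [
        ('site.storage_type == disk', 'fts', Configuration()),   # hypothetical condition text and module name
        (None, 'gfal', Configuration())
    ],
    'read_only': True
})
fsm = RLFSM(rlfsm_config)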
Beispiel #13
0
class CRABAccessHistory(object):
    """
    Sets three attrs:
      global_usage_rank:  float value
      num_access: integer
      last_access: timestamp
    """

    produces = ['global_usage_rank', 'num_access', 'last_access']

    _default_config = None

    @staticmethod
    def set_default(config):
        CRABAccessHistory._default_config = Configuration(config)

    def __init__(self, config = None):
        if config is None:
            config = CRABAccessHistory._default_config

        self._history = HistoryDatabase(config.get('history', None))
        self._popdb = PopDB(config.get('popdb', None))

        self.max_back_query = config.get('max_back_query', 7)

        self.included_sites = list(config.get('include_sites', []))
        self.excluded_sites = list(config.get('exclude_sites', []))

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value = True):
        self._read_only = value
        self._history.set_read_only(value)

    def load(self, inventory):
        records = self._get_stored_records(inventory)
        self._compute(inventory, records)

    def _get_stored_records(self, inventory):
        """
        Get the dataset access data from the history DB.
        @param inventory  DynamoInventory
        @return  {dataset: [(timestamp, number of accesses)]}
        """

        # pick up all accesses that are less than 2 years old
        # old accesses will be removed automatically next time the access information is saved from memory
        sql = 'SELECT d.`name`, UNIX_TIMESTAMP(a.`date`), a.`num_accesses` FROM `dataset_accesses` AS a'
        sql += ' INNER JOIN `datasets` AS d ON d.`id` = a.`dataset_id`'
        sql += ' WHERE a.`date` > DATE_SUB(NOW(), INTERVAL 2 YEAR) ORDER BY d.`id`, a.`date`'

        all_accesses = {}
        num_records = 0

        # small speedup: avoid repeating the inventory lookup for the same dataset
        current_dataset_name = ''
        dataset_exists = True
        for dataset_name, timestamp, num_accesses in self._history.db.xquery(sql):
            num_records += 1

            if dataset_name == current_dataset_name:
                if not dataset_exists:
                    continue
            else:
                current_dataset_name = dataset_name

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    dataset_exists = False
                    continue
                else:
                    dataset_exists = True

                accesses = all_accesses[dataset] = []

            accesses.append((timestamp, num_accesses))

        try:
            last_update = self._history.db.query('SELECT UNIX_TIMESTAMP(`dataset_accesses_last_update`) FROM `popularity_last_update`')[0]
        except IndexError:
            last_update = 0

        LOG.info('Loaded %d dataset access records. Last update on %s UTC', num_records, time.strftime('%Y-%m-%d', time.gmtime(last_update)))

        return all_accesses

    def _compute(self, inventory, all_accesses):
        """
        Set the dataset usage rank based on access list.
        nAccessed is NACC normalized by size (in GB).
        @param inventory   DynamoInventory
        @param all_accesses {dataset: [(date, number of access)]} (time ordered)
        """

        now = time.time()
        today = datetime.datetime.utcfromtimestamp(now).date()

        for dataset in inventory.datasets.itervalues():
            last_access = 0
            num_access = 0
            norm_access = 0.

            try:
                accesses = all_accesses[dataset]
            except KeyError:
                pass
            else:
                last_access = accesses[-1][0]
                num_access = sum(e[1] for e in accesses)
                if dataset.size != 0:
                    norm_access = float(num_access) / (dataset.size * 1.e-9)

            try:
                last_block_created = max(r.last_block_created() for r in dataset.replicas)
            except ValueError: # empty sequence
                last_block_created = 0

            last_change = max(last_access, dataset.last_update, last_block_created)
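            # The rank grows by one unit per day since the dataset was last accessed, updated or
            # extended, and is offset by accesses per GB: e.g. a 1 TB dataset last touched 10 days
            # ago with 1000 recorded accesses gets roughly 10 - 1000/1000 = 9.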

            rank = (now - last_change) / (24. * 3600.) - norm_access

            dataset.attr['global_usage_rank'] = rank
            dataset.attr['num_access'] = num_access
            dataset.attr['last_access'] = max(last_access, dataset.last_update)

    def update(self, inventory):
        try:
            try:
                last_update = self._history.db.query('SELECT UNIX_TIMESTAMP(`dataset_accesses_last_update`) FROM `popularity_last_update`')[0]
            except IndexError:
                last_update = time.time() - 3600 * 24 # just go back by a day
                if not self._read_only:
                    self._history.db.query('INSERT INTO `popularity_last_update` VALUES ()')

            if not self._read_only:
                self._history.db.query('UPDATE `popularity_last_update` SET `dataset_accesses_last_update` = NOW()', retries = 0, silent = True)

        except MySQLdb.OperationalError:
            # We have a read-only config
            self._read_only = True
            LOG.info('Cannot write to DB. Switching to read_only.')

        start_time = max(last_update, (time.time() - 3600 * 24 * self.max_back_query))
        start_date = datetime.date(*time.gmtime(start_time)[:3])

        source_records = self._get_source_records(inventory, start_date)

        if not self._read_only:
            self._save_records(source_records)
            # remove old entries
            self._history.db.query('DELETE FROM `dataset_accesses` WHERE `date` < DATE_SUB(NOW(), INTERVAL 2 YEAR)')
            self._history.db.query('UPDATE `popularity_last_update` SET `dataset_accesses_last_update` = NOW()')

    def _get_source_records(self, inventory, start_date):
        """
        Get the replica access data from PopDB from start_date to today.
        @param inventory      DynamoInventory
        @param start_date     Query start date (datetime.date)
        @return  {replica: {date: (number of accesses, total cpu time)}}
        """

        days_to_query = []

        utctoday = datetime.date(*time.gmtime()[:3])
        date = start_date
        while date <= utctoday: # get records up to today
            days_to_query.append(date)
            date += datetime.timedelta(1) # one day

        LOG.info('Updating dataset access info from %s to %s', start_date.strftime('%Y-%m-%d'), utctoday.strftime('%Y-%m-%d'))

        all_accesses = {}

        arg_pool = []
        for site in inventory.sites.itervalues():
            matched = (len(self.included_sites) == 0)

            for pattern in self.included_sites:
                if fnmatch.fnmatch(site.name, pattern):
                    matched = True
                    break

            for pattern in self.excluded_sites:
                if fnmatch.fnmatch(site.name, pattern):
                    matched = False
                    break

            if matched:
                for date in days_to_query:
                    arg_pool.append((site, inventory, date))

        mapper = Map()
        mapper.logger = LOG

        records = mapper.execute(self._get_site_record, arg_pool)

        for site_record in records:
            for replica, date, naccess, cputime in site_record:
                if replica not in all_accesses:
                    all_accesses[replica] = {}

                all_accesses[replica][date] = (naccess, cputime)

        return all_accesses

    def _get_site_record(self, site, inventory, date):
        """
        Get the replica access data on a single site from PopDB.
        @param site       Site
        @param inventory  Inventory
        @param date       datetime.date
        @return [(replica, date, number of accesses, total cpu time)]
        """

        if site.name.startswith('T0'):
            return []
        elif site.name.startswith('T1') and site.name.count('_') > 2:
            nameparts = site.name.split('_')
            sitename = '_'.join(nameparts[:3])
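            # e.g. a name like 'T1_US_FNAL_Disk' would be queried to PopDB as 'T1_US_FNAL'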
            service = 'popularity/DSStatInTimeWindow/' # the trailing slash is apparently important
        elif site.name == 'T2_CH_CERN':
            sitename = site.name
            service = 'xrdpopularity/DSStatInTimeWindow'
        else:
            sitename = site.name
            service = 'popularity/DSStatInTimeWindow/'

        datestr = date.strftime('%Y-%m-%d')
        result = self._popdb.make_request(service, ['sitename=' + sitename, 'tstart=' + datestr, 'tstop=' + datestr])

        records = []
        
        for ds_entry in result:
            try:
                dataset = inventory.datasets[ds_entry['COLLNAME']]
            except KeyError:
                continue

            replica = site.find_dataset_replica(dataset)
            if replica is None:
                continue

            records.append((replica, date, int(ds_entry['NACC']), float(ds_entry['TOTCPU'])))

        return records

    def _save_records(self, records):
        """
        Save the newly fetched access records.
        @param records  {replica: {date: (number of accesses, total cpu time)}}
        """

        site_names = set(r.site.name for r in records.iterkeys())
        self._history.save_sites(site_names)
        site_id_map = dict(self._history.db.select_many('sites', ('name', 'id'), 'name', site_names))

        dataset_names = set(r.dataset.name for r in records.iterkeys())
        self._history.save_datasets(dataset_names)
        dataset_id_map = dict(self._history.db.select_many('datasets', ('name', 'id'), 'name', dataset_names))

        fields = ('dataset_id', 'site_id', 'date', 'access_type', 'num_accesses', 'cputime')

        data = []
        for replica, entries in records.iteritems():
            dataset_id = dataset_id_map[replica.dataset.name]
            site_id = site_id_map[replica.site.name]

            for date, (num_accesses, cputime) in entries.iteritems():
                data.append((dataset_id, site_id, date.strftime('%Y-%m-%d'), 'local', num_accesses, cputime))

        self._history.db.insert_many('dataset_accesses', fields, None, data, do_update = True)
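
A hedged usage sketch (not from the source) of how this attr producer appears to be driven, based only on the methods above. It assumes Configuration accepts a plain dictionary and that `inventory` is a DynamoInventory obtained elsewhere; the configuration values are illustrative.

CRABAccessHistory.set_default({'popdb': None, 'history': None, 'max_back_query': 7, 'read_only': True})
access_history = CRABAccessHistory()
access_history.update(inventory)   # fetch new PopDB records and store them in the history DB
access_history.load(inventory)     # compute and attach the attrs listed in `produces`
for dataset in inventory.datasets.itervalues():
    rank = dataset.attr['global_usage_rank']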
Beispiel #14
0
    def __init__(self, config):
        WebModule.__init__(self, config)
        self.registry = RegistryDatabase()
        self.history = HistoryDatabase()
Beispiel #15
0
class DetoxLockBase(WebModule):
    def __init__(self, config):
        WebModule.__init__(self, config)
        self.registry = RegistryDatabase()
        self.history = HistoryDatabase()

    def _validate_request(self, request, inventory, required, allowed = None):
        for key in required:
            if key not in request:
                raise MissingParameter(key)

        if allowed is None:
            allowed = []

        for key in request.iterkeys():
            if key not in required and key not in allowed:
                raise ExtraParameter(key)

        if 'lockid' in request:
            if type(request['lockid']) is str:
                lock_ids = request['lockid'].split(',')
            else:
                lock_ids = request['lockid']
            try:
                request['lockid'] = map(int, lock_ids)
            except ValueError:
                raise InvalidRequest('Invalid lock id %s' % request['lockid'])

        if 'sites' in request:
            if type(request['sites']) is str:
                request['sites'] = request['sites'].split(',')

            for site in request['sites']:
                if '*' in site or '?' in site:
                    pass
                elif site not in inventory.sites:
                    raise InvalidRequest('Unknown site %s' % site)

        if 'groups' in request:
            if type(request['groups']) is str:
                request['groups'] = request['groups'].split(',')

            for group in request['groups']:
                if '*' in group or '?' in group:
                    pass
                elif group not in inventory.groups:
                    raise InvalidRequest('Unknown group %s' % group)

        if 'user' in request:
            if type(request['user']) is str:
                request['user'] = request['user'].split(',')

        for key in ['expires', 'created_before', 'created_after', 'expires_before', 'expires_after']:
            if key in request:
                t = dateparser.parse(request[key])
                request[key] = calendar.timegm(t.utctimetuple())

    def _get_lock(self, request, valid_only = False):
        sql = 'SELECT l.`id`, l.`user`, l.`dn`, s.`name`, l.`item`, l.`sites`, l.`groups`,'
        sql += ' UNIX_TIMESTAMP(l.`lock_date`), UNIX_TIMESTAMP(l.`expiration_date`), l.`comment`'
        sql += ' FROM `detox_locks` AS l'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'
        
        constraints = []
        args = []
        user_const = -1

        if 'lockid' in request:
            constraints.append('l.`id` IN %s' % MySQL.stringify_sequence(request['lockid']))

        if 'user' in request:
            user_const = len(constraints)
            constraints.append('l.`user` IN %s' % MySQL.stringify_sequence(request['user']))

        if 'service' in request:
            constraints.append('s.`name` = %s')
            args.append(request['service'])

        if 'item' in request:
            constraints.append('l.`item` = %s')
            args.append(request['item'])

        if 'sites' in request:
            constraints.append('l.`sites` IN %s' % MySQL.stringify_sequence(request['sites']))

        if 'groups' in request:
            constraints.append('l.`groups` IN %s' % MySQL.stringify_sequence(request['groups']))

        if 'created_before' in request:
            constraints.append('l.`lock_date` <= FROM_UNIXTIME(%s)')
            args.append(request['created_before'])

        if 'created_after' in request:
            constraints.append('l.`lock_date` >= FROM_UNIXTIME(%s)')
            args.append(request['created_after'])

        if 'expires_before' in request:
            constraints.append('l.`expiration_date` <= FROM_UNIXTIME(%s)')
            args.append(request['expires_before'])

        if 'expires_after' in request:
            constraints.append('l.`expiration_date` >= FROM_UNIXTIME(%s)')
            args.append(request['expires_after'])

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        existing = []

        for lock_id, user, dn, service, item, site, group, lock_date, expiration_date, comment in self.registry.db.xquery(sql, *args):
            lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': item,
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(lock_date)),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(expiration_date))
            }
            if service is not None:
                lock['service'] = service
            if site is not None:
                lock['sites'] = site
            if group is not None:
                lock['groups'] = group
            if comment is not None:
                lock['comment'] = comment

            existing.append(lock)

        if valid_only or ('lockid' in request and len(existing) != 0):
            return existing

        sql = 'SELECT l.`id`, u.`name`, u.`dn`, s.`name`, l.`item`, l.`sites`, l.`groups`,'
        sql += ' UNIX_TIMESTAMP(l.`lock_date`), UNIX_TIMESTAMP(l.`unlock_date`), UNIX_TIMESTAMP(l.`expiration_date`), l.`comment`'
        sql += ' FROM `detox_locks` AS l'
        sql += ' LEFT JOIN `users` AS u ON u.`id` = l.`user_id`'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'

        if len(constraints) != 0:
            if user_const != -1:
                constraints[user_const] = 'u.`name` IN %s' % MySQL.stringify_sequence(request['user'])

            sql += ' WHERE ' + ' AND '.join(constraints)
        
        for lock_id, user, dn, service, item, site, group, lock_date, unlock_date, expiration_date, comment in self.history.db.xquery(sql, *args):
            lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': item,
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(lock_date)),
                'unlocked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(unlock_date)),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(expiration_date))
            }
            if service is not None:
                lock['service'] = service
            if site is not None:
                lock['sites'] = site
            if group is not None:
                lock['groups'] = group
            if comment is not None:
                lock['comment'] = comment

            existing.append(lock)

        return existing

    def _create_lock(self, request, user, dn):
        service_id = 0
        if 'service' in request:
            try:
                service_id = self.registry.db.query('SELECT `id` FROM `user_services` WHERE `name` = %s', request['service'])[0]
            except IndexError:
                pass

        columns = ('item', 'sites', 'groups', 'lock_date', 'expiration_date', 'user', 'dn', 'service_id', 'comment')

        comment = None
        if 'comment' in request:
            comment = request['comment']

        values = [(request['item'], None, None, MySQL.bare('NOW()'), MySQL.bare('FROM_UNIXTIME(%d)' % request['expires']), user, dn, service_id, comment)]
        if 'sites' in request:
            new_values = []
            for site in request['sites']:
                for v in values:
                    new_values.append(v[:1] + (site,) + v[2:])
            values = new_values
        if 'groups' in request:
            new_values = []
            for group in request['groups']:
                for v in values:
                    new_values.append(v[:2] + (group,) + v[3:])
            values = new_values

        new_locks = []

        for v in values:
            lock_id = self.registry.db.insert_get_id('detox_locks', columns, v)

            new_lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': request['item'],
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(request['expires']))
            }
            if v[7] != 0:
                new_lock['service'] = request['service']
            if v[1] is not None:
                new_lock['sites'] = v[1]
            if v[2] is not None:
                new_lock['groups'] = v[2]
            if 'comment' in request:
                new_lock['comment'] = request['comment']

            new_locks.append(new_lock)

        return new_locks

    def _update_lock(self, existing, request):
        updates = []
        args = []
        if 'expires' in request:
            updates.append('`expiration_date` = FROM_UNIXTIME(%s)')
            args.append(request['expires'])
        if 'comment' in request:
            updates.append('`comment` = %s')
            args.append(request['comment'])

        if len(updates) == 0:
            return []

        sql = 'UPDATE `detox_locks` SET ' + ', '.join(updates)

        updated = []

        for lock in existing:
            self.registry.db.query(sql + ' WHERE `id` = %d' % lock['lockid'], *args)

            if 'expires' in request:
                lock['expires'] = time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(request['expires']))
            if 'comment' in request:
                lock['comment'] = request['comment']

            updated.append(lock)

        return updated

    def _disable_lock(self, existing):
        history_sql = 'INSERT INTO `detox_locks` (`id`, `item`, `sites`, `groups`, `lock_date`, `unlock_date`, `expiration_date`, `user_id`, `service_id`, `comment`)'
        history_sql += ' VALUES (%s, %s, %s, %s, FROM_UNIXTIME(%s), NOW(), FROM_UNIXTIME(%s), %s, %s, %s)'

        registry_sql = 'DELETE FROM `detox_locks` WHERE `id` = %s'

        disabled = []

        for lock in existing:
            if 'unlocked' in lock:
                continue
            
            user_id = self.history.save_users([(lock['user'], lock['dn'])], get_ids = True)[0]

            if 'sites' in lock:
                sites = lock['sites']
            else:
                sites = None

            if 'groups' in lock:
                groups = lock['groups']
            else:
                groups = None

            if 'service' in lock:
                service_id = self.history.save_user_services([lock['service']], get_ids = True)[0]
            else:
                service_id = 0

            if 'comment' in lock:
                comment = lock['comment']
            else:
                comment = None

            lock_date = calendar.timegm(time.strptime(lock['locked'], '%Y-%m-%d %H:%M:%S %Z'))
            expiration_date = calendar.timegm(time.strptime(lock['expires'], '%Y-%m-%d %H:%M:%S %Z'))

            self.history.db.query(history_sql, lock['lockid'], lock['item'], sites, groups,
                lock_date, expiration_date, user_id, service_id, comment)

            disabled.append(lock)
            
            self.registry.db.query(registry_sql, lock['lockid'])

        return disabled

    def _lock_tables(self):
        self.registry.db.lock_tables(write = ['detox_locks', ('detox_locks', 'l'), 'user_services', ('user_services', 's')])

    def _unlock_tables(self):
        self.registry.db.unlock_tables()
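
A hedged sketch (not from the source) of how a lock request dictionary flows through the helpers above. The required/allowed key lists, the item name, the user identity and the web-module config are illustrative assumptions, and `inventory` is a DynamoInventory obtained elsewhere.

lock_module = DetoxLockBase(web_config)   # in practice a subclass wired into the web server
request = {
    'item': '/Hypothetical/Dataset-v1/AOD',   # dataset (or block) name; hypothetical
    'sites': 'T2_US_*',          # comma-separated string; split and wildcard-checked by _validate_request
    'expires': '2018-12-31',     # parsed with dateparser and converted to a UNIX timestamp
    'comment': 'example lock'
}
lock_module._validate_request(request, inventory, ['item', 'expires'], allowed=['sites', 'groups', 'comment'])
new_locks = lock_module._create_lock(request, 'someuser', '/DC=ch/CN=someuser')   # hypothetical user and DN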
Beispiel #17
0
class RequestManager(object):
    """
    Manager for external copy and deletion requests made through the web interface.
    Requests are written in registry when they are in new and activated states.
    When moving to terminal states (completed, rejected, cancelled) the records are migrated to history.
    This is a MySQL-specific implementation, but the interface is generic. It should be straightforward
    to abstractify the class if necessary.
    """

    # default config
    _config = df.Configuration()

    @staticmethod
    def set_default(config):
        RequestManager._config = df.Configuration(config)

    def __init__(self, optype, config=None):
        """
        @param optype  'copy' or 'deletion'.
        """
        if config is None:
            config = RequestManager._config

        self.registry = RegistryDatabase(config.get('registry', None))
        self.history = HistoryDatabase(config.get('history', None))

        # we'll be using temporary tables
        self.registry.db.reuse_connection = True
        self.history.db.reuse_connection = True

        self.optype = optype

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value=True):
        self._read_only = value

    def lock(self):
        """
        Lock the registry table for lookup + update workflows.
        """
        if not self._read_only:
            self.registry.db.lock_tables()

    def unlock(self):
        if not self._read_only:
            self.registry.db.unlock_tables()

    def _save_items(self, items):
        """
        Save the items into history.
        @param items          List of dataset and block names.

        @return [dataset id], [block id]
        """
        dataset_names = []
        block_names = []

        for item in items:
            # names are validated already
            try:
                dataset_name, block_name = df.Block.from_full_name(item)
            except df.ObjectError:
                dataset_names.append(item)
            else:
                block_names.append(
                    (dataset_name, df.Block.to_real_name(block_name)))

        dataset_ids = self.history.save_datasets(dataset_names, get_ids=True)
        block_ids = self.history.save_blocks(block_names, get_ids=True)

        return dataset_ids, block_ids

    def _get_saved_item_ids(self, items):
        """
        Get the history dataset and block ids from the items list.
        @param items          List of dataset and block names.

        @return [dataset id], [block id]
        """
        dataset_names = []
        block_names = []

        for item in items:
            # names are validated already
            try:
                dataset_name, block_name = df.Block.from_full_name(item)
            except df.ObjectError:
                dataset_names.append(item)
            else:
                block_names.append(
                    (dataset_name, df.Block.to_real_name(block_name)))

        dataset_ids = self.history.db.select_many('datasets', 'id', 'name',
                                                  dataset_names)
        block_ids = self.history.db.select_many('blocks', 'id', 'name',
                                                block_names)

        return dataset_ids, block_ids

    def _make_temp_registry_tables(self, items, sites):
        """
        Make temporary tables to be used to constrain request search.
        @param items   List of dataset and block names.
        @param sites   List of site names.
        """

        # Make temporary tables and fill ids_tmp with ids of requests whose item and site lists fully cover the provided list of items and sites.
        columns = [
            '`item` varchar(512) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL'
        ]
        self.registry.db.create_tmp_table('items_tmp', columns)
        columns = [
            '`site` varchar(32) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL'
        ]
        self.registry.db.create_tmp_table('sites_tmp', columns)

        if items is not None:
            self.registry.db.insert_many('items_tmp', ('item', ),
                                         MySQL.make_tuple,
                                         items,
                                         db=self.registry.db.scratch_db)
        if sites is not None:
            self.registry.db.insert_many('sites_tmp', ('site', ),
                                         MySQL.make_tuple,
                                         sites,
                                         db=self.registry.db.scratch_db)

        columns = [
            '`id` int(10) unsigned NOT NULL AUTO_INCREMENT',
            'PRIMARY KEY (`id`)'
        ]
        self.registry.db.create_tmp_table('ids_tmp', columns)
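        # The query below checks, for each request r, that every row of sites_tmp and items_tmp appears
        # in the request's own site/item lists: the inner SELECTs yield a 0/1 flag per temporary-table
        # row, and requiring that 0 is absent means the request fully covers the provided constraints.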

        sql = 'INSERT INTO `{db}`.`ids_tmp`'
        sql += ' SELECT r.`id` FROM `{op}_requests` AS r WHERE'
        sql += ' 0 NOT IN (SELECT (`site` IN (SELECT `site` FROM `{op}_request_sites` AS s WHERE s.`request_id` = r.`id`)) FROM `{db}`.`sites_tmp`)'
        sql += ' AND '
        sql += ' 0 NOT IN (SELECT (`item` IN (SELECT `item` FROM `{op}_request_items` AS i WHERE i.`request_id` = r.`id`)) FROM `{db}`.`items_tmp`)'
        self.registry.db.query(
            sql.format(db=self.registry.db.scratch_db, op=self.optype))

        self.registry.db.drop_tmp_table('items_tmp')
        self.registry.db.drop_tmp_table('sites_tmp')

        return '`{db}`.`ids_tmp`'.format(db=self.registry.db.scratch_db)

    def _make_temp_history_tables(self, dataset_ids, block_ids, site_ids):
        """
        Make temporary tables to be used to constrain request search.
        @param dataset_ids   List of dataset ids.
        @param block_ids     List of block ids.
        @param site_ids      List of site ids.
        """

        columns = [
            '`id` int(10) unsigned NOT NULL AUTO_INCREMENT',
            'PRIMARY KEY (`id`)'
        ]
        self.history.db.create_tmp_table('ids_tmp', columns)

        tmp_table_name = '`{db}`.`ids_tmp`'.format(
            db=self.history.db.scratch_db)

        if (dataset_ids is not None and len(dataset_ids) == 0) or \
                (block_ids is not None and len(block_ids) == 0) or \
                (site_ids is not None and len(site_ids) == 0):
            # temp table must be empty
            return tmp_table_name

        # Make temporary tables and fill ids_tmp with ids of requests whose item and site lists fully cover the provided list of items and sites.
        columns = ['`id` int(10) unsigned NOT NULL']
        self.history.db.create_tmp_table('datasets_tmp', columns)
        columns = ['`id` bigint(20) unsigned NOT NULL']
        self.history.db.create_tmp_table('blocks_tmp', columns)
        columns = ['`id` int(10) unsigned NOT NULL']
        self.history.db.create_tmp_table('sites_tmp', columns)

        if dataset_ids is not None:
            self.history.db.insert_many('datasets_tmp', ('id', ),
                                        MySQL.make_tuple,
                                        dataset_ids,
                                        db=self.history.db.scratch_db)
        if block_ids is not None:
            self.history.db.insert_many('blocks_tmp', ('id', ),
                                        MySQL.make_tuple,
                                        block_ids,
                                        db=self.history.db.scratch_db)
        if site_ids is not None:
            self.history.db.insert_many('sites_tmp', ('id', ),
                                        MySQL.make_tuple,
                                        site_ids,
                                        db=self.history.db.scratch_db)

        # Explaining the query outwards:
        # SELECT `X_id` FROM `{op}_request_X` WHERE `request_id` = r.`id` -> Full list of X for the request
        # `id` IN (SELECT `X_id` ...) -> 0 or 1
        # SELECT (`id` IN (SELECT `X_id` ...)) FROM tmp.`X_tmp` -> 0s and 1s for all entries in X_tmp
        # 0 NOT IN (SELECT ... FROM tmp.`X_tmp`) -> All entries in X_tmp are contained in {op}_request_X for the specific request

        sql = 'INSERT INTO `{db}`.`ids_tmp`'
        sql += ' SELECT r.`id` FROM `{op}_requests` AS r WHERE'
        sql += ' 0 NOT IN (SELECT (`id` IN (SELECT `site_id` FROM `{op}_request_sites` AS s WHERE s.`request_id` = r.`id`)) FROM `{db}`.`sites_tmp`)'
        sql += ' AND '
        sql += ' 0 NOT IN (SELECT (`id` IN (SELECT `dataset_id` FROM `{op}_request_datasets` AS d WHERE d.`request_id` = r.`id`)) FROM `{db}`.`datasets_tmp`)'
        sql += ' AND '
        sql += ' 0 NOT IN (SELECT (`id` IN (SELECT `block_id` FROM `{op}_request_blocks` AS b WHERE b.`request_id` = r.`id`)) FROM `{db}`.`blocks_tmp`)'
        self.history.db.query(
            sql.format(db=self.history.db.scratch_db, op=self.optype))

        self.history.db.drop_tmp_table('datasets_tmp')
        self.history.db.drop_tmp_table('blocks_tmp')
        self.history.db.drop_tmp_table('sites_tmp')

        return tmp_table_name

    def _make_registry_constraints(self, request_id, statuses, users, items,
                                   sites):
        constraints = []

        if request_id is not None:
            constraints.append('r.`id` = %d' % request_id)

        if statuses is not None:
            constraints.append('r.`status` IN ' +
                               MySQL.stringify_sequence(statuses))

        if users is not None:
            constraints.append('r.`user` IN ' +
                               MySQL.stringify_sequence(users))

        if items is not None or sites is not None:
            temp_table = self._make_temp_registry_tables(items, sites)
            constraints.append(
                'r.`id` IN (SELECT `id` FROM {0})'.format(temp_table))

        if len(constraints) != 0:
            return ' WHERE ' + ' AND '.join(constraints)
        else:
            return ''

    def _make_history_constraints(self, request_id, statuses, users, items,
                                  sites):
        if users is not None:
            history_user_ids = self.history.db.select_many(
                'users', 'id', 'name', users)
        else:
            history_user_ids = None

        if items is not None:
            history_dataset_ids, history_block_ids = self._get_saved_item_ids(
                items)
        else:
            history_dataset_ids = None
            history_block_ids = None

        if sites is not None:
            history_site_ids = self.history.db.select_many(
                'sites', 'id', 'name', sites)
        else:
            history_site_ids = None

        constraints = []

        if request_id is not None:
            constraints.append('r.`id` = %d' % request_id)

        if statuses is not None:
            constraints.append('r.`status` IN ' +
                               MySQL.stringify_sequence(statuses))

        if users is not None:
            constraints.append('r.`user_id` IN ' +
                               MySQL.stringify_sequence(history_user_ids))

        if items is not None or sites is not None:
            temp_table = self._make_temp_history_tables(
                history_dataset_ids, history_block_ids, history_site_ids)
            constraints.append(
                'r.`id` IN (SELECT `id` FROM {0})'.format(temp_table))

        if len(constraints) != 0:
            return ' WHERE ' + ' AND '.join(constraints)
        else:
            return ''
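
A hedged sketch (not from the source) of how the lock()/unlock() pair above would presumably bracket a lookup-and-update workflow; the surrounding calls are illustrative.

manager = RequestManager('copy')   # optype is 'copy' or 'deletion', per the constructor docstring
manager.lock()                     # LOCK TABLES on the registry, unless read-only
try:
    pass                           # look up and modify copy requests here
finally:
    manager.unlock()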
Beispiel #18
0
class GlobalQueueRequestHistory(object):
    """
    Sets one attr:
      request_weight:  float value
    """

    produces = ['request_weight']

    _default_config = None

    @staticmethod
    def set_default(config):
        GlobalQueueRequestHistory._default_config = Configuration(config)

    def __init__(self, config=None):
        if config is None:
            config = GlobalQueueRequestHistory._default_config

        self._history = HistoryDatabase(config.get('history', None))
        self._htcondor = HTCondor(config.get('htcondor', None))

        # Weight computation halflife constant (given in days in config)
        self.weight_halflife = config.get('weight_halflife', 4) * 3600. * 24.

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value=True):
        self._read_only = value

    def load(self, inventory):
        records = self._get_stored_records(inventory)
        self._compute(inventory, records)

    def _get_stored_records(self, inventory):
        """
        Get the dataset request data from DB.
        @param inventory  DynamoInventory
        @return  {dataset: {jobid: GlobalQueueJob}}
        """

        # pick up requests that are less than 1 year old
        # old requests will be removed automatically next time the access information is saved from memory
        sql = 'SELECT d.`name`, r.`id`, UNIX_TIMESTAMP(r.`queue_time`), UNIX_TIMESTAMP(r.`completion_time`),'
        sql += ' r.`nodes_total`, r.`nodes_done`, r.`nodes_failed`, r.`nodes_queued` FROM `dataset_requests` AS r'
        sql += ' INNER JOIN `datasets` AS d ON d.`id` = r.`dataset_id`'
        sql += ' WHERE r.`queue_time` > DATE_SUB(NOW(), INTERVAL 1 YEAR) ORDER BY d.`id`, r.`queue_time`'

        all_requests = {}
        num_records = 0

        # little speedup by not repeating lookups for the same dataset
        current_dataset_name = ''
        dataset_exists = True
        for dataset_name, job_id, queue_time, completion_time, nodes_total, nodes_done, nodes_failed, nodes_queued in self._history.db.xquery(
                sql):
            num_records += 1

            if dataset_name == current_dataset_name:
                if not dataset_exists:
                    continue
            else:
                current_dataset_name = dataset_name

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    dataset_exists = False
                    continue
                else:
                    dataset_exists = True

                requests = all_requests[dataset] = {}

            requests[job_id] = GlobalQueueJob(queue_time, completion_time,
                                              nodes_total, nodes_done,
                                              nodes_failed, nodes_queued)

        try:
            last_update = self._history.db.query(
                'SELECT UNIX_TIMESTAMP(`dataset_requests_last_update`) FROM `popularity_last_update`',
                retries=1)[0]
        except IndexError:
            last_update = 0

        LOG.info('Loaded %d dataset request records. Last update at %s UTC',
                 num_records,
                 time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(last_update)))

        return all_requests

    def _compute(self, inventory, all_requests):
        """
        Set the dataset request weight based on the request list. Formula:
          w = Sum_i exp(-t_i / T)
        where t_i is the time elapsed since the ith request and T = weight_halflife / ln(2)
        (the halflife is given in days in the configuration).
        @param inventory     DynamoInventory
        @param all_requests  {dataset: {jobid: GlobalQueueJob}}
        """

        now = time.time()
        decay_constant = self.weight_halflife / math.log(2.)
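        # With the default 4-day halflife, decay_constant is 4 days / ln(2), about 5.77 days: a request
        # queued exactly 4 days ago contributes exp(-ln 2) = 0.5 to the weight, and one queued 8 days
        # ago contributes 0.25.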

        for dataset in inventory.datasets.itervalues():
            try:
                requests = all_requests[dataset]
            except KeyError:
                dataset.attr['request_weight'] = 0.
                continue

            weight = 0.
            for job in requests.itervalues():
                # job.queue_time is the time the request entered the queue
                weight += math.exp((job.queue_time - now) / decay_constant)

            dataset.attr['request_weight'] = weight

    def update(self, inventory):
        try:
            try:
                last_update = self._history.db.query(
                    'SELECT UNIX_TIMESTAMP(`dataset_requests_last_update`) FROM `popularity_last_update`',
                    retries=1)[0]
            except IndexError:
                last_update = time.time() - 3600 * 24  # just go back by a day
                if not self._read_only:
                    self._history.db.query(
                        'INSERT INTO `popularity_last_update` VALUES ()')

            if not self._read_only:
                self._history.db.query(
                    'UPDATE `popularity_last_update` SET `dataset_requests_last_update` = NOW()',
                    retries=0,
                    silent=True)

        except MySQLdb.OperationalError:
            # We have a read-only config
            self._read_only = True
            LOG.info('Cannot write to DB. Switching to read_only.')

        source_records = self._get_source_records(inventory, last_update)

        if not self._read_only:
            self._save_records(source_records)
            # remove old entries
            self._history.db.query(
                'DELETE FROM `dataset_requests` WHERE `queue_time` < DATE_SUB(NOW(), INTERVAL 1 YEAR)'
            )
            self._history.db.query(
                'UPDATE `popularity_last_update` SET `dataset_requests_last_update` = NOW()'
            )

    def _get_source_records(self, inventory, last_update):
        """
        Get the dataset request data from Global Queue schedd.
        @param inventory    DynamoInventory
        @param last_update  UNIX timestamp
        @return {dataset: {jobid: GlobalQueueJob}}
        """

        constraint = 'TaskType=?="ROOT" && !isUndefined(DESIRED_CMSDataset) && (QDate > {last_update} || CompletionDate > {last_update})'.format(
            last_update=last_update)

        attributes = [
            'DESIRED_CMSDataset', 'GlobalJobId', 'QDate', 'CompletionDate',
            'DAG_NodesTotal', 'DAG_NodesDone', 'DAG_NodesFailed',
            'DAG_NodesQueued'
        ]

        job_ads = self._htcondor.find_jobs(constraint=constraint,
                                           attributes=attributes)

        job_ads.sort(key=lambda a: a['DESIRED_CMSDataset'])

        all_requests = {}

        for ad in job_ads:
            dataset_name = ad['DESIRED_CMSDataset']

            try:
                dataset = inventory.datasets[dataset_name]
            except KeyError:
                continue

            if dataset not in all_requests:
                all_requests[dataset] = {}

            try:
                nodes_total = ad['DAG_NodesTotal']
                nodes_done = ad['DAG_NodesDone']
                nodes_failed = ad['DAG_NodesFailed']
                nodes_queued = ad['DAG_NodesQueued']
            except KeyError:
                nodes_total = 0
                nodes_done = 0
                nodes_failed = 0
                nodes_queued = 0

            all_requests[dataset][ad['GlobalJobId']] = GlobalQueueJob(
                ad['QDate'], ad['CompletionDate'], nodes_total, nodes_done,
                nodes_failed, nodes_queued)

        return all_requests

    def _save_records(self, records):
        """
        Save the newly fetched request records.
        @param records  {dataset: {jobid: GlobalQueueJob}}
        """

        dataset_names = [d.name for d in records.iterkeys()]

        self._history.save_datasets(dataset_names)
        dataset_id_map = dict(
            self._history.db.select_many('datasets', ('name', 'id'), 'name',
                                         dataset_names))

        fields = ('id', 'dataset_id', 'queue_time', 'completion_time',
                  'nodes_total', 'nodes_done', 'nodes_failed', 'nodes_queued')

        data = []
        for dataset, dataset_request_list in records.iteritems():
            dataset_id = dataset_id_map[dataset.name]

            for job_id, (queue_time, completion_time, nodes_total, nodes_done,
                         nodes_failed,
                         nodes_queued) in dataset_request_list.iteritems():
                data.append(
                    (job_id, dataset_id,
                     time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(queue_time)),
                     time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.localtime(completion_time))
                     if completion_time > 0 else '0000-00-00 00:00:00',
                     nodes_total, nodes_done, nodes_failed, nodes_queued))

        self._history.db.insert_many('dataset_requests',
                                     fields,
                                     None,
                                     data,
                                     do_update=True)
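
GlobalQueueJob is used above both with attribute access (job.queue_time) and with tuple unpacking, so it is presumably a namedtuple along these lines (a sketch, not the actual definition from the source):

import collections

GlobalQueueJob = collections.namedtuple(
    'GlobalQueueJob',
    ['queue_time', 'completion_time', 'nodes_total', 'nodes_done', 'nodes_failed', 'nodes_queued'])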
Beispiel #19
0
class RequestManager(object):
    """
    Manager for external copy and deletion requests made through the web interface.
    Requests are written in registry when they are in new and activated states.
    When moving to terminal states (completed, rejected, cancelled) the records are migrated to history.
    This is a MySQL-specific implementation, but the interface is generic. It should be straightforward
    to abstractify the class if necessary.
    """

    # default config
    _config = df.Configuration()

    @staticmethod
    def set_default(config):
        RequestManager._config = df.Configuration(config)

    def __init__(self, optype, config = None):
        """
        @param optype  'copy' or 'deletion'.
        """
        if config is None:
            config = RequestManager._config

        self.registry = RegistryDatabase(config.get('registry', None))
        self.history = HistoryDatabase(config.get('history', None))
        #self.cache = CacheDatabase(config.get('cache', None))

        # we'll be using temporary tables
        self.registry.db.reuse_connection = True
        self.history.db.reuse_connection = True
        #self.cache.db.reuse_connection = True

        self.optype = optype

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value = True):
        self._read_only = value

    def lock(self):
        """
        Lock the registry table for lookup + update workflows.
        """
        if not self._read_only:
            self.registry.db.lock_tables()

    def unlock(self):
        if not self._read_only:
            self.registry.db.unlock_tables()

    def _save_items(self, items):
        """
        Save the items into history.
        @param items          List of dataset and block names.

        @return [dataset id], [block id]
        """
        dataset_names = []
        block_names = []

        for item in items:
            # names are validated already
            try:
                dataset_name, block_name = df.Block.from_full_name(item)
            except df.ObjectError:
                dataset_names.append(item)
            else:
                block_names.append((dataset_name, df.Block.to_real_name(block_name)))

        dataset_ids = self.history.save_datasets(dataset_names, get_ids = True)
        block_ids = self.history.save_blocks(block_names, get_ids = True)

        return dataset_ids, block_ids
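
    # For illustration with hypothetical names: an item like '/A/B/C' raises ObjectError in
    # Block.from_full_name and is saved as a dataset, while '/A/B/C#1234-abcd' is split into
    # the dataset name '/A/B/C' and the block name '1234-abcd' and saved as a block.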

    def _get_saved_item_ids(self, items):
        """
        Get the history dataset and block ids from the items list.
        @param items          List of dataset and block names.

        @return [dataset id], [block id]
        """
        dataset_names = []
        block_names = []

        for item in items:
            # names are validated already
            try:
                dataset_name, block_name = df.Block.from_full_name(item)
            except df.ObjectError:
                dataset_names.append(item)
            else:
                block_names.append((dataset_name, df.Block.to_real_name(block_name)))

        dataset_ids = self.history.db.select_many('datasets', 'id', 'name', dataset_names)
        block_ids = self.history.db.select_many('blocks', 'id', 'name', block_names)

        return dataset_ids, block_ids

    def _make_temp_registry_tables(self, items, sites):
        """
        Make temporary tables to be used to constrain request search.
        @param items   List of dataset and block names.
        @param sites   List of site names.
        """

        # Make temporary tables and fill ids_tmp with the ids of requests whose item and site
        # lists fully cover the provided lists of items and sites (see the comment in
        # _make_temp_history_tables for a step-by-step reading of the nested "0 NOT IN" query).
        columns = ['`item` varchar(512) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL']
        self.registry.db.create_tmp_table('items_tmp', columns)
        columns = ['`site` varchar(32) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL']
        self.registry.db.create_tmp_table('sites_tmp', columns)

        if items is not None:
            self.registry.db.insert_many('items_tmp', ('item',), MySQL.make_tuple, items, db = self.registry.db.scratch_db)

        LOG.info('Constraining requests to sites %s', sites)

        if sites is not None:
            self.registry.db.insert_many('sites_tmp', ('site',), MySQL.make_tuple, sites, db = self.registry.db.scratch_db)

        columns = [
            '`id` int(10) unsigned NOT NULL AUTO_INCREMENT',
            'PRIMARY KEY (`id`)'
        ]
        self.registry.db.create_tmp_table('ids_tmp', columns)


        sql = 'INSERT INTO `{db}`.`ids_tmp`'
        sql += ' SELECT r.`id` FROM `{op}_requests` AS r WHERE'
        sql += ' 0 NOT IN (SELECT (`site` IN (SELECT `site` FROM `{op}_request_sites` AS s WHERE s.`request_id` = r.`id`)) FROM `{db}`.`sites_tmp`)'
        sql += ' AND '
        sql += ' 0 NOT IN (SELECT (`item` IN (SELECT `item` FROM `{op}_request_items` AS i WHERE i.`request_id` = r.`id`)) FROM `{db}`.`items_tmp`)'
        self.registry.db.query(sql.format(db = self.registry.db.scratch_db, op = self.optype))

        self.registry.db.drop_tmp_table('items_tmp')
        self.registry.db.drop_tmp_table('sites_tmp')

        return '`{db}`.`ids_tmp`'.format(db = self.registry.db.scratch_db)

    def _make_temp_history_tables(self, dataset_ids, block_ids, site_ids):
        """
        Make temporary tables to be used to constrain request search.
        @param dataset_ids   List of dataset ids.
        @param block_ids     List of block ids.
        @param site_ids      List of site ids.
        """

        columns = [
            '`id` int(10) unsigned NOT NULL AUTO_INCREMENT',
            'PRIMARY KEY (`id`)'
        ]
        self.history.db.create_tmp_table('ids_tmp', columns)

        tmp_table_name = '`{db}`.`ids_tmp`'.format(db = self.history.db.scratch_db)

        if (dataset_ids is not None and len(dataset_ids) == 0) or \
                (block_ids is not None and len(block_ids) == 0) or \
                (site_ids is not None and len(site_ids) == 0):
            # temp table must be empty
            return tmp_table_name

        # Make temporary tables and fill ids_tmp with ids of requests whose item and site lists fully cover the provided list of items and sites.
        columns = ['`id` int(10) unsigned NOT NULL']
        self.history.db.create_tmp_table('datasets_tmp', columns)
        columns = ['`id` bigint(20) unsigned NOT NULL']
        self.history.db.create_tmp_table('blocks_tmp', columns)
        columns = ['`id` int(10) unsigned NOT NULL']
        self.history.db.create_tmp_table('sites_tmp', columns)

        if dataset_ids is not None:
            self.history.db.insert_many('datasets_tmp', ('id',), MySQL.make_tuple, dataset_ids, db = self.history.db.scratch_db)
        if block_ids is not None:
            self.history.db.insert_many('blocks_tmp', ('id',), MySQL.make_tuple, block_ids, db = self.history.db.scratch_db)
        if site_ids is not None:
            self.history.db.insert_many('sites_tmp', ('id',), MySQL.make_tuple, site_ids, db = self.history.db.scratch_db)

        # Reading the query from the innermost subquery outwards:
        # SELECT `X_id` FROM `{op}_request_X` WHERE `request_id` = r.`id` -> full list of X for the request
        # `id` IN (SELECT `X_id` ...) -> 0 or 1
        # SELECT (`id` IN (SELECT `X_id` ...)) FROM tmp.`X_tmp` -> 0s and 1s for all entries in X_tmp
        # 0 NOT IN (SELECT ... FROM tmp.`X_tmp`) -> all entries in X_tmp are contained in {op}_request_X for this request
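        #
        # Concrete example: if datasets_tmp holds ids {1, 2} and a request references datasets
        # {1, 2, 3}, every membership test yields 1, so 0 is not in the result and the request
        # id is kept; if the request only references {1, 3}, the test for id 2 yields 0 and the
        # request is skipped.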

        sql = 'INSERT INTO `{db}`.`ids_tmp`'
        sql += ' SELECT r.`id` FROM `{op}_requests` AS r WHERE'
        sql += ' 0 NOT IN (SELECT (`id` IN (SELECT `site_id` FROM `{op}_request_sites` AS s WHERE s.`request_id` = r.`id`)) FROM `{db}`.`sites_tmp`)'
        sql += ' AND '
        sql += ' 0 NOT IN (SELECT (`id` IN (SELECT `dataset_id` FROM `{op}_request_datasets` AS d WHERE d.`request_id` = r.`id`)) FROM `{db}`.`datasets_tmp`)'
        sql += ' AND '
        sql += ' 0 NOT IN (SELECT (`id` IN (SELECT `block_id` FROM `{op}_request_blocks` AS b WHERE b.`request_id` = r.`id`)) FROM `{db}`.`blocks_tmp`)'
        self.history.db.query(sql.format(db = self.history.db.scratch_db, op = self.optype))

        self.history.db.drop_tmp_table('datasets_tmp')
        self.history.db.drop_tmp_table('blocks_tmp')
        self.history.db.drop_tmp_table('sites_tmp')

        return tmp_table_name

    def _make_registry_constraints(self, request_id, statuses, users, items, sites):
        constraints = []

        if request_id is not None:
            constraints.append('r.`id` = %d' % request_id)

        if statuses is not None:
            constraints.append('r.`status` IN ' + MySQL.stringify_sequence(statuses))

        if users is not None:
            constraints.append('r.`user` IN ' + MySQL.stringify_sequence(users))

        if items is not None or sites is not None:
            temp_table = self._make_temp_registry_tables(items, sites)
            constraints.append('r.`id` IN (SELECT `id` FROM {0})'.format(temp_table))

        if len(constraints) != 0:
            return ' WHERE ' + ' AND '.join(constraints)
        else:
            return ''

    def _make_history_constraints(self, request_id, statuses, users, items, sites):
        if users is not None:
            history_user_ids = self.history.db.select_many('users', 'id', 'name', users)
        else:
            history_user_ids = None

        if items is not None:
            history_dataset_ids, history_block_ids = self._get_saved_item_ids(items)
        else:
            history_dataset_ids = None
            history_block_ids = None

        if sites is not None:
            history_site_ids = self.history.db.select_many('sites', 'id', 'name', sites)
        else:
            history_site_ids = None

        constraints = []

        if request_id is not None:
            constraints.append('r.`id` = %d' % request_id)

        if statuses is not None:
            constraints.append('r.`status` IN ' + MySQL.stringify_sequence(statuses))

        if users is not None:
            constraints.append('r.`user_id` IN ' + MySQL.stringify_sequence(history_user_ids))

        if items is not None or sites is not None:
            temp_table = self._make_temp_history_tables(history_dataset_ids, history_block_ids, history_site_ids)
            constraints.append('r.`id` IN (SELECT `id` FROM {0})'.format(temp_table))

        if len(constraints) != 0:
            return ' WHERE ' + ' AND '.join(constraints)
        else:
            return ''
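
    # Sketch of how these constraint strings are typically consumed (hypothetical query text,
    # not taken from the source):
    #
    #     constraints = self._make_history_constraints(None, ['completed'], None, None, None)
    #     sql = 'SELECT r.`id`, r.`status` FROM `{op}_requests` AS r'.format(op = self.optype)
    #     sql += constraints
    #     completed_requests = self.history.db.query(sql)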