def __init__(self, config):
    AppManager.__init__(self, config)

    if not hasattr(self, '_mysql'):
        # Table locks require one persistent connection.
        db_config = Configuration(config.db_params)
        db_config.reuse_connection = True
        self._mysql = MySQL(db_config)

    # Guarantee that the applications row with id 0 exists.
    nrows = self._mysql.query('SELECT COUNT(*) FROM `applications` WHERE `id` = 0')[0]
    if nrows == 0:
        # MySQL interprets an explicit id = 0 in an INSERT as "use the next
        # auto_increment value" (unless a server-wide setting says otherwise),
        # so insert with an implicit id first and rewrite it to 0 afterwards.
        fields = ('auth_level', 'title', 'path', 'status', 'user_id', 'user_host')
        row = (AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')
        new_id = self._mysql.insert_get_id('applications', columns=fields, values=row)
        self._mysql.query('UPDATE `applications` SET `id` = 0 WHERE `id` = %s', new_id)
def __init__(self, config):
    # Initialize the four operation/query mixins in declaration order.
    for base in (FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
        base.__init__(self, config)

    # Bookkeeping database.
    self.db = MySQL(config.db_params)
def __init__(self, config=None):
    # Fall back to the module-level default configuration.
    if config is None:
        config = HistoryDatabase._config

    self.db = MySQL(config.db_params)
    self.set_read_only(config.get('read_only', False))
def __init__(self, config):
    TransactionHistoryInterface.__init__(self, config)

    # Main history database and the cache database.
    self._mysql = MySQL(config.db_params)
    self._cache_db = MySQL(config.cache_db_params)

    # Lazily-populated name -> id lookup maps.
    self._site_id_map = {}
    self._dataset_id_map = {}
class RLFSMPhEDExReserveCopyInterface(CopyInterface):
    """
    CopyInterface using the Dynamo RLFSM.
    """

    def __init__(self, config=None):
        CopyInterface.__init__(self, config)

        self.rlfsm = RLFSM(config.get('rlfsm', None))
        self.mysql = MySQL(config.reserve_db_params)

    def set_read_only(self, value=True):  # override
        self._read_only = value
        self.rlfsm.set_read_only(value)

    def schedule_copies(self, replica_list, operation_id, comments=''):  # override
        """
        Subscribe the missing files of all replicas (all at a single site) to
        the RLFSM and record the transfer reservations.
        @param replica_list  List of DatasetReplica, all at the same site.
        @param operation_id  Operation id used for bookkeeping.
        @param comments      Unused.
        @return List of clone replicas that were scheduled.
        """
        sites = set(r.site for r in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

        LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

        result = []

        for replica in replica_list:
            # Function spec is to return clones (so that if specific block fails to copy, we can return a dataset replica without the block)
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)

            for block_replica in replica.block_replicas:
                LOG.debug('Subscribing files for %s', str(block_replica))

                if block_replica.file_ids is None:
                    LOG.debug('No file to subscribe for %s', str(block_replica))
                    # BUGFIX: this was "return", which aborted the entire
                    # scheduling call (returning None and skipping the
                    # reservation bookkeeping). Skip just this block replica.
                    continue

                all_files = block_replica.block.files
                missing_files = all_files - block_replica.files()

                for lfile in missing_files:
                    self.rlfsm.subscribe_file(block_replica.site, lfile)

                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        if not self._read_only:
            # Record one reservation per growing dataset replica, otherwise
            # one per block replica.
            for clone_replica in result:
                if clone_replica.growing:
                    self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, clone_replica.dataset.name, clone_replica.site.name, clone_replica.group.name)
                else:
                    for block_replica in clone_replica.block_replicas:
                        self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, block_replica.block.full_name(), clone_replica.site.name, block_replica.group.name)

        # no external dependency - everything is a success
        return result
def update(config, inventory):
    # Fetch new dataset request records from the HTCondor global queue and
    # save them to the store database, pruning entries older than one year.
    htcondor = HTCondor(config.htcondor.config)
    store = MySQL(config.store.db_params)

    last_update = store.query('SELECT UNIX_TIMESTAMP(`dataset_requests_last_update`) FROM `system`')[0]

    # Probe for write access by touching the timestamp; an OperationalError
    # implies the DB account is read-only.
    try:
        store.query('UPDATE `system` SET `dataset_requests_last_update` = NOW()', retries=0, silent=True)
    except MySQLdb.OperationalError:
        # We have a read-only config
        read_only = True
    else:
        read_only = False

    source_records = GlobalQueueRequestHistory._get_source_records(htcondor, inventory, last_update)

    if not read_only:
        GlobalQueueRequestHistory._save_records(source_records, store)

        # remove old entries
        store.query('DELETE FROM `dataset_requests` WHERE `queue_time` < DATE_SUB(NOW(), INTERVAL 1 YEAR)')

        store.query('UPDATE `system` SET `dataset_requests_last_update` = NOW()')
def __init__(self, config=None):
    if config is None:
        # No explicit configuration: a module-level default must be set.
        if MySQLReplicaLock._default_config is None:
            raise ConfigurationError('MySQLReplicaLock default config is not set')
        config = MySQLReplicaLock._default_config

    self._mysql = MySQL(config.get('db_params', None))

    # (user_id, role_id) pairs taken from the configuration.
    self.users = [(user_id, role_id) for user_id, role_id in config.get('users', [])]
def _create_lock(self, request, user, dn):
    # Insert one detox_locks row per (item, site, group) combination implied
    # by the request and return the corresponding lock dictionaries.
    service_id = 0
    if 'service' in request:
        try:
            service_id = self.registry.db.query('SELECT `id` FROM `user_services` WHERE `name` = %s', request['service'])[0]
        except IndexError:
            # Unknown service name; keep service_id = 0.
            pass

    columns = ('item', 'sites', 'groups', 'lock_date', 'expiration_date', 'user', 'dn', 'service_id', 'comment')

    comment = None
    if 'comment' in request:
        comment = request['comment']

    # Start from one row with NULL site and group, then expand.
    values = [(request['item'], None, None, MySQL.bare('NOW()'), MySQL.bare('FROM_UNIXTIME(%d)' % request['expires']), user, dn, service_id, comment)]

    if 'sites' in request:
        # One row per requested site (tuple index 1 is `sites`).
        new_values = []
        for site in request['sites']:
            for v in values:
                new_values.append(v[:1] + (site,) + v[2:])
        values = new_values

    if 'groups' in request:
        # One row per requested group (tuple index 2 is `groups`).
        new_values = []
        for group in request['groups']:
            for v in values:
                new_values.append(v[:2] + (group,) + v[3:])
        values = new_values

    new_locks = []

    for v in values:
        lock_id = self.registry.db.insert_get_id('detox_locks', columns, v)

        new_lock = {
            'lockid': lock_id,
            'user': user,
            'dn': dn,
            'item': request['item'],
            'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()),
            'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(request['expires']))
        }
        # v[7] is service_id; nonzero means a known service was resolved.
        if v[7] != 0:
            new_lock['service'] = request['service']
        if v[1] is not None:
            new_lock['sites'] = v[1]
        if v[2] is not None:
            new_lock['groups'] = v[2]
        if 'comment' in request:
            new_lock['comment'] = request['comment']

        new_locks.append(new_lock)

    return new_locks
def create_request(self, caller, items, sites, sites_original, group, ncopies):
    # Create a copy request in the registry and mirror it in history.
    now = int(time.time())

    if self._read_only:
        # Dummy request; no database interaction.
        return CopyRequest(0, caller.name, caller.dn, group, ncopies, 'new', now, now, 1)

    # Make an entry in registry
    columns = ('group', 'num_copies', 'user', 'dn', 'first_request_time', 'last_request_time')
    values = (group, ncopies, caller.name, caller.dn, MySQL.bare('FROM_UNIXTIME(%d)' % now), MySQL.bare('FROM_UNIXTIME(%d)' % now))
    request_id = self.registry.db.insert_get_id('copy_requests', columns, values)

    mapping = lambda site: (request_id, site)
    self.registry.db.insert_many('copy_request_sites', ('request_id', 'site'), mapping, sites)
    mapping = lambda item: (request_id, item)
    self.registry.db.insert_many('copy_request_items', ('request_id', 'item'), mapping, items)

    # Make an entry in history
    history_user_ids = self.history.save_users([(caller.name, caller.dn)], get_ids=True)
    history_site_ids = self.history.save_sites(sites_original, get_ids=True)
    history_group_ids = self.history.save_groups([group], get_ids=True)
    history_dataset_ids, history_block_ids = self._save_items(items)

    # History row reuses the registry-assigned request id.
    sql = 'INSERT INTO `copy_requests` (`id`, `group_id`, `num_copies`, `user_id`, `request_time`)'
    sql += ' VALUES (%s, %s, %s, %s, FROM_UNIXTIME(%s))'
    self.history.db.query(sql, request_id, history_group_ids[0], ncopies, history_user_ids[0], now)

    mapping = lambda sid: (request_id, sid)
    self.history.db.insert_many('copy_request_sites', ('request_id', 'site_id'), mapping, history_site_ids)
    mapping = lambda did: (request_id, did)
    self.history.db.insert_many('copy_request_datasets', ('request_id', 'dataset_id'), mapping, history_dataset_ids)
    mapping = lambda bid: (request_id, bid)
    self.history.db.insert_many('copy_request_blocks', ('request_id', 'block_id'), mapping, history_block_ids)

    return self.get_requests(request_id=request_id)[request_id]
def _do_get_next_application(self, read_only, blocked_apps):  #override
    # Pick the oldest application in 'new' status, optionally excluding
    # write-level apps (read_only) and blocked titles. Returns None when
    # nothing is queued, otherwise a dict describing the application.
    sql = 'SELECT `applications`.`id`, 0+`auth_level`, `title`, `path`, `args`, `timeout`, `users`.`name`, `user_host` FROM `applications`'
    sql += ' INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'
    sql += ' WHERE `status` = \'new\''
    if read_only:
        sql += ' AND `auth_level` != \'write\''
    if len(blocked_apps) != 0:
        sql += ' AND `title` NOT IN %s' % MySQL.stringify_sequence(blocked_apps)
    sql += ' ORDER BY `applications`.`id` LIMIT 1'

    result = self._mysql.query(sql)

    if len(result) == 0:
        return None
    else:
        appid, auth_level, title, path, args, timeout, uname, uhost = result[0]
        return {
            'appid': appid,
            'auth_level': auth_level,
            'user_name': uname,
            'user_host': uhost,
            'title': title,
            'path': path,
            'args': args,
            'timeout': timeout
        }
def __init__(self, config):
    UpdateBoard.__init__(self, config)

    # Table locks require a single persistent connection.
    params = Configuration(config.db_params)
    params.reuse_connection = True
    self._mysql = MySQL(params)
def new_cycle(self, partition, policy_text, comment='', test=False):
    """
    Set up a new deletion cycle for the partition.
    @param partition    Partition name string
    @param policy_text  Full text of the policy
    @param comment      Comment string
    @param test         If True, create a deletion_test cycle.
    @return cycle number.
    """
    if self._read_only:
        return 0

    part_id = self.save_partitions([partition], get_ids=True)[0]
    policy_id = self.save_policy(policy_text)

    operation_str = 'deletion_test' if test else 'deletion'

    return self.db.insert_get_id(
        'deletion_cycles',
        columns=('operation', 'partition_id', 'policy_id', 'comment', 'time_start'),
        values=(operation_str, part_id, policy_id, comment, MySQL.bare('NOW()')))
def create_request(self, caller, items, sites):
    """
    Create a deletion request in the registry and mirror it into history.
    @param caller  Requesting user (provides .name and .dn)
    @param items   Dataset / block names to delete
    @param sites   Site names
    @return The newly created DeletionRequest.
    """
    now = int(time.time())

    if self._read_only:
        # Dummy request; no database interaction.
        return DeletionRequest(0, caller.name, caller.dn, 'new', now, None)

    # Make an entry in registry
    columns = ('user', 'dn', 'request_time')
    values = (caller.name, caller.dn, MySQL.bare('FROM_UNIXTIME(%d)' % now))
    request_id = self.registry.db.insert_get_id('deletion_requests', columns, values)

    mapping = lambda site: (request_id, site)
    self.registry.db.insert_many('deletion_request_sites', ('request_id', 'site'), mapping, sites)
    mapping = lambda item: (request_id, item)
    self.registry.db.insert_many('deletion_request_items', ('request_id', 'item'), mapping, items)

    # Make an entry in history
    history_user_ids = self.history.save_users([(caller.name, caller.dn)], get_ids=True)
    history_site_ids = self.history.save_sites(sites, get_ids=True)
    history_dataset_ids, history_block_ids = self._save_items(items)

    # BUGFIX: the original INSERT..SELECT cross-joined `groups`
    # ("FROM `groups` AS g, `users` AS u"), inserting one row per group row
    # (and none when the table is empty), and ignored the already-fetched
    # history_user_ids. Insert directly with the saved user id, matching the
    # copy-request implementation.
    sql = 'INSERT INTO `deletion_requests` (`id`, `user_id`, `request_time`)'
    sql += ' VALUES (%s, %s, FROM_UNIXTIME(%s))'
    self.history.db.query(sql, request_id, history_user_ids[0], now)

    mapping = lambda sid: (request_id, sid)
    self.history.db.insert_many('deletion_request_sites', ('request_id', 'site_id'), mapping, history_site_ids)
    mapping = lambda did: (request_id, did)
    self.history.db.insert_many('deletion_request_datasets', ('request_id', 'dataset_id'), mapping, history_dataset_ids)
    mapping = lambda bid: (request_id, bid)
    self.history.db.insert_many('deletion_request_blocks', ('request_id', 'block_id'), mapping, history_block_ids)

    return self.get_requests(request_id=request_id)[request_id]
def new_cycle(self, partition, policy_text, comment='', test=False):
    """
    Set up a new deletion cycle for the partition.
    @param partition    Partition name string
    @param policy_text  Full text of the policy
    @param comment      Comment string
    @param test         If True, create a deletion_test cycle.
    @return cycle number.
    """
    if self._read_only:
        return 0

    part_id = self.save_partitions([partition], get_ids=True)[0]
    policy_id = self.save_policy(policy_text)

    if test:
        op = 'deletion_test'
    else:
        op = 'deletion'

    cols = ('operation', 'partition_id', 'policy_id', 'comment', 'time_start')
    vals = (op, part_id, policy_id, comment, MySQL.bare('NOW()'))
    return self.db.insert_get_id('deletion_cycles', columns=cols, values=vals)
def create_request(self, caller, items, sites, sites_original, group, ncopies):
    # Create a copy request in the registry and mirror it in history.
    now = int(time.time())

    if self._read_only:
        # Dummy request; no database interaction.
        return CopyRequest(0, caller.name, caller.dn, group, ncopies, 'new', now, now, 1)

    # Make an entry in registry
    columns = ('group', 'num_copies', 'user', 'dn', 'first_request_time', 'last_request_time')
    values = (group, ncopies, caller.name, caller.dn, MySQL.bare('FROM_UNIXTIME(%d)' % now), MySQL.bare('FROM_UNIXTIME(%d)' % now))
    request_id = self.registry.db.insert_get_id('copy_requests', columns, values)

    mapping = lambda site: (request_id, site)
    self.registry.db.insert_many('copy_request_sites', ('request_id', 'site'), mapping, sites)
    mapping = lambda item: (request_id, item)
    self.registry.db.insert_many('copy_request_items', ('request_id', 'item'), mapping, items)

    # Make an entry in history
    history_user_ids = self.history.save_users([(caller.name, caller.dn)], get_ids = True)
    history_site_ids = self.history.save_sites(sites_original, get_ids = True)
    history_group_ids = self.history.save_groups([group], get_ids = True)
    history_dataset_ids, history_block_ids = self._save_items(items)

    # History row reuses the registry-assigned request id.
    sql = 'INSERT INTO `copy_requests` (`id`, `group_id`, `num_copies`, `user_id`, `request_time`)'
    sql += ' VALUES (%s, %s, %s, %s, FROM_UNIXTIME(%s))'
    self.history.db.query(sql, request_id, history_group_ids[0], ncopies, history_user_ids[0], now)

    mapping = lambda sid: (request_id, sid)
    self.history.db.insert_many('copy_request_sites', ('request_id', 'site_id'), mapping, history_site_ids)
    mapping = lambda did: (request_id, did)
    self.history.db.insert_many('copy_request_datasets', ('request_id', 'dataset_id'), mapping, history_dataset_ids)
    mapping = lambda bid: (request_id, bid)
    self.history.db.insert_many('copy_request_blocks', ('request_id', 'block_id'), mapping, history_block_ids)

    return self.get_requests(request_id = request_id)[request_id]
def __init__(self, config=None):
    # Use the registered default configuration when none is given.
    config = HistoryDatabase._config if config is None else config

    self.db = MySQL(config.db_params)
    self.set_read_only(config.get('read_only', False))
def __init__(self, config):
    # Initialize the four operation/query mixins in declaration order.
    for base in (FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
        base.__init__(self, config)

    self.server_url = config.fts_server
    # server id in the DB
    self.server_id = 0

    # Parameter "retry" for fts3.new_job. 0 = server default
    self.fts_retry = config.get('fts_retry', 0)

    # String passed to fts3.new_*_job(metadata = _)
    self.metadata_string = config.get('metadata_string', 'Dynamo')

    # Proxy to be forwarded to FTS
    self.x509proxy = config.get('x509proxy', None)
    self.x509proxy_orig = config.get('x509proxy', None)

    # Bookkeeping device
    self.db = MySQL(config.db_params)

    # Reuse the context object
    self.keep_context = config.get('keep_context', False)
    self._context = None
def new_cycle(self, partition, comment='', test=False):
    """
    Set up a new copy cycle for the partition.
    @param partition  partition name string
    @param comment    comment string
    @param test       if True, create a copy_test cycle.
    @return cycle number.
    """
    if self._read_only:
        return 0

    part_id = self.save_partitions([partition], get_ids=True)[0]

    operation_str = 'copy_test' if test else 'copy'

    return self.db.insert_get_id(
        'copy_cycles',
        columns=('operation', 'partition_id', 'comment', 'time_start'),
        values=(operation_str, part_id, comment, MySQL.bare('NOW()')))
def __init__(self, config):
    Authorizer.__init__(self, config)

    if not hasattr(self, '_mysql'):
        # Table locks require one persistent connection.
        params = Configuration(config.db_params)
        params.reuse_connection = True
        self._mysql = MySQL(params)
def _make_history_constraints(self, request_id, statuses, users, items, sites):
    # Build the WHERE clause for history-side request queries. Names are
    # first mapped to history-table ids. Returns '' when no filter applies.
    if users is not None:
        history_user_ids = self.history.db.select_many(
            'users', 'id', 'name', users)
    else:
        history_user_ids = None

    if items is not None:
        history_dataset_ids, history_block_ids = self._get_saved_item_ids(
            items)
    else:
        history_dataset_ids = None
        history_block_ids = None

    if sites is not None:
        history_site_ids = self.history.db.select_many(
            'sites', 'id', 'name', sites)
    else:
        history_site_ids = None

    constraints = []

    if request_id is not None:
        constraints.append('r.`id` = %d' % request_id)

    if statuses is not None:
        constraints.append('r.`status` IN ' +
                           MySQL.stringify_sequence(statuses))

    if users is not None:
        constraints.append('r.`user_id` IN ' +
                           MySQL.stringify_sequence(history_user_ids))

    if items is not None or sites is not None:
        # Item/site filters go through a temporary join table.
        temp_table = self._make_temp_history_tables(
            history_dataset_ids, history_block_ids, history_site_ids)
        constraints.append(
            'r.`id` IN (SELECT `id` FROM {0})'.format(temp_table))

    if len(constraints) != 0:
        return ' WHERE ' + ' AND '.join(constraints)
    else:
        return ''
def _make_registry_constraints(self, request_id, statuses, users, items, sites): constraints = [] if request_id is not None: constraints.append('r.`id` = %d' % request_id) if statuses is not None: constraints.append('r.`status` IN ' + MySQL.stringify_sequence(statuses)) if users is not None: constraints.append('r.`user` IN ' + MySQL.stringify_sequence(users)) if items is not None or sites is not None: temp_table = self._make_temp_registry_tables(items, sites) constraints.append('r.`id` IN (SELECT `id` FROM {0})'.format(temp_table)) if len(constraints) != 0: return ' WHERE ' + ' AND '.join(constraints) else: return ''
def make_entry(self, site_name):
    # Record the start of a copy operation in history and return the
    # corresponding HistoryRecord (operation id 0 in read-only mode).
    if self._read_only:
        operation_id = 0
    else:
        site_id = self.save_sites([site_name], get_ids=True)[0]
        operation_id = self.db.insert_get_id(
            'copy_operations',
            columns=('timestamp', 'site_id'),
            values=(MySQL.bare('NOW()'), site_id))

    return HistoryRecord(HistoryRecord.OP_COPY, operation_id, site_name, int(time.time()))
def _connect(self):  #override
    # Register this server in the `servers` table and remember its row id.
    if self._host == 'localhost' or self._host == socket.gethostname():
        # This is the master server; wipe the table clean
        self._mysql.query('DELETE FROM `servers`')
        self._mysql.query('ALTER TABLE `servers` AUTO_INCREMENT = 1')
    else:
        # Remove any stale row left over from a previous run of this host.
        self._mysql.query('DELETE FROM `servers` WHERE `hostname` = %s', socket.gethostname())

    # id of this server
    self._server_id = self._mysql.insert_get_id(
        'servers',
        columns=('hostname', 'last_heartbeat'),
        values=(socket.gethostname(), MySQL.bare('NOW()')))
def __init__(self, config):
    # Initialize the four operation/query mixins in declaration order.
    for base in (FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
        base.__init__(self, config)

    self.server_url = config.fts_server
    # server id in the DB
    self.server_id = 0

    # Parameter "retry" for fts3.new_job. 0 = server default
    self.fts_retry = config.get('fts_retry', 0)

    # String passed to fts3.new_*_job(metadata = _)
    self.metadata_string = config.get('metadata_string', 'Dynamo')

    # Proxy to be forwarded to FTS
    self.x509proxy = config.get('x509proxy', None)

    # Bookkeeping device
    self.db = MySQL(config.db_params)

    # Reuse the context object
    self.keep_context = config.get('keep_context', True)
    self._context = None
def _make_registry_constraints(self, request_id, statuses, users, items, sites): constraints = [] if request_id is not None: constraints.append('r.`id` = %d' % request_id) if statuses is not None: constraints.append('r.`status` IN ' + MySQL.stringify_sequence(statuses)) if users is not None: constraints.append('r.`user` IN ' + MySQL.stringify_sequence(users)) if items is not None or sites is not None: temp_table = self._make_temp_registry_tables(items, sites) constraints.append( 'r.`id` IN (SELECT `id` FROM {0})'.format(temp_table)) if len(constraints) != 0: return ' WHERE ' + ' AND '.join(constraints) else: return ''
def create_cached_request(self, caller, item, sites_original, group, ncopies):
    # Insert a 'new' cached copy request into the registry and return a
    # summary dictionary.
    now = int(time.time())

    # Make an entry in registry
    columns = ('item', 'sites', 'group', 'num_copies', 'user', 'dn', 'request_time', 'status')
    values = (item, sites_original, group, ncopies, caller.name, caller.dn, MySQL.bare('FROM_UNIXTIME(%d)' % now), 'new')
    LOG.info(values)
    cached_request_id = self.registry.db.insert_get_id('cached_copy_requests', columns, values)

    return {
        'request_id': cached_request_id,
        'item': item,
        'sites': sites_original
    }
def _make_history_constraints(self, request_id, statuses, users, items, sites): if users is not None: history_user_ids = self.history.db.select_many('users', 'id', 'name', users) else: history_user_ids = None if items is not None: history_dataset_ids, history_block_ids = self._get_saved_item_ids(items) else: history_dataset_ids = None history_block_ids = None if sites is not None: history_site_ids = self.history.db.select_many('sites', 'id', 'name', sites) else: history_site_ids = None constraints = [] if request_id is not None: constraints.append('r.`id` = %d' % request_id) if statuses is not None: constraints.append('r.`status` IN ' + MySQL.stringify_sequence(statuses)) if users is not None: constraints.append('r.`user_id` IN ' + MySQL.stringify_sequence(history_user_ids)) if items is not None or sites is not None: temp_table = self._make_temp_history_tables(history_dataset_ids, history_block_ids, history_site_ids) constraints.append('r.`id` IN (SELECT `id` FROM {0})'.format(temp_table)) if len(constraints) != 0: return ' WHERE ' + ' AND '.join(constraints) else: return ''
def _cancel(self, task_ids, optype):
    # Look up the FTS job and file id of each task, then cancel the files
    # grouped by job.
    sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
    sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
    result = self.db.execute_many(sql.format(op=optype), MySQL.bare('f.`id`'), task_ids)

    by_job = collections.defaultdict(list)
    for job_id, file_id in result:
        by_job[job_id].append(file_id)

    if self._read_only:
        return

    for job_id, ids in by_job.iteritems():
        try:
            self._ftscall('cancel', job_id, file_ids=ids)
        except:
            LOG.error('Failed to cancel FTS job %s', job_id)
def save_policy(self, policy_text):
    # Store the policy text if not already present and return its row id.
    # Rows are keyed by the MD5 of the text for fast lookup.
    md5 = hashlib.md5(policy_text).hexdigest()
    result = self.db.query(
        'SELECT `id`, `text` FROM `deletion_policies` WHERE `hash` = UNHEX(%s)',
        md5)

    for policy_id, text in result:
        if text == policy_text:
            return policy_id

    # no row with matching hash or no row with matching text although hash matches (basically impossible)
    # new policy
    columns = ('hash', 'text')
    return self.db.insert_get_id('deletion_policies', columns=columns, values=(MySQL.bare('UNHEX(\'%s\')' % md5), policy_text))
def _cancel(self, task_ids, optype):
    # Look up the FTS job and file id of each task, then cancel the files
    # grouped by job. optype is 'transfer' or 'deletion' (table name infix).
    sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
    sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
    result = self.db.execute_many(sql.format(op=optype), MySQL.bare('f.`id`'), task_ids)

    by_job = collections.defaultdict(list)

    for job_id, file_id in result:
        by_job[job_id].append(file_id)

    if not self._read_only:
        for job_id, ids in by_job.iteritems():
            try:
                self._ftscall('cancel', job_id, file_ids=ids)
            except:
                # Best-effort: log and continue with the remaining jobs.
                LOG.error('Failed to cancel FTS job %s', job_id)
def create_request(self, caller, items, sites):
    """
    Create a deletion request in the registry and mirror it into history.
    @param caller  Requesting user (provides .name and .dn)
    @param items   Dataset / block names to delete
    @param sites   Site names
    @return The newly created DeletionRequest.
    """
    now = int(time.time())

    if self._read_only:
        # Dummy request; no database interaction.
        return DeletionRequest(0, caller.name, caller.dn, 'new', now, None)

    # Make an entry in registry
    columns = ('user', 'dn', 'request_time')
    values = (caller.name, caller.dn, MySQL.bare('FROM_UNIXTIME(%d)' % now))
    request_id = self.registry.db.insert_get_id('deletion_requests', columns, values)

    mapping = lambda site: (request_id, site)
    self.registry.db.insert_many('deletion_request_sites', ('request_id', 'site'), mapping, sites)
    mapping = lambda item: (request_id, item)
    self.registry.db.insert_many('deletion_request_items', ('request_id', 'item'), mapping, items)

    # Make an entry in history
    history_user_ids = self.history.save_users([(caller.name, caller.dn)], get_ids=True)
    history_site_ids = self.history.save_sites(sites, get_ids=True)
    history_dataset_ids, history_block_ids = self._save_items(items)

    # BUGFIX: the original INSERT..SELECT cross-joined `groups`
    # ("FROM `groups` AS g, `users` AS u"), inserting one row per group row
    # (and none when the table is empty), and ignored the already-fetched
    # history_user_ids. Insert directly with the saved user id, matching the
    # copy-request implementation.
    sql = 'INSERT INTO `deletion_requests` (`id`, `user_id`, `request_time`)'
    sql += ' VALUES (%s, %s, FROM_UNIXTIME(%s))'
    self.history.db.query(sql, request_id, history_user_ids[0], now)

    mapping = lambda sid: (request_id, sid)
    self.history.db.insert_many('deletion_request_sites', ('request_id', 'site_id'), mapping, history_site_ids)
    mapping = lambda did: (request_id, did)
    self.history.db.insert_many('deletion_request_datasets', ('request_id', 'dataset_id'), mapping, history_dataset_ids)
    mapping = lambda bid: (request_id, bid)
    self.history.db.insert_many('deletion_request_blocks', ('request_id', 'block_id'), mapping, history_block_ids)

    return self.get_requests(request_id=request_id)[request_id]
def update(config, inventory):
    # Fetch CRAB dataset access records from PopDB and save them into the
    # store database, pruning entries older than two years.
    popdb = PopDB(config.popdb.config)
    store = MySQL(config.store.db_params)

    last_update = store.query('SELECT UNIX_TIMESTAMP(`dataset_accesses_last_update`) FROM `system`')[0]

    # Probe for write access by touching the timestamp; an OperationalError
    # implies the DB account is read-only.
    try:
        store.query('UPDATE `system` SET `dataset_accesses_last_update` = NOW()', retries=0, silent=True)
    except MySQLdb.OperationalError:
        # We have a read-only config
        read_only = True
        LOG.info('Running update() in read-only mode.')
    else:
        read_only = False

    # Never query further back than config.max_back_query days.
    start_time = max(last_update, (time.time() - 3600 * 24 * config.max_back_query))
    start_date = datetime.date(*time.gmtime(start_time)[:3])

    included_sites = list(config.included_sites)
    excluded_sites = list(config.excluded_sites)

    source_records = CRABAccessHistory._get_source_records(popdb, inventory, included_sites, excluded_sites, start_date)

    if not read_only:
        CRABAccessHistory._save_records(source_records, store)

        # remove old entries
        store.query('DELETE FROM `dataset_accesses` WHERE `date` < DATE_SUB(NOW(), INTERVAL 2 YEAR)')

        store.query('UPDATE `system` SET `dataset_accesses_last_update` = NOW()')
def new_cycle(self, partition, comment='', test=False):
    """
    Set up a new copy cycle for the partition.
    @param partition  partition name string
    @param comment    comment string
    @param test       if True, create a copy_test cycle.
    @return cycle number.
    """
    if self._read_only:
        return 0

    part_id = self.save_partitions([partition], get_ids=True)[0]

    if test:
        op = 'copy_test'
    else:
        op = 'copy'

    cols = ('operation', 'partition_id', 'comment', 'time_start')
    vals = (op, part_id, comment, MySQL.bare('NOW()'))
    return self.db.insert_get_id('copy_cycles', columns=cols, values=vals)
class MySQLUpdateBoard(UpdateBoard):
    # UpdateBoard implementation backed by the `inventory_updates` MySQL table.

    def __init__(self, config):
        UpdateBoard.__init__(self, config)

        db_params = Configuration(config.db_params)
        db_params.reuse_connection = True # we use locks

        self._mysql = MySQL(db_params)

    def lock(self): #override
        self._mysql.lock_tables(write = ['inventory_updates'])

    def unlock(self): #override
        self._mysql.unlock_tables()

    def get_updates(self): #override
        # Yield (command constant, serialized object) pairs in insertion order.
        for cmd, obj in self._mysql.xquery('SELECT `cmd`, `obj` FROM `inventory_updates` ORDER BY `id`'):
            if cmd == 'update':
                yield DynamoInventory.CMD_UPDATE, obj
            elif cmd == 'delete':
                yield DynamoInventory.CMD_DELETE, obj

    def flush(self): #override
        # Drop all pending updates and reset the id counter.
        self._mysql.query('DELETE FROM `inventory_updates`')
        self._mysql.query('ALTER TABLE `inventory_updates` AUTO_INCREMENT = 1')

    def write_updates(self, update_commands): #override
        # Append the commands under a table write lock so concurrent readers
        # see a consistent sequence.
        self._mysql.lock_tables(write = ['inventory_updates'])

        try:
            sql = 'INSERT INTO `inventory_updates` (`cmd`, `obj`) VALUES (%s, %s)'
            for cmd, sobj in update_commands:
                if cmd == DynamoInventory.CMD_UPDATE:
                    self._mysql.query(sql, 'update', sobj)
                elif cmd == DynamoInventory.CMD_DELETE:
                    self._mysql.query(sql, 'delete', sobj)
        finally:
            self._mysql.unlock_tables()

    def disconnect(self):
        self._mysql.close()
class StandaloneFileOperation(FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
    """
    Interface to in-house transfer & deletion daemon using MySQL for bookkeeping.
    """

    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.db = MySQL(config.db_params)

    def num_pending_transfers(self): #override
        # FOD can throttle itself.
        return 0

    def num_pending_deletions(self): #override
        # FOD can throttle itself.
        return 0

    def form_batches(self, tasks): #override
        # Group tasks by (source, destination) for transfers, or by target
        # site for deletions.
        if len(tasks) == 0:
            return []

        if hasattr(tasks[0], 'source'):
            # These are transfer tasks
            by_endpoints = collections.defaultdict(list)
            for task in tasks:
                endpoints = (task.source, task.subscription.destination)
                by_endpoints[endpoints].append(task)

            return by_endpoints.values()
        else:
            by_endpoint = collections.defaultdict(list)
            for task in tasks:
                by_endpoint[task.desubscription.site].append(task)

            return by_endpoint.values()

    def start_transfers(self, batch_id, batch_tasks): #override
        # Record the batch and its tasks (with resolved PFNs) in the
        # standalone bookkeeping tables. Returns {task: True/False} where
        # False means the PFN could not be resolved.
        if len(batch_tasks) == 0:
            return {}

        result = {}

        # tasks should all have the same source and destination
        source = batch_tasks[0].source
        destination = batch_tasks[0].subscription.destination

        fields = ('id', 'source', 'destination', 'checksum_algo', 'checksum')

        def yield_task_entry():
            # Generator consumed by insert_many; fills `result` as a side
            # effect while yielding DB rows.
            for task in batch_tasks:
                lfile = task.subscription.file
                lfn = lfile.lfn
                source_pfn = source.to_pfn(lfn, 'gfal2')
                dest_pfn = destination.to_pfn(lfn, 'gfal2')

                if source_pfn is None or dest_pfn is None:
                    # either gfal2 is not supported or lfn could not be mapped
                    result[task] = False
                    continue

                if self.checksum_algorithm:
                    checksum = (self.checksum_algorithm, str(lfile.checksum[self.checksum_index]))
                else:
                    checksum = (None, None)

                result[task] = True
                yield (task.id, source_pfn, dest_pfn) + checksum

        if not self._read_only:
            sql = 'INSERT INTO `standalone_transfer_batches` (`batch_id`, `source_site`, `destination_site`) VALUES (%s, %s, %s)'
            self.db.query(sql, batch_id, source.name, destination.name)

            self.db.insert_many('standalone_transfer_tasks', fields, None, yield_task_entry())

            LOG.debug('Inserted %d entries to standalone_transfer_tasks for batch %d.', len(batch_tasks), batch_id)

        return result

    def start_deletions(self, batch_id, batch_tasks): #override
        # Record the deletion batch and its tasks (with resolved PFNs).
        # Returns {task: True/False} as in start_transfers.
        if len(batch_tasks) == 0:
            return {}

        result = {}

        # tasks should all have the same target site
        site = batch_tasks[0].desubscription.site

        fields = ('id', 'file')

        def yield_task_entry():
            # Generator consumed by insert_many; fills `result` as a side
            # effect while yielding DB rows.
            for task in batch_tasks:
                lfn = task.desubscription.file.lfn
                pfn = site.to_pfn(lfn, 'gfal2')

                if pfn is None:
                    # either gfal2 is not supported or lfn could not be mapped
                    result[task] = False
                    continue

                result[task] = True
                yield (task.id, pfn)

        if not self._read_only:
            sql = 'INSERT INTO `standalone_deletion_batches` (`batch_id`, `site`) VALUES (%s, %s)'
            self.db.query(sql, batch_id, site.name)

            self.db.insert_many('standalone_deletion_tasks', fields, None, yield_task_entry())

            LOG.debug('Inserted %d entries to standalone_deletion_tasks for batch %d.', len(batch_tasks), batch_id)

        return result

    def cancel_transfers(self, task_ids): #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids): #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self): #override
        # Drop standalone bookkeeping rows whose parent task/batch rows are
        # gone, then drop parent tasks that have no standalone counterpart.
        sql = 'DELETE FROM f USING `standalone_transfer_tasks` AS f LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_deletion_tasks` AS f LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_transfer_batches` AS f LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id` WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `standalone_deletion_batches` AS f LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id` WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `standalone_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `standalone_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id): #override
        return self._get_status(batch_id, 'transfer')

    def get_deletion_status(self, batch_id): #override
        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id, history_id): #override
        pass

    def write_deletion_history(self, history_db, task_id, history_id): #override
        pass

    def forget_transfer_status(self, task_id): #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id): #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, batch_id): #override
        return self._forget_batch(batch_id, 'transfer')

    def forget_deletion_batch(self, batch_id): #override
        return self._forget_batch(batch_id, 'deletion')

    def _cancel(self, task_ids, optype):
        # Mark still-pending tasks as cancelled; optype is 'transfer' or
        # 'deletion' (table name infix).
        sql = 'UPDATE `standalone_{op}_tasks` SET `status` = \'cancelled\''.format(op = optype)
        self.db.execute_many(sql, 'id', task_ids, ['`status` IN (\'new\', \'queued\')'])

    def _get_status(self, batch_id, optype):
        # Return (task_id, status, exitcode, message, start, finish) tuples
        # for all tasks of the batch.
        sql = 'SELECT q.`id`, a.`status`, a.`exitcode`, a.`message`, UNIX_TIMESTAMP(a.`start_time`), UNIX_TIMESTAMP(a.`finish_time`) FROM `standalone_{op}_tasks` AS a'
        sql += ' INNER JOIN `{op}_tasks` AS q ON q.`id` = a.`id`'
        sql += ' WHERE q.`batch_id` = %s'
        sql = sql.format(op = optype)

        return [(i, FileQuery.status_val(s), c, m, t, f) for (i, s, c, m, t, f) in self.db.xquery(sql, batch_id)]

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `standalone_{op}_tasks` WHERE `id` = %s'.format(op = optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `standalone_{op}_batches` WHERE `batch_id` = %s'
        self.db.query(sql.format(op = optype), batch_id)
class FTSFileOperation(FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery):
    """
    File transfer / deletion operation and query backend speaking to an FTS3 server
    through the fts3 python bindings. FTS job and file ids are bookkept in MySQL
    (fts_transfer_batches/tasks, fts_deletion_batches/tasks, fts_staging_queue) so
    that batch ids used by the caller can be mapped back to FTS jobs.
    """

    def __init__(self, config):
        FileTransferOperation.__init__(self, config)
        FileTransferQuery.__init__(self, config)
        FileDeletionOperation.__init__(self, config)
        FileDeletionQuery.__init__(self, config)

        self.server_url = config.fts_server
        self.server_id = 0 # server id in the DB (resolved lazily by _set_server_id)

        # Parameter "retry" for fts3.new_job. 0 = server default
        self.fts_retry = config.get('fts_retry', 0)

        # String passed to fts3.new_*_job(metadata = _)
        self.metadata_string = config.get('metadata_string', 'Dynamo')

        # Proxy to be forwarded to FTS
        self.x509proxy = config.get('x509proxy', None)

        # Bookkeeping device
        self.db = MySQL(config.db_params)

        # Reuse the context object
        self.keep_context = config.get('keep_context', True)
        self._context = None

    def num_pending_transfers(self): #override
        """Count files currently being handled by FTS, capped at max_pending_transfers + 1."""
        # Check the number of files in queue
        # We first thought about counting files with /files, but FTS seems to return only 1000 maximum even when "limit" is set much larger
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_transfers)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE', 'STAGING', 'STARTED']

        jobs = self._ftscall('list_jobs', state_in=['SUBMITTED', 'ACTIVE', 'STAGING'])
        for job in jobs:
            job_info = self._ftscall('get_job_status', job['job_id'], list_files=True)
            for file_info in job_info['files']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_transfers + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def num_pending_deletions(self): #override
        """Count deletions currently being handled by FTS, capped at max_pending_deletions + 1."""
        # See above
        #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_deletions)
        #return len(files)

        num_pending = 0
        file_states = ['SUBMITTED', 'READY', 'ACTIVE']

        jobs = self._ftscall('list_jobs', state_in=['SUBMITTED', 'ACTIVE'])
        for job in jobs:
            job_info = self._ftscall('get_job_status', job['job_id'], list_files=True)
            for file_info in job_info['dm']:
                if file_info['file_state'] in file_states:
                    num_pending += 1
                    if num_pending == self.max_pending_deletions + 1:
                        # don't need to query more
                        return num_pending

        return num_pending

    def form_batches(self, tasks): #override
        """Split tasks into lists of at most self.batch_size each."""
        if len(tasks) == 0:
            return []

        # FTS3 has no restriction on how to group the transfers, but cannot apparently take thousands
        # of tasks at once
        batches = [[]]
        for task in tasks:
            batches[-1].append(task)
            if len(batches[-1]) == self.batch_size:
                batches.append([])

        return batches

    def start_transfers(self, batch_id, batch_tasks): #override
        """
        Submit one batch of transfer tasks to FTS. Tasks whose source is tape (MSS)
        are first submitted as a staging job and queued in fts_staging_queue; the
        actual transfer job is created later in get_transfer_status.
        @return {task: bool success}
        """
        result = {}

        stage_files = []
        transfers = []

        s_pfn_to_task = {}
        t_pfn_to_task = {}

        for task in batch_tasks:
            sub = task.subscription
            lfn = sub.file.lfn
            dest_pfn = sub.destination.to_pfn(lfn, 'gfal2')
            source_pfn = task.source.to_pfn(lfn, 'gfal2')

            if dest_pfn is None or source_pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                LOG.warning('Could not obtain PFN for %s at %s or %s', lfn, sub.destination.name, task.source.name)
                result[task] = False
                continue

            if self.checksum_algorithm:
                checksum = '%s:%s' % (self.checksum_algorithm, str(sub.file.checksum[self.checksum_index]))
                verify_checksum = 'target'
            else:
                checksum = None
                verify_checksum = False

            if task.source.storage_type == Site.TYPE_MSS:
                LOG.debug('Staging %s at %s', lfn, task.source.name)
                # need to stage first
                stage_files.append((source_pfn, dest_pfn, checksum, sub.file.size))
                # task identified by the source PFN
                s_pfn_to_task[source_pfn] = task
            else:
                LOG.debug('Submitting transfer of %s from %s to %s to FTS', lfn, task.source.name, sub.destination.name)
                transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum=checksum, filesize=sub.file.size))
                # there should be only one task per destination pfn
                t_pfn_to_task[dest_pfn] = task

        if len(stage_files) != 0:
            LOG.debug('Submit new staging job for %d files', len(stage_files))
            job = fts3.new_staging_job([ff[0] for ff in stage_files], bring_online=36000, metadata=self.metadata_string)
            success = self._submit_job(job, 'staging', batch_id, dict((pfn, task.id) for pfn, task in s_pfn_to_task.iteritems()))

            for source_pfn, _, _, _ in stage_files:
                result[s_pfn_to_task[source_pfn]] = success

            if success and not self._read_only:
                LOG.debug('Recording staging queue')
                # Remember the destination, checksum, and size so that the transfer
                # job can be created once staging completes (see get_transfer_status).
                fields = ('id', 'source', 'destination', 'checksum', 'size')
                mapping = lambda ff: (s_pfn_to_task[ff[0]].id, ) + ff

                if not self._read_only:
                    self.db.insert_many('fts_staging_queue', fields, mapping, stage_files)

        if len(transfers) != 0:
            LOG.debug('Submit new transfer job for %d files', len(transfers))
            # NOTE(review): verify_checksum is the value set in the last loop iteration;
            # it is uniform in practice because it depends only on self.checksum_algorithm.
            job = fts3.new_job(transfers, retry=self.fts_retry, overwrite=True, verify_checksum=verify_checksum, metadata=self.metadata_string)
            success = self._submit_job(job, 'transfer', batch_id, dict((pfn, task.id) for pfn, task in t_pfn_to_task.iteritems()))

            for transfer in transfers:
                dest_pfn = transfer['destinations'][0]
                result[t_pfn_to_task[dest_pfn]] = success

        return result

    def start_deletions(self, batch_id, batch_tasks): #override
        """
        Submit one batch of deletion tasks to FTS as a single delete job.
        @return {task: bool success}
        """
        result = {}

        pfn_to_task = {}

        for task in batch_tasks:
            desub = task.desubscription
            lfn = desub.file.lfn
            pfn = desub.site.to_pfn(lfn, 'gfal2')

            if pfn is None:
                # either gfal2 is not supported or lfn could not be mapped
                result[task] = False
                continue

            # there should be only one task per destination pfn
            pfn_to_task[pfn] = task

        job = fts3.new_delete_job(pfn_to_task.keys(), metadata=self.metadata_string)

        success = self._submit_job(job, 'deletion', batch_id, dict((pfn, task.id) for pfn, task in pfn_to_task.iteritems()))

        for task in pfn_to_task.itervalues():
            result[task] = success

        return result

    def cancel_transfers(self, task_ids): #override
        return self._cancel(task_ids, 'transfer')

    def cancel_deletions(self, task_ids): #override
        return self._cancel(task_ids, 'deletion')

    def cleanup(self): #override
        """Purge orphaned bookkeeping rows (and source tasks no longer known to this backend)."""
        sql = 'DELETE FROM f USING `fts_transfer_tasks` AS f'
        sql += ' LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_transfer_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_staging_queue` AS f'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS t ON t.`id` = f.`id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_tasks` AS f'
        sql += ' LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id`'
        sql += ' LEFT JOIN `fts_deletion_batches` AS b ON b.`id` = f.`fts_batch_id`'
        sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_transfer_batches` AS f'
        sql += ' LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM f USING `fts_deletion_batches` AS f'
        sql += ' LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id`'
        sql += ' WHERE t.`id` IS NULL'
        self.db.query(sql)

        # Delete the source tasks - caution: wipes out all tasks when switching the operation backend
        sql = 'DELETE FROM t USING `transfer_tasks` AS t'
        sql += ' LEFT JOIN `fts_transfer_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)
        sql = 'DELETE FROM t USING `deletion_tasks` AS t'
        sql += ' LEFT JOIN `fts_deletion_tasks` AS f ON f.`id` = t.`id`'
        sql += ' WHERE f.`id` IS NULL'
        self.db.query(sql)

    def get_transfer_status(self, batch_id): #override
        """
        Collect status of transfer and staging tasks in a batch. Tasks whose staging
        completed are promoted to an actual FTS transfer job here and reported as queued.
        """
        if self.server_id == 0:
            self._set_server_id()

        results = self._get_status(batch_id, 'transfer')

        staged_tasks = []

        for task_id, status, exitcode, msg, start_time, finish_time in self._get_status(batch_id, 'staging'):
            if status == FileQuery.STAT_DONE:
                staged_tasks.append(task_id)
                results.append((task_id, FileQuery.STAT_QUEUED, -1, None, None, None))
            else:
                # these tasks won't appear in results from _get_status('transfer')
                # because no transfer jobs have been submitted yet
                results.append((task_id, status, exitcode, None, start_time, finish_time))

        if len(staged_tasks) != 0:
            transfers = []
            pfn_to_tid = {}
            for task_id, source_pfn, dest_pfn, checksum, filesize in self.db.select_many('fts_staging_queue', ('id', 'source', 'destination', 'checksum', 'size'), 'id', staged_tasks):
                transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum=checksum, filesize=filesize))
                pfn_to_tid[dest_pfn] = task_id

            if self.checksum_algorithm:
                verify_checksum = 'target'
            else:
                verify_checksum = None

            job = fts3.new_job(transfers, retry=self.fts_retry, overwrite=True, verify_checksum=verify_checksum, metadata=self.metadata_string)

            success = self._submit_job(job, 'transfer', batch_id, pfn_to_tid)

            if success and not self._read_only:
                self.db.delete_many('fts_staging_queue', 'id', pfn_to_tid.values())

        return results

    def get_deletion_status(self, batch_id): #override
        if self.server_id == 0:
            self._set_server_id()

        return self._get_status(batch_id, 'deletion')

    def write_transfer_history(self, history_db, task_id, history_id): #override
        self._write_history(history_db, task_id, history_id, 'transfer')

    def write_deletion_history(self, history_db, task_id, history_id): #override
        self._write_history(history_db, task_id, history_id, 'deletion')

    def forget_transfer_status(self, task_id): #override
        return self._forget_status(task_id, 'transfer')

    def forget_deletion_status(self, task_id): #override
        return self._forget_status(task_id, 'deletion')

    def forget_transfer_batch(self, task_id): #override
        return self._forget_batch(task_id, 'transfer')

    def forget_deletion_batch(self, task_id): #override
        return self._forget_batch(task_id, 'deletion')

    def _ftscall(self, method, *args, **kwd):
        """Invoke an fts3 binding function by name with retry handling."""
        return self._do_ftscall(binding=(method, args, kwd))

    def _ftscallurl(self, url):
        # Call to FTS URLs that don't have python bindings
        return self._do_ftscall(url=url)

    def _do_ftscall(self, binding=None, url=None):
        """
        Perform one FTS call (either through a binding function or a raw URL),
        retrying up to 10 times with exponential backoff on server errors.
        @raise RuntimeError after exhausting all attempts.
        """
        if self._context is None:
            # request_class = Request -> use "requests"-based https call (instead of default PyCURL,
            # which may not be able to handle proxy certificates depending on the cURL installation)
            # verify = False -> do not verify the server certificate
            context = fts3.Context(self.server_url, ucert=self.x509proxy, ukey=self.x509proxy, request_class=Request, verify=False)

            if self.keep_context:
                self._context = context
        else:
            context = self._context

        if binding is not None:
            reqstring = binding[0]
        else:
            reqstring = url

        LOG.debug('FTS: %s', reqstring)

        wait_time = 1.
        for attempt in xrange(10):
            try:
                if binding is not None:
                    method, args, kwd = binding
                    return getattr(fts3, method)(context, *args, **kwd)
                else:
                    return json.loads(context.get(url))
            except fts_exceptions.ServerError as exc:
                if str(exc.reason) == '500':
                    # Internal server error - let's try again
                    pass
            except fts_exceptions.TryAgain:
                pass

            time.sleep(wait_time)
            wait_time *= 1.5

        LOG.error('Failed to communicate with FTS server: %s', reqstring)
        raise RuntimeError('Failed to communicate with FTS server: %s' % reqstring)

    def _submit_job(self, job, optype, batch_id, pfn_to_tid):
        """
        Submit a prepared FTS job and record the resulting job/file ids in the
        bookkeeping tables. In read-only mode a dummy job id is used and nothing
        is written to the DB.
        @param optype      'transfer', 'staging' or 'deletion'
        @param pfn_to_tid  {pfn: dynamo task id} for the files in the job
        @return True on success.
        """
        if self._read_only:
            job_id = 'test'
        else:
            try:
                job_id = self._ftscall('submit', job)
            except:
                exc_type, exc, tb = sys.exc_info()
                LOG.error('Failed to submit %s to FTS: Exception %s (%s)', optype, exc_type.__name__, str(exc))
                return False

        LOG.debug('FTS job id: %s', job_id)

        # list of file-level operations (one-to-one with pfn)
        try:
            if optype == 'transfer' or optype == 'staging':
                key = 'files'
            else:
                key = 'dm'

            fts_files = self._ftscall('get_job_status', job_id=job_id, list_files=True)[key]
        except:
            exc_type, exc, tb = sys.exc_info()
            LOG.error('Failed to get status of job %s from FTS: Exception %s (%s)', job_id, exc_type.__name__, str(exc))
            return False

        if self.server_id == 0:
            self._set_server_id()

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_batches'
            columns = ('batch_id', 'task_type', 'fts_server_id', 'job_id')
            values = (batch_id, optype, self.server_id, job_id)
        else:
            table_name = 'fts_deletion_batches'
            columns = ('batch_id', 'fts_server_id', 'job_id')
            values = (batch_id, self.server_id, job_id)

        if not self._read_only:
            fts_batch_id = self.db.insert_get_id(table_name, columns=columns, values=values)

        if optype == 'transfer' or optype == 'staging':
            table_name = 'fts_transfer_tasks'
            pfn_key = 'dest_surl'
        else:
            table_name = 'fts_deletion_tasks'
            pfn_key = 'source_surl'

        fields = ('id', 'fts_batch_id', 'fts_file_id')
        # fts_batch_id is only bound when not read-only; the lambda is likewise only
        # evaluated inside the guarded insert_many below.
        mapping = lambda f: (pfn_to_tid[f[pfn_key]], fts_batch_id, f['file_id'])

        if not self._read_only:
            self.db.insert_many(table_name, fields, mapping, fts_files, do_update=True, update_columns=('fts_batch_id', 'fts_file_id'))

        return True

    def _cancel(self, task_ids, optype):
        """Cancel the FTS files corresponding to the given dynamo task ids, grouped by job."""
        sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`'
        result = self.db.execute_many(sql.format(op=optype), MySQL.bare('f.`id`'), task_ids)

        by_job = collections.defaultdict(list)

        for job_id, file_id in result:
            by_job[job_id].append(file_id)

        if not self._read_only:
            for job_id, ids in by_job.iteritems():
                try:
                    self._ftscall('cancel', job_id, file_ids=ids)
                except:
                    LOG.error('Failed to cancel FTS job %s', job_id)

    def _get_status(self, batch_id, optype):
        """
        Query FTS for the state of all files in all FTS jobs belonging to a batch and
        translate them into FileQuery status tuples.
        @return [(task_id, status, exitcode, message, start time, finish time)]
        """
        if optype == 'transfer' or optype == 'staging':
            sql = 'SELECT `id`, `job_id` FROM `fts_transfer_batches`'
            sql += ' WHERE `task_type` = %s AND `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, optype, self.server_id, batch_id)
            task_table_name = 'fts_transfer_tasks'
        else:
            sql = 'SELECT `id`, `job_id` FROM `fts_deletion_batches`'
            sql += ' WHERE `fts_server_id` = %s AND `batch_id` = %s'
            batch_data = self.db.query(sql, self.server_id, batch_id)
            task_table_name = 'fts_deletion_tasks'

        message_pattern = re.compile('(?:DESTINATION|SOURCE|TRANSFER|DELETION) \[([0-9]+)\] (.*)')

        results = []

        for fts_batch_id, job_id in batch_data:
            LOG.debug('Checking status of FTS %s batch %s', optype, job_id)

            sql = 'SELECT `fts_file_id`, `id` FROM `{table}` WHERE `fts_batch_id` = %s'.format(table=task_table_name)
            fts_to_task = dict(self.db.xquery(sql, fts_batch_id))

            try:
                result = self._ftscall('get_job_status', job_id=job_id, list_files=True)
            except:
                LOG.error('Failed to get job status for FTS job %s', job_id)
                continue

            if optype == 'transfer' or optype == 'staging':
                fts_files = result['files']
            else:
                fts_files = result['dm']

            for fts_file in fts_files:
                try:
                    task_id = fts_to_task[fts_file['file_id']]
                except KeyError:
                    continue

                state = fts_file['file_state']
                exitcode = -1
                start_time = None
                finish_time = None
                get_time = False

                try:
                    message = fts_file['reason']
                except KeyError:
                    message = None

                if message is not None:
                    # Check if reason follows a known format (from which we can get the exit code)
                    matches = message_pattern.match(message)
                    if matches is not None:
                        exitcode = int(matches.group(1))
                        message = matches.group(2)
                        # Additionally, if the message is a known one, convert the exit code
                        c = find_msg_code(message)
                        if c is not None:
                            exitcode = c
                    # HDFS site with gridftp-hdfs gives a I/O error (500) when the file is not there
                    if optype == 'deletion' and 'Input/output error' in message:
                        exitcode = errno.ENOENT

                if state == 'FINISHED':
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                    get_time = True
                elif state == 'FAILED':
                    status = FileQuery.STAT_FAILED
                    get_time = True
                elif state == 'CANCELED':
                    status = FileQuery.STAT_CANCELLED
                    get_time = True
                elif state == 'SUBMITTED':
                    status = FileQuery.STAT_NEW
                else:
                    status = FileQuery.STAT_QUEUED

                if optype == 'transfer' and exitcode == errno.EEXIST:
                    # Transfer + destination exists -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0
                elif optype == 'deletion' and exitcode == errno.ENOENT:
                    # Deletion + destination does not exist -> not an error
                    status = FileQuery.STAT_DONE
                    exitcode = 0

                if get_time:
                    try:
                        start_time = calendar.timegm(time.strptime(fts_file['start_time'], '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:
                        # start time is NULL (can happen when the job is cancelled)
                        start_time = None
                    try:
                        finish_time = calendar.timegm(time.strptime(fts_file['finish_time'], '%Y-%m-%dT%H:%M:%S'))
                    except TypeError:
                        # finish time is NULL
                        # Bug fix: this handler used to reset start_time instead of
                        # finish_time, clobbering a valid start time and leaving
                        # finish_time uninitialized from the failed parse.
                        finish_time = None

                LOG.debug('%s %d: %s, %d, %s, %s, %s', optype, task_id, FileQuery.status_name(status), exitcode, message, start_time, finish_time)

                results.append((task_id, status, exitcode, message, start_time, finish_time))

        return results

    def _write_history(self, history_db, task_id, history_id, optype):
        """Map one dynamo task to its FTS server/job/file ids in the history DB."""
        if not self._read_only:
            history_db.db.insert_update('fts_servers', ('url', ), self.server_url)

        try:
            server_id = history_db.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)[0]
        except IndexError:
            server_id = 0

        sql = 'SELECT b.`job_id`, t.`fts_file_id` FROM `fts_{op}_tasks` AS t'
        sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = t.`fts_batch_id`'
        sql += ' WHERE t.`id` = %s'
        try:
            fts_job_id, fts_file_id = self.db.query(sql.format(op=optype), task_id)[0]
        except IndexError:
            return

        if not self._read_only:
            history_db.db.insert_update('fts_batches', ('fts_server_id', 'job_id'), server_id, fts_job_id)
            batch_id = history_db.db.query('SELECT `id` FROM `fts_batches` WHERE `fts_server_id` = %s AND `job_id` = %s', server_id, fts_job_id)[0]
            history_db.db.insert_update('fts_file_{op}s'.format(op=optype), ('id', 'fts_batch_id', 'fts_file_id'), history_id, batch_id, fts_file_id)

    def _forget_status(self, task_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_tasks` WHERE `id` = %s'.format(optype=optype)
        self.db.query(sql, task_id)

    def _forget_batch(self, batch_id, optype):
        if self._read_only:
            return

        sql = 'DELETE FROM `fts_{optype}_batches` WHERE `batch_id` = %s'.format(optype=optype)
        self.db.query(sql, batch_id)

    def _set_server_id(self):
        """Resolve (inserting if necessary) the DB id of this FTS server URL."""
        if not self._read_only:
            self.db.query('INSERT INTO `fts_servers` (`url`) VALUES (%s) ON DUPLICATE KEY UPDATE `url`=VALUES(`url`)', self.server_url)

        result = self.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)
        if len(result) == 0:
            self.server_id = 0
        else:
            self.server_id = result[0]
    def _get_lock(self, request, valid_only = False):
        """
        Collect detox locks matching the request parameters.
        First queries the currently active locks (registry DB); unless valid_only is
        True (or the request was by lockid and already matched), also appends expired
        locks from the history DB.
        @param request     Dict of optional filters: lockid, user, service, item,
                           sites, groups, created_before/after, expires_before/after.
        @param valid_only  If True, return only currently active locks.
        @return List of dicts describing the locks.
        """
        sql = 'SELECT l.`id`, l.`user`, l.`dn`, s.`name`, l.`item`, l.`sites`, l.`groups`,'
        sql += ' UNIX_TIMESTAMP(l.`lock_date`), UNIX_TIMESTAMP(l.`expiration_date`), l.`comment`'
        sql += ' FROM `detox_locks` AS l'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'

        constraints = []
        args = []
        # Index of the user constraint within `constraints`; the history query below
        # needs to rewrite it (user name lives in a joined table there).
        user_const = -1

        if 'lockid' in request:
            constraints.append('l.`id` IN %s' % MySQL.stringify_sequence(request['lockid']))

        if 'user' in request:
            user_const = len(constraints)
            constraints.append('l.`user` IN %s' % MySQL.stringify_sequence(request['user']))

        if 'service' in request:
            constraints.append('s.`name` = %s')
            args.append(request['service'])

        if 'item' in request:
            constraints.append('l.`item` = %s')
            args.append(request['item'])

        if 'sites' in request:
            constraints.append('l.`sites` IN %s' % MySQL.stringify_sequence(request['sites']))

        if 'groups' in request:
            constraints.append('l.`groups` IN %s' % MySQL.stringify_sequence(request['groups']))

        if 'created_before' in request:
            constraints.append('l.`lock_date` <= FROM_UNIXTIME(%s)')
            args.append(request['created_before'])

        if 'created_after' in request:
            constraints.append('l.`lock_date` >= FROM_UNIXTIME(%s)')
            args.append(request['created_after'])

        if 'expires_before' in request:
            constraints.append('l.`expiration_date` <= FROM_UNIXTIME(%s)')
            args.append(request['expires_before'])

        if 'expires_after' in request:
            constraints.append('l.`expiration_date` >= FROM_UNIXTIME(%s)')
            args.append(request['expires_after'])

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        existing = []

        for lock_id, user, dn, service, item, site, group, lock_date, expiration_date, comment in self.registry.db.xquery(sql, *args):
            lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': item,
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(lock_date)),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(expiration_date))
            }
            # Optional fields are only included when set
            if service is not None:
                lock['service'] = service
            if site is not None:
                lock['sites'] = site
            if group is not None:
                lock['groups'] = group
            if comment is not None:
                lock['comment'] = comment

            existing.append(lock)

        # A lockid query that found its lock needs no history lookup
        if valid_only or ('lockid' in request and len(existing) != 0):
            return existing

        # Second pass: expired/released locks kept in the history DB. Schema differs:
        # user is a join to `users` and an `unlock_date` column exists.
        sql = 'SELECT l.`id`, u.`name`, u.`dn`, s.`name`, l.`item`, l.`sites`, l.`groups`,'
        sql += ' UNIX_TIMESTAMP(l.`lock_date`), UNIX_TIMESTAMP(l.`unlock_date`), UNIX_TIMESTAMP(l.`expiration_date`), l.`comment`'
        sql += ' FROM `detox_locks` AS l'
        sql += ' LEFT JOIN `users` AS u ON u.`id` = l.`user_id`'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'

        if len(constraints) != 0:
            if user_const != -1:
                constraints[user_const] = 'u.`name` IN %s' % MySQL.stringify_sequence(request['user'])
            sql += ' WHERE ' + ' AND '.join(constraints)

        for lock_id, user, dn, service, item, site, group, lock_date, unlock_date, expiration_date, comment in self.history.db.xquery(sql, *args):
            lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': item,
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(lock_date)),
                'unlocked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(unlock_date)),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(expiration_date))
            }
            if service is not None:
                lock['service'] = service
            if site is not None:
                lock['sites'] = site
            if group is not None:
                lock['groups'] = group
            if comment is not None:
                lock['comment'] = comment

            existing.append(lock)

        return existing
class RLFSM(object): """ File operations manager using MySQL tables for queue bookkeeping. Also implies the inventory backend is MySQL. """ class Subscription(object): __slots__ = ['id', 'status', 'file', 'destination', 'disk_sources', 'tape_sources', 'failed_sources', 'hold_reason'] def __init__(self, id, status, file, destination, disk_sources, tape_sources, failed_sources = None, hold_reason = None): self.id = id self.status = status self.file = file self.destination = destination self.disk_sources = disk_sources self.tape_sources = tape_sources self.failed_sources = failed_sources self.hold_reason = hold_reason class TransferTask(object): __slots__ = ['id', 'subscription', 'source'] def __init__(self, subscription, source): self.id = None self.subscription = subscription self.source = source class Desubscription(object): __slots__ = ['id', 'status', 'file', 'site'] def __init__(self, id, status, file, site): self.id = id self.status = status self.file = file self.site = site class DeletionTask(object): __slots__ = ['id', 'desubscription'] def __init__(self, desubscription): self.id = None self.desubscription = desubscription # default config _config = '' @staticmethod def set_default(config): RLFSM._config = Configuration(config) def __init__(self, config = None): if config is None: config = RLFSM._config # Handle to the inventory DB self.db = MySQL(config.db.db_params) # Handle to the history DB self.history_db = HistoryDatabase(config.get('history', None)) # FileTransferOperation backend (can make it a map from (source, dest) to operator) self.transfer_operations = [] if 'transfer' in config: for condition_text, module, conf in config.transfer: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.transfer_operations.append((condition, FileTransferOperation.get_instance(module, conf))) if 'transfer_query' in config: self.transfer_queries = [] for condition_text, module, conf in 
config.transfer_query: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.transfer_queries.append(condition, FileTransferQuery.get_instance(module, conf)) else: self.transfer_queries = self.transfer_operations if 'deletion' in config: self.deletion_operations = [] for condition_text, module, conf in config.deletion: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.deletion_operations.append(condition, FileDeletionOperation.get_instance(module, conf)) else: self.deletion_operations = self.transfer_operations if 'deletion_query' in config: self.deletion_queries = [] for condition_text, module, conf in config.deletion_query: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.deletion_queries.append(condition, FileDeletionQuery.get_instance(module, conf)) else: self.deletion_queries = self.deletion_operations self.sites_in_downtime = [] # Cycle thread self.main_cycle = None self.cycle_stop = threading.Event() self.set_read_only(config.get('read_only', False)) def set_read_only(self, value = True): self._read_only = value self.history_db.set_read_only(value) for _, op in self.transfer_operations: op.set_read_only(value) if self.transfer_queries is not self.transfer_operations: for _, qry in self.transfer_queries: qry.set_read_only(value) if self.deletion_operations is not self.transfer_operations: for _, op in self.deletion_operations: op.set_read_only(value) if self.deletion_queries is not self.deletion_operations: for _, qry in self.deletion_queries: qry.set_read_only(value) def start(self, inventory): """ Start the file operations management cycle. Issue transfer and deletion tasks to the backend. 
        """
        if self.main_cycle is not None:
            return

        LOG.info('Starting file operations manager')

        self.main_cycle = threading.Thread(target = self._run_cycle, name = 'FOM', args = (inventory,))
        self.main_cycle.start()

        LOG.info('Started file operations manager.')

    def stop(self):
        """
        Stop the file operations management cycle.
        """
        LOG.info('Stopping file operations manager.')

        self.cycle_stop.set()
        self.main_cycle.join()

        self.main_cycle = None
        self.cycle_stop.clear()

    def transfer_files(self, inventory):
        """
        Routine for managing file transfers.
        1. Query the file transfer agent and update the status of known subscriptions.
        2. Pick up new subscriptions.
        3. Select the source for each transfer.
        4. Organize the transfers into batches.
        5. Start the transfers.
        The routine can be stopped almost at any point without corrupting the state machine.
        The only cases where stopping is problematic are within self._update_status and self._start_transfers.
        @param inventory  The inventory.
        """
        self._cleanup()

        LOG.debug('Clearing cancelled transfer tasks.')
        task_ids = self._get_cancelled_tasks('transfer')
        for _, op in self.transfer_operations:
            op.cancel_transfers(task_ids)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Fetching subscription status from the file operation agent.')
        self._update_status('transfer')

        if self.cycle_stop.is_set():
            return

        LOG.debug('Filtering out transfers to unavailable destinations.')
        if not self._read_only:
            for site in self.sites_in_downtime:
                self.db.query('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'site_unavailable\' WHERE `site_id` = (SELECT `id` FROM `sites` WHERE `name` = %s)', site.name)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Collecting new transfer subscriptions.')
        subscriptions = self.get_subscriptions(inventory, op = 'transfer', status = ['new', 'retry'])

        if self.cycle_stop.is_set():
            return

        # We check the operators here because get_subscriptions does some state update and we want that to happen
        pending_count = {}
        n_available = 0
        for _, op in self.transfer_operations:
            pending_count[op] = op.num_pending_transfers()
            if pending_count[op] <= op.max_pending_transfers:
                n_available += 1

        if n_available == 0:
            LOG.info('No transfer operators are available at the moment.')
            return

        LOG.debug('Identifying source sites for %d transfers.', len(subscriptions))
        tasks = self._select_source(subscriptions)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Organizing %d transfers into batches.', len(tasks))

        # Group the tasks by destination site
        by_dest = {}
        for task in tasks:
            try:
                by_dest[task.subscription.destination].append(task)
            except KeyError:
                by_dest[task.subscription.destination] = [task]

        def issue_tasks(op, my_tasks):
            # Submit tasks to one operator in batches; returns (#batches, #success, #failure).
            if len(my_tasks) == 0:
                return 0, 0, 0

            batches = op.form_batches(my_tasks)

            if self.cycle_stop.is_set():
                return 0, 0, 0

            nb = 0
            ns = 0
            nf = 0

            LOG.debug('Issuing transfer tasks.')
            for batch_tasks in batches:
                s, f = self._start_transfers(op, batch_tasks)
                nb += 1
                ns += s
                nf += f

                pending_count[op] += s
                if pending_count[op] > op.max_pending_transfers:
                    break

                if self.cycle_stop.is_set():
                    break

            return nb, ns, nf

        num_success = 0
        num_failure = 0
        num_batches = 0

        # Dispatch tasks to the operator whose condition matches the destination;
        # the operator registered with condition None is remembered as the default.
        for condition, op in self.transfer_operations:
            if condition is None:
                default_op = op
                continue

            my_tasks = []
            for site in by_dest.keys():
                if condition.match(site):
                    my_tasks.extend(by_dest.pop(site))

            if pending_count[op] > op.max_pending_transfers:
                continue

            nb, ns, nf = issue_tasks(op, my_tasks)
            num_batches += nb
            num_success += ns
            num_failure += nf

            if self.cycle_stop.is_set():
                break
        else: # default condition
            # for-else: runs only when the loop was not broken out of.
            # NOTE(review): assumes an operator with condition None exists; otherwise
            # default_op would be unbound here - confirm against configuration.
            if pending_count[default_op] <= default_op.max_pending_transfers:
                my_tasks = sum(by_dest.itervalues(), [])
                nb, ns, nf = issue_tasks(default_op, my_tasks)
                num_batches += nb
                num_success += ns
                num_failure += nf

        if num_success + num_failure != 0:
            LOG.info('Issued transfer tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)
        else:
            LOG.debug('Issued transfer tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)

    def delete_files(self, inventory):
        """
        Routine for managing file deletions.
        1. Query the file deletion agent and update the status of known subscriptions.
        2. Register the paths for completed deletions as candidates of empty directories.
        3. Pick up new subscriptions.
        4. Organize the deletions into batches.
        5. Start the deletions.
        The routine can be stopped almost at any point without corrupting the state machine.
        The only cases where stopping is problematic are within self._update_status and self._start_deletions.
        @param inventory  The inventory.
        """
        self._cleanup()

        LOG.debug('Clearing cancelled deletion tasks.')
        task_ids = self._get_cancelled_tasks('deletion')
        for _, op in self.deletion_operations:
            op.cancel_deletions(task_ids)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Fetching deletion status from the file operation agent.')
        completed = self._update_status('deletion')

        LOG.debug('Recording candidates for empty directories.')
        self._set_dirclean_candidates(completed, inventory)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Filtering out transfers to unavailable destinations.')
        if not self._read_only:
            for site in self.sites_in_downtime:
                self.db.query('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'site_unavailable\' WHERE `site_id` = (SELECT `id` FROM `sites` WHERE `name` = %s)', site.name)

        if self.cycle_stop.is_set():
            return

        LOG.debug('Collecting new deletion subscriptions.')
        desubscriptions = self.get_subscriptions(inventory, op = 'deletion', status = ['new', 'retry'])

        if self.cycle_stop.is_set():
            return

        # See transfer_files
        pending_count = {}
        n_available = 0
        for _, op in self.deletion_operations:
            pending_count[op] = op.num_pending_deletions()
            if pending_count[op] <= op.max_pending_deletions:
                n_available += 1

        if n_available == 0:
            LOG.info('No deletion operators are available at the moment.')
            return

        tasks = [RLFSM.DeletionTask(d) for d in desubscriptions]

        # Group the tasks by site
        by_site = {}
        for task in tasks:
            try:
                by_site[task.desubscription.site].append(task)
            except KeyError:
                by_site[task.desubscription.site] = [task]

        LOG.debug('Organizing the deletions into batches.')

        def issue_tasks(op, my_tasks):
            # Submit deletion tasks to one operator in batches; returns (#batches, #success, #failure).
            if len(my_tasks) == 0:
                return 0, 0, 0

            batches = op.form_batches(my_tasks)

            if self.cycle_stop.is_set():
                return 0, 0, 0

            nb = 0
            ns = 0
            nf = 0

            LOG.debug('Issuing deletion tasks for %d batches.', len(batches))
            for batch_tasks in batches:
                LOG.debug('Batch with %d tasks.', len(batch_tasks))
                s, f = self._start_deletions(op, batch_tasks)
                nb += 1
                ns += s
                nf += f

                pending_count[op] += s
                if pending_count[op] > op.max_pending_deletions:
                    break

                if self.cycle_stop.is_set():
                    break

            return nb, ns, nf

        num_success = 0
        num_failure = 0
        num_batches = 0

        # Dispatch to the operator whose condition matches the site; the operator
        # registered with condition None is remembered as the default.
        for condition, op in self.deletion_operations:
            if condition is None:
                default_op = op
                continue

            my_tasks = []
            for site in by_site.keys():
                if condition.match(site):
                    my_tasks.extend(by_site.pop(site))

            if pending_count[op] > op.max_pending_deletions:
                continue

            nb, ns, nf = issue_tasks(op, my_tasks)
            num_batches += nb; num_success += ns; num_failure += nf;

            if self.cycle_stop.is_set():
                break
        else: # default condition
            # for-else: runs only when the loop was not broken out of.
            if pending_count[default_op] <= default_op.max_pending_deletions:
                my_tasks = sum(by_site.itervalues(), [])
                nb, ns, nf = issue_tasks(default_op, my_tasks)
                num_batches += nb; num_success += ns; num_failure += nf;

        if num_success + num_failure != 0:
            LOG.info('Issued deletion tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)
        else:
            LOG.debug('Issued deletion tasks: %d success, %d failure. %d batches.', num_success, num_failure, num_batches)

    def subscribe_file(self, site, lfile):
        """
        Make a file subscription at a site.
        @param site  Site object
        @param lfile File object
        """
        LOG.debug('Subscribing %s to %s', lfile.lfn, site.name)
        self._subscribe(site, lfile, 0)

    def desubscribe_file(self, site, lfile):
        """
        Book deletion of a file at a site.
        @param site  Site object
        @param lfile File object
        """
        LOG.debug('Desubscribing %s from %s', lfile.lfn, site.name)
        self._subscribe(site, lfile, 1)

    def cancel_subscription(self, site = None, lfile = None, sub_id = None):
        """
        Mark a subscription cancelled, addressed either by (site, lfile) or by sub_id.
        @raise OperationalError when sub_id is None and site/lfile are incomplete.
        """
        sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\' WHERE '

        if sub_id is None:
            if site is None or lfile is None:
                raise OperationalError('site and lfile must be non-None.')

            sql += '`file_id` = %s AND `site_id` = %s'
            if not self._read_only:
                self.db.query(sql, lfile.id, site.id)
        else:
            sql += '`id` = %s'
            if not self._read_only:
                self.db.query(sql, sub_id)

    def cancel_desubscription(self, site = None, lfile = None, sub_id = None):
        # Subscriptions and desubscriptions share the file_subscriptions table.
        self.cancel_subscription(site = site, lfile = lfile, sub_id = sub_id)

    def convert_pre_subscriptions(self, inventory):
        """
        Turn pre-subscriptions (recorded by name before inventory ids were known)
        into real subscriptions, then delete the converted rows.
        Entries whose file or site is not (yet) in the inventory are left in place.
        """
        sql = 'SELECT `id`, `file_name`, `site_name`, UNIX_TIMESTAMP(`created`), `delete` FROM `file_pre_subscriptions`'

        sids = []

        for sid, lfn, site_name, created, delete in self.db.query(sql):
            lfile = inventory.find_file(lfn)
            if lfile is None or lfile.id == 0:
                continue

            try:
                site = inventory.sites[site_name]
            except KeyError:
                continue

            if site.id == 0:
                continue

            sids.append(sid)

            self._subscribe(site, lfile, delete, created = created)

        if not self._read_only:
            self.db.lock_tables(write = ['file_pre_subscriptions'])
            self.db.delete_many('file_pre_subscriptions', 'id', sids)
            # Reset the auto-increment counter once the table is fully drained
            if self.db.query('SELECT COUNT(*) FROM `file_pre_subscriptions`')[0] == 0:
                self.db.query('ALTER TABLE `file_pre_subscriptions` AUTO_INCREMENT = 1')
            self.db.unlock_tables()

    def get_subscriptions(self, inventory, op = None, status = None):
        """
        Return a list containing Subscription and Desubscription objects ordered by the id.
        @param inventory  Dynamo inventory
        @param op         If set to 'transfer' or 'deletion', limit to the operation type.
        @param status     If not None, set to list of status strings to limit the query.
        """
        # First convert all pre-subscriptions
        self.convert_pre_subscriptions(inventory)

        subscriptions = []

        get_all = 'SELECT u.`id`, u.`status`, u.`delete`, f.`block_id`, f.`name`, s.`name`, u.`hold_reason` FROM `file_subscriptions` AS u'
        get_all += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
        get_all += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

        constraints = []
        if op == 'transfer':
            constraints.append('u.`delete` = 0')
        elif op == 'deletion':
            constraints.append('u.`delete` = 1')
        if status is not None:
            constraints.append('u.`status` IN ' + MySQL.stringify_sequence(status))

        if len(constraints) != 0:
            get_all += ' WHERE ' + ' AND '.join(constraints)

        # Ordering enables the per-site / per-block caching in the loop below
        get_all += ' ORDER BY s.`id`, f.`block_id`'

        get_tried_sites = 'SELECT s.`name`, f.`exitcode` FROM `failed_transfers` AS f'
        get_tried_sites += ' INNER JOIN `sites` AS s ON s.`id` = f.`source_id`'
        get_tried_sites += ' WHERE f.`subscription_id` = %s'

        # Cache keys for the current destination site and block
        _destination_name = ''
        _block_id = -1

        no_source = []
        all_failed = []
        to_done = []

        COPY = 0
        DELETE = 1

        for row in self.db.query(get_all):
            sub_id, st, optype, block_id, file_name, site_name, hold_reason = row

            if site_name != _destination_name:
                _destination_name = site_name
                try:
                    destination = inventory.sites[site_name]
                except KeyError:
                    # Site disappeared from the inventory - weird but can happen!
                    destination = None
                # Invalidate the block cache when the site changes
                _block_id = -1

            if destination is None:
                continue

            if block_id != _block_id:
                lfile = inventory.find_file(file_name)
                if lfile is None:
                    # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                    continue

                _block_id = block_id
                block = lfile.block
                dest_replica = block.find_replica(destination)
            else:
                lfile = block.find_file(file_name)
                if lfile is None:
                    # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                    continue

            if dest_replica is None and st != 'cancelled':
                LOG.debug('Destination replica for %s does not exist.
Canceling the subscription.', file_name) # Replica was invalidated sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\'' sql += ' WHERE `id` = %s' if not self._read_only: self.db.query(sql, sub_id) if status is not None and 'cancelled' not in status: # We are not asked to return cancelled subscriptions continue st = 'cancelled' if optype == COPY: disk_sources = None tape_sources = None failed_sources = None if st not in ('done', 'held', 'cancelled'): if dest_replica.has_file(lfile): LOG.debug('%s already exists at %s', file_name, site_name) to_done.append(sub_id) st = 'done' else: disk_sources = [] tape_sources = [] for replica in block.replicas: if replica.site == destination or replica.site.status != Site.STAT_READY: continue if replica.has_file(lfile): if replica.site.storage_type == Site.TYPE_DISK: disk_sources.append(replica.site) elif replica.site.storage_type == Site.TYPE_MSS: tape_sources.append(replica.site) if len(disk_sources) + len(tape_sources) == 0: LOG.warning('Transfer of %s to %s has no source.', file_name, site_name) no_source.append(sub_id) st = 'held' if st == 'retry': failed_sources = {} for source_name, exitcode in self.db.query(get_tried_sites, sub_id): try: source = inventory.sites[source_name] except KeyError: # this site may have been deleted in this process continue try: failed_sources[source].append(exitcode) except KeyError: if source not in disk_sources and source not in tape_sources: # this is not a source site any more continue failed_sources[source] = [exitcode] if len(failed_sources) == len(disk_sources) + len(tape_sources): # transfers from all sites failed at least once for codes in failed_sources.itervalues(): if codes[-1] not in irrecoverable_errors: # This site failed for a recoverable reason break else: # last failure from all sites due to irrecoverable errors LOG.warning('Transfer of %s to %s failed from all sites.', file_name, site_name) all_failed.append(sub_id) st = 'held' # st value may have changed - filter 
again if status is None or st in status: subscription = RLFSM.Subscription(sub_id, st, lfile, destination, disk_sources, tape_sources, failed_sources, hold_reason) subscriptions.append(subscription) elif optype == DELETE: if st not in ('done', 'held', 'cancelled') and not dest_replica.has_file(lfile): LOG.debug('%s is already gone from %s', file_name, site_name) to_done.append(sub_id) st = 'done' if status is None or st in status: desubscription = RLFSM.Desubscription(sub_id, st, lfile, destination) subscriptions.append(desubscription) if len(to_done) + len(no_source) + len(all_failed) != 0: msg = 'Subscriptions terminated directly: %d done' % len(to_done) if len(no_source) != 0: msg += ', %d held with reason "no_source"' % len(no_source) if len(all_failed) != 0: msg += ', %d held with reason "all_failed"' % len(all_failed) LOG.info(msg) if not self._read_only: self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'done\', `last_update` = NOW()', 'id', to_done) self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'no_source\', `last_update` = NOW()', 'id', no_source) self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'all_failed\', `last_update` = NOW()', 'id', all_failed) # Clean up subscriptions for deleted files / sites sql = 'DELETE FROM u USING `file_subscriptions` AS u' sql += ' LEFT JOIN `files` AS f ON f.`id` = u.`file_id`' sql += ' LEFT JOIN `sites` AS s ON s.`id` = u.`site_id`' sql += ' WHERE f.`name` IS NULL OR s.`name` IS NULL' self.db.query(sql) sql = 'DELETE FROM f USING `failed_transfers` AS f' sql += ' LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id`' sql += ' WHERE u.`id` IS NULL' self.db.query(sql) return subscriptions def close_subscriptions(self, done_ids): """ Get subscription completion acknowledgments. 
""" if not self._read_only: self.db.delete_many('file_subscriptions', 'id', done_ids) def release_subscription(self, subscription): """ Clear failed transfers list and set the subscription status to retry. """ if subscription.status != 'held': return if self._read_only: return self.db.query('DELETE FROM `failed_transfers` WHERE `subscription_id` = %s', subscription.id) self.db.query('UPDATE `file_subscriptions` SET `status` = \'retry\' WHERE `id` = %s', subscription.id) def _run_cycle(self, inventory): while True: if self.cycle_stop.is_set(): break LOG.debug('Checking and executing new file transfer subscriptions.') self.transfer_files(inventory) if self.cycle_stop.is_set(): break LOG.debug('Checking and executing new file deletion subscriptions.') self.delete_files(inventory) is_set = self.cycle_stop.wait(30) if is_set: # is true if in Python 2.7 and the flag is set break def _cleanup(self): if self._read_only: return # Make the tables consistent in case the previous cycles was terminated prematurely # There should not be tasks with subscription status new sql = 'DELETE FROM t USING `transfer_tasks` AS t' sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = t.`subscription_id`' sql += ' WHERE u.`status` IN (\'new\', \'retry\')' self.db.query(sql) sql = 'DELETE FROM t USING `deletion_tasks` AS t' sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = t.`subscription_id`' sql += ' WHERE u.`status` IN (\'new\', \'retry\')' self.db.query(sql) # There should not be batches with no tasks sql = 'DELETE FROM b USING `transfer_batches` AS b LEFT JOIN `transfer_tasks` AS t ON t.`batch_id` = b.`id` WHERE t.`batch_id` IS NULL' self.db.query(sql) sql = 'DELETE FROM b USING `deletion_batches` AS b LEFT JOIN `deletion_tasks` AS t ON t.`batch_id` = b.`id` WHERE t.`batch_id` IS NULL' self.db.query(sql) # and tasks with no batches sql = 'DELETE FROM t USING `transfer_tasks` AS t LEFT JOIN `transfer_batches` AS b ON b.`id` = t.`batch_id` WHERE b.`id` IS NULL' 
self.db.query(sql) sql = 'DELETE FROM t USING `deletion_tasks` AS t LEFT JOIN `deletion_batches` AS b ON b.`id` = t.`batch_id` WHERE b.`id` IS NULL' self.db.query(sql) # Cleanup the plugins (might delete tasks) for _, op in self.transfer_operations: op.cleanup() if self.deletion_operations is not self.transfer_operations: for _, op in self.deletion_operations: op.cleanup() # Reset inbatch subscriptions with no task to new state sql = 'UPDATE `file_subscriptions` SET `status` = \'new\' WHERE `status` = \'inbatch\' AND `id` NOT IN (SELECT `subscription_id` FROM `transfer_tasks`) AND `id` NOT IN (SELECT `subscription_id` FROM `deletion_tasks`)' self.db.query(sql) # Delete canceled subscriptions with no task (ones with task need to be archived in update_status) sql = 'DELETE FROM u USING `file_subscriptions` AS u LEFT JOIN `transfer_tasks` AS t ON t.`subscription_id` = u.`id` WHERE u.`delete` = 0 AND u.`status` = \'cancelled\' AND t.`id` IS NULL' self.db.query(sql) sql = 'DELETE FROM u USING `file_subscriptions` AS u LEFT JOIN `deletion_tasks` AS t ON t.`subscription_id` = u.`id` WHERE u.`delete` = 1 AND u.`status` = \'cancelled\' AND t.`id` IS NULL' self.db.query(sql) # Delete failed transfers with no subscription sql = 'DELETE FROM f USING `failed_transfers` AS f LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id` WHERE u.`id` IS NULL' self.db.query(sql) def _subscribe(self, site, lfile, delete, created = None): opp_op = 0 if delete == 1 else 1 now = time.strftime('%Y-%m-%d %H:%M:%S') if created is None: created = now else: created = datetime.datetime(*time.localtime(created)[:6]) if lfile.id == 0 or site.id == 0: # file is not registered in inventory store yet; update the presubscription if not self._read_only: fields = ('file_name', 'site_name', 'created', 'delete') self.db.insert_update('file_pre_subscriptions', fields, lfile.lfn, site.name, now, delete, update_columns = ('delete',)) return if not self._read_only: self.db.lock_tables(write = 
['file_subscriptions']) try: sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\'' sql += ' WHERE `file_id` = %s AND `site_id` = %s AND `delete` = %s' sql += ' AND `status` IN (\'new\', \'inbatch\', \'retry\', \'held\')' if not self._read_only: self.db.query(sql, lfile.id, site.id, opp_op) fields = ('file_id', 'site_id', 'status', 'delete', 'created', 'last_update') if not self._read_only: self.db.insert_update('file_subscriptions', fields, lfile.id, site.id, 'new', delete, now, now, update_columns = ('status', 'last_update')) finally: if not self._read_only: self.db.unlock_tables() def _get_cancelled_tasks(self, optype): if optype == 'transfer': delete = 0 else: delete = 1 sql = 'SELECT q.`id` FROM `{op}_tasks` AS q'.format(op = optype) sql += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`' sql += ' WHERE u.`status` = \'cancelled\' AND u.`delete` = %d' % delete return self.db.query(sql) def _update_status(self, optype): if optype == 'transfer': site_columns = 'ss.`name`, sd.`name`' site_joins = ' INNER JOIN `sites` AS ss ON ss.`id` = q.`source_id`' site_joins += ' INNER JOIN `sites` AS sd ON sd.`id` = u.`site_id`' else: site_columns = 's.`name`' site_joins = ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`' get_task_data = 'SELECT u.`id`, f.`name`, f.`size`, UNIX_TIMESTAMP(q.`created`), ' + site_columns + ' FROM `{op}_tasks` AS q' get_task_data += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`' get_task_data += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`' get_task_data += site_joins get_task_data += ' WHERE q.`id` = %s' get_task_data = get_task_data.format(op = optype) if optype == 'transfer': history_table_name = 'file_transfers' history_site_fields = ('source_id', 'destination_id') else: history_table_name = 'file_deletions' history_site_fields = ('site_id',) history_fields = ('file_id', 'exitcode', 'message', 'batch_id', 'created', 'started', 'finished', 'completed') + history_site_fields if 
optype == 'transfer': insert_failure = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)' insert_failure += ' SELECT `id`, `subscription_id`, `source_id`, %s FROM `transfer_tasks` WHERE `id` = %s' insert_failure += ' ON DUPLICATE KEY UPDATE `id`=VALUES(`id`)' delete_failures = 'DELETE FROM `failed_transfers` WHERE `subscription_id` = %s' get_subscription_status = 'SELECT `status` FROM `file_subscriptions` WHERE `id` = %s' update_subscription = 'UPDATE `file_subscriptions` SET `status` = %s, `last_update` = NOW() WHERE `id` = %s' delete_subscription = 'DELETE FROM `file_subscriptions` WHERE `id` = %s' delete_task = 'DELETE FROM `{op}_tasks` WHERE `id` = %s'.format(op = optype) delete_batch = 'DELETE FROM `{op}_batches` WHERE `id` = %s'.format(op = optype) done_subscriptions = [] num_success = 0 num_failure = 0 num_cancelled = 0 # Collect completed tasks for batch_id in self.db.query('SELECT `id` FROM `{op}_batches`'.format(op = optype)): results = [] if optype == 'transfer': for _, query in self.transfer_queries: results = query.get_transfer_status(batch_id) if len(results) != 0: break else: for _, query in self.deletion_queries: results = query.get_deletion_status(batch_id) if len(results) != 0: break batch_complete = True for task_id, status, exitcode, message, start_time, finish_time in results: # start_time and finish_time can be None LOG.debug('%s result: %d %s %d %s %s', optype, task_id, FileQuery.status_name(status), exitcode, start_time, finish_time) if status == FileQuery.STAT_DONE: num_success += 1 elif status == FileQuery.STAT_FAILED: num_failure += 1 elif status == FileQuery.STAT_CANCELLED: num_cancelled += 1 else: batch_complete = False continue try: task_data = self.db.query(get_task_data, task_id)[0] except IndexError: LOG.warning('%s task %d got lost.', optype, task_id) if optype == 'transfer': query.forget_transfer_status(task_id) else: query.forget_deletion_status(task_id) if not self._read_only: 
self.db.query(delete_task, task_id) continue subscription_id, lfn, size, create_time = task_data[:4] if optype == 'transfer': source_name, dest_name = task_data[4:] history_site_ids = ( self.history_db.save_sites([source_name], get_ids = True)[0], self.history_db.save_sites([dest_name], get_ids = True)[0] ) else: site_name = task_data[4] history_site_ids = (self.history_db.save_sites([site_name], get_ids = True)[0],) file_id = self.history_db.save_files([(lfn, size)], get_ids = True)[0] if start_time is None: sql_start_time = None else: sql_start_time = datetime.datetime(*time.localtime(start_time)[:6]) if finish_time is None: sql_finish_time = None else: sql_finish_time = datetime.datetime(*time.localtime(finish_time)[:6]) values = (file_id, exitcode, message, batch_id, datetime.datetime(*time.localtime(create_time)[:6]), sql_start_time, sql_finish_time, MySQL.bare('NOW()')) + history_site_ids if optype == 'transfer': LOG.debug('Archiving transfer of %s from %s to %s (exitcode %d)', lfn, source_name, dest_name, exitcode) else: LOG.debug('Archiving deletion of %s at %s (exitcode %d)', lfn, site_name, exitcode) if self._read_only: history_id = 0 else: history_id = self.history_db.db.insert_get_id(history_table_name, history_fields, values) if optype == 'transfer': query.write_transfer_history(self.history_db, task_id, history_id) else: query.write_deletion_history(self.history_db, task_id, history_id) # We check the subscription status and update accordingly. Need to lock the tables. if not self._read_only: self.db.lock_tables(write = ['file_subscriptions']) try: subscription_status = self.db.query(get_subscription_status, subscription_id)[0] if subscription_status == 'inbatch': if status == FileQuery.STAT_DONE: LOG.debug('Subscription %d done.', subscription_id) if not self._read_only: self.db.query(update_subscription, 'done', subscription_id) elif status == FileQuery.STAT_FAILED: LOG.debug('Subscription %d failed (exit code %d). 
Flagging retry.', subscription_id, exitcode) if not self._read_only: self.db.query(update_subscription, 'retry', subscription_id) elif subscription_status == 'cancelled': # subscription is cancelled and task terminated -> delete the subscription now, irrespective of the task status LOG.debug('Subscription %d is cancelled.', subscription_id) if not self._read_only: self.db.query(delete_subscription, subscription_id) finally: if not self._read_only: self.db.unlock_tables() if not self._read_only: if optype == 'transfer': if subscription_status == 'cancelled' or (subscription_status == 'inbatch' and status == FileQuery.STAT_DONE): # Delete entries from failed_transfers table self.db.query(delete_failures, subscription_id) elif subscription_status == 'inbatch' and status == FileQuery.STAT_FAILED: # Insert entry to failed_transfers table self.db.query(insert_failure, exitcode, task_id) self.db.query(delete_task, task_id) if status == FileQuery.STAT_DONE: done_subscriptions.append(subscription_id) if optype == 'transfer': query.forget_transfer_status(task_id) else: query.forget_deletion_status(task_id) if self.cycle_stop.is_set(): break if batch_complete: if not self._read_only: self.db.query(delete_batch, batch_id) if optype == 'transfer': query.forget_transfer_batch(batch_id) else: query.forget_deletion_batch(batch_id) if num_success + num_failure + num_cancelled != 0: LOG.info('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled) else: LOG.debug('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled) return done_subscriptions def _select_source(self, subscriptions): """ Intelligently select the best source for each subscription. 
@param subscriptions List of Subscription objects @return List of TransferTask objects """ def find_site_to_try(sources, failed_sources): not_tried = set(sources) if failed_sources is not None: not_tried -= set(failed_sources.iterkeys()) LOG.debug('%d sites not tried', len(not_tried)) if len(not_tried) == 0: if failed_sources is None: return None # we've tried all sites. Did any of them fail with a recoverable error? sites_to_retry = [] for site, codes in failed_sources.iteritems(): if site not in sources: continue if codes[-1] not in irrecoverable_errors: sites_to_retry.append(site) if len(sites_to_retry) == 0: return None else: # select the least failed site by_failure = sorted(sites_to_retry, key = lambda s: len(failed_sources[s])) LOG.debug('%s has the least failures', by_failure[0].name) return by_failure[0] else: LOG.debug('Selecting randomly') return random.choice(list(not_tried)) tasks = [] for subscription in subscriptions: LOG.debug('Selecting a disk source for subscription %d (%s to %s)', subscription.id, subscription.file.lfn, subscription.destination.name) source = find_site_to_try(subscription.disk_sources, subscription.failed_sources) if source is None: LOG.debug('Selecting a tape source for subscription %d', subscription.id) source = find_site_to_try(subscription.tape_sources, subscription.failed_sources) if source is None: # If both disk and tape failed irrecoveably, the subscription must be placed in held queue in get_subscriptions. # Reaching this line means something is wrong. LOG.warning('Could not find a source for transfer of %s to %s from %d disk and %d tape candidates.', subscription.file.lfn, subscription.destination.name, len(subscription.disk_sources), len(subscription.tape_sources)) continue tasks.append(RLFSM.TransferTask(subscription, source)) return tasks def _start_transfers(self, transfer_operation, tasks): # start the transfer of tasks. 
If batch submission fails, make progressively smaller batches until failing tasks are identified. if self._read_only: batch_id = 0 else: self.db.query('INSERT INTO `transfer_batches` (`id`) VALUES (0)') batch_id = self.db.last_insert_id LOG.debug('New transfer batch %d for %d files.', batch_id, len(tasks)) # local time now = time.strftime('%Y-%m-%d %H:%M:%S') # need to create the transfer tasks first to have ids assigned fields = ('subscription_id', 'source_id', 'batch_id', 'created') mapping = lambda t: (t.subscription.id, t.source.id, batch_id, now) if not self._read_only: self.db.insert_many('transfer_tasks', fields, mapping, tasks) # set the task ids tasks_by_sub = dict((t.subscription.id, t) for t in tasks) for task_id, subscription_id in self.db.xquery('SELECT `id`, `subscription_id` FROM `transfer_tasks` WHERE `batch_id` = %s', batch_id): tasks_by_sub[subscription_id].id = task_id result = transfer_operation.start_transfers(batch_id, tasks) successful = [task for task, success in result.iteritems() if success] if not self._read_only: self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'inbatch\', `last_update` = NOW()', 'id', [t.subscription.id for t in successful]) if len(successful) != len(result): failed = [task for task, success in result.iteritems() if not success] for task in failed: LOG.error('Cannot issue transfer of %s from %s to %s', task.subscription.file.lfn, task.source.name, task.subscription.destination.name) failed_ids = [t.id for t in failed] sql = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)' sql += ' SELECT `id`, `subscription_id`, `source_id`, -1 FROM `transfer_tasks`' self.db.execute_many(sql, 'id', failed_ids) self.db.delete_many('transfer_tasks', 'id', failed_ids) self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'retry\', `last_update` = NOW()', 'id', [t.subscription.id for t in failed]) return len(successful), len(result) - len(successful) def _start_deletions(self, 
deletion_operation, tasks): if self._read_only: batch_id = 0 else: self.db.query('INSERT INTO `deletion_batches` (`id`) VALUES (0)') batch_id = self.db.last_insert_id # local time now = time.strftime('%Y-%m-%d %H:%M:%S') fields = ('subscription_id', 'batch_id', 'created') mapping = lambda t: (t.desubscription.id, batch_id, now) if not self._read_only: self.db.insert_many('deletion_tasks', fields, mapping, tasks) # set the task ids tasks_by_sub = dict((t.desubscription.id, t) for t in tasks) for task_id, desubscription_id in self.db.xquery('SELECT `id`, `subscription_id` FROM `deletion_tasks` WHERE `batch_id` = %s', batch_id): tasks_by_sub[desubscription_id].id = task_id result = deletion_operation.start_deletions(batch_id, tasks) successful = [task for task, success in result.iteritems() if success] if not self._read_only: self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'inbatch\', `last_update` = NOW()', 'id', [t.desubscription.id for t in successful]) if len(successful) != len(result): failed = [task for task, success in result.iteritems() if not success] for task in failed: LOG.error('Cannot delete %s at %s', task.desubscription.file.lfn, task.desubscription.site.name) self.db.delete_many('deletion_tasks', 'id', [t.id for t in failed]) self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `last_update` = NOW()', 'id', [t.desubscription.id for t in failed]) return len(successful), len(result) - len(successful) def _set_dirclean_candidates(self, subscription_ids, inventory): site_dirs = {} # Clean up directories of completed subscriptions sql = 'SELECT s.`name`, f.`name` FROM `file_subscriptions` AS u' sql += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`' sql += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`' for site_name, file_name in self.db.execute_many(sql, 'u.`id`', subscription_ids): try: site = inventory.sites[site_name] except KeyError: continue try: dirs = site_dirs[site] except KeyError: dirs = 
site_dirs[site] = set() dirs.add(os.path.dirname(file_name)) def get_entry(): for site, dirs in site_dirs.iteritems(): for directory in dirs: yield site.id, directory fields = ('site_id', 'directory') if not self._read_only: self.db.insert_many('directory_cleaning_tasks', fields, None, get_entry(), do_update = True)
def __init__(self, config = None): CopyInterface.__init__(self, config) self.rlfsm = RLFSM(config.get('rlfsm', None)) self.mysql = MySQL(config.reserve_db_params)
    def _get_lock(self, request, valid_only=False):
        """
        Collect detox locks matching the request filters.

        Queries the registry database first (currently valid locks); unless
        valid_only is True or an explicit lockid filter already matched, also
        queries the history database (unlocked entries) and appends its rows.
        @param request     Dict of optional filters: lockid, user, service, item,
                           sites, groups, created_before/after, expires_before/after.
        @param valid_only  If True, skip the history lookup.
        @return List of lock dicts (lockid, user, dn, item, locked, expires,
                plus unlocked for history rows and optional service/sites/groups/comment).
        """
        sql = 'SELECT l.`id`, l.`user`, l.`dn`, s.`name`, l.`item`, l.`sites`, l.`groups`,'
        sql += ' UNIX_TIMESTAMP(l.`lock_date`), UNIX_TIMESTAMP(l.`expiration_date`), l.`comment`'
        sql += ' FROM `detox_locks` AS l'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'

        constraints = []
        args = []

        # Index of the 'user' constraint, which must be rewritten for the history query below.
        user_const = -1
        if 'lockid' in request:
            constraints.append('l.`id` IN %s' % MySQL.stringify_sequence(request['lockid']))
        if 'user' in request:
            user_const = len(constraints)
            constraints.append('l.`user` IN %s' % MySQL.stringify_sequence(request['user']))
        if 'service' in request:
            constraints.append('s.`name` = %s')
            args.append(request['service'])
        if 'item' in request:
            constraints.append('l.`item` = %s')
            args.append(request['item'])
        if 'sites' in request:
            constraints.append('l.`sites` IN %s' % MySQL.stringify_sequence(request['sites']))
        if 'groups' in request:
            constraints.append('l.`groups` IN %s' % MySQL.stringify_sequence(request['groups']))
        if 'created_before' in request:
            constraints.append('l.`lock_date` <= FROM_UNIXTIME(%s)')
            args.append(request['created_before'])
        if 'created_after' in request:
            constraints.append('l.`lock_date` >= FROM_UNIXTIME(%s)')
            args.append(request['created_after'])
        if 'expires_before' in request:
            constraints.append('l.`expiration_date` <= FROM_UNIXTIME(%s)')
            args.append(request['expires_before'])
        if 'expires_after' in request:
            constraints.append('l.`expiration_date` >= FROM_UNIXTIME(%s)')
            args.append(request['expires_after'])

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        existing = []

        for lock_id, user, dn, service, item, site, group, lock_date, expiration_date, comment in self.registry.db.xquery(sql, *args):
            lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': item,
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(lock_date)),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(expiration_date))
            }
            # Optional fields are included only when non-NULL in the DB.
            if service is not None:
                lock['service'] = service
            if site is not None:
                lock['sites'] = site
            if group is not None:
                lock['groups'] = group
            if comment is not None:
                lock['comment'] = comment

            existing.append(lock)

        if valid_only or ('lockid' in request and len(existing) != 0):
            return existing

        # History DB: the schema differs (user is a joined table, unlock_date exists).
        sql = 'SELECT l.`id`, u.`name`, u.`dn`, s.`name`, l.`item`, l.`sites`, l.`groups`,'
        sql += ' UNIX_TIMESTAMP(l.`lock_date`), UNIX_TIMESTAMP(l.`unlock_date`), UNIX_TIMESTAMP(l.`expiration_date`), l.`comment`'
        sql += ' FROM `detox_locks` AS l'
        sql += ' LEFT JOIN `users` AS u ON u.`id` = l.`user_id`'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'

        if len(constraints) != 0:
            if user_const != -1:
                # In the history schema the user name lives in the joined users table.
                constraints[user_const] = 'u.`name` IN %s' % MySQL.stringify_sequence(request['user'])
            sql += ' WHERE ' + ' AND '.join(constraints)

        for lock_id, user, dn, service, item, site, group, lock_date, unlock_date, expiration_date, comment in self.history.db.xquery(sql, *args):
            lock = {
                'lockid': lock_id,
                'user': user,
                'dn': dn,
                'item': item,
                'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(lock_date)),
                'unlocked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(unlock_date)),
                'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(expiration_date))
            }
            if service is not None:
                lock['service'] = service
            if site is not None:
                lock['sites'] = site
            if group is not None:
                lock['groups'] = group
            if comment is not None:
                lock['comment'] = comment

            existing.append(lock)

        return existing
def get_subscriptions(self, inventory, op = None, status = None):
    """
    Return a list containing Subscription and Desubscription objects ordered by the id.
    @param inventory   Dynamo inventory
    @param op          If set to 'transfer' or 'deletion', limit to the operation type.
    @param status      If not None, set to list of status strings to limit the query.
    """
    # First convert all pre-subscriptions
    self.convert_pre_subscriptions(inventory)

    subscriptions = []

    get_all = 'SELECT u.`id`, u.`status`, u.`delete`, f.`block_id`, f.`name`, s.`name`, u.`hold_reason` FROM `file_subscriptions` AS u'
    get_all += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
    get_all += ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

    constraints = []
    if op == 'transfer':
        constraints.append('u.`delete` = 0')
    elif op == 'deletion':
        constraints.append('u.`delete` = 1')
    if status is not None:
        constraints.append('u.`status` IN ' + MySQL.stringify_sequence(status))

    if len(constraints) != 0:
        get_all += ' WHERE ' + ' AND '.join(constraints)

    # Rows are ordered by (site, block) so the per-site / per-block lookups
    # below can be cached across consecutive rows.
    get_all += ' ORDER BY s.`id`, f.`block_id`'

    get_tried_sites = 'SELECT s.`name`, f.`exitcode` FROM `failed_transfers` AS f'
    get_tried_sites += ' INNER JOIN `sites` AS s ON s.`id` = f.`source_id`'
    get_tried_sites += ' WHERE f.`subscription_id` = %s'

    # Cache keys of the previous row (reset when the site changes).
    _destination_name = ''
    _block_id = -1

    # Subscription ids to be terminated directly at the end of the scan.
    no_source = []
    all_failed = []
    to_done = []

    # Values of the u.`delete` column.
    COPY = 0
    DELETE = 1

    for row in self.db.query(get_all):
        sub_id, st, optype, block_id, file_name, site_name, hold_reason = row

        if site_name != _destination_name:
            _destination_name = site_name
            try:
                destination = inventory.sites[site_name]
            except KeyError:
                # Site disappeared from the inventory - weird but can happen!
                destination = None
            _block_id = -1

        if destination is None:
            continue

        if block_id != _block_id:
            lfile = inventory.find_file(file_name)
            if lfile is None:
                # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                continue

            _block_id = block_id
            block = lfile.block
            dest_replica = block.find_replica(destination)
        else:
            # Same block as the previous row; reuse the cached block object.
            lfile = block.find_file(file_name)
            if lfile is None:
                # Dataset, block, or file was deleted from the inventory earlier in this process (deletion not reflected in the inventory store yet)
                continue

        if dest_replica is None and st != 'cancelled':
            LOG.debug('Destination replica for %s does not exist. Canceling the subscription.', file_name)

            # Replica was invalidated
            sql = 'UPDATE `file_subscriptions` SET `status` = \'cancelled\''
            sql += ' WHERE `id` = %s'
            if not self._read_only:
                self.db.query(sql, sub_id)

            if status is not None and 'cancelled' not in status:
                # We are not asked to return cancelled subscriptions
                continue

            st = 'cancelled'

        if optype == COPY:
            disk_sources = None
            tape_sources = None
            failed_sources = None

            if st not in ('done', 'held', 'cancelled'):
                if dest_replica.has_file(lfile):
                    LOG.debug('%s already exists at %s', file_name, site_name)
                    to_done.append(sub_id)
                    st = 'done'
                else:
                    # Collect candidate source sites, split by storage type.
                    disk_sources = []
                    tape_sources = []
                    for replica in block.replicas:
                        if replica.site == destination or replica.site.status != Site.STAT_READY:
                            continue

                        if replica.has_file(lfile):
                            if replica.site.storage_type == Site.TYPE_DISK:
                                disk_sources.append(replica.site)
                            elif replica.site.storage_type == Site.TYPE_MSS:
                                tape_sources.append(replica.site)

                    if len(disk_sources) + len(tape_sources) == 0:
                        LOG.warning('Transfer of %s to %s has no source.', file_name, site_name)
                        no_source.append(sub_id)
                        st = 'held'

            if st == 'retry':
                # Map site -> list of past exit codes for this subscription.
                failed_sources = {}
                for source_name, exitcode in self.db.query(get_tried_sites, sub_id):
                    try:
                        source = inventory.sites[source_name]
                    except KeyError:
                        # this site may have been deleted in this process
                        continue

                    try:
                        failed_sources[source].append(exitcode)
                    except KeyError:
                        if source not in disk_sources and source not in tape_sources:
                            # this is not a source site any more
                            continue

                        failed_sources[source] = [exitcode]

                if len(failed_sources) == len(disk_sources) + len(tape_sources):
                    # transfers from all sites failed at least once
                    for codes in failed_sources.itervalues():
                        if codes[-1] not in irrecoverable_errors:
                            # This site failed for a recoverable reason
                            break
                    else:
                        # last failure from all sites due to irrecoverable errors
                        LOG.warning('Transfer of %s to %s failed from all sites.', file_name, site_name)
                        all_failed.append(sub_id)
                        st = 'held'

            # st value may have changed - filter again
            if status is None or st in status:
                subscription = RLFSM.Subscription(sub_id, st, lfile, destination, disk_sources, tape_sources, failed_sources, hold_reason)
                subscriptions.append(subscription)

        elif optype == DELETE:
            if st not in ('done', 'held', 'cancelled') and not dest_replica.has_file(lfile):
                LOG.debug('%s is already gone from %s', file_name, site_name)
                to_done.append(sub_id)
                st = 'done'

            if status is None or st in status:
                desubscription = RLFSM.Desubscription(sub_id, st, lfile, destination)
                subscriptions.append(desubscription)

    if len(to_done) + len(no_source) + len(all_failed) != 0:
        msg = 'Subscriptions terminated directly: %d done' % len(to_done)
        if len(no_source) != 0:
            msg += ', %d held with reason "no_source"' % len(no_source)
        if len(all_failed) != 0:
            msg += ', %d held with reason "all_failed"' % len(all_failed)
        LOG.info(msg)

        if not self._read_only:
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'done\', `last_update` = NOW()', 'id', to_done)
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'no_source\', `last_update` = NOW()', 'id', no_source)
            self.db.execute_many('UPDATE `file_subscriptions` SET `status` = \'held\', `hold_reason` = \'all_failed\', `last_update` = NOW()', 'id', all_failed)

    # Clean up subscriptions for deleted files / sites
    # NOTE(review): these DELETEs run even when self._read_only is set —
    # unlike the status updates above; confirm this is intentional.
    sql = 'DELETE FROM u USING `file_subscriptions` AS u'
    sql += ' LEFT JOIN `files` AS f ON f.`id` = u.`file_id`'
    sql += ' LEFT JOIN `sites` AS s ON s.`id` = u.`site_id`'
    sql += ' WHERE f.`name` IS NULL OR s.`name` IS NULL'
    self.db.query(sql)

    sql = 'DELETE FROM f USING `failed_transfers` AS f'
    sql += ' LEFT JOIN `file_subscriptions` AS u ON u.`id` = f.`subscription_id`'
    sql += ' WHERE u.`id` IS NULL'
    self.db.query(sql)

    return subscriptions
def __init__(self):
    # Register this handler under the name 'Direct' with the base class.
    BaseHandler.__init__(self, 'Direct')
    # `config` here is a module-level object, not a constructor argument.
    # NOTE(review): db_params is expanded with ** here, whereas other classes
    # in this file pass the params object positionally (MySQL(config.db_params))
    # — presumably MySQL accepts both forms; confirm.
    self._mysql = MySQL(**config.registry.db_params)
def _create_lock(self, request, user, dn):
    """
    Insert one `detox_locks` row per combination of the requested item, sites,
    and groups, and return a list of dicts describing the created locks.
    """
    # Resolve the requested service name to its id; 0 when absent or unknown.
    service_id = 0
    if 'service' in request:
        try:
            service_id = self.registry.db.query('SELECT `id` FROM `user_services` WHERE `name` = %s', request['service'])[0]
        except IndexError:
            pass

    columns = ('item', 'sites', 'groups', 'lock_date', 'expiration_date', 'user', 'dn', 'service_id', 'comment')

    # Template row: sites and groups are filled in by the fan-out below.
    template = (request['item'], None, None, MySQL.bare('NOW()'),
                MySQL.bare('FROM_UNIXTIME(%d)' % request['expires']),
                user, dn, service_id, request.get('comment'))
    rows = [template]

    # Fan the template out over every requested site, then over every group.
    if 'sites' in request:
        rows = [row[:1] + (site,) + row[2:] for site in request['sites'] for row in rows]
    if 'groups' in request:
        rows = [row[:2] + (group,) + row[3:] for group in request['groups'] for row in rows]

    new_locks = []

    for row in rows:
        lock_id = self.registry.db.insert_get_id('detox_locks', columns, row)

        entry = {
            'lockid': lock_id,
            'user': user,
            'dn': dn,
            'item': request['item'],
            'locked': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()),
            'expires': time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime(request['expires']))
        }
        # Optional fields (indices refer to `columns` above).
        if row[7] != 0:
            entry['service'] = request['service']
        if row[1] is not None:
            entry['sites'] = row[1]
        if row[2] is not None:
            entry['groups'] = row[2]
        if 'comment' in request:
            entry['comment'] = request['comment']

        new_locks.append(entry)

    return new_locks
class RegistryDatabase(object):
    """
    Similar to HistoryDatabase, this is just one abstraction layer that
    doesn't really hide the backend technology for the registry. We still
    have the benefit of being able to use default parameters to initialize
    the registry database handle.
    """

    # default configuration
    _config = Configuration()

    @staticmethod
    def set_default(config):
        """Replace the class-wide default configuration."""
        RegistryDatabase._config = Configuration(config)

    def __init__(self, config=None):
        if config is None:
            config = RegistryDatabase._config

        self.db = MySQL(config.db_params)

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value=True):
        # Bug fix: previously this assigned True unconditionally, so the
        # read_only=False passed from __init__ (and any explicit
        # set_read_only(False) call) was silently ignored.
        self._read_only = value

    def get_locked_apps(self):
        """Return the distinct application names currently holding a lock."""
        sql = 'SELECT DISTINCT `application` FROM `activity_lock`'
        return self.db.query(sql)

    def get_app_lock(self, app):
        """
        Return (user, service, lock_time, note, depth) of the first lock held
        on `app`, or (None, None, None, None, 0) if it is not locked.
        """
        # this function can be called within a table lock, so we need to lock what we use
        self.db.lock_tables(read=[('activity_lock', 'l'), ('user_services', 's')])

        sql = 'SELECT l.`user`, s.`name`, UNIX_TIMESTAMP(l.`timestamp`), l.`note` FROM `activity_lock` AS l'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'
        sql += ' WHERE l.`application` = %s ORDER BY l.`timestamp` ASC'
        lock_data = self.db.query(sql, app)

        self.db.unlock_tables()

        if len(lock_data) == 0:
            return None, None, None, None, 0

        first_user, first_service, lock_time, note = lock_data[0]

        # Depth counts how many times the same (user, service) appears.
        # NOTE(review): entries from other users in between are not counted
        # but do not stop the count either — confirm this is intended.
        depth = 1
        for user, service, _, _ in lock_data[1:]:
            if user == first_user and service == first_service:
                depth += 1

        return first_user, first_service, lock_time, note, depth

    def lock_app(self, app, user, service=None, note=None):
        """Push one lock entry for (user, service) onto `app`'s lock stack."""
        if service is None:
            service_id = 0
        else:
            try:
                sql = 'SELECT `id` FROM `user_services` WHERE `name` = %s'
                service_id = self.db.query(sql, service)[0]
            except IndexError:
                # unknown service -> fall back to the null service id
                service_id = 0

        sql = 'INSERT INTO `activity_lock` (`user`, `service_id`, `application`, `timestamp`, `note`)'
        sql += ' VALUES (%s, %s, %s, NOW(), %s)'
        self.db.query(sql, user, service_id, app, note)

    def unlock_app(self, app, user, service=None):
        """Pop the most recent lock entry of (user, service) from `app`."""
        if service is None:
            service_id = 0
        else:
            try:
                sql = 'SELECT `id` FROM `user_services` WHERE `name` = %s'
                service_id = self.db.query(sql, service)[0]
            except IndexError:
                service_id = 0

        self.db.lock_tables(write=['activity_lock', ('activity_lock', 'l')])

        # Delete only the latest matching entry (MAX(id)); the subquery is
        # wrapped in a derived table because MySQL cannot delete from a table
        # it selects from directly.
        sql = 'DELETE FROM `activity_lock` WHERE `id` = ('
        sql += ' SELECT m FROM ('
        sql += ' SELECT MAX(`id`) m FROM `activity_lock` AS l'
        sql += ' WHERE `user` = %s AND `service_id` = %s AND `application` = %s'
        sql += ' ) AS tmp'
        sql += ')'
        self.db.query(sql, user, service_id, app)

        # a little cleanup
        if self.db.query('SELECT COUNT(*) FROM `activity_lock`')[0] == 0:
            self.db.query('ALTER TABLE `activity_lock` AUTO_INCREMENT = 1')

        self.db.unlock_tables()
class RLFSMPhEDExReserveDeletionInterface(DeletionInterface):
    """
    DeletionInterface using the Dynamo RLFSM.
    """

    def __init__(self, config=None):
        # NOTE(review): despite the config=None default, config.get() below
        # would raise on None — presumably a configuration is always supplied
        # (the companion copy interface in this file has the same pattern).
        DeletionInterface.__init__(self, config)
        self.rlfsm = RLFSM(config.get('rlfsm', None))
        self.mysql = MySQL(config.reserve_db_params)

    def set_read_only(self, value=True):  #override
        # Propagate the read-only flag to the underlying RLFSM as well.
        self._read_only = value
        self.rlfsm.set_read_only(value)

    def schedule_deletions(self, replica_list, operation_id, comments=''):  #override
        # All replicas must live at one site; the RLFSM desubscription is
        # per-site, per-file.
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        LOG.info(
            'Scheduling deletion of %d replicas from %s using RLFSM (operation %d)',
            len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            # block_replicas is None -> the whole dataset replica is deleted.
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            # Desubscribe every file of every affected block replica.
            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            # Return clones of the inputs (mirrors the copy interface above).
            clone_replica = DatasetReplica(dataset_replica.dataset,
                                           dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site,
                                                       block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        # Record the reservation rows: one per dataset when the whole replica
        # is deleted, otherwise one per block.
        if not self._read_only:
            for clone_replica, block_replicas in clones:
                if block_replicas is None:
                    self.mysql.query(
                        'INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)',
                        operation_id, clone_replica.dataset.name,
                        clone_replica.site.name)
                else:
                    for block_replica in block_replicas:
                        self.mysql.query(
                            'INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)',
                            operation_id, block_replica.block.full_name(),
                            clone_replica.site.name)

        return clones

    def deletion_status(self, operation_id):  #override
        # Status polling is not supported by this backend.
        raise NotImplementedError('deletion_status')
def save_policy(self, policy_text):
    """
    Store a deletion policy text, deduplicated by its MD5 hash, and return
    the id of the (existing or newly inserted) row.
    """
    digest = hashlib.md5(policy_text).hexdigest()

    # Look for an existing row with the same hash; compare the full text to
    # guard against a hash collision (basically impossible in practice).
    for policy_id, stored_text in self.db.query('SELECT `id`, `text` FROM `deletion_policies` WHERE `hash` = UNHEX(%s)', digest):
        if stored_text == policy_text:
            return policy_id

    # No row with a matching hash, or the text differed despite the match:
    # insert as a new policy.
    return self.db.insert_get_id('deletion_policies', columns = ('hash', 'text'), values = (MySQL.bare('UNHEX(\'%s\')' % digest), policy_text))
def _update_status(self, optype):
    """
    Poll the backend queries for the status of all tasks in `{optype}_batches`,
    archive terminated tasks into the history database, update or delete the
    corresponding `file_subscriptions` rows, and drop fully completed batches.

    @param optype  'transfer' or 'deletion' (selects tables and queries).

    @return list of subscription ids whose tasks finished successfully.
    """
    # Per-optype SQL fragments: transfers join both source and destination
    # sites; deletions only have one site.
    if optype == 'transfer':
        site_columns = 'ss.`name`, sd.`name`'
        site_joins = ' INNER JOIN `sites` AS ss ON ss.`id` = q.`source_id`'
        site_joins += ' INNER JOIN `sites` AS sd ON sd.`id` = u.`site_id`'
    else:
        site_columns = 's.`name`'
        site_joins = ' INNER JOIN `sites` AS s ON s.`id` = u.`site_id`'

    get_task_data = 'SELECT u.`id`, f.`name`, f.`size`, UNIX_TIMESTAMP(q.`created`), ' + site_columns + ' FROM `{op}_tasks` AS q'
    get_task_data += ' INNER JOIN `file_subscriptions` AS u ON u.`id` = q.`subscription_id`'
    get_task_data += ' INNER JOIN `files` AS f ON f.`id` = u.`file_id`'
    get_task_data += site_joins
    get_task_data += ' WHERE q.`id` = %s'

    get_task_data = get_task_data.format(op = optype)

    if optype == 'transfer':
        history_table_name = 'file_transfers'
        history_site_fields = ('source_id', 'destination_id')
    else:
        history_table_name = 'file_deletions'
        history_site_fields = ('site_id',)

    history_fields = ('file_id', 'exitcode', 'message', 'batch_id', 'created', 'started', 'finished', 'completed') + history_site_fields

    # failed_transfers bookkeeping only applies to transfers.
    if optype == 'transfer':
        insert_failure = 'INSERT INTO `failed_transfers` (`id`, `subscription_id`, `source_id`, `exitcode`)'
        insert_failure += ' SELECT `id`, `subscription_id`, `source_id`, %s FROM `transfer_tasks` WHERE `id` = %s'
        insert_failure += ' ON DUPLICATE KEY UPDATE `id`=VALUES(`id`)'
        delete_failures = 'DELETE FROM `failed_transfers` WHERE `subscription_id` = %s'

    get_subscription_status = 'SELECT `status` FROM `file_subscriptions` WHERE `id` = %s'

    update_subscription = 'UPDATE `file_subscriptions` SET `status` = %s, `last_update` = NOW() WHERE `id` = %s'
    delete_subscription = 'DELETE FROM `file_subscriptions` WHERE `id` = %s'

    delete_task = 'DELETE FROM `{op}_tasks` WHERE `id` = %s'.format(op = optype)

    delete_batch = 'DELETE FROM `{op}_batches` WHERE `id` = %s'.format(op = optype)

    done_subscriptions = []

    num_success = 0
    num_failure = 0
    num_cancelled = 0

    # Collect completed tasks
    for batch_id in self.db.query('SELECT `id` FROM `{op}_batches`'.format(op = optype)):
        # Ask each configured backend query in turn until one knows the batch.
        # NOTE: `query` (the last one polled) is reused below to forget task
        # and batch status for this batch.
        results = []

        if optype == 'transfer':
            for _, query in self.transfer_queries:
                results = query.get_transfer_status(batch_id)
                if len(results) != 0:
                    break
        else:
            for _, query in self.deletion_queries:
                results = query.get_deletion_status(batch_id)
                if len(results) != 0:
                    break

        batch_complete = True

        for task_id, status, exitcode, message, start_time, finish_time in results:
            # start_time and finish_time can be None
            LOG.debug('%s result: %d %s %d %s %s', optype, task_id, FileQuery.status_name(status), exitcode, start_time, finish_time)

            if status == FileQuery.STAT_DONE:
                num_success += 1
            elif status == FileQuery.STAT_FAILED:
                num_failure += 1
            elif status == FileQuery.STAT_CANCELLED:
                num_cancelled += 1
            else:
                # Task still in flight -> batch cannot be dropped yet.
                batch_complete = False
                continue

            try:
                task_data = self.db.query(get_task_data, task_id)[0]
            except IndexError:
                # Task row vanished; forget it on the backend and drop it.
                LOG.warning('%s task %d got lost.', optype, task_id)
                if optype == 'transfer':
                    query.forget_transfer_status(task_id)
                else:
                    query.forget_deletion_status(task_id)

                if not self._read_only:
                    self.db.query(delete_task, task_id)

                continue

            subscription_id, lfn, size, create_time = task_data[:4]

            # Map the site name(s) to history-database ids.
            if optype == 'transfer':
                source_name, dest_name = task_data[4:]
                history_site_ids = (
                    self.history_db.save_sites([source_name], get_ids = True)[0],
                    self.history_db.save_sites([dest_name], get_ids = True)[0]
                )
            else:
                site_name = task_data[4]
                history_site_ids = (self.history_db.save_sites([site_name], get_ids = True)[0],)

            file_id = self.history_db.save_files([(lfn, size)], get_ids = True)[0]

            # Convert epoch timestamps to datetime objects for the archive row.
            if start_time is None:
                sql_start_time = None
            else:
                sql_start_time = datetime.datetime(*time.localtime(start_time)[:6])

            if finish_time is None:
                sql_finish_time = None
            else:
                sql_finish_time = datetime.datetime(*time.localtime(finish_time)[:6])

            values = (file_id, exitcode, message, batch_id,
                      datetime.datetime(*time.localtime(create_time)[:6]),
                      sql_start_time, sql_finish_time, MySQL.bare('NOW()')) + history_site_ids

            if optype == 'transfer':
                LOG.debug('Archiving transfer of %s from %s to %s (exitcode %d)', lfn, source_name, dest_name, exitcode)
            else:
                LOG.debug('Archiving deletion of %s at %s (exitcode %d)', lfn, site_name, exitcode)

            if self._read_only:
                history_id = 0
            else:
                history_id = self.history_db.db.insert_get_id(history_table_name, history_fields, values)

            if optype == 'transfer':
                query.write_transfer_history(self.history_db, task_id, history_id)
            else:
                query.write_deletion_history(self.history_db, task_id, history_id)

            # We check the subscription status and update accordingly. Need to lock the tables.
            if not self._read_only:
                self.db.lock_tables(write = ['file_subscriptions'])

            try:
                subscription_status = self.db.query(get_subscription_status, subscription_id)[0]

                if subscription_status == 'inbatch':
                    if status == FileQuery.STAT_DONE:
                        LOG.debug('Subscription %d done.', subscription_id)
                        if not self._read_only:
                            self.db.query(update_subscription, 'done', subscription_id)

                    elif status == FileQuery.STAT_FAILED:
                        LOG.debug('Subscription %d failed (exit code %d). Flagging retry.', subscription_id, exitcode)
                        if not self._read_only:
                            self.db.query(update_subscription, 'retry', subscription_id)

                elif subscription_status == 'cancelled':
                    # subscription is cancelled and task terminated -> delete the subscription now, irrespective of the task status
                    LOG.debug('Subscription %d is cancelled.', subscription_id)
                    if not self._read_only:
                        self.db.query(delete_subscription, subscription_id)
            finally:
                # Always release the table lock taken above.
                if not self._read_only:
                    self.db.unlock_tables()

            if not self._read_only:
                if optype == 'transfer':
                    if subscription_status == 'cancelled' or (subscription_status == 'inbatch' and status == FileQuery.STAT_DONE):
                        # Delete entries from failed_transfers table
                        self.db.query(delete_failures, subscription_id)

                    elif subscription_status == 'inbatch' and status == FileQuery.STAT_FAILED:
                        # Insert entry to failed_transfers table
                        self.db.query(insert_failure, exitcode, task_id)

                self.db.query(delete_task, task_id)

            if status == FileQuery.STAT_DONE:
                done_subscriptions.append(subscription_id)

            if optype == 'transfer':
                query.forget_transfer_status(task_id)
            else:
                query.forget_deletion_status(task_id)

            if self.cycle_stop.is_set():
                break

        if batch_complete:
            if not self._read_only:
                self.db.query(delete_batch, batch_id)

            if optype == 'transfer':
                query.forget_transfer_batch(batch_id)
            else:
                query.forget_deletion_batch(batch_id)

    if num_success + num_failure + num_cancelled != 0:
        LOG.info('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled)
    else:
        LOG.debug('Archived file %s: %d succeeded, %d failed, %d cancelled.', optype, num_success, num_failure, num_cancelled)

    return done_subscriptions
class FTSFileOperation(FileTransferOperation, FileTransferQuery, FileDeletionOperation, FileDeletionQuery): def __init__(self, config): FileTransferOperation.__init__(self, config) FileTransferQuery.__init__(self, config) FileDeletionOperation.__init__(self, config) FileDeletionQuery.__init__(self, config) self.server_url = config.fts_server self.server_id = 0 # server id in the DB # Parameter "retry" for fts3.new_job. 0 = server default self.fts_retry = config.get('fts_retry', 0) # String passed to fts3.new_*_job(metadata = _) self.metadata_string = config.get('metadata_string', 'Dynamo') # Proxy to be forwarded to FTS self.x509proxy = config.get('x509proxy', None) self.x509proxy_orig = config.get('x509proxy', None) # Bookkeeping device self.db = MySQL(config.db_params) # Reuse the context object self.keep_context = config.get('keep_context', False) self._context = None def num_pending_transfers(self): #override # Check the number of files in queue # We first thought about counting files with /files, but FTS seems to return only 1000 maximum even when "limit" is set much larger #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_transfers) #return len(files) num_pending = 0 file_states = ['SUBMITTED', 'READY', 'ACTIVE', 'STAGING', 'STARTED'] jobs = self._ftscall('list_jobs', state_in = ['SUBMITTED', 'ACTIVE', 'STAGING']) from random import shuffle shuffle(jobs) total_count = 0 for job in jobs: total_count = total_count + 1 #LOG.info("List_files call 1") job_info = self._ftscall('get_job_status', job['job_id'], list_files = True) for file_info in job_info['files']: if file_info['file_state'] in file_states: num_pending += 1 if num_pending == self.max_pending_transfers + 1: # don't need to query more return num_pending return num_pending def num_pending_deletions(self): #override # See above #files = self._ftscallurl('/files?state_in=ACTIVE,SUBMITTED,READY&limit=%d' % self.max_pending_deletions) #return len(files) 
num_pending = 0 file_states = ['SUBMITTED', 'READY', 'ACTIVE'] jobs = self._ftscall('list_jobs', state_in = ['SUBMITTED', 'ACTIVE']) from random import shuffle shuffle(jobs) total_count = 0 for job in jobs: total_count = total_count + 1 #LOG.info("List_files call 2") job_info = self._ftscall('get_job_status', job['job_id'], list_files = True) for file_info in job_info['dm']: if file_info['file_state'] in file_states: num_pending += 1 if num_pending == self.max_pending_deletions + 1: # don't need to query more return num_pending return num_pending def form_batches(self, tasks): #override if len(tasks) == 0: return [] # FTS3 has no restriction on how to group the transfers, but cannot apparently take thousands # of tasks at once batches = [[]] for task in tasks: batches[-1].append(task) if len(batches[-1]) == self.batch_size: batches.append([]) return batches def start_transfers(self, batch_id, batch_tasks): #override result = {} stage_files = [] transfers = [] s_pfn_to_task = {} t_pfn_to_task = {} for task in batch_tasks: sub = task.subscription lfn = sub.file.lfn dest_pfn = sub.destination.to_pfn(lfn, 'gfal2') source_pfn = task.source.to_pfn(lfn, 'gfal2') self.x509proxy = sub.destination.x509proxy if task.source.storage_type == Site.TYPE_MSS: self.x509proxy = task.source.x509proxy if dest_pfn is None or source_pfn is None: # either gfal2 is not supported or lfn could not be mapped LOG.warning('Could not obtain PFN for %s at %s or %s', lfn, sub.destination.name, task.source.name) result[task] = False continue if self.checksum_algorithm: checksum = '%s:%s' % (self.checksum_algorithm, str(sub.file.checksum[self.checksum_index])) verify_checksum = 'target' else: checksum = None verify_checksum = False if task.source.storage_type == Site.TYPE_MSS: LOG.debug('Staging %s at %s', lfn, task.source.name) # need to stage first stage_files.append((source_pfn, dest_pfn, checksum, sub.file.size)) # task identified by the source PFN s_pfn_to_task[source_pfn] = task else: 
LOG.info("Here we are") LOG.info('Submitting transfer of %s from %s to %s to FTS', lfn, task.source.name, sub.destination.name) transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum = checksum, filesize = sub.file.size)) # there should be only one task per destination pfn t_pfn_to_task[dest_pfn] = task if len(stage_files) != 0: LOG.debug('Submit new staging job for %d files', len(stage_files)) job = fts3.new_staging_job([ff[0] for ff in stage_files], bring_online = 36000, metadata = self.metadata_string) success = self._submit_job(job, 'staging', batch_id, dict((pfn, task.id) for pfn, task in s_pfn_to_task.iteritems()), x509=task.source.x509proxy) for source_pfn, _, _, _ in stage_files: result[s_pfn_to_task[source_pfn]] = success if success and not self._read_only: LOG.debug('Recording staging queue') fields = ('id', 'source', 'destination', 'checksum', 'size') mapping = lambda ff: (s_pfn_to_task[ff[0]].id,) + ff if not self._read_only: self.db.insert_many('fts_staging_queue', fields, mapping, stage_files) if len(transfers) != 0: LOG.debug('Submit new transfer job for %d files', len(transfers)) LOG.info("Submitting transfer job from disk to site %s with proxy %s." 
% (sub.destination.name, sub.destination.x509proxy)) job = fts3.new_job(transfers, retry = self.fts_retry, overwrite = True, verify_checksum = verify_checksum, metadata = self.metadata_string) success = self._submit_job(job, 'transfer', batch_id, dict((pfn, task.id) for pfn, task in t_pfn_to_task.iteritems()), x509=sub.destination.x509proxy) for transfer in transfers: dest_pfn = transfer['destinations'][0] result[t_pfn_to_task[dest_pfn]] = success return result def start_deletions(self, batch_id, batch_tasks): #override result = {} pfn_to_task = {} for task in batch_tasks: desub = task.desubscription lfn = desub.file.lfn pfn = desub.site.to_pfn(lfn, 'gfal2') if pfn is None: # either gfal2 is not supported or lfn could not be mapped result[task] = False continue # there should be only one task per destination pfn pfn_to_task[pfn] = task job = fts3.new_delete_job(pfn_to_task.keys(), metadata = self.metadata_string) success = self._submit_job(job, 'deletion', batch_id, dict((pfn, task.id) for pfn, task in pfn_to_task.iteritems())) for task in pfn_to_task.itervalues(): result[task] = success return result def cancel_transfers(self, task_ids): #override return self._cancel(task_ids, 'transfer') def cancel_deletions(self, task_ids): #override return self._cancel(task_ids, 'deletion') def cleanup(self): #override sql = 'DELETE FROM f USING `fts_transfer_tasks` AS f' sql += ' LEFT JOIN `transfer_tasks` AS t ON t.`id` = f.`id`' sql += ' LEFT JOIN `fts_transfer_batches` AS b ON b.`id` = f.`fts_batch_id`' sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL' self.db.query(sql) sql = 'DELETE FROM f USING `fts_staging_queue` AS f' sql += ' LEFT JOIN `fts_transfer_tasks` AS t ON t.`id` = f.`id`' sql += ' WHERE t.`id` IS NULL' self.db.query(sql) sql = 'DELETE FROM f USING `fts_deletion_tasks` AS f' sql += ' LEFT JOIN `deletion_tasks` AS t ON t.`id` = f.`id`' sql += ' LEFT JOIN `fts_deletion_batches` AS b ON b.`id` = f.`fts_batch_id`' sql += ' WHERE t.`id` IS NULL OR b.`id` IS NULL' 
self.db.query(sql) sql = 'DELETE FROM f USING `fts_transfer_batches` AS f' sql += ' LEFT JOIN `transfer_batches` AS t ON t.`id` = f.`batch_id`' sql += ' WHERE t.`id` IS NULL' self.db.query(sql) sql = 'DELETE FROM f USING `fts_deletion_batches` AS f' sql += ' LEFT JOIN `deletion_batches` AS t ON t.`id` = f.`batch_id`' sql += ' WHERE t.`id` IS NULL' self.db.query(sql) # Delete the source tasks - caution: wipes out all tasks when switching the operation backend sql = 'DELETE FROM t USING `transfer_tasks` AS t' sql += ' LEFT JOIN `fts_transfer_tasks` AS f ON f.`id` = t.`id`' sql += ' WHERE f.`id` IS NULL' self.db.query(sql) sql = 'DELETE FROM t USING `deletion_tasks` AS t' sql += ' LEFT JOIN `fts_deletion_tasks` AS f ON f.`id` = t.`id`' sql += ' WHERE f.`id` IS NULL' self.db.query(sql) def get_transfer_status(self, batch_id): #override if self.server_id == 0: self._set_server_id() results = self._get_status(batch_id, 'transfer') staged_tasks = [] for task_id, status, exitcode, msg, start_time, finish_time in self._get_status(batch_id, 'staging'): if status == FileQuery.STAT_DONE: staged_tasks.append(task_id) results.append((task_id, FileQuery.STAT_QUEUED, -1, None, None, None)) else: # these tasks won't appear in results from _get_status('transfer') # because no transfer jobs have been submitted yet results.append((task_id, status, exitcode, None, start_time, finish_time)) if len(staged_tasks) != 0: if self.checksum_algorithm: verify_checksum = 'target' else: verify_checksum = None which_dest = "" transfers = [] pfn_to_tid = {} for task_id, source_pfn, dest_pfn, checksum, filesize in self.db.select_many('fts_staging_queue', ('id', 'source', 'destination', 'checksum', 'size'), 'id', staged_tasks): for s in inventory.sites.itervalues(): if s.backend in dest_pfn and s.backend != "": which_dest = s #transfers = [] #pfn_to_tid = {} transfers.append(fts3.new_transfer(source_pfn, dest_pfn, checksum = checksum, filesize = filesize)) pfn_to_tid[dest_pfn] = task_id job = 
fts3.new_job(transfers, retry = self.fts_retry, overwrite = True, verify_checksum = verify_checksum, metadata = self.metadata_string) #which_dest = "" #for s in inventory.sites.itervalues(): # if s.backend in dest_pfn and s.backend != "": # which_dest = s LOG.info("Submitting job from buffer to site %s with proxy %s." % (which_dest.name, which_dest.x509proxy)) #LOG.info("dest_pfn: %s" % dest_pfn) success = self._submit_job(job, 'transfer', batch_id, pfn_to_tid, x509=which_dest.x509proxy) if success and not self._read_only: self.db.delete_many('fts_staging_queue', 'id', pfn_to_tid.values()) return results def get_deletion_status(self, batch_id): #override if self.server_id == 0: self._set_server_id() return self._get_status(batch_id, 'deletion') def write_transfer_history(self, history_db, task_id, history_id): #override self._write_history(history_db, task_id, history_id, 'transfer') def write_deletion_history(self, history_db, task_id, history_id): #override self._write_history(history_db, task_id, history_id, 'deletion') def forget_transfer_status(self, task_id): #override return self._forget_status(task_id, 'transfer') def forget_deletion_status(self, task_id): #override return self._forget_status(task_id, 'deletion') def forget_transfer_batch(self, task_id): #override return self._forget_batch(task_id, 'transfer') def forget_deletion_batch(self, task_id): #override return self._forget_batch(task_id, 'deletion') def _ftscall(self, method, *args, **kwd): return self._do_ftscall(binding = (method, args, kwd)) def _ftscallurl(self, url): # Call to FTS URLs that don't have python bindings return self._do_ftscall(url = url) def _do_ftscall(self, binding = None, url = None): proxy = self.x509proxy if binding is not None: method, args, kwd = binding for k in kwd.iteritems(): LOG.info(k) if k[0] == "ucert": LOG.info("Setting proxy to %s" % k[1]) proxy = k[1] if self._context is None: # request_class = Request -> use "requests"-based https call (instead of default 
PyCURL, # which may not be able to handle proxy certificates depending on the cURL installation) # verify = False -> do not verify the server certificate LOG.info("Setting context with proxy %s" % proxy) context = fts3.Context(self.server_url, ucert = proxy, ukey = proxy, request_class = Request, verify = False) if self.keep_context: self._context = context else: context = self._context if binding is not None: reqstring = binding[0] else: reqstring = url LOG.debug('FTS: %s', reqstring) wait_time = 1. for attempt in xrange(10): try: if binding is not None: method, args, kwd = binding if "ucert" in kwd: del kwd["ucert"] if "ukey" in kwd: del kwd["ukey"] return getattr(fts3, method)(context, *args, **kwd) else: return json.loads(context.get(url)) except fts_exceptions.ServerError as exc: if str(exc.reason) == '500': # Internal server error - let's try again pass except fts_exceptions.TryAgain: pass time.sleep(wait_time) wait_time *= 1.5 LOG.error('Failed to communicate with FTS server: %s', reqstring) raise RuntimeError('Failed to communicate with FTS server: %s' % reqstring) def _submit_job(self, job, optype, batch_id, pfn_to_tid, x509=None): if self._read_only: job_id = 'test' else: try: if x509 is not None: job_id = self._ftscall('submit', job, ucert=x509, ukey=x509) else: job_id = self._ftscall('submit', job) except: exc_type, exc, tb = sys.exc_info() LOG.error('Failed to submit %s to FTS: Exception %s (%s)', optype, exc_type.__name__, str(exc)) return False # list of file-level operations (one-to-one with pfn) try: if optype == 'transfer' or optype == 'staging': key = 'files' else: key = 'dm' #LOG.info("List_files call 3") fts_files = self._ftscall('get_job_status', job_id = job_id, list_files = True)[key] except: exc_type, exc, tb = sys.exc_info() LOG.error('Failed to get status of job %s from FTS: Exception %s (%s)', job_id, exc_type.__name__, str(exc)) return False if self.server_id == 0: self._set_server_id() if optype == 'transfer' or optype == 'staging': 
table_name = 'fts_transfer_batches' columns = ('batch_id', 'task_type', 'fts_server_id', 'job_id') values = (batch_id, optype, self.server_id, job_id) else: table_name = 'fts_deletion_batches' columns = ('batch_id', 'fts_server_id', 'job_id') values = (batch_id, self.server_id, job_id) if not self._read_only: fts_batch_id = self.db.insert_get_id(table_name, columns = columns, values = values) if optype == 'transfer' or optype == 'staging': table_name = 'fts_transfer_tasks' pfn_key = 'dest_surl' else: table_name = 'fts_deletion_tasks' pfn_key = 'source_surl' fields = ('id', 'fts_batch_id', 'fts_file_id') mapping = lambda f: (pfn_to_tid[f[pfn_key]], fts_batch_id, f['file_id']) if not self._read_only: self.db.insert_many(table_name, fields, mapping, fts_files, do_update = True, update_columns = ('fts_batch_id', 'fts_file_id')) return True def _cancel(self, task_ids, optype): sql = 'SELECT b.`job_id`, f.`fts_file_id` FROM `fts_{op}_tasks` AS f' sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = f.`fts_batch_id`' result = self.db.execute_many(sql.format(op = optype), MySQL.bare('f.`id`'), task_ids) by_job = collections.defaultdict(list) for job_id, file_id in result: by_job[job_id].append(file_id) if not self._read_only: for job_id, ids in by_job.iteritems(): try: self._ftscall('cancel', job_id, file_ids = ids) except: LOG.error('Failed to cancel FTS job %s', job_id) def _get_status(self, batch_id, optype): if optype == 'transfer' or optype == 'staging': sql = 'SELECT `id`, `job_id` FROM `fts_transfer_batches`' sql += ' WHERE `task_type` = %s AND `fts_server_id` = %s AND `batch_id` = %s' batch_data = self.db.query(sql, optype, self.server_id, batch_id) task_table_name = 'fts_transfer_tasks' else: sql = 'SELECT `id`, `job_id` FROM `fts_deletion_batches`' sql += ' WHERE `fts_server_id` = %s AND `batch_id` = %s' batch_data = self.db.query(sql, self.server_id, batch_id) task_table_name = 'fts_deletion_tasks' message_pattern = 
re.compile('(?:DESTINATION|SOURCE|TRANSFER|DELETION) \[([0-9]+)\] (.*)') results = [] for fts_batch_id, job_id in batch_data: LOG.debug('Checking status of FTS %s batch %s', optype, job_id) sql = 'SELECT `fts_file_id`, `id` FROM `{table}` WHERE `fts_batch_id` = %s'.format(table = task_table_name) fts_to_task = dict(self.db.xquery(sql, fts_batch_id)) try: #LOG.info("List_files call 4") result = self._ftscall('get_job_status', job_id = job_id, list_files = True) except: LOG.error('Failed to get job status for FTS job %s', job_id) LOG.error(optype) continue if optype == 'transfer' or optype == 'staging': fts_files = result['files'] else: fts_files = result['dm'] for fts_file in fts_files: try: task_id = fts_to_task[fts_file['file_id']] except KeyError: continue state = fts_file['file_state'] exitcode = -1 start_time = None finish_time = None get_time = False try: message = fts_file['reason'] except KeyError: message = None if message is not None: # Check if reason follows a known format (from which we can get the exit code) matches = message_pattern.match(message) if matches is not None: exitcode = int(matches.group(1)) message = matches.group(2) # Additionally, if the message is a known one, convert the exit code c = find_msg_code(message) if c is not None: exitcode = c # HDFS site with gridftp-hdfs gives a I/O error (500) when the file is not there if optype == 'deletion' and 'Input/output error' in message: exitcode = errno.ENOENT if state == 'FINISHED': status = FileQuery.STAT_DONE exitcode = 0 get_time = True elif state == 'FAILED': status = FileQuery.STAT_FAILED get_time = True elif state == 'CANCELED': status = FileQuery.STAT_CANCELLED get_time = True elif state == 'SUBMITTED': status = FileQuery.STAT_NEW else: status = FileQuery.STAT_QUEUED if optype == 'transfer' and exitcode == errno.EEXIST: # Transfer + destination exists -> not an error status = FileQuery.STAT_DONE exitcode = 0 elif optype == 'deletion' and exitcode == errno.ENOENT: # Deletion + 
destination does not exist -> not an error status = FileQuery.STAT_DONE exitcode = 0 if get_time: try: start_time = calendar.timegm(time.strptime(fts_file['start_time'], '%Y-%m-%dT%H:%M:%S')) except TypeError: # start time is NULL (can happen when the job is cancelled) start_time = None try: finish_time = calendar.timegm(time.strptime(fts_file['finish_time'], '%Y-%m-%dT%H:%M:%S')) except TypeError: start_time = None LOG.debug('%s %d: %s, %d, %s, %s, %s', optype, task_id, FileQuery.status_name(status), exitcode, message, start_time, finish_time) results.append((task_id, status, exitcode, message, start_time, finish_time)) return results def _write_history(self, history_db, task_id, history_id, optype): if not self._read_only: history_db.db.insert_update('fts_servers', ('url',), self.server_url) try: server_id = history_db.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url)[0] except IndexError: server_id = 0 sql = 'SELECT b.`job_id`, t.`fts_file_id` FROM `fts_{op}_tasks` AS t' sql += ' INNER JOIN `fts_{op}_batches` AS b ON b.`id` = t.`fts_batch_id`' sql += ' WHERE t.`id` = %s' try: fts_job_id, fts_file_id = self.db.query(sql.format(op = optype), task_id)[0] except IndexError: return if not self._read_only: history_db.db.insert_update('fts_batches', ('fts_server_id', 'job_id'), server_id, fts_job_id) batch_id = history_db.db.query('SELECT `id` FROM `fts_batches` WHERE `fts_server_id` = %s AND `job_id` = %s', server_id, fts_job_id)[0] history_db.db.insert_update('fts_file_{op}s'.format(op = optype), ('id', 'fts_batch_id', 'fts_file_id'), history_id, batch_id, fts_file_id) def _forget_status(self, task_id, optype): if self._read_only: return sql = 'DELETE FROM `fts_{optype}_tasks` WHERE `id` = %s'.format(optype = optype) self.db.query(sql, task_id) def _forget_batch(self, batch_id, optype): if self._read_only: return sql = 'DELETE FROM `fts_{optype}_batches` WHERE `batch_id` = %s'.format(optype = optype) self.db.query(sql, batch_id) def 
_set_server_id(self): if not self._read_only: self.db.query('INSERT INTO `fts_servers` (`url`) VALUES (%s) ON DUPLICATE KEY UPDATE `url`=VALUES(`url`)', self.server_url) result = self.db.query('SELECT `id` FROM `fts_servers` WHERE `url` = %s', self.server_url) if len(result) == 0: self.server_id = 0 else: self.server_id = result[0]
def __init__(self, config = None): if config is None: config = RLFSM._config # Handle to the inventory DB self.db = MySQL(config.db.db_params) # Handle to the history DB self.history_db = HistoryDatabase(config.get('history', None)) # FileTransferOperation backend (can make it a map from (source, dest) to operator) self.transfer_operations = [] if 'transfer' in config: for condition_text, module, conf in config.transfer: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.transfer_operations.append((condition, FileTransferOperation.get_instance(module, conf))) if 'transfer_query' in config: self.transfer_queries = [] for condition_text, module, conf in config.transfer_query: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.transfer_queries.append(condition, FileTransferQuery.get_instance(module, conf)) else: self.transfer_queries = self.transfer_operations if 'deletion' in config: self.deletion_operations = [] for condition_text, module, conf in config.deletion: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.deletion_operations.append(condition, FileDeletionOperation.get_instance(module, conf)) else: self.deletion_operations = self.transfer_operations if 'deletion_query' in config: self.deletion_queries = [] for condition_text, module, conf in config.deletion_query: if condition_text is None: # default condition = None else: condition = Condition(condition_text, site_variables) self.deletion_queries.append(condition, FileDeletionQuery.get_instance(module, conf)) else: self.deletion_queries = self.deletion_operations self.sites_in_downtime = [] # Cycle thread self.main_cycle = None self.cycle_stop = threading.Event() self.set_read_only(config.get('read_only', False))
class DirectRequestsHandler(BaseHandler):
    """
    Create dataset transfer proposals from direct user requests.
    """

    def __init__(self):
        BaseHandler.__init__(self, 'Direct')

        self._mysql = MySQL(**config.registry.db_params)

    def release_requests(self, table, reqs2delete):
        """Delete the given (item, site) copy requests from `table`."""
        array = []
        for did, site in reqs2delete:
            array.append((did, site, 'copy'))
        self._mysql.delete_many(table, ('item', 'site', 'reqtype'), array)

    def get_requests(self, inventory, policy): # override
        """Collect user copy requests while holding a write lock on `requests`."""
        self._mysql.query("LOCK TABLES `requests` WRITE")
        try:
            # FIX: the result of _get_requests (list of (dataset, site) to request)
            # was computed and then silently discarded
            return self._get_requests(inventory, policy)
        finally:
            self._mysql.query("UNLOCK TABLES")

    def _get_requests(self, inventory, policy):
        """
        Merge rows from `requests` into `requests_unified`, drop requests that are
        already satisfied or reference unknown data, and return the list of
        (dataset, site) pairs for which new copies should be requested.
        """
        requests = {}
        newRequests = {}
        reqs2delete = []
        unif2delete = []

        reqs = self._mysql.query(
            "SELECT `reqid`, `item`, `site`, `rank`, `status`, `created` FROM `requests_unified`"
        )
        for reqid, dset, target, rank, status, create_datetime in reqs:
            reqtime = int(time.mktime(create_datetime.timetuple()))
            request = UserRequest(dset, target, reqtime, True)
            request.reqid = reqid
            request.nrequests = rank
            request.status = status
            requests[(dset, target)] = request

            reps = inventory.datasets[dset].replicas
            fullreps = [i for i in reps if i.is_complete()]
            if len([i for i in fullreps if i.site.name == target]) != 0:
                logger.debug(dset)
                logger.debug(" request already done, trash it")
                unif2delete.append((dset, target))

        reqs = self._mysql.query(
            "SELECT `item`, `site`, `reqtype`, `created` FROM `requests`")
        for dset, target, reqtype, create_datetime in reqs:
            if create_datetime != None:
                reqtime = int(time.mktime(create_datetime.timetuple()))
            else:
                reqtime = int(time.time())

            #we only deal with copy requests here
            if reqtype != 'copy':
                logger.debug(dset)
                logger.debug(" ignoring non-copy request")
                continue

            #pass only requests with data known to inventory
            if dset not in inventory.datasets:
                logger.debug(dset)
                logger.debug(" non existing dataset, trash it ")
                reqs2delete.append((dset, target))
                continue

            #check that the full replicas exist anywhere
            reps = inventory.datasets[dset].replicas
            fullreps = [i for i in reps if i.is_complete()]
            if len(fullreps) < 1:
                logger.debug(dset)
                logger.debug(" no full replicas exist, ingnoring")
                continue

            #check if this dataset already exists in full at target site
            if len([i for i in fullreps if i.site.name == target]) != 0:
                logger.debug(dset)
                logger.debug(" request already done, trash it")
                reqs2delete.append((dset, target))
                continue

            if (dset, target) not in requests:
                requests[(dset, target)] = UserRequest(dset, target)
            requests[(dset, target)].updateRequest(reqtime, False)

        for (dset, target), request in requests.items():
            #if is_active true it means we already acting upon it
            #collapse all other requests and update the date
            if request.is_active:
                logger.debug(dset)
                logger.debug("master request is in")
                reqs2delete.append((dset, target))

        for (dset, target), request in requests.items():
            if not request.is_active:
                newRequests[(dset, target)] = request
            elif request.updated:
                logger.debug("old request rank = " + str(request.nrequests))
                sql = "UPDATE `requests_unified` SET `rank` = %d" % request.nrequests
                sql += " WHERE `reqid` = %d" % request.reqid
                self._mysql.query(sql)

        self.release_requests('requests', reqs2delete)
        self.release_requests('requests_unified', unif2delete)
        self.release_lock()

        for (dset, target), request in requests.items():
            if request.is_active and request.status == 'new':
                newRequests[(dset, target)] = request

        datasets_to_request = []

        logger.debug("\n attaching copy requests for datasets:")
        # highest-ranked (most requested) datasets first
        for req in sorted(newRequests.values(), key=lambda x: x.nrequests, reverse=True):
            ds = inventory.datasets[req.dataset]
            logger.debug(ds.name)
            datasets_to_request.append((ds, inventory.sites[req.site]))

        return datasets_to_request
class MySQLAppManager(AppManager):
    """
    MySQL-backed implementation of AppManager: tracks scheduled/running
    applications, application authorizations, and named application sequences
    in the `applications`, `authorized_applications` and `application_sequences`
    tables. Row id 0 of `applications` is reserved for the web-write slot.
    """

    def __init__(self, config):
        AppManager.__init__(self, config)

        if not hasattr(self, '_mysql'):
            db_params = Configuration(config.db_params)
            db_params.reuse_connection = True # we use locks
            self._mysql = MySQL(db_params)

        # make sure applications row with id 0 exists
        count = self._mysql.query(
            'SELECT COUNT(*) FROM `applications` WHERE `id` = 0')[0]

        if count == 0:
            # Cannot insert with id = 0 (will be interpreted as next auto_increment id unless server-wide setting is changed)
            # Insert with an implicit id first and update later
            columns = ('auth_level', 'title', 'path', 'status', 'user_id', 'user_host')
            values = (AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')
            insert_id = self._mysql.insert_get_id('applications', columns=columns, values=values)

            self._mysql.query(
                'UPDATE `applications` SET `id` = 0 WHERE `id` = %s', insert_id)

    def get_applications(self, older_than=0, status=None, app_id=None, path=None): #override
        # Returns a list of dicts, one per matching applications row (joined to users)
        sql = 'SELECT `applications`.`id`, 0+`applications`.`auth_level`, `applications`.`title`, `applications`.`path`, `applications`.`args`,'
        sql += ' `applications`.`timeout`, 0+`applications`.`status`, `applications`.`server`, `applications`.`exit_code`, `users`.`name`, `applications`.`user_host`'
        sql += ' FROM `applications` INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'

        constraints = []
        args = []

        if older_than > 0:
            constraints.append(
                'UNIX_TIMESTAMP(`applications`.`timestamp`) < %s')
            args.append(older_than)
        if status is not None:
            constraints.append('`applications`.`status` = %s')
            args.append(status)
        if app_id is not None:
            constraints.append('`applications`.`id` = %s')
            args.append(app_id)
        if path is not None:
            constraints.append('`applications`.`path` = %s')
            args.append(path)

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        args = tuple(args)

        applications = []
        # NOTE: the loop targets `path`/`args`/`status` rebind the parameters above,
        # but *args was already consumed by the xquery call
        for aid, auth_level, title, path, args, timeout, status, server, exit_code, uname, uhost in self._mysql.xquery(
                sql, *args):
            applications.append({
                'appid': aid,
                'auth_level': auth_level,
                'user_name': uname,
                'user_host': uhost,
                'title': title,
                'path': path,
                'args': args,
                'timeout': timeout,
                'status': int(status),
                'server': server,
                'exit_code': exit_code
            })

        return applications

    def get_writing_process_id(self): #override
        # id of the single application holding write auth, or None
        result = self._mysql.query(
            'SELECT `id` FROM `applications` WHERE `auth_level` = \'write\' AND `status` IN (\'assigned\', \'run\')'
        )
        if len(result) == 0:
            return None
        else:
            return result[0]

    def get_writing_process_host(self): #override
        result = self._mysql.query(
            'SELECT `server` FROM `applications` WHERE `auth_level` = \'write\' AND `status` IN (\'assigned\', \'run\')'
        )
        if len(result) == 0:
            return None
        else:
            return result[0]

    def get_web_write_process_id(self): #override
        # user_id is repurposed for web server suprocess PID
        return self._mysql.query(
            'SELECT `user_id` FROM `applications` WHERE `id` = 0')[0]

    def get_running_processes(self): #override
        # (title, auth_level, server, start timestamp) for all running apps
        sql = 'SELECT `title`, 0+`auth_level`, `server`, UNIX_TIMESTAMP(`timestamp`) FROM `applications` WHERE `status` = \'run\''

        result = []
        for title, auth_level, server, timestamp in self._mysql.xquery(sql):
            result.append((title, auth_level, server, timestamp))

        return result

    def schedule_application(self, title, path, args, user_id, host, auth_level, timeout): #override
        # Returns the new application id
        columns = ('auth_level', 'title', 'path', 'args', 'timeout', 'user_id', 'user_host')
        values = (auth_level, title, path, args, timeout, user_id, host)
        return self._mysql.insert_get_id('applications', columns=columns, values=values)

    def _do_get_next_application(self, read_only, blocked_apps): #override
        # Oldest 'new' application not blocked (and not write-level if read_only)
        sql = 'SELECT `applications`.`id`, 0+`auth_level`, `title`, `path`, `args`, `timeout`, `users`.`name`, `user_host` FROM `applications`'
        sql += ' INNER JOIN `users` ON `users`.`id` = `applications`.`user_id`'
        sql += ' WHERE `status` = \'new\''
        if read_only:
            sql += ' AND `auth_level` != \'write\''
        if len(blocked_apps) != 0:
            sql += ' AND `title` NOT IN %s' % MySQL.stringify_sequence(
                blocked_apps)
        sql += ' ORDER BY `applications`.`id` LIMIT 1'

        result = self._mysql.query(sql)

        if len(result) == 0:
            return None
        else:
            appid, auth_level, title, path, args, timeout, uname, uhost = result[
                0]
            return {
                'appid': appid,
                'auth_level': auth_level,
                'user_name': uname,
                'user_host': uhost,
                'title': title,
                'path': path,
                'args': args,
                'timeout': timeout
            }

    def update_application(self, app_id, **kwd): #override
        # Update any of status / hostname / exit_code / path for one application.
        # NOTE(review): with no keyword arguments the generated SQL is malformed —
        # callers are expected to pass at least one field.
        sql = 'UPDATE `applications` SET '

        args = []
        updates = []

        if 'status' in kwd:
            updates.append('`status` = %s')
            args.append(AppManager.status_name(kwd['status']))

        if 'hostname' in kwd:
            updates.append('`server` = %s')
            args.append(kwd['hostname'])

        if 'exit_code' in kwd:
            updates.append('`exit_code` = %s')
            args.append(kwd['exit_code'])

        if 'path' in kwd:
            updates.append('`path` = %s')
            args.append(kwd['path'])

        sql += ', '.join(updates)
        sql += ' WHERE `id` = %s'
        args.append(app_id)

        self._mysql.query(sql, *tuple(args))

    def delete_application(self, app_id): #override
        self._mysql.query('DELETE FROM `applications` WHERE `id` = %s', app_id)

    def start_write_web(self, host, pid): #override
        # repurposing user_id for pid
        sql = 'UPDATE `applications` SET `status` = \'run\', `server` = %s, `user_host` = %s, `user_id` = %s, `timestamp` = NOW() WHERE `id` = 0'
        self._mysql.query(sql, host, host, pid)

    def stop_write_web(self): #override
        # We don't actually use the host name because there is only one slot for web write anyway
        sql = 'UPDATE `applications` SET `status` = \'done\', `server` = \'\', `user_host` = \'\', `user_id` = 0 WHERE `id` = 0'
        self._mysql.query(sql)

    def check_application_auth(self, title, user, checksum): #override
        # True when (title, checksum) is authorized for this user or for everyone (user_id 0)
        result = self._mysql.query(
            'SELECT `id` FROM `users` WHERE `name` = %s', user)
        if len(result) == 0:
            return False

        user_id = result[0]

        sql = 'SELECT `user_id` FROM `authorized_applications` WHERE `title` = %s AND `checksum` = UNHEX(%s)'
        for auth_user_id in self._mysql.query(sql, title, checksum):
            if auth_user_id == 0 or auth_user_id == user_id:
                return True

        return False

    def list_authorized_applications(self, titles=None, users=None, checksums=None): #override
        sql = 'SELECT a.`title`, u.`name`, HEX(a.`checksum`) FROM `authorized_applications` AS a'
        sql += ' LEFT JOIN `users` AS u ON u.`id` = a.`user_id`'

        constraints = []
        args = []
        if type(titles) is list:
            constraints.append('a.`title` IN (%s)' % ','.join(['%s'] * len(titles)))
            args.extend(titles)
        if type(users) is list:
            constraints.append('u.`name` IN (%s)' % ','.join(['%s'] * len(users)))
            args.extend(users)
        if type(checksums) is list:
            constraints.append('a.`checksum` IN (%s)' % ','.join(['UNHEX(%s)'] * len(checksums)))
            args.extend(checksums)

        if len(constraints) != 0:
            sql += ' WHERE ' + ' AND '.join(constraints)

        return self._mysql.query(sql, *tuple(args))

    def authorize_application(self, title, checksum, user=None): #override
        # user None -> authorize for all users (user_id 0)
        sql = 'INSERT INTO `authorized_applications` (`user_id`, `title`, `checksum`)'
        if user is None:
            sql += ' VALUES (0, %s, UNHEX(%s))'
            args = (title, checksum)
        else:
            sql += ' SELECT u.`id`, %s, UNHEX(%s) FROM `users` AS u WHERE u.`name` = %s'
            args = (title, checksum, user)

        inserted = self._mysql.query(sql, *args)
        return inserted != 0

    def revoke_application_authorization(self, title, user=None): #override
        sql = 'DELETE FROM `authorized_applications` WHERE (`user_id`, `title`) ='
        if user is None:
            sql += ' (0, %s)'
            args = (title, )
        else:
            sql += ' (SELECT u.`id`, %s FROM `users` AS u WHERE u.`name` = %s)'
            args = (title, user)

        deleted = self._mysql.query(sql, *args)
        return deleted != 0

    def register_sequence(self, name, user, restart=False): #override
        sql = 'INSERT INTO `application_sequences` (`name`, `user_id`, `restart`) SELECT %s, `id`, %s FROM `users` WHERE `name` = %s'
        inserted = self._mysql.query(sql, name, 1 if restart else 0, user)
        return inserted != 0

    def find_sequence(self, name): #override
        # Returns (name, owner, restart flag, enabled flag) or None
        sql = 'SELECT u.`name`, s.`restart`, s.`status` FROM `application_sequences` AS s'
        sql += ' INNER JOIN `users` AS u ON u.`id` = s.`user_id`'
        sql += ' WHERE s.`name` = %s'

        try:
            user, restart, status = self._mysql.query(sql, name)[0]
        except IndexError:
            return None

        return (name, user, (restart != 0), status == 'enabled')

    def update_sequence(self, name, restart=None, enabled=None): #override
        if restart is None and enabled is None:
            return True

        changes = []
        args = []

        if restart is not None:
            changes.append('`restart` = %s')
            args.append(1 if restart else 0)
        if enabled is not None:
            changes.append('`status` = %s')
            args.append('enabled' if enabled else 'disabled')

        args.append(name)

        sql = 'UPDATE `application_sequences` SET ' + ', '.join(
            changes) + ' WHERE `name` = %s'

        updated = self._mysql.query(sql, *tuple(args))
        return updated != 0

    def delete_sequence(self, name): #override
        sql = 'DELETE FROM `application_sequences` WHERE `name` = %s'
        deleted = self._mysql.query(sql, name)
        return deleted != 0

    def get_sequences(self, enabled_only=True): #override
        sql = 'SELECT `name` FROM `application_sequences`'
        if enabled_only:
            sql += ' WHERE `status` = \'enabled\''

        return self._mysql.query(sql)

    def create_appmanager(self): #override
        # New AppManager using the read-only credentials when configured
        if self.readonly_config is None:
            db_params = self._mysql.config()
        else:
            db_params = self.readonly_config.db_params

        config = Configuration(db_params=db_params)
        return MySQLAppManager(config)
class RegistryDatabase(object):
    """
    Similar to HistoryDatabase, this is just one abstraction layer that doesn't
    really hide the backend technology for the registry. We still have the benefit
    of being able to use default parameters to initialize the registry database
    handle.
    """

    # default configuration
    _config = Configuration()

    @staticmethod
    def set_default(config):
        RegistryDatabase._config = Configuration(config)

    def __init__(self, config = None):
        if config is None:
            config = RegistryDatabase._config

        self.db = MySQL(config.db_params)

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value = True):
        # FIX: previously assigned the literal True, ignoring `value` — the
        # read_only flag passed from __init__/config was silently discarded
        self._read_only = value

    def get_locked_apps(self):
        sql = 'SELECT DISTINCT `application` FROM `activity_lock`'
        return self.db.query(sql)

    def get_app_lock(self, app):
        """
        Return (user, service, lock_time, note, depth) for the oldest lock on
        `app`, or (None, None, None, None, 0) if the application is not locked.
        """
        # this function can be called within a table lock, so we need to lock what we use
        self.db.lock_tables(read = [('activity_lock', 'l'), ('user_services', 's')])

        sql = 'SELECT l.`user`, s.`name`, UNIX_TIMESTAMP(l.`timestamp`), l.`note` FROM `activity_lock` AS l'
        sql += ' LEFT JOIN `user_services` AS s ON s.`id` = l.`service_id`'
        sql += ' WHERE l.`application` = %s ORDER BY l.`timestamp` ASC'
        lock_data = self.db.query(sql, app)

        self.db.unlock_tables()

        if len(lock_data) == 0:
            return None, None, None, None, 0

        first_user, first_service, lock_time, note = lock_data[0]

        # depth counts how many times the same (user, service) holds the lock
        depth = 1
        for user, service, _, _ in lock_data[1:]:
            if user == first_user and service == first_service:
                depth += 1

        return first_user, first_service, lock_time, note, depth

    def lock_app(self, app, user, service = None, note = None):
        """Append one lock entry for (user, service) on `app`."""
        if service is None:
            service_id = 0
        else:
            try:
                sql = 'SELECT `id` FROM `user_services` WHERE `name` = %s'
                service_id = self.db.query(sql, service)[0]
            except IndexError:
                service_id = 0

        sql = 'INSERT INTO `activity_lock` (`user`, `service_id`, `application`, `timestamp`, `note`)'
        sql += ' VALUES (%s, %s, %s, NOW(), %s)'
        self.db.query(sql, user, service_id, app, note)

    def unlock_app(self, app, user, service = None):
        """Remove the most recent lock entry of (user, service) on `app`."""
        if service is None:
            service_id = 0
        else:
            try:
                sql = 'SELECT `id` FROM `user_services` WHERE `name` = %s'
                service_id = self.db.query(sql, service)[0]
            except IndexError:
                service_id = 0

        self.db.lock_tables(write = ['activity_lock', ('activity_lock', 'l')])

        sql = 'DELETE FROM `activity_lock` WHERE `id` = ('
        sql += ' SELECT m FROM ('
        sql += ' SELECT MAX(`id`) m FROM `activity_lock` AS l'
        sql += ' WHERE `user` = %s AND `service_id` = %s AND `application` = %s'
        sql += ' ) AS tmp'
        sql += ')'

        self.db.query(sql, user, service_id, app)

        # a little cleanup
        if self.db.query('SELECT COUNT(*) FROM `activity_lock`')[0] == 0:
            self.db.query('ALTER TABLE `activity_lock` AUTO_INCREMENT = 1')

        self.db.unlock_tables()
import numpy as np import pandas as pd from dynamo.dataformat import Configuration from dynamo.utils.interface.mysql import MySQL from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot # # # # # # # # # Query part # # # # # # # # # nowtime = datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S') # Dynamo database: inbatch dynamo = MySQL(Configuration(db = 'dynamo', user = '******', passwd = 'putpasswordhere')) sites = dynamo.xquery('SELECT `id`, `name` FROM `sites`') sitesdict = {} for ide, name in sites: sitesdict[ide] = name transfers = dynamo.xquery('SELECT tt.`source_id`, fs.`site_id`, f.`size` FROM `transfer_tasks` AS tt INNER JOIN file_subscriptions AS fs ON fs.`id` = tt.`subscription_id` INNER JOIN files as f on f.`id` = fs.`file_id` WHERE fs.`status`="inbatch" and fs.`delete`=0') # Dynamo history database: exitcode dynamohistory = MySQL(Configuration(db = 'dynamohistory', user = '******', passwd = 'putpasswordhere')) historysites = dynamohistory.xquery('SELECT `id`, `name` FROM `sites`') historysitesdict = {} for ide, name in historysites: historysitesdict[ide] = name historyxfers = dynamohistory.xquery('SELECT `source_id`, `destination_id`, `exitcode` FROM `file_transfers` WHERE `source_id` != 0 AND `completed` > NOW() - INTERVAL 7 DAY')
def make_entry(self, site_name): if self._read_only: operation_id = 0 else: site_id = self.save_sites([site_name], get_ids = True)[0] operation_id = self.db.insert_get_id('copy_operations', columns = ('timestamp', 'site_id'), values = (MySQL.bare('NOW()'), site_id)) return HistoryRecord(HistoryRecord.OP_COPY, operation_id, site_name, int(time.time()))
class HistoryDatabase(object):
    """
    Interface to the history database. This is a MySQL-specific implementation, and we actually
    expose the backend database. Will be a bit tricky to replace the backend when we need to do it.
    What we do with the history DB is very much application specific, so it makes little sense
    to have generic abstract interface to individual actions. The methods of this class are just
    a few of the common operations that are necessary for any history recording.

    All save_* methods share the same contract: in read-only mode they perform no
    writes and return a list of zeros (when get_ids) or None; otherwise they upsert
    the rows and optionally return the row ids.
    """

    # default configuration
    _config = Configuration()

    @staticmethod
    def set_default(config):
        HistoryDatabase._config = Configuration(config)

    def __init__(self, config = None):
        if config is None:
            config = HistoryDatabase._config

        self.db = MySQL(config.db_params)

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value = True):
        self._read_only = value

    def save_users(self, user_list, get_ids = False):
        """
        @param user_list [(name, dn)]
        """
        if self._read_only:
            if get_ids:
                return [0] * len(user_list)
            else:
                return

        self.db.insert_many('users', ('name', 'dn'), None, user_list, do_update = True)

        if get_ids:
            return self.db.select_many('users', ('id',), 'dn', [u[1] for u in user_list])

    def save_user_services(self, service_names, get_ids = False):
        if self._read_only:
            if get_ids:
                return [0] * len(service_names)
            else:
                return

        self.db.insert_many('user_services', ('name',), MySQL.make_tuple, service_names, do_update = True)

        if get_ids:
            return self.db.select_many('user_services', ('id',), 'name', service_names)

    def save_partitions(self, partition_names, get_ids = False):
        if self._read_only:
            if get_ids:
                return [0] * len(partition_names)
            else:
                return

        self.db.insert_many('partitions', ('name',), MySQL.make_tuple, partition_names, do_update = True)

        if get_ids:
            return self.db.select_many('partitions', ('id',), 'name', partition_names)

    def save_sites(self, site_names, get_ids = False):
        if self._read_only:
            if get_ids:
                return [0] * len(site_names)
            else:
                return

        self.db.insert_many('sites', ('name',), MySQL.make_tuple, site_names, do_update = True)

        if get_ids:
            return self.db.select_many('sites', ('id',), 'name', site_names)

    def save_groups(self, group_names, get_ids = False):
        if self._read_only:
            if get_ids:
                return [0] * len(group_names)
            else:
                return

        self.db.insert_many('groups', ('name',), MySQL.make_tuple, group_names, do_update = True)

        if get_ids:
            return self.db.select_many('groups', ('id',), 'name', group_names)

    def save_datasets(self, dataset_names, get_ids = False):
        if self._read_only:
            if get_ids:
                return [0] * len(dataset_names)
            else:
                return

        self.db.insert_many('datasets', ('name',), MySQL.make_tuple, dataset_names, do_update = True)

        if get_ids:
            return self.db.select_many('datasets', ('id',), 'name', dataset_names)

    def save_blocks(self, block_list, get_ids = False):
        """
        @param block_list [(dataset name, block name)]
        """
        if self._read_only:
            if get_ids:
                return [0] * len(block_list)
            else:
                return

        # keep a single connection while the temporary table exists
        reuse_orig = self.db.reuse_connection
        self.db.reuse_connection = True

        # parent datasets must exist before blocks can reference them
        datasets = set(b[0] for b in block_list)
        self.save_datasets(datasets)

        columns = [
            '`dataset` varchar(512) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL',
            '`block` varchar(128) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL'
        ]
        self.db.create_tmp_table('blocks_tmp', columns)

        self.db.insert_many('blocks_tmp', ('dataset', 'block'), None, block_list, db = self.db.scratch_db)

        sql = 'INSERT INTO `blocks` (`dataset_id`, `name`)'
        sql += ' SELECT d.`id`, b.`block` FROM `{scratch}`.`blocks_tmp` AS b'.format(scratch = self.db.scratch_db)
        sql += ' INNER JOIN `datasets` AS d ON d.`name` = b.`dataset`'
        self.db.query(sql)

        if get_ids:
            sql = 'SELECT b.`id` FROM `blocks` AS b'
            sql += ' INNER JOIN (SELECT d.`id` dataset_id, t.`block` block_name FROM `{scratch}`.`blocks_tmp` AS t'.format(scratch = self.db.scratch_db)
            sql += ' INNER JOIN `datasets` AS d ON d.`name` = t.`dataset`) AS j ON (j.`dataset_id`, j.`block_name`) = (b.`dataset_id`, b.`name`)'
            ids = self.db.query(sql)

        self.db.drop_tmp_table('blocks_tmp')

        self.db.reuse_connection = reuse_orig

        if get_ids:
            return ids

    def save_files(self, file_data, get_ids = False):
        if self._read_only:
            if get_ids:
                return [0] * len(file_data)
            else:
                return

        self.db.insert_many('files', ('name', 'size'), None, file_data, do_update = True)

        if get_ids:
            return self.db.select_many('files', ('id',), 'name', [f[0] for f in file_data])
class HistoryDatabase(object):
    """
    Interface to the history database. This is a MySQL-specific implementation, and we actually
    expose the backend database. Will be a bit tricky to replace the backend when we need to do it.
    What we do with the history DB is very much application specific, so it makes little sense
    to have generic abstract interface to individual actions. The methods of this class are just
    a few of the common operations that are necessary for any history recording.

    All save_* methods share the same contract: in read-only mode they perform no
    writes and return a list of zeros (when get_ids) or None; otherwise they upsert
    the rows and optionally return the row ids.
    """

    # default configuration
    _config = Configuration()

    @staticmethod
    def set_default(config):
        HistoryDatabase._config = Configuration(config)

    def __init__(self, config=None):
        if config is None:
            config = HistoryDatabase._config

        self.db = MySQL(config.db_params)

        self.set_read_only(config.get('read_only', False))

    def set_read_only(self, value=True):
        self._read_only = value

    def save_users(self, user_list, get_ids=False):
        """
        @param user_list [(name, dn)]
        """
        if self._read_only:
            if get_ids:
                return [0] * len(user_list)
            else:
                return

        self.db.insert_many('users', ('name', 'dn'), None, user_list, do_update=True)

        if get_ids:
            return self.db.select_many('users', ('id', ), 'dn', [u[1] for u in user_list])

    def save_user_services(self, service_names, get_ids=False):
        if self._read_only:
            if get_ids:
                return [0] * len(service_names)
            else:
                return

        self.db.insert_many('user_services', ('name', ), MySQL.make_tuple, service_names, do_update=True)

        if get_ids:
            return self.db.select_many('user_services', ('id', ), 'name', service_names)

    def save_partitions(self, partition_names, get_ids=False):
        if self._read_only:
            if get_ids:
                return [0] * len(partition_names)
            else:
                return

        self.db.insert_many('partitions', ('name', ), MySQL.make_tuple, partition_names, do_update=True)

        if get_ids:
            return self.db.select_many('partitions', ('id', ), 'name', partition_names)

    def save_sites(self, site_names, get_ids=False):
        if self._read_only:
            if get_ids:
                return [0] * len(site_names)
            else:
                return

        self.db.insert_many('sites', ('name', ), MySQL.make_tuple, site_names, do_update=True)

        if get_ids:
            return self.db.select_many('sites', ('id', ), 'name', site_names)

    def save_groups(self, group_names, get_ids=False):
        if self._read_only:
            if get_ids:
                return [0] * len(group_names)
            else:
                return

        self.db.insert_many('groups', ('name', ), MySQL.make_tuple, group_names, do_update=True)

        if get_ids:
            return self.db.select_many('groups', ('id', ), 'name', group_names)

    def save_datasets(self, dataset_names, get_ids=False):
        if self._read_only:
            if get_ids:
                return [0] * len(dataset_names)
            else:
                return

        self.db.insert_many('datasets', ('name', ), MySQL.make_tuple, dataset_names, do_update=True)

        if get_ids:
            return self.db.select_many('datasets', ('id', ), 'name', dataset_names)

    def save_blocks(self, block_list, get_ids=False):
        """
        @param block_list [(dataset name, block name)]
        """
        if self._read_only:
            if get_ids:
                return [0] * len(block_list)
            else:
                return

        # keep a single connection while the temporary table exists
        reuse_orig = self.db.reuse_connection
        self.db.reuse_connection = True

        # parent datasets must exist before blocks can reference them
        datasets = set(b[0] for b in block_list)
        self.save_datasets(datasets)

        columns = [
            '`dataset` varchar(512) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL',
            '`block` varchar(128) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL'
        ]
        self.db.create_tmp_table('blocks_tmp', columns)

        self.db.insert_many('blocks_tmp', ('dataset', 'block'), None, block_list, db=self.db.scratch_db)

        sql = 'INSERT INTO `blocks` (`dataset_id`, `name`)'
        sql += ' SELECT d.`id`, b.`block` FROM `{scratch}`.`blocks_tmp` AS b'.format(
            scratch=self.db.scratch_db)
        sql += ' INNER JOIN `datasets` AS d ON d.`name` = b.`dataset`'
        self.db.query(sql)

        if get_ids:
            sql = 'SELECT b.`id` FROM `blocks` AS b'
            sql += ' INNER JOIN (SELECT d.`id` dataset_id, t.`block` block_name FROM `{scratch}`.`blocks_tmp` AS t'.format(
                scratch=self.db.scratch_db)
            sql += ' INNER JOIN `datasets` AS d ON d.`name` = t.`dataset`) AS j ON (j.`dataset_id`, j.`block_name`) = (b.`dataset_id`, b.`name`)'
            ids = self.db.query(sql)

        self.db.drop_tmp_table('blocks_tmp')

        self.db.reuse_connection = reuse_orig

        if get_ids:
            return ids

    def save_files(self, file_data, get_ids=False):
        if self._read_only:
            if get_ids:
                return [0] * len(file_data)
            else:
                return

        self.db.insert_many('files', ('name', 'size'), None, file_data, do_update=True)

        if get_ids:
            return self.db.select_many('files', ('id', ), 'name', [f[0] for f in file_data])
class MySQLReplicaLock(object):
    """
    Dataset lock read from the local DB (`detox_locks` table).

    Sets one attribute on each matched dataset:
      locked_blocks: {site: set of locked blocks, or None for a dataset-level lock}
    """

    produces = ['locked_blocks']

    # Fallback configuration used when __init__ is called with config=None.
    _default_config = None

    @staticmethod
    def set_default(config):
        # Wrap in a Configuration object so the stored default is self-contained.
        MySQLReplicaLock._default_config = Configuration(config)

    def __init__(self, config = None):
        # Fall back to the class-level default; fail loudly if neither is set.
        if config is None:
            if MySQLReplicaLock._default_config is None:
                raise ConfigurationError('MySQLReplicaLock default config is not set')

            config = MySQLReplicaLock._default_config

        self._mysql = MySQL(config.get('db_params', None))

        # Optional (user_id, role_id) filter; empty list means "all lock entries".
        self.users = []
        for user_id, role_id in config.get('users', []):
            self.users.append((user_id, role_id))

    def load(self, inventory):
        """
        Read lock entries from the DB and populate dataset.attr['locked_blocks']
        for every dataset in the inventory that matches a lock entry.
        """
        # Start from a clean slate: drop any stale lock info from a previous run.
        for dataset in inventory.datasets.itervalues():
            try:
                dataset.attr.pop('locked_blocks')
            except KeyError:
                pass

        # Fetch lock entries, optionally restricted to the configured users/roles.
        if len(self.users) != 0:
            entries = self._mysql.select_many('detox_locks', ('item', 'sites', 'groups'), ('user_id', 'role_id'), self.users)
        else:
            query = 'SELECT `item`, `sites`, `groups` FROM `detox_locks`'
            entries = self._mysql.query(query)

        for item_name, sites_pattern, groups_pattern in entries:
            # wildcard not allowed in block name
            try:
                dataset_pattern, block_name = Block.from_full_name(item_name)
            except ObjectError:
                # Not a full block name -> treat the item as a dataset(-pattern) lock.
                dataset_pattern, block_name = item_name, None

            # Resolve the dataset pattern to a list of concrete datasets.
            if '*' in dataset_pattern:
                pat_exp = re.compile(fnmatch.translate(dataset_pattern))

                datasets = []
                for dataset in inventory.datasets.values():
                    # this is highly inefficient but I can't think of a better way
                    if pat_exp.match(dataset.name):
                        datasets.append(dataset)
            else:
                try:
                    dataset = inventory.datasets[dataset_pattern]
                except KeyError:
                    LOG.debug('Cannot lock unknown dataset %s', dataset_pattern)
                    continue

                datasets = [dataset]

            # Resolve the site pattern; '*' (match everything) leaves the list
            # empty, which is treated as a global lock below.
            specified_sites = []
            if sites_pattern:
                if sites_pattern == '*':
                    pass
                elif '*' in sites_pattern:
                    pat_exp = re.compile(fnmatch.translate(sites_pattern))
                    specified_sites.extend(s for n, s in inventory.sites.iteritems() if pat_exp.match(n))
                else:
                    try:
                        specified_sites.append(inventory.sites[sites_pattern])
                    except KeyError:
                        pass

            # Same resolution logic for the group pattern.
            specified_groups = []
            if groups_pattern:
                if groups_pattern == '*':
                    pass
                elif '*' in groups_pattern:
                    pat_exp = re.compile(fnmatch.translate(groups_pattern))
                    specified_groups.extend(g for n, g in inventory.groups.iteritems() if pat_exp.match(n))
                else:
                    try:
                        specified_groups.append(inventory.groups[groups_pattern])
                    except KeyError:
                        pass

            for dataset in datasets:
                sites = set(specified_sites)
                groups = set(specified_groups)

                if len(sites) == 0:
                    # either sites_pattern was not given (global lock) or no sites matched (typo?)
                    # we will treat this as a global lock
                    sites.update(r.site for r in dataset.replicas)
                if len(groups) == 0:
                    # if no group matches the pattern, we will be on the safe side and treat it as a global lock
                    for replica in dataset.replicas:
                        groups.update(brep.group for brep in replica.block_replicas)

                # Get or create the per-dataset lock map {site: set(blocks)}.
                try:
                    locked_blocks = dataset.attr['locked_blocks']
                except KeyError:
                    locked_blocks = dataset.attr['locked_blocks'] = {}

                if block_name is None:
                    # Dataset-level entry: lock every block replica whose group matches.
                    for replica in dataset.replicas:
                        if replica.site not in sites:
                            continue

                        if replica.site not in locked_blocks:
                            locked_blocks[replica.site] = set()

                        for block_replica in replica.block_replicas:
                            if block_replica.group not in groups:
                                continue

                            locked_blocks[replica.site].add(block_replica.block)
                else:
                    # Block-level entry: lock the single named block where it has replicas.
                    block = dataset.find_block(block_name)
                    if block is None:
                        LOG.debug('Cannot lock unknown block %s', block_name)
                        continue

                    for replica in block.replicas:
                        if replica.site not in sites:
                            continue
                        if replica.group not in groups:
                            continue

                        if replica.site not in locked_blocks:
                            locked_blocks[replica.site] = set([block])
                        else:
                            locked_blocks[replica.site].add(block)

        # Final pass: collapse full-coverage block sets into dataset-level locks.
        for dataset in inventory.datasets.itervalues():
            try:
                locked_blocks = dataset.attr['locked_blocks']
            except KeyError:
                continue

            for site, blocks in locked_blocks.items():
                if blocks is None:
                    continue
                # if all blocks are locked, set to None (dataset-level lock)
                if blocks == dataset.blocks:
                    locked_blocks[site] = None

        LOG.info('Locked %d items.', len(entries))
class MySQLAuthorizer(Authorizer):
    """Authorizer backed by the server MySQL database (`users`, `roles`, `user_authorizations`)."""

    def __init__(self, config):
        Authorizer.__init__(self, config)

        # A subclass sharing the connection may have set _mysql already.
        if not hasattr(self, '_mysql'):
            db_params = Configuration(config.db_params)
            db_params.reuse_connection = True # we use locks
            self._mysql = MySQL(db_params)

    def user_exists(self, name):
        """Return True when a `users` row with the given name exists."""
        count = self._mysql.query('SELECT COUNT(*) FROM `users` WHERE `name` = %s', name)[0]
        return count != 0

    def list_users(self):
        """Return (name, email, dn) for all users, ordered by id."""
        return self._mysql.query('SELECT `name`, `email`, `dn` FROM `users` ORDER BY `id`')

    def identify_user(self, dn = '', check_trunc = False, name = '', uid = None): #override
        """
        Look a user up by DN, by name, or by id (in that order of precedence).
        With check_trunc, progressively truncate the DN at its last '/' until a
        match is found. Returns (name, id, dn) or None.
        """
        if dn:
            rows = self._mysql.query('SELECT `name`, `id`, `dn` FROM `users` WHERE `dn` = %s', dn)
            if check_trunc and not rows:
                while dn:
                    dn = dn[:dn.rfind('/')]
                    rows = self._mysql.query('SELECT `name`, `id`, `dn` FROM `users` WHERE `dn` = %s', dn)
                    if rows:
                        break
        elif name:
            rows = self._mysql.query('SELECT `name`, `id`, `dn` FROM `users` WHERE `name` = %s', name)
        else:
            rows = self._mysql.query('SELECT `name`, `id`, `dn` FROM `users` WHERE `id` = %s', uid)

        if not rows:
            return None

        user_name, user_id, user_dn = rows[0]
        return (user_name, int(user_id), user_dn)

    def identify_role(self, name): #override
        """Return (name, id) of the role, or None if it does not exist."""
        rows = self._mysql.query('SELECT `name`, `id` FROM `roles` WHERE `name` = %s', name)
        if not rows:
            return None

        role_name, role_id = rows[0]
        return (role_name, int(role_id))

    def list_roles(self):
        """Return the names of all roles."""
        return self._mysql.query('SELECT `name` FROM `roles`')

    def list_authorization_targets(self): #override
        # COLUMN_TYPE of an enum column reads like "enum('a','b',...)";
        # SUBSTRING(.., 5) strips the leading "enum", leaving a tuple literal.
        sql = ("SELECT SUBSTRING(COLUMN_TYPE, 5) FROM `information_schema`.`COLUMNS`"
               " WHERE `TABLE_SCHEMA` = 'dynamoserver' AND `TABLE_NAME` = 'user_authorizations' AND `COLUMN_NAME` = 'target'")
        enum_body = self._mysql.query(sql)[0]

        # eval the results as a python tuple (schema-sourced string, trusted input)
        return list(eval(enum_body))

    def check_user_auth(self, user, role, target): #override
        """
        True if the user (optionally under the given role) is authorized;
        when target is None, any authorization entry counts.
        """
        base = 'SELECT `target` FROM `user_authorizations` WHERE `user_id` = (SELECT `id` FROM `users` WHERE `name` = %s) AND'
        if role is None:
            sql = base + ' `role_id` = 0'
            args = (user,)
        else:
            sql = base + ' `role_id` = (SELECT `id` FROM `roles` WHERE `name` = %s)'
            args = (user, role)

        granted = self._mysql.query(sql, *args)

        if target is None:
            return len(granted) != 0
        return target in granted

    def list_user_auth(self, user): #override
        """Return (role name, target) pairs held by the named user."""
        sql = ('SELECT r.`name`, a.`target` FROM `user_authorizations` AS a'
               ' LEFT JOIN `roles` AS r ON r.`id` = a.`role_id`'
               ' WHERE a.`user_id` = (SELECT `id` FROM `users` WHERE `name` = %s)')
        return self._mysql.query(sql, user)

    def list_authorized_users(self, target): #override
        """Return (user name, role name) pairs authorized for the target (all targets when None)."""
        sql = ('SELECT u.`name`, s.`name` FROM `user_authorizations` AS a'
               ' INNER JOIN `users` AS u ON u.`id` = a.`user_id`'
               ' INNER JOIN `roles` AS s ON s.`id` = a.`role_id`')
        if target is None:
            return self._mysql.query(sql)
        return self._mysql.query(sql + ' WHERE a.`target` = %s', target)

    def create_authorizer(self): #override
        """Clone this authorizer, preferring read-only DB parameters when configured."""
        params = self._mysql.config() if self.readonly_config is None else self.readonly_config.db_params
        return MySQLAuthorizer(Configuration(db_params = params))