def __init__(self, config):
    AppManager.__init__(self, config)

    if not hasattr(self, '_mysql'):
        db_params = Configuration(config.db_params)
        db_params.reuse_connection = True # we use locks

        self._mysql = MySQL(db_params)

    # make sure applications row with id 0 exists
    count = self._mysql.query('SELECT COUNT(*) FROM `applications` WHERE `id` = 0')[0]

    if count == 0:
        # Cannot insert with id = 0 (will be interpreted as the next auto_increment id
        # unless a server-wide setting is changed).
        # Insert with an implicit id first and update later.
        columns = ('auth_level', 'title', 'path', 'status', 'user_id', 'user_host')
        values = (AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')
        insert_id = self._mysql.insert_get_id('applications', columns=columns, values=values)

        self._mysql.query('UPDATE `applications` SET `id` = 0 WHERE `id` = %s', insert_id)
def __init__(self, config):
    UpdateBoard.__init__(self, config)

    db_params = Configuration(config.db_params)
    db_params.reuse_connection = True # we use locks

    self._mysql = MySQL(db_params)
def __init__(self, config=None):
    config = Configuration(config)

    if 'user' in config:
        user = config.user
    else:
        user = MySQL._default_config.default_user

    try:
        self._connection_parameters = dict(MySQL._default_parameters[user])
    except KeyError:
        self._connection_parameters = {'user': user}

    if 'config_file' in config and 'config_group' in config:
        parser = ConfigParser()
        parser.read(config['config_file'])
        group = config['config_group']
        for ckey, key in [('host', 'host'), ('user', 'user'), ('password', 'passwd'), ('db', 'db')]:
            try:
                self._connection_parameters[key] = parser.get(group, ckey)
            except:
                pass

    if 'host' in config:
        self._connection_parameters['host'] = config['host']
    if 'passwd' in config:
        self._connection_parameters['passwd'] = config['passwd']
    if 'db' in config:
        self._connection_parameters['db'] = config['db']

    self._connection = None

    # Avoid interference in case the module is used from multiple threads
    self._connection_lock = multiprocessing.RLock()

    # MySQL tables can be locked by multiple statements but are unlocked with one.
    # In nested functions with each one locking different tables, we need to call UNLOCK TABLES
    # only after the outermost function asks for it.
    self._locked_tables = []

    # Use with care! If False, table locks and temporary tables cannot be used
    self.reuse_connection = config.get('reuse_connection', MySQL._default_config.get('reuse_connection', True))

    # Default 1M characters
    self.max_query_len = config.get('max_query_len', MySQL._default_config.get('max_query_len', 1000000))

    # Default database for CREATE TEMPORARY TABLE
    self.scratch_db = config.get('scratch_db', MySQL._default_config.get('scratch_db', ''))

    # Row id of the last insertion. Will be nonzero if the table has an auto-increment primary key.
    # **NOTE** While core execution of query() and xquery() are locked and thread-safe,
    # last_insert_id is not. Use insert_and_get_id() in a threaded environment.
    self.last_insert_id = 0
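# A minimal usage sketch for the constructor above. The connection values below
# ('localhost', 'dynamo', ...) are hypothetical placeholders, assuming
# Configuration accepts a dict of settings:
#
#   db = MySQL(Configuration({'host': 'localhost', 'user': 'dynamo',
#                             'passwd': 'secret', 'db': 'dynamoserver'}))
#   for row in db.query('SELECT `name` FROM `datasets`'):
#       print row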
def __init__(self, config=None):
    if config is None:
        config = Configuration()

    DatasetInfoSource.__init__(self, config)

    self._phedex = PhEDEx(config.get('phedex', None))
    self._dbs = DBS(config.get('dbs', None))
def __init__(self, config):
    Authorizer.__init__(self, config)

    if not hasattr(self, '_mysql'):
        db_params = Configuration(config.db_params)
        db_params.reuse_connection = True # we use locks

        self._mysql = MySQL(db_params)
def __init__(self, config=None):
    if config is None:
        config = Configuration()

    ReplicaInfoSource.__init__(self, config)

    self._phedex = PhEDEx(config.get('phedex', None))
    self._parallelizer_config = config
def __init__(self, config=None):
    config = Configuration(config)

    config.auth_handler = 'HTTPSCertKeyHandler'

    if 'url_base' not in config:
        config.url_base = DBS._url_base
    if 'num_attempts' not in config:
        config.num_attempts = DBS._num_attempts

    RESTService.__init__(self, config)
def __init__(self, config=None):
    config = Configuration(config)

    CopyInterface.__init__(self, config)

    self._phedex = PhEDEx(config.get('phedex', None))
    self._history = HistoryDatabase(config.get('history', None))

    # config value is given in TB; convert to bytes
    self.subscription_chunk_size = config.get('chunk_size', 50.) * 1.e+12
def __init__(self, config=None):
    config = Configuration(config)

    SiteInfoSource.__init__(self, config)

    self._phedex = PhEDEx(config.get('phedex', None))
    self._ssb = SiteStatusBoard(config.get('ssb', None))

    self.ssb_cache_lifetime = config.get('ssb_cache_lifetime', 3600)
    self._ssb_cache_timestamp = 0
    self._caching_lock = threading.Lock()

    self._waitroom_sites = set()
    self._morgue_sites = set()
def __init__(self, config): """ @param config Required parameters: str url_base There is no strict rule on separating the URL base and individual request REST command ('resource' in make_request). All requests are made to url_base + '/' + resource. Optional parameters: list headers Additional request headers (All standard headers including Accept are automatically passed). Default empty. str accept Accept header value. Default 'application/json'. str auth_handler Handler class for authentication. Use 'None' for no auth. default HTTPSCertKeyHandler. conf auth_handler_conf int num_attempts """ self.url_base = config.url_base self.headers = list(config.get('headers', [])) self.accept = config.get('accept', 'application/json') self.auth_handler = eval( config.get('auth_handler', 'HTTPSCertKeyHandler')) self.auth_handler_conf = config.get('auth_handler_conf', Configuration()) self.num_attempts = config.get('num_attempts', 1) self.last_errorcode = 0 self.last_exception = None
def set_default(config):
    MySQL._default_config = Configuration(config)
    MySQL._default_config.pop('params')

    for user, params in config.params.items():
        MySQL._default_parameters[user] = dict(params)
        MySQL._default_parameters[user]['user'] = user
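# Sketch of the configuration shape set_default() expects, inferred from the
# code above and the defaults read back in __init__ (all values hypothetical):
#
#   MySQL.set_default(Configuration({
#       'default_user': 'dynamo',
#       'reuse_connection': True,
#       'params': {'dynamo': {'host': 'localhost', 'passwd': 'secret', 'db': 'dynamoserver'}}
#   }))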
def create_appmanager(self): #override
    if self.readonly_config is None:
        db_params = self._mysql.config()
    else:
        db_params = self.readonly_config.db_params

    config = Configuration(db_params=db_params)
    return MySQLAppManager(config)
class GroupInfoSource(object):
    """
    Interface specs for group information authority.
    """

    @staticmethod
    def get_instance(module=None, config=None):
        if module is None:
            module = GroupInfoSource._module
        if config is None:
            config = GroupInfoSource._config

        return get_instance(GroupInfoSource, module, config)

    # defaults
    _module = ''
    _config = Configuration()

    @staticmethod
    def set_default(config):
        GroupInfoSource._module = config.module
        GroupInfoSource._config = config.config

    def __init__(self, config):
        include = config.get('include', None)
        if type(include) is list:
            self.include = list(include)
        elif include is not None:
            self.include = [include]
        else:
            self.include = None

        exclude = config.get('exclude', None)
        if type(exclude) is list:
            self.exclude = list(exclude)
        elif exclude is not None:
            self.exclude = [exclude]
        else:
            self.exclude = None

        # List of group names where olevel should be Dataset
        self.dataset_level_groups = list(config.get('dataset_level_groups', []))

    def get_group(self, name):
        """
        @param name  Name of the group
        @return A Group object with full info, or None if the group is not found.
        """
        raise NotImplementedError('get_group')

    def get_group_list(self):
        """
        @return List of unlinked Group objects. Will always contain a null group.
        """
        raise NotImplementedError('get_group_list')
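# The set_default() / get_instance() pair above is the plugin pattern shared by
# these interface classes: set_default() records a module name and configuration
# once at startup, and get_instance() later constructs the configured concrete
# subclass. A sketch (the module name is hypothetical):
#
#   defaults = Configuration()
#   defaults.module = 'MyGroupInfoSource'
#   defaults.config = Configuration()
#   GroupInfoSource.set_default(defaults)
#   source = GroupInfoSource.get_instance()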
def __init__(self, config = Configuration()):
    self.start_sem = threading.Semaphore(max(config.get('num_threads', multiprocessing.cpu_count() - 1), 1))
    self.task_per_thread = config.get('task_per_thread', 1)
    self.print_progress = config.get('print_progress', False)
    self.timeout = config.get('timeout', 0)
    self.repeat_on_exception = config.get('repeat_on_exception', True)

    self.logger = None
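# With the defaults above, a machine reporting multiprocessing.cpu_count() == 4
# gets max(4 - 1, 1) = 3 concurrent start slots. A sketch of overriding the
# knobs (the key names come from the constructor above):
#
#   conf = Configuration()
#   conf.num_threads = 8
#   conf.timeout = 300
#   conf.print_progress = True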
def __init__(self, config):
    BaseHandler.__init__(self, 'Enforcer')

    if type(config.enforcer) is str:
        # A path to the common enforcer configuration
        enforcer_config = Configuration(config.enforcer)
    else:
        enforcer_config = config.enforcer

    self.interface = EnforcerInterface(enforcer_config)
class StaticSiteInfoSource(SiteInfoSource):
    """
    Site information source fully specified by the static configuration.
    """

    def __init__(self, config):
        SiteInfoSource.__init__(self, config)

        self.config = Configuration(config.sites)

    def get_site(self, name, inventory): #override
        try:
            site_config = self.config[name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % name)

        storage_type = Site.storage_type_val(site_config.storage_type)
        backend = site_config.backend

        site_obj = Site(name, host = site_config.host, storage_type = storage_type, backend = backend)

        if name in inventory.sites:
            old_site_obj = inventory.sites[name]
            site_obj.x509proxy = old_site_obj.x509proxy

        return site_obj

    def get_site_list(self, inventory): #override
        site_list = []
        for name in self.config.keys():
            site_list.append(self.get_site(name, inventory))

        return site_list

    def get_site_status(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        return Site.status_val(site_config.status)

    def get_filename_mapping(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        result = {}
        for protocol, mappings in site_config.filename_mapping.items():
            result[protocol] = []
            for lfnpat, pfnpat in mappings:
                result[protocol].append([(lfnpat, pfnpat)])

        return result
def get_board_config(self, hostname): #override
    sql = 'SELECT `board_module`, `board_config` FROM `servers` WHERE `hostname` = %s'
    result = self._mysql.query(sql, hostname)
    if len(result) == 0:
        return None

    module, config_str = result[0]

    if config_str is None:
        return None

    return module, Configuration(json.loads(config_str))
def __init__(self, config=None):
    config = Configuration(config)

    DeletionInterface.__init__(self, config)

    self._phedex = PhEDEx(config.get('phedex', None))
    self._history = HistoryDatabase(config.get('history', None))

    self.auto_approval = config.get('auto_approval', True)
    self.allow_tape_deletion = config.get('allow_tape_deletion', True)
    self.tape_auto_approval = config.get('tape_auto_approval', False)

    # config value is given in TB; convert to bytes
    self.deletion_chunk_size = config.get('chunk_size', 50.) * 1.e+12
def __init__(self, config):
    LOG.info('Initializing Dynamo server %s.', __file__)

    ## Create the inventory
    self.inventory_config = config.inventory.clone()
    self.inventory = None

    ## Create the server manager
    self.manager_config = config.manager.clone()
    self.manager = ServerManager(self.manager_config)

    ## Modules defaults config
    self.defaults_config = config.defaults

    ## Application collection
    self.applications_config = config.applications.clone()
    if self.applications_config.enabled:
        # Initialize the appserver since it may require elevated privilege (this Ctor is run as root)
        aconf = self.applications_config.server
        self.appserver = AppServer.get_instance(aconf.module, self, aconf.config)

        if self.applications_config.timeout < 60:
            # Some errors were observed when the timeout is too short
            # (probably 1 second is enough - we just need to get through pre_execution)
            self.applications_config.timeout = 60

    ## Web server
    if config.web.enabled:
        config.web.modules_config = Configuration(config.web.modules_config_path)
        config.web.pop('modules_config_path')

        self.webserver = WebServer(config.web, self)
    else:
        self.webserver = None

    ## Server status (and application) poll interval
    self.poll_interval = config.status_poll_interval

    ## Load the inventory content (filter according to debug config)
    self.inventory_load_opts = {}
    if 'debug' in config:
        for objs in ['groups', 'sites', 'datasets']:
            included = config.debug.get('included_' + objs, None)
            excluded = config.debug.get('excluded_' + objs, None)
            self.inventory_load_opts[objs] = (included, excluded)

    ## Queue to send / receive inventory updates
    self.inventory_update_queue = multiprocessing.JoinableQueue()

    ## Recipient of error message emails
    self.notification_recipient = config.notification_recipient
def get_next_master(self, current): #override
    self._mysql.query('DELETE FROM `servers` WHERE `hostname` = %s', current)

    # shadow config must be the same as master
    result = self._mysql.query('SELECT `shadow_module`, `shadow_config` FROM `servers` ORDER BY `id` LIMIT 1')
    if len(result) == 0:
        raise RuntimeError('No servers can become master at this moment')

    module, config_str = result[0]

    return module, Configuration(json.loads(config_str))
def __init__(self, config=None):
    config = Configuration(config)

    if 'include_datasets' in config:
        if type(config.include_datasets) is list:
            self.include_datasets = map(lambda pattern: re.compile(fnmatch.translate(pattern)), config.include_datasets)
        else:
            self.include_datasets = [re.compile(fnmatch.translate(config.include_datasets))]
    else:
        self.include_datasets = None

    if 'exclude_datasets' in config:
        if type(config.exclude_datasets) is list:
            self.exclude_datasets = map(lambda pattern: re.compile(fnmatch.translate(pattern)), config.exclude_datasets)
        else:
            self.exclude_datasets = [re.compile(fnmatch.translate(config.exclude_datasets))]
    else:
        self.exclude_datasets = None

    if 'include_sites' in config:
        if type(config.include_sites) is list:
            self.include_sites = map(lambda pattern: re.compile(fnmatch.translate(pattern)), config.include_sites)
        else:
            self.include_sites = [re.compile(fnmatch.translate(config.include_sites))]
    else:
        self.include_sites = None

    if 'exclude_sites' in config:
        if type(config.exclude_sites) is list:
            self.exclude_sites = map(lambda pattern: re.compile(fnmatch.translate(pattern)), config.exclude_sites)
        else:
            self.exclude_sites = [re.compile(fnmatch.translate(config.exclude_sites))]
    else:
        self.exclude_sites = None
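# The constructor above compiles shell-style wildcards into regular expressions.
# A self-contained sketch of that conversion (the pattern and dataset name are
# hypothetical examples):
#
#   import re, fnmatch
#   rx = re.compile(fnmatch.translate('/SingleMuon/*/MINIAOD'))
#   assert rx.match('/SingleMuon/Run2016B-PromptReco-v2/MINIAOD') is not None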
class StaticSiteInfoSource(SiteInfoSource):
    """
    Site information source fully specified by the static configuration.
    """

    def __init__(self, config):
        SiteInfoSource.__init__(self, config)

        self.config = Configuration(config.sites)

    def get_site(self, name): #override
        try:
            site_config = self.config[name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % name)

        storage_type = Site.storage_type_val(site_config.storage_type)
        backend = site_config.backend

        return Site(name, host = site_config.host, storage_type = storage_type, backend = backend)

    def get_site_list(self): #override
        site_list = []
        for name in self.config.keys():
            site_list.append(self.get_site(name))

        return site_list

    def get_site_status(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        return Site.status_val(site_config.status)

    def get_filename_mapping(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        result = {}
        for protocol, mappings in site_config.filename_mapping.items():
            result[protocol] = []
            for lfnpat, pfnpat in mappings:
                result[protocol].append([(lfnpat, pfnpat)])

        return result
def add_source(self, name, config, auth_config):
    rest_config = Configuration()
    rest_config.url_base = config.url
    rest_config.accept = config.get('data_type', 'application/json')
    if config.auth == 'noauth':
        rest_config.auth_handler = 'None'
    else:
        auth = auth_config[config.auth]
        rest_config.auth_handler = auth.auth_handler
        rest_config.auth_handler_conf = Configuration(auth.get('auth_handler_conf', {}))

    content_type = getattr(WebReplicaLock, config.content_type)
    site_pattern = config.get('sites', None)
    lock_url = config.get('lock_url', None)

    self._sources[name] = (webservice.RESTService(rest_config), content_type, site_pattern, lock_url)
class DeletionInterface(object):
    """
    Interface to data deletion application.
    """

    @staticmethod
    def get_instance(module=None, config=None):
        if module is None:
            module = DeletionInterface._module
        if config is None:
            config = DeletionInterface._config

        return get_instance(DeletionInterface, module, config)

    _module = ''
    _config = Configuration()

    @staticmethod
    def set_default(config):
        DeletionInterface._module = config.module
        DeletionInterface._config = config.config

    def __init__(self, config=None):
        config = Configuration(config)
        self._read_only = False

    def set_read_only(self, value=True):
        self._read_only = value

    def schedule_deletions(self, replica_list, operation_id, comments=''):
        """
        Schedule a deletion of multiple replicas.
        @param replica_list  [(DatasetReplica, [BlockReplica])]. The list of block replicas
                             can be None if deleting the entire dataset replica.
        @param operation_id  Deletion operation id in the history DB for logging.
        @param comments      Comments to be passed to the operation interface.
        @return Clone [(DatasetReplica, [BlockReplica] or None)] for successfully scheduled
                replicas. DatasetReplica does not have BlockReplicas.
        """
        raise NotImplementedError('schedule_deletions')

    def deletion_status(self, operation_id):
        """
        @param operation_id  Operation id returned by schedule_deletions.
        @return Completion status {dataset: (last_update, total, deleted)}
        """
        raise NotImplementedError('deletion_status')
def __init__(self, config):
    policy_conf = Configuration(config.policy)

    # Partition to work in
    self.partition_name = policy_conf.partition

    # Enforcer policies
    self.rules = {}
    for rule_name, rule_conf in policy_conf.rules.iteritems():
        rule = EnforcerRule(rule_conf)
        if not rule.destination_group_name:
            rule.destination_group_name = policy_conf.default_destination_group

        self.rules[rule_name] = rule

    # If True, report_back returns a list to be fed to RRD writing
    self.write_rrds = config.get('write_rrds', False)
def __init__(self, config):
    BaseHandler.__init__(self, 'DirectRequests')

    registry_config = Configuration(config.registry)
    registry_config['reuse_connection'] = True # need to work with table locks

    self.request_manager = CopyRequestManager(config.get('manager', None))

    # maximum size that can be requested
    self.max_size = config.max_size * 1.e+12

    # convert block-level requests to dataset-level if requested size is greater than
    # dataset size * block_request_max
    self.block_request_max = config.block_request_max

    # list of group names from which ownership of blocks can be taken away
    self.overwritten_groups = config.get('overwritten_groups', [])

    self.activated_requests = []
def get_store_config(self, hostname): #override
    self._mysql.lock_tables(read=['servers'])
    try:
        while self.get_status(hostname) == ServerHost.STAT_UPDATING:
            # need to get the version of the remote server when it's not updating
            self._mysql.unlock_tables()
            time.sleep(2)
            self._mysql.lock_tables(read=['servers'])

        sql = 'SELECT `store_module`, `store_config`, `store_version` FROM `servers` WHERE `hostname` = %s'
        result = self._mysql.query(sql, hostname)
    finally:
        self._mysql.unlock_tables()

    if len(result) == 0:
        return None

    module, config_str, version = result[0]

    return module, Configuration(json.loads(config_str)), version
def config(self):
    conf = Configuration()
    for key in ['host', 'user', 'passwd', 'db']:
        try:
            conf[key] = self._connection_parameters[key]
        except KeyError:
            pass

    try:
        conf['config_file'] = self._connection_parameters['read_default_file']
    except KeyError:
        pass
    try:
        conf['config_group'] = self._connection_parameters['read_default_group']
    except KeyError:
        pass

    conf['reuse_connection'] = self.reuse_connection
    conf['max_query_len'] = self.max_query_len
    conf['scratch_db'] = self.scratch_db

    return conf
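# config() exports enough state to reconstruct an equivalent connection, so a
# second handle can be cloned from a live one by feeding the result back into
# the constructor (a sketch, assuming db is an existing MySQL instance;
# create_appmanager() elsewhere in this codebase uses the same round trip):
#
#   replica = MySQL(db.config())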
class UserInfoSource(object):
    """
    Interface specs for user data authority.
    """

    @staticmethod
    def get_instance(module = None, config = None):
        if module is None:
            module = UserInfoSource._module
        if config is None:
            config = UserInfoSource._config

        return get_instance(UserInfoSource, module, config)

    _module = ''
    _config = Configuration()

    @staticmethod
    def set_default(config):
        UserInfoSource._module = config.module
        UserInfoSource._config = config.config

    def __init__(self, config):
        pass

    def get_user(self, name):
        """
        @param name  Name of the user
        @return A tuple (name, email, DN) of the user. If the user is not found, return None.
        """
        raise NotImplementedError('get_user')

    def get_user_list(self):
        """
        @return {name: (name, email, DN)}
        """
        raise NotImplementedError('get_user_list')
class DetoxHistoryBase(DeletionHistoryDatabase):
    """
    Parts of the DetoxHistory that can be used by the web detox monitor.
    """

    _config = Configuration()

    @staticmethod
    def set_default(config):
        DetoxHistoryBase._config = Configuration(config)

    def __init__(self, config=None):
        # intentionally passing the config directly to DeletionHistoryDatabase
        DeletionHistoryDatabase.__init__(self, config)

        if config is None:
            config = DetoxHistoryBase._config

        self.history_db = self.db.db_name()
        self.cache_db = config.cache_db
        self.snapshots_spool_dir = config.snapshots_spool_dir
        self.snapshots_archive_dir = config.snapshots_archive_dir

    def get_cycles(self, partition, first=-1, last=-1):
        """
        Get a list of deletion cycles in range first <= cycle <= last.
        If first == -1, pick only the latest before last.
        If last == -1, select cycles up to the latest.
        @param partition  partition name
        @param first      first cycle
        @param last       last cycle
        @return list of cycle numbers
        """
        result = self.db.query('SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(result) == 0:
            return []

        partition_id = result[0]

        sql = 'SELECT `id` FROM `deletion_cycles` WHERE `partition_id` = %s AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'deletion\', \'deletion_test\')'
        if first >= 0:
            sql += ' AND `id` >= %d' % first
        if last >= 0:
            sql += ' AND `id` <= %d' % last
        sql += ' ORDER BY `id` ASC'

        result = self.db.query(sql, partition_id)

        if first < 0 and len(result) > 1:
            result = result[-1:]

        return result

    def get_sites(self, cycle_number, skip_unused=False):
        """
        Collect the site status for a given cycle number or the latest cycle of the partition
        and return as a plain dict.
        @param cycle_number  Detox cycle number
        @param skip_unused   If true, don't list sites that had no data in the cycle
        @return {site_name: (status, quota)}
        """
        self._fill_snapshot_cache('sites', cycle_number)

        table_name = 'sites_%d' % cycle_number

        sql = 'SELECT s.`name`, n.`status`, n.`quota` FROM `{0}`.`{1}` AS n'.format(self.cache_db, table_name)
        sql += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = n.`site_id`'.format(self.history_db)

        if skip_unused:
            self._fill_snapshot_cache('replicas', cycle_number)
            replica_table_name = 'replicas_%d' % cycle_number
            sql += ' INNER JOIN (SELECT DISTINCT `site_id` FROM `{0}`.`{1}`) AS r ON r.`site_id` = s.`id`'.format(self.cache_db, replica_table_name)

        sites_dict = {}

        for site_name, status, quota in self.db.xquery(sql):
            sites_dict[site_name] = (status, quota)

        return sites_dict

    def get_deletion_decisions(self, cycle_number, size_only=True, decisions=None):
        """
        @param cycle_number  Cycle number
        @param size_only     Boolean
        @param decisions     If a list, limit to specified decisions
        @return If size_only = True: a dict {site: (protect_size, delete_size, keep_size)}
                If size_only = False: a massive dict {site: [(dataset, size, decision, reason)]}
        """
        self._fill_snapshot_cache('replicas', cycle_number)

        table_name = 'replicas_%d' % cycle_number

        if size_only:
            # return {site_name: (protect_size, delete_size, keep_size)}
            volumes = {}
            sites = set()

            query = 'SELECT s.`name`, SUM(r.`size`) * 1.e-12 FROM `{0}`.`{1}` AS r'.format(self.cache_db, table_name)
            query += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = r.`site_id`'.format(self.history_db)
            query += ' WHERE r.`decision` LIKE %s'
            query += ' GROUP BY r.`site_id`'

            if type(decisions) is not list:
                decisions = ['protect', 'delete', 'keep']

            for decision in decisions:
                volumes[decision] = dict(self.db.xquery(query, decision))
                sites.update(set(volumes[decision].iterkeys()))

            product = {}
            for site_name in sites:
                v = {}
                for decision in ['protect', 'delete', 'keep']:
                    try:
                        v[decision] = volumes[decision][site_name]
                    except KeyError:
                        v[decision] = 0

                product[site_name] = (v['protect'], v['delete'], v['keep'])

            return product

        else:
            # return {site_name: [(dataset_name, size, decision, condition_id, reason)]}
            query = 'SELECT s.`name`, d.`name`, r.`size`, r.`decision`, r.`condition`, p.`text` FROM `{0}`.`{1}` AS r'.format(self.cache_db, table_name)
            query += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = r.`site_id`'.format(self.history_db)
            query += ' INNER JOIN `{0}`.`datasets` AS d ON d.`id` = r.`dataset_id`'.format(self.history_db)
            query += ' LEFT JOIN `{0}`.`policy_conditions` AS p ON p.`id` = r.`condition`'.format(self.history_db)
            if type(decisions) is list:
                query += ' WHERE r.`decision` IN (%s)' % ','.join('\'%s\'' % d for d in decisions)
            query += ' ORDER BY s.`name` ASC, r.`size` DESC'

            product = {}

            _site_name = ''

            for site_name, dataset_name, size, decision, cid, reason in self.db.xquery(query):
                if site_name != _site_name:
                    product[site_name] = []
                    current = product[site_name]
                    _site_name = site_name

                current.append((dataset_name, size, decision, cid, reason))

            return product

    def get_site_deletion_decisions(self, cycle_number, site_name):
        """
        @return site-specific version of get_deletion_decisions with size_only = False
        """
        self._fill_snapshot_cache('replicas', cycle_number)

        table_name = 'replicas_%d' % cycle_number

        query = 'SELECT d.`name`, r.`size`, r.`decision`, r.`condition`, p.`text` FROM `{0}`.`{1}` AS r'.format(self.cache_db, table_name)
        query += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = r.`site_id`'.format(self.history_db)
        query += ' INNER JOIN `{0}`.`datasets` AS d ON d.`id` = r.`dataset_id`'.format(self.history_db)
        query += ' LEFT JOIN `{0}`.`policy_conditions` AS p ON p.`id` = r.`condition`'.format(self.history_db)
        query += ' WHERE s.`name` = %s ORDER BY r.`size` DESC'

        return self.db.query(query, site_name)

    def _fill_snapshot_cache(self, template, cycle_number):
        self.db.use_db(self.cache_db)

        # cycle_number is either a cycle number or a partition name. %s works for both
        table_name = '%s_%s' % (template, cycle_number)

        table_exists = self.db.table_exists(table_name)

        is_cycle = True
        try:
            cycle_number += 0
        except TypeError:
            is_cycle = False

        if not is_cycle or not table_exists:
            if is_cycle:
                db_file_name = '%s/snapshot_%09d.db' % (self.snapshots_spool_dir, cycle_number)

                if not os.path.exists(db_file_name):
                    try:
                        os.makedirs(self.snapshots_spool_dir)
                        os.chmod(self.snapshots_spool_dir, 0777)
                    except OSError:
                        pass

                    scycle = '%09d' % cycle_number
                    xz_file_name = '%s/%s/%s/snapshot_%09d.db.xz' % (self.snapshots_archive_dir, scycle[:3], scycle[3:6], cycle_number)
                    if not os.path.exists(xz_file_name):
                        raise RuntimeError('Archived snapshot DB ' + xz_file_name + ' does not exist')

                    with open(xz_file_name, 'rb') as xz_file:
                        with open(db_file_name, 'wb') as db_file:
                            db_file.write(lzma.decompress(xz_file.read()))
            else:
                db_file_name = '%s/snapshot_%s.db' % (self.snapshots_spool_dir, cycle_number)

                if not os.path.exists(db_file_name):
                    return

            # fill from sqlite
            if table_exists:
                self.db.query('TRUNCATE TABLE `{0}`'.format(table_name))
            else:
                self.db.query('CREATE TABLE `{0}` LIKE `{1}`'.format(table_name, template))

            snapshot_db = sqlite3.connect(db_file_name)
            snapshot_db.text_factory = str # otherwise we'll get unicode and MySQLdb cannot convert that
            snapshot_cursor = snapshot_db.cursor()

            def make_snapshot_reader():
                if template == 'replicas':
                    sql = 'SELECT r.`site_id`, r.`dataset_id`, r.`size`, d.`value`, r.`condition` FROM `replicas` AS r'
                    sql += ' INNER JOIN `decisions` AS d ON d.`id` = r.`decision_id`'
                elif template == 'sites':
                    sql = 'SELECT s.`site_id`, t.`value`, s.`quota` FROM `sites` AS s'
                    sql += ' INNER JOIN `statuses` AS t ON t.`id` = s.`status_id`'

                snapshot_cursor.execute(sql)

                while True:
                    row = snapshot_cursor.fetchone()
                    if row is None:
                        return

                    yield row

            snapshot_reader = make_snapshot_reader()

            if template == 'replicas':
                fields = ('site_id', 'dataset_id', 'size', 'decision', 'condition')
            elif template == 'sites':
                fields = ('site_id', 'status', 'quota')

            self.db.insert_many(table_name, fields, None, snapshot_reader, do_update=False)

            snapshot_cursor.close()
            snapshot_db.close()

        if is_cycle:
            # cycle_number is really a number. Update the partition cache table too
            sql = 'SELECT p.`name` FROM `{hdb}`.`partitions` AS p INNER JOIN `{hdb}`.`deletion_cycles` AS r ON r.`partition_id` = p.`id` WHERE r.`id` = %s'.format(hdb=self.history_db)
            partition = self.db.query(sql, cycle_number)[0]

            self._fill_snapshot_cache(template, partition)

            # then update the cache usage
            self._update_cache_usage(template, cycle_number)

        self.db.use_db(self.history_db)

    def _update_cache_usage(self, template, cycle_number):
        self.db.use_db(self.cache_db)

        self.db.query('INSERT INTO `{template}_snapshot_usage` VALUES (%s, NOW())'.format(template=template), cycle_number)

        # clean old cache
        sql = 'SELECT `cycle_id` FROM (SELECT `cycle_id`, MAX(`timestamp`) AS m FROM `replicas_snapshot_usage` GROUP BY `cycle_id`) AS t WHERE m < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        old_replica_cycles = self.db.query(sql)
        for old_cycle in old_replica_cycles:
            table_name = 'replicas_%d' % old_cycle
            self.db.query('DROP TABLE IF EXISTS `{0}`'.format(table_name))

        sql = 'SELECT `cycle_id` FROM (SELECT `cycle_id`, MAX(`timestamp`) AS m FROM `sites_snapshot_usage` GROUP BY `cycle_id`) AS t WHERE m < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        old_site_cycles = self.db.query(sql)
        for old_cycle in old_site_cycles:
            table_name = 'sites_%d' % old_cycle
            self.db.query('DROP TABLE IF EXISTS `{0}`'.format(table_name))

        for old_cycle in set(old_replica_cycles) & set(old_site_cycles):
            db_file_name = '%s/snapshot_%09d.db' % (self.snapshots_spool_dir, old_cycle)

            if os.path.exists(db_file_name):
                try:
                    os.unlink(db_file_name)
                except OSError:
                    LOG.error('Failed to delete %s' % db_file_name)

        self.db.query('DELETE FROM `replicas_snapshot_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)')
        self.db.query('OPTIMIZE TABLE `replicas_snapshot_usage`')
        self.db.query('DELETE FROM `sites_snapshot_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)')
        self.db.query('OPTIMIZE TABLE `sites_snapshot_usage`')
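# A hedged usage sketch of the query methods above (the partition name and the
# choice of the latest cycle are hypothetical):
#
#   history = DetoxHistoryBase()
#   cycles = history.get_cycles('Physics')
#   if len(cycles) != 0:
#       sites = history.get_sites(cycles[-1])
#       volumes = history.get_deletion_decisions(cycles[-1], size_only=True)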
def update(self, inventory):
    for dataset in inventory.datasets.itervalues():
        try:
            dataset.attr.pop('locked_blocks')
        except KeyError:
            pass

    for source, content_type, site_pattern, lock_url in self._sources.itervalues():
        if lock_url is not None:
            # check that the lock files themselves are not locked
            while True:
                # Hacky but this is temporary anyway
                opener = urllib2.build_opener(webservice.HTTPSCertKeyHandler(Configuration()))
                opener.addheaders.append(('Accept', 'application/json'))
                request = urllib2.Request(lock_url)
                try:
                    opener.open(request)
                except urllib2.HTTPError as err:
                    if err.code == 404:
                        # file not found -> no lock
                        break
                    else:
                        raise

                LOG.info('Lock files are being produced. Waiting 60 seconds.')
                time.sleep(60)

        LOG.info('Retrieving lock information from %s', source.url_base)

        data = source.make_request()

        if content_type == WebReplicaLock.LIST_OF_DATASETS:
            # simple list of datasets
            for dataset_name in data:
                if dataset_name is None:
                    LOG.debug('Dataset name None found in %s', source.url_base)
                    continue

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    LOG.debug('Unknown dataset %s in %s', dataset_name, source.url_base)
                    continue

                if dataset.replicas is None:
                    continue

                try:
                    locked_blocks = dataset.attr['locked_blocks']
                except KeyError:
                    locked_blocks = dataset.attr['locked_blocks'] = {}

                for replica in dataset.replicas:
                    if site_pattern is not None and not fnmatch.fnmatch(replica.site.name, site_pattern):
                        continue

                    if replica.site in locked_blocks:
                        locked_blocks[replica.site].update(brep.block for brep in replica.block_replicas)
                    else:
                        locked_blocks[replica.site] = set(brep.block for brep in replica.block_replicas)

        elif content_type == WebReplicaLock.CMSWEB_LIST_OF_DATASETS:
            # data['result'] -> simple list of datasets
            for dataset_name in data['result']:
                if dataset_name is None:
                    LOG.debug('Dataset name None found in %s', source.url_base)
                    continue

                try:
                    dataset = inventory.datasets[dataset_name]
                except KeyError:
                    LOG.debug('Unknown dataset %s in %s', dataset_name, source.url_base)
                    continue

                if dataset.replicas is None:
                    continue

                try:
                    locked_blocks = dataset.attr['locked_blocks']
                except KeyError:
                    locked_blocks = dataset.attr['locked_blocks'] = {}

                for replica in dataset.replicas:
                    if site_pattern is not None and not fnmatch.fnmatch(replica.site.name, site_pattern):
                        continue

                    if replica.site in locked_blocks:
                        locked_blocks[replica.site].update(brep.block for brep in replica.block_replicas)
                    else:
                        locked_blocks[replica.site] = set(brep.block for brep in replica.block_replicas)

        elif content_type == WebReplicaLock.SITE_TO_DATASETS:
            # data = {site: {dataset: info}}
            for site_name, objects in data.items():
                try:
                    site = inventory.sites[site_name]
                except KeyError:
                    LOG.debug('Unknown site %s in %s', site_name, source.url_base)
                    continue

                for object_name, info in objects.items():
                    if not info['lock']:
                        LOG.debug('Object %s is not locked at %s', object_name, site_name)
                        continue

                    if '#' in object_name:
                        dataset_name, block_real_name = object_name.split('#')
                    else:
                        dataset_name = object_name
                        block_real_name = None

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name, source.url_base)
                        continue

                    replica = site.find_dataset_replica(dataset)
                    if replica is None:
                        LOG.debug('Replica of %s is not at %s in %s', dataset_name, site_name, source.url_base)
                        continue

                    if block_real_name is None:
                        blocks = list(dataset.blocks)
                    else:
                        block = dataset.find_block(Block.to_internal_name(block_real_name))
                        if block is None:
                            LOG.debug('Unknown block %s of %s in %s', block_real_name, dataset_name, source.url_base)
                            continue

                        blocks = [block]

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    if site in locked_blocks:
                        locked_blocks[site].update(blocks)
                    else:
                        locked_blocks[site] = set(blocks)
def __init__(self, config):
    SiteInfoSource.__init__(self, config)

    self.config = Configuration(config.sites)
def __init__(self, config = None):
    config = Configuration(config)

    self.registry = RegistryDatabase(config.get('registry', None))
def add_source(self, name, config, auth_config):
    LOG.info(config)

    rest_config = Configuration()
    rest_config.url_base = config.get('url', None)
    rest_config.accept = config.get('data_type', 'application/json')
    if config.auth == 'noauth':
        rest_config.auth_handler = 'None'
    else:
        auth = auth_config[config.auth]
        rest_config.auth_handler = auth.auth_handler
        rest_config.auth_handler_conf = Configuration(auth.get('auth_handler_conf', {}))

    content_type = getattr(WebReplicaLock, config.content_type)
    site_pattern = config.get('sites', None)
    lock_url = config.get('lock_url', None)

    if rest_config.url_base is not None:
        self._sources[name] = (webservice.RESTService(rest_config), content_type, site_pattern, lock_url)

    if config.get('oracledb', None) is not None:
        oracle_config = Configuration()
        oracle_config.db = config.oracledb.db
        oracle_config.pw = config.oracledb.password
        oracle_config.host = config.oracledb.host
        self._sources[name] = (webservice.OracleService(oracle_config), content_type, site_pattern, (config.oracledb.lockoflock, config.oracledb.locks))