Example #1
    def __init__(self, config):
        AppManager.__init__(self, config)

        if not hasattr(self, '_mysql'):
            db_params = Configuration(config.db_params)
            db_params.reuse_connection = True  # we use locks

            self._mysql = MySQL(db_params)

        # make sure applications row with id 0 exists
        count = self._mysql.query(
            'SELECT COUNT(*) FROM `applications` WHERE `id` = 0')[0]

        if count == 0:
            # Cannot insert with id = 0 (it would be interpreted as the next auto_increment id unless a server-wide setting is changed)
            # Insert with an implicit id first and update it afterward
            columns = ('auth_level', 'title', 'path', 'status', 'user_id',
                       'user_host')
            values = (AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')
            insert_id = self._mysql.insert_get_id('applications',
                                                  columns=columns,
                                                  values=values)

            self._mysql.query(
                'UPDATE `applications` SET `id` = 0 WHERE `id` = %s',
                insert_id)
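Why the two-step: MySQL normally treats an explicit 0 in an AUTO_INCREMENT column as a request for the next automatic id. The server-side alternative the comment alludes to is the NO_AUTO_VALUE_ON_ZERO SQL mode; a minimal sketch of that route, assuming the wrapper forwards extra positional query parameters:

    # Sketch only: with NO_AUTO_VALUE_ON_ZERO in the session sql_mode, an
    # explicit id of 0 is stored as-is instead of being auto-assigned.
    self._mysql.query("SET SESSION sql_mode = CONCAT(@@sql_mode, ',NO_AUTO_VALUE_ON_ZERO')")
    self._mysql.query(
        'INSERT INTO `applications`'
        ' (`id`, `auth_level`, `title`, `path`, `status`, `user_id`, `user_host`)'
        ' VALUES (0, %s, %s, %s, %s, %s, %s)',
        AppManager.LV_WRITE, 'wsgi', '', 'done', 0, '')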
Example #2
    def __init__(self, config):
        UpdateBoard.__init__(self, config)

        db_params = Configuration(config.db_params)
        db_params.reuse_connection = True # we use locks

        self._mysql = MySQL(db_params)
Example #3
    def __init__(self, config=None):
        config = Configuration(config)

        if 'user' in config:
            user = config.user
        else:
            user = MySQL._default_config.default_user

        try:
            self._connection_parameters = dict(MySQL._default_parameters[user])
        except KeyError:
            self._connection_parameters = {'user': user}

        if 'config_file' in config and 'config_group' in config:
            parser = ConfigParser()
            parser.read(config['config_file'])
            group = config['config_group']
            for ckey, key in [('host', 'host'), ('user', 'user'),
                              ('password', 'passwd'), ('db', 'db')]:
                try:
                    self._connection_parameters[key] = parser.get(group, ckey)
                except Exception:
                    pass

        if 'host' in config:
            self._connection_parameters['host'] = config['host']
        if 'passwd' in config:
            self._connection_parameters['passwd'] = config['passwd']
        if 'db' in config:
            self._connection_parameters['db'] = config['db']

        self._connection = None

        # Avoid interference in case the module is used from multiple threads
        self._connection_lock = multiprocessing.RLock()

        # MySQL tables can be locked by multiple statements but are unlocked with one.
        # In nested functions with each one locking different tables, we need to call UNLOCK TABLES
        # only after the outermost function asks for it.
        self._locked_tables = []

        # Use with care! If False, table locks and temporary tables cannot be used
        self.reuse_connection = config.get(
            'reuse_connection',
            MySQL._default_config.get('reuse_connection', True))

        # Default 1M characters
        self.max_query_len = config.get(
            'max_query_len', MySQL._default_config.get('max_query_len',
                                                       1000000))

        # Default database for CREATE TEMPORARY TABLE
        self.scratch_db = config.get(
            'scratch_db', MySQL._default_config.get('scratch_db', ''))

        # Row id of the last insertion. Will be nonzero if the table has an auto-increment primary key.
        # **NOTE** While core execution of query() and xquery() are locked and thread-safe, last_insert_id is not.
        # Use insert_and_get_id() in a threaded environment.
        self.last_insert_id = 0
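The constructor accepts credentials either inline or through a MySQL options file named by config_file / config_group. A usage sketch with made-up path, group, and database names (other examples show Configuration accepting a plain dict, e.g. Configuration(json.loads(...))):

    # Hypothetical values; the keys mirror what __init__ reads above.
    db_config = Configuration({
        'config_file': '/etc/mydb.cnf',   # parsed with ConfigParser
        'config_group': 'mysql-dynamo',   # section supplying host/user/password/db
        'db': 'dynamoregister',           # explicit keys override the file
        'reuse_connection': True,         # required for table locks (see comment above)
    })
    mysql = MySQL(db_config)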
Example #4
    def __init__(self, config=None):
        if config is None:
            config = Configuration()

        DatasetInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
        self._dbs = DBS(config.get('dbs', None))
Example #5
    def __init__(self, config):
        Authorizer.__init__(self, config)

        if not hasattr(self, '_mysql'):
            db_params = Configuration(config.db_params)
            db_params.reuse_connection = True # we use locks
    
            self._mysql = MySQL(db_params)
Example #6
    def __init__(self, config=None):
        if config is None:
            config = Configuration()

        ReplicaInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
        self._parallelizer_config = config
Example #7
    def __init__(self, config=None):
        config = Configuration(config)

        config.auth_handler = 'HTTPSCertKeyHandler'
        if 'url_base' not in config:
            config.url_base = DBS._url_base
        if 'num_attempts' not in config:
            config.num_attempts = DBS._num_attempts

        RESTService.__init__(self, config)
Example #8
    def __init__(self, config=None):
        config = Configuration(config)

        CopyInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.subscription_chunk_size = config.get('chunk_size', 50.) * 1.e+12
Example #10
    def __init__(self, config=None):
        config = Configuration(config)

        SiteInfoSource.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))
        self._ssb = SiteStatusBoard(config.get('ssb', None))

        self.ssb_cache_lifetime = config.get('ssb_cache_lifetime', 3600)
        self._ssb_cache_timestamp = 0
        self._caching_lock = threading.Lock()

        self._waitroom_sites = set()
        self._morgue_sites = set()
Example #11
    def __init__(self, config):
        """
        @param config  Required parameters:
                       str url_base      There is no strict rule on separating the URL base and
                                         individual request REST command ('resource' in make_request).
                                         All requests are made to url_base + '/' + resource.
                       Optional parameters:
                       list headers      Additional request headers (All standard headers including
                                         Accept are automatically passed). Default empty.
                       str  accept       Accept header value. Default 'application/json'.
                       str  auth_handler Handler class for authentication. Use 'None' for no auth.
                                         Default HTTPSCertKeyHandler.
                       conf auth_handler_conf
                       int  num_attempts
        """

        self.url_base = config.url_base
        self.headers = list(config.get('headers', []))
        self.accept = config.get('accept', 'application/json')
        self.auth_handler = eval(
            config.get('auth_handler', 'HTTPSCertKeyHandler'))
        self.auth_handler_conf = config.get('auth_handler_conf',
                                            Configuration())
        self.num_attempts = config.get('num_attempts', 1)

        self.last_errorcode = 0
        self.last_exception = None
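Per the docstring, url_base is the only required parameter. A minimal construction sketch with a placeholder URL:

    # Everything besides url_base falls back to the defaults named in the docstring.
    rest_config = Configuration()
    rest_config.url_base = 'https://example.org/api'  # placeholder
    rest_config.accept = 'application/json'
    rest_config.auth_handler = 'None'                 # the string 'None' disables auth
    rest_config.num_attempts = 3
    service = RESTService(rest_config)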
Example #12
    def set_default(config):
        MySQL._default_config = Configuration(config)
        MySQL._default_config.pop('params')

        for user, params in config.params.items():
            MySQL._default_parameters[user] = dict(params)
            MySQL._default_parameters[user]['user'] = user
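set_default expects a config whose params attribute maps user names to connection parameters; params is popped off, and whatever remains becomes the class-wide default config that __init__ (Example #3) reads back through _default_config. A sketch with hypothetical values:

    # Hypothetical defaults. 'params' is consumed here; 'default_user' is the
    # fallback user __init__ picks when the instance config names none.
    defaults = Configuration({
        'default_user': 'dynamo',
        'reuse_connection': True,
        'max_query_len': 1000000,
        'params': {
            'dynamo': {'host': 'localhost', 'passwd': 'secret', 'db': 'dynamo'},
        },
    })
    MySQL.set_default(defaults)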
Example #13
    def create_appmanager(self):  #override
        if self.readonly_config is None:
            db_params = self._mysql.config()
        else:
            db_params = self.readonly_config.db_params

        config = Configuration(db_params=db_params)
        return MySQLAppManager(config)
Example #14
class GroupInfoSource(object):
    """
    Interface specs for group information authority.
    """
    @staticmethod
    def get_instance(module=None, config=None):
        if module is None:
            module = GroupInfoSource._module
        if config is None:
            config = GroupInfoSource._config

        return get_instance(GroupInfoSource, module, config)

    # defaults
    _module = ''
    _config = Configuration()

    @staticmethod
    def set_default(config):
        GroupInfoSource._module = config.module
        GroupInfoSource._config = config.config

    def __init__(self, config):
        include = config.get('include', None)

        if type(include) is list:
            self.include = list(include)
        elif include is not None:
            self.include = [include]
        else:
            self.include = None

        exclude = config.get('exclude', None)

        if type(exclude) is list:
            self.exclude = list(exclude)
        elif exclude is not None:
            self.exclude = [exclude]
        else:
            self.exclude = None

        # List of group names where olevel should be Dataset
        self.dataset_level_groups = list(config.get('dataset_level_groups',
                                                    []))

    def get_group(self, name):
        """
        @param name  Name of the group
        @return  A Group object with full info, or None if the group is not found.
        """
        raise NotImplementedError('get_group')

    def get_group_list(self):
        """
        @return  List of unlinked Group objects. Will always contain a null group.
        """
        raise NotImplementedError('get_group_list')
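Concrete sources are registered through set_default and retrieved with get_instance. A wiring sketch; the module name is invented:

    # 'MyGroupSource' is a made-up plugin module. set_default stores the
    # class-level fallbacks that get_instance() uses when called without arguments.
    defaults = Configuration({
        'module': 'MyGroupSource',
        'config': Configuration({'include': ['AnalysisOps']}),
    })
    GroupInfoSource.set_default(defaults)
    source = GroupInfoSource.get_instance()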
Example #15
    def __init__(self, config = Configuration()):
        self.start_sem = threading.Semaphore(max(config.get('num_threads', multiprocessing.cpu_count() - 1), 1))
        self.task_per_thread = config.get('task_per_thread', 1)

        self.print_progress = config.get('print_progress', False)
        self.timeout = config.get('timeout', 0)
        self.repeat_on_exception = config.get('repeat_on_exception', True)

        self.logger = None
Example #16
    def __init__(self, config):
        BaseHandler.__init__(self, 'Enforcer')

        if type(config.enforcer) is str:
            # A path to the common enforcer configuration
            enforcer_config = Configuration(config.enforcer)
        else:
            enforcer_config = config.enforcer

        self.interface = EnforcerInterface(enforcer_config)
Example #17
class StaticSiteInfoSource(SiteInfoSource):
    """
    Site information source fully specified by the static configuration.
    """

    def __init__(self, config):
        SiteInfoSource.__init__(self, config)
        
        self.config = Configuration(config.sites)

    def get_site(self, name, inventory): #override
        try:
            site_config = self.config[name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % name)

        storage_type = Site.storage_type_val(site_config.storage_type)
        backend = site_config.backend

        site_obj = Site(name, host = site_config.host, storage_type = storage_type, backend = backend)
        if name in inventory.sites:
            old_site_obj = inventory.sites[name]
            site_obj.x509proxy = old_site_obj.x509proxy

        return site_obj

    def get_site_list(self, inventory): #override
        site_list = []

        for name in self.config.keys():
            site_list.append(self.get_site(name, inventory))

        return site_list

    def get_site_status(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        return Site.status_val(site_config.status)

    def get_filename_mapping(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        result = {}
        for protocol, mappings in site_config.filename_mapping.items():
            result[protocol] = []
            for lfnpat, pfnpat in mappings:
                result[protocol].append([(lfnpat, pfnpat)])

        return result
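Everything this source serves comes from config.sites. A sketch of the per-site structure implied by the attribute reads above; all names and values are illustrative:

    # Illustrative only; the keys mirror what the get_site* methods read.
    sites = Configuration({
        'T2_EXAMPLE': {
            'host': 'se.example.org',
            'storage_type': 'disk',               # passed to Site.storage_type_val
            'backend': 'gsiftp://se.example.org',
            'status': 'ready',                    # passed to Site.status_val
            'filename_mapping': {
                # a list of (lfn pattern, pfn pattern) pairs per protocol
                'gfal2': [('/store/...', '/pnfs/example.org/store/...')],
            },
        },
    })
    source = StaticSiteInfoSource(Configuration(sites=sites))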
Example #18
    def get_board_config(self, hostname):  #override
        sql = 'SELECT `board_module`, `board_config` FROM `servers` WHERE `hostname` = %s'
        result = self._mysql.query(sql, hostname)
        if len(result) == 0:
            return None

        module, config_str = result[0]

        if config_str is None:
            return None

        return module, Configuration(json.loads(config_str))
Example #19
    def __init__(self, config=None):
        config = Configuration(config)

        DeletionInterface.__init__(self, config)

        self._phedex = PhEDEx(config.get('phedex', None))

        self._history = HistoryDatabase(config.get('history', None))

        self.auto_approval = config.get('auto_approval', True)
        self.allow_tape_deletion = config.get('allow_tape_deletion', True)
        self.tape_auto_approval = config.get('tape_auto_approval', False)

        self.deletion_chunk_size = config.get('chunk_size', 50.) * 1.e+12
Example #20
    def __init__(self, config):
        LOG.info('Initializing Dynamo server %s.', __file__)

        ## Create the inventory
        self.inventory_config = config.inventory.clone()
        self.inventory = None

        ## Create the server manager
        self.manager_config = config.manager.clone()
        self.manager = ServerManager(self.manager_config)

        ## Modules defaults config
        self.defaults_config = config.defaults

        ## Application collection
        self.applications_config = config.applications.clone()
        if self.applications_config.enabled:
            # Initialize the appserver here since it may require elevated privilege (this constructor runs as root)
            aconf = self.applications_config.server
            self.appserver = AppServer.get_instance(aconf.module, self, aconf.config)

            if self.applications_config.timeout < 60:
                # Some errors were observed when the timeout is too short
                # (probably 1 second is enough - we just need to get through pre_execution)
                self.applications_config.timeout = 60

        ## Web server
        if config.web.enabled:
            config.web.modules_config = Configuration(config.web.modules_config_path)
            config.web.pop('modules_config_path')
    
            self.webserver = WebServer(config.web, self)
        else:
            self.webserver = None

        ## Server status (and application) poll interval
        self.poll_interval = config.status_poll_interval

        ## Load the inventory content (filter according to debug config)
        self.inventory_load_opts = {}
        if 'debug' in config:
            for objs in ['groups', 'sites', 'datasets']:
                included = config.debug.get('included_' + objs, None)
                excluded = config.debug.get('excluded_' + objs, None)
    
                self.inventory_load_opts[objs] = (included, excluded)

        ## Queue to send / receive inventory updates
        self.inventory_update_queue = multiprocessing.JoinableQueue()

        ## Recipient of error message emails
        self.notification_recipient = config.notification_recipient
Example #21
    def get_next_master(self, current):  #override
        self._mysql.query('DELETE FROM `servers` WHERE `hostname` = %s',
                          current)

        # shadow config must be the same as master
        result = self._mysql.query(
            'SELECT `shadow_module`, `shadow_config` FROM `servers` ORDER BY `id` LIMIT 1'
        )
        if len(result) == 0:
            raise RuntimeError('No servers can become master at this moment')

        module, config_str = result[0]
        return module, Configuration(json.loads(config_str))
Example #22
    def __init__(self, config=None):
        config = Configuration(config)

        if 'include_datasets' in config:
            if type(config.include_datasets) is list:
                self.include_datasets = map(
                    lambda pattern: re.compile(fnmatch.translate(pattern)),
                    config.include_datasets)
            else:
                self.include_datasets = [
                    re.compile(fnmatch.translate(config.include_datasets))
                ]
        else:
            self.include_datasets = None

        if 'exclude_datasets' in config:
            if type(config.exclude_datasets) is list:
                self.exclude_datasets = map(
                    lambda pattern: re.compile(fnmatch.translate(pattern)),
                    config.exclude_datasets)
            else:
                self.exclude_datasets = [
                    re.compile(fnmatch.translate(config.exclude_datasets))
                ]
        else:
            self.exclude_datasets = None

        if 'include_sites' in config:
            if type(config.include_sites) is list:
                self.include_sites = map(
                    lambda pattern: re.compile(fnmatch.translate(pattern)),
                    config.include_sites)
            else:
                self.include_sites = [
                    re.compile(fnmatch.translate(config.include_sites))
                ]
        else:
            self.include_sites = None

        if 'exclude_sites' in config:
            if type(config.exclude_sites) is list:
                self.exclude_sites = map(
                    lambda pattern: re.compile(fnmatch.translate(pattern)),
                    config.exclude_sites)
            else:
                self.exclude_sites = [
                    re.compile(fnmatch.translate(config.exclude_sites))
                ]
        else:
            self.exclude_sites = None
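All four blocks repeat one normalization: a scalar-or-list of fnmatch globs becomes a list of compiled regexes. A refactoring sketch; the helper name is mine, and the list comprehension also avoids map() turning lazy on Python 3:

    import fnmatch
    import re

    def _compile_patterns(config, key):
        # Hypothetical helper: normalize the scalar-or-list config value into
        # a list of compiled glob regexes, or None when the key is absent.
        if key not in config:
            return None
        value = config[key]
        if type(value) is not list:
            value = [value]
        return [re.compile(fnmatch.translate(pattern)) for pattern in value]

    # e.g. self.include_datasets = _compile_patterns(config, 'include_datasets')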
Example #23
class StaticSiteInfoSource(SiteInfoSource):
    """
    Site information source fully specified by the static configuration.
    """

    def __init__(self, config):
        SiteInfoSource.__init__(self, config)
        
        self.config = Configuration(config.sites)

    def get_site(self, name): #override
        try:
            site_config = self.config[name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % name)

        storage_type = Site.storage_type_val(site_config.storage_type)
        backend = site_config.backend

        return Site(name, host = site_config.host, storage_type = storage_type, backend = backend)

    def get_site_list(self): #override
        site_list = []

        for name in self.config.keys():
            site_list.append(self.get_site(name))

        return site_list

    def get_site_status(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        return Site.status_val(site_config.status)

    def get_filename_mapping(self, site_name): #override
        try:
            site_config = self.config[site_name]
        except KeyError:
            raise RuntimeError('Site %s not in configuration' % site_name)

        result = {}
        for protocol, mappings in site_config.filename_mapping.items():
            result[protocol] = []
            for lfnpat, pfnpat in mappings:
                result[protocol].append([(lfnpat, pfnpat)])

        return result
Example #24
    def add_source(self, name, config, auth_config):
        rest_config = Configuration()
        rest_config.url_base = config.url
        rest_config.accept = config.get('data_type', 'application/json')
        if config.auth == 'noauth':
            rest_config.auth_handler = 'None'
        else:
            auth = auth_config[config.auth]
            rest_config.auth_handler = auth.auth_handler
            rest_config.auth_handler_conf = Configuration(
                auth.get('auth_handler_conf', {}))

        content_type = getattr(WebReplicaLock, config.content_type)
        site_pattern = config.get('sites', None)
        lock_url = config.get('lock_url', None)

        self._sources[name] = (webservice.RESTService(rest_config),
                               content_type, site_pattern, lock_url)
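add_source takes a per-source block plus a shared table of auth handler settings. A sketch of compatible inputs; the field names mirror the reads above and every value is invented (web_lock stands for a WebReplicaLock instance):

    # Illustrative inputs only.
    auth_config = Configuration({
        'cert': {'auth_handler': 'HTTPSCertKeyHandler', 'auth_handler_conf': {}},
    })
    source_config = Configuration({
        'url': 'https://example.org/locks.json',  # placeholder
        'data_type': 'application/json',
        'auth': 'cert',                           # key into auth_config
        'content_type': 'LIST_OF_DATASETS',       # attribute of WebReplicaLock
        'sites': 'T2_*',                          # optional fnmatch site pattern
    })
    web_lock.add_source('example', source_config, auth_config)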
Example #25
class DeletionInterface(object):
    """
    Interface to data deletion application.
    """
    @staticmethod
    def get_instance(module=None, config=None):
        if module is None:
            module = DeletionInterface._module
        if config is None:
            config = DeletionInterface._config

        return get_instance(DeletionInterface, module, config)

    _module = ''
    _config = Configuration()

    @staticmethod
    def set_default(config):
        DeletionInterface._module = config.module
        DeletionInterface._config = config.config

    def __init__(self, config=None):
        config = Configuration(config)
        self._read_only = False

    def set_read_only(self, value=True):
        self._read_only = value

    def schedule_deletions(self, replica_list, operation_id, comments=''):
        """
        Schedule a deletion of multiple replicas.
        @param replica_list  [(DatasetReplica, [BlockReplica])]. List of block replicas can be None if deleting the entire dataset replica.
        @param operation_id  Deletion operation id in the history DB for logging.
        @param comments      Comments to be passed to the operation interface

        @return  Clone [(DatasetReplica, [BlockReplica] or None)] for successfully scheduled replicas. DatasetReplica does not have BlockReplicas.
        """

        raise NotImplementedError('schedule_deletions')

    def deletion_status(self, operation_id):
        """
        @param operation_id  Operation id returned by schedule_deletions.
        @return Completion status {dataset: (last_update, total, deleted)}
        """

        raise NotImplementedError('deletion_status')
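The docstrings pin down the subclass contract. A skeleton that honors the documented return shapes; the class name is mine and the bodies are deliberately no-ops:

    class NullDeletionInterface(DeletionInterface):
        # Hypothetical subclass, shown only to make the contract concrete.
        def schedule_deletions(self, replica_list, operation_id, comments=''):
            if self._read_only:
                return []
            # pretend everything was scheduled, echoing the documented clone shape
            return [(replica, block_replicas) for replica, block_replicas in replica_list]

        def deletion_status(self, operation_id):
            # {dataset: (last_update, total, deleted)} per the docstring
            return {}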
Example #26
    def __init__(self, config):
        policy_conf = Configuration(config.policy)

        # Partition to work in
        self.partition_name = policy_conf.partition

        # Enforcer policies
        self.rules = {}
        for rule_name, rule_conf in policy_conf.rules.iteritems():
            rule = EnforcerRule(rule_conf)
            if not rule.destination_group_name:
                rule.destination_group_name = policy_conf.default_destination_group

            self.rules[rule_name] = rule

        # If True, report_back returns a list to be fed to RRD writing
        self.write_rrds = config.get('write_rrds', False)
Example #27
    def __init__(self, config):
        BaseHandler.__init__(self, 'DirectRequests')

        registry_config = Configuration(config.registry)
        registry_config['reuse_connection'] = True # need to work with table locks

        self.request_manager = CopyRequestManager(config.get('manager', None))

        # maximum size that can be requested
        self.max_size = config.max_size * 1.e+12

        # convert block-level requests to dataset-level if requested size is greater than
        # dataset size * block_request_max
        self.block_request_max = config.block_request_max

        # list of group names from which ownership of blocks can be taken away
        self.overwritten_groups = config.get('overwritten_groups', [])

        self.activated_requests = []
Example #28
    def get_store_config(self, hostname):  #override
        self._mysql.lock_tables(read=['servers'])

        try:
            while self.get_status(hostname) == ServerHost.STAT_UPDATING:
                # need to get the version of the remote server when it's not updating
                self._mysql.unlock_tables()
                time.sleep(2)
                self._mysql.lock_tables(read=['servers'])

            sql = 'SELECT `store_module`, `store_config`, `store_version` FROM `servers` WHERE `hostname` = %s'
            result = self._mysql.query(sql, hostname)

        finally:
            self._mysql.unlock_tables()

        if len(result) == 0:
            return None

        module, config_str, version = result[0]

        return module, Configuration(json.loads(config_str)), version
Example #29
    def config(self):
        conf = Configuration()
        for key in ['host', 'user', 'passwd', 'db']:
            try:
                conf[key] = self._connection_parameters[key]
            except KeyError:
                pass
        try:
            conf['config_file'] = self._connection_parameters[
                'read_default_file']
        except KeyError:
            pass
        try:
            conf['config_group'] = self._connection_parameters[
                'read_default_group']
        except KeyError:
            pass

        conf['reuse_connection'] = self.reuse_connection
        conf['max_query_len'] = self.max_query_len
        conf['scratch_db'] = self.scratch_db

        return conf
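config() emits a Configuration that __init__ (Example #3) understands; Example #13 relies on exactly this to spawn a MySQLAppManager off an existing connection. A roundtrip sketch with an illustrative parameter:

    # The keys written by config() are the ones __init__ consumes, so an
    # equivalent second handle can be built from an existing one.
    original = MySQL(Configuration({'db': 'dynamo'}))  # illustrative
    clone = MySQL(original.config())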
Example #30
class UserInfoSource(object):
    """
    Interface specs for user data authority.
    """

    @staticmethod
    def get_instance(module = None, config = None):
        if module is None:
            module = UserInfoSource._module
        if config is None:
            config = UserInfoSource._config

        return get_instance(UserInfoSource, module, config)

    _module = ''
    _config = Configuration()

    @staticmethod
    def set_default(config):
        UserInfoSource._module = config.module
        UserInfoSource._config = config.config
        
    def __init__(self, config):
        pass

    def get_user(self, name):
        """
        @param name  Name of the user
        @return A tuple (name, email, DN) of the user. If user is not found, return None.
        """
        raise NotImplementedError('get_user')

    def get_user_list(self):
        """
        @return  {name: (name, email, DN)}
        """
        raise NotImplementedError('get_user_list')
Example #32
class DetoxHistoryBase(DeletionHistoryDatabase):
    """
    Parts of the DetoxHistory that can be used by the web detox monitor.
    """

    _config = Configuration()

    @staticmethod
    def set_default(config):
        DetoxHistoryBase._config = Configuration(config)

    def __init__(self, config=None):
        DeletionHistoryDatabase.__init__(self, config)

        # intentionally passing the config directly to DeletionHistoryDatabase
        if config is None:
            config = DetoxHistoryBase._config

        self.history_db = self.db.db_name()
        self.cache_db = config.cache_db
        self.snapshots_spool_dir = config.snapshots_spool_dir
        self.snapshots_archive_dir = config.snapshots_archive_dir

    def get_cycles(self, partition, first=-1, last=-1):
        """
        Get a list of deletion cycles in range first <= cycle <= last. If first == -1, pick only the latest before last.
        If last == -1, select cycles up to the latest.
        @param partition  partition name
        @param first      first cycle
        @param last       last cycle

        @return list of cycle numbers
        """

        result = self.db.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(result) == 0:
            return []

        partition_id = result[0]

        sql = 'SELECT `id` FROM `deletion_cycles` WHERE `partition_id` = %s AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'deletion\', \'deletion_test\')'

        if first >= 0:
            sql += ' AND `id` >= %d' % first
        if last >= 0:
            sql += ' AND `id` <= %d' % last

        sql += ' ORDER BY `id` ASC'

        result = self.db.query(sql, partition_id)

        if first < 0 and len(result) > 1:
            result = result[-1:]

        return result

    def get_sites(self, cycle_number, skip_unused=False):
        """
        Collect the site status for a given cycle number or the latest cycle of the partition
        and return as a plain dict.
        @param cycle_number   Detox cycle number
        @param skip_unused    If true, don't list sites that had no data in the cycle

        @return {site_name: (status, quota)}
        """

        self._fill_snapshot_cache('sites', cycle_number)

        table_name = 'sites_%d' % cycle_number

        sql = 'SELECT s.`name`, n.`status`, n.`quota` FROM `{0}`.`{1}` AS n'.format(
            self.cache_db, table_name)
        sql += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = n.`site_id`'.format(
            self.history_db)
        if skip_unused:
            self._fill_snapshot_cache('replicas', cycle_number)

            replica_table_name = 'replicas_%d' % cycle_number
            sql += ' INNER JOIN (SELECT DISTINCT `site_id` FROM `{0}`.`{1}`) AS r ON r.`site_id` = s.`id`'.format(
                self.cache_db, replica_table_name)

        sites_dict = {}

        for site_name, status, quota in self.db.xquery(sql):
            sites_dict[site_name] = (status, quota)

        return sites_dict

    def get_deletion_decisions(self,
                               cycle_number,
                               size_only=True,
                               decisions=None):
        """
        @param cycle_number   Cycle number
        @param size_only      Boolean
        @param decisions      If a list, limit to specified decisions
        
        @return If size_only = True: a dict {site: (protect_size, delete_size, keep_size)}
                If size_only = False: a massive dict {site: [(dataset, size, decision, condition_id, reason)]}
        """

        self._fill_snapshot_cache('replicas', cycle_number)

        table_name = 'replicas_%d' % cycle_number

        if size_only:
            # return {site_name: (protect_size, delete_size, keep_size)}
            volumes = {}
            sites = set()

            query = 'SELECT s.`name`, SUM(r.`size`) * 1.e-12 FROM `{0}`.`{1}` AS r'.format(
                self.cache_db, table_name)
            query += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = r.`site_id`'.format(
                self.history_db)
            query += ' WHERE r.`decision` LIKE %s'
            query += ' GROUP BY r.`site_id`'

            if type(decisions) is not list:
                decisions = ['protect', 'delete', 'keep']

            for decision in decisions:
                volumes[decision] = dict(self.db.xquery(query, decision))
                sites.update(set(volumes[decision].iterkeys()))

            product = {}
            for site_name in sites:
                v = {}
                for decision in ['protect', 'delete', 'keep']:
                    try:
                        v[decision] = volumes[decision][site_name]
                    except KeyError:
                        v[decision] = 0

                product[site_name] = (v['protect'], v['delete'], v['keep'])

            return product

        else:
            # return {site_name: [(dataset_name, size, decision, condition_id, reason)]}

            query = 'SELECT s.`name`, d.`name`, r.`size`, r.`decision`, r.`condition`, p.`text` FROM `{0}`.`{1}` AS r'.format(
                self.cache_db, table_name)
            query += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = r.`site_id`'.format(
                self.history_db)
            query += ' INNER JOIN `{0}`.`datasets` AS d ON d.`id` = r.`dataset_id`'.format(
                self.history_db)
            query += ' LEFT JOIN `{0}`.`policy_conditions` AS p ON p.`id` = r.`condition`'.format(
                self.history_db)
            if type(decisions) is list:
                query += ' WHERE r.`decision` IN (%s)' % ','.join(
                    '\'%s\'' % d for d in decisions)
            query += ' ORDER BY s.`name` ASC, r.`size` DESC'

            product = {}

            _site_name = ''

            for site_name, dataset_name, size, decision, cid, reason in self.db.xquery(
                    query):
                if site_name != _site_name:
                    product[site_name] = []
                    current = product[site_name]
                    _site_name = site_name

                current.append((dataset_name, size, decision, cid, reason))

            return product

    def get_site_deletion_decisions(self, cycle_number, site_name):
        """
        @return  site-specific version of get_deletion_decisions with size_only = False
        """

        self._fill_snapshot_cache('replicas', cycle_number)

        table_name = 'replicas_%d' % cycle_number

        query = 'SELECT d.`name`, r.`size`, r.`decision`, r.`condition`, p.`text` FROM `{0}`.`{1}` AS r'.format(
            self.cache_db, table_name)
        query += ' INNER JOIN `{0}`.`sites` AS s ON s.`id` = r.`site_id`'.format(
            self.history_db)
        query += ' INNER JOIN `{0}`.`datasets` AS d ON d.`id` = r.`dataset_id`'.format(
            self.history_db)
        query += ' LEFT JOIN `{0}`.`policy_conditions` AS p ON p.`id` = r.`condition`'.format(
            self.history_db)
        query += ' WHERE s.`name` = %s ORDER BY r.`size` DESC'

        return self.db.query(query, site_name)

    def _fill_snapshot_cache(self, template, cycle_number):
        self.db.use_db(self.cache_db)

        # cycle_number is either a cycle number or a partition name. %s works for both
        table_name = '%s_%s' % (template, cycle_number)

        table_exists = self.db.table_exists(table_name)

        # cycle_number is an int for real cycle numbers and a partition name (str)
        # otherwise; += 0 succeeds on integers and raises TypeError on strings
        is_cycle = True
        try:
            cycle_number += 0
        except TypeError:
            is_cycle = False

        if not is_cycle or not table_exists:
            if is_cycle:
                db_file_name = '%s/snapshot_%09d.db' % (
                    self.snapshots_spool_dir, cycle_number)

                if not os.path.exists(db_file_name):
                    try:
                        os.makedirs(self.snapshots_spool_dir)
                        os.chmod(self.snapshots_spool_dir, 0777)
                    except OSError:
                        pass

                    scycle = '%09d' % cycle_number
                    xz_file_name = '%s/%s/%s/snapshot_%09d.db.xz' % (
                        self.snapshots_archive_dir, scycle[:3], scycle[3:6],
                        cycle_number)
                    if not os.path.exists(xz_file_name):
                        raise RuntimeError('Archived snapshot DB ' +
                                           xz_file_name + ' does not exist')

                    with open(xz_file_name, 'rb') as xz_file:
                        with open(db_file_name, 'wb') as db_file:
                            db_file.write(lzma.decompress(xz_file.read()))

            else:
                db_file_name = '%s/snapshot_%s.db' % (self.snapshots_spool_dir,
                                                      cycle_number)

                if not os.path.exists(db_file_name):
                    return

            # fill from sqlite
            if table_exists:
                self.db.query('TRUNCATE TABLE `{0}`'.format(table_name))
            else:
                self.db.query('CREATE TABLE `{0}` LIKE `{1}`'.format(
                    table_name, template))

            snapshot_db = sqlite3.connect(db_file_name)
            snapshot_db.text_factory = str  # otherwise we'll get unicode and MySQLdb cannot convert that
            snapshot_cursor = snapshot_db.cursor()

            def make_snapshot_reader():
                if template == 'replicas':
                    sql = 'SELECT r.`site_id`, r.`dataset_id`, r.`size`, d.`value`, r.`condition` FROM `replicas` AS r'
                    sql += ' INNER JOIN `decisions` AS d ON d.`id` = r.`decision_id`'
                elif template == 'sites':
                    sql = 'SELECT s.`site_id`, t.`value`, s.`quota` FROM `sites` AS s'
                    sql += ' INNER JOIN `statuses` AS t ON t.`id` = s.`status_id`'

                snapshot_cursor.execute(sql)

                while True:
                    row = snapshot_cursor.fetchone()
                    if row is None:
                        return

                    yield row

            snapshot_reader = make_snapshot_reader()

            if template == 'replicas':
                fields = ('site_id', 'dataset_id', 'size', 'decision',
                          'condition')
            elif template == 'sites':
                fields = ('site_id', 'status', 'quota')

            self.db.insert_many(table_name,
                                fields,
                                None,
                                snapshot_reader,
                                do_update=False)

            snapshot_cursor.close()
            snapshot_db.close()

        if is_cycle:
            # cycle_number is really a number. Update the partition cache table too
            sql = 'SELECT p.`name` FROM `{hdb}`.`partitions` AS p INNER JOIN `{hdb}`.`deletion_cycles` AS r ON r.`partition_id` = p.`id` WHERE r.`id` = %s'.format(
                hdb=self.history_db)
            partition = self.db.query(sql, cycle_number)[0]

            self._fill_snapshot_cache(template, partition)

            # then update the cache usage
            self._update_cache_usage(template, cycle_number)

        self.db.use_db(self.history_db)

    def _update_cache_usage(self, template, cycle_number):
        self.db.use_db(self.cache_db)

        self.db.query(
            'INSERT INTO `{template}_snapshot_usage` VALUES (%s, NOW())'.
            format(template=template), cycle_number)

        # clean old cache
        sql = 'SELECT `cycle_id` FROM (SELECT `cycle_id`, MAX(`timestamp`) AS m FROM `replicas_snapshot_usage` GROUP BY `cycle_id`) AS t WHERE m < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        old_replica_cycles = self.db.query(sql)
        for old_cycle in old_replica_cycles:
            table_name = 'replicas_%d' % old_cycle
            self.db.query('DROP TABLE IF EXISTS `{0}`'.format(table_name))

        sql = 'SELECT `cycle_id` FROM (SELECT `cycle_id`, MAX(`timestamp`) AS m FROM `sites_snapshot_usage` GROUP BY `cycle_id`) AS t WHERE m < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        old_site_cycles = self.db.query(sql)
        for old_cycle in old_site_cycles:
            table_name = 'sites_%d' % old_cycle
            self.db.query('DROP TABLE IF EXISTS `{0}`'.format(table_name))

        for old_cycle in set(old_replica_cycles) & set(old_site_cycles):
            db_file_name = '%s/snapshot_%09d.db' % (self.snapshots_spool_dir,
                                                    old_cycle)
            if os.path.exists(db_file_name):
                try:
                    os.unlink(db_file_name)
                except OSError:
                    LOG.error('Failed to delete %s', db_file_name)

        self.db.query(
            'DELETE FROM `replicas_snapshot_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        )
        self.db.query('OPTIMIZE TABLE `replicas_snapshot_usage`')
        self.db.query(
            'DELETE FROM `sites_snapshot_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        )
        self.db.query('OPTIMIZE TABLE `sites_snapshot_usage`')
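A read-path sketch tying the public methods together, assuming set_default (above) has been called; the partition name is a placeholder:

    history = DetoxHistoryBase()
    cycles = history.get_cycles('Physics')    # ascending cycle ids; 'Physics' is made up
    if cycles:
        latest = cycles[-1]
        sites = history.get_sites(latest)     # {site: (status, quota)}
        sizes = history.get_deletion_decisions(latest, size_only=True)
        # sizes: {site: (protect_size, delete_size, keep_size)}, in TB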
Example #33
    def update(self, inventory):
        for dataset in inventory.datasets.itervalues():
            try:
                dataset.attr.pop('locked_blocks')
            except KeyError:
                pass

        for source, content_type, site_pattern, lock_url in self._sources.itervalues():
            if lock_url is not None:
                # check that the lock files themselves are not locked
                while True:
                    # Hacky, but this is temporary anyway
                    opener = urllib2.build_opener(
                        webservice.HTTPSCertKeyHandler(Configuration()))
                    opener.addheaders.append(('Accept', 'application/json'))
                    request = urllib2.Request(lock_url)
                    try:
                        opener.open(request)
                    except urllib2.HTTPError as err:
                        if err.code == 404:
                            # file not found -> no lock
                            break
                        else:
                            raise

                    LOG.info(
                        'Lock files are being produced. Waiting 60 seconds.')
                    time.sleep(60)

            LOG.info('Retrieving lock information from %s', source.url_base)

            data = source.make_request()

            if content_type == WebReplicaLock.LIST_OF_DATASETS:
                # simple list of datasets
                for dataset_name in data:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s',
                                  source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if dataset.replicas is None:
                        continue

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    for replica in dataset.replicas:
                        if site_pattern is not None and not fnmatch.fnmatch(
                                replica.site.name, site_pattern):
                            continue

                        if replica.site in locked_blocks:
                            locked_blocks[replica.site].update(
                                brep.block for brep in replica.block_replicas)
                        else:
                            locked_blocks[replica.site] = set(
                                brep.block for brep in replica.block_replicas)

            elif content_type == WebReplicaLock.CMSWEB_LIST_OF_DATASETS:
                # data['result'] -> simple list of datasets
                for dataset_name in data['result']:
                    if dataset_name is None:
                        LOG.debug('Dataset name None found in %s',
                                  source.url_base)
                        continue

                    try:
                        dataset = inventory.datasets[dataset_name]
                    except KeyError:
                        LOG.debug('Unknown dataset %s in %s', dataset_name,
                                  source.url_base)
                        continue

                    if dataset.replicas is None:
                        continue

                    try:
                        locked_blocks = dataset.attr['locked_blocks']
                    except KeyError:
                        locked_blocks = dataset.attr['locked_blocks'] = {}

                    for replica in dataset.replicas:
                        if site_pattern is not None and not fnmatch.fnmatch(
                                replica.site.name, site_pattern):
                            continue

                        if replica.site in locked_blocks:
                            locked_blocks[replica.site].update(
                                brep.block for brep in replica.block_replicas)
                        else:
                            locked_blocks[replica.site] = set(
                                brep.block for brep in replica.block_replicas)

            elif content_type == WebReplicaLock.SITE_TO_DATASETS:
                # data = {site: {dataset: info}}
                for site_name, objects in data.items():
                    try:
                        site = inventory.sites[site_name]
                    except KeyError:
                        LOG.debug('Unknown site %s in %s', site_name,
                                  source.url_base)
                        continue

                    for object_name, info in objects.items():
                        if not info['lock']:
                            LOG.debug('Object %s is not locked at %s',
                                      object_name, site_name)
                            continue

                        if '#' in object_name:
                            dataset_name, block_real_name = object_name.split(
                                '#')
                        else:
                            dataset_name = object_name
                            block_real_name = None

                        try:
                            dataset = inventory.datasets[dataset_name]
                        except KeyError:
                            LOG.debug('Unknown dataset %s in %s', dataset_name,
                                      source.url_base)
                            continue

                        replica = site.find_dataset_replica(dataset)
                        if replica is None:
                            LOG.debug('Replica of %s is not at %s in %s',
                                      dataset_name, site_name, source.url_base)
                            continue

                        if block_real_name is None:
                            blocks = list(dataset.blocks)
                        else:
                            block = dataset.find_block(
                                Block.to_internal_name(block_real_name))
                            if block is None:
                                LOG.debug('Unknown block %s of %s in %s',
                                          block_real_name, dataset_name,
                                          source.url_base)
                                continue

                            blocks = [block]

                        try:
                            locked_blocks = dataset.attr['locked_blocks']
                        except KeyError:
                            locked_blocks = dataset.attr['locked_blocks'] = {}

                        if site in locked_blocks:
                            locked_blocks[site].update(blocks)
                        else:
                            locked_blocks[site] = set(blocks)
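Whatever the content type, every branch above builds the same structure on each dataset. A sketch of reading it back:

    # Hedged illustration of what update() leaves behind:
    # dataset.attr['locked_blocks'] is a {Site: set(Block)} map.
    locked = dataset.attr.get('locked_blocks', {})
    for site, blocks in locked.items():
        LOG.debug('%d blocks locked at %s', len(blocks), site.name)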
Example #35
    def __init__(self, config = None):
        config = Configuration(config)

        self.registry = RegistryDatabase(config.get('registry', None))
Example #36
    def add_source(self, name, config, auth_config):
        LOG.info(config)
        rest_config = Configuration()
        rest_config.url_base = config.get('url', None)
        rest_config.accept = config.get('data_type', 'application/json')
        if config.auth == 'noauth':
            rest_config.auth_handler = 'None'
        else:
            auth = auth_config[config.auth]
            rest_config.auth_handler = auth.auth_handler
            rest_config.auth_handler_conf = Configuration(
                auth.get('auth_handler_conf', {}))

        content_type = getattr(WebReplicaLock, config.content_type)
        site_pattern = config.get('sites', None)
        lock_url = config.get('lock_url', None)

        if rest_config.url_base is not None:
            self._sources[name] = (webservice.RESTService(rest_config),
                                   content_type, site_pattern, lock_url)

        if config.get('oracledb', None) is not None:
            oracle_config = Configuration()
            oracle_config.db = config.oracledb.db
            oracle_config.pw = config.oracledb.password
            oracle_config.host = config.oracledb.host
            self._sources[name] = (webservice.OracleService(oracle_config),
                                   content_type, site_pattern,
                                   (config.oracledb.lockoflock,
                                    config.oracledb.locks))