Example #1
    def __init__(self):
        super(self.__class__, self).__init__()

        self._mysql = MySQL(**config.mysqlhistory.db_params)

        self._site_id_map = {}
        self._dataset_id_map = {}
Example #2
    def __init__(self,
                 application,
                 service='dynamo',
                 asuser='',
                 db_params=config.registry.db_params):
        self._mysql = MySQL(**db_params)

        self.application = application
        self.service = service
        if asuser:
            self.user = asuser
        else:
            self.user = config.activitylock.default_user
Example #3
    def __init__(self):
        CopyInterface.__init__(self)
        DeletionInterface.__init__(self)
        SiteInfoSourceInterface.__init__(self)
        ReplicaInfoSourceInterface.__init__(self)
        DatasetInfoSourceInterface.__init__(self)
        self._mysql = MySQL(**config.mysqlregistry.db_params)
Example #4
    def __init__(self):
        self._last_update = 0  # unix time of last update
        self._mysqlreg = MySQL(**config.registry.db_params)
        self._mysqlhist = MySQL(**config.mysqlhistory.db_params)
Example #5
def main(site):
    """
    Gets the listing from the dynamo database, and remote XRootD listings of a given site.
    The differences are compared against deletion queues and other known exceptions.

    .. Note::
       If you add things, list them in the module docstring.

    The differences that should be acted on are copied to the summary webpage
    and entered into the dynamoregister database.

    :param str site: The site to run the check over
    :returns: missing files, size, orphan files, size
    :rtype: list, long, list, long
    """

    start = time.time()

    prev_missing = '%s_compare_missing.txt' % site
    prev_set = set()
    if os.path.exists(prev_missing):
        with open(prev_missing, 'r') as prev_file:
            for line in prev_file:
                prev_set.add(line.strip())

        if int(config.config_dict().get('SaveCache')):
            prev_new_name = '%s.%s' % (
                prev_missing,
                datetime.datetime.fromtimestamp(
                    os.stat(prev_missing).st_mtime).strftime('%y%m%d'))
        else:
            prev_new_name = prev_missing

        shutil.move(
            prev_missing,
            os.path.join(config.config_dict()['CacheLocation'], prev_new_name))

    # All of the files and summary will be dumped here
    webdir = config.config_dict()['WebDir']

    # Open a connection temporarily to make sure we only list good sites
    status_check = MySQL(config_file='/etc/my.cnf',
                         db='dynamo',
                         config_group='mysql-dynamo')
    status = status_check.query('SELECT status FROM sites WHERE name = %s',
                                site)[0]

    if status != 'ready':
        LOG.error('Site %s status is %s', site, status)

        # Note the attempt to do listing
        conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
        curs = conn.cursor()
        curs.execute(
            """
            REPLACE INTO stats VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?)
            """, (site, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

        conn.commit()
        conn.close()

        exit(0)

    # Close the connection while we are getting the trees together
    status_check.close()

    inv_tree = getinventorycontents.get_db_listing(site)

    # Reset the DirectoryList for the XRootDLister to run on
    config.DIRECTORYLIST = [
        directory.name for directory in inv_tree.directories
    ]

    site_tree = getsitecontents.get_site_tree(site)

    # Get whether or not the site is debugged
    conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
    curs = conn.cursor()
    curs.execute('SELECT isgood FROM sites WHERE site = ?', (site, ))
    is_debugged = curs.fetchone()[0]
    conn.close()

    # Create the function to check orphans and missing

    # First, datasets in the deletions queue can be missing
    acceptable_missing = checkphedex.set_of_deletions(site)

    # Orphan files cannot belong to any dataset that should be at the site
    inv_sql = MySQL(config_file='/etc/my.cnf',
                    db='dynamo',
                    config_group='mysql-dynamo')
    acceptable_orphans = set(
        inv_sql.query(
            """
            SELECT datasets.name FROM sites
            INNER JOIN dataset_replicas ON dataset_replicas.site_id=sites.id
            INNER JOIN datasets ON dataset_replicas.dataset_id=datasets.id
            WHERE sites.name=%s
            """, site))

    # Orphan files may be a result of deletion requests
    acceptable_orphans.update(acceptable_missing)

    # Ignored datasets will not give a full listing, so they can't be accused of having orphans
    acceptable_orphans.update(
        inv_sql.query('SELECT name FROM datasets WHERE status=%s', 'IGNORED'))

    # Do not delete anything that is protected by Unified
    protected_unmerged = get_json('cmst2.web.cern.ch',
                                  '/cmst2/unified/listProtectedLFN.txt')
    acceptable_orphans.update(['/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                                 split_name[6], split_name[5]) \
                                   for split_name in \
                                   [name.split('/') for name in protected_unmerged['protected']]
                              ])

    LOG.debug('Acceptable orphans: \n%s\n', '\n'.join(acceptable_orphans))

    ignore_list = config.config_dict().get('IgnoreDirectories', [])

    def double_check(file_name, acceptable):
        """
        Checks the file name against a list of datasets to not list files from.

        :param str file_name: LFN of the file
        :param set acceptable: Datasets to not list files from
                               (Acceptable orphans or missing)
        :returns: Whether the file belongs to a dataset in the list or not
        :rtype: bool
        """
        LOG.debug('Checking file_name: %s', file_name)

        # Skip over paths that include part of the list of ignored directories
        for pattern in ignore_list:
            if pattern in file_name:
                return True

        split_name = file_name.split('/')

        try:
            return '/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                     split_name[6],
                                     split_name[5]) in acceptable
        except IndexError:
            LOG.warning('Strange file name: %s', file_name)
            return True

    check_orphans = lambda x: double_check(x, acceptable_orphans)
    check_missing = lambda x: double_check(x, acceptable_missing)

    # Do the comparison
    missing, m_size, orphan, o_size = datatypes.compare(
        inv_tree,
        site_tree,
        '%s_compare' % site,
        orphan_check=check_orphans,
        missing_check=check_missing)

    LOG.debug('Missing size: %i, Orphan size: %i', m_size, o_size)

    # Enter things for site in registry
    if os.environ['USER'] == 'dynamo':
        reg_sql = MySQL(config_file='/etc/my.cnf',
                        db='dynamoregister',
                        config_group='mysql-dynamo')
    else:
        reg_sql = MySQL(config_file=os.path.join(os.environ['HOME'], 'my.cnf'),
                        db='dynamoregister',
                        config_group='mysql-register-test')

    # Determine if files should be entered into the registry

    many_missing = len(missing) > int(config.config_dict()['MaxMissing'])
    many_orphans = len(orphan) > int(config.config_dict()['MaxOrphan'])

    if is_debugged and not many_missing and not many_orphans:

        def execute(query, *args):
            """
            Executes the query on the registry and logs a message depending on the query type

            :param str query: The SQL query to execute
            :param args: The arguments to the SQL query
            """

            reg_sql.query(query, *args)

            if 'transfer_queue' in query:
                LOG.info('Copying %s from %s', args[0], args[1])
            elif 'deletion_queue' in query:
                LOG.info('Deleting %s', args[0])

    else:
        if many_missing:
            LOG.error('Too many missing files: %i, you should investigate.',
                      len(missing))

        if many_orphans:
            LOG.error(
                'Too many orphan files: %i out of %i, you should investigate.',
                len(orphan), site_tree.get_num_files())

        execute = lambda *_: 0

    # Then do entries, if the site is in the debugged status

    def add_transfers(line, sites):
        """
        Add the file into the transfer queue for multiple sites.

        :param str line: The file LFN to transfer
        :param list sites: Sites to try to transfer from
        :returns: Whether or not the entry was a success
        :rtype: bool
        """

        # Only queue transfers for files that were also missing in the previous listing
        # (or if there is no previous listing)
        if line in prev_set or not prev_set:
            for location in sites:
                execute(
                    """
                    INSERT IGNORE INTO `transfer_queue`
                    (`file`, `site_from`, `site_to`, `status`, `reqid`)
                    VALUES (%s, %s, %s, 'new', 0)
                    """, line, location, site)

        return bool(sites)

    # Set up a query for sites, with an added condition at the end
    site_query = """
                 SELECT sites.name FROM sites
                 INNER JOIN block_replicas ON sites.id = block_replicas.site_id
                 INNER JOIN files ON block_replicas.block_id = files.block_id
                 WHERE files.name = %s AND sites.name != %s
                 AND sites.status = 'ready'
                 AND block_replicas.is_complete = 1
                 AND group_id != 0
                 {0}
                 """

    # Track files with no sources
    no_source_files = []

    for line in missing:

        # Get sites that are not tape
        sites = inv_sql.query(
            site_query.format('AND sites.storage_type != "mss"'), line, site)

        if not add_transfers(line, sites):
            # Track files without disk source
            no_source_files.append(line)

            # Get sites that are tape
            sites = inv_sql.query(
                site_query.format('AND sites.storage_type = "mss"'), line,
                site)

            add_transfers(line, sites)

    # Only get the empty nodes that are not in the inventory tree
    for line in orphan + \
            [empty_node for empty_node in site_tree.empty_nodes_list() \
                 if not inv_tree.get_node('/'.join(empty_node.split('/')[2:]),
                                          make_new=False)]:
        execute(
            """
            INSERT IGNORE INTO `deletion_queue`
            (`file`, `site`, `status`) VALUES
            (%s, %s, 'new')
            """, line, site)

    reg_sql.close()

    with open('%s_missing_nosite.txt' % site, 'w') as nosite:
        for line in no_source_files:
            nosite.write(line + '\n')

    # We want to track which blocks missing files are coming from
    track_missing_blocks = defaultdict(
        lambda: {
            'errors': 0,
            'blocks': defaultdict(lambda: {
                'group': '',
                'errors': 0
            })
        })

    blocks_query = """
                   SELECT blocks.name, IFNULL(groups.name, 'Unsubscribed') FROM blocks
                   INNER JOIN files ON files.block_id = blocks.id
                   INNER JOIN block_replicas ON block_replicas.block_id = files.block_id
                   INNER JOIN sites ON block_replicas.site_id = sites.id
                   LEFT JOIN groups ON block_replicas.group_id = groups.id
                   WHERE files.name = %s AND sites.name = %s
                   """

    with open('%s_compare_missing.txt' % site, 'r') as input_file:
        for line in input_file:
            split_name = line.split('/')
            dataset = '/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                        split_name[6], split_name[5])

            output = inv_sql.query(blocks_query, line.strip(), site)

            if not output:
                LOG.warning('The following SQL statement failed: %s',
                            blocks_query % (line.strip(), site))
                LOG.warning(
                    'Most likely cause is dynamo update between the listing and now'
                )
                from_phedex = get_json(
                    'cmsweb.cern.ch',
                    '/phedex/datasvc/json/prod/filereplicas',
                    params={
                        'node': site,
                        'LFN': line.strip()
                    },
                    use_cert=True)

                try:
                    output = [(from_phedex['phedex']['block'][0]['name'].split(
                        '#')[1], from_phedex['phedex']['block'][0]['replica']
                               [0]['group'])]
                except IndexError:
                    LOG.error('File replica not in PhEDEx either!')
                    LOG.error('Skipping block level report for this file.')
                    continue

            block, group = output[0]

            track_missing_blocks[dataset]['errors'] += 1
            track_missing_blocks[dataset]['blocks'][block]['errors'] += 1
            track_missing_blocks[dataset]['blocks'][block]['group'] = group

    inv_sql.close()

    # Output file with the missing datasets
    with open('%s_missing_datasets.txt' % site, 'w') as output_file:
        for dataset, vals in \
                sorted(track_missing_blocks.iteritems(),
                       key=lambda x: x[1]['errors'],
                       reverse=True):

            for block_name, block in sorted(vals['blocks'].iteritems()):
                output_file.write('%10i    %-17s  %s#%s\n' % \
                                      (block['errors'], block['group'],
                                       dataset, block_name))

    # If there were permissions or connection issues, no files would be listed
    # Otherwise, copy the output files to the web directory
    shutil.copy('%s_missing_datasets.txt' % site, webdir)
    shutil.copy('%s_missing_nosite.txt' % site, webdir)
    shutil.copy('%s_compare_missing.txt' % site, webdir)
    shutil.copy('%s_compare_orphan.txt' % site, webdir)

    if (os.environ.get('ListAge') is None) and (os.environ.get('InventoryAge')
                                                is None):

        # Update the runtime stats on the stats page if the listing settings are not changed
        conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
        curs = conn.cursor()

        curs.execute(
            'INSERT INTO stats_history SELECT * FROM stats WHERE site=?',
            (site, ))
        curs.execute(
            """
            REPLACE INTO stats VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?)
            """,
            (site, time.time() - start, site_tree.get_num_files(),
             site_tree.count_nodes(), len(
                 site_tree.empty_nodes_list()), config.config_dict().get(
                     'NumThreads',
                     config.config_dict().get('MinThreads', 0)), len(missing),
             m_size, len(orphan), o_size, len(no_source_files),
             site_tree.get_num_files(unlisted=True)))

        conn.commit()
        conn.close()
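
Per its docstring, main() returns the missing and orphan file lists together with their total sizes. Below is a minimal, hedged driver sketch; the site name is a hypothetical placeholder.

# Hypothetical driver for the check above; 'T2_US_MIT' is a placeholder site name.
missing, m_size, orphan, o_size = main('T2_US_MIT')
LOG.info('%i missing files (%i bytes), %i orphan files (%i bytes)',
         len(missing), m_size, len(orphan), o_size)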
Example #6
class MySQLHistory(TransactionHistoryInterface):
    """
    Transaction history interface implementation using MySQL as the backend.
    """
    def __init__(self):
        super(self.__class__, self).__init__()

        self._mysql = MySQL(**config.mysqlhistory.db_params)

        self._site_id_map = {}
        self._dataset_id_map = {}

    def _do_acquire_lock(self, blocking):  #override
        while True:
            # Use the system table to "software-lock" the database
            self._mysql.query('LOCK TABLES `lock` WRITE')
            self._mysql.query(
                'UPDATE `lock` SET `lock_host` = %s, `lock_process` = %s WHERE `lock_host` LIKE \'\' AND `lock_process` = 0',
                socket.gethostname(), os.getpid())

            # Did the update go through?
            host, pid = self._mysql.query(
                'SELECT `lock_host`, `lock_process` FROM `lock`')[0]
            self._mysql.query('UNLOCK TABLES')

            if host == socket.gethostname() and pid == os.getpid():
                # The database is locked.
                break

            if blocking:
                logger.warning('Failed to lock database. Waiting 30 seconds..')
                time.sleep(30)
            else:
                logger.warning('Failed to lock database.')
                return False

        return True

    def _do_release_lock(self, force):  #override
        self._mysql.query('LOCK TABLES `lock` WRITE')
        if force:
            self._mysql.query(
                'UPDATE `lock` SET `lock_host` = \'\', `lock_process` = 0')
        else:
            self._mysql.query(
                'UPDATE `lock` SET `lock_host` = \'\', `lock_process` = 0 WHERE `lock_host` LIKE %s AND `lock_process` = %s',
                socket.gethostname(), os.getpid())

        # Did the update go through?
        host, pid = self._mysql.query(
            'SELECT `lock_host`, `lock_process` FROM `lock`')[0]
        self._mysql.query('UNLOCK TABLES')

        if host != '' or pid != 0:
            raise TransactionHistoryInterface.LockError(
                'Failed to release lock from ' + socket.gethostname() + ':' +
                str(os.getpid()))

    def _do_make_snapshot(self, tag):  #override
        new_db = self._mysql.make_snapshot(tag)

        self._mysql.query(
            'UPDATE `%s`.`lock` SET `lock_host` = \'\', `lock_process` = 0' %
            new_db)

    def _do_remove_snapshot(self, tag, newer_than, older_than):  #override
        self._mysql.remove_snapshot(tag=tag,
                                    newer_than=newer_than,
                                    older_than=older_than)

    def _do_list_snapshots(self, timestamp_only):  #override
        return self._mysql.list_snapshots(timestamp_only)

    def _do_recover_from(self, tag):  #override
        self._mysql.recover_from(tag)

    def _do_new_run(self, operation, partition, policy_version, is_test,
                    comment):  #override
        part_ids = self._mysql.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(part_ids) == 0:
            part_id = self._mysql.query(
                'INSERT INTO `partitions` (`name`) VALUES (%s)', partition)
        else:
            part_id = part_ids[0]

        if operation == HistoryRecord.OP_COPY:
            if is_test:
                operation_str = 'copy_test'
            else:
                operation_str = 'copy'
        else:
            if is_test:
                operation_str = 'deletion_test'
            else:
                operation_str = 'deletion'

        return self._mysql.query(
            'INSERT INTO `runs` (`operation`, `partition_id`, `policy_version`, `comment`, `time_start`) VALUES (%s, %s, %s, %s, NOW())',
            operation_str, part_id, policy_version, comment)

    def _do_close_run(self, operation, run_number):  #override
        self._mysql.query(
            'UPDATE `runs` SET `time_end` = FROM_UNIXTIME(%s) WHERE `id` = %s',
            time.time(), run_number)

    def _do_make_copy_entry(self, run_number, site, operation_id, approved,
                            dataset_list, size):  #override
        """
        Site and datasets are expected to be already in the database.
        """

        if len(self._site_id_map) == 0:
            self._make_site_id_map()
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        self._mysql.query(
            'INSERT INTO `copy_requests` (`id`, `run_id`, `timestamp`, `approved`, `site_id`, `size`) VALUES (%s, %s, NOW(), %s, %s, %s)',
            operation_id, run_number, approved, self._site_id_map[site.name],
            size)

        self._mysql.insert_many(
            'copied_replicas', ('copy_id', 'dataset_id'), lambda d:
            (operation_id, self._dataset_id_map[d.name]), dataset_list)

    def _do_make_deletion_entry(self, run_number, site, operation_id, approved,
                                datasets, size):  #override
        """
        site and dataset are expected to be already in the database (save_deletion_decisions should be called first).
        """

        site_id = self._mysql.query(
            'SELECT `id` FROM `sites` WHERE `name` LIKE %s', site.name)[0]

        dataset_ids = self._mysql.select_many('datasets', ('id', ), 'name',
                                              [d.name for d in datasets])

        self._mysql.query(
            'INSERT INTO `deletion_requests` (`id`, `run_id`, `timestamp`, `approved`, `site_id`, `size`) VALUES (%s, %s, NOW(), %s, %s, %s)',
            operation_id, run_number, approved, site_id, size)

        self._mysql.insert_many('deleted_replicas',
                                ('deletion_id', 'dataset_id'), lambda did:
                                (operation_id, did), dataset_ids)

    def _do_update_copy_entry(self, copy_record):  #override
        self._mysql.query(
            'UPDATE `copy_requests` SET `approved` = %s, `size` = %s, `completed` = %s WHERE `id` = %s',
            copy_record.approved, copy_record.size, copy_record.completed,
            copy_record.operation_id)

    def _do_update_deletion_entry(self, deletion_record):  #override
        self._mysql.query(
            'UPDATE `deletion_requests` SET `approved` = %s, `size` = %s WHERE `id` = %s',
            deletion_record.approved, deletion_record.size,
            deletion_record.operation_id)

    def _do_save_sites(self, run_number, inventory):  #override
        if len(self._site_id_map) == 0:
            self._make_site_id_map()

        sites_to_insert = []
        for site_name in inventory.sites.keys():
            if site_name not in self._site_id_map:
                sites_to_insert.append(site_name)

        if len(sites_to_insert) != 0:
            self._mysql.insert_many('sites', ('name', ), lambda n: (n, ),
                                    sites_to_insert)
            self._make_site_id_map()

        sites_in_record = set()

        insert_query = 'INSERT INTO `site_status_snapshots` (`site_id`, `run_id`, `status`) VALUES (%s, {run_number}, %s)'.format(
            run_number=run_number)

        query = 'SELECT s.`name`, ss.`status`+0 FROM `site_status_snapshots` AS ss INNER JOIN `sites` AS s ON s.`id` = ss.`site_id`'
        query += ' WHERE ss.`run_id` = (SELECT MAX(ss2.`run_id`) FROM `site_status_snapshots` AS ss2 WHERE ss2.`site_id` = ss.`site_id` AND ss2.`run_id` <= %d)' % run_number
        record = self._mysql.query(query)

        sites_in_record = set()

        for site_name, status in record:
            try:
                site = inventory.sites[site_name]
            except KeyError:
                continue

            sites_in_record.add(site)

            if site.status != status:
                self._mysql.query(insert_query, self._site_id_map[site.name],
                                  site.status)

        for site in inventory.sites.values():
            if site not in sites_in_record:
                self._mysql.query(insert_query, self._site_id_map[site.name],
                                  site.status)

    def _do_get_sites(self, run_number):  #override
        partition_id = self._mysql.query(
            'SELECT `partition_id` FROM runs WHERE `id` = %s', run_number)[0]

        query = 'SELECT s.`name`, ss.`status`+0 FROM `site_status_snapshots` AS ss INNER JOIN `sites` AS s ON s.`id` = ss.`site_id`'
        query += ' WHERE ss.`run_id` = (SELECT MAX(ss2.`run_id`) FROM `site_status_snapshots` AS ss2 WHERE ss2.`site_id` = ss.`site_id` AND ss2.`run_id` <= %d)' % run_number
        record = self._mysql.query(query)

        status_map = dict([(site_name, status)
                           for site_name, status in record])

        query = 'SELECT s.`name`, q.`quota` FROM `quota_snapshots` AS q INNER JOIN `sites` AS s ON s.`id` = q.`site_id`'
        query += ' WHERE q.`partition_id` = %d' % partition_id
        query += ' AND q.`run_id` = (SELECT MAX(q2.`run_id`) FROM `quota_snapshots` AS q2 WHERE q2.`partition_id` = %d AND q2.`site_id` = q.`site_id` AND q2.`run_id` <= %d)' % (
            partition_id, run_number)

        quota_map = dict(self._mysql.query(query))

        sites_dict = {}

        for site_name, status in status_map.items():
            try:
                quota = quota_map[site_name]
            except KeyError:
                quota = 0

            sites_dict[site_name] = (status, quota)

        return sites_dict

    def _do_save_datasets(self, run_number, inventory):  #override
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        datasets_to_insert = []
        for dataset_name in inventory.datasets.keys():
            if dataset_name not in self._dataset_id_map:
                datasets_to_insert.append(dataset_name)

        if len(datasets_to_insert) == 0:
            return

        self._mysql.insert_many('datasets', ('name', ), lambda n: (n, ),
                                datasets_to_insert)
        self._make_dataset_id_map()

    def _do_save_quotas(self, run_number, quotas):  #override
        if len(self._site_id_map) == 0:
            self._make_site_id_map()

        partition_id = self._mysql.query(
            'SELECT `partition_id` FROM runs WHERE `id` = %s', run_number)[0]

        insert_query = 'INSERT INTO `quota_snapshots` (`site_id`, `partition_id`, `run_id`, `quota`) VALUES (%s, {partition_id}, {run_number}, %s)'.format(
            partition_id=partition_id, run_number=run_number)

        query = 'SELECT s.`name`, q.`quota` FROM `quota_snapshots` AS q INNER JOIN `sites` AS s ON s.`id` = q.`site_id` WHERE'
        query += ' q.`partition_id` = %d' % partition_id
        query += ' AND q.`run_id` = (SELECT MAX(q2.`run_id`) FROM `quota_snapshots` AS q2 WHERE q2.`partition_id` = %d AND q2.`site_id` = q.`site_id` AND q2.`run_id` <= %d)' % (
            partition_id, run_number)

        record = self._mysql.query(query)

        sites_in_record = set()

        for site_name, last_quota in record:
            try:
                site, quota = next(item for item in quotas.items()
                                   if item[0].name == site_name)
            except StopIteration:
                continue

            sites_in_record.add(site)

            if last_quota != quota:
                self._mysql.query(insert_query, self._site_id_map[site.name],
                                  quota)

        for site, quota in quotas.items():
            if site not in sites_in_record:
                self._mysql.query(insert_query, self._site_id_map[site.name],
                                  quota)

    def _do_save_conditions(self, policies):
        for policy in policies:
            text = re.sub('\s+', ' ', policy.condition.text)
            ids = self._mysql.query(
                'SELECT `id` FROM `policy_conditions` WHERE `text` LIKE %s',
                text)
            if len(ids) == 0:
                policy.condition_id = self._mysql.query(
                    'INSERT INTO `policy_conditions` (`text`) VALUES (%s)',
                    text)
            else:
                policy.condition_id = ids[0]

    def _do_save_copy_decisions(self, run_number, copies):  #override
        pass

    def _do_save_deletion_decisions(self, run_number, deleted, kept,
                                    protected):  #override
        # First save the size snapshots of the replicas, which will be referenced when reconstructing the history.
        # Decisions are saved only if they changed from the last run

        if len(self._site_id_map) == 0:
            self._make_site_id_map()
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        # (site_id, dataset_id) -> replica in inventory
        indices_to_replicas = {}
        for replica in deleted.keys():
            indices_to_replicas[(
                self._site_id_map[replica.site.name],
                self._dataset_id_map[replica.dataset.name])] = replica
        for replica in kept.keys():
            indices_to_replicas[(
                self._site_id_map[replica.site.name],
                self._dataset_id_map[replica.dataset.name])] = replica
        for replica in protected.keys():
            indices_to_replicas[(
                self._site_id_map[replica.site.name],
                self._dataset_id_map[replica.dataset.name])] = replica

        partition_id = self._mysql.query(
            'SELECT `partition_id` FROM `runs` WHERE `id` = %s', run_number)[0]

        # size snapshots
        # size NULL means the replica is deleted
        query = 'SELECT t1.`site_id`, t1.`dataset_id`, t1.`size` FROM `replica_size_snapshots` AS t1'
        query += ' WHERE t1.`partition_id` = %d' % partition_id
        query += ' AND t1.`size` IS NOT NULL'
        query += ' AND t1.`run_id` = ('
        query += '  SELECT MAX(t2.`run_id`) FROM `replica_size_snapshots` AS t2 WHERE t2.`site_id` = t1.`site_id` AND t2.`dataset_id` = t1.`dataset_id`'
        query += '  AND t2.`partition_id` = %d AND t2.`run_id` <= %d' % (
            partition_id, run_number)
        query += ' )'

        in_record = set()
        insertions = []

        # existing replicas that changed size or disappeared
        for site_id, dataset_id, size in self._mysql.query(query):
            index = (site_id, dataset_id)
            try:
                replica = indices_to_replicas[index]
            except KeyError:
                # this replica is not in the inventory any more
                insertions.append((site_id, dataset_id, None))
                continue

            in_record.add(replica)

            if size != replica.size():
                insertions.append((site_id, dataset_id, replica.size()))

        # new replicas
        for index, replica in indices_to_replicas.items():
            if replica not in in_record:
                insertions.append((index[0], index[1], replica.size()))

        fields = ('site_id', 'dataset_id', 'partition_id', 'run_id', 'size')
        mapping = lambda (site_id, dataset_id, size): (
            site_id, dataset_id, partition_id, run_number, size)
        self._mysql.insert_many('replica_size_snapshots', fields, mapping,
                                insertions)

        # deletion decisions
        decisions = {}
        for replica, condition_id in deleted.items():
            decisions[replica] = ('delete', condition_id)
        for replica, condition_id in kept.items():
            decisions[replica] = ('keep', condition_id)
        for replica, condition_id in protected.items():
            decisions[replica] = ('protect', condition_id)

        query = 'SELECT dd1.`site_id`, dd1.`dataset_id`, dd1.`decision`, dd1.`matched_condition` FROM `deletion_decisions` AS dd1'
        query += ' INNER JOIN `replica_size_snapshots` AS rs1 ON (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`) = (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`)'
        query += ' WHERE dd1.`partition_id` = %d' % partition_id
        query += ' AND rs1.`size` IS NOT NULL'
        query += ' AND rs1.`run_id` = ('
        query += '  SELECT MAX(rs2.`run_id`) FROM `replica_size_snapshots` AS rs2'
        query += '   WHERE (rs2.`site_id`, rs2.`partition_id`, rs2.`dataset_id`) = (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`)'
        query += '   AND rs2.`partition_id` = %d' % partition_id
        query += '   AND rs2.`run_id` <= %d' % run_number
        query += ' )'
        query += ' AND dd1.`run_id` = ('
        query += '  SELECT MAX(dd2.`run_id`) FROM `deletion_decisions` AS dd2'
        query += '   WHERE (dd2.`site_id`, dd2.`partition_id`, dd2.`dataset_id`) = (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`)'
        query += '   AND dd2.`partition_id` = %d' % partition_id
        query += '   AND dd2.`run_id` <= %d' % run_number
        query += ' )'

        insertions = []

        for site_id, dataset_id, rec_decision, rec_condition_id in self._mysql.query(
                query):
            replica = indices_to_replicas.pop((site_id, dataset_id))

            decision, condition_id = decisions[replica]

            if decision != rec_decision or condition_id != rec_condition_id:
                insertions.append(
                    (site_id, dataset_id, decision, condition_id))

        # replicas with no past decision entries
        for index, replica in indices_to_replicas.items():
            insertions.append(index + decisions[replica])

        fields = ('site_id', 'dataset_id', 'partition_id', 'run_id',
                  'decision', 'matched_condition')
        mapping = lambda (site_id, dataset_id, decision, condition_id
                          ): (site_id, dataset_id, partition_id, run_number,
                              decision, condition_id)
        self._mysql.insert_many('deletion_decisions', fields, mapping,
                                insertions)

        # now fill the cache
        self._fill_snapshot_cache(run_number)

    def _do_get_deletion_decisions(self, run_number, size_only):  #override
        self._fill_snapshot_cache(run_number)

        partition_id = self._mysql.query(
            'SELECT `partition_id` FROM `runs` WHERE `id` = %s', run_number)[0]

        if size_only:
            # return {site_name: (protect_size, delete_size, keep_size)}
            volumes = {}
            sites = set()

            query = 'SELECT s.`name`, SUM(r.`size`) * 1.e-12 FROM `replica_snapshot_cache` AS c'
            query += ' INNER JOIN `replica_size_snapshots` AS r ON r.`id` = c.`size_snapshot_id`'
            query += ' INNER JOIN `deletion_decisions` AS d ON d.`id` = c.`decision_id`'
            query += ' INNER JOIN `sites` AS s ON s.`id` = r.`site_id`'
            query += ' WHERE c.`run_id` = %d' % run_number
            query += ' AND d.`decision` LIKE %s'
            query += ' GROUP BY r.`site_id`'

            for decision in ['protect', 'delete', 'keep']:
                volumes[decision] = dict(self._mysql.query(query, decision))
                sites.update(set(volumes[decision].keys()))

            self._mysql.query(
                'INSERT INTO `replica_snapshot_cache_usage` VALUES (%s, NOW())',
                run_number)

            product = {}
            for site_name in sites:
                v = {}
                for decision in ['protect', 'delete', 'keep']:
                    try:
                        v[decision] = volumes[decision][site_name]
                    except:
                        v[decision] = 0

                product[site_name] = (v['protect'], v['delete'], v['keep'])

            return product

        else:
            # return {site_name: [(dataset_name, size, decision, reason)]}

            query = 'SELECT s.`name`, d.`name`, r.`size`, l.`decision`, p.`text` FROM `replica_snapshot_cache` AS c'
            query += ' INNER JOIN `sites` AS s ON s.`id` = c.`site_id`'
            query += ' INNER JOIN `datasets` AS d ON d.`id` = c.`dataset_id`'
            query += ' INNER JOIN `replica_size_snapshots` AS r ON r.`id` = c.`size_snapshot_id`'
            query += ' INNER JOIN `deletion_decisions` AS l ON l.`id` = c.`decision_id`'
            query += ' INNER JOIN `policy_conditions` AS p ON p.`id` = l.`matched_condition`'
            query += ' WHERE c.`run_id` = %d' % run_number
            query += ' ORDER BY s.`name` ASC, r.`size` DESC'

            product = {}

            _site_name = ''

            for site_name, dataset_name, size, decision, reason in self._mysql.query(
                    query):
                if site_name != _site_name:
                    product[site_name] = []
                    current = product[site_name]
                    _site_name = site_name

                current.append((dataset_name, size, decision, reason))

            return product

    def _do_save_dataset_popularity(self, run_number, datasets):  #override
        if len(self._dataset_id_map) == 0:
            self._make_dataset_id_map()

        fields = ('run_id', 'dataset_id', 'popularity')
        mapping = lambda dataset: (run_number, self._dataset_id_map[
            dataset.name], dataset.demand['request_weight'] if 'request_weight'
                                   in dataset.demand else 0.)
        self._mysql.insert_many('dataset_popularity_snapshots', fields,
                                mapping, datasets)

    def _do_get_incomplete_copies(self, partition):  #override
        query = 'SELECT h.`id`, UNIX_TIMESTAMP(h.`timestamp`), h.`approved`, s.`name`, h.`size`'
        query += ' FROM `copy_requests` AS h'
        query += ' INNER JOIN `runs` AS r ON r.`id` = h.`run_id`'
        query += ' INNER JOIN `partitions` AS p ON p.`id` = r.`partition_id`'
        query += ' INNER JOIN `sites` AS s ON s.`id` = h.`site_id`'
        query += ' WHERE h.`id` > 0 AND p.`name` LIKE \'%s\' AND h.`completed` = 0 AND h.`run_id` > 0' % partition
        history_entries = self._mysql.query(query)

        id_to_record = {}
        for eid, timestamp, approved, site_name, size in history_entries:
            id_to_record[eid] = HistoryRecord(HistoryRecord.OP_COPY,
                                              eid,
                                              site_name,
                                              timestamp=timestamp,
                                              approved=approved,
                                              size=size)

        id_to_dataset = dict(
            self._mysql.query('SELECT `id`, `name` FROM `datasets`'))
        id_to_site = dict(
            self._mysql.query('SELECT `id`, `name` FROM `sites`'))

        replicas = self._mysql.select_many('copied_replicas',
                                           ('copy_id', 'dataset_id'),
                                           'copy_id', id_to_record.keys())

        current_copy_id = 0
        for copy_id, dataset_id in replicas:
            if copy_id != current_copy_id:
                record = id_to_record[copy_id]
                current_copy_id = copy_id

            record.replicas.append(
                HistoryRecord.CopiedReplica(
                    dataset_name=id_to_dataset[dataset_id]))

        return id_to_record.values()

    def _do_get_copied_replicas(self, run_number):  #override
        query = 'SELECT s.`name`, d.`name` FROM `copied_replicas` AS p'
        query += ' INNER JOIN `copy_requests` AS r ON r.`id` = p.`copy_id`'
        query += ' INNER JOIN `datasets` AS d ON d.`id` = p.`dataset_id`'
        query += ' INNER JOIN `sites` AS s ON s.`id` = r.`site_id`'
        query += ' WHERE r.`run_id` = %d' % run_number

        return self._mysql.query(query)

    def _do_get_site_name(self, operation_id):  #override
        result = self._mysql.query(
            'SELECT s.name FROM `sites` AS s INNER JOIN `copy_requests` AS h ON h.`site_id` = s.`id` WHERE h.`id` = %s',
            operation_id)
        if len(result) != 0:
            return result[0]

        result = self._mysql.query(
            'SELECT s.name FROM `sites` AS s INNER JOIN `deletion_requests` AS h ON h.`site_id` = s.`id` WHERE h.`id` = %s',
            operation_id)
        if len(result) != 0:
            return result[0]

        return ''

    def _do_get_deletion_runs(self, partition, first, last):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(result) == 0:
            return 0

        partition_id = result[0]

        if first < 0:
            sql = 'SELECT MAX(`id`)'
        else:
            sql = 'SELECT `id`'

        sql += ' FROM `runs` WHERE `partition_id` = %d AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'deletion\', \'deletion_test\')' % partition_id

        if first >= 0:
            sql += ' AND `id` >= %d' % first
        if last >= 0:
            sql += ' AND `id` <= %d' % last

        return self._mysql.query(sql)

    def _do_get_copy_runs(self, partition, first, last):  #override
        result = self._mysql.query(
            'SELECT `id` FROM `partitions` WHERE `name` LIKE %s', partition)
        if len(result) == 0:
            return 0

        partition_id = result[0]

        if first < 0:
            sql = 'SELECT MAX(`id`)'
        else:
            sql = 'SELECT `id`'

        sql += ' FROM `runs` WHERE `partition_id` = %d AND `time_end` NOT LIKE \'0000-00-00 00:00:00\' AND `operation` IN (\'copy\', \'copy_test\')' % partition_id

        if first >= 0:
            sql += ' AND `id` >= %d' % first
        if last >= 0:
            sql += ' AND `id` <= %d' % last

        return self._mysql.query(sql)

    def _do_get_run_timestamp(self, run_number):  #override
        result = self._mysql.query(
            'SELECT UNIX_TIMESTAMP(`time_start`) FROM `runs` WHERE `id` = %s',
            run_number)
        if len(result) == 0:
            return 0

        return result[0]

    def _do_get_next_test_id(self):  #override
        copy_result = self._mysql.query(
            'SELECT MIN(`id`) FROM `copy_requests`')[0]
        if copy_result == None:
            copy_result = 0

        deletion_result = self._mysql.query(
            'SELECT MIN(`id`) FROM `deletion_requests`')[0]
        if deletion_result == None:
            deletion_result = 0

        return min(copy_result, deletion_result) - 1

    def _make_site_id_map(self):
        self._site_id_map = {}
        for name, site_id in self._mysql.query(
                'SELECT `name`, `id` FROM `sites`'):
            self._site_id_map[name] = int(site_id)

    def _make_dataset_id_map(self):
        self._dataset_id_map = {}
        for name, dataset_id in self._mysql.query(
                'SELECT `name`, `id` FROM `datasets`'):
            self._dataset_id_map[name] = int(dataset_id)

    def _fill_snapshot_cache(self, run_number):
        if self._mysql.query(
                'SELECT COUNT(*) FROM `replica_snapshot_cache` WHERE `run_id` = %s',
                run_number)[0] == 0:
            partition_id = self._mysql.query(
                'SELECT `partition_id` FROM `runs` WHERE `id` = %s',
                run_number)[0]

            query = 'INSERT INTO `replica_snapshot_cache`'
            query += ' SELECT %d, dd1.`site_id`, dd1.`dataset_id`, rs1.`id`, dd1.`id` FROM `deletion_decisions` AS dd1, `replica_size_snapshots` AS rs1' % run_number
            query += ' WHERE (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`) = (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`)'
            query += ' AND dd1.`partition_id` = %d' % partition_id
            query += ' AND rs1.`size` IS NOT NULL'
            query += ' AND rs1.`run_id` = ('
            query += '  SELECT MAX(rs2.`run_id`) FROM `replica_size_snapshots` AS rs2'
            query += '  WHERE (rs2.`site_id`, rs2.`partition_id`, rs2.`dataset_id`) = (rs1.`site_id`, rs1.`partition_id`, rs1.`dataset_id`)'
            query += '  AND rs2.`partition_id` = %d' % partition_id
            query += '  AND rs2.`run_id` <= %d' % run_number
            query += ' )'
            query += ' AND dd1.`run_id` = ('
            query += '  SELECT MAX(dd2.`run_id`) FROM `deletion_decisions` AS dd2'
            query += '  WHERE (dd2.`site_id`, dd2.`partition_id`, dd2.`dataset_id`) = (dd1.`site_id`, dd1.`partition_id`, dd1.`dataset_id`)'
            query += '  AND dd2.`partition_id` = %d' % partition_id
            query += '  AND dd2.`run_id` <= %d' % run_number
            query += ' )'

            self._mysql.query(query)

            self._mysql.query(
                'INSERT INTO `replica_snapshot_cache_usage` VALUES (%s, NOW())',
                run_number)

        num_deleted = self._mysql.query(
            'DELETE FROM `replica_snapshot_cache` WHERE `run_id` NOT IN (SELECT `run_id` FROM `replica_snapshot_cache_usage` WHERE `timestamp` > DATE_SUB(NOW(), INTERVAL 1 WEEK))'
        )
        if num_deleted != 0:
            self._mysql.query('OPTIMIZE TABLE `replica_snapshot_cache`')

        num_deleted = self._mysql.query(
            'DELETE FROM `replica_snapshot_cache_usage` WHERE `timestamp` < DATE_SUB(NOW(), INTERVAL 1 WEEK)'
        )
        if num_deleted != 0:
            self._mysql.query('OPTIMIZE TABLE `replica_snapshot_cache_usage`')
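
For orientation, here is a minimal sketch of how the hooks above fit together. It is illustrative only: real callers go through the public TransactionHistoryInterface API rather than the _do_* methods, and the partition name and policy version are hypothetical placeholders.

# Illustrative only: production code drives these hooks via the base-class API.
history = MySQLHistory()
if history._do_acquire_lock(blocking=True):
    try:
        # 'AnalysisOps' and 'v1.0' are placeholders
        run = history._do_new_run(HistoryRecord.OP_COPY, 'AnalysisOps',
                                  'v1.0', False, 'illustrative run')
        # ... record copy entries for this run here ...
        history._do_close_run(HistoryRecord.OP_COPY, run)
    finally:
        history._do_release_lock(force=False)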
Example #7
class ActivityLock(object):
    """
    Web-based activity lock using registry.
    """
    def __init__(self,
                 application,
                 service='dynamo',
                 asuser='',
                 db_params=config.registry.db_params):
        self._mysql = MySQL(**db_params)

        self.application = application
        self.service = service
        if asuser:
            self.user = asuser
        else:
            self.user = config.activitylock.default_user

    def __enter__(self):
        self.lock()

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.unlock():
            raise RuntimeError('Failed to unlock')

        return exc_type is None and exc_value is None and traceback is None

    def lock(self):
        while True:
            self._mysql.query(
                'LOCK TABLES `activity_lock` WRITE, `users` WRITE, `services` WRITE'
            )

            query = 'SELECT `users`.`name`, `services`.`name` FROM `activity_lock`'
            query += ' INNER JOIN `users` ON `users`.`id` = `activity_lock`.`user_id`'
            query += ' INNER JOIN `services` ON `services`.`id` = `activity_lock`.`service_id`'
            query += ' WHERE `application` = %s'
            result = self._mysql.query(query, self.application)
            if len(result) == 0:
                break

            elif result[0] == (self.user, self.service):
                query = 'DELETE FROM `activity_lock` WHERE `application` = %s'
                self._mysql.query(query, self.application)
                break

            logger.info(
                'Activity lock for %s in place: user = %s, service = %s',
                self.application, *result[0])
            self._mysql.query('UNLOCK TABLES')
            time.sleep(60)

        query = 'INSERT INTO `activity_lock` (`user_id`, `service_id`, `application`, `timestamp`, `note`)'
        query += ' SELECT `users`.`id`, `services`.`id`, %s, NOW(), \'Dynamo running\' FROM `users`, `services`'
        query += ' WHERE `users`.`name` = %s AND `services`.`name` = %s'
        self._mysql.query(query, self.application, self.user, self.service)

        self._mysql.query('UNLOCK TABLES')

        logger.info('Locked system for %s', self.application)

    def unlock(self):
        self._mysql.query(
            'LOCK TABLES `activity_lock` WRITE, `users` WRITE, `services` WRITE'
        )

        query = 'SELECT `users`.`name`, `services`.`name` FROM `activity_lock`'
        query += ' INNER JOIN `users` ON `users`.`id` = `activity_lock`.`user_id`'
        query += ' INNER JOIN `services` ON `services`.`id` = `activity_lock`.`service_id`'
        query += ' WHERE `application` = %s'
        result = self._mysql.query(query, self.application)
        if len(result) == 0:
            self._mysql.query('UNLOCK TABLES')
            return True

        if result[0] == (self.user, self.service):
            query = 'DELETE FROM `activity_lock` WHERE `application` = %s'
            self._mysql.query(query, self.application)
            self._mysql.query('UNLOCK TABLES')
            return True

        else:
            logger.error(
                'Lock logic error: some process obtained the activity lock for %s',
                self.application)
            self._mysql.query('UNLOCK TABLES')
            return False
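
Because __enter__ and __exit__ are defined above, ActivityLock works as a context manager. A minimal sketch; 'detox' is a hypothetical application name.

# Blocks in lock() until the registry lock is obtained; unlock() runs on exit.
with ActivityLock('detox'):
    pass  # run the application while holding the activity lock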
Example #8
    def __init__(self, inventory, history):
        self._inventory = inventory
        self._history = history
        self._mysql = MySQL(**config.registry.db_params)
Example #9
                    help='Logging level.')

args = parser.parse_args()
sys.argv = []

# Need to setup logging before loading other modules
log_level = getattr(logging, args.log_level.upper())

logging.basicConfig(level=log_level)

logger = logging.getLogger(__name__)

from common.interface.mysql import MySQL

store = MySQL(config_file='/etc/my.cnf',
              config_group='mysql-dynamo',
              db='dynamoregister')

if args.command[0] == 'update':
    logger.info('Synchronizing the user list to SiteDB.')

    from common.interface.sitedb import SiteDB
    sitedb = SiteDB()

    domain_id = store.query(
        'SELECT `id` FROM `domains` WHERE `name` = \'cern.ch\'')[0]

    query = 'INSERT INTO `users` (`name`, `domain_id`, `email`, `dn`) VALUES (%s, ' + str(
        domain_id
    ) + ', %s, %s) ON DUPLICATE KEY UPDATE `email` = `email`, `dn` = `dn`'
Example #10
def get_phedex_tree(site):
    """
    Get the file list tree from PhEDEx.
    Uses the InventoryAge configuration to determine when to refresh cache.

    :param str site: The site to get information from PhEDEx for.
    :returns: A tree containing file replicas that are supposed to be at the site
    :rtype: ConsistencyCheck.datatypes.DirectoryInfo
    """

    tree = datatypes.DirectoryInfo('/store')

    valid_list = config.config_dict().get('DirectoryList', [])

    sql = MySQL(config_file='/etc/my.cnf',
                db='dynamo',
                config_group='mysql-dynamo')
    datasets = sql.query(
        'SELECT datasets.name '
        'FROM sites INNER JOIN dataset_replicas INNER JOIN datasets '
        'WHERE dataset_replicas.dataset_id=datasets.id AND '
        'dataset_replicas.site_id=sites.id and sites.name=%s', site)

    def add_files(dataset, retries):
        """
        :param str dataset: Dataset to get from PhEDEx
        :param int retries: The number of times to retry PhEDEx call
        :returns: Whether or not the addition was successful
        :rtype: bool
        """

        LOG.info('Getting PhEDEx contents for %s', dataset)

        phedex_response = get_json('cmsweb.cern.ch',
                                   '/phedex/datasvc/json/prod/filereplicas', {
                                       'node': site,
                                       'dataset': dataset
                                   },
                                   retries=retries,
                                   use_https=True)

        report = 0

        if not phedex_response:
            LOG.warning('Bad response from PhEDEx for %s', dataset)
            return False

        for block in phedex_response['phedex']['block']:
            LOG.debug('%s', block)
            replica_list = [(replica['name'], replica['bytes'],
                             int(replica['replica'][0]['time_create'] or time.time()),
                             block['name']) \
                                for replica in block['file'] \
                                if replica['name'].split('/')[2] in valid_list]

            report += len(replica_list)

            tree.add_file_list(replica_list)

        LOG.info('%i files', report)
        return True

    separate = []

    for primary in set([d.split('/')[1][:3] for d in datasets]):
        success = add_files('/%s*/*/*' % primary, 0)
        if not success:
            separate.append(primary)

    # Separate loop to retry datasets individually
    for dataset in [d for d in datasets if d.split('/')[1][:3] in separate]:
        success = add_files(dataset, 5)
        if not success:
            LOG.critical('Cannot get %s from PhEDEx. Do not trust results...',
                         dataset)

    return tree
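
A short usage sketch; the site name is a placeholder, and get_num_files() is the DirectoryInfo accessor already used in Example #5.

phedex_tree = get_phedex_tree('T2_US_MIT')  # placeholder site name
LOG.info('PhEDEx lists %i files for this site', phedex_tree.get_num_files())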
Example #11
def get_db_listing(site):
    """
    Get the list of files from dynamo database directly from MySQL.

    :param str site: The name of the site to load
    :returns: The file replicas that are supposed to be at a site
    :rtype: ConsistencyCheck.datatypes.DirectoryInfo
    """

    inv_sql = MySQL(config_file='/etc/my.cnf',
                    db='dynamo',
                    config_group='mysql-dynamo')

    # Get list of files
    curs = inv_sql._connection.cursor()

    LOG.info('About to make MySQL query for files at %s', site)

    tree = datatypes.DirectoryInfo('/store')

    def add_to_tree(curs):
        """
        Add cursor contents to the dynamo listing tree

        :param MySQLdb.cursor curs: The cursor that has just executed a file-listing query
        """
        dirs_to_look = iter(sorted(config.config_dict()['DirectoryList']))

        files_to_add = []
        look_dir = ''
        row = curs.fetchone()

        while row:
            name, size = row[0:2]
            timestamp = time.mktime(row[2].timetuple()) if len(row) == 3 else 0

            current_directory = name.split('/')[2]
            try:
                while look_dir < current_directory:
                    look_dir = next(dirs_to_look)
            except StopIteration:
                break

            if current_directory == look_dir:
                LOG.debug('Adding file: %s, %i', name, size)

                files_to_add.append((name, size, timestamp))

            row = curs.fetchone()

        tree.add_file_list(files_to_add)

    curs.execute(
        """
        SELECT files.name, files.size
        FROM block_replicas
        INNER JOIN sites ON block_replicas.site_id = sites.id
        INNER JOIN files ON block_replicas.block_id = files.block_id
        WHERE block_replicas.is_complete = 1 AND sites.name = %s
        AND group_id != 0
        ORDER BY files.name ASC
        """, (site, ))

    add_to_tree(curs)

    curs.execute(
        """
        SELECT files.name, files.size, NOW()
        FROM block_replicas
        INNER JOIN sites ON block_replicas.site_id = sites.id
        INNER JOIN files ON block_replicas.block_id = files.block_id
        WHERE (block_replicas.is_complete = 0 OR group_id = 0) AND sites.name = %s
        ORDER BY files.name ASC
        """, (site, ))

    add_to_tree(curs)

    LOG.info('MySQL query returned')

    return tree
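
To tie this back to Example #5, here is a sketch of comparing the inventory listing against the remote site tree. The site name is a placeholder and the lambdas are trivial stand-ins for the double_check filters built in Example #5.

inv_tree = get_db_listing('T2_US_MIT')                  # placeholder site name
site_tree = getsitecontents.get_site_tree('T2_US_MIT')
missing, m_size, orphan, o_size = datatypes.compare(
    inv_tree, site_tree, 'T2_US_MIT_compare',
    orphan_check=lambda name: False,    # stand-in filter
    missing_check=lambda name: False)   # stand-in filter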
Example #12
    def make_request(self,
                     resource='',
                     options=[],
                     method=GET,
                     format='url',
                     cache_lifetime=0):
        url = self.url_base
        if resource:
            url += '/' + resource

        if method == GET and len(options) != 0:
            if type(options) is list:
                url += '?' + '&'.join(options)
            elif type(options) is str:
                url += '?' + options

        if logger.getEffectiveLevel() == logging.DEBUG:
            logger.debug(url)

        # first check the cache
        if method == GET and self._cache_lock is not None and cache_lifetime > 0:
            with self._cache_lock:
                try:
                    db = MySQL(**config.webservice.cache_db_params)
                    cache = db.query(
                        'SELECT UNIX_TIMESTAMP(`timestamp`), `content` FROM `webservice` WHERE `url` = %s',
                        url)
                    db.close()
                except:
                    logger.error(
                        'Connection to cache DB failed when fetching the timestamp for %s.',
                        url)
                    cache = []

            if len(cache) != 0:
                timestamp, content = cache[0]
                if time.time() - timestamp < cache_lifetime:
                    logger.debug('Using cache for %s', url)
                    if self.accept == 'application/json':
                        result = json.loads(content)
                        unicode2str(result)

                    elif self.accept == 'application/xml':
                        # TODO implement xml -> dict
                        result = content

                    return result

        # now query the URL
        request = urllib2.Request(url)

        if method == POST:
            if format == 'url':
                # Options can be a dict or a list of key=value strings or 2-tuples. The latter case allows repeated keys (e.g. dataset=A&dataset=B)
                if type(options) is list:
                    # convert key=value strings to (key, value) 2-tuples
                    optlist = []
                    for opt in options:
                        if type(opt) is tuple:
                            optlist.append(opt)

                        elif type(opt) is str:
                            key, eq, value = opt.partition('=')
                            if eq == '=':
                                optlist.append((key, value))

                    options = optlist

                data = urllib.urlencode(options)

            elif format == 'json':
                # Options must be jsonizable.
                request.add_header('Content-type', 'application/json')
                data = json.dumps(options)

            request.add_data(data)

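        # Retry the request with exponential backoff (factor 1.5) until
        # config.webservice.num_attempts failures have accumulated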
        wait = 1.
        exceptions = []
        while len(exceptions) != config.webservice.num_attempts:
            try:
                if self.auth_handler:
                    opener = urllib2.build_opener(self.auth_handler())
                else:
                    opener = urllib2.build_opener()

                if 'Accept' not in self.headers:
                    opener.addheaders.append(('Accept', self.accept))

                opener.addheaders.extend(self.headers)

                response = opener.open(request)

                # clean up - break reference cycle so python can free the memory up
                for handler in opener.handlers:
                    handler.parent = None
                del opener

                content = response.read()
                del response

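                # Cache the raw GET response: write one quoted CSV row to a
                # temporary file and bulk-load it into the webservice cache table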
                if method == GET and self._cache_lock is not None:
                    with tempfile.NamedTemporaryFile(mode='w',
                                                     delete=False) as tmpfile:
                        filename = tmpfile.name
                        tmpfile.write('\'%s\',\'%s\',\'%s\'' %
                                      (MySQL.escape_string(url),
                                       time.strftime('%Y-%m-%d %H:%M:%S'),
                                       MySQL.escape_string(content)))

                    os.chmod(filename, 0644)

                    with self._cache_lock:
                        try:
                            db = MySQL(**config.webservice.cache_db_params)
                            db.query(
                                'DELETE FROM `webservice` WHERE `url` = %s',
                                url)
                            db.query(
                                r"LOAD DATA LOCAL INFILE '%s' INTO TABLE `dynamocache`.`webservice` FIELDS TERMINATED BY ',' ENCLOSED BY '\''"
                                % filename)
                            db.close()
                        except:
                            logger.error(
                                'Connection to cache DB failed when writing the response of %s.',
                                url)
                            pass

                    os.remove(filename)

                if self.accept == 'application/json':
                    result = json.loads(content)
                    unicode2str(result)

                elif self.accept == 'application/xml':
                    # TODO implement xml -> dict
                    result = content

                del content

                return result

            except urllib2.HTTPError as err:
                last_except = (str(err)) + '\nBody:\n' + err.read()
            except:
                last_except = sys.exc_info()[:2]

            exceptions.append(last_except)

            logger.info(
                'Exception "%s" occurred in webservice. Trying again in %.1f seconds.',
                str(last_except), wait)

            time.sleep(wait)
            wait *= 1.5

        else:  # exhausted allowed attempts
            logger.error('Too many failed attempts in webservice')
            logger.error('%s' % ' '.join(map(str, exceptions)))
            raise RuntimeError('webservice too many attempts')
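# A minimal usage sketch (not from the original source; the subclass and the
# resource/options below are hypothetical):
#
#   intf = SomeRESTInterface()   # hypothetical subclass providing url_base, accept, headers
#   # GET with URL options, caching the response for one hour
#   result = intf.make_request('blockreplicas', ['node=T2_US_MIT'],
#                              method=GET, cache_lifetime=3600)
#   # POST with a JSON body
#   intf.make_request('requests/create', {'item': '/A/B/C'},
#                     method=POST, format='json')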
Exemple #13
0
    def __init__(self, db_params=config.registry.db_params):
        self._mysql = MySQL(**db_params)
Exemple #14
0
class MySQLReplicaLock(object):
    """
    A plugin for DemandManager that records which block replicas are locked.
    Sets one demand value on each affected dataset:
      locked_blocks:   {site: set of blocks}
    """
    def __init__(self, db_params=config.registry.db_params):
        self._mysql = MySQL(**db_params)

    def load(self, inventory):
        self.update(inventory)

    def update(self, inventory):
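        # Collect all currently active locks; optionally restrict them to the
        # (user, service) pairs configured in config.mysqllock.users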
        query = 'SELECT `item`, `sites`, `groups` FROM `detox_locks` WHERE `unlock_date` IS NULL'
        if len(config.mysqllock.users) != 0:
            query += ' AND (`user_id`, `service_id`) IN ('
            query += 'SELECT u.`id`, s.`id` FROM `users` AS u, `services` AS s WHERE '
            query += ' OR '.join(
                '(u.`name` LIKE "%s" AND s.`name` LIKE "%s")' % us
                for us in config.mysqllock.users)
            query += ')'

        entries = self._mysql.query(query)

        for item_name, sites_pattern, groups_pattern in entries:
            if '#' in item_name:
                dataset_name, block_real_name = item_name.split('#')
            else:
                dataset_name = item_name
                block_real_name = None

            try:
                dataset = inventory.datasets[dataset_name]
            except KeyError:
                logger.debug('Cannot lock unknown dataset %s', dataset_name)
                continue

            if dataset.replicas is None:
                continue

            if dataset.blocks is None:
                inventory.store.load_blocks(dataset)

            if block_real_name is None:
                blocks = list(dataset.blocks)
            else:
                block = dataset.find_block(
                    Block.translate_name(block_real_name))
                if block is None:
                    logger.debug('Cannot lock unknown block %s#%s',
                                 dataset_name, block_real_name)
                    continue

                blocks = [block]

            sites = set()
            if sites_pattern:
                if '*' in sites_pattern:
                    sites.update(s for n, s in inventory.sites.items()
                                 if fnmatch.fnmatch(n, sites_pattern))
                else:
                    try:
                        sites.add(inventory.sites[sites_pattern])
                    except KeyError:
                        pass

            if len(sites) == 0:
                # if no site matches the pattern, we will be on the safe side and treat it as a global lock
                sites.update(r.site for r in dataset.replicas)

            groups = set()
            if groups_pattern:
                if '*' in groups_pattern:
                    groups.update(g for n, g in inventory.groups.items()
                                  if fnmatch.fnmatch(n, groups_pattern))
                else:
                    try:
                        groups.add(inventory.groups[groups_pattern])
                    except KeyError:
                        pass

            if len(groups) == 0:
                # if no group matches the pattern, we will be on the safe side and treat it as a global lock
                for replica in dataset.replicas:
                    groups.update(brep.group
                                  for brep in replica.block_replicas)

            try:
                locked_blocks = dataset.demand['locked_blocks']
            except KeyError:
                locked_blocks = dataset.demand['locked_blocks'] = {}

            for replica in dataset.replicas:
                if replica.site not in sites:
                    continue

                if replica.site not in locked_blocks:
                    locked_blocks[replica.site] = set()

                for block_replica in replica.block_replicas:
                    if block_replica.group not in groups:
                        continue

                    if block_replica.block in blocks:
                        locked_blocks[replica.site].add(block_replica.block)
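# A minimal usage sketch (not from the original source; `inventory` is assumed to
# be a loaded inventory object with the .datasets/.sites/.groups used above):
#
#   lock = MySQLReplicaLock()
#   lock.load(inventory)
#   for dataset in inventory.datasets.itervalues():
#       for site, blocks in dataset.demand.get('locked_blocks', {}).iteritems():
#           logger.info('%i blocks of %s locked at %s',
#                       len(blocks), dataset.name, site.name)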
Exemple #15
0
def main(site):
    """
    Gets the listing from the dynamo database, and remote XRootD listings of a given site.
    The differences are compared to deletion queues and other things.

    .. Note::
       If you add things, list them in the module docstring.

    The differences that should be acted on are copied to the summary webpage
    and entered into the dynamoregister database.

    :param str site: The site to run the check over
    :returns: missing files, size, orphan files, size
    :rtype: list, long, list, long
    """

    start = time.time()

    prev_missing = '%s_compare_missing.txt' % site
    prev_set = set()
    if os.path.exists(prev_missing):
        with open(prev_missing, 'r') as prev_file:
            for line in prev_file:
                prev_set.add(line.strip())

        if int(config.config_dict().get('SaveCache')):
            prev_new_name = '%s.%s' % (prev_missing,
                                       datetime.datetime.fromtimestamp(
                                           os.stat(prev_missing).st_mtime).strftime('%y%m%d')
                                      )
        else:
            prev_new_name = prev_missing

        shutil.move(prev_missing,
                    os.path.join(config.config_dict()['CacheLocation'],
                                 prev_new_name)
                   )

    # All of the files and summary will be dumped here
    webdir = config.config_dict()['WebDir']

    # Open a connection temporarily to make sure we only list good sites
    status_check = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo')
    status = status_check.query('SELECT status FROM sites WHERE name = %s', site)[0]

    if status != 'ready':
        LOG.error('Site %s status is %s', site, status)

        # Note the attempt to do listing
        conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
        curs = conn.cursor()
        curs.execute(
            """
            REPLACE INTO stats VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?)
            """,
            (site, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))

        conn.commit()
        conn.close()

        exit(0)

    # Close the connection while we are getting the trees together
    status_check.close()

    inv_tree = getinventorycontents.get_db_listing(site)

    # Reset the DirectoryList for the XRootDLister to run on
    config.DIRECTORYLIST = [directory.name for directory in inv_tree.directories]

    site_tree = getsitecontents.get_site_tree(site)

    # Get whether or not the site is debugged
    conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
    curs = conn.cursor()
    curs.execute('SELECT isgood FROM sites WHERE site = ?', (site, ))
    is_debugged = curs.fetchone()[0]
    conn.close()

    # Create the function to check orphans and missing

    # First, datasets in the deletions queue can be missing
    acceptable_missing = checkphedex.set_of_deletions(site)

    # Orphan files cannot belong to any dataset that should be at the site
    inv_sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo')
    acceptable_orphans = set(
        inv_sql.query(
            """
            SELECT datasets.name FROM sites
            INNER JOIN dataset_replicas ON dataset_replicas.site_id=sites.id
            INNER JOIN datasets ON dataset_replicas.dataset_id=datasets.id
            WHERE sites.name=%s
            """,
            site)
        )

    # Orphan files may be a result of deletion requests
    acceptable_orphans.update(acceptable_missing)

    # Ignored datasets will not give a full listing, so they can't be accused of having orphans
    acceptable_orphans.update(
        inv_sql.query('SELECT name FROM datasets WHERE status=%s', 'IGNORED')
        )

    # Do not delete anything that is protected by Unified
    protected_unmerged = get_json('cmst2.web.cern.ch', '/cmst2/unified/listProtectedLFN.txt')
    acceptable_orphans.update(['/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                                 split_name[6], split_name[5]) \
                                   for split_name in \
                                   [name.split('/') for name in protected_unmerged['protected']]
                              ])

    LOG.debug('Acceptable orphans: \n%s\n', '\n'.join(acceptable_orphans))

    ignore_list = config.config_dict().get('IgnoreDirectories', [])

    def double_check(file_name, acceptable):
        """
        Checks the file name against a list of datasets to not list files from.

        :param str file_name: LFN of the file
        :param set acceptable: Datasets to not list files from
                               (Acceptable orphans or missing)
        :returns: Whether the file belongs to a dataset in the list or not
        :rtype: bool
        """
        LOG.debug('Checking file_name: %s', file_name)

        # Skip over paths that include part of the list of ignored directories
        for pattern in ignore_list:
            if pattern in file_name:
                return True

        split_name = file_name.split('/')
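        # For a hypothetical LFN such as
        #   /store/mc/RunIISummer16/TTJets/AODSIM/PUMoriond17_v1/00000/file.root
        # the indices below rebuild the dataset name
        #   /TTJets/RunIISummer16-PUMoriond17_v1/AODSIM
        # which is what the acceptable sets contain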

        try:
            return '/%s/%s-%s/%s' % (split_name[4], split_name[3],
                                     split_name[6], split_name[5]) in acceptable
        except IndexError:
            LOG.warning('Strange file name: %s', file_name)
            return True

    check_orphans = lambda x: double_check(x, acceptable_orphans)
    check_missing = lambda x: double_check(x, acceptable_missing)

    # Do the comparison
    missing, m_size, orphan, o_size = datatypes.compare(
        inv_tree, site_tree, '%s_compare' % site,
        orphan_check=check_orphans, missing_check=check_missing)

    LOG.debug('Missing size: %i, Orphan size: %i', m_size, o_size)

    # Enter things for site in registry
    if os.environ['USER'] == 'dynamo':
        reg_sql = MySQL(config_file='/etc/my.cnf',
                        db='dynamoregister', config_group='mysql-dynamo')
    else:
        reg_sql = MySQL(config_file=os.path.join(os.environ['HOME'], 'my.cnf'),
                        db='dynamoregister', config_group='mysql-register-test')

    # Determine if files should be entered into the registry

    many_missing = len(missing) > int(config.config_dict()['MaxMissing'])
    many_orphans = len(orphan) > int(config.config_dict()['MaxOrphan'])

    if is_debugged and not many_missing and not many_orphans:
        def execute(query, *args):
            """
            Executes the query on the registry and outputs a log message depending on query

            :param str query: The SQL query to execute
            :param args: The arguments to the SQL query
            """

            reg_sql.query(query, *args)

            if 'transfer_queue' in query:
                LOG.info('Copying %s from %s', args[0], args[1])
            elif 'deletion_queue' in query:
                LOG.info('Deleting %s', args[0])

    else:
        if many_missing:
            LOG.error('Too many missing files: %i, you should investigate.', len(missing))

        if many_orphans:
            LOG.error('Too many orphan files: %i out of %i, you should investigate.',
                      len(orphan), site_tree.get_num_files())

        execute = lambda *_: 0

    # Then do entries, if the site is in the debugged status

    def add_transfers(line, sites):
        """
        Add the file into the transfer queue for multiple sites.

        :param str line: The file LFN to transfer
        :param list sites: Sites to try to transfer from
        :returns: Whether any source sites were available for the transfer
        :rtype: bool
        """

        # Only queue a transfer if the file was also missing in the previous run
        # (or if there is no previous listing to compare against)
        if line in prev_set or not prev_set:
            for location in sites:
                execute(
                    """
                    INSERT IGNORE INTO `transfer_queue`
                    (`file`, `site_from`, `site_to`, `status`, `reqid`)
                    VALUES (%s, %s, %s, 'new', 0)
                    """,
                    line, location, site)

        return bool(sites)


    # Set up a query for source sites; a storage-type condition is appended below
    site_query = """
                 SELECT sites.name FROM sites
                 INNER JOIN block_replicas ON sites.id = block_replicas.site_id
                 INNER JOIN files ON block_replicas.block_id = files.block_id
                 WHERE files.name = %s AND sites.name != %s
                 AND sites.status = 'ready'
                 AND block_replicas.is_complete = 1
                 AND group_id != 0
                 {0}
                 """

    # Track files with no sources
    no_source_files = []

    for line in missing:

        # Get sites that are not tape
        sites = inv_sql.query(
            site_query.format('AND sites.storage_type != "mss"'),
            line, site)

        if not add_transfers(line, sites):
            # Track files without disk source
            no_source_files.append(line)

            # Get sites that are tape
            sites = inv_sql.query(
                site_query.format('AND sites.storage_type = "mss"'),
                line, site)

            add_transfers(line, sites)



    # Only get the empty nodes that are not in the inventory tree
    for line in orphan + \
            [empty_node for empty_node in site_tree.empty_nodes_list() \
                 if not inv_tree.get_node('/'.join(empty_node.split('/')[2:]),
                                          make_new=False)]:
        execute(
            """
            INSERT IGNORE INTO `deletion_queue`
            (`file`, `site`, `status`) VALUES
            (%s, %s, 'new')
            """,
            line, site)


    reg_sql.close()


    with open('%s_missing_nosite.txt' % site, 'w') as nosite:
        for line in no_source_files:
            nosite.write(line + '\n')

    # We want to track which blocks missing files are coming from
    track_missing_blocks = defaultdict(
        lambda: {'errors': 0,
                 'blocks': defaultdict(lambda: {'group': '',
                                                'errors': 0}
                                      )
                })

    blocks_query = """
                   SELECT blocks.name, IFNULL(groups.name, 'Unsubscribed') FROM blocks
                   INNER JOIN files ON files.block_id = blocks.id
                   INNER JOIN block_replicas ON block_replicas.block_id = files.block_id
                   INNER JOIN sites ON block_replicas.site_id = sites.id
                   LEFT JOIN groups ON block_replicas.group_id = groups.id
                   WHERE files.name = %s AND sites.name = %s
                   """

    with open('%s_compare_missing.txt' % site, 'r') as input_file:
        for line in input_file:
            split_name = line.split('/')
            dataset = '/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5])

            output = inv_sql.query(blocks_query, line.strip(), site)

            if not output:
                LOG.warning('The following query returned no results: %s',
                            blocks_query % (line.strip(), site))
                LOG.warning('Most likely cause is a dynamo update between the listing and now')
                from_phedex = get_json('cmsweb.cern.ch', '/phedex/datasvc/json/prod/filereplicas',
                                       params={'node': site, 'LFN': line.strip()}, use_cert=True)

                try:
                    output = [(from_phedex['phedex']['block'][0]['name'].split('#')[1],
                               from_phedex['phedex']['block'][0]['replica'][0]['group'])]
                except IndexError:
                    LOG.error('File replica not in PhEDEx either!')
                    LOG.error('Skipping block level report for this file.')
                    continue

            block, group = output[0]

            track_missing_blocks[dataset]['errors'] += 1
            track_missing_blocks[dataset]['blocks'][block]['errors'] += 1
            track_missing_blocks[dataset]['blocks'][block]['group'] = group

    inv_sql.close()

    # Output file with the missing datasets
    with open('%s_missing_datasets.txt' % site, 'w') as output_file:
        for dataset, vals in \
                sorted(track_missing_blocks.iteritems(),
                       key=lambda x: x[1]['errors'],
                       reverse=True):

            for block_name, block in sorted(vals['blocks'].iteritems()):
                output_file.write('%10i    %-17s  %s#%s\n' % \
                                      (block['errors'], block['group'],
                                       dataset, block_name))

    # If there were permissions or connection issues, no files would be listed
    # Otherwise, copy the output files to the web directory
    shutil.copy('%s_missing_datasets.txt' % site, webdir)
    shutil.copy('%s_missing_nosite.txt' % site, webdir)
    shutil.copy('%s_compare_missing.txt' % site, webdir)
    shutil.copy('%s_compare_orphan.txt' % site, webdir)

    if (os.environ.get('ListAge') is None) and (os.environ.get('InventoryAge') is None):

        # Update the runtime stats on the stats page if the listing settings are not changed
        conn = sqlite3.connect(os.path.join(webdir, 'stats.db'))
        curs = conn.cursor()

        curs.execute('INSERT INTO stats_history SELECT * FROM stats WHERE site=?', (site, ))
        curs.execute(
            """
            REPLACE INTO stats VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME(DATETIME(), "-4 hours"), ?, ?)
            """,
            (site, time.time() - start, site_tree.get_num_files(),
             site_tree.count_nodes(), len(site_tree.empty_nodes_list()),
             config.config_dict().get('NumThreads', config.config_dict().get('MinThreads', 0)),
             len(missing), m_size, len(orphan), o_size, len(no_source_files),
             site_tree.get_num_files(unlisted=True)))

        conn.commit()
        conn.close()
Exemple #16
0
def get_phedex_tree(site):
    """
    Get the file list tree from PhEDEx.
    Uses the InventoryAge configuration to determine when to refresh cache.

    :param str site: The site to get information from PhEDEx for.
    :returns: A tree containing file replicas that are supposed to be at the site
    :rtype: ConsistencyCheck.datatypes.DirectoryInfo
    """

    tree = datatypes.DirectoryInfo('/store')

    valid_list = config.config_dict().get('DirectoryList', [])

    sql = MySQL(config_file='/etc/my.cnf', db='dynamo', config_group='mysql-dynamo')
    datasets = sql.query('SELECT datasets.name '
                         'FROM sites INNER JOIN dataset_replicas INNER JOIN datasets '
                         'WHERE dataset_replicas.dataset_id=datasets.id AND '
                         'dataset_replicas.site_id=sites.id and sites.name=%s', site)

    def add_files(dataset, retries):
        """
        :param str dataset: Dataset to get from PhEDEx
        :param int retries: The number of times to retry PhEDEx call
        :returns: Whether or not the addition was successful
        :rtype: bool
        """

        LOG.info('Getting PhEDEx contents for %s', dataset)

        phedex_response = get_json(
            'cmsweb.cern.ch', '/phedex/datasvc/json/prod/filereplicas',
            {'node': site, 'dataset': dataset},
            retries=retries,
            use_https=True)

        report = 0

        if not phedex_response:
            LOG.warning('Bad response from PhEDEx for %s', dataset)
            return False

        for block in phedex_response['phedex']['block']:
            LOG.debug('%s', block)
            replica_list = [(replica['name'], replica['bytes'],
                             int(replica['replica'][0]['time_create'] or time.time()),
                             block['name']) \
                                for replica in block['file'] \
                                if replica['name'].split('/')[2] in valid_list]

            report += len(replica_list)

            tree.add_file_list(replica_list)

        LOG.info('%i files', report)
        return True

    separate = []

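    # First try to fetch everything in bulk, using a wildcard over the first three
    # characters of each primary dataset name; failures are retried per dataset below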
    for primary in set([d.split('/')[1][:3] for d in datasets]):
        success = add_files('/%s*/*/*' % primary, 0)
        if not success:
            separate.append(primary)

    # Separate loop to retry datasets individually
    for dataset in [d for d in datasets if d.split('/')[1][:3] in separate]:
        success = add_files(dataset, 5)
        if not success:
            LOG.critical('Cannot get %s from PhEDEx. Do not trust results...', dataset)

    return tree
Exemple #17
0
class QueueManager(object):

    def __init__(self, inventory, history):
        self._inventory = inventory
        self._history = history
        self._mysql = MySQL(**config.registry.db_params)

    def getTransfers(self,requests):
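        # Done single-file transfers (reqid = 0) are simply removed from the queue;
        # entries tied to a unified request are marked done on the UserRequest objects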
        sql = "select tq.reqid,tq.file,tq.site_from,tq.site_to,tq.status"
        sql = sql + " from transfer_queue as tq"
        sql = sql + " where tq.status='done' and tq.reqid=0"
        entries = self._mysql.query(sql)
        singlesToDelete = []
        for line in entries:
            reqid = int(line[0])
            (fileName,siteFrom,siteTo,status) = line[1:]
            if reqid == 0 :
                singlesToDelete.append((fileName,siteTo))

        self._mysql.delete_many('transfer_queue',('file','site_to'),singlesToDelete)

        sql = "select tq.reqid,tq.file,tq.site_from,tq.site_to,tq.status"
        sql = sql + " from transfer_queue as tq,requests_unified as ru"
        sql = sql + " where tq.status='done' and tq.reqid=ru.reqid"
        entries = self._mysql.query(sql)
        for line in entries:
            reqid = int(line[0])
            (fileName,siteFrom,siteTo,status) = line[1:]
            if reqid == 0 :
                continue

            stripedName = (fileName.split('/'))[-1]
            uRequest = requests[reqid]
            dsetObj = self._inventory.datasets[uRequest._itemName]
            fileObj = None
            for filef in dsetObj.files:
                if filef.name == stripedName:
                    fileObj = filef
                    break
            uRequest.markDone(stripedName,fileObj)

    def getDeletions(self,requests):
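        # Same pattern as getTransfers, but for the deletion queue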
        sql = "select dq.reqid,dq.file,dq.site,dq.status"
        sql = sql + " from deletion_queue as dq where dq.status='done' and dq.reqid=0"
        entries = self._mysql.query(sql)
        singlesToDelete = []
        for line in entries:
            reqid = int(line[0])
            (fileName,site,status) = line[1:]
            if reqid == 0 :
                singlesToDelete.append((fileName,site))
        self._mysql.delete_many('deletion_queue',('file','site'),singlesToDelete)


        sql = "select dq.reqid,dq.file,dq.site,dq.status"
        sql = sql + " from deletion_queue as dq,requests_unified as ru"
        sql = sql + " where dq.status='done' and dq.reqid=ru.reqid"
        entries = self._mysql.query(sql)
        print '------------'
        for line in entries:
            reqid = int(line[0])
            (fileName,site,status) = line[1:]
            if reqid == 0 :
                continue
            
            stripedName = (fileName.split('/'))[-1]
            uRequest = requests[reqid]
            dsetObj = self._inventory.datasets[uRequest._itemName]
            fileObj = None
            for filef in dsetObj.files:
                if filef.name == stripedName:
                    fileObj = filef
                    break
            uRequest.markDone(stripedName,fileObj)

    def fillDoneTransfers(self,requests):
        #for finished requests we update history first, then delete
        #for unfinished requests we update status and timestamps
        new_dataset_replicas = []
        replica_timestamps = {}
        done_requests = []
        for reqid in sorted(requests):
            uRequest = requests[reqid]
            dataset = self._inventory.datasets[uRequest._itemName]
            site    = self._inventory.sites[uRequest._siteTo]
            reqtype = uRequest._reqType
            if reqtype != 'copy':
                continue
                
            dsetRep = dataset.find_replica(site)
            #new block replica is derived from existing replica
            targetGroup = None
            for someRep in dataset.replicas:
                if someRep == dsetRep:
                    continue
                else:
                    targetGroup = someRep.block_replicas[0].group
                    break

            #ask for size, update only if size is changing
            if dsetRep is None:
                dsetRep = DatasetReplica(dataset, site)
                self._inventory.update(dsetRep)
                for block in dataset.blocks:
                    blockRep = BlockReplica(block, site, targetGroup, size = 0, last_update = 0)
                    self._inventory.update(blockRep)

                print "making new dataset replica"

            dsetDone = True
            for blockRep in dsetRep.block_replicas:
                block = blockRep.block
                size = uRequest.getSize(block)
                complete = False
                if size == block.size:
                    complete = True
                else:
                    dsetDone = False
                    
                if size > blockRep.size:
                    print 'updating block replica ...'
                    blockRep.group = targetGroup
                    blockRep.is_complete = complete
                    blockRep.is_custodial = False
                    blockRep.size = size
                    blockRep.last_update = time.time()

            #here we enter done requests into the history database
            #and delete them from ongoing activities
            print dsetRep.dataset.name
            print "dset done status = " + str(dsetDone)
            if 0 in uRequest._files.itervalues():
                print "reqid=" + str(reqid) + " request not finished"
            else:
                print "reqid=" + str(reqid) + " request is done !!!"
                new_dataset_replicas.append(dsetRep)
                replica_timestamps[dsetRep] = uRequest._created
                done_requests.append(uRequest._reqid)

        #save complete requests into history
        self._history.save_dataset_transfers(new_dataset_replicas,replica_timestamps)
        #and delete from registry
        self._mysql.delete_many('requests_unified','reqid',done_requests)
        self._mysql.delete_many('transfer_queue','reqid',done_requests)

    def fillDoneDeletions(self,requests):
        #for finished requests we update history first, then delete
        #for unfinished requests we update status and timestamps
        gone_dataset_replicas = []
        replica_timestamps = {}
        done_requests = []
        for reqid in sorted(requests):
            uRequest = requests[reqid]
            dataset = self._inventory.datasets[uRequest._itemName]
            site    = self._inventory.sites[uRequest._siteTo]
            reqtype = uRequest._reqType
            if reqtype != 'delete':
                continue
            
            print '------------'
            print reqid

            dsetRep = dataset.find_replica(site)

            #deleting something that does not exist
            if dsetRep is None:
                print "..!!.. trying to delete non-existing dataset"
                print site.name
                print dataset.name
                continue

            #here we enter done requests into the history database
            #and delete them from ongoing activities
            print dsetRep.dataset.name
            if 0 in uRequest._files.itervalues():
                print "reqid=" + str(reqid) + " request not finished"
            else:
                print "reqid=" + str(reqid) + " request is done !!!"
                gone_dataset_replicas.append(dsetRep)
                replica_timestamps[dsetRep] = uRequest._created
                done_requests.append(uRequest._reqid)
                print dsetRep.block_replicas
                dataset.replicas.remove(dsetRep)
                site.remove_dataset_replica(dsetRep)

        #save complete requests into history
        self._history.save_dataset_deletions(gone_dataset_replicas,replica_timestamps)
        #and delete from registry
        self._mysql.delete_many('requests_unified','reqid',done_requests)
        self._mysql.delete_many('deletion_queue','reqid',done_requests)


    def run(self, comment = ''):
        requests = {}
        logger.info('QueueManager run starting at %s', time.strftime('%Y-%m-%d %H:%M:%S'))

        sql = "select * from requests_unified where status='queued'"
        entries = self._mysql.query(sql)
        for line in entries:
            reqid = int(line[0])
            (itemName,datatype,siteTo,reqtype,rank,status,created,updated) = line[1:]
            dsetObj = self._inventory.datasets[itemName]
            requests[reqid] = UserRequest(reqid,itemName,siteTo,reqtype,created)
            for fileObj in dsetObj.files:
                requests[reqid]._files[fileObj.name] = 0

        self.getTransfers(requests)
        self.getDeletions(requests)

        self.fillDoneTransfers(requests)
        self.fillDoneDeletions(requests)

        logger.info('Finished QueueManager run at %s\n', time.strftime('%Y-%m-%d %H:%M:%S'))
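# A minimal usage sketch (not from the original source; `inventory` and `history`
# are assumed to be the objects passed to the constructor above):
#
#   manager = QueueManager(inventory, history)
#   manager.run()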