Example #1
    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        LOG.info('Ignoring deletion schedule of %d replicas (operation %d)',
                 len(replica_list), operation_id)

        result = []

        for replica, block_replicas in replica_list:
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)

            if block_replicas is None:
                result.append((clone_replica, None))
            else:
                clone_block_replicas = []

                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site,
                                                       block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clone_block_replicas.append(clone_block_replica)

                result.append((clone_replica, clone_block_replicas))

        return result

    def schedule_copies(self, replica_list, operation_id, comments = ''): #override
        sites = set(r.site for r in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

        LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

        result = []

        for replica in replica_list:
            # Function spec is to return clones (so that if specific block fails to copy, we can return a dataset replica without the block)
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)

            for block_replica in replica.block_replicas:
                LOG.debug('Subscribing files for %s', str(block_replica))

                if block_replica.file_ids is None:
                    LOG.debug('No file to subscribe for %s', str(block_replica))
                    continue  # nothing to subscribe for this block; keep processing the rest
        
                all_files = block_replica.block.files
                missing_files = all_files - block_replica.files()

                for lfile in missing_files:
                    self.rlfsm.subscribe_file(block_replica.site, lfile)

                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        # no external dependency - everything is a success
        return result
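
The two methods above accept differently shaped inputs: schedule_copies takes a flat list of DatasetReplica objects at a single site, while schedule_deletions takes (DatasetReplica, block_replicas) pairs, with block_replicas set to None for a dataset-level deletion. A minimal, hypothetical driver sketching both call shapes; `op` is assumed to be an object exposing both methods (in the real project they may live on separate copy and deletion interfaces):

def schedule_everything(op, replicas, operation_id):
    # schedule_copies: flat list of DatasetReplica objects, all at one site
    copied = op.schedule_copies(replicas, operation_id, comments='example')

    # schedule_deletions: (DatasetReplica, block_replicas) pairs;
    # block_replicas is None to delete the whole dataset replica
    deleted = op.schedule_deletions([(r, None) for r in replicas],
                                    operation_id, comments='example')

    # both methods return clones, leaving the inventory objects untouched
    return copied, deleted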
Example #3
def new_replica_from_blocks(blocks, site, group):
    dataset = blocks[0].dataset
    replica = DatasetReplica(dataset, site)
    for block in blocks:
        replica.block_replicas.add(BlockReplica(block, site, group, size = 0))

    return replica
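
A hypothetical usage sketch for new_replica_from_blocks, building a replica that covers only the open blocks of a dataset; `dataset`, `site` and `group` are assumed to come from an already loaded inventory:

def replicate_open_blocks(dataset, site, group):
    open_blocks = [block for block in dataset.blocks if block.is_open]
    if not open_blocks:
        return None
    replica = new_replica_from_blocks(open_blocks, site, group)
    # the helper adds one zero-sized BlockReplica per requested block
    assert len(replica.block_replicas) == len(open_blocks)
    return replica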
Example #4
    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        LOG.info(
            'Scheduling deletion of %d replicas from %s using RLFSM (operation %d)',
            len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            clone_replica = DatasetReplica(dataset_replica.dataset,
                                           dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        return clones
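
The return value mirrors the request shape, so a caller can tell dataset-level from block-level deletions apart. A hypothetical consumer of the clones returned above:

def summarize_deletions(clones):
    for clone_replica, clone_block_replicas in clones:
        if clone_block_replicas is None:
            print('deleted dataset replica %s at %s' %
                  (clone_replica.dataset.name, clone_replica.site.name))
        else:
            for block_replica in clone_block_replicas:
                print('deleted block %s' % block_replica.block.full_name())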
Example #5
    def schedule_copies(self, replica_list, operation_id, comments = ''): #override
        LOG.info('Ignoring copy schedule of %d replicas (operation %d)', len(replica_list), operation_id)

        result = []

        for replica in replica_list:
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)
            
            for block_replica in replica.block_replicas:
                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        return result

    def schedule_copies(self, replica_list, operation_id, comments = ''): #override
        sites = set(r.site for r in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_copies should be called with a list of replicas at a single site.')

        LOG.info('Scheduling copy of %d replicas to %s using RLFSM (operation %d)', len(replica_list), list(sites)[0], operation_id)

        result = []

        for replica in replica_list:
            # Function spec is to return clones (so that if specific block fails to copy, we can return a dataset replica without the block)
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)
            result.append(clone_replica)

            for block_replica in replica.block_replicas:
                LOG.debug('Subscribing files for %s', str(block_replica))

                if block_replica.file_ids is None:
                    LOG.debug('No file to subscribe for %s', str(block_replica))
                    continue  # nothing to subscribe for this block; keep processing the rest
        
                all_files = block_replica.block.files
                missing_files = all_files - block_replica.files()

                for lfile in missing_files:
                    self.rlfsm.subscribe_file(block_replica.site, lfile)

                clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                clone_block_replica.copy(block_replica)
                clone_block_replica.last_update = int(time.time())
                clone_replica.block_replicas.add(clone_block_replica)

        if not self._read_only:
            for clone_replica in result:
                if clone_replica.growing:
                    self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, clone_replica.dataset.name, clone_replica.site.name, clone_replica.group.name)
                else:
                    for block_replica in clone_replica.block_replicas:
                        self.mysql.query('INSERT INTO `phedex_transfer_reservations` (`operation_id`, `item`, `site`, `group`) VALUES (%s, %s, %s, %s)', operation_id, block_replica.block.full_name(), clone_replica.site.name, block_replica.group.name)

        # no external dependency - everything is a success
        return result

    def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
        LOG.info('Ignoring deletion schedule of %d replicas (operation %d)', len(replica_list), operation_id)

        result = []

        for replica, block_replicas in replica_list:
            clone_replica = DatasetReplica(replica.dataset, replica.site)
            clone_replica.copy(replica)

            if block_replicas is None:
                result.append((clone_replica, None))
            else:
                clone_block_replicas = []
    
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block, block_replica.site, block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clone_block_replicas.append(clone_block_replica)
                    
                result.append((clone_replica, clone_block_replicas))

        return result

    def schedule_deletions(self, replica_list, operation_id, comments = ''): #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError('schedule_deletions should be called with a list of replicas at a single site.')

        site = list(sites)[0]

        LOG.info('Scheduling deletion of %d replicas from %s using RLFSM (operation %d)', len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            clone_replica = DatasetReplica(dataset_replica.dataset, dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block, block_replica.site)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        return clones
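
In the PhEDEx-backed schedule_copies above, a growing (dataset-level) clone is reserved as a whole dataset and a fixed clone block by block. A small hypothetical helper mirroring that bookkeeping, useful for logging or dry runs:

def reservation_items(clone_replica):
    if clone_replica.growing:
        # dataset-level reservation
        return [clone_replica.dataset.name]
    # block-level reservation
    return [br.block.full_name() for br in clone_replica.block_replicas]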
Example #9
    def _determine_copies(self, partition, requests):
        """
        @param partition       Partition we copy into.
        @param requests        [(DealerRequest, plugin)]
        @return {plugin: [new dataset replica]}
        """

        # returned dict
        copy_list = collections.defaultdict(list)
        copy_volumes = dict(
            (site, 0.) for site in self.policy.target_sites
        )  # keep track of how much we are assigning to each site

        stats = {}
        for plugin in self._plugin_priorities.keys():
            stats[plugin.name] = {}

        reject_stats = {
            'Not a target site': 0,
            'Replica exists': 0,
            'Not allowed': 0,
            'Destination is full': 0,
            'Invalid request': 0,
            'No destination available': 0,
            'Source files missing': 0
        }

        # now go through all requests
        for request, plugin in requests:
            # make sure we have all blocks complete somewhere
            if not self.policy.validate_source(request):
                reject_stats['Source files missing'] += 1
                continue

            if request.destination is None:
                # Randomly choose the destination site with probability proportional to free space
                # request.destination will be set in the function
                reject_reason = self.policy.find_destination_for(
                    request, partition)
            else:
                # Check the destination availability
                reject_reason = self.policy.check_destination(
                    request, partition)

            if reject_reason is not None:
                reject_stats[reject_reason] += 1
                continue

            LOG.debug('Copying %s to %s requested by %s', request.item_name(),
                      request.destination.name, plugin.name)
            try:
                stat = stats[plugin.name][request.destination.name]
            except KeyError:
                stat = (0, 0)

            stats[plugin.name][request.destination.name] = (
                stat[0] + 1, stat[1] + request.item_size())

            if request.block is not None:
                blocks = [request.block]
                growing = False
            elif request.blocks is not None:
                blocks = request.blocks
                growing = False
            else:
                blocks = request.dataset.blocks
                growing = True

            new_replica = DatasetReplica(request.dataset,
                                         request.destination,
                                         growing=growing,
                                         group=request.group)
            for block in blocks:
                new_replica.block_replicas.add(
                    BlockReplica(block,
                                 request.destination,
                                 request.group,
                                 size=0))

            copy_list[plugin].append(new_replica)
            # New replicas may not be in the target partition, but we add the size up to be conservative
            copy_volumes[request.destination] += request.item_size()

            if not self.policy.is_target_site(
                    request.destination.partitions[partition],
                    copy_volumes[request.destination]):
                LOG.info('%s is not a target site any more.',
                         request.destination.name)
                self.policy.target_sites.remove(request.destination)

            if sum(copy_volumes.itervalues()
                   ) > self.policy.max_total_cycle_volume:
                LOG.warning(
                    'Total copy volume has exceeded the limit. No more copies will be made.'
                )
                break

        for plugin_name in sorted(stats.keys()):
            plugin_stats = stats[plugin_name]
            for destination_name in sorted(plugin_stats.keys()):
                dest_stats = plugin_stats[destination_name]
                LOG.info('Plugin %s requests %d items (%.1f TB) to %s',
                         plugin_name, dest_stats[0], dest_stats[1] * 1.e-12,
                         destination_name)

        for reason in sorted(reject_stats.keys()):
            LOG.info('%d items rejected for [%s]', reject_stats[reason],
                     reason)

        return copy_list

    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        LOG.info(
            'Scheduling deletion of %d replicas from %s using RLFSM (operation %d)',
            len(replica_list), site.name, operation_id)

        clones = []

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                to_delete = dataset_replica.block_replicas
            else:
                to_delete = block_replicas

            for block_replica in to_delete:
                for lfile in block_replica.files():
                    self.rlfsm.desubscribe_file(block_replica.site, lfile)

            # No external dependency -> all operations are successful

            clone_replica = DatasetReplica(dataset_replica.dataset,
                                           dataset_replica.site)
            clone_replica.copy(dataset_replica)

            if block_replicas is None:
                clones.append((clone_replica, None))
            else:
                clones.append((clone_replica, []))
                for block_replica in block_replicas:
                    clone_block_replica = BlockReplica(block_replica.block,
                                                       block_replica.site,
                                                       block_replica.group)
                    clone_block_replica.copy(block_replica)
                    clone_block_replica.last_update = int(time.time())
                    clones[-1][1].append(clone_block_replica)

        if not self._read_only:
            for clone_replica, block_replicas in clones:
                if block_replicas is None:
                    self.mysql.query(
                        'INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)',
                        operation_id, clone_replica.dataset.name,
                        clone_replica.site.name)
                else:
                    for block_replica in block_replicas:
                        self.mysql.query(
                            'INSERT INTO `phedex_deletion_reservations` (`operation_id`, `item`, `site`) VALUES (%s, %s, %s)',
                            operation_id, block_replica.block.full_name(),
                            clone_replica.site.name)

        return clones
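
As the docstring of _determine_copies states, the return value maps each plugin to the list of new DatasetReplica objects assigned to it. A hypothetical consumer of that dictionary (`dealer`, `partition` and `requests` are assumed to exist):

def report_copy_decisions(dealer, partition, requests):
    copy_list = dealer._determine_copies(partition, requests)
    for plugin, replicas in copy_list.iteritems():
        for replica in replicas:
            shape = 'growing' if replica.growing else '%d blocks' % len(replica.block_replicas)
            print('%s: %s -> %s (%s)' %
                  (plugin.name, replica.dataset.name, replica.site.name, shape))
    return copy_list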
Example #11
    def _build_partition(self, inventory):
        """Create a mini-inventory consisting only of replicas in the partition."""

        partition_repository = ObjectRepository()
        partition_repository._store = inventory._store

        LOG.info('Identifying target sites.')

        partition = inventory.partitions[self.policy.partition_name]

        partition.embed_tree(partition_repository)

        # Ask each site if deletion should be triggered.
        target_sites = set()  # target sites of this detox cycle
        tape_is_target = False
        for site in inventory.sites.itervalues():
            # target_site_defs are SiteConditions, which take site_partition as the argument
            site_partition = site.partitions[partition]

            for targdef in self.policy.target_site_def:
                if targdef.match(site_partition):
                    target_sites.add(site)
                    if site.storage_type == Site.TYPE_MSS:
                        tape_is_target = True

                    break

        if len(target_sites) == 0:
            LOG.info('No site matches the target definition.')
            return partition_repository

        # Safety measure - if there are empty (no block rep) tape replicas, create block replicas with size 0 and
        # add them into the partition. We will not report back to the main process though (i.e. won't call inventory.update).
        if tape_is_target:
            for site in filter(lambda s: s.storage_type == Site.TYPE_MSS,
                               target_sites):
                for replica in site.dataset_replicas():
                    if len(replica.block_replicas) != 0:
                        continue

                    for block in replica.dataset.blocks:
                        block_replica = BlockReplica(block,
                                                     site,
                                                     Group.null_group,
                                                     size=0)
                        replica.block_replicas.add(block_replica)
                        block.replicas.add(block_replica)

                    # Add to the site partition
                    site.partitions[partition].replicas[replica] = None

        # Create a copy of the inventory, limiting to the current partition
        # We will be stripping replicas off the image as we process the policy in iterations
        LOG.info('Creating a partition image.')

        for group in inventory.groups.itervalues():
            group.embed_into(partition_repository)

        # Now clone the sites, datasets, and replicas
        # Basically a copy-paste of various embed_into() functions omitting the checks

        # make a map to avoid excessive lookups
        block_to_clone = {}
        for site in target_sites:
            site_clone = site.embed_into(partition_repository)

            site_partition = site.partitions[partition]
            site_partition_clone = site_partition.embed_tree(
                partition_repository)

            for dataset_replica, block_replica_set in site_partition.replicas.iteritems(
            ):
                dataset = dataset_replica.dataset

                try:
                    dataset_clone = partition_repository.datasets[dataset.name]

                except KeyError:
                    dataset_clone = dataset.embed_into(partition_repository)

                    for block in dataset.blocks:
                        block_clone = Block(block.name,
                                            dataset_clone,
                                            size=block.size,
                                            num_files=block.num_files,
                                            is_open=block.is_open,
                                            last_update=block.last_update,
                                            bid=block.id)
                        dataset_clone.blocks.add(block_clone)

                        block_to_clone[block] = block_clone

                if dataset_replica.group is None:
                    group = None
                else:
                    group = partition_repository.groups[
                        dataset_replica.group.name]

                replica_clone = DatasetReplica(dataset_clone,
                                               site_clone,
                                               growing=dataset_replica.growing,
                                               group=group)
                dataset_clone.replicas.add(replica_clone)
                site_clone.add_dataset_replica(replica_clone,
                                               add_block_replicas=False)

                if block_replica_set is None:
                    # all block reps in partition
                    block_replica_set = dataset_replica.block_replicas
                    full_replica = True
                    site_partition_clone.replicas[replica_clone] = None
                else:
                    full_replica = False
                    block_replica_clone_set = site_partition_clone.replicas[
                        replica_clone] = set()

                for block_replica in block_replica_set:
                    block_clone = block_to_clone[block_replica.block]
                    if block_replica.is_complete():
                        size = -1
                    else:
                        size = block_replica.size

                    block_replica_clone = BlockReplica(
                        block_clone,
                        site_clone,
                        partition_repository.groups[block_replica.group.name],
                        is_custodial=block_replica.is_custodial,
                        size=size,
                        last_update=block_replica.last_update,
                        file_ids=block_replica.file_ids)

                    replica_clone.block_replicas.add(block_replica_clone)
                    block_clone.replicas.add(block_replica_clone)

                    if not full_replica:
                        block_replica_clone_set.add(block_replica_clone)

        return partition_repository
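
_build_partition returns a pruned copy of the inventory containing only the replicas in the target partition. A hypothetical inspection of that partition image (`detox` and `inventory` are assumed to exist):

def dump_partition_image(detox, inventory):
    partition_repository = detox._build_partition(inventory)
    for dataset in partition_repository.datasets.itervalues():
        for replica in dataset.replicas:
            print('%s at %s: %d block replicas' %
                  (dataset.name, replica.site.name, len(replica.block_replicas)))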
Example #12
def new_replica_from_dataset(dataset, site, group):
    replica = DatasetReplica(dataset, site)
    for block in dataset.blocks:
        replica.block_replicas.add(BlockReplica(block, site, group, size = 0))

    return replica

    def _run_subscription_request(self, operation_id, site, group, level,
                                  subscription_list, comments):
        # Make a subscription request for potentially multiple datasets or blocks but to one site and one group
        full_catalog = collections.defaultdict(list)

        if level == 'dataset':
            for dataset in subscription_list:
                full_catalog[dataset] = []
        elif level == 'block':
            for block in subscription_list:
                full_catalog[block.dataset].append(block)

        history_sql = 'INSERT INTO `phedex_requests` (`id`, `operation_type`, `operation_id`, `approved`) VALUES (%s, \'copy\', %s, %s)'

        success = []

        # make requests in chunks
        request_catalog = {}
        chunk_size = 0
        items = []
        while len(full_catalog) != 0:
            dataset, blocks = full_catalog.popitem()
            request_catalog[dataset] = blocks

            if level == 'dataset':
                chunk_size += dataset.size
                items.append(dataset)
            elif level == 'block':
                chunk_size += sum(b.size for b in blocks)
                items.extend(blocks)

            if chunk_size < self.subscription_chunk_size and len(
                    full_catalog) != 0:
                continue

            options = {
                'node': site.name,
                'data': self._phedex.form_catalog_xml(request_catalog),
                'level': level,
                'priority': 'low',
                'move': 'n',
                'static': 'n',
                'custodial': 'n',
                'group': group.name,
                'request_only': 'n',
                'no_mail': 'n',
                'comments': comments
            }

            try:
                if self._read_only:
                    result = [{'id': 0}]
                else:
                    result = self._phedex.make_request('subscribe',
                                                       options,
                                                       method=POST)
            except:
                LOG.error('Copy %s failed.', str(options))
                # we should probably do something here
            else:
                request_id = int(result[0]['id'])  # return value is a string
                LOG.warning('PhEDEx subscription request id: %d', request_id)
                if not self._read_only:
                    self._history.db.query(history_sql, request_id,
                                           operation_id, True)

                for dataset, blocks in request_catalog.iteritems():
                    if level == 'dataset':
                        replica = DatasetReplica(dataset,
                                                 site,
                                                 growing=True,
                                                 group=group)
                        for block in dataset.blocks:
                            replica.block_replicas.add(
                                BlockReplica(block,
                                             site,
                                             group,
                                             size=0,
                                             last_update=int(time.time())))

                    else:
                        replica = DatasetReplica(dataset, site, growing=False)
                        for block in blocks:
                            replica.block_replicas.add(
                                BlockReplica(block,
                                             site,
                                             group,
                                             size=0,
                                             last_update=int(time.time())))

                    success.append(replica)

            request_catalog = {}
            chunk_size = 0
            items = []

        return success

    def schedule_deletions(self,
                           replica_list,
                           operation_id,
                           comments=''):  #override
        sites = set(r.site for r, b in replica_list)
        if len(sites) != 1:
            raise OperationalError(
                'schedule_deletions should be called with a list of replicas at a single site.'
            )

        site = list(sites)[0]

        if site.storage_type == Site.TYPE_MSS and not self.allow_tape_deletion:
            LOG.warning('Deletion from MSS not allowed by configuration.')
            return []

        if self.allow_tape_deletion and self.auto_approval:
            LOG.warning(
                'You cannot have auto-approved tape deletions. Set auto-approval to False.'
            )
            return []

        # execute the deletions in two steps: one for dataset-level and one for block-level
        datasets = []
        blocks = []

        # maps used later for cloning
        # getting ugly here.. should come up with a better way of making clones
        replica_map = {}
        block_replica_map = {}

        for dataset_replica, block_replicas in replica_list:
            if block_replicas is None:
                datasets.append(dataset_replica.dataset)
            else:
                blocks.extend(br.block for br in block_replicas)

                replica_map[dataset_replica.dataset] = dataset_replica
                block_replica_map.update(
                    (br.block, br) for br in block_replicas)

        success = []

        deleted_datasets = self._run_deletion_request(operation_id, site,
                                                      'dataset', datasets,
                                                      comments)

        for dataset in deleted_datasets:
            replica = DatasetReplica(dataset,
                                     site,
                                     growing=False,
                                     group=Group.null_group)
            success.append((replica, None))

        tmp_map = dict((dataset, []) for dataset in replica_map.iterkeys())

        deleted_blocks = self._run_deletion_request(operation_id, site,
                                                    'block', blocks, comments)

        for block in deleted_blocks:
            tmp_map[block.dataset].append(block)

        for dataset, blocks in tmp_map.iteritems():
            replica = DatasetReplica(dataset, site)
            replica.copy(replica_map[dataset])

            success.append((replica, []))
            for block in blocks:
                block_replica = BlockReplica(block, site, Group.null_group)
                block_replica.copy(block_replica_map[block])
                block_replica.last_update = int(time.time())
                success[-1][1].append(block_replica)

        return success
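
The replicas returned by _run_subscription_request encode the subscription level in the growing flag: dataset-level subscriptions come back as growing replicas, block-level ones as fixed replicas carrying only the requested blocks. A hypothetical summary of such a success list:

def summarize_subscriptions(success):
    for replica in success:
        kind = 'dataset' if replica.growing else 'block'
        print('%s-level subscription: %s at %s (%d blocks)' %
              (kind, replica.dataset.name, replica.site.name,
               len(replica.block_replicas)))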
Example #15
    def _load_replicas(self, inventory, id_group_map, id_site_map,
                       id_dataset_map, id_block_maps):
        sql = 'SELECT dr.`dataset_id`, dr.`site_id`,'
        sql += ' br.`block_id`, br.`group_id`, br.`is_complete`, br.`is_custodial`, brs.`size`, UNIX_TIMESTAMP(br.`last_update`)'
        sql += ' FROM `dataset_replicas` AS dr'
        sql += ' INNER JOIN `blocks` AS b ON b.`dataset_id` = dr.`dataset_id`'
        sql += ' LEFT JOIN `block_replicas` AS br ON (br.`block_id`, br.`site_id`) = (b.`id`, dr.`site_id`)'
        sql += ' LEFT JOIN `block_replica_sizes` AS brs ON (brs.`block_id`, brs.`site_id`) = (b.`id`, dr.`site_id`)'

        if self._mysql.table_exists('groups_load_tmp'):
            sql += ' INNER JOIN `groups_load_tmp` AS gt ON gt.`id` = br.`group_id`'

        if self._mysql.table_exists('sites_load_tmp'):
            sql += ' INNER JOIN `sites_load_tmp` AS st ON st.`id` = dr.`site_id`'

        if self._mysql.table_exists('datasets_load_tmp'):
            sql += ' INNER JOIN `datasets_load_tmp` AS dt ON dt.`id` = dr.`dataset_id`'

        sql += ' ORDER BY dr.`dataset_id`, dr.`site_id`'

        # Blocks are left joined -> there will be (# sites) x (# blocks) entries per dataset

        _dataset_id = 0
        _site_id = 0
        dataset_replica = None
        for dataset_id, site_id, block_id, group_id, is_complete, b_is_custodial, b_size, b_last_update in self._mysql.xquery(
                sql):
            if dataset_id != _dataset_id:
                _dataset_id = dataset_id

                dataset = id_dataset_map[_dataset_id]
                dataset.replicas.clear()

                id_block_map = id_block_maps[dataset_id]

            if site_id != _site_id:
                _site_id = site_id
                site = id_site_map[site_id]

            if dataset_replica is None or dataset is not dataset_replica.dataset or site is not dataset_replica.site:
                if dataset_replica is not None:
                    # this is the last dataset_replica
                    # add to dataset and site after filling all block replicas
                    # this does not matter for the dataset, but for the site there is some heavy
                    # computation needed when a replica is added
                    dataset_replica.dataset.replicas.add(dataset_replica)
                    dataset_replica.site.add_dataset_replica(
                        dataset_replica, add_block_replicas=True)

                dataset_replica = DatasetReplica(dataset, site)

            if block_id is None:
                # this block replica does not exist
                continue

            block = id_block_map[block_id]
            group = id_group_map[group_id]

            block_replica = BlockReplica(
                block,
                site,
                group=group,
                is_complete=(is_complete == 1),
                is_custodial=(b_is_custodial == 1),
                size=block.size if b_size is None else b_size,
                last_update=b_last_update)

            dataset_replica.block_replicas.add(block_replica)
            block.replicas.add(block_replica)

        if dataset_replica is not None:
            # one last bit
            dataset_replica.dataset.replicas.add(dataset_replica)
            dataset_replica.site.add_dataset_replica(dataset_replica,
                                                     add_block_replicas=True)
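
_load_replicas attaches a DatasetReplica to its dataset and site only after all of its block replicas have been filled in, flushing the previous replica whenever the (dataset, site) key changes and once more after the loop. A minimal, self-contained sketch of that grouping pattern over plain (key, value) rows:

def group_rows(rows):
    # rows: (key, value) pairs sorted by key, as guaranteed by the ORDER BY above
    current_key = None
    bucket = []
    for key, value in rows:
        if key != current_key:
            if bucket:
                yield current_key, bucket
            current_key = key
            bucket = []
        bucket.append(value)
    # one last flush, mirroring the block after the loop above
    if bucket:
        yield current_key, bucket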