def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                  allow_frozen_container=False):
    """
    Rebuild a lost chunk by copying one of its replicas to a spare rawx.

    :param chunk_id: ID of the chunk to rebuild, or None if only the
        position is known.
    :param allow_same_rawx: allow the spare chunk to be located on the
        same rawx service that hosted the lost chunk.
    :param chunk_pos: position of the chunk to rebuild (used when the
        chunk is not registered in the object's metadata).
    :param allow_frozen_container: perform the metadata update even if
        the container is frozen.
    :returns: the size of the rebuilt chunk.
    :raises exc.OrphanChunk: the chunk is not registered and no
        position was provided.
    :raises UnrecoverableContent: no replica of the chunk is available,
        or none of them could be copied.
    """
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos

    # Sort chunks by score to try to copy with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    duplicate_chunks = self.chunks \
        .filter(pos=chunk_pos) \
        .exclude(id=chunk_id) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True) \
        .all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    if current_chunk is None:
        # The chunk is not registered: forge a placeholder description
        # from one of its replicas (empty URL until the copy succeeds).
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(duplicate_chunks,
                                               broken_list)
    spare_url = spare_urls[0]

    # Actually create the spare chunk, by duplicating a good one
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(
                src.url, spare_url, chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id)
            self.logger.debug('Chunk copied from %s to %s, registering it',
                              src.url, spare_url)
            break
        except Exception as err:
            # Logger.warn() is a deprecated alias of warning()
            self.logger.warning(
                "Failed to copy chunk from %s to %s: %s %s",
                src.url, spare_url, type(err), err)
    else:
        # Loop exhausted without a successful copy
        raise UnrecoverableContent("No copy available of missing chunk")

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url,
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url,
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url)

    return current_chunk.size
def move_chunk(self, chunk_id, service_id=None, check_quality=False,
               dry_run=False, max_attempts=3, **kwargs):
    """
    Move a chunk to another place. Optionally ensure that the
    new place is an improvement over the current one.

    :param chunk_id: ID of the chunk to move, or a `Chunk` object
        (in which case the service ID is taken from the chunk itself).
    :param service_id: ID of the rawx hosting the chunk; required to
        disambiguate when several chunks share the same ID.
    :param check_quality: ensure the new location is an improvement
        over the current one.
    :param dry_run: only log what would be done, do not actually
        copy, register or delete anything.
    :param max_attempts: maximum number of attempts when looking for
        a spare chunk location.
    :returns: the raw (dict) description of the moved chunk.
    :raises exc.ChunkException: several chunks share the ID and no
        service ID was given.
    :raises exc.OrphanChunk: the chunk is not part of this content.
    :raises UnrecoverableContent: no copy of the chunk could be read.
    """
    if isinstance(chunk_id, Chunk):
        current_chunk = chunk_id
        chunk_id = current_chunk.id
        service_id = current_chunk.host
    else:
        candidates = self.chunks.filter(id=chunk_id)
        if len(candidates) > 1:
            if service_id is None:
                raise exc.ChunkException(
                    "Several chunks with ID %s and no service ID" % (
                        chunk_id, ))
            candidates = candidates.filter(host=service_id)
        current_chunk = candidates.one()

    if current_chunk is None or current_chunk not in self.chunks:
        raise exc.OrphanChunk("Chunk not found in content")

    # Exclude the current location from the spare candidates.
    if service_id:
        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(host=service_id).all()
    else:
        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

    spare_urls, qualities = self._get_spare_chunk(
        other_chunks, [current_chunk], position=current_chunk.pos,
        check_quality=check_quality, max_attempts=max_attempts, **kwargs)

    # Sort chunks by score to try to copy with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    duplicate_chunks = self.chunks \
        .filter(pos=current_chunk.pos) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True) \
        .all()

    if dry_run:
        self.logger.info('Dry-run: would copy chunk from %s to %s',
                         duplicate_chunks[0].url, spare_urls[0])
    else:
        # To reduce the load on the rawx to decommission,
        # use one of the rawx with a copy of the chunk to move.
        for src in duplicate_chunks:
            try:
                self.logger.info('Copying chunk from %s to %s',
                                 src.url, spare_urls[0])
                # TODO(FVE): retry to copy (max_attempts times)
                self.blob_client.chunk_copy(
                    src.url, spare_urls[0], chunk_id=chunk_id,
                    fullpath=self.full_path, cid=self.container_id,
                    path=self.path, version=self.version,
                    content_id=self.content_id, **kwargs)
                break
            except Exception as err:
                # Logger.warn() is a deprecated alias of warning()
                self.logger.warning(
                    'Failed to copy chunk from %s to %s: %s',
                    src.url, spare_urls[0], err)
                if len(duplicate_chunks) == 1:
                    # Only one possible source: propagate its error
                    raise
        else:
            # Loop exhausted without a successful copy
            raise UnrecoverableContent(
                'No copy available of chunk to move')

        self._update_spare_chunk(current_chunk, spare_urls[0])

        # Deleting the old chunk is best-effort: the move already
        # succeeded, a leftover chunk is only a space leak.
        try:
            self.blob_client.chunk_delete(current_chunk.url, **kwargs)
        except Exception as err:
            self.logger.warning("Failed to delete chunk %s: %s",
                                current_chunk.url, err)

    current_chunk.url = spare_urls[0]
    current_chunk.quality = qualities[current_chunk.url]

    return current_chunk.raw()
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                  allow_frozen_container=False):
    """
    Regenerate a lost EC chunk from the other chunks of its metachunk,
    upload it to a spare rawx service, then register it in the object's
    metadata.
    """
    # Locate the chunk we have to rebuild.
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise OrphanChunk("Chunk not found in content")
    if current_chunk is None:
        # Try to resolve the chunk from its position only.
        current_chunk = self.chunks.filter(pos=chunk_pos).one()
        if current_chunk is None:
            # Not registered at all: forge a placeholder description.
            current_chunk = Chunk({'pos': chunk_pos, 'url': ''})
        else:
            chunk_id = current_chunk.id
            self.logger.debug('Chunk at pos %s has id %s',
                              chunk_pos, chunk_id)

    # Sort chunks by score to try to rebuild with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    peers = self.chunks \
        .filter(metapos=current_chunk.metapos) \
        .exclude(id=chunk_id, pos=chunk_pos) \
        .sort(key=lambda chk: _get_weighted_random_score(chk.raw()),
              reverse=True)

    if chunk_id is None:
        # Placeholder chunk: borrow size/checksum from a sibling.
        current_chunk.size = peers[0].size
        current_chunk.checksum = peers[0].checksum

    # Find a spare chunk address, avoiding the lost chunk's rawx
    # unless explicitly allowed.
    broken = list()
    if chunk_id is not None and not allow_same_rawx:
        broken.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(peers.all(), broken)
    new_chunk = Chunk({'pos': current_chunk.pos, 'url': spare_urls[0]})

    # Regenerate the lost chunk's data from the surviving chunks.
    stream = ECRebuildHandler(
        peers.raw(), current_chunk.subpos, self.storage_method).rebuild()

    # Upload the regenerated data to the spare location.
    # FIXME: 'chunk_method', 'id', 'policy' and 'version' should be
    # 'content_chunkmethod', 'content_id', 'content_policy' and
    # 'content_version' everywhere, but sadly they aren't.
    meta = {
        'chunk_id': new_chunk.id,
        'chunk_pos': current_chunk.pos,
        'container_id': self.container_id,
        'chunk_method': self.chunk_method,
        'id': self.content_id,
        'content_path': self.path,
        'policy': self.policy,
        'version': self.version,
        'metachunk_hash': current_chunk.checksum,
        'metachunk_size': current_chunk.size,
        'full_path': self.full_path,
        'oio_version': OIO_VERSION,
    }
    bytes_transferred, _ = self.blob_client.chunk_put(
        spare_urls[0], meta, GeneratorIO(stream, sub_generator=PY2))

    # Register the spare chunk in object's metadata.
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_urls[0],
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_urls[0],
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_urls[0])

    return bytes_transferred