def chunk_move(self, path, chunk_id):
    meta = self.load_chunk_metadata(path, chunk_id)
    container_id = meta['container_id']
    content_id = meta['content_id']
    chunk_id = meta['chunk_id']

    try:
        content = self.content_factory.get(container_id, content_id)
    except ContentNotFound:
        raise exc.OrphanChunk('Content not found')

    new_chunk = content.move_chunk(chunk_id)

    self.logger.info('moved chunk http://%s/%s to %s',
                     self.address, chunk_id, new_chunk['url'])

    if self.allow_links:
        old_links = meta['links']
        for chunk_id, fullpath in old_links.iteritems():
            account, container, _, _, content_id = \
                decode_fullpath(fullpath)
            container_id = cid_from_name(account, container)
            try:
                content = self.content_factory.get(container_id, content_id)
            except ContentNotFound:
                raise exc.OrphanChunk('Content not found')

            new_linked_chunk = content.move_linked_chunk(
                chunk_id, new_chunk['url'])

            self.logger.info('moved chunk http://%s/%s to %s',
                             self.address, chunk_id,
                             new_linked_chunk['url'])
def chunk_audit(self, path):
    with open(path) as f:
        try:
            meta = read_chunk_metadata(f)
        except exc.MissingAttribute as e:
            raise exc.FaultyChunk('Missing extended attribute %s' % e)
        size = int(meta['chunk_size'])
        md5_checksum = meta['chunk_hash'].lower()
        reader = ChunkReader(f, size, md5_checksum)
        with closing(reader):
            for buf in reader:
                buf_len = len(buf)
                self.bytes_running_time = ratelimit(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    increment=buf_len)
                self.bytes_processed += buf_len
                self.total_bytes_processed += buf_len

        try:
            content_cid = meta['content_cid']
            content_path = meta['content_path']
            content_attr, data = self.container_client.content_show(
                cid=content_cid, path=content_path)

            # Check chunk data
            chunks_nb = 0
            chunk_data = None
            for c in data:
                if c['url'].endswith(meta['chunk_id']):
                    chunks_nb += 1  # FIXME: won't work with DUP
                    chunk_data = c
            if not chunk_data:
                raise exc.OrphanChunk('Not found in content')

            if chunk_data['size'] != int(meta['chunk_size']):
                raise exc.FaultyChunk('Invalid chunk size found')

            if chunk_data['hash'] != meta['chunk_hash']:
                raise exc.FaultyChunk('Invalid chunk hash found')

            if chunk_data['pos'] != meta['chunk_pos']:
                raise exc.FaultyChunk('Invalid chunk position found')

            # Check content data
            if content_attr['length'] != meta['content_size']:
                raise exc.FaultyChunk('Invalid content size found')

            if chunks_nb != int(meta['content_chunksnb']):
                self.logger.warn('Invalid number of chunks found')
                # TODO: really count chunks and enable the exception
                # raise exc.FaultyChunk('Invalid number of chunks found')

        except exc.NotFound:
            raise exc.OrphanChunk('Chunk not found in container')
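# The audit loop above throttles disk reads through a `ratelimit` helper:
# it takes the deadline returned by the previous call, a maximum rate in
# units per second, and the amount of work just done, and sleeps when the
# caller is ahead of schedule. A minimal sketch of such a helper, assuming
# a simple deadline-based design (not the verbatim oio implementation):
import time


def ratelimit(run_time, max_rate, increment=1):
    """Sleep just enough to keep the caller under `max_rate` units/second.

    `run_time` is the deadline computed by the previous call (0 on the
    first call); the return value must be fed back into the next call,
    as `self.bytes_running_time` is above.
    """
    if max_rate <= 0:
        return run_time
    clock = time.time()
    # Each unit of work "costs" 1/max_rate seconds of wall-clock time.
    time_per_unit = increment / float(max_rate)
    if run_time > clock:
        # Ahead of schedule: wait until the deadline before continuing.
        time.sleep(run_time - clock)
    return max(run_time, clock) + time_per_unit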
def chunk_move(self, path):
    meta = self.load_chunk_metadata(path)
    content_cid = meta['content_cid']
    content_path = meta['content_path']

    chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

    try:
        _, data = self.container_client.content_show(
            cid=content_cid, path=content_path)
    except exc.NotFound:
        raise exc.OrphanChunk('Content not found')

    current_chunk = None
    notin = []
    for c in data:
        if c['pos'] == meta['chunk_pos']:
            notin.append(c)
    for c in notin:
        if c['url'] == chunk_url:
            current_chunk = c
            notin.remove(c)
    if not current_chunk:
        raise exc.OrphanChunk('Chunk not found in content')

    spare_data = {'notin': notin, 'broken': [current_chunk], 'size': 0}
    spare_resp = self.container_client.content_spare(
        cid=content_cid, path=content_path, data=spare_data)

    new_chunk = spare_resp['chunks'][0]
    self.blob_client.chunk_copy(current_chunk['url'], new_chunk['id'])

    old = [{'type': 'chunk',
            'id': current_chunk['url'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])}]
    new = [{'type': 'chunk',
            'id': new_chunk['id'],
            'hash': meta['chunk_hash'],
            'size': int(meta['chunk_size'])}]
    update_data = {'old': old, 'new': new}

    self.container_client.container_raw_update(
        cid=content_cid, data=update_data)

    self.blob_client.chunk_delete(current_chunk['url'])

    self.logger.info('moved chunk %s to %s',
                     current_chunk['url'], new_chunk['id'])
def chunk_file_audit(self, chunk_file, chunk_id):
    try:
        meta, _ = read_chunk_metadata(chunk_file, chunk_id)
    except exc.MissingAttribute as err:
        raise exc.FaultyChunk(err)
    size = int(meta['chunk_size'])
    md5_checksum = meta['chunk_hash'].lower()
    reader = ChunkReader(chunk_file, size, md5_checksum,
                         compression=meta.get("compression", ""))
    with closing(reader):
        for buf in reader:
            buf_len = len(buf)
            self.bytes_running_time = ratelimit(
                self.bytes_running_time,
                self.max_bytes_per_second,
                increment=buf_len)
            self.bytes_processed += buf_len
            self.total_bytes_processed += buf_len

    try:
        container_id = meta['container_id']
        content_id = meta['content_id']
        _obj_meta, data = self.container_client.content_locate(
            cid=container_id, content=content_id, properties=False)

        # Check chunk data
        chunk_data = None
        metachunks = set()
        for c in data:
            if c['url'].endswith(meta['chunk_id']):
                metachunks.add(c['pos'].split('.', 2)[0])
                chunk_data = c
        if not chunk_data:
            raise exc.OrphanChunk('Not found in content')

        metachunk_size = meta.get('metachunk_size')
        if metachunk_size is not None \
                and chunk_data['size'] != int(metachunk_size):
            raise exc.FaultyChunk('Invalid metachunk size found')

        metachunk_hash = meta.get('metachunk_hash')
        if metachunk_hash is not None \
                and chunk_data['hash'] != meta['metachunk_hash']:
            raise exc.FaultyChunk('Invalid metachunk hash found')

        if chunk_data['pos'] != meta['chunk_pos']:
            raise exc.FaultyChunk('Invalid chunk position found')

    except exc.NotFound:
        raise exc.OrphanChunk('Chunk not found in container')
def chunk_move(self, path, chunk_id):
    meta = self.load_chunk_metadata(path, chunk_id)
    container_id = meta['container_id']
    content_id = meta['content_id']
    chunk_id = meta['chunk_id']

    # Maybe skip the chunk because it doesn't match the size constraint
    chunk_size = int(meta['chunk_size'])
    min_chunk_size = int(self.conf.get('min_chunk_size', 0))
    max_chunk_size = int(self.conf.get('max_chunk_size', 0))
    if chunk_size < min_chunk_size:
        self.logger.debug("SKIP %s too small", path)
        return
    if max_chunk_size > 0 and chunk_size > max_chunk_size:
        self.logger.debug("SKIP %s too big", path)
        return

    # Start moving the chunk
    try:
        content = self.content_factory.get(container_id, content_id)
    except ContentNotFound:
        raise exc.OrphanChunk('Content not found')

    new_chunk = content.move_chunk(
        chunk_id, service_id=self.service_id,
        fake_excluded_chunks=self.fake_excluded_chunks)

    self.logger.info('moved chunk http://%s/%s to %s',
                     self.service_id, chunk_id, new_chunk['url'])

    if self.allow_links:
        old_links = meta['links']
        for chunk_id, fullpath in old_links.items():
            # pylint: disable=unbalanced-tuple-unpacking
            account, container, _, _, content_id = \
                decode_fullpath(fullpath)
            container_id = cid_from_name(account, container)
            try:
                content = self.content_factory.get(
                    container_id, content_id)
            except ContentNotFound:
                raise exc.OrphanChunk('Content not found')

            new_linked_chunk = content.move_linked_chunk(
                chunk_id, new_chunk['url'])

            self.logger.info('moved chunk http://%s/%s to %s',
                             self.service_id, chunk_id,
                             new_linked_chunk['url'])
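# The size constraints above come from the mover's configuration. A minimal
# sketch of the relevant keys (key names taken from the code above, values
# hypothetical; 0 means "no limit" for max_chunk_size):
conf = {
    'min_chunk_size': 0,           # never skip chunks for being small
    'max_chunk_size': 104857600,   # skip chunks larger than 100 MiB
}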
def rebuild_chunk(self, chunk_id):
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None:
        raise exc.OrphanChunk("Chunk not found in content")

    duplicate_chunks = self.chunks.filter(
        pos=current_chunk.pos).exclude(id=chunk_id).all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    spare_urls = self._get_spare_chunk(duplicate_chunks, [current_chunk])

    uploaded = False
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(src.url, spare_urls[0])
            self.logger.debug("copy chunk from %s to %s",
                              src.url, spare_urls[0])
            uploaded = True
            break
        except Exception as e:
            self.logger.warn("Failed to copy chunk from %s to %s: %s",
                             src.url, spare_urls[0], str(e.message))
    if not uploaded:
        raise UnrecoverableContent("No copy available of missing chunk")

    self._update_spare_chunk(current_chunk, spare_urls[0])
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos

    duplicate_chunks = self.chunks.filter(
        pos=chunk_pos).exclude(id=chunk_id).all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    if current_chunk is None:
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(
        duplicate_chunks, broken_list)
    spare_url = spare_urls[0]

    # Actually create the spare chunk, by duplicating a good one
    uploaded = False
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(
                src.url, spare_url, chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id)
            self.logger.debug('Chunk copied from %s to %s, '
                              'registering it', src.url, spare_url)
            uploaded = True
            break
        except Exception as err:
            self.logger.warn("Failed to copy chunk from %s to %s: %s %s",
                             src.url, spare_url, type(err),
                             str(err.message))
    if not uploaded:
        raise UnrecoverableContent("No copy available of missing chunk")

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url)
    else:
        self._update_spare_chunk(current_chunk, spare_url)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url)
def chunk_audit(self, path):
    with open(path) as f:
        try:
            meta = read_chunk_metadata(f)
        except exc.MissingAttribute as e:
            raise exc.FaultyChunk('Missing extended attribute %s' % e)
        size = int(meta['chunk_size'])
        md5_checksum = meta['chunk_hash'].lower()
        reader = ChunkReader(f, size, md5_checksum)
        with closing(reader):
            for buf in reader:
                buf_len = len(buf)
                self.bytes_running_time = ratelimit(
                    self.bytes_running_time,
                    self.max_bytes_per_second,
                    increment=buf_len)
                self.bytes_processed += buf_len
                self.total_bytes_processed += buf_len

        try:
            container_id = meta['container_id']
            content_path = meta['content_path']
            content_attr, data = self.container_client.content_show(
                cid=container_id, path=content_path)

            # Check chunk data
            chunk_data = None
            metachunks = set()
            for c in data:
                if c['url'].endswith(meta['chunk_id']):
                    metachunks.add(c['pos'].split('.', 2)[0])
                    chunk_data = c
            if not chunk_data:
                raise exc.OrphanChunk('Not found in content')

            if chunk_data['size'] != int(meta['chunk_size']):
                raise exc.FaultyChunk('Invalid chunk size found')

            if chunk_data['hash'] != meta['chunk_hash']:
                raise exc.FaultyChunk('Invalid chunk hash found')

            if chunk_data['pos'] != meta['chunk_pos']:
                raise exc.FaultyChunk('Invalid chunk position found')

        except exc.NotFound:
            raise exc.OrphanChunk('Chunk not found in container')
def move_chunk(self, chunk_id, check_quality=False, dry_run=False,
               max_attempts=3, **kwargs):
    """
    Move a chunk to another place.

    Optionally ensure that the new place is an improvement
    over the current one.
    """
    if isinstance(chunk_id, Chunk):
        current_chunk = chunk_id
        chunk_id = current_chunk.id
    else:
        current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None or current_chunk not in self.chunks:
        raise exc.OrphanChunk("Chunk not found in content")

    other_chunks = self.chunks.filter(
        metapos=current_chunk.metapos).exclude(id=chunk_id).all()

    spare_urls, qualities = self._get_spare_chunk(
        other_chunks, [current_chunk],
        check_quality=check_quality, max_attempts=max_attempts,
        **kwargs)

    if dry_run:
        self.logger.info("Dry-run: would copy chunk from %s to %s",
                         current_chunk.url, spare_urls[0])
    else:
        self.logger.info("Copying chunk from %s to %s",
                         current_chunk.url, spare_urls[0])
        # TODO(FVE): retry to copy (max_attempts times)
        self.blob_client.chunk_copy(
            current_chunk.url, spare_urls[0], chunk_id=chunk_id,
            fullpath=self.full_path, cid=self.container_id,
            path=self.path, version=self.version,
            content_id=self.content_id, **kwargs)

        self._update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url, **kwargs)
        except Exception as err:
            self.logger.warn("Failed to delete chunk %s: %s",
                             current_chunk.url, err)

    current_chunk.url = spare_urls[0]
    current_chunk.quality = qualities[current_chunk.url]

    return current_chunk.raw()
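# A minimal usage sketch for move_chunk(), assuming a ContentFactory bound
# to a live cluster as in the mover code above; the helper name and the
# IDs passed in are hypothetical, not part of the original module:
def demo_move_chunk(content_factory, container_id, content_id, chunk_id):
    content = content_factory.get(container_id, content_id)
    # Preview the move first, then perform it and ask for a placement
    # at least as good as the current one.
    content.move_chunk(chunk_id, dry_run=True)
    new_raw = content.move_chunk(chunk_id, check_quality=True)
    return new_raw['url']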
def chunk_move(self, path):
    meta = self.load_chunk_metadata(path)
    container_id = meta['container_id']
    content_id = meta['content_id']
    chunk_id = meta['chunk_id']
    chunk_url = 'http://%s/%s' % (self.address, meta['chunk_id'])

    try:
        content = self.content_factory.get(container_id, content_id)
    except ContentNotFound:
        raise exc.OrphanChunk('Content not found')

    new_chunk = content.move_chunk(chunk_id)

    self.logger.info('moved chunk %s to %s', chunk_url, new_chunk['url'])
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None):
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos

    duplicate_chunks = self.chunks.filter(
        pos=chunk_pos).exclude(id=chunk_id).all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    if current_chunk is None:
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls = self._get_spare_chunk(duplicate_chunks, broken_list)

    uploaded = False
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(src.url, spare_urls[0])
            self.logger.debug('Chunk copied from %s to %s, '
                              'registering it', src.url, spare_urls[0])
            uploaded = True
            break
        except Exception as err:
            self.logger.warn("Failed to copy chunk from %s to %s: %s",
                             src.url, spare_urls[0], str(err.message))
    if not uploaded:
        raise UnrecoverableContent("No copy available of missing chunk")

    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_urls[0])
    else:
        self._update_spare_chunk(current_chunk, spare_urls[0])
    self.logger.info('Chunk %s repaired in %s',
                     chunk_id or chunk_pos, spare_urls[0])
def move_linked_chunk(self, chunk_id, from_url):
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None:
        raise exc.OrphanChunk("Chunk not found in content")

    _, to_url = self.blob_client.chunk_link(from_url, None, self.full_path)
    self.logger.debug("link chunk %s from %s to %s",
                      chunk_id, from_url, to_url)

    self._update_spare_chunk(current_chunk, to_url)

    try:
        self.blob_client.chunk_delete(current_chunk.url)
    except Exception as err:
        self.logger.warn("Failed to delete chunk %s: %s",
                         current_chunk.url, err)

    current_chunk.url = to_url

    return current_chunk.raw()
def chunk_rebuild(self, container_id, content_id, chunk_id):
    self.logger.info('Rebuilding (container %s, content %s, chunk %s)',
                     container_id, content_id, chunk_id)
    try:
        content = self.content_factory.get(container_id, content_id)
    except ContentNotFound:
        raise exc.OrphanChunk('Content not found')

    chunk = content.chunks.filter(id=chunk_id).one()
    if chunk is None:
        raise OrphanChunk("Chunk not found in content")
    chunk_size = chunk.size

    content.rebuild_chunk(chunk_id, allow_same_rawx=self.allow_same_rawx)

    self.rdir_client.chunk_delete(
        self.volume, container_id, content_id, chunk_id)

    self.bytes_processed += chunk_size
    self.total_bytes_processed += chunk_size
def move_chunk(self, chunk_id, service_id=None, check_quality=False,
               dry_run=False, max_attempts=3, **kwargs):
    """
    Move a chunk to another place.

    Optionally ensure that the new place is an improvement
    over the current one.
    """
    if isinstance(chunk_id, Chunk):
        current_chunk = chunk_id
        chunk_id = current_chunk.id
        service_id = current_chunk.host
    else:
        candidates = self.chunks.filter(id=chunk_id)
        if len(candidates) > 1:
            if service_id is None:
                raise exc.ChunkException(
                    "Several chunks with ID %s and no service ID"
                    % (chunk_id, ))
            candidates = candidates.filter(host=service_id)
        current_chunk = candidates.one()

    if current_chunk is None or current_chunk not in self.chunks:
        raise exc.OrphanChunk("Chunk not found in content")

    if service_id:
        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(host=service_id).all()
    else:
        other_chunks = self.chunks.filter(
            metapos=current_chunk.metapos).exclude(id=chunk_id).all()

    spare_urls, qualities = self._get_spare_chunk(
        other_chunks, [current_chunk], position=current_chunk.pos,
        check_quality=check_quality, max_attempts=max_attempts,
        **kwargs)

    # Sort chunks by score to try to copy with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    duplicate_chunks = self.chunks \
        .filter(pos=current_chunk.pos) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True) \
        .all()

    if dry_run:
        self.logger.info('Dry-run: would copy chunk from %s to %s',
                         duplicate_chunks[0].url, spare_urls[0])
    else:
        # To reduce the load on the rawx to decommission,
        # use one of the rawx with a copy of the chunk to move.
        for src in duplicate_chunks:
            try:
                self.logger.info('Copying chunk from %s to %s',
                                 src.url, spare_urls[0])
                # TODO(FVE): retry to copy (max_attempts times)
                self.blob_client.chunk_copy(
                    src.url, spare_urls[0], chunk_id=chunk_id,
                    fullpath=self.full_path, cid=self.container_id,
                    path=self.path, version=self.version,
                    content_id=self.content_id, **kwargs)
                break
            except Exception as err:
                self.logger.warn('Failed to copy chunk from %s to %s: %s',
                                 src.url, spare_urls[0], err)
                if len(duplicate_chunks) == 1:
                    raise
        else:
            raise UnrecoverableContent(
                'No copy available of chunk to move')

        self._update_spare_chunk(current_chunk, spare_urls[0])

        try:
            self.blob_client.chunk_delete(current_chunk.url, **kwargs)
        except Exception as err:
            self.logger.warn("Failed to delete chunk %s: %s",
                             current_chunk.url, err)

    current_chunk.url = spare_urls[0]
    current_chunk.quality = qualities[current_chunk.url]

    return current_chunk.raw()
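# The sort above relies on a helper that turns a chunk's service score into
# a randomized sort key, so that near-equal scores do not always produce
# the same ordering. A minimal sketch of such a helper; the `quality` /
# `final_score` fields and the sampling range are assumptions, not the
# verbatim oio implementation:
import random


def _get_weighted_random_score(chunk):
    """Return a random value weighted by the chunk's service score."""
    score = int(chunk.get('quality', {}).get('final_score', 0))
    if score <= 0:
        return 0
    # Draw from [score/2, score]: a 94 can occasionally outrank a 95,
    # but a 50 almost never outranks a 93.
    return random.randrange(score // 2, score + 1)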
def rebuild_chunk(self, chunk_id, allow_same_rawx=False, chunk_pos=None,
                  allow_frozen_container=False):
    # Identify the chunk to rebuild
    current_chunk = self.chunks.filter(id=chunk_id).one()
    if current_chunk is None and chunk_pos is None:
        raise exc.OrphanChunk("Chunk not found in content")
    elif chunk_pos is None:
        chunk_pos = current_chunk.pos

    # Sort chunks by score to try to copy with higher score.
    # When scores are close together (e.g. [95, 94, 94, 93, 50]),
    # don't always start with the highest element.
    duplicate_chunks = self.chunks \
        .filter(pos=chunk_pos) \
        .exclude(id=chunk_id) \
        .sort(key=lambda chunk: _get_weighted_random_score(chunk.raw()),
              reverse=True) \
        .all()
    if len(duplicate_chunks) == 0:
        raise UnrecoverableContent("No copy of missing chunk")

    if current_chunk is None:
        chunk = {}
        chunk['hash'] = duplicate_chunks[0].checksum
        chunk['size'] = duplicate_chunks[0].size
        chunk['url'] = ''
        chunk['pos'] = chunk_pos
        current_chunk = Chunk(chunk)

    # Find a spare chunk address
    broken_list = list()
    if not allow_same_rawx and chunk_id is not None:
        broken_list.append(current_chunk)
    spare_urls, _quals = self._get_spare_chunk(
        duplicate_chunks, broken_list)
    spare_url = spare_urls[0]

    # Actually create the spare chunk, by duplicating a good one
    for src in duplicate_chunks:
        try:
            self.blob_client.chunk_copy(
                src.url, spare_url, chunk_id=chunk_id,
                fullpath=self.full_path, cid=self.container_id,
                path=self.path, version=self.version,
                content_id=self.content_id)
            self.logger.debug('Chunk copied from %s to %s, '
                              'registering it', src.url, spare_url)
            break
        except Exception as err:
            self.logger.warn("Failed to copy chunk from %s to %s: %s %s",
                             src.url, spare_url, type(err), err)
    else:
        raise UnrecoverableContent("No copy available of missing chunk")

    # Register the spare chunk in object's metadata
    if chunk_id is None:
        self._add_raw_chunk(current_chunk, spare_url,
                            frozen=allow_frozen_container)
    else:
        self._update_spare_chunk(current_chunk, spare_url,
                                 frozen=allow_frozen_container)
    self.logger.debug('Chunk %s repaired in %s',
                      chunk_id or chunk_pos, spare_url)

    return current_chunk.size
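# A usage sketch for rebuild_chunk(), mirroring what chunk_rebuild() above
# does; the helper name and IDs are hypothetical, and a ContentFactory
# bound to a live cluster is assumed:
def demo_rebuild(content_factory, container_id, content_id, chunk_id):
    content = content_factory.get(container_id, content_id)
    # Rebuild a lost chunk; pass chunk_pos instead of chunk_id when the
    # chunk entry is already gone from the object's metadata.
    rebuilt_size = content.rebuild_chunk(chunk_id, allow_same_rawx=False)
    return rebuilt_size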