def _process_split(self, split_doc, chunk_dist):
    """ Processes a single split event backwards, transforming a given
        ChunkDistribution into a new one in which the two chunks produced
        by the split are merged back into the one original chunk.

        split_doc:  changelog document of the split. Chunk objects are
                    built from its 'left', 'right' and 'before' parts, and
                    its 'time' field is read for the event time.
        chunk_dist: distribution that contains both halves of the split.

        Returns a shallow copy of chunk_dist in which the left and right
        chunks are replaced by the 'before' chunk.

        Raises ValueError if either half cannot be found in chunk_dist, if
        a found chunk differs from its description in split_doc, or if the
        resulting distribution fails its own check().

        Side effects on the input: the two chunks found in chunk_dist get
        their shard_version overwritten and their parent set to the
        'before' chunk; chunk_dist gets its time and what attributes set.
    """
    # Chunk objects as described by the split document
    left_split = Chunk(split_doc, 'left')
    right_split = Chunk(split_doc, 'right')
    before_split = Chunk(split_doc, 'before')

    # Chunk objects found in the distribution; a ValueError from find()
    # is re-raised with a message naming the half that is missing
    try:
        left_chunk = chunk_dist.find(left_split.range)
    except ValueError:
        raise ValueError("Error processing split: can't find left chunk in distribution.")

    try:
        right_chunk = chunk_dist.find(right_split.range)
    except ValueError:
        raise ValueError("Error processing split: can't find right chunk in distribution.")

    # set shards to be equal (they are not in split_doc), so that the
    # comparison below is not affected by them
    left_split.shard = left_chunk.shard
    right_split.shard = right_chunk.shard

    # update any shard versions if they are different (retrospectively,
    # from moved chunks)
    left_chunk.shard_version = left_split.shard_version
    right_chunk.shard_version = right_split.shard_version

    if left_split != left_chunk:
        raise ValueError("Error processing split: left chunks not the same. %s <--> %s" % (left_split, left_chunk))

    # a mismatch on the right side is just as fatal as one on the left:
    # raise instead of printing the exception object and carrying on
    if right_split != right_chunk:
        raise ValueError("Error processing split: right chunks not the same. \n%s <--> %s" % (right_split, right_chunk))

    # create a shallow copy of the original chunk distribution
    new_dist = copy(chunk_dist)

    # now remove these two chunks and insert a new one
    new_dist.remove(left_chunk)
    new_dist.remove(right_chunk)

    # link before chunk to the left and right (parent / children relationship)
    before_split.shard = left_chunk.shard
    left_chunk.parent = before_split
    right_chunk.parent = before_split
    before_split.children = [left_chunk, right_chunk]

    new_dist.insert(before_split)

    # NOTE(review): time / what are set on the *input* distribution, not on
    # new_dist. Presumably intentional (the input is the state the split
    # produced) -- confirm against the caller.
    chunk_dist.time = split_doc['time']
    chunk_dist.what = 'split'

    # another sanity check: make sure new chunk distribution is correct
    if not new_dist.check(verbose=True):
        raise ValueError('Error processing split: resulting chunk distribution check failed.')

    return new_dist
def _process_multi_split(self, changelog, chunk_dist):
    """ Processes a multi-split event backwards, transforming a given
        ChunkDistribution into a new one in which all children chunks are
        merged back into the one original chunk.

        changelog:  sequence of changelog documents; the first one is the
                    multi-split document being processed, and all documents
                    with what == 'multi-split' and the same 'before'
                    lastmod are treated as part of the same event.
        chunk_dist: distribution that contains all children of the split.

        Returns False if a multi-split with the same 'before' lastmod was
        already processed (tracked in self.processed_multisplits).
        Otherwise returns a shallow copy of chunk_dist in which the
        children are replaced by the 'before' chunk.

        Raises ValueError if no multi-split document matches, if a child
        cannot be found in chunk_dist, if a found chunk differs from its
        description in the changelog, or if the resulting distribution
        fails its own check().

        Side effects on the input: the children found in chunk_dist get
        their shard_version overwritten and their parent set to the
        'before' chunk; chunk_dist gets its time and what attributes set.
    """
    # check if this multi-split has already been processed (only process
    # first one for each shard version)
    split_doc = changelog[0]
    before_lastmod = split_doc['details']['before']['lastmod']
    lastmod = (before_lastmod.time, before_lastmod.inc)
    if lastmod in self.processed_multisplits:
        return False
    self.processed_multisplits.add(lastmod)

    # find all documents in the changelog belonging to this multi-split
    multi_split_docs = [
        chl for chl in changelog
        if chl['what'] == 'multi-split'
        and chl['details']['before']['lastmod'] == before_lastmod
    ]

    # chunk built from the "before" part of the first document
    before_split = Chunk(split_doc, 'before')

    # Chunk objects found in the distribution
    chunks = []
    for doc in multi_split_docs:
        split = Chunk(doc, 'chunk')
        try:
            chunk = chunk_dist.find(split.range)
        except ValueError:
            raise ValueError("Error processing multi-split: can't find a chunk in distribution.")

        # set shards to be equal (they are not in split_doc), then compare
        split.shard = chunk.shard

        # update shard versions in chunks
        chunk.shard_version = split.shard_version

        if split != chunk:
            raise ValueError("Error processing multi-split: chunks not the same. \n%s <--> %s" % (split, chunk))

        chunks.append(chunk)

    # without any children there is nothing to merge; fail with a clear
    # message rather than an IndexError on chunks[0] further down
    if not chunks:
        raise ValueError("Error processing multi-split: no multi-split documents found in changelog.")

    # create a shallow copy of the original chunk distribution
    new_dist = copy(chunk_dist)

    # now remove all chunks and insert a new one
    for chunk in chunks:
        new_dist.remove(chunk)

    # link before chunk to all children chunks; the shard is taken from
    # the first child
    before_split.shard = chunks[0].shard
    for chunk in chunks:
        chunk.parent = before_split
    before_split.children = chunks

    new_dist.insert(before_split)

    # NOTE(review): time / what are set on the *input* distribution, not on
    # new_dist. Presumably intentional (the input is the state the split
    # produced) -- confirm against the caller.
    chunk_dist.time = split_doc['time']
    chunk_dist.what = 'multi-split'

    # another sanity check: make sure new chunk distribution is correct
    if not new_dist.check(verbose=True):
        raise ValueError('Error processing multi-split: resulting chunk distribution check failed.')

    return new_dist