Esempio n. 1
0
    def _process_split(self, split_doc, chunk_dist):
        """ Processes a single split event, transforming a given ChunkDistribution into a new one,
            where the two chunks are merged back into one original (split backwards).

            split_doc is the changelog document of the split. It is handed to Chunk together
            with the part name ('left', 'right', 'before') and its 'time' value is read.

            chunk_dist is the distribution containing both halves of the split. It is modified:
            the two chunks found in it get their shard_version overwritten and their parent set,
            and its own time / what attributes are set to describe this split event.

            Returns a shallow copy of chunk_dist in which the left and right chunks have been
            replaced by the single 'before' chunk.

            Raises ValueError if either half cannot be found in chunk_dist, if a found chunk
            differs from the one described by split_doc, or if the resulting distribution
            fails its own check().
        """

        # Chunk objects built from the split document
        left_split = Chunk(split_doc, 'left')
        right_split = Chunk(split_doc, 'right')
        before_split = Chunk(split_doc, 'before')

        # Chunk objects found in the distribution
        try:
            left_chunk = chunk_dist.find(left_split.range)
        except ValueError:
            raise ValueError("Error processing split: can't find left chunk in distribution.")

        try:
            right_chunk = chunk_dist.find(right_split.range)
        except ValueError:
            raise ValueError("Error processing split: can't find right chunk in distribution.")

        # set shards to be equal (they are not in split_doc), then compare
        left_split.shard = left_chunk.shard
        right_split.shard = right_chunk.shard

        # update any shard versions if they are different (retrospectively, from moved chunks)
        left_chunk.shard_version = left_split.shard_version
        right_chunk.shard_version = right_split.shard_version

        # both halves must match what the split document describes; a mismatch on
        # either side is an error (the right-hand mismatch used to be printed only)
        if left_split != left_chunk:
            raise ValueError("Error processing split: left chunks not the same. %s <--> %s" % (left_split, left_chunk))
        if right_split != right_chunk:
            raise ValueError("Error processing split: right chunks not the same. %s <--> %s" % (right_split, right_chunk))

        # create a shallow copy of the original chunk distribution
        new_dist = copy(chunk_dist)

        # now remove these two chunks and insert a new one
        new_dist.remove(left_chunk)
        new_dist.remove(right_chunk)

        # link before chunk to the left and right (parent / children relationship);
        # the merged chunk takes the shard of the left half
        before_split.shard = left_chunk.shard
        left_chunk.parent = before_split
        right_chunk.parent = before_split
        before_split.children = [left_chunk, right_chunk]

        new_dist.insert(before_split)

        # record the split event on the distribution that was passed in
        # (note: this is chunk_dist, not the copy created above)
        chunk_dist.time = split_doc['time']
        chunk_dist.what = 'split'

        # another sanity check: make sure new chunk distribution is correct
        if not new_dist.check(verbose=True):
            raise ValueError('Error processing split: resulting chunk distribution check failed.')

        return new_dist
Esempio n. 2
0
    def _process_multi_split(self, changelog, chunk_dist):
        """ Processes a multi-split event, transforming a given ChunkDistribution into a new one,
            where all the children chunks are merged back into one original (split backwards).
        """
        # check if this multi-split has already been processed (only process first one for each shard version)
        split_doc = changelog[0]
        lastmod = (split_doc['details']['before']['lastmod'].time, split_doc['details']['before']['lastmod'].inc)

        if lastmod in self.processed_multisplits:
            return False
        else:
            self.processed_multisplits.add(lastmod)

        # find all documents in the changelog belonging to this multi-split
        multi_split_docs = filter( lambda chl: chl['what'] == 'multi-split' and
                                               chl['details']['before']['lastmod'] == split_doc['details']['before']['lastmod'], changelog) 

        # "before" doc and its chunk
        before_doc = split_doc['details']['before']
        before_split = Chunk(split_doc, 'before')

        # Chunk objects found in the distribution
        chunks = []
        for doc in multi_split_docs:
            split = Chunk(doc, 'chunk')
            try:
                chunk = chunk_dist.find( split.range )
            except ValueError:
                raise ValueError("Error processing multi-split: can't find a chunk in distribution.")

            # set shards to be equal (they are not in split_doc), then compare
            split.shard = chunk.shard

            # update shard versions in chunks
            chunk.shard_version = split.shard_version

            if split != chunk:
                raise ValueError("Error processing multi-split: chunks not the same. %s <--> %s" % (split, chunk))

            chunks.append(chunk)

        # create a shallow copy of the original chunk distribution
        new_dist = copy(chunk_dist)

        # now remove all chunks and insert a new one
        for chunk in chunks:
            new_dist.remove(chunk)

        # link before chunk to all children chunks
        before_split.shard = chunks[0].shard
        for c, chunk in enumerate(chunks):
            chunks[c].parent = before_split
        before_split.children = chunks

        new_dist.insert(before_split)

        # update time of new distribution
        chunk_dist.time = split_doc['time']
        chunk_dist.what = 'multi-split'

        # another sanity check: make sure new chunk distribution is correct
        if not new_dist.check(verbose=True):
            raise ValueError('Error processing multi-split: resulting chunk distribution check failed.')
        
        return new_dist