Example #1
 def init_num_shards(self):
     # lazy, cached lookup: skip the query if the count is already known
     if self.num_shards is not None:
         return
     try:
         self.num_shards = client.num_log_shards(self.src_conn, self.type)
         log.debug('%d shards to check', self.num_shards)
     except Exception:
         log.error('finding number of shards failed')
         raise
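
The method above is a lazy, cached initializer: it queries the shard count once, stores it on the instance, and logs before re-raising so the caller still sees the original exception. A minimal sketch of how it might be exercised; FakeClient, Syncer, and the shard count of 64 are invented here for illustration and are not part of the project:

    import logging

    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger(__name__)

    class FakeClient(object):
        """Stand-in for the real client module (hypothetical)."""
        @staticmethod
        def num_log_shards(conn, type_):
            return 64

    client = FakeClient

    class Syncer(object):
        def __init__(self, src_conn, type_):
            self.src_conn = src_conn
            self.type = type_
            self.num_shards = None

        def init_num_shards(self):
            if self.num_shards is not None:
                return
            try:
                self.num_shards = client.num_log_shards(self.src_conn, self.type)
                log.debug('%d shards to check', self.num_shards)
            except Exception:
                log.error('finding number of shards failed')
                raise

    syncer = Syncer(src_conn=None, type_='metadata')
    syncer.init_num_shards()   # fetches and caches 64
    syncer.init_num_shards()   # no-op: the value is already cached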
Example #2
    def sync_partial(self, num_workers, log_lock_time, max_entries):
        try:
            num_shards = client.num_log_shards(self.src_conn, self._type)
        except Exception:
            log.exception('finding number of shards failed')
            raise
        log.debug('We have %d shards to check', num_shards)

        # create the work and results Queue
        workQueue = multiprocessing.Queue()
        resultQueue = multiprocessing.Queue()

        # create the worker processes
        if self._type == 'data':
            worker_cls = worker.DataWorkerPartial
        else:
            worker_cls = worker.MetadataWorkerPartial
        processes = [worker_cls(workQueue,
                                resultQueue,
                                log_lock_time,
                                self.src,
                                self.dest,
                                daemon_id=self.daemon_id,
                                max_entries=max_entries)
                     for i in xrange(num_workers)]
        for process in processes:
            process.daemon = True
            process.start()

        log.info('Starting partial sync')
        # enqueue the shards to be synced
        for i in xrange(num_shards):
            workQueue.put(i)

        # add a poison pill for each worker
        for i in xrange(num_workers):
            workQueue.put(None)

        # pull the results out as they are produced
        errors = []
        for i in xrange(num_shards):
            result, shard_num = resultQueue.get()
            if result == worker.RESULT_SUCCESS:
                log.debug('synced shard %d', shard_num)
            else:
                log.error('error syncing shard %d', shard_num)
                errors.append(shard_num)
            log.info('%d/%d shards processed', i + 1, num_shards)
        # reap the worker processes now that every result has been collected
        for process in processes:
            process.join()
        if errors:
            log.error('Encountered errors syncing these %d shards: %s',
                      len(errors), errors)
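
Example #2 relies on worker classes (worker.DataWorkerPartial and worker.MetadataWorkerPartial) whose contract is implied rather than shown: each worker pulls shard numbers off workQueue until it sees the None poison pill, and reports a (result, shard_num) tuple on resultQueue. A hedged sketch of that contract; ShardWorker and the RESULT_* values are assumptions, not the project's actual worker module:

    import multiprocessing

    RESULT_SUCCESS = 0   # assumed values; the real worker module defines its own
    RESULT_ERROR = 1

    class ShardWorker(multiprocessing.Process):
        """Hypothetical stand-in for worker.DataWorkerPartial."""
        def __init__(self, work_queue, result_queue):
            super(ShardWorker, self).__init__()
            self.work_queue = work_queue
            self.result_queue = result_queue

        def run(self):
            while True:
                shard_num = self.work_queue.get()
                if shard_num is None:
                    # poison pill: one None per worker tells it to exit
                    break
                try:
                    # ... sync the shard's log entries here ...
                    self.result_queue.put((RESULT_SUCCESS, shard_num))
                except Exception:
                    self.result_queue.put((RESULT_ERROR, shard_num))

    if __name__ == '__main__':
        work_q, result_q = multiprocessing.Queue(), multiprocessing.Queue()
        w = ShardWorker(work_q, result_q)
        w.start()
        work_q.put(7)
        work_q.put(None)          # poison pill
        print(result_q.get())     # (0, 7), i.e. (RESULT_SUCCESS, 7)
        w.join()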
Example #3
    def sync_full(self, num_workers, log_lock_time):
        try:
            sections = client.get_metadata_sections(self.src_conn)
        except client.HttpError as e:
            log.error('Error listing metadata sections: %s', e)
            raise

        # grab the latest shard markers and timestamps before we sync
        shard_info = []
        num_shards = client.num_log_shards(self.src_conn, 'metadata')
        for shard_num in xrange(num_shards):
            info = client.get_log_info(self.src_conn, 'metadata', shard_num)
            # setting an empty marker returns an error
            if info['marker']:
                shard_info.append((shard_num, info['marker'],
                                   info['last_update']))

        meta_keys = []
        for section in sections:
            try:
                meta_keys += [(section, key) for key in
                              client.list_metadata_keys(self.src_conn, section)]
            except client.NotFound:
                # no keys of this type exist
                continue
            except client.HttpError as e:
                log.error('Error listing metadata for section %s: %s',
                          section, e)
                raise

        # create the work and results Queue
        workQueue = multiprocessing.Queue()
        resultQueue = multiprocessing.Queue()

        # create the worker processes
        if self._type == 'data':
            worker_cls = worker.DataWorkerFull
        else:
            worker_cls = worker.MetadataWorkerFull
        processes = [worker_cls(workQueue, resultQueue, log_lock_time, self.src,
                                self.dest) for i in xrange(num_workers)]
        for process in processes:
            process.daemon = True
            process.start()

        start_time = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
        log.info('Starting full sync at %s', start_time)

        # enqueue the shards to be synced
        for meta in meta_keys:
            workQueue.put(meta)

        # add a poison pill for each worker
        for i in xrange(num_workers):
            workQueue.put(None)

        # pull the results out as they are produced
        errors = []
        for i in xrange(len(meta_keys)):
            result, section, name = resultQueue.get()
            log.info('%d/%d items synced', i + 1, len(meta_keys))
            if result != worker.RESULT_SUCCESS:
                log.error('error syncing %s %r', section, name)
                errors.append((section, name))
            else:
                log.debug('synced %s %r', section, name)
        for process in processes:
            process.join()
        if errors:
            log.error('Encountered errors syncing these %d entries: %s',
                      len(errors), errors)
        else:
            for shard_num, marker, timestamp in shard_info:
                client.set_worker_bound(self.src_conn, 'metadata', shard_num,
                                        marker, timestamp, self.daemon_id)
                client.del_worker_bound(self.src_conn, 'metadata', shard_num,
                                        self.daemon_id)
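
The metadata enumeration in Example #3 treats client.NotFound as benign: a section with no keys is simply skipped rather than aborting the sync. A self-contained sketch of the same skip-on-missing pattern; FakeClient, its sections, and its keys are invented for illustration:

    class NotFound(Exception):
        pass

    class FakeClient(object):
        """Hypothetical stand-in for the client module."""
        SECTIONS = [('user', ['alice', 'bob']), ('bucket', [])]

        @classmethod
        def get_metadata_sections(cls, conn):
            return [name for name, _ in cls.SECTIONS]

        @classmethod
        def list_metadata_keys(cls, conn, section):
            keys = dict(cls.SECTIONS)[section]
            if not keys:
                raise NotFound(section)
            return keys

    meta_keys = []
    for section in FakeClient.get_metadata_sections(None):
        try:
            meta_keys += [(section, key) for key in
                          FakeClient.list_metadata_keys(None, section)]
        except NotFound:
            # no keys of this type exist; skip the section
            continue

    print(meta_keys)   # [('user', 'alice'), ('user', 'bob')]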