Example #1
    def prepare(self):
        try:
            self.sections = client.get_metadata_sections(self.src_conn)
        except HttpError as e:
            log.error('Error listing metadata sections: %s', e)
            raise

        # grab the latest shard markers and timestamps before we sync
        self.shard_info = {}
        self.init_num_shards()
        for shard_num in xrange(self.num_shards):
            info = client.get_log_info(self.src_conn, 'metadata', shard_num)
            # setting an empty marker returns an error
            if info['marker']:
                self.shard_info[shard_num] = info['marker']
            else:
                self.shard_info[shard_num] = ' '

        self.metadata_by_shard = {}
        for section in self.sections:
            try:
                for key in client.list_metadata_keys(self.src_conn, section):
                    shard = self.shard_num_for_key(section + ':' + key)
                    self.metadata_by_shard.setdefault(shard, [])
                    self.metadata_by_shard[shard].append((section, key))
            except NotFound:
                # no keys of this type exist
                continue
            except HttpError as e:
                log.error('Error listing metadata for section %s: %s', section,
                          e)
                raise
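prepare() leans on two helpers that none of these examples define. shard_num_for_key must map a 'section:key' string onto the same shard the source gateway's log placement uses, or the markers saved above would not correspond to the keys being synced. A minimal sketch, assuming a simple deterministic string hash taken modulo self.num_shards (the real hash must agree with the gateway's):

    def shard_num_for_key(self, key):
        # hypothetical sketch: any deterministic hash works for
        # illustration, but in practice it must match the hash the
        # source gateway uses to assign log entries to shards
        key = key.encode('utf8')
        hash_val = 0
        for char in key:
            c = ord(char)
            hash_val = (hash_val + (c << 4) + (c >> 4)) * 11
        return hash_val % self.num_shards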
Example #2
    def prepare(self):
        log.info('preparing to do a full data sync')
        self.init_num_shards()

        # save data log markers for each shard
        self.shard_info = {}
        for shard in xrange(self.num_shards):
            info = client.get_log_info(self.src_conn, 'data', shard)
            # setting an empty marker returns an error
            if info['marker']:
                self.shard_info[shard] = info['marker']
            else:
                self.shard_info[shard] = ' '

        # get list of buckets after getting any markers to avoid skipping
        # entries added before we got the marker info
        log.debug('getting bucket list')
        buckets = client.get_bucket_list(self.src_conn)

        self.prepared_at = time.time()

        self.buckets_by_shard = {}
        for bucket in buckets:
            shard = self.shard_num_for_key(bucket)
            self.buckets_by_shard.setdefault(shard, [])
            self.buckets_by_shard[shard].append(bucket)
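init_num_shards is likewise left out of these examples. A plausible sketch, assuming the count comes from the client.num_log_shards call that Example #5 invokes directly, and that the log type ('data' or 'metadata') is stored on the instance as Example #5's self._type is:

    def init_num_shards(self):
        # cache the shard count once; every xrange(self.num_shards)
        # loop in prepare() depends on it
        if not hasattr(self, 'num_shards'):
            self.num_shards = client.num_log_shards(self.src_conn,
                                                    self._type)
            log.debug('%d shards to check', self.num_shards)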
Example #3
    def full_sync_bucket(self, bucket):
        try:
            instance = self.get_bucket_instance(bucket)
            try:
                marker = client.get_log_info(self.src_conn, 'bucket-index',
                                             instance)['max_marker']
            except client.NotFound:
                marker = ''
            log.debug('bucket instance is "%s" with marker %s', instance, marker)
            # nothing to do for this bucket
            if not marker:
                return True

            objects = client.list_objects_in_bucket(self.src_conn, bucket)
            if not objects:
                return True
        except Exception as e:
            log.error('error preparing for full sync of bucket "%s": %s',
                      bucket, e)
            return False

        retries = self.sync_bucket(bucket, objects)

        result = self.set_bound(instance, marker, retries, 'bucket-index')
        return not retries and result == RESULT_SUCCESS
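full_sync_bucket returns a boolean rather than raising, so a caller can sweep an entire shard and collect only the failures for retry. A hypothetical driver loop, pairing it with the buckets_by_shard map built in Example #2's prepare():

    # hypothetical driver, not part of the original examples
    failed = []
    for shard, buckets in self.buckets_by_shard.items():
        for bucket in buckets:
            if not self.full_sync_bucket(bucket):
                failed.append(bucket)
    if failed:
        log.error('failed full sync of %d buckets: %s', len(failed), failed)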
Example #4
    def full_sync_bucket(self, bucket):
        try:
            instance = self.get_bucket_instance(bucket)
            try:
                marker = client.get_log_info(self.src_conn, 'bucket-index',
                                             instance)['max_marker']
            except NotFound:
                # an empty marker cannot be stored, so fall back to the
                # same ' ' sentinel used in Examples #1 and #2
                marker = ' '
            log.debug('bucket instance is "%s" with marker %s', instance, marker)

            objects = client.list_objects_in_bucket(self.src_conn, bucket)
            retries = self.sync_bucket(bucket, objects)

            result = self.set_bound(instance, marker, retries, 'bucket-index')
            return not retries and result == RESULT_SUCCESS
        except BucketEmpty:
            log.debug('no objects in bucket %s', bucket)
            return True
        except Exception:
            log.exception('error preparing for full sync of bucket "%s"',
                          bucket)
            return False
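Compared with Example #3, this variant moves the empty-bucket check into the client layer: list_objects_in_bucket is expected to raise BucketEmpty instead of returning an empty list, and a missing bucket-index marker falls back to the ' ' sentinel rather than short-circuiting. A hypothetical sketch of the client-side contract the except BucketEmpty branch assumes:

    def list_objects_in_bucket(connection, bucket):
        # hypothetical: _fetch_object_listing stands in for whatever
        # request the client layer actually makes
        objects = _fetch_object_listing(connection, bucket)
        if not objects:
            raise BucketEmpty(bucket)
        return objects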
Example #5
    def sync_full(self, num_workers, log_lock_time):
        try:
            sections = client.get_metadata_sections(self.src_conn)
        except client.HttpError as e:
            log.error('Error listing metadata sections: %s', e)
            raise

        # grab the latest shard markers and timestamps before we sync
        shard_info = []
        num_shards = client.num_log_shards(self.src_conn, 'metadata')
        for shard_num in xrange(num_shards):
            info = client.get_log_info(self.src_conn, 'metadata', shard_num)
            # setting an empty marker returns an error
            if info['marker']:
                shard_info.append((shard_num, info['marker'],
                                   info['last_update']))

        meta_keys = []
        for section in sections:
            try:
                meta_keys += [(section, key) for key in
                              client.list_metadata_keys(self.src_conn, section)]
            except client.NotFound:
                # no keys of this type exist
                continue
            except client.HttpError as e:
                log.error('Error listing metadata for section %s: %s',
                          section, e)
                raise

        # create the work and results Queue
        workQueue = multiprocessing.Queue()
        resultQueue = multiprocessing.Queue()

        # create the worker processes
        if self._type == 'data':
            worker_cls = worker.DataWorkerFull
        else:
            worker_cls = worker.MetadataWorkerFull
        processes = [worker_cls(workQueue, resultQueue, log_lock_time, self.src,
                                self.dest) for i in xrange(num_workers)]
        for process in processes:
            process.daemon = True
            process.start()

        start_time = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
        log.info('Starting full sync at %s', start_time)

        # enqueue the shards to be synced
        for meta in meta_keys:
            workQueue.put(meta)

        # add a poison pill for each worker
        for i in xrange(num_workers):
            workQueue.put(None)

        # pull the results out as they are produced
        errors = []
        for i in xrange(len(meta_keys)):
            log.info('%d/%d items synced', i, len(meta_keys))
            result, section, name = resultQueue.get()
            if result != worker.RESULT_SUCCESS:
                log.error('error syncing %s %r', section, name)
                errors.append((section, name))
            else:
                log.debug('synced %s %r', section, name)
        for process in processes:
            process.join()
        if errors:
            log.error('Encountered errors syncing these %d entries: %s',
                      len(errors), errors)
        else:
            # no errors: record the markers captured before the sync for
            # each shard, then remove this daemon's bound entry
            for shard_num, marker, timestamp in shard_info:
                client.set_worker_bound(self.src_conn, 'metadata', shard_num,
                                        marker, timestamp, self.daemon_id)
                client.del_worker_bound(self.src_conn, 'metadata', shard_num,
                                        self.daemon_id)
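sync_full coordinates its workers with the classic poison-pill pattern: after the real work items, one None per worker is enqueued, and each worker exits when it dequeues one. The worker classes are not shown here; their main loop is assumed to look roughly like this:

    def run(self):
        # minimal sketch of the consumer side; worker.MetadataWorkerFull
        # is assumed to run a loop along these lines
        while True:
            item = self.work_queue.get()
            if item is None:
                # poison pill: each worker consumes exactly one, so all
                # workers exit once the queue is drained
                break
            section, name = item
            result = self.sync_entry(section, name)  # hypothetical call
            self.result_queue.put((result, section, name))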