def prepare(self):
    try:
        self.sections = client.get_metadata_sections(self.src_conn)
    except client.HttpError as e:
        log.error('Error listing metadata sections: %s', e)
        raise

    # grab the latest shard markers and timestamps before we sync
    self.shard_info = {}
    self.init_num_shards()
    for shard_num in xrange(self.num_shards):
        info = client.get_log_info(self.src_conn, 'metadata', shard_num)
        # setting an empty marker returns an error
        if info['marker']:
            self.shard_info[shard_num] = info['marker']
        else:
            self.shard_info[shard_num] = ' '

    self.metadata_by_shard = {}
    for section in self.sections:
        try:
            for key in client.list_metadata_keys(self.src_conn, section):
                shard = self.shard_num_for_key(section + ':' + key)
                self.metadata_by_shard.setdefault(shard, []).append((section, key))
        except client.NotFound:
            # no keys of this type exist
            continue
        except client.HttpError as e:
            log.error('Error listing metadata for section %s: %s',
                      section, e)
            raise
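# shard_num_for_key() is called above but defined elsewhere. The shard choice
# has to agree with the hash the gateway itself uses to place entries in the
# metadata log; a sketch in the style of the Linux dcache string hash (treat
# the exact hash below as an assumption, not the canonical implementation):
def shard_num_for_key(self, key):
    key = key.encode('utf8')
    hash_val = 0
    for char in key:
        c = ord(char)
        # accumulate a simple multiplicative string hash over the bytes
        hash_val = (hash_val + (c << 4) + (c >> 4)) * 11
    return hash_val % self.num_shards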
def sync_full(self, num_workers, log_lock_time):
    try:
        sections = client.get_metadata_sections(self.src_conn)
    except client.HttpError as e:
        log.error('Error listing metadata sections: %s', e)
        raise

    # grab the latest shard markers and timestamps before we sync
    shard_info = []
    num_shards = client.num_log_shards(self.src_conn, 'metadata')
    for shard_num in xrange(num_shards):
        info = client.get_log_info(self.src_conn, 'metadata', shard_num)
        # setting an empty marker returns an error
        if info['marker']:
            shard_info.append((shard_num, info['marker'], info['last_update']))

    meta_keys = []
    for section in sections:
        try:
            meta_keys += [(section, key) for key in
                          client.list_metadata_keys(self.src_conn, section)]
        except client.NotFound:
            # no keys of this type exist
            continue
        except client.HttpError as e:
            log.error('Error listing metadata for section %s: %s',
                      section, e)
            raise

    # create the work and result queues
    workQueue = multiprocessing.Queue()
    resultQueue = multiprocessing.Queue()

    # create the worker processes
    if self._type == 'data':
        worker_cls = worker.DataWorkerFull
    else:
        worker_cls = worker.MetadataWorkerFull
    processes = [worker_cls(workQueue, resultQueue, log_lock_time,
                            self.src, self.dest)
                 for i in xrange(num_workers)]
    for process in processes:
        process.daemon = True
        process.start()

    start_time = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
    log.info('Starting full sync at %s', start_time)

    # enqueue the metadata entries to be synced
    for meta in meta_keys:
        workQueue.put(meta)
    # add a poison pill for each worker
    for i in xrange(num_workers):
        workQueue.put(None)

    # pull the results out as they are produced
    errors = []
    for i in xrange(len(meta_keys)):
        log.info('%d/%d items synced', i, len(meta_keys))
        result, section, name = resultQueue.get()
        if result != worker.RESULT_SUCCESS:
            log.error('error syncing %s %r', section, name)
            errors.append((section, name))
        else:
            log.debug('synced %s %r', section, name)
    for process in processes:
        process.join()

    if errors:
        log.error('Encountered errors syncing these %d entries: %s',
                  len(errors), errors)
    else:
        for shard_num, marker, timestamp in shard_info:
            client.set_worker_bound(self.src_conn, 'metadata', shard_num,
                                    marker, timestamp, self.daemon_id)
            client.del_worker_bound(self.src_conn, 'metadata', shard_num,
                                    self.daemon_id)
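# The worker classes live in the worker module and are not shown here. The
# queue protocol above (one (section, key) tuple per item, one None "poison
# pill" per worker, and one (result, section, name) tuple reported back per
# item) implies a consumer loop roughly like the following. This is a
# hypothetical sketch, not the actual worker implementation; sync_meta and
# RESULT_ERROR are placeholders for whatever the real worker module provides.
import multiprocessing

RESULT_SUCCESS = 0
RESULT_ERROR = 1

class MetadataWorkerFull(multiprocessing.Process):

    def __init__(self, work_queue, result_queue, log_lock_time, src, dest):
        super(MetadataWorkerFull, self).__init__()
        self.work_queue = work_queue
        self.result_queue = result_queue
        self.log_lock_time = log_lock_time
        self.src = src
        self.dest = dest

    def sync_meta(self, section, name):
        # placeholder: read the metadata entry from src, write it to dest
        pass

    def run(self):
        while True:
            item = self.work_queue.get()
            if item is None:
                # poison pill: each worker consumes exactly one, then exits,
                # which is why sync_full() enqueues num_workers of them
                break
            section, name = item
            try:
                self.sync_meta(section, name)
                result = RESULT_SUCCESS
            except Exception:
                result = RESULT_ERROR
            # always report back so the parent's result loop sees exactly
            # len(meta_keys) results and terminates
            self.result_queue.put((result, section, name))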
def test_get_metadata_sections(self):
    self.register()
    client.get_metadata_sections(self.connection)
    server_request = httpretty.last_request()
    assert server_request.path == '/admin/metadata'
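# register() is a helper defined elsewhere in the test class; a plausible
# sketch, assuming it points httpretty at the admin metadata endpoint (the
# base URL and response body here are illustrative, not the suite's actual
# fixtures):
def register(self):
    httpretty.enable()
    httpretty.register_uri(
        httpretty.GET,
        'http://localhost:8000/admin/metadata',
        body='["user", "bucket", "bucket.instance"]',
        content_type='application/json',
        )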