def _get_and_process_entries(self, marker, shard_num):
    """Sync up to self.max_entries entries from metadata log shard ``shard_num``.

    Fetches log entries after ``marker`` from the source connection,
    syncs every (section, name) pair they mention, then records the
    worker bound so later runs can resume past these entries.

    :param marker: opaque log position to read after
    :param shard_num: metadata log shard to process
    :returns: tuple of (number of entries processed, marker of the last
        entry processed); (0, '') when nothing was synced or the bound
        could not be recorded
    :raises KeyError: if an entry in the log is malformed
    """
    log_entries = client.get_meta_log(self.source_conn, shard_num,
                                      marker, self.max_entries)
    log.info('shard %d has %d entries after %r', shard_num,
             len(log_entries), marker)
    try:
        entries = [_meta_entry_from_json(entry) for entry in log_entries]
    except KeyError:
        # dump the raw log for debugging before propagating the failure
        log.error('log containing bad key is: %s', log_entries)
        raise
    # de-duplicate (section, name) pairs so each object is synced once
    mentioned = {(entry.section, entry.name) for entry in entries}
    for section, name in mentioned:
        self.sync_meta(section, name)
    if entries:
        try:
            client.set_worker_bound(self.source_conn, 'metadata',
                                    shard_num, entries[-1].marker,
                                    entries[-1].timestamp, self.daemon_id)
            return len(entries), entries[-1].marker
        except Exception:
            # best-effort: failing to record progress only risks
            # re-processing the same entries on a later run
            log.exception('error setting worker bound, may duplicate some work later')
    return 0, ''
def _get_and_process_entries(self, marker, shard_num):
    """Sync up to self.max_entries entries from metadata log shard ``shard_num``.

    Fetches log entries after ``marker`` from the source connection and
    syncs every (section, name) pair they mention. The worker bound is
    only advanced when every sync succeeded, so failed work is retried
    on a later pass.

    :param marker: opaque log position to read after
    :param shard_num: metadata log shard to process
    :returns: tuple of (number of entries processed, marker of the last
        entry processed); (0, '') when nothing was synced, an error was
        encountered, or the bound could not be recorded
    :raises KeyError: if an entry in the log is malformed
    """
    log_entries = client.get_meta_log(self.source_conn, shard_num,
                                      marker, self.max_entries)
    log.info('shard %d has %d entries after %r', shard_num,
             len(log_entries), marker)
    try:
        entries = [_meta_entry_from_json(entry) for entry in log_entries]
    except KeyError:
        # dump the raw log for debugging before propagating the failure
        log.error('log containing bad key is: %s', log_entries)
        raise
    error_encountered = False
    # de-duplicate (section, name) pairs so each object is synced once
    mentioned = {(entry.section, entry.name) for entry in entries}
    for section, name in mentioned:
        sync_result = self.sync_meta(section, name)
        if sync_result == RESULT_ERROR:
            error_encountered = True
    # Only set worker bounds if there was data synced and no
    # errors were encountered
    if entries and not error_encountered:
        try:
            client.set_worker_bound(self.dest_conn, 'metadata',
                                    shard_num, entries[-1].marker,
                                    entries[-1].timestamp, self.daemon_id)
            return len(entries), entries[-1].marker
        except Exception:
            # best-effort: failing to record progress only risks
            # re-processing the same entries on a later run
            log.exception('error setting worker bound for shard {shard_num},'
                          ' may duplicate some work later'.format(shard_num=shard_num))
    elif entries and error_encountered:
        log.error('Error encountered while syncing shard {shard_num}. '
                  'Not setting worker bound, may duplicate some '
                  'work later'.format(shard_num=shard_num))
    return 0, ''
def run(self):
    """Worker loop: consume shard work items until a ``None`` sentinel.

    For each (shard_num, start_time, end_time) item: lock the shard's
    log, fetch and process its metadata entries, then unlock the log.
    The outcome for each shard is reported on ``self.result_queue`` as
    (RESULT_SUCCESS | RESULT_ERROR, shard_num).
    """
    while True:
        items = self.work_queue.get()
        if items is None:
            # sentinel: no more work for this process
            log.info('process %s is done. Exiting', self.ident)
            break
        shard_num, start_time, end_time = items
        log.debug('working on shard %s', shard_num)
        log.info('%s is processing shard number %d', self.ident, shard_num)

        # first, lock the log
        try:
            self.acquire_log_lock(self.local_lock_id, self.source_zone,
                                  shard_num)
        except client.NotFound:
            # no log means nothing changed in this time period
            self.result_queue.put((RESULT_SUCCESS, shard_num))
            continue
        except client.HttpError as e:
            log.info('error locking shard %d log, assuming'
                     ' it was processed by someone else and skipping: %s',
                     shard_num, e)
            self.result_queue.put((RESULT_ERROR, shard_num))
            continue

        try:
            log_entries = client.get_meta_log(self.source_conn, shard_num,
                                              start_time, end_time)
        except client.HttpError as e:
            log.error('metadata list failed: %s', e)
            # we hit an error getting the data to sync.
            # Bail and unlock the log
            try:
                self.release_log_lock(self.local_lock_id, self.source_zone,
                                      shard_num)
            except Exception:
                log.exception('error unlocking log, continuing anyway '
                              'since lock will timeout')
            self.result_queue.put((RESULT_ERROR, shard_num))
            continue

        log.info('shard %d has %d entries', shard_num, len(log_entries))
        try:
            entries = [MetadataEntry(entry) for entry in log_entries]
        except KeyError:
            # NOTE(review): shard is skipped while still holding the log
            # lock; relying on the lock timeout to release it - confirm
            log.exception('error reading metadata entry, skipping shard')
            log.error('log was: %s', log_entries)
            continue

        try:
            self.process_entries(entries)
        except Exception:
            # log.exception records the active exception and traceback
            log.exception('error processing log entries for shard %d',
                          shard_num)
            self.release_log_lock(self.local_lock_id, self.source_zone,
                                  shard_num)
            self.result_queue.put((RESULT_ERROR, shard_num))
            continue

        # finally, unlock the log
        self.release_log_lock(self.local_lock_id, self.source_zone,
                              shard_num)
        self.result_queue.put((RESULT_SUCCESS, shard_num))
        log.info('finished processing shard %d', shard_num)