Example #1
0
    def _get_and_process_entries(self, marker, shard_num):
        """
        Sync up to self.max_entries entries, returning the number of
        entries processed and the marker of the last entry processed.

        Returns (0, '') when the shard log is empty or when recording
        the worker bound fails.
        """
        log_entries = client.get_meta_log(self.source_conn, shard_num,
                                          marker, self.max_entries)

        log.info('shard %d has %d entries after %r', shard_num, len(log_entries),
                 marker)
        try:
            entries = [_meta_entry_from_json(entry) for entry in log_entries]
        except KeyError:
            # log the raw entries for debugging, then let the caller decide
            log.error('log containing bad key is: %s', log_entries)
            raise

        # de-duplicate (section, name) pairs so each item is synced once
        mentioned = {(entry.section, entry.name) for entry in entries}
        for section, name in mentioned:
            self.sync_meta(section, name)

        if entries:
            try:
                client.set_worker_bound(self.source_conn, 'metadata',
                                        shard_num, entries[-1].marker,
                                        entries[-1].timestamp,
                                        self.daemon_id)
                return len(entries), entries[-1].marker
            # narrow from bare except: don't swallow SystemExit/KeyboardInterrupt
            except Exception:
                # best-effort: a missed bound update only risks repeating work
                log.exception('error setting worker bound, may duplicate some work later')

        return 0, ''
Example #2
0
 def set_bound(self, key, marker, retries, type_=None):
     """
     Record the sync position (bound) for *key* on the destination,
     along with the items that still need to be retried.

     Returns RESULT_SUCCESS or RESULT_ERROR, or None when *marker* is
     blank (the api doesn't allow setting a bound with a blank marker).
     """
     # Truthiness check: the old `marker != " "` only rejected a single
     # space, letting an empty marker '' through to an API call that the
     # api rejects.
     if marker:
         if type_ is None:
             type_ = self.type
         try:
             data = [obj_.to_dict(item, time=DEFAULT_TIME) for item in retries]
             client.set_worker_bound(self.dest_conn, type_, marker, DEFAULT_TIME, self.daemon_id, key, data=data)
             return RESULT_SUCCESS
         except Exception:
             # best-effort: a missed bound update only risks repeating work
             log.warn(
                 'error setting worker bound for key "%s",' " may duplicate some work later. Traceback:",
                 key,
                 exc_info=True,
             )
             return RESULT_ERROR
Example #3
0
 def complete_item(self, shard_num, retries):
     """Called when syncing a single item completes successfully"""
     marker = self.shard_info.get(shard_num)
     if not marker:
         # no marker recorded for this shard, so there is no bound to set
         return
     try:
         data = [
             dict(name=retry, time=worker.DEFAULT_TIME) for retry in retries
         ]
         client.set_worker_bound(self.dest_conn, self.type, marker,
                                 worker.DEFAULT_TIME, self.daemon_id,
                                 shard_num, data)
     except Exception:
         # best-effort: a missed bound update only means repeated work later.
         # (Added the missing space: the adjacent literals used to join as
         # "...some work.Traceback:".)
         log.warn(
             'could not set worker bounds, may repeat some work. '
             'Traceback:',
             exc_info=True)
Example #4
0
 def complete_item(self, shard_num, retries):
     """Called when syncing a single item completes successfully"""
     marker = self.shard_info.get(shard_num)
     if not marker:
         # no marker recorded for this shard, so there is no bound to set
         return
     try:
         data = [dict(name=retry, time=worker.DEFAULT_TIME)
                 for retry in retries]
         client.set_worker_bound(self.dest_conn,
                                 self.type,
                                 marker,
                                 worker.DEFAULT_TIME,
                                 self.daemon_id,
                                 shard_num,
                                 data)
     except Exception:
         # best-effort: a missed bound update only means repeated work later.
         # (Added the missing space: the adjacent literals used to join as
         # "...some work.Traceback:".)
         log.warn('could not set worker bounds, may repeat some work. '
                  'Traceback:', exc_info=True)
Example #5
0
    def _get_and_process_entries(self, marker, shard_num):
        """
        Sync up to self.max_entries entries, returning the number of
        entries processed and the marker of the last entry processed.

        Returns (0, '') when the shard log is empty, when any item failed
        to sync, or when recording the worker bound fails.
        """
        log_entries = client.get_meta_log(self.source_conn, shard_num,
                                          marker, self.max_entries)

        log.info('shard %d has %d entries after %r', shard_num, len(log_entries),
                 marker)
        try:
            entries = [_meta_entry_from_json(entry) for entry in log_entries]
        except KeyError:
            # log the raw entries for debugging, then let the caller decide
            log.error('log containing bad key is: %s', log_entries)
            raise

        error_encountered = False
        # de-duplicate (section, name) pairs so each item is synced once
        mentioned = {(entry.section, entry.name) for entry in entries}
        for section, name in mentioned:
            sync_result = self.sync_meta(section, name)
            if sync_result == RESULT_ERROR:
                error_encountered = True

        # Only set worker bounds if there was data synced and no
        # errors were encountered
        if entries and not error_encountered:
            try:
                client.set_worker_bound(self.dest_conn, 'metadata',
                                        shard_num, entries[-1].marker,
                                        entries[-1].timestamp,
                                        self.daemon_id)
                return len(entries), entries[-1].marker
            # narrow from bare except: don't swallow SystemExit/KeyboardInterrupt
            except Exception:
                # lazy %-args instead of eager .format(); space added after ','
                log.exception('error setting worker bound for shard %d,'
                              ' may duplicate some work later', shard_num)
        elif entries and error_encountered:
            # space added: literals used to join as "...shard N.Not setting"
            log.error('Error encountered while syncing shard %d. '
                      'Not setting worker bound, may duplicate some work later',
                      shard_num)

        return 0, ''
Example #6
0
 def set_bound(self, key, marker, retries, type_=None):
     """
     Record the sync position (bound) for *key* on the destination,
     along with the items that still need to be retried.

     Returns RESULT_SUCCESS or RESULT_ERROR; no-op (returns None) when
     *marker* is blank, since the api doesn't allow setting a bound
     with a blank marker.
     """
     if not marker:
         return
     bound_type = self.type if type_ is None else type_
     try:
         retry_data = [obj_.to_dict(entry, time=DEFAULT_TIME)
                       for entry in retries]
         client.set_worker_bound(self.dest_conn,
                                 bound_type,
                                 marker,
                                 DEFAULT_TIME,
                                 self.daemon_id,
                                 key,
                                 data=retry_data)
     except Exception:
         log.warn('error setting worker bound for key "%s",'
                  ' may duplicate some work later. Traceback:', key,
                  exc_info=True)
         return RESULT_ERROR
     return RESULT_SUCCESS
Example #7
0
    def sync_full(self, num_workers, log_lock_time):
        """
        Run a full metadata sync using num_workers worker subprocesses.

        Lists every metadata key on the source, fans the (section, key)
        pairs out to worker processes over a queue, and collects per-item
        results. Only if every item synced cleanly does it record the
        shard markers (captured before the sync started) as worker bounds.

        Raises client.HttpError if listing sections or keys fails.
        """
        try:
            sections = client.get_metadata_sections(self.src_conn)
        except client.HttpError as e:
            log.error('Error listing metadata sections: %s', e)
            raise

        # grab the latest shard markers and timestamps before we sync
        shard_info = []
        num_shards = client.num_log_shards(self.src_conn, 'metadata')
        for shard_num in xrange(num_shards):
            info = client.get_log_info(self.src_conn, 'metadata', shard_num)
            # setting an empty marker returns an error
            if info['marker']:
                shard_info.append((shard_num, info['marker'],
                                   info['last_update']))

        # collect every (section, key) pair that needs syncing
        meta_keys = []
        for section in sections:
            try:
                meta_keys += [(section, key) for key in
                              client.list_metadata_keys(self.src_conn, section)]
            except client.NotFound:
                # no keys of this type exist
                continue
            except client.HttpError as e:
                log.error('Error listing metadata for section %s: %s',
                          section, e)
                raise

        # create the work and results Queue
        workQueue = multiprocessing.Queue()
        resultQueue = multiprocessing.Queue()

        # create the worker processes
        if self._type == 'data':
            worker_cls = worker.DataWorkerFull
        else:
            worker_cls = worker.MetadataWorkerFull
        processes = [worker_cls(workQueue, resultQueue, log_lock_time, self.src,
                                self.dest) for i in xrange(num_workers)]
        for process in processes:
            # daemonize so stray workers die with the parent process
            process.daemon = True
            process.start()

        start_time = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")
        log.info('Starting full sync at %s', start_time)

        # enqueue the shards to be synced
        for meta in meta_keys:
            workQueue.put(meta)

        # add a poison pill for each worker
        for i in xrange(num_workers):
            workQueue.put(None)

        # pull the results out as they are produced; one result is expected
        # per enqueued key, so this loop also acts as the completion barrier
        errors = []
        for i in xrange(len(meta_keys)):
            log.info('%d/%d items synced', i, len(meta_keys))
            result, section, name = resultQueue.get()
            if result != worker.RESULT_SUCCESS:
                log.error('error syncing %s %r', section, name)
                errors.append((section, name))
            else:
                log.debug('synced %s %r', section, name)
        for process in processes:
            process.join()
        if errors:
            # NOTE(review): on any failure no bounds are set at all, so the
            # whole full sync is effectively repeated next time
            log.error('Encountered  errors syncing these %d entries: %s',
                      len(errors), errors)
        else:
            # everything synced cleanly: record the pre-sync markers as this
            # daemon's bounds (presumably so a later incremental sync can
            # resume from them — confirm against callers), then delete the
            # per-daemon bound entry
            for shard_num, marker, timestamp in shard_info:
                client.set_worker_bound(self.src_conn, 'metadata', shard_num,
                                        marker, timestamp, self.daemon_id)
                client.del_worker_bound(self.src_conn, 'metadata', shard_num,
                                        self.daemon_id)