Code Example #1
    def update_to(self, seq):
        kafka_seq = None
        if isinstance(seq, dict):
            assert self.sequence_format == 'json'
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)
        elif isinstance(seq, int):
            seq = str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset}
                    )
            checkpoint = self.get_or_create_wrapped(verify_unchanged=True)
            checkpoint.sequence = seq
            checkpoint.timestamp = datetime.utcnow()
            checkpoint.save()
        self._last_checkpoint = checkpoint
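The kafka_seq_to_str and str_to_kafka_seq helpers used by this and the later checkpoint examples convert between a {(topic, partition): offset} mapping and the string stored on the checkpoint. The round trip below is only a minimal illustrative sketch, assuming a JSON encoding with "topic,partition" keys; it is not the actual commcare-hq implementation.

import json


def kafka_seq_to_str(kafka_seq):
    # Sketch: encode {(topic, partition): offset} as JSON, joining the
    # tuple key into a single "topic,partition" string.
    return json.dumps({
        '{},{}'.format(topic, partition): offset
        for (topic, partition), offset in kafka_seq.items()
    })


def str_to_kafka_seq(seq):
    # Sketch of the inverse: parse the JSON back into a
    # {(topic, partition): offset} mapping with an integer partition.
    result = {}
    for key, offset in json.loads(seq).items():
        topic, partition = key.rsplit(',', 1)
        result[(topic, int(partition))] = offset
    return result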
Code Example #2
File: reindexer.py Project: solleks/commcare-hq
    def process_bulk_docs(self, docs, progress_logger):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len(docs))

        changes = [
            self._doc_to_change(doc) for doc in docs
            if self.process_deletes or not is_deletion(doc.get('doc_type'))
        ]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes,
                                          self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(bulk_changes)
        except (ESBulkIndexError, ES2BulkIndexError, ES7BulkIndexError) as e:
            pillow_logging.error("Bulk index errors\n%s", e.errors)
        except Exception:
            pillow_logging.exception("\tException sending payload to ES")
            return False

        return True
Code Example #3
    def process_bulk_docs(self, docs, progress_logger):
        if not docs:
            return True

        pillow_logging.info("Processing batch of %s docs", len(docs))
        changes = []
        for doc in docs:
            change = self._doc_to_change(doc)  # de-dupe the is_deletion check
            if self.process_deletes or not change.deleted:
                changes.append(change)
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(changes, self.doc_transform,
                                          error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error processing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(self.index_info.alias, self.index_info.type,
                                  bulk_changes)
        except BulkIndexError as e:
            pillow_logging.error("Bulk index errors\n%s", e.errors)
        except Exception as exc:
            pillow_logging.exception(
                "Error sending bulk payload to Elasticsearch: %s", exc)
            return False

        return True
Code Example #4
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s", change.id, exception)

        payloads = prepare_bulk_payloads(bulk_changes, MAX_PAYLOAD_SIZE)
        if len(payloads) > 1:
            pillow_logging.info("Payload split into %s parts" % len(payloads))

        for payload in payloads:
            success = self._send_payload_with_retries(payload)
            if not success:
                # stop the reindexer if we're unable to send a payload to ES
                return False

        return True
Code Example #5
File: manager.py Project: dimagi/commcare-hq
    def update_to(self, seq, change=None):
        kafka_seq = None
        if isinstance(seq, dict):
            assert self.sequence_format == 'json'
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)
        elif isinstance(seq, int):
            seq = str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        doc_modification_time = change.metadata.publish_timestamp if change else None
        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset, 'doc_modification_time': doc_modification_time}
                    )
            checkpoint = self.get_or_create_wrapped(verify_unchanged=True)
            checkpoint.sequence = seq
            checkpoint.timestamp = datetime.utcnow()
            checkpoint.save()
        self._last_checkpoint = checkpoint
Code Example #6
 def run(self):
     """
     Main entry point for running pillows forever.
     """
     pillow_logging.info("Starting pillow %s" % self.__class__)
     self.process_changes(since=self.get_last_checkpoint_sequence(),
                          forever=True)
Code Example #7
    def _send_payload_with_retries(self, payload):
        pillow_logging.info("Sending payload to ES")

        retries = 0
        bulk_start = datetime.utcnow()
        success = False
        while retries < MAX_TRIES:
            if retries:
                retry_time = (datetime.utcnow() - bulk_start
                              ).seconds + retries * RETRY_TIME_DELAY_FACTOR
                pillow_logging.warning("\tRetrying in %s seconds" % retry_time)
                time.sleep(retry_time)
                pillow_logging.warning("\tRetrying now ...")
                # reset timestamp when looping again
                bulk_start = datetime.utcnow()

            try:
                self.es.bulk(payload)
                success = True
                break
            except Exception:
                retries += 1
                pillow_logging.exception("\tException sending payload to ES")

        return success
Code Example #8
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes, self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s", change.id, exception)

        max_payload_size = pow(10, 8)  # ~ 100Mb
        payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
        if len(payloads) > 1:
            pillow_logging.info("Payload split into %s parts" % len(payloads))

        for payload in payloads:
            success = self._send_payload_with_retries(payload)
            if not success:
                # stop the reindexer if we're unable to send a payload to ES
                return False

        return True
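Examples #4 and #8 above cap the size of each request with prepare_bulk_payloads before sending anything to Elasticsearch. The version below is a minimal sketch of that idea, assuming each change serializes to one JSON line and a new payload starts whenever the byte budget would be exceeded; the real helper may differ in detail.

import json


def prepare_bulk_payloads(bulk_changes, max_payload_size):
    # Sketch: serialize each change to a JSON line and cut a new payload
    # whenever adding the next line would exceed max_payload_size bytes.
    payloads = []
    current_lines = []
    current_size = 0
    for change in bulk_changes:
        line = json.dumps(change) + '\n'
        if current_lines and current_size + len(line) > max_payload_size:
            payloads.append(''.join(current_lines))
            current_lines = []
            current_size = 0
        current_lines.append(line)
        current_size += len(line)
    if current_lines:
        payloads.append(''.join(current_lines))
    return payloads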
Code Example #9
 def _update_modified_since(self, timestamp):
     """
     Find any data sources that have been modified since the last time this was bootstrapped
     and update the in-memory references.
     """
     for data_source in self.data_source_provider.get_data_sources_modified_since(timestamp):
         pillow_logging.info(f'updating modified registry data source: {data_source.domain}: {data_source._id}')
         self._add_or_update_data_source(data_source)
Code Example #10
 def update_to(self, seq):
     pillow_logging.info("(%s) setting checkpoint: %s" %
                         (self.checkpoint_id, seq))
     checkpoint = self.get_or_create_wrapped(verify_unchanged=True)
     checkpoint.sequence = seq
     checkpoint.timestamp = datetime.utcnow()
     checkpoint.save()
     self._last_checkpoint = checkpoint
Code Example #11
 def run(self):
     """
     Main entry point for running pillows forever.
     """
     pillow_logging.info("Starting pillow %s" % self.__class__)
     with configure_scope() as scope:
         scope.set_tag("pillow_name", self.get_name())
     self.process_changes(since=self.get_last_checkpoint_sequence(), forever=True)
Code Example #12
File: manager.py Project: saketkanth/commcare-hq
 def update_to(self, seq):
     pillow_logging.info(
         "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
     )
     checkpoint = self.get_or_create_wrapped(verify_unchanged=True).document
     checkpoint.sequence = seq
     checkpoint.timestamp = datetime.utcnow()
     checkpoint.save()
     self._last_checkpoint = checkpoint
Code Example #13
    def reindex(self):
        for i, change in enumerate(self.change_provider.iter_all_changes(start_from=self.start_from)):
            try:
                self.pillow.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s", change.id)

            if i % 1000 == 0:  # log progress every 1,000 docs
                pillow_logging.info("Processed %s docs", i)
Code Example #14
File: es_utils.py Project: bazuzi/commcare-hq
def completely_initialize_pillow_index(pillow):
    """
    This utility can be used to initialize the elastic index and mapping for a pillow
    """
    index_exists = pillow_index_exists(pillow)
    if not index_exists:
        create_index_for_pillow(pillow)
    pillow_logging.info("Pillowtop [%s] Initializing mapping in ES" % pillow.get_name())
    initialize_mapping_if_necessary(pillow)
Code Example #15
    def reindex(self):
        for i, change in enumerate(self.change_provider.iter_all_changes(start_from=self.start_from)):
            try:
                self.pillow.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s", change.id)

            if i % 1000 == 0:  # log progress every 1,000 docs
                pillow_logging.info("Processed %s docs", i)
Code Example #16
File: manager.py Project: bazuzi/commcare-hq
 def update_to(self, seq):
     pillow_logging.info(
         "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
     )
     checkpoint = self.get_or_create(verify_unchanged=True).document
     checkpoint['seq'] = seq
     checkpoint['timestamp'] = get_formatted_current_timestamp()
     self._manager.update_checkpoint(self.checkpoint_id, checkpoint)
     self._last_checkpoint = checkpoint
Code Example #17
    def reindex(self):
        for i, change in enumerate(self.change_provider.iter_all_changes()):
            try:
                # below works because signature is same for pillow and processor
                self.pillow_or_processor.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s", change.id)

            if i % 1000 == 0:  # log progress every 1,000 docs
                pillow_logging.info("Processed %s docs", i)
Code Example #18
File: reindexer.py Project: dimagi/commcare-hq
    def reindex(self):
        for i, change in enumerate(self.change_provider.iter_all_changes()):
            try:
                # below works because signature is same for pillow and processor
                self.pillow_or_processor.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s", change.id)

            if i % 1000 == 0:  # log progress every 1,000 docs
                pillow_logging.info("Processed %s docs", i)
Code Example #19
File: es_utils.py Project: soitun/commcare-hq
def initialize_index(es, index_info):
    index = index_info.index
    mapping = index_info.mapping
    mapping['_meta']['created'] = datetime.isoformat(datetime.utcnow())
    meta = copy(index_info.meta)
    meta.update({'mappings': {index_info.type: mapping}})

    pillow_logging.info("Initializing elasticsearch index for [%s]" %
                        index_info.type)
    es.indices.create(index=index, body=meta)
    set_index_normal_settings(es, index)
Code Example #20
 def _add_data_sources_to_table_adapters(self, new_data_sources):
     for new_data_source in new_data_sources:
         pillow_logging.info(f'updating modified data source: {new_data_source.domain}: {new_data_source._id}')
         domain_adapters = self.table_adapters_by_domain[new_data_source.domain]
         # remove any previous adapters if they existed
         domain_adapters = [
             adapter for adapter in domain_adapters if adapter.config._id != new_data_source._id
         ]
         # add a new one
         domain_adapters.append(self._get_indicator_adapter(new_data_source))
         # update dictionary
         self.table_adapters_by_domain[new_data_source.domain] = domain_adapters
Code Example #21
File: listener.py Project: dimagi/pillowtop
 def process_bulk(self, changes):
     if not changes:
         return
     self.allow_updates = False
     self.bulk = True
     bstart = datetime.utcnow()
     bulk_payload = '\n'.join(map(simplejson.dumps, self.bulk_builder(changes))) + "\n"
     pillow_logging.info(
         "%s,prepare_bulk,%s" % (self.get_name(), str(ms_from_timedelta(datetime.utcnow() - bstart) / 1000.0)))
     send_start = datetime.utcnow()
     self.send_bulk(bulk_payload)
     pillow_logging.info(
         "%s,send_bulk,%s" % (self.get_name(), str(ms_from_timedelta(datetime.utcnow() - send_start) / 1000.0)))
Code Example #22
    def update_checkpoint(self, change, context):
        if self.should_update_checkpoint(context):
            context.reset()
            self.checkpoint.update_to(self.get_new_seq(change))
            self.last_update = datetime.utcnow()
            if self.checkpoint_callback:
                self.checkpoint_callback.checkpoint_updated()
            return True
        elif (datetime.utcnow() - self.last_log).total_seconds() > 10:
            self.last_log = datetime.utcnow()
            pillow_logging.info("Heartbeat: %s", self.get_new_seq(change))

        return False
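Example #22 only persists the checkpoint when should_update_checkpoint allows it, and otherwise emits a periodic heartbeat log. The loop below is a hypothetical usage sketch showing where update_checkpoint would typically sit in a change-processing loop; change_feed, processor, and context are assumed names, not the actual pillow interface.

def run_pillow_sketch(change_feed, processor, checkpoint_manager, context, since):
    # Hypothetical driver loop: process each change, then let the
    # checkpoint manager decide whether progress should be persisted.
    for change in change_feed.iter_changes(since=since):
        processor.process_change(change)
        checkpoint_manager.update_checkpoint(change, context)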
Code Example #23
File: manager.py Project: alemat/commcare-hq
    def update_to(self, seq):
        if isinstance(seq, dict):
            seq = json.dumps(seq)
        elif isinstance(seq, int):
            seq = str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        checkpoint = self.get_or_create_wrapped(verify_unchanged=True)
        checkpoint.sequence = seq
        checkpoint.timestamp = datetime.utcnow()
        checkpoint.save()
        self._last_checkpoint = checkpoint
Code Example #24
File: interface.py Project: solleks/commcare-hq
 def run(self):
     """
     Main entry point for running pillows forever.
     """
     pillow_logging.info("Starting pillow %s" % self.__class__)
     with configure_scope() as scope:
         scope.set_tag("pillow_name", self.get_name())
     if self.is_dedicated_migration_process:
         for processor in self.processors:
             processor.bootstrap_if_needed()
         time.sleep(10)
     else:
         self.process_changes(since=self.get_last_checkpoint_sequence(),
                              forever=True)
Code Example #25
File: es_utils.py Project: solleks/commcare-hq
def initialize_index(es, index_info):
    index = index_info.index
    mapping = index_info.mapping
    mapping['_meta']['created'] = datetime.isoformat(datetime.utcnow())
    meta = copy(index_info.meta)
    if settings.ELASTICSEARCH_MAJOR_VERSION == 7:
        mapping = transform_for_es7(mapping)
        meta.update({'mappings': mapping})
    else:
        meta.update({'mappings': {index_info.type: mapping}})

    pillow_logging.info("Initializing elasticsearch index for [%s]" % index_info.type)
    es.indices.create(index=index, body=meta)
    set_index_normal_settings(es, index)
Code Example #26
 def update_to(self, seq):
     kafka_seq = seq
     seq = kafka_seq_to_str(seq)
     pillow_logging.info(
         "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
     )
     with transaction.atomic():
         if kafka_seq:
             for topic_partition, offset in kafka_seq.items():
                 KafkaCheckpoint.objects.update_or_create(
                     checkpoint_id=self.checkpoint_id,
                     topic=topic_partition[0],
                     partition=topic_partition[1],
                     defaults={'offset': offset}
                 )
Code Example #27
File: manager.py Project: kkrampa/commcare-hq
    def update_to(self, seq, change=None):
        if isinstance(seq, six.string_types):
            kafka_seq = str_to_kafka_seq(seq)
        else:
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        doc_modification_time = change.metadata.publish_timestamp if change else None

        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset, 'doc_modification_time': doc_modification_time}
                    )
Code Example #28
    def update_to(self, seq, change=None):
        if isinstance(seq, str):
            kafka_seq = str_to_kafka_seq(seq)
        else:
            kafka_seq = seq
            seq = kafka_seq_to_str(seq)

        pillow_logging.info(
            "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq)
        )
        doc_modification_time = change.metadata.publish_timestamp if change else None

        with transaction.atomic():
            if kafka_seq:
                for topic_partition, offset in kafka_seq.items():
                    KafkaCheckpoint.objects.update_or_create(
                        checkpoint_id=self.checkpoint_id,
                        topic=topic_partition[0],
                        partition=topic_partition[1],
                        defaults={'offset': offset, 'doc_modification_time': doc_modification_time}
                    )
Code Example #29
File: pillow.py Project: dankohn/commcare-hq
    def _rebuild_sql_tables(self, adapters):
        tables_by_engine = defaultdict(dict)
        all_adapters = []
        for adapter in adapters:
            if getattr(adapter, 'all_adapters', None):
                all_adapters.extend(adapter.all_adapters)
            else:
                all_adapters.append(adapter)
        for adapter in all_adapters:
            tables_by_engine[adapter.engine_id][
                adapter.get_table().name] = adapter

        _assert = soft_assert(notify_admins=True)
        _notify_rebuild = lambda msg, obj: _assert(False, msg, obj)

        for engine_id, table_map in tables_by_engine.items():
            table_names = list(table_map)
            engine = connection_manager.get_engine(engine_id)

            diffs = get_table_diffs(engine, table_names,
                                    get_metadata(engine_id))

            tables_to_act_on = get_tables_rebuild_migrate(diffs)
            for table_name in tables_to_act_on.rebuild:
                pillow_logging.info("[rebuild] Rebuilding table: %s",
                                    table_name)
                sql_adapter = table_map[table_name]
                table_diffs = [
                    diff for diff in diffs if diff.table_name == table_name
                ]
                if not sql_adapter.config.is_static:
                    try:
                        self.rebuild_table(sql_adapter, table_diffs)
                    except TableRebuildError as e:
                        _notify_rebuild(str(e), sql_adapter.config.to_json())
                else:
                    self.rebuild_table(sql_adapter, table_diffs)

            self.migrate_tables(engine, diffs, tables_to_act_on.migrate,
                                table_map)
Code Example #30
 def migrate_tables(self, engine, diffs, table_names, adapters_by_table):
     migration_diffs = [diff for diff in diffs if diff.table_name in table_names]
     for table in table_names:
         adapter = adapters_by_table[table]
         pillow_logging.info("[rebuild] Using config: %r", adapter.config)
         pillow_logging.info("[rebuild] sqlalchemy metadata: %r", get_metadata(adapter.engine_id).tables[table])
         pillow_logging.info("[rebuild] sqlalchemy table: %r", adapter.get_table())
     changes = migrate_tables(engine, migration_diffs)
     for table, diffs in changes.items():
         adapter = adapters_by_table[table]
         pillow_logging.info(
             "[rebuild] Migrating table: %s, from config %s at rev %s",
             table, adapter.config._id, adapter.config._rev
         )
         adapter.log_table_migrate(source='pillowtop', diffs=diffs)
Code Example #31
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes,
                                          self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(bulk_changes)
        except Exception:
            pillow_logging.exception("\tException sending payload to ES")
            return False

        return True
Code Example #32
    def _send_payload_with_retries(self, payload):
        pillow_logging.info("Sending payload to ES")

        retries = 0
        bulk_start = datetime.utcnow()
        success = False
        while retries < MAX_TRIES:
            if retries:
                retry_time = (datetime.utcnow() - bulk_start).seconds + retries * RETRY_TIME_DELAY_FACTOR
                pillow_logging.warning("\tRetrying in %s seconds" % retry_time)
                time.sleep(retry_time)
                pillow_logging.warning("\tRetrying now ...")
                # reset timestamp when looping again
                bulk_start = datetime.utcnow()

            try:
                self.es.bulk(payload)
                success = True
                break
            except Exception:
                retries += 1
                pillow_logging.exception("\tException sending payload to ES")

        return success
Code Example #33
    def run(self):
        """
        Main entry point for running pillows forever.
        """
        pillow_logging.info("Starting pillow %s" % self.__class__)
        with configure_scope() as scope:
            scope.set_tag("pillow_name", self.get_name())

        since = self.get_last_checkpoint_sequence()
        while True:
            pillow_logging.info(
                f"Processing from change feed starting at {since}")
            self.process_changes(since=since)
            since = self.get_last_checkpoint_sequence()
            pillow_logging.info(
                f"Change feed ended at {since}. Pausing until next message.")
            self.wait_for_change(since)
            pillow_logging.info("Next message arrived.")
Code Example #34
File: es_utils.py Project: dimagi/commcare-hq
def initialize_mapping_if_necessary(es, index_info):
    """
    Initializes the elasticsearch mapping for this pillow if it is not found.
    """
    if not mapping_exists(es, index_info):
        pillow_logging.info("Initializing elasticsearch mapping for [%s]" % index_info.type)
        mapping = copy(index_info.mapping)
        mapping['_meta']['created'] = datetime.isoformat(datetime.utcnow())
        mapping_res = es.indices.put_mapping(index_info.type, {index_info.type: mapping}, index=index_info.index)
        if mapping_res.get('ok', False) and mapping_res.get('acknowledged', False):
            # API confirms OK, trust it.
            pillow_logging.info("Mapping set: [%s] %s" % (index_info.type, mapping_res))
    else:
        pillow_logging.info("Elasticsearch mapping for [%s] was already present." % index_info.type)
Code Example #35
File: es_utils.py Project: lskdev/commcare-hq
def initialize_mapping_if_necessary(es, index_info):
    """
    Initializes the elasticsearch mapping for this pillow if it is not found.
    """
    if not mapping_exists(es, index_info):
        pillow_logging.info("Initializing elasticsearch mapping for [%s]" % index_info.type)
        mapping = copy(index_info.mapping)
        mapping['_meta']['created'] = datetime.isoformat(datetime.utcnow())
        mapping_res = es.indices.put_mapping(index_info.type, {index_info.type: mapping}, index=index_info.index)
        if mapping_res.get('ok', False) and mapping_res.get('acknowledged', False):
            # API confirms OK, trust it.
            pillow_logging.info("Mapping set: [%s] %s" % (index_info.type, mapping_res))
    else:
        pillow_logging.info("Elasticsearch mapping for [%s] was already present." % index_info.type)
Code Example #36
File: es_utils.py Project: bazuzi/commcare-hq
def initialize_mapping_if_necessary(pillow):
    """
    Initializes the elasticsearch mapping for this pillow if it is not found.
    """
    es = pillow.get_es_new()
    if not pillow_mapping_exists(pillow):
        pillow_logging.info("Initializing elasticsearch mapping for [%s]" % pillow.es_type)
        mapping = copy(pillow.default_mapping)
        mapping['_meta']['created'] = datetime.isoformat(datetime.utcnow())
        mapping_res = es.indices.put_mapping(pillow.es_index, pillow.es_type, {pillow.es_type: mapping})
        if mapping_res.get('ok', False) and mapping_res.get('acknowledged', False):
            # API confirms OK, trust it.
            pillow_logging.info("Mapping set: [%s] %s" % (pillow.es_type, mapping_res))
    else:
        pillow_logging.info("Elasticsearch mapping for [%s] was already present." % pillow.es_type)
Code Example #37
File: listener.py Project: philipkaare/commcare-hq
    def process_bulk(self, changes):
        if not changes:
            return
        self.allow_updates = False
        self.bulk = True
        bstart = datetime.utcnow()
        bulk_changes = self.bulk_builder(changes)

        max_payload_size = pow(10, 8)  # ~ 100Mb
        payloads = prepare_bulk_payloads(bulk_changes, max_payload_size)
        if len(payloads) > 1:
            pillow_logging.info("%s,payload split into %s parts" % (self.get_name(), len(payloads)))

        pillow_logging.info(
            "%s,prepare_bulk,%s" % (self.get_name(), str(ms_from_timedelta(datetime.utcnow() - bstart) / 1000.0)))
        send_start = datetime.utcnow()
        for payload in payloads:
            self.send_bulk(payload)
        pillow_logging.info(
            "%s,send_bulk,%s" % (self.get_name(), str(ms_from_timedelta(datetime.utcnow() - send_start) / 1000.0)))
Code Example #38
def _set_checkpoint(pillow):
    checkpoint_value = pillow.get_change_feed().get_checkpoint_value()
    pillow_logging.info('setting checkpoint to {}'.format(checkpoint_value))
    pillow.checkpoint.update_to(checkpoint_value)
Code Example #39
File: interface.py Project: tlwakwella/commcare-hq
 def run(self):
     """
     Main entry point for running pillows forever.
     """
     pillow_logging.info("Starting pillow %s" % self.__class__)
     self.process_changes(since=self.get_last_checkpoint_sequence(), forever=True)
Code Example #40
File: pillow.py Project: nnestle/commcare-hq
 def process_change(self, change, is_retry_attempt=False):
     # do nothing
     if self._changes_processed % KAFKA_CHECKPOINT_FREQUENCY == 0:
         # only log a small amount to avoid clogging up supervisor
         pillow_logging.info('Processed change {}: {}'.format(self._changes_processed, change))
     self._changes_processed += 1
Code Example #41
def _set_checkpoint(pillow):
    checkpoint_value = pillow.get_change_feed().get_checkpoint_value()
    pillow_logging.info('setting checkpoint to {}'.format(checkpoint_value))
    pillow.checkpoint.update_to(checkpoint_value)