Example #1
def handle_pillow_error(pillow, change, exception):
    from pillow_retry.models import PillowError
    error_id = None
    e = None

    # always retry document missing errors, because the error is likely with couch
    if pillow.retry_errors or isinstance(exception, DocumentMissingError):
        try:
            error = PillowError.get_or_create(change, pillow)
        except (DatabaseError, InterfaceError) as err:
            # bind the error to an outer name: `err` is unbound as soon as
            # this except block exits, so `e` carries it to the re-raise below
            e = err
            error_id = 'PillowError.get_or_create failed'
        else:
            error.add_attempt(exception, sys.exc_info()[2])
            error.save()
            error_id = error.id

    pillow_logging.exception(
        u"[%s] Error on change: %s, %s. Logged as: %s" % (
            pillow.get_name(),
            change['id'],
            exception,
            error_id
        )
    )

    if e:
        raise e
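
A minimal sketch of a call site for context, assuming a driver loop of the shape the reindexer examples below use; process_changes, change_feed, and iter_changes are stand-in names, not part of the project code:

def process_changes(pillow, change_feed):
    # hypothetical driver loop; real pillows also checkpoint their progress
    for change in change_feed.iter_changes():
        try:
            pillow.process_change(change)
        except Exception as exception:
            handle_pillow_error(pillow, change, exception)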
Example #2
    def _send_payload_with_retries(self, payload):
        pillow_logging.info("Sending payload to ES")

        retries = 0
        bulk_start = datetime.utcnow()
        success = False
        while retries < MAX_TRIES:
            if retries:
                retry_time = ((datetime.utcnow() - bulk_start).seconds
                              + retries * RETRY_TIME_DELAY_FACTOR)
                pillow_logging.warning("\tRetrying in %s seconds" % retry_time)
                time.sleep(retry_time)
                pillow_logging.warning("\tRetrying now ...")
                # reset timestamp when looping again
                bulk_start = datetime.utcnow()

            try:
                self.es.bulk(payload)
                success = True
                break
            except Exception:
                retries += 1
                pillow_logging.exception("\tException sending payload to ES")

        return success
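
MAX_TRIES and RETRY_TIME_DELAY_FACTOR above are module-level constants that the snippet does not show. With illustrative values (assumed here, not the project's), the wait between attempts grows roughly linearly:

# assumed values, for illustration only
MAX_TRIES = 3
RETRY_TIME_DELAY_FACTOR = 15

# ignoring the elapsed send time, attempt n sleeps about
# n * RETRY_TIME_DELAY_FACTOR seconds before retrying
for retries in range(1, MAX_TRIES):
    print(retries * RETRY_TIME_DELAY_FACTOR)  # -> 15, 30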
Example #3
    def process_changes_chunk(self, changes_chunk):
        with self._datadog_timing('bulk_extract'):
            bad_changes, docs = bulk_fetch_changes_docs(changes_chunk)

        with self._datadog_timing('bulk_transform'):
            changes_to_process = {
                change.id: change
                for change in changes_chunk
                if change.document and not self.doc_filter_fn(change.document)
            }
            retry_changes = list(bad_changes)

            error_collector = ErrorCollector()
            es_actions = build_bulk_payload(self.index_info,
                                            list(changes_to_process.values()),
                                            self.doc_transform_fn,
                                            error_collector)
            error_changes = error_collector.errors

        try:
            with self._datadog_timing('bulk_load'):
                _, errors = self.es_interface.bulk_ops(
                    es_actions, raise_on_error=False, raise_on_exception=False)
        except Exception as e:
            pillow_logging.exception("[%s] ES bulk load error")
            error_changes.extend([(change, e)
                                  for change in changes_to_process.values()])
        else:
            for change_id, error_msg in get_errors_with_ids(errors):
                error_changes.append((changes_to_process[change_id],
                                      BulkDocException(error_msg)))
        return retry_changes, error_changes
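
Several examples here pass an ErrorCollector into build_bulk_payload and then read its errors attribute as (change, exception) pairs. A minimal sketch consistent with that usage; the add_error method name is an assumption:

class ErrorCollector(object):
    """Accumulates (change, exception) pairs for docs that fail transform."""

    def __init__(self):
        self.errors = []

    def add_error(self, change, exception):
        # assumed hook: build_bulk_payload would report each failed doc here
        self.errors.append((change, exception))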
Example #4
    def process_bulk_docs(self, docs, progress_logger):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len(docs))

        changes = [
            self._doc_to_change(doc) for doc in docs
            if self.process_deletes or not is_deletion(doc.get('doc_type'))
        ]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes,
                                          self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(bulk_changes)
        except (ESBulkIndexError, ES2BulkIndexError, ES7BulkIndexError) as e:
            pillow_logging.error("Bulk index errors\n%s", e.errors)
        except Exception:
            pillow_logging.exception("\tException sending payload to ES")
            return False

        return True
Example #5
    def process_bulk_docs(self, docs, progress_logger):
        if not docs:
            return True

        pillow_logging.info("Processing batch of %s docs", len(docs))
        changes = []
        for doc in docs:
            change = self._doc_to_change(doc)  # de-dupe the is_deletion check
            if self.process_deletes or not change.deleted:
                changes.append(change)
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(changes, self.doc_transform,
                                          error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error processing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(self.index_info.alias, self.index_info.type,
                                  bulk_changes)
        except BulkIndexError as e:
            pillow_logging.error("Bulk index errors\n%s", e.errors)
        except Exception as exc:
            pillow_logging.exception(
                "Error sending bulk payload to Elasticsearch: %s", exc)
            return False

        return True
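
For the change.deleted check above to work, _doc_to_change has to compute the deletion flag once when it builds the change. A hedged sketch, assuming Change is a simple value object with id, document, and deleted attributes:

    def _doc_to_change(self, doc):
        # sketch only: the real Change class lives in pillowtop
        return Change(
            id=doc['_id'],
            document=doc,
            deleted=is_deletion(doc.get('doc_type')),
        )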
Example #6
    def reindex(self):
        for change in self.change_provider.iter_all_changes(
                start_from=self.start_from):
            try:
                self.pillow.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s",
                                         change.id)
Example #7
    def reindex(self):
        for i, change in enumerate(self.change_provider.iter_all_changes(start_from=self.start_from)):
            try:
                self.pillow.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s", change.id)

            # log progress every 1000 changes
            if i % 1000 == 0:
                pillow_logging.info("Processed %s docs", i)
Example #9
    def reindex(self):
        for i, change in enumerate(self.change_provider.iter_all_changes()):
            try:
                # below works because signature is same for pillow and processor
                self.pillow_or_processor.process_change(change)
            except Exception:
                pillow_logging.exception("Unable to process change: %s", change.id)

            # log progress every 1000 changes
            if i % 1000 == 0:
                pillow_logging.info("Processed %s docs", i)
Example #11
def _exclude_missing_domains(configs):
    from corehq.apps.es import DomainES
    from corehq.elastic import ESError

    config_domains = {conf.domain for conf in configs}
    try:
        domains_present = set(DomainES().in_domains(config_domains).values_list('name', flat=True))
    except ESError:
        pillow_logging.exception("Unable to filter configs by domain")
        return configs

    return [config for config in configs if config.domain in domains_present]
Example #12
def handle_pillow_error(pillow, change, exception):
    from pillow_retry.models import PillowError
    error_id = None
    if pillow.retry_errors:
        error = PillowError.get_or_create(change, pillow)
        error.add_attempt(exception, sys.exc_info()[2])
        error.save()
        error_id = error.id

    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" %
        (pillow.get_name(), change['id'], exception, error_id))
Example #13
def save_document(indicator_config_ids, doc, from_pillow_id):
    error = PillowError.objects.get(doc_id=doc['_id'], pillow=from_pillow_id)
    try:
        for config_id in indicator_config_ids:
            config = _get_config_by_id(config_id)
            adapter = get_indicator_adapter(config, can_handle_laboratory=True)
            adapter.best_effort_save(doc)
    except Exception as exception:
        error.add_attempt(exception, sys.exc_info()[2])
        error.save()
        error_id = error.id
        pillow_logging.exception(
            "[%s] Error on change: %s, %s. Logged as: %s" %
            (from_pillow_id, doc['_id'], exception, error_id))
    else:
        error.delete()
Example #14
def handle_pillow_error(pillow, change, exception):
    from couchdbkit import ResourceNotFound
    from pillow_retry.models import PillowError
    meta = None
    if hasattr(pillow, 'get_couch_db'):
        try:
            meta = pillow.get_couch_db().show('domain_shows/domain_date',
                                              change['id'])
        except ResourceNotFound:
            pass

    error = PillowError.get_or_create(change, pillow, change_meta=meta)
    error.add_attempt(exception, sys.exc_info()[2])
    error.save()
    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" %
        (pillow.get_name(), change['id'], exception, error.id))
Example #15
def handle_pillow_error(pillow, change, exception):
    from pillow_retry.models import PillowError
    error_id = None
    if pillow.retry_errors:
        error = PillowError.get_or_create(change, pillow)
        error.add_attempt(exception, sys.exc_info()[2])
        error.save()
        error_id = error.id

    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" % (
            pillow.get_name(),
            change['id'],
            exception,
            error_id
        )
    )
Example #16
    def _handle_pillow_error(self, change, exception):
        try:
            # This breaks the module boundary by using a show function defined
            # in commcare-hq, but it was decided that it wasn't worth the
            # effort to maintain the separation.
            meta = self.get_couch_db().show('domain/domain_date', change['id'])
        except ResourceNotFound:
            # Show function does not exist
            meta = None
        error = PillowError.get_or_create(change, self, change_meta=meta)
        error.add_attempt(exception, sys.exc_info()[2])
        error.save()
        pillow_logging.exception(
            "[%s] Error on change: %s, %s. Logged as: %s" % (
                self.get_name(),
                change['id'],
                exception,
                error.id
            )
        )
Example #17
def handle_pillow_error(pillow, change, exception):
    from pillow_retry.models import PillowError

    pillow_logging.exception("[%s] Error on change: %s, %s" % (
        pillow.get_name(),
        change['id'],
        exception,
    ))

    datadog_counter('commcare.change_feed.changes.exceptions', tags=[
        'pillow_name:{}'.format(pillow.get_name()),
    ])

    # keep track of error attempt count
    change.increment_attempt_count()

    # always retry document missing errors, because the error is likely with couch
    if pillow.retry_errors or isinstance(exception, DocumentMissingError):
        error = PillowError.get_or_create(change, pillow)
        error.add_attempt(exception, sys.exc_info()[2], change.metadata)
        error.save()
Example #19
def handle_pillow_error(pillow, change, exception):
    from couchdbkit import ResourceNotFound
    from pillow_retry.models import PillowError
    meta = None
    if hasattr(pillow, 'get_couch_db'):
        try:
            meta = pillow.get_couch_db().show('domain_shows/domain_date', change['id'])
        except ResourceNotFound:
            pass

    error = PillowError.get_or_create(change, pillow, change_meta=meta)
    error.add_attempt(exception, sys.exc_info()[2])
    error.save()
    pillow_logging.exception(
        "[%s] Error on change: %s, %s. Logged as: %s" % (
            pillow.get_name(),
            change['id'],
            exception,
            error.id
        )
    )
Example #20
    def process_bulk_docs(self, docs):
        if len(docs) == 0:
            return True

        pillow_logging.info("Processing batch of %s docs", len((docs)))

        changes = [self._doc_to_change(doc) for doc in docs]
        error_collector = ErrorCollector()

        bulk_changes = build_bulk_payload(self.index_info, changes,
                                          self.doc_transform, error_collector)

        for change, exception in error_collector.errors:
            pillow_logging.error("Error procesing doc %s: %s (%s)", change.id,
                                 type(exception), exception)

        es_interface = ElasticsearchInterface(self.es)
        try:
            es_interface.bulk_ops(bulk_changes)
        except Exception:
            pillow_logging.exception("\tException sending payload to ES")
            return False

        return True
Example #21
def send_to_elasticsearch(index_info, doc_type, doc_id, es_getter, name, data=None,
                          delete=False, es_merge_update=False):
    """
    More fault tolerant es.put method
    kwargs:
        es_merge_update: Set this to True to use Elasticsearch.update instead of Elasticsearch.index
            which merges existing ES doc and current update. If this is set to False, the doc will be replaced

    """
    alias = index_info.alias
    data = data if data is not None else {}
    current_tries = 0
    es_interface = _get_es_interface(es_getter)
    retries = _retries()
    propagate_failure = _propagate_failure()
    while current_tries < retries:
        try:
            if delete:
                es_interface.delete_doc(alias, doc_type, doc_id)
            else:
                if es_merge_update:
                    # The `retry_on_conflict` param is only valid on `update`
                    # requests. ES <5.x was lenient of its presence on `index`
                    # requests, ES >=5.x is not.
                    params = {'retry_on_conflict': 2}
                    es_interface.update_doc_fields(alias, doc_type, doc_id,
                                                   fields=data, params=params)
                else:
                    # use the same index API to create or update doc
                    es_interface.index_doc(alias, doc_type, doc_id, doc=data)
            break
        except ConnectionError:
            current_tries += 1
            if current_tries == retries:
                message = "[%s] Max retry error on %s/%s/%s"
                args = (name, alias, doc_type, doc_id)
                if propagate_failure:
                    raise PillowtopIndexingError(message % args)
                else:
                    pillow_logging.exception(message, *args)
            else:
                pillow_logging.exception("[%s] put_robust error attempt %s/%s", name, current_tries, retries)

            _sleep_between_retries(current_tries)
        except RequestError:
            message = "[%s] put_robust error: %s/%s/%s"
            args = (name, alias, doc_type, doc_id)
            if propagate_failure:
                raise PillowtopIndexingError(message % args)
            else:
                pillow_logging.exception(message, *args)
            break
        except ConflictError:
            break  # ignore the error if a doc already exists when trying to create it in the index
        except NotFoundError:
            break
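
A sketch of a partial-update call through this helper; index_info, the doc identifiers, and get_es_new are stand-ins, since their construction happens outside these examples:

send_to_elasticsearch(
    index_info,                        # stand-in index metadata object
    doc_type='case',                   # hypothetical doc type
    doc_id='abc123',                   # hypothetical doc id
    es_getter=get_es_new,              # stand-in for the ES client getter
    name='CaseToElasticsearchPillow',  # label that appears in the log messages
    data={'closed': True},
    es_merge_update=True,              # merge into the existing doc rather than replace it
)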