コード例 #1
0
ファイル: elastic.py プロジェクト: xbryanc/commcare-hq
    def process_change(self, pillow_instance, change):
        """Index one change in Elasticsearch, or drop it from the index on deletion."""
        # Deletions short-circuit: remove the doc from the index if present.
        if change.deleted and change.id:
            self._delete_doc_if_exists(change.id)
            return

        doc = change.get_document()

        ensure_document_exists(change)
        ensure_matched_revisions(change)

        # Skip missing docs and docs rejected by the optional filter.
        should_skip = doc is None or bool(self.doc_filter_fn and self.doc_filter_fn(doc))
        if should_skip:
            return

        # Transform for ES, then write; `update` picks update-vs-create
        # based on whether the doc already exists in the index.
        transformed_doc = self.doc_transform_fn(doc)
        send_to_elasticsearch(
            index=self.index_info.index,
            doc_type=self.index_info.type,
            doc_id=change.id,
            es_getter=self.es_getter,
            name=pillow_instance.get_name(),
            data=transformed_doc,
            update=self._doc_exists(change.id),
        )
コード例 #2
0
    def process_change(self, pillow_instance, change):
        """Send a change's document to Elasticsearch (or delete it on deletion).

        :param pillow_instance: pillow whose name is recorded with the ES write
        :param change: the change-feed change to process
        """
        if change.deleted and change.id:
            self._delete_doc_if_exists(change.id)
            return

        doc = change.get_document()

        ensure_document_exists(change)
        ensure_matched_revisions(change)

        # Fix: also bail out when the document is missing. Previously a None
        # doc fell through to doc_filter_fn/doc_transform_fn and would crash;
        # the sibling processor variants in this project guard on `doc is None`.
        if doc is None or (self.doc_filter_fn and self.doc_filter_fn(doc)):
            return

        # prepare doc for es
        doc_ready_to_save = self.doc_transform_fn(doc)
        # send it across; `update` selects update-vs-create by existence check
        send_to_elasticsearch(
            index=self.index_info.index,
            doc_type=self.index_info.type,
            doc_id=change.id,
            es_getter=self.es_getter,
            name=pillow_instance.get_name(),
            data=doc_ready_to_save,
            update=self._doc_exists(change.id),
        )
コード例 #3
0
ファイル: pillow.py プロジェクト: developer11092/commcare-hq
    def get_docs_for_changes(changes, domain):
        """Bulk-fetch the documents backing *changes*, all belonging to *domain*.

        Returns ``(retry_changes, docs)``: the set of changes whose docs were
        missing or revision-mismatched (to be retried serially) and the list
        of documents that were found.
        """
        # Group changes by data source so each group shares one document store.
        grouped = defaultdict(list)
        for change in changes:
            assert change.metadata.domain == domain
            grouped[change.metadata.data_source_name].append(change)

        # Fetch: query the store for changes that need it, reuse the rest.
        docs = []
        for group in grouped.values():
            store = group[0].document_store
            ids_to_fetch = [c.id for c in group if c.should_fetch_document()]
            fetched = list(store.iter_documents(ids_to_fetch))
            already_loaded = [c.document for c in group if not c.should_fetch_document()]
            docs.extend(fetched + already_loaded)

        # Flag changes whose docs are missing or whose revisions don't match.
        retry_changes = set()
        docs_by_id = {doc['_id']: doc for doc in docs}
        for change in changes:
            found = docs_by_id.get(change.id)
            if found is None:
                # DocumentMissingError cannot be raised in bulk mode, so let
                # the pillow fall back to serial mode to surface the error.
                retry_changes.add(change)
                continue
            # Cache the doc on the change so later lookups are avoided.
            change.set_document(found)
            try:
                ensure_matched_revisions(change, found)
            except DocumentMismatchError:
                retry_changes.add(change)
        return retry_changes, docs
コード例 #4
0
    def process_change(self, pillow_instance, change):
        """Apply one change to every UCR table adapter configured for its domain."""
        self.bootstrap_if_needed()
        if change.deleted:
            # Hard deletions are intentionally unsupported for now; see
            # https://github.com/dimagi/commcare-hq/pull/6944 for rationale.
            return

        domain = change.metadata.domain
        if not domain:
            # Without a domain there is no UCR table to write to.
            return

        doc = change.get_document()
        ensure_document_exists(change)
        ensure_matched_revisions(change)

        if doc is None:
            return

        async_tables = []
        eval_context = EvaluationContext(doc)
        for adapter in self.table_adapters_by_domain[domain]:
            if adapter.config.filter(doc):
                if adapter.run_asynchronous:
                    # Defer to asynchronous indicator processing.
                    async_tables.append(adapter.config._id)
                else:
                    self._save_doc_to_table(adapter, doc, eval_context)
                    eval_context.reset_iteration()
            elif adapter.config.deleted_filter(doc):
                adapter.delete(doc)

        if async_tables:
            AsyncIndicator.update_indicators(change, async_tables)
コード例 #5
0
    def process_change(self, change):
        """ETL a single change into Elasticsearch, timing each phase in datadog."""
        # Deletions short-circuit: drop the doc from the index if present.
        if change.deleted and change.id:
            self._delete_doc_if_exists(change.id)
            return

        with self._datadog_timing('extract'):
            doc = change.get_document()
            ensure_document_exists(change)
            ensure_matched_revisions(change, doc)

        with self._datadog_timing('transform'):
            # Skip missing docs and docs rejected by the optional filter.
            if doc is None or (self.doc_filter_fn and self.doc_filter_fn(doc)):
                return

            doc_type = doc.get('doc_type')
            if doc_type is not None and doc_type.endswith("-Deleted"):
                # Soft-deleted doc: remove it from the index instead of saving.
                self._delete_doc_if_exists(change.id)
                return

            doc_ready_to_save = self.doc_transform_fn(doc)

        with self._datadog_timing('load'):
            send_to_elasticsearch(
                index=self.index_info.index,
                doc_type=self.index_info.type,
                doc_id=change.id,
                es_getter=self.es_getter,
                name='ElasticProcessor',
                data=doc_ready_to_save,
                update=self._doc_exists(change.id),
            )
コード例 #6
0
ファイル: pillow.py プロジェクト: kkrampa/commcare-hq
    def get_docs_for_changes(changes, domain):
        """Bulk-fetch the backing documents for *changes* (all in *domain*).

        Returns a tuple ``(retry_changes, docs)``: the set of changes that
        must be retried serially (missing document or revision mismatch) and
        the list of documents that were found.
        """
        # break up by doctype
        changes_by_doctype = defaultdict(list)
        for change in changes:
            assert change.metadata.domain == domain
            changes_by_doctype[change.metadata.data_source_name].append(change)

        # query
        docs = []
        for _, _changes in six.iteritems(changes_by_doctype):
            # all changes in a group share the same document store
            doc_store = _changes[0].document_store
            doc_ids_to_query = [change.id for change in _changes if change.should_fetch_document()]
            new_docs = list(doc_store.iter_documents(doc_ids_to_query))
            # changes that already carry their document skip the store query
            docs_queried_prior = [change.document for change in _changes if not change.should_fetch_document()]
            docs.extend(new_docs + docs_queried_prior)

        # catch missing docs
        retry_changes = set()
        docs_by_id = {doc['_id']: doc for doc in docs}
        for change in changes:
            if change.id not in docs_by_id:
                # we need to capture DocumentMissingError which is not possible in bulk
                #   so let pillow fall back to serial mode to capture the error for missing docs
                retry_changes.add(change)
                continue
            else:
                # set this, so that subsequent doc lookups are avoided
                change.set_document(docs_by_id[change.id])
            try:
                ensure_matched_revisions(change, docs_by_id.get(change.id))
            except DocumentMismatchError:
                retry_changes.add(change)
        return retry_changes, docs
コード例 #7
0
    def process_change(self, change):
        """Apply one Kafka change to the UCR table adapters for its domain.

        Saves matching docs (or queues them for async indicator processing),
        deletes rows for non-matching docs, and records per-domain timing.
        """
        self.bootstrap_if_needed()

        domain = change.metadata.domain
        if not domain or domain not in self.table_adapters_by_domain:
            # if no domain we won't save to any UCR table
            return

        if change.deleted:
            # NOTE(review): there is no `return` after this deletion handling,
            # so deleted changes fall through to the fetch/filter logic below
            # -- confirm this fall-through is intended.
            adapters = list(self.table_adapters_by_domain[domain])
            for table in adapters:
                table.delete({'_id': change.metadata.document_id})

        async_tables = []
        doc = change.get_document()
        ensure_document_exists(change)
        ensure_matched_revisions(change, doc)

        if doc is None:
            return

        with TimingContext() as timer:
            eval_context = EvaluationContext(doc)
            # make copy to avoid modifying list during iteration
            adapters = list(self.table_adapters_by_domain[domain])
            doc_subtype = change.metadata.document_subtype
            for table in adapters:
                if table.config.filter(doc, eval_context):
                    if table.run_asynchronous:
                        # defer to asynchronous indicator processing
                        async_tables.append(table.config._id)
                    else:
                        self._save_doc_to_table(domain, table, doc, eval_context)
                        eval_context.reset_iteration()
                elif (doc_subtype is None
                        or doc_subtype in table.config.get_case_type_or_xmlns_filter()):
                    # doc no longer matches this table's filter: remove any
                    # stale row, but only when the table's type filter covers it
                    table.delete(doc)

            if async_tables:
                AsyncIndicator.update_from_kafka_change(change, async_tables)

        # record how long this domain's processing took
        self.domain_timing_context.update(**{
            domain: timer.duration
        })
コード例 #8
0
ファイル: elastic.py プロジェクト: soitun/commcare-hq
    def process_change(self, change):
        """Process one change: handle deletions, otherwise extract/transform/load into ES."""
        from corehq.apps.change_feed.document_types import get_doc_meta_object_from_document

        # Optional change-level filter: cheaper than fetching the document.
        if self.change_filter_fn and self.change_filter_fn(change):
            return

        if change.deleted and change.id:
            doc = change.get_document()
            if not (doc and doc.get('doc_type')):
                # No doc or doc_type available -- trust the deletion flag.
                self._delete_doc_if_exists(change.id)
            elif get_doc_meta_object_from_document(doc).is_deletion:
                # Only delete when the doc's metadata confirms a deletion.
                self._delete_doc_if_exists(change.id)
            return

        with self._datadog_timing('extract'):
            doc = change.get_document()
            ensure_document_exists(change)
            ensure_matched_revisions(change, doc)

        with self._datadog_timing('transform'):
            # Skip missing docs and docs rejected by the optional filter.
            if doc is None or (self.doc_filter_fn and self.doc_filter_fn(doc)):
                return

            doc_type = doc.get('doc_type')
            if doc_type is not None and doc_type.endswith("-Deleted"):
                # Soft-deleted doc: remove it from the index instead of saving.
                self._delete_doc_if_exists(change.id)
                return

            doc_ready_to_save = self.doc_transform_fn(doc)

        with self._datadog_timing('load'):
            send_to_elasticsearch(
                index_info=self.index_info,
                doc_type=self.index_info.type,
                doc_id=change.id,
                es_getter=self.es_getter,
                name='ElasticProcessor',
                data=doc_ready_to_save,
            )
コード例 #9
0
ファイル: pillow.py プロジェクト: kkrampa/commcare-hq
    def process_change(self, change):
        """Route one change to the UCR table adapters registered for its domain."""
        self.bootstrap_if_needed()

        domain = change.metadata.domain
        if not domain or domain not in self.table_adapters_by_domain:
            # Nothing is configured for this domain; nothing to update.
            return

        if change.deleted:
            # Copy the adapter list to avoid mutating it while iterating.
            # NOTE: deleted changes fall through below, mirroring the original
            # control flow -- get_document() may still yield a document.
            for adapter in list(self.table_adapters_by_domain[domain]):
                adapter.delete({'_id': change.metadata.document_id})

        async_tables = []
        doc = change.get_document()
        ensure_document_exists(change)
        ensure_matched_revisions(change, doc)

        if doc is None:
            return

        with TimingContext() as timer:
            context = EvaluationContext(doc)
            # Copy again so adapter registration changes don't break iteration.
            for adapter in list(self.table_adapters_by_domain[domain]):
                if adapter.config.filter(doc):
                    if adapter.run_asynchronous:
                        # Defer to asynchronous indicator processing.
                        async_tables.append(adapter.config._id)
                    else:
                        self._save_doc_to_table(domain, adapter, doc, context)
                        context.reset_iteration()
                elif adapter.config.deleted_filter(doc) or adapter.doc_exists(doc):
                    # Doc no longer matches: remove any stale row.
                    adapter.delete(doc)

            if async_tables:
                AsyncIndicator.update_from_kafka_change(change, async_tables)

        # Record how long this domain's processing took.
        self.domain_timing_context.update(**{domain: timer.duration})
コード例 #10
0
    def process_change(self, pillow_instance, change):
        """Best-effort save/delete of a change against each table for its domain."""
        self.bootstrap_if_needed()
        if change.deleted:
            # Hard deletions are not supported; see
            # https://github.com/dimagi/commcare-hq/pull/6944 for rationale.
            return

        domain = change.metadata.domain
        if not domain:
            # No domain means no UCR table to touch.
            return

        for adapter in self.table_adapters_by_domain[domain]:
            doc = change.get_document()
            ensure_document_exists(change)
            ensure_matched_revisions(change)
            if adapter.config.filter(doc):
                # Best-effort: errors are swallowed inside the table adapter.
                adapter.best_effort_save(doc)
            elif adapter.config.deleted_filter(doc):
                adapter.delete(doc)
コード例 #11
0
ファイル: pillow.py プロジェクト: alemat/commcare-hq
    def process_change(self, pillow_instance, change):
        """Save a change to matching UCR tables, deferring async tables to a task."""
        self.bootstrap_if_needed()
        if change.deleted:
            # Hard deletions are not supported; see
            # https://github.com/dimagi/commcare-hq/pull/6944 for rationale.
            return

        domain = change.metadata.domain
        if not domain:
            # No domain means no UCR table to write to.
            return

        doc = change.get_document()
        ensure_document_exists(change)
        ensure_matched_revisions(change)

        if doc is None:
            return

        async_tables = []
        for adapter in self.table_adapters_by_domain[domain]:
            if adapter.config.filter(doc):
                if adapter.run_asynchronous:
                    async_tables.append(adapter.config._id)
                else:
                    self._save_doc_to_table(adapter, doc)
            elif adapter.config.deleted_filter(doc):
                adapter.delete(doc)

        if async_tables:
            # Park the change as a PillowError due a day out, then hand the
            # actual save off to the background task.
            retry_at = datetime.utcnow() + timedelta(days=1)
            error = PillowError.get_or_create(change, pillow_instance)
            error.date_next_attempt = retry_at
            error.save()
            save_document.delay(async_tables, doc, pillow_instance.pillow_id)
コード例 #12
0
    def process_change(self, pillow_instance, change):
        """Best-effort save/delete against every adapter whose config matches the domain."""
        self.bootstrap_if_needed()
        if change.deleted:
            # Hard deletions are not supported; see
            # https://github.com/dimagi/commcare-hq/pull/6944 for rationale.
            return

        domain = change.metadata.domain
        if not domain:
            # No domain means nothing to save.
            return

        for adapter in self.table_adapters:
            if adapter.config.domain != domain:
                continue
            # Only fetch the document once the metadata domain matches.
            doc = change.get_document()
            ensure_document_exists(change)
            ensure_matched_revisions(change)
            if adapter.config.filter(doc):
                # Best-effort: errors are swallowed inside the table adapter.
                adapter.best_effort_save(doc)
            elif adapter.config.deleted_filter(doc):
                adapter.delete(doc)