def process_change(self, pillow_instance, change):
    if change.deleted and change.id:
        self._delete_doc_if_exists(change.id)
        return

    doc = change.get_document()

    ensure_document_exists(change)
    ensure_matched_revisions(change)

    if doc is None or (self.doc_filter_fn and self.doc_filter_fn(doc)):
        return

    # prepare doc for es
    doc_ready_to_save = self.doc_transform_fn(doc)
    # send it across
    send_to_elasticsearch(
        index=self.index_info.index,
        doc_type=self.index_info.type,
        doc_id=change.id,
        es_getter=self.es_getter,
        name=pillow_instance.get_name(),
        data=doc_ready_to_save,
        update=self._doc_exists(change.id),
    )

def process_change(self, pillow_instance, change):
    if change.deleted and change.id:
        self._delete_doc_if_exists(change.id)
        return

    doc = change.get_document()

    ensure_document_exists(change)
    ensure_matched_revisions(change)

    if self.doc_filter_fn and self.doc_filter_fn(doc):
        return

    # prepare doc for es
    doc_ready_to_save = self.doc_transform_fn(doc)
    # send it across
    send_to_elasticsearch(
        index=self.index_info.index,
        doc_type=self.index_info.type,
        doc_id=change.id,
        es_getter=self.es_getter,
        name=pillow_instance.get_name(),
        data=doc_ready_to_save,
        update=self._doc_exists(change.id),
    )

def get_docs_for_changes(changes, domain):
    # break up by doctype
    changes_by_doctype = defaultdict(list)
    for change in changes:
        assert change.metadata.domain == domain
        changes_by_doctype[change.metadata.data_source_name].append(change)

    # query
    docs = []
    for _, _changes in six.iteritems(changes_by_doctype):
        doc_store = _changes[0].document_store
        doc_ids_to_query = [change.id for change in _changes if change.should_fetch_document()]
        new_docs = list(doc_store.iter_documents(doc_ids_to_query))
        docs_queried_prior = [change.document for change in _changes if not change.should_fetch_document()]
        docs.extend(new_docs + docs_queried_prior)

    # catch missing docs
    retry_changes = set()
    docs_by_id = {doc['_id']: doc for doc in docs}
    for change in changes:
        if change.id not in docs_by_id:
            # we need to capture DocumentMissingError which is not possible in bulk
            #   so let pillow fall back to serial mode to capture the error for missing docs
            retry_changes.add(change)
            continue
        else:
            # set this, so that subsequent doc lookups are avoided
            change.set_document(docs_by_id[change.id])
        try:
            ensure_matched_revisions(change, docs_by_id.get(change.id))
        except DocumentMismatchError:
            retry_changes.add(change)
    return retry_changes, docs

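A rough sketch of how a chunked caller might consume get_docs_for_changes, under the assumption that changes flagged for retry are re-processed one at a time; process_chunk and the processor argument are hypothetical names used only for illustration, not code from this section.

def process_chunk(processor, changes, domain):
    # bulk-fetch documents for the whole chunk; get_docs_for_changes attaches
    # each found document to its change via change.set_document()
    retry_changes, _docs = get_docs_for_changes(changes, domain)

    for change in changes:
        if change not in retry_changes:
            # document already attached, so change.get_document() inside
            # process_change will not hit the document store again
            processor.process_change(change)

    for change in retry_changes:
        # fall back to serial handling so per-document errors
        # (e.g. DocumentMissingError) surface for just this change
        processor.process_change(change)
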
def process_change(self, pillow_instance, change):
    self.bootstrap_if_needed()
    if change.deleted:
        # we don't currently support hard-deletions at all.
        # we may want to change this at some later date but seem ok for now.
        # see https://github.com/dimagi/commcare-hq/pull/6944 for rationale
        return

    domain = change.metadata.domain
    if not domain:
        # if no domain we won't save to any UCR table
        return

    async_tables = []
    doc = change.get_document()
    ensure_document_exists(change)
    ensure_matched_revisions(change)

    if doc is None:
        return

    eval_context = EvaluationContext(doc)
    for table in self.table_adapters_by_domain[domain]:
        if table.config.filter(doc):
            if table.run_asynchronous:
                async_tables.append(table.config._id)
            else:
                self._save_doc_to_table(table, doc, eval_context)
                eval_context.reset_iteration()
        elif table.config.deleted_filter(doc):
            table.delete(doc)

    if async_tables:
        AsyncIndicator.update_indicators(change, async_tables)

def process_change(self, change):
    if change.deleted and change.id:
        self._delete_doc_if_exists(change.id)
        return

    with self._datadog_timing('extract'):
        doc = change.get_document()

        ensure_document_exists(change)
        ensure_matched_revisions(change, doc)

    with self._datadog_timing('transform'):
        if doc is None or (self.doc_filter_fn and self.doc_filter_fn(doc)):
            return

        if doc.get('doc_type') is not None and doc['doc_type'].endswith("-Deleted"):
            self._delete_doc_if_exists(change.id)
            return

        # prepare doc for es
        doc_ready_to_save = self.doc_transform_fn(doc)

    # send it across
    with self._datadog_timing('load'):
        send_to_elasticsearch(
            index=self.index_info.index,
            doc_type=self.index_info.type,
            doc_id=change.id,
            es_getter=self.es_getter,
            name='ElasticProcessor',
            data=doc_ready_to_save,
            update=self._doc_exists(change.id),
        )

def process_change(self, change):
    self.bootstrap_if_needed()

    domain = change.metadata.domain
    if not domain or domain not in self.table_adapters_by_domain:
        # if no domain we won't save to any UCR table
        return

    if change.deleted:
        adapters = list(self.table_adapters_by_domain[domain])
        for table in adapters:
            table.delete({'_id': change.metadata.document_id})

    async_tables = []
    doc = change.get_document()
    ensure_document_exists(change)
    ensure_matched_revisions(change, doc)

    if doc is None:
        return

    with TimingContext() as timer:
        eval_context = EvaluationContext(doc)
        # make copy to avoid modifying list during iteration
        adapters = list(self.table_adapters_by_domain[domain])
        doc_subtype = change.metadata.document_subtype
        for table in adapters:
            if table.config.filter(doc, eval_context):
                if table.run_asynchronous:
                    async_tables.append(table.config._id)
                else:
                    self._save_doc_to_table(domain, table, doc, eval_context)
                    eval_context.reset_iteration()
            elif (doc_subtype is None
                    or doc_subtype in table.config.get_case_type_or_xmlns_filter()):
                table.delete(doc)

    if async_tables:
        AsyncIndicator.update_from_kafka_change(change, async_tables)

    self.domain_timing_context.update(**{
        domain: timer.duration
    })

def process_change(self, change):
    from corehq.apps.change_feed.document_types import get_doc_meta_object_from_document

    if self.change_filter_fn and self.change_filter_fn(change):
        return

    if change.deleted and change.id:
        doc = change.get_document()
        if doc and doc.get('doc_type'):
            current_meta = get_doc_meta_object_from_document(doc)
            if current_meta.is_deletion:
                self._delete_doc_if_exists(change.id)
        else:
            self._delete_doc_if_exists(change.id)
        return

    with self._datadog_timing('extract'):
        doc = change.get_document()

        ensure_document_exists(change)
        ensure_matched_revisions(change, doc)

    with self._datadog_timing('transform'):
        if doc is None or (self.doc_filter_fn and self.doc_filter_fn(doc)):
            return

        if doc.get('doc_type') is not None and doc['doc_type'].endswith("-Deleted"):
            self._delete_doc_if_exists(change.id)
            return

        # prepare doc for es
        doc_ready_to_save = self.doc_transform_fn(doc)

    # send it across
    with self._datadog_timing('load'):
        send_to_elasticsearch(
            index_info=self.index_info,
            doc_type=self.index_info.type,
            doc_id=change.id,
            es_getter=self.es_getter,
            name='ElasticProcessor',
            data=doc_ready_to_save,
        )

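The Elasticsearch processors above treat a truthy return value from change_filter_fn or doc_filter_fn as "skip this change": they return before doc_transform_fn and send_to_elasticsearch run. The filters below are a minimal sketch of that contract; the function names and the fields they inspect are illustrative assumptions, not filters taken from the code above.

def example_change_filter(change):
    # hypothetical: skip anything without a domain before the document is even fetched
    return not change.metadata or not change.metadata.domain


def example_doc_filter(doc):
    # hypothetical: skip documents explicitly flagged as test data
    return doc.get('is_test_doc', False)
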
def process_change(self, change):
    self.bootstrap_if_needed()

    domain = change.metadata.domain
    if not domain or domain not in self.table_adapters_by_domain:
        # if no domain we won't save to any UCR table
        return

    if change.deleted:
        adapters = list(self.table_adapters_by_domain[domain])
        for table in adapters:
            table.delete({'_id': change.metadata.document_id})

    async_tables = []
    doc = change.get_document()
    ensure_document_exists(change)
    ensure_matched_revisions(change, doc)

    if doc is None:
        return

    with TimingContext() as timer:
        eval_context = EvaluationContext(doc)
        # make copy to avoid modifying list during iteration
        adapters = list(self.table_adapters_by_domain[domain])
        for table in adapters:
            if table.config.filter(doc):
                if table.run_asynchronous:
                    async_tables.append(table.config._id)
                else:
                    self._save_doc_to_table(domain, table, doc, eval_context)
                    eval_context.reset_iteration()
            elif table.config.deleted_filter(doc) or table.doc_exists(doc):
                table.delete(doc)

    if async_tables:
        AsyncIndicator.update_from_kafka_change(change, async_tables)

    self.domain_timing_context.update(**{
        domain: timer.duration
    })

def process_change(self, pillow_instance, change):
    self.bootstrap_if_needed()
    if change.deleted:
        # we don't currently support hard-deletions at all.
        # we may want to change this at some later date but seem ok for now.
        # see https://github.com/dimagi/commcare-hq/pull/6944 for rationale
        return

    domain = change.metadata.domain
    if not domain:
        # if no domain we won't save to any UCR table
        return

    for table in self.table_adapters_by_domain[domain]:
        doc = change.get_document()
        ensure_document_exists(change)
        ensure_matched_revisions(change)
        if table.config.filter(doc):
            # best effort will swallow errors in the table
            table.best_effort_save(doc)
        elif table.config.deleted_filter(doc):
            table.delete(doc)

def process_change(self, pillow_instance, change):
    self.bootstrap_if_needed()
    if change.deleted:
        # we don't currently support hard-deletions at all.
        # we may want to change this at some later date but seem ok for now.
        # see https://github.com/dimagi/commcare-hq/pull/6944 for rationale
        return

    domain = change.metadata.domain
    if not domain:
        # if no domain we won't save to any UCR table
        return

    async_tables = []
    doc = change.get_document()
    ensure_document_exists(change)
    ensure_matched_revisions(change)

    if doc is None:
        return

    for table in self.table_adapters_by_domain[domain]:
        if table.config.filter(doc):
            if table.run_asynchronous:
                async_tables.append(table.config._id)
            else:
                self._save_doc_to_table(table, doc)
        elif table.config.deleted_filter(doc):
            table.delete(doc)

    if async_tables:
        future_time = datetime.utcnow() + timedelta(days=1)
        error = PillowError.get_or_create(change, pillow_instance)
        error.date_next_attempt = future_time
        error.save()
        save_document.delay(async_tables, doc, pillow_instance.pillow_id)

def process_change(self, pillow_instance, change):
    self.bootstrap_if_needed()
    if change.deleted:
        # we don't currently support hard-deletions at all.
        # we may want to change this at some later date but seem ok for now.
        # see https://github.com/dimagi/commcare-hq/pull/6944 for rationale
        return

    domain = change.metadata.domain
    if not domain:
        # if no domain we won't save to any UCR table
        return

    for table in self.table_adapters:
        if table.config.domain == domain:
            # only bother getting the document if we have a domain match from the metadata
            doc = change.get_document()
            ensure_document_exists(change)
            ensure_matched_revisions(change)
            if table.config.filter(doc):
                # best effort will swallow errors in the table
                table.best_effort_save(doc)
            elif table.config.deleted_filter(doc):
                table.delete(doc)
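The change interface these processors depend on can be read off the calls above: deleted, id, get_document, set_document, should_fetch_document, document, document_store, and a metadata object carrying domain, document_id, data_source_name, and document_subtype. A minimal stub along the following lines could exercise process_change in a test without a real change feed or document store; the class names and constructors here are assumptions, not part of the codebase shown above.

class FakeChangeMeta(object):
    def __init__(self, domain, document_id, data_source_name=None, document_subtype=None):
        self.domain = domain
        self.document_id = document_id
        self.data_source_name = data_source_name
        self.document_subtype = document_subtype


class FakeChange(object):
    def __init__(self, doc, deleted=False):
        self.id = doc['_id']
        self.deleted = deleted
        self.document = doc
        self.metadata = FakeChangeMeta(doc.get('domain'), doc['_id'])

    def get_document(self):
        return self.document

    def set_document(self, doc):
        self.document = doc

    def should_fetch_document(self):
        # a document attached up front never needs a second fetch
        return self.document is None

Depending on the version being tested, the call is either processor.process_change(pillow_instance, change) or processor.process_change(change), with change = FakeChange({'_id': 'abc123', 'domain': 'example-domain', 'doc_type': 'CommCareCase'}).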