def docs_to_upsert():
    """Yield (action, meta) bulk-index pairs for every document in *docs*.

    For each source document this emits two dicts: the index action for
    the data index, and a companion metadata record (namespace + oplog
    timestamp) for the meta index.

    Raises:
        errors.EmptyDocsError: if *docs* yields no documents at all.
    """
    # NOTE: a leftover debug statement here previously iterated *docs*
    # ("DEBUGG:: es bulk upsert groups _ids") before the main loop; when
    # *docs* is a generator that consumed it entirely, so nothing was
    # upserted for matching namespaces.  Removed.
    doc = None
    for doc in docs:
        # Resolve target index/mapping, and strip the redundant _id —
        # it is carried in the action envelope instead of the source.
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = str(doc.pop("_id"))
        document_action = {
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(doc),
        }
        document_meta = {
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": {"ns": namespace, "_ts": timestamp},
        }
        yield document_action
        yield document_meta
    # doc stays None only when the loop never ran.
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def docs_to_upsert():
    """Yield (action, meta) bulk-index pairs for every document in *docs*.

    Raises:
        errors.EmptyDocsError: if *docs* yields no documents at all.
    """
    doc = None
    for doc in docs:
        # Remove metadata and the redundant _id (kept in the envelope).
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("_id"))
        document_action = {
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(doc)
        }
        document_meta = {
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": {
                # Fix: record the Mongo namespace, not the ES index name.
                # Sibling implementations in this file store ``namespace``
                # here, and rollback/search code reads it back as "ns".
                "ns": namespace,
                "_ts": timestamp
            }
        }
        yield document_action
        yield document_meta
    # Fix: check for "loop never ran" explicitly; a falsy-but-present
    # document must not trigger EmptyDocsError.
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def bulk_upsert(self, docs):
    """Insert multiple documents into Splunk.

    Each document's "ns" field selects the Splunk index (created on
    demand); "_time" is derived from the ObjectId's generation time.

    Raises:
        errors.EmptyDocsError: if *docs* yields no documents at all.
    """
    # Fix: initialize ``doc`` so an empty *docs* raises EmptyDocsError
    # below instead of NameError on the final check.
    doc = None
    for doc in docs:
        index = doc["ns"]
        doc["_time"] = doc["_id"].generation_time
        service = self.getConnection()
        source = index.split(".")
        index_name = index.replace("_", "-").replace(".", "_").lower()
        # Create the Splunk index on first use.
        if index_name not in service.indexes:
            service.indexes.create(index_name)
        # Stream the document as JSON over an attached socket.
        index = service.indexes[index_name]
        with index.attached_socket(sourcetype='json',
                                   source=source[0],
                                   host="abacus") as sock:
            sock.send(dumps(doc, sort_keys=True))
    if not doc:
        raise errors.EmptyDocsError("Cannot upsert an empty sequence of "
                                    "documents into Splunk")
    return
def update(self, doc, update_spec):
    """Send updated doc to Splunk."""
    # Python 2: dict.items() returns lists, so '+' concatenates them;
    # update_spec's pairs come second and therefore win on key collisions.
    doc = dict(doc.items() + update_spec.items())
    index = doc["ns"]
    # Derive the event time from the ObjectId's embedded timestamp.
    doc["_time"] = doc["_id"].generation_time
    service = self.getConnection()
    source = index.split(".")
    # Map the Mongo namespace to a Splunk-safe index name.
    index_name = index.replace("_", "-").replace(".", "_").lower()
    # Check index presence; create it on first use.
    if index_name not in service.indexes:
        service.indexes.create(index_name)
    # Index the source document by streaming JSON over an attached socket.
    index = service.indexes[index_name]
    with index.attached_socket(sourcetype='json',
                               source=source[0],
                               host="abacus") as sock:
        sock.send(dumps(doc, sort_keys=True))
    print "Updation successful"
    # NOTE(review): ``doc`` is a merged dict built above, so this check
    # can only fire if both inputs were empty — effectively dead code.
    if not doc:
        raise errors.EmptyDocsError("Cannot upsert an empty sequence of "
                                    "documents into Splunk")
    return
def docs_to_upsert():
    """Yield (action, meta) bulk-index pairs, routed through the geoip
    ingest pipeline.

    Raises:
        errors.EmptyDocsError: if *docs* yields no documents at all.
    """
    doc = None
    for doc in docs:
        # Remove metadata and the redundant _id.
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = u(doc.pop("_id"))
        document_action = {
            '_index': index,
            '_type': doc_type,
            '_id': doc_id,
            # Fix: key was 'pipeline:' (trailing colon), so the bulk
            # helper never saw it and the geoip pipeline never ran.
            'pipeline': 'geoip',
            '_source': self._formatter.format_document(doc)
        }
        document_meta = {
            '_index': self.meta_index_name,
            '_type': self.meta_type,
            '_id': doc_id,
            # Same key fix as above.  NOTE(review): running the meta
            # record through geoip mirrors the original intent, but
            # confirm the meta index actually wants this pipeline.
            'pipeline': 'geoip',
            '_source': {
                'ns': namespace,
                '_ts': timestamp
            }
        }
        yield document_action
        yield document_meta
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def docs_to_upsert():
    """Yield (action, meta) bulk-index pairs, adding parent/child join
    metadata and routing when the JOIN_INDEX environment is configured.

    Raises:
        errors.EmptyDocsError: if *docs* yields no documents at all.
    """
    doc_count = 0
    doc = None
    # Hoist loop-invariant environment lookups out of the per-doc loop.
    join_index = os.environ.get('JOIN_INDEX')
    join_field = os.environ.get('JOIN_FIELD')
    child_field_1 = os.environ.get('CHILD_FIELD_1')
    child_field_2 = os.environ.get('CHILD_FIELD_2')
    join_namespace = join_index + "." + join_index if join_index else None
    for doc in docs:
        # Remove metadata and the redundant _id.
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = str(doc.pop("_id"))
        routing = False
        if join_index and namespace == join_namespace:
            # A doc carrying both child fields is a child: route it to
            # its parent's shard.  Otherwise mark it as a parent.
            if doc.get(child_field_1) and doc.get(child_field_2):
                routing = True
                doc["data_join"] = {
                    "name": join_field,
                    "parent": doc.get(join_field)
                }
            else:
                doc["data_join"] = {"name": "_id"}
        document_action = {
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(doc),
        }
        document_meta = {
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": {"ns": namespace, "_ts": timestamp},
        }
        if routing is True:
            # Children must land on the parent's shard.
            document_meta["_routing"] = doc.get(join_field)
            document_action["_routing"] = doc.get(join_field)
        yield document_action
        yield document_meta
        doc_count += 1
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
    LOG.always(" - - - - - COLLECTION")
    LOG.always(collectionName)
    LOG.always(" - - - - - # OF DOCS")
    LOG.always(doc_count)
def docs_to_upsert():
    """Yield (action, meta) bulk-index pairs for the catalog.variant
    namespace, expanding *doc* via elastic_doc() unless it is a direct
    update (flagged by the 'is_direct_update' key).

    Raises:
        errors.EmptyDocsError: if no documents were produced.
    """
    elasticDocs = []
    if 'is_direct_update' in doc:
        # Direct update: index the doc as-is, minus the flag.
        elasticDoc = doc
        elasticDoc.pop("is_direct_update")
        elasticDocs.append(elasticDoc)
        namespace = 'catalog.variant'
        index, doc_type = self._index_and_mapping(namespace)
        # Lazy %-style args so formatting is skipped when INFO is off.
        LOG.info("final object %s, %s, %s", elasticDocs, index, doc_type)
    else:
        # Expand the raw Mongo doc into one or more ES documents.
        elasticDocs = elastic_doc(doc)
        namespace = 'catalog.variant'
        index, doc_type = self._index_and_mapping(namespace)
    for elasticDoc in elasticDocs:
        # Remove metadata and the redundant _id.
        doc_id = u(elasticDoc.pop("_id"))
        document_action = {
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(elasticDoc)
        }
        document_meta = {
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": {
                "ns": namespace,
                "_ts": timestamp
            }
        }
        yield document_action
        yield document_meta
    # Fix: elasticDocs starts as [] and is never None, so the previous
    # ``is None`` comparison could never fire; test emptiness instead.
    if not elasticDocs:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def docs_to_upsert():
    """Yield one bulk-index action per source document.

    The target index comes from each document's "ns" field; the popped
    "_id" becomes the action's document id.

    Raises:
        errors.EmptyDocsError: if *docs* yields nothing.
    """
    last_seen = None
    for last_seen in docs:
        target_index = last_seen["ns"]
        identifier = str(last_seen.pop("_id"))
        action = {
            "_index": target_index,
            "_type": self.doc_type,
            "_id": identifier,
            "_source": self._formatter.format_document(last_seen),
        }
        yield action
    if not last_seen:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def docs_to_upsert():
    """Yield one bulk-index action per document, keyed on self.unique_key.

    The unique-key field is stringified in place on each document, and
    the whole (mutated) document is used as the action source.

    Raises:
        errors.EmptyDocsError: if *docs* yields nothing.
    """
    current = None
    key_field = self.unique_key
    for current in docs:
        target_index = current["ns"]
        # Normalize the key to a string before using it as the ES id.
        current[key_field] = str(current[key_field])
        yield {
            "_index": target_index,
            "_type": self.doc_type,
            "_id": current[key_field],
            "_source": current,
        }
    if not current:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def docs_to_upsert():
    """Yield (action, meta) bulk-index pairs, applying configured new
    fields (_newfields) and field-type casts (_mapping) per namespace.

    Raises:
        errors.EmptyDocsError: if *docs* yields no documents at all.
    """
    doc = None
    for doc in docs:
        # Remove metadata and the redundant _id.
        index, doc_type = self._index_and_mapping(namespace)
        # Fix: pop the _id FIRST — doc_id was previously passed to
        # _new_fields() before it was assigned, raising NameError for
        # any namespace with _newfields configured.
        doc_id = str(doc.pop("_id"))
        new_fields = CONFIG.get('_newfields', {}).get(namespace, {})
        if new_fields:
            doc = self._new_fields(new_fields, doc, doc_id)
        field_types = CONFIG.get('_mapping', {}).get(index, {})
        if field_types:
            doc = self._cast_field_types(field_types, doc)
        document_action = {
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(doc),
        }
        document_meta = {
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": {"ns": namespace, "_ts": timestamp},
        }
        yield document_action
        yield document_meta
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
def docs_to_upsert():
    """Expand each Mongo document via elastic_doc() and yield an
    (action, meta) bulk-index pair for every expanded document.

    Raises:
        errors.EmptyDocsError: if *docs* yields nothing.
    """
    doc = None
    for doc in docs:
        # ``doc`` is deliberately rebound to each expanded document so
        # the emptiness check below sees whatever was processed last.
        for expanded in elastic_doc(doc):
            doc = expanded
            # Remove metadata and the redundant _id.
            index, doc_type = self._index_and_mapping(namespace)
            doc_id = u(doc.pop("_id"))
            action = {
                "_index": index,
                "_type": doc_type,
                "_id": doc_id,
                "_source": self._formatter.format_document(doc)
            }
            meta = {
                "_index": self.meta_index_name,
                "_type": self.meta_type,
                "_id": doc_id,
                "_source": {
                    "ns": namespace,
                    "_ts": timestamp
                }
            }
            yield action
            yield meta
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")