def docs_to_upsert():
    """Yield the bulk actions for every document in ``docs``.

    Two actions are produced per document, in this order: the document
    itself (under the index/type returned by
    ``self._index_and_mapping(namespace)``) and a metadata record that
    stores ``namespace`` and ``timestamp`` under the same ``_id``.

    ``docs``, ``namespace``, ``timestamp`` and ``self`` come from the
    enclosing scope.  The ``_id`` key is popped from each document.

    Raises:
        errors.EmptyDocsError: after iteration, if ``docs`` produced no
            documents.
    """
    # Debug aid for namespaces containing "Groups".  The ids are gathered
    # while streaming instead of scanning ``docs`` up front: a pre-scan
    # exhausts a one-shot iterator and leaves nothing for the loop below.
    log_ids = "Groups" in namespace
    seen_ids = []
    doc = None
    for doc in docs:
        if log_ids:
            seen_ids.append(doc.get("_id"))
        # Remove metadata and redundant _id
        index, doc_type = self._index_and_mapping(namespace)
        doc_id = str(doc.pop("_id"))
        document_action = {
            "_index": index,
            "_type": doc_type,
            "_id": doc_id,
            "_source": self._formatter.format_document(doc),
        }
        document_meta = {
            "_index": self.meta_index_name,
            "_type": self.meta_type,
            "_id": doc_id,
            "_source": {
                "ns": namespace,
                "_ts": timestamp
            },
        }
        yield document_action
        yield document_meta
    if log_ids:
        LOG.error("DEBUGG:: es bulk upsert groups _ids: %s", seen_ids)
    if doc is None:
        raise errors.EmptyDocsError(
            "Cannot upsert an empty sequence of "
            "documents into Elastic Search")
Exemple #2
0
 def docs_to_upsert():
     """Yield a document action and a metadata action per document.

     ``docs``, ``namespace``, ``timestamp`` and ``self`` come from the
     enclosing scope.  The ``_id`` key is popped from each document and
     used (converted with ``u``) as the id of both actions.

     Raises:
         errors.EmptyDocsError: after iteration, if ``docs`` produced no
             documents.
     """
     # Track emptiness explicitly: once "_id" has been popped, a document
     # that held nothing else is an empty (falsy) dict, so testing the
     # truthiness of the last document would misreport it as "no docs".
     received_any = False
     for doc in docs:
         received_any = True
         # Remove metadata and redundant _id
         index, doc_type = self._index_and_mapping(namespace)
         doc_id = u(doc.pop("_id"))
         document_action = {
             "_index": index,
             "_type": doc_type,
             "_id": doc_id,
             "_source": self._formatter.format_document(doc)
         }
         # NOTE(review): the metadata "ns" field is filled with the index
         # name, not ``namespace`` -- confirm this is intended.
         document_meta = {
             "_index": self.meta_index_name,
             "_type": self.meta_type,
             "_id": doc_id,
             "_source": {
                 "ns": index,
                 "_ts": timestamp
             }
         }
         yield document_action
         yield document_meta
     if not received_any:
         raise errors.EmptyDocsError(
             "Cannot upsert an empty sequence of "
             "documents into Elastic Search")
    def bulk_upsert(self, docs):
        """Insert multiple documents into Splunk.

        Each document gets a ``_time`` field taken from the
        ``generation_time`` of its ``_id`` and is sent, serialised with
        ``dumps(..., sort_keys=True)``, to the Splunk index derived from
        its ``ns`` field.  The index is created first when the service
        does not list it yet.

        Args:
            docs: iterable of documents; each must carry ``ns`` and
                ``_id`` keys.  Documents are modified in place.

        Raises:
            errors.EmptyDocsError: if ``docs`` yields no documents.
        """
        # Explicit flag: with an empty ``docs`` the loop variable is never
        # bound, so checking it after the loop would raise a NameError
        # instead of the intended EmptyDocsError.
        sent_any = False
        for doc in docs:
            sent_any = True
            namespace = doc["ns"]
            doc["_time"] = doc["_id"].generation_time

            service = self.getConnection()

            # Index name: "_" -> "-", then "." -> "_", lower-cased.
            index_name = (
                namespace.replace("_", "-").replace(".", "_").lower())
            # Check index presence
            if index_name not in service.indexes:
                service.indexes.create(index_name)
            # Index the source document; the socket's ``source`` is the
            # part of ``ns`` before the first ".".
            target = service.indexes[index_name]
            with target.attached_socket(sourcetype='json',
                                        source=namespace.split(".")[0],
                                        host="abacus") as sock:
                sock.send(dumps(doc, sort_keys=True))

        if not sent_any:
            raise errors.EmptyDocsError("Cannot upsert an empty sequence of "
                                        "documents into Splunk")
    def update(self, doc, update_spec):
        """Send updated doc to Splunk.

        The entries of ``update_spec`` are laid over a copy of ``doc``
        (``update_spec`` wins on key clashes).  The merged document gets a
        ``_time`` field from the ``generation_time`` of its ``_id`` and is
        sent, serialised with ``dumps(..., sort_keys=True)``, to the
        Splunk index derived from its ``ns`` field.  The index is created
        first when the service does not list it yet.

        Args:
            doc: mapping holding the current document.
            update_spec: mapping whose entries override those of ``doc``.

        Raises:
            errors.EmptyDocsError: if the merged document is empty.
        """
        # Merge without concatenating ``items()`` results, which only
        # works where ``items()`` returns lists.
        merged = dict(doc)
        merged.update(update_spec)

        # Validate before touching Splunk; placed after the send, this
        # guard could never fire because the lookups below fail first.
        if not merged:
            raise errors.EmptyDocsError("Cannot upsert an empty sequence of "
                                        "documents into Splunk")

        namespace = merged["ns"]
        merged["_time"] = merged["_id"].generation_time

        service = self.getConnection()

        # Index name: "_" -> "-", then "." -> "_", lower-cased.
        index_name = namespace.replace("_", "-").replace(".", "_").lower()
        # Check index presence
        if index_name not in service.indexes:
            service.indexes.create(index_name)
        # Index the source document; the socket's ``source`` is the part
        # of ``ns`` before the first ".".
        target = service.indexes[index_name]
        with target.attached_socket(sourcetype='json',
                                    source=namespace.split(".")[0],
                                    host="abacus") as sock:
            sock.send(dumps(merged, sort_keys=True))
        print("Updation successful")
Exemple #5
0
 def docs_to_upsert():
     """Generate the bulk actions for ``docs``: document, then metadata.

     ``docs``, ``namespace``, ``timestamp`` and ``self`` come from the
     enclosing scope.  Each document loses its ``_id`` key, which becomes
     (via ``u``) the id of both generated actions.

     Raises:
         errors.EmptyDocsError: after iteration, if ``docs`` produced no
             documents.
     """
     # NOTE(review): both actions carry the key 'pipeline:' (with a
     # trailing colon); confirm whether 'pipeline' was intended.
     record = None
     for record in docs:
         # Remove metadata and redundant _id
         target_index, mapping_type = self._index_and_mapping(namespace)
         record_id = u(record.pop("_id"))
         body = self._formatter.format_document(record)
         action = {
             '_index': target_index,
             '_type': mapping_type,
             '_id': record_id,
             'pipeline:': 'geoip',
             '_source': body
         }
         meta_body = {'ns': namespace, '_ts': timestamp}
         meta = {
             '_index': self.meta_index_name,
             '_type': self.meta_type,
             '_id': record_id,
             'pipeline:': 'geoip',
             '_source': meta_body
         }
         yield action
         yield meta
     if record is None:
         raise errors.EmptyDocsError(
             "Cannot upsert an empty sequence of "
             "documents into Elastic Search")
        def docs_to_upsert():
            """Generate document and metadata actions, with optional routing.

            ``docs``, ``namespace``, ``timestamp``, ``collectionName`` and
            ``self`` come from the enclosing scope.  Each document loses
            its ``_id`` key, which becomes the id of both actions.

            When the ``JOIN_INDEX`` environment variable is set and
            ``namespace`` equals ``"<JOIN_INDEX>.<JOIN_INDEX>"``, a
            ``data_join`` field is added to the document.  If the
            document has truthy values for the fields named by
            ``CHILD_FIELD_1`` and ``CHILD_FIELD_2``, ``data_join`` names
            ``JOIN_FIELD`` with the document's value for it as parent, and
            both actions get a ``_routing`` entry with that value.
            Otherwise ``data_join`` is ``{"name": "_id"}``.

            After the last document the collection name and the number of
            processed documents are logged.

            Raises:
                errors.EmptyDocsError: after iteration, if ``docs``
                    produced no documents.
            """
            processed = 0
            doc = None
            for doc in docs:
                # Remove metadata and redundant _id
                index, doc_type = self._index_and_mapping(namespace)
                doc_id = str(doc.pop("_id"))

                env = os.environ.get
                join_index = env('JOIN_INDEX')
                join_field = env('JOIN_FIELD')
                use_routing = False

                if join_index and namespace == join_index + "." + join_index:
                    has_children = (doc.get(env('CHILD_FIELD_1'))
                                    and doc.get(env('CHILD_FIELD_2')))
                    if has_children:
                        use_routing = True
                        doc["data_join"] = {
                            "name": join_field,
                            "parent": doc.get(join_field)
                        }
                    else:
                        doc["data_join"] = {"name": "_id"}

                source = self._formatter.format_document(doc)
                document_action = {
                    "_index": index,
                    "_type": doc_type,
                    "_id": doc_id,
                    "_source": source,
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
                    "_source": {"ns": namespace, "_ts": timestamp},
                }

                if use_routing:
                    # Looked up after formatting, as the value is taken
                    # from the document at this point.
                    route = doc.get(join_field)
                    document_meta["_routing"] = route
                    document_action["_routing"] = route

                yield document_action
                yield document_meta

                processed += 1

            if doc is None:
                raise errors.EmptyDocsError(
                    "Cannot upsert an empty sequence of "
                    "documents into Elastic Search")

            for line in (" - - - - - COLLECTION", collectionName,
                         " - - - - - # OF DOCS", processed):
                LOG.always(line)
Exemple #7
0
        def docs_to_upsert():
            """Yield document and metadata actions derived from ``doc``.

            ``doc``, ``timestamp`` and ``self`` come from the enclosing
            scope.  When ``doc`` contains the key ``is_direct_update``
            that key is removed and ``doc`` itself is the only document
            sent; otherwise the documents are whatever
            ``elastic_doc(doc)`` returns.  All actions use the fixed
            namespace ``catalog.variant``.  Each sent document loses its
            ``_id`` key, which becomes (via ``u``) the id of both actions.

            Raises:
                errors.EmptyDocsError: after iteration, if there was no
                    document to send.
            """
            namespace = 'catalog.variant'
            is_direct = 'is_direct_update' in doc
            if is_direct:
                doc.pop("is_direct_update")
                elastic_docs = [doc]
            else:
                elastic_docs = elastic_doc(doc)
            index, doc_type = self._index_and_mapping(namespace)
            if is_direct:
                LOG.info("final object %s, %s,%s",
                         elastic_docs, index, doc_type)

            # An explicit flag detects the empty case; comparing the
            # collection with None never matched an empty sequence.
            emitted = False
            for elastic_document in elastic_docs:
                emitted = True
                # Remove metadata and redundant _id
                doc_id = u(elastic_document.pop("_id"))
                document_action = {
                    "_index": index,
                    "_type": doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(
                        elastic_document)
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
                    "_source": {
                        "ns": namespace,
                        "_ts": timestamp
                    }
                }

                yield document_action
                yield document_meta
            if not emitted:
                raise errors.EmptyDocsError(
                    "Cannot upsert an empty sequence of "
                    "documents into Elastic Search")
 def docs_to_upsert():
     """Generate one bulk action per document in ``docs``.

     ``docs`` and ``self`` come from the enclosing scope.  The target
     index is the document's ``ns`` value and the action id is the string
     form of its ``_id``, which is popped from the document before the
     rest is formatted as the action source.

     Raises:
         errors.EmptyDocsError: after iteration, if the last value bound
             by the loop is falsy (nothing was iterated).
     """
     record = None
     for record in docs:
         target_index = record["ns"]
         record_id = str(record.pop("_id"))
         body = self._formatter.format_document(record)
         action = {
             "_index": target_index,
             "_type": self.doc_type,
             "_id": record_id,
             "_source": body
         }
         yield action
     if not record:
         raise errors.EmptyDocsError(
             "Cannot upsert an empty sequence of "
             "documents into Elastic Search")
Exemple #9
0
 def docs_to_upsert():
     """Generate one bulk action per document in ``docs``.

     ``docs`` and ``self`` come from the enclosing scope.  The value
     stored under ``self.unique_key`` is replaced in place by its string
     form and used as the action id; the target index is the document's
     ``ns`` value and the document itself is the action source.

     Raises:
         errors.EmptyDocsError: after iteration, if the last value bound
             by the loop is falsy (nothing was iterated).
     """
     record = None
     for record in docs:
         target_index = record["ns"]
         key = self.unique_key
         # Stringify the unique key in place; it stays in the source.
         record[key] = str(record[key])
         action = {
             "_index": target_index,
             "_type": self.doc_type,
             "_id": record[key],
             "_source": record
         }
         yield action
     if not record:
         raise errors.EmptyDocsError(
             "Cannot upsert an empty sequence of "
             "documents into Elastic Search")
Exemple #10
0
        def docs_to_upsert():
            """Yield a document action and a metadata action per document.

            ``docs``, ``namespace``, ``timestamp`` and ``self`` come from
            the enclosing scope.  Before the actions are built, a
            document is passed through ``self._new_fields`` when
            ``CONFIG['_newfields']`` has a non-empty entry for
            ``namespace``, and through ``self._cast_field_types`` when
            ``CONFIG['_mapping']`` has a non-empty entry for the index.
            The ``_id`` key is then popped from the resulting document
            and its string form is used as the id of both actions.

            Raises:
                errors.EmptyDocsError: after iteration, if ``docs``
                    produced no documents.
            """
            doc = None

            for doc in docs:
                # Remove metadata and redundant _id
                index, doc_type = self._index_and_mapping(namespace)

                new_fields = CONFIG.get('_newfields', {}).get(namespace, {})
                if new_fields:
                    # Pass the id of the current document.  It is read
                    # here (not popped) because the id variable below is
                    # only assigned after the transformations.
                    doc = self._new_fields(
                        new_fields, doc, str(doc["_id"]))

                field_types = CONFIG.get('_mapping', {}).get(index, {})
                if field_types:
                    doc = self._cast_field_types(field_types, doc)

                doc_id = str(doc.pop("_id"))
                document_action = {
                    "_index": index,
                    "_type": doc_type,
                    "_id": doc_id,
                    "_source": self._formatter.format_document(doc),
                }
                document_meta = {
                    "_index": self.meta_index_name,
                    "_type": self.meta_type,
                    "_id": doc_id,
                    "_source": {
                        "ns": namespace,
                        "_ts": timestamp
                    },
                }
                yield document_action
                yield document_meta
            if doc is None:
                raise errors.EmptyDocsError(
                    "Cannot upsert an empty sequence of "
                    "documents into Elastic Search")
Exemple #11
0
        def docs_to_upsert():
            """Generate document and metadata actions for expanded docs.

            ``docs``, ``namespace``, ``timestamp`` and ``self`` come from
            the enclosing scope.  Every document is expanded with
            ``elastic_doc``; each resulting document loses its ``_id``
            key, which becomes (via ``u``) the id of a document action
            and of the metadata action that follows it.

            Raises:
                errors.EmptyDocsError: after iteration, if the tracking
                    variable is still ``None`` (``docs`` produced
                    nothing).
            """
            current = None
            for current in docs:
                for expanded in elastic_doc(current):
                    # The tracking variable follows the expanded document.
                    current = expanded
                    # Remove metadata and redundant _id
                    target_index, mapping_type = self._index_and_mapping(
                        namespace)
                    current_id = u(current.pop("_id"))
                    body = self._formatter.format_document(current)
                    action = {
                        "_index": target_index,
                        "_type": mapping_type,
                        "_id": current_id,
                        "_source": body
                    }
                    meta = {
                        "_index": self.meta_index_name,
                        "_type": self.meta_type,
                        "_id": current_id,
                        "_source": {"ns": namespace, "_ts": timestamp}
                    }

                    yield action
                    yield meta
            if current is None:
                raise errors.EmptyDocsError(
                    "Cannot upsert an empty sequence of "
                    "documents into Elastic Search")