class ESDataTarget(base.DataTarget):
    """Data target stream that writes records into an ElasticSearch index.

    Records passed to :meth:`append` are queued through the bulk interface
    of the pyes connection and are flushed by :meth:`finalize`.
    """

    def __init__(self, document_type, database="test", host="127.0.0.1",
                 port="9200", truncate=False, expand=False,
                 **elasticsearch_args):
        """Creates a ElasticSearch data target stream.

        :Attributes:
            * document_type: ElasticSearch document type name
            * database: database name (used as the ElasticSearch index
              name), default is ``test``
            * host: ElasticSearch database server host, default is
              ``127.0.0.1``
            * port: ElasticSearch port, string or integer, default is
              ``9200``
            * expand: expand dictionary values and treat children as
              top-level keys with dot '.' separated key path to the child.
            * truncate: delete existing data in the document_type.
              Default: False
            * elasticsearch_args: additional keyword arguments passed to
              the pyes ``ES`` connection. The keys ``create`` and
              ``replace`` are consumed by :meth:`initialize` and are not
              passed to the connection.
        """
        self.document_type = document_type
        self.database_name = database
        self.host = host
        self.port = port
        self.elasticsearch_args = elasticsearch_args
        self.expand = expand
        self.truncate = truncate
        self._fields = None

    def initialize(self):
        """Open the ElasticSearch connection and prepare the target index.

        Depending on the ``create`` and ``replace`` connection options the
        index is created or re-created. If ``truncate`` is set, the mapping
        of the document type is deleted.
        """
        from pyes.es import ES
        from pyes.exceptions import IndexAlreadyExistsException

        args = self.elasticsearch_args.copy()

        # These options control index preparation below; they must not
        # reach the ES() constructor.
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        server = ""
        if self.host:
            server = self.host
        if self.port:
            # str() keeps an integer port from raising TypeError
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.database_name
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.database_name)
                self.connection.refresh(self.database_name)
                created = True
            except IndexAlreadyExistsException:
                # The index is already there; ``replace`` below decides
                # whether it is re-created.
                pass

        if replace and not created:
            self.connection.delete_index_if_exists(self.database_name)
            # NOTE(review): presumably gives the cluster time to finish the
            # deletion before the index is created again - confirm.
            time.sleep(2)
            self.connection.create_index(self.database_name)
            self.connection.refresh(self.database_name)

        if self.truncate:
            self.connection.delete_mapping(self.database_name,
                                           self.document_type)
            self.connection.refresh(self.database_name)

    def append(self, obj):
        """Queue one record for bulk indexing.

        `obj` is either a dictionary or a sequence of values; a sequence
        is paired with the field names of the stream. The document id is
        taken from the ``id`` key, or from ``_id`` when ``id`` is missing
        or empty.
        """
        record = obj
        if not isinstance(obj, dict):
            record = dict(zip(self.fields.names(), obj))

        if self.expand:
            record = expand_record(record)

        # Only a missing/empty ``id`` falls back to ``_id``; a plain ``or``
        # would also discard a valid id of 0.
        doc_id = record.get('id')
        if doc_id is None or doc_id == "":
            doc_id = record.get('_id')

        self.connection.index(record, self.database_name,
                              self.document_type, doc_id, bulk=True)

    def finalize(self):
        """Force a flush of the records queued for bulk indexing."""
        self.connection.flush_bulk(forced=True)
class ESDataTarget(DataTarget):
    """Data target stream that writes records into an ElasticSearch index.

    Records passed to :meth:`append` are queued through the bulk interface
    of the pyes connection and are flushed by :meth:`finalize`.
    """

    def __init__(self, document_type, index="test", host="127.0.0.1",
                 port="9200", truncate=False, expand=False,
                 **elasticsearch_args):
        """Creates a ElasticSearch data target stream.

        :Attributes:
            * document_type: ElasticSearch document type name
            * index: ElasticSearch index name, default is ``test``
            * host: ElasticSearch database server host, default is
              ``127.0.0.1``
            * port: ElasticSearch port, string or integer, default is
              ``9200``
            * expand: expand dictionary values and treat children as
              top-level keys with dot '.' separated key path to the child.
            * truncate: delete existing data in the document_type.
              Default: False
            * elasticsearch_args: additional keyword arguments passed to
              the pyes ``ES`` connection. The keys ``create`` and
              ``replace`` are consumed by :meth:`initialize` and are not
              passed to the connection.
        """
        super(ESDataTarget, self).__init__()
        self.document_type = document_type
        self.index = index
        self.host = host
        self.port = port
        self.elasticsearch_args = elasticsearch_args
        self.expand = expand
        self.truncate = truncate
        self._fields = None

    def initialize(self):
        """Open the ElasticSearch connection and prepare index and mapping.

        Depending on the ``create`` and ``replace`` connection options the
        index is created or re-created. If ``truncate`` is set, the mapping
        of the document type is deleted. Finally, when the document type
        has no mapping, one built by :meth:`_get_mapping` is stored.
        """
        from pyes.es import ES
        # TypeMissingException is needed by the mapping check below; import
        # it here together with the other pyes exception.
        from pyes.exceptions import (IndexAlreadyExistsException,
                                     TypeMissingException)

        args = self.elasticsearch_args.copy()

        # These options control index preparation below; they must not
        # reach the ES() constructor.
        create = args.pop("create", False)
        replace = args.pop("replace", False)

        server = ""
        if self.host:
            server = self.host
        if self.port:
            # str() keeps an integer port from raising TypeError
            server += ":" + str(self.port)

        self.connection = ES(server, **args)
        self.connection.default_indices = self.index
        self.connection.default_types = self.document_type

        created = False
        if create:
            try:
                self.connection.create_index(self.index)
                self.connection.refresh(self.index)
                created = True
            except IndexAlreadyExistsException:
                # The index is already there; ``replace`` below decides
                # whether it is re-created.
                pass

        if replace and not created:
            self.connection.delete_index_if_exists(self.index)
            # NOTE(review): refreshing an index that has just been deleted
            # looks suspicious - confirm that pyes tolerates this call.
            self.connection.refresh(self.index)
            self.connection.create_index(self.index)
            self.connection.refresh(self.index)

        if self.truncate:
            self.connection.delete_mapping(self.index, self.document_type)
            self.connection.refresh(self.index)

        # Check mapping: store one only when the document type has none
        try:
            self.connection.get_mapping(self.document_type, self.index)
        except TypeMissingException:
            self.connection.put_mapping(self.document_type,
                                        self._get_mapping(), self.index)

    def _get_mapping(self):
        """Build an ES optimized mapping for the given fields.

        Fields whose storage type has no entry in the lookup table (for
        example ``unknown``) are left out, which lets ES detect their type.
        """
        from pyes.mappings import (DocumentObjectField, IntegerField,
                                   StringField, BooleanField, FloatField,
                                   DateField)

        # storage type -> pyes mapping field class
        field_classes = {
            "string": StringField,
            "text": StringField,
            "integer": IntegerField,
            "boolean": BooleanField,
            "date": DateField,
            "float": FloatField,
        }

        document = DocumentObjectField(name=self.document_type)
        for field in self.fields:
            field_class = field_classes.get(field.storage_type)
            if field_class is None:
                # lets es detect the type
                continue
            document.add_property(field_class(name=field.name))

        return document

    def append(self, obj):
        """Queue one record for bulk indexing.

        `obj` is either a dictionary or a sequence of values; a sequence
        is paired with the field names of the stream. The document id is
        taken from the ``id`` key, or from ``_id`` when ``id`` is missing
        or empty.
        """
        record = obj
        if not isinstance(obj, dict):
            record = dict(zip(self.field_names, obj))

        if self.expand:
            record = expand_record(record)

        # Only a missing/empty ``id`` falls back to ``_id``; a plain ``or``
        # would also discard a valid id of 0.
        doc_id = record.get('id')
        if doc_id is None or doc_id == "":
            doc_id = record.get('_id')

        self.connection.index(record, self.index, self.document_type,
                              doc_id, bulk=True)

    def finalize(self):
        """Force a flush of the records queued for bulk indexing."""
        self.connection.flush_bulk(forced=True)