def main():
    """Create the ElastAlert metadata index and optionally copy documents
    from an existing index into it.

    Connection settings come from a ``config.yaml`` found in ``..`` or ``.``;
    when no config file exists, settings fall back to command-line flags and
    interactive prompts.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    parser.add_argument('--ssl', action='store_true', default=None, help='Use SSL')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use SSL')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    args = parser.parse_args()

    # Prefer the config one directory up (script usually runs from a
    # subdirectory of the project), then the current directory.
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            # safe_load: the config holds plain scalars only, and yaml.load
            # without an explicit Loader is unsafe and deprecated.
            data = yaml.safe_load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = data.get('es_url_prefix', '')
        use_ssl = data.get('use_ssl')
    else:
        host = args.host if args.host else raw_input('Enter elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        # NOTE(review): the two prompts below were corrupted in the original
        # source (credential scrubbing); reconstructed from the sibling
        # implementations later in this file.
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username: ')
            password = raw_input('Enter optional basic-auth password: ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix: '))

    # Basic auth is only used when both halves were supplied.
    if username and password:
        http_auth = username + ':' + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth, url_prefix=url_prefix)

    # Legacy (pre-5.x) mappings: 'string' type with 'not_analyzed' keeps
    # rule names searchable as exact terms; match_body is stored unindexed.
    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'},
                                                  '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    past_mapping = {'past_elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                       'match_body': {'enabled': False, 'type': 'object'},
                                                       '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
                                                       'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False},
                                                         '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'

    old_index = (args.old_index if args.old_index is not None
                 else raw_input('Name of existing index to copy? (Default None) '))

    res = None
    if old_index:
        print('Downloading existing data...')
        # One unscrolled search capped at 500k docs; larger indices would
        # need the scroll API.
        res = es.search(index=old_index, body={}, size=500000)
        print('Got %s documents' % (len(res['hits']['hits'])))

    es.indices.create(index)
    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    es.indices.put_mapping(index=index, doc_type='past_elastalert', body=past_mapping)
    print('New index %s created' % (index))

    if res:
        # Build a newline-delimited bulk body: one 'create' action line per
        # document, followed by its source.
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source'])) for doc in res['hits']['hits']])
        print('Uploading data...')
        es.bulk(body=bulk, index=index)

    print('Done!')
# Example #2 (score: 0)
def main():
    """Create the ElastAlert metadata index, with optional AWS request
    signing, and optionally copy documents from an existing index.

    Connection settings come from (in order of precedence) command-line
    flags, a ``config.yaml`` found in ``..`` or ``.``, or interactive
    prompts.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth',
                        action='store_const',
                        const=True,
                        help='Suppress prompt for basic auth')
    parser.add_argument('--ssl',
                        action='store_true',
                        default=None,
                        help='Use SSL')
    parser.add_argument('--no-ssl',
                        dest='ssl',
                        action='store_false',
                        help='Do not use SSL')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    parser.add_argument('--boto-profile',
                        default=None,
                        help='Boto profile to use for signing requests')
    parser.add_argument('--aws-region',
                        default=None,
                        help='AWS Region to use for signing requests')
    args = parser.parse_args()

    # Prefer the config one directory up, then the current directory.
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    if filename:
        with open(filename) as config_file:
            # safe_load: the config holds plain scalars only, and yaml.load
            # without an explicit Loader is unsafe and deprecated.
            data = yaml.safe_load(config_file)
        # CLI flags override the config file where both are given.
        host = args.host if args.host else data.get('es_host')
        port = args.port if args.port else data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = args.url_prefix if args.url_prefix is not None else data.get(
            'es_url_prefix', '')
        use_ssl = args.ssl if args.ssl is not None else data.get('use_ssl')
        aws_region = data.get('aws_region', None)
    else:
        username = None
        password = None
        aws_region = args.aws_region
        host = args.host if args.host else raw_input(
            'Enter elasticsearch host: ')
        port = args.port if args.port else int(
            raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None else
                   raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        # NOTE(review): the prompts below were corrupted in the original
        # source (credential scrubbing); reconstructed from the sibling
        # implementations in this file.
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username: ')
            password = raw_input('Enter optional basic-auth password: ')
        url_prefix = (args.url_prefix if args.url_prefix is not None else
                      raw_input('Enter optional Elasticsearch URL prefix: '))

    # Auth() resolves either basic auth or AWS request signing from the
    # provided credentials/region/profile.
    auth = Auth()
    http_auth = auth(host=host,
                     username=username,
                     password=password,
                     aws_region=aws_region,
                     boto_profile=args.boto_profile)

    es = Elasticsearch(host=host,
                       port=port,
                       use_ssl=use_ssl,
                       connection_class=RequestsHttpConnection,
                       http_auth=http_auth,
                       url_prefix=url_prefix)

    # Legacy (pre-5.x) mappings: 'string' type with 'not_analyzed' keeps
    # rule names searchable as exact terms; match_body is stored unindexed.
    silence_mapping = {'silence': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'},
        'until': {'type': 'date', 'format': 'dateOptionalTime'},
        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    ess_mapping = {'elastalert_status': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'},
        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'},
        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
        'alert_time': {'format': 'dateOptionalTime', 'type': 'date'},
        'match_body': {'enabled': False, 'type': 'object'},
        'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    past_mapping = {'past_elastalert': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'},
        'match_body': {'enabled': False, 'type': 'object'},
        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'},
        'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {
        'data': {'type': 'object', 'enabled': False},
        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}

    index = args.index if args.index is not None else raw_input(
        'New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'

    old_index = (args.old_index if args.old_index is not None else
                 raw_input('Name of existing index to copy? (Default None) '))

    res = None
    if old_index:
        print('Downloading existing data...')
        # One unscrolled search capped at 500k docs; larger indices would
        # need the scroll API.
        res = es.search(index=old_index, body={}, size=500000)
        print('Got %s documents' % (len(res['hits']['hits'])))

    # Creating an existing index would raise; bail out early instead.
    es_index = IndicesClient(es)
    if es_index.exists(index):
        print('Index ' + index + ' already exists. Skipping index creation.')
        return None

    es.indices.create(index)
    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='elastalert_status',
                           body=ess_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='silence',
                           body=silence_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='elastalert_error',
                           body=error_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='past_elastalert',
                           body=past_mapping)
    print('New index %s created' % index)

    if res:
        # Build a newline-delimited bulk body: one 'create' action line per
        # document, followed by its source.
        bulk = ''.join([
            '%s\n%s\n' %
            (json.dumps({'create': {
                '_type': doc['_type'],
                '_index': index
            }}), json.dumps(doc['_source'])) for doc in res['hits']['hits']
        ])
        print('Uploading data...')
        es.bulk(body=bulk, index=index)

    print('Done!')
# Example #3 (score: 0)
def main():
    """Create a minimal ElastAlert metadata index and optionally copy
    documents from an existing index into it.

    Host and port come from a ``config.yaml`` found in ``..`` or ``.``;
    otherwise the user is prompted.
    """
    # Prefer the config one directory up, then the current directory.
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    if filename:
        with open(filename) as config_file:
            # safe_load: the config holds plain scalars only, and yaml.load
            # without an explicit Loader is unsafe and deprecated.
            data = yaml.safe_load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
    else:
        host = raw_input("Enter elasticsearch host: ")
        port = int(raw_input("Enter elasticsearch port: "))

    es = Elasticsearch(host=host, port=port)

    # Legacy (pre-5.x) mappings: 'string' type with 'not_analyzed' keeps
    # rule names searchable as exact terms; match_body is stored unindexed.
    silence_mapping = {'silence': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'}}}}
    ess_mapping = {'elastalert_status': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'},
        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {
        'rule_name': {'index': 'not_analyzed', 'type': 'string'},
        'match_body': {'enabled': False, 'type': 'object'}}}}
    error_mapping = {'elastalert_error': {'properties': {
        'data': {'type': 'object', 'enabled': False}}}}

    index = raw_input('New index name? (Default elastalert_status) ')
    index = index if index else 'elastalert_status'
    old_index = raw_input('Name of existing index to copy? (Default None) ')

    res = None
    if old_index:
        print("Downloading existing data...")
        # One unscrolled search capped at 500k docs; larger indices would
        # need the scroll API.
        res = es.search(index=old_index, body={}, size=500000)
        print("Got %s documents" % (len(res['hits']['hits'])))

    # NOTE(review): unlike the other variants in this file, no wait between
    # index creation and put_mapping — possible race on slow clusters.
    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='elastalert_status',
                           body=ess_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='silence',
                           body=silence_mapping)
    es.indices.put_mapping(index=index,
                           doc_type='elastalert_error',
                           body=error_mapping)
    print("New index %s created" % (index))

    if res:
        # Build a newline-delimited bulk body: one 'create' action line per
        # document, followed by its source.
        bulk = ''.join([
            '%s\n%s\n' %
            (json.dumps({'create': {
                '_type': doc['_type'],
                '_index': index
            }}), json.dumps(doc['_source'])) for doc in res['hits']['hits']
        ])
        print("Uploading data...")
        es.bulk(body=bulk, index=index)

    print("Done!")
# Example #4 (score: 0)
def reindex(from_hosts,
            from_index,
            to_hosts,
            to_index,
            to_type,
            source='{"query":{"match_all":{}}}',
            max_docs=0,
            page_size=10,
            logging_per_docs=1000,
            es_scroll='5m',
            request_timeout=60):
    """Copy documents from one Elasticsearch cluster/index into another.

    Documents matching *source* are pulled from ``from_hosts``/``from_index``
    with the scroll API (``page_size`` hits per page, scroll context kept
    alive for *es_scroll*) and pushed to ``to_hosts`` via the bulk API.

    :param from_index: source index; nothing happens when ``None``.
    :param to_index: destination index; falls back to each hit's own
        ``_index`` when ``None``.
    :param to_type: destination doc type; falls back to each hit's ``_type``.
    :param source: query body (JSON string) selecting documents to copy.
    :param max_docs: stop after this many documents (0 = unlimited).
    :param logging_per_docs: log progress every N documents.
    """
    if from_index is None:
        # warn() is a deprecated alias of warning().
        logger.warning('from_index is empty.')
        return

    from_es = Elasticsearch(hosts=from_hosts)
    to_es = Elasticsearch(hosts=to_hosts)

    scroll_id = None
    counter = 0
    running = True
    bulk_data = []
    while(running):
        try:
            if scroll_id is None:
                # First page: issue the search and open a scroll context.
                response = from_es.search(index=from_index,
                                          body=source,
                                          params={"request_timeout": request_timeout,
                                                  "scroll": es_scroll,
                                                  "size": page_size})
            else:
                response = from_es.scroll(scroll_id=scroll_id,
                                          params={"request_timeout": request_timeout,
                                                  "scroll": es_scroll})
            if len(response['hits']['hits']) == 0:
                running = False
                break
            scroll_id = response['_scroll_id']
            for hit in response['hits']['hits']:
                if '_source' in hit:
                    counter += 1
                    if counter % logging_per_docs == 0:
                        logger.info(u'Loaded {0} docs.'.format(counter))
                    if max_docs > 0 and counter >= max_docs:
                        logger.info(u'{0} docs are loaded, but it exceeded {1} docs.'.format(counter, max_docs))
                        running = False
                        break
                    op_index = to_index if to_index is not None else hit['_index']
                    op_type = to_type if to_type is not None else hit['_type']
                    bulk_data.append({"index": {"_index": op_index,
                                                "_type": op_type,
                                                "_id": hit['_id']}
                                      })
                    bulk_data.append(hit['_source'])
            # Flush this page's action/source pairs before fetching the next.
            if len(bulk_data) != 0:
                to_es.bulk(body=bulk_data, params={"request_timeout": request_timeout})
                bulk_data = []
        except NotFoundError:
            # Scroll context expired or index disappeared: stop quietly.
            break
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed; still logged with traceback.
            logger.exception(u"Failed to load documents from Elasticsearch(Loaded {0} doc).".format(counter))
            break

    # Flush anything left over from a partially processed final page.
    if len(bulk_data) != 0:
        to_es.bulk(body=bulk_data, params={"request_timeout": request_timeout})

    logger.info('Loaded {0} documents.'.format(counter))
# Example #5 (score: 0)
class BaseElasticsearchBackend(Base):
    """Base connection wrapper based on the ElasticSearch official library.

    It uses two entry points to configure the underlying connection:

    * ``transport_class``: the transport class from ``elasticsearch``. By
      default ``elasticsearch.transport.Transport``.
    * ``connection_class``: the connection class used by the transport class.
      It's undefined by default, as it is on the subclasses to provide one.

    If any of these elements is not defined, an ``ImproperlyConfigured`` error
    will be raised when the backend will try to configure the client.
    """
    #: ElasticSearch transport class used by the client class to perform
    #: requests.
    transport_class = Transport
    #: ElasticSearch connection class used by the transport class to perform
    #: requests.
    connection_class = None

    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)

    # Server methods
    # ==============
    # The underlying client does not require index names to perform server
    # related queries, such as "ping" or "info". The connection wrapper act
    # for them as a proxy.

    def ping(self, **kwargs):
        """Proxy to ``client.ping``."""
        return self.client.ping(**kwargs)

    def info(self, **kwargs):
        """Proxy to ``client.info``."""
        return self.client.info(**kwargs)

    def put_script(self, lang, script_id, body, **kwargs):
        """Proxy to ``client.put_script``."""
        return self.client.put_script(lang, script_id, body, **kwargs)

    def get_script(self, lang, script_id, **kwargs):
        """Proxy to ``client.get_script``."""
        return self.client.get_script(lang, script_id, **kwargs)

    def delete_script(self, lang, script_id, **kwargs):
        """Proxy to ``client.delete_script``."""
        return self.client.delete_script(lang, script_id, **kwargs)

    def put_template(self, template_id, body, **kwargs):
        """Proxy to ``client.put_template``."""
        return self.client.put_template(template_id, body, **kwargs)

    def get_template(self, template_id, body=None, **kwargs):
        """Proxy to ``client.get_template``."""
        return self.client.get_template(template_id, body, **kwargs)

    def delete_template(self, template_id=None, **kwargs):
        """Proxy to ``client.delete_template``."""
        return self.client.delete_template(template_id, **kwargs)

    # Bulk methods
    # ============
    # The underlying client does not require index names, but it can be used.
    # As it makes sense to not give an index, developers are free to use these
    # as they want, as long as they are careful.

    def mget(self, body, index=None, doc_type=None, **kwargs):
        """Proxy to ``client.mget``; caller may pass an explicit index."""
        return self.client.mget(body, index, doc_type, **kwargs)

    def bulk(self, body, index=None, doc_type=None, **kwargs):
        """Proxy to ``client.bulk``; caller may pass an explicit index."""
        return self.client.bulk(body, index, doc_type, **kwargs)

    def msearch(self, body, index=None, doc_type=None, **kwargs):
        """Proxy to ``client.msearch``; caller may pass an explicit index."""
        return self.client.msearch(body, index, doc_type, **kwargs)

    def mpercolate(self, body, index=None, doc_type=None, **kwargs):
        """Proxy to ``client.mpercolate``; caller may pass an explicit index."""
        return self.client.mpercolate(body, index, doc_type, **kwargs)

    # Scroll methods
    # ==============
    # The underlying client does not require an index to perform scroll.

    def scroll(self, scroll_id, **kwargs):
        """Proxy to ``client.scroll``."""
        return self.client.scroll(scroll_id, **kwargs)

    def clear_scroll(self, scroll_id, body=None, **kwargs):
        """Proxy to ``client.clear_scroll``."""
        return self.client.clear_scroll(scroll_id, body, **kwargs)

    # Query methods
    # =============
    # The underlying client requires index names (or alias names) to perform
    # queries. The connection wrapper overrides these client methods to
    # automatically uses the configured names (indices and/or aliases).

    def create(self, doc_type, body, doc_id=None, **kwargs):
        """Create a document against the configured ``self.indices``."""
        return self.client.create(
            self.indices, doc_type, body, doc_id, **kwargs)

    def index(self, doc_type, body, doc_id=None, **kwargs):
        """Index a document against the configured ``self.indices``."""
        return self.client.index(
            self.indices, doc_type, body, doc_id, **kwargs)

    def exists(self, doc_id, doc_type='_all', **kwargs):
        """Check document existence in the configured ``self.indices``."""
        return self.client.exists(self.indices, doc_id, doc_type, **kwargs)

    def get(self, doc_id, doc_type='_all', **kwargs):
        """Get a document from the configured ``self.indices``."""
        return self.client.get(self.indices, doc_id, doc_type, **kwargs)

    def get_source(self, doc_id, doc_type='_all', **kwargs):
        """Get a document's source from the configured ``self.indices``."""
        return self.client.get_source(self.indices, doc_id, doc_type, **kwargs)

    def update(self, doc_type, doc_id, body=None, **kwargs):
        """Update a document in the configured ``self.indices``."""
        return self.client.update(
            self.indices, doc_type, doc_id, body, **kwargs)

    def search(self, doc_type=None, body=None, **kwargs):
        """Search the configured ``self.indices``."""
        return self.client.search(self.indices, doc_type, body, **kwargs)

    def search_shards(self, doc_type=None, **kwargs):
        """Report shards a search would hit in ``self.indices``."""
        return self.client.search_shards(self.indices, doc_type, **kwargs)

    def search_template(self, doc_type=None, body=None, **kwargs):
        """Run a search template against ``self.indices``."""
        return self.client.search_template(
            self.indices, doc_type, body, **kwargs)

    def explain(self, doc_type, doc_id, body=None, **kwargs):
        """Explain a query/document score within ``self.indices``."""
        return self.client.explain(
            self.indices, doc_type, doc_id, body, **kwargs)

    def delete(self, doc_type, doc_id, **kwargs):
        """Delete a document from the configured ``self.indices``."""
        return self.client.delete(self.indices, doc_type, doc_id, **kwargs)

    def count(self, doc_type=None, body=None, **kwargs):
        """Count documents matching a query in ``self.indices``."""
        return self.client.count(self.indices, doc_type, body, **kwargs)

    def delete_by_query(self, doc_type=None, body=None, **kwargs):
        """Delete documents matching a query in ``self.indices``."""
        return self.client.delete_by_query(
            self.indices, doc_type, body, **kwargs)

    def suggest(self, body, **kwargs):
        """Run a suggest request against ``self.indices``."""
        return self.client.suggest(body, self.indices, **kwargs)

    def percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        """Percolate a document against ``self.indices``."""
        return self.client.percolate(
            self.indices, doc_type, doc_id, body, **kwargs)

    def count_percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        """Count percolator matches against ``self.indices``."""
        return self.client.count_percolate(
            self.indices, doc_type, doc_id, body, **kwargs)

    def mlt(self, doc_type, doc_id, body=None, **kwargs):
        """More-like-this query against ``self.indices``."""
        return self.client.mlt(self.indices, doc_type, doc_id, body, **kwargs)

    def termvector(self, doc_type, doc_id, body=None, **kwargs):
        """Term vectors for one document in ``self.indices``."""
        return self.client.termvector(
            self.indices, doc_type, doc_id, body, **kwargs)

    def mtermvectors(self, doc_type=None, body=None, **kwargs):
        """Term vectors for multiple documents in ``self.indices``."""
        return self.client.mtermvectors(self.indices, doc_type, body, **kwargs)

    def benchmark(self, doc_type=None, body=None, **kwargs):
        """Run a benchmark against ``self.indices``."""
        return self.client.benchmark(self.indices, doc_type, body, **kwargs)

    def abort_benchmark(self, name=None, **kwargs):
        """Abort a running benchmark (no index needed)."""
        return self.client.abort_benchmark(name, **kwargs)

    def list_benchmarks(self, doc_type=None, **kwargs):
        """List benchmarks for ``self.indices``."""
        return self.client.list_benchmarks(self.indices, doc_type, **kwargs)
# Example #6 (score: 0)
def main():
    """Create the ElastAlert metadata index and optionally copy documents
    from an existing index into it.

    Settings come from a ``config.yaml`` found in ``..`` or ``.``, otherwise
    from interactive prompts.

    NOTE(review): the middle of this function was corrupted in the original
    source (credential scrubbing collapsed several lines); the client
    construction, mappings, and index prompts below are reconstructed from
    the parallel implementations earlier in this file and the four
    ``put_mapping`` calls that survived — confirm against upstream.
    """
    # Prefer the config one directory up, then the current directory.
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            # safe_load: the config holds plain scalars only, and yaml.load
            # without an explicit Loader is unsafe and deprecated.
            data = yaml.safe_load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        use_ssl = data.get('use_ssl')
    else:
        host = raw_input("Enter elasticsearch host: ")
        port = int(raw_input("Enter elasticsearch port: "))
        # Re-prompt until an unambiguous true/false answer is given.
        while use_ssl is None:
            resp = raw_input("Use SSL? t/f: ").lower()
            use_ssl = True if resp in ('t', 'true') else (False if resp in ('f', 'false') else None)
        username = raw_input("Enter optional basic-auth username: ")
        password = raw_input("Enter optional basic-auth password: ")

    # Basic auth is only used when both halves were supplied.
    if username and password:
        http_auth = username + ':' + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth)

    # Legacy (pre-5.x) mappings matching the four doc types mapped below.
    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                'match_body': {'enabled': False, 'type': 'object'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False}}}}

    index = raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'
    old_index = raw_input('Name of existing index to copy? (Default None) ')

    res = None
    if old_index:
        print("Downloading existing data...")
        # One unscrolled search capped at 500k docs; larger indices would
        # need the scroll API.
        res = es.search(index=old_index, body={}, size=500000)
        print("Got %s documents" % (len(res['hits']['hits'])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    print("New index %s created" % (index))

    if res:
        # Build a newline-delimited bulk body: one 'create' action line per
        # document, followed by its source.
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source'])) for doc in res['hits']['hits']])
        print("Uploading data...")
        es.bulk(body=bulk, index=index)

    print("Done!")
# Example #7 (score: 0)
class BaseElasticsearchBackend(Base):
    """Base connection wrapper based on the ElasticSearch official library.

    It uses two entry points to configure the underlying connection:

    * ``transport_class``: the transport class from ``elasticsearch``. By
      default ``elasticsearch.transport.Transport``.
    * ``connection_class``: the connection class used by the transport class.
      It's undefined by default, as it is on the subclasses to provide one.

    If any of these elements is not defined, an ``ImproperlyConfigured`` error
    will be raised when the backend will try to configure the client.
    """
    #: ElasticSearch transport class used by the client class to perform
    #: requests.
    transport_class = Transport
    #: ElasticSearch connection class used by the transport class to perform
    #: requests.
    connection_class = None

    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        # NOTE(review): self.server is presumably populated by the Base
        # class from settings — confirm; it must hold HOSTS and PARAMS keys.
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)

    # Server methods
    # ==============
    # The underlying client does not require index names to perform server
    # related queries, such as "ping" or "info". The connection wrapper act
    # for them as a proxy.
    #
    # All of the proxies below forward their arguments POSITIONALLY to the
    # underlying client, so they depend on the exact parameter order of the
    # installed elasticsearch-py version.

    def ping(self, **kwargs):
        return self.client.ping(**kwargs)

    def info(self, **kwargs):
        return self.client.info(**kwargs)

    def put_script(self, lang, script_id, body, **kwargs):
        return self.client.put_script(lang, script_id, body, **kwargs)

    def get_script(self, lang, script_id, **kwargs):
        return self.client.get_script(lang, script_id, **kwargs)

    def delete_script(self, lang, script_id, **kwargs):
        return self.client.delete_script(lang, script_id, **kwargs)

    def put_template(self, template_id, body, **kwargs):
        return self.client.put_template(template_id, body, **kwargs)

    def get_template(self, template_id, body=None, **kwargs):
        return self.client.get_template(template_id, body, **kwargs)

    def delete_template(self, template_id=None, **kwargs):
        # template_id defaults to None, deferring to the client's behavior
        # when no template is named.
        return self.client.delete_template(template_id, **kwargs)

    # Bulk methods
    # ============
    # The underlying client does not require index names, but it can be used.
    # As it makes sense to not give an index, developers are free to use these
    # as they want, as long as they are careful.

    def mget(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mget(body, index, doc_type, **kwargs)

    def bulk(self, body, index=None, doc_type=None, **kwargs):
        return self.client.bulk(body, index, doc_type, **kwargs)

    def msearch(self, body, index=None, doc_type=None, **kwargs):
        return self.client.msearch(body, index, doc_type, **kwargs)

    def mpercolate(self, body, index=None, doc_type=None, **kwargs):
        return self.client.mpercolate(body, index, doc_type, **kwargs)

    # Scroll methods
    # ==============
    # The underlying client does not require an index to perform scroll.

    def scroll(self, scroll_id, **kwargs):
        return self.client.scroll(scroll_id, **kwargs)

    def clear_scroll(self, scroll_id, body=None, **kwargs):
        return self.client.clear_scroll(scroll_id, body, **kwargs)

    # Query methods
    # =============
    # The underlying client requires index names (or alias names) to perform
    # queries. The connection wrapper overrides these client methods to
    # automatically uses the configured names (indices and/or aliases).
    #
    # NOTE(review): self.indices is not assigned anywhere in this class —
    # presumably provided by Base from the backend configuration; confirm.

    def create(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.create(self.indices, doc_type, body, doc_id,
                                  **kwargs)

    def index(self, doc_type, body, doc_id=None, **kwargs):
        return self.client.index(self.indices, doc_type, body, doc_id,
                                 **kwargs)

    def exists(self, doc_id, doc_type='_all', **kwargs):
        # doc_type defaults to '_all' so lookups span every type in the
        # configured indices.
        return self.client.exists(self.indices, doc_id, doc_type, **kwargs)

    def get(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get(self.indices, doc_id, doc_type, **kwargs)

    def get_source(self, doc_id, doc_type='_all', **kwargs):
        return self.client.get_source(self.indices, doc_id, doc_type, **kwargs)

    def update(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.update(self.indices, doc_type, doc_id, body,
                                  **kwargs)

    def search(self, doc_type=None, body=None, **kwargs):
        return self.client.search(self.indices, doc_type, body, **kwargs)

    def search_shards(self, doc_type=None, **kwargs):
        return self.client.search_shards(self.indices, doc_type, **kwargs)

    def search_template(self, doc_type=None, body=None, **kwargs):
        return self.client.search_template(self.indices, doc_type, body,
                                           **kwargs)

    def explain(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.explain(self.indices, doc_type, doc_id, body,
                                   **kwargs)

    def delete(self, doc_type, doc_id, **kwargs):
        return self.client.delete(self.indices, doc_type, doc_id, **kwargs)

    def count(self, doc_type=None, body=None, **kwargs):
        return self.client.count(self.indices, doc_type, body, **kwargs)

    def delete_by_query(self, doc_type=None, body=None, **kwargs):
        return self.client.delete_by_query(self.indices, doc_type, body,
                                           **kwargs)

    def suggest(self, body, **kwargs):
        # Unlike the other query proxies, the client's suggest() takes the
        # body first and the index second — hence the swapped order here.
        return self.client.suggest(body, self.indices, **kwargs)

    def percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.percolate(self.indices, doc_type, doc_id, body,
                                     **kwargs)

    def count_percolate(self, doc_type, doc_id=None, body=None, **kwargs):
        return self.client.count_percolate(self.indices, doc_type, doc_id,
                                           body, **kwargs)

    def mlt(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.mlt(self.indices, doc_type, doc_id, body, **kwargs)

    def termvector(self, doc_type, doc_id, body=None, **kwargs):
        return self.client.termvector(self.indices, doc_type, doc_id, body,
                                      **kwargs)

    def mtermvectors(self, doc_type=None, body=None, **kwargs):
        return self.client.mtermvectors(self.indices, doc_type, body, **kwargs)

    def benchmark(self, doc_type=None, body=None, **kwargs):
        return self.client.benchmark(self.indices, doc_type, body, **kwargs)

    def abort_benchmark(self, name=None, **kwargs):
        # Benchmark abort is addressed by name, not by index.
        return self.client.abort_benchmark(name, **kwargs)

    def list_benchmarks(self, doc_type=None, **kwargs):
        return self.client.list_benchmarks(self.indices, doc_type, **kwargs)
Example #8
0
def main():
    """Create the ElastAlert writeback index and its doc-type mappings.

    Connection settings come from CLI flags, then ``config.yaml`` if present,
    then interactive prompts. Optionally copies documents from an existing
    index into the new one.

    NOTE(review): the original of this block had one corrupted line (a
    credential scrubber fused the auth prompts and the url_prefix assignment);
    those statements are reconstructed from the intact sibling examples.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', help='Elasticsearch host')
    parser.add_argument('--port', type=int, help='Elasticsearch port')
    parser.add_argument('--url-prefix', help='Elasticsearch URL prefix')
    parser.add_argument('--no-auth', action='store_const', const=True, help='Suppress prompt for basic auth')
    # BUG FIX: the original defined --ssl and --no-ssl as independent
    # store_const flags and then evaluated
    #   args.ssl if args.ssl is not None else args.no_ssl if ...
    # which made --no-ssl set use_ssl to True. Sharing one dest makes
    # --ssl -> True, --no-ssl -> False, neither -> None (prompt below).
    parser.add_argument('--ssl', action='store_true', default=None, help='Use SSL')
    parser.add_argument('--no-ssl', dest='ssl', action='store_false', help='Do not use SSL')
    parser.add_argument('--index', help='Index name to create')
    parser.add_argument('--old-index', help='Old index name to copy')
    args = parser.parse_args()

    # Prefer the config file one directory up, then the local one.
    if os.path.isfile('../config.yaml'):
        filename = '../config.yaml'
    elif os.path.isfile('config.yaml'):
        filename = 'config.yaml'
    else:
        filename = ''

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            # NOTE(review): yaml.load without an explicit Loader is unsafe on
            # untrusted files; prefer yaml.safe_load for this config.
            data = yaml.load(config_file)
        host = data.get('es_host')
        port = data.get('es_port')
        username = data.get('es_username')
        password = data.get('es_password')
        url_prefix = data.get('es_url_prefix', '')
        use_ssl = data.get('use_ssl')
    else:
        host = args.host if args.host else raw_input('Enter elasticsearch host: ')
        port = args.port if args.port else int(raw_input('Enter elasticsearch port: '))
        use_ssl = (args.ssl if args.ssl is not None
                   else raw_input('Use SSL? t/f: ').lower() in ('t', 'true'))
        if args.no_auth is None:
            username = raw_input('Enter optional basic-auth username: ')
            password = raw_input('Enter optional basic-auth password: ')
        url_prefix = (args.url_prefix if args.url_prefix is not None
                      else raw_input('Enter optional Elasticsearch URL prefix: '))

    # Basic auth is only sent when both halves were supplied.
    if username and password:
        http_auth = username + ':' + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth, url_prefix=url_prefix)

    silence_mapping = {'silence': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                  'until': {'type': 'date', 'format': 'dateOptionalTime'}}}}
    ess_mapping = {'elastalert_status': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                        '@timestamp': {'format': 'dateOptionalTime', 'type': 'date'}}}}
    es_mapping = {'elastalert': {'properties': {'rule_name': {'index': 'not_analyzed', 'type': 'string'},
                                                'match_body': {'enabled': False, 'type': 'object'},
                                                'aggregate_id': {'index': 'not_analyzed', 'type': 'string'}}}}
    error_mapping = {'elastalert_error': {'properties': {'data': {'type': 'object', 'enabled': False}}}}

    index = args.index if args.index is not None else raw_input('New index name? (Default elastalert_status) ')
    if not index:
        index = 'elastalert_status'

    old_index = (args.old_index if args.old_index is not None
                 else raw_input('Name of existing index to copy? (Default None) '))

    res = None
    if old_index:
        print('Downloading existing data...')
        res = es.search(index=old_index, body={}, size=500000)
        print('Got %s documents' % (len(res['hits']['hits'])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type='elastalert', body=es_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_status', body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type='silence', body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type='elastalert_error', body=error_mapping)
    print('New index %s created' % (index))

    if res:
        # Re-upload via the bulk API: newline-delimited action/source pairs.
        bulk = ''.join(['%s\n%s\n' % (json.dumps({'create': {'_type': doc['_type'], '_index': index}}),
                                      json.dumps(doc['_source'])) for doc in res['hits']['hits']])
        print('Uploading data...')
        es.bulk(body=bulk, index=index)

    print('Done!')
Example #9
0
def main(in_args=None):
    """Create the ElastAlert writeback index and its doc-type mappings.

    Connection settings come from CLI flags (``in_args`` allows tests to pass
    an argv list), then the config file, then interactive prompts. Optionally
    copies documents from an existing index into the new one.

    NOTE(review): the original of this block had one corrupted line (a
    credential scrubber fused the auth prompts and the url_prefix assignment);
    those statements are reconstructed from the intact sibling examples.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", help="Elasticsearch host")
    parser.add_argument("--port", type=int, help="Elasticsearch port")
    parser.add_argument("--url-prefix", help="Elasticsearch URL prefix")
    parser.add_argument("--no-auth", action="store_const", const=True, help="Suppress prompt for basic auth")
    # --ssl/--no-ssl share one dest: --ssl -> True, --no-ssl -> False,
    # neither -> None, which triggers the interactive prompt below.
    parser.add_argument("--ssl", action="store_true", default=None, help="Use SSL")
    parser.add_argument("--no-ssl", dest="ssl", action="store_false", help="Do not use SSL")
    parser.add_argument("--index", help="Index name to create")
    parser.add_argument("--old-index", help="Old index name to copy")
    parser.add_argument("--config", help="Config file name")

    args = parser.parse_args(in_args)

    # Explicit --config wins; otherwise look one directory up, then locally.
    if args.config:
        filename = args.config
    elif os.path.isfile("../config.yaml"):
        filename = "../config.yaml"
    elif os.path.isfile("config.yaml"):
        filename = "config.yaml"
    else:
        filename = ""

    username = None
    password = None
    use_ssl = None
    url_prefix = None
    http_auth = None

    if filename:
        with open(filename) as config_file:
            # NOTE(review): yaml.load without an explicit Loader is unsafe on
            # untrusted files; prefer yaml.safe_load for this config.
            data = yaml.load(config_file)
        host = data.get("es_host")
        port = data.get("es_port")
        username = data.get("es_username")
        password = data.get("es_password")
        url_prefix = data.get("es_url_prefix", "")
        use_ssl = data.get("use_ssl")
    else:
        host = args.host if args.host else raw_input("Enter elasticsearch host: ")
        port = args.port if args.port else int(raw_input("Enter elasticsearch port: "))
        use_ssl = args.ssl if args.ssl is not None else raw_input("Use SSL? t/f: ").lower() in ("t", "true")
        if args.no_auth is None:
            username = raw_input("Enter optional basic-auth username: ")
            password = raw_input("Enter optional basic-auth password: ")
        url_prefix = (
            args.url_prefix if args.url_prefix is not None else raw_input("Enter optional Elasticsearch URL prefix: ")
        )

    # Basic auth is only sent when both halves were supplied.
    if username and password:
        http_auth = username + ":" + password

    es = Elasticsearch(host=host, port=port, use_ssl=use_ssl, http_auth=http_auth, url_prefix=url_prefix)

    silence_mapping = {
        "silence": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "until": {"type": "date", "format": "dateOptionalTime"},
            }
        }
    }
    ess_mapping = {
        "elastalert_status": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "@timestamp": {"format": "dateOptionalTime", "type": "date"},
            }
        }
    }
    es_mapping = {
        "elastalert": {
            "properties": {
                "rule_name": {"index": "not_analyzed", "type": "string"},
                "match_body": {"enabled": False, "type": "object"},
                "aggregate_id": {"index": "not_analyzed", "type": "string"},
            }
        }
    }
    error_mapping = {"elastalert_error": {"properties": {"data": {"type": "object", "enabled": False}}}}

    index = args.index if args.index is not None else raw_input("New index name? (Default elastalert_status) ")
    if not index:
        index = "elastalert_status"

    res = None
    if args.old_index:
        print("Downloading existing data...")
        res = es.search(index=args.old_index, body={}, size=500000)
        print("Got %s documents" % (len(res["hits"]["hits"])))

    es.indices.create(index)
    es.indices.put_mapping(index=index, doc_type="elastalert", body=es_mapping)
    es.indices.put_mapping(index=index, doc_type="elastalert_status", body=ess_mapping)
    es.indices.put_mapping(index=index, doc_type="silence", body=silence_mapping)
    es.indices.put_mapping(index=index, doc_type="elastalert_error", body=error_mapping)
    print("New index %s created" % (index))

    if res:
        # Re-upload via the bulk API: newline-delimited action/source pairs.
        bulk = "".join(
            [
                "%s\n%s\n"
                % (json.dumps({"create": {"_type": doc["_type"], "_index": index}}), json.dumps(doc["_source"]))
                for doc in res["hits"]["hits"]
            ]
        )
        print("Uploading data...")
        es.bulk(body=bulk, index=index)

    print("Done!")