Example #1
 def __init__(self, servers, bulk_amount=100, bulk_refresh_time=30):
     self.es_connection = Elasticsearch(servers)
     self.es_connection.ping()
     self.bulk_queue = BulkQueue(self,
                                 threshold=bulk_amount,
                                 flush_time=bulk_refresh_time)
     initLogger()
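A minimal usage sketch for this constructor (the server URL and bulk settings below are illustrative assumptions, not values from the source):

es_client = ElasticsearchClient(['http://localhost:9200'],
                                bulk_amount=50,
                                bulk_refresh_time=10)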
Example #2
 def test_over_threshold(self):
     queue = BulkQueue(self.es_client, flush_time=3, threshold=10)
     queue.start_timer()
     for num in range(0, 201):
         queue.add(index='events', doc_type='event', body={'keyname': 'value' + str(num)})
     assert self.num_objects_saved() == 200
     assert queue.size() == 1
     time.sleep(4)
     assert self.num_objects_saved() == 201
     assert queue.size() == 0
     queue.stop_timer()
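Taken together, the tests imply two flush triggers: a synchronous flush whenever the queue reaches its threshold, and a periodic flush driven by a timer thread. Below is a minimal sketch of a queue with those semantics; it illustrates the behavior the tests assume, not the project's actual BulkQueue implementation, and the internal names (docs, _periodic_flush) are invented:

import threading

class BulkQueue(object):
    # Sketch only: buffers documents and flushes them to the client either
    # when the threshold is hit or when the flush timer fires
    def __init__(self, es_client, threshold=10, flush_time=30):
        self.es_client = es_client
        self.threshold = threshold
        self.flush_time = flush_time
        self.docs = []
        self.running = False

    def started(self):
        return self.running

    def start_timer(self):
        # One-shot timer that re-arms itself after every periodic flush
        self.running = True
        self.timer = threading.Timer(self.flush_time, self._periodic_flush)
        self.timer.daemon = True
        self.timer.start()

    def _periodic_flush(self):
        self.flush()
        if self.running:
            self.start_timer()

    def stop_timer(self):
        self.running = False
        self.timer.cancel()

    def add(self, index, doc_type, body, doc_id=None):
        self.docs.append({'_index': index, '_type': doc_type,
                          '_id': doc_id, '_source': body})
        # Synchronous flush once the threshold is reached
        if len(self.docs) >= self.threshold:
            self.flush()

    def size(self):
        return len(self.docs)

    def flush(self):
        # A real implementation would guard self.docs with a lock, since the
        # timer thread and callers can flush concurrently
        self.es_client.save_documents(self.docs)
        self.docs = []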
Example #3
 def test_ten_iterations(self):
     queue = BulkQueue(self.es_client, flush_time=3, threshold=10)
     queue.start_timer()
     total_events = 0
     for num_rounds in range(0, 10):
         for num in range(0, 20):
             total_events += 1
             queue.add(index='events', doc_type='event', body={'keyname': 'value' + str(num)})
         assert self.num_objects_saved() == total_events
     assert queue.size() == 0
     queue.stop_timer()
     assert self.num_objects_saved() == 200
Example #4
class TestBasicInit(BulkQueueTest):

    def setup(self):
        super(TestBasicInit, self).setup()
        self.queue = BulkQueue(self.es_client)

    def test_threshold(self):
        assert self.queue.threshold == 10

    def test_size(self):
        assert self.queue.size() == 0

    def test_flush_time(self):
        assert self.queue.flush_time == 30
Example #5
import json
import logging

from elasticsearch import Elasticsearch
from elasticsearch.exceptions import NotFoundError
from elasticsearch.helpers import bulk, BulkIndexError
from elasticsearch_dsl import Search

# BulkQueue, Event, SimpleResults, AggregatedResults, TermMatch, SearchQuery
# and ElasticsearchInvalidIndex are project-local; their import paths are not
# shown in the source excerpts.

logger = logging.getLogger(__name__)


class ElasticsearchClient():
    def __init__(self, servers, bulk_amount=100, bulk_refresh_time=30):
        self.es_connection = Elasticsearch(servers)
        self.es_connection.ping()
        self.bulk_queue = BulkQueue(self,
                                    threshold=bulk_amount,
                                    flush_time=bulk_refresh_time)

    def delete_index(self, index_name, ignore_fail=False):
        ignore_codes = []
        if ignore_fail is True:
            ignore_codes = [400, 404]

        self.es_connection.indices.delete(index=index_name,
                                          ignore=ignore_codes)

    def get_indices(self):
        return self.es_connection.indices.stats()['indices'].keys()

    def index_exists(self, index_name):
        return self.es_connection.indices.exists(index_name)

    def create_index(self, index_name, index_config=None):
        if not index_config:
            index_config = '''
            {
              "mappings":{}
            }'''
        self.es_connection.indices.create(index=index_name,
                                          update_all_types='true',
                                          body=index_config)

    def create_alias(self, alias, index):
        actions = []
        if self.es_connection.indices.exists_alias('*', alias):
            actions.append({'remove': {'index': '*', 'alias': alias}})

        actions.append({'add': {'index': index, 'alias': alias}})
        self.es_connection.indices.update_aliases(dict(actions=actions))

    def create_alias_multiple_indices(self, alias_name, indices):
        actions = []
        if self.es_connection.indices.exists_alias('*', alias_name):
            actions.append({'remove': {'index': '*', 'alias': alias_name}})

        for index in indices:
            actions.append({'add': {'index': index, 'alias': alias_name}})
        self.es_connection.indices.update_aliases(dict(actions=actions))

    def get_alias(self, alias_name):
        return self.es_connection.indices.get_alias(index='*',
                                                    name=alias_name).keys()

    def refresh(self, index_name):
        self.es_connection.indices.refresh(index=index_name)

    def search(self, search_query, indices, size, request_timeout):
        results = []
        try:
            results = Search(using=self.es_connection, index=indices).params(
                size=size, request_timeout=request_timeout).filter(
                    search_query).execute()
        except NotFoundError:
            raise ElasticsearchInvalidIndex(indices)

        result_set = SimpleResults(results)
        return result_set

    def aggregated_search(self, search_query, indices, aggregations, size,
                          request_timeout):
        search_obj = Search(using=self.es_connection, index=indices).params(
            size=size, request_timeout=request_timeout)
        query_obj = search_obj.filter(search_query)
        for aggregation in aggregations:
            query_obj.aggs.bucket(name=aggregation.to_dict()['terms']['field'],
                                  agg_type=aggregation)
        results = query_obj.execute()

        result_set = AggregatedResults(results)
        return result_set

    def save_documents(self, documents):
        try:
            bulk(self.es_connection, documents)
        except BulkIndexError as e:
            logger.error("Error bulk indexing: " + str(e))

    def finish_bulk(self):
        self.bulk_queue.flush()
        self.bulk_queue.stop_thread()

    def __bulk_save_document(self, index, doc_type, body, doc_id=None):
        if not self.bulk_queue.started():
            self.bulk_queue.start_thread()
        self.bulk_queue.add(index=index,
                            doc_type=doc_type,
                            body=body,
                            doc_id=doc_id)

    def __save_document(self, index, doc_type, body, doc_id=None, bulk=False):
        if bulk:
            self.__bulk_save_document(index=index,
                                      doc_type=doc_type,
                                      body=body,
                                      doc_id=doc_id)
        else:
            return self.es_connection.index(index=index,
                                            doc_type=doc_type,
                                            id=doc_id,
                                            body=body)

    def __parse_document(self, body, doc_type):
        if type(body) is str:
            body = json.loads(body)

        if '_type' in body:
            doc_type = body['_type']

        doc_body = body
        if '_source' in body:
            doc_body = body['_source']
        return doc_body, doc_type

    def save_object(self, body, index, doc_type, doc_id=None, bulk=False):
        doc_body, doc_type = self.__parse_document(body, doc_type)
        return self.__save_document(index=index,
                                    doc_type=doc_type,
                                    body=doc_body,
                                    doc_id=doc_id,
                                    bulk=bulk)

    def save_alert(self,
                   body,
                   index='alerts',
                   doc_type='alert',
                   doc_id=None,
                   bulk=False):
        doc_body, doc_type = self.__parse_document(body, doc_type)
        return self.__save_document(index=index,
                                    doc_type=doc_type,
                                    body=doc_body,
                                    doc_id=doc_id,
                                    bulk=bulk)

    def save_event(self,
                   body,
                   index='events',
                   doc_type='event',
                   doc_id=None,
                   bulk=False):
        doc_body, doc_type = self.__parse_document(body, doc_type)
        event = Event(doc_body)
        event.add_required_fields()
        return self.__save_document(index=index,
                                    doc_type=doc_type,
                                    body=event,
                                    doc_id=doc_id,
                                    bulk=bulk)

    def get_object_by_id(self, object_id, indices):
        id_match = TermMatch('_id', object_id)
        search_query = SearchQuery()
        search_query.add_must(id_match)
        results = search_query.execute(self, indices=indices)
        if len(results['hits']) == 0:
            return None
        else:
            return results['hits'][0]

    def get_alert_by_id(self, alert_id):
        return self.get_object_by_id(alert_id, ['alerts'])

    def get_event_by_id(self, event_id):
        return self.get_object_by_id(event_id, ['events'])

    def save_dashboard(self, dash_file, dash_name):
        with open(dash_file) as f:
            dashboardjson = json.load(f)
        title = dashboardjson['title']
        if dash_name:
            title = dash_name
        # Derive the id from the effective title so a missing dash_name
        # cannot fail on None.replace()
        dashid = title.replace(' ', '-')
        dashboarddata = {
            "user": "******",
            "group": "guest",
            "title": title,
            "dashboard": json.dumps(dashboardjson)
        }

        return self.es_connection.index(index='.kibana',
                                        doc_type='dashboard',
                                        body=dashboarddata,
                                        id=dashid)

    def get_cluster_health(self):
        health_dict = self.es_connection.cluster.health()
        # To line up with the health stats from ES1, we're
        # removing certain keys
        health_dict.pop('active_shards_percent_as_number', None)
        health_dict.pop('delayed_unassigned_shards', None)
        health_dict.pop('number_of_in_flight_fetch', None)
        health_dict.pop('number_of_pending_tasks', None)
        health_dict.pop('task_max_waiting_in_queue_millis', None)

        return health_dict
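A hedged end-to-end sketch of the client above (the server URL, index name, and event bodies are assumptions):

client = ElasticsearchClient(['http://localhost:9200'])
if not client.index_exists('events'):
    client.create_index('events')
client.save_event(body={'summary': 'direct write'})              # indexed immediately
client.save_event(body={'summary': 'queued write'}, bulk=True)   # buffered in the BulkQueue
client.finish_bulk()    # flush anything still queued and stop the bulk thread
client.refresh('events')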
Example #6
class ElasticsearchClient():

    def __init__(self, servers, bulk_amount=100, bulk_refresh_time=30):
        self.es_connection = Elasticsearch(servers)
        self.es_connection.ping()
        self.bulk_queue = BulkQueue(self, threshold=bulk_amount, flush_time=bulk_refresh_time)

    def close_index(self, index_name):
        return self.es_connection.indices.close(index=index_name)

    def open_index(self, index_name):
        return self.es_connection.indices.open(index=index_name)

    def delete_index(self, index_name, ignore_fail=False):
        ignore_codes = []
        if ignore_fail is True:
            ignore_codes = [400, 404]
        self.es_connection.indices.delete(index=index_name, ignore=ignore_codes)

    def get_indices(self):
        return self.es_connection.indices.stats()['indices'].keys()

    def index_exists(self, index_name):
        return self.es_connection.indices.exists(index_name)

    def create_index(self, index_name, index_config=None):
        if not index_config:
            index_config = '''
            {
              "mappings":{}
            }'''
        self.es_connection.indices.create(index=index_name, update_all_types='true', body=index_config)

    def create_alias(self, alias, index):
        actions = []
        if self.es_connection.indices.exists_alias('*', alias):
            actions.append({
                'remove': {'index': '*', 'alias': alias}
            })

        actions.append({
            'add': {'index': index, 'alias': alias}
        })
        self.es_connection.indices.update_aliases(dict(actions=actions))

    def create_alias_multiple_indices(self, alias_name, indices):
        actions = []
        if self.es_connection.indices.exists_alias('*', alias_name):
            actions.append({
                'remove': {'index': '*', 'alias': alias_name}
            })

        for index in indices:
            actions.append({'add': {'index': index, 'alias': alias_name}})
        self.es_connection.indices.update_aliases(dict(actions=actions))

    def get_alias(self, alias_name):
        return self.es_connection.indices.get_alias(index='*', name=alias_name).keys()

    def get_aliases(self):
        # cat.stats() does not exist in elasticsearch-py; list alias names instead
        alias_map = self.es_connection.indices.get_alias(index='*')
        return list(set(alias for aliases in alias_map.values()
                        for alias in aliases.get('aliases', {})))

    def refresh(self, index_name):
        self.es_connection.indices.refresh(index=index_name)

    def search(self, search_query, indices, size, request_timeout):
        results = []
        try:
            results = Search(using=self.es_connection, index=indices).params(size=size, request_timeout=request_timeout).filter(search_query).execute()
        except NotFoundError:
            raise ElasticsearchInvalidIndex(indices)

        result_set = SimpleResults(results)
        return result_set

    def aggregated_search(self, search_query, indices, aggregations, size, request_timeout):
        search_obj = Search(using=self.es_connection, index=indices).params(size=size, request_timeout=request_timeout)
        query_obj = search_obj.filter(search_query)
        for aggregation in aggregations:
            query_obj.aggs.bucket(name=aggregation.to_dict()['terms']['field'], agg_type=aggregation)
        results = query_obj.execute()

        result_set = AggregatedResults(results)
        return result_set

    def save_documents(self, documents):
        try:
            bulk(self.es_connection, documents)
        except BulkIndexError as e:
            logger.error("Error bulk indexing: " + str(e))

    def finish_bulk(self):
        self.bulk_queue.flush()
        self.bulk_queue.stop_thread()

    def __bulk_save_document(self, index, doc_type, body, doc_id=None):
        if not self.bulk_queue.started():
            self.bulk_queue.start_thread()
        self.bulk_queue.add(index=index, doc_type=doc_type, body=body, doc_id=doc_id)

    def __save_document(self, index, doc_type, body, doc_id=None, bulk=False):
        if bulk:
            self.__bulk_save_document(index=index, doc_type=doc_type, body=body, doc_id=doc_id)
        else:
            return self.es_connection.index(index=index, doc_type=doc_type, id=doc_id, body=body)

    def __parse_document(self, body, doc_type):
        if type(body) is str:
            body = json.loads(body)

        if '_type' in body:
            doc_type = body['_type']

        doc_body = body
        if '_source' in body:
            doc_body = body['_source']
        return doc_body, doc_type

    def save_object(self, body, index, doc_type, doc_id=None, bulk=False):
        doc_body, doc_type = self.__parse_document(body, doc_type)
        return self.__save_document(index=index, doc_type=doc_type, body=doc_body, doc_id=doc_id, bulk=bulk)

    def save_alert(self, body, index='alerts', doc_type='alert', doc_id=None, bulk=False):
        doc_body, doc_type = self.__parse_document(body, doc_type)
        return self.__save_document(index=index, doc_type=doc_type, body=doc_body, doc_id=doc_id, bulk=bulk)

    def save_event(self, body, index='events', doc_type='event', doc_id=None, bulk=False):
        doc_body, doc_type = self.__parse_document(body, doc_type)
        event = Event(doc_body)
        event.add_required_fields()
        return self.__save_document(index=index, doc_type=doc_type, body=event, doc_id=doc_id, bulk=bulk)

    def get_object_by_id(self, object_id, indices):
        id_match = TermMatch('_id', object_id)
        search_query = SearchQuery()
        search_query.add_must(id_match)
        results = search_query.execute(self, indices=indices)
        if len(results['hits']) == 0:
            return None
        else:
            return results['hits'][0]

    def get_alert_by_id(self, alert_id):
        return self.get_object_by_id(alert_id, ['alerts'])

    def get_event_by_id(self, event_id):
        return self.get_object_by_id(event_id, ['events'])

    def save_dashboard(self, dash_file, dash_name):
        with open(dash_file) as f:
            dashboardjson = json.load(f)
        title = dashboardjson['title']
        if dash_name:
            title = dash_name
        # Derive the id from the effective title so a missing dash_name
        # cannot fail on None.replace()
        dashid = title.replace(' ', '-')
        dashboarddata = {
            "user": "******",
            "group": "guest",
            "title": title,
            "dashboard": json.dumps(dashboardjson)
        }

        return self.es_connection.index(index='.kibana', doc_type='dashboard', body=dashboarddata, id=dashid)

    def get_cluster_health(self):
        health_dict = self.es_connection.cluster.health()
        # To line up with the health stats from ES1, we're
        # removing certain keys
        health_dict.pop('active_shards_percent_as_number', None)
        health_dict.pop('delayed_unassigned_shards', None)
        health_dict.pop('number_of_in_flight_fetch', None)
        health_dict.pop('number_of_pending_tasks', None)
        health_dict.pop('task_max_waiting_in_queue_millis', None)

        return health_dict
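Because create_alias collects the remove and add actions into a single update_aliases call, the alias moves to the new index in one atomic update. A typical rotation (index names assumed):

client.create_index('events-20180101')
client.create_alias('events', 'events-20180101')
assert 'events-20180101' in client.get_alias('events')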
Example #7
 def test_basic_timer(self):
     queue = BulkQueue(self.es_client, flush_time=2)
     assert queue.started() is False
     queue.start_timer()
     assert queue.started() is True
     queue.add(index='events', doc_type='event', body={'keyname': 'valuename'})
     assert queue.size() == 1
     time.sleep(3)
     assert queue.size() == 0
     queue.stop_timer()
     assert queue.started() is False
Example #8
class TestAdd(BulkQueueTest):

    def setup(self):
        super(TestAdd, self).setup()
        self.queue = BulkQueue(self.es_client, threshold=20)

    def test_basic_add(self):
        assert self.queue.size() == 0
        self.queue.add(index='events', doc_type='event', body={'keyname': 'valuename'})
        assert self.queue.size() == 1
        assert self.queue.started() is False

    def test_add_exact_threshold(self):
        for num in range(0, 20):
            self.queue.add(index='events', doc_type='event', body={'keyname': 'value' + str(num)})
        assert self.queue.size() == 0
        assert self.num_objects_saved() == 20
        assert self.queue.started() is False

    def test_add_over_threshold(self):
        for num in range(0, 21):
            self.queue.add(index='events', doc_type='event', body={'keyname': 'value' + str(num)})
        assert self.num_objects_saved() == 20
        assert self.queue.size() == 1
        assert self.queue.started() is False

    def test_add_multiple_thresholds(self):
        for num in range(0, 201):
            self.queue.add(index='events', doc_type='event', body={'keyname': 'value' + str(num)})
        assert self.num_objects_saved() == 200
        assert self.queue.size() == 1
        assert self.queue.started() is False
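The arithmetic behind test_add_multiple_thresholds: with threshold=20, every 20th add triggers a synchronous flush, so 201 adds yield ten 20-document flushes with one document left in the queue:

total, threshold = 201, 20
flushes, left_over = divmod(total, threshold)
assert flushes * threshold == 200 and left_over == 1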
Example #9
 def test_init_with_threshold(self):
     queue = BulkQueue(self.es_client, 100)
     assert queue.threshold == 100