Example #1
    def __init__(self, host="127.0.0.1", port=9200):
        """Create a OpenSearch client."""
        super().__init__()
        self._error_container = {}

        self.user = current_app.config.get("OPENSEARCH_USER", "user")
        self.password = current_app.config.get("OPENSEARCH_PASSWORD", "pass")
        self.ssl = current_app.config.get("OPENSEARCH_SSL", False)
        self.verify = current_app.config.get("OPENSEARCH_VERIFY_CERTS", True)
        self.timeout = current_app.config.get("OPENSEARCH_TIMEOUT", 10)

        parameters = {}
        if self.ssl:
            parameters["use_ssl"] = self.ssl
            parameters["verify_certs"] = self.verify

        if self.user and self.password:
            parameters["http_auth"] = (self.user, self.password)
        if self.timeout:
            parameters["timeout"] = self.timeout

        self.client = OpenSearch([{"host": host, "port": port}], **parameters)

        self.import_counter = Counter()
        self.import_events = []
        self._request_timeout = current_app.config.get(
            "TIMEOUT_FOR_EVENT_IMPORT", self.DEFAULT_EVENT_IMPORT_TIMEOUT)
Example #2
class ElasticsearchSampler():
    """Elasticsearchサンプルクラス
    """

    def __init__(self):
        host = 'localhost'
        port = 9200
        auth = ('admin', 'admin')
        # certs = 'esnode.pem'

        # Create the Elasticsearch instance
        self.es = OpenSearch(
            hosts=[{'host': host, 'port': port}],
            http_auth=auth,
            use_ssl=True,
            verify_certs=False,
            # ca_certs=certs,
            ssl_assert_hostname=False,
            ssl_show_warn=False,
        )

    def __del__(self):
        self.es.close()
        print("close elasticsearch instance--------------------------")

    def search(self, idx: str, query: str):
        """検索
        """
        result = self.es.search(index=idx, body=query)
        print('--[search]-------------------------------------------')
        pprint.pprint(result, sort_dicts=False)

    def bulk(self, index: str):
        """バルクインサート
        """

        try:
            # Any iterable object works, so either of the following is possible:
            # - pass a generator
            success, failed = helpers.bulk(self.es, gendata3(index))
            # - pass a list
            # success, failed = helpers.bulk(self.es, bulklist())
        # except opensearchpy.ElasticsearchException as e:
        #     pprint.pprint(e)
        except Exception as e:
            pprint.pprint(e)
            return

        print('--[bulk  ]-------------------------------------------')
        pprint.pprint(success)
        pprint.pprint(failed)

    def delete_by_query(self, idx: str, query: str):
        """条件指定の削除
        """
        result = self.es.delete_by_query(index=idx, body=query)

        print(f'{type(result)}')
        print('--[delete_by_query]----------------------------------')
        pprint.pprint(result, sort_dicts=False)
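
A minimal usage sketch for the sampler above ('books' is a hypothetical index name; gendata3 is the generator referenced in bulk()):

if __name__ == '__main__':
    sampler = ElasticsearchSampler()
    sampler.bulk('books')                                    # bulk-insert documents from gendata3('books')
    sampler.search('books', '{"query": {"match_all": {}}}')  # dump everything in the index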
Example #3
def getESConn():
  """
  Get connection to Amazon Elasticsearch service (the casebase).
  Can be modified to point to any other Elasticsearch cluster.
  """

  if is_dev:
    return OpenSearch(
        hosts = [{'host': 'clood-opensearch', 'port': 9200}],
        http_compress = True, # enables gzip compression for request bodies
        http_auth = ('kibanaserver','kibanaserver'),
        use_ssl = False,
        verify_certs = False,
        ssl_assert_hostname = False,
        ssl_show_warn = False,
    )

  esconn = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=AWS4Auth(access_key, secret_key, region, 'es'),
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection
  )
  return esconn
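
A hedged connectivity check for the function above; ping() is a standard opensearch-py client method that returns False rather than raising when the cluster is unreachable:

esconn = getESConn()
if not esconn.ping():
    raise RuntimeError('Cannot reach the OpenSearch cluster')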
Example #4
    def __init__(self, host=settings.OPENSEARCH_HOST):

        protocol = settings.OPENSEARCH_PROTOCOL
        protocol_config = {}
        if protocol == "https":
            protocol_config = {
                "scheme": "https",
                "port": 443,
                "use_ssl": True,
                "verify_certs": settings.OPENSEARCH_VERIFY_CERTS,
            }

        if settings.IS_AWS:
            http_auth = ("supersurf", settings.OPENSEARCH_PASSWORD)
        else:
            http_auth = (None, None)

        self.client = OpenSearch([host],
                                 http_auth=http_auth,
                                 connection_class=RequestsHttpConnection,
                                 **protocol_config)
        self.index_nl = settings.OPENSEARCH_NL_INDEX
        self.index_en = settings.OPENSEARCH_EN_INDEX
        self.index_unk = settings.OPENSEARCH_UNK_INDEX
        self.languages = {"nl": self.index_nl, "en": self.index_en}
Example #5
    def __init__(self, host='127.0.0.1', port=9200, url=None):
        """Create an OpenSearch client."""
        super().__init__()
        if url:
            self.client = OpenSearch([url], timeout=30)
        else:
            self.client = OpenSearch(
                [{'host': host, 'port': port}], timeout=30)
        self.import_counter = collections.Counter()
        self.import_events = []
Example #6
    def build_es_connection(self):
        '''
        Creates an Elasticsearch connection object that can
        be used to query Elasticsearch
        '''

        if self.config['cafile'] != "":
            context = ssl.create_default_context(cafile=self.config['cafile'])
        else:
            context = ssl.create_default_context()
        context.check_hostname = self.config['check_hostname']

        CONTEXT_VERIFY_MODES = {
            "none": ssl.CERT_NONE,
            "optional": ssl.CERT_OPTIONAL,
            "required": ssl.CERT_REQUIRED
        }
        context.verify_mode = CONTEXT_VERIFY_MODES[
            self.config['cert_verification']]

        es_config = {'scheme': self.config['scheme'], 'ssl_context': context}

        if self.config['auth_method'] == 'api_key':
            es_config['api_key'] = self.credentials
        else:
            es_config['http_auth'] = self.credentials

        if 'distro' in self.config:
            if self.config['distro'] == 'opensearch':
                from opensearchpy import OpenSearch
                return OpenSearch(self.config['hosts'], **es_config)
            else:
                return Elasticsearch(self.config['hosts'], **es_config)
        else:
            return Elasticsearch(self.config['hosts'], **es_config)
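
A hypothetical config dict for build_es_connection, inferred from the keys the method reads (all values are illustrative):

config = {
    'cafile': '',                 # path to a CA bundle, or '' for system defaults
    'check_hostname': False,
    'cert_verification': 'none',  # one of: none, optional, required
    'scheme': 'https',
    'auth_method': 'http_auth',   # or 'api_key'
    'hosts': ['localhost:9200'],
    'distro': 'opensearch',       # omit or change to fall back to Elasticsearch
}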
Example #7
async def create_opensearch_client() -> Optional[OpenSearch]:
    """
    Create an OpenSearch client, connected to the configured OpenSearch server.

    :return: a connected OpenSearch client instance
    """
    settings = get_settings()

    # Create the client with SSL/TLS enabled, but hostname verification disabled
    client = OpenSearch(
        hosts=[{
            "host": settings.opensearch_server,
            "port": settings.opensearch_port
        }],
        http_compress=True,  # enables gzip compression for request bodies
        http_auth=(settings.opensearch_user, settings.opensearch_password),
        use_ssl=True,
        verify_certs=settings.certificate_verify,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
        ca_certs=settings.certificate_authority_path,
    )

    logger.info("Created OpenSearch client")
    return client
Example #8
    def get_aes_client(self):
        service = "es"
        session = boto3.Session()
        credentials = session.get_credentials()
        region = session.region_name

        if credentials is not None:
            self.aws_auth = AWS4Auth(credentials.access_key,
                                     credentials.secret_key, region, service)
        else:
            click.secho(
                message="Cannot retrieve your AWS credentials; check your AWS config",
                fg="red",
            )

        aes_client = OpenSearch(
            hosts=[self.endpoint],
            http_auth=self.aws_auth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
        )

        return aes_client
Example #9
def get_search_client(conn, silent=False):
    """
    Returns the Open Search client connected through port forwarding settings
    """
    host = conn.config.open_search.host
    protocol_config = {
        "scheme": "https",
        "port": 443,
        "use_ssl": True,
        "verify_certs": True,
    }

    http_auth = ("supersurf", conn.config.secrets.opensearch.password,)

    es_client = OpenSearch(
        [host],
        http_auth=http_auth,
        connection_class=RequestsHttpConnection,
        **protocol_config
    )

    # test if it works
    if not silent and not es_client.cat.health(request_timeout=30):
        raise ValueError('Credentials do not work for Open Search')
    return es_client
Example #10
    def __init__(self):
        host = 'localhost'
        port = 9200
        auth = ('admin', 'admin')
        # certs = 'esnode.pem'

        # Create the Elasticsearch instance
        self.es = OpenSearch(
            hosts=[{'host': host, 'port': port}],
            http_auth=auth,
            use_ssl=True,
            verify_certs=False,
            # ca_certs=certs,
            ssl_assert_hostname=False,
            ssl_show_warn=False,
        )
Example #11
def make_opensearch(index,
                    filters,
                    queries=None,
                    exclusion_filters=None,
                    range_filters=None,
                    prefix_filters=None,
                    terms_filters=None,
                    es_url='https://opensearch.lco.global'):
    """
    Make an OpenSearch query

    Parameters
    ----------
    index : str
            Name of index to search
    filters : list of dicts
              Each dict has a criterion for an OpenSearch "filter"
    queries : list of dicts
              Each dict has a "type" and "query" entry. The 'query' entry is a dict that has a criterion for an
              OpenSearch "query"
    exclusion_filters : list of dicts
                        Each dict has a criterion for an OpenSearch "exclude"
    range_filters : list of dicts
                    Each dict has a criterion for an OpenSearch "range" filter
    prefix_filters : list of dicts
                     Each dict has a criterion for an OpenSearch "prefix" filter
    terms_filters : list of dicts
                    Each dict has a criterion for an OpenSearch "terms" filter
    es_url : str
             URL of the OpenSearch host

    Returns
    -------
    search : opensearch_dsl.Search
             The OpenSearch object
    """
    if queries is None:
        queries = []
    if exclusion_filters is None:
        exclusion_filters = []
    if range_filters is None:
        range_filters = []
    if terms_filters is None:
        terms_filters = []
    if prefix_filters is None:
        prefix_filters = []
    es = OpenSearch(es_url)
    s = Search(using=es, index=index)
    for f in filters:
        s = s.filter('term', **f)
    for f in terms_filters:
        s = s.filter('terms', **f)
    for f in range_filters:
        s = s.filter('range', **f)
    for f in prefix_filters:
        s = s.filter('prefix', **f)
    for f in exclusion_filters:
        s = s.exclude('term', **f)
    for q in queries:
        s = s.query(q['type'], **q['query'])
    return s
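
A hedged usage sketch for make_opensearch; the index and field names are invented for illustration, and Search.scan() from opensearch_dsl streams all matching documents:

s = make_opensearch(index='fitsheaders',
                    filters=[{'OBSTYPE': 'FLAT'}],
                    range_filters=[{'DAY-OBS': {'gte': '20240101'}}])
for hit in s.scan():
    print(hit.to_dict())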
Example #12
    def setUpClass(cls):
        super().setUpClass()
        cls.search = OpenSearch(
            [settings.OPENSEARCH_HOST]
        )
        cls.search.indices.create(settings.OPENSEARCH_NL_INDEX, ignore=400, body=cls.index_body('nl'))
        cls.search.indices.create(settings.OPENSEARCH_EN_INDEX, ignore=400, body=cls.index_body('en'))
        cls.search.indices.create(settings.OPENSEARCH_UNK_INDEX, ignore=400, body=cls.index_body('unk'))
Example #13
    def __init__(self, auth, host='localhost', port=9200, index_name='book'):
        self.fingerprintCreator = FootPrintCreator()
        self.index_name = index_name
        self.client = OpenSearch(
            hosts=[{'host': host, 'port': port}],
            http_compress=True,  # enables gzip compression for request bodies
            http_auth=auth,
            # client_cert = client_cert_path,
            # client_key = client_key_path,
            use_ssl=True,
            verify_certs=False,
            ssl_assert_hostname=False,
            ssl_show_warn=False,
            # ca_certs = ca_certs_path
        )
        if not self.client.indices.exists(index=index_name):
            response = self.client.indices.create(index=index_name, body=bookMapping)
            print('\nCreating index:')
            print(response)
Example #14
    def __get_es_client(self):
        if self.auth_type == OpensearchHandler.AuthType.NO_AUTH:
            if self._client is None:
                self._client = OpenSearch(
                    hosts=self.hosts,
                    use_ssl=self.use_ssl,
                    verify_certs=self.verify_certs,
                    connection_class=RequestsHttpConnection,
                    serializer=self.serializer)
            return self._client

        if self.auth_type == OpensearchHandler.AuthType.BASIC_AUTH:
            if self._client is None:
                self._client = OpenSearch(
                    hosts=self.hosts,
                    http_auth=self.auth_details,
                    use_ssl=self.use_ssl,
                    verify_certs=self.verify_certs,
                    connection_class=RequestsHttpConnection,
                    serializer=self.serializer)
            return self._client

        if self.auth_type == OpensearchHandler.AuthType.AWS_SIGNED_AUTH:
            if self.aws_session is None:
                raise ValueError(
                    "AWS signed authentication enabled, but session object is None"
                )
            if self._client is None:
                credentials = self.aws_session.get_credentials()
                awsauth = AWS4Auth(credentials.access_key,
                                   credentials.secret_key,
                                   self.aws_session.region_name,
                                   'es',
                                   session_token=credentials.token)
                self._client = OpenSearch(
                    hosts=self.hosts,
                    http_auth=awsauth,
                    use_ssl=self.use_ssl,
                    verify_certs=self.verify_certs,
                    connection_class=RequestsHttpConnection,
                    serializer=self.serializer)
            return self._client

        raise ValueError("Authentication method not supported")
Example #15
def test_es_search():
    """Search after running management command to fill ES from data sources."""

    es = OpenSearch([{"host": "localhost", "port": 9200}])

    query = {
        "query": {
            "match": {
                "fi": {
                    "query": "kivist",
                    "fuzziness": "AUTO"
                }
            }
        }
    }

    s = es.search(index="test-index", body=query)

    hits = s["hits"]["total"]["value"]
    assert hits == 1
Example #16
def get_search_db_connection(endpoint: str, region_name: str):
    """
    Get a connection to an ElasticSearch or OpenSearch DB
    :param endpoint: cluster endpoint
    :param region_name: cluster region e.g. us-east-1
    """
    from opensearchpy import OpenSearch, RequestsHttpConnection
    from requests_aws4auth import AWS4Auth

    verify_certs = False
    use_ssl = False
    # use ssl?
    if "https://" in endpoint:
        use_ssl = True
        # TODO remove this condition once ssl certs are available for .es.localhost.localstack.cloud domains
        endpoint_netloc = urlparse(endpoint).netloc
        if not re.match(r"^.*(localhost(\.localstack\.cloud)?)(:\d+)?$",
                        endpoint_netloc):
            verify_certs = True

    LOG.debug("Creating ES client with endpoint %s", endpoint)
    if ENV_ACCESS_KEY in os.environ and ENV_SECRET_KEY in os.environ:
        access_key = os.environ.get(ENV_ACCESS_KEY)
        secret_key = os.environ.get(ENV_SECRET_KEY)
        session_token = os.environ.get(ENV_SESSION_TOKEN)
        awsauth = AWS4Auth(access_key,
                           secret_key,
                           region_name,
                           "es",
                           session_token=session_token)
        connection_class = RequestsHttpConnection
        return OpenSearch(
            hosts=[endpoint],
            verify_certs=verify_certs,
            use_ssl=use_ssl,
            connection_class=connection_class,
            http_auth=awsauth,
        )
    return OpenSearch(hosts=[endpoint],
                      verify_certs=verify_certs,
                      use_ssl=use_ssl)
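
A hypothetical call against a local, unauthenticated cluster (the endpoint is an assumption; a plain http:// endpoint keeps both use_ssl and verify_certs off):

client = get_search_db_connection('http://localhost:9200', 'us-east-1')
print(client.info())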
Example #17
    def set_connection(self, is_reconnect=False):
        urllib3.disable_warnings()
        logging.captureWarnings(True)

        if self.http_auth:
            opensearch_client = self.get_opensearch_client()

        elif self.use_aws_authentication:
            opensearch_client = self.get_aes_client()
        else:
            opensearch_client = OpenSearch([self.endpoint], verify_certs=True)

        # check connection. check OpenSearch SQL plugin availability.
        try:
            if not self.is_sql_plugin_installed(opensearch_client):
                click.secho(
                    message="Must have OpenSearch SQL plugin installed in your OpenSearch "
                    "instance!\nCheck this out: https://github.com/opensearch-project/sql",
                    fg="red",
                )
                click.echo(self.plugins)
                sys.exit()

            # info() may throw ConnectionError, if connection fails to establish
            info = opensearch_client.info()
            self.opensearch_version = info["version"]["number"]
            self.client = opensearch_client
            self.get_indices()

        except ConnectionError as error:
            if is_reconnect:
                # re-throw error
                raise error
            else:
                click.secho(message="Can not connect to endpoint %s" %
                            self.endpoint,
                            fg="red")
                click.echo(repr(error))
                sys.exit(0)
Example #18
def create_es_conn(awsauth, es_hostname):
    es_conn = OpenSearch(hosts=[{
        'host': es_hostname,
        'port': 443
    }],
                         http_auth=awsauth,
                         use_ssl=True,
                         http_compress=True,
                         verify_certs=True,
                         retry_on_timeout=True,
                         connection_class=RequestsHttpConnection,
                         timeout=60)
    return es_conn
Example #19
    def get_client(cls):
        """Returns an instantiated OpenSearch object. Caches result and returns cached object if already instantiated.

        If running on Production with the _VCAP_SERVICES environment variable, uses the bound OpenSearch service.
        If running locally, uses the OPENSEARCH_HOST and OPENSEARCH_PORT environment variables.
        """
        if not cls._os_client:
            logger.info("Instantiating OpenSearch client")
            if settings.OPENSEARCH_URI:
                credentials = settings.OPENSEARCH_URI
            else:
                credentials = {"host": settings.OPENSEARCH_HOST, "port": settings.OPENSEARCH_PORT}
            cls._os_client = OpenSearch([credentials])
        return cls._os_client
Example #20
    def get_opensearch_client(self):
        ssl_context = self.ssl_context = create_ssl_context()
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE

        opensearch_client = OpenSearch(
            [self.endpoint],
            http_auth=self.http_auth,
            verify_certs=False,
            ssl_context=ssl_context,
            connection_class=RequestsHttpConnection,
        )

        return opensearch_client
Example #21
def get_search_client():

    opensearch_url = settings.OPENSEARCH_HOST
    protocol = settings.OPENSEARCH_PROTOCOL
    protocol_config = {}
    if protocol == "https":
        protocol_config = {
            "scheme": "https",
            "port": 443,
            "use_ssl": True,
            "verify_certs": settings.OPENSEARCH_VERIFY_CERTS,
        }

    if settings.IS_AWS:
        http_auth = ("supersurf", settings.OPENSEARCH_PASSWORD)
    else:
        http_auth = (None, None)

    return OpenSearch([opensearch_url],
                      http_auth=http_auth,
                      connection_class=RequestsHttpConnection,
                      **protocol_config)
Example #22
def connect(
    host: str,
    port: Optional[int] = 443,
    boto3_session: Optional[boto3.Session] = boto3.Session(),
    region: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> OpenSearch:
    """Create a secure connection to the specified Amazon OpenSearch domain.

    Note
    ----
    We use `opensearch-py <https://github.com/opensearch-project/opensearch-py>`_, an OpenSearch python client.

    The username and password are mandatory if the OS Cluster uses `Fine Grained Access Control \
<https://docs.aws.amazon.com/opensearch-service/latest/developerguide/fgac.html>`_.
    If fine grained access control is disabled, session access key and secret keys are used.

    Parameters
    ----------
    host : str
        Amazon OpenSearch domain, for example: my-test-domain.us-east-1.es.amazonaws.com.
    port : int
        OpenSearch Service only accepts connections over port 80 (HTTP) or 443 (HTTPS)
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
    region :
        AWS region of the Amazon OS domain. If not provided will be extracted from boto3_session.
    username :
        Fine-grained access control username. Mandatory if OS Cluster uses Fine Grained Access Control.
    password :
        Fine-grained access control password. Mandatory if OS Cluster uses Fine Grained Access Control.

    Returns
    -------
    opensearchpy.OpenSearch
        OpenSearch low-level client.
        https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py
    """
    valid_ports = {80, 443}

    if port not in valid_ports:
        raise ValueError(f"results: port must be one of {valid_ports}")

    if username and password:
        http_auth = (username, password)
    else:
        if region is None:
            region = _utils.get_region_from_session(
                boto3_session=boto3_session)
        creds = _utils.get_credentials_from_session(
            boto3_session=boto3_session)
        if creds.access_key is None or creds.secret_key is None:
            raise exceptions.InvalidArgument(
                "One of IAM Role or AWS ACCESS_KEY_ID and SECRET_ACCESS_KEY must be "
                "given. Unable to find ACCESS_KEY_ID and SECRET_ACCESS_KEY in boto3 "
                "session.")
        http_auth = AWS4Auth(creds.access_key,
                             creds.secret_key,
                             region,
                             "es",
                             session_token=creds.token)
    try:
        es = OpenSearch(
            host=_strip_endpoint(host),
            port=port,
            http_auth=http_auth,
            use_ssl=True,
            verify_certs=True,
            connection_class=RequestsHttpConnection,
            timeout=30,
            max_retries=10,
            retry_on_timeout=True,
        )
    except Exception as e:
        _logger.error(
            "Error connecting to OpenSearch cluster. Please verify authentication details."
        )
        raise e
    return es
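
A hedged usage sketch for connect(); the domain below is the placeholder from the docstring, and IAM credentials from the default boto3 session are used because no username/password is given:

client = connect(host='my-test-domain.us-east-1.es.amazonaws.com')
print(client.info())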
Example #23
class OpenSearchDataStore(object):
    """Implements the datastore."""

    # Number of events to queue up when bulk inserting events.
    DEFAULT_FLUSH_INTERVAL = 1000
    DEFAULT_SIZE = 100
    DEFAULT_LIMIT = DEFAULT_SIZE  # Max events to return
    DEFAULT_FROM = 0
    DEFAULT_STREAM_LIMIT = 5000  # Max events to return when streaming results

    DEFAULT_FLUSH_RETRY_LIMIT = 3  # Max retries for flushing the queue.
    DEFAULT_EVENT_IMPORT_TIMEOUT = "3m"  # Timeout value for importing events.

    def __init__(self, host="127.0.0.1", port=9200):
        """Create a OpenSearch client."""
        super().__init__()
        self._error_container = {}

        self.user = current_app.config.get("OPENSEARCH_USER", "user")
        self.password = current_app.config.get("OPENSEARCH_PASSWORD", "pass")
        self.ssl = current_app.config.get("OPENSEARCH_SSL", False)
        self.verify = current_app.config.get("OPENSEARCH_VERIFY_CERTS", True)
        self.timeout = current_app.config.get("OPENSEARCH_TIMEOUT", 10)

        parameters = {}
        if self.ssl:
            parameters["use_ssl"] = self.ssl
            parameters["verify_certs"] = self.verify

        if self.user and self.password:
            parameters["http_auth"] = (self.user, self.password)
        if self.timeout:
            parameters["timeout"] = self.timeout

        self.client = OpenSearch([{"host": host, "port": port}], **parameters)

        self.import_counter = Counter()
        self.import_events = []
        self._request_timeout = current_app.config.get(
            "TIMEOUT_FOR_EVENT_IMPORT", self.DEFAULT_EVENT_IMPORT_TIMEOUT)

    @staticmethod
    def _build_labels_query(sketch_id, labels):
        """Build OpenSearch query for Timesketch labels.

        Args:
            sketch_id: Integer of sketch primary key.
            labels: List of label names.

        Returns:
            OpenSearch query as a dictionary.
        """
        label_query = {"bool": {"must": []}}

        for label in labels:
            # Increase metrics counter per label
            METRICS["search_filter_label"].labels(label=label).inc()
            nested_query = {
                "nested": {
                    "query": {
                        "bool": {
                            "must": [
                                {
                                    "term": {
                                        "timesketch_label.name.keyword": label
                                    }
                                },
                                {
                                    "term": {
                                        "timesketch_label.sketch_id": sketch_id
                                    }
                                },
                            ]
                        }
                    },
                    "path": "timesketch_label",
                }
            }
            label_query["bool"]["must"].append(nested_query)
        return label_query

    @staticmethod
    def _build_events_query(events):
        """Build OpenSearch query for one or more document ids.

        Args:
            events: List of OpenSearch document IDs.

        Returns:
            OpenSearch query as a dictionary.
        """
        events_list = [event["event_id"] for event in events]
        query_dict = {"query": {"ids": {"values": events_list}}}
        return query_dict

    @staticmethod
    def _build_query_dsl(query_dsl, timeline_ids):
        """Build OpenSearch Search DSL query by adding in timeline filtering.

        Args:
            query_dsl: A dict with the current query_dsl
            timeline_ids: Either a list of timeline IDs (int) or None.

        Returns:
            OpenSearch query DSL as a dictionary.
        """
        # Remove any aggregation coming from user supplied Query DSL.
        # We have no way to display this data in a good way today.
        if query_dsl.get("aggregations", None):
            del query_dsl["aggregations"]

        if not timeline_ids:
            return query_dsl

        if not isinstance(timeline_ids, (list, tuple)):
            es_logger.error(
                "Attempting to pass in timelines to a query DSL, but the "
                "passed timelines are not a list.")
            return query_dsl

        if not all([isinstance(x, int) for x in timeline_ids]):
            es_logger.error("All timeline IDs need to be an integer.")
            return query_dsl

        old_query = query_dsl.get("query")
        if not old_query:
            return query_dsl

        query_dsl["query"] = {
            "bool": {
                "must": [],
                "should": [
                    {
                        "bool": {
                            "must":
                            old_query,
                            "must_not": [{
                                "exists": {
                                    "field": "__ts_timeline_id"
                                },
                            }],
                        }
                    },
                    {
                        "bool": {
                            "must": [
                                {
                                    "terms": {
                                        "__ts_timeline_id": timeline_ids
                                    }
                                },
                                old_query,
                            ],
                            "must_not": [],
                            "filter": [{
                                "exists": {
                                    "field": "__ts_timeline_id"
                                }
                            }],
                        }
                    },
                ],
                "must_not": [],
                "filter": [],
            }
        }
        return query_dsl

    @staticmethod
    def _convert_to_time_range(interval):
        """Convert an interval timestamp into start and end dates.

        Args:
            interval: Time frame representation

        Returns:
            Start timestamp in string format.
            End timestamp in string format.
        """
        # return ('2018-12-05T00:00:00', '2018-12-05T23:59:59')
        TS_FORMAT = "%Y-%m-%dT%H:%M:%S"
        get_digits = lambda s: int("".join(filter(str.isdigit, s)))
        get_alpha = lambda s: "".join(filter(str.isalpha, s))

        ts_parts = interval.split(" ")
        # The start date may consist of the first one or two items.
        start = " ".join(ts_parts[0:len(ts_parts) - 2])
        minus = get_digits(ts_parts[-2])
        plus = get_digits(ts_parts[-1])
        interval = get_alpha(ts_parts[-1])

        start_ts = parser.parse(start)

        rd = relativedelta.relativedelta
        if interval == "s":
            start_range = start_ts - rd(seconds=minus)
            end_range = start_ts + rd(seconds=plus)
        elif interval == "m":
            start_range = start_ts - rd(minutes=minus)
            end_range = start_ts + rd(minutes=plus)
        elif interval == "h":
            start_range = start_ts - rd(hours=minus)
            end_range = start_ts + rd(hours=plus)
        elif interval == "d":
            start_range = start_ts - rd(days=minus)
            end_range = start_ts + rd(days=plus)
        else:
            raise RuntimeError("Unable to parse the timestamp: " +
                               str(interval))

        return start_range.strftime(TS_FORMAT), end_range.strftime(TS_FORMAT)
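
    # Worked example (hypothetical input): the interval string
    # "2018-12-05T00:00:00 -5m +5m" parses into start="2018-12-05T00:00:00",
    # minus=5, plus=5 and interval="m", so the method returns
    # ("2018-12-04T23:55:00", "2018-12-05T00:05:00").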

    def build_query(
        self,
        sketch_id,
        query_string,
        query_filter,
        query_dsl=None,
        aggregations=None,
        timeline_ids=None,
    ):
        """Build OpenSearch DSL query.

        Args:
            sketch_id: Integer of sketch primary key
            query_string: Query string
            query_filter: Dictionary containing filters to apply
            query_dsl: Dictionary containing OpenSearch DSL query
            aggregations: Dict of OpenSearch aggregations
            timeline_ids: Optional list of IDs of Timeline objects that should
                be queried as part of the search.

        Returns:
            OpenSearch DSL query as a dictionary
        """

        if query_dsl:
            if not isinstance(query_dsl, dict):
                query_dsl = json.loads(query_dsl)

            if not query_dsl:
                query_dsl = {}

            return self._build_query_dsl(query_dsl, timeline_ids)

        if query_filter.get("events", None):
            events = query_filter["events"]
            return self._build_events_query(events)

        query_dsl = {
            "query": {
                "bool": {
                    "must": [],
                    "must_not": [],
                    "filter": []
                }
            }
        }

        if query_string:
            query_dsl["query"]["bool"]["must"].append({
                "query_string": {
                    "query": query_string,
                    "default_operator": "AND"
                }
            })

        # New UI filters
        if query_filter.get("chips", None):
            labels = []
            must_filters = query_dsl["query"]["bool"]["must"]
            must_not_filters = query_dsl["query"]["bool"]["must_not"]
            datetime_ranges = {
                "bool": {
                    "should": [],
                    "minimum_should_match": 1
                }
            }

            for chip in query_filter["chips"]:
                # Exclude chips that the user disabled
                if not chip.get("active", True):
                    continue

                # Increase metrics per chip type
                METRICS["search_filter_type"].labels(type=chip["type"]).inc()
                if chip["type"] == "label":
                    labels.append(chip["value"])

                elif chip["type"] == "term":
                    term_filter = {
                        "match_phrase": {
                            "{}".format(chip["field"]): {
                                "query": "{}".format(chip["value"])
                            }
                        }
                    }

                    if chip["operator"] == "must":
                        must_filters.append(term_filter)

                    elif chip["operator"] == "must_not":
                        must_not_filters.append(term_filter)

                elif chip["type"].startswith("datetime"):
                    range_filter = lambda start, end: {
                        "range": {
                            "datetime": {
                                "gte": start,
                                "lte": end
                            }
                        }
                    }
                    if chip["type"] == "datetime_range":
                        start, end = chip["value"].split(",")
                    elif chip["type"] == "datetime_interval":
                        start, end = self._convert_to_time_range(chip["value"])
                    else:
                        continue
                    datetime_ranges["bool"]["should"].append(
                        range_filter(start, end))

            label_filter = self._build_labels_query(sketch_id, labels)
            must_filters.append(label_filter)
            must_filters.append(datetime_ranges)

        # Pagination
        if query_filter.get("from", None):
            query_dsl["from"] = query_filter["from"]

        # Number of events to return
        if query_filter.get("size", None):
            query_dsl["size"] = query_filter["size"]

        # Make sure we are sorting.
        if not query_dsl.get("sort", None):
            query_dsl["sort"] = {"datetime": query_filter.get("order", "asc")}

        # Add any pre defined aggregations
        if aggregations:
            # post_filter happens after aggregation so we need to move the
            # filter to the query instead.
            if query_dsl.get("post_filter", None):
                query_dsl["query"]["bool"]["filter"] = query_dsl["post_filter"]
                query_dsl.pop("post_filter", None)
            query_dsl["aggregations"] = aggregations

        # TODO: Simplify this when we don't have to support both timelines
        # that have __ts_timeline_id set and those that don't.
        # (query_string AND timeline_id NOT EXISTS) OR (
        #       query_string AND timeline_id in LIST)
        if timeline_ids and isinstance(timeline_ids, (list, tuple)):
            must_filters_pre = copy.copy(query_dsl["query"]["bool"]["must"])
            must_not_filters_pre = copy.copy(
                query_dsl["query"]["bool"]["must_not"])

            must_filters_post = copy.copy(query_dsl["query"]["bool"]["must"])
            must_not_filters_post = copy.copy(
                query_dsl["query"]["bool"]["must_not"])

            must_not_filters_pre.append({
                "exists": {
                    "field": "__ts_timeline_id"
                },
            })

            must_filters_post.append(
                {"terms": {
                    "__ts_timeline_id": timeline_ids
                }})

            query_dsl["query"] = {
                "bool": {
                    "must": [],
                    "should": [
                        {
                            "bool": {
                                "must": must_filters_pre,
                                "must_not": must_not_filters_pre,
                            }
                        },
                        {
                            "bool": {
                                "must":
                                must_filters_post,
                                "must_not":
                                must_not_filters_post,
                                "filter": [{
                                    "exists": {
                                        "field": "__ts_timeline_id"
                                    }
                                }],
                            }
                        },
                    ],
                    "must_not": [],
                    "filter": [],
                }
            }

        return query_dsl

    # pylint: disable=too-many-arguments
    def search(
        self,
        sketch_id,
        query_string,
        query_filter,
        query_dsl,
        indices,
        count=False,
        aggregations=None,
        return_fields=None,
        enable_scroll=False,
        timeline_ids=None,
    ):
        """Search OpenSearch. This will take a query string from the UI
        together with a filter definition. Based on this it will execute the
        search request on OpenSearch and get result back.

        Args:
            sketch_id: Integer of sketch primary key
            query_string: Query string
            query_filter: Dictionary containing filters to apply
            query_dsl: Dictionary containing OpenSearch DSL query
            indices: List of indices to query
            count: Boolean indicating if we should only return result count
            aggregations: Dict of OpenSearch aggregations
            return_fields: List of fields to return
            enable_scroll: If OpenSearch scroll API should be used
            timeline_ids: Optional list of IDs of Timeline objects that should
                be queried as part of the search.

        Returns:
            Set of event documents in JSON format
        """
        scroll_timeout = None
        if enable_scroll:
            scroll_timeout = "1m"  # Default to 1 minute scroll timeout

        # Exit early if we have no indices to query
        if not indices:
            return {"hits": {"hits": [], "total": 0}, "took": 0}

        # Make sure that the list of index names is unique.
        indices = list(set(indices))

        # Check if we have specific events to fetch and get indices.
        if query_filter.get("events", None):
            indices = {
                event["index"]
                for event in query_filter["events"]
                if event["index"] in indices
            }

        query_dsl = self.build_query(
            sketch_id=sketch_id,
            query_string=query_string,
            query_filter=query_filter,
            query_dsl=query_dsl,
            aggregations=aggregations,
            timeline_ids=timeline_ids,
        )

        # Default search type for OpenSearch is query_then_fetch.
        search_type = "query_then_fetch"

        # Only return how many documents matches the query.
        if count:
            if "sort" in query_dsl:
                del query_dsl["sort"]
            try:
                count_result = self.client.count(body=query_dsl,
                                                 index=list(indices))
            except NotFoundError:
                es_logger.error(
                    "Unable to count due to an index not found: {0:s}".format(
                        ",".join(indices)))
                return 0
            METRICS["search_requests"].labels(type="count").inc()
            return count_result.get("count", 0)

        if not return_fields:
            # Suppress the lint error because opensearchpy adds parameters
            # to the function with a decorator and this makes pylint sad.
            # pylint: disable=unexpected-keyword-arg
            return self.client.search(
                body=query_dsl,
                index=list(indices),
                search_type=search_type,
                scroll=scroll_timeout,
            )

        # The argument " _source_include" changed to "_source_includes" in
        # ES version 7. This check add support for both version 6 and 7 clients.
        # pylint: disable=unexpected-keyword-arg
        try:
            if self.version.startswith("6"):
                _search_result = self.client.search(
                    body=query_dsl,
                    index=list(indices),
                    search_type=search_type,
                    _source_include=return_fields,
                    scroll=scroll_timeout,
                )
            else:
                _search_result = self.client.search(
                    body=query_dsl,
                    index=list(indices),
                    search_type=search_type,
                    _source_includes=return_fields,
                    scroll=scroll_timeout,
                )
        except RequestError as e:
            root_cause = e.info.get("error", {}).get("root_cause")
            if root_cause:
                error_items = []
                for cause in root_cause:
                    error_items.append("[{0:s}] {1:s}".format(
                        cause.get("type", ""), cause.get("reason", "")))
                cause = ", ".join(error_items)
            else:
                cause = str(e)

            es_logger.error("Unable to run search query: {0:s}".format(cause),
                            exc_info=True)
            raise ValueError(cause) from e

        METRICS["search_requests"].labels(type="single").inc()
        return _search_result

    # pylint: disable=too-many-arguments
    def search_stream(
        self,
        sketch_id=None,
        query_string=None,
        query_filter=None,
        query_dsl=None,
        indices=None,
        return_fields=None,
        enable_scroll=True,
        timeline_ids=None,
    ):
        """Search OpenSearch. This will take a query string from the UI
        together with a filter definition. Based on this it will execute the
        search request on OpenSearch and get result back.

        Args:
            sketch_id: Integer of sketch primary key
            query_string: Query string
            query_filter: Dictionary containing filters to apply
            query_dsl: Dictionary containing OpenSearch DSL query
            indices: List of indices to query
            return_fields: List of fields to return
            enable_scroll: Boolean determining whether scrolling is enabled.
            timeline_ids: Optional list of IDs of Timeline objects that should
                be queried as part of the search.

        Returns:
            Generator of event documents in JSON format
        """
        # Make sure that the list of index names is unique.
        indices = list(set(indices))

        METRICS["search_requests"].labels(type="stream").inc()

        if not query_filter.get("size"):
            query_filter["size"] = self.DEFAULT_STREAM_LIMIT

        if not query_filter.get("terminate_after"):
            query_filter["terminate_after"] = self.DEFAULT_STREAM_LIMIT

        result = self.search(
            sketch_id=sketch_id,
            query_string=query_string,
            query_dsl=query_dsl,
            query_filter=query_filter,
            indices=indices,
            return_fields=return_fields,
            enable_scroll=enable_scroll,
            timeline_ids=timeline_ids,
        )

        if enable_scroll:
            scroll_id = result["_scroll_id"]
            scroll_size = result["hits"]["total"]
        else:
            scroll_id = None
            scroll_size = 0

        # Elasticsearch version 7.x returns total hits as a dictionary.
        # TODO: Refactor when version 6.x has been deprecated.
        if isinstance(scroll_size, dict):
            scroll_size = scroll_size.get("value", 0)

        for event in result["hits"]["hits"]:
            yield event

        while scroll_size > 0:
            # pylint: disable=unexpected-keyword-arg
            result = self.client.scroll(scroll_id=scroll_id, scroll="5m")
            scroll_id = result["_scroll_id"]
            scroll_size = len(result["hits"]["hits"])
            for event in result["hits"]["hits"]:
                yield event

    def get_filter_labels(self, sketch_id, indices):
        """Aggregate labels for a sketch.

        Args:
            sketch_id: The Sketch ID
            indices: List of indices to aggregate on

        Returns:
            List with label names.
        """
        # This is a workaround to return all labels by setting the max buckets
        # to something big. If a sketch has more than this amount of labels
        # the list will be incomplete but it should be uncommon to have >10k
        # labels in a sketch.
        max_labels = 10000

        # pylint: disable=line-too-long
        aggregation = {
            "aggs": {
                "nested": {
                    "nested": {
                        "path": "timesketch_label"
                    },
                    "aggs": {
                        "inner": {
                            "filter": {
                                "bool": {
                                    "must": [{
                                        "term": {
                                            "timesketch_label.sketch_id":
                                            sketch_id
                                        }
                                    }]
                                }
                            },
                            "aggs": {
                                "labels": {
                                    "terms": {
                                        "size": max_labels,
                                        "field":
                                        "timesketch_label.name.keyword",
                                    }
                                }
                            },
                        }
                    },
                }
            }
        }

        # Make sure that the list of index names is unique.
        indices = list(set(indices))

        labels = []
        # pylint: disable=unexpected-keyword-arg
        try:
            result = self.client.search(index=indices,
                                        body=aggregation,
                                        size=0)
        except NotFoundError:
            es_logger.error("Unable to find the index/indices: {0:s}".format(
                ",".join(indices)))
            return labels

        buckets = (result.get("aggregations",
                              {}).get("nested",
                                      {}).get("inner",
                                              {}).get("labels",
                                                      {}).get("buckets", []))

        for bucket in buckets:
            new_bucket = {}
            new_bucket["label"] = bucket.pop("key")
            new_bucket["count"] = bucket.pop("doc_count")
            labels.append(new_bucket)
        return labels

    # pylint: disable=inconsistent-return-statements
    def get_event(self, searchindex_id, event_id):
        """Get one event from the datastore.

        Args:
            searchindex_id: String of OpenSearch index id
            event_id: String of OpenSearch event id

        Returns:
            Event document in JSON format
        """
        METRICS["search_get_event"].inc()
        try:
            # Suppress the lint error because opensearchpy adds parameters
            # to the function with a decorator and this makes pylint sad.
            # pylint: disable=unexpected-keyword-arg
            if self.version.startswith("6"):
                event = self.client.get(
                    index=searchindex_id,
                    id=event_id,
                    doc_type="_all",
                    _source_exclude=["timesketch_label"],
                )
            else:
                event = self.client.get(
                    index=searchindex_id,
                    id=event_id,
                    doc_type="_all",
                    _source_excludes=["timesketch_label"],
                )

            return event

        except NotFoundError:
            abort(HTTP_STATUS_CODE_NOT_FOUND)

    def count(self, indices):
        """Count number of documents.

        Args:
            indices: List of indices.

        Returns:
            Tuple containing number of documents and size on disk.
        """
        if not indices:
            return 0, 0

        # Make sure that the list of index names is unique.
        indices = list(set(indices))

        try:
            es_stats = self.client.indices.stats(index=indices,
                                                 metric="docs, store")

        except NotFoundError:
            es_logger.error("Unable to count indices (index not found)")
            return 0, 0

        except RequestError:
            es_logger.error("Unable to count indices (request error)",
                            exc_info=True)
            return 0, 0

        doc_count_total = (es_stats.get("_all",
                                        {}).get("primaries",
                                                {}).get("docs",
                                                        {}).get("count", 0))
        doc_bytes_total = (es_stats.get("_all", {}).get("primaries", {}).get(
            "store", {}).get("size_in_bytes", 0))

        return doc_count_total, doc_bytes_total

    def set_label(
        self,
        searchindex_id,
        event_id,
        event_type,
        sketch_id,
        user_id,
        label,
        toggle=False,
        remove=False,
        single_update=True,
    ):
        """Set label on event in the datastore.

        Args:
            searchindex_id: String of OpenSearch index id
            event_id: String of OpenSearch event id
            event_type: String of OpenSearch document type
            sketch_id: Integer of sketch primary key
            user_id: Integer of user primary key
            label: String with the name of the label
            remove: Optional boolean value if the label should be removed
            toggle: Optional boolean value if the label should be toggled
            single_update: Boolean if the label should be indexed immediately.

        Returns:
            Dict with updated document body, or None if this is a single update.
        """
        # OpenSearch painless script.
        update_body = {
            "script": {
                "lang": "painless",
                "source": UPDATE_LABEL_SCRIPT,
                "params": {
                    "timesketch_label": {
                        "name": str(label),
                        "user_id": user_id,
                        "sketch_id": sketch_id,
                    },
                    "remove": remove,
                },
            }
        }

        if toggle:
            update_body["script"]["source"] = TOGGLE_LABEL_SCRIPT

        if not single_update:
            script = update_body["script"]
            return dict(source=script["source"],
                        lang=script["lang"],
                        params=script["params"])

        doc = self.client.get(index=searchindex_id,
                              id=event_id,
                              doc_type="_all")
        try:
            doc["_source"]["timesketch_label"]
        except KeyError:
            doc = {"doc": {"timesketch_label": []}}
            self.client.update(index=searchindex_id,
                               doc_type=event_type,
                               id=event_id,
                               body=doc)

        self.client.update(index=searchindex_id,
                           id=event_id,
                           doc_type=event_type,
                           body=update_body)

        return None

    def create_index(self,
                     index_name=None,
                     doc_type="generic_event",
                     mappings=None):
        """Create index with Timesketch settings.

        Args:
            index_name: Name of the index. Default is a generated UUID.
            doc_type: Name of the document type. Default is generic_event.
            mappings: Optional dict with the document mapping for OpenSearch.

        Returns:
            Index name in string format.
            Document type in string format.
        """
        # Generate the UUID per call; a uuid4() default argument would be
        # evaluated only once, when the function is defined.
        if index_name is None:
            index_name = uuid4().hex

        if mappings:
            _document_mapping = mappings
        else:
            _document_mapping = {
                "properties": {
                    "timesketch_label": {
                        "type": "nested"
                    },
                    "datetime": {
                        "type": "date"
                    },
                }
            }

        # TODO: Remove when we deprecate OpenSearch version 6.x
        if self.version.startswith("6"):
            _document_mapping = {doc_type: _document_mapping}

        if not self.client.indices.exists(index_name):
            try:
                self.client.indices.create(
                    index=index_name, body={"mappings": _document_mapping})
            except ConnectionError as e:
                raise RuntimeError(
                    "Unable to connect to Timesketch backend.") from e
            except RequestError:
                index_exists = self.client.indices.exists(index_name)
                es_logger.warning(
                    "Attempting to create an index that already exists "
                    "({0:s} - {1:s})".format(index_name, str(index_exists)))

        return index_name, doc_type

    def delete_index(self, index_name):
        """Delete OpenSearch index.

        Args:
            index_name: Name of the index to delete.
        """
        if self.client.indices.exists(index_name):
            try:
                self.client.indices.delete(index=index_name)
            except ConnectionError as e:
                raise RuntimeError(
                    "Unable to connect to Timesketch backend: {}".format(
                        e)) from e

    def import_event(
        self,
        index_name,
        event_type,
        event=None,
        event_id=None,
        flush_interval=DEFAULT_FLUSH_INTERVAL,
        timeline_id=None,
    ):
        """Add event to OpenSearch.

        Args:
            index_name: Name of the index in OpenSearch
            event_type: Type of event (e.g. plaso_event)
            event: Event dictionary
            event_id: Event OpenSearch ID
            flush_interval: Number of events to queue up before indexing
            timeline_id: Optional ID number of a Timeline object this event
                belongs to. If supplied an additional field will be added to
                the store indicating the timeline this belongs to.
        """
        if event:
            for k, v in event.items():
                if not isinstance(k, six.text_type):
                    k = codecs.decode(k, "utf8")

                # Make sure we have decoded strings in the event dict.
                if isinstance(v, six.binary_type):
                    v = codecs.decode(v, "utf8")

                event[k] = v

            # Header needed by OpenSearch when bulk inserting.
            header = {
                "index": {
                    "_index": index_name,
                }
            }
            update_header = {"update": {"_index": index_name, "_id": event_id}}

            # TODO: Remove when we deprecate Elasticsearch version 6.x
            if self.version.startswith("6"):
                header["index"]["_type"] = event_type
                update_header["update"]["_type"] = event_type

            # Add the timeline ID before the event is wrapped for an update,
            # so the field ends up inside the "doc"/"script" body.
            if timeline_id:
                event["__ts_timeline_id"] = timeline_id

            if event_id:
                # Event has "lang" defined if there is a script used for import.
                if event.get("lang"):
                    event = {"script": event}
                else:
                    event = {"doc": event}
                header = update_header

            self.import_events.append(header)
            self.import_events.append(event)
            self.import_counter["events"] += 1

            if self.import_counter["events"] % int(flush_interval) == 0:
                _ = self.flush_queued_events()
                self.import_events = []
        else:
            # Import the remaining events in the queue.
            if self.import_events:
                _ = self.flush_queued_events()

        return self.import_counter["events"]

    def flush_queued_events(self, retry_count=0):
        """Flush all queued events.

        Args:
            retry_count: Optional integer indicating how many retries have
                already been attempted.

        Returns:
            dict: A dict containing the number of events that were sent to
                OpenSearch, whether any errors occurred, and the details of
                those errors, if any.
        """
        if not self.import_events:
            return {}

        return_dict = {
            # Each event occupies two entries in the bulk body (header + doc).
            "number_of_events": len(self.import_events) // 2,
            "total_events": self.import_counter["events"],
        }

        try:
            # pylint: disable=unexpected-keyword-arg
            results = self.client.bulk(body=self.import_events,
                                       timeout=self._request_timeout)
        except (ConnectionTimeout, socket.timeout):
            if retry_count >= self.DEFAULT_FLUSH_RETRY_LIMIT:
                es_logger.error("Unable to add events, reached recount max.",
                                exc_info=True)
                return {}

            es_logger.error("Unable to add events (retry {0:d}/{1:d})".format(
                retry_count, self.DEFAULT_FLUSH_RETRY_LIMIT))
            return self.flush_queued_events(retry_count + 1)

        errors_in_upload = results.get("errors", False)
        return_dict["errors_in_upload"] = errors_in_upload

        if errors_in_upload:
            items = results.get("items", [])
            return_dict["errors"] = []

            es_logger.error("Errors while attempting to upload events.")
            for item in items:
                index = item.get("index", {})
                index_name = index.get("_index", "N/A")

                _ = self._error_container.setdefault(index_name, {
                    "errors": [],
                    "types": Counter(),
                    "details": Counter()
                })

                error_counter = self._error_container[index_name]["types"]
                error_detail_counter = self._error_container[index_name][
                    "details"]
                error_list = self._error_container[index_name]["errors"]

                error = index.get("error", {})
                status_code = index.get("status", 0)
                doc_id = index.get("_id", "(unable to get doc id)")
                caused_by = error.get("caused_by", {})

                caused_reason = caused_by.get("reason",
                                              "Unkown Detailed Reason")

                error_counter[error.get("type")] += 1
                detail_msg = "{0:s}/{1:s}".format(
                    caused_by.get("type", "Unknown Detailed Type"),
                    " ".join(caused_reason.split()[:5]),
                )
                error_detail_counter[detail_msg] += 1

                error_msg = "<{0:s}> {1:s} [{2:s}/{3:s}]".format(
                    error.get("type", "Unknown Type"),
                    error.get("reason", "No reason given"),
                    caused_by.get("type", "Unknown Type"),
                    caused_reason,
                )
                error_list.append(error_msg)
                try:
                    es_logger.error(
                        "Unable to upload document: {0:s} to index {1:s} - "
                        "[{2:d}] {3:s}".format(doc_id, index_name, status_code,
                                               error_msg))
                # We need to catch all exceptions here, since this is a crucial
                # call that we do not want to break operation.
                except Exception:  # pylint: disable=broad-except
                    es_logger.error(
                        "Unable to upload document, and unable to log the "
                        "error itself.",
                        exc_info=True,
                    )

        return_dict["error_container"] = self._error_container

        self.import_events = []
        return return_dict

    @property
    def version(self):
        """Get OpenSearch version.

        Returns:
          Version number as a string.
        """
        version_info = self.client.info().get("version")
        return version_info.get("number")
Example #24
from opensearchpy import OpenSearch, NotFoundError

import json
import os
import pprint

INDEX_DOES_NOT_EXIST = 'index_not_found_exception'

search_domain_scheme = os.environ.get('OPENSEARCH_DOMAIN_SCHEME', 'https')
search_domain_host = os.environ['OPENSEARCH_DOMAIN_HOST']
search_domain_port = os.environ.get('OPENSEARCH_DOMAIN_PORT', 443)
INDEX_PRODUCTS = 'products'

search_client = OpenSearch(
    [search_domain_host],
    scheme=search_domain_scheme,
    port=search_domain_port,
)


# -- Logging
class LoggingMiddleware(object):
    def __init__(self, app):
        self._app = app

    def __call__(self, environ, resp):
        errorlog = environ['wsgi.errors']
        pprint.pprint(('REQUEST', environ), stream=errorlog)

        def log_response(status, headers, *args):
            pprint.pprint(('RESPONSE', status, headers), stream=errorlog)
            return resp(status, headers, *args)

        return self._app(environ, log_response)
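

# A minimal sketch of how the NotFoundError / INDEX_DOES_NOT_EXIST constants
# above are typically used; the function name and query shape are assumptions.
def search_products(query_term):
    try:
        results = search_client.search(
            index=INDEX_PRODUCTS,
            body={'query': {'match': {'name': query_term}}})
        return results['hits']['hits']
    except NotFoundError as e:
        if e.error == INDEX_DOES_NOT_EXIST:
            return []  # treat a missing index as an empty result set
        raise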
Example #25
from argparse import ArgumentParser

import pandas as pd

# clean_strings, clean_dict, set_vendor, osbulk and OPENSEARCH_PARAMS are
# defined earlier in the original source file.

if __name__ == '__main__':
    parser = ArgumentParser(
        description=
        'import data from microcontroller_and_processors-2020-08-14.xlsx')
    parser.add_argument(
        'infile',
        default='microcontroller_and_processors-2020-08-14.xlsx',
        help='input file')
    parser.add_argument('--tryout',
                        '-t',
                        action='store_true',
                        help='output data instead of writing')
    args = parser.parse_args()

    df = pd.read_excel(args.infile)
    df_obj = df.select_dtypes(['object'])
    df[df_obj.columns] = df_obj.apply(clean_strings)
    df = df.T
    data = [clean_dict(df[d].to_dict()) for d in df]
    if args.tryout:
        from pprint import pprint
        pprint(data)
    else:
        ops = [{
            '_op_type': 'index',
            '_index': 'mcs',
            '_type': 'document',
            '_source': set_vendor(m)
        } for m in data]
        osbulk(OpenSearch(**OPENSEARCH_PARAMS), ops)
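
# Hypothetical stand-ins for the helpers referenced above (assumptions, not
# part of the original file), on the premise that osbulk simply wraps
# opensearchpy.helpers.bulk:
#
#   from opensearchpy import helpers
#
#   OPENSEARCH_PARAMS = {'hosts': [{'host': 'localhost', 'port': 9200}]}
#
#   def osbulk(client, actions):
#       return helpers.bulk(client, actions)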
Example #26
# -*- coding: utf-8 -*-
import argparse

from opensearchpy import OpenSearch

from salver.common.facts import all_facts

client = OpenSearch(hosts=[{
    "host": "localhost",
    "port": 9200
}],
                    http_auth=('admin', 'admin'),
                    use_ssl=True,
                    verify_certs=False)
replicas = 0
refresh_interval = "5s"


def create_es_mappings():
    for fact, body in all_facts.items():
        index_name = f"salver-facts-{fact.lower()}-*"
        mapping = body.elastic_mapping()

        template = {
            "settings": {
                "index": {
                    "number_of_shards": 2,
                    "number_of_replicas": 1
                }
            },
            **mapping,
        }
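
        # The original snippet breaks off here; a plausible continuation
        # (an assumption, not confirmed by the source) registers the template
        # for the wildcard index pattern:
        template["index_patterns"] = [index_name]
        client.indices.put_template(
            name=f"salver-facts-{fact.lower()}", body=template)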
Example #27
    def __init__(self, args):
        self.args = args
        self.filenames = set()
        self.osearch = OpenSearch(**OPENSEARCH_PARAMS)
        self._walk()
        self._read_files()
Example #28
class OpenSearchDataStore():
    """Implements the datastore."""

    # Number of events to queue up when bulk inserting events.
    DEFAULT_FLUSH_INTERVAL = 20000
    DEFAULT_SIZE = 1000  # Max events to return

    def __init__(self, host='127.0.0.1', port=9200, url=None):
        """Create an OpenSearch client."""
        super().__init__()
        if url:
            self.client = OpenSearch([url], timeout=30)
        else:
            self.client = OpenSearch(
                [{'host': host, 'port': port}], timeout=30)
        self.import_counter = collections.Counter()
        self.import_events = []

    @staticmethod
    def build_query(query_string):
        """Build OpenSearch DSL query.

        Args:
            query_string: Query string.

        Returns:
            OpenSearch DSL query as a dictionary.
        """

        query_dsl = {
            'query': {
                'bool': {
                    'must': [{
                        'query_string': {
                            'query': query_string
                        }
                    }]
                }
            }
        }

        return query_dsl
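
    # For example, build_query('user:admin AND action:login') returns:
    #   {'query': {'bool': {'must': [
    #       {'query_string': {'query': 'user:admin AND action:login'}}]}}}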

    def create_index(self, index_name):
        """Create an index.

        Args:
            index_name: Name of the index.

        Returns:
            Index name in string format.
        """
        if not self.client.indices.exists(index_name):
            try:
                self.client.indices.create(index=index_name)
            except exceptions.ConnectionError as e:
                raise RuntimeError(
                    'Unable to connect to backend datastore.') from e

        return index_name

    def delete_index(self, index_name):
        """Delete OpenSearch index.

        Args:
            index_name: Name of the index to delete.
        """
        if self.client.indices.exists(index_name):
            try:
                self.client.indices.delete(index=index_name)
            except exceptions.ConnectionError as e:
                raise RuntimeError(
                    'Unable to connect to backend datastore.') from e

    def import_event(self,
                     index_name,
                     event=None,
                     flush_interval=DEFAULT_FLUSH_INTERVAL):
        """Add event to OpenSearch.

        Args:
            index_name: Name of the index in OpenSearch.
            event: Event dictionary.
            flush_interval: Number of events to queue up before indexing.

        Returns:
            The number of events processed.
        """
        if event:
            # Header needed by OpenSearch when bulk inserting.
            header = {'index': {'_index': index_name}}

            self.import_events.append(header)
            self.import_events.append(event)
            self.import_counter['events'] += 1

            if self.import_counter['events'] % int(flush_interval) == 0:
                self.client.bulk(body=self.import_events)
                self.import_events = []
        else:
            # Import the remaining events in the queue.
            if self.import_events:
                self.client.bulk(body=self.import_events)

        return self.import_counter['events']

    def index_exists(self, index_name):
        """Check if an index already exists.

        Args:
            index_name: Name of the index.

        Returns:
            True if the index exists, False if not.
        """
        return self.client.indices.exists(index_name)

    def search(self, index_id, query_string, size=DEFAULT_SIZE):
        """Search OpenSearch.

        This will take a query string from the UI together with a filter
        definition. Based on this it will execute the search request on
        OpenSearch and get the result back.

        Args:
            index_id: Index to be searched.
            query_string: Query string.
            size: Maximum number of results to return.

        Returns:
            Set of event documents in JSON format.
        """

        query_dsl = self.build_query(query_string)

        # Default search type for OpenSearch is query_then_fetch.
        search_type = 'query_then_fetch'

        # pylint: disable=unexpected-keyword-arg
        return self.client.search(body=query_dsl,
                                  index=index_id,
                                  size=size,
                                  search_type=search_type)
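

# A minimal usage sketch for the datastore above (index name and query are
# assumptions):
#
#   store = OpenSearchDataStore(host='localhost', port=9200)
#   store.create_index('my-events')
#   store.import_event('my-events', event={'message': 'hello world'})
#   store.import_event('my-events')  # event=None flushes the queue
#   results = store.search('my-events', 'message:hello')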


def _get_distribution(client: OpenSearch) -> Any:
    return client.info().get("version", {}).get(
        "distribution", "elasticsearch")


def _get_version(client: OpenSearch) -> Any:
    return client.info().get("version", {}).get("number")