def __init__(self, ingest_token, base_url="http://localhost:3000"):
    """
    Initialise a client that authenticates with an ingest token.

    :param ingest_token: Ingest token to access ingest.
    :type ingest_token: str
    :param base_url: Url of Humio instance.
    :type base_url: str
    """
    # The parent initialiser receives the url; self.base_url is read back
    # from the instance right below to build the web caller.
    super().__init__(base_url)
    self.ingest_token = ingest_token
    self.webcaller = WebCaller(self.base_url)
Ejemplo n.º 2
0
 def __init__(self, query_id, base_url, repository, user_token):
     """
     Store the identity of a queryjob and reset its polling state.

     :param query_id: Id of queryjob
     :type query_id: str
     :param base_url: Url of Humio instance
     :type base_url: str
     :param repository: Repository being queried
     :type repository: str
     :param user_token: Token used to access resource
     :type user_token: str
     """
     # Where the queryjob lives and how to authenticate against it.
     self.query_id = query_id
     self.base_url = base_url
     self.repository = repository
     self.user_token = user_token

     # Polling bookkeeping: nothing has been polled yet.
     self.segment_is_done = self.segment_is_cancelled = False
     self.more_segments_can_be_polled = True
     self.time_at_last_poll = self.wait_time_until_next_poll = 0

     self.webcaller = WebCaller(self.base_url)
class HumioClient(BaseHumioClient):
    """
    A Humio client that gives full access to the underlying API.

    While this client can be used for ingesting data, the
    HumioIngestClient, made exclusively for ingestion, is the recommended
    choice for that purpose.
    """
    def __init__(
        self,
        repository,
        user_token,
        base_url="http://localhost:3000",
    ):
        """
        :param repository: Repository associated with client
        :type repository: str
        :param user_token: User token to get access to repository
        :type user_token: str
        :param base_url: Url of Humio instance
        :type base_url: str
        """
        super().__init__(base_url)
        self.repository = repository
        self.user_token = user_token

    @property
    def _default_user_headers(self):
        """
        Build a fresh header dict on every access, so callers may mutate it.

        :return: Default headers used for web requests
        :rtype: dict
        """
        return {
            "Content-Type": "application/json",
            "Authorization": "Bearer {}".format(self.user_token),
        }

    @property
    def _state(self):
        """
        :return: State of all field variables, serialized as a JSON string
        :rtype: str
        """
        return json.dumps({
            "user_token": self.user_token,
            "repository": self.repository,
            "base_url": self.base_url,
        })

    @staticmethod
    def _build_query_data(query_string, start, end, is_live,
                          timezone_offset_minutes, arguments, raw_data):
        """
        Assemble the POST body shared by streaming queries and queryjobs.

        Options left as None are omitted from the body. Entries of
        ``raw_data`` are applied last and therefore override the named
        options on key collisions.

        :return: Body of the query request, not yet serialized
        :rtype: dict
        """
        options = [
            ("queryString", query_string),
            ("start", start),
            ("end", end),
            ("isLive", is_live),
            ("timeZoneOffsetMinutes", timezone_offset_minutes),
            ("arguments", arguments),
        ]
        data = {key: value for key, value in options if value is not None}

        if raw_data is not None:
            data.update(raw_data)

        return data

    def _streaming_query(self,
                         query_string,
                         start=None,
                         end=None,
                         is_live=None,
                         timezone_offset_minutes=None,
                         arguments=None,
                         raw_data=None,
                         media_type="application/x-ndjson",
                         **kwargs):
        """
        Method wrapped by streaming_query to perform a Humio streaming Query.

        :param query_string: Humio query
        :type query_string: str
        :param start: Starting time of query
        :type start: str, optional
        :param end: Ending time of query
        :type end: str, optional
        :param is_live: Whether the query is live
        :type is_live: bool, optional
        :param timezone_offset_minutes: Timezone offset in minutes
        :type timezone_offset_minutes: int, optional
        :param arguments: Arguments specified in query
        :type arguments: dict(string->string), optional
        :param raw_data: Additional arguments to add to POST body under other keys
        :type raw_data: dict(string->string), optional
        :param media_type: Value sent in the Accept header
        :type media_type: str, optional
        :param kwargs: Forwarded to the web caller; a ``headers`` entry is
            merged into the default headers instead of being forwarded.

        :return: An iterable wrapping the streamed response
        :rtype: WebStreamer
        """
        endpoint = "dataspaces/{}/query".format(self.repository)

        headers = self._default_user_headers
        headers["Accept"] = media_type
        # Caller supplied headers win over the defaults set above.
        headers.update(kwargs.pop("headers", {}))

        data = self._build_query_data(query_string, start, end, is_live,
                                      timezone_offset_minutes, arguments,
                                      raw_data)

        connection = self.webcaller.call_rest("post",
                                              endpoint,
                                              data=json.dumps(data),
                                              headers=headers,
                                              stream=True,
                                              **kwargs)

        return WebStreamer(connection)

    # Wrap method to be pythonic
    def streaming_query(self,
                        query_string,
                        start=None,
                        end=None,
                        is_live=None,
                        timezone_offset_minutes=None,
                        arguments=None,
                        raw_data=None,
                        **kwargs):
        """
        Humio Query type that opens up a streaming socket connection to Humio.
        This is the preferred way to do static queries with large result sizes.
        It can be used for live queries, but note that if data is not passed
        back from Humio for a while, the connection will be lost, resulting in
        an error.

        :param query_string: Humio query
        :type query_string: str
        :param start: Starting time of query
        :type start: str, optional
        :param end: Ending time of query
        :type end: str, optional
        :param is_live: Whether the query is live
        :type is_live: bool, optional
        :param timezone_offset_minutes: Timezone offset in minutes
        :type timezone_offset_minutes: int, optional
        :param arguments: Arguments specified in query
        :type arguments: dict(string->string), optional
        :param raw_data: Additional arguments to add to POST body under other keys
        :type raw_data: dict(string->string), optional

        :return: A generator that returns query results as python objects
        :rtype: Generator
        """

        media_type = "application/x-ndjson"
        encoding = "utf-8"

        res = self._streaming_query(
            query_string=query_string,
            start=start,
            end=end,
            is_live=is_live,
            timezone_offset_minutes=timezone_offset_minutes,
            arguments=arguments,
            media_type=media_type,
            raw_data=raw_data,
            **kwargs)

        # Each streamed event is decoded and parsed as one JSON document.
        for event in res:
            yield json.loads(event.decode(encoding))

    def create_queryjob(self,
                        query_string,
                        start=None,
                        end=None,
                        is_live=None,
                        timezone_offset_minutes=None,
                        arguments=None,
                        raw_data=None,
                        **kwargs):
        """
        Creates a queryjob on Humio, which executes asynchronously of the calling code.
        The returned QueryJob instance can be used to get the query results at a later time.
        Queryjobs are good to use for live queries, or static queries that return smaller
        amounts of data.

        :param query_string: Humio query
        :type query_string: str
        :param start: Starting time of query
        :type start: str, optional
        :param end: Ending time of query
        :type end: str, optional
        :param is_live: Whether the query is live
        :type is_live: bool, optional
        :param timezone_offset_minutes: Timezone offset in minutes
        :type timezone_offset_minutes: int, optional
        :param arguments: Arguments specified in query
        :type arguments: dict(string->string), optional
        :param raw_data: Additional arguments to add to POST body under other keys
        :type raw_data: dict(string->string), optional

        :return: A LiveQueryJob when ``is_live`` is truthy, otherwise a
            StaticQueryJob, granting access to the created queryjob
        :rtype: QueryJob
        """

        endpoint = "dataspaces/{}/queryjobs".format(self.repository)

        headers = self._default_user_headers
        headers.update(kwargs.pop("headers", {}))

        data = self._build_query_data(query_string, start, end, is_live,
                                      timezone_offset_minutes, arguments,
                                      raw_data)

        query_id = self.webcaller.call_rest("post",
                                            endpoint,
                                            data=json.dumps(data),
                                            headers=headers,
                                            **kwargs).json()['id']

        if is_live:
            return LiveQueryJob(query_id, self.base_url, self.repository,
                                self.user_token)
        else:
            return StaticQueryJob(query_id, self.base_url, self.repository,
                                  self.user_token)

    def _ingest_json_data(self, json_elements=None, **kwargs):
        """
        Ingest structured json data to repository.
        Structure of ingested data is discussed in: https://docs.humio.com/api/ingest/#structured-data

        :param json_elements: Structured data that can be converted to a
            json string. Defaults to an empty list.
        :type json_elements: list, optional
        :param kwargs: Forwarded to the web caller; a ``headers`` entry is
            merged into the default headers instead of being forwarded.

        :return: Response object of the web request
        """

        if json_elements is None:
            json_elements = []

        headers = self._default_user_headers
        headers.update(kwargs.pop("headers", {}))

        endpoint = "dataspaces/{}/ingest".format(self.repository)

        return self.webcaller.call_rest("post",
                                        endpoint,
                                        data=json.dumps(json_elements),
                                        headers=headers,
                                        **kwargs)

    # Wrap method to be pythonic
    ingest_json_data = WebCaller.response_as_json(_ingest_json_data)

    def _ingest_messages(self,
                         messages=None,
                         parser=None,
                         fields=None,
                         tags=None,
                         **kwargs):
        """
        Ingest unstructured messages to repository.

        :param messages: A list of event strings.
        :type messages: list(string), optional
        :param parser:  Name of parser to use on messages.
        :type parser: string, optional
        :param fields:  Fields that should be added to events after parsing.
        :type fields: dict(string->string), optional
        :param tags:  Tags to associate with the messages.
        :type tags: dict(string->string), optional
        :param kwargs: Forwarded to the web caller; a ``headers`` entry is
            merged into the default headers instead of being forwarded.

        :return: Response object of the web request
        """
        if messages is None:
            messages = []

        headers = self._default_user_headers
        headers.update(kwargs.pop("headers", {}))

        endpoint = "dataspaces/{}/ingest-messages".format(self.repository)

        obj = self._create_unstructured_data_object(messages,
                                                    parser=parser,
                                                    fields=fields,
                                                    tags=tags)

        # The endpoint is sent a list of objects; a single one is sent here.
        return self.webcaller.call_rest("post",
                                        endpoint,
                                        data=json.dumps([obj]),
                                        headers=headers,
                                        **kwargs)

    # Wrap method to be pythonic
    ingest_messages = WebCaller.response_as_json(_ingest_messages)

    # status
    def _get_status(self, **kwargs):
        """
        Gets status of Humio instance

        :param kwargs: Forwarded to the web caller.

        :return: Response object of the web request
        """
        endpoint = "status"
        return self.webcaller.call_rest("get", endpoint, **kwargs)

    # Wrap method to be pythonic
    get_status = WebCaller.response_as_json(_get_status)

    # user management
    def _get_users(self):
        """
        Gets users registered to Humio instance

        :return: Response object of the web request
        """
        endpoint = "users"
        return self.webcaller.call_rest("get",
                                        endpoint,
                                        headers=self._default_user_headers)

    # Wrap method to be pythonic
    get_users = WebCaller.response_as_json(_get_users)

    def get_user_by_email(self, email):
        """
        Get a user associated with Humio instance by email

        :param email: Email of queried user
        :type email: str

        :return: The first user entry whose "email" equals ``email``, or
            None when no user matches
        """
        for user in self.get_users():
            if email == user["email"]:
                return user
        return None

    def _create_user(self, email, isRoot=False):
        """
        Create user on Humio instance. Method is idempotent

        :param email: Email of user to create
        :type email: str
        :param isRoot: Indicates whether user should be root
        :type isRoot: bool, optional

        :return: Response object of the web request
        """

        endpoint = "users"

        data = {"email": email, "isRoot": isRoot}

        return self.webcaller.call_rest("post",
                                        endpoint,
                                        data=json.dumps(data),
                                        headers=self._default_user_headers)

    # Wrap method to be pythonic
    create_user = WebCaller.response_as_json(_create_user)

    def _delete_user_by_id(self, user_id):
        """
        Delete user from Humio instance.

        :param user_id: Id of user to delete.
        :type user_id: str

        :return: Response object of the web request
        """

        link = "users/{}".format(user_id)

        return self.webcaller.call_rest("delete",
                                        link,
                                        headers=self._default_user_headers)

    # Wrap method to be pythonic
    delete_user_by_id = WebCaller.response_as_json(_delete_user_by_id)

    def delete_user_by_email(self, email):
        """
        Delete user by email.

        :param email: Email of user to delete.
        :type email: str

        :return: Result of delete_user_by_id for the first user whose
            "email" equals ``email``, or None when no user matches
        """
        for user in self.get_users():
            if email == user["email"]:
                return self.delete_user_by_id(user["userID"])
        return None

    # organizations
    def _list_organizations(self):
        """
        List organizations.

        :return: Response object of the GraphQL request
        """

        headers = self._default_user_headers
        request = {
            "query": "query {organizations{id, name, description}}",
            "variables": None,
        }

        return self.webcaller.call_graphql(headers=headers,
                                           data=json.dumps(request))

    # Wrap method to be pythonic
    def list_organizations(self):
        """
        List organizations.

        :return: The "organizations" entry of the GraphQL response data
        """
        resp = self._list_organizations()
        return resp.json()["data"]["organizations"]

    def _create_organization(self, name, description):
        """
        Create new organization.

        :param name: Name of organization.
        :type name: str
        :param description: Description of organization.
        :type description: str

        :return: Response object of the GraphQL request
        """

        headers = self._default_user_headers
        request = {
            "query":
            "mutation($name: String!, $description: String!){createOrganization(name: $name, description: $description){organization{id}}}",
            "variables": {
                "name": name,
                "description": description
            },
        }
        return self.webcaller.call_graphql(headers=headers,
                                           data=json.dumps(request))

    # Wrap method to be pythonic
    def create_organization(self, name, description):
        """
        Create new organization.

        :param name: Name of organization.
        :type name: str
        :param description: Description of organization.
        :type description: str

        :return: The "data" entry of the GraphQL response
        """
        resp = self._create_organization(name, description)
        return resp.json()["data"]

    # files API
    def _upload_file(self, filepath):
        """
        Upload file to repository

        :param filepath: Path to file.
        :type filepath: str

        :return: Response object of the web request
        """

        endpoint = "dataspaces/{}/files".format(self.repository)
        headers = {
            "Authorization": "Bearer {}".format(self.user_token)
        }  # Not using default headers as files are sent
        # The request is issued inside the with-block so the file handle is
        # still open while it is being sent, and closed afterwards.
        with open(filepath, "rb") as f:
            return self.webcaller.call_rest("post",
                                            endpoint,
                                            files={"file": f},
                                            headers=headers)

    # Wrap method to be pythonic
    upload_file = WebCaller.response_as_json(_upload_file)

    def _list_files(self):
        """
        List uploaded files on repository

        :return: Response object of the GraphQL request
        """

        headers = self._default_user_headers
        request = {
            "query":
            # json.dumps quotes and escapes the repository name so that it
            # is embedded in the query as a string literal.
            "query {{listUploadedFiles(name: {})}}".format(
                json.dumps(self.repository)),
            "variables":
            None,
        }
        return self.webcaller.call_graphql(headers=headers,
                                           data=json.dumps(request))

    def list_files(self):
        """
        List uploaded files on repository

        :return: The "listUploadedFiles" entry of the GraphQL response data
        """
        resp = self._list_files()
        return resp.json()["data"]["listUploadedFiles"]

    def _get_file(self, file_name):
        """
        Get specific file on repository

        :param file_name: Name of file to get.
        :type file_name: str

        :return: Response object of the web request
        """
        endpoint = "dataspaces/{}/files/{}".format(self.repository, file_name)
        headers = {
            "Authorization": "Bearer {}".format(self.user_token)
        }  # Not using default headers as files are sent
        return self.webcaller.call_rest("get", endpoint, headers=headers)

    def get_file(self, file_name, encoding=None):
        """
        Get the contents of a specific file on repository

        :param file_name: Name of file to get.
        :type file_name: str
        :param encoding: Name of the codec used to decode the contents,
            e.g. "utf-8". When None the contents are returned undecoded.
        :type encoding: str, optional

        :return: Undecoded contents when ``encoding`` is None, otherwise
            the contents decoded with ``encoding``
        :rtype: bytes or str
        """
        resp = self._get_file(file_name)
        raw_data = resp.content
        if encoding is None:
            return raw_data
        # Decode with the codec the caller asked for, not a fixed one.
        return raw_data.decode(encoding)
class HumioIngestClient(BaseHumioClient):
    """
    A Humio client that is used exclusively for ingesting data
    """
    def __init__(
        self,
        ingest_token,
        base_url="http://localhost:3000",
    ):
        """
        :param ingest_token: Ingest token to access ingest.
        :type ingest_token: str
        :param base_url: Url of Humio instance.
        :type base_url: str
        """
        super().__init__(base_url)
        self.ingest_token = ingest_token
        self.webcaller = WebCaller(self.base_url)

    @property
    def _default_ingest_headers(self):
        """
        Build a fresh header dict on every access, so callers may mutate it.

        :return: Default headers used for web requests
        :rtype: dict
        """

        return {
            "Content-Type": "application/json",
            "Authorization": "Bearer {}".format(self.ingest_token),
        }

    @property
    def _state(self):
        """
        :return: State of all field variables, serialized as a JSON string
        :rtype: str
        """

        return json.dumps({
            "base_url": self.base_url,
            "ingest_token": self.ingest_token,
        })

    def _ingest_json_data(self, json_elements=None, **kwargs):
        """
        Ingest structured json data to repository.
        Structure of ingested data is discussed in: https://docs.humio.com/api/ingest/#structured-data

        :param json_elements: Structured data that can be converted to a
            json string. Defaults to an empty list.
        :type json_elements: list, optional
        :param kwargs: Forwarded to the web caller; a ``headers`` entry is
            merged into the default headers instead of being forwarded.

        :return: Response object of the web request
        """

        if json_elements is None:
            json_elements = []

        headers = self._default_ingest_headers
        headers.update(kwargs.pop("headers", {}))

        endpoint = "ingest/humio-structured"

        return self.webcaller.call_rest("post",
                                        endpoint,
                                        data=json.dumps(json_elements),
                                        headers=headers,
                                        **kwargs)

    # Wrap method to be pythonic
    ingest_json_data = WebCaller.response_as_json(_ingest_json_data)

    def _ingest_messages(self,
                         messages=None,
                         parser=None,
                         fields=None,
                         tags=None,
                         **kwargs):
        """
        Ingest unstructured messages to repository.

        :param messages: A list of event strings.
        :type messages: list(string), optional
        :param parser:  Name of parser to use on messages.
        :type parser: string, optional
        :param fields:  Fields that should be added to events after parsing.
        :type fields: dict(string->string), optional
        :param tags:  Tags to associate with the messages.
        :type tags: dict(string->string), optional
        :param kwargs: Forwarded to the web caller; a ``headers`` entry is
            merged into the default headers instead of being forwarded.

        :return: Response object of the web request
        """

        if messages is None:
            messages = []

        headers = self._default_ingest_headers
        headers.update(kwargs.pop("headers", {}))

        endpoint = "ingest/humio-unstructured"

        obj = self._create_unstructured_data_object(messages,
                                                    parser=parser,
                                                    fields=fields,
                                                    tags=tags)

        # Remaining keyword arguments are passed on, matching
        # _ingest_json_data, rather than being silently discarded.
        return self.webcaller.call_rest("post",
                                        endpoint,
                                        data=json.dumps([obj]),
                                        headers=headers,
                                        **kwargs)

    # Wrap method to be pythonic
    ingest_messages = WebCaller.response_as_json(_ingest_messages)
 def __init__(self, base_url):
     """
     Remember the instance url and create the web caller bound to it.

     :param base_url: Url of Humio instance.
     :type base_url: str
     """
     self.base_url = base_url
     self.webcaller = WebCaller(base_url)
Ejemplo n.º 6
0
class BaseQueryJob():
    """
    Base QueryJob class, not meant to be instantiated.
    This class and its children manage access to queryjobs created on a Humio instance,
    they are mainly used for extracting results from queryjobs.
    """
    def __init__(self, query_id, base_url, repository, user_token):
        """
        :param query_id: Id of queryjob
        :type query_id: str
        :param base_url: Url of Humio instance
        :type base_url: str
        :param repository: Repository being queried
        :type repository: str
        :param user_token: Token used to access resource
        :type user_token: str
        """
        self.query_id = query_id
        self.segment_is_done = False
        self.segment_is_cancelled = False
        self.more_segments_can_be_polled = True
        # Seconds since the epoch, as returned by time.time(); 0 means the
        # queryjob has not been polled yet.
        self.time_at_last_poll = 0
        # Milliseconds; it is divided by 1000 before being slept on.
        self.wait_time_until_next_poll = 0
        self.base_url = base_url
        self.repository = repository
        self.user_token = user_token
        self.webcaller = WebCaller(self.base_url)

    @property
    def _default_user_headers(self):
        """
        Build a fresh header dict on every access, so callers may mutate it.

        :return: Default headers used for web requests
        :rtype: dict
        """
        return {
            "Content-Type": "application/json",
            "Authorization": "Bearer {}".format(self.user_token),
        }

    def _wait_till_next_poll(self):
        """
        A potentially blocking operation, that waits until the queryjob may be polled again.
        This will always pass on the first poll to the queryjob.
        """
        # time.time() works in seconds while the wait time is kept in
        # milliseconds, so the elapsed time is converted before comparing.
        elapsed_ms = (time.time() - self.time_at_last_poll) * 1000.0
        remaining_ms = self.wait_time_until_next_poll - elapsed_ms
        if remaining_ms > 0:
            time.sleep(remaining_ms / 1000.0)

    def _fetch_next_segment(self, link, headers, **kwargs):
        """
        Polls the queryjob for the next segment of data.
        May block, if the queryjob is not ready to be polled again.

        :param link: url to access queryjob.
        :type link: str
        :param headers: headers used for web request.
        :type headers: dict
        :param kwargs: Forwarded to the web caller.

        :return: A data object that contains events of the polled segment and metadata about the poll
        :rtype: PollResult

        :raises HumioQueryJobExpiredException: When the request fails with
            status code 404.
        :raises HumioHTTPException: For any other failing status code.
        """
        self._wait_till_next_poll()

        try:
            response = self.webcaller.call_rest("get",
                                                link,
                                                headers=headers,
                                                **kwargs).json()
        except HumioHTTPException as e:
            # In the case that the queryjob has expired, a custom exception is thrown.
            # The calling code must itself decide how to respond to the error.
            # It has been considered whether this instance should simply restart the queryjob automatically,
            # but that would require the calling code to handle cases where
            # a queryjob restart returns previously received query results.
            if e.status_code == 404:
                raise HumioQueryJobExpiredException(e.message) from e
            raise

        self.wait_time_until_next_poll = response["metaData"]["pollAfter"]
        self.segment_is_done = response["done"]
        self.segment_is_cancelled = response["cancelled"]
        self.time_at_last_poll = time.time()

        return PollResult(response["events"], response["metaData"])

    def _is_streaming_query(self, metadata):
        """
        Checks whether the query is a streaming query and not an aggregate

        :param metadata: query response metadata.
        :type metadata: dict

        :return: Answer to whether query is of type streaming
        :rtype: bool
        """
        return not metadata["isAggregate"]

    def poll(self, **kwargs):
        """
        Polls the queryjob for the next segment of data, and handles edge cases for data polled

        :param kwargs: Forwarded to the web caller; a ``headers`` entry is
            merged into the default headers instead of being forwarded.

        :return: A data object that contains events of the polled segment and metadata about the poll
        :rtype: PollResult
        """
        link = "dataspaces/{}/queryjobs/{}".format(self.repository,
                                                   self.query_id)

        headers = self._default_user_headers
        headers.update(kwargs.pop("headers", {}))

        poll_result = self._fetch_next_segment(link, headers, **kwargs)
        # In case the segment hasn't been completed, we poll until it is.
        while not self.segment_is_done:
            poll_result = self._fetch_next_segment(link, headers, **kwargs)

        if self._is_streaming_query(poll_result.metadata):
            # The flag is compared against the string 'true'.
            self.more_segments_can_be_polled = poll_result.metadata[
                "extraData"]["hasMoreEvents"] == 'true'
        else:  # is aggregate query
            self.more_segments_can_be_polled = False

        return poll_result