Example #1
    def __init__(self,
                 stream_name: str,
                 partition_key: str,
                 *,
                 chunk_size: int = DEFAULT_CHUNK_SIZE,
                 encoder: str = 'utf-8',
                 workers: int = 1,
                 **boto_session_kwargs):
        """
        :param stream_name: Name of the Kinesis stream
        :type stream_name: str
        :param partition_key: Kinesis partition key used to group data by shards
        :type partition_key: str
        :param chunk_size: the size of a chunk of records used as the rotation threshold (default 524288)
        :type chunk_size: int
        :param encoder: the encoder to be used for log records (default 'utf-8')
        :type encoder: str
        :param workers: the number of background workers that rotate log records (default 1)
        :type workers: int
        :param boto_session_kwargs: additional keyword arguments for the AWS Kinesis Resource
        :type boto_session_kwargs: boto3 resource keyword arguments
        """

        args_validation = (
            ValidationRule(stream_name, is_non_empty_string,
                           empty_str_err('stream_name')),
            ValidationRule(chunk_size, is_positive_int,
                           bad_integer_err('chunk_size')),
            ValidationRule(encoder, is_non_empty_string,
                           empty_str_err('encoder')),
            ValidationRule(workers, is_positive_int,
                           bad_integer_err('workers')),
        )
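        # Note: ValidationRule appears to be a simple 3-field record (Example #3 below accesses
        # rule.arg, rule.func and rule.message), i.e. something along the lines of
        #     ValidationRule = namedtuple('ValidationRule', ['arg', 'func', 'message'])
        # so rule[0] is the value, rule[1] the predicate and rule[2] the error message.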

        for rule in args_validation:
            assert rule[1](rule[0]), rule[2]

        self.stream = KinesisStream(stream_name,
                                    partition_key,
                                    chunk_size=chunk_size,
                                    encoder=encoder,
                                    workers=workers,
                                    **boto_session_kwargs)

        # Make sure we gracefully clear the buffers and upload the missing parts before exiting
        signal.signal(signal.SIGTERM, self._teardown)
        signal.signal(signal.SIGINT, self._teardown)
        signal.signal(signal.SIGQUIT, self._teardown)
        atexit.register(self.close)

        StreamHandler.__init__(self, self.stream)
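
A minimal usage sketch for this constructor, assuming the method belongs to a logging.StreamHandler subclass (the snippet only shows __init__); the class name KinesisHandler, its import path and all values below are placeholders rather than names taken from the snippet:

import logging

from log_handlers import KinesisHandler  # placeholder import; substitute the real module of the class above

logger = logging.getLogger('kinesis_example')
logger.setLevel(logging.INFO)

# stream_name and partition_key are positional; everything after '*' is keyword-only,
# and unrecognised keyword arguments are forwarded to the boto3 Kinesis resource.
handler = KinesisHandler('my-log-stream', 'my-partition-key',
                         workers=2, region_name='us-east-1')
logger.addHandler(handler)

logger.info('this record is buffered and shipped to Kinesis in the background')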
Example #2
    def __init__(self, file_path, bucket, key_id, secret, chunk_size=DEFAULT_CHUNK_SIZE,
                 time_rotation=DEFAULT_ROTATION_TIME_SECS, max_file_size_bytes=MAX_FILE_SIZE_BYTES, encoder='utf-8',
                 max_threads=3, compress=False):
        """

        :param file_path: The path of the S3 object
        :param bucket: The id of the S3 bucket
        :param key_id: Authentication key
        :param secret: Authentication secret
        :param chunk_size: Size of a chunk in the multipart upload in bytes - default 5MB
        :param time_rotation: Interval in seconds to rotate the file by - default 12 hours
        :param max_file_size_bytes: Maximum file size in bytes before rotation - default 100MB
        :param encoder: default utf-8
        :param max_threads: the number of threads that a stream handler would run for file and chunk rotation tasks
        :param compress: Boolean indicating whether to save a compressed, gz-suffixed file
        """

        args_validation = (
            ValidationRule(file_path, is_non_empty_string, empty_str_err('file_path')),
            ValidationRule(bucket, is_non_empty_string, empty_str_err('bucket')),
            ValidationRule(key_id, is_non_empty_string, empty_str_err('key_id')),
            ValidationRule(secret, is_non_empty_string, empty_str_err('secret')),
            ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
            ValidationRule(time_rotation, is_positive_int, bad_integer_err('time_rotation')),
            ValidationRule(max_file_size_bytes, is_positive_int, bad_integer_err('max_file_size_bytes')),
            ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
            ValidationRule(max_threads, is_positive_int, bad_integer_err('thread_count')),
            ValidationRule(compress, is_boolean, bad_type_error('compress', 'boolean'))
        )

        for rule in args_validation:
            assert rule[1](rule[0]), rule[2]

        self.bucket = bucket
        self.secret = secret
        self.key_id = key_id
        self.stream = S3Streamer(self.bucket, file_path, chunk_size=chunk_size, max_file_log_time=time_rotation,
                                 max_file_size_bytes=max_file_size_bytes, encoder=encoder, workers=max_threads,
                                 compress=compress, key_id=self.key_id, secret=self.secret)

        # Make sure we gracefully clear the buffers and upload the missing parts before exiting
        signal.signal(signal.SIGTERM, self.close)
        signal.signal(signal.SIGINT, self.close)
        signal.signal(signal.SIGQUIT, self.close)
        atexit.register(self.close)

        StreamHandler.__init__(self, self.stream)
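
For orientation, the defaults referenced in this docstring suggest module-level constants roughly like the following; the names come from the signature and the values from the docstring text (5MB chunks, 12-hour rotation, 100MB size cap), so treat them as an approximation rather than the library's actual definitions:

DEFAULT_CHUNK_SIZE = 5 * 1024 ** 2           # ~5MB, the minimum S3 multipart part size
DEFAULT_ROTATION_TIME_SECS = 12 * 60 * 60    # rotate the log object every 12 hours
MAX_FILE_SIZE_BYTES = 100 * 1024 ** 2        # ...or as soon as it reaches ~100MB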
Example #3
    def __init__(self, key: str, bucket: str, *, chunk_size: int = DEFAULT_CHUNK_SIZE,
                 time_rotation: int = DEFAULT_ROTATION_TIME_SECS, max_file_size_bytes: int = MAX_FILE_SIZE_BYTES,
                 encoder: str = 'utf-8',
                 max_threads: int = 1, compress: bool = False, **boto_session_kwargs):
        """

        :param key: The path of the S3 object
        :type key: str
        :param bucket: The id of the S3 bucket
        :type bucket: str
        :param chunk_size: size of a chunk in the multipart upload in bytes (default 5MB)
        :type chunk_size: int
        :param time_rotation: Interval in seconds to rotate the file by (default 12 hours)
        :type time_rotation: int
        :param max_file_size_bytes: maximum file size in bytes before rotation (default 100MB)
        :type max_file_size_bytes: int
        :param encoder: default utf-8
        :type encoder: str
        :param max_threads: the number of threads that a stream handler would run for file and chunk rotation tasks,
               only useful if emitting lots of records
        :type max_threads: int
        :param compress: indicates whether to save a compressed, gz-suffixed file
        :type compress: bool
        """

        args_validation = (
            ValidationRule(time_rotation, is_positive_int, bad_integer_err('time_rotation')),
            ValidationRule(max_file_size_bytes, is_positive_int, bad_integer_err('max_file_size_bytes')),
            ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
            ValidationRule(max_threads, is_positive_int, bad_integer_err('thread_count')),
        )

        for rule in args_validation:
            assert rule.func(rule.arg), rule.message

        self.bucket = bucket
        self.stream = S3Stream(self.bucket, key, chunk_size=chunk_size, max_file_log_time=time_rotation,
                               max_file_size_bytes=max_file_size_bytes, encoder=encoder, workers=max_threads,
                               compress=compress, **boto_session_kwargs)

        # Make sure we gracefully clear the buffers and upload the missing parts before exiting
        signal.signal(signal.SIGTERM, self._teardown)
        signal.signal(signal.SIGINT, self._teardown)
        signal.signal(signal.SIGQUIT, self._teardown)
        atexit.register(self.close)

        StreamHandler.__init__(self, self.stream)
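
A usage sketch for this keyword-only variant; S3Handler and its module are again placeholders, while region_name and profile_name are standard boto3 session arguments that this signature forwards through **boto_session_kwargs:

import logging

from log_handlers import S3Handler  # placeholder import for the class that defines the __init__ above

logger = logging.getLogger('s3_example')
logger.setLevel(logging.INFO)

# key and bucket are positional; the remaining options must be passed by keyword.
handler = S3Handler('logs/app.log', 'my-log-bucket',
                    max_threads=4, compress=True,
                    region_name='eu-west-1', profile_name='logging')  # forwarded to boto3
logger.addHandler(handler)

logger.info('written to S3 in multipart chunks and rotated by size or age')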
Example #4
    def __init__(self, key_id, secret, stream_name, region, partition='single', chunk_size=DEFAULT_CHUNK_SIZE,
                 encoder='utf-8', workers=3):
        """

        :param key_id: Authentication key
        :param secret: Authentication secret
        :param stream_name: The name of the Kinesis stream
        :param region: The AWS region for the Kinesis stream
        :param partition: A partition name in case multiple shards are used
        :param chunk_size: Size of a chunk in the multipart upload in bytes - default 5MB
        :param encoder: default utf-8
        :param workers: the number of threads that a stream handler would run for file and chunk rotation tasks
        """

        args_validation = (
            ValidationRule(key_id, is_non_empty_string, empty_str_err('key_id')),
            ValidationRule(secret, is_non_empty_string, empty_str_err('secret')),
            ValidationRule(stream_name, is_non_empty_string, empty_str_err('stream_name')),
            ValidationRule(region, is_non_empty_string, empty_str_err('region')),
            ValidationRule(partition, is_non_empty_string, empty_str_err('partition')),
            ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
            ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
            ValidationRule(workers, is_positive_int, bad_integer_err('workers')),
        )

        for rule in args_validation:
            assert rule[1](rule[0]), rule[2]

        self.secret = secret
        self.key_id = key_id
        self.stream = KinesisStreamer(self.key_id, self.secret, stream_name, region, partition, chunk_size, encoder,
                                      workers=workers)

        # Make sure we gracefully clear the buffers and upload the missing parts before exiting
        signal.signal(signal.SIGTERM, self.close)
        signal.signal(signal.SIGINT, self.close)
        signal.signal(signal.SIGQUIT, self.close)
        atexit.register(self.close)

        StreamHandler.__init__(self, self.stream)
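
One more hedged sketch, this time with explicit credentials as this signature requires; the class name and every value below are placeholders. Because the handler registers its close method with atexit and the usual termination signals, calling logging.shutdown() (or simply exiting normally) flushes whatever is still buffered:

import logging

from log_handlers import KinesisHandler  # placeholder import for the class that defines the __init__ above

logger = logging.getLogger('kinesis_example')
logger.setLevel(logging.INFO)

handler = KinesisHandler(key_id='AKIA...', secret='...',  # explicit AWS credentials
                         stream_name='my-log-stream',
                         region='us-east-1',
                         partition='single')
logger.addHandler(handler)

logger.info('queued for delivery by the handler worker threads')
logging.shutdown()  # flushes and closes all handlers, uploading anything still buffered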
Example #5
                 max_file_size_bytes=MAX_FILE_SIZE_BYTES, encoder='utf-8',
                 max_threads=3, compress=False):
        """

        :param file_path: The path of the S3 object
        :param bucket: The id of the S3 bucket
        :param chunk_size: Size of a chunk in the multipart upload in bytes - default 5MB
        :param time_rotation: Interval in seconds to rotate the file by - default 12 hours
        :param max_file_size_bytes: Maximum file size in bytes before rotation - default 100MB
        :param encoder: default utf-8
        :param max_threads: the number of threads that a stream handler would run for file and chunk rotation tasks
        :param compress: Boolean indicating whether to save a compressed, gz-suffixed file
        """

        args_validation = (
            ValidationRule(file_path, is_non_empty_string, empty_str_err('file_path')),
            ValidationRule(bucket, is_non_empty_string, empty_str_err('bucket')),
            ValidationRule(chunk_size, is_positive_int, bad_integer_err('chunk_size')),
            ValidationRule(time_rotation, is_positive_int, bad_integer_err('time_rotation')),
            ValidationRule(max_file_size_bytes, is_positive_int, bad_integer_err('max_file_size_bytes')),
            ValidationRule(encoder, is_non_empty_string, empty_str_err('encoder')),
            ValidationRule(max_threads, is_positive_int, bad_integer_err('thread_count')),
            ValidationRule(compress, is_boolean, bad_type_error('compress', 'boolean'))
        )

        for rule in args_validation:
            assert rule[1](rule[0]), rule[2]

        self.bucket = bucket
        self.stream = S3Streamer(self.bucket, file_path, chunk_size, time_rotation,
                                 ServerSideEncryption=ServerSideEncryption, SSEKMSKeyId=SSEKMSKeyId,