Example #1
import queue
import threading
from io import BufferedIOBase, BytesIO

from boto3 import client

# Task, task_worker, DEFAULT_CHUNK_SIZE, MIN_WORKERS_NUM and MAX_CHUNK_SIZE
# come from the surrounding module and are not part of this snippet.


class KinesisStreamer(BufferedIOBase):
    """
    The stream interface used by the handler, which binds to Kinesis and ships
    buffered records through background worker threads
    """

    _stream_buffer_queue = queue.Queue()

    def __init__(self, key_id, secret, stream_name, region, partition, chunk_size=DEFAULT_CHUNK_SIZE, encoder='utf-8',
                 workers=2):

        self.kinesis = client('kinesis', region_name=region, aws_access_key_id=key_id,
                              aws_secret_access_key=secret)
        self.chunk_size = chunk_size
        self.stream_name = stream_name
        self.region = region
        self.tasks = queue.Queue()
        self.partition = partition
        self.encoder = encoder

        try:
            stream_desc = self.kinesis.describe_stream(StreamName=self.stream_name)
            if stream_desc['StreamDescription']['StreamStatus'] != 'ACTIVE':
                raise AssertionError
        except Exception:
            raise ValueError('Kinesis stream %s does not exist, is not active, '
                             'or the credentials lack permissions' % stream_name)

        # Thread.start() returns None, so build the list first and start the
        # threads afterwards to keep usable references to the workers
        self.workers = [threading.Thread(target=task_worker, args=(self.tasks,))
                        for _ in range(int(max(workers, MIN_WORKERS_NUM) / 2) + 1)]
        for worker in self.workers:
            worker.start()
        self._stream = BytesIO()

        self._is_open = True

        BufferedIOBase.__init__(self)

    def add_task(self, task):
        self.tasks.put(task)

    def join_tasks(self):
        self.tasks.join()

    def _rotate_chunk(self, run_async=True):

        assert self._stream, "Stream object not found"

        buffer = self._stream
        self._stream = BytesIO()
        if buffer.tell() > MAX_CHUNK_SIZE:
            # We are limited to 1 MB per stream upload call, so move any
            # overflow past MAX_CHUNK_SIZE into the fresh buffer
            buffer.seek(MAX_CHUNK_SIZE)
            self._stream.write(buffer.read())
            buffer.truncate(MAX_CHUNK_SIZE)
        buffer.seek(0)

        if run_async:
            self.add_task(Task(self._upload_part, buffer))
        else:
            self._upload_part(buffer)
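
The examples on this page all lean on Task and task_worker, which are never shown. A minimal sketch of what they plausibly look like, inferred purely from how they are used (a queue.Queue of tasks, one consuming loop per worker thread, join_tasks() draining via tasks.join()); the names func/args/kwargs are assumptions:

class Task:
    """A callable bundled with its arguments, queued for a background worker."""

    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs


def task_worker(tasks):
    """Consume tasks from the queue until a None sentinel arrives."""
    while True:
        task = tasks.get()
        if task is None:  # sentinel: shut this worker down
            tasks.task_done()
            break
        try:
            task.func(*task.args, **task.kwargs)
        finally:
            tasks.task_done()  # lets tasks.join() unblock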
Example #2
    def _rotate_file(self):

        if self._current_object.buffer.tell() > 0:
            self._rotate_chunk()

        temp_object = self._current_object
        self._add_task(Task(self._close_stream, stream_object=temp_object))
        self.start_time = int(datetime.utcnow().strftime('%s'))
        new_filename = self.get_filename()
        self._current_object = self._get_stream_object(new_filename)
Example #3
    def _rotate_chunk(self, run_async=True):

        assert self._current_object, "Stream object not found"

        part_num = self._current_object.chunk_count + 1
        part = self._current_object.uploader.Part(part_num)
        buffer = self._current_object.buffer
        self._current_object.buffer = BytesIO()
        buffer.seek(0)
        if run_async:
            self._current_object.add_task(Task(self._upload_part, self._current_object, part, part_num, buffer))
        else:
            self._upload_part(self._current_object, part, part_num, buffer)

        self._current_object.chunk_count += 1
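
Example #3 collects one {'ETag', 'PartNumber'} dict per uploaded part (see _upload_part in Example #7); that list is exactly what S3 needs to finalize a multipart upload. A standalone sketch of the same flow in plain boto3, with a hypothetical bucket and key for illustration only:

import boto3

s3 = boto3.resource('s3')
mpu = s3.Object('example-bucket', 'logs/app.log').initiate_multipart_upload()

part = mpu.Part(1)
resp = part.upload(Body=b'x' * (5 * 1024 * 1024))  # every part but the last must be >= 5 MB
parts = [{'ETag': resp['ETag'], 'PartNumber': 1}]

# the 'Parts' list is what the examples accumulate, in chunk order
mpu.complete(MultipartUpload={'Parts': parts})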
Example #4
    def _rotate_chunk(self, run_async=True):

        assert self._stream, "Stream object not found"

        buffer = self._stream
        self._stream = BytesIO()
        if buffer.tell() > MAX_CHUNK_SIZE:
            # We are limited to 1 MB per stream upload call, so move any
            # overflow past MAX_CHUNK_SIZE into the fresh buffer
            buffer.seek(MAX_CHUNK_SIZE)
            self._stream.write(buffer.read())
            buffer.truncate(MAX_CHUNK_SIZE)
        buffer.seek(0)

        if run_async:
            self.add_task(Task(self._upload_part, buffer))
        else:
            self._upload_part(buffer)
Example #5
    def _rotate_chunk(self, run_async=True):
        """
        Send the accumulated records to the stream and clear the buffer
        :param run_async: Indicates whether the rotation should be asynchronous (run on a different thread)
        :type run_async: bool
        :return:
        """

        assert self._stream, "Stream object not found"

        buffer = self._stream
        self._stream = BytesIO()
        if buffer.tell() > MAX_CHUNK_SIZE:
            # We are limited to 1 MB per stream upload call, so move any
            # overflow past MAX_CHUNK_SIZE into the fresh buffer
            buffer.seek(MAX_CHUNK_SIZE)
            self._stream.write(buffer.read())
            buffer.truncate(MAX_CHUNK_SIZE)
        buffer.seek(0)

        if run_async:
            self.add_task(Task(self._upload_part, buffer))
        else:
            self._upload_part(buffer)
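
_upload_part is never shown for the Kinesis variants (Examples #1, #4, #5). Given that the cap enforced above matches the 1 MB data limit of the Kinesis PutRecord API, a plausible sketch of the missing method, assuming each rotated buffer is sent as a single record:

    def _upload_part(self, buffer):
        # hypothetical completion of KinesisStreamer; PutRecord accepts at
        # most 1 MB of data, which is why _rotate_chunk caps the buffer
        try:
            self.kinesis.put_record(
                StreamName=self.stream_name,
                Data=buffer.read(),
                PartitionKey=self.partition,
            )
        finally:
            buffer.close()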
Example #6
import queue
import threading
from datetime import datetime
from io import BufferedIOBase, BytesIO

from boto3.session import Session

# StreamObject, Task, task_worker and the DEFAULT_*/MAX_*/MIN_* constants
# come from the surrounding module and are not part of this snippet.


class S3Streamer(BufferedIOBase):
    """
    The stream interface used by the handler, which binds to S3 and utilizes
    the StreamObject class
    """

    _stream_buffer_queue = queue.Queue()
    _rotation_queue = queue.Queue()

    def __init__(self,
                 bucket,
                 key_id,
                 secret,
                 key,
                 chunk_size=DEFAULT_CHUNK_SIZE,
                 max_file_log_time=DEFAULT_ROTATION_TIME_SECS,
                 max_file_size_bytes=MAX_FILE_SIZE_BYTES,
                 encoder='utf-8',
                 workers=2,
                 compress=False):

        self.session = Session(key_id, secret)
        self.s3 = self.session.resource('s3')
        self.start_time = int(datetime.utcnow().strftime('%s'))
        self.key = key.strip('/')
        self.chunk_size = chunk_size
        self.max_file_log_time = max_file_log_time
        self.max_file_size_bytes = max_file_size_bytes
        self.current_file_name = "{}_{}".format(self.key, self.start_time)
        if compress:
            self.current_file_name = "{}.gz".format(self.current_file_name)
        self.encoder = encoder

        try:
            self.s3.meta.client.head_bucket(Bucket=bucket)
        except Exception:
            raise ValueError(
                'Bucket %s does not exist, or the credentials lack permissions' % bucket)

        self._bucket = self.s3.Bucket(bucket)
        self._current_object = self._get_stream_object(self.current_file_name)
        # Thread.start() returns None, so build the thread lists first and
        # start them afterwards to keep usable references to the workers
        self.workers = [
            threading.Thread(target=task_worker,
                             args=(self._rotation_queue, ))
            for _ in range(int(max(workers, MIN_WORKERS_NUM) / 2) + 1)
        ]
        self.stream_bg_workers = [
            threading.Thread(target=task_worker,
                             args=(self._stream_buffer_queue, ))
            for _ in range(max(int(max(workers, MIN_WORKERS_NUM) / 2), 1))
        ]
        for worker in self.workers + self.stream_bg_workers:
            worker.start()

        self._is_open = True
        self.compress = compress

        BufferedIOBase.__init__(self)

    def get_filename(self):
        filename = "{}_{}".format(self.key, self.start_time)
        if not self.compress:
            return filename
        return "{}.gz".format(filename)

    def add_task(self, task):
        self._rotation_queue.put(task)

    def join_tasks(self):
        self._rotation_queue.join()

    def _get_stream_object(self, filename):
        try:
            return StreamObject(self.s3, self._bucket.name, filename,
                                self._stream_buffer_queue)

        except Exception:
            raise RuntimeError('Failed to open new S3 stream object')

    def _rotate_chunk(self, run_async=True):

        assert self._current_object, "Stream object not found"

        part_num = self._current_object.chunk_count + 1
        part = self._current_object.uploader.Part(part_num)
        buffer = self._current_object.buffer
        self._current_object.buffer = BytesIO()
        buffer.seek(0)
        if run_async:
            self._current_object.add_task(
                Task(self._upload_part, self._current_object, part, part_num,
                     buffer))
        else:
            self._upload_part(self._current_object, part, part_num, buffer)

        self._current_object.chunk_count += 1
Example #7
    def _get_stream_object(self, filename):
        try:
            return StreamObject(self.s3, self._bucket.name, filename,
                                self._stream_buffer_queue)

        except Exception:
            raise RuntimeError('Failed to open new S3 stream object')

    def _rotate_chunk(self, run_async=True):

        assert self._current_object, 'Stream object not found'

        part_num = self._current_object.chunk_count + 1
        part = self._current_object.uploader.Part(part_num)
        buffer = self._current_object.buffer
        self._current_object.buffer = BytesIO()
        buffer.seek(0)
        if run_async:
            self._current_object.add_task(Task(self._upload_part, self._current_object, part, part_num, buffer))
        else:
            self._upload_part(self._current_object, part, part_num, buffer)

        self._current_object.chunk_count += 1

    @staticmethod
    def _upload_part(s3_object, part, part_num, buffer):
        upload = part.upload(Body=buffer)
        s3_object.parts.append({'ETag': upload['ETag'], 'PartNumber': part_num})

    def _rotate_file(self):

        if self._current_object.buffer.tell() > 0:
            self._rotate_chunk()
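
Both streamers subclass BufferedIOBase, and their docstrings describe them as "the stream interface used by the handler". Assuming the streamer exposes the usual write()/flush() methods (not shown in these snippets), wiring one into the standard logging machinery would look roughly like this, with placeholder bucket, key, and credentials:

import logging

streamer = S3Streamer(bucket='example-bucket', key_id='<KEY_ID>',
                      secret='<SECRET>', key='logs/app.log')

handler = logging.StreamHandler(stream=streamer)
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))

logger = logging.getLogger('app')
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.info('shipped to S3 through the streamer')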