Example #1
    def __init__(self, data=None, stream=None):
        if data:
            self.stream = BytesIO(data)
        elif stream:
            self.stream = stream
        else:
            raise InvalidParameterError(
                'Either bytes or a stream must be provided')

        self.reader = BufferedReader(self.stream)
        self._last = None  # Should come in handy to spot -404 errors
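
A minimal standalone sketch of the same either-or pattern, using a hypothetical open_reader helper rather than the original class (whose name is not shown in this snippet):

from io import BufferedReader, BytesIO

def open_reader(data=None, stream=None):
    # Same logic as the constructor above: raw bytes get wrapped in a
    # BytesIO, an existing stream is buffered directly.
    if data:
        stream = BytesIO(data)
    elif stream is None:
        raise ValueError('Either bytes or a stream must be provided')
    return BufferedReader(stream)

assert open_reader(data=b'abc').read() == b'abc'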
Example #2
    def test_happy_path(self):
        url = 'https://example.com/path/file'
        dest_dir = tempfile.mkdtemp()
        content = b'abc'
        reader = BufferedReader(BytesIO(content), buffer_size=1)
        responses.add(responses.GET, url, body=reader)
        result = download_url(url, dest_dir)

        assert os.path.basename(result) == 'file'
        with open(result, 'rb') as f:
            assert f.read() == content
Example #3
    def test_io_bufferedreader(self):
        fp = BytesIO(b'foo')
        resp = HTTPResponse(fp, preload_content=False)
        br = BufferedReader(resp)

        assert br.read() == b'foo'

        br.close()
        assert resp.closed

        b = b'!tenbytes!'
        fp = BytesIO(b)
        resp = HTTPResponse(fp, preload_content=False)
        br = BufferedReader(resp, 5)

        # This is necessary to make sure the "no bytes left" part of `readinto`
        # gets tested.
        assert len(br.read(5)) == 5
        assert len(br.read(5)) == 5
        assert len(br.read(5)) == 0
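
The same end-of-stream behaviour can be reproduced with the standard library alone, without urllib3's HTTPResponse; a minimal sketch:

from io import BufferedReader, BytesIO

br = BufferedReader(BytesIO(b'!tenbytes!'), 5)
assert br.read(5) == b'!tenb'
assert br.read(5) == b'ytes!'
assert br.read(5) == b''  # EOF: the underlying readinto() reports no bytes left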
Example #4
def load(root_type, reader, options=None):
    options = options.copy() if options else Options()
    context = Context()
    context.source = source(reader)
    with context.transaction, ProgressIO('load', reader) as reader:
        with BufferedReader(reader, 0x1000) as reader:
            g = read(root_type, reader, context, None, options)
            assert reader.read() == b''  # there should be no unparsed data left
    assert not context.transaction.active
    return g
Example #5
def s3_handler(event):
    s3 = boto3.client("s3")

    # Get the object from the event and show its content type
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = urllib.unquote_plus(event["Records"][0]["s3"]["object"]["key"]).decode("utf8")

    metadata[DD_SOURCE] = parse_event_source(event, key)
    ##default service to source value
    metadata[DD_SERVICE] = metadata[DD_SOURCE]

    # Extract the S3 object
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response["Body"]
    data = body.read()

    # If the name has a .gz extension, then decompress the data
    if key[-3:] == ".gz":
        with gzip.GzipFile(fileobj=BytesIO(data)) as decompress_stream:
            # Reading line by line avoids a bug where gzip would take a very long time (>5min) for
            # files around 60MB gzipped
            data = "".join(BufferedReader(decompress_stream))

    if is_cloudtrail(str(key)):
        cloud_trail = json.loads(data)
        for event in cloud_trail["Records"]:
            # Create structured object and send it
            structured_line = merge_dicts(
                event, {"aws": {"s3": {"bucket": bucket, "key": key}}}
            )
            yield structured_line
    elif metadata[DD_SOURCE] == "auth0":
        auth0_data = json.loads(data)
        if isinstance(auth0_data, list):
            for event in auth0_data:
                # Create structured object and send it
                structured_line = merge_dicts(
                    event, {"aws": {"s3": {"bucket": bucket, "key": key}}}
                )
                yield structured_line
        else:
            structured_line = merge_dicts(
                auth0_data, {"aws": {"s3": {"bucket": bucket, "key": key}}}
            )
            yield structured_line
    else:
        # Send lines to Datadog
        for line in data.splitlines():
            # Create structured object and send it
            structured_line = {
                "aws": {"s3": {"bucket": bucket, "key": key}},
                "message": line,
            }
            yield structured_line
Example #6
    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False
        if len(mode) != 1:
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            raise ValueError('mode can only be [rb, rU, wb], not %r' %
                             (orig_mode, ))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode == 'r':
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            self.io = BufferedReader(self.fileio, bufsize)
        elif mode == 'w':
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            self.io = BufferedWriter(self.fileio, bufsize)
        else:
            # QQQ: not used
            self.io = BufferedRandom(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)
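
A standalone sketch of the buffer-size normalization used above, with io.DEFAULT_BUFFER_SIZE standing in for the class's self.default_bufsize:

import io

def normalize_bufsize(bufsize, default=io.DEFAULT_BUFFER_SIZE):
    # Negative means "use the default"; 0 (unbuffered) is bumped to the
    # smallest buffer the Buffered* classes accept; 1 (line buffering)
    # makes no sense for binary streams, so it also falls back to the default.
    if bufsize < 0:
        return default
    if bufsize == 0:
        return 1
    if bufsize == 1:
        return default
    return bufsize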
Example #7
 def __init__(self, file_handler, read_chunk_size, *args, **kwargs):
     """
     :param file_handler: the file handler to read from
     :param read_chunk_size: chunk size used to compute the s3 etag
     """
     self._hashers = dict(crc32c=CRC32C(),
                          sha1=hashlib.sha1(),
                          sha256=hashlib.sha256(),
                          s3_etag=S3Etag(read_chunk_size))
     self._reader = BufferedReader(file_handler, *args, **kwargs)
     self.raw = self._reader.raw
Example #8
File: io.py Project: vandanabn/oio-sds
    def __init__(self,
                 source,
                 sysmeta,
                 chunk_preparer,
                 storage_method,
                 headers=None,
                 connection_timeout=None,
                 write_timeout=None,
                 read_timeout=None,
                 deadline=None,
                 chunk_checksum_algo='md5',
                 **_kwargs):
        """
        :param connection_timeout: timeout to establish the connection
        :param write_timeout: timeout to send a buffer of data
        :param read_timeout: timeout to read a buffer of data from source
        :param chunk_checksum_algo: algorithm to use to compute chunk
            checksums locally. Can be `None` to disable local checksum
            computation and let the rawx compute it (will be md5).
        """
        if isinstance(source, IOBase):
            self.source = BufferedReader(source)
        else:
            self.source = BufferedReader(IOBaseWrapper(source))
        if isinstance(chunk_preparer, dict):

            def _sort_and_yield():
                for pos in sorted(chunk_preparer.keys()):
                    yield chunk_preparer[pos]

            self.chunk_prep = _sort_and_yield
        else:
            self.chunk_prep = chunk_preparer
        self.sysmeta = sysmeta
        self.storage_method = storage_method
        self.headers = headers or dict()
        self.connection_timeout = connection_timeout or CONNECTION_TIMEOUT
        self.deadline = deadline
        self._read_timeout = read_timeout or CLIENT_TIMEOUT
        self._write_timeout = write_timeout or CHUNK_TIMEOUT
        self.chunk_checksum_algo = chunk_checksum_algo
Example #9
File: controller.py Project: 0x5d/redpanda
def decode_topic_command(record):
    rdr = Reader(BufferedReader(BytesIO(record.value)))
    k_rdr = Reader(BytesIO(record.key))
    cmd = {}
    cmd['type'] = rdr.read_int8()
    if cmd['type'] == 0:
        cmd['type_string'] = 'create_topic'
        version = Reader(BytesIO(rdr.peek(4))).read_int32()
        if version < 0:
            assert version == -1
            rdr.skip(4)
        else:
            version = 0
        cmd['namespace'] = rdr.read_string()
        cmd['topic'] = rdr.read_string()
        cmd['partitions'] = rdr.read_int32()
        cmd['replication_factor'] = rdr.read_int16()
        cmd['compression'] = rdr.read_optional(lambda r: r.read_int8())
        cmd['cleanup_policy_bitflags'] = rdr.read_optional(
            lambda r: decode_cleanup_policy(r.read_int8()))
        cmd['compaction_strategy'] = rdr.read_optional(lambda r: r.read_int8())
        cmd['timestamp_type'] = rdr.read_optional(lambda r: r.read_int8())
        cmd['segment_size'] = rdr.read_optional(lambda r: r.read_int64())
        cmd['retention_bytes'] = rdr.read_tristate(lambda r: r.read_int64())
        cmd['retention_duration'] = rdr.read_tristate(lambda r: r.read_int64())
        if version == -1:
            cmd["recovery"] = rdr.read_optional(lambda r: r.read_bool())
            cmd["shadow_indexing"] = rdr.read_optional(lambda r: r.read_int8())
        cmd['assignments'] = rdr.read_vector(read_partition_assignment)
    elif cmd['type'] == 1:
        cmd['type_string'] = 'delete_topic'
        cmd['namespace'] = rdr.read_string()
        cmd['topic'] = rdr.read_string()
    elif cmd['type'] == 2:
        cmd['type_string'] = 'update_partitions'
        cmd['namespace'] = k_rdr.read_string()
        cmd['topic'] = k_rdr.read_string()
        cmd['partition'] = k_rdr.read_int32()
        cmd['replicas'] = rdr.read_vector(lambda r: read_broker_shard(r))

    elif cmd['type'] == 3:
        cmd['type_string'] = 'finish_partitions_update'
        cmd['namespace'] = k_rdr.read_string()
        cmd['topic'] = k_rdr.read_string()
        cmd['partition'] = k_rdr.read_int32()
        cmd['replicas'] = rdr.read_vector(lambda r: read_broker_shard(r))
    elif cmd['type'] == 4:
        cmd['type_string'] = 'update_topic_properties'
        cmd['namespace'] = k_rdr.read_string()
        cmd['topic'] = k_rdr.read_string()
        cmd['update'] = read_incremental_properties_update(rdr)

    return cmd
Example #10
def read_fastq(filename):
    """
    return a stream of FASTQ entries, handling gzipped and empty files
    borrowed from https://github.com/vals/umis
    """
    if not filename:
        return itertools.cycle((None, ))
    if filename.endswith('gz'):
        filename_fh = BufferedReader(gzip.open(filename, mode='rt'))
    else:
        filename_fh = open(filename)
    return stream_fastq(filename_fh)
Example #11
def awslogs_handler(event, context):
    # Get logs
    with gzip.GzipFile(fileobj=BytesIO(
            base64.b64decode(event["awslogs"]["data"]))) as decompress_stream:
        # Reading line by line avoids a bug where gzip would take a very long time (>5min) for
        # files around 60MB gzipped
        data = "".join(BufferedReader(decompress_stream))
    logs = json.loads(str(data))
    # Set the source on the logs
    source = logs.get("logGroup", "cloudwatch")
    metadata[DD_SOURCE] = parse_event_source(event, source)
    ##default service to source value
    metadata[DD_SERVICE] = metadata[DD_SOURCE]

    # Send lines to Datadog
    for log in logs["logEvents"]:
        # Create structured object and send it
        structured_line = merge_dicts(
            log,
            {
                "aws": {
                    "awslogs": {
                        "logGroup": logs["logGroup"],
                        "logStream": logs["logStream"],
                        "owner": logs["owner"],
                    }
                }
            },
        )
        ## For Lambda logs, we want to extract the function name
        ## and we reconstruct the arn of the monitored lambda
        # 1. we split the log group to get the function name
        if metadata[DD_SOURCE] == "lambda":
            loggroupsplit = logs["logGroup"].split("/lambda/")
            if len(loggroupsplit) > 1:
                functioname = loggroupsplit[1].lower()
                # 2. We split the arn of the forwarder to extract the prefix
                arnsplit = context.invoked_function_arn.split("function:")
                if len(arnsplit) > 0:
                    arn_prefix = arnsplit[0]
                    # 3. We replace the function name in the arn
                    arn = arn_prefix + "function:" + functioname
                    # 4. We add the arn as a log attribute
                    structured_line = merge_dicts(log,
                                                  {"lambda": {
                                                      "arn": arn
                                                  }})
                    # 5. We add the function name as tag
                    metadata[DD_CUSTOM_TAGS] = (metadata[DD_CUSTOM_TAGS] +
                                                ",functionname:" + functioname)
                    # 6. We set the arn as the hostname
                    metadata[DD_HOST] = arn
        yield structured_line
Example #12
    async def upload(self, request):
        file_name = request.query.get("filename", "")
        content_type = request.headers.get("Content-Type",
                                           "application/octet-stream")
        client = next(iter(self.pan_clients.values()))

        body = await request.read()
        try:
            response, maybe_keys = await client.upload(
                data_provider=BufferedReader(BytesIO(body)),
                content_type=content_type,
                filename=file_name,
                encrypt=True,
                filesize=len(body),
            )

            if not isinstance(response, UploadResponse):
                return web.Response(
                    status=response.transport_response.status,
                    content_type=response.transport_response.content_type,
                    headers=CORS_HEADERS,
                    body=await response.transport_response.read(),
                )

            self.store.save_upload(self.name, response.content_uri, file_name,
                                   content_type)

            mxc = urlparse(response.content_uri)
            mxc_server = mxc.netloc.strip("/")
            mxc_path = mxc.path.strip("/")

            logger.info(
                f"Adding media info for {mxc_server}/{mxc_path} to the store")
            media_info = MediaInfo(
                mxc_server,
                mxc_path,
                maybe_keys["key"],
                maybe_keys["iv"],
                maybe_keys["hashes"],
            )
            self.store.save_media(self.name, media_info)

            return web.Response(
                status=response.transport_response.status,
                content_type=response.transport_response.content_type,
                headers=CORS_HEADERS,
                body=await response.transport_response.read(),
            )

        except ClientConnectionError as e:
            return web.Response(status=500, text=str(e))
        except SendRetryError as e:
            return web.Response(status=503, text=str(e))
Example #13
def s3_handler(event, context, metadata):
    s3 = boto3.client("s3")

    # Get the object from the event and show its content type
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"])

    source = parse_event_source(event, key)
    metadata[DD_SOURCE] = source
    ##default service to source value
    metadata[DD_SERVICE] = source
    ##Get the ARN of the service and set it as the hostname
    hostname = parse_service_arn(source, key, bucket, context)
    if hostname:
        metadata[DD_HOST] = hostname

    # Extract the S3 object
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response["Body"]
    data = body.read()

    # Decompress data that has a .gz extension or magic header http://www.onicos.com/staff/iz/formats/gzip.html
    if key[-3:] == ".gz" or data[:2] == b"\x1f\x8b":
        with gzip.GzipFile(fileobj=BytesIO(data)) as decompress_stream:
            # Reading line by line avoids a bug where gzip would take a very long time (>5min) for
            # files around 60MB gzipped
            data = b"".join(BufferedReader(decompress_stream))

    if is_cloudtrail(str(key)):
        cloud_trail = json.loads(data)
        for event in cloud_trail["Records"]:
            # Create structured object and send it
            structured_line = merge_dicts(
                event, {"aws": {"s3": {"bucket": bucket, "key": key}}}
            )
            yield structured_line
    else:
        # Check if using multiline log regex pattern
        # and determine whether line or pattern separated logs
        data = data.decode("utf-8")
        if DD_MULTILINE_LOG_REGEX_PATTERN and multiline_regex_start_pattern.match(data):
            split_data = multiline_regex.split(data)
        else:
            split_data = data.splitlines()

        # Send lines to Datadog
        for line in split_data:
            # Create structured object and send it
            structured_line = {
                "aws": {"s3": {"bucket": bucket, "key": key}},
                "message": line,
            }
            yield structured_line
Example #14
 def open(self, bucket, name, mode='rb'):
     full_name = '%s/%s' % (self.bucket(bucket), name)
     if 'w' in mode:
         logger.info("Opening for writing: %s", full_name)
         # Manually commit if __exit__ without error
         fp = self.fs.open(full_name, mode, autocommit=False)
         return _commit_discard_context(fp, full_name)
     else:
         logger.info("Opening for reading: %s", full_name)
         fp = BufferedReader(self.fs.open(full_name, mode), 10240000)
         logger.info("Opened for reading: %s", full_name)
         return fp
Example #15
def chain_streams(streams, buffer_size=io.DEFAULT_BUFFER_SIZE):
    """
    Chain an iterable of streams together into a single buffered stream.
    Usage:
        def generate_open_file_streams():
            for file in filenames:
                yield open(file, 'rb')
        f = chain_streams(generate_open_file_streams())
        f.read()
    """
    class ChainStream(RawIOBase):
        def __init__(self):
            self.leftover = b''
            self.stream_iter = iter(streams)
            try:
                self.stream = next(self.stream_iter)
            except StopIteration:
                self.stream = None

        def readable(self):
            return True

        def _read_next_chunk(self, max_length):
            # Return 0 or more bytes from the current stream, first returning all
            # leftover bytes. If the stream is closed returns b''
            if self.leftover:
                return self.leftover
            elif self.stream is not None:
                return self.stream.read(max_length)
            else:
                return b''

        def readinto(self, b):
            buffer_length = len(b)
            chunk = self._read_next_chunk(buffer_length)
            while len(chunk) == 0:
                # move to next stream
                if self.stream is not None:
                    self.stream.close()
                try:
                    self.stream = next(self.stream_iter)
                    chunk = self._read_next_chunk(buffer_length)
                except StopIteration:
                    # No more streams to chain together
                    self.stream = None
                    return 0  # indicate EOF
            output, self.leftover = chunk[:buffer_length], chunk[buffer_length:]
            b[:len(output)] = output
            return len(output)

    return BufferedReader(ChainStream(), buffer_size=buffer_size)
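
A quick self-contained check of chain_streams using in-memory BytesIO objects in place of the open files shown in the docstring:

from io import BytesIO

f = chain_streams([BytesIO(b'abc'), BytesIO(b'defgh')], buffer_size=4)
assert f.read() == b'abcdefgh'

Implementing only readable() and readinto() on the RawIOBase subclass and handing it to BufferedReader is what provides read(), readline() and iteration over the combined stream for free.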
Example #16
def awslogs_handler(event, context, metadata):
    # Get logs
    with gzip.GzipFile(
        fileobj=BytesIO(base64.b64decode(event["awslogs"]["data"]))
    ) as decompress_stream:
        # Reading line by line avoids a bug where gzip would take a very long
        # time (>5min) for files around 60MB gzipped
        data = "".join(BufferedReader(decompress_stream))
    logs = json.loads(str(data))

    # Set the source on the logs
    source = logs.get("logGroup", "cloudwatch")
    metadata[DD_SOURCE] = parse_event_source(event, source)

    # Default service to source value
    metadata[DD_SERVICE] = metadata[DD_SOURCE]

    # Build aws attributes
    aws_attributes = {
        "aws": {
            "awslogs": {
                "logGroup": logs["logGroup"],
                "logStream": logs["logStream"],
                "owner": logs["owner"],
            }
        }
    }

    # For Lambda logs we want to extract the function name,
    # then rebuild the arn of the monitored lambda using that name.
    # Start by splitting the log group to get the function name
    if metadata[DD_SOURCE] == "lambda":
        log_group_parts = logs["logGroup"].split("/lambda/")
        if len(log_group_parts) > 1:
            function_name = log_group_parts[1].lower()
            # Split the arn of the forwarder to extract the prefix
            arn_parts = context.invoked_function_arn.split("function:")
            if len(arn_parts) > 0:
                arn_prefix = arn_parts[0]
                # Rebuild the arn by replacing the function name
                arn = arn_prefix + "function:" + function_name
                # Add the arn as a log attribute
                arn_attributes = {"lambda": {"arn": arn}}
                aws_attributes = merge_dicts(aws_attributes, arn_attributes)
                # Add the function name as tag
                metadata[DD_CUSTOM_TAGS] += ",functionname:" + function_name
                # Set the arn as the hostname
                metadata[DD_HOST] = arn

    # Create and send structured logs to Datadog
    for log in logs["logEvents"]:
        yield merge_dicts(log, aws_attributes)
Example #17
def read_fastq(filename):
    """
    return a stream of FASTQ entries, handling gzipped and empty files
    """
    if not filename:
        return itertools.cycle((None, ))
    if filename == "-":
        filename_fh = sys.stdin
    elif filename.endswith('gz'):
        filename_fh = BufferedReader(gzip.open(filename, mode='rt'))
    else:
        filename_fh = open(filename)
    return stream_fastq(filename_fh)
Example #18
def load_unpacked_bits(file_name: str,
                       count: int = -1,
                       offset: int = 0) -> nparray:
    with open(file_name, 'rb') as bit_file:
        bit_file.seek(offset)
        reader = BufferedReader(bit_file)
        bools = read_to_bools(reader)
        bool_array = nparray(list(bools), dtype=bool)

        if count == -1:
            return bool_array

        return bool_array[:count]
Example #19
def create(chat, name):
    try:
        dig = DigestReader(open(name, 'rb', buffering=0))
    except OSError:
        print('zombie {}'.format(name), flush=True)
        return
    with BufferedReader(dig) as buf:
        print('create {}'.format(name), flush=True)
        msg = chat.send_document(buf, filename=name, timeout=120)
        file = File(msg.message_id, dig.hexdigest())
        print('upload {} ({}, {})'.format(name, file.id, file.digest),
              flush=True)
        return file
Example #20
def run_amock(app=hello_app, data=b"GET / HTTP/1.0\n\n"):
    server = make_server("", 80, app, MockServer, MockHandler)
    inp = BufferedReader(BytesIO(data))
    out = BytesIO()
    olderr = sys.stderr
    err = sys.stderr = StringIO()

    try:
        server.finish_request((inp, out), ("127.0.0.1", 8888))
    finally:
        sys.stderr = olderr

    return out.getvalue(), err.getvalue()
Example #21
    def test_io_bufferedreader(self):
        fp = BytesIO(b"foo")
        resp = HTTPResponse(fp, preload_content=False)
        br = BufferedReader(resp)

        assert br.read() == b"foo"

        br.close()
        assert resp.closed

        b = b"fooandahalf"
        fp = BytesIO(b)
        resp = HTTPResponse(fp, preload_content=False)
        br = BufferedReader(resp, 5)

        br.read(1)  # sets up the buffer, reading 5
        assert len(fp.read()) == (len(b) - 5)

        # This is necessary to make sure the "no bytes left" part of `readinto`
        # gets tested.
        while not br.closed:
            br.read(5)
Example #22
    def __init__(self, stream, boundary=None):
        b = "\r\n--" + boundary + "--"
        stream = _StreamWrapper(stream)

        self.buf_reader = BufferedReader(stream)
        self.nl = b[:2]
        self.nl_dash_boundary = b[:len(b) - 2]
        self.dash_boundary_dash = b[2:]
        self.dash_boundary = b[2:len(b) - 2]
        self.headers = {}
        self.parts_read = 0

        self.current_part = None
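
With a hypothetical boundary of "XYZ", the slices above work out as follows:

b = "\r\n--" + "XYZ" + "--"           # "\r\n--XYZ--"
assert b[:2] == "\r\n"                # self.nl
assert b[:len(b) - 2] == "\r\n--XYZ"  # self.nl_dash_boundary
assert b[2:] == "--XYZ--"             # self.dash_boundary_dash
assert b[2:len(b) - 2] == "--XYZ"     # self.dash_boundary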
Example #23
    def connect(self):
        """
        Open serial connection.
        """

        try:
            self._serial_object = Serial(
                self._port, self._baudrate, timeout=self._timeout
            )
            self._ubxreader = UBXReader(BufferedReader(self._serial_object), False)
            self._connected = True
        except (SerialException, SerialTimeoutException) as err:
            print(f"Error connecting to serial port {err}")
Example #24
    def test_io_bufferedreader(self):
        fp = BytesIO(b'foo')
        resp = HTTPResponse(fp, preload_content=False)
        br = BufferedReader(resp)

        self.assertEqual(br.read(), b'foo')

        br.close()
        self.assertEqual(resp.closed, True)

        b = b'fooandahalf'
        fp = BytesIO(b)
        resp = HTTPResponse(fp, preload_content=False)
        br = BufferedReader(resp, 5)

        br.read(1)  # sets up the buffer, reading 5
        self.assertEqual(len(fp.read()), len(b) - 5)

        # This is necessary to make sure the "no bytes left" part of `readinto`
        # gets tested.
        while not br.closed:
            br.read(5)
Example #25
def s3_handler(event, context, metadata):
    s3 = boto3.client("s3")

    # Get the object from the event and show its content type
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = urllib.unquote_plus(event["Records"][0]["s3"]["object"]["key"]).decode("utf8")

    source = parse_event_source(event, key)
    metadata[DD_SOURCE] = source
    ##default service to source value
    metadata[DD_SERVICE] = source
    ##Get the ARN of the service and set it as the hostname
    hostname = parse_service_arn(source, key, bucket, context)
    if hostname:
        metadata[DD_HOST] = hostname

    # Extract the S3 object
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response["Body"]
    data = body.read()

    # If the name has a .gz extension, then decompress the data
    if key[-3:] == ".gz":
        with gzip.GzipFile(fileobj=BytesIO(data)) as decompress_stream:
            # Reading line by line avoids a bug where gzip would take a very long time (>5min) for
            # files around 60MB gzipped
            data = "".join(BufferedReader(decompress_stream))

    if is_cloudtrail(str(key)):
        cloud_trail = json.loads(data)
        for event in cloud_trail["Records"]:
            # Create structured object and send it
            structured_line = merge_dicts(
                event, {"aws": {"s3": {"bucket": bucket, "key": key}}}
            )
            yield structured_line
    else:
        # Send lines to Datadog
        for line in data.splitlines():
            # Trace Ids need to be numeric...
            x = re.search("\"Root=([a-zA-Z0-9\-]*)\"", line)
            trace_id = x.group(1)
            new_trace_id = abs(int(hashlib.sha1(trace_id.encode('utf-8')).hexdigest(), 16) % (10 ** 14))
            line = re.sub(trace_id, str(new_trace_id), line)

            # Create structured object and send it
            structured_line = {
                "aws": {"s3": {"bucket": bucket, "key": key}},
                "message": line,
            }
            yield structured_line
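
A standalone check of the trace-id remapping used above; the sample Root value is made up, but it shows the SHA-1 digest being reduced to a stable integer of at most 14 digits, which satisfies the "Trace Ids need to be numeric" constraint:

import hashlib

trace_id = "1-5e1b4151-5ac6c58f00000000000000000"  # hypothetical X-Ray Root value
numeric = abs(int(hashlib.sha1(trace_id.encode("utf-8")).hexdigest(), 16) % (10 ** 14))
assert 0 <= numeric < 10 ** 14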
Example #26
def initialize_stdio_raw(
    global_level: LogLevel,
    log_show_rust_3rdparty: bool,
    show_target: bool,
    log_levels_by_target: dict[str, LogLevel],
    print_stacktrace: bool,
    ignore_warnings: list[str],
    pants_workdir: str,
) -> Iterator[None]:
    literal_filters = []
    regex_filters = []
    for filt in ignore_warnings:
        if filt.startswith("$regex$"):
            regex_filters.append(strip_prefix(filt, "$regex$"))
        else:
            literal_filters.append(filt)

    # Set the pants log destination.
    log_path = str(pants_log_path(PurePath(pants_workdir)))
    safe_mkdir_for(log_path)

    # Initialize thread-local stdio, and replace sys.std* with proxies.
    original_stdin, original_stdout, original_stderr = sys.stdin, sys.stdout, sys.stderr
    try:
        raw_stdin, sys.stdout, sys.stderr = native_engine.stdio_initialize(
            global_level.level,
            log_show_rust_3rdparty,
            show_target,
            {k: v.level
             for k, v in log_levels_by_target.items()},
            tuple(literal_filters),
            tuple(regex_filters),
            log_path,
        )
        sys.stdin = TextIOWrapper(
            BufferedReader(raw_stdin),
            # NB: We set the default encoding explicitly to bypass logic in the TextIOWrapper
            # constructor that would poke the underlying file (which is not valid until a
            # `stdio_destination` is set).
            encoding=locale.getpreferredencoding(False),
        )

        sys.__stdin__, sys.__stdout__, sys.__stderr__ = sys.stdin, sys.stdout, sys.stderr  # type: ignore[assignment]
        # Install a Python logger that will route through the Rust logger.
        with _python_logging_setup(global_level,
                                   log_levels_by_target,
                                   print_stacktrace=print_stacktrace):
            yield
    finally:
        sys.stdin, sys.stdout, sys.stderr = original_stdin, original_stdout, original_stderr
        sys.__stdin__, sys.__stdout__, sys.__stderr__ = sys.stdin, sys.stdout, sys.stderr  # type: ignore[assignment]
Example #27
 def __enter__(self):
     if self.__to_close is not None:
         raise Exception(f"{self!r} is already a context manager")
     stream = IOWrapper(self.__stream)
     reader = BufferedReader(stream.__enter__())
     to_close = [reader]
     if reader.peek(len(GZIP_MAGIC)) == GZIP_MAGIC:
         ret = GzipIOWrapper(reader)
         to_close.append(ret)
         ret = ret.__enter__()
     else:
         ret = reader
     self.__to_close = (stream, ) + tuple(to_close)
     return ret
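
A stdlib-only sketch of the same peek-for-gzip-magic idea (the original relies on the project's IOWrapper and GzipIOWrapper classes). Note that peek() may return more bytes than requested, so the sketch slices before comparing:

import gzip
from io import BufferedReader, BytesIO

GZIP_MAGIC = b'\x1f\x8b'

def open_maybe_gzip(raw):
    reader = BufferedReader(raw)
    # Peek at the leading bytes without consuming them, then decide whether
    # the payload needs to be decompressed on the fly.
    if reader.peek(len(GZIP_MAGIC))[:len(GZIP_MAGIC)] == GZIP_MAGIC:
        return gzip.GzipFile(fileobj=reader)
    return reader

assert open_maybe_gzip(BytesIO(gzip.compress(b'hi'))).read() == b'hi'
assert open_maybe_gzip(BytesIO(b'hi')).read() == b'hi'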
Example #28
def s3_handler(event):
    s3 = boto3.client('s3')

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.unquote_plus(
        event['Records'][0]['s3']['object']['key']).decode('utf8')

    metadata[DD_SOURCE] = parse_event_source(event, key)
    ##default service to source value
    metadata[DD_SERVICE] = metadata[DD_SOURCE]

    # Extract the S3 object
    response = s3.get_object(Bucket=bucket, Key=key)
    body = response['Body']
    data = body.read()

    # If the name has a .gz extension, then decompress the data
    if key[-3:] == '.gz':
        with gzip.GzipFile(fileobj=BytesIO(data)) as decompress_stream:
            # Reading line by line avoids a bug where gzip would take a very long time (>5min) for
            # files around 60MB gzipped
            data = ''.join(BufferedReader(decompress_stream))

    if is_cloudtrail(str(key)):
        cloud_trail = json.loads(data)
        for event in cloud_trail['Records']:
            # Create structured object and send it
            structured_line = merge_dicts(
                event, {"aws": {
                    "s3": {
                        "bucket": bucket,
                        "key": key
                    }
                }})
            yield structured_line
    else:
        # Send lines to Datadog
        for line in data.splitlines():
            # Create structured object and send it
            structured_line = {
                "aws": {
                    "s3": {
                        "bucket": bucket,
                        "key": key
                    }
                },
                "message": line
            }
            yield structured_line
Example #29
    def connect(self):
        """
        Open serial connection.
        """

        settings = self.__app.frm_settings.get_settings()

        self._port = settings["port"]
        port_desc = settings["port_desc"]
        baudrate = settings["baudrate"]
        databits = settings["databits"]
        stopbits = settings["stopbits"]
        parity = PARITIES[settings["parity"]]
        xonxoff = settings["xonxoff"]
        rtscts = settings["rtscts"]
        self._datalogging = settings["datalogging"]
        self._recordtrack = settings["recordtrack"]

        try:
            self._serial_object = Serial(
                self._port,
                baudrate,
                bytesize=databits,
                stopbits=stopbits,
                parity=parity,
                xonxoff=xonxoff,
                rtscts=rtscts,
                timeout=SERIAL_TIMEOUT,
            )
            self._serial_buffer = BufferedReader(self._serial_object)
            self.__app.frm_banner.update_conn_status(CONNECTED)
            self.__app.set_connection(
                f"{self._port}:{port_desc} @ {str(baudrate)}", "green")
            self.__app.frm_settings.set_controls(CONNECTED)
            self._connected = True
            self.start_read_thread()

            if self._datalogging:
                self.__app.file_handler.open_logfile_output()

            if self._recordtrack:
                self.__app.file_handler.open_trackfile()

        except (IOError, SerialException, SerialTimeoutException) as err:
            self._connected = False
            self.__app.set_connection(
                f"{self._port}:{port_desc} @ {str(baudrate)}", "red")
            self.__app.set_status(SEROPENERROR.format(err), "red")
            self.__app.frm_banner.update_conn_status(DISCONNECTED)
            self.__app.frm_settings.set_controls(DISCONNECTED)
Example #30
 def __init__(self, fnin, fnout, expect_protocol: InteractionProtocol, nickname: str):
     self.nickname = nickname
     self._cc = None
     os.mkfifo(fnin)
     self.fpin = open(fnin, 'wb', buffering=0)
     wait_for_creation(fnout)
     self.fnout = fnout
     f = open(fnout, 'rb', buffering=0)
     # noinspection PyTypeChecker
     self.fpout = BufferedReader(f, buffer_size=1)
     self.nreceived = 0
     self.expect_protocol = expect_protocol
     self.node_protocol = None
     self.data_protocol = None