Esempio n. 1
0
def put_records(stream_name, records):
    """Deliver a batch of records to every destination configured on a stream.

    For an Elasticsearch destination, each record is base64-decoded, parsed
    as JSON and created in the configured index/type under a freshly
    generated UUID. For an S3 destination, each record is base64-decoded and
    stored as its own object whose key is the configured prefix immediately
    followed by a freshly generated UUID.

    :param stream_name: name handed to ``get_stream`` to look up the stream.
    :param records: iterable of dicts carrying a base64-encoded ``Data`` entry.
    :raises Exception: any error raised while writing to a destination is
        logged and then re-raised unchanged.
    """
    stream = get_stream(stream_name)
    for dest in stream['Destinations']:
        if 'ESDestinationDescription' in dest:
            es_dest = dest['ESDestinationDescription']
            es_index = es_dest['IndexName']
            es_type = es_dest['TypeName']
            es = connect_elasticsearch()
            for record in records:
                obj_id = uuid.uuid4()
                data = base64.b64decode(record['Data'])
                body = json.loads(data)
                try:
                    es.create(index=es_index, doc_type=es_type, id=obj_id, body=body)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s', e, traceback.format_exc())
                    # bare raise re-raises the active exception as-is
                    raise
        if 'S3DestinationDescription' in dest:
            s3_dest = dest['S3DestinationDescription']
            bucket = bucket_name(s3_dest['BucketARN'])
            # The prefix may be missing or None; treat both as "no prefix"
            # instead of failing with KeyError or producing a "None..." key.
            prefix = s3_dest.get('Prefix') or ''
            s3 = get_s3_client()
            for record in records:
                data = base64.b64decode(record['Data'])
                obj_name = str(uuid.uuid4())
                obj_path = '%s%s' % (prefix, obj_name)
                try:
                    s3.Object(bucket, obj_path).put(Body=data)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s', e, traceback.format_exc())
                    raise
Esempio n. 2
0
def check_infra_elasticsearch(expect_shutdown=False):
    """Probe Elasticsearch by requesting the keys of its index aliases.

    Any exception raised while connecting or querying is swallowed, in which
    case ``out`` simply stays ``None``.

    :param expect_shutdown: not read by the code visible in this snippet.
    """
    # NOTE(review): ``out`` and ``expect_shutdown`` are not used below; the
    # snippet looks truncated - confirm against the full source.
    out = None
    try:
        # check Elasticsearch
        es = aws_stack.connect_elasticsearch()
        out = es.indices.get_aliases().keys()
    except Exception:
        # Best-effort probe: a failure is deliberately ignored here.
        # (The former ``except Exception, e`` form is a SyntaxError on Python 3.)
        pass
Esempio n. 3
0
def check_infra_elasticsearch(expect_shutdown=False):
    """Probe Elasticsearch by requesting the keys of its index aliases.

    Any exception raised while connecting or querying is swallowed, in which
    case ``out`` simply stays ``None``.

    :param expect_shutdown: not read by the code visible in this snippet.
    """
    # NOTE(review): ``out`` and ``expect_shutdown`` are not used below; the
    # snippet looks truncated - confirm against the full source.
    out = None
    try:
        # check Elasticsearch
        es = aws_stack.connect_elasticsearch()
        out = es.indices.get_aliases().keys()
    except Exception:
        # Best-effort probe: a failure is deliberately ignored here.
        # (The former ``except Exception, e`` form is a SyntaxError on Python 3.)
        pass
Esempio n. 4
0
def check_infra_elasticsearch(expect_shutdown=False, print_error=False):
    """Probe Elasticsearch by requesting its alias listing.

    Exceptions raised while connecting or querying never propagate; they are
    printed together with the traceback when ``print_error`` is true and
    otherwise ignored.

    :param expect_shutdown: not read by the code visible in this snippet.
    :param print_error: when true, print the failure and its traceback.
    """
    # NOTE(review): ``out`` and ``expect_shutdown`` are not used below; the
    # snippet looks truncated - confirm against the full source.
    out = None
    try:
        # check Elasticsearch
        es = aws_stack.connect_elasticsearch()
        out = es.cat.aliases()
    except Exception as e:
        # ``except Exception as e`` parses on Python 2.6+ and Python 3; the
        # former ``except Exception, e`` form is a SyntaxError on Python 3.
        if print_error:
            print('Elasticsearch health check failed: %s %s' % (e, traceback.format_exc()))
Esempio n. 5
0
def put_records(stream_name, records):
    """Deliver a batch of records to every destination configured on a stream.

    Records carry their base64-encoded payload under ``Data`` (DirectPut) or
    ``data`` (KinesisAsSource). For an Elasticsearch destination each payload
    is parsed as JSON and created under a freshly generated UUID; for an S3
    destination each payload is stored as its own object.

    :param stream_name: name handed to ``get_stream`` to look up the stream.
    :param records: iterable of record dicts.
    :raises KeyError: if a record has neither a ``Data`` nor a ``data`` entry.
    :raises Exception: any error raised while writing to a destination is
        logged and then re-raised unchanged.
    """
    stream = get_stream(stream_name)
    for dest in stream['Destinations']:
        if 'ESDestinationDescription' in dest:
            es_dest = dest['ESDestinationDescription']
            es_index = es_dest['IndexName']
            es_type = es_dest['TypeName']
            es = connect_elasticsearch()
            for record in records:
                obj_id = uuid.uuid4()

                # 'Data' is set for DirectPut, 'data' for KinesisAsSource.
                # A record with neither now fails with KeyError instead of
                # silently reusing the payload of the previous record.
                encoded = record['Data'] if 'Data' in record else record['data']
                data = base64.b64decode(encoded)

                body = json.loads(data)

                try:
                    es.create(index=es_index,
                              doc_type=es_type,
                              id=obj_id,
                              body=body)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s',
                              e, traceback.format_exc())
                    # bare raise re-raises the active exception as-is
                    raise
        if 'S3DestinationDescription' in dest:
            s3_dest = dest['S3DestinationDescription']
            bucket = bucket_name(s3_dest['BucketARN'])
            prefix = s3_dest.get('Prefix', '')
            s3 = get_s3_client()
            for record in records:

                # 'Data' is set for DirectPut, 'data' for KinesisAsSource;
                # KeyError if the record carries neither.
                encoded = record['Data'] if 'Data' in record else record['data']
                data = base64.b64decode(encoded)

                obj_name = str(uuid.uuid4())
                # NOTE(review): with an empty prefix the key starts with '/'
                # - confirm this is intended.
                obj_path = '%s%s%s' % (prefix, '' if prefix.endswith('/') else
                                       '/', obj_name)
                try:
                    s3.Object(bucket, obj_path).put(Body=data)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s',
                              e, traceback.format_exc())
                    raise
Esempio n. 6
0
def check_infra_elasticsearch(expect_shutdown=False, print_error=False):
    """Assert that Elasticsearch is in the expected up/down state.

    The alias listing is requested from Elasticsearch; a failure to obtain it
    leaves the result as ``None`` (and is logged when ``print_error`` is set).

    :param expect_shutdown: when true, assert that no result was obtained;
        otherwise assert that the result is a string.
    :param print_error: when true, log the failure together with its traceback.
    """
    aliases = None
    try:
        # check Elasticsearch
        aliases = aws_stack.connect_elasticsearch().cat.aliases()
    except Exception as err:
        if print_error:
            LOGGER.error('Elasticsearch health check failed: %s %s' % (err, traceback.format_exc()))

    if not expect_shutdown:
        # service is expected to be up: the listing must have come back as text
        assert isinstance(aliases, six.string_types)
        return
    # service is expected to be down: the request must not have succeeded
    assert aliases is None
Esempio n. 7
0
def put_records(stream_name, records):
    """Deliver a batch of records to every destination configured on a stream.

    Records carry their base64-encoded payload under ``Data`` (DirectPut) or
    ``data`` (KinesisAsSource). For an Elasticsearch destination each payload
    is parsed as JSON and created under a freshly generated UUID; for an S3
    destination all payloads are concatenated and stored as a single object.

    :param stream_name: name handed to ``get_stream`` to look up the stream.
    :param records: iterable of record dicts.
    :returns: the result of ``error_not_found(stream_name)`` when the stream
        lookup yields nothing, otherwise a dict with a new ``RecordId``.
    :raises KeyError: if a record has neither a ``Data`` nor a ``data`` entry.
    :raises Exception: any error raised while writing to a destination is
        logged and then re-raised unchanged.
    """
    stream = get_stream(stream_name)
    if not stream:
        return error_not_found(stream_name)
    for dest in stream['Destinations']:
        if 'ESDestinationDescription' in dest:
            es_dest = dest['ESDestinationDescription']
            es_index = es_dest['IndexName']
            es_type = es_dest.get('TypeName')
            es = connect_elasticsearch(endpoint=es_dest.get('ClusterEndpoint'),
                                       domain=es_dest.get('DomainARN'))
            for record in records:
                obj_id = uuid.uuid4()

                # 'Data' is set for DirectPut, 'data' for KinesisAsSource.
                # A record with neither now fails with KeyError (as the S3
                # branch below already does) instead of silently reusing the
                # payload of the previous record.
                encoded = record['Data'] if 'Data' in record else record['data']
                data = base64.b64decode(encoded)

                body = json.loads(data)

                try:
                    es.create(index=es_index,
                              doc_type=es_type,
                              id=obj_id,
                              body=body)
                except Exception as e:
                    LOG.error('Unable to put record to stream: %s %s',
                              e, traceback.format_exc())
                    # bare raise re-raises the active exception as-is
                    raise
        if 'S3DestinationDescription' in dest:
            s3_dest = dest['S3DestinationDescription']
            bucket = bucket_name(s3_dest['BucketARN'])
            prefix = s3_dest.get('Prefix', '')

            s3 = connect_to_resource('s3')
            # the whole batch is written as one object
            batched_data = b''.join([
                base64.b64decode(r.get('Data') or r['data']) for r in records
            ])

            obj_path = get_s3_object_path(stream_name, prefix)
            try:
                s3.Object(bucket, obj_path).put(Body=batched_data)
            except Exception as e:
                LOG.error('Unable to put record to stream: %s %s',
                          e, traceback.format_exc())
                raise
    return {'RecordId': str(uuid.uuid4())}
Esempio n. 8
0
def put_records(stream_name: str, records: List[Dict]) -> Dict:
    """Put a list of records to the firehose stream - either directly from a PutRecord API call, or
    received from an underlying Kinesis stream (if 'KinesisStreamAsSource' is configured).

    Each destination's processors are applied to the incoming records, and the
    result is delivered to whichever of the Elasticsearch, S3 and HTTP endpoint
    destinations are configured on it.

    :param stream_name: name handed to ``get_stream`` to look up the stream.
    :param records: record dicts carrying a base64-encoded payload under
        ``Data`` (DirectPut) or ``data`` (KinesisAsSource).
    :returns: the result of ``error_not_found(stream_name)`` when the stream
        lookup yields nothing, otherwise a dict with a new ``RecordId``.
    :raises Exception: any error raised while writing to a destination is
        logged and then re-raised unchanged.
    """
    stream = get_stream(stream_name)
    if not stream:
        return error_not_found(stream_name)

    # preprocess records, add any missing attributes
    add_missing_record_attributes(records)

    for dest in stream.get("Destinations", []):

        # pick up the processing configuration of this destination; if several
        # child entries define one, the last one wins
        proc_config = {}
        for child in dest.values():
            candidate = child.get("ProcessingConfiguration") if isinstance(child, dict) else None
            if candidate:
                proc_config = candidate

        # Apply processing steps to a per-destination view of the incoming
        # items, so one destination's processors do not feed into the next.
        # NOTE(review): assumes preprocess_records returns a new list rather
        # than mutating its input in place - confirm.
        dest_records = records
        if proc_config.get("Enabled") is not False:
            for processor in proc_config.get("Processors", []):
                # TODO: run processors asynchronously, to avoid request timeouts on PutRecord API calls
                dest_records = preprocess_records(processor, dest_records)

        if "ESDestinationDescription" in dest:
            es_dest = dest["ESDestinationDescription"]
            es_index = es_dest["IndexName"]
            es_type = es_dest.get("TypeName")
            es = connect_elasticsearch(endpoint=es_dest.get("ClusterEndpoint"),
                                       domain=es_dest.get("DomainARN"))
            for record in dest_records:
                obj_id = uuid.uuid4()

                # a record without payload is stored as an empty JSON document
                data = "{}"
                # DirectPut
                if "Data" in record:
                    data = base64.b64decode(record["Data"])
                # KinesisAsSource
                elif "data" in record:
                    data = base64.b64decode(record["data"])

                body = json.loads(data)

                try:
                    es.create(index=es_index,
                              doc_type=es_type,
                              id=obj_id,
                              body=body)
                except Exception as e:
                    LOG.error("Unable to put record to stream: %s %s",
                              e, traceback.format_exc())
                    # bare raise re-raises the active exception as-is
                    raise
        if "S3DestinationDescription" in dest:
            s3_dest = dest["S3DestinationDescription"]
            bucket = s3_bucket_name(s3_dest["BucketARN"])
            prefix = s3_dest.get("Prefix", "")

            s3 = connect_to_resource("s3")
            # the whole batch is written as one object
            batched_data = b"".join([
                base64.b64decode(r.get("Data") or r["data"]) for r in dest_records
            ])

            obj_path = get_s3_object_path(stream_name, prefix)
            try:
                s3.Object(bucket, obj_path).put(Body=batched_data)
            except Exception as e:
                LOG.error("Unable to put record to stream: %s %s",
                          e, traceback.format_exc())
                raise
        if "HttpEndpointDestinationDescription" in dest:
            http_dest = dest["HttpEndpointDestinationDescription"]
            end_point = http_dest["EndpointConfiguration"]
            url = end_point["Url"]
            # payloads are forwarded still base64-encoded, as received
            record_to_send = {
                "requestId": str(uuid.uuid4()),
                "timestamp": (int(time.time())),
                "records": [
                    {"data": record.get("Data") or record.get("data")}
                    for record in dest_records
                ],
            }
            headers = {
                "Content-Type": "application/json",
            }
            try:
                # NOTE(review): the response is discarded, so a non-2xx reply
                # from the endpoint is not treated as a failure.
                requests.post(url, json=record_to_send, headers=headers)
            except Exception as e:
                LOG.info(
                    "Unable to put Firehose records to HTTP endpoint %s: %s %s",
                    url, e, traceback.format_exc())
                raise
    return {"RecordId": str(uuid.uuid4())}
Esempio n. 9
0
def put_records(stream_name, records):
    """Deliver a batch of records to every destination configured on a stream.

    Records carry their base64-encoded payload under ``Data`` (DirectPut) or
    ``data`` (KinesisAsSource). Elasticsearch destinations receive one JSON
    document per record, S3 destinations one object holding the concatenated
    payloads, and HTTP endpoint destinations one JSON POST with all records.

    :param stream_name: name handed to ``get_stream`` to look up the stream.
    :param records: iterable of record dicts.
    :returns: the result of ``error_not_found(stream_name)`` when the stream
        lookup yields nothing, otherwise a dict with a new ``RecordId``.
    :raises KeyError: if a record delivered to Elasticsearch or S3 has neither
        a ``Data`` nor a ``data`` entry.
    :raises Exception: any error raised while writing to a destination is
        logged and then re-raised unchanged.
    """
    stream = get_stream(stream_name)
    if not stream:
        return error_not_found(stream_name)
    for dest in stream.get("Destinations", []):
        if "ESDestinationDescription" in dest:
            es_dest = dest["ESDestinationDescription"]
            es_index = es_dest["IndexName"]
            es_type = es_dest.get("TypeName")
            es = connect_elasticsearch(endpoint=es_dest.get("ClusterEndpoint"),
                                       domain=es_dest.get("DomainARN"))
            for record in records:
                obj_id = uuid.uuid4()

                # "Data" is set for DirectPut, "data" for KinesisAsSource.
                # A record with neither now fails with KeyError (as the S3
                # branch below already does) instead of silently reusing the
                # payload of the previous record.
                encoded = record["Data"] if "Data" in record else record["data"]
                data = base64.b64decode(encoded)

                body = json.loads(data)

                try:
                    es.create(index=es_index,
                              doc_type=es_type,
                              id=obj_id,
                              body=body)
                except Exception as e:
                    LOG.error("Unable to put record to stream: %s %s",
                              e, traceback.format_exc())
                    # bare raise re-raises the active exception as-is
                    raise
        if "S3DestinationDescription" in dest:
            s3_dest = dest["S3DestinationDescription"]
            bucket = bucket_name(s3_dest["BucketARN"])
            prefix = s3_dest.get("Prefix", "")

            s3 = connect_to_resource("s3")
            # the whole batch is written as one object
            batched_data = b"".join([
                base64.b64decode(r.get("Data") or r["data"]) for r in records
            ])

            obj_path = get_s3_object_path(stream_name, prefix)
            try:
                s3.Object(bucket, obj_path).put(Body=batched_data)
            except Exception as e:
                LOG.error("Unable to put record to stream: %s %s",
                          e, traceback.format_exc())
                raise
        if "HttpEndpointDestinationDescription" in dest:
            http_dest = dest["HttpEndpointDestinationDescription"]
            end_point = http_dest["EndpointConfiguration"]
            url = end_point["Url"]
            # payloads are forwarded still base64-encoded, as received
            record_to_send = {
                "requestId": str(uuid.uuid4()),
                "timestamp": (int(time.time())),
                "records": [
                    {"data": record.get("Data") or record.get("data")}
                    for record in records
                ],
            }
            headers = {
                "Content-Type": "application/json",
            }
            try:
                # NOTE(review): the response is discarded, so a non-2xx reply
                # from the endpoint is not treated as a failure.
                requests.post(url, json=record_to_send, headers=headers)
            except Exception as e:
                LOG.info(
                    "Unable to put Firehose records to HTTP endpoint %s: %s %s",
                    url, e, traceback.format_exc())
                raise
    return {"RecordId": str(uuid.uuid4())}