Code Example #1
    def test_deagg_with_real_kpl_generated_ka_non_agg_rec_no_ehks(self):

        # Each entry gives the expected partition key, data and record ID for one deaggregated record
        actual_user_records = [
           {
             'partitionKey': '1562602074896',
             'data': 'RECORD 749 pjqrcfbxafzdndzmbgane',
             'recordId': '49597411459012285111935017620416693517210978893395132418'
           }
        ]

        records = deagg.deaggregate_records(json.loads(json.dumps(kpl_generated_ka_non_agg_rec)))

        self.assertEqual(1, len(records), 'Deaggregated the wrong number of records from the aggregated record.')

        for i in range(0, len(records)):
            generated_record = records[i]
            actual_record = actual_user_records[i]

            self.assertEqual(generated_record['kinesis']['partitionKey'], actual_record['partitionKey'],
                             'Actual and generated partition keys do not match for record %d.' % i)

            self.assertEqual(generated_record['kinesis']['recordId'], actual_record['recordId'],
                             'Actual and generated record IDs do not match for record %d.' % i)

            decoded_data = base64.b64decode(generated_record['kinesis']['data']).decode('utf-8')
            actual_data = actual_record['data']

            self.assertEqual(decoded_data, actual_data,
                             'Deaggregated data does not match expected actual data for record %d.' % i)
Code Example #2
def lambda_handler(event, context):
    """A Python AWS Lambda function to process Kinesis aggregated
    records in a bulk fashion."""
    
    raw_kinesis_records = event['Records']
    
    # Deaggregate all records in one call
    user_records = deaggregate_records(raw_kinesis_records)
    
    # Iterate through deaggregated records
    for record in user_records:
        
        # Kinesis data in Python Lambdas is base64 encoded
        payload = base64.b64decode(record['kinesis']['data'])

        # Dump the payload assuming it is a string. If the payload contains
        # Avro binary data, the print statement below will show binary characters;
        # see avro_decode() for how the record is decoded (a sketch of such a
        # helper follows this example)
        #six.print_('%s' % payload)

        # Decode the binary Avro data and show the records
        avro_decode(payload)

        # This is where your custom code will go

    
    resp = 'Successfully processed {} records.'.format(len(user_records))
    print(resp)

    return resp
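Note: the avro_decode() helper referenced in the comments above is not part of this snippet. A minimal sketch of what such a helper might look like, assuming the payload is an Avro object-container file and the fastavro package is available (both assumptions, not part of the original project):

import io

from fastavro import reader  # assumption: fastavro is packaged with the Lambda


def avro_decode(payload):
    """Decode an Avro object-container payload (bytes) and print each record."""
    # fastavro.reader parses the schema embedded in the container header and
    # yields each record as a Python dict.
    with io.BytesIO(payload) as buf:
        for rec in reader(buf):
            print(rec)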
Code Example #3
def lambda_handler(event, context):
    """
    Receive a batch of events from Kinesis and insert into our DynamoDB table
    """
    #print "time:", time.time()
    #print('Received request')
    item = None
    dynamo_db = boto3.resource('dynamodb')
    table = dynamo_db.Table('benchmark_kinesis')
    records = [record for record in event['Records']]
    new_records = deaggregate_records(records)
    #decoded_record_data = [record['kinesis']['data'] for record in new_records]
    #deserialized_data = [decoded_record for decoded_record in records]
    #for data in decoded_record_data:
    with table.batch_writer() as batch_writer:
        for record in new_records:
            #d_record = "%.15g" % record['kinesis']['partitionKey']
            #con_time = "%.15g" % time.time()
            item = {
                'creation_time': Decimal(record['kinesis']['partitionKey']),
                # str() first: converting a float straight to Decimal yields more
                # digits than DynamoDB's 38-digit number limit allows
                'consumer_time': Decimal(str(time.time())),
                'value': record['kinesis']['data']
            }
            # Add a processed time so we have a rough idea how far behind we are
            #item['processed'] = datetime.datetime.utcnow().isoformat()
            batch_writer.put_item(Item=item)

    # Print the last item to make it easy to see how we're doing
    #print(json.dumps(item))
    #print "end time:", time.time()
    print('Number of records: {}'.format(str(len(new_records))))
Code Example #4
    def test_deagg_with_real_kpl_generated_ka_rec_with_ehks(self):

        # Each entry gives the expected explicit hash key, partition key, data and record ID for one deaggregated record
        actual_user_records = [
           {
             'explicitHashKey': '339606600942967391854603552402021847292',
             'partitionKey': '1562602074896',
             'data': 'RECORD 2005 pjqrcfbxafzdndzmbgane',
             'recordId': '49597411459012285111935017621194032819223185658889633794'
           },
           {
             'explicitHashKey': '339606600942967391854603552402021847292',
             'partitionKey': '1562602074896',
             'data': 'RECORD 2006 pjqrcfbxafzdndzmbgane',
             'recordId': '49597411459012285111935017621194032819223185658889633794'
           },
           {
             'explicitHashKey': '339606600942967391854603552402021847292',
             'partitionKey': '1562602074896',
             'data': 'RECORD 2007 pjqrcfbxafzdndzmbgane',
             'recordId': '49597411459012285111935017621194032819223185658889633794'
           },
           {
             'explicitHashKey': '339606600942967391854603552402021847292',
             'partitionKey': '1562602074896',
             'data': 'RECORD 2008 pjqrcfbxafzdndzmbgane',
             'recordId': '49597411459012285111935017621194032819223185658889633794'
           }
        ]

        records = deagg.deaggregate_records(json.loads(json.dumps(kpl_generated_ka_rec)))

        self.assertEqual(4, len(records), 'Deaggregated the wrong number of records from the aggregated record.')

        for i in range(0, len(records)):
            generated_record = records[i]
            actual_record = actual_user_records[i]

            self.assertEqual(generated_record['kinesis']['partitionKey'], actual_record['partitionKey'],
                             'Actual and generated partition keys do not match for record %d.' % i)

            self.assertEqual(generated_record['kinesis']['explicitHashKey'], actual_record['explicitHashKey'],
                             'Actual and generated explicit hash keys do not match for record %d' % i)

            self.assertEqual(generated_record['kinesis']['recordId'], actual_record['recordId'],
                             'Actual and generated record IDs do not match for record %d.' % i)

            decoded_data = base64.b64decode(generated_record['kinesis']['data']).decode('utf-8')
            actual_data = actual_record['data']

            self.assertEqual(decoded_data, actual_data,
                             'Deaggregated data does not match expected actual data for record %d.' % i)
Code Example #5
    def test_deagg_with_real_kpl_data_no_ehks(self):

        # Each entry gives the expected partition key and data for one deaggregated record
        actual_user_records = [{
            'partitionKey': '78feb12a-dba0-45ca-a91e-bb1f2f1819b4',
            'data': 'RECORD 18 iklrpsvyhxnerodfeheru\n'
        }, {
            'partitionKey': 'c2a75787-469b-4310-840d-d486dca7eb5e',
            'data': 'RECORD 19 lnmnudyhvpvcgrokfmzdh\n'
        }, {
            'partitionKey': '650e9362-572b-4427-b3fc-13549d7e5ae5',
            'data': 'RECORD 20 pjqrcfbxafzdndzmbgane\n'
        }, {
            'partitionKey': '1d8d9600-00b7-46f3-a983-de83477540c0',
            'data': 'RECORD 21 aafreyrntwuwxliygkbhk\n'
        }, {
            'partitionKey': 'f2e718a0-389b-494a-b796-3355b07569cc',
            'data': 'RECORD 22 qsaadhbsgmvqgdifguyou\n'
        }, {
            'partitionKey': '965b8da4-a6bd-4755-81c9-e7181ab7dc9a',
            'data': 'RECORD 23 wjokoiwvzzhlftlzhqfjk\n'
        }, {
            'partitionKey': 'd693c5f0-5772-46c9-8d86-08c7055bdb75',
            'data': 'RECORD 24 zopcascrthbbpnjiwuhjb\n'
        }, {
            'partitionKey': '8d92476b-eb89-4e81-99fb-c2ba63ad6d90',
            'data': 'RECORD 25 txbbvnozrwsboxuomqboq\n'
        }]

        records = deagg.deaggregate_records(
            json.loads(json.dumps(kpl_generated_rec_no_ehks)))

        self.assertEqual(
            8, len(records),
            'Deaggregated the wrong number of records from the aggregated record.')

        for i in range(0, len(records)):
            generated_record = records[i]
            actual_record = actual_user_records[i]

            self.assertEqual(
                generated_record['kinesis']['partitionKey'],
                actual_record['partitionKey'],
                'Actual and generated partition keys do not match for record %d.'
                % i)
            self.assertIsNone(generated_record['kinesis']['explicitHashKey'])

            decoded_data = base64.b64decode(
                generated_record['kinesis']['data']).decode('utf-8')
            actual_data = actual_record['data']

            self.assertEqual(
                decoded_data, actual_data,
                'Deaggregated data does not match expected actual data for record %d.'
                % i)
Code Example #6
def lambda_handler(event, context):
    """
    Receive a batch of events from Kinesis and insert into our DynamoDB table
    """
    print('Received request')
    item = None

    mysql_host = '54.212.197.235'
    mysql_username = '******'
    mysql_password = '******'
    mysql_dbname = 'rts_kinesis'
    mysql_tablename = 'benchmark_kinesis'

    print('Start connection')
    conn = mysql.connector.connect(host=mysql_host,
                                   user=mysql_username,
                                   passwd=mysql_password,
                                   db=mysql_dbname)
    print('End connection')
    # Write the buffered messages to the MySQL database
    cur = conn.cursor()

    #dynamo_db = boto3.resource('dynamodb')
    #table = dynamo_db.Table('benchmark_kinesis')
    # Ad-hoc message buffering for MySQL, equivalent to DynamoDB batch-write behavior
    _mysql_buffer = []
    _mysql_buffer_limit = 25
    records = [record for record in event['Records']]
    new_records = deaggregate_records(records)
    #decoded_record_data = [record['kinesis']['data'] for record in new_records]
    #deserialized_data = [decoded_record for decoded_record in records]
    #for data in decoded_record_data:
    for record in new_records:
        #d_record = "%.15g" % record['kinesis']['partitionKey']
        #con_time = "%.15g" % time.time()
        creation_time = Decimal(record['kinesis']['partitionKey'])
        consumer_time = Decimal(time.time())
        value = record['kinesis']['data']
        #cur.execute('INSERT INTO '+mysql_tablename+'(creation_time, consumer_time, value) VALUES (%s, %s, %s)', (creation_time, consumer_time, value))
        sql = 'INSERT INTO ' + mysql_tablename + '(creation_time, consumer_time, value) VALUES (%s, %s, %s)'
        _mysql_buffer.append((creation_time, consumer_time, value))
        if len(_mysql_buffer) > _mysql_buffer_limit:
            cur.executemany(sql, _mysql_buffer)
            _mysql_buffer = []
        # Add a processed time so we have a rough idea how far behind we are
        #item['processed'] = datetime.datetime.utcnow().isoformat()

    # Flush any rows still left in the buffer before committing
    if _mysql_buffer:
        cur.executemany(sql, _mysql_buffer)

    conn.commit()
    cur.close()
    conn.close()
    # Print the last item to make it easy to see how we're doing
    #print(json.dumps(item))
    print('Number of records: {}'.format(str(len(new_records))))
Code Example #7
    def test_multiple_records(self):

        inputs = [('partition_key1', 'abcdefghijklmnopqrstuvwxyz'),
                  ('partition_key2', 'zyxwvutsrqponmlkjihgfedcba'),
                  ('partition_key3', 'some_third_data_string')]

        aggregator = agg.RecordAggregator()
        self.assertEqual(0, aggregator.get_num_user_records(),
                         'New aggregator reported non-empty content.')

        for i in range(0, len(inputs)):
            result = aggregator.add_user_record(partition_key=inputs[i][0],
                                                data=inputs[i][1])
            if result is not None:
                self.fail('Agg record reporting as full when it should not.')
            self.assertEqual(
                i + 1, aggregator.get_num_user_records(),
                'New aggregator reported incorrect number of records.')

        agg_record = aggregator.clear_and_get()
        if not agg_record:
            self.fail('Failed to extract aggregated record.')

        self.assertEqual(len(inputs), agg_record.get_num_user_records(),
                         'Improper number of user records in agg record.')
        self.assertEqual(
            0, aggregator.get_num_user_records(),
            'Aggregator is not empty after clear_and_get() call.')

        intermediate_pk, intermediate_ehk, intermediate_data = agg_record.get_contents()

        self.assertEqual(inputs[0][0], intermediate_pk,
                         'Intermediate PK and input PK do not match.')

        event = create_kinesis_lambda_record(intermediate_pk, intermediate_ehk,
                                             intermediate_data)
        records = deagg.deaggregate_records(event['Records'])

        self.assertEqual(len(inputs), len(records))

        for i in range(0, len(records)):
            record = records[i]
            output_pk = record['kinesis']['partitionKey']
            output_ehk = record['kinesis']['explicitHashKey']
            output_data = base64.b64decode(record['kinesis']['data'])

            self.assertEqual(inputs[i][0], output_pk,
                             'Input and output partition keys do not match.')
            self.assertIsNone(output_ehk, 'Explicit hash key should be None.')
            self.assertEqual(inputs[i][1], output_data.decode('utf-8'),
                             'Input and output record data does not match.')
Code Example #8
    def test_single_user_record_with_ehk(self):

        input_pk = 'partition_key'
        input_data = 'abcdefghijklmnopqrstuvwxyz'
        input_ehk = '339606600942967391854603552402021847292'

        aggregator = agg.RecordAggregator()
        self.assertEqual(0, aggregator.get_num_user_records(),
                         'New aggregator reported non-empty content.')

        result = aggregator.add_user_record(partition_key=input_pk,
                                            data=input_data,
                                            explicit_hash_key=input_ehk)
        if result is not None:
            self.fail('Agg record reporting as full when it should not.')

        agg_record = aggregator.clear_and_get()
        if not agg_record:
            self.fail('Failed to extract aggregated record.')

        self.assertEqual(1, agg_record.get_num_user_records(),
                         'Improper number of user records in agg record.')
        self.assertEqual(
            0, aggregator.get_num_user_records(),
            'Aggregator is not empty after clear_and_get() call.')

        intermediate_pk, intermediate_ehk, intermediate_data = agg_record.get_contents()

        self.assertEqual(input_pk, intermediate_pk,
                         'Intermediate PK and input PK do not match.')
        self.assertEqual(intermediate_ehk, input_ehk,
                         'Intermediate EHK and input EHK do not match.')

        event = create_kinesis_lambda_record(intermediate_pk, intermediate_ehk,
                                             intermediate_data)
        records = deagg.deaggregate_records(event['Records'])

        self.assertEqual(1, len(records))

        record = records[0]
        output_pk = record['kinesis']['partitionKey']
        output_ehk = record['kinesis']['explicitHashKey']
        output_data = base64.b64decode(record['kinesis']['data'])

        self.assertEqual(input_pk, output_pk,
                         'Input and output partition keys do not match.')
        self.assertEqual(input_ehk, output_ehk,
                         'Input and output explicit hash keys do not match.')
        self.assertEqual(input_data, output_data.decode('utf-8'),
                         'Input and output record data does not match.')
Code Example #9
def lambda_handler(event, context):
    """A Python AWS Lambda function to process Kinesis aggregated
    records in a bulk fashion."""
    global kinesis_cross_account_role
    global forward_to_stream_name

    ## pre-flight checks
    try:
        if not os.environ['cross_account_role']:
            errorMessage = 'cross_account_role is not defined in lambda environment'
            raise Exception(errorMessage)
        if not os.environ['forward_to_stream_name']:
            errorMessage = 'forward_to_stream_name is not defined in lambda environment'
            raise Exception(errorMessage)
    except Exception as details:
        errorMessage = 'error while accessing environment variables for lambda: {}'.format(details)
        raise Exception(errorMessage)

    # initialize
    kinesis_cross_account_role = os.environ['cross_account_role']
    forward_to_stream_name = os.environ['forward_to_stream_name']
    raw_kinesis_records = event['Records']

    # Deaggregate all records in one call
    user_records = deaggregate_records(raw_kinesis_records)

    # Iterate through deaggregated records
    for record in user_records:

        # For Future Use - resharding!
        # 1. get record partition key and store
        # 2. generate a random explicit hash key
        # 3. create a new AggRecord from the existing 'record', adding
        #    the partition and explicit hash keys from #1 and #2
        # 4. forward the record to another kinesis stream
        #    (a rough sketch of this flow follows this example)

        # Kinesis data in Python Lambdas is base64 encoded
        payload = base64.b64decode(record['kinesis']['data'])

        # Dump the payload assuming it is a string. If the payload contains
        # Avro binary data, the print statement below will show binary characters;
        # refer to the avro_decode() method to see how the record is decoded
        #six.print_('%s' % payload)

        # Forward the decoded+deaggregated record to target kinesis stream
        check_and_forward(payload)

    resp = 'Successfully processed {} records.'.format(len(user_records))
    print(resp)

    return resp
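Note: the "For Future Use" comments above describe a re-aggregation and forwarding flow that is not implemented in this snippet. A rough sketch of that idea, combining the RecordAggregator API shown in the test examples on this page with boto3's put_record, might look like the following; the function name, stream handling and hash-key generation are illustrative assumptions only:

import base64
import random

import boto3
from aws_kinesis_agg.aggregator import RecordAggregator

kinesis_client = boto3.client('kinesis')  # assumption: default credentials/region suffice


def forward_reaggregated(user_records, stream_name):
    """Re-aggregate deaggregated user records and forward them to another Kinesis stream."""
    aggregator = RecordAggregator()
    for record in user_records:
        pk = record['kinesis']['partitionKey']      # 1. reuse the original partition key
        ehk = str(random.getrandbits(128))          # 2. generate a random explicit hash key
        data = base64.b64decode(record['kinesis']['data'])
        full_record = aggregator.add_user_record(partition_key=pk, data=data,
                                                 explicit_hash_key=ehk)  # 3. build a new AggRecord
        if full_record is not None:  # the aggregated record filled up, so ship it
            agg_pk, agg_ehk, agg_data = full_record.get_contents()
            kinesis_client.put_record(StreamName=stream_name, Data=agg_data,
                                      PartitionKey=agg_pk, ExplicitHashKey=agg_ehk)
    leftover = aggregator.clear_and_get()
    if leftover and leftover.get_num_user_records() > 0:  # 4. flush whatever is still buffered
        agg_pk, agg_ehk, agg_data = leftover.get_contents()
        kinesis_client.put_record(StreamName=stream_name, Data=agg_data,
                                  PartitionKey=agg_pk, ExplicitHashKey=agg_ehk)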
Code Example #10
    def test_deagg_with_real_kpl_data(self):

        # Each entry gives the expected partition key, explicit hash key and
        # data for one deaggregated record
        actual_user_records = [{
            'partitionKey': 'fc03dd88-3e79-448a-b01a-7cf1bd47b784',
            'explicitHashKey': '38486495867508399078159723846051807020',
            'data': 'RECORD 22 peeobhczbzdmskboupgyq\n'
        }, {
            'partitionKey': 'cae41b1c-ea61-43f2-90be-b8755ebf88e2',
            'explicitHashKey': '193787600037681706952143357071916352604',
            'data': 'RECORD 23 uswkxftxroeusscxsjhno\n'
        }, {
            'partitionKey': 'd490690c-e74d-4db2-a3c8-d8f2f184fd23',
            'explicitHashKey': '266880436964932424265466916734068684439',
            'data': 'RECORD 24 casehdgivfaxeustlyszy\n'
        }, {
            'partitionKey': 'c924bc09-b85e-47f1-b32e-336522ee53c8',
            'explicitHashKey': '339606600942967391854603552402021847292',
            'data': 'RECORD 25 nvffvpmuogdopjhamevrk\n'
        }]

        records = deagg.deaggregate_records(
            json.loads(json.dumps(kpl_generated_rec)))

        self.assertEqual(
            4, len(records),
            'Deaggregated the wrong number of records from the aggregated record.')

        for i in range(0, len(records)):
            generated_record = records[i]
            actual_record = actual_user_records[i]

            self.assertEqual(
                generated_record['kinesis']['partitionKey'],
                actual_record['partitionKey'],
                'Actual and generated partition keys do not match for record %d'
                % i)
            self.assertEqual(
                generated_record['kinesis']['explicitHashKey'],
                actual_record['explicitHashKey'],
                'Actual and generated explicit hash keys do not match for record %d'
                % i)

            # Decode base64 to bytes and decode bytes to utf-8
            decoded_data = base64.b64decode(
                generated_record['kinesis']['data']).decode('utf-8')
            actual_data = actual_record['data']
            self.assertEqual(
                decoded_data, actual_data,
                'Deaggregated data does not match expected actual data.')
Code Example #11
def lambda_handler(event, context):
    """
    Triggered for a batch of kinesis records.
    Parses QLDB Journal streams and sends an SNS notification for Person and Vehicle Registration Events.
    """

    sns_topic_arn = os.environ['SNS_ARN']
    raw_kinesis_records = event['Records']

    # Deaggregate all records in one call
    records = deaggregate_records(raw_kinesis_records)

    # Iterate through deaggregated records
    for record in records:

        # Kinesis data in Python Lambdas is base64 encoded
        payload = base64.b64decode(record['kinesis']['data'])
        # payload is the actual ion binary record published by QLDB to the stream
        ion_record = ion.loads(payload)
        print("Ion record: ", (ion.dumps(ion_record, binary=False)))

        if (("recordType" in ion_record) and
            (ion_record["recordType"] == REVISION_DETAILS_RECORD_TYPE)):

            revision_data, revision_metadata = get_data_metdata_from_revision_record(
                ion_record)
            table_info = get_table_info_from_revision_record(ion_record)

            if (revision_metadata["version"] == 0):  # a new record inserted
                if (table_info and table_info["tableName"] == PERSON_TABLENAME
                        and person_data_has_required_fields(revision_data)):
                    send_sns_notification(
                        sns_topic_arn,
                        'New User Registered. Name: {first_name} {last_name}'.
                        format(first_name=revision_data["FirstName"],
                               last_name=revision_data["LastName"]))

                elif (table_info and table_info["tableName"]
                      == VEHICLE_REGISTRATION_TABLENAME
                      and vehicle_registration_data_has_required_fields(
                          revision_data)):
                    send_sns_notification(
                        sns_topic_arn, 'New Vehicle Registered. '
                        'VIN: {vin}, LicensePlateNumber: {license_plate_number}'
                        .format(vin=revision_data["VIN"],
                                license_plate_number=revision_data[
                                    "LicensePlateNumber"]))
            else:
                print("No Action")

    return {'statusCode': 200}
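Note: helper functions such as send_sns_notification() are referenced above but not shown on this page. A minimal sketch of send_sns_notification using boto3, matching the call sites above (everything beyond the call signature is an assumption), could be:

import boto3

sns_client = boto3.client('sns')  # assumption: created once at module load


def send_sns_notification(sns_topic_arn, message):
    """Publish a plain-text notification to the given SNS topic."""
    return sns_client.publish(TopicArn=sns_topic_arn, Message=message)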
Code Example #12
def lambda_bulk_handler(event, context):
    """A Python AWS Lambda function to process Kinesis aggregated
    records in a bulk fashion."""

    raw_kinesis_records = event['Records']

    # Deaggregate all records in one call
    user_records = deaggregate_records(raw_kinesis_records)

    # Iterate through deaggregated records
    for record in user_records:

        # Kinesis data in Python Lambdas is base64 encoded
        payload = base64.b64decode(record['kinesis']['data'])
        print('%s' % payload)

    return 'Successfully processed {} records.'.format(len(user_records))
Code Example #13
def lambda_bulk_handler(event, context):
    '''A Python AWS Lambda function to process Kinesis aggregated
    records in a bulk fashion.'''
    
    raw_kinesis_records = event['Records']
    
    #Deaggregate all records in one call
    user_records = deaggregate_records(raw_kinesis_records)
    
    #Iterate through deaggregated records
    for record in user_records:        
        
        # Kinesis data in Python Lambdas is base64 encoded
        payload = base64.b64decode(record['kinesis']['data'])
        print('%s' % (payload))
    
    return 'Successfully processed {} records.'.format(len(user_records))
Code Example #14
def lambda_handler(event, context):
    """
    Triggered for a batch of kinesis records.
    Parses QLDB Journal streams and indexes documents to Elasticsearch for
    Person and Vehicle Registration Events.
    """
    raw_kinesis_records = event['Records']

    # Deaggregate all records in one call
    records = deaggregate_records(raw_kinesis_records)

    # Iterate through deaggregated records of Person and VehicleRegistration Table
    for record in filtered_records_generator(records,
                                             table_names=[Constants.PERSON_TABLENAME,
                                                          Constants.VEHICLE_REGISTRATION_TABLENAME]):
        table_name = record["table_info"]["tableName"]
        revision_data = record["revision_data"]
        revision_metadata = record["revision_metadata"]
        version = revision_metadata["version"]
        document = None

        if revision_data:
            # if record is for Person table and is an insert event
            if (table_name == Constants.PERSON_TABLENAME) and (version == 0) and \
                    __fields_are_present(Constants.PERSON_TABLE_FIELDS, revision_data):

                document = __create_document(Constants.PERSON_TABLE_FIELDS, revision_data)
                elasticsearch_client.index(index=TABLE_TO_INDEX_MAP[table_name],
                                           id=revision_metadata["id"], body=document, version=version)

            # if record is for VehicleRegistration table and is an insert or update event
            elif table_name == Constants.VEHICLE_REGISTRATION_TABLENAME and \
                    __fields_are_present(Constants.VEHICLE_REGISTRATION_TABLE_FIELDS, revision_data):
                document = __create_document(Constants.VEHICLE_REGISTRATION_TABLE_FIELDS, revision_data)
                elasticsearch_client.index(index=TABLE_TO_INDEX_MAP[table_name],
                                           id=revision_metadata["id"], body=document, version=version)

        else:
            # delete record
            elasticsearch_client.delete(index=TABLE_TO_INDEX_MAP[table_name],
                                        id=revision_metadata["id"], version=version)


    return {
        'statusCode': 200
    }
Code Example #15
def lambda_bulk_handler(event, context):
    """A Python AWS Lambda function to process Kinesis aggregated
    records in a bulk fashion."""

    logger.info('Starting bulk loading')

    raw_kinesis_records = event['Records']

    # Deaggregate all records in one call
    user_records = deaggregate_records(raw_kinesis_records)

    total_records = len(user_records)

    log_stream = {
        "records": [],
        "lastEventId": {
            "commitNum": -1,
            "opNum": 0
        },
        "totalRecords": total_records
    }

    for user_record in user_records:
        records_json = base64.b64decode(user_record['kinesis']['data'])
        try:
            records = json.loads(records_json)
        except Exception as e:
            logger.error('Error parsing JSON: \'{}\': {}'.format(
                records_json, str(e)))
            raise e
        for record in records:
            log_stream['records'].append(record)
            log_stream['lastEventId']['commitNum'] = record['eventId'][
                'commitNum']
            log_stream['lastEventId']['opNum'] = record['eventId']['opNum']

    logger.info('{} records to process'.format(total_records))

    for result in handler.handle_records(log_stream):
        records_processed = result.records_processed
        logger.info('{} records processed'.format(records_processed))
        #metrics_publisher_client.publish_metrics(metrics_publisher_client.generate_record_processed_metrics(records_processed))

    logger.info('Finished bulk loading')
Code Example #16
File: consumer.py  Project: gyan42/aws-kinesis-poc
def process(kpl_records):
    output = []
    for kpl_record in kpl_records:
        recordId = kpl_record['recordId']
        records_deaggregated = deaggregate_records(kpl_record)
        decoded_data = []
        for user_record in records_deaggregated:
            data = base64.b64decode(user_record['kinesis']['data']).decode('utf-8')
            decoded_data.append(data)
        output_data = "".join(decoded_data)
        output_record = {
            'recordId': recordId,
            'result': 'Ok',
            'data': base64.b64encode(output_data.encode('utf-8')).decode('utf-8')
        }
        output.append(output_record)
    return output
Code Example #17
def handler(event, context):
    """
    Triggered for a batch of kinesis records.
    Parses QLDB Journal streams.
    """
    raw_kinesis_records = event['Records']

    # Deaggregate all records in one call
    records = deaggregate_records(raw_kinesis_records)

    # Iterate through deaggregated records of ApiEvent Table
    for record in filtered_records_generator(records,
                                             table_names=['ApiEvent']):
        table_name = record["table_info"]["tableName"]
        revision_data = record["revision_data"]
        revision_metadata = record["revision_metadata"]
        document_id = revision_metadata["id"]
        version = revision_metadata["version"]

        logger.info(
            f'Table: {table_name}, Id: {document_id}, Version: {version}, '
            f'Data: {ion.dumps(revision_data, binary=False)}')
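Note: filtered_records_generator (used here and in Code Example #14) is not shown on this page. Below is a sketch of what such a generator might do, assuming the documented QLDB stream REVISION_DETAILS layout (recordType, payload.tableInfo, payload.revision.data / payload.revision.metadata); the yielded keys follow how the examples consume them, and everything else is illustrative:

import base64

from amazon.ion import simpleion as ion  # assumption: same Ion library as Code Example #11


def filtered_records_generator(records, table_names=None):
    """Yield revision details from deaggregated Kinesis records for the given QLDB tables."""
    for record in records:
        # Kinesis data is base64 encoded; the payload is Amazon Ion binary
        ion_record = ion.loads(base64.b64decode(record['kinesis']['data']))
        if ion_record.get('recordType') != 'REVISION_DETAILS':
            continue
        payload = ion_record['payload']
        table_info = payload.get('tableInfo')
        if table_names and (not table_info or table_info['tableName'] not in table_names):
            continue
        revision = payload['revision']
        yield {
            'table_info': table_info,
            'revision_data': revision.get('data'),      # absent for delete revisions
            'revision_metadata': revision['metadata'],  # id, version, txTime, txId
        }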
Code Example #18
def lambda_handler(event, context):
    """
    Lambda handler that reads records ingested into the Kinesis Data Analytics application.
    For Kinesis Data Streams input, records may be KPL-aggregated, so they are manually deaggregated here.

    Records are received as base64-encoded strings. Decoding them returns a bytes string,
    which is decoded again to get the actual string object.

    :param event: the event object generated by the invoking service
    :param context: provides methods and properties with information about the invocation,
        function and execution environment
    :return: the transformed records, in the response format the service expects
    """

    output = []
    raw_kinesis_records = event['records']

    for record in raw_kinesis_records:
        recordId = record['recordId']
        deaggregated_records = deaggregate_records(record)
        decoded_data = []
        for user_record in deaggregated_records:
            data = base64.b64decode(user_record['kinesis']['data']).decode('utf-8')
            decoded_data.append(data)

        # All decoded records are joined into a single string that is sent back to the KDA application.
        # Use "".join(decoded_data) only if the records already end with a \n delimiter;
        # otherwise use "\n".join(decoded_data).

        output_data = "".join(decoded_data)

        output_data_base64 = base64.b64encode(output_data.encode('utf-8'))
        output_data_str = output_data_base64.decode('utf-8')

        output_record = kda_response_formatter(recordId, output_data_str)
        output.append(output_record)

    print('Successfully processed {} records.'.format(len(event['records'])))
    return {'records': output}
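Note: kda_response_formatter() is not shown here; Code Example #16 on this page builds the equivalent response dict inline. A minimal sketch matching that shape (recordId / result / data) would be:

def kda_response_formatter(record_id, output_data_str):
    """Build the per-record response expected by a Kinesis Data Analytics Lambda preprocessor."""
    # 'Ok' marks the record as successfully transformed; 'data' must already be
    # the base64-encoded transformed payload
    return {
        'recordId': record_id,
        'result': 'Ok',
        'data': output_data_str
    }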
Code Example #19
File: util.py  Project: haje01/wdfwd
def iter_kinesis_records(knc, shid, seqn):
    from base64 import b64decode
    from aws_kinesis_agg.deaggregator import deaggregate_records

    ret = knc.get_shard_iterator(
        StreamName=KN_TEST_STREAM,
        ShardId=shid,
        ShardIteratorType='AT_SEQUENCE_NUMBER',
        StartingSequenceNumber=seqn
    )
    assert 'ShardIterator' in ret
    shdit = ret['ShardIterator']
    while True:
        ret = knc.get_records(ShardIterator=shdit)
        if len(ret['Records']) == 0:
            break
        assert 'Records' in ret
        records = deaggregate_records(aws_lambda_dform(ret['Records']))
        for rec in records:
            data = b64decode(rec['kinesis']['data'])
            yield data

        shdit = ret['NextShardIterator']
Code Example #20
File: util.py  Project: fossabot/wdfwd
def iter_kinesis_records(knc, shid, seqn):
    from base64 import b64decode
    from aws_kinesis_agg.deaggregator import deaggregate_records

    ret = knc.get_shard_iterator(
        StreamName=KN_TEST_STREAM,
        ShardId=shid,
        ShardIteratorType='AT_SEQUENCE_NUMBER',
        StartingSequenceNumber=seqn
    )
    assert 'ShardIterator' in ret
    shdit = ret['ShardIterator']
    while True:
        ret = knc.get_records(ShardIterator=shdit)
        if len(ret['Records']) == 0:
            break
        assert 'Records' in ret
        records = deaggregate_records(aws_lambda_dform(ret['Records']))
        for rec in records:
            data = b64decode(rec['kinesis']['data'])
            yield data

        shdit = ret['NextShardIterator']
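Note: aws_lambda_dform() is a helper from these wdfwd projects and is not shown on this page. deaggregate_records() expects records in the AWS Lambda event shape, while boto3's get_records returns raw bytes, so a sketch of such an adapter (assuming only that mapping) might be:

import base64


def aws_lambda_dform(boto3_records):
    """Convert boto3 get_records 'Records' entries into the Lambda-event record shape."""
    converted = []
    for rec in boto3_records:
        converted.append({
            'kinesis': {
                # Lambda delivers Kinesis data base64 encoded, so encode the raw bytes here
                'data': base64.b64encode(rec['Data']).decode('utf-8'),
                'partitionKey': rec['PartitionKey'],
                'sequenceNumber': rec['SequenceNumber'],
            }
        })
    return converted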
Code Example #21
    def deaggregate(self, records):
        return deagg.deaggregate_records(records)
Code Example #22
def lambda_bulk_handler(event, context):
    """A Python AWS Lambda function to process Kinesis aggregated
    records in a bulk fashion."""
    
    logger.info('Starting bulk loading')
    
    raw_kinesis_records = event['Records']
    
    logger.info('Aggregated Kinesis record count: {}'.format(len(raw_kinesis_records)))
    
    # Deaggregate all records in one call
    user_records = deaggregate_records(raw_kinesis_records)
    
    total_records = len(user_records)
    
    logger.info('Deaggregated record count: {}'.format(total_records))
    
    log_stream = {
        "records": [],
        "lastEventId": {
            "commitNum": -1,
            "opNum": 0
        },
        "totalRecords": total_records
    }
        
    first_commit_num = None
    first_op_num = None
    prev_commit_num = None
    prev_op_num = None
    commit_nums = set()
        
    for user_record in user_records:
        records_json = base64.b64decode(user_record['kinesis']['data'])
        try:          
            records = json.loads(records_json)
        except Exception as e:
            logger.error('Error parsing JSON: \'{}\': {}'.format(records_json, str(e)))
            raise e
        for record in records:
            
            commit_num = record['eventId']['commitNum']
            op_num = record['eventId']['opNum']
            
            if log_commit_nums:
                commit_nums.add(commit_num)
            
            if first_commit_num is None:
                first_commit_num = commit_num

            if first_op_num is None:
                first_op_num = op_num
                
            #logger.info('Stream record: (commitNum: {}, opNum: {})'.format(commit_num, op_num)) 
            
            #if prev_commit_num and commit_num < prev_commit_num:
            #    logger.warn('Current commitNum [{}] is less than previous commitNum [{}]'.format(commit_num, prev_commit_num))
                
            if prev_commit_num and commit_num == prev_commit_num:
                if prev_op_num and op_num < prev_op_num:
                    logger.warn('Current opNum [{}] is less than previous opNum [{}] (commitNum [{}])'.format(op_num, prev_op_num, commit_num))
                    
            log_stream['records'].append(record)
            
            prev_commit_num = commit_num
            prev_op_num = op_num
            
    log_stream['lastEventId']['commitNum'] = prev_commit_num if prev_commit_num else -1
    log_stream['lastEventId']['opNum'] = prev_op_num if prev_op_num else 0
        
    logger.info('Log stream record count: {}'.format(len(log_stream['records']))) 
    logger.info('First record: (commitNum: {}, opNum: {})'.format(first_commit_num, first_op_num))
    logger.info('Last record: (commitNum: {}, opNum: {})'.format(prev_commit_num, prev_op_num))  
    
    if log_commit_nums:
        logger.info('Commit nums: {}'.format(commit_nums))  
        
    for result in handler.handle_records(log_stream):
        records_processed = result.records_processed
        logger.info('{} records processed'.format(records_processed))
        #metrics_publisher_client.publish_metrics(metrics_publisher_client.generate_record_processed_metrics(records_processed))
        
    logger.info('Finished bulk loading')