Example #1
    def test_write_records_to_kinesis_reattempts_only_failed_records(self):
        """tests that only the records that kinesis says have failed are the the ones that will be re-pushed"""
        mocked_kinesis_client = Mock()
        mocked_kinesis_client.put_records = mock_put_records
        mocked_di_framework_client = Mock()
        records = [{
            "PartitionKey": "test",
            "Data": {
                "id": "1"
            },
            "fail": True
        }, {
            "PartitionKey": "test",
            "Data": {
                "id": "2"
            },
            "fail": False
        }, {
            "PartitionKey": "test",
            "Data": {
                "id": "3"
            },
            "fail": True
        }, {
            "PartitionKey": "test",
            "Data": {
                "id": "4"
            },
            "fail": False
        }]

        kinesis_producer = KinesisProducer("region_name", "stream_name",
                                           mocked_kinesis_client)
        res = kinesis_producer.write_records_to_kinesis(
            records, mocked_di_framework_client)
        number_of_expected_failed_records = 2
        expected_res = {
            'FailedRecordCount': number_of_expected_failed_records,
            'Records': [{
                'ErrorCode': 'ProvisionedThroughputExceededException',
                'ErrorMessage': 'this is a mock error message'
            }] * number_of_expected_failed_records,
            'EncryptionType': 'NONE'
        }
        self.assertEqual(res, expected_res)
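
Note: the mock_put_records helper assigned to the client above isn't shown. A minimal sketch of what it might look like, assuming it returns a put_records-style response that fails every record flagged with "fail": True (the name exists in the test; the body below is inferred, not taken from the original source):

def mock_put_records(StreamName=None, Records=None):
    # Hypothetical stand-in for boto3's Kinesis put_records: report every
    # record whose input dict carried "fail": True as a throughput failure,
    # mirroring the response shape the test asserts on.
    failed = [r for r in Records if r.get("fail")]
    return {
        'FailedRecordCount': len(failed),
        'Records': [{
            'ErrorCode': 'ProvisionedThroughputExceededException',
            'ErrorMessage': 'this is a mock error message'
        } for _ in failed],
        'EncryptionType': 'NONE'
    }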
Example #2

def get_producer(self, args):
    if os.getenv("KINESIS_STREAM_NAME", False):
        api_name = os.environ.get("KINESIS_API_NAME", 'firehose')
        region_name = os.environ.get("KINESIS_REGION", 'us-west-2')
        stream_name = os.environ.get("KINESIS_STREAM_NAME",
                                     'TwitterStream')
        from kinesis_producer import KinesisProducer
        return KinesisProducer(api_name, region_name, stream_name)
    elif args.kinesis_stream_name is not None:
        # fails if the remaining kinesis arguments are not all set
        api_name = args.kinesis_api_name
        region_name = args.kinesis_region
        stream_name = args.kinesis_stream_name
        from kinesis_producer import KinesisProducer
        return KinesisProducer(api_name, region_name, stream_name)
    else:
        from kinesis_producer import StdoutProducer
        return StdoutProducer()
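
The args branch above reads three attributes off the parsed arguments. A minimal argparse setup that would supply them might look like this (the flag names are assumptions inferred from the attribute names):

import argparse

parser = argparse.ArgumentParser()
# argparse maps --kinesis-stream-name to args.kinesis_stream_name, etc.
parser.add_argument("--kinesis-stream-name", default=None)
parser.add_argument("--kinesis-api-name", default="firehose")
parser.add_argument("--kinesis-region", default="us-west-2")
args = parser.parse_args()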
Example #3

def get_producer():
    if os.getenv("KAFKA_SERVICE_HOST", False):
        from kafka_producer import KafkaProducer
        return KafkaProducer()
    elif os.getenv("KAFKA_BOOTSTRAP_SERVER", False):
        from kafka_producer import KafkaProducer
        return KafkaProducer()
    elif os.getenv("KINESIS_STREAM_NAME", False):
        api_name = os.environ.get("KINESIS_API_NAME", 'firehose')
        region_name = os.environ.get("KINESIS_REGION", 'us-east-1')
        stream_name = os.environ.get("KINESIS_STREAM_NAME", 'TwitterStream')
        from kinesis_producer import KinesisProducer
        return KinesisProducer(api_name, region_name, stream_name)
    else:
        from tweepy_stream import StdoutProducer
        return StdoutProducer()
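
Selection here is driven entirely by environment variables, so the Kinesis branch can be exercised by exporting one variable before calling the function; a quick sketch:

import os

# With only KINESIS_STREAM_NAME set, both Kafka branches are skipped and
# a KinesisProducer is returned with the default api name and region.
os.environ.pop("KAFKA_SERVICE_HOST", None)
os.environ.pop("KAFKA_BOOTSTRAP_SERVER", None)
os.environ["KINESIS_STREAM_NAME"] = "TwitterStream"
producer = get_producer()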
Example #4
def main():
    try:
        # Create a ./local_config.json file if you want to run this
        # locally; otherwise this will fail.
        config = utils.get_config()
        transis_consumer = TransisConsumer(config["transis_config_prod"])

        kinesis_client = boto3.client('kinesis',
                                      config["kinesis_config"]["region_name"])
        kinesis_producer = KinesisProducer(
            config["kinesis_config"]["region_name"],
            config["kinesis_config"]["stream_name"], kinesis_client)
        di_framework_client = di_framework.DIFramework(
            config["di_framework_config"])
        transis_kinesis_connector = TransisKinesisConnector(
            transis_consumer, kinesis_producer, di_framework_client)
        transis_kinesis_connector.run()
    except Exception as e:
        logging.critical(
            f"shutting down the service as a fatal error has occured: {e}")
        try:
            di_framework_client.close_db_connection()
        except UnboundLocalError:
            pass
        exit()
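
utils.get_config() has to return a nested dict for this to work. Judging only by the keys accessed above, the loaded ./local_config.json would need at least this shape (all values below are placeholders, not taken from the original project):

config = {
    "transis_config_prod": {},       # whatever TransisConsumer expects
    "kinesis_config": {
        "region_name": "us-east-1",  # placeholder region
        "stream_name": "my-stream",  # placeholder stream name
    },
    "di_framework_config": {},       # whatever DIFramework expects
}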
Example #5
import logging
import sys

import pyModeS as pms
from kinesis_producer import KinesisProducer


def myreadlines(f, newline):
    # Buffered reader that yields f's contents split on an arbitrary
    # delimiter. (The snippet begins mid-function; the imports, header,
    # and enclosing loops are reconstructed from the visible body.)
    buf = ""
    while True:
        while newline in buf:
            pos = buf.index(newline)
            yield buf[:pos]
            buf = buf[pos + len(newline):]
        chunk = f.read(512)
        if not chunk:
            yield buf
            break
        buf += chunk


log = logging.getLogger('kinesis_producer.client')
level = logging.getLevelName('DEBUG')
logging.basicConfig()
log.setLevel(level)

k = KinesisProducer(config=config)
k2 = KinesisProducer(config=config2)

for line in myreadlines(sys.stdin, ";"):
    line = line.strip()
    mesg = line[2:]
    k2.send(mesg)
    type = pms.adsb.typecode(mesg)
    if 1 <= type <= 4:
        # typecodes 1 through 4 are aircraft identification messages
        icao = pms.adsb.icao(mesg)
        callsign = pms.adsb.callsign(mesg)
        print('aircraft id message')
        print("Type %s message" % type)
        print('callsign: %s' % callsign)
        jsonobj = {
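
The snippet cuts off mid-assignment at jsonobj = {, so the rest of the loop is lost. For reference, the pms calls used above come from the pyModeS library and can be exercised standalone; a small sketch with a sample identification message (the decoded values in the comments are what pyModeS is expected to return for this message):

import pyModeS as pms

# Sample 112-bit ADS-B aircraft identification message.
mesg = "8D4840D6202CC371C32CE0576098"

print(pms.adsb.typecode(mesg))  # 4 -> aircraft identification
print(pms.adsb.icao(mesg))      # '4840D6'
print(pms.adsb.callsign(mesg))  # 'KLM1023_'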
Example #6
from sqlalchemy import MetaData, create_engine
from kinesis_producer import KinesisProducer

metadata = MetaData()

engine = create_engine('mysql://root@localhost/st5_dev')

connection = engine.connect()

statement = "select * from users"

result_proxy = connection.execute(statement)
result_set = result_proxy.fetchall()
records = result_set[:1]

config = dict(
    aws_region='us-east-1',
    buffer_size_limit=100000,
    buffer_time_limit=0.2,
    kinesis_concurrency=1,
    kinesis_max_retries=10,
    record_delimiter='\n',
    stream_name='smartzip-challenge-5',
)

k = KinesisProducer(config=config)

for record in records:
    k.send(record)

k.close()
k.join()
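
One caveat with this example: fetchall() returns SQLAlchemy row objects, and k.send() is handed them unmodified. If the producer expects bytes or str (the other examples here send encoded strings), serializing each row first is safer; a sketch of that variant of the send loop (the JSON conversion is an addition, not part of the original):

import json

for record in records:
    # Convert the row to a plain dict (works on legacy SQLAlchemy rows),
    # then ship it as one JSON line; default=str covers datetimes etc.
    k.send(json.dumps(dict(record), default=str).encode('utf-8'))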
Example #7
import logging

from kinesis_producer import KinesisProducer

logging.basicConfig(level=logging.DEBUG)
logging.getLogger('botocore').setLevel(logging.WARNING)

log = logging.getLogger(__name__)

config = dict(
    aws_region='us-east-1',
    buffer_size_limit=200000,
    buffer_time_limit=0.2,
    kinesis_concurrency=4,
    kinesis_max_retries=10,
    record_delimiter='\n',
    stream_name='jz-python-devlocal',
    )

k = KinesisProducer(config=config)

payload = '{MSG:%%05i %s}' % ('X' * 1000)

try:
    print(' <> MSGS')
    for msg_id in range(50000):
        record = payload % msg_id
        k.send(record)
        # time.sleep(0.2)

    # time.sleep(5)
except KeyboardInterrupt:
    pass
finally:
    print(' <> CLOSE')
    k.close()
    k.join()
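
The payload line builds its template in two stages: the %%05i survives the first % substitution as %05i, so each iteration fills in a zero-padded message id. A quick check of that behavior with a shorter filler:

payload = '{MSG:%%05i %s}' % ('X' * 4)
print(payload)       # {MSG:%05i XXXX}
print(payload % 42)  # {MSG:00042 XXXX}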
Example #8
import boto3
import click
from dateutil import rrule
from kinesis_producer import KinesisProducer

# parse_date and decode are project-local helpers that aren't part of
# this snippet.


def restream(bucket, key, stream, start, end, yes, delimiter):
    """Replay saved Firehose Streams into Kinesis streams.

    The data in the KEY path inside the BUCKET will be sent to STREAM.
    """

    # Instantiate Kinesis producer
    kinesis_config = dict(
        aws_region='us-east-1',
        buffer_size_limit=1,
        buffer_time_limit=1,
        kinesis_concurrency=1,
        kinesis_max_retries=10,
        record_delimiter=b'\n',
        stream_name=stream,
    )

    producer = KinesisProducer(config=kinesis_config)

    # Instantiate S3 client
    s3 = boto3.client('s3')

    # Parse dates
    start_date = parse_date(start)
    end_date = parse_date(end)

    # Ask for confirmation unless the --yes flag was passed
    if not yes:
        click.confirm('Restream from {} to {}'.format(start_date, end_date),
                      abort=True)

    # Iterate hour by hour through the bucket and restream the records individually
    for dt in rrule.rrule(rrule.HOURLY, dtstart=start_date, until=end_date):
        # Generate folder prefix
        prefix = key + '/' + dt.strftime('%Y/%m/%d/%H')

        # Grab list of objects in the prefix folder
        objects = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        contents = objects.get('Contents')

        # Skip folders that don't exist
        if not contents:
            click.echo('Warning! {} not found'.format(prefix))
            continue

        for s3_object in contents:
            object_response = s3.get_object(Bucket=bucket,
                                            Key=s3_object.get('Key'))

            object_body = object_response['Body']
            object_data = object_body.read()

            data = decode(object_data)

            if delimiter:
                records = data.split(delimiter)
            else:
                records = data.splitlines()

            # Send the individual records to Kinesis
            object_name = s3_object.get('Key').split('/')[-1]
            label = 'Sending {} records:'.format(object_name)
            with click.progressbar(records, label=label) as bar:
                for record in bar:
                    if not record:
                        click.echo('Empty record!')
                        continue
                    producer.send(record.encode())

    # Flush and shut down the producer once every hour has been replayed
    producer.close()
    producer.join()
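
restream() reads like the body of a Click command (click.confirm, click.echo, and click.progressbar are used, and the docstring names BUCKET, KEY, and STREAM in Click's usage style), but the decorators aren't included in the snippet. One plausible wiring, with every option name being an assumption:

import click

@click.command()
@click.argument('bucket')
@click.argument('key')
@click.argument('stream')
@click.option('--start', required=True, help='Start date of the replay')
@click.option('--end', required=True, help='End date of the replay')
@click.option('--yes', is_flag=True, help='Skip the confirmation prompt')
@click.option('--delimiter', default=None, help='Record delimiter override')
def restream(bucket, key, stream, start, end, yes, delimiter):
    ...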