def test_write_records_to_kinesis_reattempts_only_failed_records(self):
    """Tests that only the records Kinesis reports as failed are the ones that get re-pushed."""
    mocked_kinesis_client = Mock()
    mocked_kinesis_client.put_records = mock_put_records
    mocked_di_framework_client = Mock()
    records = [{
        "PartitionKey": "test",
        "Data": {"id": "1"},
        "fail": True
    }, {
        "PartitionKey": "test",
        "Data": {"id": "2"},
        "fail": False
    }, {
        "PartitionKey": "test",
        "Data": {"id": "3"},
        "fail": True
    }, {
        "PartitionKey": "test",
        "Data": {"id": "4"},
        "fail": False
    }]
    kinesis_producer = KinesisProducer("region_name", "stream_name",
                                       mocked_kinesis_client)
    res = kinesis_producer.write_records_to_kinesis(
        records, mocked_di_framework_client)
    number_of_expected_failed_records = 2
    expected_res = {
        'FailedRecordCount': number_of_expected_failed_records,
        'Records': [{
            'ErrorCode': 'ProvisionedThroughputExceededException',
            'ErrorMessage': 'this is a mock error message'
        }] * number_of_expected_failed_records,
        'EncryptionType': 'NONE'
    }
    self.assertEqual(res, expected_res)
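# A minimal sketch (an assumption, not the project's actual test fixture) of the
# mock_put_records helper assigned above. It mimics the boto3 put_records
# response shape and fails any record carrying a truthy "fail" flag, assuming
# the producer forwards the record dicts verbatim.
def mock_put_records(Records=None, StreamName=None, **kwargs):
    response_records = []
    failed_count = 0
    for record in Records or []:
        if record.get("fail"):
            failed_count += 1
            response_records.append({
                'ErrorCode': 'ProvisionedThroughputExceededException',
                'ErrorMessage': 'this is a mock error message'
            })
        else:
            response_records.append({
                'SequenceNumber': '0',
                'ShardId': 'shardId-000000000000'
            })
    return {
        'FailedRecordCount': failed_count,
        'Records': response_records,
        'EncryptionType': 'NONE'
    }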
def get_producer(self, args):
    if os.getenv("KINESIS_STREAM_NAME", False):
        api_name = os.environ.get("KINESIS_API_NAME", 'firehose')
        region_name = os.environ.get("KINESIS_REGION", 'us-west-2')
        stream_name = os.environ.get("KINESIS_STREAM_NAME", 'TwitterStream')
        from kinesis_producer import KinesisProducer
        return KinesisProducer(api_name, region_name, stream_name)
    elif args.kinesis_stream_name is not None:
        # fails if not all are set
        api_name = args.kinesis_api_name
        region_name = args.kinesis_region
        stream_name = args.kinesis_stream_name
        from kinesis_producer import KinesisProducer
        return KinesisProducer(api_name, region_name, stream_name)
    else:
        from kinesis_producer import StdoutProducer
        return StdoutProducer()
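# A minimal argparse sketch (the flag names are assumptions) showing how the
# args object consumed by get_producer could be built when the Kinesis settings
# are passed on the command line instead of through environment variables.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--kinesis-api-name", dest="kinesis_api_name", default="firehose")
parser.add_argument("--kinesis-region", dest="kinesis_region", default="us-west-2")
parser.add_argument("--kinesis-stream-name", dest="kinesis_stream_name", default=None)
args = parser.parse_args()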
def get_producer():
    if os.getenv("KAFKA_SERVICE_HOST", False):
        from kafka_producer import KafkaProducer
        return KafkaProducer()
    elif os.getenv("KAFKA_BOOTSTRAP_SERVER", False):
        from kafka_producer import KafkaProducer
        return KafkaProducer()
    elif os.getenv("KINESIS_STREAM_NAME", False):
        api_name = os.environ.get("KINESIS_API_NAME", 'firehose')
        region_name = os.environ.get("KINESIS_REGION", 'us-east-1')
        stream_name = os.environ.get("KINESIS_STREAM_NAME", 'TwitterStream')
        from kinesis_producer import KinesisProducer
        return KinesisProducer(api_name, region_name, stream_name)
    else:
        from tweepy_stream import StdoutProducer
        return StdoutProducer()
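# Illustrative only (hypothetical values): environment variables that steer the
# selection above. KAFKA_SERVICE_HOST is checked first, then
# KAFKA_BOOTSTRAP_SERVER, then KINESIS_STREAM_NAME; with none of them set the
# StdoutProducer fallback is used.
import os

os.environ["KAFKA_BOOTSTRAP_SERVER"] = "localhost:9092"  # selects KafkaProducer
producer = get_producer()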
def main():
    try:
        # create a ./local_config.json file if you want to run this locally or this will fail
        config = utils.get_config()
        transis_consumer = TransisConsumer(config["transis_config_prod"])
        kinesis_client = boto3.client('kinesis',
                                      config["kinesis_config"]["region_name"])
        kinesis_producer = KinesisProducer(
            config["kinesis_config"]["region_name"],
            config["kinesis_config"]["stream_name"], kinesis_client)
        di_framework_client = di_framework.DIFramework(
            config["di_framework_config"])
        transis_kinesis_connector = TransisKinesisConnector(
            transis_consumer, kinesis_producer, di_framework_client)
        transis_kinesis_connector.run()
    except Exception as e:
        logging.critical(
            f"shutting down the service as a fatal error has occurred: {e}")
        try:
            di_framework_client.close_db_connection()
        except UnboundLocalError:
            pass
        exit()
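# Sketch of the shape of config that main() expects, inferred from the keys
# accessed above; every value here is a placeholder, and the nested contents of
# the transis and DI framework sections are not shown in the source.
example_config = {
    "transis_config_prod": {},            # settings handed to TransisConsumer
    "kinesis_config": {
        "region_name": "us-east-1",       # placeholder region
        "stream_name": "example-stream"   # placeholder stream name
    },
    "di_framework_config": {}             # settings handed to di_framework.DIFramework
}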
# Generator that yields chunks of f split on a custom delimiter; used below to
# read ';'-terminated messages from stdin.
def myreadlines(f, newline):
    buf = ""
    while True:
        while newline in buf:
            pos = buf.index(newline)
            yield buf[:pos]
            buf = buf[pos + len(newline):]
        chunk = f.read(512)
        if not chunk:
            yield buf
            break
        buf += chunk


log = logging.getLogger('kinesis_producer.client')
level = logging.getLevelName('DEBUG')
logging.basicConfig()
log.setLevel(level)

k = KinesisProducer(config=config)
k2 = KinesisProducer(config=config2)

for line in myreadlines(sys.stdin, ";"):
    line = line.strip()
    mesg = line[2:]
    k2.send(mesg)
    type = pms.adsb.typecode(mesg)
    if 1 <= type <= 4:
        type = pms.adsb.typecode(mesg)
        icao = pms.adsb.icao(mesg)
        callsign = pms.adsb.callsign(mesg)
        print('aircraft id message')
        print("Type %s message" % type)
        print('callsign: %s' % pms.adsb.callsign(mesg))
        jsonobj = {
from sqlalchemy import MetaData, create_engine
from kinesis_producer import KinesisProducer

metadata = MetaData()
engine = create_engine('mysql://root@localhost/st5_dev')
connection = engine.connect()

statement = "select * from users"
result_proxy = connection.execute(statement)
result_set = result_proxy.fetchall()
records = result_set[:1]

config = dict(
    aws_region='us-east-1',
    buffer_size_limit=100000,
    buffer_time_limit=0.2,
    kinesis_concurrency=1,
    kinesis_max_retries=10,
    record_delimiter='\n',
    stream_name='smartzip-challenge-5',
)
k = KinesisProducer(config=config)

for record in records:
    k.send(record)

k.close()
k.join()
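# The rows returned by fetchall() are SQLAlchemy row objects, not bytes or str;
# depending on the producer's send() implementation they may need serializing
# first. A hedged variant of the loop above that ships each row as JSON
# (dict(record) works for classic RowProxy results; newer SQLAlchemy versions
# expose record._mapping instead):
import json

for record in records:
    k.send(json.dumps(dict(record), default=str).encode('utf-8'))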
import logging

from kinesis_producer import KinesisProducer

logging.basicConfig(level=logging.DEBUG)
logging.getLogger('botocore').setLevel(logging.WARNING)
log = logging.getLogger(__name__)

config = dict(
    aws_region='us-east-1',
    buffer_size_limit=200000,
    buffer_time_limit=0.2,
    kinesis_concurrency=4,
    kinesis_max_retries=10,
    record_delimiter='\n',
    stream_name='jz-python-devlocal',
)

k = KinesisProducer(config=config)
payload = '{MSG:%%05i %s}' % ('X' * 1000)

try:
    print ' <> MSGS'
    for msg_id in range(50000):
        record = payload % msg_id
        k.send(record)
        # time.sleep(0.2)
    # time.sleep(5)
except KeyboardInterrupt:
    pass
finally:
    print ' <> CLOSE'
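# Worked illustration (standalone) of the two-step formatting used above: the
# doubled %% survives the first substitution, so each record is a zero-padded
# message id followed by 1000 'X' characters.
payload_template = '{MSG:%%05i %s}' % ('X' * 1000)   # -> '{MSG:%05i XXX...X}'
sample_record = payload_template % 3                  # -> '{MSG:00003 XXX...X}'
assert sample_record.startswith('{MSG:00003 X')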
def restream(bucket, key, stream, start, end, yes, delimiter):
    """Replay saved Firehose streams into Kinesis streams.

    The data in the KEY path inside the BUCKET will be sent to STREAM.
    """
    # Instantiate Kinesis producer
    kinesis_config = dict(
        aws_region='us-east-1',
        buffer_size_limit=1,
        buffer_time_limit=1,
        kinesis_concurrency=1,
        kinesis_max_retries=10,
        record_delimiter=b'\n',
        stream_name=stream,
    )
    producer = KinesisProducer(config=kinesis_config)

    # Instantiate S3 client
    s3 = boto3.client('s3')

    # Parse dates
    start_date = parse_date(start)
    end_date = parse_date(end)

    # Ask for confirmation unless it was explicitly skipped
    if not yes:
        click.confirm('Restream from {} to {}'.format(start_date, end_date),
                      abort=True)

    # Iterate hourly over the date range and restream the records individually
    for dt in rrule.rrule(rrule.HOURLY, dtstart=start_date, until=end_date):
        # Generate folder prefix
        prefix = key + '/' + dt.strftime('%Y/%m/%d/%H')

        # Grab the list of objects under the prefix folder
        objects = s3.list_objects_v2(Bucket=bucket, Prefix=prefix)
        contents = objects.get('Contents')

        # Skip folders that don't exist
        if not contents:
            click.echo('Warning! {} not found'.format(prefix))
            continue

        for s3_object in contents:
            object_response = s3.get_object(Bucket=bucket,
                                            Key=s3_object.get('Key'))
            object_body = object_response['Body']
            object_data = object_body.read()
            data = decode(object_data)

            if delimiter:
                records = data.split(delimiter)
            else:
                records = data.splitlines()

            # Send the individual records to Kinesis
            object_name = s3_object.get('Key').split('/')[-1]
            label = 'Sending {} records:'.format(object_name)
            with click.progressbar(records, label=label) as bar:
                for record in bar:
                    if not record:
                        click.echo('Empty record!')
                        continue
                    producer.send(record.encode())

    producer.close()
    producer.join()
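# One possible way (an assumption based only on the signature and docstring,
# not the original decorators) to expose restream as a click command.
import click

@click.command()
@click.argument('bucket')
@click.argument('key')
@click.argument('stream')
@click.option('--start', required=True, help='Start of the replay window (parsed by parse_date).')
@click.option('--end', required=True, help='End of the replay window.')
@click.option('--yes', is_flag=True, help='Skip the confirmation prompt.')
@click.option('--delimiter', default=None, help='Record delimiter inside each S3 object.')
def restream_cli(bucket, key, stream, start, end, yes, delimiter):
    restream(bucket, key, stream, start, end, yes, delimiter)

if __name__ == '__main__':
    restream_cli()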