def records_iterator(self, limit=100):
    """Yield successive ``get_records`` responses from the stream's first shard.

    Reading starts at the ``LATEST`` position of ``self.first_shard_id`` in
    ``self.stream_name``. Each yielded value is the raw response returned by
    ``kinesis.get_records``; the records themselves are under its
    ``"Records"`` key.

    Every loop iteration -- including the time the consumer spends handling
    the yielded response -- is padded to at least 0.2 seconds, which keeps
    polling at or below five calls per second.

    The generator finishes when the service stops returning a
    ``NextShardIterator`` (the shard has been closed) instead of passing a
    null iterator back to ``get_records``.

    Args:
        limit: Maximum number of records requested per ``get_records`` call.

    Yields:
        The response of each ``kinesis.get_records`` call.
    """
    min_interval = 0.2  # seconds per poll cycle

    shard_it = kinesis.get_shard_iterator(
        self.stream_name, self.first_shard_id, "LATEST"
    )["ShardIterator"]

    while shard_it is not None:
        t0 = time.time()
        out = kinesis.get_records(shard_it, limit=limit)
        # A missing/null NextShardIterator means there is nothing more to read.
        shard_it = out.get("NextShardIterator")
        yield out
        elapsed = time.time() - t0
        if elapsed < min_interval:
            time.sleep(min_interval - elapsed)
def main():
    """Poll the "end-stream" Kinesis stream and store each record in the database.

    Connects to us-east-2, reads the single shard from its ``LATEST``
    position one record at a time, prints the raw payload, decodes it as
    JSON, extracts the fields with ``extract_data_from_kinesis_stream`` and
    hands them to ``write_to_db``. Runs until interrupted, pausing 0.1
    seconds between polls.
    """
    from boto import kinesis

    conn = kinesis.connect_to_region("us-east-2")
    shard_id = 'shardId-000000000000'  # the stream has only one shard
    iterator_response = conn.get_shard_iterator("end-stream", shard_id, "LATEST")
    shard_it = iterator_response["ShardIterator"]

    while True:
        out = conn.get_records(shard_it, limit=1)
        shard_it = out["NextShardIterator"]

        records = out["Records"]
        if records:
            payload = records[0]["Data"]
            print(payload)
            data_dict = json.loads(payload)
            (timestamp, userName, device_name,
             detail_result) = extract_data_from_kinesis_stream(data_dict)
            write_to_db(timestamp, userName, device_name, detail_result)

        time.sleep(0.1)
def main():
    """Poll the "end-stream" Kinesis stream and plot the most recent IMU samples.

    Connects to us-east-2 and reads the single shard from its ``LATEST``
    position one record at a time. For every record received, the payload is
    printed, decoded as JSON and passed to
    ``extract_data_from_kinesis_stream``; the timestamp, the
    ``IMU.BodyOrientationPitch`` value and the ``IMU.StepCount`` value are
    pushed onto the front of three 20-element sliding windows, which are then
    handed to ``plot_vals`` as NumPy arrays.

    Polling is paced to at most one ``get_records`` call every 0.2 seconds
    and stops when the service no longer returns a ``NextShardIterator``.
    """
    import time
    from collections import deque

    from boto import kinesis

    window = 20               # number of samples kept for plotting
    min_poll_interval = 0.2   # seconds; five reads per second per shard

    conn = kinesis.connect_to_region("us-east-2")
    shard_id = 'shardId-000000000000'  # the stream has only one shard
    shard_it = conn.get_shard_iterator(
        "end-stream", shard_id, "LATEST"
    )["ShardIterator"]

    # Windows start out filled with the placeholder values 0..19. With
    # maxlen set, appendleft() drops the oldest sample from the right end.
    timestamps = deque(range(window), maxlen=window)
    bodyorientationpitches = deque(range(window), maxlen=window)
    steps = deque(range(window), maxlen=window)

    while shard_it is not None:
        started = time.time()
        out = conn.get_records(shard_it, limit=1)
        # A missing/null NextShardIterator means there is nothing more to read.
        shard_it = out.get("NextShardIterator")

        records = out["Records"]
        if records:
            payload = records[0]["Data"]
            print(payload)
            data_dict = json.loads(payload)
            (timestamp, userName, device_name,
             detail_result) = extract_data_from_kinesis_stream(data_dict)

            imu = detail_result["IMU"]
            timestamps.appendleft(timestamp)
            bodyorientationpitches.appendleft(imu["BodyOrientationPitch"])
            steps.appendleft(imu["StepCount"])

            plot_vals(
                np.array(timestamps), np.array(bodyorientationpitches),
                np.array(timestamps), np.array(steps),
            )

        # Without pacing, an idle stream makes this loop call get_records
        # as fast as it can spin.
        remaining = min_poll_interval - (time.time() - started)
        if remaining > 0:
            time.sleep(remaining)
db=DB ) cursor = mydb.cursor() kinesis = kinesis.connect_to_region('us-east-1') shard_id = 'shardId-0000000000000' shard_it = kinesis.get_shard_iterator(MYSTREAM, shard_id, 'LATEST')['ShardIterator'] while 1==1: print("getting record") out = kinesis.get_records(shard_it,limit=1) for i in out['Records']: recordId = json.loads(i['Data'])['SaleId']['N'] productId = json.loads(i['Data'])['ProductId']['N'] quantity = json.loads(i['Data'])['Quantity']['N'] saleDate = json.loads(i['Data'])['SaleDate']['S'] inv_quantity = 0 - int(quantity) add_record = "INSERT INTO FactInventory (idFactInventory, ProductID, QuantityChange, DateTime) VALUES (%s, %s, %s, %s)" data_record = { 'record_no': recordId, 'productID': productId, 'quantity': quantity, 'dateTime': saleDate, } cursor.execute(add_record, (str(recordId), str(productId), str(inv_quantity), str(saleDate)))
from boto import kinesis
import time

# Connect to the region in which the Kinesis stream was created.
kinesis = kinesis.connect_to_region("eu-west-1")
shard_id = 'shardId-000000000000'

# LATEST iterator: read from the current tip of the shard onwards.
shard_it = kinesis.get_shard_iterator("BotoDemo", shard_id, "LATEST")["ShardIterator"]

# Fetch up to two records per poll and dump the raw response. The loop ends
# when the service stops returning a NextShardIterator (the shard was
# closed) rather than passing a null iterator back to get_records.
while shard_it is not None:
    out = kinesis.get_records(shard_it, limit=2)
    shard_it = out.get("NextShardIterator")
    # Function-call form prints the same text on Python 2 and also runs on
    # Python 3, where the bare `print out` statement is a syntax error.
    print(out)
    time.sleep(0.3)
import time
from collections import deque

from boto import kinesis

# boto kinesis API
kinesis = kinesis.connect_to_region("eu-west-1")

# Stream to read from
stream = 'logbuffer-dev'

# Look up the shards of the stream
shards = kinesis.describe_stream(stream)["StreamDescription"]["Shards"]
n_shards = len(shards)

# One LATEST iterator per shard, kept in a queue so the shards are polled
# round-robin. (To replay a shard from its beginning instead, request an
# "AT_SEQUENCE_NUMBER" iterator with
# shard["SequenceNumberRange"]["StartingSequenceNumber"].)
iterators = deque(
    kinesis.get_shard_iterator(stream, shard["ShardId"], "LATEST")["ShardIterator"]
    for shard in shards
)

# Circular query of the iterators; ends once no shard is left to read
# (including the case where the stream reported no shards at all).
while iterators:
    iterator = iterators.popleft()
    out = kinesis.get_records(iterator, limit=500)

    # A closed shard returns no NextShardIterator; drop it from the rotation
    # instead of re-queueing a null iterator.
    next_iterator = out.get("NextShardIterator")
    if next_iterator is not None:
        iterators.append(next_iterator)

    for record in out["Records"]:
        print(record["Data"])

    # Kinesis limit: 5 read transactions per second per shard. Spreading
    # 0.2 s over the shards still in rotation keeps each one at that rate.
    if iterators:
        time.sleep(0.2 / len(iterators))
# Is the stream active? if response['StreamDescription']['StreamStatus'] == 'ACTIVE': # Yes, the stream is active and we are ready to start reading from it. Get the shard ID. shard_id = response['StreamDescription']['Shards'][0]['ShardId'] break # The stream is not active, wait for 15 seconds and try again time.sleep(15) else: # The stream took too long to become active raise TimeoutError('Stream is still not active, aborting...') # Get the shard iterator and get only new data (TRIM_HORIZON) response = kinesis.get_shard_iterator(stream_name, shard_id, 'TRIM_HORIZON') shard_iterator = response['ShardIterator'] # Loop forever while True: # Get the next set of records response = kinesis.get_records(shard_iterator) # Get the next shard iterator shard_iterator = response['NextShardIterator'] # Loop through all of the records and print them for line in response['Records']: print line # Sleep for a second so we don't hit the Kinesis API too fast time.sleep(1)
# NOTE(review): iterator_type is not used in the visible code; the iterator
# requested below is hard-coded to 'TRIM_HORIZON'.
iterator_type = 'LATEST'

# Describe the stream, dump the description as JSON and list its shards.
stream = kinesis.describe_stream(streamName)
print(json.dumps(stream, sort_keys=True, indent=2, separators=(',', ':')))
shards = stream['StreamDescription']['Shards']
print('# Shard Count:', len(shards))


def processRecords(records):
    """Print the lower-cased ``Data`` payload of every record in *records*."""
    for record in records:
        text = record['Data'].lower()
        print('Processing record with data:' + text)


# Read the first shard starting from TRIM_HORIZON.
i = 0
response = kinesis.get_shard_iterator(
    streamName,
    shards[0]['ShardId'],
    'TRIM_HORIZON',
    starting_sequence_number=None,
)
next_iterator = response['ShardIterator']
print('Gettinng next records using iterator:', next_iterator)

# Up to 4000 successful polls, one record per poll, one second apart. A
# throttled poll is not counted; it just waits five seconds and retries.
while i < 4000:
    try:
        response = kinesis.get_records(next_iterator, limit=1)
        fetched = response['Records']
        if fetched:
            processRecords(fetched)
        next_iterator = response['NextShardIterator']
        time.sleep(1)
        i += 1
    except ProvisionedThroughputExceededException as ptee:
        print(ptee.message)
        time.sleep(5)
''' Specify where the data comes from and how we are going to iterate over it '''
shard_it = kinesis.get_shard_iterator("big-data-analytics-desafio", shard_id, "LATEST")["ShardIterator"]
result = {}
t = 0
''' Start the while loop, which only stops once we have collected the number
    of valid records requested at the start '''
# NOTE(review): `i` is defined outside this excerpt and is not changed in the
# visible part of the loop.
while i == 0:
    ''' Store the fetched response in a variable and move the iterator on to
        the next read position '''
    out = kinesis.get_records(shard_it)
    shard_it = out["NextShardIterator"]
    ''' Check whether there is anything to process '''
    if len(out["Records"]) != 0:
        ''' Keep count of how many non-empty batches we have received '''
        t += 1
        ''' Iterate over the records that were found '''
        # NOTE(review): the body of this loop lies outside the excerpt.
        for resp in out["Records"]:
import time

from boto import kinesis
from settings import KINESIS_REGION, KINESIS_STREAM_NAME

FLUSH_INTERVAL = 5  # seconds to wait between batches
BATCH_SIZE = 20     # maximum number of records requested per batch
running = True      # the consumer loop runs while this stays true


def process_messages(batch_msgs):
    """Print a summary of one ``get_records`` response.

    Prints how many records the batch contains, followed by the ``Data``
    payload and ``SequenceNumber`` of each record.

    Args:
        batch_msgs: A ``get_records`` response with a ``'Records'`` list.
    """
    records = batch_msgs['Records']
    print('messages processed: {}'.format(len(records)))
    for msg in records:
        print('message: "{}" offset: {}'.format(msg['Data'], msg['SequenceNumber']))


print('Connect to Kinesis Streams')
kinesis = kinesis.connect_to_region(region_name=KINESIS_REGION)
stream = kinesis.describe_stream(KINESIS_STREAM_NAME)
# Only the first shard of the stream is consumed.
shardId = stream['StreamDescription']['Shards'][0]['ShardId']
# TRIM_HORIZON: start from the oldest record still available in the shard.
shardIterator = kinesis.get_shard_iterator(KINESIS_STREAM_NAME, shardId, 'TRIM_HORIZON')

print('Kinesis consumer started!')
while running:
    msgs = kinesis.get_records(shardIterator['ShardIterator'], limit=BATCH_SIZE)
    process_messages(msgs)

    # A missing/null NextShardIterator means the shard was closed; stop
    # rather than passing a null iterator to the next get_records call.
    next_iterator = msgs.get('NextShardIterator')
    if next_iterator is None:
        break
    shardIterator['ShardIterator'] = next_iterator

    print('\nnext batch in {} seconds...'.format(FLUSH_INTERVAL))
    # `time` was used here without ever being imported (NameError after the
    # first batch); it is now imported at the top of the script.
    time.sleep(FLUSH_INTERVAL)