def records_iterator(self, limit=100):
    """Yield successive ``get_records`` responses from one Kinesis shard.

    Reading starts at the ``LATEST`` position of ``self.first_shard_id`` in
    ``self.stream_name``.  Each yielded value is the raw response returned
    by ``kinesis.get_records`` (the records themselves are under
    ``"Records"``).

    Args:
        limit: Maximum number of records requested per call.

    Yields:
        The response of every ``get_records`` call, including empty ones.

    The generator ends when a response carries no next iterator; Kinesis
    reports a null ``NextShardIterator`` once the shard has been closed, and
    feeding that back into ``get_records`` would only raise.
    """
    # Minimum duration of one poll cycle, in seconds (i.e. at most 5 calls/s).
    min_cycle = 0.2

    shard_it = kinesis.get_shard_iterator(
        self.stream_name, self.first_shard_id, "LATEST"
    )["ShardIterator"]

    while shard_it is not None:
        started = time.time()
        out = kinesis.get_records(shard_it, limit=limit)
        shard_it = out.get("NextShardIterator")
        yield out

        # The time the consumer spent handling the batch counts towards the
        # cycle, so only sleep for whatever is left of it.
        elapsed = time.time() - started
        if elapsed < min_cycle:
            time.sleep(min_cycle - elapsed)
def main():
    """Poll the "end-stream" Kinesis stream forever and store each new record.

    One record at a time is fetched from the single shard, starting at the
    LATEST position.  Every received payload is printed, decoded from JSON,
    split into its fields by ``extract_data_from_kinesis_stream`` and handed
    to ``write_to_db``.
    """
    from boto import kinesis

    conn = kinesis.connect_to_region("us-east-2")
    # The stream has exactly one shard, so its id is fixed.
    shard = 'shardId-000000000000'
    cursor = conn.get_shard_iterator("end-stream", shard, "LATEST")["ShardIterator"]

    while True:
        batch = conn.get_records(cursor, limit=1)
        cursor = batch["NextShardIterator"]
        records = batch["Records"]
        if records:
            payload = records[0]["Data"]
            print(payload)
            fields = extract_data_from_kinesis_stream(json.loads(payload))
            timestamp, userName, device_name, detail_result = fields
            write_to_db(timestamp, userName, device_name, detail_result)
        # Short pause between polls.
        time.sleep(0.1)
def main():
    """Poll the "end-stream" Kinesis stream forever and plot the latest values.

    Three rolling windows of 20 entries (timestamps, body-orientation pitch
    and step count) are kept, newest first.  They start out filled with the
    numbers 0..19.  Whenever a record arrives its values are pushed to the
    front of each window, the oldest entry is dropped, and the windows are
    passed to ``plot_vals`` as NumPy arrays.
    """
    from boto import kinesis

    conn = kinesis.connect_to_region("us-east-2")
    # The stream has exactly one shard, so its id is fixed.
    shard = 'shardId-000000000000'
    cursor = conn.get_shard_iterator("end-stream", shard, "LATEST")["ShardIterator"]

    timestamps = list(range(20))
    bodyorientationpitches = list(range(20))
    steps = list(range(20))

    while True:
        batch = conn.get_records(cursor, limit=1)
        cursor = batch["NextShardIterator"]
        records = batch["Records"]
        if not records:
            continue

        payload = records[0]["Data"]
        print(payload)
        fields = extract_data_from_kinesis_stream(json.loads(payload))
        timestamp, userName, device_name, detail_result = fields

        # Newest value goes first; the window length stays constant.
        timestamps = [timestamp] + timestamps[:-1]
        pitch = detail_result["IMU"]["BodyOrientationPitch"]
        bodyorientationpitches = [pitch] + bodyorientationpitches[:-1]
        step_count = detail_result["IMU"]["StepCount"]
        steps = [step_count] + steps[:-1]

        x1 = np.array(timestamps)
        y1 = np.array(bodyorientationpitches)
        x2 = np.array(timestamps)
        y2 = np.array(steps)
        plot_vals(x1, y1, x2, y2)
mydb = mysql.connector.connect( host=HOST, user=USER, passwd=PASSWORD, db=DB ) cursor = mydb.cursor() kinesis = kinesis.connect_to_region('us-east-1') shard_id = 'shardId-0000000000000' shard_it = kinesis.get_shard_iterator(MYSTREAM, shard_id, 'LATEST')['ShardIterator'] while 1==1: print("getting record") out = kinesis.get_records(shard_it,limit=1) for i in out['Records']: recordId = json.loads(i['Data'])['SaleId']['N'] productId = json.loads(i['Data'])['ProductId']['N'] quantity = json.loads(i['Data'])['Quantity']['N'] saleDate = json.loads(i['Data'])['SaleDate']['S'] inv_quantity = 0 - int(quantity) add_record = "INSERT INTO FactInventory (idFactInventory, ProductID, QuantityChange, DateTime) VALUES (%s, %s, %s, %s)" data_record = {
from boto import kinesis
import time

# Connect to the region in which the Kinesis stream was created.
kinesis = kinesis.connect_to_region("eu-west-1")

# Start reading at the newest position of the stream's first shard.
shard_id = 'shardId-000000000000'
shard_it = kinesis.get_shard_iterator(
    "BotoDemo", shard_id, "LATEST"
)["ShardIterator"]

# Fetch up to two records per poll, print the raw response, then advance.
while True:
    out = kinesis.get_records(shard_it, limit=2)
    shard_it = out["NextShardIterator"]
    print(out)
    time.sleep(0.3)
from boto import kinesis
import time

if __name__ == '__main__':
    kinesis = kinesis.connect_to_region("ap-southeast-1")

    # Show what the account/stream looks like before consuming.
    print(kinesis.describe_stream("payments"))
    print(kinesis.list_streams())

    # Read from the newest position of the first shard.
    shard_id = 'shardId-000000000000'
    shard_it = kinesis.get_shard_iterator(
        "payments", shard_id, "LATEST"
    )["ShardIterator"]

    # Poll forever: up to two records per call, raw response printed.
    while True:
        out = kinesis.get_records(shard_it, limit=2)
        shard_it = out["NextShardIterator"]
        print(out)
        time.sleep(0.2)
from boto import kinesis
import time

# boto kinesis API
kinesis = kinesis.connect_to_region("eu-west-1")

# Stream to read from
stream = 'logbuffer-dev'

# Look up the shards of the stream
shards = kinesis.describe_stream(stream)["StreamDescription"]["Shards"]
n_shards = len(shards)

# One iterator per shard, each starting at the newest position.  (To replay
# a shard from its beginning, request "AT_SEQUENCE_NUMBER" with the shard's
# SequenceNumberRange.StartingSequenceNumber instead of "LATEST".)
iterators = []
for shard in shards:
    shard_id = shard["ShardId"]
    iterators.append(
        kinesis.get_shard_iterator(stream, shard_id, "LATEST")["ShardIterator"]
    )

# Round-robin over the shard iterators until every shard has been closed.
while iterators:
    iterator = iterators.pop(0)
    out = kinesis.get_records(iterator, limit=500)

    # A null/missing NextShardIterator means the shard is closed; re-queuing
    # it would make the next get_records call for that shard fail.
    next_iterator = out.get("NextShardIterator")
    if next_iterator is not None:
        iterators.append(next_iterator)

    for record in out["Records"]:
        print(record["Data"])

    # Kinesis limit: 5 read transactions per second per shard.  Spread the
    # 0.2 s budget over the shards that are still being polled.
    if iterators:
        time.sleep(0.2 / len(iterators))
response = kinesis.describe_stream(stream_name) # Is the stream active? if response['StreamDescription']['StreamStatus'] == 'ACTIVE': # Yes, the stream is active and we are ready to start reading from it. Get the shard ID. shard_id = response['StreamDescription']['Shards'][0]['ShardId'] break # The stream is not active, wait for 15 seconds and try again time.sleep(15) else: # The stream took too long to become active raise TimeoutError('Stream is still not active, aborting...') # Get the shard iterator and get only new data (TRIM_HORIZON) response = kinesis.get_shard_iterator(stream_name, shard_id, 'TRIM_HORIZON') shard_iterator = response['ShardIterator'] # Loop forever while True: # Get the next set of records response = kinesis.get_records(shard_iterator) # Get the next shard iterator shard_iterator = response['NextShardIterator'] # Loop through all of the records and print them for line in response['Records']: print line # Sleep for a second so we don't hit the Kinesis API too fast
from boto import kinesis
import time

kinesis = kinesis.connect_to_region("eu-west-1")

# The stream has a single shard, so its id is fixed.
shard_id = 'shardId-000000000000'
shard_it = kinesis.get_shard_iterator(
    "BotoDemo", shard_id, "LATEST"
)["ShardIterator"]

# Poll forever: up to two records per call, raw response printed.
while True:
    out = kinesis.get_records(shard_it, limit=2)
    shard_it = out["NextShardIterator"]
    print(out)
    time.sleep(0.2)
from boto import kinesis
import time

kinesis = kinesis.connect_to_region("us-east-2")

# The stream has a single shard, so its id is fixed.
shard_id = 'shardId-000000000000'
shard_it = kinesis.get_shard_iterator(
    "end-stream", shard_id, "LATEST"
)["ShardIterator"]

# Poll forever, one record per call; print the payload when one arrives.
while True:
    out = kinesis.get_records(shard_it, limit=1)
    shard_it = out["NextShardIterator"]
    records = out["Records"]
    if records:
        print(records[0]["Data"])
    time.sleep(0.3)

# Example of a get_records response:
# {'MillisBehindLatest': 0,
#  'NextShardIterator': 'AAAAAAAAAAFF9/mBhBuL7fpnx+/w6YomDqex3tSNgI9ZI57g7i92nH2A/...',
#  'Records': [
#      {'ApproximateArrivalTimestamp': 1597406867.724,
#       'Data': '{"deviceID": 59, "userName": "******", "timeStamp": 1597406867, "param1": 9, "param2": 7, "param3": 2, "param4": 9}',
#       'PartitionKey': 'partitionkey',
#       'SequenceNumber': '49609830172400645887225606482176990396391791989677359106'}]}
partitionKey="IoTExample" shardCount=1 iterator_type='LATEST' stream=kinesis.describe_stream(streamName) print(json.dumps(stream,sort_keys=True,indent=2,separators=(',',':'))) shards=stream['StreamDescription']['Shards'] print('# Shard Count:', len(shards)) def processRecords(records): for record in records: text=record['Data'].lower() print 'Processing record with data:' + text i=0 response=kinesis.get_shard_iterator(streamName,shards[0]['ShardId'], 'TRIM_HORIZON',starting_sequence_number=None) next_iterator=response['ShardIterator'] print('Gettinng next records using iterator:', next_iterator) while i<4000: try: response=kinesis.get_records(next_iterator,limit=1) #print response if len(response['Records'])>0: #print 'Number of records fetched:' + str(len(response['Records'])) processRecords(response['Records']) next_iterator=response['NextShardIterator'] time.sleep(1) i=i+1 except ProvisionedThroughputExceededException as ptee:
ws.write(i, 3, "clicou") ''' informando region que vamos conectar ''' kinesis = kinesis.connect() .connect_to_region("us-east-1") ''' iniciando shard_id ''' shard_id = 'shardId-000000000000' ''' Informando de onde vamos pegar os dados e como vamos iterar ''' shard_it = kinesis.get_shard_iterator("big-data-analytics-desafio", shard_id, "LATEST")["ShardIterator"] result = {} t = 0 ''' iniciando while que vai parar de rodar apenas quando pegarmos a quantidade de registros validos informado no inicio ''' while i == 0: ''' atribuindo uma variavel com valores coletados e ja buscando o proximo registro ''' out = kinesis.get_records(shard_it) shard_it = out["NextShardIterator"] '''
from boto import kinesis
import time

kinesis = kinesis.connect_to_region("us-west-2")
stream_name = "TestStream"

# Try up to 100 times to describe the stream and, once it is ACTIVE, read
# from its shards.  Any failure inside an attempt is reported and the next
# attempt starts.
tries = 0
while tries < 100:
    tries += 1
    try:
        response = kinesis.describe_stream(stream_name)
        if response['StreamDescription']['StreamStatus'] == 'ACTIVE':
            print("stream is active")
            shards = response['StreamDescription']['Shards']
            for shard in shards:
                shard_id = shard["ShardId"]
                print(repr(shard))
                shard_it = kinesis.get_shard_iterator(
                    stream_name, shard_id, "LATEST")["ShardIterator"]
                # Reads this shard indefinitely; only an exception leaves
                # the loop.
                while True:
                    out = kinesis.get_records(shard_it, limit=2)
                    for o in out["Records"]:
                        # Your specific data processing goes here
                        print(o["Data"])
                    shard_it = out["NextShardIterator"]
    except Exception as err:
        # Catch Exception rather than everything so Ctrl-C / SystemExit
        # still stop the script, and actually fill in the %s placeholder.
        print('error while trying to describe kinesis stream : %s' % (err,))
# Endless producer: replay the CSV test data into the "rawdata" stream over
# and over, one Kinesis record per CSV row.
bruce = "awesome"
while bruce == "awesome":
    with open("Kinesis_Test_Data.csv", 'rb') as source_file:
        contents = csv.reader(source_file, delimiter=',', quotechar='|')
        for event in contents:
            # Every row gets the next sequential uid and the current time.
            initial_uid += 1
            data = {
                'uid': initial_uid,
                'event': event[0],
                'timestamp': str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
                'unit': event[1],
                'package': event[2],
                'price': event[3],
                'platform': event[4],
            }
            json_data = json.dumps(data, ensure_ascii=False)
            print(json_data)
            kinesis.put_record("rawdata", json_data, "partitionkey")

            # Look up and show a LATEST iterator for the first shard.
            shard_id = 'shardId-000000000000'
            shard_it = kinesis.get_shard_iterator(
                "rawdata", shard_id, "LATEST"
            )["ShardIterator"]
            print(shard_it)
# ``time`` is needed for the pause between batches; without this import the
# first time.sleep() call raises NameError.
import time

from boto import kinesis
from settings import KINESIS_REGION, KINESIS_STREAM_NAME

# Seconds to wait between two get_records calls.
FLUSH_INTERVAL = 5
# Maximum number of records requested per get_records call.
BATCH_SIZE = 20
# The consumer loop keeps running while this flag is true.
running = True


def process_messages(batch_msgs):
    """Print a summary of one ``get_records`` response.

    Args:
        batch_msgs: Response whose ``'Records'`` entry is a sequence of
            records, each providing ``'Data'`` and ``'SequenceNumber'``.
    """
    print('messages processed: {}'.format(len(batch_msgs['Records'])))
    for msg in batch_msgs['Records']:
        print('message: "{}" offset: {}'.format(msg['Data'], msg['SequenceNumber']))


print('Connect to Kinesis Streams')
kinesis = kinesis.connect_to_region(region_name=KINESIS_REGION)
stream = kinesis.describe_stream(KINESIS_STREAM_NAME)

# Only the first shard of the stream is consumed, from its oldest available
# record (TRIM_HORIZON).
shardId = stream['StreamDescription']['Shards'][0]['ShardId']
shardIterator = kinesis.get_shard_iterator(KINESIS_STREAM_NAME, shardId, 'TRIM_HORIZON')

print('Kinesis consumer started!')
while running:
    msgs = kinesis.get_records(shardIterator['ShardIterator'], limit=BATCH_SIZE)
    process_messages(msgs)
    # Advance to the position right after the batch just read.
    shardIterator['ShardIterator'] = msgs['NextShardIterator']
    print('\nnext batch in {} seconds...'.format(FLUSH_INTERVAL))
    time.sleep(FLUSH_INTERVAL)