예제 #1
0
    def records_iterator(self, limit=100):
        """
        Read the latest batch of records from a Kinesis stream

        """
        shard_it = kinesis.get_shard_iterator(self.stream_name,
                                              self.first_shard_id,
                                              "LATEST")["ShardIterator"]
        while True:
            t0 = time.time()
            out = kinesis.get_records(shard_it, limit=limit)
            shard_it = out["NextShardIterator"]
            yield out

            t1 = time.time()
            if t1 - t0 < 0.2:
                time.sleep(0.2 - (t1 - t0))
예제 #2
0
def main():
    from boto import kinesis
    kinesis = kinesis.connect_to_region("us-east-2")
    shard_id = 'shardId-000000000000'  #we only have one shard!
    shard_it = kinesis.get_shard_iterator("end-stream", shard_id,
                                          "LATEST")["ShardIterator"]

    while 1 == 1:
        out = kinesis.get_records(shard_it, limit=1)
        shard_it = out["NextShardIterator"]
        if len(out["Records"]) > 0:
            print(out["Records"][0]["Data"])
            data_dict = json.loads(out["Records"][0]["Data"])
            timestamp, userName, device_name, detail_result = extract_data_from_kinesis_stream(
                data_dict)
            # print(shard_it)
            write_to_db(timestamp, userName, device_name, detail_result)
        time.sleep(0.1)
예제 #3
0
def main():
    from boto import kinesis
    kinesis = kinesis.connect_to_region("us-east-2")
    shard_id = 'shardId-000000000000'  #we only have one shard!
    shard_it = kinesis.get_shard_iterator("end-stream", shard_id,
                                          "LATEST")["ShardIterator"]
    timestamps = [i for i in range(20)]
    bodyorientationpitches = [i for i in range(20)]
    steps = [i for i in range(20)]

    while 1 == 1:
        out = kinesis.get_records(shard_it, limit=1)
        shard_it = out["NextShardIterator"]
        if len(out["Records"]) > 0:
            print(out["Records"][0]["Data"])
            data_dict = json.loads(out["Records"][0]["Data"])
            timestamp, userName, device_name, detail_result = extract_data_from_kinesis_stream(
                data_dict)

            timestamps.insert(0, timestamp)
            timestamps = timestamps[:-1]

            bodyorientationpitches.insert(
                0, detail_result["IMU"]["BodyOrientationPitch"])
            bodyorientationpitches = bodyorientationpitches[:-1]

            steps.insert(0, detail_result["IMU"]["StepCount"])
            steps = steps[:-1]

            x1 = np.array(timestamps)
            y1 = np.array(bodyorientationpitches)

            x2 = np.array(timestamps)
            y2 = np.array(steps)

            plot_vals(x1, y1, x2, y2)
예제 #4
0
  db=DB
)

cursor = mydb.cursor()

kinesis = kinesis.connect_to_region('us-east-1')

shard_id = 'shardId-0000000000000'

shard_it = kinesis.get_shard_iterator(MYSTREAM, shard_id, 'LATEST')['ShardIterator']



while 1==1:
  print("getting record")
  out = kinesis.get_records(shard_it,limit=1)
  for i in out['Records']:
    recordId = json.loads(i['Data'])['SaleId']['N']
    productId = json.loads(i['Data'])['ProductId']['N']
    quantity = json.loads(i['Data'])['Quantity']['N']
    saleDate = json.loads(i['Data'])['SaleDate']['S']
    inv_quantity = 0 - int(quantity)

    add_record = "INSERT INTO FactInventory (idFactInventory, ProductID, QuantityChange, DateTime) VALUES (%s, %s, %s, %s)"
data_record = {
      'record_no': recordId,
      'productID': productId,
      'quantity': quantity,
      'dateTime': saleDate,
    }
    cursor.execute(add_record, (str(recordId), str(productId), str(inv_quantity), str(saleDate)))
from boto import kinesis
import time

# Using boto connect to the region in which your kinesis stream is created
kinesis = kinesis.connect_to_region("eu-west-1")


shard_id = 'shardId-000000000000' 
# Iterator to go throough the latest stream values
shard_it = kinesis.get_shard_iterator("BotoDemo", shard_id, "LATEST")["ShardIterator"]

# Get thed ata 
while True:
    out = kinesis.get_records(shard_it, limit=2)
    shard_it = out["NextShardIterator"]
    print out
    time.sleep(0.3)
from boto import kinesis
import time

# boto kinesis API
kinesis = kinesis.connect_to_region("eu-west-1")

# Stream to read from
stream = 'logbuffer-dev'

# Lookup shards of stream
shards = kinesis.describe_stream(stream)["StreamDescription"]["Shards"]
n_shards = len(shards)
iterators = []

# create iterators for each shard
for shard in shards:
	shard_id = shard["ShardId"]
#	iterators.append(kinesis.get_shard_iterator(stream, shard_id, "AT_SEQUENCE_NUMBER", shard["SequenceNumberRange"]["StartingSequenceNumber"])["ShardIterator"])
	iterators.append(kinesis.get_shard_iterator(stream, shard_id, "LATEST")["ShardIterator"])

# circular query of iterators
while 1==1:
	iterator = iterators.pop(0)
	out = kinesis.get_records(iterator, limit=500)
	iterators.append(out["NextShardIterator"])
	for record in out["Records"]:
		print(record["Data"])
	
	# Kinesis Limit: 5 transactions per second per shard for reads 
	time.sleep(0.2/n_shards)
예제 #7
0
  # Is the stream active?
  if response['StreamDescription']['StreamStatus'] == 'ACTIVE':
    # Yes, the stream is active and we are ready to start reading from it.  Get the shard ID.
    shard_id = response['StreamDescription']['Shards'][0]['ShardId']
    break

  # The stream is not active, wait for 15 seconds and try again
  time.sleep(15)
else:
  # The stream took too long to become active
  raise TimeoutError('Stream is still not active, aborting...')

# Get the shard iterator and get only new data (TRIM_HORIZON)
response = kinesis.get_shard_iterator(stream_name, shard_id, 'TRIM_HORIZON')
shard_iterator = response['ShardIterator']

# Loop forever
while True:
  # Get the next set of records
  response = kinesis.get_records(shard_iterator)

  # Get the next shard iterator
  shard_iterator = response['NextShardIterator']

  # Loop through all of the records and print them
  for line in response['Records']:
    print line

  # Sleep for a second so we don't hit the Kinesis API too fast
  time.sleep(1)
예제 #8
0
iterator_type='LATEST'

stream=kinesis.describe_stream(streamName)
print(json.dumps(stream,sort_keys=True,indent=2,separators=(',',':')))
shards=stream['StreamDescription']['Shards']
print('# Shard Count:', len(shards))

def processRecords(records):
    for record in records:
        text=record['Data'].lower()
        print 'Processing record with data:' + text

i=0
response=kinesis.get_shard_iterator(streamName,shards[0]['ShardId'], 'TRIM_HORIZON',starting_sequence_number=None)
next_iterator=response['ShardIterator']
print('Gettinng next records using iterator:', next_iterator)
while i<4000:
    try:
       response=kinesis.get_records(next_iterator,limit=1)
             #print response
       if len(response['Records'])>0:
             #print 'Number of records fetched:' + str(len(response['Records']))
             processRecords(response['Records'])

       next_iterator=response['NextShardIterator']
       time.sleep(1)
       i=i+1

    except ProvisionedThroughputExceededException as ptee:
       print(ptee.message)
       time.sleep(5)
예제 #9
0
'''
      Informando de onde vamos pegar os dados e como vamos iterar 
'''
shard_it = kinesis.get_shard_iterator("big-data-analytics-desafio", shard_id, "LATEST")["ShardIterator"]

result = {}
t = 0

'''
      iniciando while que vai parar de rodar apenas quando pegarmos a quantidade de registros validos informado no inicio
'''
while i == 0:
    '''
          atribuindo uma variavel com valores coletados e ja buscando o proximo registro
    '''
    out = kinesis.get_records(shard_it)
    shard_it = out["NextShardIterator"]

    '''
          Verifica se existe informações para processar 
    '''
    if len(out["Records"]) != 0:
        '''
              Controle de quantos registros validos pegamos
        '''
        t += 1

        '''
              Iteramos sobre o Record encontrado
        '''
        for resp in out["Records"]:
from boto import kinesis

from settings import KINESIS_REGION, KINESIS_STREAM_NAME

FLUSH_INTERVAL = 5
BATCH_SIZE = 20
running = True


def process_messages(batch_msgs):
    print('messages processed: {}'.format(len(batch_msgs['Records'])))
    for msg in batch_msgs['Records']:
        print('message: "{}" offset: {}'.format(msg['Data'], msg['SequenceNumber']))


print('Connect to Kinesis Streams')
kinesis = kinesis.connect_to_region(region_name=KINESIS_REGION)
stream = kinesis.describe_stream(KINESIS_STREAM_NAME)
shardId = stream['StreamDescription']['Shards'][0]['ShardId']
shardIterator = kinesis.get_shard_iterator(KINESIS_STREAM_NAME, shardId, 'TRIM_HORIZON')

print('Kinesis consumer started!')
while running:
    msgs = kinesis.get_records(shardIterator['ShardIterator'], limit=BATCH_SIZE)

    process_messages(msgs)
    shardIterator['ShardIterator'] = msgs['NextShardIterator']

    print('\nnext batch in {} seconds...'.format(FLUSH_INTERVAL))
    time.sleep(FLUSH_INTERVAL)