コード例 #1
0
ファイル: wikipedia_load.py プロジェクト: DiUS/lambdastorm
def process_article(article_path):
    """Read one extracted article file and send it to Kinesis.

    The first line of the file must be a header of the form
    ``<doc id="..." url="..." title="...">``. The article body is every
    line after the header except the final one. The document sent to
    Kinesis carries a freshly generated UUID as ``article_id`` (the numeric
    id from the header is not used), plus the header's URL and title.

    When ``INSERT_IN_BATCHES`` is true the JSON document is appended to the
    module-level ``doc_buffer`` and the buffer is flushed once it holds
    ``BATCH_SIZE`` documents; otherwise the document is sent immediately.

    Args:
        article_path: Path of the article file to read.

    Returns:
        None. A message is printed if the header line cannot be parsed.
    """
    # Only doc_buffer is rebound here; the other module globals are read-only.
    global doc_buffer

    with open(article_path, "r") as article_file:
        text = article_file.read()

    lines = text.split("\n")
    header = lines[0]
    m = re.match(r"<doc id=\"([0-9]+)\" url=\"(.+)\" title=\"(.+)\"", header)
    if not m:
        # Report the path; the file object is already closed at this point.
        print("Header not found in {0}".format(article_path))
        return

    # Drop the header line and the last line of the file.
    article_text = "\n".join(lines[1:-1])
    doc = {"article_id": str(uuid.uuid4()), "url": m.group(2), "title": m.group(3), "body": article_text}

    if INSERT_IN_BATCHES:
        doc_buffer.append(json.dumps(doc))
        # ">=" so an over-full buffer is still flushed.
        if len(doc_buffer) >= BATCH_SIZE:
            # NOTE(review): argument order is kept from the original call;
            # confirm it matches the put_records signature of `kinesis`.
            kinesis.put_records(kinesis_stream, doc_buffer, str(uuid.uuid4()))
            doc_buffer = []
    else:
        kinesis.put_record(kinesis_stream, json.dumps(doc), str(uuid.uuid4()))
        print("Sent article '{}' ({})".format(doc["title"], doc["article_id"]))
コード例 #2
0
    def put_record(self, record):
        """
        Serialize ``record`` to JSON and put it on this object's Kinesis stream.

        The target stream is ``self.stream_name`` and the partition key is
        whatever ``self.get_partition_key()`` returns. Kinesis records are
        limited to 1MB; that limit is not checked here.
        """
        payload = json.dumps(record)
        partition_key = self.get_partition_key()
        kinesis.put_record(self.stream_name, payload, partition_key)
コード例 #3
0
ファイル: RedditTester.py プロジェクト: esaaren/ErikPython
def run():
    """Stream positively scored r/askreddit comments into Kinesis.

    Connects to Kinesis in ca-central-1, opens the live comment stream of
    the ``askreddit`` subreddit, and for every comment whose ``ups`` is
    greater than zero prints it and puts a JSON-serialized
    ``RedditComment`` on the "ErikSparkPOC" stream. The stream is assumed
    to exist already. The loop stops once more than 100000 comments have
    been seen, whether or not they were sent.
    """
    from boto import kinesis as boto_kinesis

    # A separate name keeps the imported module and the connection apart.
    connection = boto_kinesis.connect_to_region("ca-central-1")

    # SECURITY(review): API credentials are hard-coded in source. They
    # should be rotated and loaded from the environment or a secrets store.
    reddit = praw.Reddit(client_id='gluFwvMrQLqLuA',
                         client_secret='nowLOmNuC8tS76mrc-LQUlarngw',
                         user_agent='testscript by /u/plasmatrendybot',
                         password='******',
                         username='******')
    # Subreddit name
    subreddit = reddit.subreddit('askreddit')
    comments = subreddit.stream.comments()
    escape_limit = 100000
    for seen, comment in enumerate(comments, start=1):
        if comment.ups > 0:
            print(comment.body, comment.ups)
            # Non-ASCII characters are dropped from the comment body.
            reddit_comment = RedditComment(
                comment.body.encode('ascii', 'ignore'))
            try:
                connection.put_record("ErikSparkPOC",
                                      json.dumps(reddit_comment),
                                      "partitionkey")
            except Exception as exc:
                # Best effort: report the cause and carry on with the next
                # comment, without swallowing KeyboardInterrupt/SystemExit.
                print("Failed to insert to Kinesis: {}".format(exc))
        if seen > escape_limit:
            break
コード例 #4
0
def extractCSV(csvfile):
    """Send every row of a comma-separated securities file to Kinesis.

    Each row must provide the columns ``Securityid``, ``Ticker``, ``Price``
    and ``Date``. They are passed to ``getPricesData``, whose result is
    JSON-encoded, put on the "SecurityStream" stream and printed.

    Args:
        csvfile: Path of the CSV file to read (first row is the header).
    """
    with open(csvfile) as sec_file:
        for record in csv.DictReader(sec_file, delimiter=','):
            prices = getPricesData(
                record['Securityid'],
                record['Ticker'],
                record['Price'],
                record['Date'],
            )
            payload = json.dumps(prices)
            kinesis.put_record("SecurityStream", payload, "partitionkey")
            print(payload)
コード例 #5
0
def run():
    """Put ten JSON-serialized ``User("Demo")`` records on "ErikDemo".

    Connects to Kinesis in ca-central-1 and sends each record with the
    fixed partition key "partitionkey". The stream is assumed to exist
    already; it is not created here.
    """
    from boto import kinesis as boto_kinesis

    # A separate name keeps the imported module and the connection apart.
    connection = boto_kinesis.connect_to_region("ca-central-1")

    # range() works on both Python 2 and 3; the index itself is not needed.
    for _ in range(10):
        user = User("Demo")
        print("Did I make it here?")
        connection.put_record("ErikDemo", json.dumps(user), "partitionkey")
コード例 #6
0
def runController():
    """Read the sound sensor once and put the reading on the Kinesis stream.

    The record is ``"<utc timestamp>:<sound value>"``. The stream name and
    partition key come from the module-level ``streamName`` and
    ``partitionKey``. The sequence number returned by Kinesis is printed.
    """
    sound = readSoundSensor()
    timestamp = datetime.datetime.utcnow()
    # NOTE: str(timestamp) itself contains ':' characters, so consumers must
    # split the sound value off from the right-hand side.
    record = str(timestamp) + ":" + str(sound)
    print("Putting record in stream:" + record)
    response = kinesis.put_record(stream_name=streamName, data=record,
                                  partition_key=partitionKey)
    # Format explicitly so the output is a plain line on Python 2 and 3
    # rather than a tuple repr on Python 2.
    print("-=put seqNum: {}".format(response['SequenceNumber']))
コード例 #7
0
def encode_and_send_frame(frame,
                          frame_count,
                          enable_kinesis=True,
                          enable_rekog=False,
                          write_file=False):
    """JPEG-encode one frame and optionally save, stream and label it.

    Args:
        frame: Image passed to ``cv2.imencode`` for JPEG encoding.
        frame_count: Frame number; stored in the package and used in the
            output file name.
        enable_kinesis: When true, pickle the frame package and put it on
            the module-level ``stream_name`` Kinesis stream.
        enable_rekog: When true, send the JPEG bytes to
            ``rekog_client.detect_labels`` and print the response.
        write_file: When true, write the JPEG to ``img_<frame_count>.jpg``
            in the current directory.

    Returns:
        None. Any exception is caught and printed so that one bad frame
        does not stop the caller.
    """
    try:
        # Convert the OpenCV image to JPEG bytes.
        retval, buff = cv2.imencode(".jpg", frame)
        if not retval:
            # imencode reports failure through its first return value.
            print("Failed to encode frame {} as JPEG".format(frame_count))
            return

        img_bytes = bytearray(buff)

        # Seconds since the Unix epoch. The current time is taken directly
        # in UTC; localizing a naive local time as UTC would skew the value
        # by the machine's UTC offset.
        utc_dt = datetime.datetime.now(pytz.utc)
        now_ts_utc = (
            utc_dt -
            datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)).total_seconds()

        frame_package = {
            'ApproximateCaptureTime': now_ts_utc,
            'FrameCount': frame_count,
            'ImageBytes': img_bytes
        }

        if write_file:
            print("Writing file img_{}.jpg".format(frame_count))
            # Binary mode, and closed even if the write fails.
            with open("img_{}.jpg".format(frame_count), 'wb') as target:
                target.write(img_bytes)

        # Put encoded image in kinesis stream
        if enable_kinesis:
            print("....Sending image to Kinesis")
            # NOTE(review): the payload is a pickle; consumers must only
            # unpickle data from this trusted producer.
            response = kinesis.put_record(
                stream_name=stream_name,  # StreamName in boto3
                data=cPickle.dumps(frame_package, 0),  # Data in boto3
                partition_key="partitionkey"  # PartitionKey in boto3
            )
            print('Response: \n', response)

        if enable_rekog:
            response = rekog_client.detect_labels(Image={'Bytes': img_bytes},
                                                  MaxLabels=rekog_max_labels,
                                                  MinConfidence=rekog_min_conf)
            print(response)

    except Exception as e:
        # Deliberate best effort: report and continue with the next frame.
        print(e)
コード例 #8
0
from boto import kinesis

from settings import KINESIS_PARTITION_KEY, KINESIS_STREAM_NAME, KINESIS_REGION

# Number of messages published in one run.
MESSAGE_COUNT = 1000

print('Kinesis stream producer started!')
# Rebinds the imported ``kinesis`` module name to a connection object.
kinesis = kinesis.connect_to_region(KINESIS_REGION)

# Publish numbered greetings (1..MESSAGE_COUNT) and echo progress for each.
for sequence in range(1, MESSAGE_COUNT + 1):
    message = 'hello world - {}'.format(sequence)
    kinesis.put_record(KINESIS_STREAM_NAME, message, KINESIS_PARTITION_KEY)
    print('{}/{} - {}'.format(sequence, MESSAGE_COUNT, message))
コード例 #9
0
from lib.users import Users
import json
from boto import kinesis

aws_region = "us-west-2"
user = "******"
password = "******"
interval = 1500
count = 100
stream_name = "TestStream"

# Connect once, under its own name. Rebinding ``kinesis`` inside the loop
# replaced the module with a connection object, so the second iteration
# failed when it called connect_to_region on that connection.
kinesis_conn = kinesis.connect_to_region(aws_region)

u = Users(user, password, interval, count)
x = u.list()
# x is presumably a streaming HTTP response; iter_lines() can yield empty
# lines, which are skipped rather than sent as empty records.
for line in x.iter_lines():
    if not line:
        continue
    kinesis_conn.put_record(stream_name, line, "partitionkey")
    print(line)
コード例 #10
0
from boto import kinesis
import testdata,json
# Creating fake data
class Users(testdata.DictFactory):
    """Factory for fake user records built from ``testdata`` field factories.

    Each generated record has the fields declared below: ``firstname``,
    ``lastname``, ``age`` and ``gender``.
    """
    firstname = testdata.FakeDataFactory("firstName")
    lastname = testdata.FakeDataFactory("lastName")
    # Random integer drawn with bounds 10 and 30.
    age = testdata.RandomInteger(10,30)
    # One of the two listed values.
    gender = testdata.RandomSelection(['female','male'])

# Use boto to connect to the region in which the Kinesis stream was created.
# This rebinds the imported ``kinesis`` module name to the connection.
kinesis = kinesis.connect_to_region("eu-west-1")

# Generate 50 fake users; echo each one, then send it as JSON.
for user in Users().generate(50):
    # print() with a single argument behaves the same on Python 2 and 3.
    print(user)
    kinesis.put_record("EdisonDemo", json.dumps(user), "partitionkey")

コード例 #11
0
from boto import kinesis
import testdata
import datetime
import json
import time


class Users(testdata.DictFactory):
    """Factory for fake payment records built from ``testdata`` factories.

    Each generated record has the fields declared below: ``custid``,
    ``amount`` and ``gateway``.
    """
    # Random integer drawn with bounds 1 and 10.
    custid = testdata.RandomInteger(1, 10)
    # Random integer drawn with bounds 1 and 100.
    amount = testdata.RandomInteger(1, 100)
    # One of the listed payment gateway names.
    gateway = testdata.RandomSelection(
        ['visa', 'paypal', 'master', 'stripe', 'wallet'])


if __name__ == '__main__':
    # Rebinds the imported ``kinesis`` module name to a connection object.
    kinesis = kinesis.connect_to_region("ap-southeast-1")
    # Show the target stream's description and the available streams.
    print(kinesis.describe_stream("payments"))
    print(kinesis.list_streams())

    # Send ten generated records, one per second, echoing each record and
    # the response returned by put_record.
    for user in Users().generate(10):
        print(user)
        print(kinesis.put_record("payments", json.dumps(user), "partitionkey"))
        time.sleep(1)
コード例 #12
0
import json
import requests
#import testdata
from boto import kinesis

# time.sleep() is used below, but ``time`` was never imported by this script.
import time

# Connecting to the Kinesis stream.
region = 'us-east-1'
kinesisStreamName = 'kinesis-demo'
# Rebinds the imported ``kinesis`` module name to a connection object.
kinesis = kinesis.connect_to_region(region)
# NOTE(review): this looks like a shard id, but it is passed to put_record
# as the partition key, so every record shares one partition key.
partitionKey = 'shardId-000000000000'

# Generate data and feed Kinesis: poll the coin API forever, sending one
# record per entry of the JSON response.
while True:
    response = requests.get(
        'https://chasing-coins.com/api/v1/top-coins/20').json()
    for coin in response:
        data = json.dumps(response[coin])
        print(data)
        kinesis.put_record(kinesisStreamName, data, partitionKey)

    time.sleep(0.2)

# class Users(testdata.DictFactory):
#     firstname = testdata.FakeDataFactory('firstName')
#     lastname = testdata.FakeDataFactory('lastName')
#     age = testdata.RandomInteger(10, 30)
#     gender = testdata.RandomSelection(['female', 'male'])

# for user in Users().generate(50):
#     print(user)
#     kinesis.put_record(kinesisStreamName, json.dumps(user), partitionKey)
コード例 #13
0
# Rebinds the ``kinesis`` name to a connection object for ``region``.
kinesis = kinesis.connect_to_region(region)

# Candidate hosts for the generated URLs; loop-invariant, so built once.
urls = ['foo.com', 'amazon.com', 'testing.com', 'google.com', 'sydney.com']

# Generate data and feed Kinesis forever.
while True:

    y = random_generator(10, "techsummit2015")

    x = random.randint(0, 4)
    userid = random.randint(25, 35) + 1200

    now = datetime.now()
    # month/day/year hour:minute:second, without zero padding.
    timeformatted = "{}/{}/{} {}:{}:{}".format(
        now.month, now.day, now.year, now.hour, now.minute, now.second)

    # Build the payload for the Kinesis put.
    # Schema of the input string: userid,url,timestamp
    putString = ','.join(
        [str(userid), 'www.' + urls[x] + '/' + y, timeformatted])
    # A random key from ten letters spreads records across partitions.
    partition_key = random.choice('abcdefghij')

    print(putString)

    result = kinesis.put_record(kinesisStreamName, putString, partition_key)

    print(result)
コード例 #14
0
# Needed for the stream-activation polling below.
import time

# Get our instance ID out of the metadata
instance_id = instance_metadata['instance-id']

# Use the instance ID as our stream name (it is also the partition key)
stream_name = instance_id

# Use only one shard
shard_count = 1

# Connect to Kinesis
kinesis = boto.connect_kinesis()

try:
    # Create the stream for this instance ID
    kinesis.create_stream(stream_name, shard_count)
except boto.kinesis.exceptions.ResourceInUseException:
    # Stream has already been created, this can be safely ignored
    pass

# Stream creation is asynchronous: a new stream starts in CREATING and
# rejects puts until it becomes ACTIVE, so wait before sending data.
while (kinesis.describe_stream(stream_name)['StreamDescription']
       ['StreamStatus'] != 'ACTIVE'):
    time.sleep(1)

# Read stdin line by line until EOF (readline returns '' at end of input),
# putting each line into Kinesis as it arrives.
for line in iter(sys.stdin.readline, ''):
    kinesis.put_record(stream_name, line, stream_name)
コード例 #15
0
from boto import kinesis
import testdata
import json

class Users(testdata.DictFactory):
    """Factory for fake user records (first name, last name, age, gender)."""

    firstname = testdata.FakeDataFactory('firstName')
    lastname = testdata.FakeDataFactory('lastName')
    age = testdata.RandomInteger(10, 30)
    gender = testdata.RandomSelection(['female', 'male'])


# Rebinds the imported ``kinesis`` module name to a connection object.
kinesis = kinesis.connect_to_region("us-east-1")

# Generate ten fake users; echo each one, then send it as JSON.
fake_users = Users().generate(10)
for user in fake_users:
    print(user)
    payload = json.dumps(user)
    kinesis.put_record("push-notifications", payload, "123")
コード例 #16
0
# Needed for the stream-activation polling below.
import time

# Get our instance ID out of the metadata
instance_id = instance_metadata['instance-id']

# Use the instance ID as our stream name (it is also the partition key)
stream_name = instance_id

# Use only one shard
shard_count = 1

# Connect to Kinesis
kinesis = boto.connect_kinesis()

try:
    # Create the stream for this instance ID
    kinesis.create_stream(stream_name, shard_count)
except boto.kinesis.exceptions.ResourceInUseException:
    # Stream has already been created, this can be safely ignored
    pass

# Stream creation is asynchronous: a new stream starts in CREATING and
# rejects puts until it becomes ACTIVE, so wait before sending data.
stream_status = None
while stream_status != 'ACTIVE':
    description = kinesis.describe_stream(stream_name)
    stream_status = description['StreamDescription']['StreamStatus']
    if stream_status != 'ACTIVE':
        time.sleep(1)

# Read data from stdin
line = sys.stdin.readline()

# Loop until there is no data left (readline returns '' at end of input)
while line:
    # Put the data into Kinesis
    kinesis.put_record(stream_name, line, stream_name)

    # Read the next line
    line = sys.stdin.readline()
コード例 #17
0
# import testdata
import json
from boto import kinesis
import sys

# seed the pseudorandom number generator
from random import seed
from random import randint
import time
import random

# Rebinds the imported ``kinesis`` module name to a connection object.
kinesis = kinesis.connect_to_region("us-east-2")
print(kinesis.list_streams())

# Fixed seed for the module-level generator that random.randint also uses,
# so the heart-rate sequence is the same on every run.
seed(1)

# Emit one simulated reading roughly every 0.2 seconds, forever.
i = 0
while True:
    new_dict = {
        "timestamp": int(time.time()),
        "dataNum": "data" + str(i),
        "device_name": "dev",
        "HeartRate": random.randint(60, 120),
    }

    payload = json.dumps(new_dict)
    print("loading ", payload)
    kinesis.put_record("end-stream", payload, "partitionkey")
    time.sleep(0.2)
    i += 1
コード例 #18
0
# Rebinds the ``kinesis`` name to a connection object for ``region``.
kinesis = kinesis.connect_to_region(region)

# Hosts to pick from when building URLs; constant, so defined outside the loop.
urls = ['foo.com', 'amazon.com', 'testing.com', 'google.com', 'sydney.com']

# Generate data and feed Kinesis forever.
while True:

    y = random_generator(10, "techsummit2015")

    x = random.randint(0, 4)
    userid = random.randint(25, 35) + 1200

    now = datetime.now()
    # Date as month/day/year and time as hour:minute:second, unpadded.
    date_part = "{}/{}/{}".format(now.month, now.day, now.year)
    time_part = "{}:{}:{}".format(now.hour, now.minute, now.second)
    timeformatted = date_part + " " + time_part

    # Build the payload for the Kinesis put.
    # Schema of the input string: userid,url,timestamp
    url = 'www.' + urls[x] + '/' + y
    putString = str(userid) + ',' + url + ',' + timeformatted
    # One of ten single-letter keys, chosen at random for each record.
    partition_key = random.choice('abcdefghij')

    print(putString)

    result = kinesis.put_record(kinesisStreamName, putString, partition_key)

    print(result)
コード例 #19
0
from lib.users import Users
import json
from boto import kinesis
user = "******"
password = "******"
interval = 1500
count = 100
stream_name = "TestStream"

# Connect once, before the loop and under a separate name. The original
# rebound ``kinesis`` on every iteration, so from the second line onward
# connect_to_region was looked up on a connection object and failed.
kinesis_conn = kinesis.connect_to_region("eu-west-1")

u = Users(user, password, interval, count)
x = u.list()
# x is presumably a streaming HTTP response; blank lines from iter_lines()
# are skipped instead of being sent as empty records.
for line in x.iter_lines():
    if line:
        kinesis_conn.put_record(stream_name, line, "partitionkey")
        print(line)
コード例 #20
0
import testdata
import json
from boto import kinesis

class Users(testdata.DictFactory):
    """Factory for fake user records (first name, last name, age, gender)."""

    firstname = testdata.FakeDataFactory('firstName')
    lastname = testdata.FakeDataFactory('lastName')
    age = testdata.RandomInteger(10, 30)
    gender = testdata.RandomSelection(['female', 'male'])


# Rebinds the imported ``kinesis`` module name to a connection object.
kinesis = kinesis.connect_to_region("eu-west-1")

# Generate 5000 fake users; echo each one, then send it as JSON.
fake_users = Users().generate(5000)
for user in fake_users:
    print(user)
    payload = json.dumps(user)
    kinesis.put_record("BotoDemo", payload, "partitionkey")
コード例 #21
0
import json
import datetime
import random
import testdata
from boto import kinesis

# Open a Kinesis connection for us-west-2. Note that this rebinds the
# imported ``kinesis`` module name to the connection object.
kinesis = kinesis.connect_to_region("us-west-2")


def getData(iotName, lowVal, highVal):
    """Build one simulated sensor reading.

    Args:
        iotName: Value stored under the ``"iotName"`` key.
        lowVal: Lower bound (inclusive) of the random reading.
        highVal: Upper bound (inclusive) of the random reading.

    Returns:
        A dict with ``"iotName"`` and a random integer ``"iotValue"`` drawn
        with ``random.randint(lowVal, highVal)``.
    """
    return {
        "iotName": iotName,
        "iotValue": random.randint(lowVal, highVal),
    }


# Emit simulated sensor readings forever. About 1% of them (rnd < 0.01) are
# anomalies drawn from 100-120; the rest are drawn from 10-20.
while 1:
    rnd = random.random()
    is_anomaly = rnd < 0.01
    low, high = (100, 120) if is_anomaly else (10, 20)

    data = json.dumps(getData("DemoSensor", low, high))
    # Both kinds of reading go to the same stream with the same key.
    kinesis.put_record("RawStreamData", data, "DemoSensor")

    # Anomalies are flagged in the console output with a banner prefix.
    banner = '***************************** anomaly ************************* ' if is_anomaly else ''
    print(banner + data)
コード例 #22
0
bruce = "awesome"

# ``bruce`` is never reassigned, so this replays the CSV file forever.
while bruce == "awesome":
    # NOTE(review): 'rb' suits the Python 2 csv module; on Python 3 the csv
    # reader needs a text-mode file.
    with open("Kinesis_Test_Data.csv", 'rb') as source_file:
        contents = csv.reader(source_file, delimiter=',', quotechar='|')

        for event in contents:
            # initial_uid must already be defined before this block runs;
            # it provides a running id across all replays.
            initial_uid = initial_uid + 1
            data = {
                'uid': initial_uid,
                'event': event[0],
                # strftime already returns a string.
                'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'unit': event[1],
                'package': event[2],
                'price': event[3],
                'platform': event[4],
            }

            json_data = json.dumps(data, ensure_ascii=False)

            print(json_data)

            kinesis.put_record("rawdata", json_data, "partitionkey")


    # After each full pass, fetch and print a LATEST iterator for shard 0.
    shard_id = 'shardId-000000000000'
    shard_it = kinesis.get_shard_iterator("rawdata", shard_id, "LATEST")["ShardIterator"]
    print(shard_it)