import json
import boto3
import twitter_to_es

# S3 client shared by the handlers in these examples
s3 = boto3.client('s3')


def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    try:
        response = s3.get_object(Bucket=bucket, Key=key)
              
    except Exception as e:
        print(e)
        raise e
    
    try:
        s3_file_content = response['Body'].read().decode('utf-8')
        if s3_file_content.endswith(',\n'):
            s3_file_content = s3_file_content[:-2]
        tweets_str = '[' + s3_file_content + ']'
        tweets = json.loads(tweets_str)
   
    except Exception as e:
        print(e)
        raise e
    
    try:
        twitter_to_es.load(tweets)

    except Exception as e:
        print(e)
        raise e    
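
A minimal illustration of the trailing-comma cleanup used above. The sample body is hypothetical, but mirrors the ',\n'-separated format the handler expects from the delivered objects:

body = '{"id_str": "1"},\n{"id_str": "2"},\n'
if body.endswith(',\n'):
    body = body[:-2]
tweets = json.loads('[' + body + ']')
assert len(tweets) == 2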
Example #2
def manual_function(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    try:
        resp = s3.get_object(Bucket=bucket, Key=key)

    except Exception as e:
        print(e)
        print('Error getting object')
        raise e

    try:
        s3_content = resp['Body'].read().decode('utf-8')
        if s3_content.endswith(',\n'):
            s3_content = s3_content[:-2]
        tweets_str = '[' + s3_content + ']'
        tweets = json.loads(tweets_str)

    except Exception as e:
        print(e)
        print('Error loading json from object')
        raise e

    try:
        twitter_to_es.load(tweets)

    except Exception as e:
        print(e)
        print('Error loading data into ElasticSearch')
        raise e
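
For local testing, manual_function can be invoked directly with a hand-built S3 event; the bucket and key below are hypothetical:

test_event = {
    'Records': [{
        's3': {
            'bucket': {'name': 'my-tweet-bucket'},          # hypothetical bucket
            'object': {'key': 'twitter/raw-data/sample'},   # hypothetical key
        }
    }]
}
manual_function(test_event, None)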
Example #3
def lambda_handler(event, context):
    for record in event['Records']:

        # Get the bucket name and key for the new file
        bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']

        # Get s3 object, read, and split the file into lines
        try:
            obj = s3.get_object(Bucket=bucket, Key=key)

        except Exception as e:
            print(e)
            print(
                'Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'
                .format(key, bucket))
            raise e

        # Parse s3 object content: one JSON document per line
        try:
            # https://stackoverflow.com/questions/31976273/open-s3-object-as-a-string-with-boto3
            s3_file_content = obj['Body'].read().decode('utf-8')
            tweets = [
                json.loads(jline) for jline in s3_file_content.splitlines()
            ]

        except Exception as e:
            print(e)
            print('Error loading json from object {} in bucket {}'.format(
                key, bucket))
            raise e

        # Load data into ES
        try:
            twitter_to_es.load(tweets)

        except Exception as e:
            print(e)
            print('Error loading data into ElasticSearch')
            raise e
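
The twitter_to_es module is external to all of these examples. A minimal sketch of what its load function might look like, assuming a bulk indexer from the elasticsearch package; the endpoint and index name are assumptions:

from elasticsearch import Elasticsearch, helpers

es = Elasticsearch(['https://my-es-endpoint:443'])  # hypothetical endpoint

def load(tweets):
    # Bulk-index each tweet, keyed by the tweet's id_str
    actions = [
        {
            '_index': 'tweets',        # assumed index name
            '_id': tweet.get('id_str'),
            '_source': tweet,
        }
        for tweet in tweets
    ]
    helpers.bulk(es, actions)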
Example #4
def lambda_handler(event, context):
    # print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Getting s3 object
    try:
        response = s3.get_object(Bucket=bucket, Key=key)

    except Exception as e:
        print(e)
        print(
            'Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'
            .format(key, bucket))
        raise e

    # Parse s3 object content (JSON)
    try:
        s3_file_content = response['Body'].read().decode('utf-8')
        # join the newline-separated documents with commas
        tweet_array = ','.join(s3_file_content.split('\n'))

        # clean the trailing comma left by the final newline
        if tweet_array.endswith(','):
            tweet_array = tweet_array[:-1]

        tweets_str = '[' + tweet_array + ']'
        tweets = json.loads(tweets_str)

    except Exception as e:
        print(e)
        print('Error loading json from object {} in bucket {}'.format(
            key, bucket))
        raise e

    # Load data into ES
    try:
        twitter_to_es.load(tweets)

    except Exception as e:
        print(e)
        print('Error loading data into ElasticSearch')
        raise e
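
The same join-then-wrap parsing on a hypothetical two-line payload, for reference:

sample = b'{"id_str": "1"}\n{"id_str": "2"}\n'
tweet_array = ','.join(sample.decode().split('\n'))
if tweet_array.endswith(','):
    tweet_array = tweet_array[:-1]
tweets = json.loads('[' + tweet_array + ']')
assert len(tweets) == 2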
Example #5
def lambda_handler(event, context):
    print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Getting s3 object
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
              
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
    
    # Parse s3 object content (JSON)
    try:
        s3_file_content = response['Body'].read().decode('utf-8')
        # clean trailing comma
        if s3_file_content.endswith(',\n'):
            s3_file_content = s3_file_content[:-2]
        tweets_str = '[' + s3_file_content + ']'
        tweets = json.loads(tweets_str)
   
    except Exception as e:
        print(e)
        print('Error loading json from object {} in bucket {}'.format(key, bucket))
        raise e
    
    # Load data into ES
    try:
        twitter_to_es.load(tweets)

    except Exception as e:
        print(e)
        print('Error loading data into ElasticSearch')
        raise e
Example #6
import json
import boto3
import config
import twitter_to_es
from elasticsearch import Elasticsearch

def create_index(es, index_name, mapping):
    # Create an index with the given mappings, logging the request body
    print('creating index {}...'.format(index_name))
    print(json.dumps({'mappings': mapping}))
    es.indices.create(index_name, body={'mappings': mapping})
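
A usage sketch for create_index, reusing the logdate date mapping that was hardcoded in the original; the host and index name are assumptions:

es = Elasticsearch(['https://my-es-endpoint:443'])  # hypothetical endpoint
logs_mapping = {
    'logs_june': {
        'properties': {
            'logdate': {'type': 'date', 'format': 'dd/MM/yyyy HH:mm:ss'}
        }
    }
}
create_index(es, 'logs_june', logs_mapping)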
    

bucket = 'store-twitter-stream'
# key = 'twitter/2015/10/16/14/twitter-stream-1-2015-10-16-14-21-36-7e019a27-7b3d-47d5-8805-344832c67be4'
# key = 'twitter/2015/10/20/20/twitter-stream-1-2015-10-20-20-23-33-8f39af04-ee9f-45d6-a2da-06dc068f0c15'
# key = 'twitter/raw-data/2017/07/28/15/twitter-delivery-stream-1-2017-07-28-15-46-45-c7deaace-4db4-4711-b07c-08bbbf3fe451'
# key = 'twitter/raw-data/2017/08/01/15/twitter-delivery-stream-1-2017-08-01-15-00-56-f458fe4e-c192-458b-a42f-1512f8a18b95'
# key = 'twitter/raw-data/2017/08/01/15/twitter-delivery-stream-1-2017-08-01-15-05-56-89a3c060-7119-4332-a034-5f1089a86335'
# key = 'twitter/raw-data/2017/07/20/00/twitter-delivery-stream-1-2017-07-20-00-01-21-30463ecb-fdf1-4ac8-810c-2e8f85123cb0'
# key = 'twitter/raw-data/2017/07/20/21/twitter-delivery-stream-1-2017-07-20-21-04-20-8bba6196-cdb2-4daa-a64a-10c8b06314ff'
# key = 'twitter/raw-data/2017/07/21/00/twitter-delivery-stream-1-2017-07-21-00-09-56-0d959232-f4cd-440e-b2b3-ac9b95aa2a04'
key = 'twitter/raw-data/2017/07/22/18/twitter-delivery-stream-1-2017-07-22-18-03-16-8baafc4a-f1ee-46e2-b2f7-3ebc45eaabcb'
s3 = boto3.client('s3')
response = s3.get_object(Bucket=bucket, Key=key)
s3_file_content = response['Body'].read().decode('utf-8')
# clean trailing comma
if s3_file_content.endswith(',\n'):
    s3_file_content = s3_file_content[:-2]
tweets_str = '[' + s3_file_content + ']'

tweets = json.loads(tweets_str)
print(len(tweets))

twitter_to_es.load(tweets)
def send_to_es(line):
    # Twitter's streaming API interleaves "limit" notices with tweets;
    # only index documents that are actual tweets
    doc = json.loads(line)
    if "limit" not in doc:
        twitter_to_es.load(doc)
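
A hypothetical driver for send_to_es, feeding it one line per document from the S3 object fetched above:

response = s3.get_object(Bucket=bucket, Key=key)
for line in response['Body'].read().decode('utf-8').splitlines():
    if line.strip():
        send_to_es(line)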