def lambda_handler(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Fetch the S3 object that triggered the event
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
    except Exception as e:
        print(e)
        raise e

    # Parse the object body: strip the trailing ',\n' and wrap the
    # comma-separated records in brackets to form a JSON array
    try:
        s3_file_content = response['Body'].read().decode('utf-8')
        if s3_file_content.endswith(',\n'):
            s3_file_content = s3_file_content[:-2]
        tweets_str = '[' + s3_file_content + ']'
        tweets = json.loads(tweets_str)
    except Exception as e:
        print(e)
        raise e

    # Index the parsed tweets into Elasticsearch
    try:
        twitter_to_es.load(tweets)
    except Exception as e:
        print(e)
        raise e
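The handlers in this file rely on module-level state that is created once per Lambda container: the json import, a shared boto3 S3 client, and the project's twitter_to_es helper. A minimal sketch of that setup, mirroring the imports that appear in the full script further below:

import json

import boto3

import twitter_to_es  # project helper that bulk-indexes parsed tweets into Elasticsearch

# Creating the client at module level lets Lambda reuse it (and its
# connection pool) across invocations in a warm container.
s3 = boto3.client('s3')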
def manual_function(event, context):
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Fetch the S3 object that triggered the event
    try:
        resp = s3.get_object(Bucket=bucket, Key=key)
    except Exception as e:
        print(e)
        print('Error getting object')
        raise e

    # Parse the object body into a JSON array of tweets
    try:
        s3_content = resp['Body'].read().decode('utf-8')
        if s3_content.endswith(',\n'):
            s3_content = s3_content[:-2]
        tweets_str = '[' + s3_content + ']'
        tweets = json.loads(tweets_str)
    except Exception as e:
        print(e)
        print('Error loading json from object')
        raise e

    # Index the parsed tweets into Elasticsearch
    try:
        twitter_to_es.load(tweets)
    except Exception as e:
        print(e)
        print('Error loading data into ElasticSearch')
        raise e
def lambda_handler(event, context):
    for record in event['Records']:
        # Get the bucket name and key for the new file
        bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']

        # Get the S3 object
        try:
            obj = s3.get_object(Bucket=bucket, Key=key)
        except Exception as e:
            print(e)
            print('Error getting object {} from bucket {}. '
                  'Make sure they exist and your bucket is in the same '
                  'region as this function.'.format(key, bucket))
            raise e

        # Parse the S3 object content as newline-delimited JSON
        try:
            # https://stackoverflow.com/questions/31976273/open-s3-object-as-a-string-with-boto3
            print(obj)
            s3_file_content = obj['Body'].read().decode('utf-8')
            # Earlier approach (trailing-comma cleanup), kept for reference
            # while troubleshooting:
            # if s3_file_content.endswith(',\n'):
            #     s3_file_content = s3_file_content[:-2]
            # tweets_str = '[' + s3_file_content + ']'
            # tweets = json.loads(tweets_str)
            tweets = [json.loads(jline) for jline in s3_file_content.splitlines()]
        except Exception as e:
            print(e)
            print('Error loading json from object {} in bucket {}'.format(key, bucket))
            raise e

        # Load data into ES
        try:
            twitter_to_es.load(tweets)
        except Exception as e:
            print(e)
            print('Error loading data into ElasticSearch')
            raise e
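The per-line json.loads above treats the object as newline-delimited JSON, one record per line, which sidesteps the trailing-comma cleanup entirely. A standalone check of that parsing step, using an invented two-record payload:

import json

# Hypothetical payload: two newline-delimited JSON records, the shape the
# delivery stream writes when each record is terminated with a plain '\n'.
sample = '{"id": 1, "text": "hello"}\n{"id": 2, "text": "world"}\n'

tweets = [json.loads(jline) for jline in sample.splitlines()]
assert [t['id'] for t in tweets] == [1, 2]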
def lambda_handler(event, context):
    # print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Getting the S3 object
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. '
              'Make sure they exist and your bucket is in the same '
              'region as this function.'.format(key, bucket))
        raise e

    # Parse the S3 object content (JSON): join the newline-separated records
    # with commas, strip any trailing comma, and wrap the result in brackets
    # to form a JSON array
    try:
        s3_file_content = response['Body'].read()
        tweet_array = ','.join(s3_file_content.decode().split('\n'))
        if tweet_array.endswith(','):
            tweet_array = tweet_array[:-1]
        tweets_str = '[' + tweet_array + ']'
        tweets = json.loads(tweets_str)
    except Exception as e:
        print(e)
        print('Error loading json from object {} in bucket {}'.format(key, bucket))
        raise e

    # Load data into ES
    try:
        twitter_to_es.load(tweets)
    except Exception as e:
        print(e)
        print('Error loading data into ElasticSearch')
        raise e
def lambda_handler(event, context):
    print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    # Getting the S3 object
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. '
              'Make sure they exist and your bucket is in the same '
              'region as this function.'.format(key, bucket))
        raise e

    # Parse the S3 object content (JSON)
    try:
        s3_file_content = response['Body'].read().decode('utf-8')
        # Clean the trailing comma
        if s3_file_content.endswith(',\n'):
            s3_file_content = s3_file_content[:-2]
        tweets_str = '[' + s3_file_content + ']'
        tweets = json.loads(tweets_str)
    except Exception as e:
        print(e)
        print('Error loading json from object {} in bucket {}'.format(key, bucket))
        raise e

    # Load data into ES
    try:
        twitter_to_es.load(tweets)
    except Exception as e:
        print(e)
        print('Error loading data into ElasticSearch')
        raise e
import json

import boto3

import config
import twitter_to_es
from elasticsearch import Elasticsearch


def create_index(es, index_name, mapping):
    print('creating index {}...'.format(index_name))
    print(json.dumps({'mappings': mapping}))
    es.indices.create(index_name, body={'mappings': mapping})


bucket = 'mentzera'
# key = 'twitter/2015/10/16/14/twitter-stream-1-2015-10-16-14-21-36-7e019a27-7b3d-47d5-8805-344832c67be4'
key = 'twitter/2015/10/20/20/twitter-stream-1-2015-10-20-20-23-33-8f39af04-ee9f-45d6-a2da-06dc068f0c15'

s3 = boto3.client('s3')
response = s3.get_object(Bucket=bucket, Key=key)
s3_file_content = response['Body'].read().decode('utf-8')

# Clean the trailing comma and wrap the records in brackets to form a JSON array
if s3_file_content.endswith(',\n'):
    s3_file_content = s3_file_content[:-2]
tweets_str = '[' + s3_file_content + ']'
tweets = json.loads(tweets_str)
print(len(tweets))

twitter_to_es.load(tweets)
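For reference, a sketch of how create_index might be invoked; the endpoint, index name, and field mapping below are invented placeholders rather than values taken from config:

# Hypothetical usage of create_index; a real host and mapping would come from config.
es = Elasticsearch(hosts=['https://search-example.us-east-1.es.amazonaws.com'])

tweet_mapping = {
    'tweet': {  # document type, matching the pre-6.x Elasticsearch era of this code
        'properties': {
            'timestamp_ms': {'type': 'date'},
            'text': {'type': 'string'},
        }
    }
}

create_index(es, 'tweets', tweet_mapping)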
# Later revision of the script above; imports and the create_index header are unchanged.
def create_index(es, index_name, mapping):
    print('creating index {}...'.format(index_name))
    # es.indices.create(index_name, body=json.dumps(mapping_dict))
    es.indices.create(index_name, body={'mappings': mapping})
    print({'mapping': mapping})


bucket = 'store-twitter-stream'
# key = 'twitter/2015/10/16/14/twitter-stream-1-2015-10-16-14-21-36-7e019a27-7b3d-47d5-8805-344832c67be4'
# key = 'twitter/2015/10/20/20/twitter-stream-1-2015-10-20-20-23-33-8f39af04-ee9f-45d6-a2da-06dc068f0c15'
# key = 'twitter/raw-data/2017/07/28/15/twitter-delivery-stream-1-2017-07-28-15-46-45-c7deaace-4db4-4711-b07c-08bbbf3fe451'
# key = 'twitter/raw-data/2017/08/01/15/twitter-delivery-stream-1-2017-08-01-15-00-56-f458fe4e-c192-458b-a42f-1512f8a18b95'
# key = 'twitter/raw-data/2017/08/01/15/twitter-delivery-stream-1-2017-08-01-15-05-56-89a3c060-7119-4332-a034-5f1089a86335'
# key = 'twitter/raw-data/2017/07/20/00/twitter-delivery-stream-1-2017-07-20-00-01-21-30463ecb-fdf1-4ac8-810c-2e8f85123cb0'
# key = 'twitter/raw-data/2017/07/20/21/twitter-delivery-stream-1-2017-07-20-21-04-20-8bba6196-cdb2-4daa-a64a-10c8b06314ff'
# key = 'twitter/raw-data/2017/07/21/00/twitter-delivery-stream-1-2017-07-21-00-09-56-0d959232-f4cd-440e-b2b3-ac9b95aa2a04'
key = 'twitter/raw-data/2017/07/22/18/twitter-delivery-stream-1-2017-07-22-18-03-16-8baafc4a-f1ee-46e2-b2f7-3ebc45eaabcb'

s3 = boto3.client('s3')
response = s3.get_object(Bucket=bucket, Key=key)
s3_file_content = response['Body'].read().decode('utf-8')

# Clean the trailing comma and wrap the records in brackets to form a JSON array
if s3_file_content.endswith(',\n'):
    s3_file_content = s3_file_content[:-2]
tweets_str = '[' + s3_file_content + ']'
# Alternative: insert commas between back-to-back objects instead:
# tweets_str = '[' + s3_file_content.replace('}{', '},\n{') + ']'
# Debug: dump the assembled JSON for inspection
# with open('/tmp/1.txt', 'w') as text_file:
#     text_file.write(tweets_str)
tweets = json.loads(tweets_str)
print(len(tweets))

twitter_to_es.load(tweets)
def send_to_es(line):
    doc = json.loads(line)
    # Skip Twitter streaming "limit" notices, which report dropped
    # messages and are not tweets
    if "limit" not in doc:
        twitter_to_es.load(doc)
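A sketch of driving send_to_es from an S3 object, reusing the s3 client and the bucket/key pattern from the scripts above; feeding it one line at a time matches the newline-delimited format the other handlers parse:

# Hypothetical driver: push each newline-delimited record through send_to_es.
response = s3.get_object(Bucket=bucket, Key=key)
for line in response['Body'].read().decode('utf-8').splitlines():
    if line.strip():  # skip blank lines
        send_to_es(line)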