예제 #1
0
def invoke_prediction(mail_body):
    """Classify an e-mail body as spam or ham through the SageMaker endpoint.

    Line breaks in ``mail_body`` are replaced by spaces, the text is encoded
    with ``one_hot_encode`` / ``vectorize_sequences`` and posted as JSON to
    the endpoint named by ``sagemaker_endpoint``.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``'Spam'`` when the
        returned label is 1 and ``'Ham'`` otherwise; ``confidence`` is the
        returned probability for spam and its complement for ham.
    """
    # Replace every flavour of line break with a single space, CRLF first so
    # that it is not turned into two spaces.
    flattened = mail_body
    for line_break in ('\r\n', '\r', '\n'):
        flattened = flattened.replace(line_break, ' ')

    client = boto3.client('runtime.sagemaker')

    # The helpers work on a batch, so wrap the single message in a list.
    one_hot = one_hot_encode([flattened], vocabulary_length)
    features = vectorize_sequences(one_hot, vocabulary_length)
    request_body = json.dumps(features.tolist())

    reply = client.invoke_endpoint(
        EndpointName=sagemaker_endpoint,
        ContentType='application/json',
        Body=request_body,
    )
    parsed = json.loads(reply['Body'].read().decode("utf-8"))

    is_spam = int(parsed['predicted_label'][0][0]) == 1
    probability = float(parsed['predicted_probability'][0][0])
    if is_spam:
        label, confidence = 'Spam', probability
    else:
        # Report the confidence in the *ham* verdict.
        label, confidence = 'Ham', 1 - probability

    print("Predicted Label = %s ; Prediction Confidence = %.2f" %
          (label, confidence))
    return label, confidence
예제 #2
0
def handler(event, context):
    """Handle an HTTP-style event and classify the SMS text in its body.

    * ``OPTIONS`` -> ``response(200, '')``
    * ``POST``    -> ``response(200, {...})`` with the predicted label and the
      sigmoid probability, both rendered with ``np.array2string``
    * any other method -> ``response(405, 'null')``

    Returns ``None`` when the event carries no ``httpMethod`` key.
    ``event['body']`` is read unconditionally, so it must be present.
    """
    sms = event['body']

    if 'httpMethod' not in event:
        return None

    method = event['httpMethod']
    if method == 'OPTIONS':
        return response(200, '')
    if method != 'POST':
        return response(405, 'null')

    # Non-ASCII characters are dropped before encoding the message.
    batch = [sms.encode('ascii', 'ignore')]
    one_hot = one_hot_encode(batch, vocabulary_lenght)
    features = vectorize_sequences(one_hot, vocabulary_lenght)

    sigmoid_output = net(mx.nd.array(features)).sigmoid()
    # ceil(p - 0.5) is 1 when p > 0.5 and zero otherwise; abs() clears the
    # sign of a negative zero.
    prediction = mx.nd.abs(mx.nd.ceil(sigmoid_output - 0.5))

    return response(200, {
        'predicted_label': np.array2string(prediction.asnumpy()[0][0]),
        'predicted_probability': np.array2string(
            sigmoid_output.asnumpy()[0][0]),
    })
예제 #3
0
def predictspam(body):
    """Classify ``body`` through the SageMaker endpoint ``ENDPOINT_NAME``.

    ``body`` is handed to ``one_hot_encode`` unchanged.
    NOTE(review): it is presumably already a list of messages - confirm
    against the caller.

    Returns:
        ``(label, score)`` where ``label`` is ``'Ham'`` when the returned
        label equals 0 and ``'Spam'`` otherwise, and ``score`` is the returned
        probability rounded to four decimals and multiplied by 100.
    """
    vocabulary_length = 9013

    features = vectorize_sequences(
        one_hot_encode(body, vocabulary_length), vocabulary_length)
    request_body = json.dumps(features.tolist())

    reply = runtime.invoke_endpoint(
        EndpointName=ENDPOINT_NAME,
        ContentType="application/json",
        Body=request_body,
    )
    parsed = json.loads(reply["Body"].read())
    print("Prediction: ", parsed['predicted_label'])

    label = 'Ham' if parsed['predicted_label'][0][0] == 0 else 'Spam'
    score = round(parsed['predicted_probability'][0][0], 4)
    return label, score * 100
def checkForSpam(body):
    """Send a single message to the SageMaker endpoint and return its reply.

    ``body`` is wrapped in a one-element batch, encoded with
    ``one_hot_encode`` / ``vectorize_sequences`` and posted as JSON to
    ``ENDPOINT_NAME``.

    Returns:
        The endpoint's response body, JSON-decoded.
    """
    one_hot = one_hot_encode([body], vocabulary_length)
    features = vectorize_sequences(one_hot, vocabulary_length)
    request_body = json.dumps(features.tolist())

    reply = runtime.invoke_endpoint(
        EndpointName=ENDPOINT_NAME,
        ContentType='application/json',
        Body=request_body,
    )
    raw = reply["Body"].read().decode()
    return json.loads(raw)
def hit_sagemaker(text):
    """Send ``text`` to the SageMaker spam-classifier endpoint.

    The text is wrapped in a one-element batch, encoded with
    ``one_hot_encode`` / ``vectorize_sequences`` (vocabulary size 9013) and
    posted as JSON to the hard-coded endpoint.

    Returns:
        The endpoint's response body decoded as UTF-8 text. The body is
        returned as-is and is not parsed here, so the caller has to decode
        it to read the prediction.
    """
    ENDPOINT_NAME = "sms-spam-classifier-mxnet-2020-05-12-05-34-38-136"
    runtime = boto3.client('runtime.sagemaker')
    vocabulary_length = 9013

    test_messages = [text]
    one_hot_test_messages = one_hot_encode(test_messages, vocabulary_length)
    encoded_test_messages = vectorize_sequences(one_hot_test_messages,
                                                vocabulary_length)

    response = runtime.invoke_endpoint(
        EndpointName=ENDPOINT_NAME,
        ContentType='application/json',
        Body=json.dumps(encoded_test_messages.tolist()),
    )
    return response['Body'].read().decode('UTF-8')
def lambda_handler(event, context):
    """Classify the messages returned by ``read_from_s3`` via a CSV endpoint.

    The messages are encoded with a vocabulary size of 9013, serialised with
    ``json.dumps`` and stripped of their outer square brackets so the body can
    be sent as ``text/csv``. The endpoint's ``predictions`` are only printed;
    the handler always returns a fixed 200 response.
    """
    print(event)
    messages = read_from_s3()

    one_hot = one_hot_encode(messages, 9013)
    features = vectorize_sequences(one_hot, 9013)
    print(features)

    # Strip leading/trailing '[' first and ']' second. The two-step form is
    # kept on purpose: strip('[]') would behave differently on some inputs.
    csv_body = json.dumps(features.tolist()).strip('[').strip(']')

    session = boto3.Session()
    runtime = session.client(service_name='sagemaker-runtime',
                             region_name='us-east-1')
    reply = runtime.invoke_endpoint(
        EndpointName='sms-spam-classifier-ll-2020-05-07-07-24-58-952',
        ContentType='text/csv',
        Body=csv_body,
    )
    decoded = json.loads(reply['Body'].read().decode())
    print(decoded['predictions'])

    return {'statusCode': 200, 'body': json.dumps('Hello from Lambda!')}
예제 #7
0
import pandas as pd
import numpy as np
import pickle
from sms_spam_classifier_utilities import one_hot_encode
from sms_spam_classifier_utilities import vectorize_sequences

# Vocabulary size passed to one_hot_encode / vectorize_sequences. It was used
# below without ever being defined in this script (NameError).
# NOTE(review): 9013 is the value the inference handlers pass to the same
# helpers - confirm it matches the model being trained.
vocabulary_length = 9013

# The dataset is tab-separated without a header row: column 0 holds the
# 'ham'/'spam' tag, column 1 the message text.
df = pd.read_csv('dataset/SMSSpamCollection', sep='\t', header=None)
df[df.columns[0]] = df[df.columns[0]].map({'ham': 0, 'spam': 1})

targets = df[df.columns[0]].values
messages = df[df.columns[1]].values

# one hot encoding for each SMS message
one_hot_data = one_hot_encode(messages, vocabulary_length)
encoded_messages = vectorize_sequences(one_hot_data, vocabulary_length)

# Put the 0/1 target in the first column, followed by the encoded features.
df2 = pd.DataFrame(encoded_messages)
df2.insert(0, 'spam', targets)

# Split into training and validation sets (80%/20% split)
split_index = int(np.ceil(df.shape[0] * 0.8))
train_set = df2[:split_index]
val_set = df2[split_index:]

# Written without header and index, so the files contain only the values.
train_set.to_csv('dataset/sms_train_set.csv', header=False, index=False)
val_set.to_csv('dataset/sms_val_set.csv', header=False, index=False)


# The two files must be uploaded back to Amazon S3 so that the Amazon SageMaker training cluster can access them.
def lambda_handler(event, context):
    """Classify an e-mail stored in S3 and mail the verdict to its sender.

    For a non-empty ``event`` the handler:

    1. reads the object named by the first record's key from the
       ``filtered-mail-box`` bucket and parses it as an e-mail;
    2. extracts the plain-text body (the last part whose content type
       contains ``plain`` for multipart mail, the message payload otherwise);
    3. encodes the body, sends it to the SageMaker endpoint named by the
       ``ENDPOINT_NAME`` environment variable and reads the predicted label
       and probability;
    4. sends a summary through SES to the address in ``Return-Path``.

    Returns:
        A fixed ``{'statusCode': 200, ...}`` response, also for an empty
        event.

    Raises:
        KeyError: if ``ENDPOINT_NAME`` is not set or the event/response lacks
            an expected key.
    """
    s3 = boto3.client("s3")
    vocabulary_length = 9013
    ENDPOINT_NAME = os.environ['ENDPOINT_NAME']
    runtime = boto3.client('runtime.sagemaker')
    ses = boto3.client('ses')

    if event:

        print("My Event is : ", event)
        file_obj = event["Records"][0]
        filename = str(file_obj["s3"]['object']['key'])
        print("filename: ", filename)
        fileObj = s3.get_object(Bucket="filtered-mail-box", Key=filename)
        print("file has been gotten!")
        msg = email.message_from_bytes(fileObj['Body'].read())
        subject = msg["Subject"]
        return_path = msg["Return-Path"]
        date = msg["Date"]

        # Start from an empty body so that mail without a plain-text part no
        # longer fails on an unbound variable further down.
        payload = ""
        if msg.is_multipart():
            for part in msg.walk():
                if "plain" in part.get_content_type():
                    # Later plain parts overwrite earlier ones.
                    payload = part.get_payload().rstrip("\n")
        else:
            # Single-part mail carries its text directly on the message.
            payload = msg.get_payload().rstrip("\n")

        # Drop the remaining line feeds so the classifier sees one line.
        newpayload = payload.replace('\n', '')
        print(newpayload)

        test_messages = [newpayload]
        one_hot_test_messages = one_hot_encode(test_messages,
                                               vocabulary_length)
        encoded_test_messages = vectorize_sequences(one_hot_test_messages,
                                                    vocabulary_length)
        payload_to_endpoint = json.dumps(encoded_test_messages.tolist())

        response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                           ContentType='application/json',
                                           Body=payload_to_endpoint)

        result = json.loads(response['Body'].read().decode())

        print(result)

        predicted_probability = result['predicted_probability'][0][0]
        predicted_label = result['predicted_label'][0][0]

        classification_confidence_score = predicted_probability * 100

        if predicted_label == 1.0:
            classification = 'SPAM'
        else:
            classification = 'HAM'

        print("The email was categorized as {} with a {}% confidence.".format(
            classification, classification_confidence_score))
        print(subject, return_path, date)

        SENDER = "Karan Mankar <*****@*****.**>"
        RECIPIENT = return_path

        # The reply announces a 240 character sample, so send exactly that
        # instead of the complete body.
        body_sample = payload[:240]

        BODY_TEXT = (
            "We received your email sent at {} with the subject {}.\n\n"
            "Here is a 240 character sample of the email body:{} \n\n"
            "The email was categorized as {} with a {}% confidence.".format(
                date, subject, body_sample, classification,
                classification_confidence_score))

        CHARSET = "UTF-8"

        response = ses.send_email(
            Destination={
                'ToAddresses': [
                    RECIPIENT,
                ],
            },
            Message={
                'Body': {
                    'Text': {
                        'Charset': CHARSET,
                        'Data': BODY_TEXT
                    },
                },
                'Subject': {
                    'Charset': CHARSET,
                    'Data': "Spam Classification",
                },
            },
            Source=SENDER,
        )

        print(response)

    return {'statusCode': 200, 'body': json.dumps('Hello from Lambda!')}
예제 #9
0
def lambda_handler(event, context):
    """Classify an e-mail stored in S3 and send the verdict to its sender.

    The sender address and endpoint name come from the ``sender`` and
    ``endpoint`` environment variables. The e-mail named by the first S3
    record is parsed, a text sample is cut out of its first payload part,
    classified through SageMaker, and a summary is passed to
    ``send_msg_to_visitor``.

    Returns:
        A fixed ``{'statusCode': 200, ...}`` response.
    """
    sender = os.environ["sender"]
    endpoint = os.environ["endpoint"]
    print(endpoint)

    s3_record = event["Records"][0]["s3"]
    bucket_name = s3_record["bucket"]["name"]
    key_name = s3_record["object"]["key"]

    s3 = boto3.client("s3")
    stored = s3.get_object(Bucket=bucket_name, Key=key_name)
    raw_email = stored['Body'].read().decode('utf-8')
    parsed = email.message_from_string(raw_email)

    # Render the first payload part as text and split it into lines.
    # NOTE(review): indexing [0] presumably expects a multipart message -
    # confirm what single-part mail should do.
    lines = str(parsed.get_payload()[0]).split("\n")
    print(lines)
    if len(lines) >= 2:
        # Drop the first two lines and the last one (presumably the part's
        # header lines and a trailing blank - verify) and join the rest.
        content = [" ".join(lines[2:-1])]
    else:
        content = lines
    print(content)

    subject = parsed.get("subject")
    received_date = parsed.get("Date")
    email_address = parsed.get("From")
    print(subject, received_date, email_address)

    vocabulary_length = 9013
    one_hot = one_hot_encode(content, vocabulary_length)
    features = vectorize_sequences(one_hot, vocabulary_length)

    runtime = boto3.client('sagemaker-runtime')
    reply = runtime.invoke_endpoint(
        EndpointName=endpoint,
        ContentType="application/json",
        Body=json.dumps(features.tolist()),
    )
    prediction = json.loads(reply["Body"].read().decode())

    if prediction["predicted_label"][0][0] > 0.5:
        label = "spam"
    else:
        label = "normal"
    probability = prediction["predicted_probability"][0][0] * 100

    reply_lines = [
        "We received your email sent at {} with the subject {}.".format(
            received_date, subject),
        "Here is a 240 character sample of the email body:",
        content[0][:240],
        "The email was categorized as {} with a {}% confidence.".format(
            label, probability),
    ]
    msg = "\n".join(reply_lines)
    print(msg)

    send_msg_to_visitor(sender, email_address, msg)
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
예제 #10
0
def lambda_handler(event, context):
    """Classify an e-mail stored in S3 and send a summary of the verdict.

    The raw e-mail named by the first S3 record is scanned line by line for
    its ``Subject:`` and ``From:`` headers; everything after the
    ``X-SES-Outgoing`` line is treated as the body. The body is classified
    through the SageMaker endpoint ``ENDPOINT_NAME`` and the result is mailed
    through SES. SES client errors are printed, not raised.

    Returns:
        ``None``.
    """
    print("DEBUG event:", event)
    s3_info = event['Records'][0]['s3']
    bucket_name = s3_info['bucket']['name']
    key_name = s3_info['object']['key']

    # get the email in S3
    s3_response = s3_client.get_object(
        Bucket=bucket_name,
        Key=key_name,
    )
    print('DEBUG s3_response:', s3_response)

    email_string = s3_response['Body'].read().decode('utf-8')
    email_arr = email_string.splitlines()

    email_subject = ""
    received_from = ""
    from_idx = 0
    for i, line in enumerate(email_arr):
        if line.startswith('Subject:'):
            # Skip "Subject: " (header name, colon and one space).
            email_subject = line[9:]
        elif line.startswith('From:'):
            received_from = line[6:]
        elif line.startswith('X-SES-Outgoing'):
            # The body is taken to start on the line after this header.
            from_idx = i + 1

        # Stop scanning once all three markers have been seen.
        if email_subject and received_from and from_idx:
            break

    body_arr = email_arr[from_idx:]
    print("DEBUG body_arr:", body_arr)

    body_string_to_check = ' '.join(body_arr)
    print("DEBUG body_string_to_check:", body_string_to_check)
    body_string_to_send = '\n'.join(body_arr)
    body_string_to_send = body_string_to_send[:240]

    # vectorizing message
    test_messages = [body_string_to_check]
    one_hot_test_messages = one_hot_encode(test_messages, vocabulary_length)
    encoded_test_messages = vectorize_sequences(one_hot_test_messages,
                                                vocabulary_length)
    json_message = json.dumps(encoded_test_messages.tolist())

    # send vectorized message to SageMaker
    sagemaker_response = runtime.invoke_endpoint(
        EndpointName=ENDPOINT_NAME,
        ContentType="application/json",
        Body=json_message)

    decoded_sagemaker_response = json.loads(
        sagemaker_response['Body'].read().decode())
    print("DEBUG decoded_sagemaker_response:", decoded_sagemaker_response)

    classification_number = decoded_sagemaker_response['predicted_label'][0][0]
    if classification_number == 0.0:
        classification = 'HAM'
    elif classification_number == 1.0:
        classification = 'SPAM'
    else:
        # Any other label used to leave `classification` unbound and crash
        # with a NameError a few lines below.
        classification = 'UNKNOWN'
    classification_confidence_score = decoded_sagemaker_response[
        'predicted_probability'][0][0] * 100
    print("DEBUG classification:", classification)
    print("DEBUG classification_confidence_score:",
          classification_confidence_score)

    # write response email; the S3 object's LastModified timestamp stands in
    # for the time the mail was sent
    email_time = s3_response['LastModified']

    # convert the timestamp to US Eastern time for display
    curr_tz = pytz.timezone("US/Eastern")
    curr_tz_time = email_time.astimezone(curr_tz)

    subject = f'Spam Indentification of email "{email_subject}"'
    body = f'''
    We received your email sent at {curr_tz_time.ctime()} with the subject "{email_subject}".
        
    Here is a 240 character sample of the email body:
    {body_string_to_send}
        
    The email was categorized as {classification} with a {classification_confidence_score}% confidence.
    '''

    # send response email
    try:
        response = ses_client.send_email(
            Destination={
                'ToAddresses': [
                    '*****@*****.**',
                ],
            },
            Message={
                'Body': {
                    'Text': {
                        'Charset': 'UTF-8',
                        'Data': body,
                    },
                },
                'Subject': {
                    'Charset': 'UTF-8',
                    'Data': subject,
                },
            },
            Source='*****@*****.**',
        )
    except ClientError as e:
        print("DEBUG error:", e.response['Error'])
    else:
        print("Email sent! Message ID:")
        print(response['MessageId'])
예제 #11
0
def lambda_handler(event, context):
    """Classify an e-mail stored in S3 and mail the verdict to its sender.

    The e-mail named by the first S3 record is parsed with the ``email``
    package. Its first inline ``text/plain`` part (or the message payload for
    single-part mail) is classified through the SageMaker endpoint
    ``ENDPOINT_NAME``, and the result is sent through SES to the ``From``
    address. SES client errors are printed, not raised.

    Returns:
        ``None``. The handler also returns early, without sending anything,
        when the message has no inline plain-text body.
    """
    print("DEBUG event:", event)
    s3_info = event['Records'][0]['s3']
    bucket_name = s3_info['bucket']['name']
    key_name = s3_info['object']['key']

    # get the email in S3
    s3_response = s3_client.get_object(
        Bucket=bucket_name,
        Key=key_name,
    )
    print('DEBUG s3_response:', s3_response)

    email_string = s3_response['Body'].read().decode('utf-8')
    msg = email.message_from_string(email_string)
    received_from = msg["from"]
    email_subject = msg["subject"]

    # reading the body part of msg
    email_payload = None
    charset = None
    if msg.is_multipart():
        for part in msg.walk():
            ctype = part.get_content_type()
            cdispo = str(part.get("Content-Disposition"))

            # skip any text/plain (txt) attachments
            if ctype == "text/plain" and "attachment" not in cdispo:
                # decode=True undoes the content-transfer-encoding.
                email_payload = part.get_payload(decode=True)
                charset = part.get_content_charset()
                break
    else:
        email_payload = msg.get_payload(decode=True)
        charset = msg.get_content_charset()

    if email_payload is None:
        # Previously this case crashed on an unbound variable.
        print("DEBUG no inline text/plain body found; nothing to classify")
        return

    # Honour the charset declared by the part and fall back to UTF-8.
    # Undecodable bytes are replaced rather than aborting the handler.
    try:
        email_body_string = email_payload.decode(charset or "utf-8",
                                                 errors="replace")
    except LookupError:
        # The declared charset is not a codec Python knows.
        email_body_string = email_payload.decode("utf-8", errors="replace")

    body_arr = email_body_string.splitlines()
    print("DEBUG body_arr:", body_arr)
    print("DEBUG received_from:", received_from)
    print("DEBUG email_subject:", email_subject)

    body_string_to_check = ' '.join(body_arr)
    print("DEBUG body_string_to_check:", body_string_to_check)
    body_string_to_send = '\n'.join(body_arr)
    body_string_to_send = body_string_to_send[:240]

    # vectorizing message
    test_messages = [body_string_to_check]
    one_hot_test_messages = one_hot_encode(test_messages, vocabulary_length)
    encoded_test_messages = vectorize_sequences(one_hot_test_messages,
                                                vocabulary_length)
    json_message = json.dumps(encoded_test_messages.tolist())

    # send vectorized message to SageMaker
    sagemaker_response = runtime.invoke_endpoint(
        EndpointName=ENDPOINT_NAME,
        ContentType="application/json",
        Body=json_message)

    decoded_sagemaker_response = json.loads(
        sagemaker_response['Body'].read().decode())
    print("DEBUG decoded_sagemaker_response:", decoded_sagemaker_response)

    classification_number = decoded_sagemaker_response['predicted_label'][0][0]
    if classification_number == 0.0:
        classification = 'HAM'
    elif classification_number == 1.0:
        classification = 'SPAM'
    else:
        # Any other label used to leave `classification` unbound and crash
        # with a NameError a few lines below.
        classification = 'UNKNOWN'
    classification_confidence_score = decoded_sagemaker_response[
        'predicted_probability'][0][0] * 100
    print("DEBUG classification:", classification)
    print("DEBUG classification_confidence_score:",
          classification_confidence_score)

    # write response email; the S3 object's LastModified timestamp stands in
    # for the time the mail was sent
    email_time = s3_response['LastModified']

    # convert the timestamp to US Eastern time for display
    curr_tz = pytz.timezone("US/Eastern")
    curr_tz_time = email_time.astimezone(curr_tz)

    subject = f'Spam Indentification of email "{email_subject}"'
    body = f'''
    We received your email sent at {curr_tz_time.ctime()} with the subject "{email_subject}".
        
    Here is a 240 character sample of the email body:
    {body_string_to_send}
        
    The email was categorized as {classification} with a {classification_confidence_score}% confidence.
    '''

    # send response email
    try:
        response = ses_client.send_email(
            Destination={
                'ToAddresses': [
                    received_from,
                ],
            },
            Message={
                'Body': {
                    'Text': {
                        'Charset': 'UTF-8',
                        'Data': body,
                    },
                },
                'Subject': {
                    'Charset': 'UTF-8',
                    'Data': subject,
                },
            },
            Source='*****@*****.**',
        )
    except ClientError as e:
        print("DEBUG error:", e.response['Error'])
    else:
        print("Email sent! Message ID:")
        print(response['MessageId'])
def lambda_handler(event, context):
    """Classify an e-mail stored in S3 and reply to its sender through SES.

    The object named by the first S3 record is read from the
    ``hw3-spam-detection-email-s1-cf`` bucket and parsed as an e-mail. Its
    plain-text payload is whitespace-normalised, classified through the
    SageMaker endpoint named by the ``myEnvParameterEndpointName``
    environment variable, and a summary is mailed to the ``From`` address.
    SES client errors are printed, not raised.

    Returns:
        A fixed ``{'statusCode': 200, ...}`` response.

    Raises:
        KeyError: if the environment variable is not set or the event or
            endpoint response lacks an expected key.
    """
    EndpointName = os.environ['myEnvParameterEndpointName']

    objectKey = event["Records"][0]["s3"]["object"]["key"]
    bucket = 'hw3-spam-detection-email-s1-cf'
    s3 = boto3.resource('s3')
    obj = s3.Object(bucket, objectKey)
    msg = obj.get()['Body'].read().decode('utf-8')
    msg = email.message_from_string(msg)

    pay_load_text = ''
    if msg.is_multipart():
        # Only top-level parts are inspected; the last text/plain one wins.
        for part in msg.get_payload():
            if part.get_content_type() == 'text/plain':
                pay_load_text = part.get_payload()
    else:
        pay_load_text = msg.get_payload()

    EMAIL_BODY = pay_load_text
    # Collapse whitespace runs to one space and drop asterisks before
    # classification; the reply still quotes the untouched body.
    messages = re.sub('[ \n\r\t\f]+', ' ', pay_load_text).replace('*',
                                                                  '').strip()
    print(messages)
    # Missing headers come back as None, which would break the string
    # concatenation below, so fall back to an empty string.
    EMAIL_RECEIVE_DATE = msg['Date'] or ''
    EMAIL_SUBJECT = msg['Subject'] or ''
    EMAIL_FROM = msg['from']

    # Encode the single message as a one-element batch.
    vocabulary_length = 9013
    messages = [messages]
    one_hot_messages = one_hot_encode(messages, vocabulary_length)
    encoded_messages = vectorize_sequences(one_hot_messages, vocabulary_length)
    payload = json.dumps(encoded_messages.tolist())

    # Predict by SageMaker
    sagemaker_client = boto3.client('sagemaker-runtime')
    response = sagemaker_client.invoke_endpoint(EndpointName=EndpointName,
                                                Body=payload,
                                                ContentType='application/json')

    result = json.loads(response['Body'].read().decode())
    CLASSIFICATION_CONFIDENCE_SCORE = result['predicted_probability'][0][0]
    predicted_label = result['predicted_label'][0][0]
    if predicted_label == 0:
        CLASSIFICATION = 'ham'
        # Report the confidence in the ham verdict.
        CLASSIFICATION_CONFIDENCE_SCORE = 1 - CLASSIFICATION_CONFIDENCE_SCORE
    else:
        CLASSIFICATION = 'spam'

    # Reply to the sender of the email through Amazon SES
    SENDER = "iPhone5 A1429 <*****@*****.**>"
    RECIPIENT = EMAIL_FROM
    AWS_REGION = "us-east-1"
    SUBJECT = 'Spam detection of ' + EMAIL_SUBJECT

    # The email body for recipients with non-HTML email clients.
    BODY_TEXT = ("We received your email sent at " + EMAIL_RECEIVE_DATE +
                 " with the subject " + EMAIL_SUBJECT + ".\n\n" +
                 "Here is the email body:\n" + EMAIL_BODY + "\n\n" +
                 "The email was categorized as " + CLASSIFICATION +
                 " with a " +
                 str(float(CLASSIFICATION_CONFIDENCE_SCORE) * 100) +
                 "% confidence.")

    # The character encoding for the email.
    CHARSET = "UTF-8"

    # No access keys are passed here: literal key strings embedded in source
    # are a security risk. boto3 resolves credentials through its default
    # chain instead, like the S3 and SageMaker clients above.
    ses_client = boto3.client('ses', region_name=AWS_REGION)

    # Try to send the email.
    try:
        response = ses_client.send_email(
            Destination={
                'ToAddresses': [
                    RECIPIENT,
                ],
            },
            Message={
                'Body': {
                    'Text': {
                        'Charset': CHARSET,
                        'Data': BODY_TEXT,
                    },
                },
                'Subject': {
                    'Charset': CHARSET,
                    'Data': SUBJECT,
                },
            },
            Source=SENDER,
        )
    # Display an error if something goes wrong.
    except ClientError as e:
        print(e.response['Error']['Message'])
    else:
        print("Email sent! Message ID:")
        print(response['MessageId'])

    return {'statusCode': 200, 'body': json.dumps('Hello from Lambda!')}
예제 #13
0
def lambda_handler(event, context):
    """Classify every e-mail referenced by the S3 event and reply via SES.

    For each record the stored e-mail is loaded, a body sample of at most 240
    characters is cut out of its raw text, the sample is classified with an
    ``MXNetPredictor``, and a reply quoting the sample and the verdict is sent
    to the address found in the ``From:`` line.

    Returns:
        A fixed ``{'statusCode': 200, ...}`` response after all records have
        been processed.
    """
    print("Received event: " + json.dumps(event))
    
    for each_email in event['Records']:
        bucket = each_email['s3']['bucket']['name']
        key = each_email['s3']['object']['key']
        
        # retrieve information part
        # NOTE(review): .Object(bucket, key) is called on `s3_client`, so it is
        # presumably a boto3 S3 *resource* despite its name - confirm.
        email_obj = s3_client.Object(bucket, key)
        # str() of the raw read result. The code below searches for the
        # two-character sequences backslash-r / backslash-n, so this is
        # presumably the repr of a bytes object - confirm.
        email_file_body = str(email_obj.get()['Body'].read())
        # Keep only the text from the LAST "From:" occurrence onward. If there
        # is none, rfind returns -1 and only the final character is kept.
        Info_start = email_file_body.rfind("From:")
        email_file_body = email_file_body[Info_start:]
        
        
        # get email body
        # Defaults; CLASSIFICATION stays '' if the predicted label below is
        # neither 1.0 nor 0.0.
        CLASSIFICATION = ''
        CLASSIFICATION_CONFIDENCE_SCORE = 0.00
        
        # Remove the escaped line breaks, then keep the text between the first
        # and the last "charset=" marker.
        email_body = email_file_body.replace("\\r", "").replace("\\n", "")
        left_charset = email_body.find("charset=")
        right_charset = email_body.rfind("charset=")
        email_body = email_body[left_charset:right_charset]
        # Cut at the last "--0000" (presumably a MIME boundary - verify) and
        # drop the first 15 characters (presumably `charset="UTF-8"`, which is
        # 15 characters long - verify). If "--0000" is absent, rfind returns
        # -1 and the slice merely drops the last character.
        right = email_body.rfind("--0000")
        email_body = email_body[15:right]
        # Limit the sample to 240 characters.
        if len(email_body) > 240:
            email_body = email_body[:240]
            
            
        # get the classification result of email_body
        # NOTE(review): 'spam' is presumably the endpoint name - confirm.
        mxnet_pred = MXNetPredictor('spam')
        vocabulary_length = 9013
        test_messages = [email_body]
        one_hot_test_messages = one_hot_encode(test_messages, vocabulary_length)
        encoded_test_messages = vectorize_sequences(one_hot_test_messages, vocabulary_length)

        result = mxnet_pred.predict(encoded_test_messages)
        # Probability expressed as a percentage, rounded to two decimals.
        CLASSIFICATION_CONFIDENCE_SCORE = round(float(result["predicted_probability"][0][0]) * 100, 2)
        if result["predicted_label"][0][0] == 1.0:
            CLASSIFICATION = "Spam Message"
        elif result["predicted_label"][0][0] == 0.0:
            CLASSIFICATION = "Ham Message"
        
        # get other email information
        # Split the raw text on the escaped CRLF sequence to get header lines.
        email_info = email_file_body.split("\\r\\n")
        email_address = ''
        email_date = ''
        email_subject = ''
        
        for each_info in email_info:
            if each_info.startswith('From: '):
                # Text after the first '<', minus the final character
                # (presumably the closing '>' - verify). Without a '<', find
                # returns -1 and the slice starts at index 0.
                starting_pos = each_info.find('<') + 1
                email_address = each_info[starting_pos:-1]
                
            elif each_info.startswith('Date: '):
                # Take the value after "Date: " and cut it two characters
                # before its first ':' - presumably to drop the time of day,
                # e.g. keeping "Fri, 26 Mar 2021" - verify.
                starting_pos = each_info.find(':') + 2
                temp = each_info[starting_pos:]
                temp_end = temp.find(':') - 2
                email_date = temp[:temp_end].strip()
                
            elif each_info.startswith('Subject: '):
                starting_pos = each_info.find(':') + 2
                email_subject = each_info[starting_pos:]
        
        reply1 = 'We received your email sent at ' + email_date + ' with the subject ' + email_subject + '.\n'
        reply2 = 'Here is a 240 character sample of the email body: \n'
        reply3 = email_body + '\n'
        reply4 = 'The email was categorized as ' + CLASSIFICATION + ' with a ' + str(CLASSIFICATION_CONFIDENCE_SCORE) + '% confidence.'
        
        # The reply reuses the original subject, goes to the extracted sender
        # address and carries a text and an HTML body.
        # NOTE(review): the HTML body concatenates the subject and body sample
        # without escaping.
        response = ses_client.send_email(
            Source = '*****@*****.**',
            Destination = {
                'ToAddresses': [
                    email_address,
                ]
            },
            Message = {
                'Subject': {
                    'Data': email_subject
                },
                'Body': {
                    'Text': {
                        'Data': reply1 + reply2 + reply3 + reply4
                    },
                    'Html': {
                        'Data': '<p>' + reply1 + '</p>' + '<p>' + reply2 + '</p>' + '<p>' + reply3 + '</p>' + '<p>' + reply4 + '</p>'
                    }
                }
            },
            ReplyToAddresses = [
                email_address,
            ]
        )
        
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
예제 #14
0
def lambda_handler(event, context):
    """Classify an e-mail stored in S3 and send the analysis to its sender.

    The e-mail named by the first S3 record is parsed, the payloads of its
    parts are concatenated and cleaned of some quoted-printable artefacts,
    and the result is classified through the SageMaker endpoint
    ``ENDPOINT_NAME``. A reply quoting the first 240 characters is sent with
    ``send_email``, using the original ``To`` address as sender and the
    original ``From`` address as recipient.

    Returns:
        A fixed ``{'statusCode': 200, ...}`` response.
    """
    s3 = boto3.client('s3', region_name='us-east-1')
    info = event['Records'][0]['s3']
    bucket = info['bucket']['name']
    name = info['object']['key']

    # Get the email
    raw_email = s3.get_object(Bucket=bucket, Key=name)['Body'].read()
    message = email.message_from_string(raw_email.decode("utf-8"))

    if message.is_multipart():
        # Concatenate the payload of every leaf part. Containers are skipped
        # because their payload is a list of sub-messages, which used to
        # raise a TypeError when appended to the string.
        ret_body = ''.join(part.get_payload()
                           for part in message.walk()
                           if not part.is_multipart())
    else:
        ret_body = message.get_payload()

    print(ret_body)

    # Remove a few quoted-printable artefacts by hand: an encoded en dash
    # plus space at line end, soft line breaks ("=" before CRLF) and an
    # encoded leading space ("=20").
    ret_body = ret_body.replace("=E2=80=93=20\r\n", "\r\n")
    ret_body = ret_body.replace("=\r\n", "\r\n").replace("\r\n=20", "\r\n")
    print(ret_body)
    body = ret_body.replace("\n", "").replace("\r", "")
    print(body)

    # Prepare the message to test
    vocabulary_length = 9013
    test_msg = [body]
    one_hot_test_messages = one_hot_encode(test_msg, vocabulary_length)
    encoded_test_messages = vectorize_sequences(one_hot_test_messages,
                                                vocabulary_length)
    encoded_json_msg = json.dumps(encoded_test_messages.tolist())

    # Sagemaker part. The client is created once; the original built a
    # second, region-less client that was immediately overwritten.
    runtime = boto3.client('runtime.sagemaker', region_name='us-east-1')
    # NOTE(review): the body is JSON but is declared as 'text/csv'. This is
    # kept as-is; confirm which content type the endpoint expects.
    response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                       ContentType='text/csv',
                                       Body=encoded_json_msg)
    response = json.loads(response['Body'].read().decode())
    print("Response: ", response)
    score = int(response["predicted_label"][0][0])
    Classification = 'HAM' if score == 0 else "SPAM"
    Probability = response["predicted_probability"][0][0]

    # Prepare the return email
    aws_region = "us-east-1"
    sender = message['To']
    recipient = message['From']
    subject = "Analysis for your email"
    body_text = "We received your email sent at {} with the subject {}.\r\n".format(
        message['Date'], message['SUBJECT'])
    body_text += "\r\nHere is a 240 character sample of the email body:\r\n"
    body_text += "\r\n" + ret_body[:240] + "\r\n"
    body_text += "\r\nThe email was categorized as {} ".format(Classification)
    # Rendered as a percentage with five decimals.
    body_text += "with a {:.5%} confidence.".format(Probability)

    send_email(sender, recipient, aws_region, subject, body_text)

    return {
        'statusCode': 200,
        'body': json.dumps('We have already know if your email is spam or not')
    }