Example no. 1
0
def handler(_, __):
    """Lambda entry point; both the event and context arguments are ignored.

    Reads the raw complaints CSV from the data lake, applies the
    transformation, and persists the two resulting frames.
    """
    source_key = f'{RAW_PREFIX}/complaints.csv'
    source_df = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, source_key)

    age_df, grouped_df = apply_transformation(source_df)

    s3_helper_age.save_df(age_df)
    s3_helper_group.save_df(grouped_df)
Example no. 2
0
def handler(_, __):
    """Lambda entry point; event and context are unused.

    Loads the raw complaints CSV, runs the transformation, and saves the
    three resulting frames to their respective target prefixes.
    """
    key = f'{RAW_PREFIX}/complaints.csv'
    source = S3ApiETL.get_object_as_dataframe(s3_client, DATALAKE_BUCKET, key)

    result, grouped_sentiments, city_state_sentiments = apply_transformation(
        source)

    s3_helper.save_df(result)
    s3_helper_sentiments.save_df(grouped_sentiments)
    s3_helper_city_state_sentiments.save_df(city_state_sentiments)
Example no. 3
0
import os
import boto3
import pandas as pd

from utils.etl_s3 import S3ApiETL           # pylint: disable=import-error

# Data-lake layout is injected via environment variables by the deployment;
# both may be None if the variables are unset — TODO(review): confirm the
# runtime always provides them.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')

# All output of this job lands under the "complaints-standard" enriched prefix.
target_prefix = f'{ENRICHED_PREFIX}/complaints-standard'

# Module-level client and helper: constructed once at import time (per Lambda
# container) and reused across invocations.
s3_client = boto3.client("s3")
s3_helper = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix)


def handler(_, __):
    """Lambda entry point; the event and context arguments are unused.

    Gathers the source dataframes, transforms them, and persists the result.
    """
    sources = get_source_list()
    transformed = apply_transformation(sources)
    s3_helper.save_df(transformed)


def apply_transformation(df_source_list):
    """Combine the per-ticket enrichment frames with the source frame.

    ``df_source_list`` is a mapping of named dataframes ("source",
    "sentiment", "priority", "age", ...). The "source" frame is copied so the
    caller's input is not mutated.

    NOTE(review): this definition is truncated in the excerpt — the steps
    that use ``df_sentiment``/``df_priority`` and the return are not visible.
    """
    df_result = df_source_list["source"].copy()
    # df_cluster = df_source_list["cluster"]
    df_sentiment = df_source_list["sentiment"]
    df_priority = df_source_list["priority"]
    df_age = df_source_list["age"]

    # Normalise the "Ticket #" key to string and keep only the age columns —
    # presumably to align dtypes for a later join; confirm against the
    # (truncated) remainder of this function.
    df_age["Ticket #"] = df_age["Ticket #"].astype(str)
    df_age = df_age[["Ticket #", "age_in_days", "age_in_year_month_day"]]
Example no. 4
0
# Fix: this module calls os.getenv and boto3.client below but never imported
# os or boto3, which would raise NameError at import time.
import os
from datetime import datetime

import boto3

from utils.etl_s3 import S3ApiETL  # pylint: disable=import-error

# Age computation uses a fixed 30-day month, measured relative to a
# hard-coded cutoff date.
DAYS_IN_MONTH = 30
LAST_DATE = datetime(2015, 7, 1)

# Data-lake layout is injected via environment variables by the deployment.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')
RAW_PREFIX = os.getenv('RAW_PREFIX')

# Module-level client: created once per Lambda container and reused.
s3_client = boto3.client("s3")

# Two outputs: the per-ticket frame with age columns, and the count-by-age
# rollup.
target_prefix_age = f'{ENRICHED_PREFIX}/complaints-with-age'
target_prefix_group = f'{ENRICHED_PREFIX}/complaints-count-by-age'

s3_helper_age = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_age)
s3_helper_group = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_group)


def handler(_, __):
    """Lambda entry point (event and context unused).

    Fetches the raw complaints CSV, transforms it, and writes both result
    frames through their dedicated S3 helpers.
    """
    raw_key = f'{RAW_PREFIX}/complaints.csv'
    raw_df = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, raw_key)

    with_age, by_age = apply_transformation(raw_df)

    s3_helper_age.save_df(with_age)
    s3_helper_group.save_df(by_age)


def apply_transformation(df_source):
    """Select the ticket columns from the raw complaints frame.

    NOTE(review): this definition is truncated in the excerpt — only the
    initial column selection is visible; the rest of the transformation and
    the return statement are cut off.
    """
    complaints_tickets_df = df_source[[
        "Ticket #", "Customer Complaint", "Date", "Time", "Status"
Example no. 5
0
def handler_lvl2(_, __):
    """Lambda entry point for the second level (event/context unused).

    Reads the raw complaints CSV, applies the transformation with the
    1100/3000 bounds, and saves the result via the level-2 helper.
    """
    source_df = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')
    result_df = apply_transformation(source_df, 1100, 3000)
    s3_helper_lvl2.save_df(result_df)
Example no. 6
0
# Fix: this module calls os.getenv and boto3.client below but never imported
# os or boto3, which would raise NameError at import time.
import os
import re

import boto3

from utils.etl_s3 import S3ApiETL  # pylint: disable=import-error

# Comprehend custom document-classifier endpoint used by this module.
ENDPOINT_ARN = "arn:aws:comprehend:us-east-1:193024568733:document-classifier-endpoint/complaints-training-v2"
# Data-lake layout is injected via environment variables by the deployment.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')
RAW_PREFIX = os.getenv('RAW_PREFIX')

# Module-level clients: created once per Lambda container and reused.
s3_client = boto3.client("s3")
comprehend_client = boto3.client('comprehend')

# One output prefix per level handled by this module (0 and 1100).
target_prefix_lvl1 = f'{ENRICHED_PREFIX}/clustered-mails/0'
target_prefix_lvl2 = f'{ENRICHED_PREFIX}/clustered-mails/1100'

s3_helper_lvl1 = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_lvl1)
s3_helper_lvl2 = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_lvl2)


def handler_lvl1(_, __):
    """Lambda entry point for the first level (event/context unused).

    Reads the raw complaints CSV, applies the transformation with the
    0/1100 bounds, and saves the result via the level-1 helper.
    """
    complaints = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')
    transformed = apply_transformation(complaints, 0, 1100)
    s3_helper_lvl1.save_df(transformed)


def handler_lvl2(_, __):
    # Level-2 counterpart of handler_lvl1: same source read, but with the
    # 1100/3000 bounds.
    # NOTE(review): this definition is truncated in the excerpt — the save of
    # df_result (via s3_helper_lvl2) is not visible here.
    df_source = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')
    df_result = apply_transformation(df_source, 1100, 3000)
Example no. 7
0
# Fix: this module calls os.getenv and boto3.client below but never imported
# os or boto3, which would raise NameError at import time.
import os

import boto3

from utils.etl_s3 import S3ApiETL  # pylint: disable=import-error

# Module-level clients: created once per Lambda container and reused.
s3_client = boto3.client("s3")
comprehend_client = boto3.client('comprehend')

# Batch/length caps for the sentiment calls — presumably sized to stay under
# Comprehend's API limits; TODO(review): confirm against the service quotas.
MAX_BATCH_LIMIT = 23
MAX_SENTENCE_LENGTH_IN_CHARS = 4500
# Data-lake layout is injected via environment variables by the deployment.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')
RAW_PREFIX = os.getenv('RAW_PREFIX')

# Three outputs: per-ticket sentiment, tickets grouped by sentiment, and
# tickets grouped by city/state/sentiment.
target_prefix = f'{ENRICHED_PREFIX}/sentiment-analysis'
target_prefix_sentiments = f'{ENRICHED_PREFIX}/tickets-by-sentiments'
target_prefix_city_state_sentiments = f'{ENRICHED_PREFIX}/tickets-by-city-state-sentiments'

s3_helper = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix)
s3_helper_sentiments = S3ApiETL(s3_client, DATALAKE_BUCKET,
                                target_prefix_sentiments)
s3_helper_city_state_sentiments = S3ApiETL(
    s3_client, DATALAKE_BUCKET, target_prefix_city_state_sentiments)


def handler(_, __):
    """Lambda entry point (event and context unused).

    Loads the raw complaints CSV, applies the transformation, and writes each
    of the three resulting frames through its dedicated helper.
    """
    complaints = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')

    per_ticket, by_sentiment, by_city_state = apply_transformation(complaints)

    s3_helper.save_df(per_ticket)
    s3_helper_sentiments.save_df(by_sentiment)
    s3_helper_city_state_sentiments.save_df(by_city_state)