def handler(_, __):
    """Lambda entry point: load the raw complaints CSV from the data lake,
    derive the age-enriched rows and the age-grouped counts, and persist
    both result frames back to S3."""
    raw_key = f'{RAW_PREFIX}/complaints.csv'
    complaints_df = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, raw_key)
    with_age_df, by_age_df = apply_transformation(complaints_df)
    s3_helper_age.save_df(with_age_df)
    s3_helper_group.save_df(by_age_df)
def handler(_, __):
    """Lambda entry point: load the raw complaints CSV, run the sentiment
    transformation, and persist each of the three result frames to its
    own data-lake prefix."""
    raw_key = f'{RAW_PREFIX}/complaints.csv'
    complaints_df = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, raw_key)
    per_ticket_df, by_sentiment_df, by_city_state_df = apply_transformation(
        complaints_df)
    s3_helper.save_df(per_ticket_df)
    s3_helper_sentiments.save_df(by_sentiment_df)
    s3_helper_city_state_sentiments.save_df(by_city_state_df)
import os

import boto3
import pandas as pd

from utils.etl_s3 import S3ApiETL  # pylint: disable=import-error

# Data-lake location for the standardized complaints output.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')
target_prefix = f'{ENRICHED_PREFIX}/complaints-standard'

s3_client = boto3.client("s3")
s3_helper = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix)


def handler(_, __):
    # Lambda entry point: gather the source frames, merge them, and
    # persist the combined result.
    # NOTE(review): get_source_list is not visible in this chunk —
    # presumably defined elsewhere in the file; confirm.
    df_source_list = get_source_list()
    df_result = apply_transformation(df_source_list)
    s3_helper.save_df(df_result)


# NOTE(review): this chunk ends partway through apply_transformation —
# the join/return logic that presumably follows is not visible here.
def apply_transformation(df_source_list):
    # Start from a copy of the base complaints frame so the source in
    # df_source_list is not mutated.
    df_result = df_source_list["source"].copy()
    # df_cluster = df_source_list["cluster"]
    df_sentiment = df_source_list["sentiment"]
    df_priority = df_source_list["priority"]
    df_age = df_source_list["age"]
    # Normalize the join key to string before selecting the age columns.
    df_age["Ticket #"] = df_age["Ticket #"].astype(str)
    df_age = df_age[["Ticket #", "age_in_days", "age_in_year_month_day"]]
from datetime import datetime

from utils.etl_s3 import S3ApiETL  # pylint: disable=import-error

# Business rule: a month counts as 30 days when deriving complaint ages.
DAYS_IN_MONTH = 30
# Fixed "as of" date — presumably the cutoff ages are measured against;
# confirm in the (not visible) remainder of apply_transformation.
LAST_DATE = datetime(2015, 7, 1)

# NOTE(review): `os` and `boto3` are used below but their imports are not
# visible in this chunk — presumably imported earlier in the file; confirm.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')
RAW_PREFIX = os.getenv('RAW_PREFIX')

s3_client = boto3.client("s3")

# Two outputs: the age-enriched rows and the counts grouped by age.
target_prefix_age = f'{ENRICHED_PREFIX}/complaints-with-age'
target_prefix_group = f'{ENRICHED_PREFIX}/complaints-count-by-age'
s3_helper_age = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_age)
s3_helper_group = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_group)


def handler(_, __):
    # Lambda entry point: read the raw complaints CSV, enrich with age
    # data, and write both result frames to the data lake.
    df_source = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')
    df_result_age, df_result_group = apply_transformation(df_source)
    s3_helper_age.save_df(df_result_age)
    s3_helper_group.save_df(df_result_group)


# NOTE(review): this chunk is truncated mid-statement below — the rest of
# apply_transformation is not visible here.
def apply_transformation(df_source):
    complaints_tickets_df = df_source[[
        "Ticket #", "Customer Complaint", "Date", "Time", "Status"
def handler_lvl2(_, __):
    """Lambda entry point for the second slice: classify source rows
    1100-3000 and persist the result to the level-2 prefix."""
    raw_key = f'{RAW_PREFIX}/complaints.csv'
    complaints_df = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, raw_key)
    classified_df = apply_transformation(complaints_df, 1100, 3000)
    s3_helper_lvl2.save_df(classified_df)
import re

from utils.etl_s3 import S3ApiETL  # pylint: disable=import-error

# Comprehend custom document-classifier endpoint used for clustering.
ENDPOINT_ARN = "arn:aws:comprehend:us-east-1:193024568733:document-classifier-endpoint/complaints-training-v2"

# NOTE(review): `os` and `boto3` are used below but their imports are not
# visible in this chunk — presumably imported earlier in the file; confirm.
DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET')
ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX')
RAW_PREFIX = os.getenv('RAW_PREFIX')

s3_client = boto3.client("s3")
comprehend_client = boto3.client('comprehend')

# One output prefix per processed slice of the source rows.
target_prefix_lvl1 = f'{ENRICHED_PREFIX}/clustered-mails/0'
target_prefix_lvl2 = f'{ENRICHED_PREFIX}/clustered-mails/1100'
s3_helper_lvl1 = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_lvl1)
s3_helper_lvl2 = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_lvl2)


def handler_lvl1(_, __):
    # Lambda entry point for the first slice (rows 0-1100).
    df_source = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')
    df_result = apply_transformation(df_source, 0, 1100)
    s3_helper_lvl1.save_df(df_result)


def handler_lvl2(_, __):
    # Lambda entry point for the second slice (rows 1100-3000).
    # NOTE(review): this chunk is truncated here — the save_df call that
    # presumably follows (a fuller duplicate of this handler appears
    # elsewhere in the file) is not visible.
    df_source = S3ApiETL.get_object_as_dataframe(
        s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv')
    df_result = apply_transformation(df_source, 1100, 3000)
from utils.etl_s3 import S3ApiETL # pylint: disable=import-error s3_client = boto3.client("s3") comprehend_client = boto3.client('comprehend') MAX_BATCH_LIMIT = 23 MAX_SENTENCE_LENGTH_IN_CHARS = 4500 DATALAKE_BUCKET = os.getenv('DATALAKE_BUCKET') ENRICHED_PREFIX = os.getenv('ENRICHED_PREFIX') RAW_PREFIX = os.getenv('RAW_PREFIX') target_prefix = f'{ENRICHED_PREFIX}/sentiment-analysis' target_prefix_sentiments = f'{ENRICHED_PREFIX}/tickets-by-sentiments' target_prefix_city_state_sentiments = f'{ENRICHED_PREFIX}/tickets-by-city-state-sentiments' s3_helper = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix) s3_helper_sentiments = S3ApiETL(s3_client, DATALAKE_BUCKET, target_prefix_sentiments) s3_helper_city_state_sentiments = S3ApiETL( s3_client, DATALAKE_BUCKET, target_prefix_city_state_sentiments) def handler(_, __): df_source = S3ApiETL.get_object_as_dataframe( s3_client, DATALAKE_BUCKET, f'{RAW_PREFIX}/complaints.csv') df_result, df_result_group_sentiments, df_group_city_state_sentiments = apply_transformation( df_source) s3_helper.save_df(df_result) s3_helper_sentiments.save_df(df_result_group_sentiments) s3_helper_city_state_sentiments.save_df(df_group_city_state_sentiments)