Example #1
    def _get_manifests_to_process(self, event):
        try:
            batch_id = event.get("BatchId")
            data = {
                "jamurl": "",
                "alerturl": "",
                "irregularityurl": "",
                "irregularity_alerturl": "",
                "irregularity_jamurl": "",
                "irregularity_point_sequenceurl": "",
                "jam_point_sequenceurl": "",
                "queueUrl": event.get("queueUrl"),
                "receiptHandle": event.get("receiptHandle"),
            }
            if batch_id is None:
                return data
            dynamodb = boto3.resource('dynamodb', region_name='us-east-1')
            table = dynamodb.Table(os.environ["CURATION_MANIFEST_TABLE"])
            response = table.query(
                IndexName="dev-BatchId-TableName-index",
                KeyConditionExpression=Key('BatchId').eq(batch_id),
                FilterExpression=Attr('FileStatus').eq('open'))

            data["batchId"] = batch_id
            for item in response['Items']:
                url = item["TableName"] + "url"
                data[url] = item["ManifestS3Key"]

            return data
        except Exception as e:
            LoggerUtility.log_error("Error getting manifests for batches")
            raise e
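A minimal sketch of the event this handler receives and the manifest map it returns, assuming a single open "jam" manifest row in the curation table (the batch id, queue URL, receipt handle, and S3 key below are all hypothetical):

# Hypothetical input event; queueUrl/receiptHandle come from the SQS poller.
example_event = {
    "BatchId": "1581091200",
    "queueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/example-queue",
    "receiptHandle": "AQEB...example...",
}

# Expected result, assuming one DynamoDB item with TableName="jam" and a
# ManifestS3Key pointing at the curated manifest; the other url slots stay empty.
expected_data = {
    "jamurl": "waze/manifests/jam/1581091200.manifest",
    "alerturl": "",
    "irregularityurl": "",
    "irregularity_alerturl": "",
    "irregularity_jamurl": "",
    "irregularity_point_sequenceurl": "",
    "jam_point_sequenceurl": "",
    "queueUrl": example_event["queueUrl"],
    "receiptHandle": example_event["receiptHandle"],
    "batchId": example_event["BatchId"],
}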
Example #2
 def handle_bucket_event(self, event):
     LoggerUtility.set_level()
     bucket_name, object_key = self.fetch_s3_details_from_event(event)
     s3_head_object = self.get_s3_head_object(bucket_name, object_key)
     metadata = self.create_metadata_object(s3_head_object, object_key)
     self.push_metadata_to_elasticsearch(bucket_name, metadata)
     self.publish_custom_metrics_to_cloudwatch(bucket_name, metadata)
def persist_curated_datasets(event, batch_id):
    LoggerUtility.set_level()

    table_name = event["tablename"]
    manifest_url_parameter = table_name + "url"
    manifest_url = event[manifest_url_parameter]
    is_historical = event["is_historical"] == 'true'
    sql_file_name = FUNCTION_LOGIC + "_" + table_name + ".sql"
    __persist_records_to_redshift(manifest_url, table_name, sql_file_name, batch_id, is_historical)
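A sketch of the event persist_curated_datasets expects; the values are hypothetical, and FUNCTION_LOGIC is a module-level constant that is not shown here:

# Hypothetical event: the handler reads event["jamurl"] because tablename is "jam"
# and resolves the SQL file name to "<FUNCTION_LOGIC>_jam.sql".
example_event = {
    "tablename": "jam",
    "jamurl": "waze/manifests/jam/1581091200.manifest",
    "is_historical": "false",
}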
 def get_latest_batch(self, latest_batch_id):
     try:
         ssm = boto3.client('ssm', region_name='us-east-1')
         response = ssm.get_parameter(Name=latest_batch_id, WithDecryption=False)
         LoggerUtility.log_info("Response from parameter store - {}".format(response))
         current_batch_id = response["Parameter"]["Value"]
     except Exception as ex:
         LoggerUtility.log_error("Unable to get latest batch with reason - {}".format(ex))
         raise ex
     return current_batch_id
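get_parameter nests the stored value under "Parameter"; a trimmed, illustrative response (real responses also carry fields such as Type, Version, and ARN, and the names and values below are made up):

# Illustrative subset of an SSM get_parameter response.
example_response = {
    "Parameter": {
        "Name": "example-latest-batch-id",
        "Value": "1581091200",
    }
}
current_batch_id = example_response["Parameter"]["Value"]   # "1581091200"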
 def push_batch_id_to_queue(self, current_batch_id):
     try:
         sqs = boto3.resource('sqs', region_name='us-east-1')
         queue_name = os.environ["SQS_CURATED_BATCHES_QUEUE_ARN"].rsplit(':', 1)[1]
         curated_batches_queue = sqs.get_queue_by_name(QueueName=queue_name)
         curated_batches_queue.send_message(
             MessageBody=json.dumps({'BatchId': current_batch_id}),
             MessageGroupId="WazeCuratedBatchesMessageGroup"
         )
         LoggerUtility.log_info("Successfully pushed the message to queue for batchid - {}".format(current_batch_id))
     except Exception as ex:
         LoggerUtility.log_error("Failed to push the batch to queue - {}".format(ex))
         raise ex
 def __init__(self, user, password, redshift_jdbc_url):
     endpoint_and_rest = redshift_jdbc_url.split('://')[1].split(':')
     endpoint = endpoint_and_rest[0]
     port_and_dbname = endpoint_and_rest[1].split('/')
     port = port_and_dbname[0]
     dbname = port_and_dbname[1]
     self.connection = psycopg2.connect(database=dbname,
                                        port=port,
                                        host=endpoint,
                                        password=password,
                                        user=user)
     self.connection.set_session(autocommit=True)
     self.cursor = self.connection.cursor()
     LoggerUtility.log_info("Established connection successfully")
Example #7
    def create_metadata_object(self, s3_head_object, key):
        metadata = {
            Constants.KEY_REFERENCE: key,
            Constants.CONTENT_LENGTH_REFERENCE: s3_head_object[Constants.CONTENT_LENGTH_REFERENCE],
            Constants.SIZE_MIB_REFERENCE: s3_head_object[Constants.CONTENT_LENGTH_REFERENCE] / 1024**2,
            Constants.LAST_MODIFIED_REFERENCE: s3_head_object[Constants.LAST_MODIFIED_REFERENCE].isoformat(),
            Constants.CONTENT_TYPE_REFERENCE: s3_head_object[Constants.CONTENT_TYPE_REFERENCE],
            Constants.ETAG_REFERENCE: s3_head_object[Constants.ETAG_REFERENCE],
            Constants.DATASET_REFERENCE: key.split('/')[0],
            Constants.ENVIRONMENT_NAME: os.environ["ENVIRONMENT_NAME"]
        }

        if key.split('/')[0] == "waze":
            if 'type' in key:
                type_value = key.split('/type=')[1].split('/')[0]
                type_metadata = {Constants.TRAFFIC_TYPE_REFERENCE: type_value}
                metadata.update(type_metadata)

            if 'table' in key:
                table_value = key.split('/table=')[1].split('/')[0]
                table_metadata = {Constants.TABLE_NAME_REFERENCE: table_value}
                metadata.update(table_metadata)

            if 'state' in key:
                state_value = key.split('/state=')[1].split('/')[0]
                state_metadata = {Constants.STATE_REFERENCE: state_value}
                metadata.update(state_metadata)
        elif key.split('/')[0] == "cv":
            data_provider_type_value = key.split('/')[1]
            data_provider_type_metadata = {
                Constants.DATA_PROVIDER_REFERENCE: data_provider_type_value
            }
            metadata.update(data_provider_type_metadata)

            data_type_value = key.split('/')[2]
            data_type_metadata = {
                Constants.DATA_TYPE_REFERENCE: data_type_value
            }
            metadata.update(data_type_metadata)

        LoggerUtility.log_info("METADATA: " + str(metadata))
        return metadata
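The waze branch pulls state, table, and traffic type out of Hive-style segments of the object key; a small sketch with a hypothetical key (the real key layout and segment order are assumptions):

# Hypothetical curated key, shown only to illustrate the split logic above.
key = "waze/state=MD/table=jams/type=alert/2020/02/07/data.json"

dataset = key.split('/')[0]                            # "waze"
type_value = key.split('/type=')[1].split('/')[0]      # "alert"
table_value = key.split('/table=')[1].split('/')[0]    # "jams"
state_value = key.split('/state=')[1].split('/')[0]    # "MD"
print(dataset, state_value, table_value, type_value)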
 def create_new_batch_id(self, latest_batch_id):
     new_batch_id = str(int(time.time()))
     try:
         ssm = boto3.client('ssm', region_name='us-east-1')
         ssm.put_parameter(
             Name=latest_batch_id,
             Description='Parameter to hold the latest value of a batch used for processing waze transactions',
             Value=new_batch_id,
             Type='String',
             Overwrite=True,
             AllowedPattern='\\d+')
         LoggerUtility.log_info("Successfully created a new batch with id - {}".format(new_batch_id))
     except Exception as ex:
         LoggerUtility.log_error("Failed to create new batch with reason - {}".format(ex))
         raise ex
     return new_batch_id
    def register_kibana_dashboard(self):
        LoggerUtility.set_level()
        try:
            es_endpoint = os.environ[Constants.ES_ENDPOINT_ENV_VAR]
        except KeyError as e:
            LoggerUtility.log_error(str(e) + " not configured")
            LoggerUtility.log_error("Failed to register kibana dashboard")
            raise e

        es_client = ElasticsearchClient.get_client(es_endpoint)
        try:
            self._create_metadata_visualizations(es_client)
        except ElasticsearchException as e:
            LoggerUtility.log_error(e)
            LoggerUtility.log_error("Failed to register kibana dashboard")
            raise e
 def execute_from_file(self, file_name, **query_kwargs):
     LoggerUtility.log_info("Filename - {}, Role - {}".format(
         file_name, self.redshift_role_arn))
     query = self.query_loader.load_from_file(
         file_name,
         region_name=self.region_name,
         redshift_role_arn=self.redshift_role_arn,
         **query_kwargs)
     LoggerUtility.log_info("Query details - {}".format(query))
     LoggerUtility.log_info("Executing redshift copy command")
     self.redshift_connection.execute(query)
     LoggerUtility.log_info("Completed redshift copy command")
Example #11
    def put_message_sqs(self, batch_id, sqs_persist):
        """
        Puts a batch into a queue via Amazon's Simple Queue Service
        :param batch_id: the batch id of the batch
        :param sqs_persist: the name of the queue
        :return:
        """
        try:
            queue = self.sqs.get_queue_by_name(QueueName=sqs_persist)
            response = queue.send_message(MessageBody=json.dumps({
                'BatchId': batch_id
            }))

            LoggerUtility.log_info(
                "Successfully put message to persist sqs for batch id - {}, response - {}".format(batch_id, response))
        except Exception as e:
            LoggerUtility.log_error(
                "Unable to put message to persist sqs for batch id - {} , sqs - {}".format(batch_id, sqs_persist))
            raise e
Example #12
    def get_s3_head_object(self, bucket_name, object_key):
        """

        :param bucket_name:
        :param object_key:
        :return:
        """
        s3_client = boto3.client('s3', region_name='us-east-1')
        try:
            response = s3_client.head_object(Bucket=bucket_name,
                                             Key=object_key)
        except ClientError as e:
            LoggerUtility.log_error(e)
            LoggerUtility.log_error(
                'Error getting object {} from bucket {}. Make sure they exist, '
                'your bucket is in the same region as this function and necessary permissions '
                'have been granted.'.format(object_key, bucket_name))
            raise e
        else:
            return response
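Only a few fields of this response are consumed by create_metadata_object; a trimmed, illustrative response, assuming the Constants *_REFERENCE values name the standard head_object fields (the values below are made up):

import datetime

# Illustrative subset of an S3 head_object response.
example_head_object = {
    "ContentLength": 1048576,
    "LastModified": datetime.datetime(2020, 2, 7, 12, 0, tzinfo=datetime.timezone.utc),
    "ContentType": "application/json",
    "ETag": '"9b2cf535f27731c974343645a3985328"',
}
print(example_head_object["ContentLength"] / 1024**2)   # size in MiB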
Example #13
    def delete_sqs_message(self, event, context):
        """
        Moves a message to the persistence queue, then deletes it from the previous queue via Amazon's Simple Queue
        Service.
        :param event: a list with a dictionary that contains information on a batch
        :param context: not used; only logged
        :return:
        """
        LoggerUtility.log_info("context: {}".format(context))
        batch_id = ""
        try:
            if "queueUrl" in event[0]:
                queue_url = event[0]["queueUrl"]
                receipt_handle = event[0]["receiptHandle"]
                batch_id = event[0]["batch_id"]
                is_historical = event[0]["is_historical"] == "true"

                persistence_queue = os.environ['SQS_PERSIST_ARN']
                if is_historical:
                    persistence_queue = os.environ['SQS_PERSIST_HISTORICAL_ARN']

                # put the message into the persistence queue via batchId
                self.put_message_sqs(batch_id, persistence_queue)
                # delete message from the previous queue.
                if event[0].get("queueUrl") is not None:
                    message = self.sqs.Message(queue_url, receipt_handle)
                    message.delete()
                    LoggerUtility.log_info("Message deleted from sqs for batchId {}".format(batch_id))
                    self.publish_message_to_sns({"BatchId": batch_id, "Status": "Manifest generation completed"})
        except Exception as e:
            LoggerUtility.log_error("Unable to delete sqs message for batchId {}".format(batch_id))
            raise e
Example #14
 def fetch_s3_details_from_event(self, event):
     """
     Pull bucket name and key from an event.
     :param event: Json object
     :return: bucket, key
     """
     try:
         sns_message = json.loads(event["Records"][0]["Sns"]["Message"])
         bucket = sns_message["Records"][0]["s3"]["bucket"]["name"]
         key = urllib.parse.unquote_plus(
             sns_message["Records"][0]["s3"]["object"]["key"])
     except Exception as e:
         LoggerUtility.log_error(str(e))
         LoggerUtility.log_error("Failed to process the event")
         raise e
     else:
         LoggerUtility.log_info("Bucket name: " + bucket)
         LoggerUtility.log_info("Object key: " + key)
         return bucket, key
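The handler unwraps an S3 notification that arrives wrapped in an SNS message; a self-contained sketch with a hand-built event (bucket and key are made up):

import json
import urllib.parse

# Minimal SNS-wrapped S3 notification, hand-built for illustration.
s3_notification = {
    "Records": [{
        "s3": {
            "bucket": {"name": "example-submissions-bucket"},
            "object": {"key": "waze/state%3DMD/data.json"}   # keys arrive URL-encoded
        }
    }]
}
event = {"Records": [{"Sns": {"Message": json.dumps(s3_notification)}}]}

sns_message = json.loads(event["Records"][0]["Sns"]["Message"])
bucket = sns_message["Records"][0]["s3"]["bucket"]["name"]
key = urllib.parse.unquote_plus(sns_message["Records"][0]["s3"]["object"]["key"])
print(bucket, key)   # example-submissions-bucket waze/state=MD/data.json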
    def create_batch(self):
        LoggerUtility.set_level()
        LoggerUtility.log_info("Initiating batch creation process")
        latest_batch_id = os.environ["LATEST_BATCH_ID"]
        current_batch_id = self.get_latest_batch(latest_batch_id)
        if current_batch_id == "":
            new_batch_id = self.create_new_batch_id(latest_batch_id)
        else:
            self.push_batch_id_to_queue(current_batch_id)
            new_batch_id = self.create_new_batch_id(latest_batch_id)

        LoggerUtility.log_info("Completed batch creation process with batch id - {}".format(new_batch_id))
def __persist_records_to_redshift(manifest_s3key_name, table_name, sql_file_name, batch_id, is_historical):
    """

    :param manifest_s3key_name:
    :param table_name:
    :param sql_file_name:
    :param batch_id:
    :param is_historical:
    :return:
    """
    try:

        s3_resource = boto3.resource('s3')
        LoggerUtility.log_info("Started persistence for table_name - {}".format(table_name))
        curated_bucket = os.environ['CURATED_BUCKET_NAME']
        LoggerUtility.log_info("Manifest s3 key = {}".format(manifest_s3key_name))
        redshift_manager = __make_redshift_manager()
        # Download the file from S3 to REDSHIFT_SQL_DIR path
        query_file_temp_name = str(uuid.uuid4()) + sql_file_name
        s3_resource.Bucket(CONFIG_BUCKET).download_file(SQL_KEY_PREFIX + "/" + sql_file_name,
                                                        REDSHIFT_SQL_DIR + "/" + query_file_temp_name)
        LoggerUtility.log_info("Downloaded file from S3 - {}".format(query_file_temp_name))
        dw_schema_name = "dw_waze"
        elt_schema_name = "elt_waze"

        if is_historical:
            dw_schema_name = "dw_waze_history"
            elt_schema_name = "elt_waze_history"

        redshift_manager.execute_from_file(query_file_temp_name,
                                           curated_bucket_name=curated_bucket,
                                           manifest_curated_key=manifest_s3key_name,
                                           batchIdValue=batch_id,
                                           dw_schema_name=dw_schema_name,
                                           elt_schema_name=elt_schema_name)
        # delete the file once executed
        os.remove(REDSHIFT_SQL_DIR + "/" + query_file_temp_name)
    except Exception as e:
        LoggerUtility.log_info("Failed to persist curated data to redshift for table "
                               "name - {} with exception - {}".format(table_name, e))
        raise
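The SQL files themselves live in the config bucket and are not shown here; a hypothetical sketch of what one such template could look like and how the keyword arguments above would be substituted into it (the placeholder syntax and COPY options are assumptions, not the project's actual templates):

# Hypothetical str.format-style template, shown only to illustrate the substitution.
example_template = (
    "COPY {elt_schema_name}.jam\n"
    "FROM 's3://{curated_bucket_name}/{manifest_curated_key}'\n"
    "IAM_ROLE '{redshift_role_arn}'\n"
    "REGION '{region_name}'\n"
    "MANIFEST FORMAT AS JSON 'auto';"
)

print(example_template.format(
    elt_schema_name="elt_waze",
    curated_bucket_name="example-curated-bucket",
    manifest_curated_key="waze/manifests/jam/1581091200.manifest",
    redshift_role_arn="arn:aws:iam::123456789012:role/example-redshift-role",
    region_name="us-east-1",
))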
Example #17
 def push_metadata_to_elasticsearch(self, bucket_name, metadata):
     try:
         elasticsearch_endpoint = os.environ[Constants.ES_ENDPOINT_ENV_VAR]
     except KeyError as e:
         LoggerUtility.log_error(str(e) + " not configured")
         raise e
     es_client = ElasticsearchClient.get_client(elasticsearch_endpoint)
     try:
         es_client.index(index=Constants.DEFAULT_INDEX_ID,
                         doc_type=bucket_name,
                         body=json.dumps(metadata))
     except ElasticsearchException as e:
         LoggerUtility.log_error(e)
         LoggerUtility.log_error("Could not index in Elasticsearch")
         raise e
    def poll_for_batches(self, event, context):
        """
        gets the messages from the data persistence queue to start the persistence in Redshift
        :param event: a dictionary (or a list containing one) with information on a batch
        :param context: Not used
        :return:
        """
        LoggerUtility.log_info("Context: {}".format(context))
        try:
            sqs = boto3.resource('sqs', region_name='us-east-1')
            is_historical = event["is_historical"] == "true"
            persist_sqs = os.environ["persistence_sqs"]
            if is_historical:
                persist_sqs = os.environ["persistence_historical_sqs"]

            queue = sqs.get_queue_by_name(QueueName=persist_sqs)
            data = dict()
            data["is_historical"] = str(is_historical).lower()

            # if no batch id assigned, gather BatchId, queueUrl, and receiptHandle from the messages in the queue.
            if 'BatchId' not in event:
                for message in queue.receive_messages():
                    json_body = json.loads(message.body)
                    data["BatchId"] = json_body["BatchId"]
                    data["queueUrl"] = message.queue_url
                    data["receiptHandle"] = message.receipt_handle
                    LoggerUtility.log_info(
                        "Batch {} retrieved for processing".format(
                            json_body["BatchId"]))
                    break

            # Otherwise, only assign the BatchId from the event.
            else:
                data["BatchId"] = event['BatchId']

            if 'BatchId' in data:
                self.publish_message_to_sns({
                    "BatchId": data["BatchId"],
                    "Status": "Persistence process started"
                })
            return data
        except Exception as e:
            LoggerUtility.log_error("Error polling for batches")
            raise e
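A sketch of the two event shapes this poller handles and the dict it returns (ids, URLs, and handles are made up):

# Case 1: the event already carries a BatchId; no message is read from the queue.
event_with_batch = {"is_historical": "false", "BatchId": "1581091200"}
# -> returns {"is_historical": "false", "BatchId": "1581091200"}

# Case 2: no BatchId in the event; one message whose body is
# json.dumps({"BatchId": "1581091200"}) is read from the persistence queue.
expected_data = {
    "is_historical": "false",
    "BatchId": "1581091200",
    "queueUrl": "https://sqs.us-east-1.amazonaws.com/123456789012/example-persistence-queue",
    "receiptHandle": "AQEB...example...",
}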
Example #19
 def push_batch_id_to_nightly_sqs_queue(self, event, context):
     LoggerUtility.log_info("context: {}".format(context))
     current_batch_id = ""
     try:
         if "batchId" in event[0]:
             sqs = boto3.resource('sqs', region_name='us-east-1')
             current_batch_id = event[0]["batchId"]
             nightly_queue_name = os.environ["SQS_NIGHTLY_PERSISTENCE_QUEUE_NAME"]
             nightly_batches_queue = sqs.get_queue_by_name(QueueName=nightly_queue_name)
             response = nightly_batches_queue.send_message(MessageBody=json.dumps({
                 'BatchId': current_batch_id
             }), MessageGroupId="WazeNightlyPersistenceBatchesMessageGroup")
             LoggerUtility.log_info("Successfully pushed the message to nightly queue for batch_id -"
                                    " {} with response - {}".format(current_batch_id, response))
     except Exception as e:
         LoggerUtility.log_error(
             "Unable to push sqs message to nightly queue for batchId {}".format(current_batch_id))
         raise e
Example #20
 def delete_sqs_message(self, event, context):
     LoggerUtility.log_info("context: {}".format(context))
     batch_id = ""
     try:
         if "queueUrl" in event[0] and "batchId" in event[0]:
             sqs = boto3.resource('sqs', region_name='us-east-1')
             queue_url = event[0]["queueUrl"]
             receipt_handle = event[0]["receiptHandle"]
             batch_id = event[0]["batchId"]
             if event[0].get("queueUrl") is not None:
                 message = sqs.Message(queue_url, receipt_handle)
                 message.delete()
                 LoggerUtility.log_info("Message deleted from sqs for batchId {}".format(batch_id))
                 self.publish_message_to_sns({"BatchId": batch_id, "Status": "Persistence process completed"})
     except Exception as e:
         LoggerUtility.log_error("Unable to delete sqs message for batchId {}".format(batch_id))
         raise e
Example #21
    def get_client(elasticsearch_endpoint):
        LoggerUtility.set_level()
        try:
            awsauth = AWSRequestsAuth(
                aws_access_key=os.environ[Constants.ACCESS_KEY_ENV_VAR],
                aws_secret_access_key=os.environ[Constants.SECRET_KEY_ENV_VAR],
                aws_token=os.environ[Constants.SESSION_TOKEN_ENV_VAR],
                aws_host=elasticsearch_endpoint,
                aws_region=os.environ[Constants.REGION_ENV_VAR],
                aws_service=Constants.ELASTICSEARCH_SERVICE_CLIENT
            )
        except KeyError as e:
            LoggerUtility.log_error(str(e) + " not configured")
            LoggerUtility.log_error("Failed to register kibana dashboard")
            raise e

        return Elasticsearch(
            hosts=['{0}:443'.format(elasticsearch_endpoint)],
            use_ssl=True,
            connection_class=RequestsHttpConnection,
            http_auth=awsauth
        )
    def persist_record_to_dynamodb_table(s3_key, table_name, state,
                                         num_records, bucket, batch_id,
                                         is_historical, month, year):
        try:
            dynamodb_curated_records_table_name = os.environ[
                'DDB_CURATED_RECORDS_TABLE_ARN'].split('/')[1]
            persist_records = bool(int(os.environ['PERSIST_RECORDS']))
            s3_key = "s3://" + bucket + "/" + s3_key

            if persist_records:
                dynamodb = boto3.resource('dynamodb')
                curated_record_table = dynamodb.Table(
                    dynamodb_curated_records_table_name)
                response = curated_record_table.put_item(
                    Item={
                        'CurationRecordId': str(uuid.uuid4()),
                        'BatchId': batch_id,
                        'DataTableName': table_name,
                        'S3Key': s3_key,
                        'State': state,
                        'TotalNumCuratedRecords': num_records,
                        'IsHistorical': is_historical,
                        'Year': year,
                        'Month': month
                    })
                LoggerUtility.log_info(
                    "Successfully persisted record to dynamo db table - {}".
                    format(response))
            else:
                LoggerUtility.log_info(
                    "Persist records flag is disabled, so not persisting "
                    "any records to dynamodb table")

        except Exception as e:
            LoggerUtility.log_error(
                "Failed to persist record to dynamo db table for key - {}".
                format(s3_key))
            raise e
    def update_manifest_status(self, event, context):

        LoggerUtility.log_info("context: {}".format(context))
        batch_id = ""
        table_name = ""
        try:
            session = boto3.session.Session()
            ddb = session.resource('dynamodb', region_name='us-east-1')
            ddb_table_name = os.environ['DDB_MANIFEST_TABLE_ARN'].split('/')[1]
            manifest_index_name = os.environ['DDB_MANIFEST_FILES_INDEX_NAME']
            table_name = event['tablename']
            batch_id = event['batchId']
            ddb_table = ddb.Table(ddb_table_name)
            response = ddb_table.query(
                IndexName=manifest_index_name,
                KeyConditionExpression=Key('BatchId').eq(batch_id)
                & Key('TableName').eq(table_name),
                FilterExpression=Attr('FileStatus').eq('open'))

            if response['Count'] > 0:
                for item in response['Items']:
                    if table_name == item['TableName']:
                        ddb_table.update_item(
                            Key={
                                'ManifestId': item['ManifestId'],
                                'BatchId': batch_id
                            },
                            UpdateExpression='set FileStatus = :f',
                            ExpressionAttributeValues={':f': 'completed'})
                        LoggerUtility.log_info(
                            "Updated manifest status for batchId {} and table {}"
                            .format(batch_id, table_name))
                        break
        except Exception as e:
            LoggerUtility.log_error(
                "Unable to update manifest status for  batchId {} and table {}"
                .format(batch_id, table_name))
            raise e
def lambda_handler(event, context):
    LoggerUtility.set_level()
    update_manifest_handle_event = ManifestHandler()
    update_manifest_handle_event.update_manifest(event, context)
    return event
def lambda_handler(event, context):
    LoggerUtility.set_level()
    get_batches_handle_event = SqsHandler()
    return get_batches_handle_event.get_batches(event, context)
Example #26
class ClosePipeline:

    sqs = boto3.resource('sqs', region_name='us-east-1')
    sns = boto3.client('sns', region_name='us-east-1')
    LoggerUtility.log_info("Test test 123")

    def publish_message_to_sns(self, message):
        """
        Publishes a message to Amazon's Simple Notification Service
        :param message: dict
        """
        self.sns.publish(
            TargetArn=os.environ['BATCH_NOTIFICATION_SNS'],
            Message=json.dumps({'default': json.dumps(message)}),
            MessageStructure='json'
        )

    def put_message_sqs(self, batch_id, sqs_persist):
        """
        Puts a batch into a queue via Amazon's Simple Queue Service
        :param batch_id: the batch id of the batch
        :param sqs_persist: the name of the queue
        :return:
        """
        try:
            queue = self.sqs.get_queue_by_name(QueueName=sqs_persist)
            response = queue.send_message(MessageBody=json.dumps({
                'BatchId': batch_id
            }))

            LoggerUtility.log_info(
                "Successfully put message to persist sqs for batch id - {}, response - {}".format(batch_id, response))
        except Exception as e:
            LoggerUtility.log_error(
                "Unable to put message to persist sqs for batch id - {} , sqs - {}".format(batch_id, sqs_persist))
            raise e

    def delete_sqs_message(self, event, context):
        """
        Moves a message to the persistence queue, then deletes it from the previous queue via Amazon's Simple Queue
        Service.
        :param event: a list with a dictionary that contains information on a batch
        :param context: not used; only logged
        :return:
        """
        LoggerUtility.log_info("context: {}".format(context))
        batch_id = ""
        try:
            if "queueUrl" in event[0]:
                queue_url = event[0]["queueUrl"]
                receipt_handle = event[0]["receiptHandle"]
                batch_id = event[0]["batch_id"]
                is_historical = event[0]["is_historical"] == "true"

                persistence_queue = os.environ['SQS_PERSIST_ARN']
                if is_historical:
                    persistence_queue = os.environ['SQS_PERSIST_HISTORICAL_ARN']

                # put the message into the persistence queue via batchId
                self.put_message_sqs(batch_id, persistence_queue)
                # delete message from the previous queue.
                if event[0].get("queueUrl") is not None:
                    message = self.sqs.Message(queue_url, receipt_handle)
                    message.delete()
                    LoggerUtility.log_info("Message deleted from sqs for batchId {}".format(batch_id))
                    self.publish_message_to_sns({"BatchId": batch_id, "Status": "Manifest generation completed"})
        except Exception as e:
            LoggerUtility.log_error("Unable to delete sqs message for batchId {}".format(batch_id))
            raise e

    def close_pipeline(self, event, context):
        """
        Executes delete_sqs_message
        :param event: a list with a dictionary that contains information on a batch
        :param context: not used; passed through to delete_sqs_message
        :return:
        """
        self.delete_sqs_message(event, context)
def lambda_handler(event, context):
    LoggerUtility.set_level()
    get_manifests_handle_event = ManifestHandler()
    return get_manifests_handle_event.get_manifests(event)
Example #28
 def publish_custom_metrics_to_cloudwatch(self, bucket_name, metadata):
     cloudwatch_client = boto3.client('cloudwatch', region_name='us-east-1')
     try:
         if bucket_name == os.environ["SUBMISSIONS_BUCKET_NAME"] and metadata["Dataset"] == "waze":
             cloudwatch_client.put_metric_data(
                 Namespace=os.environ["WAZE_SUBMISSIONS_COUNT_METRIC"],
                 MetricData=[{
                     'MetricName': 'Counts by state and traffic type',
                     'Dimensions': [
                         {'Name': 'State', 'Value': metadata["State"]},
                         {'Name': 'TrafficType', 'Value': metadata["TrafficType"]}
                     ],
                     'Value': 1,
                     'Unit': 'Count'
                 }])
             if metadata["ContentLength"] <= 166:
                 cloudwatch_client.put_metric_data(
                     Namespace=os.environ["WAZE_ZERO_BYTE_SUBMISSIONS_COUNT_METRIC"],
                     MetricData=[{
                         'MetricName': 'Zero Byte Submissions by State and traffic type',
                         'Dimensions': [
                             {'Name': 'State', 'Value': metadata["State"]},
                             {'Name': 'TrafficType', 'Value': metadata["TrafficType"]}
                         ],
                         'Value': 1,
                         'Unit': 'Count'
                     }])
         elif bucket_name == os.environ["SUBMISSIONS_BUCKET_NAME"] and metadata["Dataset"] == "cv":
             cloudwatch_client.put_metric_data(
                 Namespace=os.environ["CV_SUBMISSIONS_COUNTS_METRIC"],
                 MetricData=[{
                     'MetricName': 'Counts by provider and datatype',
                     'Dimensions': [
                         {'Name': 'DataProvider', 'Value': metadata["DataProvider"]},
                         {'Name': 'DataType', 'Value': metadata["DataType"]}
                     ],
                     'Value': 1,
                     'Unit': 'Count'
                 }])
         elif bucket_name == os.environ["CURATED_BUCKET_NAME"] and metadata["Dataset"] == "waze":
             cloudwatch_client.put_metric_data(
                 Namespace=os.environ["WAZE_CURATED_COUNTS_METRIC"],
                 MetricData=[{
                     'MetricName': 'Counts by state and table name',
                     'Dimensions': [
                         {'Name': 'State', 'Value': metadata["State"]},
                         {'Name': 'TableName', 'Value': metadata["TableName"]}
                     ],
                     'Value': 1,
                     'Unit': 'Count'
                 }])
     except Exception as e:
         LoggerUtility.log_error(e)
         LoggerUtility.log_error("Failed to publish custom cloudwatch metrics")
         raise e
Example #29
def lambda_handler(event, context):
    LoggerUtility.set_level()
    close_pipeline_handle_event = ClosePipeline()
    close_pipeline_handle_event.close_pipeline(event, context)
    return event