Example #1
def tag_file_as_processed(bucket_name: str, key_prefix: str):
    """
    Append processed-status tags to an S3 object. This utility can be called once a file has
    been handled by its respective processor.
    """
    s3 = get_service_client("s3")
    # get existing tags
    tags = s3.get_object_tagging(Bucket=bucket_name, Key=key_prefix)["TagSet"]
    logger.debug(f"Old tags retrieved are: {tags}")

    # merge old tags and new tags
    tags.extend([
        {
            "Key": "processed_on",
            "Value": datetime.now(timezone.utc).strftime(TIMESTAMP_FORMAT)
        },
        {
            "Key": "processing_status",
            "Value": "COMPLETE"
        },
    ])

    s3.put_object_tagging(
        Bucket=bucket_name,
        Key=key_prefix,
        Tagging={"TagSet": tags},
    )
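A minimal usage sketch (the bucket and key below are illustrative, not from the source). Note that put_object_tagging replaces the object's entire tag set, which is why the function merges the old tags back in first; S3 also caps an object at 10 tags and rejects duplicate keys, so calling this twice on the same object would fail.

# Hypothetical call once a processor finishes with an object;
# the bucket and key names are illustrative only.
tag_file_as_processed("ingestion-bucket", "raw/2023/01/records.json")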
Example #2
def publish_config(config_event: ConfigEvent, event_bus=None):
    if not event_bus:
        event_bus = service_helper.get_service_client("events")

    for url in config_event.url_list:
        event = {
            "platform": config_event.platform,
            "account": config_event.account,
            "query": config_event.query,
            "url": url,
        }

        if config_event.topic:
            event["topic"] = config_event.topic

        service_response = event_bus.put_events(
            Entries=[
                {
                    "EventBusName": os.environ["EVENT_BUS_NAME"],
                    "Source": os.environ["INGESTION_NAMESPACE"],
                    "Detail": json.dumps(event),
                    "DetailType": "config",
                }
            ]
        )

        if service_response["FailedEntryCount"]:
            logger.error(f"Failed to publish following event: {event}")
            continue

        logger.info(f"Published event {event} on event bus with response {service_response}")
Example #3
def update_query_timestamp(video_id):
    ddb = get_service_client("dynamodb")
    table_name = os.environ["TARGET_DDB_TABLE"]
    current_time = datetime.now(timezone.utc)

    # default to a 7-day ingestion window if one is not provided
    window_days = int(os.environ.get("VIDEO_SEARCH_INGESTION_WINDOW", 7))
    # the expiry date is stored as epoch milliseconds
    expiry_window = str(int((current_time + timedelta(days=window_days)).timestamp() * 1000))

    ddb_response = ddb.put_item(
        TableName=table_name,
        Item={
            "VIDEO_ID": {"S": video_id},
            "LAST_QUERIED_TIMESTAMP": {"S": current_time.isoformat()},
            "EXP_DATE": {"N": expiry_window},
        },
    )

    logger.debug(f"Response from ddb put_item: {json.dumps(ddb_response)}")
Example #4
def get_event_bus_stubber():
    global event_bus_stubber

    if not event_bus_stubber:
        event_bus_client = get_service_client("events")
        event_bus_stubber = Stubber(event_bus_client)
    return event_bus_stubber
Example #5
def get_event_bus_stubber():
    global event_bus_stubber
    from shared_util.service_helper import get_service_client

    if not event_bus_stubber:
        event_bus_client = get_service_client("events")
        event_bus_stubber = Stubber(event_bus_client)
    return event_bus_stubber
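A sketch of how the cached stubber might be used in a test; the stubbed response shape follows EventBridge's put_events API, and the code under test is elided.

stubber = get_event_bus_stubber()
stubber.add_response(
    "put_events",
    {"FailedEntryCount": 0, "Entries": [{"EventId": "11111111-1111-1111-1111-111111111111"}]},
)
with stubber:
    ...  # exercise the code that publishes events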
Example #6
def buffer_data_into_stream(data, partition_key=None):
    """
    This method buffers data into a Kinesis Data Stream. The lambda function calling it should
    have an environment variable 'STREAM_NAME' whose value is the name of the stream.
    """
    kds_client = get_service_client("kinesis")

    stream_name = os.environ["STREAM_NAME"]

    if not partition_key:
        partition_key = str(uuid.uuid4())

    response = kds_client.put_record(StreamName=stream_name,
                                     Data=json.dumps(data),
                                     PartitionKey=partition_key)
    logger.debug(f"Response from buffering the stream is {response}")
    return response
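A hedged usage sketch; STREAM_NAME would normally be set in the lambda's configuration rather than in code. Omitting the partition key yields a random UUID, which spreads records evenly across shards at the cost of per-key ordering.

os.environ["STREAM_NAME"] = "ingestion-stream"  # illustrative; normally set on the function
buffer_data_into_stream({"id": "123", "text": "hello"})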
Example #7
def process_response(youtube_response, video_search_params):
    event_bus = get_service_client("events")
    count = 1
    comments = []

    logger.debug(f"video search parameters {json.dumps(video_search_params)}")
    search_query = f'{video_search_params.get("q", None)}#{video_search_params.get("channelId", None)}'

    for index, item in enumerate(youtube_response["items"]):
        logger.debug(f"Item is {item}")
        comments.append({
            "EventBusName": os.environ["EVENT_BUS_NAME"],
            "Source": os.environ["VIDEO_NAMESPACE"],
            "Detail": json.dumps({
                "VideoId": item["id"]["videoId"],
                "SearchQuery": search_query,
                "Title": item["snippet"]["title"],
            }),
            "DetailType": "Video",
        })
        logger.debug(f"Count is {count}")
        # optimize the loop to perform put_events with every 10 items
        if count == 10 or len(youtube_response["items"]) - index == 1:
            service_response = event_bus.put_events(Entries=comments)

            logger.debug(
                f"Put events response is {json.dumps(service_response)}")
            failed_entry_count = service_response.get("FailedEntryCount", None)
            if failed_entry_count > 0:
                logger.error(
                    f"Error in put events {json.dumps(service_response['Entries'])}"
                )

            count = 1
            comments = []
        else:
            count = count + 1  # optimize the loop to perform put_events with every 10 items
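The batching above exists because put_events accepts at most 10 entries per request. A minimal illustrative input, with field names mirroring the YouTube Data API search response the function expects:

# Hypothetical response shape; only the fields the function reads are included.
# Assumes EVENT_BUS_NAME and VIDEO_NAMESPACE are set in the environment.
youtube_response = {
    "items": [{
        "id": {"videoId": "abc123"},
        "snippet": {"title": "Sample video"},
    }]
}
process_response(youtube_response, {"q": "machine learning", "channelId": None})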
Example #8
def send_event(payload, detail_type, source):
    """
    Publish JSON data as an event on a custom EventBridge bus. The method expects the event
    bus name to be set as a lambda environment variable.
    """
    if os.environ.get("INTEGRATION_BUS_NAME", None):
        events = get_service_client("events")
        response = events.put_events(
            Entries=[{
                "EventBusName": os.environ["INTEGRATION_BUS_NAME"],
                "Detail": payload,
                "DetailType": detail_type,
                "Source": source,
            }])

        if response["FailedEntryCount"] and response["FailedEntryCount"] > 0:
            err_msg = f"Following record failed publishing {payload}"
            logger.error(err_msg)
            raise EventPublishException(err_msg)
        return response
    else:
        err_msg = "Event bus name not set in environment variable configuration for this lambda function"
        logger.error(err_msg)
        raise IncorrectEnvSetup(err_msg)
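A usage sketch with a hypothetical payload; Detail must be a string of valid JSON, so the payload is serialized before the call.

# Illustrative values; INTEGRATION_BUS_NAME must be configured on the function.
send_event(
    payload=json.dumps({"status": "ingested", "count": 42}),
    detail_type="ingestion-status",
    source="com.example.ingestion",
)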
Example #9
def get_api_key():
    api_key = get_service_client("ssm").get_parameter(
        Name=os.environ["SSM_API_KEY"],
        WithDecryption=True)["Parameter"]["Value"]

    return api_key
Example #10
    def test_service_client(self):
        service_client = service_helper.get_service_client("s3")
        self.assertIsNotNone(service_client)
        self.assertIn("https://s3.", service_client.meta.endpoint_url)
Example #11
def stream_tear_down(stream_name):
    kds_client = service_helper.get_service_client("kinesis")
    kds_client.delete_stream(StreamName=stream_name)
Example #12
def stream_setup(stream_name):
    kds_client = service_helper.get_service_client("kinesis")
    kds_client.create_stream(StreamName=stream_name, ShardCount=1)
    return kds_client
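A sketch of pairing the two helpers in a test. Since create_stream is asynchronous, a real test would wait for the stream to become ACTIVE before writing to it, for example with the kinesis "stream_exists" waiter:

stream_name = "test-stream"  # illustrative name
kds_client = stream_setup(stream_name)
kds_client.get_waiter("stream_exists").wait(StreamName=stream_name)
try:
    ...  # exercise code that writes to the stream
finally:
    stream_tear_down(stream_name)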
Example #13
def get_service_client(service_name):
    """Get the global service boto3 client"""
    return service_helper.get_service_client(service_name)