Beispiel #1
0
def record_content_submission(
        dynamodb_table: Table,
        content_id: str,
        content_type: ContentType,
        content_ref: str,
        content_ref_type: ContentRefType,
        additional_fields: t.Optional[t.Set] = None,
        force_resubmit: bool = False,
) -> bool:
    """
    Write a content object that is submitted to the dynamodb_table.

    Note: this method does not store the data of the content itself
    If we want to store the media itself that is done either:
    - by a client using a presign url we give them
    - direct s3 put call in the case of raw bytes
    - not at all in the case of CDN-URL submission
        - (WIP: possibly done after a match is found)

    This function is also called directly by api_root when handling s3 uploads to partner
    banks. If editing, ensure the logic in api_root.process_s3_event is still correct

    Args:
        dynamodb_table: table the content object is written to.
        content_id: identifier of the submitted content.
        content_type: type of the submitted content.
        content_ref: reference to where the content lives (not the content itself).
        content_ref_type: how content_ref should be interpreted.
        additional_fields: optional set of extra fields stored with the
            submission. When omitted, a new empty set is used for this call.
        force_resubmit: when True, write unconditionally (overwrite or
            resubmission); when False, write only if no record is found.

    Returns:
        True if recording was successful. With force_resubmit=False this is
        whatever ContentObject.write_to_table_if_not_found returns.
    """
    # Create the empty set per call: a `set()` default in the signature is
    # evaluated once and would be shared by every ContentObject built without
    # explicit additional_fields.
    if additional_fields is None:
        additional_fields = set()

    # NOTE(review): naive local timestamp -- confirm this matches what readers
    # of created_at/updated_at expect.
    submit_time = datetime.datetime.now()
    content_obj = ContentObject(
        content_id=content_id,
        content_type=content_type,
        content_ref=content_ref,
        content_ref_type=content_ref_type,
        additional_fields=additional_fields,
        submission_times=[submit_time
                          ],  # Note: custom write_to_table impl appends.
        created_at=submit_time,
        updated_at=submit_time,
    )

    if force_resubmit:
        # Allow an overwrite or resubmission of content objects
        content_obj.write_to_table(dynamodb_table)
        return True

    return content_obj.write_to_table_if_not_found(dynamodb_table)
Beispiel #2
0
    def content() -> t.Optional[ContentObject]:
        """
        Look up the content object named by the `content_id` query parameter.

        Returns None when no content_id was supplied; otherwise returns
        whatever ContentObject.get_from_content_id yields for that id.
        """
        requested_id = bottle.request.query.content_id or None

        # Nothing to look up without an id.
        if not requested_id:
            return None

        return ContentObject.get_from_content_id(dynamodb_table, requested_id)
Beispiel #3
0
    def test_query_content_object(self):
        """
        A ContentObject written to the table must be returned unchanged
        by a get_from_content_id query on its content id.
        """
        written = self.get_example_content_object()
        written.write_to_table(self.get_table())

        fetched = ContentObject.get_from_content_id(
            self.get_table(),
            TestContentModels.TEST_CONTENT_ID,
        )

        assert written == fetched
Beispiel #4
0
 def get_example_content_object():
     """
     Build the example ContentObject used as a fixture by these tests.

     All timestamp fields are set to TestContentModels.TEST_TIME. The object
     is only constructed here; it is not written to any table.
     """
     timestamp = TestContentModels.TEST_TIME
     fixture_fields = dict(
         content_id=TestContentModels.TEST_CONTENT_ID,
         content_type=PhotoContent,
         content_ref="key_of_s3_bucket_object_123",
         content_ref_type=ContentRefType.DEFAULT_S3_BUCKET,
         additional_fields={"additional", "ham"},
         submission_times=[timestamp],
         created_at=timestamp,
         updated_at=timestamp,
     )
     return ContentObject(**fixture_fields)
Beispiel #5
0
def lambda_handler(event, context):
    """
    This lambda is called when one or more matches are found. If a single hash matches
    multiple datasets, this will be called only once.

    Action labels are generated for each match message, then an action is performed
    corresponding to each action label.

    For every SQS record in the event:
    - the match message is parsed from the record body's "Message" field,
    - the submitted ContentObject is loaded by the message's content_key,
    - one ActionMessage per resulting action label is sent to the actions queue,
    - a SawThisToo writeback message is sent to the writeback queue.

    Args:
        event: invocation event; must contain a "Records" list whose items
            each have a JSON string under "body".
        context: invocation context (unused).

    Returns:
        {"evaluation_completed": "true"} once every record was processed.

    Raises:
        LookupError: if no ContentObject exists for a match message's
            content_key. Records handled before the failing one have already
            had their messages sent.
    """
    config = ActionEvaluatorConfig.get()

    for sqs_record in event["Records"]:
        # TODO research max # sqs records / lambda_handler invocation
        sqs_record_body = json.loads(sqs_record["body"])
        logger.info("sqs record body %s", sqs_record["body"])
        match_message = MatchMessage.from_aws_json(sqs_record_body["Message"])

        logger.info("Evaluating match_message: %s", match_message)

        action_rules = get_action_rules()

        logger.info("Evaluating against action_rules: %s", action_rules)

        submitted_content = ContentObject.get_from_content_id(
            config.dynamo_db_table, match_message.content_key)

        # The lookup can come back empty; fail with a descriptive error rather
        # than an AttributeError on `.additional_fields`. Raising (instead of
        # skipping) keeps the invocation failing exactly as it did before.
        if submitted_content is None:
            logger.error("No submitted content found for content_key: %s",
                         match_message.content_key)
            raise LookupError(
                f"No submitted content found for content_key "
                f"'{match_message.content_key}'.")

        additional_fields = submitted_content.additional_fields

        action_label_to_action_rules = get_actions_to_take(
            match_message,
            action_rules,
            additional_fields,
        )
        for action_label, rules_for_label in action_label_to_action_rules.items():
            action_message = ActionMessage.from_match_message_action_label_action_rules_and_additional_fields(
                match_message,
                action_label,
                rules_for_label,
                list(additional_fields),
            )

            logger.info("Sending Action message: %s", action_message)
            config.sqs_client.send_message(
                QueueUrl=config.actions_queue_url,
                MessageBody=action_message.to_aws_json(),
            )

        # One writeback per record, regardless of how many actions were sent.
        writeback_message = WritebackMessage.from_match_message_and_type(
            match_message, WritebackTypes.SawThisToo)
        writeback_message.send_to_queue(config.sqs_client,
                                        config.writeback_queue_url)

    return {"evaluation_completed": "true"}
Beispiel #6
0
    def image():
        """
        Return a preview URL for the submitted media with the given ID.

        The URL comes from get_preview_url: if a URL was submitted it is
        returned, otherwise a signed URL is created for s3 uploads.
        Also works for videos.

        Aborts with 400 when no content_id query parameter is given and
        with 404 when no content object exists for that id.
        """
        requested_id = bottle.request.query.content_id or None
        if not requested_id:
            return bottle.abort(400, "content_id must be provided.")

        found: ContentObject = ContentObject.get_from_content_id(
            table=dynamodb_table,
            content_id=requested_id,
        )
        if not found:
            return bottle.abort(404, "content_id does not exist.")

        return ContentPreviewResponse(get_preview_url(requested_id, found))
Beispiel #7
0
    def pipeline_progress() -> ContentPipelineProgress:
        """
        WARNING: UNOPTIMIZED. DO NOT CALL FROM AUTOMATED SYSTEMS.

        Assemble, for one piece of content, the stages it has passed through
        (submission, hashing, matching, actions) together with their results.
        Intended for UI use only; no attempt is made to be fast.

        Aborts with 400 when content_id is missing or unknown, and with 500
        when a signal type has more than one hash record.
        """
        content_id = bottle.request.query.content_id or None
        if not content_id:
            return bottle.abort(400, "content_id must be provided.")
        content_id = t.cast(str, content_id)

        content_object = ContentObject.get_from_content_id(
            dynamodb_table, content_id)
        if not content_object:
            return bottle.abort(400,
                                f"Content with id '{content_id}' not found.")
        content_object = t.cast(ContentObject, content_object)

        preview = get_preview_url(content_id, content_object)

        # Start from the submission data; later stages fill in as records
        # are found.
        progress = ContentPipelineProgress(
            content_id=content_id,
            content_type=content_object.content_type,
            content_preview_url=preview,
            submitted_at=content_object.updated_at,
            submission_additional_fields=list(
                content_object.additional_fields),
        )

        # --- hashing stage ---
        hash_records = PipelineHashRecord.get_from_content_id(
            dynamodb_table, content_id)
        if hash_records:
            progress.hashed_at = max(rec.updated_at for rec in hash_records)
            for rec in hash_records:
                signal_name = rec.signal_type.get_name()
                # Each signal type is expected to have exactly one hash.
                if signal_name in progress.hash_results:
                    return bottle.abort(
                        500,
                        f"Content with id '{content_id}' has multiple hash records for signal-type: '{signal_name}'.",
                    )
                progress.hash_results[signal_name] = rec.content_hash

        # --- matching stage ---
        match_records = MatchRecord.get_from_content_id(
            dynamodb_table, content_id)
        if match_records:
            progress.matched_at = max(rec.updated_at for rec in match_records)

            # TODO #751 Until we resolve type agnostic storage of signal data,
            # we can't populate match details.
            # actually populate result.match_results.

        # TODO: ActionEvaluation does not yet leave a trail. Either record
        # action evaluation or remove the evaluation stage from the
        # pipeline-progress indicator.

        # --- action stage ---
        action_records = ActionEvent.get_from_content_id(
            dynamodb_table, content_id)
        if action_records:
            progress.action_performed_at = max(
                rec.performed_at for rec in action_records)
            progress.action_perform_results = [
                rec.action_label for rec in action_records
            ]

        return progress