Esempio n. 1
0
def get_indexes_api(indexes_bucket_name: str,
                    indexer_function_name: str) -> bottle.Bottle:
    """
    Closure for dependencies of the indexes API.

    Args:
        indexes_bucket_name: s3 bucket where the built indexes live.
        indexer_function_name: lambda function that rebuilds all indexes.
    """
    indexes_api = SubApp()

    @indexes_api.get("/last-modified", apply=[jsoninator])
    def all_indexes_last_modified() -> IndexesLastModifiedResponse:
        """
        Returns the max of last_modified time of all indexes. Read as: when was
        the latest index rebuilt.
        """
        return IndexesLastModifiedResponse(
            S3BackedInstrumentedIndexMixin.get_latest_last_modified(
                bucket_name=indexes_bucket_name))

    @indexes_api.post("/rebuild-all")
    def rebuild_all_indexes():
        """
        Well, it rebuilds all your indexes. Async operation. Just triggers, does
        not wait.
        """
        # InvocationType="Event" is fire-and-forget: the invoke response only
        # acknowledges the trigger, so it is intentionally discarded (the
        # original bound it to an unused local).
        _get_lambda_client().invoke(
            FunctionName=indexer_function_name,
            InvocationType="Event",
        )

    return indexes_api
Esempio n. 2
0
def get_actions_api(hma_config_table: str) -> bottle.Bottle:
    """
    Closure for dependencies of the actions API.

    CRUD endpoints for ActionPerformer configs stored in hma_config_table.
    """
    # The documentation below expects prefix to be '/actions/'
    actions_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    @actions_api.get("/", apply=[jsoninator])
    def fetch_all_actions() -> FetchAllActionsResponse:
        """
        Return all action configs.
        """
        action_configs = ActionPerformer.get_all()
        # Serialize each config via its instance attribute dict.
        return FetchAllActionsResponse(
            actions_response=[config.__dict__ for config in action_configs])

    @actions_api.put(
        "/<old_name>/<old_config_sub_stype>",
        apply=[jsoninator(CreateUpdateActionRequest)],
    )
    def update_action(request: CreateUpdateActionRequest, old_name: str,
                      old_config_sub_stype: str) -> UpdateActionResponse:
        """
        Update the action, name, url, and headers for action with name=<old_name> and subtype=<old_config_sub_stype>.
        """
        if old_name != request.name or old_config_sub_stype != request.config_subtype:
            # The name field can't be updated because it is the primary key
            # The config sub type can't be updated because it is the config class level param
            # NOTE(review): these invoke the sibling route handlers as plain
            # functions (presumably bottle's route decorator returns the
            # callback unchanged — confirm). Delete runs before create, so if
            # create_action raises, the old config is already gone.
            delete_action(old_name)
            create_action(request)
        else:
            # Same name and subtype: mutate the existing config in place and
            # persist only the changed fields.
            config = ActionPerformer._get_subtypes_by_name()[
                request.config_subtype].getx(request.name)
            for key, value in request.fields.items():
                setattr(config, key, value)
            hmaconfig.update_config(config)
        return UpdateActionResponse(response="The action config is updated.")

    @actions_api.post("/", apply=[jsoninator(CreateUpdateActionRequest)])
    def create_action(
            request: CreateUpdateActionRequest) -> CreateActionResponse:
        """
        Create an action.
        """
        # Look up the concrete ActionPerformer subclass by subtype name and
        # construct it from the request's name plus its free-form fields.
        config = ActionPerformer._get_subtypes_by_name()[
            request.config_subtype](**{
                "name": request.name,
                **request.fields
            })
        hmaconfig.create_config(config)
        return CreateActionResponse(response="The action config is created.")

    @actions_api.delete("/<name>", apply=[jsoninator])
    def delete_action(name: str) -> DeleteActionResponse:
        """
        Delete the action with name=<name>.
        """
        hmaconfig.delete_config_by_type_and_name("ActionPerformer", name)
        return DeleteActionResponse(response="The action config is deleted.")

    return actions_api
Esempio n. 3
0
def get_lcc_api(signal_type_mapping: HMASignalTypeMapping,
                storage_path: str) -> bottle.Bottle:
    """
    APIs providing live content clustering capabilities. Indexes all recently
    seen content and answers queries for content_ids matching a given hash.
    """

    lcc_api = SubApp()

    @lcc_api.get("/recently-seen/", apply=[jsoninator])
    def recently_seen_in_lcc() -> RecentlySeenLCCResponse:
        """
        Given a signal_type and a hash, have we seen something like this
        recently? Uses default precision knobs.

        TODO: Make thresholds configurable.
        """
        query_params = bottle.request.query
        requested_signal_type = signal_type_mapping.get_signal_type_enforce(
            query_params.signal_type)
        queried_hash = query_params.hash

        matches = get_index(
            storage_path, requested_signal_type.get_name()).query(queried_hash)

        if not len(matches):
            # Nothing similar seen recently.
            return RecentlySeenLCCResponse(False, None, None)

        # Use the first result as the content_id, TODO: convert content_Id to
        # preview_url, also add content_type
        first_content_id = matches[0].metadata
        return RecentlySeenLCCResponse(True, first_content_id,
                                       first_content_id)

    return lcc_api
Esempio n. 4
0
def get_content_api(dynamodb_table: Table, image_bucket: str,
                    image_prefix: str) -> bottle.Bottle:
    """
    A Closure that includes all dependencies that MUST be provided by the root
    API that this API plugs into. Declare dependencies here, but initialize in
    the root API alone.
    """
    def get_preview_url(content_id, content_object) -> str:
        """
        Given a content_id and a content_object, returns a URL you can use to
        preview it. Aborts with 400 when no preview URL can be derived.
        """
        content_object = t.cast(ContentObject, content_object)
        preview_url = ""
        if content_object.content_ref_type == ContentRefType.DEFAULT_S3_BUCKET:
            # Media lives in our own bucket: hand out a 1-hour presigned GET.
            source = S3BucketContentSource(image_bucket, image_prefix)

            preview_url = create_presigned_url(image_bucket,
                                               source.get_s3_key(content_id),
                                               None, 3600, "get_object")
        elif content_object.content_ref_type == ContentRefType.URL:
            preview_url = content_object.content_ref
        if not preview_url:
            return bottle.abort(400, "preview_url not found.")
        return preview_url

    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/content/'
    content_api = SubApp()

    @content_api.get("/", apply=[jsoninator])
    def content() -> t.Optional[ContentObject]:
        """
        Return content object for given ID; None when no content_id given.
        """
        content_id = bottle.request.query.content_id or None

        if content_id:
            return ContentObject.get_from_content_id(dynamodb_table,
                                                     content_id)
        return None

    @content_api.get("/pipeline-progress/", apply=[jsoninator])
    def pipeline_progress() -> ContentPipelineProgress:
        """
        WARNING: UNOPTIMIZED. DO NOT CALL FROM AUTOMATED SYSTEMS.

        Build a history of the stages that this piece of content has gone
        through and what their results were. Do not call this from anything but
        a UI. This is not optimized for performance.
        """
        content_id = bottle.request.query.content_id or None

        if not content_id:
            return bottle.abort(400, "content_id must be provided.")
        content_id = t.cast(str, content_id)

        content_object = ContentObject.get_from_content_id(
            dynamodb_table, content_id)
        if not content_object:
            return bottle.abort(400,
                                f"Content with id '{content_id}' not found.")
        content_object = t.cast(ContentObject, content_object)

        preview_url = get_preview_url(content_id, content_object)

        # The result object will be gradually built up as records are retrieved.
        result = ContentPipelineProgress(
            content_id=content_id,
            content_type=content_object.content_type,
            content_preview_url=preview_url,
            submitted_at=content_object.updated_at,
            submission_additional_fields=list(
                content_object.additional_fields),
        )

        hash_records = PipelineHashRecord.get_from_content_id(
            dynamodb_table, content_id)
        if len(hash_records) != 0:
            result.hashed_at = max(hash_records,
                                   key=lambda r: r.updated_at).updated_at
            for hash_record in hash_records:
                # Assume that each signal type has a single hash
                if hash_record.signal_type.get_name() in result.hash_results:
                    return bottle.abort(
                        500,
                        f"Content with id '{content_id}' has multiple hash records for signal-type: '{hash_record.signal_type.get_name()}'.",
                    )

                result.hash_results[hash_record.signal_type.get_name(
                )] = hash_record.content_hash

        match_records = MatchRecord.get_from_content_id(
            dynamodb_table, content_id)
        if len(match_records) != 0:
            result.matched_at = max(match_records,
                                    key=lambda r: r.updated_at).updated_at

            # TODO #751 Until we resolve type agnostic storage of signal data,
            # we can't populate match details.
            # actually populate result.match_results.

        # TODO: ActionEvaluation does not yet leave a trail. Either record
        # action evaluation or remove the evaluation stage from the
        # pipeline-progress indicator.

        action_records = ActionEvent.get_from_content_id(
            dynamodb_table, content_id)
        if len(action_records) != 0:
            result.action_performed_at = max(
                action_records, key=lambda r: r.performed_at).performed_at
            result.action_perform_results = [
                r.action_label for r in action_records
            ]

        return result

    @content_api.get("/action-history/", apply=[jsoninator])
    def action_history() -> ActionHistoryResponse:
        """
        Return list of action event records for a given ID.
        """
        if content_id := bottle.request.query.content_id or None:
            return ActionHistoryResponse(
                ActionEvent.get_from_content_id(dynamodb_table,
                                                f"{content_id}"))
        return ActionHistoryResponse()

    # Fix: the function is annotated -> bottle.Bottle but the sub-app was
    # never returned, so the root app would have received None.
    return content_api
Esempio n. 5
0
def get_stats_api(counts_table: Table) -> bottle.Bottle:
    """
    Closure for all dependencies for the stats APIs.

    Args:
        counts_table: dynamodb table holding the aggregate counts.
    """

    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/stats/'
    stats_api = SubApp()

    # Allowed values for the `stat_name` query parameter, mapped to the
    # metric each one surfaces.
    stat_name_to_metric = {
        "hashes": metrics.names.pdq_hasher_lambda.hash,
        "matches": metrics.names.pdq_matcher_lambda.write_match_record,
    }

    @stats_api.get("/", apply=[jsoninator])
    def default_stats() -> StatResponse:
        """
        If measure performance tfvar/os.env is true, it returns stats, else,
        returns 404. A 404 should be surfaced by clients with instructions on
        how to enable metrics tracking.

        The graph_data always contains the start_time and end_time timestamps
        with 0 values to make graphing easier.
        """
        if not is_publishing_metrics():
            return bottle.abort(
                404, "This HMA instance is not publishing metrics.")

        if (not bottle.request.query.stat_name
                or bottle.request.query.stat_name not in stat_name_to_metric):
            # Fix: "specifiy" typo, and wrap the keys in list() so the
            # user-facing message doesn't leak a dict_keys(...) repr.
            return bottle.abort(
                400,
                f"Must specify stat_name in query parameters. Must be one of {list(stat_name_to_metric)}",
            )

        metric = stat_name_to_metric[bottle.request.query.stat_name]

        # Unknown or missing time_span values fall back to 24 hours.
        time_span_arg = bottle.request.query.time_span
        metric_time_period = {
            "24h": metrics_query.MetricTimePeriod.HOURS_24,
            "1h": metrics_query.MetricTimePeriod.HOURS_1,
            "7d": metrics_query.MetricTimePeriod.DAYS_7,
        }.get(time_span_arg, metrics_query.MetricTimePeriod.HOURS_24)

        count_with_graphs = metrics_query.get_count_with_graph(
            [metric],
            metric_time_period,
        )

        return StatResponse(
            StatsCard(
                count_with_graphs[metric].count,
                metric_time_period,
                count_with_graphs[metric].graph_data,
            ))

    @stats_api.get("/counts/", apply=[jsoninator])
    def aggregate_counts() -> AggregateCountResponse:
        """
        return the set of aggregate_counts
        """
        if not is_publishing_metrics():
            return bottle.abort(
                404, "This HMA instance is not publishing metrics.")

        PIPELINE_COUNTS_TO_SURFACE = [
            AggregateCount.PipelineNames.submits,
            AggregateCount.PipelineNames.hashes,
            AggregateCount.PipelineNames.matches,
        ]

        return AggregateCountResponse({
            count_name:
            int(AggregateCount(count_name).get_value(counts_table))
            for count_name in PIPELINE_COUNTS_TO_SURFACE
        })

    return stats_api
Esempio n. 6
0
def get_action_rules_api(hma_config_table: str) -> bottle.Bottle:
    """
    Closure for dependencies of the action-rules API.

    CRUD endpoints for ActionRule configs. Handlers report failures through
    an error_message field in the response body (plus a 500 status) rather
    than raising.
    """
    # The endpoints below imply a prefix of '/action-rules'
    action_rules_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    @action_rules_api.get("/", apply=[jsoninator])
    def get_action_rules() -> ActionRulesResponse:
        """
        Return all action rules.
        """
        error_message = ""
        action_rules = []

        try:
            action_rules = ActionRule.get_all()
            logger.info("action_rules: %s", action_rules)
        except Exception as e:
            # Best-effort: respond with an empty list plus an error message
            # instead of letting the exception propagate.
            error_message = "Unexpected error."
            handle_unexpected_error(e)

        return ActionRulesResponse(error_message, action_rules)

    @action_rules_api.post("/", apply=[jsoninator(ActionRulesRequest)])
    def create_action_rule(
        request: ActionRulesRequest, ) -> ActionRulesResponse:
        """
        Create an action rule.
        """
        logger.info("request: %s", request)
        error_message = ""

        try:
            hmaconfig.create_config(request.action_rule)
        except ClientError as e:
            # TODO this test for "already exists" should be moved to a common place
            # dynamodb signals a duplicate primary key via this error code.
            if e.response["Error"][
                    "Code"] == "ConditionalCheckFailedException":
                error_message = f"An action rule with the name '{request.action_rule.name}' already exists."
                logger.warning(
                    "Duplicate action rule creation attempted: %s",
                    e.response["Error"]["Message"],
                )
            else:
                error_message = "Unexpected error."
                logger.error("Unexpected client error: %s",
                             e.response["Error"]["Message"])
                logger.exception(e)
            # NOTE(review): presumably bottle's thread-local `response` object
            # imported at module level — confirm. Also, 500 for a duplicate
            # name (a client error) is unusual; 400/409 may fit better.
            response.status = 500
        except Exception as e:
            error_message = "Unexpected error."
            handle_unexpected_error(e)

        return ActionRulesResponse(error_message)

    @action_rules_api.put("/<old_name>",
                          apply=[jsoninator(ActionRulesRequest)])
    def update_action_rule(
        request: ActionRulesRequest,
        old_name: str,
    ) -> ActionRulesResponse:
        """
        Update the action rule with name=<oldname>.
        """
        logger.info("old_name: %s", old_name)
        logger.info("request: %s", request)
        error_message = ""

        if ActionRule.exists(request.action_rule.name):
            # Name unchanged: update the existing config in place.
            try:
                hmaconfig.update_config(request.action_rule)
            except Exception as e:
                error_message = "Unexpected error."
                handle_unexpected_error(e)
        elif ActionRule.exists(old_name):
            # Rename: create the new config before deleting the old one so a
            # failure cannot lose the rule entirely.
            try:
                hmaconfig.create_config(request.action_rule)
                hmaconfig.delete_config_by_type_and_name(
                    "ActionRule", old_name)
            except Exception as e:
                error_message = "Unexpected error."
                handle_unexpected_error(e)
        else:
            error_message = f"An action rule named '{request.action_rule.name}' or '{old_name}' does not exist."
            logger.warning(
                "An attempt was made to update an action rule named either '%s' or '%s' but neither exist.",
                request.action_rule.name,
                old_name,
            )
            response.status = 500

        return ActionRulesResponse(error_message)

    @action_rules_api.delete("/<name>", apply=[jsoninator])
    def delete_action_rule(name: str) -> ActionRulesResponse:
        """
        Delete the action rule with name=<name>.
        """
        logger.info("name: %s", name)
        error_message = ""

        if ActionRule.exists(name):
            try:
                hmaconfig.delete_config_by_type_and_name("ActionRule", name)
            except Exception as e:
                error_message = "Unexpected error."
                handle_unexpected_error(e)
        else:
            error_message = f"An action rule named '{name}' does not exist."
            logger.warning(
                "An attempt was made to delete an action rule named '%s' that does not exist.",
                name,
            )
            response.status = 500

        return ActionRulesResponse(error_message)

    return action_rules_api
Esempio n. 7
0
def get_bank_api(bank_table: Table, bank_user_media_bucket: str,
                 submissions_queue_url: str) -> bottle.Bottle:
    """
    Closure for dependencies of the bank API.

    Args:
        bank_table: dynamodb table backing banks, members and signals.
        bank_user_media_bucket: s3 bucket for user-uploaded bank media.
        submissions_queue_url: SQS queue new members are submitted to.
    """

    bank_api = SubApp()
    table_manager = BanksTable(table=bank_table)

    # Bank Management

    @bank_api.get("/get-all-banks", apply=[jsoninator])
    def get_all_banks() -> AllBanksEnvelope:
        """
        Get all banks.
        """
        return AllBanksEnvelope(banks=table_manager.get_all_banks())

    @bank_api.get("/get-bank/<bank_id>", apply=[jsoninator])
    def get_bank(bank_id=None) -> Bank:
        """
        Get a specific bank from a bank_id.
        """
        bank = table_manager.get_bank(bank_id=bank_id)
        return bank

    @bank_api.post("/create-bank", apply=[jsoninator])
    def create_bank() -> Bank:
        """
        Create a bank using only the name and description.
        """
        return table_manager.create_bank(
            bank_name=bottle.request.json["bank_name"],
            bank_description=bottle.request.json["bank_description"],
        )

    @bank_api.post("/update-bank/<bank_id>", apply=[jsoninator])
    def update_bank(bank_id=None) -> Bank:
        """
        Update name and description for a bank_id.
        """
        return table_manager.update_bank(
            bank_id=bank_id,
            bank_name=bottle.request.json["bank_name"],
            bank_description=bottle.request.json["bank_description"],
        )

    # Member Management

    @bank_api.get("/get-members/<bank_id>", apply=[jsoninator])
    def get_members(bank_id=None) -> BankMembersPage:
        """
        Get a page of bank members. Use the "continuation_token" from this
        response to get subsequent pages.
        """
        # Token is a JSON-encoded dynamodb exclusive-start key; absent on the
        # first page.
        continuation_token = (
            bottle.request.query.continuation_token
            and json.loads(bottle.request.query.continuation_token) or None)

        try:
            content_type = get_content_type_for_name(
                bottle.request.query.content_type)

        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt.
        except Exception:
            bottle.abort(
                400, "content_type must be provided as a query parameter.")

        db_response = table_manager.get_all_bank_members_page(
            bank_id=bank_id,
            content_type=content_type,
            exclusive_start_key=continuation_token,
        )

        # Re-encode the next-page key for the client; None when this was the
        # last page.
        continuation_token = None
        if db_response.last_evaluated_key:
            continuation_token = uriencode(
                json.dumps(db_response.last_evaluated_key))

        return BankMembersPage(
            bank_members=with_preview_urls(db_response.items),
            continuation_token=continuation_token,
        )

    @bank_api.post("/add-member/<bank_id>", apply=[jsoninator])
    def add_member(bank_id=None) -> PreviewableBankMember:
        """
        Add a bank member. Expects a JSON object with following fields:
        - content_type: ["photo"|"video"]
        - storage_bucket: s3bucket for the media
        - storage_key: key for the media on s3
        - notes: String, any additional notes you want to associate with this
            member.

        Clients would want to use get_media_upload_url() to get a
        storage_bucket, storage_key and a upload_url before using add_member()

        Returns 200 OK with the resulting bank_member. 500 on failure.
        """
        content_type = get_content_type_for_name(
            bottle.request.json["content_type"])
        storage_bucket = bottle.request.json["storage_bucket"]
        storage_key = bottle.request.json["storage_key"]
        notes = bottle.request.json["notes"]

        return with_preview_url(
            bank_ops.add_bank_member(
                banks_table=table_manager,
                sqs_client=_get_sqs_client(),
                submissions_queue_url=submissions_queue_url,
                bank_id=bank_id,
                content_type=content_type,
                storage_bucket=storage_bucket,
                storage_key=storage_key,
                raw_content=None,
                notes=notes,
            ))

    @bank_api.post("/add-detached-member-signal/<bank_id>", apply=[jsoninator])
    def add_detached_bank_member_signal(bank_id=None) -> BankMemberSignal:
        """
        Add a virtual bank_member (without any associated media) and a
        corresponding signal.

        Requires JSON object with following fields:
        - signal_type: ["pdq"|"pdq_ocr","photo_md5"] -> anything from
          threatexchange.content_type.meta.get_signal_types_by_name()'s keys
        - content_type: ["photo"|"video"] to get the content_type for the
          virtual member.
        - signal_value: the hash to store against this signal. Will
          automatically de-dupe against existing signals.
        """
        content_type = get_content_type_for_name(
            bottle.request.json["content_type"])
        signal_type = get_signal_types_by_name()[
            bottle.request.json["signal_type"]]
        signal_value = bottle.request.json["signal_value"]

        return bank_ops.add_detached_bank_member_signal(
            banks_table=table_manager,
            bank_id=bank_id,
            content_type=content_type,
            signal_type=signal_type,
            signal_value=signal_value,
        )

    # Miscellaneous
    @bank_api.post("/get-media-upload-url")
    def get_media_upload_url(media_type=None):
        """
        Get a presigned S3 url that can be used by the client to PUT an object.

        Request Payload must be json with the following attributes:

        `media_type` must be something like ['image/gif', 'image/png', 'application/zip']
        `extension` must be a period followed by file extension. eg. `.mp4`, `.jpg`
        """
        extension = bottle.request.json.get("extension")
        media_type = bottle.request.json.get("media_type")

        if (not extension) or extension[0] != ".":
            bottle.abort(400, "extension must start with a period. eg. '.mp4'")

        # Fix: was `id = ...`, shadowing the builtin.
        media_id = str(uuid.uuid4())
        # Date-only fragment, eg. 2019-09-12, used to shard keys by day.
        # (Was datetime.now().isoformat("|").split("|")[0] — same value.)
        today_fragment = datetime.now().date().isoformat()
        s3_key = f"bank-media/{media_type}/{today_fragment}/{media_id}{extension}"

        return {
            "storage_bucket":
            bank_user_media_bucket,
            "storage_key":
            s3_key,
            "upload_url":
            create_presigned_put_url(
                bucket_name=bank_user_media_bucket,
                key=s3_key,
                file_type=media_type,
                expiration=3600,
            ),
        }

    @bank_api.get("/get-member/<bank_member_id>", apply=[jsoninator])
    def get_member(bank_member_id=None) -> PreviewableBankMemberWithSignals:
        """
        Get a bank member with signals...
        """
        member = table_manager.get_bank_member(bank_member_id=bank_member_id)
        signals = table_manager.get_signals_for_bank_member(
            bank_member_id=bank_member_id)

        return PreviewableBankMemberWithSignals(**asdict(
            with_preview_url(member)),
                                                signals=signals)

    @bank_api.post("/remove-bank-member/<bank_member_id>")
    def remove_bank_member(bank_member_id: str):
        """
        Remove bank member signals from the processing index and mark
        bank_member as is_removed=True.

        Returns empty json object.
        """
        bank_ops.remove_bank_member(
            banks_table=table_manager,
            bank_member_id=bank_member_id,
        )
        return {}

    return bank_api
Esempio n. 8
0
def get_matches_api(
    datastore_table: Table,
    hma_config_table: str,
    indexes_bucket_name: str,
    writeback_queue_url: str,
    bank_table: Table,
    signal_type_mapping: HMASignalTypeMapping,
) -> bottle.Bottle:
    """
    A Closure that includes all dependencies that MUST be provided by the root
    API that this API plugs into. Declare dependencies here, but initialize in
    the root API alone.
    """

    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/matches/'
    matches_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    banks_table = BanksTable(table=bank_table,
                             signal_type_mapping=signal_type_mapping)

    @matches_api.get("/", apply=[jsoninator])
    def matches() -> MatchSummariesResponse:
        """
        Return all, or a filtered list of matches based on query params.

        content_q takes precedence over signal_q; with neither, a page of the
        most recent matches is returned.
        """
        signal_q = bottle.request.query.signal_q or None  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``
        signal_source = bottle.request.query.signal_source or None  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``
        content_q = bottle.request.query.content_q or None  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``

        if content_q:
            records = MatchRecord.get_from_content_id(datastore_table,
                                                      content_q,
                                                      signal_type_mapping)
        elif signal_q:
            records = MatchRecord.get_from_signal(datastore_table, signal_q,
                                                  signal_source or "",
                                                  signal_type_mapping)
        else:
            # TODO: Support pagination after implementing in UI.
            records = MatchRecord.get_recent_items_page(
                datastore_table, signal_type_mapping).items

        return MatchSummariesResponse(match_summaries=[
            MatchSummary(
                content_id=record.content_id,
                signal_id=record.signal_id,
                signal_source=record.signal_source,
                updated_at=record.updated_at.isoformat(),
            ) for record in records
        ])

    @matches_api.get("/match/", apply=[jsoninator])
    def match_details() -> MatchDetailsResponse:
        """
        Return the match details for a given content id.
        """
        results = []
        if content_id := bottle.request.query.content_id or None:  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``
            results = get_match_details(
                datastore_table=datastore_table,
                banks_table=banks_table,
                content_id=content_id,
                signal_type_mapping=signal_type_mapping,
            )
        return MatchDetailsResponse(match_details=results)

    # Fix: the function is annotated -> bottle.Bottle but the sub-app was
    # never returned, so the root app would have received None.
    return matches_api
Esempio n. 9
0
def get_submit_api(
    dynamodb_table: Table,
    image_bucket: str,
    image_prefix: str,
    submissions_queue_url: str,
    hash_queue_url: str,
) -> bottle.Bottle:
    """
    A Closure that includes all dependencies that MUST be provided by the root
    API that this API plugs into. Declare dependencies here, but initialize in
    the root API alone.

    Endpoints for getting content into HMA: by s3 object, external URL, raw
    bytes, presigned put-url, or a precomputed hash.
    """

    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/submit/'
    submit_api = SubApp()
    s3_bucket_image_source = S3BucketContentSource(image_bucket, image_prefix)

    def _content_exist_error(content_id: str):
        # Shared 400 response for duplicate submissions made without
        # force_resubmit=True.
        return bottle.abort(
            400,
            f"Content with id '{content_id}' already exists if you want to resubmit `force_resubmit=True` must be included in payload.",
        )

    def _record_content_submission_from_request(
        request: SubmitRequestBodyBase, ) -> bool:
        """
        Given a request object submission record the content object to the table passed to
        the API using 'record_content_submission'
        Note: this method does not store the content media itself.

        Callers treat a False return as "content already exists" and respond
        via _content_exist_error.
        """

        content_ref, content_ref_type = request.get_content_ref_details()

        return record_content_submission(
            dynamodb_table,
            content_id=request.content_id,
            content_type=request.content_type,
            content_ref=content_ref,
            content_ref_type=content_ref_type,
            additional_fields=set(request.additional_fields)
            if request.additional_fields else set(),
            force_resubmit=request.force_resubmit,
        )

    @submit_api.post("/s3/",
                     apply=[jsoninator(SubmitContents3ObjectRequestBody)])
    def submit_s3(
        request: SubmitContents3ObjectRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submission of a s3 object of a piece of content.

        Unlike the other endpoints, recording and queueing are both delegated
        to submit_content_request_from_s3_object.
        """
        submit_content_request_from_s3_object(
            dynamodb_table,
            submissions_queue_url=submissions_queue_url,
            bucket=request.bucket_name,
            key=request.object_key,
            content_id=request.content_id,
            content_type=request.content_type,
            additional_fields=set(request.additional_fields)
            if request.additional_fields else set(),
            force_resubmit=request.force_resubmit,
        )

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    @submit_api.post("/url/",
                     apply=[jsoninator(SubmitContentViaURLRequestBody)])
    def submit_url(
        request: SubmitContentViaURLRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submission via a url to content. This does not store a copy of the content in s3
        """
        if not _record_content_submission_from_request(request):
            return _content_exist_error(request.content_id)

        send_submission_to_url_queue(
            dynamodb_table,
            submissions_queue_url,
            request.content_id,
            request.content_type,
            request.content_url,
        )

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    @submit_api.post("/bytes/",
                     apply=[jsoninator(SubmitContentBytesRequestBody)])
    def submit_bytes(
        request: SubmitContentBytesRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submit of media to HMA via a direct transfer of bytes to the system's s3 bucket.

        content_bytes is expected to be base64-encoded in the JSON payload.
        """
        content_id = request.content_id
        file_contents = base64.b64decode(request.content_bytes)

        # We want to record the submission before triggering and processing on
        # the content itself therefore we write to dynamodb before s3
        if not _record_content_submission_from_request(request):
            return _content_exist_error(request.content_id)

        s3_bucket_image_source.put_image_bytes(content_id, file_contents)

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    @submit_api.post(
        "/put-url/",
        apply=[jsoninator(SubmitContentViaPutURLUploadRequestBody)])
    def submit_put_url(
        request: SubmitContentViaPutURLUploadRequestBody,
    ) -> t.Union[SubmitViaUploadUrlResponse, SubmitError]:
        """
        Submission of content to HMA in two steps
        1st the creation to a content record and put url based on request body
        2nd Upload to the system's s3 bucket by said put url returned by this method
        """
        presigned_url = create_presigned_put_url(
            bucket_name=image_bucket,
            key=s3_bucket_image_source.get_s3_key(request.content_id),
            file_type=request.file_type,
        )

        if presigned_url:
            # Only record the submission once we know we can hand back a
            # working upload url.
            if not _record_content_submission_from_request(request):
                return _content_exist_error(request.content_id)

            return SubmitViaUploadUrlResponse(
                content_id=request.content_id,
                file_type=str(request.file_type),
                presigned_url=presigned_url,
            )

        bottle.response.status = 400
        return SubmitError(
            content_id=request.content_id,
            message="Failed to generate upload url",
        )

    @submit_api.post("/hash/",
                     apply=[jsoninator(SubmitContentHashRequestBody)])
    def submit_hash(
        request: SubmitContentHashRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submission of a hash from a piece of content.
        Functions the same as other submission endpoint but skips
        the hasher and media storage.
        """

        # Record content object (even though we don't store anything just like with url)
        if not _record_content_submission_from_request(request):
            return _content_exist_error(request.content_id)

        # Record hash
        #   ToDo expand submit hash API to include `signal_specific_attributes`
        # NOTE(review): naive (timezone-unaware) timestamp — presumably the
        # rest of the pipeline also uses naive local time; confirm.
        hash_record = PipelineHashRecord(
            content_id=request.content_id,
            signal_type=t.cast(t.Type[SignalType], request.signal_type),
            content_hash=request.signal_value,
            updated_at=datetime.datetime.now(),
        )
        hash_record.write_to_table(dynamodb_table)

        # Send hash directly to matcher
        # todo this could maybe try and reuse the methods in UnifiedHasher in #749
        _get_sqs_client().send_message(
            QueueUrl=hash_queue_url,
            MessageBody=json.dumps(hash_record.to_sqs_message()),
        )

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    return submit_api
Esempio n. 10
0
def get_datasets_api(
    hma_config_table: str,
    datastore_table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
) -> bottle.Bottle:
    """
    ToDo / FixMe: this file is probably more about privacy groups than datasets...
    """
    # The documentation below expects prefix to be '/datasets/'
    datasets_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    @datasets_api.get("/", apply=[jsoninator])
    def get_all_dataset_summaries() -> DatasetSummariesResponse:
        """
        Returns summaries for all datasets. Summary includes all facts that are
        not configurable. Eg. its name, the number of hashes it has, the
        number of matches it has caused, etc.
        """
        return DatasetSummariesResponse(
            threat_exchange_datasets=_get_threat_exchange_datasets(
                datastore_table,
                threat_exchange_data_bucket_name,
                threat_exchange_data_folder,
            ))

    @datasets_api.post("/update", apply=[jsoninator(UpdateDatasetRequest)])
    def update_dataset(request: UpdateDatasetRequest) -> Dataset:
        """
        Update dataset values: fetcher_active, write_back, and matcher_active.
        """
        config = ThreatExchangeConfig.getx(str(request.privacy_group_id))
        config.fetcher_active = request.fetcher_active
        config.write_back = request.write_back
        config.matcher_active = request.matcher_active
        updated_config = hmaconfig.update_config(config).__dict__
        updated_config["privacy_group_id"] = updated_config["name"]

        additional_config = AdditionalMatchSettingsConfig.get(
            str(request.privacy_group_id))
        if request.pdq_match_threshold:
            if additional_config:
                additional_config.pdq_match_threshold = int(
                    request.pdq_match_threshold)
                hmaconfig.update_config(additional_config)
            else:
                additional_config = AdditionalMatchSettingsConfig(
                    str(request.privacy_group_id),
                    int(request.pdq_match_threshold))
                hmaconfig.create_config(additional_config)
        elif additional_config:  # pdq_match_threshold was set and now should be removed
            hmaconfig.delete_config(additional_config)

        return Dataset.from_dict(updated_config)

    @datasets_api.post("/create", apply=[jsoninator(CreateDatasetRequest)])
    def create_dataset(request: CreateDatasetRequest) -> CreateDatasetResponse:
        """
        Create a local dataset (defaults defined in CreateDatasetRequest)
        """
        assert isinstance(request, CreateDatasetRequest)

        create_privacy_group_if_not_exists(
            privacy_group_id=str(request.privacy_group_id),
            privacy_group_name=request.privacy_group_name,
            description=request.description,
            in_use=True,
            fetcher_active=request.fetcher_active,
            matcher_active=request.matcher_active,
            write_back=request.write_back,
        )

        return CreateDatasetResponse(
            response=f"Created dataset {request.privacy_group_id}")

    @datasets_api.post("/sync", apply=[jsoninator])
    def sync_datasets() -> SyncDatasetResponse:
        """
        Fetch new collaborations from ThreatExchange and sync with the configs stored in DynamoDB.
        """
        sync_privacy_groups()
        return SyncDatasetResponse(response="Privacy groups are up to date")

    @datasets_api.post("/delete/<key>", apply=[jsoninator])
    def delete_dataset(key=None) -> DeleteDatasetResponse:
        """
        Delete the dataset with key=<key>
        """
        config = ThreatExchangeConfig.getx(str(key))
        hmaconfig.delete_config(config)
        return DeleteDatasetResponse(response="The privacy group is deleted")

    @datasets_api.get("/match-settings", apply=[jsoninator])
    def get_all_match_settings() -> MatchSettingsResponse:
        """
        Return all match settings configs
        """
        return MatchSettingsResponse(match_settings=[
            MatchSettingsResponseBody(c)
            for c in AdditionalMatchSettingsConfig.get_all()
        ])

    @datasets_api.get("/match-settings/<key>", apply=[jsoninator])
    def get_match_settings(key=None, ) -> MatchSettingsResponseBody:
        """
        Return a match settings config for a given privacy_group_id
        """
        if config := AdditionalMatchSettingsConfig.get(str(key)):
            return MatchSettingsResponseBody(config)
        return bottle.abort(400, f"No match_settings for pg_id {key} found")