def get_indexes_api(indexes_bucket_name: str,
                    indexer_function_name: str) -> bottle.Bottle:
    """
    Sub-app for index-related routes.

    Dependencies (the indexes S3 bucket name and the indexer lambda's
    function name) are bound here via closure; they MUST be provided by the
    root API this sub-app plugs into.
    """
    indexes_api = SubApp()

    @indexes_api.get("/last-modified", apply=[jsoninator])
    def all_indexes_last_modified() -> IndexesLastModifiedResponse:
        """
        Returns the max of last_modified time of all indexes. Read as: when
        was the latest index rebuilt.
        """
        return IndexesLastModifiedResponse(
            S3BackedInstrumentedIndexMixin.get_latest_last_modified(
                bucket_name=indexes_bucket_name))

    @indexes_api.post("/rebuild-all")
    def rebuild_all_indexes():
        """
        Well, it rebuilds all your indexes.

        Async operation. Just triggers, does not wait.
        """
        # Fire-and-forget: InvocationType="Event" makes invoke() return as
        # soon as the invocation is queued, so the lambda's response carries
        # no useful payload. (Previously the result was bound to an unused
        # local; it is intentionally discarded now.)
        _get_lambda_client().invoke(
            FunctionName=indexer_function_name,
            InvocationType="Event",
        )

    return indexes_api
def get_actions_api(hma_config_table: str) -> bottle.Bottle:
    """
    Sub-app exposing CRUD routes over ActionPerformer configs.

    `hma_config_table` names the config table; HMAConfig is initialized here
    so the config classes used by the handlers below can read/write it.
    """
    # The documentation below expects prefix to be '/actions/'
    actions_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    @actions_api.get("/", apply=[jsoninator])
    def fetch_all_actions() -> FetchAllActionsResponse:
        """
        Return all action configs.
        """
        action_configs = ActionPerformer.get_all()
        # Configs are serialized via their instance __dict__.
        return FetchAllActionsResponse(
            actions_response=[config.__dict__ for config in action_configs])

    @actions_api.put(
        "/<old_name>/<old_config_sub_stype>",
        apply=[jsoninator(CreateUpdateActionRequest)],
    )
    def update_action(request: CreateUpdateActionRequest, old_name: str,
                      old_config_sub_stype: str) -> UpdateActionResponse:
        """
        Update the action, name, url, and headers for action with
        name=<old_name> and subtype=<old_config_sub_stype>.
        """
        if old_name != request.name or old_config_sub_stype != request.config_subtype:
            # The name field can't be updated because it is the primary key
            # The config sub type can't be updated because it is the config class level param
            # So a rename/retype is implemented as delete-then-create, by
            # calling the sibling handlers directly as plain functions.
            delete_action(old_name)
            create_action(request)
        else:
            # In-place update: fetch the concrete config subclass by subtype
            # name, then overwrite only the fields present in the request.
            config = ActionPerformer._get_subtypes_by_name()[
                request.config_subtype].getx(request.name)
            for key, value in request.fields.items():
                setattr(config, key, value)
            hmaconfig.update_config(config)
        return UpdateActionResponse(response="The action config is updated.")

    @actions_api.post("/", apply=[jsoninator(CreateUpdateActionRequest)])
    def create_action(
            request: CreateUpdateActionRequest) -> CreateActionResponse:
        """
        Create an action.
        """
        # Look up the concrete ActionPerformer subclass by subtype name and
        # construct it from the request's name plus its free-form fields.
        config = ActionPerformer._get_subtypes_by_name()[
            request.config_subtype](**{
                "name": request.name,
                **request.fields
            })
        hmaconfig.create_config(config)
        return CreateActionResponse(response="The action config is created.")

    @actions_api.delete("/<name>", apply=[jsoninator])
    def delete_action(name: str) -> DeleteActionResponse:
        """
        Delete the action with name=<name>.
        """
        hmaconfig.delete_config_by_type_and_name("ActionPerformer", name)
        return DeleteActionResponse(response="The action config is deleted.")

    return actions_api
def get_lcc_api(signal_type_mapping: HMASignalTypeMapping,
                storage_path: str) -> bottle.Bottle:
    """
    Sub-app providing live content clustering (LCC) capabilities.

    Recently seen content is indexed, and callers can ask whether a given
    hash matches any recently seen content_id.
    """
    lcc_api = SubApp()

    @lcc_api.get("/recently-seen/", apply=[jsoninator])
    def recently_seen_in_lcc() -> RecentlySeenLCCResponse:
        """
        Given a signal_type and a hash, have we seen something like this
        recently? Uses default precision knobs.

        TODO: Make thresholds configurable.
        """
        requested_type = signal_type_mapping.get_signal_type_enforce(
            bottle.request.query.signal_type)
        queried_hash = bottle.request.query.hash

        lcc_index = get_index(storage_path, requested_type.get_name())
        matches = lcc_index.query(queried_hash)

        if len(matches) == 0:
            return RecentlySeenLCCResponse(False, None, None)

        # Use the first result as the content_id, TODO: convert content_Id to
        # preview_url, also add content_type
        matched_content_id = matches[0].metadata
        return RecentlySeenLCCResponse(True, matched_content_id,
                                       matched_content_id)

    return lcc_api
def get_content_api(dynamodb_table: Table, image_bucket: str,
                    image_prefix: str) -> bottle.Bottle:
    """
    A Closure that includes all dependencies that MUST be provided by the
    root API that this API plugs into. Declare dependencies here, but
    initialize in the root API alone.
    """

    def get_preview_url(content_id, content_object) -> str:
        """
        Given a content_id and a content_object, returns a URL you can use
        to preview it. Aborts with 400 when no preview can be derived.
        """
        content_object = t.cast(ContentObject, content_object)
        preview_url = ""
        if content_object.content_ref_type == ContentRefType.DEFAULT_S3_BUCKET:
            # Content stored in our own bucket: presign a 1h GET url.
            source = S3BucketContentSource(image_bucket, image_prefix)
            preview_url = create_presigned_url(image_bucket,
                                               source.get_s3_key(content_id),
                                               None, 3600, "get_object")
        elif content_object.content_ref_type == ContentRefType.URL:
            # Externally hosted content: the ref itself is the preview.
            preview_url = content_object.content_ref
        if not preview_url:
            return bottle.abort(400, "preview_url not found.")
        return preview_url

    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/content/'
    content_api = SubApp()

    @content_api.get("/", apply=[jsoninator])
    def content() -> t.Optional[ContentObject]:
        """
        Return content object for given ID.
        """
        content_id = bottle.request.query.content_id or None
        if content_id:
            return ContentObject.get_from_content_id(dynamodb_table,
                                                     content_id)
        return None

    @content_api.get("/pipeline-progress/", apply=[jsoninator])
    def pipeline_progress() -> ContentPipelineProgress:
        """
        WARNING: UNOPTIMIZED. DO NOT CALL FROM AUTOMATED SYSTEMS.

        Build a history of the stages that this piece of content has gone
        through and what their results were. Do not call this from anything
        but a UI. This is not optimized for performance.
        """
        content_id = bottle.request.query.content_id or None
        if not content_id:
            return bottle.abort(400, "content_id must be provided.")
        content_id = t.cast(str, content_id)

        content_object = ContentObject.get_from_content_id(
            dynamodb_table, content_id)
        if not content_object:
            return bottle.abort(400,
                                f"Content with id '{content_id}' not found.")
        content_object = t.cast(ContentObject, content_object)

        preview_url = get_preview_url(content_id, content_object)

        # The result object will be gradually built up as records are
        # retrieved.
        result = ContentPipelineProgress(
            content_id=content_id,
            content_type=content_object.content_type,
            content_preview_url=preview_url,
            submitted_at=content_object.updated_at,
            submission_additional_fields=list(
                content_object.additional_fields),
        )

        hash_records = PipelineHashRecord.get_from_content_id(
            dynamodb_table, content_id)
        if len(hash_records) != 0:
            result.hashed_at = max(hash_records,
                                   key=lambda r: r.updated_at).updated_at
            for hash_record in hash_records:
                # Assume that each signal type has a single hash
                if hash_record.signal_type.get_name() in result.hash_results:
                    return bottle.abort(
                        500,
                        f"Content with id '{content_id}' has multiple hash records for signal-type: '{hash_record.signal_type.get_name()}'.",
                    )
                result.hash_results[hash_record.signal_type.get_name(
                )] = hash_record.content_hash

        match_records = MatchRecord.get_from_content_id(
            dynamodb_table, content_id)
        if len(match_records) != 0:
            result.matched_at = max(match_records,
                                    key=lambda r: r.updated_at).updated_at
            # TODO #751 Until we resolve type agnostic storage of signal data,
            # we can't populate match details.
            # actually populate result.match_results.

        # TODO: ActionEvaluation does not yet leave a trail. Either record
        # action evaluation or remove the evaluation stage from the
        # pipeline-progress indicator.

        action_records = ActionEvent.get_from_content_id(
            dynamodb_table, content_id)
        if len(action_records) != 0:
            result.action_performed_at = max(
                action_records, key=lambda r: r.performed_at).performed_at
            result.action_perform_results = [
                r.action_label for r in action_records
            ]

        return result

    @content_api.get("/action-history/", apply=[jsoninator])
    def action_history() -> ActionHistoryResponse:
        """
        Return list of action event records for a given ID.
        """
        if content_id := bottle.request.query.content_id or None:
            return ActionHistoryResponse(
                ActionEvent.get_from_content_id(dynamodb_table,
                                                f"{content_id}"))
        return ActionHistoryResponse()

    # FIX: the closure previously fell off the end without returning the
    # sub-app, so mounting it on the root API got None. Every sibling
    # get_*_api closure returns its SubApp; do the same here.
    return content_api
def get_stats_api(counts_table: Table) -> bottle.Bottle:
    """
    Closure for all dependencies for the stats APIs.
    """
    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/stats/'
    stats_api = SubApp()

    # Maps the public stat_name query values to internal metric names.
    stat_name_to_metric = {
        "hashes": metrics.names.pdq_hasher_lambda.hash,
        "matches": metrics.names.pdq_matcher_lambda.write_match_record,
    }

    @stats_api.get("/", apply=[jsoninator])
    def default_stats() -> StatResponse:
        """
        If measure performance tfvar/os.env is true, it returns stats, else,
        returns 404. A 404 should be surfaced by clients with instructions
        on how to enable metrics tracking.

        The graph_data always contains the start_time and end_time
        timestamps with 0 values to make graphing easier.
        """
        if not is_publishing_metrics():
            return bottle.abort(
                404, "This HMA instance is not publishing metrics.")

        if (not bottle.request.query.stat_name
                or bottle.request.query.stat_name not in stat_name_to_metric):
            # FIX: corrected "specifiy" -> "specify" in the error message.
            return bottle.abort(
                400,
                f"Must specify stat_name in query parameters. Must be one of {stat_name_to_metric.keys()}",
            )

        metric = stat_name_to_metric[bottle.request.query.stat_name]
        time_span_arg = bottle.request.query.time_span
        # Unrecognized / missing time_span falls back to the last 24 hours.
        metric_time_period = {
            "24h": metrics_query.MetricTimePeriod.HOURS_24,
            "1h": metrics_query.MetricTimePeriod.HOURS_1,
            "7d": metrics_query.MetricTimePeriod.DAYS_7,
        }.get(time_span_arg, metrics_query.MetricTimePeriod.HOURS_24)

        count_with_graphs = metrics_query.get_count_with_graph(
            [metric],
            metric_time_period,
        )

        return StatResponse(
            StatsCard(
                count_with_graphs[metric].count,
                metric_time_period,
                count_with_graphs[metric].graph_data,
            ))

    @stats_api.get("/counts/", apply=[jsoninator])
    def aggregate_counts() -> AggregateCountResponse:
        """
        return the set of aggregate_counts
        """
        if not is_publishing_metrics():
            return bottle.abort(
                404, "This HMA instance is not publishing metrics.")

        PIPELINE_COUNTS_TO_SURFACE = [
            AggregateCount.PipelineNames.submits,
            AggregateCount.PipelineNames.hashes,
            AggregateCount.PipelineNames.matches,
        ]

        return AggregateCountResponse({
            count_name:
            int(AggregateCount(count_name).get_value(counts_table))
            for count_name in PIPELINE_COUNTS_TO_SURFACE
        })

    return stats_api
def get_action_rules_api(hma_config_table: str) -> bottle.Bottle:
    """
    Sub-app exposing CRUD routes over ActionRule configs.

    Handlers report failures by returning an error_message in the response
    body (and, where noted, setting HTTP status 500 via bottle's
    thread-local `response`) rather than raising.
    """
    # The endpoints below imply a prefix of '/action-rules'
    action_rules_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    @action_rules_api.get("/", apply=[jsoninator])
    def get_action_rules() -> ActionRulesResponse:
        """
        Return all action rules.
        """
        error_message = ""
        action_rules = []

        try:
            action_rules = ActionRule.get_all()
            logger.info("action_rules: %s", action_rules)
        except Exception as e:
            error_message = "Unexpected error."
            handle_unexpected_error(e)

        return ActionRulesResponse(error_message, action_rules)

    @action_rules_api.post("/", apply=[jsoninator(ActionRulesRequest)])
    def create_action_rule(
        request: ActionRulesRequest,
    ) -> ActionRulesResponse:
        """
        Create an action rule.
        """
        logger.info("request: %s", request)
        error_message = ""

        try:
            hmaconfig.create_config(request.action_rule)
        except ClientError as e:
            # TODO this test for "already exists" should be moved to a common place
            # A conditional-check failure from dynamodb means a rule with
            # this name (the primary key) already exists.
            if e.response["Error"][
                    "Code"] == "ConditionalCheckFailedException":
                error_message = f"An action rule with the name '{request.action_rule.name}' already exists."
                logger.warning(
                    "Duplicate action rule creation attempted: %s",
                    e.response["Error"]["Message"],
                )
            else:
                error_message = "Unexpected error."
                logger.error("Unexpected client error: %s",
                             e.response["Error"]["Message"])
                logger.exception(e)
            response.status = 500
        except Exception as e:
            error_message = "Unexpected error."
            handle_unexpected_error(e)

        return ActionRulesResponse(error_message)

    @action_rules_api.put("/<old_name>",
                          apply=[jsoninator(ActionRulesRequest)])
    def update_action_rule(
        request: ActionRulesRequest,
        old_name: str,
    ) -> ActionRulesResponse:
        """
        Update the action rule with name=<oldname>.
        """
        logger.info("old_name: %s", old_name)
        logger.info("request: %s", request)
        error_message = ""

        if ActionRule.exists(request.action_rule.name):
            # Same name: plain in-place update.
            try:
                hmaconfig.update_config(request.action_rule)
            except Exception as e:
                error_message = "Unexpected error."
                handle_unexpected_error(e)
        elif ActionRule.exists(old_name):
            # Renamed: name is the primary key, so create the new rule then
            # delete the old one.
            try:
                hmaconfig.create_config(request.action_rule)
                hmaconfig.delete_config_by_type_and_name(
                    "ActionRule", old_name)
            except Exception as e:
                error_message = "Unexpected error."
                handle_unexpected_error(e)
        else:
            error_message = f"An action rule named '{request.action_rule.name}' or '{old_name}' does not exist."
            logger.warning(
                "An attempt was made to update an action rule named either '%s' or '%s' but neither exist.",
                request.action_rule.name,
                old_name,
            )
            response.status = 500

        return ActionRulesResponse(error_message)

    @action_rules_api.delete("/<name>", apply=[jsoninator])
    def delete_action_rule(name: str) -> ActionRulesResponse:
        """
        Delete the action rule with name=<name>.
        """
        logger.info("name: %s", name)
        error_message = ""

        if ActionRule.exists(name):
            try:
                hmaconfig.delete_config_by_type_and_name("ActionRule", name)
            except Exception as e:
                error_message = "Unexpected error."
                handle_unexpected_error(e)
        else:
            error_message = f"An action rule named '{name}' does not exist."
            logger.warning(
                "An attempt was made to delete an action rule named '%s' that does not exist.",
                name,
            )
            response.status = 500

        return ActionRulesResponse(error_message)

    return action_rules_api
def get_bank_api(bank_table: Table, bank_user_media_bucket: str,
                 submissions_queue_url: str) -> bottle.Bottle:
    """
    Closure for dependencies of the bank API
    """
    bank_api = SubApp()
    table_manager = BanksTable(table=bank_table)

    # Bank Management

    @bank_api.get("/get-all-banks", apply=[jsoninator])
    def get_all_banks() -> AllBanksEnvelope:
        """
        Get all banks.
        """
        return AllBanksEnvelope(banks=table_manager.get_all_banks())

    @bank_api.get("/get-bank/<bank_id>", apply=[jsoninator])
    def get_bank(bank_id=None) -> Bank:
        """
        Get a specific bank from a bank_id.
        """
        bank = table_manager.get_bank(bank_id=bank_id)
        return bank

    @bank_api.post("/create-bank", apply=[jsoninator])
    def create_bank() -> Bank:
        """
        Create a bank using only the name and description.
        """
        return table_manager.create_bank(
            bank_name=bottle.request.json["bank_name"],
            bank_description=bottle.request.json["bank_description"],
        )

    @bank_api.post("/update-bank/<bank_id>", apply=[jsoninator])
    def update_bank(bank_id=None) -> Bank:
        """
        Update name and description for a bank_id.
        """
        return table_manager.update_bank(
            bank_id=bank_id,
            bank_name=bottle.request.json["bank_name"],
            bank_description=bottle.request.json["bank_description"],
        )

    # Member Management

    @bank_api.get("/get-members/<bank_id>", apply=[jsoninator])
    def get_members(bank_id=None) -> BankMembersPage:
        """
        Get a page of bank members. Use the "continuation_token" from this
        response to get subsequent pages.
        """
        # continuation_token arrives as a json-encoded dynamodb
        # last_evaluated_key; decode it when present.
        continuation_token = (
            bottle.request.query.continuation_token
            and json.loads(bottle.request.query.continuation_token) or None)

        try:
            content_type = get_content_type_for_name(
                bottle.request.query.content_type)
        except Exception:
            # FIX: was a bare `except:`, which also swallows SystemExit and
            # KeyboardInterrupt. Any lookup failure maps to a client error.
            bottle.abort(
                400, "content_type must be provided as a query parameter.")

        db_response = table_manager.get_all_bank_members_page(
            bank_id=bank_id,
            content_type=content_type,
            exclusive_start_key=continuation_token,
        )

        continuation_token = None
        if db_response.last_evaluated_key:
            # Re-encode the next-page key so clients can pass it back opaquely.
            continuation_token = uriencode(
                json.dumps(db_response.last_evaluated_key))

        return BankMembersPage(
            bank_members=with_preview_urls(db_response.items),
            continuation_token=continuation_token,
        )

    @bank_api.post("/add-member/<bank_id>", apply=[jsoninator])
    def add_member(bank_id=None) -> PreviewableBankMember:
        """
        Add a bank member. Expects a JSON object with following fields:
        - content_type: ["photo"|"video"]
        - storage_bucket: s3bucket for the media
        - storage_key: key for the media on s3
        - notes: String, any additional notes you want to associate with
          this member.

        Clients would want to use get_media_upload_url() to get a
        storage_bucket, storage_key and a upload_url before using
        add_member()

        Returns 200 OK with the resulting bank_member. 500 on failure.
        """
        content_type = get_content_type_for_name(
            bottle.request.json["content_type"])
        storage_bucket = bottle.request.json["storage_bucket"]
        storage_key = bottle.request.json["storage_key"]
        notes = bottle.request.json["notes"]

        return with_preview_url(
            bank_ops.add_bank_member(
                banks_table=table_manager,
                sqs_client=_get_sqs_client(),
                submissions_queue_url=submissions_queue_url,
                bank_id=bank_id,
                content_type=content_type,
                storage_bucket=storage_bucket,
                storage_key=storage_key,
                raw_content=None,
                notes=notes,
            ))

    @bank_api.post("/add-detached-member-signal/<bank_id>",
                   apply=[jsoninator])
    def add_detached_bank_member_signal(bank_id=None) -> BankMemberSignal:
        """
        Add a virtual bank_member (without any associated media) and a
        corresponding signal. Requires JSON object with following fields:
        - signal_type: ["pdq"|"pdq_ocr","photo_md5"] -> anything from
          threatexchange.content_type.meta.get_signal_types_by_name()'s keys
        - content_type: ["photo"|"video"] to get the content_type for the
          virtual member.
        - signal_value: the hash to store against this signal.

        Will automatically de-dupe against existing signals.
        """
        content_type = get_content_type_for_name(
            bottle.request.json["content_type"])
        signal_type = get_signal_types_by_name()[
            bottle.request.json["signal_type"]]
        signal_value = bottle.request.json["signal_value"]

        return bank_ops.add_detached_bank_member_signal(
            banks_table=table_manager,
            bank_id=bank_id,
            content_type=content_type,
            signal_type=signal_type,
            signal_value=signal_value,
        )

    # Miscellaneous

    @bank_api.post("/get-media-upload-url")
    def get_media_upload_url():
        """
        Get a presigned S3 url that can be used by the client to PUT an
        object.

        Request Payload must be json with the following attributes:
        `media_type` must be something like ['image/gif', 'image/png',
        'application/zip']
        `extension` must be a period followed by file extension. eg.
        `.mp4`, `.jpg`
        """
        # FIX: dropped the unused `media_type=None` parameter — the route
        # has no wildcard, so bottle always calls this with no args and the
        # value was immediately overwritten from the request body.
        extension = bottle.request.json.get("extension")
        media_type = bottle.request.json.get("media_type")

        if (not extension) or extension[0] != ".":
            bottle.abort(400,
                         "extension must start with a period. eg. '.mp4'")

        # FIX: renamed `id` -> `media_id`; `id` shadows the builtin.
        media_id = str(uuid.uuid4())
        today_fragment = datetime.now().isoformat("|").split("|")[
            0]  # eg. 2019-09-12
        s3_key = f"bank-media/{media_type}/{today_fragment}/{media_id}{extension}"

        return {
            "storage_bucket": bank_user_media_bucket,
            "storage_key": s3_key,
            "upload_url":
            create_presigned_put_url(
                bucket_name=bank_user_media_bucket,
                key=s3_key,
                file_type=media_type,
                expiration=3600,
            ),
        }

    @bank_api.get("/get-member/<bank_member_id>", apply=[jsoninator])
    def get_member(bank_member_id=None) -> PreviewableBankMemberWithSignals:
        """
        Get a bank member with signals...
        """
        member = table_manager.get_bank_member(bank_member_id=bank_member_id)
        signals = table_manager.get_signals_for_bank_member(
            bank_member_id=bank_member_id)

        return PreviewableBankMemberWithSignals(**asdict(
            with_preview_url(member)), signals=signals)

    @bank_api.post("/remove-bank-member/<bank_member_id>")
    def remove_bank_member(bank_member_id: str):
        """
        Remove bank member signals from the processing index and mark
        bank_member as is_removed=True. Returns empty json object.
        """
        bank_ops.remove_bank_member(
            banks_table=table_manager,
            bank_member_id=bank_member_id,
        )
        return {}

    return bank_api
def get_matches_api(
    datastore_table: Table,
    hma_config_table: str,
    indexes_bucket_name: str,
    writeback_queue_url: str,
    bank_table: Table,
    signal_type_mapping: HMASignalTypeMapping,
) -> bottle.Bottle:
    """
    A Closure that includes all dependencies that MUST be provided by the
    root API that this API plugs into. Declare dependencies here, but
    initialize in the root API alone.
    """
    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/matches/'
    matches_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    banks_table = BanksTable(table=bank_table,
                             signal_type_mapping=signal_type_mapping)

    @matches_api.get("/", apply=[jsoninator])
    def matches() -> MatchSummariesResponse:
        """
        Return all, or a filtered list of matches based on query params.
        """
        signal_q = bottle.request.query.signal_q or None  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``
        signal_source = bottle.request.query.signal_source or None  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``
        content_q = bottle.request.query.content_q or None  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``

        # Filter precedence: content_q wins over signal_q; with neither,
        # fall back to a page of the most recent matches.
        if content_q:
            records = MatchRecord.get_from_content_id(datastore_table,
                                                      content_q,
                                                      signal_type_mapping)
        elif signal_q:
            records = MatchRecord.get_from_signal(datastore_table, signal_q,
                                                  signal_source or "",
                                                  signal_type_mapping)
        else:
            # TODO: Support pagination after implementing in UI.
            records = MatchRecord.get_recent_items_page(
                datastore_table, signal_type_mapping).items

        return MatchSummariesResponse(match_summaries=[
            MatchSummary(
                content_id=record.content_id,
                signal_id=record.signal_id,
                signal_source=record.signal_source,
                updated_at=record.updated_at.isoformat(),
            ) for record in records
        ])

    @matches_api.get("/match/", apply=[jsoninator])
    def match_details() -> MatchDetailsResponse:
        """
        Return the match details for a given content id.
        """
        results = []
        if content_id := bottle.request.query.content_id or None:  # type: ignore # ToDo refactor to use `jsoninator(<requestObj>, from_query=True)``
            results = get_match_details(
                datastore_table=datastore_table,
                banks_table=banks_table,
                content_id=content_id,
                signal_type_mapping=signal_type_mapping,
            )
        return MatchDetailsResponse(match_details=results)

    # FIX: the closure previously fell off the end without returning the
    # sub-app, so mounting it on the root API got None. Every sibling
    # get_*_api closure returns its SubApp; do the same here.
    return matches_api
def get_submit_api(
    dynamodb_table: Table,
    image_bucket: str,
    image_prefix: str,
    submissions_queue_url: str,
    hash_queue_url: str,
) -> bottle.Bottle:
    """
    A Closure that includes all dependencies that MUST be provided by the
    root API that this API plugs into. Declare dependencies here, but
    initialize in the root API alone.
    """
    # A prefix to all routes must be provided by the api_root app
    # The documentation below expects prefix to be '/submit/'
    submit_api = SubApp()
    s3_bucket_image_source = S3BucketContentSource(image_bucket,
                                                   image_prefix)

    def _content_exist_error(content_id: str):
        # Shared 400 response for re-submission without force_resubmit.
        return bottle.abort(
            400,
            f"Content with id '{content_id}' already exists if you want to resubmit `force_resubmit=True` must be included in payload.",
        )

    def _record_content_submission_from_request(
        request: SubmitRequestBodyBase,
    ) -> bool:
        """
        Given a request object submission record the content object to the
        table passed to the API using 'record_content_submission'
        Note: this method does not store the content media itself.
        """
        content_ref, content_ref_type = request.get_content_ref_details()
        return record_content_submission(
            dynamodb_table,
            content_id=request.content_id,
            content_type=request.content_type,
            content_ref=content_ref,
            content_ref_type=content_ref_type,
            additional_fields=set(request.additional_fields)
            if request.additional_fields else set(),
            force_resubmit=request.force_resubmit,
        )

    @submit_api.post("/s3/",
                     apply=[jsoninator(SubmitContents3ObjectRequestBody)])
    def submit_s3(
        request: SubmitContents3ObjectRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submission of a s3 object of a piece of content.
        """
        # Unlike the other handlers, recording + queueing is delegated
        # entirely to this helper.
        submit_content_request_from_s3_object(
            dynamodb_table,
            submissions_queue_url=submissions_queue_url,
            bucket=request.bucket_name,
            key=request.object_key,
            content_id=request.content_id,
            content_type=request.content_type,
            additional_fields=set(request.additional_fields)
            if request.additional_fields else set(),
            force_resubmit=request.force_resubmit,
        )

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    @submit_api.post("/url/",
                     apply=[jsoninator(SubmitContentViaURLRequestBody)])
    def submit_url(
        request: SubmitContentViaURLRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submission via a url to content. This does not store a copy of the
        content in s3
        """
        # Record the submission first; a False return means the content_id
        # already exists and force_resubmit was not set.
        if not _record_content_submission_from_request(request):
            return _content_exist_error(request.content_id)

        send_submission_to_url_queue(
            dynamodb_table,
            submissions_queue_url,
            request.content_id,
            request.content_type,
            request.content_url,
        )

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    @submit_api.post("/bytes/",
                     apply=[jsoninator(SubmitContentBytesRequestBody)])
    def submit_bytes(
        request: SubmitContentBytesRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submit of media to HMA via a direct transfer of bytes to the
        system's s3 bucket.
        """
        content_id = request.content_id
        file_contents = base64.b64decode(request.content_bytes)

        # We want to record the submission before triggering and processing on
        # the content itself therefore we write to dynamodb before s3
        if not _record_content_submission_from_request(request):
            return _content_exist_error(request.content_id)

        s3_bucket_image_source.put_image_bytes(content_id, file_contents)

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    @submit_api.post(
        "/put-url/",
        apply=[jsoninator(SubmitContentViaPutURLUploadRequestBody)])
    def submit_put_url(
        request: SubmitContentViaPutURLUploadRequestBody,
    ) -> t.Union[SubmitViaUploadUrlResponse, SubmitError]:
        """
        Submission of content to HMA in two steps
        1st the creation to a content record and put url based on request
        body
        2nd Upload to the system's s3 bucket by said put url returned by
        this method
        """
        presigned_url = create_presigned_put_url(
            bucket_name=image_bucket,
            key=s3_bucket_image_source.get_s3_key(request.content_id),
            file_type=request.file_type,
        )

        if presigned_url:
            # Only record the submission once we know an upload url exists.
            if not _record_content_submission_from_request(request):
                return _content_exist_error(request.content_id)
            return SubmitViaUploadUrlResponse(
                content_id=request.content_id,
                file_type=str(request.file_type),
                presigned_url=presigned_url,
            )

        bottle.response.status = 400
        return SubmitError(
            content_id=request.content_id,
            message="Failed to generate upload url",
        )

    @submit_api.post("/hash/",
                     apply=[jsoninator(SubmitContentHashRequestBody)])
    def submit_hash(
        request: SubmitContentHashRequestBody,
    ) -> t.Union[SubmitResponse, SubmitError]:
        """
        Submission of a hash from a piece of content.
        Functions the same as other submission endpoint but skips the
        hasher and media storage.
        """
        # Record content object (even though we don't store anything just like with url)
        if not _record_content_submission_from_request(request):
            return _content_exist_error(request.content_id)

        # Record hash
        #   ToDo expand submit hash API to include `signal_specific_attributes`
        # NOTE(review): datetime.datetime.now() is a naive local timestamp —
        # confirm whether records elsewhere expect UTC.
        hash_record = PipelineHashRecord(
            content_id=request.content_id,
            signal_type=t.cast(t.Type[SignalType], request.signal_type),
            content_hash=request.signal_value,
            updated_at=datetime.datetime.now(),
        )
        hash_record.write_to_table(dynamodb_table)

        # Send hash directly to matcher
        # todo this could maybe try and reuse the methods in UnifiedHasher in #749
        _get_sqs_client().send_message(
            QueueUrl=hash_queue_url,
            MessageBody=json.dumps(hash_record.to_sqs_message()),
        )

        return SubmitResponse(content_id=request.content_id,
                              submit_successful=True)

    return submit_api
def get_datasets_api(
    hma_config_table: str,
    datastore_table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
) -> bottle.Bottle:
    """
    ToDo / FixMe: this file is probably more about privacy groups than
    datasets...
    """
    # The documentation below expects prefix to be '/datasets/'
    datasets_api = SubApp()
    HMAConfig.initialize(hma_config_table)

    @datasets_api.get("/", apply=[jsoninator])
    def get_all_dataset_summaries() -> DatasetSummariesResponse:
        """
        Returns summaries for all datasets. Summary includes all facts that
        are not configurable. Eg. its name, the number of hashes it has, the
        number of matches it has caused, etc.
        """
        return DatasetSummariesResponse(
            threat_exchange_datasets=_get_threat_exchange_datasets(
                datastore_table,
                threat_exchange_data_bucket_name,
                threat_exchange_data_folder,
            ))

    @datasets_api.post("/update", apply=[jsoninator(UpdateDatasetRequest)])
    def update_dataset(request: UpdateDatasetRequest) -> Dataset:
        """
        Update dataset values: fetcher_active, write_back, and
        matcher_active.
        """
        config = ThreatExchangeConfig.getx(str(request.privacy_group_id))
        config.fetcher_active = request.fetcher_active
        config.write_back = request.write_back
        config.matcher_active = request.matcher_active
        updated_config = hmaconfig.update_config(config).__dict__
        # The response Dataset is keyed by privacy_group_id; mirror the
        # config's name into that field before serializing.
        updated_config["privacy_group_id"] = updated_config["name"]

        additional_config = AdditionalMatchSettingsConfig.get(
            str(request.privacy_group_id))
        if request.pdq_match_threshold:
            # Threshold provided: update the existing settings config or
            # create one if none exists yet.
            if additional_config:
                additional_config.pdq_match_threshold = int(
                    request.pdq_match_threshold)
                hmaconfig.update_config(additional_config)
            else:
                additional_config = AdditionalMatchSettingsConfig(
                    str(request.privacy_group_id),
                    int(request.pdq_match_threshold))
                hmaconfig.create_config(additional_config)
        elif additional_config:  # pdq_match_threshold was set and now should be removed
            hmaconfig.delete_config(additional_config)

        return Dataset.from_dict(updated_config)

    @datasets_api.post("/create", apply=[jsoninator(CreateDatasetRequest)])
    def create_dataset(request: CreateDatasetRequest) -> CreateDatasetResponse:
        """
        Create a local dataset (defaults defined in CreateDatasetRequest)
        """
        assert isinstance(request, CreateDatasetRequest)

        create_privacy_group_if_not_exists(
            privacy_group_id=str(request.privacy_group_id),
            privacy_group_name=request.privacy_group_name,
            description=request.description,
            in_use=True,
            fetcher_active=request.fetcher_active,
            matcher_active=request.matcher_active,
            write_back=request.write_back,
        )

        return CreateDatasetResponse(
            response=f"Created dataset {request.privacy_group_id}")

    @datasets_api.post("/sync", apply=[jsoninator])
    def sync_datasets() -> SyncDatasetResponse:
        """
        Fetch new collaborations from ThreatExchange and sync with the
        configs stored in DynamoDB.
        """
        sync_privacy_groups()
        return SyncDatasetResponse(response="Privacy groups are up to date")

    @datasets_api.post("/delete/<key>", apply=[jsoninator])
    def delete_dataset(key=None) -> DeleteDatasetResponse:
        """
        Delete the dataset with key=<key>
        """
        config = ThreatExchangeConfig.getx(str(key))
        hmaconfig.delete_config(config)
        return DeleteDatasetResponse(response="The privacy group is deleted")

    @datasets_api.get("/match-settings", apply=[jsoninator])
    def get_all_match_settings() -> MatchSettingsResponse:
        """
        Return all match settings configs
        """
        return MatchSettingsResponse(match_settings=[
            MatchSettingsResponseBody(c)
            for c in AdditionalMatchSettingsConfig.get_all()
        ])

    @datasets_api.get("/match-settings/<key>", apply=[jsoninator])
    def get_match_settings(key=None, ) -> MatchSettingsResponseBody:
        """
        Return a match settings config for a given privacy_group_id
        """
        if config := AdditionalMatchSettingsConfig.get(str(key)):
            return MatchSettingsResponseBody(config)
        return bottle.abort(400, f"No match_settings for pg_id {key} found")

    return datasets_api