def _create_200_members(self) -> str:
    """
    Set up a bank populated with 200 members.

    Returns the bank_id of the newly created bank.
    """
    banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
    new_bank = banks.create_bank("TEST_BANK", "TEST BANK Description")

    # All members share the same attributes, so assemble the keyword
    # arguments once and reuse them for every insert.
    member_kwargs = dict(
        bank_id=new_bank.bank_id,
        content_type=PhotoContent,
        raw_content=None,
        storage_bucket="hma-test-media",
        storage_key="videos/breaking-news.mp4",
        notes="",
    )
    for _ in range(200):
        banks.add_bank_member(**member_kwargs)

    return new_bank.bank_id
def _create_bank_and_bank_member(self) -> t.Tuple[str, str]:
    """
    Create a bank that holds a single video member.

    Returns a ``(bank_id, bank_member_id)`` tuple identifying the two
    records that were written.
    """
    banks = BanksTable(self.get_table())
    new_bank = banks.create_bank("TEST_BANK", "Test bank description")
    bank_id = new_bank.bank_id

    new_member = banks.add_bank_member(
        bank_id=bank_id,
        content_type=VideoContent,
        raw_content=None,
        storage_bucket="hma-test-media",
        storage_key="irrrelevant",
        notes="",
    )
    return bank_id, new_member.bank_member_id
def add_bank_member(
    banks_table: BanksTable,
    sqs_client: SQSClient,
    submissions_queue_url: str,
    bank_id: str,
    content_type: t.Type[ContentType],
    storage_bucket: t.Optional[str],
    storage_key: t.Optional[str],
    raw_content: t.Optional[str],
    notes: str,
    bank_member_tags: t.Set[str],
) -> BankMember:
    """
    Persist a new bank member, then queue it for signal extraction.

    The member is written through ``banks_table`` first. Afterwards a
    BankSubmissionMessage referencing the new member is serialized to JSON
    and sent to the queue at ``submissions_queue_url`` so the hashing lambda
    can extract signals.

    Returns the BankMember produced by the table write.
    """
    new_member = banks_table.add_bank_member(
        bank_id=bank_id,
        content_type=content_type,
        storage_bucket=storage_bucket,
        storage_key=storage_key,
        raw_content=raw_content,
        notes=notes,
        bank_member_tags=bank_member_tags,
    )

    # NOTE(review): 3600 is presumably the URL lifetime in seconds -- confirm
    # against create_presigned_url.
    media_url = create_presigned_url(
        storage_bucket, storage_key, None, 3600, "get_object"
    )
    message = BankSubmissionMessage(
        content_type=content_type,
        url=media_url,
        bank_id=bank_id,
        bank_member_id=new_member.bank_member_id,
    )

    payload = json.dumps(message.to_sqs_message())
    sqs_client.send_message(QueueUrl=submissions_queue_url, MessageBody=payload)

    return new_member
class MatchFiltersTestCase(BanksTableTestBase, unittest.TestCase):
    """
    Checks that Matcher.filter_match_results drops matches coming from
    inactive privacy groups, matches beyond the configured distance
    threshold, and matches coming from inactive banks.
    """

    # NOTE: Table is defined in base class BanksTableTestBase

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    def _add_contentless_photo_member(self, bank_id):
        # Photo member with no raw content, storage location or notes.
        return self.table_manager.add_bank_member(
            bank_id=bank_id,
            content_type=PhotoContent,
            raw_content=None,
            storage_bucket=None,
            storage_key=None,
            notes=None,
        )

    def _set_bank_active_flag(self, bank, is_active):
        # Re-save the bank with its current name/description and the new flag.
        self.table_manager.update_bank(
            bank_id=bank.bank_id,
            bank_name=bank.bank_name,
            bank_description=bank.bank_description,
            is_active=is_active,
        )

    def _create_banks(self):
        self.table_manager = BanksTable(
            self.get_table(), get_default_signal_type_mapping()
        )

        # Active bank: create it, add its member, then flag it active.
        self.active_bank = self.table_manager.create_bank("TEST_BANK", "Is Active")
        self.active_bank_member = self._add_contentless_photo_member(
            self.active_bank.bank_id
        )
        self._set_bank_active_flag(self.active_bank, True)

        # Inactive bank: create it, flag it inactive, then add its member.
        self.inactive_bank = self.table_manager.create_bank(
            "TEST_BANK_2", "Is Inactive"
        )
        self._set_bank_active_flag(self.inactive_bank, False)
        self.inactive_bank_member = self._add_contentless_photo_member(
            self.inactive_bank.bank_id
        )

    def _create_privacy_groups(self):
        # BanksTableTestBase already provides a mock_dynamodb2; hand that same
        # mock to a ConfigTest instance so the config table is created inside
        # it, then point HMAConfig at that table.
        config_fixture = config_test.ConfigTest()
        config_fixture.mock_dynamodb2 = self.__class__.mock_dynamodb2
        config_fixture.create_mocked_table()
        HMAConfig.initialize(config_fixture.TABLE_NAME)

        self.active_pg = ThreatExchangeConfig(
            "ACTIVE_PG", True, "", True, True, True, "ACTIVE_PG"
        )
        create_config(self.active_pg)
        # Active PG has a distance threshold of 31.
        create_config(AdditionalMatchSettingsConfig("ACTIVE_PG", 31))

        # Apart from its names, this differs from the active PG only in the
        # sixth positional flag.
        self.inactive_pg = ThreatExchangeConfig(
            "INACTIVE_PG", True, "", True, True, False, "INACTIVE_PG"
        )
        create_config(self.inactive_pg)

    def _init_data_if_required(self):
        self._create_banks()
        self._create_privacy_groups()

    # ------------------------------------------------------------------
    # Match builders (each call returns a brand new IndexMatch)
    # ------------------------------------------------------------------

    @staticmethod
    def _zero_distance_match(metadata):
        # Wrap a single metadata entry in an IndexMatch with distance 0.
        return IndexMatch(0, [metadata])

    def _active_pg_match(self):
        metadata = ThreatExchangeIndicatorIndexMetadata(
            "indicator_id",
            "hash_value",
            self.active_pg.privacy_group_id,
        )
        return self._zero_distance_match(metadata)

    def _inactive_pg_match(self):
        metadata = ThreatExchangeIndicatorIndexMetadata(
            "indicator_id",
            "hash_value",
            self.inactive_pg.privacy_group_id,
        )
        return self._zero_distance_match(metadata)

    def _active_bank_match(self):
        metadata = BankedSignalIndexMetadata(
            "signal", "signal_value", self.active_bank_member.bank_member_id
        )
        return self._zero_distance_match(metadata)

    def _inactive_bank_match(self):
        metadata = BankedSignalIndexMetadata(
            "signal", "signal_value", self.inactive_bank_member.bank_member_id
        )
        return self._zero_distance_match(metadata)

    def _build_matcher(self):
        # Matcher wired to the banks table created by _create_banks().
        return Matcher("", [PdqSignal, VideoMD5Signal], self.table_manager)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_matcher_filters_out_inactive_pg(self):
        with self.fresh_dynamodb():
            self._init_data_if_required()

            candidates = [self._active_pg_match(), self._inactive_pg_match()]
            surviving = self._build_matcher().filter_match_results(
                candidates,
                PdqSignal,
            )

            self.assertEqual(
                len(surviving), 1, "Failed to filter out inactive pg match"
            )
            self.assertEqual(
                surviving[0].metadata[0].privacy_group,
                self.active_pg.privacy_group_id,
                "The filtered privacy group id is wrong. It should be the active pg's id.",
            )

    def test_matcher_filters_out_based_on_distance(self):
        with self.fresh_dynamodb():
            self._init_data_if_required()

            near_match = self._active_pg_match()
            far_match = self._active_pg_match()
            # Push the second match beyond the threshold configured for the
            # active PG.
            far_match.distance = 100

            surviving = self._build_matcher().filter_match_results(
                [near_match, far_match], PdqSignal
            )

            self.assertEqual(
                len(surviving),
                1,
                "Failed to filter out match with distance > threshold",
            )
            self.assertEqual(
                surviving[0].distance,
                0,
                "Filtered out the wrong match. Match with distance = 100 should be filtered out.",
            )

    def test_matcher_filters_out_based_on_bank_active(self):
        with self.fresh_dynamodb():
            self._init_data_if_required()

            candidates = [self._active_bank_match(), self._inactive_bank_match()]
            surviving = self._build_matcher().filter_match_results(
                candidates,
                PdqSignal,
            )

            self.assertEqual(
                len(surviving), 1, "Failed to filter out inactive bank's match"
            )
            self.assertEqual(
                surviving[0].metadata[0].bank_member_id,
                self.active_bank_member.bank_member_id,
                "The filtered bank_member id is wrong. It should be the active bank's bank_member's id.",
            )
def test_bank_member_removes_from_get_members_page(self):
    # Verifies that members removed with remove_bank_member() no longer show
    # up when paging through get_all_bank_members_page().
    #
    # Steps: fill a bank, page through it to confirm the full count, remove
    # every REMOVE_EVERY_XTH_MEMBER-th member, then page again and confirm
    # the count dropped by exactly the number removed.
    NUM_MEMBERS = 100
    REMOVE_EVERY_XTH_MEMBER = 4

    with self.fresh_dynamodb():
        table_manager = BanksTable(self.get_table())

        # The helper already adds one member; its id is not needed here.
        bank_id, _ = self._create_bank_and_bank_member()
        expected_total = NUM_MEMBERS + 1

        for _ in range(NUM_MEMBERS):
            table_manager.add_bank_member(
                bank_id=bank_id,
                content_type=VideoContent,
                raw_content=None,
                storage_bucket="hma-test-media",
                storage_key="irrrelevant",
                notes="",
            )

        def fetch_all_members():
            # Follow last_evaluated_key from page to page until the table
            # reports there is no next page; return every item seen.
            collected = []
            exclusive_start_key = None
            while True:
                page = table_manager.get_all_bank_members_page(
                    bank_id=bank_id,
                    content_type=VideoContent,
                    exclusive_start_key=exclusive_start_key,
                )
                collected += page.items
                exclusive_start_key = page.last_evaluated_key
                if not page.has_next_page():
                    return collected

        members = fetch_all_members()
        self.assertEqual(
            len(members),
            expected_total,
            "All the pages together have as many members as we added.",
        )

        # Stride slice == indices where i % REMOVE_EVERY_XTH_MEMBER == 0.
        # (The previous `i // X == 0` check only ever matched the first X
        # members instead of every X-th one.)
        members_to_remove = members[::REMOVE_EVERY_XTH_MEMBER]
        for member in members_to_remove:
            table_manager.remove_bank_member(member.bank_member_id)

        remaining_members = fetch_all_members()
        self.assertEqual(
            len(remaining_members),
            expected_total - len(members_to_remove),
            "All the pages together have as many members as we added minus the ones we removed.",
        )
# Helper script: creates one bank in an existing DynamoDB table and adds
# `num_members` identical video members to it.
#
# Fill in `table_name` and `test_bank_name` below before running.
import os

import boto3
from mypy_boto3_dynamodb.service_resource import Table
from threatexchange.content_type.video import VideoContent

from hmalib.common.models.bank import BanksTable, BankMember
from hmalib.common.models.models_base import DynamoDBItem

table_name = ""
test_bank_name = ""
num_members = 1000

# These values must be set. Use an explicit check rather than `assert`:
# asserts are stripped under `python -O`, which would let the script run
# against an empty table name. Checking first also means a misconfigured
# run stops before any AWS resource is created.
if not table_name or not test_bank_name:
    raise SystemExit(
        "Set both table_name and test_bank_name before running this script."
    )

dynamodb = boto3.resource("dynamodb")
table: Table = dynamodb.Table(table_name)
table_manager = BanksTable(table)

bank = table_manager.create_bank(test_bank_name, "test bank description")

for _ in range(num_members):
    table_manager.add_bank_member(
        bank_id=bank.bank_id,
        content_type=VideoContent,
        raw_content=None,
        storage_bucket="hma-test-media",
        storage_key="videos/breaking-news.mp4",
        notes="",
    )