def test_pdq_signal_metadata_update_tags_in_table(self):
    """Round-trip PDQSignalMetadata tags via update_tags_in_table_if_exists.

    Verifies the conditional update fails before the record exists,
    then succeeds (and replaces the tag list) after write_to_table.
    """
    meta = self.get_example_pdq_signal_metadata()

    # Use a fresh signal id because the table persists between tests.
    fresh_id = "123456789"
    meta.signal_id = fresh_id

    # No row yet, so the conditional update must report failure.
    assert not meta.update_tags_in_table_if_exists(self.table)

    meta.write_to_table(self.table)

    fetched = ThreatExchangeSignalMetadata.get_from_signal(
        self.table, fresh_id, get_default_signal_type_mapping()
    )[0]
    assert meta.signal_hash == fetched.signal_hash
    # Every original tag should have survived the round trip.
    assert all(tag in fetched.tags for tag in meta.tags)

    replaced_tags = ["new", "list", "of", "tags"]
    meta.tags = replaced_tags

    # The row now exists, so the conditional update must succeed.
    assert meta.update_tags_in_table_if_exists(self.table)

    fetched = ThreatExchangeSignalMetadata.get_from_signal(
        self.table,
        fresh_id,
        get_default_signal_type_mapping(),
    )[0]
    assert all(tag in fetched.tags for tag in replaced_tags)
def test_pdq_signal_metadata_update_pending_chage_in_table(self):
    """Round-trip the pending-opinion-change flag on PDQSignalMetadata.

    Verifies update_pending_opinion_change_in_table_if_exists fails before
    the record is written and succeeds afterwards.

    NOTE(review): "chage" in this method's name looks like a typo for
    "change"; left as-is so name-based test selection keeps working.
    """
    meta = self.get_example_pdq_signal_metadata()

    # Use a fresh signal id because the table persists between tests.
    fresh_id = "987654321"
    meta.signal_id = fresh_id

    # No row yet, so the conditional update must report failure.
    assert not meta.update_pending_opinion_change_in_table_if_exists(self.table)

    meta.write_to_table(self.table)

    fetched = ThreatExchangeSignalMetadata.get_from_signal(
        self.table,
        fresh_id,
        get_default_signal_type_mapping(),
    )[0]
    assert meta.signal_hash == fetched.signal_hash
    # A freshly written record carries no pending opinion change.
    assert (
        PendingThreatExchangeOpinionChange.NONE.value
        == fetched.pending_opinion_change.value
    )

    meta.pending_opinion_change = (
        PendingThreatExchangeOpinionChange.MARK_TRUE_POSITIVE
    )

    # The row now exists, so the conditional update must succeed.
    assert meta.update_pending_opinion_change_in_table_if_exists(self.table)

    fetched = ThreatExchangeSignalMetadata.get_from_signal(
        self.table, fresh_id, get_default_signal_type_mapping()
    )[0]
    assert (
        PendingThreatExchangeOpinionChange.MARK_TRUE_POSITIVE.value
        == fetched.pending_opinion_change.value
    )
def test_order_of_signals_is_chronological(self):
    """Signals to process must come back ordered by their updated_at time."""
    with self.fresh_dynamodb():
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        bank_id, member_id = self._create_bank_and_bank_member()

        created = [
            banks.add_bank_member_signal(
                bank_id=bank_id,
                bank_member_id=member_id,
                signal_type=VideoMD5Signal,
                signal_value="A VIDEO MD5 SIGNAL. WILTY?" + str(random.random()),
            )
            for _ in range(20)
        ]

        # Expected order: ascending by updated_at timestamp.
        expected_ids = [
            s.signal_id for s in sorted(created, key=lambda s: s.updated_at)
        ]

        page = banks.get_bank_member_signals_to_process_page(
            signal_type=VideoMD5Signal
        )
        actual_ids = [s.signal_id for s in page.items]

        self.assertListEqual(expected_ids, actual_ids)
def test_query_md5_hash_record(self):
    """Write a PipelineHashRecord, then find it via get_from_content_id.

    The written record must appear among the records stored for the
    test content id.
    """
    record = self.get_example_md5_hash_record()
    record.write_to_table(self.table)

    # Use a generator with any() instead of materializing a list (C419):
    # short-circuits on the first match and avoids a throwaway list.
    assert any(
        record == item
        for item in models.PipelineHashRecord.get_from_content_id(
            self.table,
            TestPDQModels.TEST_CONTENT_ID,
            get_default_signal_type_mapping(),
        )
    )
def test_query_recent_hash_records(self):
    """A written PipelineHashRecord is the first item of the recency page."""
    record = self.get_example_pdq_hash_record()
    record.write_to_table(self.table)

    page = models.PipelineHashRecord.get_recent_items_page(
        self.table, get_default_signal_type_mapping()
    )
    fetched = page.items[0]

    # The recency index does not store signal_specific_attributes (and it
    # does not need to), so blank them before comparing.
    record.signal_specific_attributes = {}
    assert record == fetched
def test_query_content_object(self):
    """Write a ContentObject and read it back via get_from_content_id."""
    written = self.get_example_content_object()
    written.write_to_table(self.get_table())

    fetched = ContentObject.get_from_content_id(
        self.get_table(),
        TestContentModels.TEST_CONTENT_ID,
        get_default_signal_type_mapping(),
    )

    assert written == fetched
def test_query_match_record_by_content_id(self):
    """Write a MatchRecord and read it back by content id."""
    written = self.get_example_pdq_match_record()
    written.write_to_table(self.table)

    results = models.MatchRecord.get_from_content_id(
        self.table,
        TestPDQModels.TEST_CONTENT_ID,
        get_default_signal_type_mapping(),
    )

    assert written == results[0]
def test_pdq_signal_metadata_by_signal(self):
    """Write PDQSignalMetadata and read it back via get_from_signal."""
    written = self.get_example_pdq_signal_metadata()
    written.write_to_table(self.table)

    fetched = ThreatExchangeSignalMetadata.get_from_signal(
        self.table,
        TestPDQModels.TEST_SIGNAL_ID,
        get_default_signal_type_mapping(),
    )[0]

    assert written.signal_hash == fetched.signal_hash
    # Every tag written must come back on the queried record.
    assert all(tag in fetched.tags for tag in written.tags)
def _create_bank_and_bank_member(self) -> t.Tuple[str, str]:
    """Create a test bank with one video member; return (bank_id, member_id)."""
    banks = BanksTable(
        self.get_table(),
        signal_type_mapping=get_default_signal_type_mapping(),
    )

    bank = banks.create_bank("TEST_BANK", "Test bank description")
    member = banks.add_bank_member(
        bank_id=bank.bank_id,
        content_type=VideoContent,
        raw_content=None,
        storage_bucket="hma-test-media",
        storage_key="irrrelevant",
        notes="",
    )

    return bank.bank_id, member.bank_member_id
def test_key_indicator_type_mapping(self):
    """Every known signal type must round-trip through the S3 object-key mapping.

    get_signal_type_from_object_key(get_s3_object_key(...)) must return
    the original signal type.
    """
    # The store's configuration does not depend on the signal type, so
    # construct it once instead of rebuilding it every loop iteration.
    store = ThreatUpdateS3Store(
        1,
        1,
        None,
        "does-not-matter",
        "does-not-matter",
        "does-not-matter",
        KNOWN_SIGNAL_TYPES,
        signal_type_mapping=get_default_signal_type_mapping(),
    )
    for signal_type in KNOWN_SIGNAL_TYPES:
        assert signal_type == ThreatUpdateS3Store.get_signal_type_from_object_key(
            store.get_s3_object_key(signal_type.INDICATOR_TYPE)
        )
def test_query_match_recent_record(self):
    """A written MatchRecord is the first item of the recency page."""
    record = self.get_example_pdq_match_record()
    record.write_to_table(self.table)

    page = models.MatchRecord.get_recent_items_page(
        self.table, get_default_signal_type_mapping()
    )
    fetched = page.items[0]

    # The recency index does not store signal_specific_attributes (and it
    # does not need to), so blank them before comparing.
    record.signal_specific_attributes = {}
    assert record == fetched
def _create_200_members(self) -> str:
    """Create a bank with 200 photo members and return its bank_id."""
    banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
    bank = banks.create_bank("TEST_BANK", "TEST BANK Description")

    for _ in range(200):
        banks.add_bank_member(
            bank_id=bank.bank_id,
            content_type=PhotoContent,
            raw_content=None,
            storage_bucket="hma-test-media",
            storage_key="videos/breaking-news.mp4",
            notes="",
        )

    return bank.bank_id
def test_pagination_produces_correct_number_of_pages(self):
    """Walk the get-members pagination to exhaustion and count results.

    With 200 members created, following continuation tokens until the
    last page must yield exactly 200 items with no duplicate member ids,
    regardless of the server's page size.
    """
    bank_id = self._create_200_members()
    api = TApp(
        get_bank_api(
            self.get_table(),
            "irrelevant_s3_bucket_for_this_test",
            "irrelevant_sqs_queue",
            get_default_signal_type_mapping(),
        )
    )

    running_count = 0
    continuation_token = None
    unique_member_ids = set()

    while True:
        if continuation_token:
            response = json.loads(
                api.get(
                    f"/get-members/{bank_id}?content_type=photo&continuation_token={continuation_token}"
                ).body
            )
        else:
            response = json.loads(
                api.get(f"/get-members/{bank_id}?content_type=photo").body
            )

        running_count += len(response["bank_members"])
        continuation_token = response["continuation_token"]
        unique_member_ids.update(
            map(lambda member: member["bank_member_id"], response["bank_members"])
        )

        # Fix: compare to None with `is`, not `==` (PEP 8 identity check).
        if continuation_token is None:
            # The last page carries no continuation_token.
            break

    # Total item count must match regardless of page size.
    assert running_count == 200
    # No member id may appear on more than one page.
    assert len(unique_member_ids) == 200
def test_single_signal_is_retrieved(self):
    """A single added bank-member signal shows up on the to-process page."""
    with self.fresh_dynamodb():
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        bank_id, member_id = self._create_bank_and_bank_member()

        added = banks.add_bank_member_signal(
            bank_id=bank_id,
            bank_member_id=member_id,
            signal_type=VideoMD5Signal,
            signal_value="A VIDEO MD5 SIGNAL. WILTY?",
        )

        # The new signal should now be available for processing.
        page = banks.get_bank_member_signals_to_process_page(
            signal_type=VideoMD5Signal
        )
        self.assertEqual(len(page.items), 1)
        self.assertEqual(added.signal_id, page.items[0].signal_id)
def test_query_match_record_by_signal_id(self):
    """Write a MatchRecord and read it back via the signal-id index."""
    written = self.get_example_pdq_match_record()

    # GSI-1 (the signal-id index) does not carry signal_specific_attributes
    # yet — unclear whether it should — so blank them for the comparison.
    written.signal_specific_attributes = {}
    written.write_to_table(self.table)

    fetched = models.MatchRecord.get_from_signal(
        self.table,
        TestPDQModels.TEST_SIGNAL_ID,
        TestPDQModels.TEST_SIGNAL_SOURCE,
        get_default_signal_type_mapping(),
    )[0]

    assert written == fetched
def _create_banks(self):
    """Fixture: create one active and one inactive bank, each with a member.

    Stores table_manager, active_bank, active_bank_member, inactive_bank
    and inactive_bank_member on self for use by the tests.
    """
    self.table_manager = BanksTable(
        self.get_table(), get_default_signal_type_mapping()
    )

    # Active bank with a single photo member.
    self.active_bank = self.table_manager.create_bank("TEST_BANK", "Is Active")
    self.active_bank_member = self.table_manager.add_bank_member(
        bank_id=self.active_bank.bank_id,
        content_type=PhotoContent,
        raw_content=None,
        storage_bucket=None,
        storage_key=None,
        notes=None,
    )
    self.table_manager.update_bank(
        bank_id=self.active_bank.bank_id,
        bank_name=self.active_bank.bank_name,
        bank_description=self.active_bank.bank_description,
        is_active=True,
    )

    # Inactive bank, also with a single photo member.
    self.inactive_bank = self.table_manager.create_bank(
        "TEST_BANK_2", "Is Inactive"
    )
    self.table_manager.update_bank(
        bank_id=self.inactive_bank.bank_id,
        bank_name=self.inactive_bank.bank_name,
        bank_description=self.inactive_bank.bank_description,
        is_active=False,
    )
    self.inactive_bank_member = self.table_manager.add_bank_member(
        bank_id=self.inactive_bank.bank_id,
        content_type=PhotoContent,
        raw_content=None,
        storage_bucket=None,
        storage_key=None,
        notes=None,
    )
def test_order_of_signals_multi_page(self):
    """Chronological ordering must hold across multiple to-process pages."""
    with self.fresh_dynamodb():
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        bank_id, member_id = self._create_bank_and_bank_member()

        created = [
            banks.add_bank_member_signal(
                bank_id=bank_id,
                bank_member_id=member_id,
                signal_type=VideoMD5Signal,
                signal_value="A VIDEO TMK PDQF SIGNAL. WILTY?"
                + str(random.random()),
            )
            for _ in range(20)
        ]

        # Expected order: ascending by updated_at timestamp.
        expected_ids = [
            s.signal_id for s in sorted(created, key=lambda s: s.updated_at)
        ]

        # Page through with a small limit to force multiple pages.
        collected_ids = []
        start_key = None
        while True:
            page = banks.get_bank_member_signals_to_process_page(
                signal_type=VideoMD5Signal,
                limit=4,
                exclusive_start_key=start_key,
            )
            start_key = page.last_evaluated_key
            collected_ids.extend(s.signal_id for s in page.items)
            if not page.has_next_page():
                break

        self.assertListEqual(expected_ids, collected_ids)
def test_multiple_signals_are_retrieved(self):
    """All 20 added signals show up on the to-process page, in insertion order."""
    with self.fresh_dynamodb():
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        bank_id, member_id = self._create_bank_and_bank_member()

        added_ids = [
            banks.add_bank_member_signal(
                bank_id=bank_id,
                bank_member_id=member_id,
                signal_type=VideoMD5Signal,
                signal_value="A VIDEO MD5 SIGNAL. WILTY?" + str(random.random()),
            ).signal_id
            for _ in range(20)
        ]

        page = banks.get_bank_member_signals_to_process_page(
            signal_type=VideoMD5Signal
        )
        fetched_ids = [s.signal_id for s in page.items]

        self.assertListEqual(added_ids, fetched_ids)