def test_pdq_signal_metadata_update_tags_in_table(self):
        """
        Exercise update_tags_in_table_if_exists on an example PDQ signal
        metadata record.

        The update must report failure while the record is absent from the
        table, and report success once the record has been written. After a
        successful update, every new tag must be visible when the record is
        queried back by signal id.
        """
        signal_id = "123456789"
        metadata = self.get_example_pdq_signal_metadata()
        # Use a dedicated signal id because the table persists between tests.
        metadata.signal_id = signal_id

        def fetch_stored():
            # First record returned for this test's signal id.
            matches = ThreatExchangeSignalMetadata.get_from_signal(
                self.table,
                signal_id,
                get_default_signal_type_mapping(),
            )
            return matches[0]

        # Nothing has been written yet, so the first update attempt must
        # return a falsy result.
        updated_before_write = metadata.update_tags_in_table_if_exists(
            self.table)
        assert not updated_before_write

        metadata.write_to_table(self.table)

        stored = fetch_stored()
        assert metadata.signal_hash == stored.signal_hash
        for original_tag in metadata.tags:
            assert original_tag in stored.tags

        new_tags = ["new", "list", "of", "tags"]
        metadata.tags = new_tags

        # The record exists now, so the second update attempt must succeed.
        updated_after_write = metadata.update_tags_in_table_if_exists(
            self.table)
        assert updated_after_write

        stored = fetch_stored()
        for new_tag in new_tags:
            assert new_tag in stored.tags
    def test_pdq_signal_metadata_update_pending_chage_in_table(self):
        """
        Exercise update_pending_opinion_change_in_table_if_exists on an
        example PDQ signal metadata record.

        The update must report failure while the record is absent from the
        table. Once written, the stored record starts with a pending opinion
        change of NONE, and a successful update must persist
        MARK_TRUE_POSITIVE.
        """
        signal_id = "987654321"
        metadata = self.get_example_pdq_signal_metadata()
        # Use a dedicated signal id because the table persists between tests.
        metadata.signal_id = signal_id

        def fetch_stored():
            # First record returned for this test's signal id.
            matches = ThreatExchangeSignalMetadata.get_from_signal(
                self.table,
                signal_id,
                get_default_signal_type_mapping(),
            )
            return matches[0]

        # Nothing has been written yet, so the first update attempt must
        # return a falsy result.
        updated_before_write = (
            metadata.update_pending_opinion_change_in_table_if_exists(
                self.table))
        assert not updated_before_write

        metadata.write_to_table(self.table)

        stored = fetch_stored()
        assert metadata.signal_hash == stored.signal_hash
        expected_initial = PendingThreatExchangeOpinionChange.NONE.value
        assert expected_initial == stored.pending_opinion_change.value

        metadata.pending_opinion_change = (
            PendingThreatExchangeOpinionChange.MARK_TRUE_POSITIVE)

        # The record exists now, so the second update attempt must succeed.
        updated_after_write = (
            metadata.update_pending_opinion_change_in_table_if_exists(
                self.table))
        assert updated_after_write

        stored = fetch_stored()
        expected_updated = (
            PendingThreatExchangeOpinionChange.MARK_TRUE_POSITIVE.value)
        assert expected_updated == stored.pending_opinion_change.value
Ejemplo n.º 3
0
    def test_order_of_signals_is_chronological(self):
        """
        Add 20 video MD5 signals to one bank member and check that the
        to-process page lists their ids in ascending updated_at order.
        """
        with self.fresh_dynamodb():
            table_manager = BanksTable(
                self.get_table(),
                get_default_signal_type_mapping(),
            )
            bank_id, bank_member_id = self._create_bank_and_bank_member()

            created_signals = []
            for _ in range(20):
                # A random suffix gives each signal a distinct value.
                created_signals.append(
                    table_manager.add_bank_member_signal(
                        bank_id=bank_id,
                        bank_member_id=bank_member_id,
                        signal_type=VideoMD5Signal,
                        signal_value="A VIDEO MD5 SIGNAL. WILTY?" +
                        str(random.random()),
                    ))

            chronological = sorted(
                created_signals, key=lambda signal: signal.updated_at)
            expected_ids = [signal.signal_id for signal in chronological]

            page = table_manager.get_bank_member_signals_to_process_page(
                signal_type=VideoMD5Signal)
            actual_ids = [signal.signal_id for signal in page.items]

            self.assertListEqual(expected_ids, actual_ids)
    def test_query_md5_hash_record(self):
        """
        Write the example MD5 hash record to the table and check that
        PipelineHashRecord.get_from_content_id returns at least one item
        equal to it for the test content id.
        """
        record = self.get_example_md5_hash_record()
        record.write_to_table(self.table)

        # A generator lets any() stop at the first match instead of first
        # building a full list of comparison results.
        assert any(
            record == item
            for item in models.PipelineHashRecord.get_from_content_id(
                self.table,
                TestPDQModels.TEST_CONTENT_ID,
                get_default_signal_type_mapping(),
            )
        )
    def test_query_recent_hash_records(self):
        """
        Write the example PDQ hash record and check that it is the first
        item on the recent-items page of PipelineHashRecord.
        """
        record = self.get_example_pdq_hash_record()
        record.write_to_table(self.table)

        recent_page = models.PipelineHashRecord.get_recent_items_page(
            self.table,
            get_default_signal_type_mapping(),
        )
        newest = recent_page.items[0]

        # signal_specific_attributes are stored in the table, but the index
        # used by this query does not carry them, so clear them on the local
        # copy before comparing.
        record.signal_specific_attributes = {}

        assert record == newest
    def test_query_content_object(self):
        """
        Write the example ContentObject to the table and check that
        get_from_content_id returns an equal object for the test content id.
        """
        written = self.get_example_content_object()
        written.write_to_table(self.get_table())

        fetched = ContentObject.get_from_content_id(
            self.get_table(),
            TestContentModels.TEST_CONTENT_ID,
            get_default_signal_type_mapping(),
        )

        assert written == fetched
    def test_query_match_record_by_content_id(self):
        """
        Write the example PDQ match record and check that the first result
        of MatchRecord.get_from_content_id for the test content id equals it.
        """
        written = self.get_example_pdq_match_record()
        written.write_to_table(self.table)

        matches = models.MatchRecord.get_from_content_id(
            self.table,
            TestPDQModels.TEST_CONTENT_ID,
            get_default_signal_type_mapping(),
        )
        first_match = matches[0]

        assert written == first_match
    def test_pdq_signal_metadata_by_signal(self):
        """
        Write the example PDQ signal metadata and check that the first
        result of get_from_signal for the test signal id carries the same
        signal hash and every tag of the written record.
        """
        written = self.get_example_pdq_signal_metadata()
        written.write_to_table(self.table)

        results = ThreatExchangeSignalMetadata.get_from_signal(
            self.table,
            TestPDQModels.TEST_SIGNAL_ID,
            get_default_signal_type_mapping(),
        )
        stored = results[0]

        assert written.signal_hash == stored.signal_hash
        for expected_tag in written.tags:
            assert expected_tag in stored.tags
Ejemplo n.º 9
0
    def _create_bank_and_bank_member(self) -> t.Tuple[str, str]:
        """
        Create a bank named "TEST_BANK" with a single video bank member.

        Returns:
            A (bank_id, bank_member_id) tuple for the newly created rows.
        """
        banks = BanksTable(
            self.get_table(),
            signal_type_mapping=get_default_signal_type_mapping(),
        )

        new_bank = banks.create_bank("TEST_BANK", "Test bank description")
        bank_id = new_bank.bank_id

        # No raw content is supplied; the member only points at a storage
        # bucket and key.
        new_member = banks.add_bank_member(
            bank_id=bank_id,
            content_type=VideoContent,
            raw_content=None,
            storage_bucket="hma-test-media",
            storage_key="irrrelevant",
            notes="",
        )

        return bank_id, new_member.bank_member_id
Ejemplo n.º 10
0
    def test_key_indicator_type_mapping(self):
        """
        Check that every known signal type survives a round trip through
        its S3 object key.

        For each type in KNOWN_SIGNAL_TYPES, the key built by
        get_s3_object_key from the type's INDICATOR_TYPE must map back to
        the same type via get_signal_type_from_object_key.
        """
        # The store's constructor arguments do not depend on the signal type
        # under test, so build it once instead of once per iteration.
        store = ThreatUpdateS3Store(
            1,
            1,
            None,
            "does-not-matter",
            "does-not-matter",
            "does-not-matter",
            KNOWN_SIGNAL_TYPES,
            signal_type_mapping=get_default_signal_type_mapping(),
        )

        for signal_type in KNOWN_SIGNAL_TYPES:
            object_key = store.get_s3_object_key(signal_type.INDICATOR_TYPE)
            assert signal_type == ThreatUpdateS3Store.get_signal_type_from_object_key(
                object_key
            )
    def test_query_match_recent_record(self):
        """
        Write the example PDQ match record and check that it is the first
        item on the recent-items page of MatchRecord.
        """
        written = self.get_example_pdq_match_record()
        written.write_to_table(self.table)

        recent_page = models.MatchRecord.get_recent_items_page(
            self.table,
            get_default_signal_type_mapping(),
        )
        newest = recent_page.items[0]

        # signal_specific_attributes are stored in the table, but the index
        # used by this query does not carry them, so clear them on the local
        # copy before comparing.
        written.signal_specific_attributes = {}

        assert written == newest
Ejemplo n.º 12
0
    def _create_200_members(self) -> str:
        """
        Create a bank named "TEST_BANK" and add 200 photo members to it.

        Returns:
            The bank_id of the newly created bank.
        """
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        new_bank = banks.create_bank("TEST_BANK", "TEST BANK Description")
        bank_id = new_bank.bank_id

        members_remaining = 200
        while members_remaining > 0:
            # Every member is created with the same arguments.
            banks.add_bank_member(
                bank_id=bank_id,
                content_type=PhotoContent,
                raw_content=None,
                storage_bucket="hma-test-media",
                storage_key="videos/breaking-news.mp4",
                notes="",
            )
            members_remaining -= 1

        return bank_id
Ejemplo n.º 13
0
    def test_pagination_produces_correct_number_of_pages(self):
        """
        Page through /get-members for a bank with 200 photo members.

        Follows the continuation_token returned by each response until a
        response carries none, then checks that exactly 200 members were
        returned in total and that all 200 bank_member_ids are distinct.
        """
        bank_id = self._create_200_members()
        api = TApp(
            get_bank_api(
                self.get_table(),
                "irrelevant_s3_bucket_for_this_test",
                "irrelevant_sqs_queue",
                get_default_signal_type_mapping(),
            )
        )

        running_count = 0
        continuation_token = None
        unique_member_ids = set()

        while True:
            # The first request carries no token; later requests pass along
            # the token from the previous page.
            url = f"/get-members/{bank_id}?content_type=photo"
            if continuation_token:
                url += f"&continuation_token={continuation_token}"

            response = json.loads(api.get(url).body)
            bank_members = response["bank_members"]

            running_count += len(bank_members)
            continuation_token = response["continuation_token"]

            unique_member_ids.update(
                member["bank_member_id"] for member in bank_members
            )

            if continuation_token is None:
                # Last page should not have any continuation_token
                break

        # Checks for total number of items received. Should work with any page size.
        assert running_count == 200

        # Checks that the number of unique member ids is equal to the expected
        # value (ie. no repeats)
        assert len(unique_member_ids) == 200
Ejemplo n.º 14
0
    def test_single_signal_is_retrieved(self):
        """
        Add one video MD5 signal to a bank member and check that the
        to-process page contains exactly that signal.
        """
        with self.fresh_dynamodb():
            banks = BanksTable(
                self.get_table(),
                get_default_signal_type_mapping(),
            )
            bank_id, bank_member_id = self._create_bank_and_bank_member()

            added_signal = banks.add_bank_member_signal(
                bank_id=bank_id,
                bank_member_id=bank_member_id,
                signal_type=VideoMD5Signal,
                signal_value="A VIDEO MD5 SIGNAL. WILTY?",
            )

            # The freshly added signal should now be available to process.
            page = banks.get_bank_member_signals_to_process_page(
                signal_type=VideoMD5Signal)

            self.assertEqual(len(page.items), 1)
            only_item = page.items[0]
            self.assertEqual(added_signal.signal_id, only_item.signal_id)
    def test_query_match_record_by_signal_id(self):
        """
        Write the example PDQ match record and check that the first result
        of MatchRecord.get_from_signal for the test signal id and signal
        source equals it.
        """
        written = self.get_example_pdq_match_record()

        # GSI-1 (the signal id index) does not yet contain
        # signal_specific_attributes, so clear them before writing to keep
        # the later comparison meaningful.
        written.signal_specific_attributes = {}

        written.write_to_table(self.table)

        matches = models.MatchRecord.get_from_signal(
            self.table,
            TestPDQModels.TEST_SIGNAL_ID,
            TestPDQModels.TEST_SIGNAL_SOURCE,
            get_default_signal_type_mapping(),
        )
        first_match = matches[0]

        assert written == first_match
    def _create_banks(self):
        """
        Populate the table with one active and one inactive bank, each
        holding a single photo member.

        Stores the table manager, both banks and both members on self as
        table_manager, active_bank, active_bank_member, inactive_bank and
        inactive_bank_member.
        """
        self.table_manager = BanksTable(
            self.get_table(), get_default_signal_type_mapping()
        )

        def add_photo_member(bank):
            # Member with no raw content, storage location or notes.
            return self.table_manager.add_bank_member(
                bank_id=bank.bank_id,
                content_type=PhotoContent,
                raw_content=None,
                storage_bucket=None,
                storage_key=None,
                notes=None,
            )

        def set_active_flag(bank, is_active):
            # Re-submit the bank's own name and description; only the
            # is_active flag is chosen by the caller.
            self.table_manager.update_bank(
                bank_id=bank.bank_id,
                bank_name=bank.bank_name,
                bank_description=bank.bank_description,
                is_active=is_active,
            )

        # Active bank: the member is added before the flag is set.
        self.active_bank = self.table_manager.create_bank("TEST_BANK", "Is Active")
        self.active_bank_member = add_photo_member(self.active_bank)
        set_active_flag(self.active_bank, True)

        # Inactive bank: the flag is set before the member is added.
        self.inactive_bank = self.table_manager.create_bank(
            "TEST_BANK_2", "Is Inactive"
        )
        set_active_flag(self.inactive_bank, False)
        self.inactive_bank_member = add_photo_member(self.inactive_bank)
Ejemplo n.º 17
0
    def test_order_of_signals_multi_page(self):
        """
        Add 20 signals to one bank member, read the to-process list back in
        pages of at most 4, and check that the concatenated ids are in
        ascending updated_at order.
        """
        with self.fresh_dynamodb():
            banks = BanksTable(
                self.get_table(),
                get_default_signal_type_mapping(),
            )
            bank_id, bank_member_id = self._create_bank_and_bank_member()

            created_signals = []
            for _ in range(20):
                # A random suffix gives each signal a distinct value.
                created_signals.append(
                    banks.add_bank_member_signal(
                        bank_id=bank_id,
                        bank_member_id=bank_member_id,
                        signal_type=VideoMD5Signal,
                        signal_value="A VIDEO TMK PDQF SIGNAL. WILTY?" +
                        str(random.random()),
                    ))

            chronological = sorted(
                created_signals, key=lambda signal: signal.updated_at)
            expected_ids = [signal.signal_id for signal in chronological]

            collected_ids = []
            start_key = None
            more_pages = True
            while more_pages:
                page = banks.get_bank_member_signals_to_process_page(
                    signal_type=VideoMD5Signal,
                    limit=4,
                    exclusive_start_key=start_key,
                )

                # The next request resumes from where this page ended.
                start_key = page.last_evaluated_key
                collected_ids.extend(
                    signal.signal_id for signal in page.items)

                more_pages = page.has_next_page()

            self.assertListEqual(expected_ids, collected_ids)
Ejemplo n.º 18
0
    def test_multiple_signals_are_retrieved(self):
        """
        Add 20 video MD5 signals to one bank member and check that the
        to-process page returns their ids in the order they were added.
        """
        with self.fresh_dynamodb():
            banks = BanksTable(
                self.get_table(),
                get_default_signal_type_mapping(),
            )
            bank_id, bank_member_id = self._create_bank_and_bank_member()

            added_ids = []
            for _ in range(20):
                # A random suffix gives each signal a distinct value.
                new_signal = banks.add_bank_member_signal(
                    bank_id=bank_id,
                    bank_member_id=bank_member_id,
                    signal_type=VideoMD5Signal,
                    signal_value="A VIDEO MD5 SIGNAL. WILTY?" +
                    str(random.random()),
                )
                added_ids.append(new_signal.signal_id)

            page = banks.get_bank_member_signals_to_process_page(
                signal_type=VideoMD5Signal)
            retrieved_ids = [signal.signal_id for signal in page.items]

            self.assertListEqual(added_ids, retrieved_ids)