def _create_bank_and_bank_member(self) -> t.Tuple[str, str]:
        """Create a test bank holding a single video member.

        Returns:
            A ``(bank_id, bank_member_id)`` tuple taken from the objects
            returned by ``create_bank`` and ``add_bank_member``.
        """
        banks = BanksTable(self.get_table())
        created_bank = banks.create_bank("TEST_BANK", "Test bank description")

        # No raw content is supplied; the member is described only by its
        # storage bucket/key and an empty notes string.
        member_fields = dict(
            bank_id=created_bank.bank_id,
            content_type=VideoContent,
            raw_content=None,
            storage_bucket="hma-test-media",
            storage_key="irrrelevant",
            notes="",
        )
        created_member = banks.add_bank_member(**member_fields)

        return created_bank.bank_id, created_member.bank_member_id
Exemple #2
0
    def _create_200_members(self) -> str:
        """Create one bank, add 200 members to it, and return the bank's id."""
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        new_bank = banks.create_bank("TEST_BANK", "TEST BANK Description")

        # Every member is added with exactly the same arguments, so build
        # them once and reuse them for each call.
        member_fields = dict(
            bank_id=new_bank.bank_id,
            content_type=PhotoContent,
            raw_content=None,
            storage_bucket="hma-test-media",
            storage_key="videos/breaking-news.mp4",
            notes="",
        )
        for _member_number in range(200):
            banks.add_bank_member(**member_fields)

        return new_bank.bank_id
class MatchFiltersTestCase(BanksTableTestBase, unittest.TestCase):
    """Tests for ``Matcher.filter_match_results``.

    Each test builds its fixtures (two banks with one member each, and two
    privacy group configs) inside ``self.fresh_dynamodb()`` and then checks
    which matches survive filtering.
    """

    # NOTE: Table is defined in base class BanksTableTestBase

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------

    def _add_photo_member(self, bank_id):
        """Add a PhotoContent member with no content, storage or notes."""
        return self.table_manager.add_bank_member(
            bank_id=bank_id,
            content_type=PhotoContent,
            raw_content=None,
            storage_bucket=None,
            storage_key=None,
            notes=None,
        )

    def _set_bank_active(self, bank, is_active):
        """Re-save ``bank`` with its current name/description and ``is_active``."""
        self.table_manager.update_bank(
            bank_id=bank.bank_id,
            bank_name=bank.bank_name,
            bank_description=bank.bank_description,
            is_active=is_active,
        )

    def _create_banks(self):
        """Create one active and one inactive bank, each with one member.

        Stores the table manager, both banks and both members on ``self``.
        """
        self.table_manager = BanksTable(
            self.get_table(), get_default_signal_type_mapping()
        )
        manager = self.table_manager

        # Active bank: the member is added before the bank is updated.
        self.active_bank = manager.create_bank("TEST_BANK", "Is Active")
        self.active_bank_member = self._add_photo_member(self.active_bank.bank_id)
        self._set_bank_active(self.active_bank, True)

        # Inactive bank: the bank is updated before the member is added.
        self.inactive_bank = manager.create_bank("TEST_BANK_2", "Is Inactive")
        self._set_bank_active(self.inactive_bank, False)
        self.inactive_bank_member = self._add_photo_member(
            self.inactive_bank.bank_id
        )

    def _create_privacy_groups(self):
        """Initialise HMAConfig on a mocked table and store two PG configs.

        Stores the configs on ``self.active_pg`` and ``self.inactive_pg``.
        """
        # BanksTableTestBase already provides a mock_dynamodb2, so hand it to
        # a ConfigTest instance and let that create the config table for us.
        # Requires some clever hot-wiring.
        config_fixture = config_test.ConfigTest()
        config_fixture.mock_dynamodb2 = type(self).mock_dynamodb2
        config_fixture.create_mocked_table()
        HMAConfig.initialize(config_fixture.TABLE_NAME)
        # Hot wiring ends...

        # NOTE(review): the two ThreatExchangeConfig objects differ in their
        # name/id and in the sixth positional flag (True vs False) --
        # presumably the "matcher active" switch; confirm against
        # ThreatExchangeConfig's field order.
        self.active_pg = ThreatExchangeConfig(
            "ACTIVE_PG", True, "", True, True, True, "ACTIVE_PG"
        )
        create_config(self.active_pg)

        # Active PG has a distance threshold of 31.
        active_pg_settings = AdditionalMatchSettingsConfig("ACTIVE_PG", 31)
        create_config(active_pg_settings)

        self.inactive_pg = ThreatExchangeConfig(
            "INACTIVE_PG", True, "", True, True, False, "INACTIVE_PG"
        )
        create_config(self.inactive_pg)

    def _init_data_if_required(self):
        """Create the bank fixtures, then the privacy group fixtures."""
        self._create_banks()
        self._create_privacy_groups()

    # ------------------------------------------------------------------
    # Match builders
    # ------------------------------------------------------------------

    def _pg_match(self, privacy_group_id):
        """Build a distance-0 IndexMatch tagged with ``privacy_group_id``."""
        indicator = ThreatExchangeIndicatorIndexMetadata(
            "indicator_id",
            "hash_value",
            privacy_group_id,
        )
        return IndexMatch(0, [indicator])

    def _bank_match(self, bank_member_id):
        """Build a distance-0 IndexMatch tagged with ``bank_member_id``."""
        banked_signal = BankedSignalIndexMetadata(
            "signal", "signal_value", bank_member_id
        )
        return IndexMatch(0, [banked_signal])

    def _active_pg_match(self):
        """Return a match whose metadata points at the active privacy group."""
        return self._pg_match(self.active_pg.privacy_group_id)

    def _inactive_pg_match(self):
        """Return a match whose metadata points at the inactive privacy group."""
        return self._pg_match(self.inactive_pg.privacy_group_id)

    def _active_bank_match(self):
        """Return a match whose metadata points at the active bank's member."""
        return self._bank_match(self.active_bank_member.bank_member_id)

    def _inactive_bank_match(self):
        """Return a match whose metadata points at the inactive bank's member."""
        return self._bank_match(self.inactive_bank_member.bank_member_id)

    def _new_matcher(self):
        """Build a Matcher for PDQ and video MD5 backed by the test table manager."""
        return Matcher("", [PdqSignal, VideoMD5Signal], self.table_manager)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_matcher_filters_out_inactive_pg(self):
        """Only the active privacy group's match should survive filtering."""
        with self.fresh_dynamodb():
            self._init_data_if_required()

            filtered_matches = self._new_matcher().filter_match_results(
                [self._active_pg_match(), self._inactive_pg_match()],
                PdqSignal,
            )

            self.assertEqual(
                len(filtered_matches), 1, "Failed to filter out inactive pg match"
            )
            surviving_metadata = filtered_matches[0].metadata[0]
            self.assertEqual(
                surviving_metadata.privacy_group,
                self.active_pg.privacy_group_id,
                "The filtered privacy group id is wrong. It should be the active pg's id.",
            )

    def test_matcher_filters_out_based_on_distance(self):
        """A match at distance 100 should be dropped; the one at 0 is kept."""
        with self.fresh_dynamodb():
            self._init_data_if_required()

            near_match = self._active_pg_match()
            far_match = self._active_pg_match()
            # Push the second match well past the active PG's threshold of 31.
            far_match.distance = 100

            filtered_matches = self._new_matcher().filter_match_results(
                [near_match, far_match], PdqSignal
            )

            self.assertEqual(
                len(filtered_matches),
                1,
                "Failed to filter out match with distance > threshold",
            )
            self.assertEqual(
                filtered_matches[0].distance,
                0,
                "Filtered out the wrong match. Match with distance = 100 should be filtered out.",
            )

    def test_matcher_filters_out_based_on_bank_active(self):
        """Only the active bank's member match should survive filtering."""
        with self.fresh_dynamodb():
            self._init_data_if_required()

            filtered_matches = self._new_matcher().filter_match_results(
                [self._active_bank_match(), self._inactive_bank_match()],
                PdqSignal,
            )

            self.assertEqual(
                len(filtered_matches), 1, "Failed to filter out inactive bank's match"
            )
            surviving_metadata = filtered_matches[0].metadata[0]
            self.assertEqual(
                surviving_metadata.bank_member_id,
                self.active_bank_member.bank_member_id,
                "The filtered bank_member id is wrong. It should be the active bank's bank_member's id.",
            )
Exemple #4
0
import os
from hmalib.common.models.models_base import DynamoDBItem
from hmalib.common.models.bank import BanksTable, BankMember
from threatexchange.content_type.video import VideoContent
from mypy_boto3_dynamodb.service_resource import Table
import boto3

# Seeding script: creates one bank in the DynamoDB table named by
# `table_name` and adds `num_members` identical video members to it.
dynamodb = boto3.resource("dynamodb")
table_name = ""
test_bank_name = ""
num_members = 1000

# Both values above must be filled in before running. Explicit checks are
# used instead of `assert` because assert statements are removed when Python
# runs with -O, which would let the script proceed with an empty table name.
if not table_name:
    raise ValueError("table_name must be set before running this script")
if not test_bank_name:
    raise ValueError("test_bank_name must be set before running this script")

table = dynamodb.Table(table_name)
table_manager = BanksTable(table)

bank = table_manager.create_bank(test_bank_name, "test bank description")

# Each member is added with the same storage location and no raw content.
for _ in range(num_members):
    table_manager.add_bank_member(
        bank_id=bank.bank_id,
        content_type=VideoContent,
        raw_content=None,
        storage_bucket="hma-test-media",
        storage_key="videos/breaking-news.mp4",
        notes="",
    )