Esempio n. 1
0
    def _create_200_members(self) -> str:
        """
        Create a bank named "TEST_BANK" holding 200 members.

        Every member is added with identical attributes (PhotoContent, no raw
        content, the same storage bucket and key, empty notes).

        Returns the bank_id of the newly created bank.
        """
        banks = BanksTable(self.get_table(), get_default_signal_type_mapping())
        new_bank = banks.create_bank("TEST_BANK", "TEST BANK Description")

        # All members are identical, so describe one and add it repeatedly.
        member_attributes = dict(
            bank_id=new_bank.bank_id,
            content_type=PhotoContent,
            raw_content=None,
            storage_bucket="hma-test-media",
            storage_key="videos/breaking-news.mp4",
            notes="",
        )
        member_count = 200
        for _ in range(member_count):
            banks.add_bank_member(**member_attributes)

        return new_bank.bank_id
    def _create_bank_and_bank_member(self) -> t.Tuple[str, str]:
        """
        Create a bank named "TEST_BANK" with a single VideoContent member.

        Returns a (bank_id, bank_member_id) tuple for the created records.
        """
        banks = BanksTable(self.get_table())
        new_bank = banks.create_bank("TEST_BANK", "Test bank description")

        member_attributes = dict(
            bank_id=new_bank.bank_id,
            content_type=VideoContent,
            raw_content=None,
            storage_bucket="hma-test-media",
            storage_key="irrrelevant",
            notes="",
        )
        new_member = banks.add_bank_member(**member_attributes)

        return new_bank.bank_id, new_member.bank_member_id
def add_bank_member(
    banks_table: BanksTable,
    sqs_client: SQSClient,
    submissions_queue_url: str,
    bank_id: str,
    content_type: t.Type[ContentType],
    storage_bucket: t.Optional[str],
    storage_key: t.Optional[str],
    raw_content: t.Optional[str],
    notes: str,
    bank_member_tags: t.Set[str],
) -> BankMember:
    """
    Persist a new bank member, then queue it so the hashing lambda can extract
    its signals.

    The member is written through `banks_table` first. Afterwards a
    BankSubmissionMessage carrying a presigned "get_object" URL for
    (storage_bucket, storage_key) is serialized to JSON and sent to the queue
    at `submissions_queue_url` using `sqs_client`.

    Returns the member object produced by `banks_table.add_bank_member`.
    """
    new_member = banks_table.add_bank_member(
        bank_id=bank_id,
        content_type=content_type,
        storage_bucket=storage_bucket,
        storage_key=storage_key,
        raw_content=raw_content,
        notes=notes,
        bank_member_tags=bank_member_tags,
    )

    # NOTE(review): the URL is built even when storage_bucket / storage_key are
    # None (both are Optional) -- confirm create_presigned_url copes with that.
    # NOTE(review): 3600 is presumably the URL lifetime in seconds -- confirm.
    media_url = create_presigned_url(
        storage_bucket, storage_key, None, 3600, "get_object"
    )
    message = BankSubmissionMessage(
        content_type=content_type,
        url=media_url,
        bank_id=bank_id,
        bank_member_id=new_member.bank_member_id,
    )

    message_body = json.dumps(message.to_sqs_message())
    sqs_client.send_message(
        QueueUrl=submissions_queue_url,
        MessageBody=message_body,
    )

    return new_member
class MatchFiltersTestCase(BanksTableTestBase, unittest.TestCase):
    """
    Checks that Matcher.filter_match_results drops matches belonging to the
    inactive privacy group, matches whose distance is too large, and matches
    belonging to the inactive bank.
    """

    # NOTE: Table is defined in base class BanksTableTestBase

    def _add_photo_member(self, bank):
        """Add a PhotoContent member without storage, raw content or notes to `bank`."""
        return self.table_manager.add_bank_member(
            bank_id=bank.bank_id,
            content_type=PhotoContent,
            raw_content=None,
            storage_bucket=None,
            storage_key=None,
            notes=None,
        )

    def _set_bank_active(self, bank, is_active):
        """Re-save `bank` with its current name and description and the given is_active flag."""
        self.table_manager.update_bank(
            bank_id=bank.bank_id,
            bank_name=bank.bank_name,
            bank_description=bank.bank_description,
            is_active=is_active,
        )

    def _create_banks(self):
        """Create one active and one inactive bank, each holding a single member."""
        self.table_manager = BanksTable(
            self.get_table(), get_default_signal_type_mapping()
        )

        # Active bank: the member is added before the bank is updated.
        self.active_bank = self.table_manager.create_bank("TEST_BANK", "Is Active")
        self.active_bank_member = self._add_photo_member(self.active_bank)
        self._set_bank_active(self.active_bank, True)

        # Inactive bank: the bank is updated before the member is added.
        self.inactive_bank = self.table_manager.create_bank(
            "TEST_BANK_2", "Is Inactive"
        )
        self._set_bank_active(self.inactive_bank, False)
        self.inactive_bank_member = self._add_photo_member(self.inactive_bank)

    def _create_privacy_groups(self):
        """Initialize HMAConfig on a mocked table and store the two privacy group configs."""
        # Since we already have a mock_dynamodb2 courtesy BanksTableTestBase,
        # re-use it for initing configs. Requires some clever hot-wiring.
        config_fixture = config_test.ConfigTest()
        config_fixture.mock_dynamodb2 = self.__class__.mock_dynamodb2
        config_fixture.create_mocked_table()
        HMAConfig.initialize(config_fixture.TABLE_NAME)
        # Hot wiring ends...

        # NOTE(review): besides their names, the two configs differ only in the
        # sixth positional argument -- presumably the "matcher active" flag; confirm.
        self.active_pg = ThreatExchangeConfig(
            "ACTIVE_PG", True, "", True, True, True, "ACTIVE_PG"
        )
        create_config(self.active_pg)

        # Active PG has a distance threshold of 31.
        create_config(AdditionalMatchSettingsConfig("ACTIVE_PG", 31))

        self.inactive_pg = ThreatExchangeConfig(
            "INACTIVE_PG", True, "", True, True, False, "INACTIVE_PG"
        )
        create_config(self.inactive_pg)

    def _init_data_if_required(self):
        """Create the banks and the privacy groups the tests rely on."""
        self._create_banks()
        self._create_privacy_groups()

    def _privacy_group_match(self, privacy_group):
        """Build a distance-0 IndexMatch whose only metadata points at `privacy_group`."""
        metadata = ThreatExchangeIndicatorIndexMetadata(
            "indicator_id",
            "hash_value",
            privacy_group.privacy_group_id,
        )
        return IndexMatch(0, [metadata])

    def _bank_member_match(self, bank_member):
        """Build a distance-0 IndexMatch whose only metadata points at `bank_member`."""
        metadata = BankedSignalIndexMetadata(
            "signal", "signal_value", bank_member.bank_member_id
        )
        return IndexMatch(0, [metadata])

    def _active_pg_match(self):
        """Match against the active privacy group."""
        return self._privacy_group_match(self.active_pg)

    def _inactive_pg_match(self):
        """Match against the inactive privacy group."""
        return self._privacy_group_match(self.inactive_pg)

    def _active_bank_match(self):
        """Match against the member of the active bank."""
        return self._bank_member_match(self.active_bank_member)

    def _inactive_bank_match(self):
        """Match against the member of the inactive bank."""
        return self._bank_member_match(self.inactive_bank_member)

    def _filter_pdq_matches(self, matches):
        """Run `matches` through a freshly built Matcher's filter for PdqSignal."""
        matcher = Matcher("", [PdqSignal, VideoMD5Signal], self.table_manager)
        return matcher.filter_match_results(matches, PdqSignal)

    def test_matcher_filters_out_inactive_pg(self):
        # Only the match for the active privacy group may survive filtering.
        with self.fresh_dynamodb():
            self._init_data_if_required()

            candidates = [self._active_pg_match(), self._inactive_pg_match()]
            surviving = self._filter_pdq_matches(candidates)

            self.assertEqual(
                len(surviving), 1, "Failed to filter out inactive pg match"
            )
            self.assertEqual(
                surviving[0].metadata[0].privacy_group,
                self.active_pg.privacy_group_id,
                "The filtered privacy group id is wrong. It should be the active pg's id.",
            )

    def test_matcher_filters_out_based_on_distance(self):
        # Two matches for the active privacy group; the one at distance 100
        # exceeds the configured threshold and must be dropped.
        with self.fresh_dynamodb():
            self._init_data_if_required()

            near_match = self._active_pg_match()
            far_match = self._active_pg_match()
            far_match.distance = 100

            surviving = self._filter_pdq_matches([near_match, far_match])

            self.assertEqual(
                len(surviving),
                1,
                "Failed to filter out match with distance > threshold",
            )

            self.assertEqual(
                surviving[0].distance,
                0,
                "Filtered out the wrong match. Match with distance = 100 should be filtered out.",
            )

    def test_matcher_filters_out_based_on_bank_active(self):
        # Only the match for the active bank's member may survive filtering.
        with self.fresh_dynamodb():
            self._init_data_if_required()

            candidates = [self._active_bank_match(), self._inactive_bank_match()]
            surviving = self._filter_pdq_matches(candidates)

            self.assertEqual(
                len(surviving), 1, "Failed to filter out inactive bank's match"
            )
            self.assertEqual(
                surviving[0].metadata[0].bank_member_id,
                self.active_bank_member.bank_member_id,
                "The filtered bank_member id is wrong. It should be the active bank's bank_member's id.",
            )
    def test_bank_member_removes_from_get_members_page(self):
        # Removed bank members must no longer be returned when paging through
        # all members of a bank.
        #
        # The bank is filled with NUM_MEMBERS + 1 members, every
        # REMOVE_EVERY_XTH_MEMBER-th one is removed, and the member listing is
        # read in full before and after the removal.
        NUM_MEMBERS = 100
        REMOVE_EVERY_XTH_MEMBER = 4

        with self.fresh_dynamodb():
            table_manager = BanksTable(self.get_table())

            # The helper already adds one member to the bank it creates, hence
            # the "+ 1" in the expected total.
            bank_id, _ = self._create_bank_and_bank_member()
            expected_total = NUM_MEMBERS + 1

            for _ in range(NUM_MEMBERS):
                table_manager.add_bank_member(
                    bank_id=bank_id,
                    content_type=VideoContent,
                    raw_content=None,
                    storage_bucket="hma-test-media",
                    storage_key="irrrelevant",
                    notes="",
                )

            def fetch_all_members():
                # Follow the pagination keys until the last page is reached and
                # return the members of all pages combined.
                collected = []
                exclusive_start_key = None
                while True:
                    page = table_manager.get_all_bank_members_page(
                        bank_id=bank_id,
                        content_type=VideoContent,
                        exclusive_start_key=exclusive_start_key,
                    )
                    collected.extend(page.items)
                    exclusive_start_key = page.last_evaluated_key

                    if not page.has_next_page():
                        return collected

            members = fetch_all_members()
            self.assertEqual(
                len(members),
                expected_total,
                "All the pages together have as many members as we added.",
            )

            # Take every Xth member (indices 0, X, 2X, ...). The previous
            # condition `i // X == 0` used floor division and therefore only
            # ever selected the first X members.
            members_to_remove = members[::REMOVE_EVERY_XTH_MEMBER]
            for member in members_to_remove:
                table_manager.remove_bank_member(member.bank_member_id)
            count_members_removed = len(members_to_remove)

            members = fetch_all_members()
            self.assertEqual(
                len(members),
                expected_total - count_members_removed,
                "All the pages together have as many members as we added minus the ones we removed.",
            )
Esempio n. 6
0
import os
from hmalib.common.models.models_base import DynamoDBItem
from hmalib.common.models.bank import BanksTable, BankMember
from threatexchange.content_type.video import VideoContent
from mypy_boto3_dynamodb.service_resource import Table
import boto3

# One-off script: creates a bank in an existing DynamoDB table and fills it
# with `num_members` identical VideoContent members.
dynamodb = boto3.resource("dynamodb")
table_name = ""
test_bank_name = ""
num_members = 1000

# Both values above must be filled in before running the script. This is an
# explicit check rather than `assert`, because assertions are stripped when
# Python runs with -O and the script would then continue with empty names.
if table_name == "" or test_bank_name == "":
    raise ValueError(
        "table_name and test_bank_name must be set before running this script"
    )

table = dynamodb.Table(table_name)
table_manager = BanksTable(table)

bank = table_manager.create_bank(test_bank_name, "test bank description")

# Every member points at the same stored object.
for _ in range(num_members):
    table_manager.add_bank_member(
        bank_id=bank.bank_id,
        content_type=VideoContent,
        raw_content=None,
        storage_bucket="hma-test-media",
        storage_key="videos/breaking-news.mp4",
        notes="",
    )