def test_matcher_filters_out_based_on_distance(self):
        """A match whose distance is above the threshold must be filtered out.

        Two active privacy-group matches are created; the second one has its
        distance overridden to 100. Only the first is expected to survive
        ``Matcher.filter_match_results``, and it is expected to report a
        distance of 0.
        """
        with self.fresh_dynamodb():
            self._init_data_if_required()

            near_match = self._active_pg_match()
            far_match = self._active_pg_match()
            # Push the second match far away so that it should be dropped.
            far_match.distance = 100

            matcher = Matcher("", [PdqSignal, VideoMD5Signal], self.table_manager)
            candidates = [near_match, far_match]
            kept = matcher.filter_match_results(candidates, PdqSignal)

            self.assertEqual(
                len(kept),
                1,
                "Failed to filter out match with distance > threshold",
            )

            surviving_distance = kept[0].distance
            self.assertEqual(
                surviving_distance,
                0,
                "Filtered out the wrong match. Match with distance = 100 should be filtered out.",
            )
예제 #2
0
    def _matches_for_hash(signal_type: t.Type[SignalType],
                          signal_value: str) -> t.List[MatchesForHash]:
        """
        Match a signal value against the index and describe every hit.

        ``indexes_bucket_name`` and ``banks_table`` are not parameters; they
        are read from the enclosing scope.

        The returned list holds one ``MatchesForHash`` per matched metadata
        object: first all ThreatExchange metadata objects (for every match),
        then all bank members (for every match), each paired with the
        integer distance of the match it came from.
        """
        matcher = _get_matcher(indexes_bucket_name, banks_table=banks_table)
        matches = matcher.match(signal_type, signal_value)

        results: t.List[MatchesForHash] = []

        # Pass 1: ThreatExchange metadata objects.
        for match in matches:
            for signal_metadata in Matcher.get_te_metadata_objects_from_match(
                    signal_type, match):
                results.append(
                    MatchesForHash(
                        match_distance=int(match.distance),
                        matched_signal=signal_metadata,
                    ))

        # Pass 2: bank members. Kept as a separate pass so that they are
        # listed after every ThreatExchange entry.
        for match in matches:
            for metadata_obj in match.metadata:
                if metadata_obj.get_source() == BANKS_SOURCE_SHORT_CODE:
                    # cast() only informs the type checker; nothing happens
                    # at runtime.
                    banked = t.cast(BankedSignalIndexMetadata, metadata_obj)
                    results.append(
                        MatchesForHash(
                            match_distance=int(match.distance),
                            matched_signal=banks_table.get_bank_member(
                                banked.bank_member_id),
                        ))

        return results
    def test_matcher_filters_out_based_on_bank_active(self):
        """A match belonging to an inactive bank must be filtered out.

        One active-bank match and one inactive-bank match are passed to
        ``Matcher.filter_match_results``; only the one whose first metadata
        entry carries the active bank member's id is expected to remain.
        """
        with self.fresh_dynamodb():
            self._init_data_if_required()

            matcher = Matcher("", [PdqSignal, VideoMD5Signal], self.table_manager)
            candidates = [self._active_bank_match(), self._inactive_bank_match()]
            kept = matcher.filter_match_results(candidates, PdqSignal)

            self.assertEqual(
                len(kept), 1, "Failed to filter out inactive bank's match"
            )

            kept_member_id = kept[0].metadata[0].bank_member_id
            expected_member_id = self.active_bank_member.bank_member_id
            self.assertEqual(
                kept_member_id,
                expected_member_id,
                "The filtered bank_member id is wrong. It should be the active bank's bank_member's id.",
            )
    def test_matcher_filters_out_inactive_pg(self):
        """A match belonging to an inactive privacy group must be filtered out.

        One active-pg match and one inactive-pg match are passed to
        ``Matcher.filter_match_results``; only the one whose first metadata
        entry references the active privacy group is expected to remain.
        """
        with self.fresh_dynamodb():
            self._init_data_if_required()

            matcher = Matcher("", [PdqSignal, VideoMD5Signal], self.table_manager)
            candidates = [self._active_pg_match(), self._inactive_pg_match()]
            kept = matcher.filter_match_results(candidates, PdqSignal)

            self.assertEqual(
                len(kept), 1, "Failed to filter out inactive pg match"
            )

            kept_privacy_group = kept[0].metadata[0].privacy_group
            expected_privacy_group = self.active_pg.privacy_group_id
            self.assertEqual(
                kept_privacy_group,
                expected_privacy_group,
                "The filtered privacy group id is wrong. It should be the active pg's id.",
            )
예제 #5
0
def get_matcher(banks_table: BanksTable):
    """Return the module-wide ``Matcher``, building it on first use.

    The instance is kept in the module-level ``_matcher`` variable. Once it
    has been created, later calls return it as-is and their ``banks_table``
    argument is not used.
    """
    global _matcher

    # Fast path: already built by an earlier call.
    if _matcher is not None:
        return _matcher

    _matcher = Matcher(
        index_bucket_name=INDEXES_BUCKET_NAME,
        supported_signal_types=[PdqSignal, VideoMD5Signal],
        banks_table=banks_table,
    )
    return _matcher
예제 #6
0
def _get_matcher(index_bucket_name: str, banks_table: BanksTable) -> Matcher:
    """Return the cached ``Matcher``, creating it on the first call.

    The instance lives in the module-level ``_matcher`` variable. Arguments
    are only consulted when that variable is still ``None``; afterwards the
    existing instance is returned regardless of what is passed in.
    """
    global _matcher

    if _matcher is not None:
        return _matcher

    signal_types = [PdqSignal, VideoMD5Signal]
    _matcher = Matcher(
        index_bucket_name=index_bucket_name,
        supported_signal_types=signal_types,
        banks_table=banks_table,
    )
    return _matcher
예제 #7
0
    def for_hash(request: MatchesForHashRequest) -> MatchesForHashResponse:
        """
        Check the index(es) for matches of a given hash/signal and return the details.

        This does not change system state: the metadata returned is not written
        to any tables, unlike when matches are found for submissions.
        """
        matcher = _get_matcher(indexes_bucket_name)
        matches = matcher.match(request.signal_type, request.signal_value)

        # Flatten the metadata objects of every match into a single list,
        # keeping the order in which the matches were returned.
        match_objects: t.List[ThreatExchangeSignalMetadata] = [
            metadata
            for match in matches
            for metadata in Matcher.get_metadata_objects_from_match(
                request.signal_type, match)
        ]

        return MatchesForHashResponse(match_objects, request.signal_value)
예제 #8
0
# Runs once at import time, before any of the definitions below.
# NOTE(review): presumably binds HMAConfig to the table named by
# HMA_CONFIG_TABLE - confirm against HMAConfig.initialize.
HMAConfig.initialize(HMA_CONFIG_TABLE)


@functools.lru_cache(maxsize=None)
def get_dynamodb() -> DynamoDBServiceResource:
    """Return the boto3 DynamoDB service resource.

    The call is memoized by ``functools.lru_cache``, so the resource is
    created on the first call and the same object is returned afterwards.
    """
    dynamodb_resource = boto3.resource("dynamodb")
    return dynamodb_resource


@functools.lru_cache(maxsize=None)
def get_sns_client() -> SNSClient:
    """Return the boto3 SNS client.

    The call is memoized by ``functools.lru_cache``, so the client is
    created on the first call and the same object is returned afterwards.
    """
    sns_client = boto3.client("sns")
    return sns_client


# Module-level Matcher, constructed once when this module is imported.
# It is configured with the index bucket name from INDEXES_BUCKET_NAME and
# the two supported signal types (PdqSignal and VideoMD5Signal).
matcher = Matcher(
    index_bucket_name=INDEXES_BUCKET_NAME,
    supported_signal_types=[PdqSignal, VideoMD5Signal],
)

# Logger obtained for this module's name.
logger = get_logger(__name__)


def lambda_handler(event, context):
    """
    Listens to SQS events fired when new hash is generated. Loads the index
    stored in an S3 bucket and looks for a match.

    When matched, publishes a notification to an SNS endpoint. Note this is in
    contrast with hasher and indexer. They publish to SQS directly. Publishing
    to SQS implies there can be only one consumer.

    Because, here, in the matcher, we publish to SNS, we can plug multiple
예제 #9
0
def _get_matcher(index_bucket_name: str) -> Matcher:
    """Build a ``Matcher`` for the given index bucket.

    A new instance is constructed on every call; nothing is cached here.
    It supports the PdqSignal and VideoMD5Signal signal types.
    """
    signal_types = [PdqSignal, VideoMD5Signal]
    new_matcher = Matcher(
        index_bucket_name=index_bucket_name,
        supported_signal_types=signal_types,
    )
    return new_matcher