Example #1
    def request_signal_opinion_change() -> ChangeSignalOpinionResponse:
        """
        Request a change to the opinion for a signal in a dataset.
        """
        signal_q = bottle.request.query.signal_q or None
        signal_source = bottle.request.query.signal_source or None
        dataset_q = bottle.request.query.dataset_q or None
        opinion_change = bottle.request.query.opinion_change or None

        if not signal_q or not signal_source or not dataset_q or not opinion_change:
            return ChangeSignalOpinionResponse(False)

        # TODO send message to action framework to actually request the change in TE
        logger.info(
            f"Mock: Reaction change enqueued for {signal_source}:{signal_q} in {dataset_q} change={opinion_change}"
        )

        signal = PDQSignalMetadata(
            signal_id=signal_q,
            ds_id=dataset_q,
            updated_at=datetime.datetime.now(),
            signal_source=signal_source,
            signal_hash="",  # SignalHash not needed for update
            tags=[],  # Tags not needed for update
            pending_opinion_change=PendingOpinionChange(opinion_change),
        )
        success = signal.update_pending_opinion_change_in_table_if_exists(
            dynamodb_table)
        if not success:
            logger.info(f"Attempting to update {signal} in db failed")

        return ChangeSignalOpinionResponse(success)
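One sharp edge in this handler: constructing PendingOpinionChange directly from the raw query value raises ValueError for any string that is not an enum member, which surfaces as a server error rather than a clean failure response. A minimal hedged sketch of a guarded conversion (the helper name parse_opinion_change is hypothetical; PendingOpinionChange comes from the module's existing imports):

    def parse_opinion_change(raw: str):
        """Hypothetical helper: return the enum member, or None for unknown values."""
        try:
            return PendingOpinionChange(raw)
        except ValueError:
            # Unknown values become None instead of an unhandled exception.
            return None

With this guard in place, the handler could return ChangeSignalOpinionResponse(False) when the helper yields None, keeping malformed requests on the same clean failure path as missing parameters.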
Example #2
    def post_apply(self, updated: t.Dict = {}):
        """
        After the fetcher applies an update, check for matches
        to any of the signals in data_store_table and, if found, update
        their tags.

        TODO: Additionally, if writebacks are enabled for this privacy
        group, write back INGESTED to ThreatExchange.
        """
        table = dynamodb.Table(self.data_store_table)

        for update in updated.values():
            row: t.List[str] = update.as_csv_row()
            # example row format: ('<raw_indicator>', '<indicator-id>', '<descriptor-id>', '<time added>', '<space-separated-tags>')
            # e.g. ('096a6f9...064f', '10736405276340', '1234567890', '2020-07-31T18:47:45+0000', 'true_positive hma_test')
            new_tags = row[4].split(" ") if row[4] else []

            metadata = PDQSignalMetadata.get_from_signal_and_ds_id(
                table,
                int(row[1]),
                S3ThreatDataConfig.SOURCE_STR,
                str(self.privacy_group),
            )

            if not metadata:
                # A new indicator without existing metadata: there is nothing
                # for us to update, so skip to the next row (a plain `return`
                # here would wrongly abort the remaining updates).
                continue

            new_pending_opinion_change = self.get_new_pending_opinion_change(
                metadata, new_tags)

            metadata = PDQSignalMetadata(
                signal_id=row[1],
                ds_id=str(self.privacy_group),
                updated_at=datetime.now(),
                signal_source=S3ThreatDataConfig.SOURCE_STR,
                signal_hash=row[0],  # note: not used by update_tags_in_table_if_exists
                tags=new_tags,
                pending_opinion_change=new_pending_opinion_change,
            )
            # TODO: Combine 2 update functions into single function
            if metadata.update_tags_in_table_if_exists(table):
                logger.info(
                    "Updated Signal Tags in DB for indicator id: %s source: %s for privacy group: %d",
                    row[1],
                    S3ThreatDataConfig.SOURCE_STR,
                    self.privacy_group,
                )
            if metadata.update_pending_opinion_change_in_table_if_exists(
                    table):
                logger.info(
                    "Updated Pending Opinion in DB for indicator id: %s source: %s for privacy group: %d",
                    row[1],
                    S3ThreatDataConfig.SOURCE_STR,
                    self.privacy_group,
                )
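For reference, a minimal stand-in for the update objects this method iterates over, mirroring the five-column row shape documented in the comment above (the class name and values are hypothetical):

    import typing as t

    class FakeUpdate:
        """Hypothetical stand-in matching the documented as_csv_row() shape."""

        def as_csv_row(self) -> t.List[str]:
            # (raw_indicator, indicator-id, descriptor-id, time added, tags)
            return [
                "096a6f9...064f",            # raw indicator (the PDQ hash)
                "10736405276340",            # indicator id
                "1234567890",                # descriptor id
                "2020-07-31T18:47:45+0000",  # time added
                "true_positive hma_test",    # space-separated tags
            ]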
Example #3
    def post_apply(self, updated: t.Dict = {}):
        """
        After the fetcher applies an update, check for matches
        to any of the signals in data_store_table and, if found, update
        their tags.
        """
        table = dynamodb.Table(self.data_store_table)

        for update in updated.values():
            row = update.as_csv_row()
            # example row format: ('<signal>', '<id>', '<time added>', '<tag1 tags2>')
            # e.g. ('096a6f9...064f', '1234567891234567', '2020-07-31T18:47:45+0000', 'true_positive hma_test')
            if PDQSignalMetadata(
                    signal_id=int(row[1]),
                    ds_id=str(self.privacy_group),
                    updated_at=datetime.now(),
                    signal_source=S3ThreatDataConfig.SOURCE_STR,
                    signal_hash=row[0],  # note: not used by update_tags_in_table_if_exists
                    tags=row[3].split(" ") if row[3] else [],
            ).update_tags_in_table_if_exists(table):
                logger.info(
                    "Updated Signal Tags in DB for signal id: %s source: %s for privacy group: %d",
                    row[1],
                    S3ThreatDataConfig.SOURCE_STR,
                    self.privacy_group,
                )
Example #4
    def request_signal_opinion_change() -> ChangeSignalOpinionResponse:
        """
        Request a change to the opinion for a signal in a dataset.
        """
        signal_id = bottle.request.query.signal_q or None
        signal_source = bottle.request.query.signal_source or None
        ds_id = bottle.request.query.dataset_q or None
        opinion_change = bottle.request.query.opinion_change or None

        if not signal_id or not signal_source or not ds_id or not opinion_change:
            return ChangeSignalOpinionResponse(False)

        signal_id = str(signal_id)
        pending_opinion_change = PendingOpinionChange(opinion_change)

        writeback_message = WritebackMessage.from_banked_signal_and_opinion_change(
            BankedSignal(signal_id, ds_id, signal_source),
            pending_opinion_change)
        writeback_message.send_to_queue()
        logger.info(
            f"Opinion change enqueued for {signal_source}:{signal_id} in {ds_id} change={opinion_change}"
        )

        signal = PDQSignalMetadata(
            signal_id=signal_id,
            ds_id=ds_id,
            updated_at=datetime.datetime.now(),
            signal_source=signal_source,
            signal_hash="",  # SignalHash not needed for update
            tags=[],  # Tags not needed for update
            pending_opinion_change=pending_opinion_change,
        )
        success = signal.update_pending_opinion_change_in_table_if_exists(
            dynamodb_table)
        if not success:
            logger.info(f"Attempting to update {signal} in db failed")

        return ChangeSignalOpinionResponse(success)
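A hedged sketch of calling this endpoint from a client, assuming the handler is exposed over HTTP as-is; the host, path, ids, and opinion value below are all placeholders:

    import urllib.parse
    import urllib.request

    query = urllib.parse.urlencode({
        "signal_q": "10736405276340",            # placeholder signal id
        "signal_source": "te",                   # placeholder source string
        "dataset_q": "303636684709969",          # placeholder dataset id
        "opinion_change": "mark_true_positive",  # placeholder enum value
    })
    url = f"https://example.com/request_signal_opinion_change?{query}"
    with urllib.request.urlopen(url) as resp:
        print(resp.read())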
Example #5
def get_signal_details(table: Table, signal_id: t.Union[str, int],
                       signal_source: str) -> t.List[MatchDetailMetadata]:
    if not signal_id or not signal_source:
        return []

    return [
        MatchDetailMetadata(
            dataset=metadata.ds_id,
            tags=[
                tag for tag in metadata.tags
                if tag not in ThreatDescriptor.SPECIAL_TAGS
            ],
            opinion=get_opinion_from_tags(metadata.tags).value,
            pending_opinion_change=metadata.pending_opinion_change.value,
        ) for metadata in PDQSignalMetadata.get_from_signal(
            table, signal_id, signal_source)
    ]
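A hedged usage sketch for get_signal_details, assuming a deployed DynamoDB table; the table name, signal id, and source string are placeholders:

    import boto3

    table = boto3.resource("dynamodb").Table("ThreatExchangeData")  # placeholder
    for detail in get_signal_details(table, "10736405276340", "te"):
        print(detail.dataset, detail.opinion, detail.tags)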
def lambda_handler(event, context):
    """
    Listens to SQS events fired when a new hash is generated. Loads the index
    stored in an S3 bucket and looks for a match.

    As per the default configuration:
    - the index data bucket is INDEXES_BUCKET_NAME
    - the key name must be PDQ_INDEX_KEY

    When matched, publishes a notification to an SNS endpoint. Note this is in
    contrast with the hasher and indexer, which publish to SQS directly;
    publishing to SQS implies there can be only one consumer.

    Because the matcher publishes to SNS, we can plug multiple queues behind
    it and profit!
    """
    records_table = dynamodb.Table(DYNAMODB_TABLE)

    hash_index: PDQIndex = get_index(INDEXES_BUCKET_NAME, PDQ_INDEX_KEY)
    logger.info("loaded_hash_index")

    for sqs_record in event["Records"]:
        message = json.loads(sqs_record["body"])
        if message.get("Event") == "TestEvent":
            logger.info("Disregarding Test Event")
            continue

        hash_str = message["hash"]
        key = message["key"]
        current_datetime = datetime.datetime.now()

        with metrics.timer(metrics.names.pdq_matcher_lambda.search_index):
            results = hash_index.query(hash_str)

        if results:
            match_ids = []
            matching_banked_signals: t.List[BankedSignal] = []
            for match in results:
                metadata = match.metadata
                logger.info("Match found for key: %s, hash %s -> %s", key,
                            hash_str, metadata)
                privacy_group_list = metadata.get("privacy_groups", [])
                metadata["privacy_groups"] = list(
                    filter(
                        lambda x: get_privacy_group_matcher_active(
                            str(x),
                            # CACHED_TIME defaults to 300 seconds; flooring
                            # time.time() by it gives a value that changes only
                            # once every 300 seconds, so the active check can
                            # be cached across invocations.
                            time.time() // CACHED_TIME,
                        ),
                        privacy_group_list,
                    ))
                if metadata["privacy_groups"]:
                    signal_id = str(metadata["id"])

                    with metrics.timer(metrics.names.pdq_matcher_lambda.write_match_record):
                        # TODO: Add source (threatexchange) tags to match record
                        PDQMatchRecord(
                            key,
                            hash_str,
                            current_datetime,
                            signal_id,
                            metadata["source"],
                            metadata["hash"],
                        ).write_to_table(records_table)

                    for pg in metadata.get("privacy_groups", []):
                        # Only write the metadata if it is not already in the
                        # table; once initially created, it is the fetcher's
                        # job to keep the item up to date.
                        PDQSignalMetadata(
                            signal_id,
                            pg,
                            current_datetime,
                            metadata["source"],
                            metadata["hash"],
                            metadata["tags"].get(pg, []),
                        ).write_to_table_if_not_found(records_table)

                    match_ids.append(signal_id)

                    # TODO: change naming upstream and here from privacy_group[s]
                    # to dataset[s]
                    for privacy_group in metadata.get("privacy_groups", []):
                        banked_signal = BankedSignal(str(signal_id),
                                                     str(privacy_group),
                                                     str(metadata["source"]))
                        for tag in metadata["tags"].get(privacy_group, []):
                            banked_signal.add_classification(tag)
                        matching_banked_signals.append(banked_signal)

            # TODO: Add source (threatexchange) tags to match message
            if matching_banked_signals:
                match_message = MatchMessage(
                    content_key=key,
                    content_hash=hash_str,
                    matching_banked_signals=matching_banked_signals,
                )

                logger.info(f"Publishing match_message: {match_message}")

                # Publish one message for the set of matches.
                sns_client.publish(TopicArn=OUTPUT_TOPIC_ARN,
                                   Message=match_message.to_aws_json())

        else:
            logger.info(f"No matches found for key: {key} hash: {hash_str}")

    metrics.flush()
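To make the fan-out described in the docstring concrete: because the matcher publishes to an SNS topic rather than directly to a queue, additional consumers attach themselves by subscribing their own SQS queues to the topic. A hedged sketch with placeholder ARNs:

    import boto3

    sns = boto3.client("sns")
    # Every subscribed queue receives its own copy of each match message.
    sns.subscribe(
        TopicArn="arn:aws:sns:us-east-1:123456789012:match-topic",     # placeholder
        Protocol="sqs",
        Endpoint="arn:aws:sqs:us-east-1:123456789012:actioner-queue",  # placeholder
    )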