def create_privacy_group_if_not_exists(
    privacy_group_id: str,
    privacy_group_name: str,
    description: str = "",
    in_use: bool = True,
    fetcher_active: bool = FETCHER_ACTIVE_DEFAULT,
    matcher_active: bool = MATCHER_ACTIVE_DEFAULT,
    write_back: bool = WRITE_BACK_DEFAULT,
):
    """
    Create a ThreatExchangeConfig for the given privacy group.

    If a config with this id already exists (DynamoDB signals this via a
    ConditionalCheckFailedException), the insert is skipped and only the
    description is refreshed — and only when a non-empty description was
    supplied. Any other ClientError is re-raised.
    """
    logger.info("Adding collaboration name %s", privacy_group_name)
    new_config = ThreatExchangeConfig(
        privacy_group_id,
        fetcher_active=fetcher_active,
        privacy_group_name=privacy_group_name,
        in_use=in_use,
        description=description,
        matcher_active=matcher_active,
        write_back=write_back,
    )
    try:
        hmaconfig.create_config(new_config)
    except ClientError as client_error:
        error = client_error.response["Error"]
        if error["Code"] != "ConditionalCheckFailedException":
            raise
        logger.warning(
            "Can't insert duplicated config, %s",
            error["Message"],
        )
        if description:
            update_privacy_group_description(privacy_group_id, description)
Beispiel #2
0
def get_privacy_group_matcher_active(privacy_group_id: str, _) -> bool:
    """
    Return whether matching is active for the given privacy group.

    Returns False (with a warning) when no config exists for the id.
    The second argument is ignored — presumably a cache-busting value;
    verify against callers.
    """
    config = ThreatExchangeConfig.get(privacy_group_id)
    if not config:
        logger.warning("Privacy group %s is not found!", privacy_group_id)
        return False
    is_active = config.matcher_active
    logger.info("matcher_active for %s is %s", privacy_group_id, is_active)
    return is_active
Beispiel #3
0
 def delete_dataset(key=None) -> DeleteDatasetResponse:
     """
     Delete the privacy-group config whose name equals ``key``.

     Uses ``getx`` so a missing key raises instead of silently
     succeeding.
     """
     config_to_delete = ThreatExchangeConfig.getx(str(key))
     hmaconfig.delete_config(config_to_delete)
     return DeleteDatasetResponse(response="The privacy group is deleted")
Beispiel #4
0
    def update_dataset(request: UpdateDatasetRequest) -> Dataset:
        """
        Update dataset values: fetcher_active, write_back, and matcher_active.

        Also maintains the optional AdditionalMatchSettingsConfig holding
        pdq_match_threshold: created or updated when the request carries a
        threshold, and deleted when the request omits one but a stored
        config exists.
        """
        pg_id = str(request.privacy_group_id)
        config = ThreatExchangeConfig.getx(pg_id)
        config.fetcher_active = request.fetcher_active
        config.write_back = request.write_back
        config.matcher_active = request.matcher_active
        updated_config = hmaconfig.update_config(config).__dict__
        updated_config["privacy_group_id"] = updated_config["name"]

        additional_config = AdditionalMatchSettingsConfig.get(pg_id)
        if request.pdq_match_threshold:
            threshold = int(request.pdq_match_threshold)
            if additional_config:
                additional_config.pdq_match_threshold = threshold
                hmaconfig.update_config(additional_config)
            else:
                hmaconfig.create_config(
                    AdditionalMatchSettingsConfig(pg_id, threshold))
        elif additional_config:
            # pdq_match_threshold was set previously and should now be removed.
            hmaconfig.delete_config(additional_config)

        return Dataset.from_dict(updated_config)
Beispiel #5
0
 def writeback_is_enabled(self, writeback_signal: BankedSignal) -> bool:
     """
     Return True iff the privacy group behind this banked signal has
     write-backs enabled.

     The signal's bank_id doubles as the privacy group id. A missing or
     wrong-typed config disables write-back (fail closed).
     """
     privacy_group_id = writeback_signal.bank_id
     privacy_group_config = ThreatExchangeConfig.cached_get(
         privacy_group_id)
     if isinstance(privacy_group_config, ThreatExchangeConfig):
         return privacy_group_config.write_back
     # If no config, don't write back.
     # FIX: logger.warn is a deprecated alias of logger.warning; also use
     # lazy %-args instead of eager string concatenation.
     logger.warning("No config found for privacy group %s",
                    privacy_group_id)
     return False
Beispiel #6
0
def _get_all_matcher_active_privacy_groups(cache_buster) -> t.List[str]:
    """
    Return the names of all ThreatExchange configs with matcher_active set.

    cache_buster is unused in the body; presumably callers pass a changing
    value to defeat a memoization wrapper — confirm against callers.
    """
    # A comprehension is clearer than the original nested map/filter
    # with lambdas, and behaves identically.
    return [
        config.name
        for config in ThreatExchangeConfig.get_all()
        if config.matcher_active
    ]
Beispiel #7
0
 def update_dataset(request: UpdateDatasetRequest) -> Dataset:
     """
     Update dataset values: fetcher_active, write_back, and matcher_active.

     Copies the three mutable flags from the request onto the stored
     config, persists it, and returns the updated record as a Dataset.
     """
     config = ThreatExchangeConfig.getx(str(request.privacy_group_id))
     for flag in ("fetcher_active", "write_back", "matcher_active"):
         setattr(config, flag, getattr(request, flag))
     as_dict = hmaconfig.update_config(config).__dict__
     as_dict["privacy_group_id"] = as_dict["name"]
     return Dataset.from_dict(as_dict)
    def _create_privacy_groups(self):
        """Create one matcher-active and one matcher-inactive privacy group."""
        # Since we already have a mock_dynamodb2 courtesy BanksTableTestBase,
        # re-use it for initing configs. Requires some clever hot-wiring.
        config_test_mock = config_test.ConfigTest()
        config_test_mock.mock_dynamodb2 = self.__class__.mock_dynamodb2
        config_test_mock.create_mocked_table()
        HMAConfig.initialize(config_test_mock.TABLE_NAME)
        # Hot wiring ends...

        self.active_pg = ThreatExchangeConfig(
            "ACTIVE_PG", True, "", True, True, True, "ACTIVE_PG"
        )
        self.inactive_pg = ThreatExchangeConfig(
            "INACTIVE_PG", True, "", True, True, False, "INACTIVE_PG"
        )
        create_config(self.active_pg)
        create_config(self.inactive_pg)

        # Active PG has a distance threshold of 31.
        create_config(AdditionalMatchSettingsConfig("ACTIVE_PG", 31))
Beispiel #9
0
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """
    Build a ThreatExchangeDatasetSummary for every configured collaboration.

    hash_count falls back to -1 when no count is recorded for a privacy
    group; match_count is hard-coded to -1 pending the new count system.
    pdq_match_threshold is the stringified stored threshold, or "" when no
    AdditionalMatchSettingsConfig exists for the group.
    """
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[str, t.Tuple[
        int, str]] = _get_signal_hash_count_and_last_modified(
            threat_exchange_data_bucket_name,
            threat_exchange_data_folder,
        )

    summaries = []
    for collab in collaborations:
        if additional_config := AdditionalMatchSettingsConfig.get(
                str(collab.privacy_group_id)):
            pdq_match_threshold = str(additional_config.pdq_match_threshold)
        else:
            pdq_match_threshold = ""
        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=t.cast(
                    int,
                    hash_counts.get(
                        collab.privacy_group_id,
                        [-1, ""],
                    )[0],
                ),
                match_count=-1,  # fix will be based on new count system
                pdq_match_threshold=pdq_match_threshold,
            ))
    # BUGFIX: `summaries` was built but never returned — the function
    # implicitly returned None despite its List annotation.
    return summaries
Beispiel #10
0
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """
    Summarize every configured ThreatExchange collaboration.

    hash_count is looked up by the dataset's S3-style key
    (<folder><privacy_group_id><extension>) and defaults to 0;
    match_count comes from MatchByPrivacyGroupCounter and also
    defaults to 0.
    """
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[str, t.Tuple[
        int, str]] = _get_signal_hash_count_and_last_modified(
            threat_exchange_data_bucket_name,
            threat_exchange_data_folder,
            threat_exchange_pdq_file_extension,
        )

    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(
        table)

    summaries = []
    for collab in collaborations:
        hash_count_key = (
            f"{threat_exchange_data_folder}"
            f"{collab.privacy_group_id}"
            f"{threat_exchange_pdq_file_extension}")
        hash_count = t.cast(int, hash_counts.get(hash_count_key, [0, ""])[0])
        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=hash_count,
                match_count=match_counts.get(collab.privacy_group_id, 0),
            ))
    return summaries
def load_defaults(_args):
    """
    Load a hardcoded set of defaults which are useful in testing

    Creates two ThreatExchange collaboration configs, three webhook
    action performers, and two action rules, writing each one to the
    config store. Duplicate inserts are logged and skipped; any other
    ClientError re-raises.
    """

    # Could also put the default on the class, but seems too fancy

    configs = [
        # Two ThreatExchange collaborations exercised by fetcher/matcher tests.
        ThreatExchangeConfig(
            name="303636684709969",
            fetcher_active=True,
            privacy_group_name="Test Config 1",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        ThreatExchangeConfig(
            name="258601789084078",
            fetcher_active=True,
            privacy_group_name="Test Config 2",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        # Webhook performers point at webhook.site endpoints so test
        # actions can be observed on the linked monitoring pages.
        WebhookPostActionPerformer(
            name="EnqueueForReview",
            url="https://webhook.site/ff7ebc37-514a-439e-9a03-46f86989e195",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/ff7ebc37-514a-439e-9a03-46f86989e195
        ),
        WebhookPostActionPerformer(
            name="EnqueueMiniCastleForReview",
            url="https://webhook.site/01cef721-bdcc-4681-8430-679c75659867",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/01cef721-bdcc-4681-8430-679c75659867
        ),
        WebhookPostActionPerformer(
            name="EnqueueSailboatForReview",
            url="https://webhook.site/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714
        ),
        # Action rules wiring the labels above to the performers above.
        ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=ActionLabel("EnqueueMiniCastleForReview"),
            must_have_labels=set([
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
            ]),
            must_not_have_labels=set(
                [BankedContentIDClassificationLabel("3364504410306721")]),
        ),
        ActionRule(
            name="Enqueue Sailboat for Review",
            action_label=ActionLabel("EnqueueSailboatForReview"),
            must_have_labels=set([
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
                BankedContentIDClassificationLabel("3364504410306721"),
            ]),
            must_not_have_labels=set(),
        ),
    ]

    for config in configs:
        # Someday maybe can do filtering or something, I dunno
        # Add try catch block to avoid test failure

        try:
            hmaconfig.create_config(config)
        except ClientError as e:
            # ConditionalCheckFailedException means the config already
            # exists; treat as a benign duplicate and continue.
            if e.response["Error"][
                    "Code"] == "ConditionalCheckFailedException":
                print(
                    "Can't insert duplicated config, " +
                    e.response["Error"]["Message"], )
            else:
                raise
        print(config)
class WritebackerTestCase(unittest.TestCase):
    """
    Exercises the writebacker lambda_handler against the mocked TE API.

    Two privacy groups are created at class-definition time: "pg 4" with
    write-backs enabled and "pg 3" with them disabled. Each test sends a
    different WritebackType and asserts the exact per-signal messages the
    mock returns, including the disabled-group skip message. The
    non-ThreatExchange signal ("bank 4") produces no "te" entry at all.

    NOTE(review): the expected strings (including the "becuase" typo) must
    match the mock's output byte-for-byte — do not "fix" them here.
    """

    banked_signals = [
        BankedSignal("2862392437204724", "pg 4", "te"),
        BankedSignal("4194946153908639", "pg 4", "te"),
        BankedSignal("3027465034605137", "pg 3", "te"),
        BankedSignal("evil.jpg", "bank 4", "non-te-source"),
    ]

    match_message = MatchMessage("key", "hash", banked_signals)

    # Writebacks are enabled for the trustworth privacy group not for
    # the untrustworthy one
    configs = [
        ThreatExchangeConfig("pg 4", True, "Trustworthy PG",
                             "test description", True, True, True),
        ThreatExchangeConfig("pg 3", True, "UnTrustworthy PG",
                             "test description", True, False, True),
    ]

    # Runs once, at class-creation time (class body, not setUp).
    for config in configs:
        hmaconfig.mock_create_config(config)

    def test_saw_this_too(self):
        """SawThisToo writes reactions for enabled groups, skips disabled."""
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.SawThisToo
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted SAW_THIS_TOO to descriptor a2|2862392437204724\nReacted SAW_THIS_TOO to descriptor a3|2862392437204724",
                    "Reacted SAW_THIS_TOO to descriptor a2|4194946153908639\nReacted SAW_THIS_TOO to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_false_positive(self):
        """FalsePositive reacts DISAGREE_WITH_TAGS on enabled groups."""
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.FalsePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|2862392437204724\nReacted DISAGREE_WITH_TAGS to descriptor a3|2862392437204724",
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|4194946153908639\nReacted DISAGREE_WITH_TAGS to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_true_positve(self):
        """TruePositive builds new descriptors on enabled groups."""
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.TruePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Wrote back TruePositive for indicator 2862392437204724\nBuilt descriptor a1|2862392437204724 with privacy groups pg 4",
                    "Wrote back TruePositive for indicator 4194946153908639\nBuilt descriptor a1|4194946153908639 with privacy groups pg 4",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"

    def test_remove_opinion(self):
        """RemoveOpinion deletes own descriptors and removes reactions."""
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.RemoveOpinion
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback)
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "\n".join((
                        "Deleted decriptor a1|2862392437204724 for indicator 2862392437204724",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|2862392437204724",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|2862392437204724",
                    )),
                    "\n".join((
                        "Deleted decriptor a1|4194946153908639 for indicator 4194946153908639",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|4194946153908639",
                        "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|4194946153908639",
                    )),
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }

        os.environ["MOCK_TE_API"] = "False"
Beispiel #13
0
def lambda_handler(event, context):
    """
    Fetch ThreatExchange updates for every configured collaboration and
    persist them to the S3-backed indicator store.

    Collaborations whose privacy_group_id is not an integer are skipped.
    If the sync raises mid-fetch, the partial delta is still applied
    (see the finally block) and the exception is re-raised.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    # Log at most five collaboration names; elide the rest with "...".
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        indicator_store.load_checkpoint()

        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                int(collab.privacy_group_id),
            )
            indicator_store.reset()

        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta

        try:
            delta.incremental_sync_from_threatexchange(
                api,
            )
        except Exception:  # FIX: was a bare `except:`; name the class, still re-raise.
            # Don't need to call .exception() here because we're just re-raising
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            if delta:
                # FIX: use the module logger; the original called the root
                # `logging` module here, bypassing this handler's logger.
                logger.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logger.error("Failed before fetching any records")
def update_privacy_groups_in_use(priavcy_group_id_in_use: set) -> None:
    """
    Mark as not-in-use every ThreatExchange config whose id is absent
    from the given set of still-in-use privacy group ids.

    NOTE(review): the parameter name has a typo ("priavcy"); it is kept
    because renaming would break keyword-argument callers.
    """
    for collab in ThreatExchangeConfig.get_all():
        if str(collab.privacy_group_id) in priavcy_group_id_in_use:
            continue
        collab.in_use = False
        hmaconfig.update_config(collab)
def update_privacy_group_description(privacy_group_id: str,
                                     description: str) -> None:
    """
    Overwrite the stored description for the given privacy group.

    Uses ``getx`` so a missing privacy group raises rather than
    silently doing nothing.
    """
    existing_config = ThreatExchangeConfig.getx(privacy_group_id)
    existing_config.description = description
    hmaconfig.update_config(existing_config)
Beispiel #16
0
def lambda_handler(_event, _context):
    """
    Run through threatexchange privacy groups and fetch updates to them. If this
    is the first time for a privacy group, will fetch from the start, else only
    updates since the last time.

    Note: since this is a scheduled job, we swallow all exceptions. We only log
    exceptions and move on.
    """

    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    # Log at most five collaboration names; elide the rest with "...".
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_token = AWSSecrets().te_api_token()
    api = ThreatExchangeAPI(api_token)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        if not collab.fetcher_active:
            logger.info(
                f"Fetch skipped because configs has `fetcher_active` set to false for privacy_group_id({collab.privacy_group_id})"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        # BUGFIX: `delta` must be bound before the try block. Previously,
        # if load_checkpoint() raised — or the up-to-date `continue` below
        # fired — the old `finally` referenced `delta` while it was still
        # unbound, raising UnboundLocalError and killing the whole run.
        delta = None

        try:
            indicator_store.load_checkpoint()

            if indicator_store.stale:
                logger.warning(
                    "Store for %s - %d stale! Resetting.",
                    collab.privacy_group_name,
                    int(collab.privacy_group_id),
                )
                indicator_store.reset()

            if indicator_store.fetch_checkpoint >= now.timestamp():
                # Already up to date for this collaboration; nothing to apply.
                continue

            delta = indicator_store.next_delta

            delta.incremental_sync_from_threatexchange(
                api, limit=MAX_DESCRIPTORS_UPDATED, progress_fn=ProgressLogger()
            )
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Encountered exception while getting updates. Will attempt saving.."
            )
            if delta is None:
                # Failed before a delta existed: nothing to save.
                # (Uses the module logger; the original logged via the
                # root `logging` module here.)
                logger.error("Failed before fetching any records")
                continue
            # Force delta to show finished so the partial fetch is saved.
            delta.end = delta.current

        # Reached on success OR after a mid-fetch exception with a partial
        # delta — in both cases the fetched updates are applied.
        if delta is not None:
            logger.info("Fetch complete, applying %d updates", len(delta.updates))
            indicator_store.apply_updates(
                delta, post_apply_fn=indicator_store.post_apply
            )