Example #1
def _get_all_matcher_active_privacy_groups(cache_buster) -> t.List[str]:
    """Return the names of all configs whose matcher is active."""
    configs = ThreatExchangeConfig.get_all()
    return list(
        map(
            lambda c: c.name,
            filter(
                lambda c: c.matcher_active,
                configs,
            ),
        ))
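The unused `cache_buster` argument hints that this helper is memoized at the call boundary. A minimal sketch of that pattern, assuming `functools.lru_cache` and a time-bucketed buster; this is an assumption about why `cache_buster` exists, not something the excerpt states:

import functools
import time
import typing as t

@functools.lru_cache(maxsize=1)
def _get_all_matcher_active_privacy_groups(cache_buster) -> t.List[str]:
    # Body as in Example #1; cache_buster is deliberately unused.
    ...

# Hypothetical call site: bucket time into 5-minute windows so the
# underlying config scan runs at most once per window per process.
active_groups = _get_all_matcher_active_privacy_groups(int(time.time() // 300))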
Example #2
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[str, t.Tuple[
        int, str]] = _get_signal_hash_count_and_last_modified(
            threat_exchange_data_bucket_name,
            threat_exchange_data_folder,
        )

    summaries = []
    for collab in collaborations:
        if additional_config := AdditionalMatchSettingsConfig.get(
                str(collab.privacy_group_id)):
            pdq_match_threshold = str(additional_config.pdq_match_threshold)
        else:
            pdq_match_threshold = ""
        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=t.cast(
                    int,
                    hash_counts.get(
                        collab.privacy_group_id,
                        [-1, ""],
                    )[0],
                ),
                match_count=-1,  # fix will be based on new count system
                pdq_match_threshold=pdq_match_threshold,
            ))
    return summaries
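A minimal sketch of how this summary builder might be invoked; the table and bucket names below are placeholders for illustration, not the project's real configuration:

import boto3

# Hypothetical resource names for illustration only.
table = boto3.resource("dynamodb").Table("HMADataStore")
summaries = _get_threat_exchange_datasets(
    table,
    threat_exchange_data_bucket_name="my-hma-te-data-bucket",
    threat_exchange_data_folder="threat_exchange_data/",
)
for summary in summaries:
    print(summary.privacy_group_id, summary.hash_count)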
Example #3
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[str, t.Tuple[
        int, str]] = _get_signal_hash_count_and_last_modified(
            threat_exchange_data_bucket_name,
            threat_exchange_data_folder,
            threat_exchange_pdq_file_extension,
        )

    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(
        table)

    return [
        ThreatExchangeDatasetSummary(
            collab.privacy_group_id,
            collab.privacy_group_name,
            collab.description,
            collab.fetcher_active,
            collab.matcher_active,
            collab.write_back,
            collab.in_use,
            hash_count=t.cast(
                int,
                hash_counts.get(
                    f"{threat_exchange_data_folder}{collab.privacy_group_id}{threat_exchange_pdq_file_extension}",
                    [0, ""],
                )[0],
            ),
            match_count=match_counts.get(collab.privacy_group_id, 0),
        ) for collab in collaborations
    ]
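Note that the lookup key changed relative to Example #2: `hash_counts` is now keyed by the full S3 object key rather than the bare privacy group id. A tiny sketch of how that key is assembled; the folder and extension values are made up for illustration:

# Illustrative values only; the real folder/extension come from config.
threat_exchange_data_folder = "threat_exchange_data/"
privacy_group_id = "123456789"
threat_exchange_pdq_file_extension = ".pdq.te"

s3_key = (
    f"{threat_exchange_data_folder}"
    f"{privacy_group_id}"
    f"{threat_exchange_pdq_file_extension}"
)
# -> "threat_exchange_data/123456789.pdq.te"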
Example #4
def lambda_handler(event, context):
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        indicator_store.load_checkpoint()

        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                int(collab.privacy_group_id),
            )
            indicator_store.reset()

        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta

        try:
            delta.incremental_sync_from_threatexchange(
                api,
            )
        except:
            # Don't need to call .exception() here because we're just re-raising
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            if delta:
                logging.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logging.error("Failed before fetching any records")
Example #5
def update_privacy_groups_in_use(privacy_group_id_in_use: set) -> None:
    collabs = ThreatExchangeConfig.get_all()
    for collab in collabs:
        if str(collab.privacy_group_id) not in privacy_group_id_in_use:
            collab.in_use = False
            hmaconfig.update_config(collab)
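A minimal sketch of a call site for this helper, assuming the caller has just fetched the set of privacy groups currently referenced upstream (`fetched_collaborations` is hypothetical):

# Hypothetical: ids seen during the latest fetch, stringified to match
# the membership test inside update_privacy_groups_in_use.
fetched_ids = {str(c.privacy_group_id) for c in fetched_collaborations}
update_privacy_groups_in_use(fetched_ids)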
Example #6
def lambda_handler(_event, _context):
    """
    Run through threatexchange privacy groups and fetch updates to them. If this
    is the first time for a privacy group, will fetch from the start, else only
    updates since the last time.

    Note: since this is a scheduled job, we swallow all exceptions. We only log
    exceptions and move on.
    """

    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_token = AWSSecrets().te_api_token()
    api = ThreatExchangeAPI(api_token)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        if not collab.fetcher_active:
            logger.info(
                f"Fetch skipped because configs has `fetcher_active` set to false for privacy_group_id({collab.privacy_group_id})"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        delta = None  # ensure the finally block is safe if we fail before next_delta
        try:
            indicator_store.load_checkpoint()

            if indicator_store.stale:
                logger.warning(
                    "Store for %s - %d stale! Resetting.",
                    collab.privacy_group_name,
                    int(collab.privacy_group_id),
                )
                indicator_store.reset()

            if indicator_store.fetch_checkpoint >= now.timestamp():
                continue

            delta = indicator_store.next_delta

            delta.incremental_sync_from_threatexchange(
                api, limit=MAX_DESCRIPTORS_UPDATED, progress_fn=ProgressLogger()
            )
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Encountered exception while getting updates. Will attempt saving..."
            )
            if delta:
                # Force delta to show finished
                delta.end = delta.current
        finally:
            if delta:
                logging.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logging.error("Failed before fetching any records")