Beispiel #1
0
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """
    Build one ThreatExchangeDatasetSummary per ThreatExchangeConfig.

    Each summary carries the collaboration's own config fields plus:

    * hash_count -- first element of the entry that
      _get_signal_hash_count_and_last_modified() reports under the key
      "<data_folder><privacy_group_id><pdq_file_extension>", or 0 when
      there is no such entry.
    * match_count -- the value MatchByPrivacyGroupCounter.get_all_counts(table)
      reports for the privacy group id, or 0 when there is none.
    """
    collaborations = ThreatExchangeConfig.get_all()

    counts_by_key: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
        threat_exchange_pdq_file_extension,
    )
    matches_by_group: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(
        table
    )

    # Fallback (count, last_modified) entry for groups with no hash data.
    missing_entry = (0, "")

    summaries = []
    for collab in collaborations:
        group_id = collab.privacy_group_id
        data_key = f"{threat_exchange_data_folder}{group_id}{threat_exchange_pdq_file_extension}"
        count_entry = counts_by_key.get(data_key, missing_entry)
        summaries.append(
            ThreatExchangeDatasetSummary(
                group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=t.cast(int, count_entry[0]),
                match_count=matches_by_group.get(group_id, 0),
            )
        )
    return summaries
Beispiel #2
0
def update_privacy_groups_in_use(priavcy_group_id_in_use: set) -> None:
    """
    Mark every stored collaboration that is not in the given id set as unused.

    For each config from ThreatExchangeConfig.get_all() whose stringified
    privacy_group_id is absent from ``priavcy_group_id_in_use``, set
    ``in_use`` to False and persist it via hmaconfig.update_config().
    Configs whose id is present in the set are left untouched.
    """
    for config in ThreatExchangeConfig.get_all():
        if str(config.privacy_group_id) in priavcy_group_id_in_use:
            # Still in use: nothing to change for this config.
            continue
        config.in_use = False
        hmaconfig.update_config(config)
Beispiel #3
0
def lambda_handler(event, context):
    """
    Fetch ThreatExchange updates for every configured collaboration.

    For each config returned by ThreatExchangeConfig.get_all() whose
    privacy_group_id passes is_int(), this:

    1. builds a ThreatUpdateS3PDQStore and loads its checkpoint,
    2. resets the store if it reports itself stale,
    3. skips the collaboration if its fetch checkpoint is already at or past
       the time this invocation started,
    4. otherwise runs an incremental sync on the store's next delta and
       applies the fetched updates.

    ``event`` and ``context`` are accepted but not read by this function.

    If the sync raises, the delta is marked finished at its current position,
    the ``finally`` clause still applies (or reports the absence of) fetched
    updates, and the original exception then propagates to the caller.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    now_timestamp = now.timestamp()
    current_time = now.strftime("%H:%M:%S")

    # Log at most five names; when there are more collabs than that, the
    # last listed slot is replaced by "..." to signal truncation.
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    te_data_bucket = s3.Bucket(config.s3_bucket)

    # NOTE(review): `stores` is collected but not read in the visible part of
    # this function -- confirm nothing later relies on it before removing.
    stores = []
    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue
        privacy_group_id = int(collab.privacy_group_id)

        indicator_store = ThreatUpdateS3PDQStore(
            privacy_group_id,
            api.app_id,
            te_data_bucket,
            config.s3_te_data_folder,
            config.data_store_table,
        )
        stores.append(indicator_store)
        indicator_store.load_checkpoint()
        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                privacy_group_id,
            )
            indicator_store.reset()

        # Checkpoint is already at or beyond this invocation's start time.
        if indicator_store.fetch_checkpoint >= now_timestamp:
            continue

        delta = indicator_store.next_delta

        try:
            delta.incremental_sync_from_threatexchange(api)
        except BaseException:
            # Explicit spelling of the previous bare `except:`; the exception
            # is always re-raised, so nothing is swallowed here.
            # Don't need to call .exception() here because we're just re-raising
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            # Runs on success and on failure, so partial progress is saved.
            # Uses the module `logger` (not the root `logging` functions) so
            # these messages follow the same level/handlers as the rest.
            if delta:
                logger.info(
                    "Fetch complete, applying %d updates", len(delta.updates)
                )
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logger.error("Failed before fetching any records")