def _get_all_matcher_active_privacy_groups(cache_buster) -> t.List[str]:
    configs = ThreatExchangeConfig.get_all()
    return list(
        map(
            lambda c: c.name,
            filter(
                lambda c: c.matcher_active,
                configs,
            ),
        )
    )
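# A minimal usage sketch, not part of the original module: the otherwise-unused
# `cache_buster` argument suggests the function is intended to be memoized, with
# callers rotating the argument to force a periodic refresh. The wrapper below,
# `_CACHE_REFRESH_SECONDS`, and `get_matcher_active_privacy_groups` are
# hypothetical names introduced only for illustration.
import functools
import time

_CACHE_REFRESH_SECONDS = 300


@functools.lru_cache(maxsize=1)
def _cached_matcher_active_privacy_groups(cache_buster: int) -> t.List[str]:
    return _get_all_matcher_active_privacy_groups(cache_buster)


def get_matcher_active_privacy_groups() -> t.List[str]:
    # Same time bucket -> same cache key -> the cached list is reused.
    return _cached_matcher_active_privacy_groups(
        int(time.time()) // _CACHE_REFRESH_SECONDS
    )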
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
    )

    summaries = []
    for collab in collaborations:
        if additional_config := AdditionalMatchSettingsConfig.get(
            str(collab.privacy_group_id)
        ):
            pdq_match_threshold = str(additional_config.pdq_match_threshold)
        else:
            pdq_match_threshold = ""

        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=t.cast(
                    int,
                    hash_counts.get(
                        collab.privacy_group_id,
                        [-1, ""],
                    )[0],
                ),
                match_count=-1,  # fix will be based on new count system
                pdq_match_threshold=pdq_match_threshold,
            )
        )

    return summaries
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
        threat_exchange_pdq_file_extension,
    )
    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(table)

    return [
        ThreatExchangeDatasetSummary(
            collab.privacy_group_id,
            collab.privacy_group_name,
            collab.description,
            collab.fetcher_active,
            collab.matcher_active,
            collab.write_back,
            collab.in_use,
            hash_count=t.cast(
                int,
                hash_counts.get(
                    f"{threat_exchange_data_folder}{collab.privacy_group_id}{threat_exchange_pdq_file_extension}",
                    [0, ""],
                )[0],
            ),
            match_count=match_counts.get(collab.privacy_group_id, 0),
        )
        for collab in collaborations
    ]
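# A small illustrative sketch, not part of the original module: unlike the
# earlier version of _get_threat_exchange_datasets, this one looks hash counts
# up by the full S3 object key rather than by the bare privacy group id. The
# folder and extension values below are hypothetical placeholders, not values
# taken from the real configuration.
def _example_hash_count_key(
    folder: str, privacy_group_id: str, extension: str
) -> str:
    # e.g. ("threat_exchange_data/", "123456", ".pdq.te")
    #   -> "threat_exchange_data/123456.pdq.te"
    return f"{folder}{privacy_group_id}{extension}"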
def lambda_handler(event, context):
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        indicator_store.load_checkpoint()

        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                int(collab.privacy_group_id),
            )
            indicator_store.reset()

        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta

        try:
            delta.incremental_sync_from_threatexchange(
                api,
            )
        except:
            # Don't need to call .exception() here because we're just re-raising
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished
            delta.end = delta.current
            raise
        finally:
            if delta:
                logging.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logging.error("Failed before fetching any records")
def update_privacy_groups_in_use(privacy_group_id_in_use: set) -> None:
    collabs = ThreatExchangeConfig.get_all()
    for collab in collabs:
        if str(collab.privacy_group_id) not in privacy_group_id_in_use:
            collab.in_use = False
            hmaconfig.update_config(collab)
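# A minimal usage sketch, not part of the original module: callers are expected
# to pass the string-form privacy group ids that are still referenced (for
# example, by datasets currently present in storage); any config whose id is
# missing from the set gets `in_use` flipped to False and written back.
# `still_referenced_ids` and the id values are hypothetical placeholders.
still_referenced_ids = {"123456789", "987654321"}
update_privacy_groups_in_use(still_referenced_ids)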
def lambda_handler(_event, _context):
    """
    Run through threatexchange privacy groups and fetch updates to them. If
    this is the first time for a privacy group, will fetch from the start,
    else only updates since the last time.

    Note: since this is a scheduled job, we swallow all exceptions. We only
    log exceptions and move on.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_token = AWSSecrets().te_api_token()
    api = ThreatExchangeAPI(api_token)

    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        if not collab.fetcher_active:
            logger.info(
                f"Fetch skipped because configs has `fetcher_active` set to false for privacy_group_id({collab.privacy_group_id})"
            )
            continue

        indicator_store = ThreatUpdateS3Store(
            int(collab.privacy_group_id),
            api.app_id,
            s3_client=get_s3_client(),
            s3_bucket_name=config.s3_bucket,
            s3_te_data_folder=config.s3_te_data_folder,
            data_store_table=config.data_store_table,
            supported_signal_types=[VideoMD5Signal, PdqSignal],
        )

        # Initialize so the except/finally blocks below can safely check whether
        # a delta was ever fetched for this collaboration.
        delta = None

        try:
            indicator_store.load_checkpoint()

            if indicator_store.stale:
                logger.warning(
                    "Store for %s - %d stale! Resetting.",
                    collab.privacy_group_name,
                    int(collab.privacy_group_id),
                )
                indicator_store.reset()

            if indicator_store.fetch_checkpoint >= now.timestamp():
                continue

            delta = indicator_store.next_delta
            delta.incremental_sync_from_threatexchange(
                api, limit=MAX_DESCRIPTORS_UPDATED, progress_fn=ProgressLogger()
            )
        except Exception:  # pylint: disable=broad-except
            logger.exception(
                "Encountered exception while getting updates. Will attempt saving.."
            )
            if delta:
                # Force delta to show finished
                delta.end = delta.current
        finally:
            if delta:
                logging.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logging.error("Failed before fetching any records")
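# A minimal local-invocation sketch, not part of the original module: because
# the handler ignores its event and context arguments, it can be exercised
# outside AWS Lambda by calling it directly, provided the environment the
# Lambda expects (FetcherConfig, AWS credentials, the TE API token secret) is
# available to the process.
if __name__ == "__main__":
    lambda_handler(None, None)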