def delete_dataset(key=None) -> DeleteDatasetResponse:
    """Delete the dataset (privacy group) config identified by *key*.

    Raises whatever ``ThreatExchangeConfig.getx`` raises when no config
    exists for the key.
    """
    existing = ThreatExchangeConfig.getx(str(key))
    hmaconfig.delete_config(existing)
    return DeleteDatasetResponse(response="The privacy group is deleted")
def create_privacy_group_if_not_exists(
    privacy_group_id: str,
    privacy_group_name: str,
    description: str = "",
    in_use: bool = True,
    fetcher_active: bool = FETCHER_ACTIVE_DEFAULT,
    matcher_active: bool = MATCHER_ACTIVE_DEFAULT,
    write_back: bool = WRITE_BACK_DEFAULT,
):
    """Create a ThreatExchange collaboration config unless one already exists.

    If the insert fails with a conditional-check failure (i.e. the config
    already exists), the existing config is kept; when a non-empty
    *description* was supplied it is written onto the existing config.
    Any other ``ClientError`` propagates to the caller.
    """
    logger.info("Adding collaboration name %s", privacy_group_name)
    new_config = ThreatExchangeConfig(
        privacy_group_id,
        fetcher_active=fetcher_active,
        privacy_group_name=privacy_group_name,
        in_use=in_use,
        description=description,
        matcher_active=matcher_active,
        write_back=write_back,
    )
    try:
        hmaconfig.create_config(new_config)
    except ClientError as err:
        # A duplicate insert surfaces as a conditional-check failure;
        # anything else is a real error and is re-raised.
        if err.response["Error"]["Code"] != "ConditionalCheckFailedException":
            raise
        logger.warning(
            "Can't insert duplicated config, %s",
            err.response["Error"]["Message"],
        )
        if description:
            update_privacy_group_description(privacy_group_id, description)
def get_privacy_group_matcher_active(privacy_group_id: str, _) -> bool:
    """Return whether matching is enabled for *privacy_group_id*.

    Returns False (and logs a warning) when no config is found.
    The second positional argument is unused.
    """
    cfg = ThreatExchangeConfig.get(privacy_group_id)
    if not cfg:
        logger.warning("Privacy group %s is not found!", privacy_group_id)
        return False
    logger.info("matcher_active for %s is %s", privacy_group_id, cfg.matcher_active)
    return cfg.matcher_active
def update_dataset(request: UpdateDatasetRequest) -> Dataset:
    """Update a dataset's fetcher_active, write_back and matcher_active flags."""
    cfg = ThreatExchangeConfig.getx(str(request.privacy_group_id))
    cfg.fetcher_active = request.fetcher_active
    cfg.write_back = request.write_back
    cfg.matcher_active = request.matcher_active
    # The stored config keys on "name"; expose it as privacy_group_id
    # so it round-trips through Dataset.from_dict.
    as_dict = hmaconfig.update_config(cfg).__dict__
    as_dict["privacy_group_id"] = as_dict["name"]
    return Dataset.from_dict(as_dict)
def _get_threat_exchange_datasets(
    table: Table,
    threat_exchange_data_bucket_name: str,
    threat_exchange_data_folder: str,
    threat_exchange_pdq_file_extension: str,
) -> t.List[ThreatExchangeDatasetSummary]:
    """Build a summary (hash count + match count) for every collaboration.

    Hash counts come from the S3 data files keyed as
    ``<folder><privacy_group_id><extension>``; match counts come from the
    per-privacy-group counters in *table*. Missing entries default to 0.
    """
    collaborations = ThreatExchangeConfig.get_all()
    hash_counts: t.Dict[
        str, t.Tuple[int, str]
    ] = _get_signal_hash_count_and_last_modified(
        threat_exchange_data_bucket_name,
        threat_exchange_data_folder,
        threat_exchange_pdq_file_extension,
    )
    match_counts: t.Dict[str, int] = MatchByPrivacyGroupCounter.get_all_counts(table)

    summaries = []
    for collab in collaborations:
        s3_key = (
            f"{threat_exchange_data_folder}{collab.privacy_group_id}"
            f"{threat_exchange_pdq_file_extension}"
        )
        # Fix: the fallback used to be a *list* [0, ""] despite the declared
        # value type t.Tuple[int, str], which forced a t.cast on the result.
        # A tuple default matches the annotation and needs no cast.
        hash_count, _last_modified = hash_counts.get(s3_key, (0, ""))
        summaries.append(
            ThreatExchangeDatasetSummary(
                collab.privacy_group_id,
                collab.privacy_group_name,
                collab.description,
                collab.fetcher_active,
                collab.matcher_active,
                collab.write_back,
                collab.in_use,
                hash_count=hash_count,
                match_count=match_counts.get(collab.privacy_group_id, 0),
            )
        )
    return summaries
def update_privacy_groups_in_use(priavcy_group_id_in_use: set) -> None:
    """Mark any config whose id is absent from the given set as not in use.

    NOTE(review): the parameter name contains a typo ("priavcy"), but it is
    part of the public signature (keyword callers), so it is kept as-is.
    """
    for cfg in ThreatExchangeConfig.get_all():
        if str(cfg.privacy_group_id) in priavcy_group_id_in_use:
            continue
        cfg.in_use = False
        hmaconfig.update_config(cfg)
def update_privacy_group_description(privacy_group_id: str, description: str) -> None:
    """Persist a new description on an existing privacy group config."""
    cfg = ThreatExchangeConfig.getx(privacy_group_id)
    cfg.description = description
    hmaconfig.update_config(cfg)
class WritebackerTestCase(unittest.TestCase):
    """Exercises the writebacker lambda against the mocked TE API.

    Three signals are banked under two ThreatExchange privacy groups; a
    fourth comes from a non-ThreatExchange source and should produce no
    "te" writeback at all.
    """

    banked_signals = [
        BankedSignal("2862392437204724", "pg 4", "te"),
        BankedSignal("4194946153908639", "pg 4", "te"),
        BankedSignal("3027465034605137", "pg 3", "te"),
        BankedSignal("evil.jpg", "bank 4", "non-te-source"),
    ]
    match_message = MatchMessage("key", "hash", banked_signals)

    # Writebacks are enabled for the trustworth privacy group not for
    # the untrustworthy one
    configs = [
        ThreatExchangeConfig(
            "pg 4", True, "Trustworthy PG", "test description", True, True, True
        ),
        ThreatExchangeConfig(
            "pg 3", True, "UnTrustworthy PG", "test description", True, False, True
        ),
    ]

    # Register the configs with the mocked config store at class-definition
    # time so every test method sees the same setup.
    for config in configs:
        hmaconfig.mock_create_config(config)

    def test_saw_this_too(self):
        # SawThisToo should react on every descriptor of the writeback-enabled
        # group ("pg 4") and skip the disabled one ("pg 3").
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.SawThisToo
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted SAW_THIS_TOO to descriptor a2|2862392437204724\nReacted SAW_THIS_TOO to descriptor a3|2862392437204724",
                    "Reacted SAW_THIS_TOO to descriptor a2|4194946153908639\nReacted SAW_THIS_TOO to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }
        os.environ["MOCK_TE_API"] = "False"

    def test_false_positive(self):
        # FalsePositive maps to a DISAGREE_WITH_TAGS reaction per descriptor.
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.FalsePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|2862392437204724\nReacted DISAGREE_WITH_TAGS to descriptor a3|2862392437204724",
                    "Reacted DISAGREE_WITH_TAGS to descriptor a2|4194946153908639\nReacted DISAGREE_WITH_TAGS to descriptor a3|4194946153908639",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }
        os.environ["MOCK_TE_API"] = "False"

    def test_true_positve(self):
        # TruePositive writes back by building a new descriptor per indicator.
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.TruePositive
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "Wrote back TruePositive for indicator 2862392437204724\nBuilt descriptor a1|2862392437204724 with privacy groups pg 4",
                    "Wrote back TruePositive for indicator 4194946153908639\nBuilt descriptor a1|4194946153908639 with privacy groups pg 4",
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }
        os.environ["MOCK_TE_API"] = "False"

    def test_remove_opinion(self):
        # RemoveOpinion both deletes our descriptor and removes our
        # DISAGREE_WITH_TAGS reactions from other descriptors.
        os.environ["MOCK_TE_API"] = "True"
        os.environ["CONFIG_TABLE_NAME"] = "test-HMAConfig"

        writeback = WritebackTypes.RemoveOpinion
        writeback_message = WritebackMessage.from_match_message_and_type(
            self.match_message, writeback
        )
        event = {"Records": [{"body": writeback_message.to_aws_json()}]}

        result = lambda_handler(event, None)
        assert result == {
            "writebacks_performed": {
                "te": [
                    "\n".join(
                        (
                            "Deleted decriptor a1|2862392437204724 for indicator 2862392437204724",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|2862392437204724",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|2862392437204724",
                        )
                    ),
                    "\n".join(
                        (
                            "Deleted decriptor a1|4194946153908639 for indicator 4194946153908639",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a2|4194946153908639",
                            "Removed reaction DISAGREE_WITH_TAGS from descriptor a3|4194946153908639",
                        )
                    ),
                    "No writeback performed for banked content id 3027465034605137 becuase writebacks were disabled",
                ]
            }
        }
        os.environ["MOCK_TE_API"] = "False"
def lambda_handler(event, context):
    """Fetch ThreatExchange updates for every collaboration and persist them.

    For each collaboration with a numeric privacy_group_id, loads (or
    resets, if stale) its S3-backed PDQ store, performs an incremental
    sync from ThreatExchange, and applies the resulting updates. On a
    sync failure the partial delta is still applied before re-raising.
    """
    lambda_init_once()
    config = FetcherConfig.get()
    collabs = ThreatExchangeConfig.get_all()

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")

    # Log at most five collab names; elide the rest with "...".
    names = [collab.privacy_group_name for collab in collabs[:5]]
    if len(names) < len(collabs):
        names[-1] = "..."

    data = f"Triggered at time {current_time}, found {len(collabs)} collabs: {', '.join(names)}"
    logger.info(data)

    api_key = AWSSecrets().te_api_key()
    api = ThreatExchangeAPI(api_key)

    te_data_bucket = s3.Bucket(config.s3_bucket)

    stores = []
    for collab in collabs:
        logger.info(
            "Processing updates for collaboration %s", collab.privacy_group_name
        )

        if not is_int(collab.privacy_group_id):
            logger.info(
                f"Fetch skipped because privacy_group_id({collab.privacy_group_id}) is not an int"
            )
            continue

        indicator_store = ThreatUpdateS3PDQStore(
            int(collab.privacy_group_id),
            api.app_id,
            te_data_bucket,
            config.s3_te_data_folder,
            config.data_store_table,
        )
        stores.append(indicator_store)

        indicator_store.load_checkpoint()
        if indicator_store.stale:
            logger.warning(
                "Store for %s - %d stale! Resetting.",
                collab.privacy_group_name,
                int(collab.privacy_group_id),
            )
            indicator_store.reset()

        # Already fetched up to (or past) the current invocation time.
        if indicator_store.fetch_checkpoint >= now.timestamp():
            continue

        delta = indicator_store.next_delta
        try:
            # Fix: removed stray trailing comma in the call arguments.
            delta.incremental_sync_from_threatexchange(api)
        except BaseException:
            # Fix: was a bare `except:`; `except BaseException:` is the
            # explicit, lint-clean equivalent. We always re-raise, so no
            # error is swallowed. Don't call .exception() here because
            # we're just re-raising.
            logger.error("Exception occurred! Attempting to save...")
            # Force delta to show finished so the partial fetch is applied.
            delta.end = delta.current
            raise
        finally:
            # Fix: use the module logger consistently (was root `logging.*`).
            if delta:
                logger.info("Fetch complete, applying %d updates", len(delta.updates))
                indicator_store.apply_updates(
                    delta, post_apply_fn=indicator_store.post_apply
                )
            else:
                logger.error("Failed before fetching any records")
def load_defaults(_args):
    """
    Load a hardcoded set of defaults which are useful in testing
    """
    # Could also put the default on the class, but seems too fancy
    configs = [
        # Two ThreatExchange collaborations with everything switched on.
        ThreatExchangeConfig(
            name="303636684709969",
            fetcher_active=True,
            privacy_group_name="Test Config 1",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        ThreatExchangeConfig(
            name="258601789084078",
            fetcher_active=True,
            privacy_group_name="Test Config 2",
            write_back=True,
            in_use=True,
            description="test description",
            matcher_active=True,
        ),
        # Webhook action performers pointing at webhook.site test endpoints.
        WebhookPostActionPerformer(
            name="EnqueueForReview",
            url="https://webhook.site/ff7ebc37-514a-439e-9a03-46f86989e195",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/ff7ebc37-514a-439e-9a03-46f86989e195
        ),
        WebhookPostActionPerformer(
            name="EnqueueMiniCastleForReview",
            url="https://webhook.site/01cef721-bdcc-4681-8430-679c75659867",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/01cef721-bdcc-4681-8430-679c75659867
        ),
        WebhookPostActionPerformer(
            name="EnqueueSailboatForReview",
            url="https://webhook.site/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714",
            headers='{"Connection":"keep-alive"}',
            # monitoring page:
            # https://webhook.site/#!/fa5c5ad5-f5cc-4692-bf03-a03a4ae3f714
        ),
        # Action rules routing true positives to the two review queues,
        # split on whether the banked-content id matched.
        ActionRule(
            name="Enqueue Mini-Castle for Review",
            action_label=ActionLabel("EnqueueMiniCastleForReview"),
            must_have_labels=set([
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
            ]),
            must_not_have_labels=set(
                [BankedContentIDClassificationLabel("3364504410306721")]),
        ),
        ActionRule(
            name="Enqueue Sailboat for Review",
            action_label=ActionLabel("EnqueueSailboatForReview"),
            must_have_labels=set([
                BankIDClassificationLabel("303636684709969"),
                ClassificationLabel("true_positive"),
                BankedContentIDClassificationLabel("3364504410306721"),
            ]),
            must_not_have_labels=set(),
        ),
    ]
    for config in configs:
        # Someday maybe can do filtering or something, I dunno
        # Add try catch block to avoid test failure
        try:
            hmaconfig.create_config(config)
        except ClientError as e:
            # A duplicate insert surfaces as a conditional-check failure
            # and is tolerated; any other ClientError is re-raised.
            if e.response["Error"][
                    "Code"] == "ConditionalCheckFailedException":
                print(
                    "Can't insert duplicated config, "
                    + e.response["Error"]["Message"],
                )
            else:
                raise
        print(config)