def init_argparse(cls, settings: CLISettings, ap: argparse.ArgumentParser) -> None:
    """
    Register the arguments for this command on `ap`.

    Adds a positional content type, the trailing list of inputs, and an
    optional --signal-type/-S restriction. Only signal types that are
    FileHasher subclasses are offered as --signal-type choices.
    """
    # A single pass is enough: anything offered as a choice must be a FileHasher
    hasher_names = sorted(
        s.get_name()
        for s in settings.get_all_signal_types()
        if issubclass(s, FileHasher)
    )
    content_names = sorted(c.get_name() for c in settings.get_all_content_types())

    ap.add_argument(
        "content_type",
        help="what kind of content to hash",
        **common.argparse_choices_pre_type_kwargs(
            choices=content_names,
            type=settings.get_content_type,
        ),
    )
    ap.add_argument(
        "files",
        nargs=argparse.REMAINDER,
        action=FlexFilesInputAction,
        help="list of files, URLs, - for stdin, or -- to interpret remainder as a string",
    )
    ap.add_argument(
        "--signal-type",
        "-S",
        help="only generate these signal types",
        **common.argparse_choices_pre_type_kwargs(
            choices=hasher_names,
            type=settings.get_signal_type,
        ),
    )
def execute_remove(self, settings: CLISettings) -> None:
    """
    Remove `self.module` from the persisted list of extensions.

    Raises:
        CommandError: (return code 2) if no module was given, or if the
            module is not in the persisted extensions.
    """
    if not self.module:
        raise CommandError("Which module to remove is required", 2)
    config = settings.get_persistent_config()
    if self.module not in config.extensions:
        raise CommandError(f"You haven't added {self.module}", 2)
    config.extensions.remove(self.module)
    # The change only sticks once the config is written back
    settings.set_persistent_config(config)
def init_argparse(cls, settings: CLISettings, ap: argparse.ArgumentParser) -> None:
    """
    Register the arguments for this command on `ap`.

    Adds the positional content type, the --only-signal/-S and --hashes/-H
    options, the trailing list of inputs, and the two flags that control
    which matches are displayed.
    """
    content_names = [c.get_name() for c in settings.get_all_content_types()]
    ap.add_argument(
        "content_type",
        help="what kind of content to match",
        **common.argparse_choices_pre_type_kwargs(
            content_names,
            settings.get_content_type,
        ),
    )

    signal_names = [s.get_name() for s in settings.get_all_signal_types()]
    ap.add_argument(
        "--only-signal",
        "-S",
        help="limit to this signal type",
        **common.argparse_choices_pre_type_kwargs(
            signal_names,
            settings.get_signal_type,
        ),
    )

    ap.add_argument(
        "--hashes",
        "-H",
        action="store_true",
        help="force input to be interpreted as signals for the given signal type",
    )
    ap.add_argument(
        "files",
        nargs=argparse.REMAINDER,
        action=FlexFilesInputAction,
        help="list of files or -- to interpret remainder as a string",
    )

    # Display filters
    ap.add_argument(
        "--show-false-positives",
        action="store_true",
        help="show matches even if you've marked them false_positive",
    )
    ap.add_argument(
        "--hide-disputed",
        action="store_true",
        help="hide matches if someone has disputed them",
    )
def _get_settings(config: CLiConfig, dir: pathlib.Path) -> t.Tuple[CLISettings, _ExtendedTypes]:
    """
    Build the CLI settings from the persisted config.

    Combines the built-in content types, signal types and exchange APIs
    with whatever the configured extensions contribute, and returns the
    resulting settings together with the extension types that were loaded.
    """
    extensions = _get_extended_functionality(config)

    builtin_content = [
        photo.PhotoContent,
        video.VideoContent,
        url.URLContent,
        text.TextContent,
    ]
    signals = meta.SignalTypeMapping(
        builtin_content + extensions.content_types,
        list(_DEFAULT_SIGNAL_TYPES) + extensions.signal_types,
    )

    # Built-in APIs; credentials and tokens are read from the config
    base_apis: t.List[SignalExchangeAPI] = [
        StaticSampleSignalExchangeAPI(),
        LocalFileSignalExchangeAPI(),
        StopNCIISignalExchangeAPI(*_get_stopncii_tokens(config)),
        NCMECSignalExchangeAPI(*_get_ncmec_credentials(config)),
        FBThreatExchangeSignalExchangeAPI(_get_fb_tx_app_token(config)),
    ]
    fetchers = meta.FetcherMapping(base_apis + extensions.api_instances)

    state = CliState(list(fetchers.fetchers_by_name.values()), dir=dir)
    functionality = meta.FunctionalityMapping(signals, fetchers, state)
    return CLISettings(functionality, state), extensions
def execute(self, settings: CLISettings) -> None:
    """
    Create a new collaboration config, or edit the existing one.

    Raises:
        CommandError: (return code 2) if --create was requested but the
            name is taken, if the existing collab belongs to a different
            API, or if there is nothing to edit and --create was not given.
    """
    existing = settings.get_collab(self.collab_name)

    if not existing:
        if not self.create:
            raise CommandError("no such config! Did you mean to use --create?", 2)
        logging.debug("Creating config with args: %s", self.edit_kwargs)
        config_cls = self._API_CLS.get_config_class()
        settings._state.update_collab(config_cls(**self.edit_kwargs))
        return

    # From here on we are editing an existing collaboration
    if self.create:
        raise CommandError(
            f'there\'s an existing collaboration named "{self.collab_name}"', 2
        )
    if existing.api != self._API_CLS.get_name():
        raise CommandError(
            f"the existing collab is for the {existing.api} api, delete that one first",
            2,
        )
    assert (
        existing.__class__ == self._API_CLS.get_config_class()
    ), "api name the same, but class different?"

    for attr_name, new_value in self.edit_kwargs.items():
        setattr(existing, attr_name, new_value)
    settings._state.update_collab(existing)
def get_signal_types(self, settings: CLISettings) -> t.Set[t.Type[SignalType]]:
    """
    Return the signal types selected by the --only-* filters.

    Starts from `self.only_signals` if any were given, else from every
    signal type in `settings`. When `self.only_content` is set, keeps only
    signal types that apply to at least one of those content types.
    """
    candidates = self.only_signals or settings.get_all_signal_types()
    if not self.only_content:
        return set(candidates)
    return {
        s_type
        for s_type in candidates
        if any(c in self.only_content for c in s_type.get_content_types())
    }
def execute(self, settings: CLISettings) -> None:
    """
    Match each input against the available indices and print the matches.

    If there are no indices at all, a fetch is triggered first when in demo
    mode; otherwise the command fails. For each input and signal type, each
    collaboration is printed at most once.

    Raises:
        CommandError: if no indices exist outside of demo mode, or
            (return code 2) if input is to be read as hashes but more than
            one signal type applies.
    """
    if not settings.index.list():
        if not settings.in_demo_mode:
            raise CommandError("No indices available. Do you need to fetch?")
        self.stderr("You haven't built any indices, so we'll call `fetch` for you!")
        FetchCommand().execute(settings)

    signal_types = settings.get_signal_types_for_content(self.content_type)
    if self.only_signal:
        signal_types = [self.only_signal]

    # Which interfaces a signal type must implement depends on the input mode
    types: t.Tuple[type, ...] = (FileHasher, MatchesStr)
    if self.as_hashes:
        types = (BytesHasher, TextHasher, FileHasher)
    signal_types = [s for s in signal_types if issubclass(s, types)]

    if self.as_hashes and len(signal_types) > 1:
        # A raw hash can only be interpreted as one signal type
        raise CommandError(
            "Too many SignalTypes for --hashes. Use also --only-signal", 2
        )

    logging.info(
        "Signal types that apply: %s",
        ", ".join(s.get_name() for s in signal_types) or "None!",
    )

    indices: t.List[t.Tuple[t.Type[SignalType], SignalTypeIndex]] = []
    for s_type in signal_types:
        index = settings.index.load(s_type)
        if index is None:
            logging.info("No index for %s, skipping", s_type.get_name())
            continue
        indices.append((s_type, index))

    if not indices:
        self.stderr("No data to match against")
        return

    # The input mode is fixed for the whole run, so pick the matcher once
    matcher = _match_hashes if self.as_hashes else _match_file

    for path in self.files:
        for s_type, index in indices:
            seen = set()  # TODO - maybe take the highest certainty?
            for r in matcher(path, s_type, index):
                metadatas: t.List[t.Tuple[str, FetchedSignalMetadata]] = r.metadata
                for collab, fetched_data in metadatas:
                    if collab in seen:
                        continue
                    seen.add(collab)
                    print(s_type.get_name(), f"- ({collab})", fetched_data)
def execute_list(self, settings: CLISettings) -> None:
    """
    Print extension details.

    With `self.module` set, prints only that module's manifest. Otherwise
    prints every persisted extension by name, in sorted order, each
    followed by its manifest indented by 2.
    """
    if self.module:
        self.print_extension(self.get_manifest(self.module))
        return

    persisted = settings.get_persistent_config().extensions
    for module_name in sorted(persisted):
        print(module_name)
        self.print_extension(self.get_manifest(module_name), indent=2)
def execute_import(self, settings: CLISettings, privacy_group_id: int) -> None:
    """
    Create a collaboration config from a privacy group.

    The privacy group is looked up through the API and its name becomes
    the name of the new collaboration.

    Raises:
        CommandError: (return code 2) if a collaboration with the privacy
            group's name already exists.
    """
    privacy_group = self.get_te_api(settings).get_privacy_group(privacy_group_id)
    collab_name = privacy_group.name

    if settings.get_collab(collab_name) is not None:
        raise CommandError(
            f"A collaboration already exists with the name {collab_name}", 2
        )

    new_collab = FBThreatExchangeCollabConfig(
        name=privacy_group.name,
        privacy_group=privacy_group_id,
    )
    settings._state.update_collab(new_collab)
def execute_for_collab(
    self,
    settings: CLISettings,
    fetcher: SignalExchangeAPI,
    collab: CollaborationConfigBase,
) -> bool:
    """
    Fetch updates for a single collaboration and merge them into its store.

    Iterates the fetcher from the stored checkpoint, merging each delta.
    Stops early when the configured limits are hit, and flushes the store
    whenever a checkpoint is due. The store is always flushed on the way
    out, including on failure or interruption.

    Returns:
        False if fetching raised an Exception, True otherwise.

    Raises:
        KeyboardInterrupt: re-raised after the final flush.
    """
    store = settings.fetched_state.get_for_api(fetcher.__class__)
    checkpoint = self._verify_store_and_checkpoint(store, collab)

    # Reset the progress tracking for this collab
    self.progress_fetched_count = 0
    self.current_collab = collab.name
    self.current_api = fetcher.get_name()

    completed = False
    try:
        deltas = fetcher.fetch_iter(
            settings.get_all_signal_types(), collab, checkpoint
        )
        delta: FetchDeltaTyped
        for delta in deltas:
            logging.info("fetch() with %d new records", len(delta.updates))
            next_checkpoint = delta.checkpoint
            self._fetch_progress(len(delta.updates), next_checkpoint)
            assert next_checkpoint is not None  # Infinite loop protection
            store.merge(collab, delta)
            if self.has_hit_limits():
                self.stderr("Hit limits, checkpointing")
                break
            if self.should_checkpoint():
                self._print_progress(checkpoint=True)
                store.flush()
                self.last_checkpoint_time = time.time()
        # NOTE(review): assumed to run after the loop (including after a
        # limits break), not inside it - confirm against the formatted file
        completed = True
    except Exception:
        self._stderr_current("failed to fetch!")
        logging.exception("Failed to fetch %s", collab.name)
        return False
    except KeyboardInterrupt:
        self._stderr_current("Interrupted, writing a checkpoint...")
        raise
    finally:
        # Whatever happened, persist what was merged so far
        store.flush()
        self._print_progress(done=completed)
    return True
def execute(self, settings: CLISettings) -> None:
    """
    Hash every input file and print one line per non-empty hash.

    Uses the FileHasher signal types that apply to `self.content_type`,
    or only `self.signal_type` when one was requested.

    Raises:
        CommandError: if the requested signal type is not among the file
            hashers for the content type.
    """
    applicable = settings.get_signal_types_for_content(self.content_type)
    hashers = [s for s in applicable if issubclass(s, FileHasher)]

    requested = self.signal_type
    if requested is not None:
        if requested not in hashers:
            raise CommandError.user(
                f"{requested.get_name()} "
                f"does not apply to {self.content_type.get_name()}"
            )
        hashers = [requested]  # type: ignore  # can't detect intersection types

    for file in self.files:
        for hasher in hashers:
            hash_str = hasher.hash_from_file(file)
            if not hash_str:
                continue
            print(hasher.get_name(), hash_str)
def execute(self, settings: CLISettings) -> None:
    """
    Fetch (or clear) the state for the selected collaborations.

    With `self.clear`, the stored state of every selected collab is cleared
    for every API and nothing is fetched. Otherwise every API is fetched,
    and the match indices are rebuilt if anything succeeded, unless the
    rebuild was skipped.

    Raises:
        CommandError: (return code 2) if `self.only_collab` names no
            collab, or (return code 3) if any API reported a failure.
    """
    # Verify collab arguments
    self.collabs = settings.get_all_collabs()
    if self.only_collab:
        self.collabs = [c for c in self.collabs if c.name == self.only_collab]
        if not self.collabs:
            raise command_base.CommandError(
                f"No such collab '{self.only_collab}'", 2
            )
    if all(not c.enabled for c in self.collabs):
        self.stderr("All collabs are disabled. Nothing to do.")
        return

    # Do work
    if self.clear:
        self.stderr("Clearing fetched state")
        for api in settings.apis:
            store = settings.fetched_state.get_for_api(api.__class__)
            for collab in self.collabs:
                if self.only_collab not in (None, collab.name):
                    continue
                logging.info("Clearing %s - %s", api.get_name(), collab.name)
                store.clear(collab)
        return

    every_api_ok = True
    some_api_ok = False
    for api in settings.apis:
        logging.info("Fetching all %s's configs", api.get_name())
        api_ok = self.execute_for_fetcher(settings, api)
        every_api_ok &= api_ok
        some_api_ok |= api_ok

    if some_api_ok and not self.skip_index_rebuild:
        self.stderr("Rebuilding match indices...")
        DatasetCommand().execute_generate_indices(settings)

    if not every_api_ok:
        raise command_base.CommandError("Some collabs had errors!", 3)
def execute_add(self, settings: CLISettings) -> None:
    """
    Validate an extension module and add it to the persisted extensions.

    If the module is already added, it is just listed. Otherwise its
    manifest is loaded and checked by building the type and fetcher
    mappings with the new classes included; the module is persisted only
    if those checks do not raise.

    Raises:
        CommandError: (return code 2) if no module was given, or if one
            of the extension's APIs cannot be instantiated without args.
    """
    if not self.module:
        raise CommandError("module is required", 2)
    if self.module in settings.get_persistent_config().extensions:
        self.execute_list(settings)
        return

    manifest = self.get_manifest(self.module)

    # Validate our new setups by pretending to create a new mapping with the
    # new classes. Only construction matters, so the result is discarded.
    tx_meta.SignalTypeMapping(
        [*settings.get_all_content_types(), *manifest.content_types],
        [*settings.get_all_signal_types(), *manifest.signal_types],
    )

    # For APIs, we also need to make sure they can be instantiated
    # without args for the CLI
    apis = []
    for new_api in manifest.apis:
        try:
            instance = new_api()
        except Exception as e:
            logging.exception("Failed to instantiate API %s", new_api.get_name())
            # Chain the cause so the original traceback is not lost
            raise CommandError(
                f"Not able to instantiate API {new_api.get_name()} - throws {e}"
            ) from e
        apis.append(instance)
    apis.extend(settings.apis.get_all())
    tx_meta.FetcherMapping(apis)

    self.execute_list(settings)

    config = settings.get_persistent_config()
    config.extensions.add(self.module)
    settings.set_persistent_config(config)
def execute_list(self, settings: CLISettings) -> None:
    """Print every signal type's name and fully qualified class name, sorted."""
    rows = [
        (signal_type.get_name(), _fully_qualified_name(signal_type))
        for signal_type in settings.get_all_signal_types()
    ]
    rows.sort()
    for name, class_name in rows:
        print(name, class_name)
def init_argparse(cls, settings: CLISettings, ap: ArgumentParser) -> None:
    """
    Register the arguments for this command on `ap`.

    The labeling options (--labels, --seen, --false-positive,
    --true-positive) are mutually exclusive, as are --only-signals and
    --as-hash. The collaboration, content type and inputs are positional.
    """

    def signal_type_parser():
        # Built fresh on every call so each argument gets its own parser
        return common.argparse_choices_pre_type(
            [s.get_name() for s in settings.get_all_signal_types()],
            settings.get_signal_type,
        )

    # What to label the item with
    label_with = ap.add_mutually_exclusive_group()
    label_with.add_argument(
        "--labels",
        "-l",
        type=lambda s: set(s.strip().split(",")),
        metavar="CSV",
        default=set(),
        help="labels to apply to item",
    )
    for flag, description in (
        ("--seen", "mark you've seen this item"),
        ("--false-positive", "mark that this doesn't belong in the collaboration"),
        ("--true-positive", "mark that this does belong in the collaboration"),
    ):
        label_with.add_argument(flag, action="store_true", help=description)

    # How to interpret the input
    signal_group = ap.add_mutually_exclusive_group()
    signal_group.add_argument(
        "--only-signals",
        "-S",
        nargs="+",
        type=signal_type_parser(),
        default=[],
        help="limit to this signal type",
    )
    signal_group.add_argument(
        "--as-hash",
        "-H",
        metavar="SIGNAL_TYPE",
        type=signal_type_parser(),
        help="interpret input as a hash of this type",
    )

    # Positionals
    ap.add_argument(
        "collab",
        type=lambda n: _collab_type(n, settings),
        help="The name of the collaboration",
    )
    ap.add_argument(
        "content_type",
        type=common.argparse_choices_pre_type(
            [c.get_name() for c in settings.get_all_content_types()],
            settings.get_content_type,
        ),
        help="the type of what you are labeling",
    )
    ap.add_argument(
        "files",
        nargs=argparse.REMAINDER,
        action=FlexFilesInputAction,
        help="list of files or -- to interpret remainder as a string",
    )
def execute(self, settings: CLISettings) -> None:
    """Persist `self.credentials` as the NCMEC credentials, if any were given."""
    if self.credentials is None:
        return
    persisted = settings.get_persistent_config()
    persisted.ncmec_credentials = self.credentials
    settings.set_persistent_config(persisted)
def execute_config(self, settings: CLISettings) -> None:
    """Persist `self.api_token` as the ThreatExchange API token, if one was given."""
    if self.api_token is None:
        return
    persisted = settings.get_persistent_config()
    persisted.fb_threatexchange_api_token = self.api_token
    settings.set_persistent_config(persisted)
def execute(self, settings: CLISettings) -> None:
    """Print the API name and collab name of each collaboration, one per line."""
    all_collabs = settings.get_all_collabs(default_to_sample=False)
    for collab in all_collabs:
        api_name = settings.apis.get_for_collab(collab).get_name()
        print(api_name, collab.name)
def get_collabs(self, settings: CLISettings) -> t.List[CollaborationConfigBase]:
    """
    Return the enabled collaborations, honoring `self.only_collabs`.

    When `self.only_collabs` is non-empty, only enabled collabs whose name
    appears in it are returned.
    """
    wanted_names = self.only_collabs
    return [
        collab
        for collab in settings.get_all_collabs()
        if collab.enabled and (not wanted_names or collab.name in wanted_names)
    ]
def _collab_type(name: str, settings: CLISettings) -> CollaborationConfigBase:
    """
    Resolve a collaboration name into its config.

    Raises:
        ArgumentTypeError: if `settings` has no collab with that name.
    """
    collab = settings.get_collab(name)
    if collab is not None:
        return collab
    raise ArgumentTypeError(f"No such collab '{name}'!")
def execute(self, settings: CLISettings) -> None:
    """
    Delete the collaboration named `self.collab_name`.

    Raises:
        CommandError: (return code 2) if there is no such collaboration.
    """
    target = settings.get_collab(self.collab_name)
    if target is None:
        raise CommandError("No such collab", 2)
    # TODO clean private member access
    settings._state.delete_collab(target)
def init_argparse(cls, settings: CLISettings, ap: ArgumentParser) -> None:
    """
    Register the arguments for this command on `ap`.

    The action flags (--rebuild-indices, --clear-indices, --signal-summary,
    --print-signals) are mutually exclusive, as are the two type selectors
    (--only-signals, --only-content) and the two output formats
    (--signals-only, --csv). The remaining options filter or adjust output.
    """
    # What to do
    actions = ap.add_mutually_exclusive_group()
    actions.add_argument(
        "--rebuild-indices",
        "-r",
        action="store_true",
        help="rebuild indices from fetched data",
    )
    actions.add_argument(
        "--clear-indices",
        "-X",
        action="store_true",
        help="clear all indices",
    )
    actions.add_argument(
        "--signal-summary",
        action="store_true",
        help="print summary in terms of signals (default action)",
    )
    actions.add_argument(
        "--print-signals",
        "-P",
        action="store_true",
        help="print signals to screen",
    )

    # Which signal types to consider
    type_selector = ap.add_mutually_exclusive_group()
    type_selector.add_argument(
        "--only-signals",
        "-s",
        nargs="+",
        default=[],
        type=common.argparse_choices_pre_type(
            choices=[s.get_name() for s in settings.get_all_signal_types()],
            type=settings.get_signal_type,
        ),
        metavar="SIGNAL_TYPE",
        help="only use signals of this type",
    )
    type_selector.add_argument(
        "--only-content",
        "-C",
        nargs="+",
        default=[],
        type=common.argparse_choices_pre_type(
            choices=[c.get_name() for c in settings.get_all_content_types()],
            type=settings.get_content_type,
        ),
        metavar="CONTENT_TYPE",
        help="only use signals for these content types",
    )

    ap.add_argument(
        "--print-zeroes",
        "-z",
        action="store_true",
        help="[--signal-summary] print counts of 0",
    )
    # NOTE(review): the "[-S|-P]" prefix below predates this edit; here -S is
    # the short flag for --signals-only - confirm which actions it means
    ap.add_argument(
        "--only-collabs",
        "-c",
        nargs="+",
        default=[],
        metavar="NAME",
        help="[-S|-P] only use items from these collaborations",
    )
    ap.add_argument(
        "--only-tags",
        "-t",
        nargs="+",
        default=[],
        metavar="STR",
        help="[-S|-P] only use items with these tags",
    )

    # Output format for --print-signals
    csv_mutual_group = ap.add_mutually_exclusive_group()
    csv_mutual_group.add_argument(
        "--signals-only",
        "-S",
        action="store_true",
        help="[-P] only print signals",
    )
    csv_mutual_group.add_argument(
        "--csv",
        action="store_true",
        help="[-P] print in csv format (including header)",
    )
def execute_list(self, settings: CLISettings) -> None:
    """Print every content type's name and fully qualified class name, sorted."""
    rows = [
        (content_type.get_name(), _fully_qualified_name(content_type))
        for content_type in settings.get_all_content_types()
    ]
    rows.sort()
    for name, class_name in rows:
        print(name, class_name)