Example #1
0
    def init_argparse(cls, settings: CLISettings, ap: argparse.ArgumentParser) -> None:
        """
        Register the command-line arguments used for hashing content.

        Arguments are added in this order:
          * ``content_type`` (positional): one of the content type names known
            to ``settings``.
          * ``files`` (positional remainder): handled by ``FlexFilesInputAction``.
          * ``--signal-type`` / ``-S``: restricts output to one signal type.

        Only signal types that subclass ``FileHasher`` are offered as choices
        for ``--signal-type``.
        """
        # Filter to FileHasher subclasses exactly once; the original filtered
        # the same list a second time, which was a no-op.
        signal_choices = sorted(
            s.get_name()
            for s in settings.get_all_signal_types()
            if issubclass(s, FileHasher)
        )
        content_choices = sorted(s.get_name() for s in settings.get_all_content_types())

        ap.add_argument(
            "content_type",
            **common.argparse_choices_pre_type_kwargs(
                choices=content_choices,
                type=settings.get_content_type,
            ),
            help="what kind of content to hash",
        )

        ap.add_argument(
            "files",
            nargs=argparse.REMAINDER,
            action=FlexFilesInputAction,
            help="list of files, URLs, - for stdin, or -- to interpret remainder as a string",
        )

        ap.add_argument(
            "--signal-type",
            "-S",
            **common.argparse_choices_pre_type_kwargs(
                choices=signal_choices,
                type=settings.get_signal_type,
            ),
            help="only generate these signal types",
        )
Example #2
0
 def execute_remove(self, settings: CLISettings) -> None:
     """
     Remove ``self.module`` from the extensions stored in the persistent config.

     Raises:
         CommandError: if no module was given, or if the module is not in
             the persistent config's ``extensions``.
     """
     module = self.module
     if not module:
         raise CommandError("Which module you are remove is required", 2)

     persisted = settings.get_persistent_config()
     if module not in persisted.extensions:
         raise CommandError(f"You haven't added {module}", 2)

     persisted.extensions.remove(module)
     # Hand the modified config back to settings.
     settings.set_persistent_config(persisted)
Example #3
0
    def init_argparse(cls, settings: CLISettings,
                      ap: argparse.ArgumentParser) -> None:
        """
        Register the command-line arguments used for matching content.

        Arguments are added in this order: ``content_type``,
        ``--only-signal``/``-S``, ``--hashes``/``-H``, the ``files``
        remainder, ``--show-false-positives`` and ``--hide-disputed``.
        """
        content_names = [c.get_name() for c in settings.get_all_content_types()]
        signal_names = [s.get_name() for s in settings.get_all_signal_types()]

        content_kwargs = common.argparse_choices_pre_type_kwargs(
            content_names, settings.get_content_type)
        ap.add_argument(
            "content_type",
            help="what kind of content to match",
            **content_kwargs,
        )

        signal_kwargs = common.argparse_choices_pre_type_kwargs(
            signal_names, settings.get_signal_type)
        ap.add_argument(
            "--only-signal",
            "-S",
            help="limit to this signal type",
            **signal_kwargs,
        )

        ap.add_argument(
            "--hashes",
            "-H",
            action="store_true",
            help="force input to be interpreted as signals for the given signal type",
        )

        ap.add_argument(
            "files",
            nargs=argparse.REMAINDER,
            action=FlexFilesInputAction,
            help="list of files or -- to interpret remainder as a string",
        )

        # Plain boolean switches that tune which matches get displayed.
        for flag, description in (
            ("--show-false-positives",
             "show matches even if you've marked them false_positive"),
            ("--hide-disputed",
             "hide matches if someone has disputed them"),
        ):
            ap.add_argument(flag, action="store_true", help=description)
Example #4
0
def _get_settings(config: CLiConfig,
                  dir: pathlib.Path) -> t.Tuple[CLISettings, _ExtendedTypes]:
    """
    Assemble the CLI settings from the config plus any extensions.

    The built-in content types, default signal types and built-in exchange
    APIs are each combined with what the extensions contribute, and a
    ``CliState`` is created with ``dir`` passed through as its ``dir``.

    Returns a ``(CLISettings, extensions)`` pair.
    """
    extensions = _get_extended_functionality(config)

    builtin_content = [
        photo.PhotoContent,
        video.VideoContent,
        url.URLContent,
        text.TextContent,
    ]
    all_content = builtin_content + extensions.content_types
    all_signals = list(_DEFAULT_SIGNAL_TYPES) + extensions.signal_types
    signals = meta.SignalTypeMapping(all_content, all_signals)

    # Built-in APIs come first, extension-provided instances are appended.
    base_apis: t.List[SignalExchangeAPI] = [
        StaticSampleSignalExchangeAPI(),
        LocalFileSignalExchangeAPI(),
        StopNCIISignalExchangeAPI(*_get_stopncii_tokens(config)),
        NCMECSignalExchangeAPI(*_get_ncmec_credentials(config)),
        FBThreatExchangeSignalExchangeAPI(_get_fb_tx_app_token(config)),
    ]
    fetchers = meta.FetcherMapping(base_apis + extensions.api_instances)

    known_fetchers = list(fetchers.fetchers_by_name.values())
    state = CliState(known_fetchers, dir=dir)

    functionality = meta.FunctionalityMapping(signals, fetchers, state)
    return CLISettings(functionality, state), extensions
Example #5
0
    def execute(self, settings: CLISettings) -> None:
        """
        Create a new collaboration config, or edit the existing one.

        With no existing collab named ``self.collab_name``, a new config is
        built from ``self.edit_kwargs`` (only when ``self.create`` is set).
        With an existing collab, each entry of ``self.edit_kwargs`` is set
        as an attribute on it and it is stored again.

        Raises:
            CommandError: if the collab is missing and ``self.create`` is not
                set, if it exists and ``self.create`` is set, or if the
                existing collab belongs to a different API.
        """
        current = settings.get_collab(self.collab_name)

        if not current:
            if not self.create:
                raise CommandError(
                    "no such config! Did you mean to use --create?", 2)
            logging.debug("Creating config with args: %s", self.edit_kwargs)
            config_cls = self._API_CLS.get_config_class()
            settings._state.update_collab(config_cls(**self.edit_kwargs))
            return

        if self.create:
            raise CommandError(
                f'there\'s an existing collaboration named "{self.collab_name}"',
                2,
            )
        if current.api != self._API_CLS.get_name():
            raise CommandError(
                f"the existing collab is for the {current.api} api, delete that one first",
                2,
            )
        # Sanity check: matching API name should imply the matching config class.
        assert (
            current.__class__ == self._API_CLS.get_config_class()
        ), "api name the same, but class different?"

        for attr_name, attr_value in self.edit_kwargs.items():
            setattr(current, attr_name, attr_value)
        settings._state.update_collab(current)
Example #6
0
 def get_signal_types(self,
                      settings: CLISettings) -> t.Set[t.Type[SignalType]]:
     """
     Return the set of signal types selected by the command's filters.

     Starts from ``self.only_signals`` when it is non-empty, otherwise from
     every signal type in ``settings``. When ``self.only_content`` is
     non-empty, only signal types that list at least one of those content
     types are kept.
     """
     candidates = self.only_signals or settings.get_all_signal_types()
     wanted_content = self.only_content
     if not wanted_content:
         return set(candidates)
     return {
         signal_type
         for signal_type in candidates
         if any(
             content in wanted_content
             for content in signal_type.get_content_types()
         )
     }
Example #7
0
    def execute(self, settings: CLISettings) -> None:
        """
        Match each input in ``self.files`` against the loaded indices.

        Steps:
          1. If no index exists: raise unless in demo mode, in which case
             ``FetchCommand`` is run first.
          2. Pick the signal types for ``self.content_type`` (or just
             ``self.only_signal``) and keep those with a usable base class.
          3. Load the index of every remaining signal type, skipping types
             that have none.
          4. For every (input, signal type) pair, print one line per
             collaboration found in the match metadata.

        Raises:
            CommandError: when there are no indices outside demo mode, or
                when ``self.as_hashes`` is set and more than one signal type
                remains.
        """
        if not settings.index.list():
            if not settings.in_demo_mode:
                raise CommandError(
                    "No indices available. Do you need to fetch?")
            self.stderr(
                "You haven't built any indices, so we'll call `fetch` for you!"
            )
            FetchCommand().execute(settings)

        candidates = settings.get_signal_types_for_content(self.content_type)
        if self.only_signal:
            candidates = [self.only_signal]

        # Which base classes qualify depends on whether input is raw hashes.
        allowed: t.Tuple[type, ...]
        if self.as_hashes:
            allowed = (BytesHasher, TextHasher, FileHasher)
        else:
            allowed = (FileHasher, MatchesStr)
        signal_types = [s for s in candidates if issubclass(s, allowed)]

        if self.as_hashes and len(signal_types) > 1:
            raise CommandError(
                "Too many SignalTypes for --as-hashes. Use also --only-signal",
                2)

        logging.info(
            "Signal types that apply: %s",
            ", ".join(s.get_name() for s in signal_types) or "None!",
        )

        indices: t.List[t.Tuple[t.Type[SignalType], SignalTypeIndex]] = []
        for s_type in signal_types:
            index = settings.index.load(s_type)
            if index is not None:
                indices.append((s_type, index))
            else:
                logging.info("No index for %s, skipping", s_type.get_name())

        if not indices:
            self.stderr("No data to match against")
            return

        matcher = _match_hashes if self.as_hashes else _match_file
        for path in self.files:
            for s_type, index in indices:
                # Each collaboration is reported once per (input, signal type).
                # TODO - maybe take the highest certainty?
                reported = set()
                for result in matcher(path, s_type, index):
                    metadatas: t.List[t.Tuple[
                        str, FetchedSignalMetadata]] = result.metadata
                    for collab, fetched_data in metadatas:
                        if collab not in reported:
                            reported.add(collab)
                            print(s_type.get_name(), f"- ({collab})", fetched_data)
Example #8
0
 def execute_list(self, settings: CLISettings) -> None:
     """
     Print extension details.

     With ``self.module`` set, only that module's manifest is printed.
     Otherwise every extension in the persistent config is printed in sorted
     order: the module name, then its manifest indented by 2.
     """
     if not self.module:
         configured = settings.get_persistent_config().extensions
         for module_name in sorted(configured):
             print(module_name)
             self.print_extension(self.get_manifest(module_name), indent=2)
         return

     self.print_extension(self.get_manifest(self.module))
Example #9
0
 def execute_import(self, settings: CLISettings,
                    privacy_group_id: int) -> None:
     """
     Create a collaboration config from a privacy group.

     The privacy group is looked up through the API returned by
     ``self.get_te_api`` and its name becomes the collaboration name.

     Raises:
         CommandError: if a collaboration with that name already exists.
     """
     privacy_group = self.get_te_api(settings).get_privacy_group(
         privacy_group_id)
     collab_name = privacy_group.name

     if settings.get_collab(collab_name) is not None:
         raise CommandError(
             f"A collaboration already exists with the name {collab_name}", 2)

     new_collab = FBThreatExchangeCollabConfig(
         name=collab_name,
         privacy_group=privacy_group_id,
     )
     settings._state.update_collab(new_collab)
Example #10
0
    def execute_for_collab(
        self,
        settings: CLISettings,
        fetcher: SignalExchangeAPI,
        collab: CollaborationConfigBase,
    ) -> bool:
        """
        Fetch updates for a single collaboration and merge them into its store.

        Iterates ``fetcher.fetch_iter(...)`` starting from the checkpoint
        returned by ``self._verify_store_and_checkpoint``, merging each delta
        into the store obtained for the fetcher's class. The store is flushed
        on every exit path via ``finally``.

        Returns:
            True if the loop finished or stopped early on limits; False if an
            ``Exception`` was raised while fetching (it is logged, not
            propagated).

        Raises:
            KeyboardInterrupt: re-raised after printing a message; the store
                is still flushed by the ``finally`` clause.
        """

        store = settings.fetched_state.get_for_api(fetcher.__class__)
        checkpoint = self._verify_store_and_checkpoint(store, collab)

        # Reset the per-collab progress bookkeeping read by the progress helpers.
        self.progress_fetched_count = 0
        self.current_collab = collab.name
        self.current_api = fetcher.get_name()
        completed = False

        try:
            it = fetcher.fetch_iter(settings.get_all_signal_types(), collab,
                                    checkpoint)
            delta: FetchDeltaTyped
            for delta in it:
                logging.info("fetch() with %d new records", len(delta.updates))
                next_checkpoint = delta.checkpoint
                self._fetch_progress(len(delta.updates), next_checkpoint)
                assert next_checkpoint is not None  # Infinite loop protection
                store.merge(collab, delta)
                if self.has_hit_limits():
                    self.stderr("Hit limits, checkpointing")
                    break
                # Periodic intermediate flush, timestamped for should_checkpoint().
                if self.should_checkpoint():
                    self._print_progress(checkpoint=True)
                    store.flush()
                    self.last_checkpoint_time = time.time()
            # NOTE(review): this line is also reached after the `break` above,
            # so hitting limits is reported as done=True - confirm intended.
            completed = True
        except Exception:
            self._stderr_current("failed to fetch!")
            logging.exception("Failed to fetch %s", collab.name)
            return False
        except KeyboardInterrupt:
            self._stderr_current("Interrupted, writing a checkpoint...")
            raise
        finally:
            # Runs on success, on the `return False` path and on re-raise.
            store.flush()

        self._print_progress(done=completed)
        return True
Example #11
0
    def execute(self, settings: CLISettings) -> None:
        """
        Hash every input in ``self.files`` and print ``<signal name> <hash>``.

        The hashers are the ``FileHasher`` signal types that apply to
        ``self.content_type``; when ``self.signal_type`` is given, only that
        one is used. Empty hash results are not printed.

        Raises:
            CommandError: (via ``CommandError.user``) when ``self.signal_type``
                is not among the hashers for the content type.
        """
        applicable = [
            signal_type
            for signal_type in settings.get_signal_types_for_content(
                self.content_type)
            if issubclass(signal_type, FileHasher)
        ]

        requested = self.signal_type
        if requested is None:
            hashers = applicable
        elif requested in applicable:
            hashers = [requested]  # type: ignore  # can't detect intersection types
        else:
            raise CommandError.user(
                f"{self.signal_type.get_name()} "
                f"does not apply to {self.content_type.get_name()}"
            )

        for file in self.files:
            for hasher in hashers:
                digest = hasher.hash_from_file(file)
                if not digest:
                    continue
                print(hasher.get_name(), digest)
Example #12
0
    def execute(self, settings: CLISettings) -> None:
        """
        Fetch (or clear) stored state for the selected collaborations.

        ``self.collabs`` is set to all collabs, narrowed to
        ``self.only_collab`` when given. If none of them is enabled, a
        message is printed and nothing happens. With ``self.clear`` the
        stored state of each selected collab is cleared for every API.
        Otherwise every API is fetched, and the match indices are rebuilt
        if at least one fetch succeeded and ``self.skip_index_rebuild`` is
        not set.

        Raises:
            CommandError: if ``self.only_collab`` names no known collab, or
                if any API fetch reported failure.
        """
        # Verify collab arguments
        self.collabs = settings.get_all_collabs()
        wanted = self.only_collab
        if wanted:
            self.collabs = [c for c in self.collabs if c.name == wanted]
            if not self.collabs:
                raise command_base.CommandError(
                    f"No such collab '{self.only_collab}'", 2)
        if not any(c.enabled for c in self.collabs):
            self.stderr("All collabs are disabled. Nothing to do.")
            return

        # Clearing is a separate mode: wipe and stop.
        if self.clear:
            self.stderr("Clearing fetched state")
            for api in settings.apis:
                store = settings.fetched_state.get_for_api(api.__class__)
                for collab in self.collabs:
                    if self.only_collab in (None, collab.name):
                        logging.info("Clearing %s - %s", api.get_name(),
                                     collab.name)
                        store.clear(collab)
            return

        # Fetch from every API, tracking both "all ok" and "at least one ok".
        every_fetch_ok = True
        some_fetch_ok = False
        for api in settings.apis:
            logging.info("Fetching all %s's configs", api.get_name())
            outcome = self.execute_for_fetcher(settings, api)
            every_fetch_ok &= outcome
            some_fetch_ok |= outcome

        if some_fetch_ok and not self.skip_index_rebuild:
            self.stderr("Rebuilding match indices...")
            DatasetCommand().execute_generate_indices(settings)

        if not every_fetch_ok:
            raise command_base.CommandError("Some collabs had errors!", 3)
Example #13
0
    def execute_add(self, settings: CLISettings) -> None:
        """
        Validate an extension module and record it in the persistent config.

        If the module is already in the config's extensions, it is only
        listed. Otherwise its manifest is loaded and validated by building
        throwaway mappings that combine the current types/APIs with the
        manifest's additions; then the extension is listed and added to the
        persistent config.

        Raises:
            CommandError: if no module was given, or if one of the manifest's
                APIs cannot be instantiated without arguments (the original
                exception is attached as the cause).
        """
        if not self.module:
            raise CommandError("module is required", 2)
        if self.module in settings.get_persistent_config().extensions:
            self.execute_list(settings)
            return

        manifest = self.get_manifest(self.module)

        # Validate our new setups by pretending to create a new mapping with
        # the new classes. Only the constructor's validation matters, so the
        # resulting object is intentionally discarded.
        tx_meta.SignalTypeMapping(
            list(
                itertools.chain(settings.get_all_content_types(),
                                manifest.content_types)),
            list(
                itertools.chain(settings.get_all_signal_types(),
                                manifest.signal_types)),
        )

        # For APIs, we also need to make sure they can be instantiated
        # without args for the CLI.
        apis = []
        for new_api in manifest.apis:
            try:
                instance = new_api()
            except Exception as e:
                logging.exception("Failed to instanciante API %s",
                                  new_api.get_name())
                # Chain explicitly so the traceback marks the constructor
                # failure as the direct cause of the command error.
                raise CommandError(
                    f"Not able to instanciate API {new_api.get_name()} - throws {e}"
                ) from e
            apis.append(instance)
        apis.extend(settings.apis.get_all())
        # Constructed for validation only, like the mapping above.
        tx_meta.FetcherMapping(apis)

        self.execute_list(settings)

        config = settings.get_persistent_config()
        config.extensions.add(self.module)
        settings.set_persistent_config(config)
Example #14
0
 def execute_list(self, settings: CLISettings) -> None:
     """Print each signal type's name and fully qualified class name, sorted."""
     rows = [
         (signal_type.get_name(), _fully_qualified_name(signal_type))
         for signal_type in settings.get_all_signal_types()
     ]
     rows.sort()
     for name, class_name in rows:
         print(name, class_name)
Example #15
0
    def init_argparse(cls, settings: CLISettings, ap: ArgumentParser) -> None:
        """
        Register the command-line arguments used for labeling content.

        Two mutually exclusive groups are created: one choosing what to
        label with (``--labels``, ``--seen``, ``--false-positive``,
        ``--true-positive``) and one choosing how the input is interpreted
        (``--only-signals`` or ``--as-hash``). The positionals ``collab``,
        ``content_type`` and ``files`` follow, in that order.
        """
        label_group = ap.add_mutually_exclusive_group()
        label_group.add_argument(
            "--labels",
            "-l",
            # Comma-separated input is turned into a set of labels.
            type=lambda s: set(s.strip().split(",")),
            metavar="CSV",
            default=set(),
            help="labels to apply to item",
        )
        # The remaining labeling choices are plain boolean switches.
        for flag, description in (
            ("--seen", "mark you've seen this item"),
            ("--false-positive",
             "mark that this doesn't belong in the collaboration"),
            ("--true-positive",
             "mark that this does belong in the collaboration"),
        ):
            label_group.add_argument(flag, action="store_true", help=description)

        input_group = ap.add_mutually_exclusive_group()

        only_signals_type = common.argparse_choices_pre_type(
            [s.get_name() for s in settings.get_all_signal_types()],
            settings.get_signal_type,
        )
        input_group.add_argument(
            "--only-signals",
            "-S",
            nargs="+",
            type=only_signals_type,
            default=[],
            help="limit to this signal type",
        )

        as_hash_type = common.argparse_choices_pre_type(
            [s.get_name() for s in settings.get_all_signal_types()],
            settings.get_signal_type,
        )
        input_group.add_argument(
            "--as-hash",
            "-H",
            metavar="SIGNAL_TYPE",
            type=as_hash_type,
            help="interpret input as a hash of this type",
        )

        ap.add_argument(
            "collab",
            type=lambda n: _collab_type(n, settings),
            help="The name of the collaboration",
        )

        content_type_type = common.argparse_choices_pre_type(
            [c.get_name() for c in settings.get_all_content_types()],
            settings.get_content_type,
        )
        ap.add_argument(
            "content_type",
            type=content_type_type,
            help="the type of what you are labeling",
        )

        ap.add_argument(
            "files",
            nargs=argparse.REMAINDER,
            action=FlexFilesInputAction,
            help="list of files or -- to interpret remainder as a string",
        )
Example #16
0
 def execute(self, settings: CLISettings) -> None:
     """Store ``self.credentials`` as the config's NCMEC credentials, if given."""
     new_credentials = self.credentials
     if new_credentials is None:
         # Nothing supplied: leave the persistent config untouched.
         return
     persisted = settings.get_persistent_config()
     persisted.ncmec_credentials = new_credentials
     settings.set_persistent_config(persisted)
Example #17
0
 def execute_config(self, settings: CLISettings) -> None:
     """Store ``self.api_token`` as ``fb_threatexchange_api_token``, if given."""
     token = self.api_token
     if token is None:
         # Nothing supplied: leave the persistent config untouched.
         return
     persisted = settings.get_persistent_config()
     persisted.fb_threatexchange_api_token = token
     settings.set_persistent_config(persisted)
Example #18
0
 def execute(self, settings: CLISettings) -> None:
     """Print ``<api name> <collab name>`` for every configured collaboration."""
     configured = settings.get_all_collabs(default_to_sample=False)
     for collab in configured:
         api_name = settings.apis.get_for_collab(collab).get_name()
         print(api_name, collab.name)
Example #19
0
 def get_collabs(self,
                 settings: CLISettings) -> t.List[CollaborationConfigBase]:
     """
     Return the enabled collaborations selected by the command's filters.

     When ``self.only_collabs`` is non-empty, only collaborations whose name
     is in it are returned.
     """
     enabled = [
         collab for collab in settings.get_all_collabs() if collab.enabled
     ]
     wanted_names = self.only_collabs
     if not wanted_names:
         return enabled
     return [collab for collab in enabled if collab.name in wanted_names]
Example #20
0
def _collab_type(name: str, settings: CLISettings) -> CollaborationConfigBase:
    """
    Resolve a collaboration name to its config.

    Raises ArgumentTypeError when ``settings`` has no collab by that name.
    """
    collab = settings.get_collab(name)
    if collab is not None:
        return collab
    raise ArgumentTypeError(f"No such collab '{name}'!")
Example #21
0
 def execute(self, settings: CLISettings) -> None:
     """
     Delete the collaboration named ``self.collab_name``.

     Raises:
         CommandError: if no such collaboration exists.
     """
     target = settings.get_collab(self.collab_name)
     if target is None:
         raise CommandError("No such collab", 2)
     # TODO clean private member access
     settings._state.delete_collab(target)
Example #22
0
    def init_argparse(cls, settings: CLISettings, ap: ArgumentParser) -> None:
        """
        Register the command-line arguments for working with the dataset.

        Three mutually exclusive groups are created:
          * the action (``--rebuild-indices``, ``--clear-indices``,
            ``--signal-summary``, ``--print-signals``);
          * the type selector (``--only-signals`` or ``--only-content``);
          * the print format (``--signals-only`` or ``--csv``).

        ``--print-zeroes``, ``--only-collabs`` and ``--only-tags`` are
        ungrouped modifiers.
        """
        actions = ap.add_mutually_exclusive_group()
        actions.add_argument(
            "--rebuild-indices",
            "-r",
            action="store_true",
            help="rebuild indices from fetched data",
        )
        actions.add_argument(
            "--clear-indices",
            "-X",
            action="store_true",
            help="clear all indices",
        )
        actions.add_argument(
            "--signal-summary",
            action="store_true",
            help="print summary in terms of signals (default action)",
        )
        actions.add_argument(
            "--print-signals",
            "-P",
            action="store_true",
            help="print signals to screen",
        )

        type_selector = ap.add_mutually_exclusive_group()
        type_selector.add_argument(
            "--only-signals",
            "-s",
            nargs="+",
            default=[],
            type=common.argparse_choices_pre_type(
                choices=[
                    s.get_name() for s in settings.get_all_signal_types()
                ],
                type=settings.get_signal_type,
            ),
            metavar="SIGNAL_TYPE",
            help="only use signals of this type",
        )
        type_selector.add_argument(
            "--only-content",
            "-C",
            nargs="+",
            default=[],
            type=common.argparse_choices_pre_type(
                choices=[
                    s.get_name() for s in settings.get_all_content_types()
                ],
                type=settings.get_content_type,
            ),
            metavar="CONTENT_TYPE",
            help="only use signals for these content types",
        )
        ap.add_argument(
            "--print-zeroes",
            "-z",
            action="store_true",
            help="[--signal-summary] print counts of 0",
        )
        ap.add_argument(
            "--only-collabs",
            "-c",
            nargs="+",
            default=[],
            metavar="NAME",
            # The help used to be a copy of --only-tags' text ("this tag"),
            # but this option takes collaboration names.
            help="[-S|-P] only use items from these collaborations",
        )
        ap.add_argument(
            "--only-tags",
            "-t",
            nargs="+",
            default=[],
            metavar="STR",
            help="[-S|-P] only use items with these tags",
        )
        csv_mutual_group = ap.add_mutually_exclusive_group()
        csv_mutual_group.add_argument(
            "--signals-only",
            "-S",
            action="store_true",
            help="[-P] only print signals",
        )
        csv_mutual_group.add_argument(
            "--csv",
            action="store_true",
            help="[-P] print in csv format (including header)",
        )
Example #23
0
 def execute_list(self, settings: CLISettings) -> None:
     """Print each content type's name and fully qualified class name, sorted."""
     rows = [
         (content_type.get_name(), _fully_qualified_name(content_type))
         for content_type in settings.get_all_content_types()
     ]
     rows.sort()
     for name, class_name in rows:
         print(name, class_name)