def get_parser(self, prog_name):
    """Build the argument parser for the ``lim ctu stats`` command.

    Starts from the parser returned by the parent class's ``get_parser``,
    adds the cache options and the optional ``attribute`` positional, and
    attaches an epilog containing a usage example.

    Args:
        prog_name: Program name, passed through unchanged to the parent
            class's ``get_parser``.

    Returns:
        The configured parser.
    """
    parser = super().get_parser(prog_name)
    # Raw formatting keeps the epilog's hand-drawn table intact.
    parser.formatter_class = argparse.RawDescriptionHelpFormatter
    cache_file = CTU_Dataset.get_cache_file()
    parser.add_argument(
        '--cache-file',
        action='store',
        dest='cache_file',
        default=cache_file,
        # Same wording as the other CTU commands' ``--cache-file`` help.
        help=('Cache file path for CTU metadata '
              '(Env: ``LIM_CTU_CACHE``; '
              f'default: ``{cache_file}``)')
    )
    parser.add_argument(
        '--ignore-cache',
        action='store_true',
        dest='ignore_cache',
        default=False,
        help="Ignore any cached results (default: ``False``)"
    )
    # Computed once: serves both as the valid choices and the epilog text.
    attribute_names = [
        c.lower() for c in CTU_Dataset.get_index_columns(min=False)
    ]
    attributes = ", ".join(attribute_names)
    parser.add_argument(
        'attribute',
        nargs='?',
        default='infection_date',
        choices=attribute_names,
        help='Attribute to quantify (default: ``infection_date``)'
    )
    parser.epilog = textwrap.dedent(f"""\
        Shows the selected dataset attribute and a count of unique
        instances in reverse order of occurrence.

        ::

            $ lim ctu stats md5 | head
            +----------------------------------+-------+
            | MD5                              | Count |
            +----------------------------------+-------+
            | e515267ba19417974a63b51e4f7dd9e9 |    10 |
            | -                                |     9 |
            | e1090d7126dd88d0d1d39b68ea3aae11 |     6 |
            | 05a00c320754934782ec5dec1d5c0476 |     6 |
            | 48616dd47e12e369feef53a57830158a |     5 |
            | 11bc606269a161555431bacf37f7c1e4 |     5 |
            | bf08e6b02e00d2bc6dd493e93e69872f |     4 |

        Possible attributes are those that come from the CTU index
        file (``{attributes}``).

        To see more detailed descriptions of the CTU datasets as a
        whole, use ``lim ctu overview`` to view the appropriate web page.
        """)  # noqa
    return parser
def get_parser(self, prog_name):
    """Return the argument parser for the ``lim ctu show`` command.

    The parent class's parser is extended with the cache options and a
    single optional ``scenario`` positional (converted with
    ``normalize_ctu_name``). The epilog carries an example of the output.

    Args:
        prog_name: Program name forwarded to the parent ``get_parser``.

    Returns:
        The configured parser.
    """
    cmd_parser = super().get_parser(prog_name)
    cmd_parser.formatter_class = argparse.RawDescriptionHelpFormatter

    default_cache = CTU_Dataset.get_cache_file()
    cache_help = (
        'Cache file path for CTU metadata '
        '(Env: ``LIM_CTU_CACHE``; '
        f'default: ``{default_cache}``)'
    )
    cmd_parser.add_argument(
        '--cache-file',
        dest='cache_file',
        action='store',
        default=default_cache,
        help=cache_help,
    )
    cmd_parser.add_argument(
        '--ignore-cache',
        dest='ignore_cache',
        action='store_true',
        default=False,
        help="Ignore any cached results (default: ``False``)",
    )
    cmd_parser.add_argument(
        'scenario',
        type=normalize_ctu_name,
        nargs='?',
        default=None,
    )

    usage_notes = textwrap.dedent("""\
        Shows details about an individual scenario in tabular form.
        See ``lim ctu list --help`` for more on the ``scenario`` argument.

        ::

            $ lim ctu show iot-3-1
            +----------------+----------------------------------------------------------------------------------+
            | Field          | Value                                                                            |
            +----------------+----------------------------------------------------------------------------------+
            | Infection_Date | 2018-05-19                                                                       |
            | Capture_Name   | CTU-IoT-Malware-Capture-3-1                                                      |
            | Malware        | Muhstik                                                                          |
            | MD5            | b8849fe97e39ae3afd6def618568bb09                                                 |
            | SHA256         | 5ce13670bc875e913e6f087a4ac0a9e343347d5babb3b5c63e1d1b199371f69a                 |
            | Capture_URL    | https://mcfp.felk.cvut.cz/publicDatasets/IoTDatasets/CTU-IoT-Malware-Capture-3-1 |
            | ZIP            | fce7b8bbd1c1fba1d75b9dc1a60b25f49f68c9ec16b3656b52ed28290fc93c72.zip             |
            | LABELED        | None                                                                             |
            | BINETFLOW      | 2018-05-21_capture.binetflow                                                     |
            | PCAP           | 2018-05-21_capture.pcap                                                          |
            | WEBLOGNG       | 2018-05-21_capture.weblogng                                                      |
            +----------------+----------------------------------------------------------------------------------+
        """)  # noqa
    cmd_parser.epilog = usage_notes
    return cmd_parser
def get_parser(self, prog_name):
    """Return the parser for the command that opens scenario web pages.

    The parent class's parser is passed through ``add_browser_options``
    and then given the cache options plus zero or more ``scenario``
    positionals (each converted with ``normalize_ctu_name``).

    Args:
        prog_name: Program name forwarded to the parent ``get_parser``.

    Returns:
        The configured parser.
    """
    base_parser = super().get_parser(prog_name)
    base_parser.formatter_class = argparse.RawDescriptionHelpFormatter
    cmd_parser = add_browser_options(base_parser)

    default_cache = CTU_Dataset.get_cache_file()
    cache_help = (
        'Cache file path for CTU metadata '
        '(Env: ``LIM_CTU_CACHE``; '
        f'default: ``{default_cache}``)'
    )
    cmd_parser.add_argument(
        '--cache-file',
        dest='cache_file',
        action='store',
        default=default_cache,
        help=cache_help,
    )
    cmd_parser.add_argument(
        '--ignore-cache',
        dest='ignore_cache',
        action='store_true',
        default=False,
        help="Ignore any cached results (default: ``False``)",
    )
    cmd_parser.add_argument(
        'scenario',
        type=normalize_ctu_name,
        nargs='*',
        default=None,
    )

    usage_notes = textwrap.dedent("""\
        Opens a browser for the web page containing the scenario
        descriptions and data links.

        Arguments are scenario names using either the full name
        form (e.g., ``CTU-Malware-Capture-Botnet-123-1``) or an
        abbreviated form (e.g., ``Botnet-123-1``).

        The URL to use is the one seen in the ``SCENARIO_URL`` column
        of the output of the ``lim ctu list`` command.

        To see help information about how the browser option works
        and how you can configure it, see ``lim about --help``.
        """)
    cmd_parser.epilog = usage_notes
    return cmd_parser
def get_parser(self, prog_name):
    """Build the parser for the command that gets scenario data components.

    Starts from the parser returned by the parent class's ``get_parser``
    and adds the overwrite/subdirectory flags, the protocol and line-limit
    options, the cache options, one required ``scenario`` positional and
    one or more ``data`` positionals. The epilog ends with the text
    returned by ``CTU_Dataset.get_disclaimer()``.

    Args:
        prog_name: Program name, passed through unchanged to the parent
            class's ``get_parser``.

    Returns:
        The configured parser.
    """
    parser = super().get_parser(prog_name)
    parser.formatter_class = argparse.RawDescriptionHelpFormatter
    parser.add_argument(
        '--force',
        action='store_true',
        dest='force',
        default=False,
        help="Force over-writing files if they exist (default: ``False``)"
    )
    parser.add_argument(
        '--no-subdir',
        action='store_true',
        dest='no_subdir',
        default=False,
        help=('Do not maintain scenario name subdirectory '
              '(default: ``False``)')
    )
    _default_protocols = ",".join(DEFAULT_PROTOCOLS)
    parser.add_argument(
        '-P', '--protocols',
        metavar='<protocol-list>',
        dest='protocols',
        # Strip each item so ``-P "tcp, udp"`` does not yield ``' udp'``.
        type=lambda s: [p.strip() for p in s.split(',')],
        # argparse runs ``type`` over string defaults too, so the parsed
        # value is a list whether or not ``-P`` is given.
        default=_default_protocols,
        help=("Protocols to include, or 'any' "
              f'(default: ``{_default_protocols}``)')
    )
    parser.add_argument(
        '-L', '--maxlines',
        metavar='<lines>',
        dest='maxlines',
        default=None,
        help="Maximum number of lines to get (default: ``None``)"
    )
    cache_file = CTU_Dataset.get_cache_file()
    parser.add_argument(
        '--cache-file',
        action='store',
        dest='cache_file',
        default=cache_file,
        help=('Cache file path for CTU metadata '
              '(Env: ``LIM_CTU_CACHE``; '
              f'default: ``{cache_file}``)')
    )
    parser.add_argument(
        '--ignore-cache',
        action='store_true',
        dest='ignore_cache',
        default=False,
        help="Ignore any cached results (default: ``False``)"
    )
    parser.add_argument(
        'scenario',
        nargs=1,
        type=normalize_ctu_name,
        default=None
    )
    # Computed once: used for the ``data`` choices and the epilog text.
    data_columns = [c.lower() for c in CTU_Dataset.get_data_columns()]
    data_types = ", ".join(data_columns)
    parser.add_argument(
        'data',
        nargs='+',
        type=str.lower,
        choices=data_columns + ['all'],
        default=None
    )
    parser.epilog = textwrap.dedent(f"""\
        Get one or more data components from a scenario. These
        components are the raw PCAP file, Netflow file, and other
        analytic products from intrusion detection system processing,
        etc.

        See ``lim ctu list --help`` for more on the ``scenario`` argument.

        For the ``data`` argument, you can use ``all`` to recursively
        download all scenario data, or one or more of the data files
        by type: ``{data_types}``

        By default, or when using the ``all`` attribute identifier,
        the file(s) are placed in a subdirectory with the full name
        of the scenario to better organize data across multiple
        scenarios. You can override this when getting specific files
        (i.e., not using ``all``) with the ``--no-subdir`` option.
        \n""") + CTU_Dataset.get_disclaimer()  # noqa
    return parser
def get_parser(self, prog_name):
    """Build the argument parser for the ``lim ctu list`` command.

    Starts from the parser returned by the parent class's ``get_parser``
    and adds the cache options, the date-range, hash and substring
    filters, the display flags, and zero or more ``scenario`` positionals
    (each converted with ``normalize_ctu_name``). The epilog holds usage
    examples and ends with the text returned by
    ``CTU_Dataset.get_disclaimer()``.

    Args:
        prog_name: Program name, passed through unchanged to the parent
            class's ``get_parser``.

    Returns:
        The configured parser.
    """
    parser = super().get_parser(prog_name)
    # Raw formatting keeps the epilog's hand-drawn tables intact.
    parser.formatter_class = argparse.RawDescriptionHelpFormatter
    cache_file = CTU_Dataset.get_cache_file()
    parser.add_argument(
        '--cache-file',
        action='store',
        dest='cache_file',
        default=cache_file,
        help=('Cache file path for CTU metadata '
              '(Env: ``LIM_CTU_CACHE``; '
              f'default: ``{cache_file}``)')
    )
    parser.add_argument(
        '--ignore-cache',
        action='store_true',
        dest='ignore_cache',
        default=False,
        help="Ignore any cached results (default: ``False``)"
    )
    parser.add_argument(
        '--date-starting',
        dest='date_starting',
        metavar='<YYYY-MM-DD>',
        default='1970-01-01',
        help=('List scenarios starting from this date '
              "(default: '1970-01-01')")
    )
    # Evaluated when the parser is built, so the default end date is the
    # day the command is run.
    today = arrow.now().format('YYYY-MM-DD')
    parser.add_argument(
        '--date-ending',
        dest='date_ending',
        metavar='<YYYY-MM-DD>',
        default=today,
        help=('List scenarios up to this date '
              f"(default: '{today}')")
    )
    parser.add_argument(
        '--fullnames',
        action='store_true',
        dest='fullnames',
        default=False,
        help=("Show full names")
    )
    parser.add_argument(
        '-a', '--everything',
        action='store_true',
        dest='everything',
        default=False,
        help=("Show all metadata columns "
              "(default : False)")
    )
    parser.add_argument(
        '--hash',
        dest='hash',
        metavar='<{md5_hash|sha256_hash}>',
        default=None,
        help=('Only list scenarios that involve a '
              'specific hash (default: ``None``)')
    )
    # The three "includes" help strings are built from adjacent literals;
    # the trailing space on the first piece keeps the words from running
    # together in the rendered help.
    parser.add_argument(
        '--malware-includes',
        dest='malware_includes',
        metavar='<string>',
        default=None,
        help=('Only list scenarios including this string '
              "in the 'Malware' column (default: ``None``)")
    )
    parser.add_argument(
        '--name-includes',
        dest='name_includes',
        metavar='<string>',
        default=None,
        help=('Only list scenarios including this string '
              "in the 'Capture_Name' column (default: ``None``)")
    )
    parser.add_argument(
        '--description-includes',
        dest='description_includes',
        metavar='<string>',
        default=None,
        help=('Only list scenarios including this string '
              'in the description (default: ``None``)')
    )
    parser.add_argument(
        'scenario',
        nargs='*',
        type=normalize_ctu_name,
        default=None
    )
    all_columns = ", ".join(
        [i.lower() for i in CTU_Dataset.get_all_columns()])
    default_columns = ", ".join(
        [i.lower() for i in CTU_Dataset.get_index_columns()])
    parser.epilog = textwrap.dedent(f"""\
        List scenarios (a.k.a., "captures") and related metadata.

        By default, all scenarios are listed. You can limit the output
        by filtering on several attributes (e.g., by ``Capture_Name``
        field, by date range, contents of the malware name or web page
        description, etc.) You can also limit the number of items shown
        if necessary when the number of results is large.

        The ``scenario`` argument equates to the field ``Capture_Name``
        in the index. This can be the scenario's full name (e.g.,
        ``CTU-IoT-Malware-Capture-34-1``) or an abbreviated form of the
        name (e.g., ``IoT-34-1`` or just ``34-1``).

        ::

            $ lim ctu list IoT-34-1 Botnet-42
            +----------------+-------------------------------+---------+
            | Infection_Date | Capture_Name                  | Malware |
            +----------------+-------------------------------+---------+
            | 2011-08-10     | CTU-Malware-Capture-Botnet-42 | Neeris  |
            | 2018-12-21     | CTU-IoT-Malware-Capture-34-1  | Mirai   |
            +----------------+-------------------------------+---------+

        A larger number of attributes are available. You can get all of
        them using the ``-a`` (``--everything``) flag. The subset of
        columns shown by default is: ``{default_columns}``

        Valid column labels for options ``-c``, ``--column``,
        ``--sort-column``, or to be shown with ``-a``, include:
        ``{all_columns}``

        Using ``lim ctu list -a`` produces very wide output. Even if
        many fields are ``None`` and ``--fit-width`` is included, it is
        still unwieldy for just one scenario as you can see here.
        Consider using ``lim ctu show`` instead.

        ::

            $ lim ctu list --name-includes IoT --malware-includes muhstik --fit-width -a
            +----------------+------------------------+---------+------------------------+------------------------+------------------------+------------------------+---------+------------------------+------------------------+-----------------------------+
            | Infection_Date | Capture_Name           | Malware | MD5                    | SHA256                 | Capture_URL            | ZIP                    | LABELED | BINETFLOW              | PCAP                   | WEBLOGNG                    |
            +----------------+------------------------+---------+------------------------+------------------------+------------------------+------------------------+---------+------------------------+------------------------+-----------------------------+
            | 2018-05-19     | CTU-IoT-Malware-       | Muhstik | b8849fe97e39ae3afd6def | 5ce13670bc875e913e6f08 | https://mcfp.felk.cvut | fce7b8bbd1c1fba1d75b9d | None    | 2018-05-21_capture.bin | 2018-05-21_capture.pca | 2018-05-21_capture.weblogng |
            |                | Capture-3-1            |         | 618568bb09             | 7a4ac0a9e343347d5babb3 | .cz/publicDatasets/IoT | c1a60b25f49f68c9ec16b3 |         | etflow                 | p                      |                             |
            |                |                        |         |                        | b5c63e1d1b199371f69a   | Datasets/CTU-IoT-      | 656b52ed28290fc93c72.z |         |                        |                        |                             |
            |                |                        |         |                        |                        | Malware-Capture-3-1    | ip                     |         |                        |                        |                             |
            +----------------+------------------------+---------+------------------------+------------------------+------------------------+------------------------+---------+------------------------+------------------------+-----------------------------+

        There are also a number of filters that can be applied, including
        MD5 and SHA256 hash, substrings in the ``Capture_Name`` or
        ``Malware`` fields, start and end dates, or description of the
        scenario in its web page.

        The ``--hash`` option makes an exact match on any of the stored
        hash values. This is the hash of the executable binary referenced
        in the ``ZIP`` column.

        This example uses the most frequently occurring MD5 hash as seen
        in ``lim ctu stats --help``::

            $ lim ctu list --hash e515267ba19417974a63b51e4f7dd9e9
            +----------------+----------------------------------+---------+
            | Infection_Date | Capture_Name                     | Malware |
            +----------------+----------------------------------+---------+
            | 2015-03-04     | CTU-Malware-Capture-Botnet-110-1 | HTBot   |
            | 2015-03-04     | CTU-Malware-Capture-Botnet-110-2 | HTBot   |
            | 2015-03-09     | CTU-Malware-Capture-Botnet-110-3 | HTBot   |
            | 2015-03-09     | CTU-Malware-Capture-Botnet-111-2 | HTBot   |
            | 2015-04-09     | CTU-Malware-Capture-Botnet-110-4 | HTBot   |
            | 2015-04-09     | CTU-Malware-Capture-Botnet-111-3 | HTBot   |
            | 2015-04-22     | CTU-Malware-Capture-Botnet-110-5 | HTBot   |
            | 2015-04-22     | CTU-Malware-Capture-Botnet-111-4 | HTBot   |
            | 2015-04-23     | CTU-Malware-Capture-Botnet-110-6 | HTBot   |
            | 2015-06-09     | CTU-Malware-Capture-Botnet-111-5 | HTBot   |
            +----------------+----------------------------------+---------+

        The ``--malware-includes`` option is rather simplistic, matching
        any occurrence of the substring (case insensitive) in the
        ``Malware`` field. The same applies for the ``--name-includes``
        option with respect to the ``Capture_Name`` field. For more
        accurate matching, you may want to use something like the
        ``-f csv`` option and match on regular expressions using one of
        the ``grep`` variants. Or add regular expression handling and
        submit a pull request! ;)
        \n""") + CTU_Dataset.get_disclaimer()  # noqa
    return parser