Esempio n. 1
0
class CTUShow(ShowOne):
    """Show scenario details."""

    log = logging.getLogger(__name__)

    def get_parser(self, prog_name):
        """Return the argument parser for this command.

        Adds the ``--cache-file`` and ``--ignore-cache`` options plus an
        optional positional ``scenario`` (normalized by
        ``normalize_ctu_name``), and installs a raw-formatted epilog
        containing sample output.
        """
        parser = super().get_parser(prog_name)
        # Raw formatter keeps the table in the epilog from being re-wrapped.
        parser.formatter_class = argparse.RawDescriptionHelpFormatter
        default_cache = CTU_Dataset.get_cache_file()
        cache_help = (
            'Cache file path for CTU metadata '
            '(Env: ``LIM_CTU_CACHE``; '
            f'default: ``{default_cache}``)'
        )
        parser.add_argument('--cache-file',
                            dest='cache_file',
                            action='store',
                            default=default_cache,
                            help=cache_help)
        parser.add_argument('--ignore-cache',
                            dest='ignore_cache',
                            action='store_true',
                            default=False,
                            help="Ignore any cached results (default: ``False``)")
        # The scenario is optional at the parser level (nargs='?').
        parser.add_argument('scenario',
                            nargs='?',
                            type=normalize_ctu_name,
                            default=None)
        parser.epilog = textwrap.dedent("""\
            Shows details about an individual scenario in tabular form.

            See ``lim ctu list --help`` for more on the ``scenario`` argument.

            ::

                $ lim ctu show iot-3-1
                +----------------+----------------------------------------------------------------------------------+
                | Field          | Value                                                                            |
                +----------------+----------------------------------------------------------------------------------+
                | Infection_Date | 2018-05-19                                                                       |
                | Capture_Name   | CTU-IoT-Malware-Capture-3-1                                                      |
                | Malware        | Muhstik                                                                          |
                | MD5            | b8849fe97e39ae3afd6def618568bb09                                                 |
                | SHA256         | 5ce13670bc875e913e6f087a4ac0a9e343347d5babb3b5c63e1d1b199371f69a                 |
                | Capture_URL    | https://mcfp.felk.cvut.cz/publicDatasets/IoTDatasets/CTU-IoT-Malware-Capture-3-1 |
                | ZIP            | fce7b8bbd1c1fba1d75b9dc1a60b25f49f68c9ec16b3656b52ed28290fc93c72.zip             |
                | LABELED        | None                                                                             |
                | BINETFLOW      | 2018-05-21_capture.binetflow                                                     |
                | PCAP           | 2018-05-21_capture.pcap                                                          |
                | WEBLOGNG       | 2018-05-21_capture.weblogng                                                      |
                +----------------+----------------------------------------------------------------------------------+

           """)  # noqa
        return parser

    def take_action(self, parsed_args):
        """Return ``(columns, data)`` describing the requested scenario.

        Exits the process with status 1 when the resolved scenario name
        is not reported as valid by the metadata object.
        """
        self.log.debug('[+] showing scenario details')
        # Reuse a metadata object if one was already attached to this
        # command instance; otherwise build one from the CLI options.
        if not hasattr(self, 'ctu_metadata'):
            self.ctu_metadata = CTU_Dataset(
                cache_file=parsed_args.cache_file,
                ignore_cache=parsed_args.ignore_cache,
                debug=self.app_args.debug)
        metadata = self.ctu_metadata
        metadata.load_ctu_metadata()
        # Expand a possibly abbreviated name before validating it.
        resolved = metadata.get_fullname(name=parsed_args.scenario)
        if not metadata.is_valid_scenario(resolved):
            sys.exit(1)
        return (metadata.get_extended_columns(),
                metadata.get_extended_data(resolved))
Esempio n. 2
0
class Test_CTU_Dataset(unittest.TestCase):
    """Unit tests for ``CTU_Dataset`` and related helper functions.

    Every test runs against a ``CTU_Dataset`` built with ``TEST_CACHE``
    as its cache file (see ``setUp``).
    """

    def setUp(self):
        self.ctu_dataset = CTU_Dataset(cache_file=TEST_CACHE)
        self.ctu_dataset.load_ctu_metadata()

    def tearDown(self):
        pass

    # --- cache file helpers -------------------------------------------------

    def test_cache_exists(self):
        self.assertTrue(os.path.exists(TEST_CACHE))

    def test_get_file_last_mtime_exists(self):
        self.assertNotEqual(
            get_file_last_mtime(file_path=TEST_CACHE), 0)

    def test_get_file_last_mtime_notexists(self):
        self.assertEqual(
            get_file_last_mtime(file_path=TEST_EMPTY_CACHE), 0)

    def test_get_file_last_mtime_nopath(self):
        self.assertRaises(RuntimeError,
                          get_file_last_mtime)

    def test_get_file_last_mtime_relative_path(self):
        self.assertRaises(RuntimeError,
                          get_file_last_mtime,
                          file_path='../../../etc/passwd')

    def test_get_file_last_mtime_clean_empty(self):
        os.makedirs(os.path.dirname(TEST_EMPTY_CACHE), exist_ok=True)
        # Create an empty file; the context manager guarantees it is closed
        # even if something fails in between.
        with open(TEST_EMPTY_CACHE, 'w'):
            pass
        self.assertEqual(
            get_file_last_mtime(file_path=TEST_EMPTY_CACHE, clean=True), 0)
        # With clean=True the empty file is expected to be gone afterwards.
        self.assertRaises(FileNotFoundError,
                          open,
                          TEST_EMPTY_CACHE,
                          'r')

    # --- column accessors ---------------------------------------------------

    def test_get_data_columns(self):
        columns = CTU_Dataset.get_data_columns()
        self.assertIs(type(columns), list)
        self.assertGreater(len(columns), 0)

    def test_get_index_columns(self):
        columns = CTU_Dataset.get_index_columns()
        self.assertIs(type(columns), list)
        self.assertGreater(len(columns), 0)

    def test_get_all_columns(self):
        columns = CTU_Dataset.get_all_columns()
        self.assertIs(type(columns), list)
        self.assertGreater(len(columns), 0)

    def test_get_data_columns_match_constant(self):
        # Previously also named ``test_get_data_columns``, which silently
        # replaced the earlier test of that name so it never ran.
        items = list(CTU_Dataset.__DATA_COLUMNS__)
        self.assertListEqual(items, self.ctu_dataset.get_data_columns())

    def test_get_disclaimer(self):
        disclaimer = CTU_Dataset.get_disclaimer()
        self.assertIn("http://dx.doi.org/10.1016/j.cose.2014.05.011",
                      disclaimer)

    # --- scenario lookup ----------------------------------------------------

    def test_get_scenarios(self):
        scenarios = self.ctu_dataset.get_scenarios()
        self.assertIs(type(scenarios), dict)
        self.assertIn('CTU-Malware-Capture-Botnet-48', scenarios)

    def test_get_scenario_names(self):
        scenario_names = self.ctu_dataset.get_scenario_names()
        self.assertIs(type(scenario_names), list)
        self.assertGreater(len(scenario_names), 0)
        self.assertEqual(scenario_names[0], 'CTU-Malware-Capture-Botnet-90',
                         msg=f'scenario_names[0]={scenario_names[0]:40}...')

    def test_is_valid_scenario_short_MATCH(self):
        # An abbreviated name is not itself a valid scenario name.
        self.assertFalse(self.ctu_dataset.is_valid_scenario('Botnet-48'))

    def test_is_valid_scenario_long_MATCH(self):
        self.assertTrue(
            self.ctu_dataset.is_valid_scenario('CTU-Malware-Capture-Botnet-48'))

    def test_is_valid_scenario_FAIL(self):
        self.assertFalse(
            self.ctu_dataset.is_valid_scenario('CTU-Milware-Copture-Botnet-48'))

    def test_get_scenario_data_url_SUCCESS(self):
        self.assertEqual(
            self.ctu_dataset.get_scenario_data('CTU-Malware-Capture-Botnet-48',
                                               'Capture_URL'),
            'https://mcfp.felk.cvut.cz/publicDatasets/CTU-Malware-Capture-Botnet-48')

    def test_get_scenario_data_url_FAIL(self):
        # assertRaises makes the test fail when no exception is raised;
        # the former try/except passed vacuously in that case.
        with self.assertRaises(RuntimeError) as ctx:
            self.ctu_dataset.get_scenario_data(
                'CTU-Malware-Capture-Botnet-48', 'Capture_ORL')
        self.assertIn('is not supported', str(ctx.exception))

    def test_get_scenario_data_pcap(self):
        url = self.ctu_dataset.get_scenario_data('CTU-Malware-Capture-Botnet-113-1',
                                                 'PCAP')
        self.assertEqual(
            url,
            'https://mcfp.felk.cvut.cz/publicDatasets/CTU-Malware-Capture-Botnet-113-1/2015-03-12_capture-win6.pcap',
            msg=f'url={url}')

    def test_get_scenario_page_short(self):
        self.assertIn('DOCTYPE HTML PUBLIC',
                      self.ctu_dataset.get_scenario_page('Malware-Botnet-42'))

    def test_get_scenario_page_full(self):
        self.assertIn('DOCTYPE HTML PUBLIC',
                      self.ctu_dataset.get_scenario_page('CTU-Malware-Capture-Botnet-42'))

    def test_filename_from_url(self):
        filename = self.ctu_dataset.filename_from_url(
            'https://mcfp.felk.cvut.cz/publicDatasets/CTU-Mixed-Capture-1/2015-07-28_mixed.pcap')
        self.assertEqual(filename, '2015-07-28_mixed.pcap',
                         msg=f'filename={filename}')

    # --- name expansion / abbreviation --------------------------------------

    def test_get_fullname_short_5parts(self):
        fullname = self.ctu_dataset.get_fullname(name='CTU-Malware-Capture-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_4parts(self):
        fullname = self.ctu_dataset.get_fullname('Malware-Capture-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_3parts1(self):
        fullname = self.ctu_dataset.get_fullname(name='Malware-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_3parts2(self):
        fullname = self.ctu_dataset.get_fullname(name='Malware-Capture-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_2parts1(self):
        fullname = self.ctu_dataset.get_fullname(name='Malware-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_2parts2(self):
        fullname = self.ctu_dataset.get_fullname(name='Capture-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_1part_number(self):
        fullname = self.ctu_dataset.get_fullname(name='42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_1part_name(self):
        self.assertRaises(SystemExit,
                          self.ctu_dataset.get_fullname,
                          name='IoT')

    def test_get_fullname_short_fail(self):
        fullname = self.ctu_dataset.get_fullname(name='Botnet-1')
        self.assertIsNone(fullname)

    def test_get_fullname_typo(self):
        fullname = self.ctu_dataset.get_fullname(name='CTU_Malware_Capture-Botnet-42')
        self.assertIsNone(fullname)

    def test_get_shortname_match(self):
        shortname = self.ctu_dataset.get_shortname(name='CTU-Malware-Capture-Botnet-42')
        self.assertEqual(shortname, 'Malware-Botnet-42')

    # --- name normalization -------------------------------------------------

    def test_normalize_ctu_name_lower(self):
        self.assertEqual(normalize_ctu_name('ctu-malware-botnet-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('iot-malware-33-1'),
                         'IoT-Malware-33-1')

    def test_normalize_ctu_name_upper(self):
        self.assertEqual(normalize_ctu_name('CTU-MALWARE-BOTNET-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('IOT-MALWARE-33-1'),
                         'IoT-Malware-33-1')

    def test_normalize_ctu_name_mixed(self):
        self.assertEqual(normalize_ctu_name('Ctu-Malware-Botnet-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('Iot-Malware-33-1'),
                         'IoT-Malware-33-1')

    def test_normalize_ctu_name_random(self):
        self.assertEqual(normalize_ctu_name('CTU-MALWARE-BOTNET-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('IoT-MaLwArE-33-1'),
                         'IoT-Malware-33-1')
Esempio n. 3
0
class CTUGet(Command):
    """Get CTU dataset components."""

    log = logging.getLogger(__name__)

    def get_parser(self, prog_name):
        """Return the argument parser for this command.

        Defines the download options (``--force``, ``--no-subdir``,
        ``--protocols``, ``--maxlines``), the metadata cache options, and
        the positional ``scenario`` and ``data`` arguments.
        """
        parser = super().get_parser(prog_name)
        parser.formatter_class = argparse.RawDescriptionHelpFormatter
        parser.add_argument(
            '--force',
            action='store_true',
            dest='force',
            default=False,
            help="Force over-writing files if they exist (default: ``False``)")
        parser.add_argument('--no-subdir',
                            action='store_true',
                            dest='no_subdir',
                            default=False,
                            help=('Do not maintain scenario name subdirectory '
                                  '(default: ``False``)'))
        _default_protocols = ",".join(DEFAULT_PROTOCOLS)
        # argparse applies ``type`` to string defaults as well, so the
        # comma-joined default is split into a list just like user input.
        parser.add_argument('-P',
                            '--protocols',
                            metavar='<protocol-list>',
                            dest='protocols',
                            type=lambda s: s.split(','),
                            default=_default_protocols,
                            help=("Protocols to include, or 'any' "
                                  f'(default: ``{_default_protocols}``)'))
        parser.add_argument(
            '-L',
            '--maxlines',
            metavar='<lines>',
            dest='maxlines',
            default=None,
            help="Maximum number of lines to get (default: ``None``)")
        cache_file = CTU_Dataset.get_cache_file()
        parser.add_argument('--cache-file',
                            action='store',
                            dest='cache_file',
                            default=cache_file,
                            help=('Cache file path for CTU metadata '
                                  '(Env: ``LIM_CTU_CACHE``; '
                                  f'default: ``{cache_file}``)'))
        parser.add_argument(
            '--ignore-cache',
            action='store_true',
            dest='ignore_cache',
            default=False,
            help="Ignore any cached results (default: ``False``)")
        parser.add_argument('scenario',
                            nargs=1,
                            type=normalize_ctu_name,
                            default=None)
        data_columns = CTU_Dataset.get_data_columns()
        data_types = ", ".join(i.lower() for i in data_columns)
        parser.add_argument(
            'data',
            nargs='+',
            type=str.lower,
            choices=[c.lower() for c in data_columns + ['all']],
            default=None)
        parser.epilog = textwrap.dedent(f"""\
            Get one or more data components from a scenario. These
            components are the raw PCAP file, Netflow file, and
            other analytic products from intrusion detection system
            processing, etc.

            See ``lim ctu list --help`` for more on the ``scenario`` argument.

            For the ``data`` argument, you can use ``all`` to recursively
            download all scenario data, or one or more of the data
            files by type: ``{data_types}``

            By default, or when using the ``all`` attribute identifier,
            the file(s) are placed in a subdirectory with the full name
            of the scenario to better organize data across multiple
            scenarios. You can override this when getting specific files
            (i.e., not using ``all``) with the ``--no-subdir`` option.
           \n""") + CTU_Dataset.get_disclaimer()  # noqa
        return parser

    def take_action(self, parsed_args):
        """Download the requested data for one scenario.

        Raises:
            RuntimeError: if the requested scenario is not valid.
        """
        self.log.debug('[+] getting CTU data')
        if not hasattr(self, 'ctu_metadata'):
            self.ctu_metadata = CTU_Dataset(
                cache_file=parsed_args.cache_file,
                ignore_cache=parsed_args.ignore_cache,
                debug=self.app_args.debug)
            # TODO(dittrich): Work this back into init() method.
        self.ctu_metadata.load_ctu_metadata()

        requested = parsed_args.scenario[0]
        scenario = self.ctu_metadata.get_fullname(name=requested)
        if not self.ctu_metadata.is_valid_scenario(scenario):
            # Report the name the user typed: the resolved name may be
            # ``None`` when it could not be expanded.
            raise RuntimeError(f"[-] scenario '{requested}' does not exist")
        if parsed_args.no_subdir:
            data_dir = self.app_args.data_dir
        else:
            data_dir = os.path.join(self.app_args.data_dir, scenario)
        if 'all' in parsed_args.data:
            self.recursive_get_all(name=scenario, data_dir=data_dir)
        else:
            for attribute in parsed_args.data:
                self.log.debug(f'[+] downloading {attribute} data '
                               f"for scenario '{scenario}' to {data_dir}")
                self.ctu_metadata.fetch_scenario_content_byattribute(
                    data_dir=data_dir, name=scenario, attribute=attribute)

    def _require_wget(self, stderr=subprocess.STDOUT, shell=False):
        """Raise ``RuntimeError`` unless a usable ``wget`` can be run."""
        try:
            banner = subprocess.check_output(  # nosec
                ['wget', '-h'], stderr=stderr,
                shell=shell).decode('UTF-8').splitlines()
        except Exception as err:
            # Previously this error was recorded but never raised, so a
            # missing wget only surfaced later as an unrelated exception.
            raise RuntimeError(f'[-] cannot run "wget": {err}') from err
        if len(banner) > 1 and ' Wget ' not in banner[0]:
            raise RuntimeError('[-] cannot run "wget"')

    def recursive_get_all(self,
                          name,
                          data_dir=None,
                          stderr=subprocess.STDOUT,
                          shell=False):
        """Use wget to recursively get all scenario data.

        Args:
            name: Full scenario name whose ``Capture_URL`` is mirrored.
            data_dir: Destination directory; defaults to the current
                working directory at call time. It is created if missing.
            stderr: Passed to ``subprocess.check_output``.
            shell: Passed to ``subprocess.check_output``.

        Raises:
            RuntimeError: if ``wget`` cannot be run.
            SystemExit: with wget's return code if the download fails.
        """
        # Resolve the default at call time (not import time) and make the
        # path absolute: it is used both as wget's working directory and as
        # its ``-P`` prefix, so a relative path would nest the output.
        if data_dir is None:
            data_dir = os.getcwd()
        data_dir = os.path.abspath(data_dir)
        os.makedirs(data_dir, exist_ok=True)
        self._require_wget(stderr=stderr, shell=shell)

        url = self.ctu_metadata.get_scenario_data(name=name,
                                                  attribute='Capture_URL')
        url_path = urlparse(url).path.lstrip('/')
        # Strip every path component of the scenario URL from saved names.
        cut_dirs = len(url_path.split('/'))
        # NOTE(review): '--no-check-certificate' disables TLS certificate
        # verification for the download; confirm this is still required.
        cmd = [
            'wget', '--mirror', '-l3', '--no-parent', '--no-host-directories',
            f'--cut-dirs={cut_dirs}', '--reject=index.html?*', '-P', data_dir,
            '--no-check-certificate'
        ]
        if not url.endswith('/'):
            # Required by wget --no-parent to work right
            url = f"{url}/"
        cmd.append(url)
        self.log.debug(f'[+] cmd: {" ".join(cmd)}')
        self.log.info('[+] recursively getting all data '
                      f"from {url} to '{data_dir}'")
        try:
            subprocess.check_output(  # nosec
                cmd, cwd=data_dir, stderr=stderr, shell=shell)
        except subprocess.CalledProcessError as err:
            # Show wget's own captured output, decoded, rather than the
            # help text left over from the availability probe.
            output = err.output or b''
            if isinstance(output, bytes):
                output = output.decode('UTF-8', errors='replace')
            sys.stderr.write(f'{output}\n')
            sys.exit(err.returncode)
Esempio n. 4
0
class CTUOverview(Command):
    """Get CTU dataset overview."""

    log = logging.getLogger(__name__)

    def get_parser(self, prog_name):
        """Return the argument parser for this command.

        Adds the browser options (via ``add_browser_options``), the
        metadata cache options, and zero or more positional ``scenario``
        names.
        """
        parser = super().get_parser(prog_name)
        parser.formatter_class = argparse.RawDescriptionHelpFormatter
        parser = add_browser_options(parser)
        cache_file = CTU_Dataset.get_cache_file()
        parser.add_argument(
            '--cache-file',
            action='store',
            dest='cache_file',
            default=cache_file,
            help=('Cache file path for CTU metadata '
                  '(Env: ``LIM_CTU_CACHE``; '
                  f'default: ``{cache_file}``)')
        )
        parser.add_argument(
            '--ignore-cache',
            action='store_true',
            dest='ignore_cache',
            default=False,
            help="Ignore any cached results (default: ``False``)"
        )
        parser.add_argument(
            'scenario',
            nargs='*',
            type=normalize_ctu_name,
            default=None)
        parser.epilog = textwrap.dedent("""\
            Opens a browser for the web page containing the scenario
            descriptions and data links.

            Arguments are scenario names using either the full name
            form (e.g., ``CTU-Malware-Capture-Botnet-123-1``) or an
            abbreviated form (e.g., ``Botnet-123-1``).

            The URL to use is the one seen in the ``SCENARIO_URL`` column
            of the output of the ``lim ctu list`` command.

            To see help information about how the browser option works and
            how you can configure it, see ``lim about --help``.
            """)
        return parser

    def take_action(self, parsed_args):
        """Open a browser on the overview page or on each scenario's page.

        With no scenarios given, the disclaimer is printed and the
        datasets overview URL is opened. Otherwise the ``Capture_URL`` of
        every named scenario is opened.
        """
        self.log.debug('[+] showing overview of CTU datasets')
        # TODO(dittrich): Getting really not DRY: Move this into class.
        if not hasattr(self, 'ctu_metadata'):
            self.ctu_metadata = CTU_Dataset(
                cache_file=parsed_args.cache_file,
                ignore_cache=parsed_args.ignore_cache,
                debug=self.app_args.debug)
        self.ctu_metadata.load_ctu_metadata()
        # Expand scenario names if abbreviated. This is done through the
        # loaded metadata instance (as the other CTU commands do) rather
        # than on the bare class before any metadata is available.
        scenarios = [self.ctu_metadata.get_fullname(name=s)
                     for s in parsed_args.scenario]
        pages = []
        if not scenarios:
            print("{}".format(CTU_Dataset.get_disclaimer()))
            pages.append(CTU_Dataset.get_ctu_datasets_overview_url())
        else:
            for scenario in scenarios:
                page = self.ctu_metadata.get_scenario_data(scenario,
                                                           'Capture_URL')
                if page is not None:
                    pages.append(page)
        for page in pages:
            open_browser(page=page,
                         browser=parsed_args.browser,
                         force=parsed_args.force)
Esempio n. 5
0
class CTUStats(Lister):
    """Show CTU dataset statistics."""

    log = logging.getLogger(__name__)

    def get_parser(self, prog_name):
        """Return the argument parser for this command.

        Adds the metadata cache options and an optional positional
        ``attribute`` restricted to the lower-cased index column names.
        """
        parser = super().get_parser(prog_name)
        parser.formatter_class = argparse.RawDescriptionHelpFormatter
        cache_file = CTU_Dataset.get_cache_file()
        # Help text kept in line with the other CTU commands' --cache-file.
        parser.add_argument('--cache-file',
                            action='store',
                            dest='cache_file',
                            default=cache_file,
                            help=('Cache file path for CTU metadata '
                                  '(Env: ``LIM_CTU_CACHE``; '
                                  f'default: ``{cache_file}``)'))
        parser.add_argument(
            '--ignore-cache',
            action='store_true',
            dest='ignore_cache',
            default=False,
            help="Ignore any cached results (default: ``False``)")
        index_columns = [
            c.lower() for c in CTU_Dataset.get_index_columns(min=False)
        ]
        attributes = ", ".join(index_columns)
        parser.add_argument(
            'attribute',
            nargs='?',
            default='infection_date',
            choices=index_columns,
            help='Attribute to quantify (default: ``infection_date``)')
        parser.epilog = textwrap.dedent(f"""\
            Shows the selected dataset attribute and a count of unique
            instances in reverse order of occurrence.

            ::

                $ lim ctu stats md5 | head
                +----------------------------------+-------+
                | MD5                              | Count |
                +----------------------------------+-------+
                | e515267ba19417974a63b51e4f7dd9e9 |    10 |
                | -                                |     9 |
                | e1090d7126dd88d0d1d39b68ea3aae11 |     6 |
                | 05a00c320754934782ec5dec1d5c0476 |     6 |
                | 48616dd47e12e369feef53a57830158a |     5 |
                | 11bc606269a161555431bacf37f7c1e4 |     5 |
                | bf08e6b02e00d2bc6dd493e93e69872f |     4 |


            Possible attributes are those that come from the CTU index
            file (``{attributes}``).

            To see more detailed descriptions of the CTU datasets as a whole,
            use ``lim ctu overview`` to view the appropriate web page.
           """)  # noqa
        return parser

    def take_action(self, parsed_args):
        """Return ``(columns, data)`` counting each value of the attribute.

        ``data`` is a list of ``(value, count)`` pairs ordered from the
        most to the least frequent value.
        """
        self.log.debug('[+] showing CTU data statistics')
        if not hasattr(self, 'ctu_metadata'):
            self.ctu_metadata = CTU_Dataset(
                cache_file=parsed_args.cache_file,
                ignore_cache=parsed_args.ignore_cache,
                debug=self.app_args.debug)
        self.ctu_metadata.load_ctu_metadata()
        columns = (self.ctu_metadata.get_column_string(parsed_args.attribute),
                   'Count')
        rows = self.ctu_metadata.get_metadata(
            columns=[parsed_args.attribute], fullnames=True)
        count = {}
        for row in rows:
            value = row[0]
            # Handle null values vs. no key (either way, be consistent)
            if value == '':
                value = None
            count[value] = count.get(value, 0) + 1
        data = [(value, count[value])
                for value in sorted(count, key=count.get, reverse=True)]
        return columns, data
Esempio n. 6
0
class CTUList(Lister):
    """List CTU dataset metadata."""

    log = logging.getLogger(__name__)

    def get_parser(self, prog_name):
        """Build the argument parser for the CTU list command.

        Starts from the parent class's parser and adds cache-control
        options, date-range, hash and substring filter options, the
        column-selection flags, and zero or more positional ``scenario``
        names (each converted with ``normalize_ctu_name``). The epilog holds
        the long-form help text followed by the text returned by
        ``CTU_Dataset.get_disclaimer()``.

        Returns the configured parser.
        """
        parser = super().get_parser(prog_name)
        # The epilog contains hand-formatted tables and literal blocks, so
        # argparse must not re-wrap it.
        parser.formatter_class = argparse.RawDescriptionHelpFormatter
        cache_file = CTU_Dataset.get_cache_file()
        parser.add_argument(
            '--cache-file',
            action='store',
            dest='cache_file',
            default=cache_file,
            help=('Cache file path for CTU metadata '
                  '(Env: ``LIM_CTU_CACHE``; '
                  f'default: ``{cache_file}``)'))
        parser.add_argument(
            '--ignore-cache',
            action='store_true',
            dest='ignore_cache',
            default=False,
            help="Ignore any cached results (default: ``False``)")
        parser.add_argument(
            '--date-starting',
            dest='date_starting',
            metavar='<YYYY-MM-DD>',
            default='1970-01-01',
            help=('List scenarios starting from this date '
                  "(default: '1970-01-01')"))
        # The default end date is computed when the parser is built.
        today = arrow.now().format('YYYY-MM-DD')
        parser.add_argument(
            '--date-ending',
            dest='date_ending',
            metavar='<YYYY-MM-DD>',
            default=today,
            help=('List scenarios up to this date '
                  f"(default: '{today}')"))
        parser.add_argument(
            '--fullnames',
            action='store_true',
            dest='fullnames',
            default=False,
            help=("Show full names"))
        parser.add_argument(
            '-a',
            '--everything',
            action='store_true',
            dest='everything',
            default=False,
            help=("Show all metadata columns "
                  "(default : False)"))
        parser.add_argument(
            '--hash',
            dest='hash',
            metavar='<{md5_hash|sha256_hash}>',
            default=None,
            help=('Only list scenarios that involve a '
                  'specific hash (default: ``None``)'))
        # The three "includes" help strings are built by implicit string
        # concatenation, so the first fragment must end with a space.
        parser.add_argument(
            '--malware-includes',
            dest='malware_includes',
            metavar='<string>',
            default=None,
            help=('Only list scenarios including this string '
                  "in the 'Malware' column (default: ``None``)"))
        parser.add_argument(
            '--name-includes',
            dest='name_includes',
            metavar='<string>',
            default=None,
            help=('Only list scenarios including this string '
                  "in the 'Capture_Name' column (default: ``None``)"))
        parser.add_argument(
            '--description-includes',
            dest='description_includes',
            metavar='<string>',
            default=None,
            help=('Only list scenarios including this string '
                  'in the description (default: ``None``)'))
        parser.add_argument(
            'scenario',
            nargs='*',
            type=normalize_ctu_name,
            default=None)
        all_columns = ", ".join(
            name.lower() for name in CTU_Dataset.get_all_columns())
        default_columns = ", ".join(
            name.lower() for name in CTU_Dataset.get_index_columns())
        parser.epilog = textwrap.dedent(f"""\
            List scenarios (a.k.a., "captures") and related metadata.

            By default, all scenarios are listed. You can limit the output
            by filtering on several attributes (e.g., by ``Capture_Name``
            field, by date range, contents of the malware name or web page
            description, etc.) You can also limit the number of items
            shown if necessary when the number of results is large.

            The ``scenario`` argument equates to the field ``Capture_Name`` in
            the index. This can be the scenario's full name (e.g.,
            ``CTU-IoT-Malware-Capture-34-1``) or an abbreviated form of the
            name (e.g., ``IoT-34-1`` or just ``34-1``).

            ::

                $ lim ctu list IoT-34-1 Botnet-42
                +----------------+-------------------------------+---------+
                | Infection_Date | Capture_Name                  | Malware |
                +----------------+-------------------------------+---------+
                | 2011-08-10     | CTU-Malware-Capture-Botnet-42 | Neeris  |
                | 2018-12-21     | CTU-IoT-Malware-Capture-34-1  | Mirai   |
                +----------------+-------------------------------+---------+


            A larger number of attributes are available. You can get all of them
            using the ``-a`` (``--everything``) flag. The subset of columns shown
            by default is: ``{default_columns}``

            Valid column labels for options ``-c``, ``--column``, ``--sort-column``,
            or to be shown with ``-a``, include:
            ``{all_columns}``

            Using ``lim ctu list -a`` produces very wide output. Even if many fields
            are ``None`` and ``--fit-width`` is included, it is still unwieldy for just
            one scenario as you can see here. Consider using ``lim ctu show`` instead.

            ::

                $ lim ctu list --name-includes IoT --malware-includes muhstik --fit-width -a
                +----------------+------------------------+---------+------------------------+------------------------+------------------------+------------------------+---------+------------------------+------------------------+-----------------------------+
                | Infection_Date | Capture_Name           | Malware | MD5                    | SHA256                 | Capture_URL            | ZIP                    | LABELED | BINETFLOW              | PCAP                   | WEBLOGNG                    |
                +----------------+------------------------+---------+------------------------+------------------------+------------------------+------------------------+---------+------------------------+------------------------+-----------------------------+
                | 2018-05-19     | CTU-IoT-Malware-       | Muhstik | b8849fe97e39ae3afd6def | 5ce13670bc875e913e6f08 | https://mcfp.felk.cvut | fce7b8bbd1c1fba1d75b9d | None    | 2018-05-21_capture.bin | 2018-05-21_capture.pca | 2018-05-21_capture.weblogng |
                |                | Capture-3-1            |         | 618568bb09             | 7a4ac0a9e343347d5babb3 | .cz/publicDatasets/IoT | c1a60b25f49f68c9ec16b3 |         | etflow                 | p                      |                             |
                |                |                        |         |                        | b5c63e1d1b199371f69a   | Datasets/CTU-IoT-      | 656b52ed28290fc93c72.z |         |                        |                        |                             |
                |                |                        |         |                        |                        | Malware-Capture-3-1    | ip                     |         |                        |                        |                             |
                +----------------+------------------------+---------+------------------------+------------------------+------------------------+------------------------+---------+------------------------+------------------------+-----------------------------+


            There are also a number of filters that can be applied, including MD5
            and SHA256 hash, substrings in the ``Capture_Name`` or ``Malware``
            fields, start and end dates, or description of the scenario in its
            web page.

            The ``--hash`` option makes an exact match on any of the stored hash
            values.  This is the hash of the executable binary referenced in the
            ``ZIP`` column. This example uses the most frequently occurring MD5
            hash as seen in ``lim ctu stats --help``::

                $ lim ctu list --hash e515267ba19417974a63b51e4f7dd9e9
                +----------------+----------------------------------+---------+
                | Infection_Date | Capture_Name                     | Malware |
                +----------------+----------------------------------+---------+
                | 2015-03-04     | CTU-Malware-Capture-Botnet-110-1 | HTBot   |
                | 2015-03-04     | CTU-Malware-Capture-Botnet-110-2 | HTBot   |
                | 2015-03-09     | CTU-Malware-Capture-Botnet-110-3 | HTBot   |
                | 2015-03-09     | CTU-Malware-Capture-Botnet-111-2 | HTBot   |
                | 2015-04-09     | CTU-Malware-Capture-Botnet-110-4 | HTBot   |
                | 2015-04-09     | CTU-Malware-Capture-Botnet-111-3 | HTBot   |
                | 2015-04-22     | CTU-Malware-Capture-Botnet-110-5 | HTBot   |
                | 2015-04-22     | CTU-Malware-Capture-Botnet-111-4 | HTBot   |
                | 2015-04-23     | CTU-Malware-Capture-Botnet-110-6 | HTBot   |
                | 2015-06-09     | CTU-Malware-Capture-Botnet-111-5 | HTBot   |
                +----------------+----------------------------------+---------+


            The ``--malware-includes`` option is rather simplistic, matching any
            occurrence of the substring (case insensitive) in the ``Malware`` field.
            The same applies for the ``--name-includes`` option with respect to the
            ``Capture_Name`` field. For more accurate matching, you may want to use
            something like the ``-f csv`` option and match on regular expressions
            using one of the ``grep`` variants.  Or add regular expression handling
            and submit a pull request! ;)
           \n""") + CTU_Dataset.get_disclaimer()  # noqa

        return parser

    # FYI, https://mcfp.felk.cvut.cz/publicDatasets/CTU-Malware-Capture-Botnet-269-1/README.html  # noqa
    # is an Emotet sample...
    # TODO(dittrich): Figure out how to handle these

    def take_action(self, parsed_args):
        """Return the CTU metadata rows selected by the parsed options.

        Loads the CTU metadata (creating the ``CTU_Dataset`` on first use),
        expands each positional ``scenario`` to its full capture name, and
        queries the metadata with the date, hash and substring filters.

        When scenarios were named, only rows whose second column matches one
        of them are kept. Otherwise the rows are truncated to
        ``self.app_args.limit`` when that limit is positive.

        Returns a ``(columns, data)`` pair. Exits via ``sys.exit`` with a
        message when a scenario name matches nothing, and with status 1 when
        no rows remain.
        """
        self.log.debug('[+] listing CTU data')
        if not hasattr(self, 'ctu_metadata'):
            self.ctu_metadata = CTU_Dataset(
                cache_file=parsed_args.cache_file,
                ignore_cache=parsed_args.ignore_cache,
                debug=self.app_args.debug)
        self.ctu_metadata.load_ctu_metadata()
        # Expand capture names if abbreviated
        scenarios = []
        for scenario in parsed_args.scenario:
            full_name = self.ctu_metadata.get_fullname(name=scenario)
            if full_name is None:
                sys.exit(f"[-] '{scenario}' does not match any scenario names")
            scenarios.append(full_name)
        columns = (self.ctu_metadata.get_all_columns()
                   if parsed_args.everything
                   else self.ctu_metadata.get_index_columns())
        results = self.ctu_metadata.get_metadata(
            columns=columns,
            malware_includes=parsed_args.malware_includes,
            name_includes=parsed_args.name_includes,
            fullnames=parsed_args.fullnames,
            description_includes=parsed_args.description_includes,
            date_starting=parsed_args.date_starting,
            date_ending=parsed_args.date_ending,
            has_hash=parsed_args.hash)
        if scenarios:
            # NOTE(review): assumes the second column of every row is the
            # capture name -- confirm against the column order returned by
            # get_index_columns()/get_all_columns().
            data = [row for row in results if row[1] in scenarios]
        elif self.app_args.limit > 0:
            # Slicing already stops at the end of the sequence, so no
            # explicit min() against len(results) is needed.
            data = results[:self.app_args.limit]
        else:
            data = results
        if len(data) == 0:
            # Nothing matched: signal failure through the exit status.
            sys.exit(1)
        return columns, data