Esempio n. 1
0
class Test_CTU_Dataset(unittest.TestCase):
    """Unit tests for ``CTU_Dataset`` and related module-level helpers.

    ``setUp`` builds a ``CTU_Dataset`` from ``TEST_CACHE`` and loads its
    metadata before every test, so instance-level tests can use
    ``self.ctu_dataset`` directly.
    """

    def setUp(self):
        """Create the dataset under test and load its metadata."""
        self.ctu_dataset = CTU_Dataset(cache_file=TEST_CACHE)
        self.ctu_dataset.load_ctu_metadata()

    def tearDown(self):
        """No per-test cleanup is required."""
        pass

    def test_cache_exists(self):
        """The cache file is present once ``setUp`` has run."""
        self.assertTrue(os.path.exists(TEST_CACHE))

    def test_get_file_last_mtime_exists(self):
        """An existing file yields a non-zero value."""
        self.assertNotEqual(
            get_file_last_mtime(file_path=TEST_CACHE), 0)

    def test_get_file_last_mtime_notexists(self):
        """A path that does not exist yields ``0``."""
        self.assertEqual(
            get_file_last_mtime(file_path=TEST_EMPTY_CACHE), 0)

    def test_get_file_last_mtime_nopath(self):
        """Calling without a path raises ``RuntimeError``."""
        self.assertRaises(RuntimeError,
                          get_file_last_mtime)

    def test_get_file_last_mtime_relative_path(self):
        """A parent-relative path raises ``RuntimeError``."""
        self.assertRaises(RuntimeError,
                          get_file_last_mtime,
                          file_path='../../../etc/passwd')

    def test_get_file_last_mtime_clean_empty(self):
        """With ``clean=True`` an empty file yields ``0`` and is gone afterwards."""
        os.makedirs(os.path.dirname(TEST_EMPTY_CACHE), exist_ok=True)
        # Create a zero-length file; the context manager guarantees the
        # handle is closed before the helper inspects the path.
        with open(TEST_EMPTY_CACHE, 'w'):
            pass
        self.assertEqual(
            get_file_last_mtime(file_path=TEST_EMPTY_CACHE, clean=True), 0)
        self.assertRaises(FileNotFoundError,
                          open,
                          TEST_EMPTY_CACHE,
                          'r')

    def test_get_data_columns(self):
        """``get_data_columns()`` returns a non-empty ``list``."""
        columns = CTU_Dataset.get_data_columns()
        self.assertIs(type(columns), list)
        self.assertGreater(len(columns), 0)

    def test_get_index_columns(self):
        """``get_index_columns()`` returns a non-empty ``list``."""
        columns = CTU_Dataset.get_index_columns()
        self.assertIs(type(columns), list)
        self.assertGreater(len(columns), 0)

    def test_get_all_columns(self):
        """``get_all_columns()`` returns a non-empty ``list``."""
        columns = CTU_Dataset.get_all_columns()
        self.assertIs(type(columns), list)
        self.assertGreater(len(columns), 0)

    def test_get_disclaimer(self):
        """The disclaimer text contains the expected DOI link."""
        disclaimer = CTU_Dataset.get_disclaimer()
        self.assertIn("http://dx.doi.org/10.1016/j.cose.2014.05.011", disclaimer)

    def test_get_scenarios(self):
        """``get_scenarios()`` returns a ``dict`` keyed by full scenario name."""
        scenarios = self.ctu_dataset.get_scenarios()
        self.assertIs(type(scenarios), dict)
        self.assertIn('CTU-Malware-Capture-Botnet-48', scenarios)

    def test_get_scenario_names(self):
        """``get_scenario_names()`` returns a non-empty list with a known first entry."""
        scenario_names = self.ctu_dataset.get_scenario_names()
        self.assertIs(type(scenario_names), list)
        self.assertGreater(len(scenario_names), 0)
        self.assertEqual(scenario_names[0], 'CTU-Malware-Capture-Botnet-90',
            msg=f'scenario_names[0]={scenario_names[0]:40}...')

    def test_is_valid_scenario_short_MATCH(self):
        """A short name is not accepted by ``is_valid_scenario``.

        Despite the ``MATCH`` suffix in the test name, the expectation
        here is ``False``.
        """
        self.assertFalse(self.ctu_dataset.is_valid_scenario('Botnet-48'))

    def test_is_valid_scenario_long_MATCH(self):
        """A full scenario name is accepted by ``is_valid_scenario``."""
        self.assertTrue(self.ctu_dataset.is_valid_scenario('CTU-Malware-Capture-Botnet-48'))

    def test_is_valid_scenario_FAIL(self):
        """A misspelled full name is rejected by ``is_valid_scenario``."""
        self.assertFalse(self.ctu_dataset.is_valid_scenario('CTU-Milware-Copture-Botnet-48'))

    def test_get_scenario_data_url_SUCCESS(self):
        """The ``Capture_URL`` attribute resolves to the scenario's URL."""
        self.assertEqual(
            self.ctu_dataset.get_scenario_data('CTU-Malware-Capture-Botnet-48',
                                              'Capture_URL'),
            'https://mcfp.felk.cvut.cz/publicDatasets/CTU-Malware-Capture-Botnet-48')

    # Named distinctly from ``test_get_data_columns`` so that both tests are
    # bound on the class and collected by the runner.
    def test_get_data_columns_matches_constant(self):
        """``get_data_columns()`` equals the items of ``__DATA_COLUMNS__``."""
        items = list(CTU_Dataset.__DATA_COLUMNS__)
        self.assertListEqual(items, self.ctu_dataset.get_data_columns())

    def test_get_scenario_data_url_FAIL(self):
        """An unknown attribute name raises ``RuntimeError`` with a clear message."""
        # The context manager makes the test fail when no exception is
        # raised, instead of passing silently.
        with self.assertRaises(RuntimeError) as raised:
            self.ctu_dataset.get_scenario_data('CTU-Malware-Capture-Botnet-48',
                                               'Capture_ORL')
        self.assertIn('is not supported', str(raised.exception))

    def test_get_scenario_data_pcap(self):
        """The ``PCAP`` attribute resolves to the expected capture file URL."""
        url = self.ctu_dataset.get_scenario_data('CTU-Malware-Capture-Botnet-113-1',
                                                 'PCAP')
        self.assertEqual(url,
            'https://mcfp.felk.cvut.cz/publicDatasets/CTU-Malware-Capture-Botnet-113-1/2015-03-12_capture-win6.pcap',
            msg=f'url={url}')

    def test_get_scenario_page_short(self):
        """A short scenario name yields page content containing an HTML doctype."""
        self.assertIn('DOCTYPE HTML PUBLIC',
                      self.ctu_dataset.get_scenario_page('Malware-Botnet-42'))

    def test_get_scenario_page_full(self):
        """A full scenario name yields page content containing an HTML doctype."""
        self.assertIn('DOCTYPE HTML PUBLIC',
                      self.ctu_dataset.get_scenario_page('CTU-Malware-Capture-Botnet-42'))

    def test_filename_from_url(self):
        """``filename_from_url`` returns the last path component of the URL."""
        filename = self.ctu_dataset.filename_from_url(
                'https://mcfp.felk.cvut.cz/publicDatasets/CTU-Mixed-Capture-1/2015-07-28_mixed.pcap')
        self.assertEqual(filename, '2015-07-28_mixed.pcap',
                         msg=f'filename={filename}')

    def test_get_fullname_short_5parts(self):
        """A full name is returned unchanged by ``get_fullname``."""
        fullname = self.ctu_dataset.get_fullname(name='CTU-Malware-Capture-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_4parts(self):
        """A name missing the ``CTU`` prefix expands to the full name."""
        fullname = self.ctu_dataset.get_fullname('Malware-Capture-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_3parts1(self):
        """``Malware-Botnet-<id>`` expands to the full name."""
        fullname = self.ctu_dataset.get_fullname(name='Malware-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_3parts2(self):
        """``Malware-Capture-<id>`` expands to the full name."""
        fullname = self.ctu_dataset.get_fullname(name='Malware-Capture-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_2parts1(self):
        """``Malware-<id>`` expands to the full name."""
        fullname = self.ctu_dataset.get_fullname(name='Malware-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_2parts2(self):
        """``Capture-<id>`` expands to the full name."""
        fullname = self.ctu_dataset.get_fullname(name='Capture-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_1part_number(self):
        """A bare scenario number expands to the full name."""
        fullname = self.ctu_dataset.get_fullname(name='42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_1part_name(self):
        """A bare group name without a number raises ``SystemExit``."""
        self.assertRaises(SystemExit,
                          self.ctu_dataset.get_fullname,
                          name='IoT')

    def test_get_fullname_short_fail(self):
        """A short name with no matching scenario yields ``None``."""
        fullname = self.ctu_dataset.get_fullname(name='Botnet-1')
        self.assertEqual(fullname, None)

    def test_get_fullname_typo(self):
        """A name using underscores instead of hyphens yields ``None``."""
        fullname = self.ctu_dataset.get_fullname(name='CTU_Malware_Capture-Botnet-42')
        self.assertEqual(fullname, None)

    def test_get_shortname_match(self):
        """``get_shortname`` shortens a full name to ``Malware-Botnet-<id>``."""
        shortname = self.ctu_dataset.get_shortname(name='CTU-Malware-Capture-Botnet-42')
        self.assertEqual(shortname, 'Malware-Botnet-42')

    def test_normalize_ctu_name_lower(self):
        """All-lowercase input is normalized to canonical casing."""
        self.assertEqual(normalize_ctu_name('ctu-malware-botnet-42'),
                        'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('iot-malware-33-1'),
                        'IoT-Malware-33-1')

    def test_normalize_ctu_name_upper(self):
        """All-uppercase input is normalized to canonical casing."""
        self.assertEqual(normalize_ctu_name('CTU-MALWARE-BOTNET-42'),
                        'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('IOT-MALWARE-33-1'),
                        'IoT-Malware-33-1')

    def test_normalize_ctu_name_mixed(self):
        """Capitalized-word input is normalized to canonical casing."""
        self.assertEqual(normalize_ctu_name('Ctu-Malware-Botnet-42'),
                        'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('Iot-Malware-33-1'),
                        'IoT-Malware-33-1')

    def test_normalize_ctu_name_random(self):
        """Irregularly cased input is normalized to canonical casing."""
        self.assertEqual(normalize_ctu_name('CTU-MALWARE-BOTNET-42'),
                        'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('IoT-MaLwArE-33-1'),
                        'IoT-Malware-33-1')
Esempio n. 2
0
 def test_get_data_columns(self):
     """Check that ``CTU_Dataset.get_data_columns()`` gives a non-empty list."""
     data_columns = CTU_Dataset.get_data_columns()
     # Exact type match: the result must be a plain ``list``.
     self.assertIs(type(data_columns), list)
     has_entries = len(data_columns) > 0
     self.assertTrue(has_entries)
Esempio n. 3
0
    def get_parser(self, prog_name):
        """Return the argument parser for this command.

        Starts from the parent class's parser and adds the overwrite,
        directory-layout, protocol, line-limit and cache options, followed
        by the positional ``scenario`` and ``data`` arguments. The epilog
        is the usage notes below followed by the CTU disclaimer text.
        """
        parser = super().get_parser(prog_name)
        # Preserve the line breaks of the epilog in ``--help`` output.
        parser.formatter_class = argparse.RawDescriptionHelpFormatter

        parser.add_argument(
            '--force',
            action='store_true',
            dest='force',
            default=False,
            help="Force over-writing files if they exist (default: ``False``)",
        )
        parser.add_argument(
            '--no-subdir',
            action='store_true',
            dest='no_subdir',
            default=False,
            help=('Do not maintain scenario name subdirectory '
                  '(default: ``False``)'),
        )

        # The default is given as a comma-separated string; argparse runs
        # string defaults through ``type``, so it ends up as a list too.
        protocol_csv = ",".join(DEFAULT_PROTOCOLS)
        parser.add_argument(
            '-P',
            '--protocols',
            metavar='<protocol-list>',
            dest='protocols',
            type=lambda s: s.split(','),
            default=protocol_csv,
            help=("Protocols to include, or 'any' "
                  f'(default: ``{protocol_csv}``)'),
        )
        parser.add_argument(
            '-L',
            '--maxlines',
            metavar='<lines>',
            dest='maxlines',
            default=None,
            help="Maximum number of lines to get (default: ``None``)",
        )

        default_cache = CTU_Dataset.get_cache_file()
        parser.add_argument(
            '--cache-file',
            action='store',
            dest='cache_file',
            default=default_cache,
            help=('Cache file path for CTU metadata '
                  '(Env: ``LIM_CTU_CACHE``; '
                  f'default: ``{default_cache}``)'),
        )
        parser.add_argument(
            '--ignore-cache',
            action='store_true',
            dest='ignore_cache',
            default=False,
            help="Ignore any cached results (default: ``False``)",
        )

        parser.add_argument(
            'scenario',
            nargs=1,
            type=normalize_ctu_name,
            default=None,
        )

        # Lower-cased data column names serve both as the accepted choices
        # (plus ``all``) and as the list shown in the epilog.
        data_choices = [col.lower() for col in CTU_Dataset.get_data_columns()]
        data_type_list = ", ".join(data_choices)
        parser.add_argument(
            'data',
            nargs='+',
            type=str.lower,
            choices=data_choices + ['all'],
            default=None,
        )

        usage_notes = textwrap.dedent(f"""\
            Get one or more data components from a scenario. These
            components are the raw PCAP file, Netflow file, and
            other analytic products from intrusion detection system
            processing, etc.

            See ``lim ctu list --help`` for more on the ``scenario`` argument.

            For the ``data`` argument, you can use ``all`` to recursively
            download all scenario data, or one or more of the data
            files by type: ``{data_type_list}``

            By default, or when using the ``all`` attribute identifier,
            the file(s) are placed in a subdirectory with the full name
            of the scenario to better organize data across multiple
            scenarios. You can override this when getting specific files
            (i.e., not using ``all``) with the ``--no-subdir`` option.
           \n""")  # noqa
        parser.epilog = usage_notes + CTU_Dataset.get_disclaimer()
        return parser