class Test_CTU_Dataset(unittest.TestCase):
    """Tests for ``CTU_Dataset`` and the related module-level helpers.

    Every test gets a fresh ``CTU_Dataset`` built from ``TEST_CACHE`` with
    its metadata loaded (see ``setUp``).
    """

    def setUp(self):
        """Create the dataset under test and load its metadata."""
        self.ctu_dataset = CTU_Dataset(cache_file=TEST_CACHE)
        self.ctu_dataset.load_ctu_metadata()

    def tearDown(self):
        """No per-test cleanup is performed."""
        pass

    # -- cache file and get_file_last_mtime() ------------------------------

    def test_cache_exists(self):
        self.assertTrue(os.path.exists(TEST_CACHE))

    def test_get_file_last_mtime_exists(self):
        self.assertNotEqual(get_file_last_mtime(file_path=TEST_CACHE), 0)

    def test_get_file_last_mtime_notexists(self):
        self.assertEqual(get_file_last_mtime(file_path=TEST_EMPTY_CACHE), 0)

    def test_get_file_last_mtime_nopath(self):
        self.assertRaises(RuntimeError, get_file_last_mtime)

    def test_get_file_last_mtime_relative_path(self):
        self.assertRaises(RuntimeError,
                          get_file_last_mtime,
                          file_path='../../../etc/passwd')

    def test_get_file_last_mtime_clean_empty(self):
        """An empty file reports mtime 0 and is expected to be gone after
        calling with ``clean=True``."""
        os.makedirs(os.path.dirname(TEST_EMPTY_CACHE), exist_ok=True)
        # Create a zero-length file; the context manager closes the handle
        # even if something goes wrong while creating it.
        with open(TEST_EMPTY_CACHE, 'w'):
            pass
        self.assertEqual(
            get_file_last_mtime(file_path=TEST_EMPTY_CACHE, clean=True), 0)
        # Checking the path (rather than re-opening the file) cannot leak a
        # file handle when the expectation is not met.
        self.assertFalse(os.path.exists(TEST_EMPTY_CACHE))

    # -- column helpers and disclaimer -------------------------------------

    def test_get_data_columns(self):
        columns = CTU_Dataset.get_data_columns()
        self.assertIsInstance(columns, list)
        self.assertGreater(len(columns), 0)

    def test_get_index_columns(self):
        columns = CTU_Dataset.get_index_columns()
        self.assertIsInstance(columns, list)
        self.assertGreater(len(columns), 0)

    def test_get_all_columns(self):
        columns = CTU_Dataset.get_all_columns()
        self.assertIsInstance(columns, list)
        self.assertGreater(len(columns), 0)

    def test_get_disclaimer(self):
        disclaimer = CTU_Dataset.get_disclaimer()
        self.assertIn("http://dx.doi.org/10.1016/j.cose.2014.05.011",
                      disclaimer)

    # -- scenarios ---------------------------------------------------------

    def test_get_scenarios(self):
        scenarios = self.ctu_dataset.get_scenarios()
        self.assertIsInstance(scenarios, dict)
        self.assertIn('CTU-Malware-Capture-Botnet-48', scenarios)

    def test_get_scenario_names(self):
        scenario_names = self.ctu_dataset.get_scenario_names()
        self.assertIsInstance(scenario_names, list)
        self.assertGreater(len(scenario_names), 0)
        self.assertEqual(scenario_names[0],
                         'CTU-Malware-Capture-Botnet-90',
                         msg=f'scenario_names[0]={scenario_names[0]:40}...')

    def test_is_valid_scenario_short_MATCH(self):
        # NOTE(review): despite "MATCH" in the name, this asserts that the
        # short form is NOT a valid scenario -- confirm that is intended.
        self.assertFalse(self.ctu_dataset.is_valid_scenario('Botnet-48'))

    def test_is_valid_scenario_long_MATCH(self):
        self.assertTrue(
            self.ctu_dataset.is_valid_scenario(
                'CTU-Malware-Capture-Botnet-48'))

    def test_is_valid_scenario_FAIL(self):
        self.assertFalse(
            self.ctu_dataset.is_valid_scenario(
                'CTU-Milware-Copture-Botnet-48'))

    def test_get_scenario_data_url_SUCCESS(self):
        self.assertEqual(
            self.ctu_dataset.get_scenario_data(
                'CTU-Malware-Capture-Botnet-48', 'Capture_URL'),
            'https://mcfp.felk.cvut.cz/publicDatasets/'
            'CTU-Malware-Capture-Botnet-48')

    def test_get_data_columns_match_constant(self):
        """``get_data_columns()`` returns the ``__DATA_COLUMNS__`` items.

        This test has its own name so that it does not replace
        ``test_get_data_columns`` in the class namespace; with two methods
        of the same name only the last definition would ever run.
        """
        items = list(CTU_Dataset.__DATA_COLUMNS__)
        self.assertListEqual(items, self.ctu_dataset.get_data_columns())

    def test_get_scenario_data_url_FAIL(self):
        """An unknown attribute name must raise ``RuntimeError``."""
        # assertRaises makes the test fail when no exception is raised; a
        # plain try/except would silently pass in that case.
        with self.assertRaises(RuntimeError) as ctx:
            self.ctu_dataset.get_scenario_data(
                'CTU-Malware-Capture-Botnet-48', 'Capture_ORL')
        self.assertIn('is not supported', str(ctx.exception))

    def test_get_scenario_data_pcap(self):
        url = self.ctu_dataset.get_scenario_data(
            'CTU-Malware-Capture-Botnet-113-1', 'PCAP')
        self.assertEqual(
            url,
            'https://mcfp.felk.cvut.cz/publicDatasets/'
            'CTU-Malware-Capture-Botnet-113-1/2015-03-12_capture-win6.pcap',
            msg=f'url={url}')

    def test_get_scenario_page_short(self):
        self.assertIn(
            'DOCTYPE HTML PUBLIC',
            self.ctu_dataset.get_scenario_page('Malware-Botnet-42'))

    def test_get_scenario_page_full(self):
        self.assertIn(
            'DOCTYPE HTML PUBLIC',
            self.ctu_dataset.get_scenario_page(
                'CTU-Malware-Capture-Botnet-42'))

    def test_filename_from_url(self):
        filename = self.ctu_dataset.filename_from_url(
            'https://mcfp.felk.cvut.cz/publicDatasets/'
            'CTU-Mixed-Capture-1/2015-07-28_mixed.pcap')
        self.assertEqual(filename,
                         '2015-07-28_mixed.pcap',
                         msg=f'filename={filename}')

    # -- get_fullname() / get_shortname() ----------------------------------

    def test_get_fullname_short_5parts(self):
        fullname = self.ctu_dataset.get_fullname(
            name='CTU-Malware-Capture-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_4parts(self):
        fullname = self.ctu_dataset.get_fullname(
            'Malware-Capture-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_3parts1(self):
        fullname = self.ctu_dataset.get_fullname(name='Malware-Botnet-116-1')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-116-1')

    def test_get_fullname_short_3parts2(self):
        fullname = self.ctu_dataset.get_fullname(name='Malware-Capture-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_2parts1(self):
        fullname = self.ctu_dataset.get_fullname(name='Malware-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_2parts2(self):
        fullname = self.ctu_dataset.get_fullname(name='Capture-42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_1part_number(self):
        fullname = self.ctu_dataset.get_fullname(name='42')
        self.assertEqual(fullname, 'CTU-Malware-Capture-Botnet-42')

    def test_get_fullname_short_1part_name(self):
        self.assertRaises(SystemExit,
                          self.ctu_dataset.get_fullname,
                          name='IoT')

    def test_get_fullname_short_fail(self):
        fullname = self.ctu_dataset.get_fullname(name='Botnet-1')
        self.assertIsNone(fullname)

    def test_get_fullname_typo(self):
        fullname = self.ctu_dataset.get_fullname(
            name='CTU_Malware_Capture-Botnet-42')
        self.assertIsNone(fullname)

    def test_get_shortname_match(self):
        shortname = self.ctu_dataset.get_shortname(
            name='CTU-Malware-Capture-Botnet-42')
        self.assertEqual(shortname, 'Malware-Botnet-42')

    # -- normalize_ctu_name() ----------------------------------------------

    def test_normalize_ctu_name_lower(self):
        self.assertEqual(normalize_ctu_name('ctu-malware-botnet-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('iot-malware-33-1'),
                         'IoT-Malware-33-1')

    def test_normalize_ctu_name_upper(self):
        self.assertEqual(normalize_ctu_name('CTU-MALWARE-BOTNET-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('IOT-MALWARE-33-1'),
                         'IoT-Malware-33-1')

    def test_normalize_ctu_name_mixed(self):
        self.assertEqual(normalize_ctu_name('Ctu-Malware-Botnet-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('Iot-Malware-33-1'),
                         'IoT-Malware-33-1')

    def test_normalize_ctu_name_random(self):
        # NOTE(review): the first input is all upper-case, identical to the
        # one in test_normalize_ctu_name_upper -- a randomly-cased CTU name
        # was probably intended; confirm before changing.
        self.assertEqual(normalize_ctu_name('CTU-MALWARE-BOTNET-42'),
                         'CTU-Malware-Botnet-42')
        self.assertEqual(normalize_ctu_name('IoT-MaLwArE-33-1'),
                         'IoT-Malware-33-1')
def test_get_data_columns(self):
    """``CTU_Dataset.get_data_columns()`` returns a non-empty list."""
    # NOTE(review): a method with this same name is also defined in
    # Test_CTU_Dataset; if this definition lives in that class it replaces
    # the earlier one -- confirm which class this belongs to.
    columns = CTU_Dataset.get_data_columns()
    # Dedicated assertions report the offending value on failure, unlike
    # assertIs(type(...), ...) / assertTrue(len(...) > 0).
    self.assertIsInstance(columns, list)
    self.assertGreater(len(columns), 0)
def get_parser(self, prog_name):
    """Build the command-line parser for this command.

    Starts from the parser returned by the parent class and adds the
    options and positional arguments handled by this command, plus an
    epilog that ends with the CTU dataset disclaimer.

    Args:
        prog_name: Program name passed through to the parent class's
            ``get_parser()``.

    Returns:
        The configured parser object.
    """
    parser = super().get_parser(prog_name)
    # Keep the epilog's own line breaks instead of re-wrapping it.
    parser.formatter_class = argparse.RawDescriptionHelpFormatter
    parser.add_argument(
        '--force',
        action='store_true',
        dest='force',
        default=False,
        help="Force over-writing files if they exist (default: ``False``)")
    parser.add_argument(
        '--no-subdir',
        action='store_true',
        dest='no_subdir',
        default=False,
        help=('Do not maintain scenario name subdirectory '
              '(default: ``False``)'))
    _default_protocols = ",".join(DEFAULT_PROTOCOLS)
    parser.add_argument(
        '-P', '--protocols',
        metavar='<protocol-list>',
        dest='protocols',
        # The default is a comma-separated string; argparse runs string
        # defaults through ``type``, so it is split the same way as
        # user-supplied values.
        type=lambda s: s.split(','),
        default=_default_protocols,
        help=("Protocols to include, or 'any' "
              f'(default: ``{_default_protocols}``)'))
    parser.add_argument(
        '-L', '--maxlines',
        metavar='<lines>',
        dest='maxlines',
        default=None,
        help="Maximum number of lines to get (default: ``None``)")
    cache_file = CTU_Dataset.get_cache_file()
    parser.add_argument(
        '--cache-file',
        action='store',
        dest='cache_file',
        default=cache_file,
        help=('Cache file path for CTU metadata '
              '(Env: ``LIM_CTU_CACHE``; '
              f'default: ``{cache_file}``)'))
    parser.add_argument(
        '--ignore-cache',
        action='store_true',
        dest='ignore_cache',
        default=False,
        help="Ignore any cached results (default: ``False``)")
    parser.add_argument(
        'scenario',
        nargs=1,
        type=normalize_ctu_name,
        default=None)
    # Lower-cased data column names, computed once and shared by the
    # ``choices`` list and the epilog text.
    data_columns = [c.lower() for c in CTU_Dataset.get_data_columns()]
    data_types = ", ".join(data_columns)
    parser.add_argument(
        'data',
        nargs='+',
        type=str.lower,
        choices=data_columns + ['all'],
        default=None)
    parser.epilog = textwrap.dedent(f"""\
        Get one or more data components from a scenario. These components
        are the raw PCAP file, Netflow file, and other analytic products
        from intrusion detection system processing, etc.

        See ``lim ctu list --help`` for more on the ``scenario`` argument.

        For the ``data`` argument, you can use ``all`` to recursively
        download all scenario data, or one or more of the data files by type:
        ``{data_types}``

        By default, or when using the ``all`` attribute identifier, the
        file(s) are placed in a subdirectory with the full name of the
        scenario to better organize data across multiple scenarios. You can
        override this when getting specific files (i.e., not using ``all``)
        with the ``--no-subdir`` option.
        \n""") + CTU_Dataset.get_disclaimer()  # noqa
    return parser