def test_train(self):
     """Smoke-test model training on the template log file.

     Loads and formats the template records, builds an ApacheLogModel
     with the 'feat_extraction' section of the template config, trains
     it, then prints the object returned by train() and the result of
     its show_topics() call. No assertions are made.
     """
     raw_records = utils.dataloader(
         'data/wslog.dat.template', ApacheLogSchemaRaw, ' ')
     formatted = [ApacheLog.format(record) for record in raw_records]

     full_config = utils.configloader('config.cfg.template')
     feat_config = full_config['feat_extraction']

     trained = ApacheLogModel(formatted, feat_config).train()
     print(trained)
     print(trained.show_topics())
 def test_extract_bows(self):
     """Check the first bag-of-words produced by extract_bows().

     The model is built from the formatted template log records and the
     'feat_extraction' section of the template config; the first entry
     returned by extract_bows() must equal the expected token list.
     """
     raw_records = utils.dataloader(
         'data/wslog.dat.template', ApacheLogSchemaRaw, ' ')
     records = [ApacheLog.format(record) for record in raw_records]

     full_config = utils.configloader('config.cfg.template')
     log_model = ApacheLogModel(records, full_config['feat_extraction'])

     first_bow = log_model.extract_bows(records)[0]
     self.assertEqual(first_bow, [
         '_bytes_out_val_low',
         '_referrer_ent_low',
         '_user_agent_ent_veryhigh',
         '_request_resource_ent_med',
         '_referrer_len_low',
         '_request_resource_len_low',
         '_301',
         'mozilla',
         'macintosh',
         'intel mac os x',
         'applewebkit',
         'khtml',
         'like gecko',
         'chrome',
         '... safari',
         'svds.com',
         'rockandroll',
     ])
Esempio n. 3
0
	def test_dataloader(self):
		"""Check the first record and the record count from dataloader().

		Loads the template log file with a single-space delimiter and
		verifies that the first parsed record matches the expected
		field mapping and that 20 records were loaded in total.
		"""
		records = utils.dataloader(
			'data/wslog.dat.template', ApacheLogSchemaRaw, ' ')

		first_expected = {
			'ip': '198.0.200.105',
			'user_ident': '-',
			'user_http': '-',
			'ts': '[14/Jan/2014:09:36:50',
			'ms': '-0800]',
			'request': 'GET /svds.com/rockandroll HTTP/1.1',
			'response_code': '301',
			'bytes_out': '241',
			'referrer': '-',
			'user_agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
		}
		self.assertEqual(records[0], first_expected)

		# The record count is checked only after the first record matched.
		self.assertEqual(len(records), 20)
 def test_derive_ranges(self):
     """Check the per-feature thresholds returned by derive_stats().

     The model is built from the formatted template log records and the
     'feat_extraction' section of the template config. The result must
     contain exactly the expected features, each with exactly the
     expected levels. Threshold values are compared with
     assertAlmostEqual rather than exact equality, because the 'high'
     values are computed floats whose last digits can differ between
     platforms and library versions.
     """
     data = utils.dataloader('data/wslog.dat.template', ApacheLogSchemaRaw,
                             ' ')
     data = [ApacheLog.format(row) for row in data]
     featext_config = utils.configloader(
         'config.cfg.template')['feat_extraction']
     model = ApacheLogModel(data, featext_config)
     actual = model.derive_stats()
     expected = {
         'bytes_out_val': {
             'high': 14308.57885358002,
             'med': 5101,
             'low': 3518,
         },
         'referrer_len': {
             'high': 52.121734779245941,
             'med': 30,
             'low': 28,
         },
         'request_resource_len': {
             'high': 64.657561374216158,
             'med': 43,
             'low': 37,
         },
         'request_resource_ent': {
             'high': 4.4115352724758283,
             'med': 4,
             'low': 3,
         },
         'user_agent_ent': {
             'high': 5.0920380749115814,
             'med': 5,
             'low': 5,
         },
         'referrer_ent': {
             'high': 6.4544661178668585,
             'med': 3,
             'low': 3,
         },
     }

     # Structure first: a missing or extra feature should fail with a
     # clear key diff instead of a KeyError inside the value loop.
     self.assertEqual(sorted(actual), sorted(expected))
     for feature, levels in expected.items():
         self.assertEqual(sorted(actual[feature]), sorted(levels))
         for level, value in levels.items():
             # assertAlmostEqual passes immediately on exact equality,
             # so integer thresholds are still effectively exact.
             self.assertAlmostEqual(
                 actual[feature][level], value,
                 msg='{}.{}'.format(feature, level))
Esempio n. 5
0
    # Copy the run parameters off `args` into locals.
    # NOTE(review): `args` and `config` are defined outside this fragment;
    # presumably a parsed command line and a loaded config -- confirm.
    datapath = args.datapath
    num_topics = args.num_topics
    alpha = args.alpha
    iterations = args.iterations
    modelname = args.modelname

    # Initialise logging: the configured logfile name is a format template
    # filled with the current date (`dt` is presumably datetime.datetime --
    # confirm); level and record format also come from the config.
    logging.basicConfig(filename=config['logging']['logfile'].format(
        dt.now().date()),
                        level=config['logging']['level'],
                        format=config['logging']['format'])
    # Also attach a stream handler to the root logger, in addition to the
    # logfile configured above.
    logging.getLogger().addHandler(logging.StreamHandler())
    logger = logging.getLogger(__name__)

    # Load the records from `datapath` (single-space delimiter) and pass
    # each one through ApacheLog.format.
    data = utils.dataloader(datapath, ApacheLogSchemaRaw, ' ')
    data = [ApacheLog.format(l) for l in data]
    logger.info('completed loading data from {}'.format(datapath))

    # Load the feature-extraction settings: the 'feat_extraction' section
    # of the hard-coded template config file.
    featextraction_config = utils.configloader(
        'config.cfg.template')['feat_extraction']

    # Build the model from the formatted records, the feature-extraction
    # settings and the run parameters read from `args` above.
    model = ApacheLogModel(data,
                           featextraction_config,
                           num_topics=num_topics,
                           alpha=alpha,
                           iterations=iterations,
                           modelname=modelname)
Esempio n. 6
0
	def test_to_dataframe(self):
		"""Check that to_dataframe() keeps all 20 template log records."""
		records = utils.dataloader(
			'data/wslog.dat.template', ApacheLogSchemaRaw, ' ')
		frame = utils.to_dataframe(records)
		self.assertEqual(len(frame), 20)