def test_calculate_metric_hex_encoded_length(self):
    result = MetricsAnalyzer.calculate_metric("hex_encoded_length", "12c322adc020 12322029620")
    expected_observation = {
        'max_hex_encoded_length': 12,
        'max_hex_encoded_word': '12c322adc020'
    }
    self.assertEqual(result, (12, expected_observation))
def test_calculate_metric_url_length(self):
    result = MetricsAnalyzer.calculate_metric("url_length", "why don't we go http://www.nviso.com")
    expected_observation = {
        'extracted_urls_length': 20,
        'extracted_urls': 'http://www.nviso.com'
    }
    self.assertEqual(result, (20, expected_observation))
def test_calculate_metric_base64_encoded_length(self):
    result = MetricsAnalyzer.calculate_metric("base64_encoded_length", "houston we have a cHJvYmxlbQ==")
    expected_observation = {
        'max_base64_decoded_length': 7,
        'max_base64_decoded_word': 'problem'
    }
    self.assertEqual(result, (7, expected_observation))
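# Hedged sketch, not the shipped MetricsAnalyzer code: judging only from the tests above,
# calculate_metric("hex_encoded_length", ...) seems to return the length of the longest
# hex-decodable word together with an observation dict, and the base64/url variants behave
# analogously. The helper below illustrates the hex case under those assumptions; the function
# name is hypothetical.
def _hex_encoded_length_sketch(target_value):
    max_word = ""
    for word in target_value.split():
        try:
            bytes.fromhex(word)  # raises ValueError if the word is not valid hex
        except ValueError:
            continue
        if len(word) > len(max_word):
            max_word = word
    if not max_word:
        return None, dict()
    return len(max_word), {'max_hex_encoded_length': len(max_word), 'max_hex_encoded_word': max_word}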
def _test_whitelist_batch_document_not_process_all(self):  # TODO: fix with the new whitelist system
    self.test_settings.change_configuration_path(
        "/app/tests/unit_tests/files/metrics_test_with_whitelist.conf")
    analyzer = MetricsAnalyzer("metrics_length_dummy_test")

    # Whitelisted (ignored)
    doc1_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_01_test_file)
    self.test_es.add_doc(doc1_without_outlier)
    # Not whitelisted (added)
    doc2_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_02_test_file)
    self.test_es.add_doc(doc2_without_outlier)
    # Not whitelisted
    doc3_without_outlier = copy.deepcopy(doc_without_outliers_test_whitelist_03_test_file)
    self.test_es.add_doc(doc3_without_outlier)

    analyzer.evaluate_model()
    self.assertEqual(len(analyzer.outliers), 2)
def test_remove_metric_from_batch_simple_value(self):
    eval_metrics_array = defaultdict()
    aggregator_value = "agg"
    target_value = "dummy_target"
    metrics_value = "dummy_metric"
    observations = {}
    dummy_doc_gen = DummyDocumentsGenerate()
    doc = dummy_doc_gen.generate_document()

    batch = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value, target_value,
                                                metrics_value, observations, doc)
    result = MetricsAnalyzer.remove_metric_from_batch(batch[aggregator_value], 0)

    expected_aggregator_value = defaultdict(list)
    expected_aggregator_value["metrics"] = []
    expected_aggregator_value["observations"] = []
    expected_aggregator_value["raw_docs"] = []

    self.assertEqual(result, expected_aggregator_value)
def test_evaluate_batch_for_outliers_fetch_remain_metrics(self):
    self.test_settings.change_configuration_path("/app/tests/unit_tests/files/metrics_test_01.conf")
    analyzer = AnalyzerFactory.create("/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test.conf")

    eval_metrics_array, aggregator_value, target_value, metrics_value, observations = \
        self._preperate_data_terms_with_doc()
    doc = DummyDocumentsGenerate().generate_document()
    metrics = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value, target_value,
                                                  metrics_value, observations, doc)

    # _evaluate_batch_for_outliers returns (outliers, remaining_metrics): no outlier is flagged here,
    # so the whole batch is returned to be recomputed later
    result = analyzer._evaluate_batch_for_outliers(metrics, False)
    self.assertEqual(result, ([], metrics))
def test_add_metric_to_batch_no_modification(self):
    eval_metrics_array, aggregator_value, target_value, metrics_value, observations, doc = \
        self._preperate_dummy_data_terms()

    # Create expected result
    observations["target"] = [target_value]
    observations["aggregator"] = [aggregator_value]
    expected_eval_terms = defaultdict()
    expected_eval_terms[aggregator_value] = defaultdict(list)
    expected_eval_terms[aggregator_value]["metrics"] = [metrics_value]
    expected_eval_terms[aggregator_value]["observations"] = [observations]
    expected_eval_terms[aggregator_value]["raw_docs"] = [doc]

    result = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value, target_value,
                                                 metrics_value, observations, doc)
    self.assertEqual(result, expected_eval_terms)
def perform_analysis():
    """ The entrypoint for analysis """
    analyzers = list()
    for config_section_name in settings.config.sections():
        try:
            if config_section_name.startswith("simplequery_"):
                simplequery_analyzer = SimplequeryAnalyzer(config_section_name=config_section_name)
                analyzers.append(simplequery_analyzer)
            if config_section_name.startswith("metrics_"):
                metrics_analyzer = MetricsAnalyzer(config_section_name=config_section_name)
                analyzers.append(metrics_analyzer)
            if config_section_name.startswith("terms_"):
                terms_analyzer = TermsAnalyzer(config_section_name=config_section_name)
                analyzers.append(terms_analyzer)
            if config_section_name.startswith("beaconing_"):
                beaconing_analyzer = BeaconingAnalyzer(config_section_name=config_section_name)
                analyzers.append(beaconing_analyzer)
            if config_section_name.startswith("word2vec_"):
                word2vec_analyzer = Word2VecAnalyzer(config_section_name=config_section_name)
                analyzers.append(word2vec_analyzer)
        except Exception:
            logging.logger.error(traceback.format_exc())

    analyzers_to_evaluate = list()
    for analyzer in analyzers:
        if analyzer.should_run_model or analyzer.should_test_model:
            analyzers_to_evaluate.append(analyzer)

    random.shuffle(analyzers_to_evaluate)

    analyzed_models = 0
    for analyzer in analyzers_to_evaluate:
        try:
            analyzer.evaluate_model()
            analyzed_models = analyzed_models + 1
            logging.logger.info("finished processing use case - " + str(analyzed_models) + "/" +
                                str(len(analyzers_to_evaluate)) + " [" +
                                '{:.2f}'.format(round(float(analyzed_models) /
                                                      float(len(analyzers_to_evaluate)) * 100, 2)) +
                                "% done" + "]")
        except Exception:
            logging.logger.error(traceback.format_exc())
def test_evaluate_batch_for_outliers_add_outlier(self):
    self.test_settings.change_configuration_path("/app/tests/unit_tests/files/metrics_test_02.conf")
    analyzer = AnalyzerFactory.create("/app/tests/unit_tests/files/use_cases/metrics/metrics_dummy_test_2.conf")

    eval_metrics_array, aggregator_value, target_value, metrics_value, observations = \
        self._preperate_data_terms_with_doc(metrics_value=12)
    doc_without_outlier = copy.deepcopy(doc_without_outlier_test_file)
    self.test_es.add_doc(doc_without_outlier)
    metrics = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value, target_value,
                                                  metrics_value, observations, doc_without_outlier)

    outliers, remaining_metrics = analyzer._evaluate_batch_for_outliers(metrics, True)
    analyzer.process_outlier(outliers[0])

    result = [elem for elem in es._scan()][0]
    doc_with_outlier = copy.deepcopy(doc_with_outlier_test_file)
    self.maxDiff = None
    self.assertEqual(result, doc_with_outlier)
def test_add_metric_to_batch_empty(self):
    eval_metrics_array = defaultdict()
    aggregator_value = ""
    target_value = ""
    metrics_value = ""
    observations = {}
    doc = {}

    # Create expected result
    observations["target"] = [target_value]
    observations["aggregator"] = [aggregator_value]
    expected_eval_terms = defaultdict()
    expected_eval_terms[aggregator_value] = defaultdict(list)
    expected_eval_terms[aggregator_value]["metrics"] = [metrics_value]
    expected_eval_terms[aggregator_value]["observations"] = [observations]
    expected_eval_terms[aggregator_value]["raw_docs"] = [doc]

    result = MetricsAnalyzer.add_metric_to_batch(eval_metrics_array, aggregator_value, target_value,
                                                 metrics_value, observations, doc)
    self.assertEqual(result, expected_eval_terms)
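# Hedged sketch, illustrative only and not the shipped MetricsAnalyzer code: the batch structure
# asserted in the add/remove tests above suggests helpers along these lines, where each aggregator
# maps to parallel "metrics", "observations" and "raw_docs" lists, and the target/aggregator values
# are folded into the observations. Function names are hypothetical.
from collections import defaultdict


def add_metric_to_batch_sketch(eval_metrics_array, aggregator_value, target_value, metrics_value,
                               observations, doc):
    observations["target"] = [target_value]
    observations["aggregator"] = [aggregator_value]
    if aggregator_value not in eval_metrics_array:
        eval_metrics_array[aggregator_value] = defaultdict(list)
    eval_metrics_array[aggregator_value]["metrics"].append(metrics_value)
    eval_metrics_array[aggregator_value]["observations"].append(observations)
    eval_metrics_array[aggregator_value]["raw_docs"].append(doc)
    return eval_metrics_array


def remove_metric_from_batch_sketch(aggregator_batch, index):
    # Drop the entry at the given index from each of the three parallel lists
    aggregator_batch["metrics"].pop(index)
    aggregator_batch["observations"].pop(index)
    aggregator_batch["raw_docs"].pop(index)
    return aggregator_batch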
def perform_analysis():
    """ The entrypoint for analysis """
    analyzers = list()
    for config_section_name in settings.config.sections():
        _analyzer = None
        try:
            if config_section_name.startswith("simplequery_"):
                _analyzer = SimplequeryAnalyzer(config_section_name=config_section_name)
                analyzers.append(_analyzer)
            elif config_section_name.startswith("metrics_"):
                _analyzer = MetricsAnalyzer(config_section_name=config_section_name)
                analyzers.append(_analyzer)
            elif config_section_name.startswith("terms_"):
                _analyzer = TermsAnalyzer(config_section_name=config_section_name)
                analyzers.append(_analyzer)
            elif config_section_name.startswith("beaconing_"):
                logging.logger.error("use of the beaconing model is deprecated, please use the terms model " +
                                     "using the coeff_of_variation trigger method to convert use case " +
                                     config_section_name)
            elif config_section_name.startswith("word2vec_"):
                _analyzer = Word2VecAnalyzer(config_section_name=config_section_name)
                analyzers.append(_analyzer)
        except Exception:
            logging.logger.error("error while initializing analyzer " + config_section_name, exc_info=True)

    analyzers_to_evaluate = list()
    for analyzer in analyzers:
        if analyzer.should_run_model or analyzer.should_test_model:
            analyzers_to_evaluate.append(analyzer)

    random.shuffle(analyzers_to_evaluate)

    for index, analyzer in enumerate(analyzers_to_evaluate):
        if analyzer.configuration_parsing_error:
            continue
        try:
            analyzer.analysis_start_time = datetime.today().timestamp()
            analyzer.evaluate_model()
            analyzer.analysis_end_time = datetime.today().timestamp()
            analyzer.completed_analysis = True
            logging.logger.info("finished processing use case - " + str(index + 1) + "/" +
                                str(len(analyzers_to_evaluate)) + " [" +
                                '{:.2f}'.format(round((index + 1) / float(len(analyzers_to_evaluate)) * 100, 2)) +
                                "% done" + "]")
        except elasticsearch.exceptions.NotFoundError:
            analyzer.index_not_found_analysis = True
            logging.logger.warning("index %s does not exist, skipping use case" % analyzer.es_index)
        except Exception:
            analyzer.unknown_error_analysis = True
            logging.logger.error("error while analyzing use case", exc_info=True)
        finally:
            es.flush_bulk_actions(refresh=True)

    return analyzers_to_evaluate
def test_calculate_metric_unexist_operation(self):
    self.assertEqual(MetricsAnalyzer.calculate_metric("dummy operation", ""), (None, dict()))
def test_calculate_metric_entropy(self):
    self.assertEqual(MetricsAnalyzer.calculate_metric("entropy", "test"),
                     (helpers.utils.shannon_entropy("test"), dict()))
def test_calculate_metric_length(self):
    self.assertEqual(MetricsAnalyzer.calculate_metric("length", "test"), (len("test"), dict()))
def test_calculate_metric_numerical_value(self):
    self.assertEqual(MetricsAnalyzer.calculate_metric("numerical_value", "12"), (float(12), dict()))
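# Hedged sketch of Shannon entropy as referenced by test_calculate_metric_entropy; the actual
# helpers.utils.shannon_entropy implementation may differ (e.g. in log base or handling of empty
# input), so this is only a minimal illustration of the standard formula.
import math


def shannon_entropy_sketch(data):
    if not data:
        return 0
    # Sum of -p * log2(p) over the relative frequency p of each distinct character
    return -sum(count / len(data) * math.log2(count / len(data))
                for count in (data.count(char) for char in set(data)))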