def test_indirect_matrices_shape_with_missing_sources(self):
    """
    Builds SAMPLE_WARC and verifies that each trace matrix ("0-1", "0-2"
    and "1-2") is shaped (size of upper level, size of lower level).
    :return: None
    """
    builder = DatasetBuilder("SAMPLE_WARC")
    builder.build()

    # Artifact levels, indexed the same way the trace ids refer to them.
    top = builder.artifacts[0]
    middle = builder.artifacts[1]
    bottom = builder.artifacts[2]
    n_top, n_middle, n_bottom = len(top), len(middle), len(bottom)

    shape_by_trace_id = (
        ("0-1", (n_top, n_middle)),
        ("0-2", (n_top, n_bottom)),
        ("1-2", (n_middle, n_bottom)),
    )
    for trace_id, expected_shape in shape_by_trace_id:
        assert_shapes_equal(
            builder.trace_matrix_map[trace_id].matrix, expected_shape)

    # The 1-2 matrix is additionally compared through its .shape attribute.
    self.assertEqual(
        builder.trace_matrix_map["1-2"].matrix.shape,
        (n_middle, n_bottom),
    )
def test_creating_missing_matrices(self):
    """
    Drops trace matrix 0-2 from MockDataset's structure definition and
    verifies that a full build still yields a 0-2 matrix, alongside the
    other two, with the expected values and a 3-edge dependency graph.
    :return: None
    """
    builder = DatasetBuilder("MockDataset")
    builder.structure_definition["traces"]["0-2"] = None
    builder.build()

    matrix_builder = builder.trace_matrix_builder
    dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()
    matrix_map = matrix_builder.trace_matrix_map

    # All three trace ids must be present after the build.
    self.assertEqual(3, len(matrix_map.get_trace_ids()))

    # The removed matrix must have been reconstructed.
    rebuilt = matrix_map["0-2"]
    self.assertIsNotNone(rebuilt)
    self.assertIsNotNone(rebuilt.matrix)
    self.assertEqual(1, rebuilt.matrix[0, 0])
    self.assertEqual(2, rebuilt.matrix.sum(axis=1).sum())

    self.assertEqual(3, len(dependency_graph.es))
def test_create_transitive_trace_matrices_with_warc(self):
    """
    Builds SAMPLE_WARC and verifies that trace matrix 1-2 exists and that
    the trace matrix dependency graph has three edges.
    :return: None
    """
    builder = DatasetBuilder("SAMPLE_WARC")
    builder.build()

    matrix_builder = builder.trace_matrix_builder
    dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()

    self.assertIsNotNone(matrix_builder.trace_matrix_map["1-2"])
    self.assertEqual(3, len(dependency_graph.es))
def test_get_paths_to_missing_traces_with_fakedataset(self):
    """
    With trace matrix 0-2 removed from MockDataset's structure definition,
    expects get_paths_to_complete_graph to report exactly one missing
    trace id ("0-2") reachable through the single path [0, 1, 2].
    :return: None
    """
    builder = DatasetBuilder("MockDataset")
    builder.structure_definition["traces"]["0-2"] = None

    # Build artifacts, then only the matrices the definition still declares.
    builder.artifact_builder.build()
    matrix_builder = builder.trace_matrix_builder
    matrix_builder.set_artifact_levels(builder.artifact_builder.artifacts)
    matrix_builder.build_matrices_defined_in_dataset()

    defined_matrices = matrix_builder.trace_matrix_map

    # TODO: clean up levels dependency
    dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()

    trace_ids: List[str] = defined_matrices.get_trace_ids()
    missing_paths = get_paths_to_complete_graph(trace_ids, dependency_graph, 3)

    self.assertEqual(1, len(missing_paths.get_trace_ids()))
    paths_for_direct = missing_paths["0-2"]
    self.assertEqual(1, len(paths_for_direct))
    self.assertEqual([0, 1, 2], missing_paths["0-2"][0])
def test_create_trace_matrices_in_definition(self):
    """
    With trace matrix 0-2 removed from MockDataset's structure definition,
    verifies that building only the defined matrices yields "0-1" and
    "1-2" (not "0-2") and that "0-1" links level 0 to level 1.
    :return: None
    """
    builder = DatasetBuilder("MockDataset")
    builder.structure_definition["traces"]["0-2"] = None

    builder.artifact_builder.build()
    matrix_builder = builder.trace_matrix_builder
    matrix_builder.set_artifact_levels(builder.artifact_builder.artifacts)
    matrix_builder.build_matrices_defined_in_dataset()

    built_ids = matrix_builder.trace_matrix_map.get_trace_ids()
    self.assertEqual(2, len(built_ids))
    for expected_id in ("0-1", "1-2"):
        self.assertIn(expected_id, built_ids)
    self.assertNotIn("0-2", built_ids)

    # The builder itself is subscripted by trace id here.
    top_to_middle = builder.trace_matrix_builder["0-1"]
    self.assertEqual(0, top_to_middle.top_index)
    self.assertEqual(1, top_to_middle.bottom_index)
def run(self) -> Table:
    """
    Builds and exports every dataset name produced by the iterable that
    create_loading_bar yields for DATASET_COLUMN_ORDER, printing a line
    after each export.
    :return: a Table constructed with no arguments
    """
    n_datasets = len(DATASET_COLUMN_ORDER)
    loading_bar = create_loading_bar(
        EXPERIMENT_NAME, DATASET_COLUMN_ORDER, n_datasets)

    with loading_bar as d_iterable:
        for dataset_name in d_iterable:
            dataset_builder = DatasetBuilder(dataset_name)
            dataset_builder.build()
            dataset_builder.export()
            print(f"{dataset_name} exported.")

    return Table()
def test_export_dataset(self):
    """
    Builds and exports MockDataset, then checks that its "Artifacts" and
    "Oracles" folders under PATH_TO_SAMPLE_DATASETS have been updated.
    :return: None
    """
    # Setup
    dataset_name = "MockDataset"
    builder = DatasetBuilder(dataset_name)
    builder.build()

    # Work
    builder.export()

    # Verify each exported folder in turn.
    for folder_rel_path in ("Artifacts", "Oracles"):
        check_folder_has_updated(
            os.path.join(PATH_TO_SAMPLE_DATASETS, dataset_name,
                         folder_rel_path))
def test_get_paths_to_missing_traces_with_warc(self):
    """
    Builds only the trace matrices defined for SAMPLE_WARC and expects
    get_paths_to_complete_graph to report one missing trace id ("0-1")
    reachable through the single transitive path [0, 2, 1].
    :return: None
    """
    builder = DatasetBuilder("SAMPLE_WARC")

    builder.artifact_builder.build()
    matrix_builder = builder.trace_matrix_builder
    matrix_builder.set_artifact_levels(builder.artifact_builder.artifacts)
    matrix_builder.build_matrices_defined_in_dataset()

    dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()

    # Trace ids are passed through to_string before being handed over.
    trace_ids: List[str] = [
        to_string(trace_id)
        for trace_id in matrix_builder.trace_matrix_map.get_trace_ids()
    ]
    missing_paths = get_paths_to_complete_graph(trace_ids, dependency_graph, 3)

    self.assertEqual(1, len(missing_paths.get_trace_ids()))
    self.assertEqual(1, len(missing_paths["0-1"]))
    self.assertEqual([0, 2, 1], missing_paths["0-1"][0])
class TestTechniqueHelper(SmartTest):
    """
    Fixture data for technique tests.

    MockDataset is built, exported and loaded while this class body
    executes. The remaining attributes hold technique definitions (nested
    lists of command symbol, parameters and components) together with the
    technique-name strings that go with them.
    """

    d_name = "MockDataset"
    d_builder = DatasetBuilder(d_name)
    d_builder.build()
    d_builder.export()
    dataset = Dataset(d_name)

    # ------------------------------------------------------------------
    # Direct
    # ------------------------------------------------------------------
    direct_algebraic_model = AlgebraicModel.VSM
    direct_trace_type = TraceType.NOT_TRACED
    direct_parameters = [
        direct_algebraic_model.value,
        direct_trace_type.value,
    ]
    direct_components = ["0", "2"]
    direct_definition = [
        DIRECT_COMMAND_SYMBOL,
        direct_parameters,
        direct_components,
    ]

    # ------------------------------------------------------------------
    # Intermediate
    # ------------------------------------------------------------------
    transitive_algebraic_model = AlgebraicModel.VSM
    transitive_aggregation_type = AggregationMethod.SUM
    transitive_component_scaling_type = ScalingMethod.GLOBAL
    transitive_component_trace_type = TraceType.NOT_TRACED

    # Upper component: levels 0 -> 1.
    transitive_component_a = [
        DIRECT_COMMAND_SYMBOL,
        [
            transitive_algebraic_model.value,
            transitive_component_trace_type.value,
        ],
        ["0", "1"],
    ]
    transitive_upper_comp = "(%s (%s %s) (%s %s))" % (
        DIRECT_COMMAND_SYMBOL,
        transitive_algebraic_model.value,
        transitive_component_trace_type.value,
        "0",
        "1",
    )

    # Lower component: levels 1 -> 2.
    transitive_component_b = [
        DIRECT_COMMAND_SYMBOL,
        [
            transitive_algebraic_model.value,
            transitive_component_trace_type.value,
        ],
        ["1", "2"],
    ]
    transitive_component_b_name = "(%s (%s %s) (%s %s))" % (
        DIRECT_COMMAND_SYMBOL,
        transitive_algebraic_model.value,
        transitive_component_trace_type.value,
        "1",
        "2",
    )

    transitive_parameters = [
        transitive_aggregation_type.value,
        transitive_component_scaling_type.value,
    ]
    transitive_components = [transitive_component_a, transitive_component_b]
    transitive_technique_definition = [
        TRANSITIVE_COMMAND_SYMBOL,
        transitive_parameters,
        transitive_components,
    ]

    # ------------------------------------------------------------------
    # Traced Components
    # ------------------------------------------------------------------
    traced_component_type = TraceType.TRACED
    traced_aggregation_value = AggregationMethod.MAX
    traced_direct_component_a = [
        DIRECT_COMMAND_SYMBOL,
        [transitive_algebraic_model.value, traced_component_type.value],
        ["0", "1"],
    ]
    traced_direct_component_b = [
        DIRECT_COMMAND_SYMBOL,
        [transitive_algebraic_model.value, traced_component_type.value],
        ["1", "2"],
    ]
    traced_components = [traced_direct_component_a, traced_direct_component_b]
    traced_parameters = [
        traced_aggregation_value.value,
        transitive_component_scaling_type.value,
    ]

    # ------------------------------------------------------------------
    # Sampled Artifacts
    # ------------------------------------------------------------------
    sample_percentage = 0.5
    # Transitive parameters followed by repr() of the sample percentage.
    sampled_parameters: [str] = transitive_parameters + [
        repr(sample_percentage)
    ]
    # Same list object as transitive_components (an alias, not a copy).
    sampled_components = transitive_components
    sampled_artifacts_definition = [
        SAMPLED_COMMAND_SYMBOL,
        sampled_parameters,
        sampled_components,
    ]
    sampled_traces_definition = [
        SAMPLED_TRACED_COMMAND_SYMBOL,
        sampled_parameters,
        sampled_components,
    ]

    # ------------------------------------------------------------------
    # Combined
    # ------------------------------------------------------------------
    combined_aggregation_type = AggregationMethod.SUM
    combined_parameters = ["SUM"]
    combined_components = [direct_definition, transitive_technique_definition]

    # ------------------------------------------------------------------
    # Combined (with sampled transitive)
    # ------------------------------------------------------------------
    combined_sampled_artifacts_components = [
        direct_definition,
        sampled_artifacts_definition,
    ]
    combined_sampled_traces_components = [
        direct_definition,
        sampled_traces_definition,
    ]

    # ------------------------------------------------------------------
    # Technique names
    # ------------------------------------------------------------------
    direct_technique_name = "(. (VSM NT) (0 2))"
    transitive_technique_name = (
        "(x (SUM GLOBAL) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))")
    # The sampled names embed the percentage via %f formatting.
    transitive_sampled_artifacts_technique_name = (
        "(~ (SUM GLOBAL %f) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))"
        % sample_percentage)
    transitive_sampled_traces_technique_name = (
        "($ (SUM GLOBAL %f) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))"
        % sample_percentage)
    combined_technique_name = "(o (%s) (%s %s))" % (
        "SUM",
        direct_technique_name,
        transitive_technique_name,
    )
    combined_sampled_artifacts_technique_name = "(o (%s) (%s %s))" % (
        "SUM",
        direct_technique_name,
        transitive_sampled_artifacts_technique_name,
    )
    combined_sampled_traces_technique_name = "(o (%s) (%s %s))" % (
        "SUM",
        direct_technique_name,
        transitive_sampled_traces_technique_name,
    )

    def get_direct_definition(self) -> DirectTechniqueDefinition:
        """Returns a DirectTechniqueDefinition of the direct fixture."""
        parameters = self.direct_parameters
        components = self.direct_components
        return DirectTechniqueDefinition(parameters, components)

    def get_transitive_definition(self) -> TransitiveTechniqueDefinition:
        """Returns a TransitiveTechniqueDefinition of the transitive fixture."""
        parameters = self.transitive_parameters
        components = self.transitive_components
        return TransitiveTechniqueDefinition(parameters, components)

    def get_traced_transitive_definition(
            self) -> TransitiveTechniqueDefinition:
        """Returns a TransitiveTechniqueDefinition of the traced fixture."""
        parameters = self.traced_parameters
        components = self.traced_components
        return TransitiveTechniqueDefinition(parameters, components)

    def get_combined_definition(self) -> HybridTechniqueDefinition:
        """Returns a HybridTechniqueDefinition of the combined fixture."""
        parameters = self.combined_parameters
        components = self.combined_components
        return HybridTechniqueDefinition(parameters, components)

    def get_sampled_technique_definition(self) -> SampledTechniqueDefinition:
        """Returns a SampledTechniqueDefinition of the sampled fixture."""
        parameters = self.sampled_parameters
        components = self.sampled_components
        return SampledTechniqueDefinition(parameters, components)

    def get_combined_sampled_artifacts_definition(
            self) -> HybridTechniqueDefinition:
        """
        Returns a HybridTechniqueDefinition pairing the direct definition
        with the sampled-artifacts definition.
        """
        parameters = self.combined_parameters
        components = self.combined_sampled_artifacts_components
        return HybridTechniqueDefinition(parameters, components)

    def assert_valid_fake_dataset_similarity_matrix(
            self, similarity_matrix: SimilarityMatrix):
        """Asserts that the given similarity matrix has shape (1, 3)."""
        expected_shape = (1, 3)
        self.assertEqual(expected_shape, similarity_matrix.shape)

    def create_counter_func(self, t_name: str):
        """
        Creates a callback that checks the dataset name and technique name
        of the data it receives and counts how often it has been called.
        :param t_name: technique name every received data must carry
        :return: (callback, dict whose "value" entry is the call count)
        """
        # A dict is used so the closure can update the count in place.
        n_function_calls = {"value": 0}

        def counter_func(data: TechniqueData):
            self.assertEqual(self.d_name, data.dataset.name)
            self.assertEqual(t_name, data.technique.get_name())
            n_function_calls["value"] += 1

        return counter_func, n_function_calls
def test_normalize_original_matrices(self):
    """
    Builds MockDataset and checks that entry [0][2] of the "0-2" trace
    matrix equals 1.
    :return: None
    """
    builder = DatasetBuilder("MockDataset")
    builder.build()

    direct_matrix = builder.trace_matrix_builder["0-2"].matrix
    self.assertEqual(1, direct_matrix[0][2])
# Script that reads the [0][0] similarity score of three direct VSM
# techniques on the IllustrativeExample dataset and multiplies the two
# component scores (0-1 and 1-2) into a transitive score.
from api.datasets.builder.dataset_builder import DatasetBuilder
from api.datasets.dataset import Dataset
from api.tracer import Tracer

DATASET_NAME = "IllustrativeExample"
# Technique names for the upper (0-1), lower (1-2) and direct (0-2) pairs.
TOP_TECHNIQUE_NAME = "(. (VSM NT) (0 1))"
BOTTOM_TECHNIQUE_NAME = "(. (VSM NT) (1 2))"
DIRECT_TECHNIQUE_NAME = "(. (VSM NT) (0 2))"
# Not referenced anywhere in the visible part of this script.
TECHNIQUE_NAME = "(x (MAX INDEPENDENT) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))"
# Set to True to build and export the dataset before it is loaded.
REBUILD = False

if __name__ == "__main__":
    # NOTE(review): the original line breaks were lost; this assumes only the
    # three builder statements are guarded by REBUILD - confirm.
    if REBUILD:
        dataset_builder = DatasetBuilder(DATASET_NAME)
        dataset_builder.build()
        dataset_builder.export()

    # `dataset` is not used by the statements visible below.
    dataset = Dataset(DATASET_NAME)

    tracer = Tracer()
    top_technique_data = tracer.get_technique_data(DATASET_NAME,
                                                   TOP_TECHNIQUE_NAME)
    bottom_technique_data = tracer.get_technique_data(DATASET_NAME,
                                                      BOTTOM_TECHNIQUE_NAME)
    direct_technique_data = tracer.get_technique_data(DATASET_NAME,
                                                      DIRECT_TECHNIQUE_NAME)

    # Only the first entry ([0][0]) of each similarity matrix is read.
    top_score = top_technique_data.similarity_matrix[0][0]
    bottom_score = bottom_technique_data.similarity_matrix[0][0]
    # Transitive score here is the product of the two component scores.
    transitive_score = top_score * bottom_score
    direct_score = direct_technique_data.similarity_matrix[0][0]