def test_generate_and_save_package_topic_model_local(self):
    """Curate a package topic model from local data and compare it to the stored expectation.

    The curated model's dictionary must equal the dictionary of the
    expected model loaded from the output data store; once that holds,
    the curated model is saved to the output data store.
    """
    input_data_store = LocalFileSystem(
        "tests/data/data_gnosis/input-ptm-data")
    # assertIsNotNone reports the offending value on failure, unlike
    # assertTrue(x is not None) which only prints "False is not true".
    self.assertIsNotNone(input_data_store)

    output_data_store = LocalFileSystem(
        "tests/data/data_gnosis/output-ptm-data")
    self.assertIsNotNone(output_data_store)

    package_topic_model = GnosisPackageTopicModel.curate(
        data_store=input_data_store,
        filename="data_input_curated_package_topic/package_topic.json")
    self.assertIsNotNone(package_topic_model)

    output_result = package_topic_model.get_dictionary()
    self.assertIsNotNone(output_result)

    expected_package_topic_model = GnosisPackageTopicModel.load(
        data_store=output_data_store,
        filename="data_package_topic/expected_package_topic.json")
    self.assertIsNotNone(expected_package_topic_model)

    expected_output_result = expected_package_topic_model.get_dictionary()
    self.assertIsNotNone(expected_output_result)

    self.assertDictEqual(output_result, expected_output_result)

    # Only reached when the comparison above passed.
    package_topic_model.save(
        data_store=output_data_store,
        filename="data_package_topic/package_topic.json")
def train(cls, data_store, additional_path="", min_support_count=None,
          min_intent_topic_count=None, fp_num_partition=None):
    """Build the Gnosis Reference Architecture from a saved package topic model.

    :param data_store: input data store containing the processed package
        topic map and list of manifest files.
    :param additional_path: prefix prepended to GNOSIS_PTM_OUTPUT_PATH when
        loading the package topic model; also forwarded to the FP Growth
        training step.
    :param min_support_count: minimum support count to be used by the
        FP Growth algorithm.
    :param min_intent_topic_count: minimum number of allowed topics per
        intent.
    :param fp_num_partition: forwarded unchanged to the FP Growth training
        step.
    :return: the Gnosis Reference Architecture produced by
        ``cls._generate_gnosis_model``.
    """
    # Load the stored package topic model and pull out its package-topic map.
    ptm_model = GnosisPackageTopicModel.load(
        data_store=data_store,
        filename=additional_path + GNOSIS_PTM_OUTPUT_PATH)
    package_topic_map = ptm_model.get_dictionary()[
        GNOSIS_PTM_PACKAGE_TOPIC_MAP]

    component_classes = (
        cls._generate_component_class_list_for_eco_package_topic_dict(
            eco_to_package_topic_dict=package_topic_map))

    fp_model = cls._train_fp_growth_model(
        data_store=data_store,
        eco_to_package_topic_dict=package_topic_map,
        min_support_count=min_support_count,
        additional_path=additional_path,
        fp_num_partition=fp_num_partition)

    intent_to_component_class = (
        cls._generate_intent_component_class_dict_fp_growth(
            model=fp_model,
            min_intent_topic_count=min_intent_topic_count,
            package_list=component_classes))

    # TODO: modify this while implementing multiple levels in the reference
    # architecture
    intent_to_intent = {}

    intents = cls._generate_intent_list(
        gnosis_intent_to_intent_dict=intent_to_intent,
        gnosis_intent_to_component_class_dict=intent_to_component_class)

    edges = cls._generate_edge_list(
        gnosis_intent_to_component_class_dict=intent_to_component_class,
        gnosis_intent_to_intent_dict=intent_to_intent)

    return cls._generate_gnosis_model(
        gnosis_intent_to_intent_dict=intent_to_intent,
        gnosis_intent_to_component_class_dict=intent_to_component_class,
        gnosis_component_class_list=component_classes,
        gnosis_intent_list=intents,
        gnosis_edge_list=edges)
def test_manifest_missing_packages(self):
    """Check the unknown packages reported for the manifest fixture.

    With an empty package-topic dictionary, the keys returned by
    ``_get_unknown_packages_from_manifests`` must match (order-insensitively)
    the first entry of the fixture's ``package_list``.
    """
    input_data_store = LocalFileSystem("tests/data/data_gnosis/")
    self.assertIsNotNone(input_data_store)

    manifest_json = input_data_store.read_json_file(
        filename='data_input_manifest_file_list/manifest_unknown_packages.json')
    self.assertTrue(manifest_json)
    # assertIn shows both the key and the container when it fails.
    self.assertIn("package_list", manifest_json[0])
    package_list = manifest_json[0]['package_list']

    packages = GnosisPackageTopicModel._get_unknown_packages_from_manifests(
        input_data_store, additional_path='', package_topic_dict={})

    self.assertListEqual(sorted(package_list[0]), sorted(packages.keys()))
def test_package_tag_creation(self):
    """Check that tag creation yields a non-empty result for every fixture package.

    Package names are read from the ``package_topic_map`` entry of the first
    element of the curated package-topic fixture.
    """
    input_data_store = LocalFileSystem(
        "tests/data/data_gnosis/input-ptm-data")
    self.assertIsNotNone(input_data_store)

    ptm_json = input_data_store.read_json_file(
        filename='data_input_curated_package_topic/package_topic.json')
    self.assertTrue(ptm_json)

    package_names = ptm_json[0]['package_topic_map']
    for package_name in package_names:
        tag_list = GnosisPackageTopicModel._create_tags_for_package(
            package_name)
        # At least one tag should be generated for each package; name the
        # package in the message so a failure points at the culprit.
        self.assertTrue(
            tag_list,
            msg="no tags generated for package %r" % (package_name,))
def generate_and_save_gnosis_package_topic_model(input_data_store,
                                                 output_data_store,
                                                 additional_path):
    """Curate the Gnosis package topic model and persist the result.

    :param input_data_store: source data store.
    :param output_data_store: destination data store.
    :param additional_path: prefix prepended to both GNOSIS_PTM_INPUT_PATH
        (read side) and GNOSIS_PTM_OUTPUT_PATH (write side).
    :return: None.
    """
    curated_input_path = additional_path + GNOSIS_PTM_INPUT_PATH
    curated_model = GnosisPackageTopicModel.curate(
        data_store=input_data_store, filename=curated_input_path)

    saved_output_path = additional_path + GNOSIS_PTM_OUTPUT_PATH
    curated_model.save(
        data_store=output_data_store, filename=saved_output_path)