コード例 #1
0
    def test_generate_and_save_package_topic_model_local(self):
        """Curate a package topic model from local fixtures, compare, then save.

        The dictionary of the curated model must equal the dictionary of the
        expected model loaded from the output fixture directory. Only after
        that comparison passes is the curated model written to the output
        data store.
        """
        input_data_store = LocalFileSystem(
            "tests/data/data_gnosis/input-ptm-data")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "tests/data/data_gnosis/output-ptm-data")
        self.assertIsNotNone(output_data_store)

        package_topic_model = GnosisPackageTopicModel.curate(
            data_store=input_data_store,
            filename="data_input_curated_package_topic/package_topic.json")
        self.assertIsNotNone(package_topic_model)
        output_result = package_topic_model.get_dictionary()

        self.assertIsNotNone(output_result)

        expected_package_topic_model = GnosisPackageTopicModel.load(
            data_store=output_data_store,
            filename="data_package_topic/expected_package_topic.json")

        self.assertIsNotNone(expected_package_topic_model)

        expected_output_result = expected_package_topic_model.get_dictionary()

        self.assertIsNotNone(expected_output_result)

        self.assertDictEqual(output_result, expected_output_result)

        # Saving happens last so a mismatching model is never written out.
        package_topic_model.save(
            data_store=output_data_store,
            filename="data_package_topic/package_topic.json")
コード例 #2
0
    def train(cls,
              data_store,
              additional_path="",
              min_support_count=None,
              min_intent_topic_count=None,
              fp_num_partition=None):
        """Build the Gnosis Reference Architecture from a stored package topic model.

        :param data_store: input data store containing the processed package
            topic map and list of manifest files.
        :param additional_path: prefix prepended to GNOSIS_PTM_OUTPUT_PATH
            when loading the package topic model; also forwarded to the
            FP Growth training step.
        :param min_support_count: minimum support count to be used by the
            FP Growth algorithm.
        :param min_intent_topic_count: minimum number of allowed topics per
            intent.
        :param fp_num_partition: forwarded unchanged to the FP Growth
            training step.

        :return: the Gnosis Reference Architecture model produced by
            ``_generate_gnosis_model``.
        """
        # Load the persisted package topic model and keep only its
        # package-to-topic section.
        ptm = GnosisPackageTopicModel.load(
            data_store=data_store,
            filename=additional_path + GNOSIS_PTM_OUTPUT_PATH)
        ptm_dictionary = ptm.get_dictionary()
        package_topic_map = ptm_dictionary[GNOSIS_PTM_PACKAGE_TOPIC_MAP]

        component_classes = cls._generate_component_class_list_for_eco_package_topic_dict(
            eco_to_package_topic_dict=package_topic_map)

        fp_model = cls._train_fp_growth_model(
            data_store=data_store,
            eco_to_package_topic_dict=package_topic_map,
            min_support_count=min_support_count,
            additional_path=additional_path,
            fp_num_partition=fp_num_partition)

        intent_to_component_class = cls._generate_intent_component_class_dict_fp_growth(
            model=fp_model,
            min_intent_topic_count=min_intent_topic_count,
            package_list=component_classes)

        # TODO: modify this while implementing multiple levels in the reference
        # architecture
        intent_to_intent = {}

        # The same two mappings feed every remaining step, so bundle them once.
        intent_maps = {
            "gnosis_intent_to_intent_dict": intent_to_intent,
            "gnosis_intent_to_component_class_dict": intent_to_component_class,
        }

        intents = cls._generate_intent_list(**intent_maps)
        edges = cls._generate_edge_list(**intent_maps)

        return cls._generate_gnosis_model(
            gnosis_component_class_list=component_classes,
            gnosis_intent_list=intents,
            gnosis_edge_list=edges,
            **intent_maps)
 def test_manifest_missing_packages(self):
     """Check unknown-package detection against the manifest fixture.

     With an empty package topic dictionary, the keys returned by
     ``_get_unknown_packages_from_manifests`` must match the first entry of
     the first manifest's ``package_list``, ignoring order.
     """
     input_data_store = LocalFileSystem("tests/data/data_gnosis/")
     self.assertIsNotNone(input_data_store)
     manifest_json = input_data_store.read_json_file(
         filename=
         'data_input_manifest_file_list/manifest_unknown_packages.json')
     self.assertTrue(manifest_json)
     self.assertIn("package_list", manifest_json[0])
     package_list = manifest_json[0]['package_list']
     packages = GnosisPackageTopicModel._get_unknown_packages_from_manifests(
         input_data_store, additional_path='', package_topic_dict={})
     # Both sides are sorted so the comparison does not depend on ordering.
     self.assertListEqual(sorted(package_list[0]), sorted(packages.keys()))
コード例 #4
0
 def test_package_tag_creation(self):
     """Check that every curated package name yields at least one tag.

     Package names are taken from the ``package_topic_map`` entry of the
     first record in the curated package topic fixture.
     """
     input_data_store = LocalFileSystem(
         "tests/data/data_gnosis/input-ptm-data")
     self.assertIsNotNone(input_data_store)
     ptm_json = input_data_store.read_json_file(
         filename='data_input_curated_package_topic/package_topic.json')
     self.assertTrue(ptm_json)
     package_names = ptm_json[0]['package_topic_map']
     for package_name in package_names:
         tag_list = GnosisPackageTopicModel._create_tags_for_package(
             package_name)
         # At least one tag should be generated for each package; name the
         # offending package so a failure inside the loop is actionable.
         self.assertTrue(
             tag_list,
             msg="no tags generated for package {!r}".format(package_name))
コード例 #5
0
def generate_and_save_gnosis_package_topic_model(input_data_store,
                                                 output_data_store,
                                                 additional_path):
    """Curate the Gnosis package topic model and persist it.

    :param input_data_store: source data store handed to
        ``GnosisPackageTopicModel.curate``.
    :param output_data_store: destination data store the curated model is
        saved to.
    :param additional_path: prefix prepended to both GNOSIS_PTM_INPUT_PATH
        and GNOSIS_PTM_OUTPUT_PATH.

    :return: None.
    """
    source_filename = additional_path + GNOSIS_PTM_INPUT_PATH
    curated_model = GnosisPackageTopicModel.curate(
        data_store=input_data_store,
        filename=source_filename)

    target_filename = additional_path + GNOSIS_PTM_OUTPUT_PATH
    curated_model.save(data_store=output_data_store,
                       filename=target_filename)