コード例 #1
0
    def test_generate_and_save_package_topic_model_local(self):
        """Curate a package topic model from local input data, compare it
        with the expected serialized model, then save the result."""
        in_store = LocalFileSystem(
            "tests/data/data_gnosis/input-ptm-data")
        self.assertIsNotNone(in_store)

        out_store = LocalFileSystem(
            "tests/data/data_gnosis/output-ptm-data")
        self.assertIsNotNone(out_store)

        # Curate the model from the raw package-topic input file.
        model = GnosisPackageTopicModel.curate(
            data_store=in_store,
            filename="data_input_curated_package_topic/package_topic.json")
        self.assertIsNotNone(model)

        result = model.get_dictionary()
        self.assertIsNotNone(result)

        # Load the reference model and compare the dictionaries.
        expected_model = GnosisPackageTopicModel.load(
            data_store=out_store,
            filename="data_package_topic/expected_package_topic.json")
        self.assertIsNotNone(expected_model)

        expected_result = expected_model.get_dictionary()
        self.assertIsNotNone(expected_result)

        self.assertDictEqual(result, expected_result)

        # Persist the freshly curated model to the output store.
        model.save(
            data_store=out_store,
            filename="data_package_topic/package_topic.json")
コード例 #2
0
    def test_train_and_save_gnosis_ref_arch_local(self):
        """Train a Gnosis reference architecture from local data, verify
        it against the expected serialized model, and save the result."""
        in_store = LocalFileSystem(
            "analytics_platform/kronos/gnosis/test/data/input-ra-data")
        self.assertIsNotNone(in_store)

        out_store = LocalFileSystem(
            "analytics_platform/kronos/gnosis/test/data/output-ra-data")
        self.assertIsNotNone(out_store)

        # Fixed hyper-parameters keep the trained model reproducible.
        ra_model = GnosisReferenceArchitecture.train(
            data_store=in_store,
            min_support_count=40,
            min_intent_topic_count=2,
            fp_num_partition=12)
        self.assertIsNotNone(ra_model)

        trained_dict = ra_model.get_dictionary()
        self.assertIsNotNone(trained_dict)

        expected_model = GnosisReferenceArchitecture.load(
            data_store=out_store,
            filename="data_gnosis/expected_gnosis_ref_arch.json")
        self.assertIsNotNone(expected_model)

        expected_dict = expected_model.get_dictionary()
        self.assertIsNotNone(expected_dict)

        self.assertDictEqual(trained_dict, expected_dict)

        ra_model.save(data_store=out_store,
                      filename="data_gnosis/gnosis_ref_arch.json")
コード例 #3
0
    def test_train_and_save_gnosis_ref_arch_local(self):
        """Test the train() method, the deserialization method and compare results."""
        in_store = LocalFileSystem(
            "tests/data/data_gnosis/input-ra-data/")
        self.assertIsNotNone(in_store)

        out_store = LocalFileSystem(
            "tests/data/data_gnosis/output-ra-data/")
        self.assertIsNotNone(out_store)

        # Fixed hyper-parameters keep the trained model reproducible.
        ra_model = GnosisReferenceArchitecture.train(
            data_store=in_store,
            min_support_count=40,
            min_intent_topic_count=2,
            fp_num_partition=12,
            fp_tag_intent_limit=2)
        self.assertIsNotNone(ra_model)

        trained_dict = ra_model.get_dictionary()
        self.assertIsNotNone(trained_dict)

        # Compare against the expected serialized reference architecture.
        expected_model = GnosisReferenceArchitecture.load(
            data_store=out_store,
            filename="data_gnosis/expected_gnosis_ref_arch.json")
        self.assertIsNotNone(expected_model)

        expected_dict = expected_model.get_dictionary()
        self.assertIsNotNone(expected_dict)

        self.assertDictEqual(trained_dict, expected_dict)

        ra_model.save(data_store=out_store,
                      filename="data_gnosis/gnosis_ref_arch.json")
    def test_generate_and_save_pruned_list_local(self):
        """Test the method prune_tag_list() and the deserialization of the tag list."""
        in_store = LocalFileSystem("tests/data/data_apollo/")
        self.assertIsNotNone(in_store)

        out_store = LocalFileSystem("tests/data/data_apollo/")
        self.assertIsNotNone(out_store)

        # Prune the raw tag list; the result is written to the output store.
        TagListPruner.prune_tag_list(in_store,
                                     out_store,
                                     additional_path="")

        pruned_obj = TagListPruner.load(
            data_store=out_store,
            filename=PACKAGE_LIST_INPUT_CURATED_FILEPATH +
            "package_topic.json")
        self.assertIsNotNone(pruned_obj)
        generated_list = pruned_obj.package_list
        self.assertIsNotNone(generated_list)

        expected_obj = TagListPruner.load(
            data_store=out_store,
            filename=PACKAGE_LIST_INPUT_CURATED_FILEPATH +
            "expected_output.json")
        self.assertIsNotNone(expected_obj)
        expected_list = expected_obj.package_list
        self.assertIsNotNone(expected_list)

        # Compare generated and expected tag dicts pairwise.
        for tag_got, tag_want in zip(generated_list, expected_list):
            self.assertDictEqual(tag_got, tag_want)
    def test_generate_and_save_cooccurrence_matrices_local(self):
        """Generate, save and verify a co-occurrence matrix for every
        (user category, ecosystem) pair found in the local manifest files.

        Each generated matrix is saved to the output data store and its
        columns are checked against the expected matrix stored beside it.
        """
        input_data_store = LocalFileSystem(
            "tests/data/data_softnet/input-com-data")
        self.assertTrue(input_data_store is not None)

        output_data_store = LocalFileSystem(
            "tests/data/data_softnet/output-com-data")
        self.assertTrue(output_data_store is not None)

        # Kronos dependency dicts are required to build each matrix.
        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertTrue(eco_to_kronos_dependency_dict is not None)

        manifest_filenames = input_data_store.list_files(
            "data_input_manifest_file_list")
        self.assertTrue(manifest_filenames is not None)

        for manifest_filename in manifest_filenames:
            # Manifest paths look like "<prefix>/<user_category>/<file>".
            user_category = manifest_filename.split("/")[1]
            manifest_content_json_list = input_data_store.read_json_file(
                filename=manifest_filename)
            self.assertTrue(manifest_content_json_list is not None)

            for manifest_content_json in manifest_content_json_list:
                self.assertTrue(manifest_content_json is not None)
                manifest_content_dict = dict(manifest_content_json)
                ecosystem = manifest_content_dict["ecosystem"]
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                list_of_package_list = manifest_content_dict.get(
                    "package_list")
                cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
                self.assertTrue(cooccurrence_matrix_obj is not None)
                output_filename = ("data_co_occurrence_matrix" + "/" +
                                   str(user_category) + "/" +
                                   "cooccurrence_matrix" + "_" +
                                   str(ecosystem) + ".json")
                cooccurrence_matrix_obj.save(data_store=output_data_store,
                                             filename=output_filename)
                expected_output_filename = ("data_co_occurrence_matrix" +
                                            "/" + str(user_category) + "/" +
                                            "expected_cooccurrence_matrix" +
                                            "_" + str(ecosystem) + ".json")
                expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                    data_store=output_data_store,
                    filename=expected_output_filename)
                self.assertTrue(expected_cooccurrence_matrix_obj is not None)
                cooccurrence_matrix_df = \
                    cooccurrence_matrix_obj.get_matrix_dictionary()
                self.assertTrue(cooccurrence_matrix_df is not None)
                expected_cooccurrence_matrix_df = \
                    expected_cooccurrence_matrix_obj.get_matrix_dictionary()
                # Fix: assert the expected matrix actually loaded before
                # using it (the sibling co-occurrence test already does).
                self.assertTrue(expected_cooccurrence_matrix_df is not None)
                expected_columns = set(expected_cooccurrence_matrix_df.columns)
                resultant_columns = set(cooccurrence_matrix_df.columns)
                self.assertTrue(resultant_columns == expected_columns)
                # Iterating a DataFrame yields its column labels, so this
                # re-checks column containment as a weaker sanity condition.
                self.assertTrue(
                    set(cooccurrence_matrix_df).issubset(
                        set(expected_cooccurrence_matrix_df)))
    def test_generate_and_save_cooccurrence_matrices_local(self):
        """Generate, save and verify a co-occurrence matrix for every
        (user category, ecosystem) pair found in the local manifest files."""
        input_data_store = LocalFileSystem(
            "analytics_platform/kronos/softnet/test/data/input-com-data")
        self.assertTrue(input_data_store is not None)

        output_data_store = LocalFileSystem(
            "analytics_platform/kronos/softnet/test/data/output-com-data")
        self.assertTrue(output_data_store is not None)

        # Kronos dependency dicts are required to build each matrix.
        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertTrue(eco_to_kronos_dependency_dict is not None)

        manifest_filenames = input_data_store.list_files(
            "data_input_manifest_file_list")
        self.assertTrue(manifest_filenames is not None)

        for manifest_filename in manifest_filenames:
            # Manifest paths look like "<prefix>/<user_category>/<file>".
            user_category = manifest_filename.split("/")[1]
            manifest_content_json_list = input_data_store.read_json_file(
                filename=manifest_filename)
            self.assertTrue(manifest_content_json_list is not None)

            for manifest_content_json in manifest_content_json_list:
                self.assertTrue(manifest_content_json is not None)
                manifest_content_dict = dict(manifest_content_json)
                ecosystem = manifest_content_dict["ecosystem"]
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                list_of_package_list = manifest_content_dict.get(
                    "package_list")
                cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
                self.assertTrue(cooccurrence_matrix_obj is not None)
                # Output path: data_co_occurrence_matrix/<category>/cooccurrence_matrix_<eco>.json
                output_filename = "data_co_occurrence_matrix" + "/" + str(
                    user_category) + "/" + "cooccurrence_matrix" + "_" + str(
                        ecosystem) + ".json"
                cooccurrence_matrix_obj.save(data_store=output_data_store,
                                             filename=output_filename)
                expected_output_filename = "data_co_occurrence_matrix" + "/" + str(
                    user_category
                ) + "/" + "expected_cooccurrence_matrix" + "_" + str(
                    ecosystem) + ".json"
                expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                    data_store=output_data_store,
                    filename=expected_output_filename)
                self.assertTrue(expected_cooccurrence_matrix_obj is not None)
                cooccurrence_matrix_df = cooccurrence_matrix_obj.get_matrix_dictionary(
                )
                self.assertTrue(cooccurrence_matrix_df is not None)
                expected_cooccurrence_matrix_df = expected_cooccurrence_matrix_obj.get_matrix_dictionary(
                )
                self.assertTrue(expected_cooccurrence_matrix_df is not None)
                # Column order may differ between runs, so sort columns
                # before the frame-level comparison.
                assert_frame_equal(
                    cooccurrence_matrix_df.sort_index(axis=1),
                    expected_cooccurrence_matrix_df.sort_index(axis=1),
                    check_names=True)
    def test_generate_and_save_kronos_dependency_local(self):
        """Generate a Kronos dependency graph per ecosystem from the local
        Gnosis reference architecture and package-topic data, save each
        graph and compare it with the expected serialized graph."""
        input_data_store = LocalFileSystem("analytics_platform/kronos/softnet/test/data/input-kd-data")
        self.assertTrue(input_data_store is not None)

        output_data_store = LocalFileSystem("analytics_platform/kronos/softnet/test/data/output-kd-data")
        self.assertTrue(output_data_store is not None)

        gnosis_ref_arch_json = input_data_store.read_json_file(filename="data_gnosis/gnosis_ref_arch.json")
        self.assertTrue(gnosis_ref_arch_json is not None)

        gnosis_ref_arch_dict = dict(gnosis_ref_arch_json)
        self.assertTrue(gnosis_ref_arch_dict is not None)

        package_topic_json = input_data_store.read_json_file("data_package_topic/package_topic.json")
        self.assertTrue(package_topic_json is not None)

        package_topic_dict = dict(package_topic_json)
        self.assertTrue(package_topic_dict is not None)

        # Per-ecosystem package->topic and topic->package mappings.
        eco_to_package_topic_dict = package_topic_dict["package_topic_map"]
        eco_to_topic_package_dict = package_topic_dict["topic_package_map"]

        eco_to_kronos_dependency_dict = dict()

        # Generate one dependency-graph object per ecosystem.
        for ecosystem in eco_to_package_topic_dict.keys():
            package_to_topic_dict = eco_to_package_topic_dict.get(ecosystem)
            topic_to_package_dict = eco_to_topic_package_dict.get(ecosystem)

            kronos_dependency_obj = KronosDependencyGenerator.generate_kronos_dependency(
                gnosis_ref_arch_dict=gnosis_ref_arch_dict,
                package_to_topic_dict=package_to_topic_dict,
                topic_to_package_dict=topic_to_package_dict)

            self.assertTrue(kronos_dependency_obj is not None)

            eco_to_kronos_dependency_dict[ecosystem] = kronos_dependency_obj

        # Save each generated graph and compare it with the expected one.
        for ecosystem in eco_to_kronos_dependency_dict.keys():
            kronos_dependency_obj = eco_to_kronos_dependency_dict[ecosystem]
            filename = "data_kronos_dependency/kronos_dependency.json"
            # e.g. "kronos_dependency.json" -> "kronos_dependency_pypi.json"
            filename_formatted = filename.replace(".", "_" + ecosystem + ".")
            kronos_dependency_obj.save(data_store=output_data_store, filename=filename_formatted)

            kronos_dependency_dict = kronos_dependency_obj.get_dictionary()

            self.assertTrue(kronos_dependency_dict is not None)

            # Expected file lives beside the output with an "expected_" prefix.
            expected_filename_formatted = filename_formatted.replace("/", "/expected_")

            expected_kronos_dependency_obj = KronosDependencyGenerator.load(data_store=output_data_store,
                                                                            filename=expected_filename_formatted)
            self.assertTrue(expected_kronos_dependency_obj is not None)

            expected_kronos_dependency_dict = expected_kronos_dependency_obj.get_dictionary()
            self.assertTrue(expected_kronos_dependency_dict is not None)

            self.assertDictEqual(kronos_dependency_dict, expected_kronos_dependency_dict)
    def test_score_eco_user_package_dict(self):
        """Test the dependency dict deserialization, frequency list deserialization etc."""
        input_data_store = LocalFileSystem(
            "tests/data/data_pgm/input-score-data/")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "tests/data/data_pgm/output-score-data/")
        self.assertIsNotNone(output_data_store)

        frequency_dict_data_store = LocalFileSystem(
            src_dir="tests/data/data_apollo/")
        self.assertIsNotNone(frequency_dict_data_store)

        # Per-user-category map of trained Kronos models per ecosystem.
        user_eco_kronos_dict = load_user_eco_to_kronos_model_dict(
            input_kronos_data_store=input_data_store, additional_path="")

        self.assertIsNotNone(user_eco_kronos_dict)

        eco_to_kronos_dependency_dict = get_eco_to_kronos_dependency_dict(
            data_store=input_data_store, folderpath="data_kronos_dependency")

        self.assertIsNotNone(eco_to_kronos_dependency_dict)

        # A single scoring request with fixed thresholds; "np1" is an
        # unknown package relative to the test data.
        user_request = [{
            "ecosystem": "pypi",
            "comp_package_count_threshold": 10,
            "alt_package_count_threshold": 1,
            "outlier_probability_threshold": 0.61,
            "unknown_packages_ratio_threshold": 0.4,
            "outlier_package_count_threshold": 2,
            "package_list": ["p1", "p2", "p3", "np1"]
        }]

        frequency_dict = frequency_dict_data_store.read_json_file(
            filename=KD_PACKAGE_FREQUENCY)
        self.assertIsNotNone(frequency_dict)
        all_package_list_obj = RecommendationValidator.load_package_list_local(
            input_folder_name="tests/data/data_recom_valid/",
            additional_path="")

        response = score_eco_user_package_dict(
            user_request,
            user_eco_kronos_dict=user_eco_kronos_dict,
            eco_to_kronos_dependency_dict=eco_to_kronos_dependency_dict,
            all_package_list_obj=all_package_list_obj,
            package_frequency_dict=frequency_dict,
            use_filters=USE_FILTERS)

        self.assertIsNotNone(response)

        # Persist the response so it can be inspected after the run.
        output_data_store.write_json_file(filename="response.json",
                                          contents=response)

        expected_response = output_data_store.read_json_file(
            filename="expected_response.json")
        self.assertIsNotNone(expected_response)

        self.assertDictEqual(response[0], expected_response[0])
 def test_package_tag_creation(self):
     """Every package in the curated topic map should yield at least one tag."""
     store = LocalFileSystem(
         "tests/data/data_gnosis/input-ptm-data/")
     self.assertIsNotNone(store)
     ptm_json = store.read_json_file(
         filename='data_input_curated_package_topic/package_topic.json')
     self.assertTrue(ptm_json)
     for package_name in ptm_json[0]['package_topic_map']:
         # At least one tag should be generated for each package.
         self.assertTrue(create_tags_for_package(package_name))
 def test_manifest_missing_packages(self):
     """Packages listed in a manifest but absent from the topic dict
     should be reported as unknown."""
     store = LocalFileSystem("tests/data/data_gnosis/")
     self.assertIsNotNone(store)
     manifest_json = store.read_json_file(
         filename=
         'data_input_manifest_file_list/manifest_unknown_packages.json')
     self.assertTrue(manifest_json)
     self.assertIn("package_list", manifest_json[0])
     expected_packages = manifest_json[0]['package_list']
     # With an empty topic dict, every manifest package is unknown.
     unknown = GnosisPackageTopicModel._get_unknown_packages_from_manifests(
         store, additional_path='', package_topic_dict={})
     self.assertListEqual(sorted(expected_packages[0]), sorted(unknown.keys()))
    def generate_kronos_dependency(cls, gnosis_ref_arch_dict,
                                   package_to_topic_dict,
                                   topic_to_package_dict):
        """Generate the Kronos dependency graph (soft net) from a Gnosis
        reference architecture and the package/topic mappings.

        :param gnosis_ref_arch_dict: dict form of the trained Gnosis
            reference architecture (component class list, intent list,
            edge list and intent dependency map).
        :param package_to_topic_dict: mapping of package name to its topics.
        :param topic_to_package_dict: mapping of topic to its packages.

        :return: Object of class KronosDependencyGenerator."""
        _logger.info("Started kronos dependency graph generation")
        package_list = list(package_to_topic_dict.keys())
        component_class_list = gnosis_ref_arch_dict.get(
            softnet_constants.GNOSIS_RA_COMPONENT_CLASS_LIST)

        # Connect each package to the component classes derived from its topics.
        component_class_to_package_edge_list, component_class_to_package_dict = \
            cls._generate_component_class_to_package_edge_list_and_dict(
                package_list, component_class_list, package_to_topic_dict)

        gnosis_ref_arch_intent_list = gnosis_ref_arch_dict.get(
            softnet_constants.GNOSIS_RA_INTENT_LIST)
        # Kronos nodes: all packages plus all intents (incl. component classes).
        kronos_intent_list = gnosis_ref_arch_intent_list + component_class_list
        kronos_node_list = package_list + kronos_intent_list

        gnosis_ref_arch_edge_list = gnosis_ref_arch_dict.get(
            softnet_constants.GNOSIS_RA_EDGE_LIST)

        kronos_dependency_edge_list = gnosis_ref_arch_edge_list + \
            component_class_to_package_edge_list

        parent_tuple_list = softnet_utils.generate_parent_tuple_list(
            kronos_node_list, kronos_dependency_edge_list)
        parent_tuple_list_string = LocalFileSystem.convert_list_of_tuples_to_string(
            parent_tuple_list)
        similar_package_dict = cls._generate_similar_package_dict(
            package_to_topic_dict, topic_to_package_dict)

        # Assemble the serializable dependency-graph description in one literal.
        kronos_dependency_dict = {
            softnet_constants.KD_PACKAGE_LIST: package_list,
            softnet_constants.KD_INTENT_LIST: kronos_intent_list,
            softnet_constants.KD_INTENT_DEPENDENCY_MAP:
                gnosis_ref_arch_dict.get(softnet_constants.GNOSIS_RA_DICT),
            softnet_constants.KD_COMPONENT_DEPENDENCY_MAP:
                component_class_to_package_dict,
            softnet_constants.KD_PARENT_TUPLE_LIST: parent_tuple_list_string,
            softnet_constants.KD_EDGE_LIST: kronos_dependency_edge_list,
            softnet_constants.KD_SIMILAR_PACKAGE_MAP: similar_package_dict,
            softnet_constants.KD_PACKAGE_TO_TOPIC_MAP: package_to_topic_dict,
        }
        _logger.info("Ended Kronos dependency graph generation")
        return KronosDependencyGenerator(kronos_dependency_dict)
コード例 #12
0
    def load(cls, data_store):
        """Rebuild the chatbot DNN and load its trained weights.

        :param data_store: LocalFileSystem or S3DataStore holding the
            pickled word/class dict and the tflearn model files.
        :return: ChatbotModel wrapping the restored network.
        :raises ValueError: if data_store is of an unsupported type.
        """
        def _build_network(word_class_dict):
            # Recreate the exact topology used at training time so the
            # saved weights can be loaded into it.
            net = tflearn.input_data(
                shape=[None, int(word_class_dict["num_input"])])
            net = tflearn.fully_connected(net, 8)
            net = tflearn.fully_connected(net, 8)
            net = tflearn.fully_connected(net,
                                          int(word_class_dict["num_output"]),
                                          activation='softmax')
            net = tflearn.regression(net)
            return tflearn.DNN(net)

        if type(data_store) is LocalFileSystem:
            local_store = data_store
        elif type(data_store) is S3DataStore:
            # Stage the model artifacts locally; tflearn reads only from
            # the file system.
            data_store.download_file(MODEL_FILENAME + ".index",
                                     "/tmp/" + MODEL_FILENAME + ".index")
            data_store.download_file(MODEL_FILENAME + ".meta",
                                     "/tmp/" + MODEL_FILENAME + ".meta")
            data_store.download_file(
                MODEL_FILENAME + ".data-00000-of-00001",
                "/tmp/" + MODEL_FILENAME + ".data-00000-of-00001")
            data_store.download_file(WORD_CLASS_DICT_FILENAME,
                                     "/tmp/" + WORD_CLASS_DICT_FILENAME)
            local_store = LocalFileSystem("/tmp/")
        else:
            # Previously an unsupported store fell through to a NameError
            # on word_class_dict/dl_model; fail with a clear message.
            raise ValueError("Unsupported data store type: %s"
                             % type(data_store).__name__)

        word_class_dict = local_store.read_pickle_file(
            filename=WORD_CLASS_DICT_FILENAME)
        model = _build_network(word_class_dict)
        dl_model = local_store.read_dl_model(data=model,
                                             filename=MODEL_FILENAME)
        return ChatbotModel(words=word_class_dict["words"],
                            classes=word_class_dict["classes"],
                            num_input=word_class_dict["num_input"],
                            num_output=word_class_dict["num_output"],
                            dl_model=dl_model,
                            response=word_class_dict["response"])
コード例 #13
0
 def load(cls, data_store):
     """Load the similarity matrix and movie-name list from the store.

     :param data_store: LocalFileSystem or S3DataStore.
     :return: ImdbRecSys built from the loaded artifacts.
     :raises ValueError: if data_store is of an unsupported type.
     """
     if type(data_store) is LocalFileSystem:
         local_store = data_store
     elif type(data_store) is S3DataStore:
         # Pickled artifacts must be staged on the local file system first.
         data_store.download_file(SIMILARITY_MATRIX_FILENAME,
                                  "/tmp/" + SIMILARITY_MATRIX_FILENAME)
         data_store.download_file(MOVIE_LIST_FILENAME,
                                  "/tmp/" + MOVIE_LIST_FILENAME)
         local_store = LocalFileSystem("/tmp/")
     else:
         # Previously an unsupported store fell through to a NameError
         # on matrix/movie_names; fail with a clear message instead.
         raise ValueError("Unsupported data store type: %s"
                          % type(data_store).__name__)
     matrix = local_store.read_pickle_file(
         filename=SIMILARITY_MATRIX_FILENAME)
     movie_names = local_store.read_pickle_file(
         filename=MOVIE_LIST_FILENAME)
     return ImdbRecSys(matrix=matrix, movie_names=movie_names)
    def test_generate_and_save_package_frequency_dict_local(self):
        """Create a frequency-dict generator, write the frequency dict to
        the output store and verify it can be read back."""
        in_store = LocalFileSystem(
            src_dir="tests/data/data_gnosis/input-ra-data/")
        self.assertIsNotNone(in_store)

        out_store = LocalFileSystem(src_dir="tests/data/data_apollo/")

        self.assertIsNotNone(out_store)

        generator = FrequencyDictGenerator.create_frequency_generator(
            input_data_store=in_store, additional_path="")

        self.assertIsNotNone(generator)

        generator.generate_and_save_frequency_dict(
            output_data_store=out_store, additional_path="")

        # The saved frequency dict must round-trip through the store.
        frequency_dict = out_store.read_json_file(
            filename=KD_PACKAGE_FREQUENCY)

        self.assertIsNotNone(frequency_dict)
コード例 #15
0
 def save(self, data_store):
     """Persist the DNN weights and the word/class metadata to the store."""
     word_class_dict = {
         "words": self.words,
         "classes": self.classes,
         "num_input": self.num_input,
         "num_output": self.num_output,
         "response": self.response
     }
     if type(data_store) is LocalFileSystem:
         data_store.write_dl_model(data=self.dl_model,
                                   filename=MODEL_FILENAME)
         data_store.write_pickle_file(data=word_class_dict,
                                      filename=WORD_CLASS_DICT_FILENAME)
     if type(data_store) is S3DataStore:
         # Write everything to /tmp first, then push each artifact to S3.
         staging = LocalFileSystem("/tmp/")
         staging.write_dl_model(data=self.dl_model, filename=MODEL_FILENAME)
         staging.write_pickle_file(data=word_class_dict,
                                   filename=WORD_CLASS_DICT_FILENAME)
         for suffix in (".index", ".meta", ".data-00000-of-00001"):
             data_store.upload_file("/tmp/" + MODEL_FILENAME + suffix,
                                    MODEL_FILENAME + suffix)
         data_store.upload_file("/tmp/" + WORD_CLASS_DICT_FILENAME,
                                WORD_CLASS_DICT_FILENAME)
     return None
    def load_package_list_local(cls, input_folder_name, additional_path, input_ecosystem):
        """Build the aggregated manifest package list for an ecosystem
        from a LocalFileSystem data source.

        :param input_folder_name: The main directory where the manifest files are stored.
        :param additional_path: The directory to pick the manifest files from.
        :param input_ecosystem: The ecosystem for which the aggregated manifest list will be saved.

        :return: RecommendationValidator object."""

        # Wrap the folder in a LocalFileSystem store and delegate.
        manifest_store = LocalFileSystem(src_dir=input_folder_name)
        return cls.load_package_list(manifest_store, additional_path, input_ecosystem)
コード例 #17
0
    def test_train_and_save_kronos_list_local(self):
        """Train a Kronos PGM for every (user category, ecosystem) pair
        from the local dependency and co-occurrence data and save each
        trained model."""
        input_data_store = LocalFileSystem(
            "tests/data/data_pgm/input-train-data/")
        self.assertTrue(input_data_store is not None)

        output_data_store = LocalFileSystem(
            "tests/data/data_pgm/output-train-data/")
        self.assertTrue(output_data_store is not None)

        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertTrue(eco_to_kronos_dependency_dict is not None)

        # Co-occurrence matrices keyed by user category, then ecosystem.
        user_eco_to_cooccurrence_matrix_dict = load_user_eco_to_co_occerrence_matrix_dict(
            input_co_occurrence_data_store=input_data_store,
            additional_path="")
        self.assertTrue(user_eco_to_cooccurrence_matrix_dict is not None)

        for user_category in user_eco_to_cooccurrence_matrix_dict.keys():
            eco_to_cooccurrence_matrix_dict = user_eco_to_cooccurrence_matrix_dict[
                user_category]
            for ecosystem in eco_to_cooccurrence_matrix_dict.keys():
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                cooccurrence_matrix_df = eco_to_cooccurrence_matrix_dict[
                    ecosystem]
                # Train one PGM per (user category, ecosystem) pair.
                kronos_model = PGMPomegranate.train(
                    kronos_dependency_dict=kronos_dependency_dict,
                    package_occurrence_df=cooccurrence_matrix_df)
                self.assertTrue(kronos_model is not None)
                filename = os.path.join(
                    "data_kronos_user_eco", str(user_category),
                    "kronos" + "_" + str(ecosystem) + ".json")
                kronos_model.save(data_store=output_data_store,
                                  filename=filename)
コード例 #18
0
def test_movie_recommender_with_local_data_store():
    """Train, persist, reload and query the movie recommender locally."""
    recommender = MovieRecommender.train(
        src_url="data/sample_movielens_ratings.txt")
    assert recommender is not None

    store = LocalFileSystem(src_dir="/tmp")
    recommender.save_to_data_store(data_store=store)

    # Reload from the store and make sure recommendations still work.
    recommender = MovieRecommender.load_from_data_store(data_store=store)
    assert recommender is not None

    recommendations = recommender.recommend_movies(user_id=25)
    assert recommendations is not None
    assert len(recommendations.items()) == 10
    def load_package_list_local(cls, input_folder_name, additional_path):
        """Load manifest files from the local file system and generate list of packages from it.

        Generate the aggregated manifest list for a given ecosystem from
        LocalFileSystem datasource.

        :param input_folder_name: The main directory where the manifest files are stored.
        :param additional_path: The directory to pick the manifest files from.

        :return: RecommendationValidator object.
        """
        # Wrap the folder in a LocalFileSystem store and aggregate.
        manifest_store = LocalFileSystem(src_dir=input_folder_name)
        package_sets = load_package_list(
            input_data_store=manifest_store,
            additional_path=additional_path)
        return cls(all_list_of_package_set=package_sets)
コード例 #20
0
        def test_score_eco_user_package_dict(self):
            """Score a user package request against the trained Kronos
            models and compare the response with the expected JSON."""
            input_data_store = LocalFileSystem(
                "tests/data/data_pgm/input-score-data/")
            self.assertTrue(input_data_store is not None)

            output_data_store = LocalFileSystem(
                "tests/data/data_pgm/output-score-data/")
            self.assertTrue(output_data_store is not None)

            # Per-user-category map of trained Kronos models per ecosystem.
            user_eco_kronos_dict = load_user_eco_to_kronos_model_dict(
                input_kronos_data_store=input_data_store, additional_path="")

            self.assertTrue(user_eco_kronos_dict is not None)

            eco_to_kronos_dependency_dict = get_eco_to_kronos_dependency_dict(
                data_store=input_data_store,
                folderpath="data_kronos_dependency")

            self.assertTrue(eco_to_kronos_dependency_dict is not None)

            # A single request with fixed thresholds; the package list
            # intentionally contains duplicates and an unknown package.
            user_request = [{
                "ecosystem":
                "pypi",
                "comp_package_count_threshold":
                10,
                "alt_package_count_threshold":
                1,
                "outlier_probability_threshold":
                0.61,
                "unknown_packages_ratio_threshold":
                0.4,
                "outlier_package_count_threshold":
                2,
                "package_list": ["p1", "p2", "p3", "np1", "p2", "p3", "p1"]
            }]

            response = score_eco_user_package_dict(
                user_request,
                user_eco_kronos_dict=user_eco_kronos_dict,
                eco_to_kronos_dependency_dict=eco_to_kronos_dependency_dict,
                all_package_list_obj=None)

            self.assertTrue(response is not None)

            # Persist the response so it can be inspected after the run.
            output_data_store.write_json_file(filename="response.json",
                                              contents=response)

            expected_response = output_data_store.read_json_file(
                filename="expected_response.json")
            self.assertTrue(expected_response is not None)

            self.assertDictEqual(response[0], expected_response[0])
コード例 #21
0
 def save(self, data_store):
     """Persist the similarity matrix and movie-name list to the store."""
     if type(data_store) is LocalFileSystem:
         data_store.write_pickle_file(data=self.matrix,
                                      filename=SIMILARITY_MATRIX_FILENAME)
         data_store.write_pickle_file(data=self.movie_names,
                                      filename=MOVIE_LIST_FILENAME)
     if type(data_store) is S3DataStore:
         # Stage the pickles in /tmp first, then upload them to S3.
         staging = LocalFileSystem("/tmp/")
         staging.write_pickle_file(data=self.matrix,
                                   filename=SIMILARITY_MATRIX_FILENAME)
         staging.write_pickle_file(data=self.movie_names,
                                   filename=MOVIE_LIST_FILENAME)
         data_store.upload_file("/tmp/" + SIMILARITY_MATRIX_FILENAME,
                                SIMILARITY_MATRIX_FILENAME)
         data_store.upload_file("/tmp/" + MOVIE_LIST_FILENAME,
                                MOVIE_LIST_FILENAME)
     return None
コード例 #22
0
def crawl_local():
    """Run the crawler against the local rec_platform data directory."""
    local_store = LocalFileSystem(src_dir="./rec_platform/data/")
    crawl(data_store=local_store)
コード例 #23
0
def train_and_save_rec_model_local():
    """Train the recommendation model and save it to the local data dir."""
    local_store = LocalFileSystem(src_dir="./rec_platform/data/")
    train_and_save_rec_model(data_store=local_store)
コード例 #24
0
def load_rec_model_local():
    """Load the IMDB recommender from the local rec_platform data dir."""
    local_store = LocalFileSystem(src_dir="./rec_platform/data")
    return ImdbRecSys.load(data_store=local_store)
コード例 #25
0
def load_chatbot_model_local(src_dir):
    """Load the chatbot model from a local directory.

    :param src_dir: directory holding the serialized chatbot artifacts.
    :return: the loaded ChatbotModel.
    """
    local_store = LocalFileSystem(src_dir=src_dir)
    return ChatbotModel.load(data_store=local_store)
コード例 #26
0
def load_credential_local(src_dir):
    """Read the credential JSON file from a local directory.

    :param src_dir: directory holding the credential file.
    :return: the credentials as a plain dict.
    """
    local_store = LocalFileSystem(src_dir=src_dir)
    return dict(local_store.read_json_file(CREDENTIAL_FILENAME))