def test_score_eco_user_package_dict(self):
        """Score a sample pypi request and compare it with the stored expected response.

        Kronos models, Kronos dependency dictionaries, the package frequency
        dictionary and the package list are loaded from local fixture
        directories and passed to score_eco_user_package_dict. The response
        is written to "response.json" in the output store, and its first
        element is compared with the first element of "expected_response.json".
        """
        # Fixture locations: scoring inputs, scoring outputs, frequency dict.
        score_input_store = LocalFileSystem(
            "tests/data/data_pgm/input-score-data/")
        self.assertIsNotNone(score_input_store)

        score_output_store = LocalFileSystem(
            "tests/data/data_pgm/output-score-data/")
        self.assertIsNotNone(score_output_store)

        frequency_store = LocalFileSystem(src_dir="tests/data/data_apollo/")
        self.assertIsNotNone(frequency_store)

        kronos_models = load_user_eco_to_kronos_model_dict(
            input_kronos_data_store=score_input_store, additional_path="")
        self.assertIsNotNone(kronos_models)

        kronos_dependencies = get_eco_to_kronos_dependency_dict(
            data_store=score_input_store, folderpath="data_kronos_dependency")
        self.assertIsNotNone(kronos_dependencies)

        # Single-entry request for the pypi ecosystem.
        pypi_request = {
            "ecosystem": "pypi",
            "comp_package_count_threshold": 10,
            "alt_package_count_threshold": 1,
            "outlier_probability_threshold": 0.61,
            "unknown_packages_ratio_threshold": 0.4,
            "outlier_package_count_threshold": 2,
            "package_list": ["p1", "p2", "p3", "np1"],
        }
        user_request = [pypi_request]

        package_frequencies = frequency_store.read_json_file(
            filename=KD_PACKAGE_FREQUENCY)
        self.assertIsNotNone(package_frequencies)

        known_packages = RecommendationValidator.load_package_list_local(
            input_folder_name="tests/data/data_recom_valid/",
            additional_path="")

        actual = score_eco_user_package_dict(
            user_request,
            user_eco_kronos_dict=kronos_models,
            eco_to_kronos_dependency_dict=kronos_dependencies,
            all_package_list_obj=known_packages,
            package_frequency_dict=package_frequencies,
            use_filters=USE_FILTERS)
        self.assertIsNotNone(actual)

        # Persist the actual response next to the expected one.
        score_output_store.write_json_file(filename="response.json",
                                           contents=actual)

        expected = score_output_store.read_json_file(
            filename="expected_response.json")
        self.assertIsNotNone(expected)

        # Only the first entry of each response is compared.
        self.assertDictEqual(actual[0], expected[0])
    def test_generate_and_save_kronos_dependency_local(self):
        """Generate, save and verify a Kronos dependency object for each ecosystem.

        The Gnosis reference architecture and the package/topic maps are read
        from the input store. A Kronos dependency object is generated for
        every ecosystem in "package_topic_map", saved to the output store and
        compared with the matching "expected_" file from the same store.
        """
        input_data_store = LocalFileSystem("analytics_platform/kronos/softnet/test/data/input-kd-data")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem("analytics_platform/kronos/softnet/test/data/output-kd-data")
        self.assertIsNotNone(output_data_store)

        gnosis_ref_arch_json = input_data_store.read_json_file(filename="data_gnosis/gnosis_ref_arch.json")
        self.assertIsNotNone(gnosis_ref_arch_json)

        # dict() either returns a dict or raises, so no None check is needed on the result.
        gnosis_ref_arch_dict = dict(gnosis_ref_arch_json)

        package_topic_json = input_data_store.read_json_file("data_package_topic/package_topic.json")
        self.assertIsNotNone(package_topic_json)

        package_topic_dict = dict(package_topic_json)

        eco_to_package_topic_dict = package_topic_dict["package_topic_map"]
        eco_to_topic_package_dict = package_topic_dict["topic_package_map"]

        eco_to_kronos_dependency_dict = dict()

        # First pass: generate every dependency object before anything is written.
        for ecosystem, package_to_topic_dict in eco_to_package_topic_dict.items():
            topic_to_package_dict = eco_to_topic_package_dict.get(ecosystem)

            kronos_dependency_obj = KronosDependencyGenerator.generate_kronos_dependency(
                gnosis_ref_arch_dict=gnosis_ref_arch_dict,
                package_to_topic_dict=package_to_topic_dict,
                topic_to_package_dict=topic_to_package_dict)

            self.assertIsNotNone(kronos_dependency_obj)

            eco_to_kronos_dependency_dict[ecosystem] = kronos_dependency_obj

        # Loop-invariant template; the ecosystem name is spliced in before the extension.
        filename = "data_kronos_dependency/kronos_dependency.json"

        # Second pass: save each object and compare it with its expected counterpart.
        for ecosystem, kronos_dependency_obj in eco_to_kronos_dependency_dict.items():
            filename_formatted = filename.replace(".", "_" + ecosystem + ".")
            kronos_dependency_obj.save(data_store=output_data_store, filename=filename_formatted)

            kronos_dependency_dict = kronos_dependency_obj.get_dictionary()
            self.assertIsNotNone(kronos_dependency_dict)

            # The expected file sits in the same folder with an "expected_" prefix.
            expected_filename_formatted = filename_formatted.replace("/", "/expected_")

            expected_kronos_dependency_obj = KronosDependencyGenerator.load(data_store=output_data_store,
                                                                            filename=expected_filename_formatted)
            self.assertIsNotNone(expected_kronos_dependency_obj)

            expected_kronos_dependency_dict = expected_kronos_dependency_obj.get_dictionary()
            self.assertIsNotNone(expected_kronos_dependency_dict)

            self.assertDictEqual(kronos_dependency_dict, expected_kronos_dependency_dict)
    def test_generate_and_save_cooccurrence_matrices_local(self):
        """Generate co-occurrence matrices from manifests and check their columns.

        For every manifest entry a co-occurrence matrix is generated from the
        ecosystem's Kronos dependency dictionary, saved to the output store,
        and its column labels are compared with those of the stored
        "expected_" matrix for the same user category and ecosystem.
        """
        input_data_store = LocalFileSystem(
            "tests/data/data_softnet/input-com-data")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "tests/data/data_softnet/output-com-data")
        self.assertIsNotNone(output_data_store)

        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertIsNotNone(eco_to_kronos_dependency_dict)

        manifest_filenames = input_data_store.list_files(
            "data_input_manifest_file_list")
        self.assertIsNotNone(manifest_filenames)

        for manifest_filename in manifest_filenames:
            # The second path component of the manifest file name is used as
            # the user category.
            user_category = manifest_filename.split("/")[1]
            manifest_content_json_list = input_data_store.read_json_file(
                filename=manifest_filename)
            self.assertIsNotNone(manifest_content_json_list)

            for manifest_content_json in manifest_content_json_list:
                self.assertIsNotNone(manifest_content_json)
                manifest_content_dict = dict(manifest_content_json)
                ecosystem = manifest_content_dict["ecosystem"]
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                list_of_package_list = manifest_content_dict.get(
                    "package_list")

                cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
                self.assertIsNotNone(cooccurrence_matrix_obj)

                output_filename = "data_co_occurrence_matrix/{}/cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
                cooccurrence_matrix_obj.save(data_store=output_data_store,
                                             filename=output_filename)

                expected_output_filename = "data_co_occurrence_matrix/{}/expected_cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
                expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                    data_store=output_data_store,
                    filename=expected_output_filename)
                self.assertIsNotNone(expected_cooccurrence_matrix_obj)

                cooccurrence_matrix_df = cooccurrence_matrix_obj.get_matrix_dictionary()
                self.assertIsNotNone(cooccurrence_matrix_df)
                expected_cooccurrence_matrix_df = expected_cooccurrence_matrix_obj.get_matrix_dictionary()
                self.assertIsNotNone(expected_cooccurrence_matrix_df)

                # assertSetEqual reports the differing labels on failure.
                expected_columns = set(expected_cooccurrence_matrix_df.columns)
                resultant_columns = set(cooccurrence_matrix_df.columns)
                self.assertSetEqual(resultant_columns, expected_columns)

                # NOTE(review): if these objects are pandas DataFrames,
                # iterating them yields column labels, so this presumably
                # repeats the column check above; cell values are not
                # compared anywhere in this test - confirm that is intended.
                self.assertTrue(
                    set(cooccurrence_matrix_df).issubset(
                        set(expected_cooccurrence_matrix_df)))
    def test_generate_and_save_cooccurrence_matrices_local(self):
        """Generate co-occurrence matrices from manifests and compare with expected ones.

        For every manifest entry a co-occurrence matrix is generated from the
        ecosystem's Kronos dependency dictionary, saved to the output store,
        and compared (after sorting columns by label) with the stored
        "expected_" matrix for the same user category and ecosystem.
        """
        input_data_store = LocalFileSystem(
            "analytics_platform/kronos/softnet/test/data/input-com-data")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "analytics_platform/kronos/softnet/test/data/output-com-data")
        self.assertIsNotNone(output_data_store)

        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertIsNotNone(eco_to_kronos_dependency_dict)

        manifest_filenames = input_data_store.list_files(
            "data_input_manifest_file_list")
        self.assertIsNotNone(manifest_filenames)

        for manifest_filename in manifest_filenames:
            # The second path component of the manifest file name is used as
            # the user category.
            user_category = manifest_filename.split("/")[1]
            manifest_content_json_list = input_data_store.read_json_file(
                filename=manifest_filename)
            self.assertIsNotNone(manifest_content_json_list)

            for manifest_content_json in manifest_content_json_list:
                self.assertIsNotNone(manifest_content_json)
                manifest_content_dict = dict(manifest_content_json)
                ecosystem = manifest_content_dict["ecosystem"]
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                list_of_package_list = manifest_content_dict.get(
                    "package_list")

                cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
                self.assertIsNotNone(cooccurrence_matrix_obj)

                output_filename = "data_co_occurrence_matrix/{}/cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
                cooccurrence_matrix_obj.save(data_store=output_data_store,
                                             filename=output_filename)

                expected_output_filename = "data_co_occurrence_matrix/{}/expected_cooccurrence_matrix_{}.json".format(
                    user_category, ecosystem)
                expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                    data_store=output_data_store,
                    filename=expected_output_filename)
                self.assertIsNotNone(expected_cooccurrence_matrix_obj)

                cooccurrence_matrix_df = cooccurrence_matrix_obj.get_matrix_dictionary()
                self.assertIsNotNone(cooccurrence_matrix_df)
                expected_cooccurrence_matrix_df = expected_cooccurrence_matrix_obj.get_matrix_dictionary()
                self.assertIsNotNone(expected_cooccurrence_matrix_df)

                # Columns are sorted by label on both sides so that column
                # order alone cannot make the comparison fail.
                assert_frame_equal(
                    cooccurrence_matrix_df.sort_index(axis=1),
                    expected_cooccurrence_matrix_df.sort_index(axis=1),
                    check_names=True)
 def test_package_tag_creation(self):
     """Check that create_tags_for_package yields at least one tag per curated package.

     Package names are taken from the "package_topic_map" entry of the first
     element of the curated package/topic JSON fixture.
     """
     input_data_store = LocalFileSystem(
         "tests/data/data_gnosis/input-ptm-data/")
     self.assertIsNotNone(input_data_store)
     ptm_json = input_data_store.read_json_file(
         filename='data_input_curated_package_topic/package_topic.json')
     # Truthiness check: the fixture must be present and non-empty.
     self.assertTrue(ptm_json)
     package_names = ptm_json[0]['package_topic_map']
     for package_name in package_names:
         tag_list = create_tags_for_package(package_name)
         # At least one tag should be generated for each package; name the
         # offending package so a failure is actionable.
         self.assertTrue(
             tag_list,
             msg="no tags generated for package {!r}".format(package_name))
 def test_manifest_missing_packages(self):
     """Check that unknown packages found in manifests match the fixture's package list.

     With an empty package_topic_dict, the keys returned by
     GnosisPackageTopicModel._get_unknown_packages_from_manifests are
     expected to equal the first package list of the first entry in
     "manifest_unknown_packages.json".
     """
     input_data_store = LocalFileSystem("tests/data/data_gnosis/")
     self.assertIsNotNone(input_data_store)
     manifest_json = input_data_store.read_json_file(
         filename=
         'data_input_manifest_file_list/manifest_unknown_packages.json')
     # Truthiness check: the fixture must be present and non-empty.
     self.assertTrue(manifest_json)
     self.assertIn("package_list", manifest_json[0])
     package_list = manifest_json[0]['package_list']
     packages = GnosisPackageTopicModel._get_unknown_packages_from_manifests(
         input_data_store, additional_path='', package_topic_dict={})
     # Both sides are sorted so that ordering does not affect the comparison.
     self.assertListEqual(sorted(package_list[0]), sorted(packages.keys()))
Ejemplo n.º 7
0
        def test_score_eco_user_package_dict(self):
            """Score a pypi request with repeated package names and compare with the expected response.

            Kronos models and Kronos dependency dictionaries are loaded from
            the input store and passed to score_eco_user_package_dict with
            all_package_list_obj=None. The response is written to
            "response.json" in the output store, and its first element is
            compared with the first element of "expected_response.json".
            """
            input_data_store = LocalFileSystem(
                "tests/data/data_pgm/input-score-data/")
            self.assertIsNotNone(input_data_store)

            output_data_store = LocalFileSystem(
                "tests/data/data_pgm/output-score-data/")
            self.assertIsNotNone(output_data_store)

            user_eco_kronos_dict = load_user_eco_to_kronos_model_dict(
                input_kronos_data_store=input_data_store, additional_path="")
            self.assertIsNotNone(user_eco_kronos_dict)

            eco_to_kronos_dependency_dict = get_eco_to_kronos_dependency_dict(
                data_store=input_data_store,
                folderpath="data_kronos_dependency")
            self.assertIsNotNone(eco_to_kronos_dependency_dict)

            # The package list deliberately repeats "p1", "p2" and "p3".
            user_request = [{
                "ecosystem": "pypi",
                "comp_package_count_threshold": 10,
                "alt_package_count_threshold": 1,
                "outlier_probability_threshold": 0.61,
                "unknown_packages_ratio_threshold": 0.4,
                "outlier_package_count_threshold": 2,
                "package_list": ["p1", "p2", "p3", "np1", "p2", "p3", "p1"],
            }]

            response = score_eco_user_package_dict(
                user_request,
                user_eco_kronos_dict=user_eco_kronos_dict,
                eco_to_kronos_dependency_dict=eco_to_kronos_dependency_dict,
                all_package_list_obj=None)
            self.assertIsNotNone(response)

            # Persist the actual response next to the expected one.
            output_data_store.write_json_file(filename="response.json",
                                              contents=response)

            expected_response = output_data_store.read_json_file(
                filename="expected_response.json")
            self.assertIsNotNone(expected_response)

            # Only the first entry of each response is compared.
            self.assertDictEqual(response[0], expected_response[0])
    def test_generate_and_save_package_frequency_dict_local(self):
        """Generate the package frequency dictionary, save it and read it back.

        A frequency generator is created from the input store, asked to
        generate and save its dictionary into the output store, and the file
        named by KD_PACKAGE_FREQUENCY is then read from that store.
        """
        source_store = LocalFileSystem(
            src_dir="tests/data/data_gnosis/input-ra-data/")
        self.assertIsNotNone(source_store)

        target_store = LocalFileSystem(src_dir="tests/data/data_apollo/")
        self.assertIsNotNone(target_store)

        generator = FrequencyDictGenerator.create_frequency_generator(
            input_data_store=source_store, additional_path="")
        self.assertIsNotNone(generator)

        generator.generate_and_save_frequency_dict(
            output_data_store=target_store, additional_path="")

        # Reading the file back confirms that something was written.
        saved_frequencies = target_store.read_json_file(
            filename=KD_PACKAGE_FREQUENCY)
        self.assertIsNotNone(saved_frequencies)
Ejemplo n.º 9
0
def load_credential_local(src_dir):
    """Read the credential JSON file from a local directory and return it as a dict.

    :param src_dir: directory passed to LocalFileSystem as its source directory
    :return: a new dict built from the content of CREDENTIAL_FILENAME
    """
    return dict(
        LocalFileSystem(src_dir=src_dir).read_json_file(CREDENTIAL_FILENAME))