def test_generate_and_save_cooccurrence_matrices_local(self):
        """Generate co-occurrence matrices from local manifests and check them.

        For every entry of every manifest file listed under
        "data_input_manifest_file_list", a matrix is generated and saved to
        the output store. Its column labels are then compared with those of
        the stored "expected_cooccurrence_matrix_<ecosystem>.json" file.
        """
        input_data_store = LocalFileSystem(
            "tests/data/data_softnet/input-com-data")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "tests/data/data_softnet/output-com-data")
        self.assertIsNotNone(output_data_store)

        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertIsNotNone(eco_to_kronos_dependency_dict)

        manifest_filenames = input_data_store.list_files(
            "data_input_manifest_file_list")
        self.assertIsNotNone(manifest_filenames)

        for manifest_filename in manifest_filenames:
            # The second path component is used as the user category.
            user_category = manifest_filename.split("/")[1]
            manifest_content_json_list = input_data_store.read_json_file(
                filename=manifest_filename)
            self.assertIsNotNone(manifest_content_json_list)

            for manifest_content_json in manifest_content_json_list:
                self.assertIsNotNone(manifest_content_json)
                manifest_content_dict = dict(manifest_content_json)
                ecosystem = manifest_content_dict["ecosystem"]
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                list_of_package_list = manifest_content_dict.get(
                    "package_list")

                cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
                self.assertIsNotNone(cooccurrence_matrix_obj)

                # Persist the freshly generated matrix next to the expected one.
                output_filename = (
                    "data_co_occurrence_matrix/{}/cooccurrence_matrix_{}.json"
                    .format(user_category, ecosystem))
                cooccurrence_matrix_obj.save(data_store=output_data_store,
                                             filename=output_filename)

                expected_output_filename = (
                    "data_co_occurrence_matrix/{}/"
                    "expected_cooccurrence_matrix_{}.json"
                    .format(user_category, ecosystem))
                expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                    data_store=output_data_store,
                    filename=expected_output_filename)
                self.assertIsNotNone(expected_cooccurrence_matrix_obj)

                cooccurrence_matrix_df = (
                    cooccurrence_matrix_obj.get_matrix_dictionary())
                self.assertIsNotNone(cooccurrence_matrix_df)
                expected_cooccurrence_matrix_df = (
                    expected_cooccurrence_matrix_obj.get_matrix_dictionary())
                # Fail with a clear assertion instead of an AttributeError on
                # the `.columns` access below.
                self.assertIsNotNone(expected_cooccurrence_matrix_df)

                expected_columns = set(expected_cooccurrence_matrix_df.columns)
                resultant_columns = set(cooccurrence_matrix_df.columns)
                # assertEqual reports the differing labels when the sets differ.
                self.assertEqual(resultant_columns, expected_columns)

                # NOTE(review): presumably both objects are pandas DataFrames
                # (they expose `.columns`); iterating a DataFrame yields its
                # column labels, so this check looks only at labels and never
                # at cell values. Consider a full frame comparison.
                self.assertTrue(
                    set(cooccurrence_matrix_df).issubset(
                        set(expected_cooccurrence_matrix_df)))
    def test_generate_and_save_cooccurrence_matrices_local(self):
        """Generate co-occurrence matrices from local manifests and check them.

        For every entry of every manifest file listed under
        "data_input_manifest_file_list", a matrix is generated, saved to the
        output store and compared (after sorting the column axis) with the
        stored "expected_cooccurrence_matrix_<ecosystem>.json" file.
        """
        input_data_store = LocalFileSystem(
            "analytics_platform/kronos/softnet/test/data/input-com-data")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "analytics_platform/kronos/softnet/test/data/output-com-data")
        self.assertIsNotNone(output_data_store)

        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertIsNotNone(eco_to_kronos_dependency_dict)

        manifest_filenames = input_data_store.list_files(
            "data_input_manifest_file_list")
        self.assertIsNotNone(manifest_filenames)

        for manifest_filename in manifest_filenames:
            # The second path component is used as the user category.
            user_category = manifest_filename.split("/")[1]
            manifest_content_json_list = input_data_store.read_json_file(
                filename=manifest_filename)
            self.assertIsNotNone(manifest_content_json_list)

            for manifest_content_json in manifest_content_json_list:
                self.assertIsNotNone(manifest_content_json)
                manifest_content_dict = dict(manifest_content_json)
                ecosystem = manifest_content_dict["ecosystem"]
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                list_of_package_list = manifest_content_dict.get(
                    "package_list")

                cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                    kronos_dependency_dict=kronos_dependency_dict,
                    list_of_package_list=list_of_package_list)
                self.assertIsNotNone(cooccurrence_matrix_obj)

                # Persist the freshly generated matrix next to the expected one.
                output_filename = (
                    "data_co_occurrence_matrix/{}/cooccurrence_matrix_{}.json"
                    .format(user_category, ecosystem))
                cooccurrence_matrix_obj.save(data_store=output_data_store,
                                             filename=output_filename)

                expected_output_filename = (
                    "data_co_occurrence_matrix/{}/"
                    "expected_cooccurrence_matrix_{}.json"
                    .format(user_category, ecosystem))
                expected_cooccurrence_matrix_obj = CooccurrenceMatrixGenerator.load(
                    data_store=output_data_store,
                    filename=expected_output_filename)
                self.assertIsNotNone(expected_cooccurrence_matrix_obj)

                cooccurrence_matrix_df = (
                    cooccurrence_matrix_obj.get_matrix_dictionary())
                self.assertIsNotNone(cooccurrence_matrix_df)
                expected_cooccurrence_matrix_df = (
                    expected_cooccurrence_matrix_obj.get_matrix_dictionary())
                self.assertIsNotNone(expected_cooccurrence_matrix_df)

                # Sort the column axis on both sides so that column order does
                # not influence the comparison.
                assert_frame_equal(
                    cooccurrence_matrix_df.sort_index(axis=1),
                    expected_cooccurrence_matrix_df.sort_index(axis=1),
                    check_names=True)
def generate_and_save_cooccurrence_matrices(input_kronos_dependency_data_store,
                                            input_manifest_data_store,
                                            output_data_store,
                                            additional_path):
    """Build one co-occurrence matrix per manifest entry and save it.

    :param input_kronos_dependency_data_store: store handed to
        load_eco_to_kronos_dependency_dict to obtain the per-ecosystem
        dependency dictionaries
    :param input_manifest_data_store: store the manifest files are listed
        in and read from
    :param output_data_store: store the generated matrices are saved to
    :param additional_path: prefix that is prepended (by plain string
        concatenation) to the manifest location and to every output filename
    """
    dependencies_by_eco = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_kronos_dependency_data_store,
        additional_path=additional_path)

    for manifest_path in input_manifest_data_store.list_files(
            additional_path + MANIFEST_FILEPATH):
        # The second-to-last path component is used as the user category.
        category = manifest_path.split("/")[-2]
        manifest_entries = input_manifest_data_store.read_json_file(
            filename=manifest_path)

        for entry in manifest_entries:
            manifest = dict(entry)
            ecosystem = manifest[MANIFEST_ECOSYSTEM]
            eco_dependencies = dependencies_by_eco[ecosystem]
            package_lists = manifest.get(MANIFEST_PACKAGE_LIST)

            matrix = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                kronos_dependency_dict=eco_dependencies,
                list_of_package_list=package_lists)

            # <output folder>/<user category>/cooccurrence_matrix_<eco>.json
            relative_path = "/".join([
                COM_OUTPUT_FOLDER,
                str(category),
                "cooccurrence_matrix" + "_" + str(ecosystem) + ".json",
            ])
            matrix.save(data_store=output_data_store,
                        filename=additional_path + relative_path)
def generate_and_save_cooccurrence_matrices(input_kronos_dependency_data_store,
                                            input_manifest_data_store,
                                            output_data_store,
                                            additional_path):
    """Create a co-occurrence matrix for each manifest entry and store it.

    The per-ecosystem dependency dictionaries and the package-topic map are
    read from ``input_kronos_dependency_data_store``. Manifests are listed
    and read from ``input_manifest_data_store``. Each generated matrix is
    saved to ``output_data_store``. Every path is joined onto
    ``additional_path``.
    """
    dependencies_by_eco = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_kronos_dependency_data_store,
        additional_path=additional_path)

    # Load the package-topic map; it is indexed by ecosystem further below.
    ptm_location = os.path.join(additional_path,
                                softnet_constants.GNOSIS_PTM_OUTPUT_PATH)
    ptm_content = dict(
        input_kronos_dependency_data_store.read_json_file(ptm_location))
    topics_by_eco = ptm_content[
        softnet_constants.GNOSIS_PTM_PACKAGE_TOPIC_MAP]

    manifest_root = os.path.join(additional_path,
                                 softnet_constants.MANIFEST_FILEPATH)
    for manifest_path in input_manifest_data_store.list_files(manifest_root):
        # The second-to-last path component is used as the user category.
        category = manifest_path.split("/")[-2]
        manifest_entries = input_manifest_data_store.read_json_file(
            filename=manifest_path)

        for entry in manifest_entries:
            manifest = dict(entry)
            ecosystem = manifest[softnet_constants.MANIFEST_ECOSYSTEM]
            eco_dependencies = dependencies_by_eco[ecosystem]
            package_lists = manifest.get(
                softnet_constants.MANIFEST_PACKAGE_LIST)

            matrix = CooccurrenceMatrixGenerator.generate_cooccurrence_matrix(
                kronos_dependency_dict=eco_dependencies,
                list_of_package_list=package_lists,
                package_topic_map=topics_by_eco[ecosystem])

            matrix_name = "cooccurrence_matrix_{}.json".format(str(ecosystem))
            relative_path = os.path.join(softnet_constants.COM_OUTPUT_FOLDER,
                                         str(category), matrix_name)
            matrix.save(data_store=output_data_store,
                        filename=os.path.join(additional_path, relative_path))