def load_user_eco_to_kronos_model_dict(input_kronos_data_store,
                                       additional_path):
    """Load the Kronos model dictionary from the selected storage.

    Every file listed under the Kronos output folder (joined onto
    ``additional_path``) is loaded as a Kronos model. The user category is
    the name of the file's parent folder, and the ecosystem is the part of
    the file name between its last underscore and its first dot, e.g.
    ``<user_category>/kronos_<ecosystem>.json``.

    :param input_kronos_data_store: data store object; its ``list_files``
        method is called here and it is passed on to ``PGMPomegranate.load``
    :param additional_path: path prefix under which the Kronos output folder
        is looked up
    :return: nested dict ``{user_category: {ecosystem: kronos_model}}`` that
        contains only the combinations for which a model file was found
    """
    kronos_model_filenames = input_kronos_data_store.list_files(
        os.path.join(additional_path, pgm_constants.KRONOS_OUTPUT_FOLDER))

    user_eco_to_kronos_model_dict = dict()

    for kronos_model_filename in kronos_model_filenames:
        path_parts = kronos_model_filename.split("/")
        user_category = path_parts[-2]
        ecosystem = path_parts[-1].split(".")[0].split("_")[-1]
        kronos_model = PGMPomegranate.load(data_store=input_kronos_data_store,
                                           filename=kronos_model_filename)
        # Fill the nested dict directly instead of walking the full
        # (user category x ecosystem) cross product afterwards: a category
        # that has no model for some ecosystem must not raise KeyError.
        user_eco_to_kronos_model_dict.setdefault(
            user_category, dict())[ecosystem] = kronos_model

    return user_eco_to_kronos_model_dict
Example #2
0
def train_and_save_kronos_list(input_kronos_dependency_data_store,
                               input_co_occurrence_data_store,
                               output_data_store, additional_path):
    """Train one Kronos model per user category and ecosystem and save it.

    The Kronos dependency dictionaries and the co-occurrence matrices are
    loaded from the given input data stores. Each trained model is saved to
    the output data store under
    ``<additional_path><KRONOS_OUTPUT_FOLDER>/<user_category>/kronos_<ecosystem>.json``.
    Note that ``additional_path`` is prepended by plain string concatenation.

    :param input_kronos_dependency_data_store: data store handed to
        ``load_eco_to_kronos_dependency_dict``
    :param input_co_occurrence_data_store: data store handed to
        ``load_user_eco_to_co_occerrence_matrix_dict``
    :param output_data_store: data store handed to the model's ``save``
    :param additional_path: prefix used for both loading and saving
    """
    dependencies_by_eco = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_kronos_dependency_data_store,
        additional_path=additional_path)

    matrices_by_user = load_user_eco_to_co_occerrence_matrix_dict(
        input_co_occurrence_data_store=input_co_occurrence_data_store,
        additional_path=additional_path)

    for user_category, matrices_by_eco in matrices_by_user.items():
        for ecosystem, matrix_df in matrices_by_eco.items():
            trained_model = PGMPomegranate.train(
                kronos_dependency_dict=dependencies_by_eco[ecosystem],
                package_occurrence_df=matrix_df)
            model_name = "kronos" + "_" + str(ecosystem) + ".json"
            relative_path = "/".join(
                [KRONOS_OUTPUT_FOLDER, str(user_category), model_name])
            trained_model.save(data_store=output_data_store,
                               filename=additional_path + relative_path)
Example #3
0
def train_and_save_kronos_list(input_kronos_dependency_data_store,
                               input_co_occurrence_data_store,
                               output_data_store, additional_path):
    """Train the Kronos and save the results into the selected storage.

    One model is trained for every (user category, ecosystem) pair found in
    the loaded co-occurrence matrices and is saved as
    ``<additional_path>/<KRONOS_OUTPUT_FOLDER>/<user_category>/kronos_<ecosystem>.json``.

    :param input_kronos_dependency_data_store: data store handed to
        ``load_eco_to_kronos_dependency_dict``
    :param input_co_occurrence_data_store: data store handed to
        ``load_user_eco_to_co_occerrence_matrix_dict``
    :param output_data_store: data store handed to the model's ``save``
    :param additional_path: path prefix used for both loading and saving;
        an empty string means "no prefix"
    """
    eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
        input_kronos_dependency_data_store=input_kronos_dependency_data_store,
        additional_path=additional_path)

    user_eco_to_cooccurrence_matrix_dict = load_user_eco_to_co_occerrence_matrix_dict(
        input_co_occurrence_data_store=input_co_occurrence_data_store,
        additional_path=additional_path)

    for user_category, eco_to_cooccurrence_matrix_dict in (
            user_eco_to_cooccurrence_matrix_dict.items()):
        for ecosystem, cooccurrence_matrix_df in (
                eco_to_cooccurrence_matrix_dict.items()):
            # An ecosystem without a dependency entry is not handled here;
            # the lookup below is left to fail loudly in that case.
            kronos_dependency_dict = eco_to_kronos_dependency_dict[ecosystem]
            kronos_model = PGMPomegranate.train(
                kronos_dependency_dict=kronos_dependency_dict,
                package_occurrence_df=cooccurrence_matrix_df)
            # Join the prefix with os.path.join rather than "+", so a prefix
            # without a trailing separator does not get glued onto the
            # folder name. os.path.join ignores an empty prefix, so the
            # result for "" and for "<dir>/" is unchanged.
            filename = os.path.join(additional_path,
                                    pgm_constants.KRONOS_OUTPUT_FOLDER,
                                    str(user_category),
                                    "kronos_{}.json".format(str(ecosystem)))
            kronos_model.save(data_store=output_data_store,
                              filename=filename)
def load_user_eco_to_kronos_model_dict(input_kronos_data_store,
                                       additional_path):
    """Load all stored Kronos models, grouped by user category and ecosystem.

    Every file listed under ``additional_path + KRONOS_OUTPUT_FOLDER`` is
    loaded as a Kronos model. The user category is the name of the file's
    parent folder, and the ecosystem is the part of the file name between
    its last underscore and its first dot, e.g.
    ``<user_category>/kronos_<ecosystem>.json``.

    :param input_kronos_data_store: data store object; its ``list_files``
        method is called here and it is passed on to ``PGMPomegranate.load``
    :param additional_path: prefix that is concatenated (without adding a
        separator) in front of the Kronos output folder name
    :return: nested dict ``{user_category: {ecosystem: kronos_model}}`` that
        contains only the combinations for which a model file was found
    """
    kronos_model_filenames = input_kronos_data_store.list_files(
        additional_path + KRONOS_OUTPUT_FOLDER)

    user_eco_to_kronos_model_dict = dict()

    for kronos_model_filename in kronos_model_filenames:
        path_parts = kronos_model_filename.split("/")
        user_category = path_parts[-2]
        ecosystem = path_parts[-1].split(".")[0].split("_")[-1]
        kronos_model = PGMPomegranate.load(data_store=input_kronos_data_store,
                                           filename=kronos_model_filename)
        # Store the model straight into the nested dict. Looking up every
        # (user category, ecosystem) pair of the cross product afterwards
        # would raise KeyError whenever a category lacks one ecosystem.
        eco_to_kronos_model_dict = user_eco_to_kronos_model_dict.setdefault(
            user_category, dict())
        eco_to_kronos_model_dict[ecosystem] = kronos_model

    return user_eco_to_kronos_model_dict
Example #5
0
    def test_train_and_save_kronos_list_local(self):
        """Train Kronos models from the local test data and save them locally.

        Input is read from ``tests/data/data_pgm/input-train-data/`` and each
        trained model is written below
        ``tests/data/data_pgm/output-train-data/data_kronos_user_eco/``.
        """
        input_data_store = LocalFileSystem(
            "tests/data/data_pgm/input-train-data/")
        self.assertIsNotNone(input_data_store)

        output_data_store = LocalFileSystem(
            "tests/data/data_pgm/output-train-data/")
        self.assertIsNotNone(output_data_store)

        eco_to_kronos_dependency_dict = load_eco_to_kronos_dependency_dict(
            input_kronos_dependency_data_store=input_data_store,
            additional_path="")
        self.assertIsNotNone(eco_to_kronos_dependency_dict)

        user_eco_to_cooccurrence_matrix_dict = load_user_eco_to_co_occerrence_matrix_dict(
            input_co_occurrence_data_store=input_data_store,
            additional_path="")
        self.assertIsNotNone(user_eco_to_cooccurrence_matrix_dict)

        for user_category, eco_to_cooccurrence_matrix_dict in (
                user_eco_to_cooccurrence_matrix_dict.items()):
            for ecosystem, cooccurrence_matrix_df in (
                    eco_to_cooccurrence_matrix_dict.items()):
                kronos_dependency_dict = eco_to_kronos_dependency_dict[
                    ecosystem]
                kronos_model = PGMPomegranate.train(
                    kronos_dependency_dict=kronos_dependency_dict,
                    package_occurrence_df=cooccurrence_matrix_df)
                # assertIsNotNone reports the offending value on failure,
                # unlike assertTrue(x is not None).
                self.assertIsNotNone(kronos_model)
                filename = os.path.join(
                    "data_kronos_user_eco", str(user_category),
                    "kronos_{}.json".format(str(ecosystem)))
                kronos_model.save(data_store=output_data_store,
                                  filename=filename)