def compare_models(runs,
                   k,
                   model_generator,
                   base_output_folder,
                   test_prepositions=PREPOSITION_LIST):
    """Score and cross-validate the models produced by ``model_generator``.

    A ``MultipleRuns`` object is created for the "2019 study", writing to
    ``base_output_folder + "tables"`` and ``base_output_folder + "plots"``.
    The models it generated for all scenes are scored with ``TestModels``,
    the score table is printed and saved to ``all_csv``, and then the
    validation and output steps are run.

    :param runs: passed to ``MultipleRuns`` as ``number_runs``.
    :param k: passed to ``MultipleRuns`` as ``k``.
    :param model_generator: first positional argument of ``MultipleRuns``.
    :param base_output_folder: string prefix; "tables" / "plots" are
        appended directly, so it needs its own trailing separator.
    :param test_prepositions: forwarded unchanged to ``MultipleRuns``.
    """
    study = StudyInfo("2019 study")
    tables_folder = base_output_folder + "tables"
    plots_folder = base_output_folder + "plots"

    multiple_runs = MultipleRuns(model_generator,
                                 tables_folder,
                                 plots_folder,
                                 study,
                                 test_prepositions=test_prepositions,
                                 number_runs=runs,
                                 k=k,
                                 compare="y")

    # Score the models trained and tested on every scene.
    all_scene_models = multiple_runs.Generate_Models_all_scenes.models
    scores = TestModels(all_scene_models, "all").score_dataframe.copy()
    print(scores)
    scores.to_csv(multiple_runs.all_csv)

    # Validation runs, followed by writing and showing the averaged results.
    print(f"Test Model k = {k!s}")
    multiple_runs.validation()
    multiple_runs.output()
    print(multiple_runs.average_dataframe)
def main():
    """Print the TensorFlow version in use, then call ``plot_all_csv``."""
    print(tf.__version__)
    # The study object is built but never read in this function; the call is
    # kept because its constructor may have side effects -- TODO confirm.
    _unused_study = StudyInfo("2019 study")
    plot_all_csv()
    def test_k_fold(self):
        """Check the ``MultipleRuns`` set-up and its k-fold scene split.

        A ``MultipleRuns`` is built around ``GeneratePolysemeModels`` with
        10 runs and ``k=10``. The test checks the type and configuration of
        the all-scenes generator, then checks that the validation split
        yields 10 folds that are non-empty and pairwise disjoint.
        """
        study_info = StudyInfo("2019 study")

        m = MultipleRuns(GeneratePolysemeModels,
                         POLYSEMY_SCORES_FOLDER + "tables",
                         POLYSEMY_SCORES_FOLDER + "plots",
                         study_info,
                         number_runs=10,
                         k=10,
                         compare="y",
                         test_prepositions=POLYSEMOUS_PREPOSITIONS)

        self.assertIsInstance(m.Generate_Models_all_scenes,
                              GeneratePolysemeModels)
        self.assertIsInstance(m.Generate_Models_all_scenes.features_to_remove,
                              list)
        self.assertEqual(m.Generate_Models_all_scenes.features_to_remove,
                         Configuration.object_specific_features.copy())

        self.assertIsInstance(m.model_name_list, list)

        # Keep drawing splits until one is accepted by folds_check.
        test_folds = m.get_validation_scene_split()
        while not m.folds_check(test_folds):
            test_folds = m.get_validation_scene_split()

        self.assertEqual(len(test_folds), 10)

        # Build each fold's set once; every fold must contain something.
        fold_sets = []
        for fold in test_folds:
            self.assertNotEqual(len(fold), 0)
            fold_sets.append(set(fold))

        # Compare folds by position rather than by value: a value comparison
        # (f1 != f2) would skip two distinct folds that happen to hold the
        # same scenes, which is exactly the overlap this test must catch.
        for index, first in enumerate(fold_sets):
            for second in fold_sets[index + 1:]:
                self.assertFalse(first & second)
Exemple #4
0
def output_model_params():
    """Write polyseme info for two of the ``GenerateOSModels`` models.

    The models are generated with the full "2019 study" scene list used for
    both scene arguments, over ``PREPOSITION_LIST``. ``output_polyseme_info``
    is then called on the distinct supervised model and on the sense model,
    in that order.
    """
    study = StudyInfo("2019 study")
    scenes = study.scene_name_list

    generated = GenerateOSModels(scenes, scenes, study, PREPOSITION_LIST)

    # Per-model output_typicalities / output_unsatisfied_constraints calls
    # are currently disabled.

    for attribute in ("distinct_supervised_model", "sense_model"):
        getattr(generated, attribute).output_polyseme_info()
def verify_sup_model():
    """Print the scores of a supervised neural typicality model.

    Sanity check that the model trains well: the full "2019 study" scene
    list is used for both the train and the test scenes, with the
    object-specific features removed.
    """
    study = StudyInfo("2019 study")
    scenes = study.scene_name_list
    removed_features = Configuration.object_specific_features.copy()

    # Same scenes on both sides: this checks fitting, not generalisation.
    supervised = SupervisedNeuralTypicalityModel(
        scenes,
        scenes,
        study,
        removed_features,
        train_test_proportion=0.8,
        number_of_epochs=1000,
    )

    scores = TestModels([supervised], "all").score_dataframe.copy()
    print(scores)
def verify_dnn_model():
    """Print the scores of a neural-network categorisation model.

    Sanity check that the model trains well: it is built from the standard
    preposition parameters for the full "2019 study" scene list, with plots
    enabled, and scored with ``TestModels``.
    """
    study = StudyInfo("2019 study")
    scenes = study.scene_name_list

    preposition_parameters = get_standard_preposition_parameters(scenes)

    training_options = {
        "make_plots": True,
        "train_test_proportion": 0.8,
        "number_of_epochs": 1000,
    }
    network = NeuralNetworkCategorisationModel(preposition_parameters,
                                               scenes,
                                               study,
                                               **training_options)

    scores = TestModels([network], "all").score_dataframe.copy()
    print(scores)
    def test_polyseme_rank_info(self):
        """Compare freshly written polyseme rank tables with the originals.

        Polyseme info for the ``non_shared`` model is written under
        ``output_folder``. For every polysemous preposition the new rank CSV
        is then read back and compared with the CSV located by
        ``get_original_csv``, after putting the original's columns in the
        new file's column order.
        """
        study = StudyInfo("2019 study")
        scenes = study.scene_name_list
        generated = GeneratePolysemeModels(scenes, scenes, study)

        # Write the rank files that are checked below.
        generated.non_shared.output_polyseme_info(base_folder=output_folder)

        model_name = GeneratePolysemeModels.distinct_model_name
        ranks_prefix = (output_folder + POLYSEMY_MODEL_PROPERTY_FOLDER +
                        model_name + "/ranks/")

        for preposition in POLYSEMOUS_PREPOSITIONS:
            rank_csv = ranks_prefix + preposition + " -ranks.csv"
            fresh_ranks = pd.read_csv(rank_csv)
            archived_ranks = pd.read_csv(get_original_csv(rank_csv))

            # Align column order before the frame comparison.
            column_order = fresh_ranks.columns.tolist()
            assert_frame_equal(fresh_ranks, archived_ranks[column_order])
    def test_initial_model(self):
        """Regression-check the polyseme models against archived results.

        Covers, in order: the configured k-means cluster numbers, the score
        table from ``TestModels`` (a mismatch is only printed, not failed),
        the feature-key counts of every model, and the typicality CSVs
        written by every model for each polysemous preposition.
        """
        cluster_numbers = KMeansPolysemyModel.cluster_numbers
        expected_clusters = (("on", 8), ("in", 4), ("over", 4), ("under", 4))
        for preposition, expected in expected_clusters:
            self.assertEqual(cluster_numbers[preposition], expected)

        study = StudyInfo("2019 study")
        scenes = study.scene_name_list
        generated = GeneratePolysemeModels(
            scenes,
            scenes,
            study,
            test_prepositions=POLYSEMOUS_PREPOSITIONS)
        polyseme_models = generated.models

        # Archived scores versus freshly computed scores.
        archived_scores = pd.read_csv(
            archive_folder + "2019 study/polysemy/scores/all_test.csv",
            index_col=0)
        print(archived_scores)

        fresh_scores = TestModels(polyseme_models, "all").score_dataframe
        print(fresh_scores)

        # The archive also holds the shared model, which the new table does
        # not, so both frames are brought to a common layout first.
        fresh_aligned, archived_aligned = dropcolumns_reindexlike(
            fresh_scores, archived_scores)

        # A score mismatch is reported on stdout but does not fail the test.
        try:
            assert_frame_equal(fresh_aligned, archived_aligned)
        except AssertionError as mismatch:
            print(mismatch)

        # Basic parameters first.
        for model in polyseme_models:
            self.assertEqual(len(model.all_feature_keys), 16)
            if not hasattr(model, "preposition_model_dict"):
                continue
            for preposition in model.test_prepositions:
                preposition_model = model.preposition_model_dict[preposition]
                self.assertEqual(len(preposition_model.feature_keys), 10)

        # Have every model write its typicalities to the per-preposition CSV.
        for model in polyseme_models:
            typicality_prefix = (output_folder +
                                 model.study_info.base_polysemy_folder +
                                 "config typicalities/typicality-")
            for preposition in POLYSEMOUS_PREPOSITIONS:
                model.output_typicalities(
                    preposition,
                    input_csv=typicality_prefix + preposition + ".csv")

        # Only the first seven columns are read: the k-means column is
        # dropped because it is not deterministic.
        leading_columns = list(range(7))
        columns_to_check = [
            'scene', 'figure', 'ground', 'Distinct Prototype',
            'Baseline Model'
        ]
        typicality_prefix = (output_folder +
                             generated.study_info.base_polysemy_folder +
                             "config typicalities/typicality-")

        for preposition in POLYSEMOUS_PREPOSITIONS:
            fresh_csv = typicality_prefix + preposition + ".csv"
            fresh_typicality = pd.read_csv(fresh_csv,
                                           usecols=leading_columns)
            archived_typicality = pd.read_csv(get_original_csv(fresh_csv),
                                              usecols=leading_columns)

            print(archived_typicality.columns.tolist())
            print(fresh_typicality.columns.tolist())

            # The archive contains the shared model as well, so compare only
            # the columns present in both files.
            archived_subset = archived_typicality[columns_to_check]
            fresh_subset = fresh_typicality[columns_to_check]
            assert_frame_equal(fresh_subset, archived_subset)
            print(archived_subset.columns.tolist())
            print(fresh_subset.columns.tolist())