def train_models(dataset, models):
    """Train, save and collect the requested models for ``dataset``.

    Each model is saved below ``OUTPUT_DATA_DIR + dataset.get_name() + "/"``.

    Args:
        dataset: Dataset object. It must provide ``get_name()`` and is passed
            unchanged to the constructor of every model wrapper.
        models: Object supporting ``in`` that holds the keys of the models to
            train. Recognised keys are ``"rf"``, ``"lr"`` and ``"MLP"``;
            anything else is ignored.

    Returns:
        dict mapping each recognised, requested key to its fitted wrapper.
    """
    output_dir = OUTPUT_DATA_DIR + dataset.get_name() + "/"
    result = {}

    if "rf" in models:
        rf = classifierwrapper.SklearnClassifier(dataset)
        rf.set_properties("sklearn", "rf", output_dir)
        rf.fit_model()
        rf.save_model()
        # NOTE(review): unlike "lr" and "MLP", this model is not evaluated
        # after training -- confirm that this is intentional.
        result["rf"] = rf

    if "lr" in models:
        lr = classifierwrapper.SklearnClassifier(dataset)
        lr.set_properties("sklearn", "lr", output_dir)
        lr.fit_model()
        lr.save_model()
        lr.evaluate_model()
        result["lr"] = lr

    if "MLP" in models:
        MLP = keras_networks.MLPSimple2(dataset)
        MLP.set_properties("keras", "mlp", output_dir)
        MLP.fit_model()
        MLP.save_model()
        MLP.evaluate_model()
        result["MLP"] = MLP

    return result
def get_news_data_and_models():
    """Load the 20 newsgroups dataset and its saved MLP and LR models.

    Everything is read from ``OUTPUT_DATA_DIR + NEWS_DATA_DIR``; the model
    files are ``NEWS_MLP`` and ``NEWS_LR`` inside that location.

    Returns:
        Tuple ``(dataset, mlp_wrapper, lr_wrapper)``, in that order.
    """
    dataset = twentynewsgroups_dataset.TwentyNewssGroupsData()
    news_dir = OUTPUT_DATA_DIR + NEWS_DATA_DIR
    dataset.load_dataset(news_dir)

    mlp_wrapper = keras_networks.MLPSimple2(dataset)
    mlp_wrapper.load_model(news_dir + NEWS_MLP)

    lr_wrapper = classifierwrapper.SklearnClassifier(dataset)
    lr_wrapper.load_model(news_dir + NEWS_LR)

    return dataset, mlp_wrapper, lr_wrapper
def get_rationales_data_and_models():
    """Load the rationales dataset and its saved MLP and LR models.

    Everything is read from ``OUTPUT_DATA_DIR + RATIONALES_DATA_DIR``; the
    model files are ``RATIONALES_MLP`` and ``RATIONALES_LR`` inside that
    location.

    Returns:
        Tuple ``(dataset, mlp_wrapper, lr_wrapper)``, in that order.
    """
    dataset = rationales_dataset.RationalesData()
    rationales_dir = OUTPUT_DATA_DIR + RATIONALES_DATA_DIR
    dataset.load_dataset(rationales_dir)

    mlp_wrapper = keras_networks.MLPSimple2(dataset)
    mlp_wrapper.load_model(rationales_dir + RATIONALES_MLP)

    lr_wrapper = classifierwrapper.SklearnClassifier(dataset)
    lr_wrapper.load_model(rationales_dir + RATIONALES_LR)

    return dataset, mlp_wrapper, lr_wrapper
def print_stats_and_evaluate_models_news():
    """Load the 20news dataset, print its stats and evaluate the saved models.

    The LR model is loaded and evaluated first, then the MLP. Nothing is
    returned; output comes from ``print_stats()``, ``evaluate_model()`` and
    the progress message printed here.
    """
    dataset = twentynewsgroups_dataset.TwentyNewssGroupsData()
    news_dir = OUTPUT_DATA_DIR + NEWS_DATA_DIR
    dataset.load_dataset(news_dir)
    dataset.print_stats()

    print("Evaluate models")
    # (wrapper class, saved model file) pairs, in evaluation order.
    saved_models = (
        (classifierwrapper.SklearnClassifier, NEWS_LR),
        (keras_networks.MLPSimple2, NEWS_MLP),
    )
    for wrapper_cls, model_file in saved_models:
        wrapper = wrapper_cls(dataset)
        wrapper.load_model(news_dir + model_file)
        wrapper.evaluate_model()
def print_stats_and_evaluate_models_rationales():
    """Load the movie (rationales) dataset, print stats, evaluate the models.

    The LR model is loaded and evaluated first, then the MLP. Nothing is
    returned; output comes from ``print_stats()``, ``evaluate_model()`` and
    the progress messages printed here.
    """
    print("Load rationales")
    dataset = rationales_dataset.RationalesData()
    rationales_dir = OUTPUT_DATA_DIR + RATIONALES_DATA_DIR
    dataset.load_dataset(rationales_dir)
    dataset.print_stats()

    print("Evaluate models")
    # (wrapper class, saved model file) pairs, in evaluation order.
    saved_models = (
        (classifierwrapper.SklearnClassifier, RATIONALES_LR),
        (keras_networks.MLPSimple2, RATIONALES_MLP),
    )
    for wrapper_cls, model_file in saved_models:
        wrapper = wrapper_cls(dataset)
        wrapper.load_model(rationales_dir + model_file)
        wrapper.evaluate_model()
def test_omission(self):
    """Check omission-summary scores against recomputed prediction drops.

    For test document 0, each word in the omission summary has its feature
    entry (at the word's vocabulary index) set to 0 in a copy of the
    document vector. The resulting drop in the probability of the class the
    model predicted for the unmodified document must equal element 1 of the
    word's summary entry to 7 decimals.
    """
    dataset = rationales_dataset.RationalesData()
    data_dir = experiments.OUTPUT_DATA_DIR + experiments.RATIONALES_DATA_DIR
    dataset.load_dataset(data_dir)

    mlp = keras_networks.MLPSimple2(dataset)
    mlp.load_model(data_dir + experiments.RATIONALES_MLP)
    mlp.evaluate_model()

    doc_id = 0
    features = dataset.x_test_trans_bin[doc_id]
    baseline = mlp.model.predict_proba(np.array([features]), verbose=0)[0]
    top_class = np.argmax(baseline)

    summary = keras_networks.omission_summary_binary(
        features, mlp.model, dataset.inv_vocab
    )

    for word, entry in summary.items():
        expected_drop = entry[1]
        # Work on a copy so the dataset's own vector is left untouched.
        ablated = list(features)
        ablated[dataset.vocab[word]] = 0
        ablated_pred = mlp.model.predict_proba(np.array([ablated]), verbose=0)[0]
        actual_drop = baseline[top_class] - ablated_pred[top_class]
        npt.assert_almost_equal(actual_drop, expected_drop, decimal=7)