parser.add_argument(
    "-text_numbers",
    type=int,
    nargs="+",
    default=[1, 2, 3, 4, 5, 7, 8],
    help="numbers in [1,2,3,4,5,7,8]",
)
parser.add_argument(
    "-orders",
    type=int,
    nargs="+",
    default=[4],
    help="order of the n-grams used to calculate ngram prob",
)
parser.add_argument(
    "-categories",
    nargs="*",
    default=["all"],
    help="grammatical categories. [%s]" % ", ".join(category.ALL_CATEGORIES),
)
args = parser.parse_args()

# A lone "all", "function" or "content" is a shorthand that expands to the
# corresponding predefined category list; any other value is used verbatim.
categories = args.categories
if categories == ["all"]:
    categories = category.ALL_CATEGORIES
elif categories == ["function"]:
    categories = category.FUNCTION_CATEGORIES
elif categories == ["content"]:
    categories = category.CONTENT_CATEGORIES

texts = PredictionTexts(args.text_numbers, filter_by=categories)

# The n-gram predictor is built with the largest of the requested orders.
ngram_predictor = NgramPredictor(max(args.orders))
human_predictor = HumanPredictor()

ngram_probs = ngram_predictor.batch_predict(texts)
cloze_probs = human_predictor.batch_predict(texts)
# freqs = [target.frequency() for target in texts.target_words()]


def plot_hist(title, data, bins=50):
    """Save a histogram of ``data`` to ``plots/histogram_<title>.png``.

    Args:
        title: Label inserted both in the figure title and in the output
            file name.
        data: Values to histogram; forwarded unchanged to ``plt.hist``.
        bins: Bin specification forwarded to ``plt.hist`` (default 50).
    """
    import os

    # savefig does not create missing directories, so make sure the output
    # folder exists before writing the image.
    if not os.path.isdir("plots"):
        os.makedirs("plots")

    plt.hist(data, bins)
    # plt.show()
    plt.title("Histograma de %s" % title)
    plt.savefig("plots/histogram_%s.png" % title)
    # Close the figure so consecutive calls do not draw on top of each other.
    plt.close()


plot_hist("ngram_prob", ngram_probs)
import argparse

from predict_this.predictor.ngram_predictor import NgramPredictor
from predict_this.text.prediction_text import PredictionText


def argument_parser(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is what the script did before this parameter existed.

    Returns:
        The ``argparse.Namespace`` holding ``text_number``,
        ``ngram_predictor_order``, ``ngram_lm`` and ``output_filename``.
        Options that are not supplied are ``None``, except ``ngram_lm``
        which defaults to the empty string.
    """
    parser = argparse.ArgumentParser(
        description="Output the conditional probabilities at the target positions."
    )
    parser.add_argument("-text_number", type=int, help="number in [1,2,3,4,5,7,8]")
    parser.add_argument("-ngram_predictor_order", type=int)
    parser.add_argument("-ngram_lm", default="")
    parser.add_argument("-output_filename")
    return parser.parse_args(argv)


if __name__ == "__main__":
    args = argument_parser()

    prediction_text = PredictionText(args.text_number)
    ngram_predictor = NgramPredictor(args.ngram_predictor_order, args.ngram_lm)
    ngram_predictor.print_distribution(prediction_text, args.output_filename)