コード例 #1
0
# Command-line options: text numbers, n-gram orders and category selection.
parser.add_argument(
    "-text_numbers",
    type=int,
    nargs="+",
    default=[1, 2, 3, 4, 5, 7, 8],
    help="numbers in [1,2,3,4,5,7,8]",
)
parser.add_argument(
    "-orders",
    type=int,
    nargs="+",
    default=[4],
    help="order of the n-grams used to calculate ngram prob",
)
parser.add_argument(
    "-categories",
    nargs="*",
    default=["all"],
    help="grammatical categories. [%s]" % ", ".join(category.ALL_CATEGORIES),
)
args = parser.parse_args()

# A lone "all", "function" or "content" is shorthand for the matching
# predefined list in the `category` module; anything else is used verbatim.
categories = (
    category.ALL_CATEGORIES if args.categories == ["all"]
    else category.FUNCTION_CATEGORIES if args.categories == ["function"]
    else category.CONTENT_CATEGORIES if args.categories == ["content"]
    else args.categories
)

# Build the text collection for the requested text numbers, passing the
# resolved categories as its filter.
texts = PredictionTexts(args.text_numbers, filter_by=categories)
# Only the largest requested order is handed to the n-gram predictor.
ngram_predictor = NgramPredictor(max(args.orders))
human_predictor = HumanPredictor()

# Run each predictor's batch prediction over the same texts.
ngram_probs = ngram_predictor.batch_predict(texts)
cloze_probs = human_predictor.batch_predict(texts)
# freqs = [target.frequency() for target in texts.target_words()]


def plot_hist(title, data, bins=50):
    """Save a histogram of ``data`` to ``plots/histogram_<title>.png``.

    Args:
        title: Label interpolated into both the figure title and the output
            file name.
        data: Values forwarded to ``plt.hist``.
        bins: Bin specification forwarded to ``plt.hist`` (defaults to 50).

    The ``plots`` directory is created if it is missing, so saving does not
    fail when the script runs in a directory without it.
    """
    import os  # local import so this block does not depend on the file header

    output_dir = "plots"
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    plt.hist(data, bins)
    plt.title("Histograma de %s" % title)
    plt.savefig("plots/histogram_%s.png" % title)
    # Close the current figure so it is not reused by later plotting calls.
    plt.close()

# Write the histogram of the values returned by the n-gram predictor's
# batch_predict call; plot_hist names the output after the "ngram_prob" title.
plot_hist("ngram_prob", ngram_probs)
コード例 #2
0
import argparse
from predict_this.predictor.ngram_predictor import NgramPredictor
from predict_this.text.prediction_text import PredictionText


def argument_parser(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) reads the process
            arguments, exactly as calling the function without arguments
            always did.

    Returns:
        The ``argparse.Namespace`` holding ``text_number``,
        ``ngram_predictor_order``, ``ngram_lm`` and ``output_filename``.
        Options that are not supplied are ``None``, except ``ngram_lm``,
        which defaults to the empty string.
    """
    parser = argparse.ArgumentParser(description="Output the conditional probabilities at the target positions.")
    parser.add_argument("-text_number", type=int, help="number in [1,2,3,4,5,7,8]")
    parser.add_argument("-ngram_predictor_order", type=int)
    parser.add_argument("-ngram_lm", default="")
    parser.add_argument("-output_filename")
    # Forwarding argv lets callers (and tests) supply their own arguments.
    return parser.parse_args(argv)


def main():
    """Parse the options, then pass the selected text and the output file
    name to ``NgramPredictor.print_distribution``."""
    options = argument_parser()
    text = PredictionText(options.text_number)
    predictor = NgramPredictor(options.ngram_predictor_order, options.ngram_lm)
    predictor.print_distribution(text, options.output_filename)


if __name__ == "__main__":
    main()