Example #1
0
def run(which="all"):
    backup()
    if which == "all":
        bases = _bases
    else:
        bases = [which]
    for base in bases:
        compute_diff("mpl_%s_raw.xml" % base, "mpl_%s.xml" % base, "mpl_%s_diff.xml" % base)
        run_parse(base)
        apply_diff("mpl_%s_raw.xml" % base, "mpl_%s_diff.xml" % base, "mpl_%s.xml" % base)
        run_generate(base)
Example #2
0
def run(which="all"):
    backup()
    if which == "all":
        bases = _bases
    else:
        bases = [which]
    for base in bases:
        compute_diff("mpl_%s_raw.xml" % base, "mpl_%s.xml" % base,
                     "mpl_%s_diff.xml" % base)
        run_parse(base)
        apply_diff("mpl_%s_raw.xml" % base, "mpl_%s_diff.xml" % base,
                   "mpl_%s.xml" % base)
        run_generate(base)
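Examples #1 and #2 are the same driver: back up the current XML, then for each base recompute the diff between the raw and edited files, re-run the parser, re-apply the diff, and regenerate. A minimal usage sketch, assuming the surrounding module defines _bases (a list of base names) and the helpers backup, compute_diff, run_parse, apply_diff and run_generate; the __main__ guard and argument handling below are illustrative additions, not part of the original module:

if __name__ == "__main__":
    import sys
    # Regenerate every base by default, or only the base named on the command line.
    run(sys.argv[1] if len(sys.argv) > 1 else "all")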
Example #3
0
def generate_text_with_our_methods_sentiment(sentiment_tokenizer,
                                             sentiment_classifier):
    # loop over prefixes and sentiment conditions and log the generated sentences and evaluation scores
    for condition in ["negative", "positive"]:
        for prefix1 in [
                "The chiken", "The house", "The pizza", "The potato",
                "The lake"
        ]:
            prefix2 = "The following is an article about " + condition + ". " + prefix1
            for prefix in [prefix1, prefix2]:
                print('Condition:', condition)
                print('Prefix:', prefix)
                perplexity_scores = []
                diversity_scores = []
                sentiment_scores = []
                for k in range(20):  # generate 20 samples per combination
                    a_lst = run_generate(prefix=prefix,
                                         condition=condition,
                                         length=100,
                                         device=device)
                    a_str = [''.join(a_lst)]  # join generated pieces into one string
                    #print(a_str)
                    perplexity_scores.append(
                        np.mean([
                            perplexity_score(text, per_model, tokenizer)
                            for text in a_str
                        ]))
                    diversity_scores.append(diversity(a_str))
                    sentiment_scores.append([
                        sentiment_predict(
                            text, sentiment_tokenizer,
                            sentiment_classifier)['label'] == condition
                        for text in a_str
                    ][0])
                print('Perplexity score: %.3f' % np.mean(perplexity_scores))
                print('Sentiment acc. %.3f' %
                      (sum(sentiment_scores) / len(sentiment_scores)))
                print('Diversity score:')
                print('\t Dist-1: %.3f' %
                      np.mean([a[0] for a in diversity_scores]))
                print('\t Dist-2: %.3f' %
                      np.mean([a[1] for a in diversity_scores]))
                print('\t Dist-3: %.3f' %
                      np.mean([a[2] for a in diversity_scores]))
                print('********\n')
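The Dist-1/2/3 numbers printed above come from the diversity() helper, which is not included in this excerpt. Below is a minimal sketch of one common way such a helper is written, assuming it returns the ratio of distinct uni-, bi- and tri-grams to total n-grams over the generated strings (an assumption about the helper, not the original implementation):

def diversity(texts):
    # Distinct-n: fraction of unique n-grams among all n-grams, for n = 1, 2, 3.
    scores = []
    for n in (1, 2, 3):
        total, distinct = 0, set()
        for text in texts:
            tokens = text.split()
            ngrams = [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
            total += len(ngrams)
            distinct.update(ngrams)
        scores.append(len(distinct) / total if total else 0.0)
    return scores  # unpacked as Dist-1, Dist-2, Dist-3 by the callers above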
Example #4
0
def generate_text_with_our_methods_topic():

    # loop over prefixes and conditions and log the generated sentences and evaluation scores
    for condition in [
            "Religion", "Military", "Politics", "Science", "Legal", "Space",
            "Computers", "Technology"
    ]:
        for prefix1 in [
                "The chiken", "The house", "The pizza", "The potato",
                "The lake"
        ]:
            prefix2 = "The following is an article about " + condition + ". " + prefix1
            # print(prefix2)
            for prefix in [prefix1, prefix2]:
                print('Condition:', condition)
                print('Prefix:', prefix)
                perplexity_scores = []
                diversity_scores = []
                for k in range(20):  # generate 20 samples per combination
                    a_lst = run_generate(prefix=prefix,
                                         condition=condition,
                                         length=100,
                                         device=device)
                    a_str = [''.join(a_lst)]  # join generated pieces into one string
                    #print(a_str)
                    perplexity_scores.append(
                        np.mean([
                            perplexity_score(text, per_model, tokenizer)
                            for text in a_str
                        ]))
                    diversity_scores.append(diversity(a_str))
                #evaluate_all(all_text)
                print('Perplexity score: %.3f' % np.mean(perplexity_scores))
                print('Diversity score:')
                print('\t Dist-1: %.3f' %
                      np.mean([a[0] for a in diversity_scores]))
                print('\t Dist-2: %.3f' %
                      np.mean([a[1] for a in diversity_scores]))
                print('\t Dist-3: %.3f' %
                      np.mean([a[2] for a in diversity_scores]))
                print('********\n')
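Both evaluation loops also depend on perplexity_score(text, per_model, tokenizer), which is likewise defined outside this excerpt. A minimal sketch of what it might compute with a Hugging Face causal language model, namely the exponential of the mean token-level cross-entropy of the text under per_model (again an assumption about the helper, not its actual implementation):

import torch

def perplexity_score(text, per_model, tokenizer):
    # Scoring the text with itself as the target gives the mean negative
    # log-likelihood per token; exp() of that loss is the perplexity.
    input_ids = tokenizer.encode(text, return_tensors="pt").to(per_model.device)
    with torch.no_grad():
        outputs = per_model(input_ids, labels=input_ids)
    return torch.exp(outputs.loss).item()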