def pplm_examples():

    run_pplm_example(cond_text="The potato",
                     num_samples=3,
                     bag_of_words='military',
                     length=50,
                     stepsize=0.03,
                     sample=True,
                     num_iterations=3,
                     window_length=5,
                     gamma=1.5,
                     gm_scale=0.95,
                     kl_scale=0.01,
                     verbosity='regular')

    run_pplm_example(cond_text="Once upon a time",
                     num_samples=10,
                     discrim='sentiment',
                     class_label='very_positive',
                     length=50,
                     stepsize=0.05,
                     sample=True,
                     num_iterations=10,
                     gamma=1,
                     gm_scale=0.9,
                     kl_scale=0.02,
                     verbosity='quiet')
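Most of these snippets call run_pplm_example without showing its import; as the standalone examples further down do explicitly, it comes from the PPLM demo script:

from run_pplm import run_pplm_example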
Example #2
def get_data():
    # Flask view; `text` and `x` are only defined on POST, so the route is
    # presumably registered with methods=['POST'].
    if request.method == 'POST':
        text = request.form['nlg']
        drop = request.form['personality']
        x = run_pplm_example(cond_text=text, num_samples=1,
                             bag_of_words=drop, length=50, stepsize=0.03,
                             sample=True, num_iterations=3, window_length=5,
                             gamma=1.5, gm_scale=0.95, kl_scale=0.01,
                             verbosity='regular')
    return render_template('result.html', prediction=[text, type(x)])
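The view in Example #2 relies on Flask scaffolding that the snippet omits. A minimal sketch, assuming a hypothetical route; only the form field names and the template come from the snippet:

from flask import Flask, request, render_template
from run_pplm import run_pplm_example

app = Flask(__name__)

@app.route('/generate', methods=['POST'])  # hypothetical route
def get_data():
    ...  # body as in Example #2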
Example #3
def generation(bag_of_words, seed, num_samples_each_prefix, length, step_size,
               num_iterations, verbosity, prefixes, dst_file):
    for prefix in prefixes:
        with open(dst_file, 'a') as file:
            run_pplm_example(
                bag_of_words=bag_of_words,
                cond_text=prefix,
                num_samples=num_samples_each_prefix,
                length=length,  # influence random
                seed=seed,
                stepsize=step_size,
                sample=True,
                num_iterations=num_iterations,
                gamma=1.5,
                gm_scale=0.9,
                kl_scale=0.01,
                verbosity=verbosity,
                file=file,
            )
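A hypothetical invocation of generation; the bag-of-words name and prefixes are borrowed from Example #4, and the destination path is illustrative:

# hypothetical call; the destination directory must already exist
generation(bag_of_words='science', seed=0, num_samples_each_prefix=3,
           length=50, step_size=0.03, num_iterations=3, verbosity='quiet',
           prefixes=['The lake', 'The potato'], dst_file='out/science_samples')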
Example #4
def generate_text_with_pplm_bow():
    # `device`, `per_model`, `tokenizer`, `perplexity_score`, and `diversity`
    # are assumed to be defined at module level (see the sketch below).
    for condition in [
            "military", "religion", "politics", "science", "legal", "space",
            "technology"
    ]:
        for prefix in [
                "The chicken", "The house", "The pizza", "The potato",
                "The lake"
        ]:
            print('Condition:', condition)
            print('Prefix:', prefix)
            # num_samples=20 perturbed samples per (condition, prefix) pair.
            generated_texts = run_pplm_example(cond_text=prefix,
                                               num_samples=20,
                                               bag_of_words=condition,
                                               length=100,
                                               stepsize=0.03,
                                               sample=True,
                                               num_iterations=3,
                                               window_length=5,
                                               gamma=1.5,
                                               gm_scale=0.95,
                                               kl_scale=0.01,
                                               verbosity='quiet',
                                               device=device)
            # Get rid of the beginning '<|endoftext|>'.
            processed_texts = [
                sample.replace('<|endoftext|>', '')
                for sample in generated_texts
            ]
            # processed_texts[0] is the unperturbed sample, the rest perturbed.
            print('Original perplexity score: %.3f' % perplexity_score(
                processed_texts[0], per_model, tokenizer))
            avg_pplm_ppl = np.mean([
                perplexity_score(text, per_model, tokenizer)
                for text in processed_texts[1:]
            ])
            print('Perplexity score over %d samples: %.3f' %
                  (len(processed_texts[1:]), avg_pplm_ppl))
            diversity_scores = [diversity([text])
                                for text in processed_texts[1:]]
            print('Diversity score over %d samples:' %
                  len(processed_texts[1:]))
            print('\t Dist-1 %.3f' % np.mean([a[0] for a in diversity_scores]))
            print('\t Dist-2 %.3f' % np.mean([a[1] for a in diversity_scores]))
            print('\t Dist-3 %.3f' % np.mean([a[2] for a in diversity_scores]))
            print('********\n')
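Examples #4 and #13 lean on helpers the snippets do not define (device, per_model, tokenizer, perplexity_score, diversity). A minimal sketch of what they might look like, assuming GPT-2 for perplexity and the standard Dist-n distinct n-gram ratio; none of this is from the original project:

import numpy as np
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
per_model = GPT2LMHeadModel.from_pretrained('gpt2').to(device)

def perplexity_score(text, model, tokenizer):
    # Perplexity = exp(mean per-token negative log-likelihood).
    input_ids = tokenizer.encode(text, return_tensors='pt').to(model.device)
    with torch.no_grad():
        loss = model(input_ids, labels=input_ids).loss
    return torch.exp(loss).item()

def diversity(texts):
    # Dist-n: fraction of distinct n-grams among all n-grams (Li et al., 2016).
    tokens = ' '.join(texts).split()
    scores = []
    for n in (1, 2, 3):
        ngrams = [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
        scores.append(len(set(ngrams)) / max(len(ngrams), 1))
    return scores  # [Dist-1, Dist-2, Dist-3]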
Example #5
# Imports assumed by this snippet:
import os
import statistics as stat
import time

def generate_samples(prefixes,
                     num_samples,
                     length,
                     method_name,
                     sentiment_label,
                     verbose,
                     num_iterations,
                     vad_loss_params=None,
                     seed=0,
                     vad_threshold=0.01):
    total_samples = len(prefixes) * num_samples
    file_name = 'seed={},{},name={},samples={},itrs={},vad_t={}'.format(
        seed, sentiment_label, method_name, total_samples, num_iterations,
        vad_threshold)
    if vad_loss_params:
        file_name += ', lambda={}, pos_t={}, neg_t={}'.format(
            vad_loss_params['lambda'], vad_loss_params['pos_threshold'],
            vad_loss_params['neg_threshold'])

    output = 'automated_evaluation/generated_samples/{}/{}'.format(
        sentiment_label, file_name)

    word_changes_list = []
    start_time = time.time()
    for prefix in prefixes:
        with open(output, 'a') as file:
            word_changes = run_pplm_example(
                cond_text=prefix,
                num_samples=num_samples,
                discrim='sentiment',
                class_label='very_{}'.format(sentiment_label),
                length=length,  # influence random
                seed=seed,
                stepsize=0.03,
                sample=True,
                num_iterations=num_iterations,
                gamma=1,
                gm_scale=0.95,
                kl_scale=0.01,
                verbosity=verbose,
                file=file,
                sample_method=method_name,
                vad_loss_params=vad_loss_params,
                vad_threshold=vad_threshold,
            )
            word_changes_list.append(word_changes)
    time_lag = time.time() - start_time
    # Fold the mean word-change count and elapsed time into the file name.
    os.rename(output, '{},changes={:.2f},time={:.2f}'.format(
        output, stat.mean(word_changes_list), time_lag))
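A hypothetical call to generate_samples; the dict keys for vad_loss_params come from the file-name code above, the method name from Example #6, and all values are illustrative:

generate_samples(prefixes=['The book', 'The movie'],
                 num_samples=3,
                 length=50,
                 method_name='BC_VAD_ABS',
                 sentiment_label='positive',
                 verbose='quiet',
                 num_iterations=10,
                 vad_loss_params={'lambda': 0.5,
                                  'pos_threshold': 0.6,
                                  'neg_threshold': 0.4})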
Example #6
def test_BC_VAD_ABS_untached(self):
    # unittest.TestCase method: regenerates output with a fixed seed and
    # compares it against a stored golden file.
    with open('test_cases/BC_VAD_ABS/output', 'w') as file:
        run_pplm_example(
            cond_text='The book',
            num_samples=1,
            discrim='sentiment',
            class_label=3,  # very_negative
            length=5,  # influence random
            seed=0,
            stepsize=0.05,
            sample=True,
            num_iterations=3,
            gamma=1,
            gm_scale=0.9,
            kl_scale=0.02,
            verbosity='quiet',
            file=file,
            sample_method='BC_VAD_ABS'
        )
    with open('test_cases/BC_VAD_ABS/output', 'r') as file:
        output = file.read()
    with open('test_cases/BC_VAD_ABS/known_output', 'r') as file:
        known_output = file.read()
    self.assertEqual(output, known_output)
Example #7
def get_bow_probs(bow_dir, text, gm_scale=0.95, kl_scale=0.01, stepsize=0.03, num_iterations=3):
    return run_pplm_example(
        cond_text=text,
        num_samples=1,
        bag_of_words=f"{bow_dir}/story_bow.txt",
        length=1,
        stepsize=stepsize,
        sample=True,
        uncond=(text == ''),  # unconditional generation when no prefix given
        num_iterations=num_iterations,
        window_length=5,
        gamma=1.5,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet'
    )[1]
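In this fork, run_pplm_example evidently returns a tuple; these wrappers take element [1], presumably the probabilities from the perturbed pass. A hypothetical call, assuming story_bow.txt exists under ./bow:

probs = get_bow_probs('./bow', 'The story begins')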
Example #8
def get_discriminator_probs(discrim_dir, text, gm_scale=0.90, kl_scale=0.02, stepsize=0.04, num_iterations=20):
    return run_pplm_example(
        cond_text=text,
        num_samples=1,
        discrim='generic',
        discrim_meta=f"{discrim_dir}/generic_classifier_head_meta.json",
        discrim_weights=f"{discrim_dir}/generic_classifier_head_epoch_10.pt",
        class_label='1',
        length=1,
        stepsize=stepsize,
        sample=True,
        uncond=(text == ''),  # the original tested an undefined `i`;
                              # an empty prompt was presumably meant (cf. #7)
        num_iterations=num_iterations,
        gamma=1,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet'
    )[1]
Example #9
def gen_bow(bow_dir,
            text,
            seed=0,
            length=20,
            gm_scale=0.95,
            kl_scale=0.01,
            stepsize=0.03,
            num_iterations=3):
    return run_pplm_example(cond_text=text,
                            num_samples=1,
                            bag_of_words=f"{bow_dir}/story_bow.txt",
                            length=length,
                            stepsize=stepsize,
                            sample=True,
                            seed=seed,
                            num_iterations=num_iterations,
                            window_length=5,
                            gamma=1.5,
                            gm_scale=gm_scale,
                            kl_scale=kl_scale,
                            verbosity='quiet')[0]
Example #10
def gen_discrim(discrim_dir,
                text,
                seed=0,
                length=20,
                gm_scale=0.90,
                kl_scale=0.02,
                stepsize=0.04,
                num_iterations=20):
    return run_pplm_example(
        cond_text=text,
        num_samples=1,
        seed=seed,
        discrim='generic',
        discrim_meta=f"{discrim_dir}/generic_classifier_head_meta.json",
        discrim_weights=f"{discrim_dir}/generic_classifier_head_epoch_10.pt",
        class_label='1',
        length=length,
        stepsize=stepsize,
        sample=True,
        num_iterations=num_iterations,
        gamma=1,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet')[0]
Example #11
from run_pplm import run_pplm_example

if __name__ == '__main__':
    prefixes = ['The orange', 'The spider man', 'my father']
    for p in prefixes:
        with open('demos/religion', 'a') as file:
            file.write(
                '========================================================================================================================================================\n'
            )
            file.write('【{}】\n'.format(p))
            run_pplm_example(cond_text=p,
                             num_samples=1,
                             bag_of_words='religion',
                             length=50,
                             stepsize=0.03,
                             sample=True,
                             num_iterations=3,
                             window_length=5,
                             gamma=1.5,
                             gm_scale=0.95,
                             kl_scale=0.01,
                             verbosity='regular',
                             file=file,
                             generation_method='vad_abs')
Example #12
    prefix = 'The horse'
    sample_methods = [
        'perturbed',
        # 'vad_abs',
        # 'vad_abs'
    ]

    with open('demos/neg2pos', 'a') as file:
        for m in sample_methods:
            file.write(
                '\n================= "{}" to BC by {} =================\n'.format(prefix, m))
            run_pplm_example(
                cond_text=prefix,
                num_samples=1,
                discrim='sentiment',
                class_label='very_positive',
                length=50,  # influence random
                seed=0,
                stepsize=0.05,
                sample=True,
                num_iterations=3,
                gamma=1,
                gm_scale=0.9,
                kl_scale=0.02,
                verbosity='quiet',
                file=file,
                sample_method=m
            )

            file.write('=' * 89)
Example #13
def generate_text_with_pplm_discrim(sentiment_tokenizer, sentiment_classifier):
    # `device`, `per_model`, `tokenizer`, `perplexity_score`, `diversity`,
    # and `sentiment_predict` are assumed to be defined at module level.
    for condition in ["very_negative", "very_positive"]:
        for prefix in [
                "The chicken", "The house", "The pizza", "The potato",
                "The lake"
        ]:
            print('Condition:', condition)
            print('Prefix:', prefix)
            # num_samples=20 perturbed samples per (condition, prefix) pair.
            generated_texts = run_pplm_example(cond_text=prefix,
                                               num_samples=20,
                                               discrim='sentiment',
                                               class_label=condition,
                                               length=100,
                                               stepsize=0.05,
                                               sample=True,
                                               num_iterations=10,
                                               gamma=1,
                                               gm_scale=0.9,
                                               kl_scale=0.02,
                                               verbosity='quiet',
                                               device=device)
            # Get rid of the beginning '<|endoftext|>'.
            processed_texts = [
                sample.replace('<|endoftext|>', '')
                for sample in generated_texts
            ]
            # processed_texts[0] is the unperturbed sample, the rest perturbed.
            print('Original perplexity score: %.3f' % perplexity_score(
                processed_texts[0], per_model, tokenizer))
            avg_pplm_ppl = np.mean([
                perplexity_score(text, per_model, tokenizer)
                for text in processed_texts[1:]
            ])
            print('Perplexity score over %d samples: %.3f' %
                  (len(processed_texts[1:]), avg_pplm_ppl))
            sent_label = ('negative' if condition == 'very_negative'
                          else 'positive')
            # Fraction of perturbed samples the external classifier labels
            # with the intended sentiment.
            avg_sen_clas = sum(
                sentiment_predict(text, sentiment_tokenizer,
                                  sentiment_classifier)['label'] == sent_label
                for text in processed_texts[1:]
            ) / len(processed_texts[1:])
            print('External sentiment classifier over %d samples: %.3f' %
                  (len(processed_texts[1:]), avg_sen_clas))
            diversity_scores = [diversity([text])
                                for text in processed_texts[1:]]
            print('Diversity score over %d samples:' %
                  len(processed_texts[1:]))
            print('\t Dist-1 %.3f' % np.mean([a[0] for a in diversity_scores]))
            print('\t Dist-2 %.3f' % np.mean([a[1] for a in diversity_scores]))
            print('\t Dist-3 %.3f' % np.mean([a[2] for a in diversity_scores]))
            print('********\n')
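Example #13 also assumes an external sentiment_predict helper. A minimal sketch, under the assumption that the classifier is a Hugging Face sequence-classification model whose class index 1 means positive:

import torch

def sentiment_predict(text, sentiment_tokenizer, sentiment_classifier):
    # Hypothetical stand-in: returns {'label': 'positive' | 'negative'}.
    inputs = sentiment_tokenizer(text, return_tensors='pt', truncation=True)
    with torch.no_grad():
        logits = sentiment_classifier(**inputs).logits
    return {'label': 'positive' if logits.argmax(-1).item() == 1
            else 'negative'}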
Example #14
from run_pplm import run_pplm_example

if __name__ == '__main__':
    prefixes = ['The orange', 'The spider man', 'my father']
    for p in prefixes:
        with open('demos/legal', 'a') as file:
            file.write(
                '========================================================================================================================================================\n'
            )
            file.write('【{}】\n'.format(p))
            run_pplm_example(cond_text=p,
                             num_samples=1,
                             bag_of_words='legal',
                             length=50,
                             stepsize=0.03,
                             sample=True,
                             num_iterations=3,
                             window_length=5,
                             gamma=1.5,
                             gm_scale=0.95,
                             kl_scale=0.01,
                             verbosity='regular',
                             file=file)