def pplm_examples():
    """Run two canned PPLM demos: a bag-of-words run and a discriminator run."""
    # Demo 1: bag-of-words steering toward 'military' vocabulary.
    run_pplm_example(
        cond_text="The potato",
        num_samples=3,
        bag_of_words='military',
        length=50,
        stepsize=0.03,
        sample=True,
        num_iterations=3,
        window_length=5,
        gamma=1.5,
        gm_scale=0.95,
        kl_scale=0.01,
        verbosity='regular',
    )
    # Demo 2: sentiment-discriminator steering toward 'very_positive'.
    run_pplm_example(
        cond_text="Once upon a time",
        num_samples=10,
        discrim='sentiment',
        class_label='very_positive',
        length=50,
        stepsize=0.05,
        sample=True,
        num_iterations=10,
        gamma=1,
        gm_scale=0.9,
        kl_scale=0.02,
        verbosity='quiet',
    )
def get_data():
    """Flask view: on POST, run PPLM on the submitted text and render the result.

    Reads the 'nlg' text field and the 'personality' bag-of-words choice from
    the form, then renders result.html with the original text and the type of
    the PPLM return value.

    NOTE(review): a non-POST request falls through and returns None, which
    Flask rejects — confirm whether a GET branch/redirect is intended.
    """
    if request.method == 'POST':
        submitted_text = request.form['nlg']
        chosen_bow = request.form['personality']
        result = run_pplm_example(
            cond_text=submitted_text,
            num_samples=1,
            bag_of_words=chosen_bow,
            length=50,
            stepsize=0.03,
            sample=True,
            num_iterations=3,
            window_length=5,
            gamma=1.5,
            gm_scale=0.95,
            kl_scale=0.01,
            verbosity='regular',
        )
        return render_template('result.html',
                               prediction=[submitted_text, type(result)])
def generation(bag_of_words, seed, num_samples_each_prefix, length, step_size,
               num_iterations, verbosity, prefixes, dst_file):
    """Generate PPLM bag-of-words samples for every prefix, appending to a file.

    Args:
        bag_of_words: bag-of-words topic identifier passed to PPLM.
        seed: random seed forwarded to run_pplm_example.
        num_samples_each_prefix: samples generated per prefix.
        length: generated sequence length.
        step_size: PPLM gradient step size.
        num_iterations: PPLM perturbation iterations.
        verbosity: verbosity level forwarded to run_pplm_example.
        prefixes: iterable of conditioning prefixes.
        dst_file: path of the output file (opened in append mode).
    """
    # Fix: open the destination once instead of re-opening it in append mode
    # for every prefix; the appended content is identical.
    with open(dst_file, 'a') as file:
        for prefix in prefixes:
            run_pplm_example(
                bag_of_words=bag_of_words,
                cond_text=prefix,
                num_samples=num_samples_each_prefix,
                length=length,
                # influences random generation
                seed=seed,
                stepsize=step_size,
                sample=True,
                num_iterations=num_iterations,
                gamma=1.5,
                gm_scale=0.9,
                kl_scale=0.01,
                verbosity=verbosity,
                file=file,
            )
def generate_text_with_pplm_bow():
    """Evaluate PPLM bag-of-words generation across topic/prefix combinations.

    For each (topic, prefix) pair: generates 20 samples, strips the leading
    '<|endoftext|>' marker, then prints perplexity and Dist-1/2/3 diversity
    statistics. Index 0 of the returned samples is treated as the original
    (unperturbed) text; statistics are computed over the remaining samples.

    Relies on module-level names: run_pplm_example, device, per_model,
    tokenizer, perplexity_score, diversity, np.

    Fixes vs. original: removed the degenerate `for prefix in [prefix1]` and
    `for _ in range(1)` loops (single iteration each), the unused `all_text`
    list, and the misleading "generate 100 samples" comment (20 are generated).
    """
    topics = [
        "military", "religion", "politics", "science", "legal", "space",
        "technology"
    ]
    prefixes = ["The chiken", "The house", "The pizza", "The potato", "The lake"]
    for condition in topics:
        for prefix in prefixes:
            print('Condition:', condition)
            print('Prefix:', prefix)
            generated_texts = run_pplm_example(cond_text=prefix,
                                               num_samples=20,
                                               bag_of_words=condition,
                                               length=100,
                                               stepsize=0.03,
                                               sample=True,
                                               num_iterations=3,
                                               window_length=5,
                                               gamma=1.5,
                                               gm_scale=0.95,
                                               kl_scale=0.01,
                                               verbosity='quiet',
                                               device=device)
            # Get rid of the beginning '<|endoftext|>' marker.
            processed_texts = [
                sample.replace('<|endoftext|>', '')
                for sample in generated_texts
            ]
            print('Original perplexity score: %.3f' % perplexity_score(
                processed_texts[0], per_model, tokenizer))
            avg_pplm_ppl = np.mean([
                perplexity_score(text, per_model, tokenizer)
                for text in processed_texts[1:]
            ])
            print('Perplexity score over %d samples: %.3f' %
                  (len(processed_texts[1:]), avg_pplm_ppl))
            diversity_scores = [
                diversity([text]) for text in processed_texts[1:]
            ]
            print('Diversity score over %d samples:' %
                  len(processed_texts[1:]))
            print('\t Dist-1 %.3f' % np.mean([a[0] for a in diversity_scores]))
            print('\t Dist-2 %.3f' % np.mean([a[1] for a in diversity_scores]))
            print('\t Dist-3 %.3f' % np.mean([a[2] for a in diversity_scores]))
            print('********\n')
def generate_samples(prefixes, num_samples, length, method_name, sentiment_label,
                     verbose, num_iterations, vad_loss_params=None, seed=0,
                     vad_threshold=0.01):
    """Generate PPLM sentiment-steered samples for each prefix and record stats.

    Appends all samples to a file under automated_evaluation/generated_samples/
    named from the run parameters, then renames the file once to embed the mean
    per-sample word-change count and total wall-clock time.

    Args:
        prefixes: conditioning prefixes; one PPLM run per prefix.
        num_samples: samples per prefix.
        length: generated sequence length.
        method_name: sampling method name forwarded as sample_method.
        sentiment_label: 'positive' or 'negative'; expanded to 'very_<label>'.
        verbose: verbosity level forwarded to run_pplm_example.
        num_iterations: PPLM perturbation iterations.
        vad_loss_params: optional dict with 'lambda', 'pos_threshold',
            'neg_threshold' keys (also embedded in the file name).
        seed: random seed (influences random generation).
        vad_threshold: VAD threshold forwarded to run_pplm_example.

    Fixes vs. original: file opened once instead of per prefix; the two chained
    os.rename calls collapsed into a single rename to the same final name.
    """
    total_samples = len(prefixes) * num_samples
    file_name = 'seed={},{},name={},samples={},itrs={},vad_t={}'.format(
        seed, sentiment_label, method_name, total_samples, num_iterations,
        vad_threshold)
    if vad_loss_params:
        file_name += ', lambda={}, pos_t={}, neg_t={}'.format(
            vad_loss_params['lambda'], vad_loss_params['pos_threshold'],
            vad_loss_params['neg_threshold'])
    output = 'automated_evaluation/generated_samples/{}/{}'.format(
        sentiment_label, file_name)
    word_changes_list = []
    start_time = time.time()
    with open(output, 'a') as file:
        for prefix in prefixes:
            word_changes = run_pplm_example(
                cond_text=prefix,
                num_samples=num_samples,
                discrim='sentiment',
                class_label='very_{}'.format(sentiment_label),
                length=length,
                # influences random generation
                seed=seed,
                stepsize=0.03,
                sample=True,
                num_iterations=num_iterations,
                gamma=1,
                gm_scale=0.95,
                kl_scale=0.01,
                verbosity=verbose,
                file=file,
                sample_method=method_name,
                vad_loss_params=vad_loss_params,
                vad_threshold=vad_threshold,
            )
            word_changes_list.append(word_changes)
    time_lag = time.time() - start_time
    # One rename straight to the final name (same result as the original's
    # rename-then-rename sequence).
    final_name = '{},changes={:.2f},time={:.2f}'.format(
        output, stat.mean(word_changes_list), time_lag)
    os.rename(output, final_name)
def test_BC_VAD_ABS_untached(self):
    """Regression test: BC_VAD_ABS sampling output must match the known-good file."""
    out_path = 'test_cases/BC_VAD_ABS/output'
    with open(out_path, 'w') as file:
        run_pplm_example(
            cond_text='The book',
            num_samples=1,
            discrim='sentiment',
            class_label=3,  # very_negative
            length=5,
            # influences random generation
            seed=0,
            stepsize=0.05,
            sample=True,
            num_iterations=3,
            gamma=1,
            gm_scale=0.9,
            kl_scale=0.02,
            verbosity='quiet',
            file=file,
            sample_method='BC_VAD_ABS',
        )
    with open(out_path, 'r') as produced:
        output = produced.read()
    with open('test_cases/BC_VAD_ABS/known_output', 'r') as expected:
        known_output = expected.read()
    self.assertEqual(output, known_output)
def get_bow_probs(bow_dir, text, gm_scale=0.95, kl_scale=0.01, stepsize=0.03,
                  num_iterations=3):
    """Run a one-token PPLM bag-of-words step and return the second element of
    run_pplm_example's result (presumably the token probabilities — confirm
    against run_pplm's return contract).

    An empty `text` triggers unconditional generation (uncond=True).

    Args:
        bow_dir: directory containing story_bow.txt.
        text: conditioning text; '' means unconditional.
        gm_scale, kl_scale, stepsize, num_iterations: PPLM hyperparameters.
    """
    return run_pplm_example(
        cond_text=text,
        num_samples=1,
        bag_of_words=f"{bow_dir}/story_bow.txt",
        length=1,
        stepsize=stepsize,
        sample=True,
        # Idiom fix: direct boolean expression instead of
        # `True if text == '' else False`.
        uncond=(text == ''),
        num_iterations=num_iterations,
        window_length=5,
        gamma=1.5,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet'
    )[1]
def get_discriminator_probs(discrim_dir, text, gm_scale=0.90, kl_scale=0.02,
                            stepsize=0.04, num_iterations=20):
    """Run a one-token PPLM discriminator step and return the second element of
    run_pplm_example's result (presumably the token probabilities — confirm
    against run_pplm's return contract).

    An empty `text` triggers unconditional generation (uncond=True).

    Args:
        discrim_dir: directory containing the generic classifier head files.
        text: conditioning text; '' means unconditional.
        gm_scale, kl_scale, stepsize, num_iterations: PPLM hyperparameters.
    """
    return run_pplm_example(
        cond_text=text,
        num_samples=1,
        discrim='generic',
        discrim_meta=f"{discrim_dir}/generic_classifier_head_meta.json",
        discrim_weights=f"{discrim_dir}/generic_classifier_head_epoch_10.pt",
        class_label='1',
        length=1,
        stepsize=stepsize,
        sample=True,
        # Bug fix: original read `True if i == 0 else False`, but `i` is not
        # defined in this function (NameError at runtime). The sibling
        # get_bow_probs keys uncond on empty text; do the same here.
        uncond=(text == ''),
        num_iterations=num_iterations,
        gamma=1,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet'
    )[1]
def gen_bow(bow_dir, text, seed=0, length=20, gm_scale=0.95, kl_scale=0.01,
            stepsize=0.03, num_iterations=3):
    """Generate one PPLM bag-of-words sample conditioned on `text` and return
    the first element of run_pplm_example's result."""
    pplm_kwargs = dict(
        cond_text=text,
        num_samples=1,
        bag_of_words=f"{bow_dir}/story_bow.txt",
        length=length,
        stepsize=stepsize,
        sample=True,
        seed=seed,
        num_iterations=num_iterations,
        window_length=5,
        gamma=1.5,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet',
    )
    return run_pplm_example(**pplm_kwargs)[0]
def gen_discrim(discrim_dir, text, seed=0, length=20, gm_scale=0.90, kl_scale=0.02,
                stepsize=0.04, num_iterations=20):
    """Generate one PPLM discriminator-steered sample conditioned on `text`
    and return the first element of run_pplm_example's result."""
    meta_path = f"{discrim_dir}/generic_classifier_head_meta.json"
    weights_path = f"{discrim_dir}/generic_classifier_head_epoch_10.pt"
    result = run_pplm_example(
        cond_text=text,
        num_samples=1,
        seed=seed,
        discrim='generic',
        discrim_meta=meta_path,
        discrim_weights=weights_path,
        class_label='1',
        length=length,
        stepsize=stepsize,
        sample=True,
        num_iterations=num_iterations,
        gamma=1,
        gm_scale=gm_scale,
        kl_scale=kl_scale,
        verbosity='quiet')
    return result[0]
from run_pplm import run_pplm_example

# Demo script: append PPLM 'religion' bag-of-words generations for a few
# prefixes to demos/religion, each preceded by a separator and the prefix.
if __name__ == '__main__':
    prefix = ['The orange', 'The spider man', 'my father']
    # Fix: open the demo file once instead of re-opening it in append mode
    # for every prefix; the appended content is identical.
    with open('demos/religion', 'a') as file:
        for p in prefix:
            file.write(
                '========================================================================================================================================================\n'
            )
            file.write('【{}】\n'.format(p))
            # NOTE(review): this passes generation_method= while other demo
            # scripts in this codebase pass sample_method= — confirm which
            # keyword run_pplm_example actually accepts.
            run_pplm_example(cond_text=p,
                             num_samples=1,
                             bag_of_words='religion',
                             length=50,
                             stepsize=0.03,
                             sample=True,
                             num_iterations=3,
                             window_length=5,
                             gamma=1.5,
                             gm_scale=0.95,
                             kl_scale=0.01,
                             verbosity='regular',
                             file=file,
                             generation_method='vad_abs')
# Demo script: for each enabled sampling method, append a header plus a PPLM
# positive-sentiment generation for the prefix to demos/neg2pos, then write a
# closing separator line.
prefix = 'The horse'
sample_methods = [
    'perturbed',
    # 'vad_abs',
    # 'vad_abs'
]
with open('demos/neg2pos', 'a') as file:
    for m in sample_methods:
        file.write(
            '\n================= "{}" to BC by {}) =================\n'.format(prefix, m))
        run_pplm_example(
            cond_text=prefix,
            num_samples=1,
            discrim='sentiment',
            class_label='very_positive',
            length=50,
            # influences random generation
            seed=0,
            stepsize=0.05,
            sample=True,
            num_iterations=3,
            gamma=1,
            gm_scale=0.9,
            kl_scale=0.02,
            verbosity='quiet',
            file=file,
            sample_method=m,
        )
    # NOTE(review): source formatting was collapsed; this closing separator is
    # assumed to follow the loop rather than sit inside it — confirm.
    file.write('=' * 89)
def generate_text_with_pplm_discrim(sentiment_tokenizer, sentiment_classifier):
    """Evaluate PPLM discriminator-steered generation over sentiment/prefix pairs.

    For each (sentiment condition, prefix) pair: generates 20 samples, strips
    the leading '<|endoftext|>' marker, then prints perplexity, agreement with
    an external sentiment classifier, and Dist-1/2/3 diversity statistics.
    Index 0 of the returned samples is treated as the original (unperturbed)
    text; statistics are computed over the remaining samples.

    Args:
        sentiment_tokenizer: tokenizer passed to sentiment_predict.
        sentiment_classifier: classifier passed to sentiment_predict.

    Relies on module-level names: run_pplm_example, device, per_model,
    tokenizer, perplexity_score, sentiment_predict, diversity, np.

    Fixes vs. original: removed the degenerate `for prefix in [prefix1]` and
    `for _ in range(1)` loops (single iteration each), the unused `all_text`
    list, the misleading "generate 100 samples" comment (20 are generated),
    and the 'sentimnt' typo in the printed classifier message.
    """
    prefixes = ["The chiken", "The house", "The pizza", "The potato", "The lake"]
    for condition in ["very_negative", "very_positive"]:
        for prefix in prefixes:
            print('Condition:', condition)
            print('Prefix:', prefix)
            generated_texts = run_pplm_example(cond_text=prefix,
                                               num_samples=20,
                                               discrim='sentiment',
                                               class_label=condition,
                                               length=100,
                                               stepsize=0.05,
                                               sample=True,
                                               num_iterations=10,
                                               gamma=1,
                                               gm_scale=0.9,
                                               kl_scale=0.02,
                                               verbosity='quiet',
                                               device=device)
            # Get rid of the beginning '<|endoftext|>' marker.
            processed_texts = [
                sample.replace('<|endoftext|>', '')
                for sample in generated_texts
            ]
            print('Original perplexity score: %f' % perplexity_score(
                processed_texts[0], per_model, tokenizer))
            avg_pplm_ppl = np.mean([
                perplexity_score(text, per_model, tokenizer)
                for text in processed_texts[1:]
            ])
            print('Perplexity score over %d samples: %f' %
                  (len(processed_texts[1:]), avg_pplm_ppl))
            # Map the PPLM class label to the external classifier's label.
            sent_label = ('negative' if condition == "very_negative"
                          else 'positive')
            avg_sen_clas = sum([
                sentiment_predict(text, sentiment_tokenizer,
                                  sentiment_classifier)['label'] == sent_label
                for text in processed_texts[1:]
            ]) / len(processed_texts[1:])
            print('External sentiment classifier over %d samples: %.3f' %
                  (len(processed_texts[1:]), avg_sen_clas))
            diversity_scores = [
                diversity([text]) for text in processed_texts[1:]
            ]
            print('Diversity score over %d samples:' %
                  len(processed_texts[1:]))
            print('\t Dist-1 %f' % np.mean([a[0] for a in diversity_scores]))
            print('\t Dist-2 %f' % np.mean([a[1] for a in diversity_scores]))
            print('\t Dist-3 %f' % np.mean([a[2] for a in diversity_scores]))
            print('********\n')
from run_pplm import run_pplm_example

# Demo script: append PPLM 'legal' bag-of-words generations for a few
# prefixes to demos/legal, each preceded by a separator and the prefix.
if __name__ == '__main__':
    prefix = ['The orange', 'The spider man', 'my father']
    # Fix: open the demo file once instead of re-opening it in append mode
    # for every prefix; the appended content is identical.
    with open('demos/legal', 'a') as file:
        for p in prefix:
            file.write(
                '========================================================================================================================================================\n'
            )
            file.write('【{}】\n'.format(p))
            run_pplm_example(cond_text=p,
                             num_samples=1,
                             bag_of_words='legal',
                             length=50,
                             stepsize=0.03,
                             sample=True,
                             num_iterations=3,
                             window_length=5,
                             gamma=1.5,
                             gm_scale=0.95,
                             kl_scale=0.01,
                             verbosity='regular',
                             file=file)