def main(parsed_args):
    """Score each sentence of the given subset file with the fluency language model."""
    scorer = FluencyScorer(parsed_args.lang_model_path, parsed_args.lang_model_data)
    with open(os.path.join(parsed_args.data, parsed_args.gen_subset)) as f:
        for line in f:
            line = line.strip()
            score = scorer.score_sentence(line)
            print('[{:0.4f}] {}'.format(score, line))
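
# --- Illustrative usage sketch (assumed wiring, not from the original) ---
# main() above only reads four attributes from `parsed_args`, so a minimal
# argparse front-end could look like the hypothetical helper below; the flag
# names and defaults are assumptions, not the project's actual CLI.
def _parse_cli_args():
    import argparse
    parser = argparse.ArgumentParser(description='Score sentences with a fluency LM')
    parser.add_argument('--data', required=True, help='directory holding the subset file')
    parser.add_argument('--gen-subset', default='test', help='name of the file to score')
    parser.add_argument('--lang-model-path', required=True, help='LM checkpoint (.pt)')
    parser.add_argument('--lang-model-data', required=True, help='LM dictionary directory')
    return parser.parse_args()
# Usage would then be: main(_parse_cli_args())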
def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)
    print('| {} {} {} examples'.format(
        args.data, args.gen_subset, len(task.dataset(args.gen_subset))))

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _ = utils.load_ensemble_for_inference(
        args.path.split(':'), task, model_arg_overrides=eval(args.model_overrides))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]
        ),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=8,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    if args.score_reference:
        translator = SequenceScorer(models, task.target_dictionary)
    else:
        translator = SequenceGenerator(
            models, task.target_dictionary, beam_size=args.beam,
            minlen=args.min_len, stop_early=(not args.no_early_stop),
            normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
            unk_penalty=args.unkpen, sampling=args.sampling,
            sampling_topk=args.sampling_topk,
            sampling_temperature=args.sampling_temperature,
            diverse_beam_groups=args.diverse_beam_groups,
            diverse_beam_strength=args.diverse_beam_strength,
        )

    if use_cuda:
        translator.cuda()

    # Initialize fluency scorer (and language model)
    fluency_scorer = FluencyScorer(args.lang_model_path, args.lang_model_data)

    # Generate and compute BLEU score
    scorer = bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())

    # Save all sources, targets and hypotheses to compute GLEU score
    sources = []
    targets = []
    hypoths = []

    num_sentences = 0
    has_target = True
    with progress_bar.build_progress_bar(args, itr) as t:
        if args.score_reference:
            translations = translator.score_batched_itr(
                t, cuda=use_cuda, timer=gen_timer)
        else:
            translations = translator.generate_batched_itr(
                t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
                cuda=use_cuda, timer=gen_timer, prefix_size=args.prefix_size,
            )

        wps_meter = TimeMeter()
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            has_target = target_tokens is not None
            target_tokens = target_tokens.int().cpu() if has_target else None

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id)
                target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id)
            else:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
                if has_target:
                    target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

            sources.append(src_str)
            targets.append(target_str)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str))

            iteration = 0
            curr_src_str = src_str
            best_fluency_score = fluency_scorer.score_sentence(src_str).item()
            best_hypo_str = ''

            # Boost inference
            while True:
                hypo_tokens_list = []
                hypo_str_list = []
                hypo_fluency_score_list = []

                # Process top predictions
                for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                    hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                        hypo_tokens=hypo['tokens'].int().cpu(),
                        src_str=curr_src_str,
                        alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                        align_dict=align_dict,
                        tgt_dict=tgt_dict,
                        remove_bpe=args.remove_bpe,
                    )
                    hypo_tokens_list.append(hypo_tokens)
                    hypo_str_list.append(hypo_str)
                    hypo_fluency_score = fluency_scorer.score_sentence(hypo_str).item()
                    hypo_fluency_score_list.append(hypo_fluency_score)

                    if not args.quiet:
                        # print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                        print('H-{}\t{}\t{}'.format(sample_id, hypo_str, hypo['score']))
                        print('P-{}\t{}'.format(
                            sample_id,
                            ' '.join(map(
                                lambda x: '{:.4f}'.format(x),
                                hypo['positional_scores'].tolist(),
                            ))
                        ))
                        print('F-{}\t{}'.format(sample_id, hypo_fluency_score))

                        if args.print_alignment:
                            print('A-{}\t{}'.format(
                                sample_id,
                                ' '.join(map(lambda x: str(utils.item(x)), alignment))
                            ))

                # Compare best scores
                max_fluency_score = max(hypo_fluency_score_list)
                max_idx = hypo_fluency_score_list.index(max_fluency_score)
                max_hypo_str = hypo_str_list[max_idx]
                if max_fluency_score <= best_fluency_score:
                    # Score only the top hypothesis
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement
                        # and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str, tgt_dict, add_if_not_exist=True)
                    max_tokens = hypo_tokens_list[max_idx]
                    scorer.add(target_tokens, max_tokens)
                    hypoths.append(max_hypo_str)
                    break
                else:
                    # Keep boosting
                    iteration = iteration + 1
                    curr_src_str = max_hypo_str
                    best_fluency_score = max_fluency_score
                    best_hypo_str = max_hypo_str

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum,
        num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        print('| Generate {} with beam={}: {}'.format(
            args.gen_subset, args.beam, scorer.result_string()))

    # compute GLEU
    gleu_calculator = GLEU(args.n)
    gleu_calculator.load_text_sources(sources)
    gleu_calculator.load_text_references([targets])
    gleu_scores = gleu_calculator.run_iterations(
        num_iterations=args.iter, hypothesis=hypoths, per_sent=args.sent)
    gleu_score = [g for g in gleu_scores][0][0] * 100
    print('| Generate {} with beam={}: GLEU = {:2.2f}'.format(
        args.gen_subset, args.beam, gleu_score))
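
# --- Illustrative sketch (not part of the original file) ---
# The boost-inference loop above, distilled: keep re-translating the current
# best hypothesis as long as the n-best list keeps improving the fluency
# score, and stop at the first round that fails to improve. `translate_nbest`
# and `score` are hypothetical stand-ins for the SequenceGenerator and the
# FluencyScorer used above.
def boost_inference(src, translate_nbest, score, nbest=5):
    best, best_score = src, score(src)
    while True:
        candidates = translate_nbest(best)[:nbest]
        top = max(candidates, key=score)
        top_score = score(top)
        if top_score <= best_score:
            return best  # no candidate is more fluent; stop boosting
        best, best_score = top, top_score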
def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)
    print('| {} {} {} examples'.format(
        args.data, args.gen_subset, len(task.dataset(args.gen_subset))))

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _ = utils.load_ensemble_for_inference(
        args.path.split(':'), task, model_arg_overrides=eval(args.model_overrides))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]
        ),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=8,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    if args.score_reference:
        translator = SequenceScorer(models, task.target_dictionary)
    else:
        translator = SequenceGenerator(
            models, task.target_dictionary, beam_size=args.beam,
            minlen=args.min_len, stop_early=(not args.no_early_stop),
            normalize_scores=(not args.unnormalized), len_penalty=args.lenpen,
            unk_penalty=args.unkpen, sampling=args.sampling,
            sampling_topk=args.sampling_topk,
            sampling_temperature=args.sampling_temperature,
            diverse_beam_groups=args.diverse_beam_groups,
            diverse_beam_strength=args.diverse_beam_strength,
        )

    if use_cuda:
        translator.cuda()

    # Initialize fluency scorer (and language model)
    fluency_scorer = FluencyScorer(
        args.lang_model_path, args.lang_model_data, use_cpu=False)

    en_filename = os.path.join(args.out_dir, 'errorgen.en')
    gec_filename = os.path.join(args.out_dir, 'errorgen.gec')

    has_target = True
    with progress_bar.build_progress_bar(args, itr) as t, \
            open(en_filename, 'w') as en_file, \
            open(gec_filename, 'w') as gec_file:
        if args.score_reference:
            translations = translator.score_batched_itr(
                t, cuda=use_cuda, timer=gen_timer)
        else:
            translations = translator.generate_batched_itr(
                t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
                cuda=use_cuda, timer=gen_timer, prefix_size=args.prefix_size,
            )

        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            has_target = target_tokens is not None
            target_tokens = target_tokens.int().cpu() if has_target else None

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(args.gen_subset).src.get_original_text(sample_id)
                target_str = task.dataset(args.gen_subset).tgt.get_original_text(sample_id)
            else:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
                if has_target:
                    target_str = tgt_dict.string(target_tokens, args.remove_bpe, escape_unk=True)

            # Only consider sentences with at least four words.
            if len(src_tokens) < 5:
                continue

            # Calculate the fluency score for the source sentence
            source_fluency = fluency_scorer.score_sentence(src_str)

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=args.remove_bpe,
                )

                # Skip if this is the original sentence.
                if hypo_str == target_str:
                    continue

                # Score the hypothesis.
                hypo_fluency = fluency_scorer.score_sentence(hypo_str)

                # Save the hypothesis if it is sufficiently disfluent.
                if (source_fluency / hypo_fluency) > 1.05:
                    en_file.write('{}\n'.format(hypo_str))
                    gec_file.write('{}\n'.format(src_str))
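
# --- Illustrative sketch (not part of the original file) ---
# The acceptance test above as a pure function: a hypothesis is written out
# as a synthetic error sentence only if the clean source scores at least 5%
# higher on fluency than the hypothesis. `ratio_threshold` mirrors the
# hard-coded 1.05 above; the helper name is hypothetical.
def is_sufficiently_disfluent(source_fluency, hypo_fluency, ratio_threshold=1.05):
    return (source_fluency / hypo_fluency) > ratio_threshold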
from fluency_scorer import FluencyScorer
import pickle

# `api.load` below requires gensim's downloader; this import is inferred.
import gensim.downloader as api
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
import tensorflow as tf
import numpy as np

# load word2vec embedding
print("word2vec model loading")
word2vec = api.load("word2vec-google-news-300")
print("word2vec model loaded")

# load scorer
p, d = "dependency/wiki103/wiki103.pt", "dependency/wiki103"
fluency_scorer = FluencyScorer(p, d)

# load cnn model
VOCAB_SIZE = 30000
MAX_LEN = 500
model = load_model('model/cnn_model.h5')
with open('model/cnn_model_tokenizer.pickle', 'rb') as f:
    imdb_tokenizer = pickle.load(f)

# AnalogyMutator, ActiveMutator and create_sentence_candidates are
# project-local helpers; their imports are not shown in this excerpt.
ana = AnalogyMutator("gender", model=word2vec)
act = ActiveMutator("gender")


def mutate(sentence, epsilon):
    ana_candidates, act_candidates = create_sentence_candidates(
def main(args):
    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    model_paths = args.path.split(':')
    models, model_args = utils.load_ensemble_for_inference(
        model_paths, task, model_arg_overrides=eval(args.model_overrides))

    # Set dictionaries
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Initialize generator
    translator = SequenceGenerator(
        models, tgt_dict, beam_size=args.beam, minlen=args.min_len,
        stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen, unk_penalty=args.unkpen,
        sampling=args.sampling, sampling_topk=args.sampling_topk,
        sampling_temperature=args.sampling_temperature,
        diverse_beam_groups=args.diverse_beam_groups,
        diverse_beam_strength=args.diverse_beam_strength,
    )

    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Initialize fluency scorer (and language model)
    fluency_scorer = FluencyScorer(args.lang_model_path, args.lang_model_data)

    def make_result(src_str, hypos, tgt_str='', iteration=0):
        results = []

        # compute fluency score for source string;
        # the source string itself is an entry
        result0 = Correction()
        result0.iteration = iteration
        result0.src_str = result0.hypo_str = src_str
        fluency_scores = fluency_scorer.score_sentence(src_str).item()
        result0.fluency_scores = fluency_scores
        result0.fluency_scores_str = "Fluency Score: {:0.4f}".format(fluency_scores)
        results.append(result0)

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            result = Correction()
            result.iteration = iteration + 1
            result.src_str = src_str
            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            # result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
            result.hypo_str = hypo_str
            result.hypo_score = result.hypo_score_str = hypo['score']
            result.pos_scores_str = 'P\t{}'.format(' '.join(map(
                lambda x: '{:.4f}'.format(x),
                hypo['positional_scores'].tolist(),
            )))
            result.alignments_str = (
                'A\t{}'.format(' '.join(map(lambda x: str(utils.item(x)), alignment)))
                if args.print_alignment else None
            )

            # compute GLEU if target is provided
            if tgt_str:
                gleu_calculator = GLEU(args.n)
                gleu_calculator.load_text_sources([src_str])
                gleu_calculator.load_text_references([[tgt_str]])
                gleu_scores = gleu_calculator.run_iterations(
                    num_iterations=args.iter,
                    hypothesis=[hypo_str],
                    per_sent=args.sent)
                gleu_score = [g for g in gleu_scores][0][0] * 100
                result.gleu_scores = gleu_score
                result.gleu_scores_str = 'GLEU {:2.2f}'.format(gleu_score)
            else:
                result.gleu_scores_str = ('GLEU N/A (no target was provided; use the '
                                          'format "source sentence|target sentence" '
                                          'to provide a target/reference)')

            # compute fluency score
            fluency_scores = fluency_scorer.score_sentence(hypo_str).item()
            result.fluency_scores = fluency_scores
            result.fluency_scores_str = "Fluency Score: {:0.4f}".format(fluency_scores)

            results.append(result)

        return results

    def process_batch(batch, tgts, iteration):
        tokens = batch.tokens
        lengths = batch.lengths

        if use_cuda:
            tokens = tokens.cuda()
            lengths = lengths.cuda()

        encoder_input = {'src_tokens': tokens, 'src_lengths': lengths}
        translations = translator.generate(
            encoder_input,
            maxlen=int(args.max_len_a * tokens.size(1) + args.max_len_b),
        )

        return [
            make_result(batch.srcs[i], t, tgts[i], iteration)
            for i, t in enumerate(translations)
        ]

    max_positions = utils.resolve_max_positions(
        task.max_positions(), *[model.max_positions() for model in models])

    if not args.server:
        listen_to_stdin(args, max_positions, task, process_batch)
    else:
        listen_to_web(args, max_positions, task, process_batch)
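
# --- Illustrative sketch (not part of the original file) ---
# listen_to_stdin/listen_to_web are project helpers whose code is not shown
# here. Judging from the GLEU message above, each input line may carry an
# optional reference after a '|' separator; a hypothetical parser for that
# "source sentence|target sentence" format could look like this.
def split_source_target(line):
    src, _sep, tgt = line.partition('|')
    return src.strip(), tgt.strip()  # tgt is '' when no reference is given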