Exemple #1
0
def main(parsed_args):
    scorer = FluencyScorer(parsed_args.lang_model_path,
                           parsed_args.lang_model_data)
    with open(os.path.join(parsed_args.data, parsed_args.gen_subset)) as f:
        for line in f:
            line = line.strip()
            score = scorer.score_sentence(line)
            print('[{:0.4f}] {}'.format(score, line))
Exemple #2
0
def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)
    print('| {} {} {} examples'.format(args.data, args.gen_subset,
                                       len(task.dataset(args.gen_subset))))

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _ = utils.load_ensemble_for_inference(args.path.split(':'),
                                                  task,
                                                  model_arg_overrides=eval(
                                                      args.model_overrides))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=8,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    if args.score_reference:
        translator = SequenceScorer(models, task.target_dictionary)
    else:
        translator = SequenceGenerator(
            models,
            task.target_dictionary,
            beam_size=args.beam,
            minlen=args.min_len,
            stop_early=(not args.no_early_stop),
            normalize_scores=(not args.unnormalized),
            len_penalty=args.lenpen,
            unk_penalty=args.unkpen,
            sampling=args.sampling,
            sampling_topk=args.sampling_topk,
            sampling_temperature=args.sampling_temperature,
            diverse_beam_groups=args.diverse_beam_groups,
            diverse_beam_strength=args.diverse_beam_strength,
        )

    if use_cuda:
        translator.cuda()

    # Initialize fluency scorer (and language model)
    fluency_scorer = FluencyScorer(args.lang_model_path, args.lang_model_data)

    # Generate and compute BLEU score
    scorer = bleu.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
    # Save all sources, targets and hypothesis to compute GLEU score
    sources = []
    targets = []
    hypoths = []

    num_sentences = 0
    has_target = True
    with progress_bar.build_progress_bar(args, itr) as t:
        if args.score_reference:
            translations = translator.score_batched_itr(t,
                                                        cuda=use_cuda,
                                                        timer=gen_timer)
        else:
            translations = translator.generate_batched_itr(
                t,
                maxlen_a=args.max_len_a,
                maxlen_b=args.max_len_b,
                cuda=use_cuda,
                timer=gen_timer,
                prefix_size=args.prefix_size,
            )

        wps_meter = TimeMeter()
        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            has_target = target_tokens is not None
            target_tokens = target_tokens.int().cpu() if has_target else None

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(
                    args.gen_subset).src.get_original_text(sample_id)
                target_str = task.dataset(
                    args.gen_subset).tgt.get_original_text(sample_id)
            else:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
                if has_target:
                    target_str = tgt_dict.string(target_tokens,
                                                 args.remove_bpe,
                                                 escape_unk=True)

            sources.append(src_str)
            targets.append(target_str)

            if not args.quiet:
                print('S-{}\t{}'.format(sample_id, src_str))
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str))

            iteration = 0
            curr_src_str = src_str
            best_fluency_score = fluency_scorer.score_sentence(src_str).item()
            best_hypo_str = ''

            # Boost inference
            while True:
                hypo_tokens_list = []
                hypo_str_list = []
                hypo_fluency_score_list = []

                # Process top predictions
                for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                    hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                        hypo_tokens=hypo['tokens'].int().cpu(),
                        src_str=curr_src_str,
                        alignment=hypo['alignment'].int().cpu()
                        if hypo['alignment'] is not None else None,
                        align_dict=align_dict,
                        tgt_dict=tgt_dict,
                        remove_bpe=args.remove_bpe,
                    )

                    hypo_tokens_list.append(hypo_tokens)
                    hypo_str_list.append(hypo_str)
                    hypo_fluency_score = fluency_scorer.score_sentence(
                        hypo_str).item()
                    hypo_fluency_score_list.append(hypo_fluency_score)

                    if not args.quiet:
                        # print('H-{}\t{}\t{}'.format(sample_id, hypo['score'], hypo_str))
                        print('H-{}\t{}\t{}'.format(sample_id, hypo_str,
                                                    hypo['score']))
                        print('P-{}\t{}'.format(
                            sample_id, ' '.join(
                                map(
                                    lambda x: '{:.4f}'.format(x),
                                    hypo['positional_scores'].tolist(),
                                ))))
                        print('F-{}\t{}'.format(sample_id, hypo_fluency_score))

                        if args.print_alignment:
                            print('A-{}\t{}'.format(
                                sample_id, ' '.join(
                                    map(lambda x: str(utils.item(x)),
                                        alignment))))

                # Compare best scores
                max_fluency_score = max(hypo_fluency_score_list)
                max_idx = hypo_fluency_score_list.index(max_fluency_score)
                max_hypo_str = hypo_str_list[max_idx]
                if max_fluency_score <= best_fluency_score:
                    # Score only the top hypothesis
                    if align_dict is not None or args.remove_bpe is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tokenizer.Tokenizer.tokenize(
                            target_str, tgt_dict, add_if_not_exist=True)
                    max_tokens = hypo_tokens_list[max_idx]
                    scorer.add(target_tokens, max_tokens)
                    hypoths.append(max_hypo_str)
                    break
                else:
                    # Keep boosting
                    iteration = iteration + 1
                    curr_src_str = max_hypo_str
                    best_fluency_score = max_fluency_score
                    best_hypo_str = max_hypo_str

            wps_meter.update(src_tokens.size(0))
            t.log({'wps': round(wps_meter.avg)})
            num_sentences += 1

    print(
        '| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'
        .format(num_sentences, gen_timer.n, gen_timer.sum,
                num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        print('| Generate {} with beam={}: {}'.format(args.gen_subset,
                                                      args.beam,
                                                      scorer.result_string()))

    # compute GLEU
    gleu_calculator = GLEU(args.n)
    gleu_calculator.load_text_sources(sources)
    gleu_calculator.load_text_references([targets])
    gleu_scores = gleu_calculator.run_iterations(num_iterations=args.iter,
                                                 hypothesis=hypoths,
                                                 per_sent=args.sent)
    gleu_score = [g for g in gleu_scores][0][0] * 100
    print('| Generate {} with beam={}: GLEU = {:2.2f}'.format(
        args.gen_subset, args.beam, gleu_score))
Exemple #3
0
def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.raw_text, \
        '--replace-unk requires a raw text dataset (--raw-text)'

    if args.max_tokens is None and args.max_sentences is None:
        args.max_tokens = 12000
    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args.gen_subset)
    print('| {} {} {} examples'.format(
        args.data, args.gen_subset, len(task.dataset(args.gen_subset))))

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    models, _ = utils.load_ensemble_for_inference(args.path.split(
        ':'), task, model_arg_overrides=eval(args.model_overrides))

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args.gen_subset),
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]
        ),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=8,
        num_shards=args.num_shards,
        shard_id=args.shard_id,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    if args.score_reference:
        translator = SequenceScorer(models, task.target_dictionary)
    else:
        translator = SequenceGenerator(
            models, task.target_dictionary, beam_size=args.beam, minlen=args.min_len,
            stop_early=(not args.no_early_stop), normalize_scores=(not args.unnormalized),
            len_penalty=args.lenpen, unk_penalty=args.unkpen,
            sampling=args.sampling, sampling_topk=args.sampling_topk, sampling_temperature=args.sampling_temperature,
            diverse_beam_groups=args.diverse_beam_groups, diverse_beam_strength=args.diverse_beam_strength,
        )

    if use_cuda:
        translator.cuda()

    # Initialize fluency scorer (and language model)
    fluency_scorer = FluencyScorer(
        args.lang_model_path, args.lang_model_data, use_cpu=False)

    en_filename = os.path.join(args.out_dir, 'errorgen.en')
    gec_filename = os.path.join(args.out_dir, 'errorgen.gec')
    has_target = True
    with progress_bar.build_progress_bar(args, itr) as t, open(en_filename, 'w') as en_file, open(gec_filename, 'w') as gec_file:
        if args.score_reference:
            translations = translator.score_batched_itr(
                t, cuda=use_cuda, timer=gen_timer)
        else:
            translations = translator.generate_batched_itr(
                t, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
                cuda=use_cuda, timer=gen_timer, prefix_size=args.prefix_size,
            )

        for sample_id, src_tokens, target_tokens, hypos in translations:
            # Process input and ground truth
            has_target = target_tokens is not None
            target_tokens = target_tokens.int().cpu() if has_target else None

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(
                    args.gen_subset).src.get_original_text(sample_id)
                target_str = task.dataset(
                    args.gen_subset).tgt.get_original_text(sample_id)
            else:
                src_str = src_dict.string(src_tokens, args.remove_bpe)
                if has_target:
                    target_str = tgt_dict.string(
                        target_tokens, args.remove_bpe, escape_unk=True)

            # Only consider sentences with at least four words.
            if len(src_tokens) < 5:
                continue

            # Calculate the fluency score for the source sentence
            source_fluency = fluency_scorer.score_sentence(src_str)

            # Process top predictions
            for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'].int().cpu(
                    ) if hypo['alignment'] is not None else None,
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=args.remove_bpe,
                )

                # Skip if this is the original sentence.
                if hypo_str == target_str:
                    continue

                # Score the hypothesis.
                hypo_fluency = fluency_scorer.score_sentence(hypo_str)

                # Save the hypothesis if it is sufficiently disfluent.
                if (source_fluency / hypo_fluency) > 1.05:
                    en_file.write('{}\n'.format(hypo_str))
                    gec_file.write('{}\n'.format(src_str))
Exemple #4
0
from fluency_scorer import FluencyScorer
import pickle
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
import tensorflow as tf
import numpy as np

# load word2vec embedding
print("word2vec model loading")
word2vec = api.load("word2vec-google-news-300")
print("word2vec model loaded")

# load scorer
p, d = "dependency/wiki103/wiki103.pt", "dependency/wiki103"
fluency_scorer = FluencyScorer(p, d)

# load cnn model
VOCAB_SIZE = 30000
MAX_LEN = 500

model = load_model('model/cnn_model.h5')
with open('model/cnn_model_tokenizer.pickle', 'rb') as f:
    imdb_tokenizer = pickle.load(f)

ana = AnalogyMutator("gender", model=word2vec)
act = ActiveMutator("gender")


def mutate(sentence, epsilon):
    ana_candidates, act_candidates = create_sentence_candidates(
Exemple #5
0
def main(args):
    if args.buffer_size < 1:
        args.buffer_size = 1
    if args.max_tokens is None and args.max_sentences is None:
        args.max_sentences = 1

    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert not args.max_sentences or args.max_sentences <= args.buffer_size, \
        '--max-sentences/--batch-size cannot be larger than --buffer-size'

    print(args)

    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation
    task = tasks.setup_task(args)

    # Load ensemble
    print('| loading model(s) from {}'.format(args.path))
    model_paths = args.path.split(':')
    models, model_args = utils.load_ensemble_for_inference(
        model_paths, task, model_arg_overrides=eval(args.model_overrides))

    # Set dictionaries
    tgt_dict = task.target_dictionary

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
            need_attn=args.print_alignment,
        )
        if args.fp16:
            model.half()

    # Initialize generator
    translator = SequenceGenerator(
        models,
        tgt_dict,
        beam_size=args.beam,
        minlen=args.min_len,
        stop_early=(not args.no_early_stop),
        normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen,
        unk_penalty=args.unkpen,
        sampling=args.sampling,
        sampling_topk=args.sampling_topk,
        sampling_temperature=args.sampling_temperature,
        diverse_beam_groups=args.diverse_beam_groups,
        diverse_beam_strength=args.diverse_beam_strength,
    )

    if use_cuda:
        translator.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args.replace_unk)

    # Initialize fluency scorer (and language model)
    fluency_scorer = FluencyScorer(args.lang_model_path, args.lang_model_data)

    def make_result(src_str, hypos, tgt_str='', iteration=0):
        results = []

        # compute fluency score for source string
        # the source string itself is an entry
        result0 = Correction()
        result0.iteration = iteration
        result0.src_str = result0.hypo_str = src_str
        fluency_scores = fluency_scorer.score_sentence(src_str).item()
        result0.fluency_scores = fluency_scores
        result0.fluency_scores_str = "Fluency Score: {:0.4f}".format(
            fluency_scores)
        results.append(result0)

        # Process top predictions
        for hypo in hypos[:min(len(hypos), args.nbest)]:
            result = Correction()
            result.iteration = iteration + 1
            result.src_str = src_str

            hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu()
                if hypo['alignment'] is not None else None,
                align_dict=align_dict,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe,
            )
            # result.hypos.append('H\t{}\t{}'.format(hypo['score'], hypo_str))
            result.hypo_str = hypo_str
            result.hypo_score = result.hypo_score_str = hypo['score']
            result.pos_scores_str = 'P\t{}'.format(' '.join(
                map(
                    lambda x: '{:.4f}'.format(x),
                    hypo['positional_scores'].tolist(),
                )))
            result.alignments_str = ('A\t{}'.format(' '.join(
                map(lambda x: str(utils.item(x)), alignment)))
                                     if args.print_alignment else None)

            # compute GLEU if target is provided
            if tgt_str:
                gleu_calculator = GLEU(args.n)
                gleu_calculator.load_text_sources([src_str])
                gleu_calculator.load_text_references([[tgt_str]])
                gleu_scores = gleu_calculator.run_iterations(
                    num_iterations=args.iter,
                    hypothesis=[hypo_str],
                    per_sent=args.sent)
                gleu_score = [g for g in gleu_scores][0][0] * 100
                result.gleu_scores = gleu_score
                result.gleu_scores_str = 'GLEU {:2.2f}'.format(gleu_score)
            else:
                result.gleu_scores_str = 'GLEU N/A (no target was provided. use format "source sentence|target setence" to provide a target/reference)'

            # compute fluency score
            fluency_scores = fluency_scorer.score_sentence(hypo_str).item()
            result.fluency_scores = fluency_scores
            result.fluency_scores_str = "Fluency Score: {:0.4f}".format(
                fluency_scores)

            results.append(result)

        return results

    def process_batch(batch, tgts, iteration):
        tokens = batch.tokens
        lengths = batch.lengths

        if use_cuda:
            tokens = tokens.cuda()
            lengths = lengths.cuda()

        encoder_input = {'src_tokens': tokens, 'src_lengths': lengths}
        translations = translator.generate(
            encoder_input,
            maxlen=int(args.max_len_a * tokens.size(1) + args.max_len_b),
        )

        return [
            make_result(batch.srcs[i], t, tgts[i], iteration)
            for i, t in enumerate(translations)
        ]

    max_positions = utils.resolve_max_positions(
        task.max_positions(), *[model.max_positions() for model in models])

    if not args.server:
        listen_to_stdin(args, max_positions, task, process_batch)
    else:
        listen_to_web(args, max_positions, task, process_batch)