def load_options(model_name):
    # Load the saved model options and merge them over DefaultOptions.
    with open('%s.pkl' % model_name, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))

        print 'Options:'
        pprint(options)

    return options
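
For context, a minimal usage sketch of load_options; 'my_model' is a hypothetical prefix chosen so that 'my_model.pkl' exists on disk next to the saved weights:

# Hypothetical usage sketch (assumes DefaultOptions, pkl and pprint are imported
# by the surrounding module, as in the function above).
options = load_options('my_model')
print(options['n_words_src'])  # one of the merged DefaultOptions / pickled keys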
Example #2
def load_options_test(model_name):
    # Load the saved model options and merge them over DefaultOptions.
    with open('%s.pkl' % model_name, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))
        if 'fix_dp_bug' not in options:
            options['fix_dp_bug'] = False
        print('Options:')
        pprint(options)

    return options
Example #3
def test(model_name, beam_size, reload_=True, Hard=False, k=-1, type=None):
    print('Loading model options...')
    with open('%s.pkl' % model_name, 'rb') as f:
        model_options = DefaultOptions.copy()
        model_options.update(pkl.load(f))
    #model_options['temperature'] = 1.0
    #model_options['scale'] = 1.0
    #model_options['gate_dropout'] = 0.0
    #model_options['fix_dp_bug'] = False
    print('Building NMTModel...')
    model = NMTModel(model_options)
    #model.O['small_train_datasets'] = (r'\\GCR\Scratch\RR1\dihe\stochastic_lstm\data\test\test.de-en.bpe.25000.de', r'\\GCR\Scratch\RR1\dihe\stochastic_lstm\data\test\test.de-en.bpe.25000.en',) + (r'\\GCR\Scratch\RR1\dihe\stochastic_lstm\data\test\test.de-en.en',)
    print('Initializing parameters...')
    params = model.initializer.init_params()
    print('Loading saved parameters...')
    params = load_params_v2(model_name, params, k, type)
    print('Creating Theano shared parameters...')
    model.init_tparams(params)
    print('Checking options...')
    print(model_options)
    check_options(model_options)

    trng, use_noise, stochastic_mode, hyper_param,\
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost, test_cost, x_emb, stochastic_updates, _ = model.build_model()

    print('Building sampler')
    f_init, f_next = model.build_sampler(trng=trng, use_noise=use_noise, batch_mode=True, stochastic_mode=stochastic_mode, hyper_param=hyper_param)

    uidx = search_start_uidx(reload_, model_name)

    # 'alpha' is forwarded to translate_dev_get_bleu in both branches.
    alpha = 1.0

    if Hard:
        stochastic_mode.set_value(2)
        bleu_hard = translate_dev_get_bleu(model, f_init, f_next, trng, use_noise, beam_size, alpha)
        message('{} {} BLEU = {:.2f} at uidx {} beam_size = {}'.format(type, k, bleu_hard, uidx, beam_size))
        sys.stdout.flush()
        bleu_soft = 0.0
    else:
        stochastic_mode.set_value(0)
        bleu_soft = translate_dev_get_bleu(model, f_init, f_next, trng, use_noise, beam_size, alpha)
        message('{} {} {} BLEU = {:.2f} at uidx {} beam_size = {}'.format(type, k, alpha, bleu_soft, uidx, beam_size))
        sys.stdout.flush()
        bleu_hard = 0.0

    return (bleu_soft, bleu_hard)
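
A hedged usage sketch of test; the model prefix and beam size below are illustrative placeholders, not values from the source:

# Hypothetical call: evaluate with soft gating (stochastic_mode = 0) and a beam of width 5.
bleu_soft, bleu_hard = test('model_de_en', beam_size=5, Hard=False, k=-1, type=None)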
Example #4
def main(model,
         dictionary,
         dictionary_target,
         source_file,
         saveto,
         k=5,
         normalize=False,
         n_process=5,
         chr_level=False):
    # Load the saved model options and merge them over DefaultOptions.
    with open('%s.pkl' % model, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))

        print 'Options:'
        pprint(options)

    word_dict, word_idict, word_idict_trg = load_translate_data(
        dictionary,
        dictionary_target,
        source_file,
        batch_mode=False,
        chr_level=chr_level,
        load_input=False,
        echo=False,
    )

    # create input and output queues for processes
    queue = Queue()
    rqueue = Queue()
    processes = [None] * n_process
    for midx in xrange(n_process):
        processes[midx] = Process(target=translate_model,
                                  args=(queue, rqueue, midx, model, options, k,
                                        normalize))
        processes[midx].start()

    # utility function
    def _send_jobs(fname):
        with open(fname, 'r') as f:
            for idx, line in enumerate(f):
                if chr_level:
                    words = list(line.decode('utf-8').strip())
                else:
                    words = line.strip().split()
                x = map(lambda w: word_dict[w] if w in word_dict else 1, words)
                x = map(lambda ii: ii if ii < options['n_words_src'] else 1, x)
                x += [0]
                queue.put((idx, x))
        return idx + 1

    def _finish_processes():
        for midx in xrange(n_process):
            queue.put(None)

    def _retrieve_jobs(n_samples):
        trans = [None] * n_samples
        for idx in xrange(n_samples):
            resp = rqueue.get()
            trans[resp[0]] = resp[1]
            if np.mod(idx, 10) == 0:
                print 'Sample ', (idx + 1), '/', n_samples, ' Done'
        return trans

    print 'Translating ', source_file, '...'
    n_samples = _send_jobs(source_file)
    trans = seqs2words(_retrieve_jobs(n_samples), word_idict_trg)
    _finish_processes()
    with open(saveto, 'w') as f:
        print >> f, '\n'.join(trans)
    print 'Done'
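
The function above follows a standard producer/consumer layout: _send_jobs pushes (index, sentence) pairs into queue, each worker process runs translate_model and writes (index, translation) into rqueue, and _retrieve_jobs puts the results back into source order. Below is a self-contained sketch of the same pattern with the standard multiprocessing module; work() is a stand-in placeholder, not the real translate_model:

from multiprocessing import Process, Queue

def work(in_q, out_q):
    # Worker loop: consume jobs until a None sentinel arrives.
    while True:
        job = in_q.get()
        if job is None:
            break
        idx, payload = job
        out_q.put((idx, payload.upper()))  # placeholder for real translation work

if __name__ == '__main__':
    in_q, out_q = Queue(), Queue()
    workers = [Process(target=work, args=(in_q, out_q)) for _ in range(4)]
    for w in workers:
        w.start()

    jobs = ['ein beispiel', 'noch ein satz']
    for idx, line in enumerate(jobs):
        in_q.put((idx, line))
    for _ in workers:
        in_q.put(None)  # one sentinel per worker

    results = [None] * len(jobs)
    for _ in jobs:
        idx, out = out_q.get()
        results[idx] = out  # restore source order
    for w in workers:
        w.join()
    print(results)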
def build_regression(args, top_options):
    """The main function to build the regression.

    :param args: Options from the argument parser
    :param top_options: Options from top-level (like options in train_nmt.py)
    """

    # Initialize and load options.
    old_options = DefaultOptions.copy()
    old_options.update(top_options)
    load_options(old_options)

    # Initialize options of new model.
    new_options = old_options.copy()
    new_options['n_encoder_layers'] = args.n_encoder_layers
    new_options['n_decoder_layers'] = args.n_decoder_layers
    new_options['encoder_many_bidirectional'] = args.connection_type == 1
    new_options['unit'] = args.unit
    new_options['attention_layer_id'] = args.attention_layer_id
    new_options['residual_enc'] = args.residual_enc
    new_options['residual_dec'] = args.residual_dec
    new_options['use_zigzag'] = args.use_zigzag

    only_encoder = new_options['n_decoder_layers'] == old_options['n_decoder_layers']

    # Old and new models.
    old_model = NMTModel(old_options)
    new_model = NMTModel(new_options)

    print('Loading data...', end='')
    text_iterator = TextIterator(
        old_options['datasets'][0],
        old_options['datasets'][1],
        old_options['vocab_filenames'][0],
        old_options['vocab_filenames'][1],
        old_options['batch_size'],
        old_options['maxlen'],
        old_options['n_words_src'],
        old_options['n_words'],
    )

    small_train_iterator = TextIterator(
        old_options['small_train_datasets'][0],
        old_options['small_train_datasets'][1],
        old_options['vocab_filenames'][0],
        old_options['vocab_filenames'][1],
        old_options['batch_size'],
        old_options['maxlen'],
        old_options['n_words_src'],
        old_options['n_words'],
    )

    valid_text_iterator = TextIterator(
        old_options['valid_datasets'][0],
        old_options['valid_datasets'][1],
        old_options['vocab_filenames'][0],
        old_options['vocab_filenames'][1],
        old_options['valid_batch_size'],
        old_options['maxlen'],
        old_options['n_words_src'],
        old_options['n_words'],
    )
    print('Done')

    if only_encoder:
        f_initialize = ParameterInitializer.init_input_to_context
    else:
        f_initialize = ParameterInitializer.init_input_to_decoder_context

    # Initialize and reload model parameters.
    print('Initializing and reloading model parameters...', end='')
    f_initialize(old_model.initializer, old_model.P, reload_=True)
    f_initialize(
        new_model.initializer, new_model.P,
        reload_=args.warm_start_file is not None,
        preload=args.warm_start_file,
    )
    print('Done')

    # Build model.
    if only_encoder:
        print('Building model...', end='')
        input_, context_old, _ = old_model.input_to_context()
        x, x_mask, y, y_mask = input_
        _, context_new, _ = new_model.input_to_context(input_)
        print('Done')

        # Build output and MSE loss.
        f_context_old, f_context_new, loss, f_loss = build_loss(x, x_mask, context_old, context_new, args)
    else:
        print('Building model...', end='')
        input_, hidden_decoder_old, context_decoder_old = old_model.input_to_decoder_context()
        x, x_mask, y, y_mask = input_
        _, hidden_decoder_new, context_decoder_new = new_model.input_to_decoder_context(input_)
        print('Done')

        f_context_old, f_context_new, loss, f_loss = build_decoder_loss(
            x, x_mask, y, y_mask,
            hidden_decoder_old, hidden_decoder_new, context_decoder_old, context_decoder_new, args)

    # Compute gradient.
    print('Computing gradient...', end='')
    trainable_parameters = new_model.P.copy()
    if args.fix_embedding:
        print('Fix word embedding!')
        del trainable_parameters['Wemb']

        if not only_encoder:
            del trainable_parameters['Wemb_dec']

    # Build L2 regularization.
    l2_regularization(loss, trainable_parameters, args.decay_c)

    grads = T.grad(loss, wrt=itemlist(trainable_parameters))

    # Apply gradient clipping.
    _, g2 = apply_gradient_clipping(args.clip_c, grads)
    print('Done')

    # Build optimizer.
    inputs = [x, x_mask] if only_encoder else [x, x_mask, y, y_mask]

    print('Building optimizers...', end='')
    lr = T.scalar(name='lr')
    f_grad_shared, f_update, _ = Optimizers[args.regression_optimizer](
        lr, trainable_parameters, grads, inputs, loss, g2=g2)
    print('Done')

    print('Optimization')
    start_time = time.time()
    iteration = 0
    estop = False

    if args.dump_before_train:
        print('Dumping before train...', end='')
        new_model.save_whole_model(args.model_file, iteration)
        print('Done')

    # Validate before train
    new_model.save_whole_model(args.model_file, iteration=-1)
    best_val_cost = validate(valid_text_iterator, small_train_iterator, f_loss, only_encoder, top_options['maxlen'])

    learning_rate = args.learning_rate

    for epoch in xrange(args.max_epoch):
        n_samples = 0

        for i, (x, y) in enumerate(text_iterator):
            n_samples += len(x)
            iteration += 1

            x, x_mask, y, y_mask = prepare_data(x, y, maxlen=top_options['maxlen'])

            if x is None:
                print('Minibatch with zero sample under length ', top_options['maxlen'])
                iteration -= 1
                continue

            inputs = [x, x_mask] if only_encoder else [x, x_mask, y, y_mask]

            if args.debug:
                print('Cost before train: {}'.format(f_loss(*inputs)))

            # Train!
            cost, g2_value = f_grad_shared(*inputs)
            f_update(learning_rate)

            if args.debug:
                print('Cost after train: {}'.format(f_loss(*inputs)))

            if np.isnan(cost) or np.isinf(cost):
                print('NaN detected')

                learning_rate *= 0.5
                print('Discount learning rate to {}'.format(learning_rate))

                print('Reloading best model {}...'.format(args.model_file), end='')
                new_model.load_whole_model(args.model_file, iteration=-1)
                print('Done')

                print('Training restart')
                continue

            # verbose
            if np.mod(iteration, args.disp_freq) == 0:
                print('Epoch {} Update {} Cost {:.6f} Time {:.6f}min'.format(
                    epoch, iteration, float(cost), (time.time() - start_time) / 60.0,
                ))
                print('G2 value: {:.6f}'.format(float(g2_value)))
                sys.stdout.flush()

            if args.save_freq > 0 and np.mod(iteration, args.save_freq) == 0:
                new_model.save_whole_model(args.model_file, iteration)

            if np.mod(iteration, args.valid_freq) == 0:
                curr_val_cost = validate(valid_text_iterator, small_train_iterator, f_loss,
                                         only_encoder, top_options['maxlen'])
                if curr_val_cost < best_val_cost:
                    best_val_cost = curr_val_cost
                    new_model.save_whole_model(args.model_file, iteration=-1)

                if args.debug and args.dump_hidden is not None:
                    print('Dumping input and hidden state to {}...'.format(args.dump_hidden), end='')
                    np.savez(
                        args.dump_hidden,
                        x=x, x_mask=x_mask,
                        y=y, y_mask=y_mask,
                        hidden_old=f_context_old(*inputs),
                        hidden_new=f_context_new(*inputs),
                    )
                    print('Done')

            if args.discount_lr_freq > 0 and np.mod(iteration, args.discount_lr_freq) == 0:
                learning_rate *= 0.5
                print('Discount learning rate to {}'.format(learning_rate))

            # finish after this many updates
            if iteration >= args.finish_after:
                print('Finishing after {} iterations!'.format(iteration))
                estop = True
                break

        print('Seen {} samples'.format(n_samples))

        if estop:
            break

    return 0.
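
For reference, a minimal sketch of the kind of masked MSE objective that build_loss presumably computes between the old and new contexts; the tensor shapes assumed here ((timesteps, batch, dim) contexts and a (timesteps, batch) mask) are an illustration, not the project's actual build_loss:

import theano.tensor as T

def masked_mse(context_old, context_new, x_mask):
    # Per-position squared error, summed over the hidden dimension.
    sq_err = T.sqr(context_new - context_old).sum(axis=2)  # -> (timesteps, batch)
    # Average only over unmasked (real) tokens.
    return (sq_err * x_mask).sum() / x_mask.sum()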
def main(model,
         dictionary,
         dictionary_target,
         source_file,
         saveto,
         k=5,
         normalize=False,
         chr_level=False,
         batch_size=-1,
         args=None):
    batch_mode = batch_size > 0

    # Load the saved model options and merge them over DefaultOptions.
    option_file = '%s.pkl' % model
    if not os.path.exists(option_file):
        m = re.search(r"iter(\d+)\.npz", model)
        if m:
            uidx = int(m.group(1))
            option_file = '%s.iter%d.npz.pkl' % (os.path.splitext(model)[0],
                                                 uidx)
    assert os.path.exists(option_file)

    with open(option_file, 'rb') as f:
        options = DefaultOptions.copy()
        options.update(pkl.load(f))

        if 'fix_dp_bug' not in options:
            options['fix_dp_bug'] = False
        print 'Options:'
        pprint(options)

    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    trng = RandomStreams(1234)
    use_noise = theano.shared(np.float32(0.))

    model_type = 'NMTModel'
    if args.trg_attention:
        model_type = 'TrgAttnNMTModel'

    model, _ = build_and_init_model(model,
                                    options=options,
                                    build=False,
                                    model_type=model_type)

    f_init, f_next = model.build_sampler(trng=trng,
                                         use_noise=use_noise,
                                         batch_mode=batch_mode,
                                         dropout=options['use_dropout'])

    if not batch_mode:
        word_dict, word_idict, word_idict_trg, input_ = load_translate_data(
            dictionary,
            dictionary_target,
            source_file,
            batch_mode=False,
            chr_level=chr_level,
            options=options,
        )

        print 'Translating ', source_file, '...'
        trans = seqs2words(
            translate(input_, model, f_init, f_next, trng, k, normalize),
            word_idict_trg,
        )
    else:
        word_dict, word_idict, word_idict_trg, all_src_blocks, m_block = load_translate_data(
            dictionary,
            dictionary_target,
            source_file,
            batch_mode=True,
            chr_level=chr_level,
            n_words_src=options['n_words_src'],
            batch_size=batch_size,
        )

        print 'Translating ', source_file, '...'
        all_sample = []
        for bidx, seqs in enumerate(all_src_blocks):
            all_sample.extend(
                translate_block(seqs, model, f_init, f_next, trng, k))
            print bidx, '/', m_block, 'Done'

        trans = seqs2words(all_sample, word_idict_trg)

    with open(saveto, 'w') as f:
        print >> f, '\n'.join(trans)
    print 'Done'
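
In batch mode the source sentences are pre-grouped into m_block blocks and each block is decoded with a single call to translate_block. A minimal illustration of the chunking step, independent of the project's load_translate_data:

def split_into_blocks(seqs, batch_size):
    # Group a list of tokenized sentences into consecutive blocks of batch_size.
    return [seqs[i:i + batch_size] for i in range(0, len(seqs), batch_size)]

blocks = split_into_blocks(['s1', 's2', 's3', 's4', 's5'], 2)  # [['s1', 's2'], ['s3', 's4'], ['s5']]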
parser.add_argument('model_prefix', help='model file prefix.')
parser.add_argument('start', type=int, help='start iteration number.')
parser.add_argument('end', type=int, help='end iteration number.')
parser.add_argument('gap',
                    type=int,
                    nargs='?',
                    default=10000,
                    help='the gap between consecutive saved models.')

args = parser.parse_args()

num_of_model = args.end - args.start + 1

option_file = '%s.iter%d.npz.pkl' % (os.path.splitext(
    args.model_prefix)[0], args.start * args.gap)
with open(option_file, 'rb') as f:
    options = DefaultOptions.copy()
    options.update(pkl.load(f))

    if 'fix_dp_bug' not in options:
        options['fix_dp_bug'] = False

    # pprint(options)

model = NMTModel(options)

from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
trng = RandomStreams(1234)
use_noise = theano.shared(np.float32(0.))

print('Initializing the model...')
params = model.initializer.init_params()