one_model = config['one_model']
    log('Build lookup table, bi-directional encoder and decoder ... ', nl=False)

    trans = Translate(**config)
    # transpose each input matrix to shape (sent_len, batch_size), the
    # time-major layout the recurrent layers iterate over
    if config['use_mv']:
        trans.apply(source.T, source_mask.T, target.T,
                    target_mask.T, v_part, v_true)
    else:
        trans.apply(source.T, source_mask.T, target.T, target_mask.T)
    log('Done\n')

    if config['reload']:
        log('Reload model {}'.format(one_model))
        trans.load(one_model)

    # the average cross-entropy (cost) per sentence in a batch
    cost = trans.mean_cost
    log_norm = trans.mean_abs_log_norm
    params = trans.params

    # print the name and shape of each parameter in this RNNsearch model
    for value in params:
        log('\t{:15}: {}'.format(value.name, value.get_value().shape))

    log('Build grad ... ', nl=False)
    grads = T.grad(cost, params)
    # step clipping: rescale gradients whose global L2 norm exceeds a
    # threshold, which keeps updates small and training stable
    if config['step_clipping'] > 0.:
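        # a hedged sketch of global-norm step clipping (the original body is
        # not shown here): rescale the gradients whenever their joint L2 norm
        # exceeds the threshold
        norm = T.sqrt(sum(T.sum(g ** 2) for g in grads))
        clip = config['step_clipping']
        grads = [T.switch(norm > clip, g * clip / norm, g) for g in grads]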
Example #2
    params = trans.params
    # sanity check: print the sum of the first parameter's values
    print params[0].get_value().sum()

    logger.info("begin to build sample model : f_init, f_next")
    f_init, f_next = trans.build_sample()
    logger.info("end build sample model : f_init, f_next")

    src_vocab = pickle.load(open(config["src_vocab"], "rb"))
    trg_vocab = pickle.load(open(config["trg_vocab"], "rb"))
    src_vocab = ensure_special_tokens(
        src_vocab, bos_idx=0, eos_idx=config["src_vocab_size"] - 1, unk_idx=config["unk_id"]
    )
    trg_vocab = ensure_special_tokens(
        trg_vocab, bos_idx=0, eos_idx=config["trg_vocab_size"] - 1, unk_idx=config["unk_id"]
    )
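    # ensure_special_tokens (helper not shown here) is assumed to pin the
    # reserved ids in each vocab, roughly:
    #   vocab['<S>'] = bos_idx; vocab['</S>'] = eos_idx; vocab['<UNK>'] = unk_idx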
    trg_vocab_reverse = {index: word for word, index in trg_vocab.iteritems()}
    src_vocab_reverse = {index: word for word, index in src_vocab.iteritems()}
    logger.info("load dict finished ! src dic size : {} trg dic size : {}.".format(len(src_vocab), len(trg_vocab)))

    # val_set=sys.argv[1]
    # config['val_set']=val_set
    dev_stream = get_dev_stream(**config)
    logger.info("start training!!!")
    trans.load(config["saveto"] + "/params.npz")

    val_save_file = open("trans", "w")
    data_iter = dev_stream.get_epoch_iterator()
    # beam-search translations of the dev set (beam size k=10); bound to a new
    # name so the model object `trans` is not shadowed
    trans_out = multi_process_sample(data_iter, f_init, f_next, k=10, vocab=trg_vocab_reverse, process=1, normalize=False)
    val_save_file.writelines(trans_out)
    val_save_file.close()
Example #3
    # the reversed vocab maps index -> token, e.g.:
    #   {1: '<UNK>', 0: '<S>', trg_vocab_size-1: '</S>', 5: 'is', ...}
    debug('\t~done source vocab count: {}, target vocab count: {}'.format(
        len(src_vocab), len(trg_vocab)))

    lm = kenlm.Model(lmpath) if (lmpath and search_mode == 3) else None
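    # with search_mode == 3 the kenlm model is presumably used to rescore beam
    # hypotheses, e.g. lm.score(' '.join(words), bos=True, eos=True), which
    # returns a base-10 log probability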

    val_prefix = config['val_prefix']
    config['val_prefix'] = valid_set
    config['val_set'] = config['val_tst_dir'] + config['val_prefix'] + '.src'
    if config['val_prefix'] == val_prefix:
        dev_stream = get_dev_stream(**config)
    else:
        dev_stream = get_tst_stream(**config)
    if args.use_valid:
        _log('start decoding ... {}'.format(config['val_set']))
    trans.load(model_name)  # loading replaces all the NMT weights; important!
    np_params = trans.load2numpy(model_name)
    # debug: inspect the arrays stored in the checkpoint
    # for np_param in np_params.files:
    #     print type(np_param)
    #     print np_param
    params = trans.params
    # _log('Weights in model {}'.format(model_name))
    # for shared_var in params:
    #    _log('{} : {} {} {}'.format(shared_var.name, shared_var.get_value().sum(),
    #                                       type(shared_var), type(shared_var.get_value())))

    translator = Translator(
        fs=fs,
        switchs=switchs,