Example #1
File: layers.py Project: afcarl/nmt
def param_init_fflayer(options, params, prefix='ff', nin=None, nout=None, ortho=True):
    if nin is None:
        nin = options['dim_proj']
    if nout is None:
        nout = options['dim_proj']
    params[prfx(prefix,'W')] = norm_weight(nin, nout, scale=0.01, ortho=ortho)
    params[prfx(prefix,'b')] = numpy.zeros((nout,)).astype('float32')

    return params
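
A minimal usage sketch (an assumption-laden illustration: it presumes layers.py is importable and that norm_weight and prfx behave as the call sites above suggest):

from collections import OrderedDict

# hypothetical options dict; 'dim_proj' is only consulted when nin/nout are omitted
options = {'dim_proj': 128}
params = param_init_fflayer(options, OrderedDict(), prefix='ff_logit', nin=64, nout=32)
# params[prfx('ff_logit', 'W')].shape == (64, 32)
# params[prfx('ff_logit', 'b')].shape == (32,)
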
Example #2
File: layers.py Project: afcarl/nmt
def gru_layer(tparams, state_below, options, prefix='gru', mask=None, **kwargs):
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    dim = tparams[prfx(prefix,'Ux')].shape[1]

    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    state_below_ = tensor.dot(state_below, tparams[prfx(prefix, 'W')]) + tparams[prfx(prefix, 'b')]
    state_belowx = tensor.dot(state_below, tparams[prfx(prefix, 'Wx')]) + tparams[prfx(prefix, 'bx')]
    U = tparams[prfx(prefix, 'U')]
    Ux = tparams[prfx(prefix, 'Ux')]

    def _step_slice(m_, x_, xx_, h_, U, Ux):
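        # m_: mask row; x_, xx_: precomputed W/Wx input projections;
        # h_: previous hidden state; U, Ux: recurrent weight matrices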
        preact = tensor.dot(h_, U)
        preact += x_

        r = tensor.nnet.sigmoid(_slice(preact, 0, dim))
        u = tensor.nnet.sigmoid(_slice(preact, 1, dim))

        preactx = tensor.dot(h_, Ux)
        preactx = preactx * r
        preactx = preactx + xx_

        h = tensor.tanh(preactx)

        h = u * h_ + (1. - u) * h
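        # where the mask is 0 (padded timesteps), keep the previous hidden state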
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        return h

    seqs = [mask, state_below_, state_belowx]
    _step = _step_slice

    rval, updates = theano.scan(_step,
                                sequences=seqs,
                                outputs_info = [tensor.alloc(0., n_samples, dim)],
                                non_sequences = [tparams[prfx(prefix, 'U')],
                                                 tparams[prfx(prefix, 'Ux')]],
                                name=prfx(prefix, '_layers'),
                                n_steps=nsteps,
                                profile=profile,
                                strict=True)
    rval = [rval]
    return rval
Example #3
File: layers.py Project: afcarl/nmt
def param_init_gru_cond(options, params, prefix='gru_cond',
                        nin=None, dim=None, dimctx=None):
    if nin is None:
        nin = options['dim']
    if dim is None:
        dim = options['dim']
    if dimctx is None:
        dimctx = options['dim']

    params = param_init_gru(options, params, prefix, nin=nin, dim=dim)

    # context to LSTM
    Wc = norm_weight(dimctx,dim*2)
    params[prfx(prefix,'Wc')] = Wc

    Wcx = norm_weight(dimctx,dim)
    params[prfx(prefix,'Wcx')] = Wcx

    # attention: prev -> hidden
    Wi_att = norm_weight(nin,dimctx)
    params[prfx(prefix,'Wi_att')] = Wi_att

    # attention: context -> hidden
    Wc_att = norm_weight(dimctx)
    params[prfx(prefix,'Wc_att')] = Wc_att

    # attention: LSTM -> hidden
    Wd_att = norm_weight(dim,dimctx)
    params[prfx(prefix,'Wd_att')] = Wd_att

    # attention: hidden bias
    b_att = numpy.zeros((dimctx,)).astype('float32')
    params[prfx(prefix,'b_att')] = b_att

    # attention:
    U_att = norm_weight(dimctx,1)
    params[prfx(prefix,'U_att')] = U_att
    c_att = numpy.zeros((1,)).astype('float32')
    params[prfx(prefix, 'c_tt')] = c_att

    return params
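
A rough sketch of the shapes this initializer adds on top of the plain GRU parameters from param_init_gru (assuming norm_weight(a, b) returns an (a, b) matrix and norm_weight(a) an (a, a) matrix, as the calls above suggest):

from collections import OrderedDict

params = param_init_gru_cond({}, OrderedDict(), prefix='dec', nin=3, dim=4, dimctx=6)
# context projections:    Wc: (6, 8)    Wcx: (6, 4)
# attention projections:  Wi_att: (3, 6)    Wc_att: (6, 6)    Wd_att: (4, 6)
# attention readout:      b_att: (6,)    U_att: (6, 1)    c_tt: (1,)
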
Example #4
File: layers.py Project: afcarl/nmt
def param_init_gru(options, params, prefix='gru', nin=None, dim=None, hiero=False):
    if nin is None:
        nin = options['dim_proj']
    if dim is None:
        dim = options['dim_proj']
    if not hiero:
        W = numpy.concatenate([norm_weight(nin,dim),
                               norm_weight(nin,dim)], axis=1)
        params[prfx(prefix,'W')] = W
        params[prfx(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
    U = numpy.concatenate([ortho_weight(dim),
                           ortho_weight(dim)], axis=1)
    params[prfx(prefix,'U')] = U

    Wx = norm_weight(nin, dim)
    params[prfx(prefix,'Wx')] = Wx
    Ux = ortho_weight(dim)
    params[prfx(prefix,'Ux')] = Ux
    params[prfx(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')

    return params
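
A sketch of how this initializer and gru_layer from Example #2 fit together (it assumes init_tparams wraps the numpy arrays in Theano shared variables and that the module-level profile flag used by theano.scan exists, as elsewhere in layers.py):

from collections import OrderedDict
import theano
import theano.tensor as tensor

options = {'dim_proj': 8}
params = param_init_gru(options, OrderedDict(), prefix='encoder', nin=5, dim=8)
tparams = init_tparams(params)                  # numpy arrays -> shared variables

x = tensor.tensor3('x')                         # n_steps x n_samples x nin
h = gru_layer(tparams, x, options, prefix='encoder')[0]
f_enc = theano.function([x], h)                 # h: n_steps x n_samples x dim
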
def train(
        dim_word_desc=400,  # word vector dimensionality
        dim_word_q=400,
        dim_word_ans=600,
        dim_proj=300,
        dim=400,  # the number of LSTM units
        encoder_desc='lstm',
        encoder_desc_word='lstm',
        encoder_desc_sent='lstm',
        use_dq_sims=False,
        eyem=None,
        learn_h0=False,
        use_desc_skip_c_g=False,
        debug=False,
        encoder_q='lstm',
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        clip_c=-1.,
        lrate=0.01,
        n_words_q=49145,
        n_words_desc=115425,
        n_words_ans=409,
        pkl_train_files=None,
        pkl_valid_files=None,
        maxlen=2000,  # maximum length of the description
        optimizer='rmsprop',
        batch_size=2,
        vocab=None,
        valid_batch_size=16,
        use_elu_g=False,
        saveto='model.npz',
        model_dir=None,
        ms_nlayers=3,
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        datasets=[None],
        truncate=400,
        momentum=0.9,
        use_bidir=False,
        cost_mask=None,
        valid_datasets=[
            '/u/yyu/stor/caglar/rc-data/cnn/cnn_test_data.h5',
            '/u/yyu/stor/caglar/rc-data/cnn/cnn_valid_data.h5'
        ],
        dropout_rate=0.5,
        use_dropout=True,
        reload_=True,
        **opt_ds):

    ensure_dir_exists(model_dir)
    mpath = os.path.join(model_dir, saveto)
    mpath_best = os.path.join(model_dir, prfx("best", saveto))
    mpath_last = os.path.join(model_dir, prfx("last", saveto))
    mpath_stats = os.path.join(model_dir, prfx("stats", saveto))

    # Model options
    model_options = locals().copy()
    model_options['use_sent_reps'] = opt_ds['use_sent_reps']
    stats = defaultdict(list)

    del model_options['eyem']
    del model_options['cost_mask']

    if cost_mask is not None:
        cost_mask = sharedX(cost_mask)

    # reload options and parameters
    if reload_:
        print "Reloading the model."
        if os.path.exists(mpath_best):
            print "Reloading the best model from %s." % mpath_best
            with open(os.path.join(mpath_best, '%s.pkl' % mpath_best),
                      'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath_best, params)
        elif os.path.exists(mpath):
            print "Reloading the model from %s." % mpath
            with open(os.path.join(mpath, '%s.pkl' % mpath), 'rb') as f:
                models_options = pkl.load(f)
            params = init_params(model_options)
            params = load_params(mpath, params)
        else:
            raise IOError("Couldn't open the file.")
    else:
        print "Couldn't reload the model; initializing from scratch."
        params = init_params(model_options)

    if datasets[0]:
        print "Short dataset", datasets[0]

    print 'Loading data'
    print 'Building model'
    if pkl_train_files is None or pkl_valid_files is None:
        train, valid, test = load_data(path=datasets[0],
                                       valid_path=valid_datasets[0],
                                       test_path=valid_datasets[1],
                                       batch_size=batch_size,
                                       **opt_ds)
    else:
        train, valid, test = load_pkl_data(train_file_paths=pkl_train_files,
                                           valid_file_paths=pkl_valid_files,
                                           batch_size=batch_size,
                                           vocab=vocab,
                                           eyem=eyem,
                                           **opt_ds)

    tparams = init_tparams(params)
    trng, use_noise, inps_d, \
                     opt_ret, \
                     cost, errors, ent_errors, ent_derrors, probs = \
                        build_model(tparams,
                                    model_options,
                                    prepare_data if not opt_ds['use_sent_reps'] \
                                            else prepare_data_sents,
                                    valid,
                                    cost_mask=cost_mask)

    alphas = opt_ret['dec_alphas']

    if opt_ds['use_sent_reps']:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'],
                inps_d['slen'], inps_d['qlen'],\
                inps_d['ent_mask']
                ]
    else:
        inps = [inps_d["desc"], \
                inps_d["word_mask"], \
                inps_d["q"], \
                inps_d['q_mask'], \
                inps_d['ans'], \
                inps_d['wlen'], \
                inps_d['qlen'], \
                inps_d['ent_mask']]

    outs = [cost, errors, probs, alphas]
    if ent_errors:
        outs += [ent_errors]

    if ent_derrors:
        outs += [ent_derrors]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, outs, profile=profile)
    print 'Done'

    # Apply weight decay on the feed-forward connections
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.

        for kk, vv in tparams.iteritems():
            if "logit" in kk or "ff" in kk:
                weight_decay += (vv**2).sum()

        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Computing gradient...',
    grads = safe_grad(cost, itemlist(tparams))
    print 'Done'

    # Gradient clipping:
    if clip_c > 0.:
        g2 = get_norms(grads)
        for p, g in grads.iteritems():
            grads[p] = tensor.switch(g2 > (clip_c**2),
                                     (g / tensor.sqrt(g2 + 1e-8)) * clip_c, g)
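
    # drop the last model input (ent_mask) so it is not passed to the
    # training functions built below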
    inps.pop()
    if optimizer.lower() == "adasecant":
        learning_rule = Adasecant(delta_clip=25.0,
                                  use_adagrad=True,
                                  grad_clip=0.25,
                                  gamma_clip=0.)
    elif optimizer.lower() == "rmsprop":
        learning_rule = RMSPropMomentum(init_momentum=momentum)
    elif optimizer.lower() == "adam":
        learning_rule = Adam()
    elif optimizer.lower() == "adadelta":
        learning_rule = AdaDelta()

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    learning_rule = None
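    # NOTE: this discards the learning_rule selected above, so the
    # eval(optimizer) fallback below is what actually builds f_grad_shared/f_update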

    if learning_rule:
        f_grad_shared, f_update = learning_rule.get_funcs(learning_rate=lr,
                                                          grads=grads,
                                                          inp=inps,
                                                          cost=cost,
                                                          errors=errors)
    else:
        f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps,
                                                  cost, errors)

    print 'Done'
    print 'Optimization'
    history_errs = []
    # reload history
    if reload_ and os.path.exists(mpath):
        history_errs = list(numpy.load(mpath)['history_errs'])

    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size

    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    best_found = False
    uidx = 0
    estop = False

    train_cost_ave, train_err_ave, \
            train_gnorm_ave = reset_train_vals()

    for eidx in xrange(max_epochs):
        n_samples = 0

        if train.done:
            train.reset()

        for d_, q_, a, em in train:
            n_samples += len(a)
            uidx += 1
            use_noise.set_value(1.)

            if opt_ds['use_sent_reps']:
                # To mask the description and the question.
                d, d_mask, q, q_mask, dlen, slen, qlen = prepare_data_sents(
                    d_, q_)

                if d is None:
                    print 'Minibatch with zero samples under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(
                    d, d_mask, q, q_mask, a, dlen, slen, qlen)
            else:
                d, d_mask, q, q_mask, dlen, qlen = prepare_data(d_, q_)

                if d is None:
                    print 'Minibatch with zero samples under length ', maxlen
                    uidx -= 1
                    continue

                ud_start = time.time()
                cost, errors, gnorm, pnorm = f_grad_shared(
                    d, d_mask, q, q_mask, a, dlen, qlen)

            upnorm = f_update(lrate)
            ud = time.time() - ud_start

            # Collect the running ave train stats.
            train_cost_ave = running_ave(train_cost_ave, cost)
            train_err_ave = running_ave(train_err_ave, errors)
            train_gnorm_ave = running_ave(train_gnorm_ave, gnorm)

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                import ipdb
                ipdb.set_trace()

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, ' Update ', uidx, \
                        ' Cost ', cost, ' UD ', ud, \
                        ' UpNorm ', upnorm[0].tolist(), \
                        ' GNorm ', gnorm, \
                        ' Pnorm ', pnorm, 'Terrors ', errors

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None and best_found:
                    numpy.savez(mpath_best,
                                history_errs=history_errs,
                                **best_p)
                    pkl.dump(model_options, open('%s.pkl' % mpath_best, 'wb'))
                else:
                    params = unzip(tparams)

                numpy.savez(mpath, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % mpath, 'wb'))
                pkl.dump(stats, open("%s.pkl" % mpath_stats, 'wb'))

                print 'Done'
                print_param_norms(tparams)

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                if valid.done:
                    valid.reset()

                valid_costs, valid_errs, valid_probs, \
                        valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                                  prepare_data if not opt_ds['use_sent_reps'] \
                                                    else prepare_data_sents,
                                                  model_options,
                                                  valid,
                                                  use_sent_rep=opt_ds['use_sent_reps'])

                valid_alphas_ = numpy.concatenate(
                    [va.argmax(0) for va in valid_alphas.tolist()], axis=0)
                valid_err = valid_errs.mean()
                valid_cost = valid_costs.mean()
                valid_alpha_ent = -negentropy(valid_alphas)

                mean_valid_alphas = valid_alphas_.mean()
                std_valid_alphas = valid_alphas_.std()

                mean_valid_probs = valid_probs.argmax(1).mean()
                std_valid_probs = valid_probs.argmax(1).std()

                history_errs.append([valid_cost, valid_err])

                stats['train_err_ave'].append(train_err_ave)
                stats['train_cost_ave'].append(train_cost_ave)
                stats['train_gnorm_ave'].append(train_gnorm_ave)

                stats['valid_errs'].append(valid_err)
                stats['valid_costs'].append(valid_cost)
                stats['valid_err_ent'].append(error_ent)
                stats['valid_err_desc_ent'].append(error_dent)

                stats['valid_alphas_mean'].append(mean_valid_alphas)
                stats['valid_alphas_std'].append(std_valid_alphas)
                stats['valid_alphas_ent'].append(valid_alpha_ent)

                stats['valid_probs_mean'].append(mean_valid_probs)
                stats['valid_probs_std'].append(std_valid_probs)

                if uidx == 0 or valid_err <= numpy.array(
                        history_errs)[:, 1].min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                    best_found = True
                else:
                    best_found = False

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print "============================"
                print '\t>>>Valid error: ', valid_err, \
                        ' Valid cost: ', valid_cost
                print '\t>>>Valid pred mean: ', mean_valid_probs, \
                        ' Valid pred std: ', std_valid_probs
                print '\t>>>Valid alphas mean: ', mean_valid_alphas, \
                        ' Valid alphas std: ', std_valid_alphas, \
                        ' Valid alpha negent: ', valid_alpha_ent, \
                        ' Valid error ent: ', error_ent, \
                        ' Valid error desc ent: ', error_dent

                print "============================"
                print "Running average train stats "
                print '\t>>>Train error: ', train_err_ave, \
                        ' Train cost: ', train_cost_ave, \
                        ' Train grad norm: ', train_gnorm_ave
                print "============================"


                train_cost_ave, train_err_ave, \
                    train_gnorm_ave = reset_train_vals()

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid.reset()
    valid_cost, valid_error, valid_probs, \
            valid_alphas, error_ent, error_dent = eval_model(f_log_probs,
                                      prepare_data if not opt_ds['use_sent_reps'] \
                                           else prepare_data_sents,
                                      model_options, valid,
                                      use_sent_rep=opt_ds['use_sent_reps'])

    print " Final eval results: "
    print 'Valid error: ', valid_error.mean()
    print 'Valid cost: ', valid_cost.mean()
    print '\t>>>Valid pred mean: ', valid_probs.mean(), \
            ' Valid pred std: ', valid_probs.std(), \
            ' Valid error ent: ', error_ent

    params = copy.copy(best_p)

    numpy.savez(mpath_last,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err, valid_cost
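
A minimal way to invoke this entry point (a sketch only: the dataset paths are placeholders, reload_=False skips the checkpoint lookup that would otherwise raise IOError, and use_sent_reps must be supplied because the function reads it out of **opt_ds):

train(datasets=['/path/to/train_data.h5'],              # placeholder
      valid_datasets=['/path/to/test_data.h5',
                      '/path/to/valid_data.h5'],        # placeholders
      model_dir='models',
      saveto='attentive_model.npz',
      optimizer='adam',
      batch_size=32,
      reload_=False,
      use_sent_reps=False)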
Example #7
File: layers.py Project: afcarl/nmt
def fflayer(tparams, state_below, options, prefix='rconv', activ='lambda x: tensor.tanh(x)', **kwargs):
    return eval(activ)(tensor.dot(state_below, tparams[prfx(prefix,'W')]) + tparams[prfx(prefix,'b')])
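
A sketch pairing this with the initializer from Example #1 (assuming init_tparams from this codebase; the default activation is the tanh shown in the signature):

from collections import OrderedDict
import theano
import theano.tensor as tensor

options = {'dim_proj': 8}
params = param_init_fflayer(options, OrderedDict(), prefix='ff_hid', nin=5, nout=8)
tparams = init_tparams(params)

x = tensor.matrix('x')                                   # n_samples x nin
y = fflayer(tparams, x, options, prefix='ff_hid')        # tanh(x.W + b)
f = theano.function([x], y)
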
Example #8
File: layers.py Project: afcarl/nmt
def gru_cond_layer(tparams,
                   state_below,
                   options,
                   prefix='gru',
                   mask=None,
                   context=None,
                   one_step=False,
                   init_memory=None,
                   init_state=None,
                   context_mask=None,
                   **kwargs):

    assert context, 'Context must be provided'

    if one_step:
        assert init_state, 'previous state must be provided'

    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    # mask
    if mask is None:
        mask = tensor.alloc(1., state_below.shape[0], 1)

    dim = tparams[prfx(prefix, 'Wcx')].shape[1]

    # initial/previous state
    if init_state is None:
        init_state = tensor.alloc(0., n_samples, dim)

    # projected context
    assert context.ndim == 3, 'Context must be 3-d: #annotation x #sample x dim'
    pctx_ = tensor.dot(context, tparams[prfx(prefix,'Wc_att')]) + tparams[prfx(prefix,'b_att')]

    def _slice(_x, n, dim):
        if _x.ndim == 3:
            return _x[:, :, n*dim:(n+1)*dim]
        return _x[:, n*dim:(n+1)*dim]

    # projected x
    state_belowx = tensor.dot(state_below, tparams[prfx(prefix, 'Wx')]) + tparams[prfx(prefix, 'bx')]
    state_below_ = tensor.dot(state_below, tparams[prfx(prefix, 'W')]) + tparams[prfx(prefix, 'b')]
    state_belowc = tensor.dot(state_below, tparams[prfx(prefix, 'Wi_att')])

    def _step_slice(m_, x_, xx_, xc_, h_, ctx_, alpha_, pctx_, cc_,
                    U, Wc, Wd_att, U_att, c_tt, Ux, Wcx):
        # attention
        pstate_ = tensor.dot(h_, Wd_att)
        pctx__ = pctx_ + pstate_[None,:,:]
        pctx__ += xc_
        pctx__ = tensor.tanh(pctx__)
        alpha = tensor.dot(pctx__, U_att)+c_tt
        alpha = alpha.reshape([alpha.shape[0], alpha.shape[1]])
        alpha = tensor.exp(alpha)
        if context_mask:
            alpha = alpha * context_mask
        alpha = alpha / alpha.sum(0, keepdims=True)
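        # alpha now holds the normalized attention weights
        # (exponentiate-and-renormalize, i.e. a softmax over the annotation axis)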
        ctx_ = (cc_ * alpha[:,:,None]).sum(0) # current context

        preact = tensor.dot(h_, U)
        preact += x_
        preact += tensor.dot(ctx_, Wc)
        preact = tensor.nnet.sigmoid(preact)

        r = _slice(preact, 0, dim)
        u = _slice(preact, 1, dim)

        preactx = tensor.dot(h_, Ux)
        preactx *= r
        preactx += xx_
        preactx += tensor.dot(ctx_, Wcx)

        h = tensor.tanh(preactx)

        h = u * h_ + (1. - u) * h
        h = m_[:,None] * h + (1. - m_)[:,None] * h_

        return h, ctx_, alpha.T

    seqs = [mask, state_below_, state_belowx, state_belowc]
    _step = _step_slice

    shared_vars = [tparams[prfx(prefix, 'U')],
                   tparams[prfx(prefix, 'Wc')],
                   tparams[prfx(prefix,'Wd_att')],
                   tparams[prfx(prefix,'U_att')],
                   tparams[prfx(prefix, 'c_tt')],
                   tparams[prfx(prefix, 'Ux')],
                   tparams[prfx(prefix, 'Wcx')]]

    if one_step:
        rval = _step(*(seqs+[init_state, None, None, pctx_, context]+shared_vars))
    else:
        rval, updates = theano.scan(_step,
                                    sequences=seqs,
                                    outputs_info = [init_state,
                                                    tensor.alloc(0., n_samples, context.shape[2]),
                                                    tensor.alloc(0., n_samples, context.shape[0])],
                                    non_sequences=[pctx_,
                                                   context]+shared_vars,
                                    name=prfx(prefix, '_layers'),
                                    n_steps=nsteps,
                                    profile=profile,
                                    strict=True)
    return rval
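
A compile-time smoke test for the scan branch of this layer (a sketch; it assumes param_init_gru_cond and init_tparams from this file and the module-level profile flag):

from collections import OrderedDict
import theano
import theano.tensor as tensor

options = {'dim': 4}
params = param_init_gru_cond(options, OrderedDict(), prefix='dec', nin=3, dim=4, dimctx=6)
tparams = init_tparams(params)

y = tensor.tensor3('y')      # target embeddings: n_steps x n_samples x nin
ctx = tensor.tensor3('ctx')  # source annotations: n_annot x n_samples x dimctx

h, ctxs, alphas = gru_cond_layer(tparams, y, options, prefix='dec', context=ctx)
f_dec = theano.function([y, ctx], [h, ctxs, alphas])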