Example #1
def load_model(load_states=True):
    # Restore model parameters if a saved checkpoint exists.
    if utils.check_file(params):
        model.load_params(params, ctx=ctxs)
        logging.info("Loading parameters from : {}".format(params))

    # Optionally restore the trainer's learning rate from the saved training state.
    if load_states and utils.check_file(trainingfile):
        trainer.set_learning_rate(float(
            utils.read_kvstore(trainingfile)['lr']))
        logging.info("Loading lr from : {}".format(trainingfile))
Example #2
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict,
              sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # set to zero to indicate that dropout is not used in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see model.py for more detailed explanations
    print "Starting to build sampler ..."
    f_init, f_next = build_sampler(tparams,
                                   options,
                                   use_noise,
                                   trng,
                                   sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams,
                                   f_init,
                                   f_next,
                                   cc0,
                                   options,
                                   trng=trng,
                                   k=k,
                                   maxlen=200,
                                   stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break

        idx, context = req[0], req[1]
        print pid, '-', idx
        seq = _gencap(context)
        rqueue.put((idx, seq))

    return
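gen_model above is written to run as a worker process: it blocks on queue.get(), treats None as an exit signal, and pushes (idx, sequence) results onto rqueue. A hedged, self-contained sketch of that queue protocol, with a trivial worker standing in for the real sampler so the round trip can be run as-is:

from multiprocessing import Process, Queue

def worker(queue, rqueue, pid):
    while True:
        req = queue.get()
        if req is None:                            # exit signal, as in gen_model
            break
        idx, context = req
        rqueue.put((idx, 'seq-from-pid-%d' % pid))  # gen_model would put (idx, generated sequence)

if __name__ == '__main__':
    queue, rqueue = Queue(), Queue()
    procs = [Process(target=worker, args=(queue, rqueue, pid)) for pid in range(2)]
    for p in procs:
        p.start()
    for idx in range(6):                           # enqueue work items (index, context)
        queue.put((idx, 'context-%d' % idx))
    results = [rqueue.get() for _ in range(6)]
    for _ in procs:
        queue.put(None)                            # one exit signal per worker
    for p in procs:
        p.join()
    print(sorted(results))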
Example #3
def main(model, dictionary, dictionary_tag, source_file, target_file, saveto):

    # load model options
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    # load source dictionary and invert
    with open(dictionary, 'rb') as f:
        word_dict = pkl.load(f)
    word_idict = dict()
    for kk, vv in word_dict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # load tag dictionary and invert
    with open(dictionary_tag, 'rb') as f:
        tag_dict = pkl.load(f)
    tag_idict = dict()
    for kk, vv in tag_dict.iteritems():
        tag_idict[vv] = kk

    # allocate model parameters
    params = init_params(options)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost, predicts = \
        build_model(tparams, options)

    print 'Building f_predicts...',
    f_predicts = theano.function([x, x_mask], predicts)
    print 'Done'

    use_noise.set_value(0.)
    valid_err = evaluation(f_predicts,
                           options,
                           tag_idict,
                           word_dict,
                           source_file,
                           saveto,
                           target_file,
                           0,
                           options['n_words_src'],
                           back_file=target_file + ".back")

    print 'Test ', valid_err
Example #4
def main(data_path, model_path, dict_path, save_path):

    print("Preparing Data...")

    # Load data and dictionary
    X = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            X.append(line.rstrip('\n'))
    with open(dict_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # Prepare data for encoding
    batches = Batch(X)

    # Load model
    print("Loading model params...")
    params = load_params(model_path)

    # Build encoder
    print("Building encoder...")

    # Theano variables
    tweet = T.itensor3()
    t_mask = T.fmatrix()

    # Embeddings
    emb_t = tweet2vec(tweet, t_mask, params, n_char)[0]

    # Theano function
    f_enc = theano.function([tweet, t_mask], emb_t)

    # Encode
    print("Encoding data...")
    print("Input data {} samples".format(len(X)))
    features = np.zeros((len(X), WDIM), dtype='float32')
    it = 0
    for x, i in batches:
        if it % 100 == 0:
            print("Minibatch {}".format(it))
        it += 1

        xp, x_mask = prepare_data(x, chardict)
        ff = f_enc(xp, x_mask)
        for ind, idx in enumerate(i):
            features[idx] = ff[ind]

    # Save
    with open(save_path, 'w') as o:
        np.save(o, features)
Example #5
def train(
        dim_word=100,  # word vector dimensionality
        dim=1000,  # the number of LSTM units
        encoder='gru',
        decoder='gru_cond',
        patience=10,
        max_epochs=5000,
        dispFreq=100,
        decay_c=0.,
        alpha_c=0.,
        diag_c=0.,
        clip_c=-1.,
        lrate=0.01,
        n_words_src=100000,
        n_words=100000,
        maxlen=100,  # maximum length of the description
        optimizer='rmsprop',
        batch_size=16,
        valid_batch_size=16,
        saveto='model.npz',
        validFreq=1000,
        saveFreq=1000,  # save the parameters after every saveFreq updates
        sampleFreq=100,  # generate some samples after every sampleFreq updates
        datasets=[
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok',
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok'
        ],
        valid_datasets=[
            '../data/dev/newstest2011.en.tok',
            '../data/dev/newstest2011.fr.tok'
        ],
        dictionaries=[
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok.pkl',
            '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok.pkl'
        ],
        use_dropout=False,
        reload_=False):

    # Model options
    model_options = locals().copy()

    worddicts = [None] * len(dictionaries)
    worddicts_r = [None] * len(dictionaries)
    for ii, dd in enumerate(dictionaries):
        with open(dd, 'rb') as f:
            worddicts[ii] = pkl.load(f)
        worddicts_r[ii] = dict()
        for kk, vv in worddicts[ii].iteritems():
            worddicts_r[ii][vv] = kk

    # reload options
    if reload_ and os.path.exists(saveto):
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print 'Loading data'
    train = TextIterator(datasets[0],
                         datasets[1],
                         dictionaries[0],
                         dictionaries[1],
                         n_words_source=n_words_src,
                         n_words_target=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(valid_datasets[0],
                         valid_datasets[1],
                         dictionaries[0],
                         dictionaries[1],
                         n_words_source=n_words_src,
                         n_words_target=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, use_noise, \
          x, x_mask, y, y_mask, \
          opt_ret, \
          cost = \
          build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask]

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=profile)
    print 'Done'

    cost = cost.mean()

    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    if alpha_c > 0. and not model_options['decoder'].endswith('simple'):
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * (
            (tensor.cast(y_mask.sum(0) // x_mask.sum(0), 'float32')[:, None] -
             opt_ret['dec_alphas'].sum(0))**2).sum(1).mean()
        cost += alpha_reg

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=profile)
    print 'Done'

    print 'Computing gradient...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'
    print 'Building f_grad...',
    f_grad = theano.function(inps, grads, profile=profile)
    print 'Done'

    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (clip_c**2), g / tensor.sqrt(g2) * clip_c,
                              g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    print 'Optimization'

    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = list(numpy.load(saveto)['history_errs'])
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x, y in train:
            n_samples += len(x)
            uidx += 1
            use_noise.set_value(1.)

            x, x_mask, y, y_mask = prepare_data(x,
                                                y,
                                                maxlen=maxlen,
                                                n_words_src=n_words_src,
                                                n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)

                numpy.savez(saveto, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(numpy.minimum(5, x.shape[1])):
                    stochastic = True
                    sample, score = gen_sample(tparams,
                                               f_init,
                                               f_next,
                                               x[:, jj][:, None],
                                               model_options,
                                               trng=trng,
                                               k=1,
                                               maxlen=30,
                                               stochastic=stochastic,
                                               argmax=False)
                    print 'Source ', jj, ': ',
                    for vv in x[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[0]:
                            print worddicts_r[0][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Truth ', jj, ' : ',
                    for vv in y[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Sample ', jj, ': ',
                    if stochastic:
                        ss = sample
                    else:
                        score = score / numpy.array([len(s) for s in sample])
                        ss = sample[score.argmin()]
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_errs = pred_probs(f_log_probs, prepare_data,
                                        model_options, valid)
                valid_err = valid_errs.mean()
                history_errs.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(history_errs).min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                if len(history_errs) > patience and valid_err >= numpy.array(
                        history_errs)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid_err = pred_probs(f_log_probs, prepare_data, model_options,
                           valid).mean()
    print 'Valid ', valid_err
    params = copy.copy(best_p)
    numpy.savez(saveto,
                zipped_params=best_p,
                history_errs=history_errs,
                **params)

    return valid_err
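The clip_c block in the example above performs global-norm gradient clipping: when the joint L2 norm of all gradients exceeds clip_c, every gradient is rescaled by clip_c / norm. A hedged NumPy illustration of the same rule on concrete arrays:

import numpy as np

def clip_global_norm(grads, clip_c):
    # rescale all gradients jointly when their global L2 norm exceeds clip_c
    g2 = sum((g ** 2).sum() for g in grads)
    if g2 > clip_c ** 2:
        return [g * (clip_c / np.sqrt(g2)) for g in grads]
    return grads

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # global norm is 13
print(clip_global_norm(grads, clip_c=1.0))         # rescaled so the global norm becomes 1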
Example #6
def main(data_path, model_path):

    print("Loading data...")
    with open(data_path, 'r') as f:
        valX = pkl.load(f)

    print("Preparing data...")
    val_iter = batched_tweets.BatchedTweets(valX,
                                            batch_size=1024,
                                            maxlen=MAX_LENGTH)

    print("Loading dictionary...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # check for model files
    files = sorted(glob.glob(model_path + 'model_*.npz'))
    print("Found {} model files".format(len(files)))

    for modelf in files:
        print("Computing validation cost on {}".format(modelf))

        print("Loading params...")
        params = load_params(modelf)

        print("Building network...")

        # Tweet variables
        tweet = T.itensor3()
        ptweet = T.itensor3()
        ntweet = T.itensor3()

        # masks
        t_mask = T.fmatrix()
        tp_mask = T.fmatrix()
        tn_mask = T.fmatrix()

        # Embeddings
        emb_t = tweet2vec(tweet, t_mask, params, n_char)[0]
        emb_tp = tweet2vec(ptweet, tp_mask, params, n_char)[0]
        emb_tn = tweet2vec(ntweet, tn_mask, params, n_char)[0]

        # batch cost
        D1 = 1 - T.batched_dot(emb_t, emb_tp) / (tnorm(emb_t) * tnorm(emb_tp))
        D2 = 1 - T.batched_dot(emb_t, emb_tn) / (tnorm(emb_t) * tnorm(emb_tn))
        gap = D1 - D2 + M
        loss = gap * (gap > 0)
        cost = T.mean(loss)

        # Theano function
        print("Compiling theano function...")
        inps = [tweet, t_mask, ptweet, tp_mask, ntweet, tn_mask]
        cost_val = theano.function(inps, cost)

        print("Testing...")
        uidx = 0
        try:
            validation_cost = 0.
            n_val_samples = 0
            for x, y, z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(
                    x, y, z, chardict, maxlen=MAX_LENGTH, n_chars=n_char)

                if x is None:
                    print(
                        "Validation: Minibatch with zero samples under maxlength"
                    )
                    continue

                curr_cost = cost_val(x, x_m, y, y_m, z, z_m)
                validation_cost += curr_cost * len(x)

            print("Model {} Validation Cost {}".format(
                modelf, validation_cost / n_val_samples))
            print("Seen {} samples.".format(n_val_samples))

        except KeyboardInterrupt:
            pass
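The D1/D2/gap computation above is a margin-based triplet loss on cosine distance (M is a margin constant defined elsewhere in that project). A hedged NumPy sketch of the same cost for a single (anchor, positive, negative) triple of embedding vectors, with an assumed margin of 0.5:

import numpy as np

def triplet_cost(e_t, e_p, e_n, margin=0.5):
    # cosine distances anchor-positive and anchor-negative
    d_pos = 1 - np.dot(e_t, e_p) / (np.linalg.norm(e_t) * np.linalg.norm(e_p))
    d_neg = 1 - np.dot(e_t, e_n) / (np.linalg.norm(e_t) * np.linalg.norm(e_n))
    return max(0.0, d_pos - d_neg + margin)    # hinge: only violated margins contribute

print(triplet_cost(np.array([1.0, 0.0]),
                   np.array([1.0, 0.1]),
                   np.array([1.0, 0.2])))      # positive cost: negative is not margin-far from the anchor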
Example #7
def main(data_path, model_path):

    print("Loading data...")
    with open(data_path,'r') as f:
        valX = pkl.load(f)

    print("Preparing data...")
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=512, maxlen=MAX_LENGTH)

    print("Loading dictionary...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # check for model files
    files = sorted(glob.glob(model_path+'model_*.npz'))
    print("Found {} model files".format(len(files)))

    for modelf in files:
        print("Computing validation cost on {}".format(modelf))

        print("Loading params...")
        params = load_params(modelf)

        print("Building network...")

        # Tweet variables
        tweet = T.itensor4()
        ptweet = T.itensor4()
        ntweet = T.itensor4()

        # masks
        t_mask = T.ftensor3()
        tp_mask = T.ftensor3()
        tn_mask = T.ftensor3()

        # Embeddings
        emb_t = char2word2vec(tweet, t_mask, params, n_char)[0]
        emb_tp = char2word2vec(ptweet, tp_mask, params, n_char)[0]
        emb_tn = char2word2vec(ntweet, tn_mask, params, n_char)[0]
        
        # batch cost
        D1 = 1 - T.batched_dot(emb_t, emb_tp)/(tnorm(emb_t)*tnorm(emb_tp))
        D2 = 1 - T.batched_dot(emb_t, emb_tn)/(tnorm(emb_t)*tnorm(emb_tn))
        gap = D1-D2+M
        loss = gap*(gap>0)
        cost = T.mean(loss)
        reg = REGULARIZATION * lasagne.regularization.regularize_network_params(
            char2word2vec(tweet, t_mask, params, n_char)[1],
            lasagne.regularization.l2)
        reg += REGULARIZATION * lasagne.regularization.regularize_network_params(
            char2word2vec(tweet, t_mask, params, n_char)[2],
            lasagne.regularization.l2)

        # Theano function
        print("Compiling theano function...")
        inps = [tweet,t_mask,ptweet,tp_mask,ntweet,tn_mask]
        cost_val = theano.function(inps,cost)
        reg_val = theano.function([], reg)

        print("Testing...")
        uidx = 0
        try:
            validation_cost = 0.
            reg_cost = 0.
            n_val_samples = 0
            for x,y,z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data_c2w2s(
                    x, y, z, chardict, maxwordlen=MAX_WORD_LENGTH,
                    maxseqlen=MAX_SEQ_LENGTH, n_chars=n_char)

                if x is None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost = cost_val(x,x_m,y,y_m,z,z_m)
                validation_cost += curr_cost*len(x)

            reg_cost = reg_val()
            print("Model {} Validation Cost {} Regularization Cost {}".format(modelf, validation_cost/n_val_samples, reg_cost))
            print("Seen {} samples.".format(n_val_samples))

        except KeyboardInterrupt:
            pass
Example #8
            trainer.step(1)
            total_L += mx.nd.sum(L).asscalar()

            if i % args.log_interval == 0 and i > 0:
                cur_L = total_L / args.log_interval
                print('[Epoch %d Batch %d] loss %.2f, ppl %.2f' %
                      (epoch, i, cur_L, math.exp(cur_L)))
                total_L = 0.0

        val_L = eval(val_data)

        print('[Epoch %d] time cost %.2fs, valid loss %.2f, valid ppl %.2f' %
              (epoch, time.time() - start_time, val_L, math.exp(val_L)))

        if val_L < best_val:
            best_val = val_L
            test_L = eval(test_data)
            model.save_params(args.save)
            print('test loss %.2f, test ppl %.2f' % (test_L, math.exp(test_L)))
        else:
            args.lr = args.lr * 0.25
            trainer.set_learning_rate(args.lr)


if __name__ == '__main__':
    train()
    model.load_params(args.save, context)
    test_L = eval(test_data)
    print('Best test loss %.2f, test ppl %.2f' % (test_L, math.exp(test_L)))
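The else branch above implements a simple decay-on-plateau schedule: whenever validation loss fails to improve, the learning rate is multiplied by 0.25. A hedged, framework-free sketch of the same control flow, with made-up validation losses:

best_val, lr = float('inf'), 1.0
for val_L in [5.0, 4.0, 4.2, 3.9, 4.1]:   # made-up validation losses
    if val_L < best_val:
        best_val = val_L                  # improvement: keep checkpoint and lr
    else:
        lr *= 0.25                        # plateau: anneal the learning rate
print(best_val, lr)                       # 3.9 0.0625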
Example #9
    def __init__(self,
                 model_dir,
                 num_channels,
                 shape_z,
                 shape_y,
                 shape_scale=5,
                 num_maps=1,
                 batch_size=1,
                 tf_graph=None,
                 tf_sess=None,
                 debug_plot=False):
        """
        Setup model for inference

        Args:
          model_dir: Directory with model files
          num_channels: Number of channels for input data
          shape_z: Shape of input data in Z
          shape_y: Shape of input data in Y
          shape_scale: Scale data with center k-space data
          num_maps: Number of sets of sensitivity maps
          batch_size: Batch size for inference
          tf_graph: TensorFlow graph to use (a new one is created if None)
          tf_sess: TensorFlow session to use (a new one is created if None)
          debug_plot: Enable debug plotting
        """
        self.debug_plot = debug_plot

        self.tf_graph = tf_graph
        if self.tf_graph is None:
            self.tf_graph = tf.Graph()
        self.tf_sess = tf_sess
        if self.tf_sess is None:
            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True  # pylint: disable=E1101
            session_config.allow_soft_placement = True
            self.tf_sess = tf.Session(graph=self.tf_graph,
                                      config=session_config)

        params = model.load_params(model_dir)

        with self.tf_graph.as_default():
            self.batch_size = batch_size
            self.tf_kspace_input = tf.placeholder(
                tf.complex64,
                (self.batch_size, shape_z, shape_y, num_channels))
            self.tf_sensemap_input = tf.placeholder(
                tf.complex64,
                (self.batch_size, shape_z, shape_y, num_maps, num_channels))

            if shape_scale > 0:
                scale = tf.image.resize_image_with_crop_or_pad(
                    self.tf_kspace_input, shape_scale, shape_scale)
                scale = tf.reduce_mean(tf.square(tf.abs(scale)))
                scale *= shape_scale * shape_scale / shape_y / shape_z
            else:
                logger.info('Turning off scaling...')
                scale = 1.0
            scale = tf.cast(1.0 / tf.sqrt(scale), dtype=tf.complex64)
            tf_kspace_input_scaled = self.tf_kspace_input * scale
            tf_image_output_scaled, tf_kspace_output_scaled, self.iter_out = model.unrolled_prox(
                tf_kspace_input_scaled,
                self.tf_sensemap_input,
                num_grad_steps=params['unrolled_steps'],
                resblock_num_features=params['unrolled_num_features'],
                resblock_num_blocks=params['unrolled_num_resblocks'],
                resblock_share=params['unrolled_share'],
                training=False,
                hard_projection=params['hard_projection'],
                scope=params['recon_scope'])
            self.tf_image_output = tf_image_output_scaled / scale
            self.tf_kspace_output = tf_kspace_output_scaled / scale

            if params['loss_adv'] > 0:
                adv_scope = 'Adversarial'
                tf_image_input_scaled = tfmri.model_transpose(
                    tf_kspace_input_scaled, self.tf_sensemap_input)
                self.adv_output = model.adversarial(tf_image_input_scaled,
                                                    training=False,
                                                    scope=adv_scope)
            else:
                self.adv_output = None

            filename_latest_model = tf.train.latest_checkpoint(model_dir)
            logger.info('Loading model ({})...'.format(filename_latest_model))
            saver = tf.train.Saver()
            saver.restore(self.tf_sess, filename_latest_model)