Example #1
def find_sent_embedding(whole,
                        n_words=21102,
                        img_w=300,
                        img_h=48,
                        feature_maps=200,
                        filter_hs=[3, 4, 5],
                        n_x=300,
                        n_h=600):

    options = {}
    options['n_words'] = n_words
    options['img_w'] = img_w
    options['img_h'] = img_h
    options['feature_maps'] = feature_maps
    options['filter_hs'] = filter_hs
    options['n_x'] = n_x
    options['n_h'] = n_h

    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))

    options['filter_shapes'] = filter_shapes
    options['pool_sizes'] = pool_sizes

    params = init_params(options)
    tparams = init_tparams(params)

    # load the pre-trained encoder weights and copy them into the shared variables below
    data = np.load('./bookcorpus_result.npz')

    for kk, pp in params.iteritems():
        params[kk] = data[kk]

    for kk, pp in params.iteritems():
        tparams[kk].set_value(params[kk])

    x = tensor.matrix('x', dtype='int32')

    layer0_input = tparams['Wemb'][tensor.cast(x.flatten(),
                                               dtype='int32')].reshape(
                                                   (x.shape[0], 1, x.shape[1],
                                                    tparams['Wemb'].shape[1]))

    layer1_inputs = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(tparams,
                             layer0_input,
                             filter_shape,
                             pool_size,
                             prefix=_p('cnn_encoder', i))
        layer1_input = conv_layer
        layer1_inputs.append(layer1_input)
    layer1_input = tensor.concatenate(layer1_inputs, 1)

    f_embed = theano.function([x], layer1_input, name='f_embed')

    kf = get_minibatches_idx(len(whole), 100)
    sent_emb = np.zeros((len(whole), 600))

    for i, train_index in kf:
        sents = [whole[t] for t in train_index]
        x = prepare_data_for_cnn(sents)
        sent_emb[train_index[0]:train_index[-1] + 1] = f_embed(x)
        if i % 500 == 0:
            print i,

    np.savez('./bookcorpus_embedding.npz', sent_emb=sent_emb)

    return sent_emb
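A minimal usage sketch for find_sent_embedding above. It assumes `whole` is a list of sentences encoded as lists of word indices (the format consumed by prepare_data_for_cnn), that the helper functions used in the body are importable from the surrounding project, and that './bookcorpus_result.npz' holds the pre-trained weights; the toy corpus below is purely illustrative.

import numpy as np

# hypothetical toy corpus: three sentences as lists of word indices
whole = [[12, 845, 3, 9], [7, 7, 102], [4, 55, 61, 8, 2]]

sent_emb = find_sent_embedding(whole)   # (len(whole), 600) array
print sent_emb.shape

# the function also writes the embeddings to disk
saved = np.load('./bookcorpus_embedding.npz')
print saved['sent_emb'].shape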
Example #2
def train_model(train, valid, test, img_feats, W, n_words=7414, n_x=300, n_h=512,
    max_epochs=20, lrate=0.001, batch_size=64, valid_batch_size=64, dropout_val=0.5,
    dispFreq=10, validFreq=500, saveFreq=1000, saveto = 'flickr30k_result_psgld_dropout.npz'):
        
    """ n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        max_epochs : The maximum number of epochs to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dropout_val : the probability of dropout
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
        saveFreq : save results after this number of updates.
        saveto : where to save.
    """

    options = {}
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    options['saveFreq'] = saveFreq
    
    options['n_z'] = img_feats.shape[0]
   
    logger.info('Model options {}'.format(options))
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    
    params = init_params(options,W)
    tparams = init_tparams(params)

    (use_noise, x, mask, z, f_pred_prob, cost) = build_model(tparams,options)
    
    f_cost = theano.function([x, mask, z], cost, name='f_cost')
    
    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')
    f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask,z], ntrain_theano, lr_theano)

    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)
    
    estop = False  # early stop
    history_negll = []
    best_p = None
    best_valid_negll, best_test_negll = 0., 0.
    bad_counter = 0    
    uidx = 0  # the number of update done
    start_time = time.time()
    
    # statistics of data
    train_num_words, valid_num_words, test_num_words = 0, 0, 0
    for sent in train[0]:
        train_num_words = train_num_words + len(sent)
    for sent in valid[0]:
        valid_num_words = valid_num_words + len(sent)
    for sent in test[0]:
        test_num_words = test_num_words + len(sent)
    
    n_average = 0
    valid_probs = np.zeros((valid_num_words,))
    test_probs = np.zeros((test_num_words,)) 
    
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                x = [train[0][t] for t in train_index]
                z = np.array([img_feats[:, train[1][t]] for t in train_index])
                
                x, mask = prepare_data(x)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, z)
                f_update(lrate, len(train[0]))

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))
                    
                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(saveto, history_negll=history_negll, **params)
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    
                    if eidx < 3: 
                        valid_negll = calu_negll(f_cost, prepare_data, valid, img_feats, kf_valid)
                        test_negll = calu_negll(f_cost, prepare_data, test, img_feats, kf_test)
                        history_negll.append([valid_negll, test_negll])
                    else:
                        valid_probs_curr = calu_pred_prob(f_pred_prob, prepare_data, valid, img_feats, kf_valid)
                        test_probs_curr = calu_pred_prob(f_pred_prob, prepare_data, test, img_feats, kf_test)
                        valid_probs = (n_average * valid_probs + valid_probs_curr)/(n_average+1) 
                        test_probs = (n_average * test_probs + test_probs_curr)/(n_average+1) 
                        n_average += 1
                        
                        valid_negll = -np.log(valid_probs + 1e-6).sum() / valid_num_words
                        test_negll = -np.log(test_probs + 1e-6).sum() / test_num_words
                        history_negll.append([valid_negll, test_negll])
                        
                        logger.info('Saving {}th Sample...'.format(n_average))
                        
                        params = unzip(tparams)
                        np.savez('flickr30k_result_psgld_{}.npz'.format(n_average), valid_probs_curr=valid_probs_curr, test_probs_curr=test_probs_curr, **params)
                        logger.info('Done ...')
                        
                    
                    if (uidx == 0 or
                        valid_negll <= np.array(history_negll)[:,0].min()):
                             
                        best_p = unzip(tparams)
                        
                        best_valid_negll = valid_negll
                        best_test_negll = test_negll
                        
                        bad_counter = 0
                        
                    logger.info('Perp: Valid {} Test {}'.format(np.exp(valid_negll), np.exp(test_negll)))

                    if (len(history_negll) > 10 and
                        valid_negll >= np.array(history_negll)[:-10,0].min()):
                            bad_counter += 1
                            if bad_counter > 10:
                                logger.info('Early Stop!')
                                estop = True
                                break

            logger.info('Seen {} samples'.format(n_samples))
            
            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)
    
    logger.info('Perp: Valid {} Test {}'.format(np.exp(best_valid_negll), np.exp(best_test_negll)))
    np.savez(saveto, history_negll=history_negll, **best_p)

    
    logger.info('The code run for {} epochs, with {} sec/epochs'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    return best_valid_negll, best_test_negll
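The training loop above delegates the update rule to a pSGLD helper that returns f_grad_shared/f_update. For reference, here is a minimal NumPy sketch of one preconditioned SGLD step in the spirit of Li et al. (AAAI 2016): an RMSprop-style diagonal preconditioner plus Gaussian Langevin noise, with the minibatch gradient rescaled by the training-set size (the `ntrain` value passed to f_update). This only illustrates the idea, not the project's Theano implementation, and it is written as ascent on the log-posterior (with a negative-log-likelihood cost the gradient sign flips).

import numpy as np

def psgld_step(theta, grad_loglik_minibatch, grad_logprior, state,
               lr, ntrain, alpha=0.99, eps=1e-5):
    # grad_loglik_minibatch: gradient of the *average* minibatch log-likelihood
    # grad_logprior:         gradient of the log-prior at theta
    # state:                 running average of squared gradients (preconditioner)
    g = ntrain * grad_loglik_minibatch + grad_logprior   # stochastic gradient of the log-posterior
    state = alpha * state + (1. - alpha) * g * g
    G = 1. / (eps + np.sqrt(state))                      # diagonal preconditioner
    noise = np.random.normal(size=theta.shape) * np.sqrt(lr * G)
    return theta + 0.5 * lr * G * g + noise, state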
Example #3
def train_classifier(train,
                     valid,
                     test,
                     W,
                     n_words=10000,
                     n_x=300,
                     n_h=200,
                     patience=10,
                     max_epochs=50,
                     lrate=0.001,
                     n_train=10000,
                     optimizer='RMSprop',
                     batch_size=50,
                     valid_batch_size=50,
                     dispFreq=10,
                     validFreq=100,
                     saveFreq=500,
                     saveto='mr_pSGLD_dropout.npz'):
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        n_z : latent embedding space for a sentence
        patience : Number of epochs to wait before early stop if no progress
        max_epochs : The maximum number of epochs to run
        lrate : learning rate
        optimizer : methods to do optimization
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
    """

    options = {}
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['optimizer'] = optimizer
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq

    logger.info('Model options {}'.format(options))

    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')

    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y

    params = init_params(options, W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred,
     cost) = build_model(tparams, options)

    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')
    f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, y], ntrain_theano,
                                    lr_theano)

    #print 'Training model...'
    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_train_err, best_valid_err, best_test_err = 0., 0., 0.
    bad_counter = 0
    uidx = 0  # the number of update done
    start_time = time.time()

    n_average = 0
    train_probs = np.zeros((len(train[0]), n_y))
    valid_probs = np.zeros((len(valid[0]), n_y))
    test_probs = np.zeros((len(test[0]), n_y))

    try:
        for eidx in xrange(max_epochs):
            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(0.5)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate, n_train)

                if np.isnan(cost) or np.isinf(cost):

                    logger.info('NaN detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:

                    logger.info('Epoch {} Update {} Cost {}'.format(
                        eidx, uidx, cost))

                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')

                    np.savez(saveto, history_errs=history_errs)

                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)

                    if eidx < 1:
                        train_err = pred_error(f_pred, prepare_data, train, kf)
                        valid_err = pred_error(f_pred, prepare_data, valid,
                                               kf_valid)
                        test_err = pred_error(f_pred, prepare_data, test,
                                              kf_test)
                        history_errs.append([valid_err, test_err, train_err])
                    else:
                        train_probs_curr = pred_probs(f_pred_prob,
                                                      prepare_data, train, kf,
                                                      options)
                        valid_probs_curr = pred_probs(f_pred_prob,
                                                      prepare_data, valid,
                                                      kf_valid, options)
                        test_probs_curr = pred_probs(f_pred_prob, prepare_data,
                                                     test, kf_test, options)
                        train_probs = (n_average * train_probs +
                                       train_probs_curr) / (n_average + 1)
                        valid_probs = (n_average * valid_probs +
                                       valid_probs_curr) / (n_average + 1)
                        test_probs = (n_average * test_probs +
                                      test_probs_curr) / (n_average + 1)
                        n_average += 1

                        train_pred = train_probs.argmax(axis=1)
                        valid_pred = valid_probs.argmax(axis=1)
                        test_pred = test_probs.argmax(axis=1)

                        train_err = (train_pred == np.array(train[1])).sum()
                        train_err = 1. - numpy_floatX(train_err) / len(
                            train[0])

                        valid_err = (valid_pred == np.array(valid[1])).sum()
                        valid_err = 1. - numpy_floatX(valid_err) / len(
                            valid[0])

                        test_err = (test_pred == np.array(test[1])).sum()
                        test_err = 1. - numpy_floatX(test_err) / len(test[0])
                        history_errs.append([valid_err, test_err, train_err])

                    if (uidx == 0 or
                            valid_err <= np.array(history_errs)[:, 0].min()):

                        best_train_err = train_err
                        best_valid_err = valid_err
                        best_test_err = test_err
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(
                        train_err, valid_err, test_err))

                    if (len(history_errs) > patience and valid_err >=
                            np.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:

                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))

            if estop:
                break

    except KeyboardInterrupt:

        logger.info('Training interrupted')

    end_time = time.time()
    logger.info('Train {} Valid {} Test {}'.format(best_train_err,
                                                   best_valid_err,
                                                   best_test_err))

    np.savez(saveto,
             train_err=best_train_err,
             valid_err=best_valid_err,
             test_err=best_test_err,
             history_errs=history_errs)

    logger.info('The code run for {} epochs, with {} sec/epochs'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    #print >> sys.stderr, ('Training took %.1fs' %
    #                      (end_time - start_time))
    return best_train_err, best_valid_err, best_test_err
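After the burn-in epochs, the loop above averages the class probabilities produced by successive pSGLD samples before taking the argmax, i.e. Bayesian model averaging of the predictive distribution. The incremental update (n*avg + curr)/(n+1) is simply the arithmetic mean kept without storing every sample; a small NumPy check of that equivalence (all names below are illustrative):

import numpy as np

samples = [np.random.dirichlet(np.ones(3), size=5) for _ in range(4)]  # 4 posterior samples x 5 examples x 3 classes

avg = np.zeros((5, 3))
for n, probs_curr in enumerate(samples):
    avg = (n * avg + probs_curr) / (n + 1)   # same running average as in the loop above

assert np.allclose(avg, np.mean(samples, axis=0))
print avg.argmax(axis=1)                     # ensemble prediction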
Example #4
def train_classifier(train, valid, test, W, n_words=10000, n_x=300, n_h=200, 
    dropout_val=0.5, patience=10, max_epochs=20, lrate=0.0002, 
    batch_size=50, valid_batch_size=50, dispFreq=10, validFreq=100, 
    saveFreq=200, saveto = 'trec_gru_result.npz'):
        
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        dropout_val: dropout probability
        patience : Number of epochs to wait before early stop if no progress
        max_epochs : The maximum number of epochs to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
        saveFreq: save the result after this number of updates.
        saveto: where to save the result.
    """

    options = {}
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    
    logger.info('Model options {}'.format(options))
    
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y
    
    params = init_params(options,W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred, cost) = build_model(tparams,options)
    
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, [x, mask, y], lr)

    logger.info('Training model...')
    
    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_p = None
    bad_counter = 0    
    uidx = 0  # the number of update done
    start_time = time.time()
    
    try:
        for eidx in xrange(max_epochs):
            
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]
                                
                x, mask, y = prepare_data(x, y)

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))
                    
                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    # save a checkpoint whether or not a best_p snapshot exists yet
                    np.savez(saveto, history_errs=history_errs, **params)
                    
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    
                    use_noise.set_value(0.)
                    
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)
                    history_errs.append([valid_err, test_err, train_err])
                   
                        
                    if (uidx == 0 or
                        valid_err <= np.array(history_errs)[:,0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                        valid_err >= np.array(history_errs)[:-patience,0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            
                            logger.info('Early Stop!')
                            estop = True
                            break

            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)
    
    use_noise.set_value(0.)
    
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)
    
    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))
    
    np.savez(saveto, train_err=train_err,
             valid_err=valid_err, test_err=test_err,
             history_errs=history_errs, **best_p)
    
    logger.info('The code run for {} epochs, with {} sec/epochs'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    return train_err, valid_err, test_err
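Every training loop in these examples iterates over (batch_number, index_array) pairs produced by get_minibatches_idx. The helper is defined elsewhere in the project; the sketch below is consistent with how it is consumed here (it follows the standard Theano LSTM-tutorial version) and the project's own copy may differ in detail.

import numpy as np

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    # Split the indices 0..n-1 into minibatches; optionally shuffle first.
    idx_list = np.arange(n, dtype="int32")
    if shuffle:
        np.random.shuffle(idx_list)

    minibatches = []
    start = 0
    for _ in range(n // minibatch_size):
        minibatches.append(idx_list[start:start + minibatch_size])
        start += minibatch_size
    if start != n:
        minibatches.append(idx_list[start:])   # last, possibly smaller, batch

    return zip(range(len(minibatches)), minibatches)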
Example #5
def train_model(train_x,
                train_y,
                valid_x,
                valid_y,
                test_x,
                test_y,
                n_words=10000,
                n_x=300,
                n_h=1500,
                max_epochs=55,
                collect_epoch=4,
                lrate=1,
                anneal_lr_epoch=15,
                anneal_lr_factor=1.15,
                dropout_val=0.65,
                batch_size=32,
                valid_batch_size=64,
                dispFreq=10,
                validFreq=400,
                saveFreq=1000,
                saveto='ptb_result_large_sgld_with_dropout.npz'):
    """ n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        n_z : latent embedding space for a sentence
        patience : Number of epochs to wait before early stop if no progress
        max_epochs : The maximum number of epochs to run
        lrate : learning rate
        optimizer : methods to do optimization
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        use_dropout : whether use dropout or not
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
        test_size : If >0, we keep only this number of test examples.
    """

    options = {}
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    options['saveFreq'] = saveFreq

    logger.info('Model options {}'.format(options))
    logger.info('Building model...')

    params = init_params(options)
    tparams = init_tparams(params)

    use_noise, x, y, f_pred_prob, cost = build_model(tparams, options)

    f_cost = theano.function([x, y], cost, name='f_cost')

    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')
    f_grad_shared, f_update = SGLD(tparams, cost, [x, y], ntrain_theano,
                                   lr_theano)

    logger.info('Training model...')

    kf_valid = get_minibatches_idx(valid_x.shape[0], valid_batch_size)
    kf_test = get_minibatches_idx(test_x.shape[0], valid_batch_size)

    estop = False  # early stop
    history_negll = []
    best_p = None
    best_valid_negll, best_test_negll = 0., 0.
    bad_counter = 0
    uidx = 0  # the number of update done
    start_time = time.time()

    # statistics of data
    train_num_words = train_x.shape[0] * train_x.shape[1]
    valid_num_words = valid_x.shape[0] * valid_x.shape[1]
    test_num_words = test_x.shape[0] * test_x.shape[1]

    n_average = 0
    valid_probs = np.zeros((valid_num_words, ))
    test_probs = np.zeros((test_num_words, ))

    try:
        for eidx in xrange(max_epochs):
            n_samples = 0
            kf = get_minibatches_idx(train_x.shape[0],
                                     batch_size,
                                     shuffle=True)

            if eidx >= anneal_lr_epoch:
                #annealing learning rate
                lrate = lrate / anneal_lr_factor

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                x = train_x[train_index].T
                y = train_y[train_index].T

                n_samples += x.shape[1]

                cost = f_grad_shared(x, y)
                f_update(lrate, train_num_words)

                if np.isnan(cost) or np.isinf(cost):

                    logger.info('NaN detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:

                    logger.info('Epoch {} Update {} Cost {}'.format(
                        eidx, uidx, np.exp(cost)))

                if np.mod(uidx, saveFreq) == 0:

                    logger.info('Saving ...')

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(saveto, history_negll=history_negll, **params)

                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)

                    if eidx < collect_epoch:
                        valid_negll = calu_negll(f_cost, valid_x, valid_y,
                                                 kf_valid)
                        test_negll = calu_negll(f_cost, test_x, test_y,
                                                kf_test)
                        history_negll.append([valid_negll, test_negll])
                    else:
                        valid_probs_curr = calu_pred_prob(
                            f_pred_prob, valid_x, valid_y, kf_valid)
                        test_probs_curr = calu_pred_prob(
                            f_pred_prob, test_x, test_y, kf_test)

                        valid_probs = (n_average * valid_probs +
                                       valid_probs_curr) / (n_average + 1)
                        test_probs = (n_average * test_probs +
                                      test_probs_curr) / (n_average + 1)
                        n_average += 1

                        valid_negll = -np.log(valid_probs +
                                              1e-6).sum() / valid_num_words
                        test_negll = -np.log(test_probs +
                                             1e-6).sum() / test_num_words
                        history_negll.append([valid_negll, test_negll])

                        logger.info('Saving {}th Sample...'.format(n_average))

                        params = unzip(tparams)
                        np.savez(
                            'ptb_result_sgld_large_{}.npz'.format(n_average),
                            valid_probs_curr=valid_probs_curr,
                            test_probs_curr=test_probs_curr,
                            **params)
                        logger.info('Done ...')

                    if (uidx == 0 or valid_negll <=
                            np.array(history_negll)[:, 0].min()):

                        best_p = unzip(tparams)

                        best_valid_negll = valid_negll
                        best_test_negll = test_negll

                        bad_counter = 0

                    logger.info('Valid {} Test {}'.format(
                        np.exp(valid_negll), np.exp(test_negll)))

                    if (len(history_negll) > 10 and valid_negll >=
                            np.array(history_negll)[:-10, 0].min()):
                        bad_counter += 1
                        if bad_counter > 10:
                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))

            if estop:
                break

    except KeyboardInterrupt:

        logger.info('Training interrupted')

    end_time = time.time()

    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)


#    use_noise.set_value(0.)
#    kf_train_sorted = get_minibatches_idx(len(train), batch_size)
#    train_negll = calu_negll(f_cost, prepare_data, train, kf_train_sorted)
#    valid_negll = calu_negll(f_cost, prepare_data, valid, kf_valid)
#    test_negll = calu_negll(f_cost, prepare_data, test, kf_test)

    logger.info('Valid {} Test {}'.format(np.exp(best_valid_negll),
                                          np.exp(best_test_negll)))
    np.savez(saveto, history_negll=history_negll, **best_p)

    logger.info('The code run for {} epochs, with {} sec/epochs'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    return best_valid_negll, best_test_negll
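This example swaps the preconditioned sampler for plain SGLD. For reference, a minimal NumPy sketch of one (unpreconditioned) SGLD step under the same conventions as the pSGLD sketch after Example #2: gradient ascent on the log-posterior, with the average minibatch gradient rescaled to full-data scale and Gaussian noise whose variance equals the step size. Again this only illustrates the idea; the project's SGLD helper is a Theano implementation.

import numpy as np

def sgld_step(theta, grad_loglik_minibatch, grad_logprior, lr, ntrain):
    # ntrain rescales the average minibatch gradient to the full-data scale
    g = ntrain * grad_loglik_minibatch + grad_logprior
    noise = np.random.normal(size=theta.shape) * np.sqrt(lr)
    return theta + 0.5 * lr * g + noise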
Example #6
def train_model(train, val, test, n_words=21103, img_w=300, max_len=40, 
    feature_maps=200, filter_hs=[3,4,5], n_x=300, n_h=600, 
    max_epochs=8, lrate=0.0002, batch_size=64, valid_batch_size=64, dispFreq=10, 
    validFreq=500, saveFreq=1000, saveto = 'bookcorpus_result.npz'):
        
    """ train, valid, test : datasets
        n_words : vocabulary size
        img_w : word embedding dimension, must be 300.
        max_len : the maximum length of a sentence 
        feature_maps : the number of feature maps we used 
        filter_hs: the filter window sizes we used
        n_x: word embedding dimension
        n_h: the number of hidden units in LSTM        
        max_epochs : the maximum number of epochs to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
        saveFreq: save the result after this number of updates.
        saveto: where to save the result.
    """
    
    img_h = max_len + 2*(filter_hs[-1]-1)
    
    options = {}
    options['n_words'] = n_words
    options['img_w'] = img_w
    options['img_h'] = img_h
    options['feature_maps'] = feature_maps
    options['filter_hs'] = filter_hs
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    options['saveFreq'] = saveFreq
   
    logger.info('Model options {}'.format(options))

    logger.info('Building model...')
    
    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h-filter_h+1, img_w-filter_w+1))
        
    options['filter_shapes'] = filter_shapes
    options['pool_sizes'] = pool_sizes
    
    params = init_params(options)
    tparams = init_tparams(params)

    use_noise, x, y, y_mask, cost = build_model(tparams,options)
    
    f_cost = theano.function([x, y, y_mask], cost, name='f_cost')
    
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, [x, y, y_mask], lr)
    
    logger.info('Training model...')
    
    history_cost = []  
    uidx = 0  # the number of update done
    start_time = time.time()
    
    kf_valid = get_minibatches_idx(len(val), valid_batch_size)
    
    zero_vec_tensor = tensor.vector()
    zero_vec = np.zeros(img_w).astype(theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor], updates=[(tparams['Wemb'], tensor.set_subtensor(tparams['Wemb'][21102,:], zero_vec_tensor))])
    
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0
            
            kf = get_minibatches_idx(len(train), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(0.)

                sents = [train[t] for t in train_index]
                
                x = prepare_data_for_cnn(sents)
                y, y_mask = prepare_data_for_rnn(sents)
                n_samples += y.shape[1]

                cost = f_grad_shared(x, y, y_mask)
                f_update(lrate)
                # the special <pad_zero> token does not need to update.
                set_zero(zero_vec)

                if np.isnan(cost) or np.isinf(cost):
                    
                    logger.info('NaN detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, np.exp(cost)))
                
                if np.mod(uidx, saveFreq) == 0:
                    
                    logger.info('Saving ...')
                    
                    params = unzip(tparams)
                    np.savez(saveto, history_cost=history_cost, **params)
                    
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    
                    valid_cost = calu_cost(f_cost, prepare_data_for_cnn, prepare_data_for_rnn, val, kf_valid)
                    history_cost.append([valid_cost])
                        
                    logger.info('Valid {}'.format(np.exp(valid_cost)))

        logger.info('Seen {} samples'.format(n_samples))

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    
#    if best_p is not None:
#        zipp(best_p, tparams)
#    else:
#        best_p = unzip(tparams)
    
    
    use_noise.set_value(0.)
    valid_cost = calu_cost(f_cost, prepare_data_for_cnn, prepare_data_for_rnn, val, kf_valid)
    logger.info('Valid {}'.format(np.exp(valid_cost)))
    
    params = unzip(tparams)
    np.savez(saveto, history_cost=history_cost, **params)

    
    logger.info('The code run for {} epochs, with {} sec/epochs'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    
    return valid_cost
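The padded height img_h = max_len + 2*(filter_hs[-1]-1) above leaves filter_hs[-1]-1 positions of padding on each side of a sentence, so even the widest convolution filter can be centred on the first and last real word. A rough sketch of what prepare_data_for_cnn is assumed to do, using n_words-1 (the row that set_zero pins to zero) as the pad index; the project's actual helper lives elsewhere and may differ:

import numpy as np

def prepare_data_for_cnn_sketch(sents, max_len=40, n_words=21103, filter_h=5):
    pad = filter_h - 1
    img_h = max_len + 2 * pad
    x = np.zeros((len(sents), img_h), dtype='int32') + (n_words - 1)  # fill with the pad index
    for i, sent in enumerate(sents):
        sent = sent[:max_len]                 # truncate overly long sentences
        x[i, pad:pad + len(sent)] = sent      # keep `pad` slots free on the left
    return x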
Example #7
        tmp = cPickle.load(f)

    for keys in params:
        params[str(keys)] = tmp[str(keys)]

    del tmp

    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred,
     cost) = build_model(tparams, options)

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, [x, mask, y], lr)

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    use_noise.set_value(0.)

    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

    # =============================================================================

    print("train_err %.2f, valid_err %.2f, test_err %.2f" %
          (train_err, valid_err, test_err))
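The fragment above (and every example in this file) relies on zipp/unzip to move parameter values between plain dictionaries and Theano shared variables. A sketch of these helpers as they appear in the Theano LSTM tutorial this code is patterned on; the project's own definitions may differ slightly:

from collections import OrderedDict

def zipp(params, tparams):
    # push saved numpy values back into the Theano shared variables (reloading)
    for kk, vv in params.iteritems():
        tparams[kk].set_value(vv)

def unzip(zipped):
    # pull current values out of the shared variables into a plain OrderedDict,
    # which is what np.savez / cPickle receive when checkpointing
    new_params = OrderedDict()
    for kk, vv in zipped.iteritems():
        new_params[kk] = vv.get_value()
    return new_params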
Example #8
def train_classifier(train, valid, test, W, n_words=10000, img_w=300, max_len=40, 
    feature_maps=100, filter_hs=[3,4,5], dropout_val=0.5, patience=10, 
    max_epochs=20, lrate=0.0002, batch_size=50, valid_batch_size=50, dispFreq=10, 
    validFreq=100, saveFreq=200, saveto = 'trec_cnn_result.npz'):
        
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        img_w : word embedding dimension, must be 300.
        max_len : the maximum length of a sentence 
        feature_maps : the number of feature maps we used 
        filter_hs: the filter window sizes we used
        dropout_val: dropout probability
        patience : Number of epochs to wait before early stop if no progress
        max_epochs : The maximum number of epochs to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
        saveFreq: save the result after this number of updates.
        saveto: where to save the result.
    """

    img_h = max_len + 2*(filter_hs[-1]-1)
    
    options = {}
    options['n_words'] = n_words
    options['img_w'] = img_w
    options['img_h'] = img_h
    options['feature_maps'] = feature_maps
    options['filter_hs'] = filter_hs
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    
    logger.info('Model options {}'.format(options))
    
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y
    
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes    
    """ 

    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h-filter_h+1, img_w-filter_w+1))
        
    options['filter_shapes'] = filter_shapes
    options['pool_sizes'] = pool_sizes
    
    params = init_params(options,W)
    tparams = init_tparams(params)

    (use_noise, x, y, f_pred_prob, f_pred, cost) = build_model(tparams,options)
    
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, [x, y], lr)

    logger.info('Training model...')
    
    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_p = None
    bad_counter = 0    
    uidx = 0  # the number of update done
    start_time = time.time()
    
    zero_vec_tensor = tensor.vector()
    zero_vec = np.zeros(img_w).astype(theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor], updates=[(tparams['Wemb'], tensor.set_subtensor(tparams['Wemb'][n_words-1,:], zero_vec_tensor))])
    
    try:
        for eidx in xrange(max_epochs):

            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                y = np.array([train[1][t] for t in train_index]).astype('int32')
                x = [train[0][t] for t in train_index]
                x = prepare_data(x, max_len, n_words, filter_hs[-1])

                cost = f_grad_shared(x, y)
                f_update(lrate)
                # the special token does not need to update.
                set_zero(zero_vec)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))
                    
                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    # save a checkpoint whether or not a best_p snapshot exists yet
                    np.savez(saveto, history_errs=history_errs, **params)
                    
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    
                    use_noise.set_value(0.)
                    
                    train_err = pred_error(f_pred, prepare_data, train, kf, max_len, n_words, filter_hs[-1])
                    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid, max_len, n_words, filter_hs[-1])
                    test_err = pred_error(f_pred, prepare_data, test, kf_test, max_len, n_words, filter_hs[-1])
                    history_errs.append([valid_err, test_err, train_err])
                   
                    if (uidx == 0 or
                        valid_err <= np.array(history_errs)[:,0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                        valid_err >= np.array(history_errs)[:-patience,0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            
                            logger.info('Early Stop!')
                            estop = True
                            break

            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)
    
    use_noise.set_value(0.)
    
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted, max_len, n_words, filter_hs[-1])
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid, max_len, n_words, filter_hs[-1])
    test_err = pred_error(f_pred, prepare_data, test, kf_test, max_len, n_words, filter_hs[-1])

    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))
    
    np.savez(saveto, train_err=train_err,
             valid_err=valid_err, test_err=test_err,
             history_errs=history_errs, **best_p)
    
    logger.info('The code run for {} epochs, with {} sec/epochs'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    return train_err, valid_err, test_err
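A hypothetical call sketch for the CNN classifier above, assuming the dataset format implied by the training loop: each split is a pair (sentences, labels) with sentences given as lists of word indices and labels as ints, and W is an (n_words, 300) embedding matrix (e.g. word2vec) used to initialise Wemb, whose last row is the zero pad vector maintained by set_zero. Every value below is illustrative only.

import numpy as np

n_words = 10000
W = np.random.uniform(-0.25, 0.25, (n_words, 300)).astype('float32')
W[n_words - 1] = 0.                           # pad row, kept at zero during training

train = ([[4, 27, 301, 9], [88, 12, 5]], [0, 3])
valid = ([[7, 7, 102]], [1])
test  = ([[15, 2, 2, 61]], [2])

train_err, valid_err, test_err = train_classifier(
    train, valid, test, W, n_words=n_words, max_epochs=1, validFreq=1)
print train_err, valid_err, test_err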
Example #9
def train_model(train,
                val,
                test,
                train_lab,
                val_lab,
                test_lab,
                ixtoword,
                n_words=22153,
                period=887,
                img_w=300,
                img_h=148,
                feature_maps=300,
                filter_hs=[3, 4, 5],
                n_x=300,
                n_h=500,
                n_h2_d=200,
                n_h2=900,
                p_lambda_q=0,
                p_lambda_fm=0.001,
                p_lambda_recon=0.001,
                n_codes=2,
                max_epochs=16,
                lr_d=0.0001,
                lr_g=0.00005,
                kde_sigma=1.,
                batch_size=256,
                valid_batch_size=256,
                dim_mmd=32,
                dispFreq=10,
                dg_ratio=1,
                Large=1e3,
                validFreq=500,
                saveFreq=500,
                saveto='disent_result'):
    """ n_words : word vocabulary size
        feature_maps : CNN embedding dimension for each width
        filter_hs : CNN width
        n_h : LSTM/GRU number of hidden units 
        n_h2: discriminative network number of hidden units
        n_gan: number of hidden units in GAN
        n_codes: number of latent codes 
        max_epochs : The maximum number of epochs to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of updates.
    """
    n_gan = len(filter_hs) * feature_maps  # 900

    options = {}
    options['n_words'] = n_words
    options['img_w'] = img_w
    options['img_h'] = img_h
    options['feature_maps'] = feature_maps
    options['filter_hs'] = filter_hs  #band width
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['n_h2'] = n_h2
    options['n_h2_d'] = n_h2_d
    options['n_codes'] = n_codes
    options['lambda_q'] = p_lambda_q
    options['lambda_fm'] = p_lambda_fm  # weight for feature matching
    options['lambda_recon'] = p_lambda_recon
    options['L'] = Large
    options['max_epochs'] = max_epochs
    options['lr_d'] = lr_d
    options['lr_g'] = lr_g
    options['kde_sigma'] = kde_sigma
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    options['saveFreq'] = saveFreq
    options['dg_ratio'] = dg_ratio

    options['n_gan'] = n_gan
    options['debug'] = False
    # feature-matching mode: 'mmd' (used here); alternatives: 'mmd_h', 'mmd_ld', 'JSD_acc', 'moment', None
    options['feature_match'] = 'mmd'
    options['shareLSTM'] = True
    options['delta'] = 0.00
    options['sharedEmb'] = False
    options['cnn_activation'] = 'tanh'  # tanh
    options['sigma_range'] = [20]  # range of sigma for mmd
    options['diag'] = 0.1  # diagonal matrix added on cov for JSD_acc
    options['label_smoothing'] = 0.01
    options['dim_mmd'] = dim_mmd
    options['force_cut'] = 'None'
    options['batch_norm'] = False
    options['wgan'] = False
    options['cutoff'] = 0.01

    options['max_step'] = 60
    options['period'] = period

    logger.info('Model options {}'.format(options))

    logger.info('Building model...')

    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))

    options['filter_shapes'] = filter_shapes
    options['pool_sizes'] = pool_sizes
    # generative model for GAN

    ## modified
    n_label = len(set(train_lab))
    options['label_sizes'] = n_label

    n_feature = len(options['filter_hs']) * options['feature_maps']

    options['input_shape'] = (n_h2_d, n_feature)
    options['pred_shape'] = (1, n_h2_d)

    if options['feature_match'] == 'mmd_ld':
        options['mmd_shape'] = (dim_mmd, n_h2_d)

    options['propose_shape'] = (n_codes, n_h2_d)

    # if options['reverse']:
    options['input_recon_shape'] = (n_h2, n_feature)
    options['recon_shape'] = (n_gan,
                              n_h2) if options['shareLSTM'] else (n_gan + 1,
                                                                  n_h2)
    ##

    d_params_s, g_params_s, s_params_s = init_params(options)
    d_params, g_params, s_params = init_tparams(d_params_s, g_params_s,
                                                s_params_s, options)
    lr_d_t = tensor.scalar(name='lr_d')
    lr_g_t = tensor.scalar(name='lr_g')

    use_noise, use_noise2, x, z, d_cost, g_cost, r_cost, fake_recon, acc_fake_xx, acc_real_xx, acc_fake_mean, acc_real_mean, wtf1, wtf2, wtf3, wtf4, wtf5, wtf6, KDE, KDE_input = build_model(
        d_params, g_params, s_params, options)  # change
    f_cost = theano.function([x, z], [d_cost, g_cost, KDE, KDE_input],
                             name='f_cost')
    #f_print = theano.function([x, z],[ wtf1, wtf2, wtf3, wtf4, wtf5, wtf6, KDE, KDE_input], name='f_print',on_unused_input='ignore')
    f_print = theano.function([x, z], [wtf1, wtf2, wtf3, wtf4, wtf5, wtf6],
                              name='f_print')
    f_recon = theano.function([x, z], [r_cost, fake_recon, d_cost],
                              name='f_recon',
                              on_unused_input='ignore')

    if options['feature_match']:
        ss_updates = [(s_params['acc_fake_xx'], acc_fake_xx),
                      (s_params['acc_real_xx'], acc_real_xx),
                      (s_params['acc_fake_mean'], acc_fake_mean),
                      (s_params['acc_real_mean'], acc_real_mean),
                      (s_params['seen_size'],
                       s_params['seen_size'] + options['batch_size'])]
        f_update_ss = theano.function([x, z], s_params, updates=ss_updates)

    f_cost_d, _train_d = Adam(d_params, d_cost, [x, z], lr_d_t)
    if options['feature_match']:
        f_cost_g, _train_g = Adam(g_params, g_cost, [x, z], lr_g_t)
    else:
        f_cost_g, _train_g = Adam(g_params, g_cost, [z], lr_g_t)

    ##

    logger.info('Training model...')

    history_cost = []
    uidx = 0  # the number of update done
    kdes = np.zeros(10)
    kde_std = 0.  # standard deviation of every 10 kde_input
    kde_mean = 0.

    start_time = time.time()

    kf_valid = get_minibatches_idx(len(val), valid_batch_size)
    y_min = min(train_lab)
    train_lab = [t - y_min for t in train_lab]
    val_lab = [t - y_min for t in val_lab]
    test_lab = [t - y_min for t in test_lab]
    testset = [prepare_for_bleu(s) for s in test[:1000]]
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            kf = get_minibatches_idx(len(train), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(0.0)
                use_noise2.set_value(0.0)
                sents = [train[t] for t in train_index]

                x = prepare_data_for_cnn(sents)
                n_samples += x.shape[0]

                if options['shareLSTM']:
                    z = np.random.uniform(
                        -1, 1, (batch_size, n_gan)).astype('float32')
                else:
                    z = np.random.uniform(
                        -1, 1, (batch_size, n_gan + 1)).astype('float32')

                z[:, 0] = np.random.randint(n_codes,
                                            size=batch_size).astype('float32')

                # update gradient
                if options['feature_match']:
                    cost_g = f_cost_g(x, z)
                else:
                    cost_g = f_cost_g(z)

                if np.isnan(cost_g):

                    logger.info('NaN detected')
                    temp_out = f_print(x, z)

                    print 'real' + str(temp_out[0]) + ' fake' + str(
                        temp_out[1])
                    return 1., 1., 1.

                if np.isinf(cost_g):
                    temp_out = f_print(x, z)
                    print 'real' + str(temp_out[0]) + ' fake' + str(
                        temp_out[1])
                    logger.info('Inf detected')
                    return 1., 1., 1.

                # update G
                _train_g(lr_g)

                if np.mod(uidx, dispFreq) == 0:
                    temp_out = f_print(x, z)
                    _, _, cost_d = f_recon(x, z)

                    np.set_printoptions(precision=3)
                    np.set_printoptions(threshold=np.inf)

                    print 'real ' + str(round(
                        temp_out[0], 2)) + ' fake ' + str(round(
                            temp_out[1], 2)) + ' Covariance loss ' + str(
                                round(temp_out[3], 2)) + ' mean loss ' + str(
                                    round(temp_out[5], 2))
                    print 'cost_g ' + str(cost_g) + ' cost_d ' + str(cost_d)
                    print(
                        "Generated:" + " ".join(
                            [ixtoword[x] for x in temp_out[2][0] if x != 0]))

                    logger.info(
                        'Epoch {} Update {} Cost G {} Real {} Fake {} loss_cov {}  meanMSE {}'
                        .format(eidx, uidx, cost_g, round(temp_out[0], 2),
                                round(temp_out[1], 2), temp_out[3],
                                temp_out[5]))
                    logger.info('Generated: {}'.format(" ".join(
                        [ixtoword[x] for x in temp_out[2][0] if x != 0])))

                if np.mod(uidx, dg_ratio) == 0:
                    x = prepare_data_for_cnn(sents)
                    cost_d = f_cost_d(x, z)
                    _train_d(lr_d)

                    if np.mod(uidx, dispFreq) == 0:
                        logger.info('Cost D {}'.format(cost_d))

                if np.mod(uidx, saveFreq) == 0:

                    logger.info('Saving ...')

                    d_params_s = unzip(d_params)
                    g_params_s = unzip(g_params)
                    params_d = OrderedDict()
                    params_g = OrderedDict()
                    for kk, pp in d_params_s.iteritems():
                        params_d[kk] = np.asarray(d_params_s[kk])
                    for kk, pp in g_params_s.iteritems():
                        params_g[kk] = np.asarray(g_params_s[kk])

                    np.savez(saveto + '_d.npz',
                             history_cost=history_cost,
                             options=options,
                             **params_d)
                    np.savez(saveto + '_g.npz',
                             history_cost=history_cost,
                             options=options,
                             **params_g)

                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    use_noise2.set_value(0.)
                    if options['shareLSTM']:
                        val_z = np.random.uniform(
                            -1, 1, (batch_size, n_gan)).astype('float32')
                    else:
                        val_z = np.random.uniform(
                            -1, 1, (batch_size, n_gan + 1)).astype('float32')

                    temp_out = f_print(x, val_z)
                    predset = temp_out[2]
                    [bleu2s, bleu3s,
                     bleu4s] = cal_BLEU([prepare_for_bleu(s) for s in predset],
                                        {0: testset})

                    logger.info(
                        'Valid BLEU2 = {}, BLEU3 = {}, BLEU4 = {}'.format(
                            bleu2s, bleu3s, bleu4s))
                    print 'Valid BLEU (2,3,4): ' + ' '.join(
                        [str(round(it, 3)) for it in (bleu2s, bleu3s, bleu4s)])

                if options['feature_match']:
                    f_update_ss(x, z)

        logger.info('Seen {} samples'.format(n_samples))

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()

    logger.info('The code run for {} epochs, with {} sec/epochs'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    return valid_cost
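The generator cost in this last example uses feature matching with an MMD criterion (options['feature_match'] = 'mmd', options['sigma_range'] = [20]) between CNN features of real and generated sentences. As a reference point, a minimal NumPy sketch of a Gaussian-kernel squared MMD with a single bandwidth; the project computes an analogous quantity symbolically in Theano and its exact kernel and scaling may differ:

import numpy as np

def gaussian_mmd2(f_real, f_fake, sigma=20.):
    # f_real, f_fake: (batch, n_feature) arrays of sentence features
    def gram(a, b):
        sq = (a ** 2).sum(1)[:, None] + (b ** 2).sum(1)[None, :] - 2. * a.dot(b.T)
        return np.exp(-sq / (2. * sigma ** 2))
    return (gram(f_real, f_real).mean()
            + gram(f_fake, f_fake).mean()
            - 2. * gram(f_real, f_fake).mean())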