def train_model(train, valid, test, img_feats, W, n_words=7414, n_x=300, n_h=512,
    max_epochs=20, lrate=0.001, batch_size=64, valid_batch_size=64, dropout_val=0.5,
    dispFreq=10, validFreq=500, saveFreq=1000, saveto='flickr30k_result_psgld_dropout.npz'):
        
    """ n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        max_epochs : The maximum number of epoch to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dropout_val : the probability of dropout
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of update.
        saveFreq : save results after this number of update.
        saveto : where to save.
    """

    options = {}
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    options['saveFreq'] = saveFreq
    
    options['n_z'] = img_feats.shape[0]
   
    logger.info('Model options {}'.format(options))
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    
    params = init_params(options,W)
    tparams = init_tparams(params)

    (use_noise, x, mask, z, f_pred_prob, cost) = build_model(tparams,options)
    
    f_cost = theano.function([x, mask, z], cost, name='f_cost')
    
    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')
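    # Assumed reading of the pSGLD helper: f_grad_shared computes the cost and stores
    # the minibatch gradients; f_update applies one preconditioned SGLD step, with
    # ntrain (the training-set size, passed as f_update(lrate, len(train[0])) below)
    # rescaling the minibatch gradient to the full dataset.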
    f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, z], ntrain_theano, lr_theano)

    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)
    
    estop = False  # early stop
    history_negll = []
    best_p = None
    best_valid_negll, best_test_negll = 0., 0.
    bad_counter = 0    
    uidx = 0  # the number of updates done
    start_time = time.time()
    
    # statistics of data
    train_num_words, valid_num_words, test_num_words = 0, 0, 0
    for sent in train[0]:
        train_num_words = train_num_words + len(sent)
    for sent in valid[0]:
        valid_num_words = valid_num_words + len(sent)
    for sent in test[0]:
        test_num_words = test_num_words + len(sent)
    
    n_average = 0
    valid_probs = np.zeros((valid_num_words,))
    test_probs = np.zeros((test_num_words,)) 
    
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                x = [train[0][t] for t in train_index]
                z = np.array([img_feats[:, train[1][t]] for t in train_index])
                
                x, mask = prepare_data(x)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, z)
                f_update(lrate, len(train[0]))

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN or Inf detected')
                    return 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))
                    
                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(saveto, history_negll=history_negll, **params)
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
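                    # For the first few epochs (burn-in) report the plain negative
                    # log-likelihood of the current parameters; after that, average the
                    # predictive probabilities over successive pSGLD samples to
                    # approximate the posterior predictive.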
                    
                    if eidx < 3: 
                        valid_negll = calu_negll(f_cost, prepare_data, valid, img_feats, kf_valid)
                        test_negll = calu_negll(f_cost, prepare_data, test, img_feats, kf_test)
                        history_negll.append([valid_negll, test_negll])
                    else:
                        valid_probs_curr = calu_pred_prob(f_pred_prob, prepare_data, valid, img_feats, kf_valid)
                        test_probs_curr = calu_pred_prob(f_pred_prob, prepare_data, test, img_feats, kf_test)
                        valid_probs = (n_average * valid_probs + valid_probs_curr)/(n_average+1) 
                        test_probs = (n_average * test_probs + test_probs_curr)/(n_average+1) 
                        n_average += 1
                        
                        valid_negll = -np.log(valid_probs + 1e-6).sum() / valid_num_words
                        test_negll = -np.log(test_probs + 1e-6).sum() / test_num_words
                        history_negll.append([valid_negll, test_negll])
                        
                        logger.info('Saving sample {} ...'.format(n_average))
                        
                        params = unzip(tparams)
                        np.savez('flickr30k_result_psgld_{}.npz'.format(n_average),
                                 valid_probs_curr=valid_probs_curr,
                                 test_probs_curr=test_probs_curr, **params)
                        logger.info('Done ...')
                        
                    
                    if (uidx == 0 or
                        valid_negll <= np.array(history_negll)[:,0].min()):
                             
                        best_p = unzip(tparams)
                        
                        best_valid_negll = valid_negll
                        best_test_negll = test_negll
                        
                        bad_counter = 0
                        
                    logger.info('Perp: Valid {} Test {}'.format(np.exp(valid_negll), np.exp(test_negll)))

                    if (len(history_negll) > 10 and
                        valid_negll >= np.array(history_negll)[:-10,0].min()):
                        bad_counter += 1
                        if bad_counter > 10:
                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))
            
            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)
    
    logger.info('Perp: Valid {} Test {}'.format(np.exp(best_valid_negll), np.exp(best_test_negll)))
    np.savez(saveto, history_negll=history_negll, **best_p)

    
    logger.info('The code ran for {} epochs, at {} sec/epoch'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    return best_valid_negll, best_test_negll
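A minimal usage sketch for train_model (not part of the original source): it assumes the helpers referenced above (prepare_data, init_params, build_model, pSGLD, calu_negll, get_minibatches_idx, ...) live in the same module; the synthetic data layout is only inferred from how train_model indexes its arguments, and the sizes and the synthetic_split helper are illustrative.

import numpy as np

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    n_words, n_x, n_images, feat_dim = 7414, 300, 200, 2048
    # word-embedding initialization: one row per vocabulary entry
    W = 0.01 * rng.randn(n_words, n_x).astype('float32')
    # image features: one column per image (train[1][t] selects a column)
    img_feats = rng.randn(feat_dim, n_images).astype('float32')

    def synthetic_split(n):
        # each example: (list of word ids, index of the paired image)
        sents = [list(rng.randint(1, n_words, size=rng.randint(4, 15)))
                 for _ in range(n)]
        imgs = list(rng.randint(0, n_images, size=n))
        return (sents, imgs)

    train, valid, test = synthetic_split(512), synthetic_split(64), synthetic_split(64)
    best_valid_negll, best_test_negll = train_model(train, valid, test, img_feats, W,
                                                    max_epochs=1, validFreq=4, saveFreq=8)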

def train_classifier(train, valid, test, W, n_words=10000, n_x=300, n_h=200,
    dropout_val=0.5, patience=10, max_epochs=20, lrate=0.0002,
    batch_size=50, valid_batch_size=50, dispFreq=10, validFreq=100,
    saveFreq=200, saveto='trec_gru_result.npz'):
        
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        dropout_val: dropput probability
        patience : Number of epoch to wait before early stop if no progress
        max_epochs : The maximum number of epoch to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of update.
        saveFreq: save the result after this number of update.
        saveto: where to save the result.
    """

    options = {}
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    
    logger.info('Model options {}'.format(options))
    
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    
    n_y = np.max(train[1]) + 1
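    # number of target classes, assuming the labels in train[1] are coded 0..K-1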
    options['n_y'] = n_y
    
    params = init_params(options,W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred, cost) = build_model(tparams,options)
    
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, [x, mask, y], lr)

    logger.info('Training model...')
    
    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_p = None
    bad_counter = 0    
    uidx = 0  # the number of updates done
    start_time = time.time()
    
    try:
        for eidx in xrange(max_epochs):
            
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]
                                
                x, mask, y = prepare_data(x, y)

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN or Inf detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))
                    
                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(saveto, history_errs=history_errs, **params)
                    
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    
                    use_noise.set_value(0.)
                    
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)
                    history_errs.append([valid_err, test_err, train_err])
                   
                        
                    if (uidx == 0 or
                        valid_err <= np.array(history_errs)[:,0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                        valid_err >= np.array(history_errs)[:-patience,0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            
                            logger.info('Early Stop!')
                            estop = True
                            break

            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)
    
    use_noise.set_value(0.)
    
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)
    
    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))
    
    np.savez(saveto, train_err=train_err,
             valid_err=valid_err, test_err=test_err,
             history_errs=history_errs, **best_p)
    
    logger.info('The code ran for {} epochs, at {} sec/epoch'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    return train_err, valid_err, test_err

def train_classifier(train, valid, test, W, n_words=10000, img_w=300, max_len=40,
    feature_maps=100, filter_hs=[3,4,5], dropout_val=0.5, patience=10,
    max_epochs=20, lrate=0.0002, batch_size=50, valid_batch_size=50, dispFreq=10,
    validFreq=100, saveFreq=200, saveto='trec_cnn_result.npz'):
        
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        img_w : word embedding dimension, must be 300.
        max_len : the maximum length of a sentence 
        feature_maps : the number of feature maps we used 
        filter_hs: the filter window sizes we used
	dropout_val: dropput probability
        patience : Number of epoch to wait before early stop if no progress
        max_epochs : The maximum number of epoch to run
        lrate : learning rate
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of update.
        saveFreq: save the result after this number of update.
        saveto: where to save the result.
    """

    img_h = max_len + 2*(filter_hs[-1]-1)
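    # each sentence is zero-padded by filter_hs[-1]-1 positions on both sides,
    # e.g. with the defaults max_len=40 and filter_hs=[3,4,5]: img_h = 40 + 2*4 = 48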
    
    options = {}
    options['n_words'] = n_words
    options['img_w'] = img_w
    options['img_h'] = img_h
    options['feature_maps'] = feature_maps
    options['filter_hs'] = filter_hs
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    
    logger.info('Model options {}'.format(options))
    
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y
    
    """
    Train a simple conv net
    img_h = sentence length (padded where necessary)
    img_w = word vector length (300 for word2vec)
    filter_hs = filter window sizes    
    """ 

    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
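    # every filter spans the full embedding width (filter_w == img_w), so each pool
    # size is (img_h - filter_h + 1, 1): the whole feature map is collapsed to a
    # single value, i.e. max-over-time pooling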
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h-filter_h+1, img_w-filter_w+1))
        
    options['filter_shapes'] = filter_shapes
    options['pool_sizes'] = pool_sizes
    
    params = init_params(options,W)
    tparams = init_tparams(params)

    (use_noise, x, y, f_pred_prob, f_pred, cost) = build_model(tparams,options)
    
    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, [x, y], lr)

    logger.info('Training model...')
    
    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_p = None
    bad_counter = 0    
    uidx = 0  # the number of updates done
    start_time = time.time()
    
    zero_vec_tensor = tensor.vector()
    zero_vec = np.zeros(img_w).astype(theano.config.floatX)
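    # set_zero resets the embedding row of the padding token (index n_words-1) to
    # zeros; it is called after every parameter update so that row never drifts
    # away from zero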
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(tparams['Wemb'],
                                         tensor.set_subtensor(tparams['Wemb'][n_words-1, :],
                                                              zero_vec_tensor))])
    
    try:
        for eidx in xrange(max_epochs):

            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(dropout_val)

                y = np.array([train[1][t] for t in train_index]).astype('int32')
                x = [train[0][t] for t in train_index]
                x = prepare_data(x, max_len, n_words, filter_hs[-1])

                cost = f_grad_shared(x, y)
                f_update(lrate)
                # the padding token's embedding is reset to zero so it is not updated.
                set_zero(zero_vec)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN or Inf detected')
                    return 1., 1., 1.

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))
                    
                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    
                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(saveto, history_errs=history_errs, **params)
                    
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    
                    use_noise.set_value(0.)
                    
                    train_err = pred_error(f_pred, prepare_data, train, kf, max_len, n_words, filter_hs[-1])
                    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid, max_len, n_words, filter_hs[-1])
                    test_err = pred_error(f_pred, prepare_data, test, kf_test, max_len, n_words, filter_hs[-1])
                    history_errs.append([valid_err, test_err, train_err])
                   
                    if (uidx == 0 or
                        valid_err <= np.array(history_errs)[:,0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                        valid_err >= np.array(history_errs)[:-patience,0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            
                            logger.info('Early Stop!')
                            estop = True
                            break

            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)
    
    use_noise.set_value(0.)
    
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted, max_len, n_words, filter_hs[-1])
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid, max_len, n_words, filter_hs[-1])
    test_err = pred_error(f_pred, prepare_data, test, kf_test, max_len, n_words, filter_hs[-1])

    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))
    
    np.savez(saveto, train_err=train_err,
             valid_err=valid_err, test_err=test_err,
             history_errs=history_errs, **best_p)
    
    logger.info('The code ran for {} epochs, at {} sec/epoch'.format(eidx + 1, 
                 (end_time - start_time) / (1. * (eidx + 1))))
    
    return train_err, valid_err, test_err
Example #4
def trainer(train,
            valid,
            test,
            n_chars=33,
            img_w=128,
            max_len=27,
            feature_maps=100,
            filter_hs=[2, 3, 4],
            max_epochs=20,
            gamma=10,
            ncon=50,
            lrate=0.0002,
            batch_size=100,
            dispFreq=10,
            validFreq=100,
            saveto='example.npz'):
    """ train, valid, test : datasets
        n_chars : vocabulary size
        img_w : character embedding dimension.
        max_len : the maximum length of a sentence 
        feature_maps : the number of feature maps we used 
        filter_hs: the filter window sizes we used
        max_epochs : The maximum number of epoch to run
        gamma: hyper-parameter using in ranking
        ncon: the number of negative samples we used for each postive sample
        lrate : learning rate
        batch_size : batch size during training
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation rank score after this number of update.
        saveto: where to save the result.
    """

    img_h = max_len + 2 * (filter_hs[-1] - 1)

    model_options = {}
    model_options['n_chars'] = n_chars
    model_options['img_w'] = img_w
    model_options['img_h'] = img_h
    model_options['feature_maps'] = feature_maps
    model_options['filter_hs'] = filter_hs
    model_options['max_epochs'] = max_epochs
    model_options['gamma'] = gamma
    model_options['ncon'] = ncon
    model_options['lrate'] = lrate
    model_options['batch_size'] = batch_size
    model_options['dispFreq'] = dispFreq
    model_options['validFreq'] = validFreq
    model_options['saveto'] = saveto

    logger.info('Model options {}'.format(model_options))

    logger.info('Building model...')

    filter_w = img_w
    filter_shapes = []
    pool_sizes = []
    for filter_h in filter_hs:
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((img_h - filter_h + 1, img_w - filter_w + 1))

    model_options['filter_shapes'] = filter_shapes
    model_options['pool_sizes'] = pool_sizes

    params = init_params(model_options)
    tparams = init_tparams(params)

    use_noise, inps, cost = build_model(tparams, model_options)

    logger.info('Building encoder...')
    inps_e, feat_x, feat_y = build_encoder(tparams, model_options)
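    # feat_x / feat_y are the encoder outputs for the two input sides; f_emb below
    # evaluates them and is only used at validation/test time to compute ranking scores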

    logger.info('Building functions...')
    f_emb = theano.function(inps_e, [feat_x, feat_y], name='f_emb')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = Adam(tparams, cost, inps, lr)

    logger.info('Training model...')

    uidx = 0
    seed = 1234
    curr = 0
    history_errs = []

    valid_x = prepare_data(valid[0], max_len, n_chars, filter_hs[-1])
    valid_y = prepare_data(valid[1], max_len, n_chars, filter_hs[-1])

    test_x = prepare_data(test[0], max_len, n_chars, filter_hs[-1])
    test_y = prepare_data(test[1], max_len, n_chars, filter_hs[-1])

    zero_vec_tensor = tensor.vector()
    zero_vec = np.zeros(img_w).astype(theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(tparams['Wemb'],
                                         tensor.set_subtensor(
                                             tparams['Wemb'][n_chars - 1, :],
                                             zero_vec_tensor))])

    # Main loop
    for eidx in range(max_epochs):
        prng = RandomState(seed - eidx - 1)

        trainA = train[0]
        trainB = train[1]

        num_samples = len(trainA)

        inds = np.arange(num_samples)
        prng.shuffle(inds)
        numbatches = len(inds) // batch_size
        for minibatch in range(numbatches):
            use_noise.set_value(0.)
            uidx += 1
            conprng = RandomState(seed + uidx + 1)
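            # strided minibatching: inds[minibatch::numbatches] takes every
            # numbatches-th shuffled index, giving disjoint batches of roughly
            # batch_size examples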

            x = [trainA[seq] for seq in inds[minibatch::numbatches]]
            y = [trainB[seq] for seq in inds[minibatch::numbatches]]
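            # draw ncon random contrastive (negative) candidates per positive pair;
            # cy feeds the ranking cost alongside the matched (x, y) pairs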

            cinds = conprng.random_integers(low=0,
                                            high=num_samples - 1,
                                            size=ncon * len(x))
            cy = [trainB[seq] for seq in cinds]

            x = prepare_data(x, max_len, n_chars, filter_hs[-1])
            y = prepare_data(y, max_len, n_chars, filter_hs[-1])
            cy = prepare_data(cy, max_len, n_chars, filter_hs[-1])

            cost = f_grad_shared(x, y, cy)
            f_update(lrate)
            # the padding token's embedding is reset to zero so it is not updated.
            set_zero(zero_vec)

            if np.mod(uidx, dispFreq) == 0:
                logger.info('Epoch {} Update {} Cost {}'.format(
                    eidx, uidx, cost))

            if np.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                logger.info('Computing ranks...')

                feats_x, feats_y = f_emb(valid_x, valid_y)
                (r1, r3, r10, medr, meanr, h_meanr) = rank(feats_x, feats_y)
                history_errs.append([r1, r3, r10, medr, meanr, h_meanr])

                logger.info('Valid Rank:{}, {}, {}, {},{},{}'.format(
                    r1, r3, r10, medr, meanr, h_meanr))

                currscore = r1 + r3 + r10
                if currscore > curr:
                    curr = currscore
                    logger.info('Saving...')
                    params = unzip(tparams)
                    np.savez(saveto, history_errs=history_errs, **params)
                    logger.info('Done...')

    use_noise.set_value(0.)
    zipp(params, tparams)
    logger.info('Final results...')

    feats_x, feats_y = f_emb(valid_x, valid_y)
    (r1, r3, r10, medr, meanr, h_meanr) = rank(feats_x, feats_y)
    logger.info('Valid Rank:{}, {}, {}, {},{},{}'.format(
        r1, r3, r10, medr, meanr, h_meanr))

    feats_x, feats_y = f_emb(test_x, test_y)
    (r1, r3, r10, medr, meanr, h_meanr) = rank(feats_x, feats_y)
    logger.info('Test Rank:{}, {}, {}, {},{},{}'.format(
        r1, r3, r10, medr, meanr, h_meanr))

    # np.savez("./cnn_feats.npz", feats_x=feats_x, feats_y=feats_y)

    return (r1, r3, r10, medr, meanr, h_meanr)
Example #5
                        bad_counter += 1
                        if bad_counter > patience:

                            print('Early Stop!')
                            estop = True
                            break

            if estop:
                break

    except KeyboardInterrupt:
        print('Training interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)

    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

    np.savez(saveto,
             train_err=train_err,
             valid_err=valid_err,