def pred_probs(f_pred_prob,
               prepare_data,
               data,
               iterator,
               model_options,
               verbose=False):
    """Run ``f_pred_prob`` over ``data`` in minibatches and collect its outputs.

    f_pred_prob: callable applied to the prepared inputs of one minibatch.
    prepare_data: called as ``prepare_data(xs, ys, n_iter, n_input)``; only
        the first element of the pair it returns is used here.
    data: indexable; ``data[0]`` holds the inputs and ``data[1]`` the targets.
    iterator: iterable of ``(batch_number, sample_indices)`` pairs.
    model_options: mapping providing ``'n_iter'`` and ``'n_input'``.
    verbose: when true, print a progress line after every minibatch.

    Returns a float32 array of shape ``(len(data[0]), 2)``.  Rows whose
    index is never produced by ``iterator`` stay at zero.
    """
    inputs = data[0]
    # Converted once up front instead of once per minibatch.
    targets = numpy.array(data[1])
    n_samples = len(inputs)
    probs = numpy.zeros((n_samples, 2)).astype('float32')

    n_done = 0

    for _, valid_index in iterator:
        x, _ = prepare_data([inputs[t] for t in valid_index],
                            targets[valid_index],
                            model_options['n_iter'], model_options['n_input'])
        probs[valid_index, :] = f_pred_prob(x)

        n_done += len(valid_index)
        if verbose:
            # Single-argument parenthesised form prints the same text under
            # both Python 2 and Python 3.
            print('%d/%d samples classified' % (n_done, n_samples))

    return probs
def R_score(f_pred,
            prepare_data,
            data,
            iterator,
            model_options,
            verbose=False):
    """Compute the coefficient of determination (R^2) of ``f_pred`` on ``data``.

    f_pred: callable returning the predictions for one prepared minibatch.
    prepare_data: called as ``prepare_data(xs, ys, n_iter, n_input)``; only
        the first element of the pair it returns is used here.
    data: indexable; ``data[0]`` holds the inputs and ``data[1]`` the targets.
    iterator: iterable of ``(batch_number, sample_indices)`` pairs.
    model_options: mapping providing ``'n_iter'`` and ``'n_input'``.
    verbose: accepted for signature compatibility; not used.

    Returns ``1 - SS_res / SS_tot`` where both sums run over the samples
    produced by ``iterator`` and the mean is taken over all of ``data[1]``.
    Raises ZeroDivisionError when ``iterator`` yields no minibatch.
    """
    inputs = data[0]
    # Converted once up front instead of several times per minibatch.
    all_targets = numpy.array(data[1])
    data_mean = all_targets.mean()

    residual_ss = 0.
    total_ss = 0.
    for _, valid_index in iterator:
        targets = all_targets[valid_index]
        x, _ = prepare_data([inputs[t] for t in valid_index],
                            targets,
                            model_options['n_iter'], model_options['n_input'])

        preds = f_pred(x)
        # preds.T lets a (batch, 1) prediction column broadcast against the
        # flat target vector.
        residual_ss += ((targets - preds.T) ** 2).sum()
        # Only this minibatch's share of the total sum of squares is added;
        # adding the whole-dataset sum on every iteration would inflate the
        # denominator by the number of minibatches.
        total_ss += ((targets - data_mean) ** 2).sum()

    return 1. - (residual_ss / total_ss)
def pred_probs(f_pred_prob, prepare_data, data, iterator, model_options, verbose=False):
    """Run ``f_pred_prob`` over ``data`` in minibatches and collect its outputs.

    f_pred_prob: callable applied to the prepared inputs of one minibatch.
    prepare_data: called as ``prepare_data(xs, ys, n_iter, n_input, up=True)``;
        only the first element of the pair it returns is used here.
    data: indexable; ``data[0]`` holds the inputs and ``data[1]`` the targets.
    iterator: iterable of ``(batch_number, sample_indices)`` pairs.
    model_options: mapping providing ``"n_iter"`` and ``"n_input"``.
    verbose: when true, print a progress line after every minibatch.

    Returns a float32 array of shape ``(len(data[0]), 2)``.  Rows whose
    index is never produced by ``iterator`` stay at zero.
    """
    inputs = data[0]
    # Converted once up front instead of once per minibatch.
    targets = numpy.array(data[1])
    n_samples = len(inputs)
    probs = numpy.zeros((n_samples, 2)).astype("float32")

    n_done = 0

    for _, valid_index in iterator:
        x, _ = prepare_data(
            [inputs[t] for t in valid_index],
            targets[valid_index],
            model_options["n_iter"],
            model_options["n_input"],
            up=True,
        )
        probs[valid_index, :] = f_pred_prob(x)

        n_done += len(valid_index)
        if verbose:
            # Single-argument parenthesised form prints the same text under
            # both Python 2 and Python 3.
            print("%d/%d samples classified" % (n_done, n_samples))

    return probs
def pred_error(f_pred,
               prepare_data,
               data,
               iterator,
               model_options,
               verbose=False):
    """Return the fraction of samples whose arg-max prediction is wrong.

    f_pred: callable returning, for one prepared minibatch, an array whose
        ``argmax(axis=1)`` is the predicted label of each sample.
    prepare_data: called as ``prepare_data(xs, ys, n_iter, n_input, up=True)``;
        only the first element of the pair it returns is used here.
    data: indexable; ``data[0]`` holds the inputs and ``data[1]`` the labels.
    iterator: iterable of ``(batch_number, sample_indices)`` pairs.
    model_options: mapping providing ``'n_iter'`` and ``'n_input'``.
    verbose: accepted for signature compatibility; not used.

    Returns a Python float: misclassified samples seen through ``iterator``
    divided by ``len(data[0])``.
    """
    inputs = data[0]
    # Converted once up front instead of twice per minibatch.
    all_targets = numpy.array(data[1])

    n_wrong = 0
    for _, valid_index in iterator:
        targets = all_targets[valid_index]
        x, _ = prepare_data([inputs[t] for t in valid_index],
                            targets,
                            model_options['n_iter'],
                            model_options['n_input'],
                            up=True)

        preds = f_pred(x).argmax(axis=1)
        # Plain NumPy comparison: the operands are already concrete arrays,
        # so no symbolic graph has to be built and evaluated, and an empty
        # iterator simply leaves the count at zero.
        n_wrong += numpy.not_equal(targets, preds).sum()

    return float(n_wrong) / float(len(inputs))
def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False):
    """Return the summed squared prediction error divided by ``len(data[0])``.

    f_pred: callable returning the predictions for one prepared minibatch.
    prepare_data: called as ``prepare_data(xs, ys, n_iter, n_input)``; only
        the first element of the pair it returns is used here.
    data: indexable; ``data[0]`` holds the inputs and ``data[1]`` the targets.
    iterator: iterable of ``(batch_number, sample_indices)`` pairs.
    model_options: mapping providing ``'n_iter'`` and ``'n_input'``.
    verbose: accepted for signature compatibility; not used.

    When ``iterator`` covers every sample exactly once the result is the
    mean squared error.
    """
    inputs = data[0]
    # Converted once up front instead of twice per minibatch.
    all_targets = numpy.array(data[1])

    squared_err = 0.
    for _, valid_index in iterator:
        targets = all_targets[valid_index]
        x, _ = prepare_data([inputs[t] for t in valid_index],
                            targets,
                            model_options['n_iter'], model_options['n_input'])

        preds = f_pred(x)
        # preds.T lets a (batch, 1) prediction column broadcast against the
        # flat target vector.  Plain NumPy arithmetic: the operands are
        # concrete arrays, so no symbolic graph needs to be built/evaluated.
        squared_err += ((targets - preds.T) ** 2).sum()

    return squared_err / len(inputs)
def pred_error(f_pred, prepare_data, data, iterator, model_options, verbose=False):
    """Return the fraction of samples whose arg-max prediction is wrong.

    f_pred: callable returning, for one prepared minibatch, an array whose
        ``argmax(axis=1)`` is the predicted label of each sample.
    prepare_data: called as ``prepare_data(xs, ys, n_iter, n_input, up=True)``;
        only the first element of the pair it returns is used here.
    data: indexable; ``data[0]`` holds the inputs and ``data[1]`` the labels.
    iterator: iterable of ``(batch_number, sample_indices)`` pairs.
    model_options: mapping providing ``"n_iter"`` and ``"n_input"``.
    verbose: accepted for signature compatibility; not used.

    Returns a Python float: misclassified samples seen through ``iterator``
    divided by ``len(data[0])``.
    """
    inputs = data[0]
    # Converted once up front instead of twice per minibatch.
    all_targets = numpy.array(data[1])

    n_wrong = 0
    for _, valid_index in iterator:
        targets = all_targets[valid_index]
        x, _ = prepare_data(
            [inputs[t] for t in valid_index],
            targets,
            model_options["n_iter"],
            model_options["n_input"],
            up=True,
        )

        preds = f_pred(x).argmax(axis=1)
        # Plain NumPy comparison: the operands are already concrete arrays,
        # so no symbolic graph has to be built and evaluated, and an empty
        # iterator simply leaves the count at zero.
        n_wrong += numpy.not_equal(targets, preds).sum()

    return float(n_wrong) / float(len(inputs))
def train_lstm(
        dim_proj=32,  # word embeding dimension and LSTM number of hidden units.
        patience=10,  # Number of epoch to wait before early stop if no progress
        max_epochs=150,  # The maximum number of epoch to run
        dispFreq=10,  # Display to stdout the training progress every N updates
        decay_c=0.,  # Weight decay for the classifier applied to the U weights.
        lrate=0.1,  # Learning rate for sgd (not used for adadelta and rmsprop)
        n_input=4,  # Vocabulary size
        optimizer=mom_sgd,  # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
        encoder='lstm',  # TODO: can be removed must be lstm.
        saveto='lstm_model.npz',  # The best model will be saved there
        validFreq=170,  # Compute the validation error after this number of update.
        saveFreq=1110,  # Save the parameters after every saveFreq updates
        maxlen=100,  # Sequence longer then this get ignored
        batch_size=16,  # The batch size during training.
        valid_batch_size=64,  # The batch size used for validation/test set.
        dataset='imdb',

        # Parameter for extra option
        noise_std=0.,
        use_dropout=False,  # if False slightly faster, but worst test error
        # This frequently need a bigger model.
    reload_model="",  # Path to a saved model we want to start from.
        sum_pool=False,
        mom_start=0.5,
        mom_end=0.99,
        mom_epoch_interval=100,
        learning_rate_decay=0.9995):

    # Model options
    model_options = locals().copy()
    print "model options", model_options

    print 'Loading data'
    ydim = 1
    n_iter = 10

    train, valid, test = read_data(max_len=n_iter)

    #YDIM??
    #number of labels (output)

    model_options['ydim'] = ydim
    model_options['n_iter'] = n_iter

    theano.config.optimizer = 'None'

    print 'Building model'
    # This create the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params('lstm_model.npz', params)

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, y, f_pred_prob, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U']**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=tparams.values())
    f_grad = theano.function([x, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads, x, y, cost)

    print 'Optimization'

    kf_valid = get_minibatches_idx(len(valid[0]),
                                   valid_batch_size,
                                   shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, shuffle=True)

    print "%d train examples" % len(train[0])
    print "%d valid examples" % len(valid[0])
    print "%d test examples" % len(test[0])
    history_errs = []
    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.clock()
    mom = 0

    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray formet.
                # It return something of the shape (minibatch maxlen, n samples)
                x, y = prepare_data(x, y, model_options['n_iter'],
                                    model_options['n_input'])

                if x is None:
                    print 'Minibatch with zero sample under length ', maxlen
                    continue
                n_samples += x.shape[1]
                if eidx < model_options['mom_epoch_interval']:
                    mom = model_options['mom_start']*\
                    (1.0 - eidx/model_options['mom_epoch_interval'])\
                      + mom_end*(eidx/model_options['mom_epoch_interval'])
                else:
                    mom = mom_end

                cost = f_grad_shared(x, y)
                f_update(lrate, mom)

                #decay
                lrate = learning_rate_decay * lrate

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost

                if numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print 'Done'

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    #train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options)
                    valid_err = pred_error(f_pred_prob, prepare_data, valid,
                                           kf_valid, model_options)
                    test_err = pred_error(f_pred_prob, prepare_data, test,
                                          kf_test, model_options)
                    r_score = R_score(f_pred_prob, prepare_data, test, kf_test,
                                      model_options)

                    history_errs.append([valid_err, test_err])

                    if (uidx == 0 or valid_err <=
                            numpy.array(history_errs)[:, 0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    print('Valid ', valid_err, 'Test ', test_err, 'R_score ',
                          r_score)

                    if (len(history_errs) > patience and valid_err >=
                            numpy.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print 'Early Stop!'
                            estop = True
                            break

            print 'Seen %d samples' % n_samples

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.clock()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options)
    valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid,
                           model_options)
    test_err = pred_error(f_pred_prob, prepare_data, test, kf_test,
                          model_options)
    r_score = R_score(f_pred_prob, prepare_data, test, kf_test, model_options)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err, 'R2 score ', r_score

    numpy.savez(saveto,
                train_err=train_err,
                valid_err=valid_err,
                test_err=test_err,
                history_errs=history_errs,
                **best_p)
    print 'The code run for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
    print >> sys.stderr, ('Training took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err
def train_lstm(
    dim_proj=32,  # word embeding dimension and LSTM number of hidden units.
    patience=10,  # Number of epoch to wait before early stop if no progress
    max_epochs=150,  # The maximum number of epoch to run
    dispFreq=10,  # Display to stdout the training progress every N updates
    decay_c=0.0,  # Weight decay for the classifier applied to the U weights.
    lrate=0.1,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_input=4,  # Vocabulary size
    optimizer=mom_sgd,  # sgd,mom_sgs, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
    encoder="lstm",  # TODO: can be removed must be lstm.
    saveto="lstm_model.npz",  # The best model will be saved there
    validFreq=170,  # Compute the validation error after this number of update.
    saveFreq=1110,  # Save the parameters after every saveFreq updates
    maxlen=100,  # Sequence longer then this get ignored
    batch_size=16,  # The batch size during training.
    valid_batch_size=64,  # The batch size used for validation/test set.
    dataset="imdb",
    # Parameter for extra option
    noise_std=0.0,
    use_dropout=False,  # if False slightly faster, but worst test error
    # This frequently need a bigger model.
    reload_model="",  # Path to a saved model we want to start from.
    sum_pool=False,
    mom_start=0.5,
    mom_end=0.99,
    mom_epoch_interval=300,
    learning_rate_decay=0.99995,
):

    # Model options
    model_options = locals().copy()
    print "model options", model_options

    print "Loading data"
    ydim = 2
    n_iter = 10

    train, valid, test, mean, std = read_data(max_len=n_iter, up=True)

    # YDIM??
    # number of labels (output)

    model_options["ydim"] = ydim
    model_options["n_iter"] = n_iter

    theano.config.optimizer = "None"

    print "Building model"
    # This create the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params("lstm_model.npz", params)

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, y, f_pred_prob, cost) = build_model(tparams, model_options)

    if decay_c > 0.0:
        decay_c = theano.shared(numpy.float32(decay_c), name="decay_c")
        weight_decay = 0.0
        weight_decay += (tparams["U"] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, y], cost, name="f_cost")

    grads = tensor.grad(cost, wrt=tparams.values())
    f_grad = theano.function([x, y], grads, name="f_grad")

    lr = tensor.scalar(name="lr")
    f_grad_shared, f_update = optimizer(lr, tparams, grads, x, y, cost)

    print "Optimization"

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size, shuffle=True)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size, shuffle=True)

    print "%d train examples" % len(train[0])
    print "%d valid examples" % len(valid[0])
    print "%d test examples" % len(test[0])
    history_errs = []
    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.clock()
    mom = 0

    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.0)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Get the data in numpy.ndarray formet.
                # It return something of the shape (minibatch maxlen, n samples)
                x, y = prepare_data(x, y, model_options["n_iter"], model_options["n_input"], up=True)

                if x is None:
                    print "Minibatch with zero sample under length ", maxlen
                    continue
                n_samples += x.shape[1]
                if eidx < model_options["mom_epoch_interval"]:
                    mom = model_options["mom_start"] * (1.0 - eidx / model_options["mom_epoch_interval"]) + mom_end * (
                        eidx / model_options["mom_epoch_interval"]
                    )
                else:
                    mom = mom_end

                cost = f_grad_shared(x, y)
                f_update(lrate, mom)

                # decay
                lrate = learning_rate_decay * lrate

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print "NaN detected"
                    return 1.0, 1.0, 1.0

                if numpy.mod(uidx, dispFreq) == 0:
                    print "Epoch ", eidx, "Update ", uidx, "Cost ", cost

                if numpy.mod(uidx, saveFreq) == 0:
                    print "Saving...",

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open("%s.pkl" % saveto, "wb"), -1)
                    print "Done"

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.0)
                    # train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options)
                    valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options)
                    test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options)

                    history_errs.append([valid_err, test_err])

                    if uidx == 0 or valid_err <= numpy.array(history_errs)[:, 0].min():

                        best_p = unzip(tparams)
                        bad_counter = 0

                    print ("Valid ", valid_err, "Test ", test_err)

                    if len(history_errs) > patience and valid_err >= numpy.array(history_errs)[:-patience, 0].min():
                        bad_counter += 1
                        if bad_counter > patience:
                            print "Early Stop!"
                            estop = True
                            break

            print "Seen %d samples" % n_samples

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.clock()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.0)
    train_err = pred_error(f_pred_prob, prepare_data, train, kf, model_options)
    valid_err = pred_error(f_pred_prob, prepare_data, valid, kf_valid, model_options)
    test_err = pred_error(f_pred_prob, prepare_data, test, kf_test, model_options)

    print "Train ", train_err, "Valid ", valid_err, "Test ", test_err

    numpy.savez(
        saveto, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **best_p
    )
    print "The code run for %d epochs, with %f sec/epochs" % ((eidx + 1), (end_time - start_time) / (1.0 * (eidx + 1)))
    print >> sys.stderr, ("Training took %.1fs" % (end_time - start_time))
    return train_err, valid_err, test_err