def __init__(self, model, config, optimizer='SGD'):
    self.model = model(config=config)
    if optimizer == 'SGD':
        self.optimizer = SGD(lr=config['learning_rate'],
                             decay=config['weight_decay'],
                             momentum=config['momentum'])
    elif optimizer == 'Adagrad':
        self.optimizer = Adagrad(lr=config['learning_rate'],
                                 decay=config['weight_decay'])
    elif optimizer == 'RMSprop':
        self.optimizer = RMSprop(lr=config['learning_rate'])
    else:
        raise ValueError('unsupported optimizer: {}'.format(optimizer))

    self.config = config
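
The constructor above reads at most three keys from config. A minimal, hypothetical configuration is sketched below; the key names are taken from the code, the values are illustrative, and MyModel stands in for any model class that accepts a config keyword (the full ModelCompiler class appears further down this page).

example_config = {'learning_rate': 0.01, 'weight_decay': 1e-4, 'momentum': 0.9}
compiler = ModelCompiler(MyModel, example_config, optimizer='SGD')
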
Example #2
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Optimization method for finding weights that minimize the loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
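
The examples on this page all hand an RMSprop instance to some training routine. For reference, the update itself is small; below is a minimal NumPy sketch of an RMSprop step (class and parameter names are illustrative and do not match any particular library's API):

import numpy as np

class MinimalRMSprop:
    # Scale each gradient by a running root-mean-square of recent gradients.
    def __init__(self, learning_rate=0.01, rho=0.9, eps=1e-8):
        self.lr, self.rho, self.eps = learning_rate, rho, eps
        self.avg_sq_grad = None

    def update(self, w, grad):
        if self.avg_sq_grad is None:
            self.avg_sq_grad = np.zeros_like(w)
        # exponential moving average of squared gradients
        self.avg_sq_grad = self.rho * self.avg_sq_grad + (1.0 - self.rho) * grad ** 2
        return w - self.lr * grad / (np.sqrt(self.avg_sq_grad) + self.eps)
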
Example #3
def main():

    config = {
        "optimizer": "rnn",
        "problem": "mnist",
        "rollout_length": 100,  # This is 100 in the paper
        "learning_rate": 0.1,
        "decay_rate": 0.9,
        "meta_layers": 2,
        "meta_hidden_size": 20,
        "layers": 2,
        "hidden_size": 100,
        "activation": 'relu',
        "preprocess": True,
        "max_to_keep": 3,
        "retrain": False,
        "dim": 10,
        "range_of_means": 10,
        "range_of_stds": 10,
        "summary_dir": "summary",
        "checkpoint_dir": "data_ckpt",
        "batch_size": 10000,
        "training_iters": 4000,
        "log_iters": 100
    }

    # create the experiments dirs
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])
    # create tensorflow session
    sess = tf.Session()

    # create your data generator
    # create an instance of the model you want
    if config["problem"] == "simple":
        data = SimpleDG(config)
        model = LinearRegressionModel(config)
    elif config["problem"] == "mnist":
        data = MNISTDG(config)
        model = MNISTModel(config)
    else:
        raise ValueError("{} is not a valid problem".format(config["problem"]))

    # create tensorboard logger
    # logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    # trainer = LinearRegressionTrainer(sess, model, data, config, logger)

    sess.run(tf.global_variables_initializer())

    if config["optimizer"] == "sgd":
        optim = SGD(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rms":
        optim = RMSprop(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rnn":
        optim = RNNOptimizer(config)
        losses = learn(optim, model, config["rollout_length"])

        if config["retrain"]:
            optim.train(losses, sess, data)
        else:
            optim.load(sess)
    else:
        raise ValueError("{} is not a valid optimizer".format(
            config["optimizer"]))

    # initialize variables in optimizee
    # (can't initialize all here because it would potentially overwrite the trained optimizer)
    sess.run(
        tf.variables_initializer([
            var
            for var in tf.trainable_variables(scope=optim.__class__.__name__)
        ]))

    x = np.arange(config["rollout_length"] + 1)

    for i in range(3):
        sess.run(
            tf.variables_initializer([
                var for var in tf.trainable_variables(
                    scope=optim.__class__.__name__)
            ]))

        data.refresh_parameters(seed=i)
        data_x, data_y = next(data.next_batch(config["batch_size"]))

        l = sess.run([losses],
                     feed_dict={
                         "input:0": data_x,
                         "label:0": data_y
                     })
        print(l)

        p1, = plt.semilogy(x, l[0], label=config["optimizer"])
        plt.legend(handles=[p1])
        plt.title('Losses')
        plt.show()

        # TODO compare different optimizers

    data.refresh_parameters()

    data_x, data_y = next(data.next_batch(100, mode="train"))
    pred = sess.run(model.prediction,
                    feed_dict={
                        "input:0": data_x,
                        "label:0": data_y
                    })
    print(
        list(
            zip(pred, np.argmax(data_y, axis=1), pred == np.argmax(data_y,
                                                                   axis=1))))

    # calculate accuracy on train and test data
    seed = np.random.randint(low=0, high=1e6)
    data.refresh_parameters(seed=seed)
    data_x, data_y = next(data.next_batch(5000, mode="train"))
    acc = sess.run(model.accuracy,
                   feed_dict={
                       "input:0": data_x,
                       "label:0": data_y
                   })
    print("Train accuracy: {}".format(acc))

    data_x, data_y = next(data.next_batch(5000, mode="test"))
    acc = sess.run(model.accuracy,
                   feed_dict={
                       "input:0": data_x,
                       "label:0": data_y
                   })
    print("Test accuracy: {}".format(acc))
def train_classifier(train,
                     valid,
                     test,
                     W,
                     n_p=10,
                     n_words=10000,
                     n_x=300,
                     n_h=200,
                     patience=10,
                     max_epochs=50,
                     lrate=0.001,
                     n_train=10000,
                     optimizer='RMSprop',
                     batch_size=50,
                     valid_batch_size=50,
                     dispFreq=10,
                     validFreq=100,
                     saveFreq=500,
                     eps=1e-3):
    """ train, valid, test : datasets
        W : the word embedding initialization
        n_words : vocabulary size
        n_x : word embedding dimension
        n_h : LSTM/GRU number of hidden units 
        n_z : latent embedding sapce for a sentence 
        patience : Number of epoch to wait before early stop if no progress
        max_epochs : The maximum number of epoch to run
        lrate : learning rate
        optimizer : methods to do optimization
        batch_size : batch size during training
        valid_batch_size : The batch size used for validation/test set
        dispFreq : Display to stdout the training progress every N updates
        validFreq : Compute the validation error after this number of update.
    """

    options = {}
    options['n_p'] = n_p
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['optimizer'] = optimizer
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq

    #if config.method in ['SVGD', 'SVGD_KFAC']: patience = 5

    logger.info('Model options {}'.format(options))

    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')

    assert np.min(train[1]) == 0 and np.max(train[1]) == 1
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y

    params = init_params(options, W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred, cost,
     cache) = build_model(tparams, options)

    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')

    if config.method == 'pSGLD':
        f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, y],
                                        ntrain_theano, lr_theano)
    elif config.method == 'SGLD':
        f_grad_shared, f_update = SGLD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano)
    elif config.method == 'RMSprop':
        f_grad_shared, f_update = RMSprop(tparams, cost, [x, mask, y],
                                          lr_theano)
    elif config.method == 'SVGD':
        f_grad_shared, f_update = SVGD(tparams,
                                       cost, [x, mask, y],
                                       ntrain_theano,
                                       lr_theano,
                                       kfac=False)
    elif config.method == 'SVGD_KFAC':
        f_grad_shared, f_update = SVGD(tparams,
                                       cost, [x, mask, y],
                                       ntrain_theano,
                                       lr_theano,
                                       kfac=True,
                                       average=True,
                                       cache=cache,
                                       eps=eps,
                                       n_p=n_p)
    elif config.method == 'MIXTURE_KFAC':
        f_grad_shared, f_update = SVGD(tparams,
                                       cost, [x, mask, y],
                                       ntrain_theano,
                                       lr_theano,
                                       kfac=True,
                                       average=False,
                                       cache=cache,
                                       eps=eps,
                                       n_p=n_p)

    #print 'Training model...'
    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_train_err, best_valid_err, best_test_err = 0., 0., 0.
    bad_counter = 0
    uidx = 0  # the number of update done
    start_time = time.time()

    n_average = 0
    train_probs = np.zeros((len(train[0]), n_y))
    valid_probs = np.zeros((len(valid[0]), n_y))
    test_probs = np.zeros((len(test[0]), n_y))

    try:
        for eidx in xrange(max_epochs):
            print tparams.keys()
            from optimizers import sqr_dist
            ##['Wemb', 'lstm_encoder_W', 'lstm_encoder_U', 'lstm_encoder_rev_W', 'lstm_encoder_rev_U', 'Wy']
            tv = tensor.flatten(tparams['Wy'], 2)
            ftv = theano.function([], sqr_dist(tv, tv))
            otv = ftv()
            print(np.min(otv), np.max(otv), np.mean(otv), np.median(otv),
                  np.sum(otv**2) / n_p)

            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                #use_noise.set_value(0.5)
                use_noise.set_value(config.dropout)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                if config.method == 'RMSprop':
                    f_update(lrate)
                elif config.method in ['SVGD', 'pSGLD', 'SGLD']:
                    f_update(lrate, n_train)
                elif config.method in ['SVGD_KFAC', 'MIXTURE_KFAC']:
                    f_update(lrate, n_train, x, mask, y)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    estop = True
                    break

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(
                        eidx, uidx, cost))

                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    saveto = 'results/%s.npz' % save_prefix
                    np.savez(saveto, history_errs=history_errs)

                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)

                    if eidx < 1:
                        train_err = pred_error(f_pred, prepare_data, train, kf)
                        valid_err = pred_error(f_pred, prepare_data, valid,
                                               kf_valid)
                        test_err = pred_error(f_pred, prepare_data, test,
                                              kf_test)
                        history_errs.append([valid_err, test_err, train_err])
                    else:
                        train_probs_curr = pred_probs(f_pred_prob,
                                                      prepare_data, train, kf,
                                                      options)
                        valid_probs_curr = pred_probs(f_pred_prob,
                                                      prepare_data, valid,
                                                      kf_valid, options)
                        test_probs_curr = pred_probs(f_pred_prob, prepare_data,
                                                     test, kf_test, options)
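                        # running average over validation checkpoints:
                        # avg_new = (n_average * avg_old + p_current) / (n_average + 1);
                        # for SGLD/pSGLD/SVGD this averages the predictive
                        # probabilities over the parameter samples seen so far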
                        train_probs = (n_average * train_probs +
                                       train_probs_curr) / (n_average + 1)
                        valid_probs = (n_average * valid_probs +
                                       valid_probs_curr) / (n_average + 1)
                        test_probs = (n_average * test_probs +
                                      test_probs_curr) / (n_average + 1)
                        n_average += 1

                        train_pred = train_probs.argmax(axis=1)
                        valid_pred = valid_probs.argmax(axis=1)
                        test_pred = test_probs.argmax(axis=1)

                        train_err = (train_pred == np.array(train[1])).sum()
                        train_err = 1. - numpy_floatX(train_err) / len(
                            train[0])

                        valid_err = (valid_pred == np.array(valid[1])).sum()
                        valid_err = 1. - numpy_floatX(valid_err) / len(
                            valid[0])

                        test_err = (test_pred == np.array(test[1])).sum()
                        test_err = 1. - numpy_floatX(test_err) / len(test[0])
                        history_errs.append([valid_err, test_err, train_err])

                    if (uidx == 0 or
                            valid_err <= np.array(history_errs)[:, 0].min()):

                        best_train_err = train_err
                        best_valid_err = valid_err
                        best_test_err = test_err
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(
                        train_err, valid_err, test_err))

                    if (len(history_errs) > patience and valid_err >=
                            np.array(history_errs)[:-patience, 0].min()):
                        #valid_err >= np.array(history_errs)[:-patience,0].mean()):
                        bad_counter += 1
                        if bad_counter > patience:

                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))

            if estop:
                break

    except KeyboardInterrupt:

        logger.info('Training interrupted')

    end_time = time.time()
    logger.info('Train {} Valid {} Test {}'.format(best_train_err,
                                                   best_valid_err,
                                                   best_test_err))

    saveto = 'results/%s.npz' % save_prefix
    np.savez(saveto,
             train_err=best_train_err,
             valid_err=best_valid_err,
             test_err=best_test_err,
             history_errs=history_errs)

    logger.info('The code run for {} epochs, with {} sec/epochs'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    #print >> sys.stderr, ('Training took %.1fs' %
    #                      (end_time - start_time))
    return best_train_err, best_valid_err, best_test_err
class ModelCompiler(object):
    def __init__(self, model, config, optimizer='SGD'):
        self.model = model(config=config)
        if optimizer == 'SGD':
            self.optimizer = SGD(lr=config['learning_rate'],
                                 decay=config['weight_decay'],
                                 momentum=config['momentum'])
        elif optimizer == 'Adagrad':
            self.optimizer = Adagrad(lr=config['learning_rate'],
                                     decay=config['weight_decay'])
        elif optimizer == 'RMSprop':
            self.optimizer = RMSprop(lr=config['learning_rate'])
        else:
            raise ValueError('unsupported optimizer: {}'.format(optimizer))

        self.config = config

    def share_var(self, data_xy, testing=False, borrow=True):
        if testing:
            assert isinstance(data_xy, np.ndarray), "test data must be a numpy array"
            shared_x = theano.shared(np.asarray(data_xy, dtype=theano.config.floatX), borrow=borrow)
            return shared_x
        else:  # training
            assert isinstance(data_xy, tuple), "training data must be an (x, y) tuple"
            data_x, data_y = data_xy
            shared_x = theano.tensor._shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
            shared_y = theano.tensor._shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
            # labels are stored as floatX (GPU-friendly shared storage) and cast back to int32 for indexing
            return shared_x, T.cast(shared_y, 'int32')

    def _train_by_sentence_init_(self, x_train, y_train, x_val, y_val, l_t, l_v):
        #x_train, y_train, x_val, y_val = [], [], [], []
        #for each in train:
        #    x_train.extend(each['data'].tolist())
        #    y_train.extend(each['label'].tolist())
        #for each in val:
        #    x_val.extend(each['data'].tolist())
        #    y_val.extend(each['label'].tolist())

        #x_train = np.asarray(x_train).astype('float32')
        #y_train = np.asarray(y_train).astype('int32')
        #x_val = np.asarray(x_val).astype('float32')
        #y_val = np.asarray(y_val).astype('int32')

        self.learning_rate_decay = self.config['learning_rate_decay']
        train_set_x, train_set_y = self.share_var((x_train,y_train))
        valid_set_x, valid_set_y = self.share_var((x_val,y_val))

        #batch_size = self.model.batch_size
        #n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

        #self.n_train_batches = n_train_batches
        l_t= T.cast(theano.tensor._shared(np.asarray(l_t,dtype=theano.config.floatX),borrow=True),'int32')
        l_v= T.cast(theano.tensor._shared(np.asarray(l_v,dtype=theano.config.floatX),borrow=True),'int32')
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors
        train_model = theano.function(
                    inputs=[index],
                    outputs=[cost,errors],
                    updates=self.optimizer.get_updates(params=params,cost=cost),
                    givens={
                        x: train_set_x[l_t[index]:l_t[index+1]],
                        y: train_set_y[l_t[index]:l_t[index+1]]
                        }
                    )

        #n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        #self.n_valid_batches = n_valid_batches
        validate_model = theano.function(
                    inputs=[index],
                    outputs=errors,
                    givens={
                        x: valid_set_x[l_v[index]:l_v[index+1]],
                        y: valid_set_y[l_v[index]:l_v[index+1]]
                        }
                    )

        return train_model, validate_model

    def train_by_sentence(self, x_train, y_train, x_val, y_val, index_train,index_val, save_model=False):
        """
        - train: {name:'sentenceID', data:[features], label:[labels]}
        """
        #train_model, validate_model = self._train_by_order_init_(train, val)
        train_model, validate_model = self._train_by_sentence_init_(x_train, y_train, x_val, y_val,index_train,index_val)
        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = len(index_train) - 1  # min(self.n_train_batches, patience / 2)
        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print 'start training...'
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(len(index_train)-1):
                minibatch_avg_cost,train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * (len(index_train)-1) + minibatch_index
                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i) for i in xrange(len(index_val)-1)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f ' %(epoch,str(n_epochs),minibatch_avg_cost,(this_train_acc),(this_val_acc)))

                    t_cost.append(round(minibatch_avg_cost,5))
                    t_acc.append(round(this_train_acc,5))
                    v_acc.append(round(this_val_acc,5))


                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            #print "best val acc at epoch %i is %.4f" %(epoch,best_val_acc)
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc,3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)
                            #print "model saved at epoch %i" %(epoch)

                    if this_validation_loss < best_validation_loss:
                        if (this_validation_loss < best_validation_loss * improvement_threshold):
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter
        #            if this_train_acc - this_val_acc >0.05:
        #                done_looping = True
        #                break
        #        if patience <= iter:
        #            done_looping = True
        #            break
            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
                      'training loss' : t_cost,
                      'training accuracy' : t_acc,
                      'validation accuracy' : v_acc }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n obtained at iteration %i, with test performance %f %%') %(best_validation_loss * 100., best_iter + 1, test_score * 100.))
    #print >> sys.stderr,('The code for file '+os.path.split(__file__)[1] +' ran for %.2fm' % ((end_time - start_time) / 60.))


    def _train_without_val_init_(self, train_set_x, train_set_y):
        self.learning_rate_decay = self.config['learning_rate_decay']
        batch_size = self.model.batch_size
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_train_batches = n_train_batches
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors
        train_model = theano.function(
                    inputs=[index],
                    outputs=[cost,errors],
                    updates=self.optimizer.get_updates(params=params,cost=cost),
                    givens={
                        x: train_set_x[index * batch_size: (index + 1) * batch_size],
                        y: train_set_y[index * batch_size: (index + 1) * batch_size]
                        }
                    )
        return train_model

    def train_without_val(self, train_set_x, train_set_y, save_model=False):
        train_model = self._train_without_val_init_(train_set_x, train_set_y)
        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = self.n_train_batches
        best_validation_loss = np.inf
        best_train_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print 'start training...'
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost,train_acc = train_model(minibatch_index)
                this_train_acc = 1 - train_acc
            print('epoch %i/%s, cost %.4f , train acc %.4f ' %(epoch,str(n_epochs),minibatch_avg_cost,(this_train_acc)))

            if save_model:
                if this_train_acc > best_train_acc:
                    best_train_acc = this_train_acc
                    #print "best val acc at epoch %i is %.4f" %(epoch,best_val_acc)
                    folder = "./snapshot_{0}_{1}/".format(epoch, round(best_train_acc,3))
                    os.mkdir(folder)
                    tools.save_weights(self.layers, folder, epoch)
                    #print "model saved at epoch %i" %(epoch)

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        end_time = time.clock()

    def _train_init_(self, train_set_x, train_set_y, valid_set_x,valid_set_y):
        self.learning_rate_decay = self.config['learning_rate_decay']
        batch_size = self.model.batch_size
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_train_batches = n_train_batches
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors
        train_model = theano.function(
                    inputs=[index],
                    outputs=[cost,errors],
                    updates=self.optimizer.get_updates(params=params,cost=cost),
                    givens={
                        x: train_set_x[index * batch_size: (index + 1) * batch_size],
                        y: train_set_y[index * batch_size: (index + 1) * batch_size]
                        }
                    )

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_valid_batches = n_valid_batches
        validate_model = theano.function(
                    inputs=[index],
                    outputs=errors,
                    givens={
                        x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                        y: valid_set_y[index * batch_size:(index + 1) * batch_size]
                        }
                    )

        return train_model, validate_model

    def train(self, train_set_x, train_set_y, valid_set_x, valid_set_y, save_model=False):
        train_model, validate_model = self._train_init_(train_set_x, train_set_y, valid_set_x, valid_set_y)
        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = min(self.n_train_batches, patience / 2)
        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print 'start training...'
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost,train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * self.n_train_batches + minibatch_index
                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i) for i in xrange(self.n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f ' %(epoch,str(n_epochs),minibatch_avg_cost,(this_train_acc),(this_val_acc)))

                    t_cost.append(round(minibatch_avg_cost,5))
                    t_acc.append(round(this_train_acc,5))
                    v_acc.append(round(this_val_acc,5))


                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            #print "best val acc at epoch %i is %.4f" %(epoch,best_val_acc)
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc,3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)
                            #print "model saved at epoch %i" %(epoch)

                    if this_validation_loss < best_validation_loss:
                        if (this_validation_loss < best_validation_loss * improvement_threshold):
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter
        #            if this_train_acc - this_val_acc >0.05:
        #                done_looping = True
        #                break
        #        if patience <= iter:
        #            done_looping = True
        #            break
            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
                      'training loss' : t_cost,
                      'training accuracy' : t_acc,
                      'validation accuracy' : v_acc }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n obtained at iteration %i, with test performance %f %%') %(best_validation_loss * 100., best_iter + 1, test_score * 100.))
    #print >> sys.stderr,('The code for file '+os.path.split(__file__)[1] +' ran for %.2fm' % ((end_time - start_time) / 60.))
    def load(self):
        layers = self.model.layers
        dir = self.model.snapshot
        if not os.path.isdir(dir):
            raise IOError('no such snapshot directory: %s' % (dir))

        snapshots = glob.glob(dir+'*.npy')
        e = self.config['e_snapshot']
        tools.load_weights(layers, dir, e)

    def predict_by_sentence(self, test_set_x, index_test, load_model=None, dropout=False):
        assert load_model is not None, "load_model should be True or False"
        #batch_size = self.model.batch_size
        #n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
        #self.n_test_batches = n_test_batches
        test_set_x = self.share_var(test_set_x,testing=True)
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        predict_times = len(index_test)-1
        index_test= T.cast(theano.tensor._shared(np.asarray(index_test,dtype=theano.config.floatX),borrow=True),'int32')
        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot directory: %s' % (dir))

            snapshots = glob.glob(dir+'*.npy')
            #e = os.path.basename(snapshots[0])[-5]
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
                        inputs = [index],
                        outputs = self.model.y_pred,
                        givens={
                            x: test_set_x[index_test[index]:index_test[(index + 1)]],
                            }
                )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.array([])
        DropoutLayer.SetDropoutOff()
        print "predict on %d datas" %(int(n_test))
        for i in xrange(predict_times):
            y_pred = np.concatenate((y_pred,test_model(i)),axis=0)

        return y_pred

    def predict(self, test_set_x, load_model=None, dropout=False):
        assert load_model is not None, "load_model should be True or False"
        #batch_size = self.model.batch_size
        #n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
        #self.n_test_batches = n_test_batches
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot directory: %s' % (dir))

            snapshots = glob.glob(dir+'*.npy')
            #e = os.path.basename(snapshots[0])[-5]
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
                        inputs = [index],
                        outputs = self.model.y_pred,
                        givens={
                            x: test_set_x[index:(index + 1)],
                            }
                )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.zeros(n_test)
        DropoutLayer.SetDropoutOff()
        print "predict on %d datas" %(int(n_test))
        for i in xrange(n_test):
            y_pred[i] = int(test_model(i))

        return y_pred

    def proba(self, X, load_model=None):
        assert load_model is not None, "load_model should be True or False"
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot directory: %s' % (dir))

            snapshots = glob.glob(dir+'*.npy')
            #e = os.path.basename(snapshots[0])[-5]
            e = self.config['e_snapshot']
            tools.load_weights(layers, dir, e)

        prob_model = theano.function(
                        inputs = [index],
                        outputs = self.model.proba,
                        givens={
                            x: X[index:(index + 1)],
                            }
                )
        y_prob = []
        n_test = X.get_value(borrow=True).shape[0]
        DropoutLayer.SetDropoutOff()
        print "getting probability on %d datas" %(int(n_test))
        for i in xrange(n_test):
            y_prob.append(prob_model(i))
        return np.asarray(y_prob).reshape(n_test,y_prob[0].shape[1])
# plt.subplot(1, 4, 2)
# plt.imshow(img[1])
# plt.subplot(1, 4, 3)
# plt.imshow(img[2])
# plt.subplot(1, 4, 4)
# plt.imshow(img[3])

model = MNISTNet()
loss = SoftmaxCrossEntropy(num_class=10)


# define your learning rate scheduler (halves the rate every 1000 iterations)
def func(lr, iteration):
    if iteration % 1000 == 0:
        return lr * 0.5
    else:
        return lr


rms = RMSprop(lr=0.001, decay=0, sheduler_func=func)
l2 = L2(w=0.001)  # L2 regularization with lambda=0.001
model.compile(optimizer=rms, loss=loss, regularization=l2)
train_results, val_results, test_results = model.train(mnist,
                                                       train_batch=30,
                                                       val_batch=1000,
                                                       test_batch=1000,
                                                       epochs=2,
                                                       val_intervals=100,
                                                       test_intervals=300,
                                                       print_intervals=100)
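
# For reference, the L2 term attached above adds lambda * sum(w ** 2) to the
# loss, which shows up in the gradient as 2 * lambda * w (weight decay).
# A minimal sketch, independent of the library used here:
import numpy as np

def l2_penalty(weights, lam=0.001):
    # weights: iterable of parameter arrays; lam: regularization strength
    return lam * sum(np.sum(w ** 2) for w in weights)

def l2_grad(w, lam=0.001):
    # contribution of the penalty to dLoss/dw
    return 2.0 * lam * w
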
# Initial x
x0 = np.array([-2., -1.])

# Some global settings
max_iter = 5000
tol = 1e-8
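
# 'fun' and 'jac' (objective and its gradient) are assumed to be defined
# earlier in the original script. As an illustrative stand-in that matches the
# 2-D starting point above, the Rosenbrock function could be used:
def fun(x):
    # f(x) = (1 - x0)^2 + 100 * (x1 - x0^2)^2, global minimum at (1, 1)
    return (1.0 - x[0]) ** 2 + 100.0 * (x[1] - x[0] ** 2) ** 2

def jac(x):
    # analytic gradient of the Rosenbrock function
    return np.array([
        -2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0] ** 2),
        200.0 * (x[1] - x[0] ** 2),
    ])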

# Optimization methods
gd = GD(fun, jac, lr=0.0005, max_iter=max_iter, tol=tol)
mom1 = GD(fun, jac, lr=0.0005, momentum=0.5, max_iter=max_iter, tol=tol)
mom2 = GD(fun, jac, lr=0.0005, momentum=0.9, max_iter=max_iter, tol=tol)
nest = GD(fun, jac, lr=0.0005, momentum=0.5, nesterov=True, max_iter=max_iter, tol=tol)
agrad = Adagrad(fun, jac, lr=0.1, max_iter=max_iter, tol=tol)
adelta = Adadelta(fun, jac, lr=1., max_iter=max_iter, tol=tol)
rms = RMSprop(fun, jac, lr=0.001, max_iter=max_iter, tol=tol)
adam = Adam(fun, jac, lr=0.01, max_iter=max_iter, tol=tol)

optimizers = [gd, mom1, nest, agrad, adelta, rms, adam]
labels = ['GD', 'Momentum', 'Nesterov', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam']

# Initialise lists for x-values at each iteration, and final x-value for each
# optimisation method
xall = []
xfinal = []
feval = []

# Loop over all optimizers
for opt in optimizers:
    # Minimise the function
    opt.optimize(x0)
Example #8
         backprop_depth=SEQUENCE_LENGTH,
         stateful=True),
    LSTM(size=512,
         input_size=512,
         batch_size=BATCH_SIZE,
         backprop_depth=SEQUENCE_LENGTH,
         stateful=True),
    TimeDistributed(
        Dense(size=EMBEDDING_LENGTH,
              input_size=512,
              activation=SparseSoftmax())))

if RESTORE_MODEL_PATH:
    model.loadParams(RESTORE_MODEL_PATH)

optimizer = RMSprop(learning_rate=lambda n: 0.001)
loss_function = VectorCrossEntropy

model.assignOptimizer(optimizer)

if RESTORE_OPTIMIZER_PATH:
    optimizer.load(RESTORE_OPTIMIZER_PATH)

for epoch in range(INITIAL_EPOCH, NR_OF_EPOCHS + INITIAL_EPOCH):
    loss, accuracy = model.train(makeBatches(source, SEQUENCE_LENGTH,
                                             EMBEDDING_LENGTH),
                                 lossfunc=loss_function)
    model.saveParams(
        f"{MODEL_PATH}{MODEL_NAME}-{epoch:02d}-loss_{loss:.5f}-acc_{accuracy:.5f}.nn"
    )
    optimizer.save(f"{MODEL_PATH}{epoch:02d}-optimizer.json")