def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Optimization method for finding weights that minimize the loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
def main():
    config = {
        "optimizer": "rnn",
        "problem": "mnist",
        "rollout_length": 100,  # This is 100 in the paper
        "learning_rate": 0.1,
        "decay_rate": 0.9,
        "meta_layers": 2,
        "meta_hidden_size": 20,
        "layers": 2,
        "hidden_size": 100,
        "activation": 'relu',
        "preprocess": True,
        "max_to_keep": 3,
        "retrain": False,
        "dim": 10,
        "range_of_means": 10,
        "range_of_stds": 10,
        "summary_dir": "summary",
        "checkpoint_dir": "data_ckpt",
        "batch_size": 10000,
        "training_iters": 4000,
        "log_iters": 100
    }

    # create the experiments dirs
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])

    # create tensorflow session
    sess = tf.Session()

    # create your data generator and an instance of the model you want
    if config["problem"] == "simple":
        data = SimpleDG(config)
        model = LinearRegressionModel(config)
    elif config["problem"] == "mnist":
        data = MNISTDG(config)
        model = MNISTModel(config)
    else:
        raise ValueError("{} is not a valid problem".format(config["problem"]))

    # create tensorboard logger
    # logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    # trainer = LinearRegressionTrainer(sess, model, data, config, logger)

    sess.run(tf.global_variables_initializer())

    if config["optimizer"] == "sgd":
        optim = SGD(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rms":
        optim = RMSprop(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rnn":
        optim = RNNOptimizer(config)
        losses = learn(optim, model, config["rollout_length"])
        if config["retrain"]:
            optim.train(losses, sess, data)
        else:
            optim.load(sess)
    else:
        raise ValueError("{} is not a valid optimizer".format(config["optimizer"]))

    # initialize variables in the optimizee
    # (can't initialize all here because it would potentially overwrite the trained optimizer)
    sess.run(tf.variables_initializer(
        [var for var in tf.trainable_variables(scope=optim.__class__.__name__)]))

    x = np.arange(config["rollout_length"] + 1)
    for i in range(3):
        sess.run(tf.variables_initializer(
            [var for var in tf.trainable_variables(scope=optim.__class__.__name__)]))
        data.refresh_parameters(seed=i)
        data_x, data_y = next(data.next_batch(config["batch_size"]))
        l = sess.run([losses], feed_dict={"input:0": data_x, "label:0": data_y})
        print(l)
        p1, = plt.semilogy(x, l[0], label=config["optimizer"])
        plt.legend(handles=[p1])

    plt.title('Losses')
    plt.show()

    # TODO compare different optimizers
    data.refresh_parameters()
    data_x, data_y = next(data.next_batch(100, mode="train"))
    pred = sess.run(model.prediction, feed_dict={"input:0": data_x, "label:0": data_y})
    print(list(zip(pred,
                   np.argmax(data_y, axis=1),
                   pred == np.argmax(data_y, axis=1))))

    # calculate accuracy on train and test data
    seed = np.random.randint(low=0, high=1e6)
    data.refresh_parameters(seed=seed)
    data_x, data_y = next(data.next_batch(5000, mode="train"))
    acc = sess.run(model.accuracy, feed_dict={"input:0": data_x, "label:0": data_y})
    print("Train accuracy: {}".format(acc))

    data_x, data_y = next(data.next_batch(5000, mode="test"))
    acc = sess.run(model.accuracy, feed_dict={"input:0": data_x, "label:0": data_y})
    print("Test accuracy: {}".format(acc))
def train_classifier(train, valid, test, W, n_p=10, n_words=10000, n_x=300,
                     n_h=200, patience=10, max_epochs=50, lrate=0.001,
                     n_train=10000, optimizer='RMSprop', batch_size=50,
                     valid_batch_size=50, dispFreq=10, validFreq=100,
                     saveFreq=500, eps=1e-3):
    """
    train, valid, test : datasets
    W : the word embedding initialization
    n_words : vocabulary size
    n_x : word embedding dimension
    n_h : LSTM/GRU number of hidden units
    n_z : latent embedding space for a sentence
    patience : number of epochs to wait before early stopping if no progress
    max_epochs : the maximum number of epochs to run
    lrate : learning rate
    optimizer : method used for optimization
    batch_size : batch size during training
    valid_batch_size : batch size used for the validation/test sets
    dispFreq : display the training progress to stdout every N updates
    validFreq : compute the validation error after this number of updates
    """
    options = {}
    options['n_p'] = n_p
    options['n_words'] = n_words
    options['n_x'] = n_x
    options['n_h'] = n_h
    options['patience'] = patience
    options['max_epochs'] = max_epochs
    options['lrate'] = lrate
    options['optimizer'] = optimizer
    options['batch_size'] = batch_size
    options['valid_batch_size'] = valid_batch_size
    options['dispFreq'] = dispFreq
    options['validFreq'] = validFreq
    # if config.method in ['SVGD', 'SVGD_KFAC']: patience = 5

    logger.info('Model options {}'.format(options))
    logger.info('{} train examples'.format(len(train[0])))
    logger.info('{} valid examples'.format(len(valid[0])))
    logger.info('{} test examples'.format(len(test[0])))

    logger.info('Building model...')
    assert np.min(train[1]) == 0 and np.max(train[1]) == 1
    n_y = np.max(train[1]) + 1
    options['n_y'] = n_y

    params = init_params(options, W)
    tparams = init_tparams(params)

    (use_noise, x, mask, y, f_pred_prob, f_pred, cost, cache) = build_model(tparams, options)

    lr_theano = tensor.scalar(name='lr')
    ntrain_theano = tensor.scalar(name='ntrain')

    if config.method == 'pSGLD':
        f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, y],
                                        ntrain_theano, lr_theano)
    elif config.method == 'SGLD':
        f_grad_shared, f_update = SGLD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano)
    elif config.method == 'RMSprop':
        f_grad_shared, f_update = RMSprop(tparams, cost, [x, mask, y], lr_theano)
    elif config.method == 'SVGD':
        f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano, kfac=False)
    elif config.method == 'SVGD_KFAC':
        f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano, kfac=True,
                                       average=True, cache=cache, eps=eps, n_p=n_p)
    elif config.method == 'MIXTURE_KFAC':
        f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y],
                                       ntrain_theano, lr_theano, kfac=True,
                                       average=False, cache=cache, eps=eps, n_p=n_p)

    logger.info('Training model...')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    estop = False  # early stop
    history_errs = []
    best_train_err, best_valid_err, best_test_err = 0., 0., 0.
    bad_counter = 0
    uidx = 0  # the number of updates done
    start_time = time.time()
    n_average = 0

    train_probs = np.zeros((len(train[0]), n_y))
    valid_probs = np.zeros((len(valid[0]), n_y))
    test_probs = np.zeros((len(test[0]), n_y))

    try:
        for eidx in xrange(max_epochs):
            # ['Wemb', 'lstm_encoder_W', 'lstm_encoder_U',
            #  'lstm_encoder_rev_W', 'lstm_encoder_rev_U', 'Wy']
            print(tparams.keys())
            from optimizers import sqr_dist
            tv = tensor.flatten(tparams['Wy'], 2)
            ftv = theano.function([], sqr_dist(tv, tv))
            otv = ftv()
            print(np.min(otv), np.max(otv), np.mean(otv), np.median(otv),
                  np.sum(otv**2) / n_p)

            n_samples = 0
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(config.dropout)

                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)

                if config.method == 'RMSprop':
                    f_update(lrate)
                elif config.method in ['SVGD', 'pSGLD', 'SGLD']:
                    f_update(lrate, n_train)
                elif config.method in ['SVGD_KFAC', 'MIXTURE_KFAC']:
                    f_update(lrate, n_train, x, mask, y)

                if np.isnan(cost) or np.isinf(cost):
                    logger.info('NaN detected')
                    estop = True
                    break

                if np.mod(uidx, dispFreq) == 0:
                    logger.info('Epoch {} Update {} Cost {}'.format(eidx, uidx, cost))

                if np.mod(uidx, saveFreq) == 0:
                    logger.info('Saving ...')
                    saveto = 'results/%s.npz' % save_prefix
                    np.savez(saveto, history_errs=history_errs)
                    logger.info('Done ...')

                if np.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    if eidx < 1:
                        train_err = pred_error(f_pred, prepare_data, train, kf)
                        valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
                        test_err = pred_error(f_pred, prepare_data, test, kf_test)
                        history_errs.append([valid_err, test_err, train_err])
                    else:
                        train_probs_curr = pred_probs(f_pred_prob, prepare_data, train, kf, options)
                        valid_probs_curr = pred_probs(f_pred_prob, prepare_data, valid, kf_valid, options)
                        test_probs_curr = pred_probs(f_pred_prob, prepare_data, test, kf_test, options)

                        train_probs = (n_average * train_probs + train_probs_curr) / (n_average + 1)
                        valid_probs = (n_average * valid_probs + valid_probs_curr) / (n_average + 1)
                        test_probs = (n_average * test_probs + test_probs_curr) / (n_average + 1)
                        n_average += 1

                        train_pred = train_probs.argmax(axis=1)
                        valid_pred = valid_probs.argmax(axis=1)
                        test_pred = test_probs.argmax(axis=1)

                        train_err = (train_pred == np.array(train[1])).sum()
                        train_err = 1. - numpy_floatX(train_err) / len(train[0])
                        valid_err = (valid_pred == np.array(valid[1])).sum()
                        valid_err = 1. - numpy_floatX(valid_err) / len(valid[0])
                        test_err = (test_pred == np.array(test[1])).sum()
                        test_err = 1. - numpy_floatX(test_err) / len(test[0])
                        history_errs.append([valid_err, test_err, train_err])

                    if (uidx == 0 or
                            valid_err <= np.array(history_errs)[:, 0].min()):
                        best_train_err = train_err
                        best_valid_err = valid_err
                        best_test_err = test_err
                        bad_counter = 0

                    logger.info('Train {} Valid {} Test {}'.format(train_err, valid_err, test_err))

                    if (len(history_errs) > patience and
                            valid_err >= np.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            logger.info('Early Stop!')
                            estop = True
                            break

            logger.info('Seen {} samples'.format(n_samples))

            if estop:
                break

    except KeyboardInterrupt:
        logger.info('Training interrupted')

    end_time = time.time()

    logger.info('Train {} Valid {} Test {}'.format(best_train_err, best_valid_err, best_test_err))

    saveto = 'results/%s.npz' % save_prefix
    np.savez(saveto,
             train_err=best_train_err,
             valid_err=best_valid_err,
             test_err=best_test_err,
             history_errs=history_errs)

    logger.info('The code ran for {} epochs, at {} sec/epoch'.format(
        eidx + 1, (end_time - start_time) / (1. * (eidx + 1))))

    return best_train_err, best_valid_err, best_test_err
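# A hypothetical driver for train_classifier (load_corpus is a placeholder name,
# not part of the code above); each split is a (token_id_sequences, labels) pair
# with binary labels, matching the len(train[0]) / np.max(train[1]) usage above:
train, valid, test, W = load_corpus()
best_train_err, best_valid_err, best_test_err = train_classifier(
    train, valid, test, W, max_epochs=50, lrate=0.001, batch_size=50)
print('train/valid/test error: {} {} {}'.format(
    best_train_err, best_valid_err, best_test_err))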
class ModelCompiler(object):

    def __init__(self, model, config, optimizer='SGD'):
        self.model = model(config=config)
        if optimizer == 'SGD':
            self.optimizer = SGD(lr=config['learning_rate'],
                                 decay=config['weight_decay'],
                                 momentum=config['momentum'])
        elif optimizer == 'Adagrad':
            self.optimizer = Adagrad(lr=config['learning_rate'],
                                     decay=config['weight_decay'])
        elif optimizer == 'RMSprop':
            self.optimizer = RMSprop(lr=config['learning_rate'])
        self.config = config

    def share_var(self, data_xy, testing=False, borrow=True):
        if testing:
            assert type(data_xy) == np.ndarray, "using test data in testing step"
            shared_x = theano.shared(np.asarray(data_xy, dtype=theano.config.floatX),
                                     borrow=borrow)
            return shared_x
        else:  # training
            assert type(data_xy) == tuple, "label data is missing"
            data_x, data_y = data_xy
            shared_x = theano.tensor._shared(np.asarray(data_x, dtype=theano.config.floatX),
                                             borrow=borrow)
            shared_y = theano.tensor._shared(np.asarray(data_y, dtype=theano.config.floatX),
                                             borrow=borrow)
            return shared_x, T.cast(shared_y, 'int32')

    def _train_by_sentence_init_(self, x_train, y_train, x_val, y_val, l_t, l_v):
        self.learning_rate_decay = self.config['learning_rate_decay']

        train_set_x, train_set_y = self.share_var((x_train, y_train))
        valid_set_x, valid_set_y = self.share_var((x_val, y_val))

        l_t = T.cast(theano.tensor._shared(np.asarray(l_t, dtype=theano.config.floatX),
                                           borrow=True), 'int32')
        l_v = T.cast(theano.tensor._shared(np.asarray(l_v, dtype=theano.config.floatX),
                                           borrow=True), 'int32')

        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors

        train_model = theano.function(
            inputs=[index],
            outputs=[cost, errors],
            updates=self.optimizer.get_updates(params=params, cost=cost),
            givens={
                x: train_set_x[l_t[index]:l_t[index + 1]],
                y: train_set_y[l_t[index]:l_t[index + 1]]
            }
        )

        validate_model = theano.function(
            inputs=[index],
            outputs=errors,
            givens={
                x: valid_set_x[l_v[index]:l_v[index + 1]],
                y: valid_set_y[l_v[index]:l_v[index + 1]]
            }
        )
        return train_model, validate_model

    def train_by_sentence(self, x_train, y_train, x_val, y_val,
                          index_train, index_val, save_model=False):
        """
        train: {name: 'sentenceID', data: [features], label: [labels]}
        """
        train_model, validate_model = self._train_by_sentence_init_(
            x_train, y_train, x_val, y_val, index_train, index_val)

        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = len(index_train) - 1  # min(self.n_train_batches, patience / 2)

        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print('start training...')
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(len(index_train) - 1):
                minibatch_avg_cost, train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * (len(index_train) - 1) + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i)
                                         for i in xrange(len(index_val) - 1)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f '
                          % (epoch, str(n_epochs), minibatch_avg_cost,
                             this_train_acc, this_val_acc))
                    t_cost.append(round(minibatch_avg_cost, 5))
                    t_acc.append(round(this_train_acc, 5))
                    v_acc.append(round(this_val_acc, 5))

                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc, 3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)

                    if this_validation_loss < best_validation_loss:
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                    # if this_train_acc - this_val_acc > 0.05:
                    #     done_looping = True
                    #     break
                # if patience <= iter:
                #     done_looping = True
                #     break

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
            'training loss': t_cost,
            'training accuracy': t_acc,
            'validation accuracy': v_acc
        }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n'
               'obtained at iteration %i, with test performance %f %%')
              % (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    def _train_without_val_init_(self, train_set_x, train_set_y):
        self.learning_rate_decay = self.config['learning_rate_decay']
        batch_size = self.model.batch_size
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_train_batches = n_train_batches
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors

        train_model = theano.function(
            inputs=[index],
            outputs=[cost, errors],
            updates=self.optimizer.get_updates(params=params, cost=cost),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
        )
        return train_model

    def train_without_val(self, train_set_x, train_set_y, save_model=False):
        train_model = self._train_without_val_init_(train_set_x, train_set_y)

        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = self.n_train_batches

        best_validation_loss = np.inf
        best_train_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print('start training...')
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost, train_acc = train_model(minibatch_index)
                this_train_acc = 1 - train_acc
                print('epoch %i/%s, cost %.4f , train acc %.4f '
                      % (epoch, str(n_epochs), minibatch_avg_cost, this_train_acc))

                if save_model:
                    if this_train_acc > best_train_acc:
                        best_train_acc = this_train_acc
                        folder = "./snapshot_{0}_{1}/".format(epoch, round(best_train_acc, 3))
                        os.mkdir(folder)
                        tools.save_weights(self.layers, folder, epoch)

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        end_time = time.clock()

    def _train_init_(self, train_set_x, train_set_y, valid_set_x, valid_set_y):
        self.learning_rate_decay = self.config['learning_rate_decay']
        batch_size = self.model.batch_size
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_train_batches = n_train_batches
        self.layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch
        cost = self.model.cost
        params = self.model.params
        errors = self.model.errors

        train_model = theano.function(
            inputs=[index],
            outputs=[cost, errors],
            updates=self.optimizer.get_updates(params=params, cost=cost),
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                y: train_set_y[index * batch_size:(index + 1) * batch_size]
            }
        )

        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
        self.n_valid_batches = n_valid_batches

        validate_model = theano.function(
            inputs=[index],
            outputs=errors,
            givens={
                x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                y: valid_set_y[index * batch_size:(index + 1) * batch_size]
            }
        )
        return train_model, validate_model

    def train(self, train_set_x, train_set_y, valid_set_x, valid_set_y, save_model=False):
        train_model, validate_model = self._train_init_(train_set_x, train_set_y,
                                                        valid_set_x, valid_set_y)

        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = min(self.n_train_batches, patience / 2)

        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print('start training...')
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost, train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * self.n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i)
                                         for i in xrange(self.n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f '
                          % (epoch, str(n_epochs), minibatch_avg_cost,
                             this_train_acc, this_val_acc))
                    t_cost.append(round(minibatch_avg_cost, 5))
                    t_acc.append(round(this_train_acc, 5))
                    v_acc.append(round(this_val_acc, 5))

                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc, 3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)

                    if this_validation_loss < best_validation_loss:
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                    # if this_train_acc - this_val_acc > 0.05:
                    #     done_looping = True
                    #     break
                # if patience <= iter:
                #     done_looping = True
                #     break

            if self.learning_rate_decay == True:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
            'training loss': t_cost,
            'training accuracy': t_acc,
            'validation accuracy': v_acc
        }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n'
               'obtained at iteration %i, with test performance %f %%')
              % (best_validation_loss * 100., best_iter + 1, test_score * 100.))

    def load(self):
        layers = self.model.layers
        dir = self.model.snapshot
        if not os.path.isdir(dir):
            raise IOError('no such snapshot file: %s' % (dir))
        snapshots = glob.glob(dir + '*.npy')
        e = self.config['e_snapshot']
        tools.load_weights(layers, dir, e)

    def predict_by_sentence(self, test_set_x, index_test, load_model=None, dropout=False):
        assert load_model != None, "load_model should be True or False"

        test_set_x = self.share_var(test_set_x, testing=True)
        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        predict_times = len(index_test) - 1
        index_test = T.cast(theano.tensor._shared(np.asarray(index_test, dtype=theano.config.floatX),
                                                  borrow=True), 'int32')

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' % (dir))
            snapshots = glob.glob(dir + '*.npy')
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
            inputs=[index],
            outputs=self.model.y_pred,
            givens={
                x: test_set_x[index_test[index]:index_test[(index + 1)]],
            }
        )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.array([])
        DropoutLayer.SetDropoutOff()
        print("predict on %d samples" % int(n_test))
        for i in xrange(predict_times):
            y_pred = np.concatenate((y_pred, test_model(i)), axis=0)
        return y_pred

    def predict(self, test_set_x, load_model=None, dropout=False):
        assert load_model != None, "load_model should be True or False"

        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' % (dir))
            snapshots = glob.glob(dir + '*.npy')
            e = self.config['e_snapshot']
            if dropout == False:
                tools.load_weights(layers, dir, e)
            else:
                tools.dropout_load_weights(layers, dir, e)

        test_model = theano.function(
            inputs=[index],
            outputs=self.model.y_pred,
            givens={
                x: test_set_x[index:(index + 1)],
            }
        )

        n_test = test_set_x.get_value(borrow=True).shape[0]
        y_pred = np.zeros(n_test)
        DropoutLayer.SetDropoutOff()
        print("predict on %d samples" % int(n_test))
        for i in xrange(n_test):
            y_pred[i] = int(test_model(i))
        return y_pred

    def proba(self, X, load_model=None):
        assert load_model != None, "load_model should be True or False"

        layers = self.model.layers
        x = self.model.x
        y = self.model.y
        index = T.lscalar()  # index to a [mini]batch

        if load_model == True:
            dir = self.model.snapshot
            if not os.path.isdir(dir):
                raise IOError('no such snapshot file: %s' % (dir))
            snapshots = glob.glob(dir + '*.npy')
            e = self.config['e_snapshot']
            tools.load_weights(layers, dir, e)

        prob_model = theano.function(
            inputs=[index],
            outputs=self.model.proba,
            givens={
                x: X[index:(index + 1)],
            }
        )

        y_prob = []
        n_test = X.get_value(borrow=True).shape[0]
        DropoutLayer.SetDropoutOff()
        print("getting probabilities on %d samples" % int(n_test))
        for i in xrange(n_test):
            y_prob.append(prob_model(i))
        return np.asarray(y_prob).reshape(n_test, y_prob[0].shape[1])
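# A minimal usage sketch of ModelCompiler (MyNet is a placeholder model class and
# the shared-variable inputs are assumed to be prepared elsewhere; the config keys
# below are the ones actually read by the methods above):
config = {
    'learning_rate': 0.01,
    'weight_decay': 1e-4,
    'momentum': 0.9,
    'learning_rate_decay': True,
    'n_epochs': 20,
    'e_snapshot': 5,
}
compiler = ModelCompiler(MyNet, config, optimizer='RMSprop')
# train_set_x/valid_set_x are Theano shared variables and train_set_y/valid_set_y
# are int32 casts, e.g. built with compiler.share_var((x_train, y_train))
compiler.train(train_set_x, train_set_y, valid_set_x, valid_set_y, save_model=True)
y_pred = compiler.predict(test_set_x, load_model=True)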
# plt.subplot(1, 4, 2)
# plt.imshow(img[1])
# plt.subplot(1, 4, 3)
# plt.imshow(img[2])
# plt.subplot(1, 4, 4)
# plt.imshow(img[3])

model = MNISTNet()
loss = SoftmaxCrossEntropy(num_class=10)

# define your learning rate scheduler: halve the learning rate every 1000 iterations
def func(lr, iteration):
    if iteration % 1000 == 0:
        return lr * 0.5
    else:
        return lr

rms = RMSprop(lr=0.001, decay=0, sheduler_func=func)
l2 = L2(w=0.001)  # L2 regularization with lambda=0.001

model.compile(optimizer=rms, loss=loss, regularization=l2)
train_results, val_results, test_results = model.train(mnist,
                                                       train_batch=30,
                                                       val_batch=1000,
                                                       test_batch=1000,
                                                       epochs=2,
                                                       val_intervals=100,
                                                       test_intervals=300,
                                                       print_intervals=100)
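# Assuming the framework calls sheduler_func(lr, iteration) once per training
# iteration, the callback above halves the learning rate every 1000 iterations:
lr = 0.001
for it in range(1, 3001):
    lr = func(lr, it)
print(lr)  # 0.001 -> 0.0005 -> 0.00025 -> 0.000125 after 3000 iterations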
# Initial x
x0 = np.array([-2., -1.])

# Some global settings
max_iter = 5000
tol = 1e-8

# Optimization methods
gd = GD(fun, jac, lr=0.0005, max_iter=max_iter, tol=tol)
mom1 = GD(fun, jac, lr=0.0005, momentum=0.5, max_iter=max_iter, tol=tol)
mom2 = GD(fun, jac, lr=0.0005, momentum=0.9, max_iter=max_iter, tol=tol)
nest = GD(fun, jac, lr=0.0005, momentum=0.5, nesterov=True, max_iter=max_iter, tol=tol)
agrad = Adagrad(fun, jac, lr=0.1, max_iter=max_iter, tol=tol)
adelta = Adadelta(fun, jac, lr=1., max_iter=max_iter, tol=tol)
rms = RMSprop(fun, jac, lr=0.001, max_iter=max_iter, tol=tol)
adam = Adam(fun, jac, lr=0.01, max_iter=max_iter, tol=tol)

optimizers = [gd, mom1, nest, agrad, adelta, rms, adam]
labels = ['GD', 'Momentum', 'Nesterov', 'Adagrad', 'Adadelta', 'RMSprop', 'Adam']

# Initialise lists for x-values at each iteration, and the final x-value for each
# optimisation method
xall = []
xfinal = []
feval = []

# Loop over all optimizers
for opt in optimizers:
    # Minimise the function
    opt.optimize(x0)
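# For reference, a minimal NumPy sketch of the RMSprop update rule that wrappers
# like the ones above implement (a generic illustration, not the API of any of the
# libraries used in these snippets): keep a running average of squared gradients
# and scale each step by its root.
import numpy as np

def rmsprop_step(x, grad, avg_sq, lr=0.001, rho=0.9, eps=1e-8):
    avg_sq = rho * avg_sq + (1 - rho) * grad ** 2   # running average of g^2
    x = x - lr * grad / (np.sqrt(avg_sq) + eps)     # per-parameter scaled step
    return x, avg_sq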
         backprop_depth=SEQUENCE_LENGTH, stateful=True),
    LSTM(size=512, input_size=512, batch_size=BATCH_SIZE,
         backprop_depth=SEQUENCE_LENGTH, stateful=True),
    TimeDistributed(
        Dense(size=EMBEDDING_LENGTH, input_size=512,
              activation=SparseSoftmax())))

if RESTORE_MODEL_PATH:
    model.loadParams(RESTORE_MODEL_PATH)

optimizer = RMSprop(learning_rate=lambda n: 0.001)
loss_function = VectorCrossEntropy
model.assignOptimizer(optimizer)

if RESTORE_OPTIMIZER_PATH:
    optimizer.load(RESTORE_OPTIMIZER_PATH)

for epoch in range(INITIAL_EPOCH, NR_OF_EPOCHS + INITIAL_EPOCH):
    loss, accuracy = model.train(makeBatches(source, SEQUENCE_LENGTH, EMBEDDING_LENGTH),
                                 lossfunc=loss_function)
    model.saveParams(
        f"{MODEL_PATH}{MODEL_NAME}-{epoch:02d}-loss_{loss:.5f}-acc_{accuracy:.5f}.nn"
    )
    optimizer.save(f"{MODEL_PATH}{epoch:02d}-optimizer.json")