Example #1
def train(model, train_dataset, test_dataset):
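    """Trains `model` with plain SGD and prints train/test metrics.

    `train_dataset` and `test_dataset` are (inputs, labels) tuples; SGD is
    assumed to be the optimizer class of the surrounding framework.
    """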

    (x_train, y_train) = train_dataset
    (x_test, y_test) = test_dataset

    lr = 0.1
    momentum_coef = 0
    weight_decay = 0

    print(model)

    opt = SGD(lr=lr, momentum_coef=momentum_coef, weight_decay=weight_decay)
    print('Optimizer: {} with (lr: {} -- momentum_coef: {} -- weight_decay: {})'.
          format(opt.__class__.__name__, lr, momentum_coef, weight_decay))

    num_of_epochs = 1000
    batch_size = 256
    val_split = 0.1
    print('Validation split: {} -- Batch size: {} -- Epochs: {}'.format(val_split, batch_size, num_of_epochs))
    print('Training is about to start...')

    opt.train(model,
              x_train, y_train,
              num_of_epochs=num_of_epochs,
              batch_size=batch_size,
              val_split=val_split,
              verbose=1)

    print('\nEvaluating on the train and test datasets...')

    test_acc, test_loss = model.evaluate(x_test, y_test, return_pred=False)
    train_acc, train_loss = model.evaluate(x_train, y_train, return_pred=False)
    print("train_acc: {} -- test_loss: {}".format(train_acc, train_loss))
    print("test_acc: {} -- test_loss: {}".format(test_acc, test_loss))

    print('For complete use case of the framework please refer to guide.ipynb')
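
# Hypothetical usage, assuming `model` and the (inputs, labels) tuples come
# from the same framework:
#
#   train(model, (x_train, y_train), (x_test, y_test))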
Example #2
def main():
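    """Builds a data generator and optimizee model for the configured problem
    ("simple" linear regression or "mnist"), trains it with the configured
    optimizer (plain SGD, RMSprop, or a learned RNN optimizer), plots the loss
    over a few rollouts, and prints train/test accuracy.

    SGD, RMSprop, RNNOptimizer, learn and the data/model classes are assumed
    to be provided by the surrounding project.
    """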

    config = {
        "optimizer": "rnn",
        "problem": "mnist",
        "rollout_length": 100,  # This is 100 in the paper
        "learning_rate": 0.1,
        "decay_rate": 0.9,
        "meta_layers": 2,
        "meta_hidden_size": 20,
        "layers": 2,
        "hidden_size": 100,
        "activation": 'relu',
        "preprocess": True,
        "max_to_keep": 3,
        "retrain": False,
        "dim": 10,
        "range_of_means": 10,
        "range_of_stds": 10,
        "summary_dir": "summary",
        "checkpoint_dir": "data_ckpt",
        "batch_size": 10000,
        "training_iters": 4000,
        "log_iters": 100
    }
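    # "rollout_length" sets how many optimizee steps are unrolled per run;
    # "retrain" switches between retraining the RNN optimizer and loading a
    # saved checkpoint.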

    # create the experiments dirs
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])
    # create tensorflow session
    sess = tf.Session()

    # create your data generator
    # create an instance of the model you want
    if config["problem"] == "simple":
        data = SimpleDG(config)
        model = LinearRegressionModel(config)
    elif config["problem"] == "mnist":
        data = MNISTDG(config)
        model = MNISTModel(config)
    else:
        raise ValueError("{} is not a valid problem".format(config["problem"]))

    # create tensorboard logger
    # logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    # trainer = LinearRegressionTrainer(sess, model, data, config, logger)

    sess.run(tf.global_variables_initializer())

    if config["optimizer"] == "sgd":
        optim = SGD(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rms":
        optim = RMSprop(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rnn":
        optim = RNNOptimizer(config)
        losses = learn(optim, model, config["rollout_length"])

        if config["retrain"]:
            optim.train(losses, sess, data)
        else:
            optim.load(sess)
    else:
        raise ValueError("{} is not a valid optimizer".format(
            config["optimizer"]))

    # initialize variables in optimizee
    # (can't initialize all here because it would potentially overwrite the trained optimizer)
    sess.run(
        tf.variables_initializer(
            tf.trainable_variables(scope=optim.__class__.__name__)))

    x = np.arange(config["rollout_length"] + 1)

    for i in range(3):
        sess.run(
            tf.variables_initializer(
                tf.trainable_variables(scope=optim.__class__.__name__)))

        data.refresh_parameters(seed=i)
        data_x, data_y = next(data.next_batch(config["batch_size"]))

        loss_vals = sess.run([losses],
                             feed_dict={
                                 "input:0": data_x,
                                 "label:0": data_y
                             })
        print(loss_vals)

        p1, = plt.semilogy(x, loss_vals[0], label=config["optimizer"])
        plt.legend(handles=[p1])
        plt.title('Losses')
        plt.show()

        # TODO compare different optimizers

    data.refresh_parameters()

    data_x, data_y = next(data.next_batch(100, mode="train"))
    pred = sess.run(model.prediction,
                    feed_dict={
                        "input:0": data_x,
                        "label:0": data_y
                    })
    true_labels = np.argmax(data_y, axis=1)
    print(list(zip(pred, true_labels, pred == true_labels)))

    # calculate accuracy on freshly generated train and test data
    seed = np.random.randint(low=0, high=1e6)
    data.refresh_parameters(seed=seed)
    data_x, data_y = next(data.next_batch(5000, mode="train"))
    acc = sess.run(model.accuracy,
                   feed_dict={
                       "input:0": data_x,
                       "label:0": data_y
                   })
    print("Train accuracy: {}".format(acc))

    data_x, data_y = next(data.next_batch(5000, mode="test"))
    acc = sess.run(model.accuracy,
                   feed_dict={
                       "input:0": data_x,
                       "label:0": data_y
                   })
    print("Test accuracy: {}".format(acc))
Example #3
class Model:
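    """A minimal Keras-style sequential model: a stack of layers trained by a
    pluggable optimizer against a named loss function."""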
    def __init__(self, name="Model"):
        self.layers = []
        self.name = name
        self.loss = None
        self.optimizer = None

    # Method for adding a layer
    def addLayer(self, layer):
        self.layers.append(layer)
    
    # Performs the forward pass and evaluates the network
    # Returns the loss value & metrics values
    def evaluate(self, inputs, targets, updateInternal=False):
        predictions = self.predict(inputs, updateInternal)
        cost = self.computeCost(predictions, targets)
        accuracy = self.computeAccuracy(predictions, targets)

        return cost, accuracy        

    
    # Performs a forward pass without training the network
    def predict(self, inputs, updateInternal=False):
        prediction = inputs

        for layer in self.layers:
            if type(layer) != BatchNormalization:
                prediction = layer.forward(prediction)
            else:
                prediction = layer.forward(prediction, updateInternal)
        
        return prediction

    # Propagates the targets (one-hot encoded) back through the network
    def backpropagate(self, targets):
        grad = self.layers[-1].backward(targets)  
        for layer in self.layers[-2::-1]:
            grad = layer.backward(grad)

        return grad

    # Computes the cost: the data loss plus any per-layer regularization cost
    def computeCost(self, predictions, targets):

        totalCost = 0

        # TODO: maybe we don't need the probabilities here; we already have the predictions.
        if self.loss == "categorical_cross_entropy":
            assert self.layers[-1].type == "Softmax", "Loss is cross-entropy but last layer is not softmax"
            # targets are one-hot with samples along columns, hence targets.shape[1]
            yhat = targets*np.log(self.layers[-1].probabilities)
            entropy = -np.sum(yhat)/targets.shape[1]
            totalCost = totalCost + entropy

            for layer in self.layers[0:-1]:
                totalCost = totalCost + layer.cost()

        # NOT TESTED YET
        elif self.loss == "binary_cross_entropy":
            m = predictions.shape[0]
            binaryEntropy = -1 / m * (np.dot(targets, np.log(predictions).T) + np.dot(1 - targets, np.log(1 - predictions).T))
            totalCost = totalCost + np.squeeze(binaryEntropy)

            for layer in self.layers[0:-1]:
                totalCost = totalCost + layer.cost()

        # NOT TESTED YET
        elif self.loss == "mse":
            totalCost = totalCost + np.mean((predictions-targets)**2)

            for layer in self.layers[0:-1]:
                totalCost = totalCost + layer.cost()

        elif self.loss is None:
            # no data loss configured: return the regularization cost only
            for layer in self.layers:
                totalCost = totalCost + layer.cost()

        return totalCost


    # Computes the accuracy of the predictions given the targets
    def computeAccuracy(self, predictions, targets):
        assert predictions.shape == targets.shape
        accuracy = np.sum(np.argmax(predictions, axis=0) == np.argmax(targets, axis=0)) / predictions.shape[1]
        return accuracy

    # Initializes the optimizer and the loss function. Also gives the optimizer
    # a reference to the current model (for access to the network's forward and
    # backward passes).
    def compile(self, optimizer="SGD", loss="cce"):

        if isinstance(optimizer, str):
            if optimizer == "SGD":
                self.optimizer = SGD()
            else:
                raise NameError("Unrecognized optimizer")
        else:
            self.optimizer = copy.deepcopy(optimizer)
        
        # Adds reference for the optimizer to the model
        self.optimizer.model = self

        if loss == "cce" or loss == "categorical_cross_entropy":
            self.loss = "categorical_cross_entropy"
        else:
            raise NameError("Unrecognized loss function.")

        self.history = self.optimizer.history


    # Fits the model to the data using the optimizer and loss function specified during compile
    def fit(self, inputs, targets, epochs=1, validationData=None, batch_size=None, verbose=True):
        if self.loss is None or self.optimizer is None:
            raise ValueError("Model not compiled")

        self.optimizer.train(x_train=inputs, y_train=targets,
                             validationData=validationData,
                             epochs=epochs, batch_size=batch_size,
                             verbose=verbose)


    def __str__(self):
        strrep = "Sequential Model: " + self.name + "\n"
        for i, layer in enumerate(self.layers):
            strrep += "     Layer " + str(i) + ": Type: " + str(layer) + "\n"
        return strrep
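
# Hypothetical usage; Dense and Softmax stand in for whatever layer classes
# the framework actually provides:
#
#   model = Model(name="mlp")
#   model.addLayer(Dense(50))
#   model.addLayer(Softmax())
#   model.compile(optimizer="SGD", loss="cce")
#   model.fit(x_train, y_train, epochs=10, batch_size=64)
#   cost, acc = model.evaluate(x_test, y_test)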
Example #4
wgn = WriteGradientNorm()

# DEFINE THE NN TOPOLOGY
nn = NN(10)
nn.add_layer(Layer(97, Rectifier))
nn.add_layer(Layer(19, TanH))
nn.add_layer(Layer(2, Linear))
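# 10 inputs -> 97 ReLU units -> 19 tanh units -> 2 linear outputs
# (NN(10) presumably fixes the input dimension)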

# START THE TRAINING
regularizer = L1L2Regularizer(0.00037205665611222174, 0.0008170173137216012)
lr = learning_rate_time_based(0.010868833133798494, 5.964961555920687e-06)
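
# plc and wtf, like sgd, x_train, y_train and batch_size below, are assumed to
# be defined earlier in the original script; by name they look like additional
# training callbacks.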
sgd.train(nn,
          x_train,
          y_train,
          learning_rate=lr,
          batch_size=batch_size,
          epochs=10000,
          momentum=0.9516339410240324,
          regularizer=regularizer,
          callbacks=[plc, wtf, wgn])

# Scatter predicted outputs (blue) against validation targets (red)
fig, ax = plt.subplots()
y_predict = nn.predict_batch(x_val)
ax.scatter(y_predict[:, 0], y_predict[:, 1], c='b', marker='+', s=30, lw=.4)
ax.scatter(y_val[:, 0], y_val[:, 1], c='r', marker='+', s=30, lw=.4)
plt.show()

with open('%s/%s_predictions.csv' % (directory, training_id), 'w') as f:
    f.write('y0_predict,y1_predict,y0_val,y1_val\n')
    for yp, yv in zip(y_predict, y_val):
        f.write('%f,%f,%f,%f\n' % (yp[0], yp[1], yv[0], yv[1]))