def train(model, train_dataset, test_dataset):
    """Train ``model`` with SGD and print train/test metrics.

    Args:
        model: Network to optimise. It is printed, passed to ``SGD.train`` and
            must provide ``evaluate(x, y, return_pred=False)``.
        train_dataset: ``(x_train, y_train)`` pair used for fitting.
        test_dataset: ``(x_test, y_test)`` pair used only for the final report.

    Returns:
        None. Results are reported on stdout.
    """
    (x_train, y_train) = train_dataset
    (x_test, y_test) = test_dataset

    # Optimizer hyper-parameters.
    lr = 0.1
    momentum_coef = 0
    weight_decay = 0

    print(model)

    opt = SGD(lr=lr, momentum_coef=momentum_coef, weight_decay=weight_decay)
    print('Optimizer: {} with (lr: {} -- momentum_coef: {} -- weight_decay: {})'.
          format(opt.__class__.__name__, lr, momentum_coef, weight_decay))

    # Training schedule.
    num_of_epochs = 1000
    batch_size = 256
    val_split = 0.1

    print('Validation Split: {} -- BatchSize: {} -- Epochs: {}'.format(val_split, batch_size, num_of_epochs))
    print('Training is about the start with epoch: {}, batch_size: {}, validation_split: {}'
          .format(num_of_epochs, batch_size, val_split))

    opt.train(model, x_train, y_train, num_of_epochs=num_of_epochs, batch_size=batch_size,
              val_split=val_split, verbose=1)

    print('\nEvaluating with test dataset !..')
    # NOTE(review): the result is unpacked as (accuracy, loss); that order is
    # taken from the variable names only -- confirm against model.evaluate.
    test_acc, test_loss = model.evaluate(x_test, y_test, return_pred=False)
    train_acc, train_loss = model.evaluate(x_train, y_train, return_pred=False)

    # The training line used to be labelled "test_loss" although it prints the
    # training loss, which made the two report lines indistinguishable.
    print("train_acc: {} -- train_loss: {}".format(train_acc, train_loss))
    print("test_acc: {} -- test_loss: {}".format(test_acc, test_loss))

    print('For complete use case of the framework please refer to guide.ipynb')
def main():
    """Run one optimizer on one problem, plot rollout losses, print accuracy.

    The configuration is hard-coded below. The function builds the data
    generator and model for ``config["problem"]``, builds the optimizer named
    by ``config["optimizer"]`` (training or loading it when it is ``"rnn"``),
    plots the loss curve of three rollouts on a log scale, prints the
    predictions for one batch and finally prints train and test accuracy.

    Raises:
        ValueError: If ``config["problem"]`` or ``config["optimizer"]`` is not
            one of the values handled below.
    """
    config = {
        "optimizer": "rnn",
        "problem": "mnist",
        "rollout_length": 100,  # This is 100 in the paper
        "learning_rate": 0.1,
        "decay_rate": 0.9,
        "meta_layers": 2,
        "meta_hidden_size": 20,
        "layers": 2,
        "hidden_size": 100,
        "activation": 'relu',
        "preprocess": True,
        "max_to_keep": 3,
        "retrain": False,
        "dim": 10,
        "range_of_means": 10,
        "range_of_stds": 10,
        "summary_dir": "summary",
        "checkpoint_dir": "data_ckpt",
        "batch_size": 10000,
        "training_iters": 4000,
        "log_iters": 100
    }

    # create the experiments dirs
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])

    # create tensorflow session; the context manager guarantees the session
    # (and the resources it holds) is released even if a step below raises.
    with tf.Session() as sess:
        # create your data generator
        # create an instance of the model you want
        if config["problem"] == "simple":
            data = SimpleDG(config)
            model = LinearRegressionModel(config)
        elif config["problem"] == "mnist":
            data = MNISTDG(config)
            model = MNISTModel(config)
        else:
            raise ValueError("{} is not a valid problem".format(config["problem"]))

        # create tensorboard logger
        # logger = Logger(sess, config)

        # create trainer and pass all the previous components to it
        # trainer = LinearRegressionTrainer(sess, model, data, config, logger)
        sess.run(tf.global_variables_initializer())

        optimizer_name = config["optimizer"]
        if optimizer_name == "sgd":
            optim = SGD(config)
        elif optimizer_name == "rms":
            optim = RMSprop(config)
        elif optimizer_name == "rnn":
            optim = RNNOptimizer(config)
        else:
            raise ValueError("{} is not a valid optimizer".format(
                config["optimizer"]))

        # Every optimizer is unrolled the same way, so build the rollout once.
        losses = learn(optim, model, config["rollout_length"])

        # Only the learned optimizer has parameters of its own to fit/restore.
        if optimizer_name == "rnn":
            if config["retrain"]:
                optim.train(losses, sess, data)
            else:
                optim.load(sess)

        # initialize variables in optimizee
        # (can't initialize all here because it would potentially overwrite the trained optimizer)
        # The op is built once and re-run, instead of adding a new identical
        # initializer op to the graph for every rollout.
        reset_op = tf.variables_initializer(
            tf.trainable_variables(scope=optim.__class__.__name__))
        sess.run(reset_op)

        x = np.arange(config["rollout_length"] + 1)
        for i in range(3):
            sess.run(reset_op)

            data.refresh_parameters(seed=i)
            data_x, data_y = next(data.next_batch(config["batch_size"]))
            rollout_losses = sess.run([losses],
                                      feed_dict={
                                          "input:0": data_x,
                                          "label:0": data_y
                                      })
            print(rollout_losses)
            p1, = plt.semilogy(x, rollout_losses[0], label=config["optimizer"])

        # Only the handle of the last curve is passed to the legend.
        plt.legend(handles=[p1])
        plt.title('Losses')
        plt.show()

        # TODO compare different optimizers

        data.refresh_parameters()
        data_x, data_y = next(data.next_batch(100, mode="train"))
        pred = sess.run(model.prediction,
                        feed_dict={
                            "input:0": data_x,
                            "label:0": data_y
                        })
        labels = np.argmax(data_y, axis=1)
        # (prediction, label, prediction == label) for every sample of the batch
        print(list(zip(pred, labels, pred == labels)))

        # calculate accuracy on train and test data
        seed = np.random.randint(low=0, high=1e6)
        data.refresh_parameters(seed=seed)

        data_x, data_y = next(data.next_batch(5000, mode="train"))
        acc = sess.run(model.accuracy,
                       feed_dict={
                           "input:0": data_x,
                           "label:0": data_y
                       })
        print("Train accuracy: {}".format(acc))

        data_x, data_y = next(data.next_batch(5000, mode="test"))
        acc = sess.run(model.accuracy,
                       feed_dict={
                           "input:0": data_x,
                           "label:0": data_y
                       })
        print("Test accuracy: {}".format(acc))
class Model():
    """Sequential stack of layers together with a loss and an optimizer.

    Layers are appended with ``addLayer`` and are expected to provide
    ``forward``, ``backward`` and ``cost``. ``computeAccuracy`` takes the
    arg-max over axis 0 and divides by ``shape[1]``, i.e. arrays are handled
    with one column per sample.
    """

    def __init__(self, name="Model"):
        self.layers = []
        self.name = name
        # Both stay None until compile() is called; fit() checks for that.
        self.loss = None
        self.optimizer = None

    # Method for adding a layer
    def addLayer(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    # Performs the forward pass and evaluates the network
    # Returns the loss value & metrics values
    def evaluate(self, inputs, targets, updateInternal=False):
        """Run a forward pass and return ``(cost, accuracy)`` for ``targets``."""
        predictions = self.predict(inputs, updateInternal)
        cost = self.computeCost(predictions, targets)
        accuracy = self.computeAccuracy(predictions, targets)
        return cost, accuracy

    # Performs a forward pass without training the network
    def predict(self, inputs, updateInternal=False):
        """Feed ``inputs`` through every layer in order and return the output.

        ``updateInternal`` is forwarded only to ``BatchNormalization`` layers;
        every other layer is called with the activations alone.
        """
        prediction = inputs
        for layer in self.layers:
            if isinstance(layer, BatchNormalization):
                prediction = layer.forward(prediction, updateInternal)
            else:
                prediction = layer.forward(prediction)
        return prediction

    # Propagates the targets(one hot encoding) back through the network
    def backpropagate(self, targets):
        """Back-propagate from the last layer to the first.

        The last layer receives ``targets``; each earlier layer receives the
        gradient returned by the layer after it. Returns the gradient produced
        by the first layer.
        """
        grad = self.layers[-1].backward(targets)
        for layer in self.layers[-2::-1]:
            grad = layer.backward(grad)
        return grad

    # Computes the cost
    def computeCost(self, predictions, targets):
        """Return the loss for ``predictions`` plus the layers' own cost terms.

        For the three data losses the ``cost()`` of every layer except the
        last is added; for the string ``"None"`` the ``cost()`` of all layers
        is summed. Any other value of ``self.loss`` yields ``0``.
        """
        total_cost = 0

        ## Maybe dont need to use the probabilities. We have the predictions...
        if self.loss == "categorical_cross_entropy":
            assert self.layers[-1].type == "Softmax", "Loss is cross-entropy but last layer is not softmax"
            weighted_log_probs = targets * np.log(self.layers[-1].probabilities)
            entropy = -np.sum(weighted_log_probs) / targets.shape[1]
            total_cost = total_cost + entropy
            cost_layers = self.layers[0:-1]

        # NOT TESTED YET
        elif self.loss == "binary_cross_entropy":
            # The dot products below contract over axis 1, so axis 1 is the
            # sample axis; normalising by shape[0] left the loss un-averaged.
            m = predictions.shape[1]
            binary_entropy = -1 / m * (np.dot(targets, np.log(predictions).T)
                                       + np.dot(1 - targets, np.log(1 - predictions).T))
            total_cost = total_cost + np.squeeze(binary_entropy)
            cost_layers = self.layers[0:-1]

        # NOT TESTED YET
        elif self.loss == "mse":
            total_cost = total_cost + np.mean((predictions - targets) ** 2)
            cost_layers = self.layers[0:-1]

        # NOTE(review): this matches the *string* "None"; an uncompiled model
        # has loss None (the object) and falls through to a cost of 0 --
        # confirm that this is intended.
        elif self.loss == "None":
            cost_layers = self.layers

        else:
            cost_layers = []

        for layer in cost_layers:
            total_cost = total_cost + layer.cost()

        return total_cost

    # Computes the accuracy of the predictions given the targets
    def computeAccuracy(self, predictions, targets):
        """Return the fraction of columns whose arg-max matches in both arrays."""
        assert predictions.shape == targets.shape
        hits = np.argmax(predictions, axis=0) == np.argmax(targets, axis=0)
        return np.sum(hits) / predictions.shape[1]

    # Initializes the attributes for the optimizer and the loss function.
    # Also adds a reference for the optimizer to the current model(for access to the forward and backward pass of the network)
    def compile(self, optimizer="SGD", loss="cce"):
        """Attach an optimizer and a loss to the model.

        Args:
            optimizer: Either the string ``"SGD"`` (a default ``SGD()`` is
                created) or an optimizer object, which is deep-copied so the
                caller's instance is left untouched.
            loss: ``"cce"`` or ``"categorical_cross_entropy"``.

        Raises:
            NameError: If the optimizer string or the loss is not recognised.
        """
        if isinstance(optimizer, str):
            if optimizer != "SGD":
                raise NameError("Unrecognized optimizer")
            self.optimizer = SGD()
        else:
            self.optimizer = copy.deepcopy(optimizer)

        # Adds reference for the optimizer to the model
        self.optimizer.model = self

        if loss == "cce" or loss == "categorical_cross_entropy":
            self.loss = "categorical_cross_entropy"
        else:
            raise NameError("Unrecognized loss function.")

        self.history = self.optimizer.history

    # Fits the model to the data using the optimizer and loss function specified during compile
    def fit(self, inputs, targets, epochs=1, validationData=None, batch_size=None, verbose=True):
        """Train through ``self.optimizer.train``; all arguments are forwarded.

        Raises:
            ValueError: If the model has not been compiled.
        """
        if self.loss is None or self.optimizer is None:
            raise ValueError("Model not compiled")

        self.optimizer.train(x_train=inputs, y_train=targets,
                             validationData=validationData,
                             epochs=epochs, batch_size=batch_size, verbose=verbose)

    def __str__(self):
        parts = ["Sequential Model: " + self.name + "\n"]
        parts.extend(
            " Layer " + str(index) + ": Type: " + str(layer) + "\n"
            for index, layer in enumerate(self.layers)
        )
        return "".join(parts)
# Script section: build a small network, train it with the settings below,
# plot predictions against the validation targets and dump both to CSV.
wgn = WriteGradientNorm()

# DEFINE THE NN TOPOLOGY
nn = NN(10)
nn.add_layer(Layer(97, Rectifier))
nn.add_layer(Layer(19, TanH))
nn.add_layer(Layer(2, Linear))

# START THE TRAINING
regularizer = L1L2Regularizer(0.00037205665611222174, 0.0008170173137216012)
lr = learning_rate_time_based(0.010868833133798494, 5.964961555920687e-06)
sgd.train(nn, x_train, y_train, learning_rate=lr, batch_size=batch_size, epochs=10000,
          momentum=0.9516339410240324, regularizer=regularizer, callbacks=[plc, wtf, wgn])

# Predictions in blue, validation targets in red, on the same axes.
fig, ax = plt.subplots()
y_predict = nn.predict_batch(x_val)
ax.scatter(y_predict[:, 0], y_predict[:, 1], c='b', marker='+', s=30, lw=.4)
ax.scatter(y_val[:, 0], y_val[:, 1], c='r', marker='+', s=30, lw=.4)
plt.show()

with open('%s/%s_predictions.csv' % (directory, training_id), 'w') as f:
    f.write('y0_predict,y1_predict,y0_val,y1_val\n')
    # Distinct loop names: reusing ``y_val`` as the loop variable rebound the
    # validation array to its last row once the loop had finished.
    for predicted_row, target_row in zip(y_predict, y_val):
        f.write('%f,%f,%f,%f\n' % (predicted_row[0], predicted_row[1],
                                   target_row[0], target_row[1]))