Example #1
def bn_2_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')


    network = Model(name="2-layer(NO BN)")
    
    network.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    network.addLayer(Relu())

    network.addLayer(Linear(50,10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)  
 
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64, validationData=(validationData, validationLabels))
    


    networkBN = Model(name="2-layer(WITH BN)")
    networkBN.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.90))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(50,10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)  
 
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64, validationData=(validationData, validationLabels))

    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)
    
    #loss, acc = network.evaluate(testingData, testingLabels)
    #print("Test loss: {} , Test acc: {}".format(loss, acc) )

    #plotAccuracy(network, "plots/", timestamp, title="2-layer(NO BN) accuracy over epochs", fileName="nobnacc")
    #plotLoss(network, "plots/", timestamp, title="2-layer(NO BN) loss over epochs", fileName="nobnloss")

    #plotAccuracy(networkBN, "plots/", timestamp, title="2-layer(WITH BN) accuracy over epochs", fileName="bnacc")
    #plotLoss(networkBN, "plots/", timestamp, title="2-layer(WITH BN) loss over epochs", fileName="bnloss")

    multiPlotLoss((network, networkBN), "plots/", timestamp, title="2-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp, title="2-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
Example #2
def __train(weight_init_std):
    bn_network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10, 
                                    weight_init_std=weight_init_std, use_batchnorm=True)
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 100], output_size=10,
                                weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)
    
    train_acc_list = []
    bn_train_acc_list = []
    
    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0
    
    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
    
        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)
    
        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)
    
            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " + str(bn_train_acc))
    
            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break
                
    return train_acc_list, bn_train_acc_list
Example #3
def run_GNN_SGD(train_data,
                valid_data,
                W,
                A,
                b,
                B,
                alpha=0.0001,
                eps=0.001,
                n_vector=8,
                gnn_steps=2,
                n_epochs=100):
    params = []
    for epoch in range(n_epochs):
        W, A, b, loss_train = SGD(train_data, n_vector, B, W, A, b, gnn_steps,
                                  alpha, eps)
        precision_train = mean_precision(train_data, W, A, b, n_vector,
                                         gnn_steps)
        precision_val = mean_precision(valid_data, W, A, b, n_vector,
                                       gnn_steps)
        loss_val = valid_loss(valid_data, W, A, b, n_vector, gnn_steps)
        print(
            'epoch: {}, train loss: {}, train precision: {}, valid loss: {}, valid precision: {}'
            .format(epoch + 1, loss_train, precision_train, loss_val,
                    precision_val))
        params.append((loss_train, precision_train, loss_val, precision_val))
    return params
Example #4
def test1layergradients(samples=1, dimensions=3072):

    print("\n\nTesting 1-layer gradients (NO BN, NO REG) using a batch size of {}".format(samples))
    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")

    
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]
    

    network = Model()
    linear = Linear(dimensions, 10, regularization=0.00)
    network.addLayer(linear)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData)
    network.backpropagate(encodedTrainingLabels)
    
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
    numerical_gradW = compute_grads(1e-6, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb = compute_grads(1e-6, linear.b, trainingData, encodedTrainingLabels, network)

    print("W")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW)
    print("b")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb)

    return (relative_errorW, linear.gradW, numerical_gradW), (relative_errorb, linear.gradb, numerical_gradb)
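
compute_grads and grad_difference are defined elsewhere in this project; as a rough, self-contained sketch of the same idea, a central-difference numerical gradient plus a max relative-error measure on a toy loss might look like this (all names below are illustrative, not the project's API):

import numpy as np

def numerical_grad(f, w, h=1e-6):
    # central differences: perturb one entry at a time and re-evaluate the loss
    grad = np.zeros_like(w)
    it = np.nditer(w, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        orig = w[idx]
        w[idx] = orig + h
        f_plus = f(w)
        w[idx] = orig - h
        f_minus = f(w)
        w[idx] = orig
        grad[idx] = (f_plus - f_minus) / (2 * h)
        it.iternext()
    return grad

def relative_error(g_analytic, g_numeric, eps=1e-12):
    return np.max(np.abs(g_analytic - g_numeric) /
                  np.maximum(eps, np.abs(g_analytic) + np.abs(g_numeric)))

w = np.random.randn(3, 4)
loss = lambda w: 0.5 * np.sum(w ** 2)              # toy loss whose analytic gradient is w
print(relative_error(w, numerical_grad(loss, w)))  # should be around 1e-10 or smaller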
Example #5
 def __init__(self, input_dims, layers_info, opts):
     self.layers_info = layers_info
     self.num_layers = len(layers_info)
     self.params = {}
     self.save_prefix = opts.save_prefix
     for ix in xrange(len(layers_info)):
         if ix == 0:
             input_dim = input_dims
         else:
             input_dim = layers_info[ix - 1][1]
         output_dim = layers_info[ix][1]
         if layers_info[ix][0] != "batchnorm":
             layer_object = DenseLayer(input_dim,
                                       output_dim,
                                       layers_info[ix][2],
                                       dropout=layers_info[ix][3])
         else:
             layer_object = BatchNormLayer(input_dim)
         self.params[layers_info[ix][0] +
                     "_{}".format(ix)] = layer_object.params
         setattr(self, 'layer_{}'.format(ix), layer_object)
     self.optimizer = SGD(self.params,
                          'categorical_cross_entropy',
                          lr=opts.lr,
                          l2_penalty=opts.l2,
                          momentum=opts.momentum)
Example #6
def test3layergradients(samples=1, dimensions=3072):

    print("\n\nTesting 3-layer gradients using a batch size of {}".format(samples))
    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")

    
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]
    
    network = Model()

    linear = Linear(dimensions, 50, regularization=0.00, initializer="he")
    network.addLayer(linear)
    network.addLayer(Relu())

    linear2 = Linear(50, 30, regularization=0.00, initializer="he")
    network.addLayer(linear2)
    network.addLayer(Relu())

    linear3 = Linear(30, 10, regularization=0.00, initializer="he")
    network.addLayer(linear3)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)
    
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
    
    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)

    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)

    numerical_gradW3 = compute_grads_w_BN(1e-4, linear3.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb3 = compute_grads_w_BN(1e-4, linear3.b, trainingData, encodedTrainingLabels, network)



    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)

    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)
    
    print("W3")
    relative_errorW3 = grad_difference(linear3.gradW, numerical_gradW3)
    print("b3")
    relative_errorb3 = grad_difference(linear3.gradb, numerical_gradb3)

    print("\n")
Example #7
 def __call__(self):
     param = self.param
     if param is None:
         opt = SGD()
     elif isinstance(param, OptimizerBase):
         opt = param
     elif isinstance(param, str):
         opt = self.init_from_str()
     elif isinstance(param, dict):
         opt = self.init_from_dict()
     return opt
Example #8
    def configure(self, loss, optimizer=SGD(learning_rate=0.01), metrics=None):
        """
        Configure the model for training or evaluation

        metrics should be a dictionary with the name of each metric as the key and the
        function that computes the metric as the value
        """

        self.loss = loss
        self.optimizer = optimizer
        self.metrics = metrics
        if self.metrics is None:
            self.metrics = {}
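
As a hedged illustration of the metrics convention the docstring describes (a dictionary mapping metric names to the functions that compute them), here is a standalone sketch; the accuracy helper is hypothetical and not part of this library:

import numpy as np

def accuracy(y_true, y_pred):
    # fraction of samples where the predicted class matches the true class
    return float(np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1)))

metrics = {"accuracy": accuracy}   # name -> callable, as the docstring expects

y_true = np.eye(3)[[0, 1, 2, 1]]
y_pred = np.eye(3)[[0, 1, 1, 1]]
for name, fn in metrics.items():
    print(name, fn(y_true, y_pred))   # accuracy 0.75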
Example #9
def make_DNN(lr=0.1, exp_decay=0.001, optimizer='SGD'):
    dnn = DNN()
    dnn.set_input_size(2)
    dnn.add_layer(4, tanh_activation)
    dnn.add_layer(3, tanh_activation)
    dnn.add_layer(1, linear_activation)
    if optimizer == 'SGD':
        dnn.compile(loss=mse_loss, optimizer=SGD(lr=lr, exp_decay=exp_decay))
    elif optimizer == 'Ada':
        dnn.compile(loss=mse_loss,
                    optimizer=AdaGrad(lr=lr, exp_decay=exp_decay))
    elif optimizer == 'BFGS':
        dnn.compile(loss=mse_loss, optimizer=BFGS)
    return dnn
Example #10
 def __init__(self,
              num_layers,
              units_list=None,
              initializer=None,
              optimizer='adam'):
     self.weight_num = num_layers - 1
     # Initialize parameters according to the given initialization method; this experiment only implements Xavier and all-zeros initialization
     self.params = xavier(num_layers,
                          units_list) if initializer == 'xavier' else zero(
                              num_layers, units_list)
     self.optimizer = Adam(
         weights=self.params,
         weight_num=self.weight_num) if optimizer == 'adam' else SGD()
     self.bn_param = {}
Example #11
 def init_from_str(self):
     r = r"([a-zA-Z]*)=([^,)]*)"
     opt_str = self.param.lower()
     kwargs = dict([(i, eval(j)) for (i, j) in re.findall(r, opt_str)])
     if "sgd" in opt_str:
         optimizer = SGD(**kwargs)
     elif "adagrad" in opt_str:
         optimizer = AdaGrad(**kwargs)
     elif "rmsprop" in opt_str:
         optimizer = RMSProp(**kwargs)
     elif "adam" in opt_str:
         optimizer = Adam(**kwargs)
     else:
         raise NotImplementedError("{}".format(opt_str))
     return optimizer
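
The regex above extracts name=value pairs from a spec string such as "sgd(lr=0.01, momentum=0.9)" and evals each value. A minimal standalone sketch of just that parsing step (no optimizer classes involved, the function name is illustrative):

import re

def parse_optimizer_spec(spec):
    # "SGD(lr=0.01, momentum=0.9)" -> ("sgd", {"lr": 0.01, "momentum": 0.9})
    spec = spec.lower()
    name = spec.split("(")[0]
    kwargs = {k: eval(v) for k, v in re.findall(r"([a-zA-Z]*)=([^,)]*)", spec)}
    return name, kwargs

print(parse_optimizer_spec("SGD(lr=0.01, momentum=0.9)"))
# ('sgd', {'lr': 0.01, 'momentum': 0.9})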
Example #12
def regularizationSearch():

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.10)    

    bestLambda = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    
    for lambdaValue in np.arange(0, 0.2, 0.005):

        network = Model()
        network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(50, trainable=True))
        network.addLayer(Relu())

        network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(30, trainable=True))
        network.addLayer(Relu())

        network.addLayer(Linear(30,10, regularization=lambdaValue, initializer="he"))
        network.addLayer(Softmax())

        sgd = SGD(lr=0.01, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)  
    
        network.compile(sgd, "cce")
        
        timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

        network.fit(trainingData, trainingLabels, epochs=20, validationData=(validationData, validationLabels), batch_size=64)

        
        #plotAccuracy(network, "plots/", timestamp)
        #plotLoss(network, "plots/", timestamp)
        
        print("Lambda:{}".format(lambdaValue))
        loss, acc = network.evaluate(validationData, validationLabels)
        print("Val loss: {} , Val acc: {}".format(loss, acc) )
        print("\n\n")
        
        if acc > bestValAcc:
            bestLambda = lambdaValue
            bestValAcc = acc
            bestLoss = loss
    
    return bestLambda, bestValAcc, bestLoss
Example #13
def __train(lr, weight_decay, epochs=50, verbose=False):  # reduced number of epochs

    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)

    optimizer = SGD(lr)

    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0

    train_loss_list = []
    train_acc_list = []
    val_acc_list = []

    for i in range(int(epochs * iter_per_epoch)):
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if verbose:
            print("train loss:" + str(loss))

        if current_iter % iter_per_epoch == 0:
            current_epoch += 1

            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)

            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" +
                      str(train_acc) + ", validation acc:" + str(val_acc) +
                      " ===")
        current_iter += 1

    return val_acc_list, train_acc_list
Example #14
    def init_from_dict(self):
        O = self.param
        cc = O["cache"] if "cache" in O else None
        op = O["hyperparameters"] if "hyperparameters" in O else None

        if op is None:
            raise ValueError("Must have `hyperparemeters` key: {}".format(O))

        if op and op["id"] == "SGD":
            optimizer = SGD().set_params(op, cc)
        elif op and op["id"] == "RMSProp":
            optimizer = RMSProp().set_params(op, cc)
        elif op and op["id"] == "AdaGrad":
            optimizer = AdaGrad().set_params(op, cc)
        elif op and op["id"] == "Adam":
            optimizer = Adam().set_params(op, cc)
        elif op:
            raise NotImplementedError("{}".format(op["id"]))
        return optimizer
Example #15
def train(
    net: NeuralNet,
    inputs: Tensor,
    targets: Tensor,
    num_epochs: int = 5000,
    iterator: DataIterator = BatchIterator(),
    loss=MSE(),
    optimizer: Optimizer = SGD()
) -> None:

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predicted = net.forward(batch.inputs)
            epoch_loss += loss.loss(predicted, batch.targets)
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optimizer.step(net)
        print(epoch, epoch_loss)
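
The loop above follows the usual pattern: forward pass, accumulate loss, backpropagate the loss gradient, then let the optimizer update the network. A self-contained NumPy sketch of the same structure, fitting y = 2x with plain SGD and no NeuralNet/Optimizer classes assumed:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 1))
y = 2.0 * x

w = np.zeros((1, 1))
lr = 0.1
for epoch in range(100):
    epoch_loss = 0.0
    for start in range(0, len(x), 32):                   # batch iterator
        xb, yb = x[start:start + 32], y[start:start + 32]
        pred = xb @ w                                     # forward pass
        epoch_loss += float(np.mean((pred - yb) ** 2))    # MSE loss
        grad_w = 2.0 * xb.T @ (pred - yb) / len(xb)       # gradient of MSE w.r.t. w
        w -= lr * grad_w                                  # SGD step
print(w)   # converges to roughly [[2.0]]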
Example #16
    def compile(self, optimizer="SGD", loss="cce"):

        if type(optimizer) is str:
            if optimizer == "SGD":
                self.optimizer = SGD()
            else:
                raise NameError("Unrecognized optimizer")
        else:
            self.optimizer = copy.deepcopy(optimizer)
        
        # Adds reference for the optimizer to the model
        self.optimizer.model = self

        if loss == "cce" or loss == "categorical_cross_entropy":
            self.loss = "categorical_cross_entropy"
        else:
            raise NameError("Unrecognized loss function.")

        self.history = self.optimizer.history
Example #17
def train():
    global args
    args = parser.parse_args()
    print(args)

    train_videos = UCF101Flows(
        frames_path='data/UCF101/train/frames/',
        batch_size=args.batch_size)
    valid_videos = UCF101Flows(
        frames_path='data/UCF101/validation/frames',
        batch_size=args.batch_size,
        shuffle=False)

    lr_scheduler = LearningRateScheduler(schedule=schedule)
    save_best = ModelCheckpoint(
        args.filepath,
        monitor='val_acc',
        verbose=1,
        save_best_only=True,
        mode='max')
    
    callbacks = [save_best, lr_scheduler]
    lr_multipliers = {}
    lr_multipliers['block1_conv1/kernel:0'] = 10

    if os.path.exists(args.filepath):
        model = load_model(args.filepath)
    else:        
        model = TSNs_MotionStream(
            input_shape=(299, 299, 20), dropout_prob=0.7,
            classes=len(train_videos.labels))
    
    model.compile(optimizer=SGD(lr=args.train_lr, momentum=0.9, multipliers=lr_multipliers),
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    model.fit_generator(
        generator=train_videos,
        epochs=args.epochs,
        callbacks=callbacks,
        workers=args.num_workers,
        validation_data=valid_videos)
Example #18
    def fit(self, loader: DataLoader, optimizer=None, loss_function=None) -> None:
        """
        Fits the model to the data.
        If no optimizer is passed in, the default optimizer is SGD.
        If no loss function is passed in, the default loss function is MSE.

        :returns: None; self.params are fit to the data.
        """
        if optimizer is None:
            optimizer = SGD(0.01)

        if loss_function is None:
            loss_function = mean_squared_error

        for X, y in loader:
            if self.params is None:
                self.params = Matrix([[Variable(random.random())] for _ in range(len(X[0]))])
                self.bias = Matrix([[Variable(random.random())]])

            output = self._evaluate(X)
            loss = loss_function(output, y)
            loss += self._regularize()
            self.params = optimizer.step(self.params, loss.get_grad(self.params))
            self.bias = optimizer.step(self.bias, loss.get_grad(self.bias))
Example #19
def experiment(method, kappa, epochs, batch_size, lr, momentum, zero_init):
    train_loader, test_loader = utils.load(batch_size=batch_size)
    model = MLPNet(zero_init=zero_init)
    if method == 'SGD':
        optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
    elif method == 'PSGDl1':
        optimizer = PSGDl1(model.parameters(),
                           kappa_l1=kappa,
                           lr=lr,
                           momentum=momentum)
    elif method == 'SGDFWl1':
        optimizer = SGDFWl1(model.parameters(), kappa_l1=kappa)
    else:
        raise ValueError('Invalid choice of method: ' + str(method))

    criterion = nn.CrossEntropyLoss(size_average=False)
    model, metrics = train_model(model, optimizer, criterion, epochs,
                                 train_loader, test_loader)

    fname = get_exp_name(method, kappa, epochs, batch_size, zero_init)
    with open('results/' + fname + '.pkl', 'wb+') as f:
        pickle.dump(metrics, f)
Example #20
def train(model, train_dataset, test_dataset):

    (x_train, y_train) = train_dataset
    (x_test, y_test) = test_dataset

    lr = 0.1
    momentum_coef = 0
    weight_decay = 0

    print(model)

    opt = SGD(lr=lr, momentum_coef=momentum_coef, weight_decay=weight_decay)
    print('Optimizer: {} with (lr: {} -- momentum_coef: {} -- weight_decay: {})'.
          format(opt.__class__.__name__, lr, momentum_coef, weight_decay))

    num_of_epochs = 1000
    batch_size = 256
    val_split = 0.1
    print('Validation Split: {} -- BatchSize: {} -- Epochs: {}'.format(val_split, batch_size, num_of_epochs))
    print('Training is about to start with epochs: {}, batch_size: {}, validation_split: {}'
          .format(num_of_epochs, batch_size, val_split))

    opt.train(model,
              x_train, y_train,
              num_of_epochs=num_of_epochs,
              batch_size=batch_size,
              val_split=val_split,
              verbose=1)

    print('\nEvaluating with test dataset !..')

    test_acc, test_loss = model.evaluate(x_test, y_test, return_pred=False)
    train_acc, train_loss = model.evaluate(x_train, y_train, return_pred=False)
    print("train_acc: {} -- test_loss: {}".format(train_acc, train_loss))
    print("test_acc: {} -- test_loss: {}".format(test_acc, test_loss))

    print('For complete use case of the framework please refer to guide.ipynb')
Example #21
 def create_optimizer(lr):
     if name == 'SGD':
         return SGD(model_params.values(), lr, \
                    momentum=momentum, weight_decay=weight_decay)
     elif name == 'NoisySGD':
         noise_factor = other_params['noise_factor']
         return NoisySGD(model_params.values(), lr, noise_factor, momentum,
                         weight_decay)
     elif name == 'ReservoirSGD':
         scale = other_params['scale']
         is_distributed = other_params['distributed']
         max_reservoir_size = other_params['max_reservoir_size']
         num_gradients_to_sample = other_params['num_gradients_to_sample']
         # TODO make not distributed version of this? maybe
         if not is_distributed:
             raise ValueError(
                 'ReservoirSGD only supports distributed mode right now!')
         return ReservoirSGD(model_params.values(), lr, scale,
                             num_gradients_to_sample, max_reservoir_size,
                             momentum, weight_decay)
     elif name == 'HessianVecSGD':
         noise_factor = other_params['noise_factor']
         return HessianVecSGD(model_params.values(), lr, noise_factor,
                              momentum, weight_decay)
Example #22
def main():

    config = {
        "optimizer": "rnn",
        "problem": "mnist",
        "rollout_length": 100,  # This is 100 in the paper
        "learning_rate": 0.1,
        "decay_rate": 0.9,
        "meta_layers": 2,
        "meta_hidden_size": 20,
        "layers": 2,
        "hidden_size": 100,
        "activation": 'relu',
        "preprocess": True,
        "max_to_keep": 3,
        "retrain": False,
        "dim": 10,
        "range_of_means": 10,
        "range_of_stds": 10,
        "summary_dir": "summary",
        "checkpoint_dir": "data_ckpt",
        "batch_size": 10000,
        "training_iters": 4000,
        "log_iters": 100
    }

    # create the experiments dirs
    create_dirs([config["summary_dir"], config["checkpoint_dir"]])
    # create tensorflow session
    sess = tf.Session()

    # create your data generator
    # create an instance of the model you want
    if config["problem"] == "simple":
        data = SimpleDG(config)
        model = LinearRegressionModel(config)
    elif config["problem"] == "mnist":
        data = MNISTDG(config)
        model = MNISTModel(config)
    else:
        raise ValueError("{} is not a valid problem".format(config["problem"]))

    # create tensorboard logger
    # logger = Logger(sess, config)
    # create trainer and pass all the previous components to it
    # trainer = LinearRegressionTrainer(sess, model, data, config, logger)

    sess.run(tf.global_variables_initializer())

    if config["optimizer"] == "sgd":
        optim = SGD(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rms":
        optim = RMSprop(config)
        losses = learn(optim, model, config["rollout_length"])
    elif config["optimizer"] == "rnn":
        optim = RNNOptimizer(config)
        losses = learn(optim, model, config["rollout_length"])

        if config["retrain"]:
            optim.train(losses, sess, data)
        else:
            optim.load(sess)
    else:
        raise ValueError("{} is not a valid optimizer".format(
            config["optimizer"]))

    # initialize variables in optimizee
    # (can't initialize all here because it would potentially overwrite the trained optimizer)
    sess.run(
        tf.variables_initializer([
            var
            for var in tf.trainable_variables(scope=optim.__class__.__name__)
        ]))

    x = np.arange(config["rollout_length"] + 1)

    for i in range(3):
        sess.run(
            tf.variables_initializer([
                var for var in tf.trainable_variables(
                    scope=optim.__class__.__name__)
            ]))

        data.refresh_parameters(seed=i)
        data_x, data_y = next(data.next_batch(config["batch_size"]))

        l = sess.run([losses],
                     feed_dict={
                         "input:0": data_x,
                         "label:0": data_y
                     })
        print(l)

        p1, = plt.semilogy(x, l[0], label=config["optimizer"])
        plt.legend(handles=[p1])
        plt.title('Losses')
        plt.show()

        # TODO compare different optimizers

    data.refresh_parameters()

    data_x, data_y = next(data.next_batch(100, mode="train"))
    pred = sess.run(model.prediction,
                    feed_dict={
                        "input:0": data_x,
                        "label:0": data_y
                    })
    print(
        list(
            zip(pred, np.argmax(data_y, axis=1), pred == np.argmax(data_y,
                                                                   axis=1))))

    # calculate accuracy on test data
    seed = np.random.randint(low=0, high=1e6)
    data.refresh_parameters(seed=seed)
    data_x, data_y = next(data.next_batch(5000, mode="train"))
    acc = sess.run(model.accuracy,
                   feed_dict={
                       "input:0": data_x,
                       "label:0": data_y
                   })
    print("Train accuracy: {}".format(acc))

    data_x, data_y = next(data.next_batch(5000, mode="test"))
    acc = sess.run(model.accuracy,
                   feed_dict={
                       "input:0": data_x,
                       "label:0": data_y
                   })
    print("Test accuracy: {}".format(acc))
Example #23
    xs = corpus[:-1]  # inputs
    ts = corpus[1:]  # targets (next token)
    data_size = len(xs)
    print('corpus size: {0}, vocabulary size: {1}'.format(
        corpus_size, vocab_size))

    #
    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    # model
    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    # compute the start offset at which each sample in the mini-batch reads the corpus
    jump = (corpus_size - 1) // batch_size
    offsets = [i * jump for i in range(batch_size)]

    for epoch in range(max_epoch):
        for iter in range(max_iters):

            # fetch a mini-batch
            batch_x = np.empty((batch_size, time_size), dtype='i')
            batch_t = np.empty((batch_size, time_size), dtype='i')
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
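
The offsets computed above stagger where each of the batch_size streams starts reading the corpus, so every row of a mini-batch comes from a different contiguous region and successive iterations advance in lockstep. A toy illustration with small made-up numbers:

corpus_size, batch_size, time_size = 21, 3, 4
data_size = corpus_size - 1
jump = (corpus_size - 1) // batch_size                 # 6
offsets = [i * jump for i in range(batch_size)]        # [0, 6, 12]

time_idx = 0
for it in range(2):                                    # two successive mini-batches
    batch = [[(offset + time_idx + t) % data_size for t in range(time_size)]
             for offset in offsets]
    print(batch)   # each row reads a different, contiguous slice of the corpus
    time_idx += time_size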
Example #24
def main():

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=.10)

    #Settings 1
    #reg = 0.065
    #lr = 0.002

    #Settings 2
    #reg = 0.0021162
    #lr = 0.061474

    #Settings 3
    #reg = 0.0010781
    #lr = 0.069686

    #Settings 4
    #reg = 0.0049132
    #lr = 0.07112

    #Settings 5
    reg = 0.005
    lr = 0.007
    network = Model()

    network.addLayer(
        Linear(32 * 32 * 3, 50, regularization=reg, initializer="he"))
    network.addLayer(BatchNormalization(50, trainable=True))
    network.addLayer(Relu())

    network.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    network.addLayer(BatchNormalization(30, trainable=True))
    network.addLayer(Relu())

    network.addLayer(Linear(30, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)

    network.compile(sgd, "cce")

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    network.fit(trainingData,
                trainingLabels,
                epochs=30,
                batch_size=100,
                validationData=(validationData, validationLabels))

    plotAccuracy(
        network,
        "plots/",
        timestamp,
        title="3-layer network accuracy over epochs, eta:{}, lambda:{}".format(
            lr, reg))
    plotLoss(
        network,
        "plots/",
        timestamp,
        title="3-layer network loss over epochs, eta:{}, lambda:{}".format(
            lr, reg))

    loss, acc = network.evaluate(testingData, testingLabels)
    print("Test loss: {} , Test acc: {}".format(loss, acc))
Example #25
#!/usr/bin/env python3
import numpy as np
from lib.mnist import load_mnist
from chap6.multi_layer_net import MultiLayerNet
from chap6.overfit_weight_decay import train
from optimizers import SGD

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)
x_train = x_train[:300]
t_train = t_train[:300]
optimizer = SGD(lr=0.01)

max_epochs = 201
train_size = x_train.shape[0]
batch_size = 100

# train
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10)
(train_loss_list, train_acc_list, test_acc_list) = train(network)
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 100, 100],
                        output_size=10,
                        use_dropout=True,
                        dropout_ratio=0.2)
(train_loss_list_decay, train_acc_list_decay,
 test_acc_list_decay) = train(network)

# draw out
Example #26
from optimizers import SGD

with open('data/shakespear.txt', 'r') as f:
    raw = f.read()

vocab = list(set(raw))
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
indices = np.array(list(map(lambda x: word2index[x], raw)))

embed = Embedding(vocab_size=len(vocab), dim=512)
model = RNNCell(n_inputs=512, n_hidden=512, n_output=len(vocab))

criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(),
            alpha=0.01)

batch_size = 32
bptt = 16
n_batches = int((indices.shape[0] / batch_size))
trimmed_indices = indices[:n_batches * batch_size]
# batched_indices: each column is a contiguous sub-sequence of indices
batched_indices = trimmed_indices.reshape(batch_size, n_batches)
batched_indices = batched_indices.transpose()

input_batched_indices = batched_indices[:-1]
target_batched_indices = batched_indices[1:]

n_bptt = int((n_batches - 1) / bptt)
input_batches = input_batched_indices[:n_bptt * bptt]
input_batches = input_batches.reshape(n_bptt, bptt, batch_size)
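
The reshaping above carves the token stream into batch_size parallel sub-sequences (one per column) and then into BPTT-length windows. A small sketch with made-up numbers shows the resulting shapes:

import numpy as np

indices = np.arange(20)                               # pretend token ids 0..19
batch_size, bptt = 4, 2

n_batches = indices.shape[0] // batch_size            # 5
trimmed = indices[:n_batches * batch_size]
batched = trimmed.reshape(batch_size, n_batches).T    # (5, 4): column j is contiguous

inputs, targets = batched[:-1], batched[1:]           # next-token prediction targets
n_bptt = (n_batches - 1) // bptt                      # 2
input_windows = inputs[:n_bptt * bptt].reshape(n_bptt, bptt, batch_size)
print(batched)
print(input_windows.shape)                            # (2, 2, 4)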
Example #27
def network_grad_test():

    # Load data

    if args.data_set == 'SwissRollData':
        Xtrain, Xtest, Ytrain, Ytest = loadSwissRollData()
    elif args.data_set == 'GMMData':
        Xtrain, Xtest, Ytrain, Ytest = loadGMMData()
    else:
        Xtrain, Xtest, Ytrain, Ytest = loadPeaksData()

    # preprocess data - shuffle and split into different batch sizes (using batch_size list)
    Xtrain, Ytrain, test_sets, train_sets = preprocess_data(
        Xtest, Xtrain, Ytest, Ytrain)

    # hyper params
    n_layer = args.n_layers
    neurons = args.neurons
    dim_in = Xtrain.shape[0]
    dim_out = Ytrain.shape[0]

    lr = args.lr
    opt = SGD(lr=lr)

    # init model
    model = Net(n_layer, dim_in, dim_out, opt, neurons)

    all_batches, all_labels = train_sets
    batch = all_batches[0]
    labels = all_labels[0].T

    outputs = model(batch, labels)
    f_x = model.cross_entropy(outputs, labels)  # compute f(x)

    # get the d vectors to perturb the weights
    d_vecs = get_pertrubazia(model)

    model.backward()  # compute grad(x) per layer

    # concatenate grad per layer
    grad_x = get_raveled_grads_per_layer(model)

    # save weights of each layer, for testing:
    weights_list = get_weights_from_layers(model)

    # save the initial weights
    w_ce = model.cross_entropy.W
    w_li = model.linear_inp.W

    first_order_l, second_order_l = [], []
    eps_vals = np.geomspace(0.5, 0.5**20, 20)

    for eps in eps_vals:

        eps_d = eps * d_vecs[0]
        eps_ds = [eps_d.ravel()]
        model.linear_inp.W = np.add(w_li, eps_d)
        for d, ll, w in zip(d_vecs[1:-1], model.layers, weights_list[1:-1]):
            eps_d = eps * d
            ll.W = np.add(w, eps_d)
            eps_ds.append(eps_d.ravel())
        eps_d = eps * d_vecs[-1]
        model.cross_entropy.W = np.add(w_ce, eps_d)
        eps_ds.append(eps_d.ravel())
        eps_ds = np.concatenate(eps_ds, axis=0)

        output_d = model(batch, labels)
        fx_d = model.cross_entropy(output_d, labels)

        first_order = abs(fx_d - f_x)
        second_order = abs(fx_d - f_x - eps_ds.ravel().T @ grad_x.ravel())

        print(first_order)
        print(second_order)

        first_order_l.append(first_order)
        second_order_l.append(second_order)

    l = range(20)
    plt.title('Network gradient test')
    plt.plot(l, first_order_l, label='First Order')
    plt.plot(l, second_order_l, label='Second Order')
    plt.yscale('log')
    plt.legend()
    plt.savefig('./Test_Figures/grad_test_net.png',
                transparent=True,
                bbox_inches='tight',
                pad_inches=0)
    plt.show()
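
The two curves above are the standard Taylor-expansion gradient test: if the analytic gradient is correct, |f(w + eps*d) - f(w)| should shrink like O(eps) while |f(w + eps*d) - f(w) - eps * d . grad f(w)| should shrink like O(eps^2). A minimal self-contained version of the same test on a toy objective (names below are illustrative):

import numpy as np

def f(w):        # toy objective
    return 0.5 * float(np.sum(w ** 2))

def grad_f(w):   # its analytic gradient
    return w

rng = np.random.default_rng(0)
w = rng.normal(size=5)
d = rng.normal(size=5)
d /= np.linalg.norm(d)

for eps in np.geomspace(0.5, 0.5 ** 10, 10):
    first = abs(f(w + eps * d) - f(w))                          # decays like eps
    second = abs(f(w + eps * d) - f(w) - eps * d @ grad_f(w))   # decays like eps**2
    print("eps={:.2e}  first-order={:.3e}  second-order={:.3e}".format(eps, first, second))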
Example #28
def sgd_test():

    # Load data
    if args.data_set == 'SwissRollData':
        Xtrain, Xtest, Ytrain, Ytest = loadSwissRollData()
    elif args.data_set == 'GMMData':
        Xtrain, Xtest, Ytrain, Ytest = loadGMMData()
    else:
        Xtrain, Xtest, Ytrain, Ytest = loadPeaksData()

    # Define set of learning rate and batch size (use only for testing)
    batch_size = np.geomspace(2, 2**8, 8)
    batch_size = [round_(i) for i in batch_size]

    # preprocess data - shuffle and split into different batch sizes (using batch_size list)
    Xtrain, Ytrain, test_sets, train_sets = preprocess_data(
        Xtest, Xtrain, Ytest, Ytrain)

    # train loop

    all_batches, all_labels = train_sets

    softmax = Softmax(Xtrain.shape[0] + 1, Ytrain.shape[0])
    loss_func = CrossEntropy(softmax.W)
    opt = SGD(lr=args.lr)

    accs_hyper_params_train = []
    accs_hyper_params_test = []

    for e in range(args.iter):
        acc_train = []
        loss_l = []
        for batch, labels in tqdm(zip(all_batches, all_labels),
                                  total=len(all_batches),
                                  file=sys.stdout):
            labels = labels.T

            ones = np.ones((1, batch.shape[-1]), dtype=int)
            batch = np.concatenate((batch, ones), axis=0)

            loss = loss_func(batch, labels)
            loss_l.append(loss)

            loss_func.grad_w(batch, labels)
            softmax.W = opt.step(loss_func.grad_W, softmax.W)
            loss_func.W = softmax.W

            output = softmax(batch)
            # calculate train error
            labels = get_index(labels)
            prediction = predict(output)

            acc_train = np.append(acc_train, prediction == labels, axis=0)

        print('Epoch {} train acc: {}  train loss: {}'.format(
            e, np.mean(acc_train), np.mean(loss_l)))

        accs_hyper_params_train.append(np.mean(acc_train))
        accs_hyper_params_test.append(
            np.mean(test_accuracy(softmax, test_sets)))

    plt.plot(range(args.iter), accs_hyper_params_train, label='Train Accuracy')
    plt.plot(range(args.iter),
             accs_hyper_params_test,
             label='Validation Accuracy')
    plt.title('SGD test: {} Set, Acc of lr={} and batch size={}'.format(
        args.data_set, args.lr, args.batch_size))
    plt.legend()
    plt.savefig(
        './Test_Figures/{} Set, Acc of lr={} and batch size={}.png'.format(
            args.data_set, args.lr, args.batch_size),
        transparent=True,
        bbox_inches='tight',
        pad_inches=0)
    plt.show()
Example #29
learning_rate = 0.02
batch_size = 1
updates = 10000

layers = [FullyConnected(4, 1, activation=ScaledTanh(), threshold=True)]

for layer in layers:
    size = (layer.output_dim, layer.input_dim)
    layer.weights = np.random.uniform(low=-0.2, high=0.2, size=size)
    size = layer.output_dim
    layer.threshold = np.random.uniform(low=-1, high=1, size=size)

network = StandardNetwork(layers)

optimizer = SGD(layers, learning_rate)
error_function = MSE()

repeats = 10
is_linearly_separable = []
for targets in all_targets:
    for idx in range(1, repeats + 1):
        data = DataHandler(inputs, targets)

        linearly_separable = False
        for jdx in range(updates):
            input, target = data.sample(batch_size)

            output = network.forward(input)
            train_error = error_function(target, output)
            network.backward(error=error_function.grad())
Example #30
    # N is the number of samples
    # D_in is input dimension
    # D_out is output dimension.
    N, D_in, D_out = 64, 1, 1

    # Add some noise to the observations
    noise_var = 0.5

    # Create random input and output data
    X = lhs(D_in, N)
    y = 5 * X + noise_var * np.random.randn(N, D_out)

    # Define the model
    model = LinearRegression(X, y)

    # Define an optimizer
    optimizer = SGD(model.num_params, lr=1e-3, momentum=0.9)
    #    optimizer = Adam(model.num_params, lr = 1e-3)
    #    optimizer = RMSprop(model.num_params, lr = 1e-3)

    # Train the model
    model.train(10000, optimizer)

    # Print the learned parameters
    print('w = %e, sigma_sq = %e' %
          (model.theta[:-1], np.exp(model.theta[-1])))

    # Make predictions
    y_pred = model.predict(X)

    # Plot
    plt.figure(1)