Example #1
    def test_sgd(self):

        model_list = [dict(type='Linear', in_dim=128, out_dim=10)]
        criterion = dict(type='SoftmaxCrossEntropy')
        model = ConvNet(model_list, criterion)

        optimizer = SGD(model)

        # forward once
        np.random.seed(1024)
        x = np.random.randn(32, 128)
        np.random.seed(1024)
        y = np.random.randint(10, size=32)
        tmp = model.forward(x, y)
        model.backward()
        optimizer.update(model)
        # forward twice
        np.random.seed(512)
        x = np.random.randn(32, 128)
        np.random.seed(512)
        y = np.random.randint(10, size=32)
        tmp = model.forward(x, y)
        model.backward()
        optimizer.update(model)

        expected_weights = np.load('tests/sgd_weights/w.npy')
        expected_bias = np.load('tests/sgd_weights/b.npy')

        self.assertAlmostEqual(
            np.sum(np.abs(expected_weights - model.modules[0].weight)), 0)
        self.assertAlmostEqual(
            np.sum(np.abs(expected_bias - model.modules[0].bias)), 0)
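Example #1 never shows the optimizer itself, only that SGD is built from the model and that update(model) nudges every layer's weight and bias. A minimal sketch consistent with that interface; the lr default and the gradient attribute names dW/db are assumptions, not taken from the project:

class SGD:
    def __init__(self, model, lr=0.01):
        self.lr = lr

    def update(self, model):
        # vanilla gradient step on every layer the model exposes
        for layer in model.modules:
            layer.weight -= self.lr * layer.dW  # dW/db are assumed names
            layer.bias -= self.lr * layer.db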
Example #2
def main():
    batch_size = 10
    wordvec_size = 100
    hidden_size = 100
    time_size = 5
    lr = 0.1
    max_epoch = 100

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000
    corpus = corpus[:corpus_size]
    vocab_size = int(max(corpus) + 1)

    xs = corpus[:-1]
    ts = corpus[1:]
    data_size = len(xs)
    print(f'corpus size: {corpus_size}, vocabulary size: {vocab_size}')

    max_iters = data_size // (batch_size * time_size)  # minibatches per epoch
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

    jump = (corpus_size - 1) // batch_size
    offsets = [i * jump for i in range(batch_size)]

    for epoch in range(1, max_epoch + 1):
        for iter_ in range(max_iters):
            batch_x = np.empty((batch_size, time_size), dtype=int)
            batch_t = np.empty((batch_size, time_size), dtype=int)
            # each batch row reads the corpus from its own offset;
            # the target is the input sequence shifted one step ahead
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
                time_idx += 1

            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        ppl = np.exp(total_loss / loss_count)
        print(f'| epoch {epoch} | perplexity {ppl}')
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0
    print('DONE')
Example #3
    def test_update(self):
        sgd = SGD()
        gnn = GraphNeuralNetwork(vector_size=2)
        # with the network's freshly initialised gradients, update() should
        # leave the parameters unchanged
        expected = repr(gnn.params)
        sgd.update(gnn)
        actual = repr(gnn.params)
        self.assertEqual(expected, actual)
        params = copy.deepcopy(gnn.params)
        for _ in range(100):
            gnn.grads["W"] = np.random.rand()
            gnn.grads["A"] = np.random.rand()
            gnn.grads["b"] = np.random.rand()
            sgd.update(gnn)
            for key, param in params.items():
                # apply the SGD rule by hand and compare with the optimizer
                params[key] = param - gnn.grads[key] * sgd.lr
                expected = repr(params[key])
                actual = repr(gnn.params[key])
                self.assertEqual(expected, actual)
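The loop above pins down the exact rule the test expects: params[key] ← params[key] − lr · grads[key], applied in place on the network. A minimal dict-based SGD that would satisfy it (the default learning rate is an assumption):

class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, gnn):
        # plain gradient step over every parameter of the network
        for key in gnn.params:
            gnn.params[key] = gnn.params[key] - self.lr * gnn.grads[key]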
Example #4
def train(weight_init_std, x_train, t_train, max_epochs):
    batch_norm_network = MultiLayerNet(input_size=784,
                                       hidden_size_list=[100, 100, 100, 100, 100],
                                       output_size=10,
                                       weight_init_std=weight_init_std,
                                       use_batchnorm=True)
    no_batch_norm_network = MultiLayerNet(input_size=784,
                                          hidden_size_list=[100, 100, 100, 100, 100],
                                          output_size=10,
                                          weight_init_std=weight_init_std)
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    max_iters_times = 1000000000
    iter_per_epoch = max(int(train_size / batch_size), 1)

    optimizer = SGD(lr=learning_rate)
    bn_train_acc_list = []
    no_bn_train_acc_list = []

    epoch_cnt = 0
    for i in range(max_iters_times):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for network in (batch_norm_network, no_batch_norm_network):
            grads = network.gradient(x_batch, t_batch)
            optimizer.update(network.params, grads)

        if i % iter_per_epoch == 0:
            bn_train_acc = batch_norm_network.accuracy(x_train, t_train)
            no_bn_train_acc = no_batch_norm_network.accuracy(x_train, t_train)
            bn_train_acc_list.append(bn_train_acc)
            no_bn_train_acc_list.append(no_bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(no_bn_train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return no_bn_train_acc_list, bn_train_acc_list
Example #5
def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10,
        weight_init_std=weight_init_std,
        use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " +
                  str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
Example #6
optimizer = SGD(lr=learning_rate)

data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch):
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]

    for iters in range(max_iters):
        batch_x = x[iters * batch_size:(iters + 1) * batch_size]
        batch_t = t[iters * batch_size:(iters + 1) * batch_size]

        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)

        total_loss += loss
        loss_count += 1

        if (iters + 1) % 10 == 0:
            avg_loss = total_loss / loss_count
            print('| epoch %d | iter %d/%d | loss %.2f ' %
                  (epoch + 1, iters + 1, max_iters, avg_loss))
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0
Example #7
for key, weight_type in weight_init_types.items():
    networks[key] = MultiLayerNet(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_type)
    train_loss[key] = []

# 2. Start training ==========
for i in range(max_iterations):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    for key in weight_init_types.keys():
        grads = networks[key].gradient(x_batch, t_batch)
        optimizer.update(networks[key].params, grads)

        loss = networks[key].loss(x_batch, t_batch)
        train_loss[key].append(loss)

    if i % 100 == 0:
        print("===========" + "iteration:" + str(i) + "===========")
        for key in weight_init_types.keys():
            loss = networks[key].loss(x_batch, t_batch)
            print(key + ":" + str(loss))

# 3. Plot the results ==========
markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(max_iterations)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
Example #8
train_loss_list = []
train_acc_list = []
test_acc_list = []

iters_num = 10000
batch_size = 100
train_size = x_train.shape[0]
iter_per_epoch = max(train_size / batch_size, 1)

net = ConvNet()
optim = SGD(net.params, lr=0.1, momentum=0.9)
# optim = AdaGrad(net.params)
# optim = Adam(net.params)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = net.gradient(x_batch, t_batch)
    net.params = optim.update(net.params, grad)

    loss = net.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = net.accuracy(x_train, t_train)
        test_acc = net.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
Example #9
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc) +
              ", test acc:" + str(test_acc))

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break

# Plot the graphs ==========
Example #10
    def fit(self):
        n_batches = math.ceil(len(self.x_train) / self.batch_size)

        gpuConfig = tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=self.per_process_gpu_memory_fraction),
            device_count={'GPU': 1}
        )
        with tf.compat.v1.Session(config=gpuConfig) as self.sess:

            optimizer = SGD(self.cost, self.params, self.learning_rate)
            all_grad = optimizer.get_grad()
            hessian = optimizer.get_hessian(layer_num=-2)
            updates = optimizer.update()
            train = tf.group(*updates)

            self.sess.run(tf.compat.v1.global_variables_initializer())

            costs_train, accs_train, norms_train, hesse_matrixes_train = [], [], [], []
            costs_valid, accs_valid, norms_valid, hesse_matrixes_valid = [], [], [], []

            for self.epoch in range(self.epoch_size):
                self.x_train, self.t_train = shuffle(self.x_train, self.t_train)
                train_feed_dict={self.x: self.x_train[:20000], self.t: self.t_train[:20000], self.is_training: False}
                valid_feed_dict={self.x: self.x_valid, self.t: self.t_valid, self.is_training: False}

                for i in tqdm(range(n_batches)):
                    start = i * self.batch_size
                    end = start + self.batch_size
                    self.sess.run(train, feed_dict={self.x: self.x_train[start:end], 
                                                    self.t: self.t_train[start:end], 
                                                    self.is_training: True})

                norm_train = self.extract_gradient_norm(all_grad, 'Train', train_feed_dict)
                norms_train.append(norm_train)

                norm_valid = self.extract_gradient_norm(all_grad, 'Valid', valid_feed_dict)
                norms_valid.append(norm_valid)

                cost_train, acc_train = self.extract_cost_and_acc('Train', train_feed_dict)
                costs_train.append(cost_train)
                accs_train.append(acc_train)

                cost_valid, acc_valid = self.extract_cost_and_acc('Valid', valid_feed_dict)
                costs_valid.append(cost_valid)
                accs_valid.append(acc_valid)

                hesse_matrix_train = self.sess.run(hessian, feed_dict=train_feed_dict)
                hesse_matrixes_train.append(hesse_matrix_train)

                hesse_matrix_valid = self.sess.run(hessian, feed_dict=valid_feed_dict)
                hesse_matrixes_valid.append(hesse_matrix_valid)
            
            surface_values_train = self.calculate_loss_surface(optimizer, hesse_matrix_train, train_feed_dict)
            surface_values_valid = self.calculate_loss_surface(optimizer, hesse_matrix_valid, valid_feed_dict)

        save_lists = [norms_train, hesse_matrixes_train, costs_train, accs_train, surface_values_train,
                      norms_valid, hesse_matrixes_valid, costs_valid, accs_valid, surface_values_valid]
        reslut_names = ['grad_norm_train', 'hessian_train', 'costs_train', 'accuracy_train', 'surface_values_train',
                        'grad_norm_valid', 'hessian_valid', 'costs_valid', 'accuracy_valid', 'surface_values_valid']
        for (save_list, reslut_name) in zip(save_lists, reslut_names):
            self.save_result(save_list=save_list, reslut_name=reslut_name)
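fit() above only relies on the optimizer exposing get_grad(), get_hessian(layer_num) and an update() that returns assign ops to be grouped with tf.group. A rough TF1-compat sketch of such a wrapper, reconstructed purely from those calls (the whole class is an assumption, and it presumes v1 graph mode as in the surrounding code):

import tensorflow as tf

class SGD:
    def __init__(self, cost, params, learning_rate):
        self.cost = cost
        self.params = params                      # list of tf.Variable
        self.lr = learning_rate
        self.grads = tf.gradients(cost, params)   # symbolic gradients

    def get_grad(self):
        return self.grads

    def get_hessian(self, layer_num=-2):
        # Hessian of the cost w.r.t. a single parameter tensor
        return tf.hessians(self.cost, self.params[layer_num])[0]

    def update(self):
        # one assign op per parameter; the caller groups them with tf.group
        return [tf.compat.v1.assign_sub(p, self.lr * g)
                for p, g in zip(self.params, self.grads)]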
Example #11
(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True, one_hot_label=True)
iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

train_loss = {}

for key, weight_type in weight_init_types.items():
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []

    for i in range(iters_num):
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        train_loss[key].append(network.loss(x_batch, t_batch))

markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 2.5)
plt.legend()
plt.show()
Example #12
class RNN:
    def __init__(self, input_size, hidden_size, labels):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = len(labels)
        self.labels = labels
        self.label_index = {labels[i]: i for i in range(len(labels))}
        self.variables = self._init_variables()
        self.optimizer = SGD(self.variables)

    def _init_variables(self):
        variables = {
            'w': Variable(np.random.randn(self.input_size, self.hidden_size)),
            'w_rec':
            Variable(np.random.randn(self.hidden_size, self.hidden_size)),
            'b': Variable(np.random.randn(self.hidden_size)),
            'c': Variable(np.random.randn(self.output_size))
        }
        return variables

    def _calculate(self, input_list):
        hidden = Variable(np.zeros(self.hidden_size))
        output_list = []
        for x in input_list:
            hidden = Add(
                Mul(
                    Sigmoid(
                        Add(Mul(Variable(x), self.variables['w']),
                            self.variables['b'])), hidden),
                self.variables['c'])
            output_list.append(hidden)
        return output_list

    def fit(self, input_list, label, update=True):
        output_list = self._calculate(input_list)

        t_node = self.t_node(label)
        graph = np.zeros(self.hidden_size)
        for hidden in output_list:
            graph = Add(graph, SoftmaxLoss(hidden, t_node))
        loss = graph.forward()
        graph.backward(1)
        if update:
            self.optimizer.update()
        return loss

    def predict(self, input_list):
        output_list = self._calculate(input_list)
        results = []

        for hidden in output_list:
            y = hidden.forward()
            max_index = np.argmax(y)
            results.append(self.labels[max_index])
        return results

    def t_node(self, label):
        index = self.label_index[label]
        t = np.zeros(self.output_size)
        t[index] = 1.0
        return Variable(t)
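The RNN hands its Variable dict to SGD once and later calls update() with no arguments, so the optimizer presumably keeps a reference to those Variables and reads whatever gradients backward() left on them. A minimal sketch under assumed attribute names (value and grad on Variable are assumptions):

class SGD:
    def __init__(self, variables, lr=0.01):
        self.variables = variables
        self.lr = lr

    def update(self):
        # gradient step on every Variable registered at construction time
        for var in self.variables.values():
            var.value -= self.lr * var.grad  # value/grad are assumed names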