Example 1
 def __init__(self, input_size, hidden_size, labels):
     self.input_size = input_size
     self.hidden_size = hidden_size
     self.output_size = len(labels)
     self.labels = labels
     self.label_index = {label: i for i, label in enumerate(labels)}
     self.variables = self._init_variables()
     self.optimizer = SGD(self.variables)
Example 2
def testNetwork():  # noqa: D103
    net = Network([Linear(10, 64), ReLU(), Linear(64, 2), Sigmoid()])
    x = np.random.randn(32, 10)
    y = np.random.randn(32, 2)
    mse = MSE()
    optim = SGD(0.001, 0.001)
    pred = net(x)
    _ = mse(pred, y)
    _ = net.backward(mse.grad)
    optim.step(net)
Example 3
 def test00_batch_creation(self):
     m = 52
     _sgd = SGD(batch_size=10, m=m)
     batches = _sgd.create_batches()
     all_indices = []
     for batch in batches:
         all_indices += batch
     # every sample index must appear in one of the batches
     for i in range(m):
         self.assertIn(i, all_indices)
Example 4
def get_model_optimizer(args):
    model = SVM(c=args.c, penalty=args.penalty)
    if args.gpu >= 0:
        model.to_gpu()
    if args.penalty == 'L2':
        optimizer = optimizers.SGD(lr=args.lr)
    elif args.penalty == 'L1':
        optimizer = SGD(lr=args.lr)
    optimizer.setup(model)

    return model, optimizer
Example 6
    def test02_sgd_sanity_with_epochs(self):
        C, W0, X, _, _ = create_C_W_X_d()
        optimizer = SGD(batch_size=256, m=X.shape[1])
        W = W0.copy()
        for epoch in range(15):
            W = optimizer.optimize(W,
                                   X,
                                   C,
                                   objective_soft_max,
                                   objective_soft_max_gradient_W,
                                   lr=1)

        self.assertTrue(True)
Example 7
def main():
    batch_size = 10
    wordvec_size = 100
    hidden_size = 100
    time_size = 5
    lr = 0.1
    max_epoch = 100

    corpus, word_to_id, id_to_word = ptb.load_data('train')
    corpus_size = 1000
    corpus = corpus[:1000]
    vocab_size = int(max(corpus) + 1)

    xs = corpus[:-1]
    ts = corpus[1:]
    data_size = len(xs)
    print(f'corpus size: {corpus_size}, vocabulary size: {vocab_size}')

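    # each iteration consumes batch_size * time_size tokens of the corpus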
    max_iters = data_size // (batch_size * time_size)
    time_idx = 0
    total_loss = 0
    loss_count = 0
    ppl_list = []

    model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)

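    # start each batch row at an evenly spaced offset so truncated BPTT covers the whole corpus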
    jump = (corpus_size - 1) // batch_size
    offsets = [i * jump for i in range(batch_size)]

    for epoch in range(1, max_epoch + 1):
        for iter_ in range(max_iters):
            batch_x = np.empty((batch_size, time_size), dtype=int)
            batch_t = np.empty((batch_size, time_size), dtype=int)
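            # inputs come from xs; targets come from ts, i.e. the next word in the corpus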
            for t in range(time_size):
                for i, offset in enumerate(offsets):
                    batch_x[i, t] = xs[(offset + time_idx) % data_size]
                    batch_t[i, t] = ts[(offset + time_idx) % data_size]
                time_idx += 1

            loss = model.forward(batch_x, batch_t)
            model.backward()
            optimizer.update(model.params, model.grads)
            total_loss += loss
            loss_count += 1

        ppl = np.exp(total_loss / loss_count)
        print(f'| epoch {epoch} | perplexity {ppl}')
        ppl_list.append(float(ppl))
        total_loss, loss_count = 0, 0
    print('DONE')
Example 8
def main():
    batch_size = 20
    wordvec_size = 650
    hidden_size = 650
    time_size = 35
    lr = 20.0
    #max_epoch = 40
    max_epoch = 1
    max_grad = 0.25
    dropout = 0.5

    corpus, word_to_id, _ = ptb.load_data('train')
    corpus_val, _, _ = ptb.load_data('val')
    corpus_test, _, _ = ptb.load_data('test')
    vocab_size = len(word_to_id)
    xs = corpus[:-1]
    ts = corpus[1:]

    model = BetterRnnlm(vocab_size, wordvec_size, hidden_size, dropout)
    optimizer = SGD(lr)
    trainer = RnnlmTrainer(model, optimizer)

    best_ppl = float('inf')
    for _ in range(max_epoch):
        trainer.fit(xs,
                    ts,
                    1,
                    batch_size,
                    time_size,
                    max_grad,
                    eval_interval=20)
        model.reset_state()

        ppl = eval_perplexity(model, corpus_val)
        print(f'valid perplexity: {ppl}')

        if best_ppl > ppl:
            best_ppl = ppl
            model.save_params()
        else:
            lr /= 4.0
            optimizer.lr = lr
        model.reset_state()
        print('-' * 50)

    model.reset_state()
    ppl_test = eval_perplexity(model, corpus_test)
    print(f'test perplexity: {ppl_test}')
    print('DONE')
Example 9
def test_categorical_classifier(
        M: int = 3,
        log_loss_function: Callable = softmax_cross_entropy_log_loss):
    """Test case for a multi-class (categorical) classifier built on the matmul layer
    """
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        W

    profiler = cProfile.Profile()
    profiler.enable()

    train_binary_classifier(N=N,
                            D=D,
                            M=M,
                            X=X,
                            T=T,
                            W=W,
                            log_loss_function=log_loss_function,
                            optimizer=optimizer,
                            test_numerical_gradient=True,
                            log_level=logging.WARNING,
                            callback=callback)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
Example 10
def _test_binary_classifier(
        M: int = 2,
        log_loss_function: Callable = softmax_cross_entropy_log_loss,
        num_epochs: int = 100):
    """Test case for a binary classifier built on the matmul layer
    """
    N = 50
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable(d=D, n=N)

    # X, T = transform_X_T(X, T)

    def callback(W):
        return W

    train_binary_classifier(N=N,
                            D=D,
                            M=M,
                            X=X,
                            T=T,
                            W=W,
                            log_loss_function=log_loss_function,
                            optimizer=optimizer,
                            num_epochs=num_epochs,
                            test_numerical_gradient=True,
                            callback=callback)
Example 11
def main():
    batch_size = 20
    wordvec_size = 100
    hidden_size = 100
    time_size = 35
    lr = 20.0
    #max_epoch = 4
    max_epoch = 1
    max_grad = 0.25

    corpus, word_to_id, _ = ptb.load_data('train')
    corpus_test, _, _ = ptb.load_data('test')
    vocab_size = len(word_to_id)
    xs = corpus[:-1]
    ts = corpus[1:]

    model = Rnnlm(vocab_size, wordvec_size, hidden_size)
    optimizer = SGD(lr)
    trainer = RnnlmTrainer(model, optimizer)

    trainer.fit(xs,
                ts,
                max_epoch,
                batch_size,
                time_size,
                max_grad,
                eval_interval=20)
    model.reset_state()

    ppl_test = eval_perplexity(model, corpus_test)
    print(f'test perplexity: {ppl_test}')

    model.save_params()
    print('DONE')
Example 12
def test_matmul_bn_relu_classifier(M: int = 3):
    """Test case for the matmul + batch normalization + ReLU classifier
    """
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.5))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """Dummy callback"""
        W

    profiler = cProfile.Profile()
    profiler.enable()

    train_matmul_bn_relu_classifier(
        N=N,
        D=D,
        M=M,
        X=X,
        T=T,
        W=W,
        log_loss_function=softmax_cross_entropy_log_loss,
        optimizer=optimizer,
        test_numerical_gradient=True,
        callback=callback)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
Example 13
def run():
    global args
    args = parser.parse_args()
    with open(args.config) as f:
        config = yaml.safe_load(f)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)


    # Prepare MNIST data
    train_data, train_label, val_data, val_label = load_mnist_trainval()
    test_data, test_label = load_mnist_test()

    # Create a model
    if args.type == 'SoftmaxRegression':
        model = SoftmaxRegression()
    elif args.type == 'TwoLayerNet':
        model = TwoLayerNet(hidden_size=args.hidden_size)

    # Optimizer
    optimizer = SGD(learning_rate=args.learning_rate, reg=args.reg)

    train_loss_history = []
    train_acc_history = []
    valid_loss_history = []
    valid_acc_history = []
    best_acc = 0.0
    best_model = None
    for epoch in range(args.epochs):

        batched_train_data, batched_train_label = generate_batched_data(train_data, train_label, batch_size=args.batch_size, shuffle=True)
        epoch_loss, epoch_acc = train(epoch, batched_train_data, batched_train_label, model, optimizer, args.debug)

        train_loss_history.append(epoch_loss)
        train_acc_history.append(epoch_acc)
        # evaluate on test data
        batched_test_data, batched_test_label = generate_batched_data(val_data, val_label, batch_size=args.batch_size)
        valid_loss, valid_acc = evaluate(batched_test_data, batched_test_label, model, args.debug)
        if args.debug:
            print("* Validation Accuracy: {accuracy:.4f}".format(accuracy=valid_acc))

        valid_loss_history.append(valid_loss)
        valid_acc_history.append(valid_acc)

        if valid_acc > best_acc:
            best_acc = valid_acc
            best_model = copy.deepcopy(model)
    plot_curves(train_loss_history, train_acc_history, valid_loss_history, valid_acc_history)
    batched_test_data, batched_test_label = generate_batched_data(test_data, test_label, batch_size=args.batch_size)
    _, test_acc = evaluate(batched_test_data, batched_test_label, best_model) # test the best model
    if args.debug:
        print("Final Accuracy on Train Data: {accuracy:.4f}".format(accuracy=train_acc_history[-1]))
        print("Final Accuracy on Validation Data: {accuracy:.4f}".format(accuracy=valid_acc_history[-1]))
        print("Final Accuracy on Test Data: {accuracy:.4f}".format(accuracy=test_acc))



    return train_loss_history, train_acc_history, valid_loss_history, valid_acc_history
Example 14
    def test_predict(self):
        gnn1 = GraphNeuralNetwork(2)
        gnn1.params["W"] = np.arange(1, 5).reshape(2, 2)
        gnn1.params["A"] = np.arange(1, 3)
        gnn1.params["b"] = np.array([1])
        sgd = SGD()
        trainer1 = Trainer(gnn1, sgd)

        graphs = [[[0, 0, 1, 0],
                   [0, 0, 1, 1],
                   [1, 1, 0, 1],
                   [0, 1, 1, 0]]] * 10
        vertex_sizes = [4] * 10

        expected1 = [1] * 10
        actual1 = trainer1.predict(graphs, vertex_sizes)
        self.assertEqual(expected1, actual1)

        gnn2 = GraphNeuralNetwork(3)
        gnn2.params["W"] = -np.arange(1, 10).reshape(3, 3)
        gnn2.params["b"] = -np.array([1])
        trainer2 = Trainer(gnn2, sgd)

        expected2 = [0] * 10
        actual2 = trainer2.predict(graphs, vertex_sizes)
        self.assertEqual(expected2, actual2)
Example 15
 def __init__(self, learning_rate=1e-1, eps=1e-5, max_iter=1000, batch_size=10, decay='step', reg_lambda=0.1):
     loss = LOSS["SumOfSquares"](reg_lambda, L2Normalizer())
     self.optimizer = SGD(learning_rate=learning_rate, eps=eps, max_iter=max_iter, batch_size=batch_size, loss=loss,
                          decay=decay)
     self.logger = logger("Ridge")
     self.params = dict()
     self.trained = False
Example 16
def test_020_matmul_build_specification():
    name = "matmul01"
    num_nodes = 8
    num_features = 2
    weights_initialization_scheme = "he"
    expected_spec = {
        _SCHEME: Matmul.class_id(),
        _PARAMETERS: {
            _NAME: name,
            _NUM_NODES: num_nodes,
            _NUM_FEATURES: num_features,  # NOT including bias
            _WEIGHTS: {
                _SCHEME: weights_initialization_scheme
            },
            _OPTIMIZER: SGD.specification(name="sgd")
        }
    }
    actual_spec = Matmul.specification(
        name=name,
        num_nodes=num_nodes,
        num_features=num_features,
        weights_initialization_scheme=weights_initialization_scheme,
    )
    assert expected_spec == actual_spec, \
        "expected\n%s\nactual\n%s\n" % (expected_spec, actual_spec)
Example 17
 def __init__(self, learning_rate=1e-3, eps=1e-5, max_iter=1000, batch_size=10, loss="LossWithSoftmax", decay='step',
              _lambda=0.1):
     self.loss = LOSS[loss](_lambda, L2Normalizer())
     self.optimizer = SGD(learning_rate=learning_rate, eps=eps, max_iter=max_iter, batch_size=batch_size, loss=self.loss,
                          decay=decay)
     self.logger = logger("LogisticRegression")
     self.params = dict()
     self.trained = False
Example 18
 def __init__(self, learning_rate=1e-1, eps=1e-5, max_iter=1000, batch_size=10, decay='step'):
     loss = LOSS["SumOfSquares"](0, ZeroNormalizer())
     self.optimizer = SGD(learning_rate=learning_rate, eps=eps, max_iter=max_iter, batch_size=batch_size, loss=loss,
                          decay=decay)
     # self.optimizer = NormalEquation()
     self.logger = logger("LinearRegression")
     self.params = dict()
     self.trained = False
Example 19
def train(weight_init_std, x_train, t_train, max_epochs):
    batch_norm_network = MultiLayerNet(input_size=784,
                                       hidden_size_list=[100, 100, 100, 100, 100],
                                       output_size=10,
                                       weight_init_std=weight_init_std,
                                       use_batchnorm=True)
    no_batch_norm_network = MultiLayerNet(input_size=784,
                                          hidden_size_list=[100, 100, 100, 100, 100],
                                          output_size=10,
                                          weight_init_std=weight_init_std)
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.01
    max_iters_times = 1000000000
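    # number of iterations that make up one epoch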
    epoch = max(int(train_size / batch_size), 1)

    optimizer = SGD(lr=learning_rate)
    bn_train_acc_list = []
    no_bn_train_acc_list = []

    epoch_cnt = 0
    for i in range(max_iters_times):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for network in (batch_norm_network, no_batch_norm_network):
            grads = network.gradient(x_batch, t_batch)
            optimizer.update(network.params, grads)

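        # record training accuracy once per epoch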
        if i % epoch == 0:
            bn_train_acc = batch_norm_network.accuracy(x_train, t_train)
            no_bn_train_acc = no_batch_norm_network.accuracy(x_train, t_train)
            bn_train_acc_list.append(bn_train_acc)
            no_bn_train_acc_list.append(no_bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(no_bn_train_acc) + " - " + str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return no_bn_train_acc_list, bn_train_acc_list
Example 20
def __train(weight_init_std):
    bn_network = MultiLayerNetExtend(
        input_size=784,
        hidden_size_list=[100, 100, 100, 100, 100],
        output_size=10,
        weight_init_std=weight_init_std,
        use_batchnorm=True)
    network = MultiLayerNetExtend(input_size=784,
                                  hidden_size_list=[100, 100, 100, 100, 100],
                                  output_size=10,
                                  weight_init_std=weight_init_std)
    optimizer = SGD(lr=learning_rate)

    train_acc_list = []
    bn_train_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)
    epoch_cnt = 0

    for i in range(1000000000):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        for _network in (bn_network, network):
            grads = _network.gradient(x_batch, t_batch)
            optimizer.update(_network.params, grads)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            bn_train_acc = bn_network.accuracy(x_train, t_train)
            train_acc_list.append(train_acc)
            bn_train_acc_list.append(bn_train_acc)

            print("epoch:" + str(epoch_cnt) + " | " + str(train_acc) + " - " +
                  str(bn_train_acc))

            epoch_cnt += 1
            if epoch_cnt >= max_epochs:
                break

    return train_acc_list, bn_train_acc_list
Example 21
def main():
    max_epoch = 300
    batch_size = 30
    hidden_size = 10
    learning_rate = 1.0

    x, t = spiral.load_data()
    model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
    optimizer = SGD(lr=learning_rate)

    trainer = Trainer(model, optimizer)
    trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)
Example 22
    def test_sgd(self):

        model_list = [dict(type='Linear', in_dim=128, out_dim=10)]
        criterion = dict(type='SoftmaxCrossEntropy')
        model = ConvNet(model_list, criterion)

        optimizer = SGD(model)

        # forward once
        np.random.seed(1024)
        x = np.random.randn(32, 128)
        np.random.seed(1024)
        y = np.random.randint(10, size=32)
        tmp = model.forward(x, y)
        model.backward()
        optimizer.update(model)
        # forward twice
        np.random.seed(512)
        x = np.random.randn(32, 128)
        np.random.seed(512)
        y = np.random.randint(10, size=32)
        tmp = model.forward(x, y)
        model.backward()
        optimizer.update(model)

        expected_weights = np.load('tests/sgd_weights/w.npy')
        expected_bias = np.load('tests/sgd_weights/b.npy')

        self.assertAlmostEqual(
            np.sum(np.abs(expected_weights - model.modules[0].weight)), 0)
        self.assertAlmostEqual(
            np.sum(np.abs(expected_bias - model.modules[0].bias)), 0)
Example 23
 def __init__(self,
              layers,
              loss='cross_entropy',
              optimizer=SGD(),
              logger=get_logger()):
     super(Sequential, self).__init__(logger)
     self.layers = layers
     self.params = list(
         itertools.chain(*[
             layer.params for layer in layers if hasattr(layer, 'params')
         ]))
     self.optimizer = optimizer
Example 24
def train(C_train, C_val, X_train, X_val, batch_size, epochs, lr, momentum=0):
    # ----------------- hyper params init -----------------
    W0 = randn(X_train.shape[0], C_train.shape[0])
    m, n = W0.shape
    W = W0.copy()
    optimizer = SGD(batch_size=batch_size, m=X_train.shape[1])
    # ----------------------------------------------------

    # ----------------- stats lists init -----------------
    W_history = zeros((W.shape[0] * W.shape[1], epochs))
    val_score = []
    train_score = []
    train_acc = []
    val_acc = []
    # ----------------------------------------------------

    for epoch in range(epochs):
        W = optimizer.optimize(W,
                               X_train,
                               C_train,
                               objective_soft_max,
                               objective_soft_max_gradient_W,
                               lr=lr,
                               momentum=momentum)

        W_history[:, epoch] = W.reshape(W.shape[0] * W.shape[1])
        train_score.append(objective_soft_max(X_train, W, C_train))
        val_score.append(objective_soft_max(X_val, W, C_val))
        train_acc.append(accuracy(X_train, W, C_train))
        val_acc.append(accuracy(X_val, W, C_val))

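    # final weights are the average of the weights recorded at each epoch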
    W_res = average(W_history, axis=1).reshape(m, n)
    train_score.append(objective_soft_max(X_train, W_res, C_train))
    val_score.append(objective_soft_max(X_val, W_res, C_val))
    # TODO: add epoch/accuracy plot (written in train)
    plot(range(len(train_score)), train_score)
    return train_score, train_acc, val_score, val_acc
Example 25
def train(net: NetWork,
          inputs: Tensor,
          targets: Tensor,
          epochs: int = 500,
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD(),
          iterator: DataIterator = BatchIterator(),
          show_info: bool = False):
    for epoch in range(epochs):
        epoch_loss = 0.0
        for batch_inputs, batch_targets in iterator(inputs, targets):
            predictions = net.forward(batch_inputs)
            epoch_loss += loss.loss(predictions, batch_targets)
            grad = loss.grad(predictions, batch_targets)
            net.backward(grad)
            optimizer.step(net)
        if show_info:
            print('epoch:{},  loss:{}'.format(epoch, epoch_loss))
Example 26
 def test_kfold_cross_val(self):
     gnn = GraphNeuralNetwork(2)
     sgd = SGD()
     trainer = Trainer(gnn, sgd)
     graphs = [[[0, 0, 1, 0],
                [0, 0, 1, 1],
                [1, 1, 0, 1],
                [0, 1, 1, 0]]] * 100
     vertex_sizes = [4] * 100
     labels = [0] * 100
     expected = gnn.params
     _ = trainer.kfold_cross_validation(graphs, vertex_sizes, labels)
     actual = gnn.params
     self.assertEqual(expected, actual)
     with self.assertRaises(SplitError):
         trainer.kfold_cross_validation(graphs, vertex_sizes, labels,
                                        minibatch_size=20)
     with self.assertRaises(SplitError):
         trainer.kfold_cross_validation(graphs, vertex_sizes, labels,
                                        k=20)
Example 27
    def test_accuracy(self):
        gnn = GraphNeuralNetwork(2)
        gnn.params["W"] = np.arange(1, 5).reshape(2, 2)
        gnn.params["A"] = np.arange(1, 3)
        gnn.params["b"] = np.array([1])
        sgd = SGD()
        trainer = Trainer(gnn, sgd)
        graphs = [[[0, 0, 1, 0],
                   [0, 0, 1, 1],
                   [1, 1, 0, 1],
                   [0, 1, 1, 0]]] * 10
        vertex_sizes = [4] * 10

        labels1 = [1] * 10
        expected1 = 1.
        actual1 = trainer.accuracy(graphs, vertex_sizes, labels1)
        self.assertEqual(expected1, actual1)

        labels2 = [1] * 7 + [0] * 3
        expected2 = 0.7
        actual2 = trainer.accuracy(graphs, vertex_sizes, labels2)
        self.assertEqual(expected2, actual2)
Example 28
def test():
    M = 1
    D = 2
    N = 100

    X, T, V = linear_separable(d=D, n=N)
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()

    sigmoid_classifier_specification = {
        _NAME: "softmax_classifier",
        _NUM_NODES: M,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: {
            "matmul01":
            Matmul.specification(
                name="matmul",
                num_nodes=M,
                num_features=D,
                weights_initialization_scheme="he",
                weights_optimizer_specification=SGD.specification(
                    lr=TYPE_FLOAT(0.2), l2=TYPE_FLOAT(1e-3))),
            "loss":
            CrossEntropyLogLoss.specification(
                name="loss",
                num_nodes=M,
                loss_function=sigmoid_cross_entropy_log_loss.__qualname__)
        }
    }
    logistic_classifier = SequentialNetwork.build(
        specification=sigmoid_classifier_specification, )

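    # train on the full data set for 50 iterations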
    for i in range(50):
        logistic_classifier.train(X=X, T=T)

    prediction = logistic_classifier.predict(
        np.array([-1., -1.], dtype=TYPE_FLOAT))
    assert np.all(np.isin(prediction, [0, 1]))
    print(prediction)
Example 29
 def test_update(self):
     sgd = SGD()
     gnn = GraphNeuralNetwork(vector_size=2)
     expected = gnn.params
     sgd.update(gnn)
     actual = gnn.params
     self.assertEqual(expected, actual)
     params = copy.deepcopy(gnn.params)
     for _ in range(100):
         gnn.grads["W"] = np.random.rand()
         gnn.grads["A"] = np.random.rand()
         gnn.grads["b"] = np.random.rand()
         sgd.update(gnn)
         for key, param in params.items():
             params[key] = param - gnn.grads[key] * sgd.lr
             expected = repr(params[key])
             actual = repr(gnn.params[key])
             self.assertEqual(expected, actual)
Example 30
from tensor import Tensor
from optimizer import SGD
from layer import MSELoss, Linear, Tanh, Sigmoid
from model import Sequential

import numpy as np

# Toy example of using the Tensor class
np.random.seed(0)
data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), requires_grad=True)
target = Tensor(np.array([[0], [1], [0], [1]]), requires_grad=True)
# Every parameter of the model is a Tensor object representing a weight matrix
model = Sequential(
    Linear(2, 3),
    Tanh(),
    Linear(3, 3),
    Tanh(),
    Linear(3, 1),
)
optim = SGD(parameters=model.get_parameters(), lr=0.1)
criterion = MSELoss()
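# training loop: forward pass, MSE loss, backward pass, SGD step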
for i in range(10):
    pred = model(data)
    loss = criterion(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data), is_grad=True))
    optim.step()
    print(loss.data)
print(
    "------------------------------------------------------------------------")
Example 31
from utils import load_mnist

weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

(x_train, t_train), (x_test, t_test) = load_mnist(
    normalize=True, one_hot_label=True)
iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

train_loss = {}

for key, weight_type in weight_init_types.items():
    network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100],
                            output_size=10, weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []

    for i in range(iters_num):
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        train_loss[key].append(network.loss(x_batch, t_batch))

markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
Example 32
def main(namemark, ncpu, batchsize, generation, lr, sigma, vbn, vbn_test_g,
         gamename, logfile, modeltype):
    if modeltype == '2015':
        from model_15 import build_model
    elif modeltype == '2013':
        from model_13 import build_model
    setup_logging(logfile)

    logging.info("modeltype: %s", modeltype)
    logging.info("learning rate: %s", lr)
    logging.info("sigma: %s", sigma)
    logging.info("Game name: %s", gamename)
    logging.info("batchsie: %s", batchsize)
    logging.info("ncpu:%s", ncpu)
    logging.info("namemark:%s", namemark)
    print("learning rate:", lr)
    print("sigma:", sigma)
    print("gamename:", gamename)
    print("batchsie:", batchsize)
    print("ncpu:", ncpu)
    print("namemark", namemark)

    checkpoint_name = gamename + namemark + "-sigma" + str(
        sigma) + '-lr' + str(lr) + '-model' + modeltype

    import pandas as pd
    config = pd.read_csv('config.csv')
    CONFIG = dict()
    CONFIG['game'] = gamename + '-v0'
    # maximum steps per episode during training
    CONFIG['ep_max_step'] = 1500
    CONFIG['eval_threshold'] = config[config['gamename'] == gamename].iloc[0, 1]
    CONFIG['l2coeff'] = 0.005
    test_times = ncpu - 1

    logging.info("Settings: %s", str(CONFIG))

    env = gym.make(gamename + '-v0')
    CONFIG['n_action'] = env.action_space.n
    experiment_record = {}
    experiment_record['kid_rewards'] = []
    experiment_record['test_rewards'] = []

    device = torch.device("cpu")
    model = build_model(CONFIG).to(device)
    model_best = build_model(CONFIG)
    model_before = build_model(CONFIG)
    best_test_score = Small_value

    # use utility values instead of raw rewards for the parameter update (rank transformation)
    base = batchsize  # *2 for mirrored sampling
    if batchsize % 2 == 1:
        print("need an even batch size")
        exit()
    rank = np.arange(1, base + 1)
    util_ = np.maximum(0, np.log(base / 2 + 1) - np.log(rank))
    utility = util_ / util_.sum() - 1 / base

    optimizer = SGD(model.named_parameters(), lr)
    pool = mp.Pool(processes=ncpu)
    test_episodes = 15
    # estimate mean and var of a reference batch (used for virtual batch normalization)
    reference_batch_torch = torch.zeros((reference_batch_size, 4, 84, 84))
    if vbn:
        logging.info("start test reference batch statistic")
        print("start getting reference frames")
        reference_batch = explore_for_vbn(env, 0.01)
        for i in range(reference_batch_size):
            reference_batch_torch[i] = reference_batch[i]

    # training
    mar = None  # moving average reward
    training_timestep_count = 0
    best_kid_mean = Small_value
    test_result_list = []
    for g in range(generation):
        t0 = time.time()
        model_before.load_state_dict(model.state_dict())
        model, kid_rewards, timestep_count = train(model, optimizer, pool,
                                                   sigma, env,
                                                   int(batchsize / 2), CONFIG,
                                                   modeltype,
                                                   reference_batch_torch)
        training_timestep_count += timestep_count
        timestep_count = timestep_count / 4
        if training_timestep_count > TIMESTEP_LIMIT:
            logging.info("satisfied timestep limit")
            logging.info("Now timestep %s" % training_timestep_count)
            break
        kid_rewards_mean = np.array(kid_rewards).mean()
        experiment_record['kid_rewards'].append(
            [g, np.array(kid_rewards).mean()])
        if g % 5 == 0:
            logging.info(
                'Gen: %s | Kid_avg_R: %.1f | Episodes Number: %s | timestep number: %s| Gen_T: %.2f'
                % (g, np.array(kid_rewards).mean(), batchsize, timestep_count,
                   time.time() - t0))
            print('Gen:', g,
                  '| Kid_avg_R: %.1f' % np.array(kid_rewards).mean(),
                  '| episodes number:', batchsize, '| timestep number:',
                  timestep_count, '| Gen_T: %.2f' % (time.time() - t0))
        if kid_rewards_mean > best_kid_mean:
            best_kid_mean = kid_rewards_mean
            test_rewards, _ = test(model_before, pool, env, test_times, CONFIG,
                                   reference_batch_torch)
            test_rewards_mean = np.mean(np.array(test_rewards))
            experiment_record['test_rewards'].append([g, test_rewards])
            logging.info("Gen: %s, test model, Reward: %.1f" %
                         (g, test_rewards_mean))
            #logging.info("train progross %s/%s" % (training_timestep_count, TIMESTEP_LIMIT))
            print('Gen: ', g, '| Net_R: %.1f' % test_rewards_mean)
            if test_rewards_mean > best_test_score:
                best_test_score = test_rewards_mean
                model_best.load_state_dict(model_before.state_dict())
                # save when found a better model
                #logging.info("Storing Best model")
                torch.save(
                    model_best.state_dict(),
                    model_storage_path + checkpoint_name + 'best_model.pt')

        if g % 5 == 0:
            test_rewards, timestep_count = test(model, pool, env, test_times,
                                                CONFIG, reference_batch_torch)
            test_rewards_mean = np.mean(np.array(test_rewards))
            experiment_record['test_rewards'].append([g, test_rewards])
            #logging.info("test model, Reward: %.1f" % test_rewards_mean)
            test_result_list.append(test_rewards_mean)
            print('Gen: ', g, '| Net_R: %.1f' % test_rewards_mean)
            if test_rewards_mean > best_test_score:
                best_test_score = test_rewards_mean
                model_best.load_state_dict(model.state_dict())
                # save when found a better model
                #logging.info("Storing Best model")
                torch.save(
                    model_best.state_dict(),
                    model_storage_path + checkpoint_name + 'best_model.pt')
        if g % 40 == 0:
            logging.info("train progross %s/%s" %
                         (training_timestep_count, TIMESTEP_LIMIT))
            logging.info("best test result:%s" % best_test_score)
            logging.info("test result:%s" % str(test_result_list))
            test_result_list = []

        if (g - 1) % 500 == 500 - 1:
            CONFIG['ep_max_step'] += 150
            logging.info("Gen %s | adding max timestep" % g)

        if (g - 1) % 1000 == 1000 - 1:
            logging.info("Gen %s | storing model" % g)
            torch.save(
                model.state_dict(), model_storage_path + checkpoint_name +
                'generation' + str(g) + '.pt')
            torch.save(model_best.state_dict(),
                       model_storage_path + checkpoint_name + 'best_model.pt')
            with open(
                    model_storage_path + "experiment_record" +
                    checkpoint_name + 'generation' + str(g) + ".pickle",
                    "wb") as f:
                pickle.dump(experiment_record, f)

    test_rewards, _ = test(model, pool, env, test_times, CONFIG,
                           reference_batch_torch)
    test_rewards_mean = np.mean(np.array(test_rewards))
    logging.info("test final model, Mean Reward of %s times: %.1f" %
                 (test_times, test_rewards_mean))

    if test_rewards_mean > best_test_score:
        best_test_score = test_rewards_mean
        model_best.load_state_dict(model.state_dict())
        logging.info("storing Best model")

    print("best test results :", best_test_score)
    logging.info("best test results:%s" % best_test_score)
    # ---------------SAVE---------
    torch.save(model_best.state_dict(),
               model_storage_path + checkpoint_name + 'best_model.pt')
    torch.save(model.state_dict(),
               model_storage_path + checkpoint_name + '.pt')
    with open(
            model_storage_path + "experiment_record" + str(namemark) +
            ".pickle", "wb") as f:
        pickle.dump(experiment_record, f)