Example 1
        def train(X_train, X_test, y_train, y_test):
            xp = self.xp

            model = self._create_model()

            optimizer = SGD(lr=0.001)
            optimizer.setup(model)

            if self.cv:
                y_test = xp.array(y_test).reshape((-1, 1))
            for epoch in range(n_epoch):
                print('epoch:', epoch)
                with chainer.using_config('train', True):
                    self._train_once(model, optimizer, X_train, y_train)

                with chainer.using_config('train', False), chainer.no_backprop_mode():
                    pred = self._predict(model, X_test)
                    if self.cv:
                        loss = F.sigmoid_cross_entropy(pred, y_test)
                        print("test loss:", loss.data)

            if self.gpu is not None:
                pred = chainer.cuda.to_cpu(pred)
            return pred
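Note that chainer.using_config only takes effect when it is entered with a `with` statement, as above. A minimal stand-alone sketch of the train/eval idiom (the `model` and `x` here are placeholders, not part of the original snippet):

    import chainer

    # Training mode: dropout, batch normalization, etc. behave as in training.
    with chainer.using_config('train', True):
        y = model(x)

    # Evaluation mode, and no computation graph is built.
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        y = model(x)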
Example 2
def setup_optimizer(cfg):
    if cfg.solver.optimizer == 'SGD':
        optimizer = SGD(cfg.solver.base_lr)
    elif cfg.solver.optimizer == 'MomentumSGD':
        optimizer = MomentumSGD(cfg.solver.base_lr, cfg.solver.momentum)
    else:
        raise ValueError('Unsupported `optimizer`: {}.'.format(
            cfg.solver.optimizer))
    return optimizer
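A possible way to drive setup_optimizer (the config layout with solver.optimizer, solver.base_lr, and solver.momentum is assumed from the branches above, and `model` stands in for whatever chainer link is being trained):

    import types

    cfg = types.SimpleNamespace(solver=types.SimpleNamespace(
        optimizer='MomentumSGD', base_lr=0.01, momentum=0.9))

    optimizer = setup_optimizer(cfg)
    optimizer.setup(model)  # attach the optimizer to the model's parameters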
Example 3
    def __init__(self, n_features, n_hidden):
        self.model = chainer.FunctionSet(
            W1=F.Linear(n_features, n_hidden),
            W2=F.Linear(n_hidden, 2),
            activation=F.relu
        )

        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape)

        self.optimizer = SGD()
        self.optimizer.setup(self.model)
Example 4
    def __init__(self, n_features):
        # Define what parametrized functions the model consists of.
        self.model = chainer.FunctionSet(
            W=F.Linear(n_features, 2)
        )

        # Initialize parameters randomly from a Gaussian.
        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape)

        # Define what update rule we will use. SGD is the simplest one:
        #  w' = w - lr * grad_f(w)
        self.optimizer = SGD()
        self.optimizer.setup(self.model)
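As a stand-alone illustration of the update rule described in that comment (plain NumPy, not part of the original class), a single SGD step looks like:

    import numpy as np

    lr = 0.01
    w = np.random.randn(3, 2)       # a parameter matrix
    grad_w = np.random.randn(3, 2)  # stand-in for dL/dw computed by backprop
    w -= lr * grad_w                # w' = w - lr * dL/dw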
Example 5
class NeuralNet(object):
    def __init__(self, n_features, n_hidden):
        self.model = chainer.FunctionSet(
            W1=F.Linear(n_features, n_hidden),
            W2=F.Linear(n_hidden, 2),
            activation=F.relu
        )

        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape)

        self.optimizer = SGD()
        self.optimizer.setup(self.model)

    def forward_loss(self, x, y, train=True):
        x = chainer.Variable(x, volatile=not train)
        y = chainer.Variable(y, volatile=not train)

        h1 = self.model.activation(self.model.W1(x))
        h2 = self.model.W2(h1)

        loss = F.softmax_cross_entropy(h2, y)
        return loss, loss.creator.y

    def learn(self, x, y):
        self.optimizer.zero_grads()

        loss, y_hat = self.forward_loss(x, y, train=True)

        loss.backward()

        self.optimizer.update()

        return loss.data

    def eval(self, mb_x, mb_y):
        mb_y_hat = self.predict(mb_x)

        acc = sklearn.metrics.accuracy_score(mb_y, mb_y_hat)
        prec = sklearn.metrics.precision_score(mb_y, mb_y_hat)
        recall = sklearn.metrics.recall_score(mb_y, mb_y_hat)

        return acc, prec, recall

    def predict(self, x):
        _, y_hat = self.forward_loss(x, np.zeros((len(x), ), dtype='int32'))

        return np.argmax(y_hat, axis=1)

    def plot_eval(self, mb_x, mb_y):
        pass
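A hypothetical driver for the class above on synthetic 2-D data (shapes and dtypes follow what forward_loss expects: float32 inputs, int32 labels):

    import numpy as np

    net = NeuralNet(n_features=2, n_hidden=16)

    x = np.random.randn(200, 2).astype(np.float32)
    y = (x[:, 0] * x[:, 1] > 0).astype(np.int32)  # toy XOR-like labels

    for epoch in range(50):
        loss = net.learn(x, y)

    print(net.eval(x, y))  # (accuracy, precision, recall)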
Example 6
    classes = np.unique(t_train)  # class labels present in the data
    num_classes = len(classes)  # number of classes
    dim_features = x_train.shape[-1]  # dimensionality of x

    # Hyperparameters
    learning_rate = 0.5  # learning rate
    max_iteration = 100      # number of training epochs
    batch_size = 200       # number of samples per minibatch
    dim_hidden = 200         # dimensionality of the hidden layer

    linear_1 = F.Linear(dim_features, dim_hidden)
    linear_2 = F.Linear(dim_hidden, num_classes)
    model = FunctionSet(linear_1=linear_1,
                        linear_2=linear_2)

    optimizer = SGD(learning_rate)
    optimizer.setup(model)

    loss_history = []
    train_accuracy_history = []
    loss_valid_history = []
    valid_accuracy_history = []

    valid_accuracy_best = 0
    valid_loss_best = 10
    num_batches = num_train // batch_size  # number of minibatches
    num_valid_batches = num_valid // batch_size

    # Training loop
    for epoch in range(max_iteration):
        print("epoch:", epoch)
Example 7
        y = self.h2y(h)
        return y


max_epoch = 100
hidden_size = 512  #100
bptt_length = 30
batch_size = 100
lr = 0.05  #1e-4

indices, char_to_id, id_to_char = load_shakespear()
iterator = RnnIterator(indices, batch_size)
vocab_size = len(char_to_id)
rnn = SimpleRNN(vocab_size, hidden_size, vocab_size)
model = L.Classifier(rnn)
optimizer = SGD(lr=lr)
optimizer.setup(model)


def generate_sample(n=30, init_char=' '):
    rnn.reset_state()

    s = ''
    x = np.array([char_to_id[init_char]])
    for i in range(n):
        y = rnn(x)
        m = y.data.argmax()
        c = id_to_char[m]
        s += c
        x = np.array([m])
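The excerpt defines bptt_length but does not use it in the lines shown; a hedged sketch of the truncated-BPTT update it suggests (the minibatch interface of RnnIterator is assumed here, not taken from the original):

    for step in range(1000):                # number of parameter updates, arbitrary here
        loss = 0
        for _ in range(bptt_length):
            x, t = next(iterator)           # assumed: yields (input ids, target ids)
            loss += model(x, t)             # L.Classifier adds softmax cross-entropy
        model.cleargrads()
        loss.backward()
        loss.unchain_backward()             # cut the history: truncated backprop through time
        optimizer.update()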
Example 8
    def setUp(self):
        self.optimizer = SGD(0.1)
        self.model = LinearModel(self.optimizer)
Example 9
class LogisticRegression(object):
    """Logistic regression example in chainer.

    $$ L(x, y) = -log(softmax(Wx + b)_y) $$
    """
    def __init__(self, n_features):
        # Define what parametrized functions the model consists of.
        self.model = chainer.FunctionSet(
            W=F.Linear(n_features, 2)
        )

        # Initialize parameters randomly from a Gaussian.
        for param in self.model.parameters:
            param[:] = np.random.randn(*param.shape)

        # Define what update rule we will use. SGD is the simplest one:
        #  w' = w - lr * grad_f(w)
        self.optimizer = SGD()
        self.optimizer.setup(self.model)

    def forward_loss(self, x, y, train=True):
        """Compute the loss function of the model, given the inputs x,
        and labels y.

        Args:
            x: Numpy array of dimensionality (batch x input).
            y: Numpy array of dimensionality (batch).
        """

        # Wrap the input variables into a class that takes care of remembering
        # the call chain.
        x = chainer.Variable(x, volatile=not train)  # volatile=True skips building the computation graph;
                                                     # training needs the graph, so train=True means volatile=False.
        y = chainer.Variable(y, volatile=not train)

        # Apply the functions that define the model.
        wx = self.model.W(x)  # Apply f: f(x) = Wx + b
        loss = F.softmax_cross_entropy(wx, y)  # Apply softmax and cross-entropy: f(x, y) = -log(e^{x_y} / sum_j e^{x_j})

        return loss, loss.creator.y  # loss is an instance of chainer.Variable;
                                     # loss.creator is the computation node that produced the result;
                                     # if you look into the code, it saves softmax outputs as 'y'

    def learn(self, mb_x, mb_y):
        """Update parameters given the training data."""

        self.optimizer.zero_grads()

        # Do the forward pass.
        loss, y_hat = self.forward_loss(mb_x, mb_y, train=True)

        # Do the backward pass from loss (the Jacobian computation).
        loss.backward()

        # Update the parameters W' = W - lr * J^{W}_{loss}(W), b' = b - ...
        self.optimizer.update()

        # Return the "raw" loss (i.e. not chainer.Variable).
        return loss.data

    def eval(self, mb_x, mb_y):
        """Compute some metrics on the given minibatch.
        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :param mb_y: Numpy array of int32 of dimensionality (batch) with the labels for each input in mb_x
        :return: Accuracy, Precision, Recall metrics
        """
        mb_y_hat = self.predict(mb_x)  # Get model's predictions about the input data.

        # Compare predictions to the true labels and compute accuracy, precision and recall.
        acc = sklearn.metrics.accuracy_score(mb_y, mb_y_hat)
        prec = sklearn.metrics.precision_score(mb_y, mb_y_hat)
        recall = sklearn.metrics.recall_score(mb_y, mb_y_hat)

        return acc, prec, recall

    def predict(self, mb_x):
        """Predict labels for the given input minibatch.
        :param mb_x: Numpy array of float32 of dimensionality (batch x input)
        :return: Numpy array of int32 of dimensionality (batch)
        """

        _, y_hat = self.forward_loss(mb_x, np.zeros((len(mb_x), ), dtype='int32'))

        return np.argmax(y_hat, axis=1)

    def plot_eval(self, mb_x, mb_y):
        """Plot the minibatches in 2D and also the separating hyperplane."""
        import matplotlib.pyplot as plt
        import seaborn
        seaborn.set()

        x1 = mb_x[:, 0]
        x2 = mb_x[:, 1]
        y = mb_y

        dec_x1 = np.linspace(-1, 1)

        w1_m_w2 = self.model.W.W[0] - self.model.W.W[1]
        b1_m_b2 = self.model.W.b[0] - self.model.W.b[1]
        dec_x2 = - (w1_m_w2[0] / w1_m_w2[1] * dec_x1) - b1_m_b2 / w1_m_w2[1]

        plt.plot(x1[y == 0], x2[y == 0], 'o', label='Class 0', markersize=3, color='red')
        plt.plot(x1[y == 1], x2[y == 1], 'o', label='Class 1', markersize=3, color='green')
        plt.plot(dec_x1, dec_x2, '-', label='Classifier', color='blue')
        plt.legend()

        plt.show()

    def train(self, n_epochs=10, data='lin'):
        """Train the given model on the given dataset."""

        data_train, x_test, x_valid, x_train, y_test, y_valid, y_train = train._prepare_data(data)
        n_data = len(data_train)

        # Set the learning rate.
        self.optimizer.lr = 0.001  # A good learning rate is around 0.1; we use this smaller one so that the gradual improvement over iterations is visible.

        n_instances = 0
        begin_t = last_print_t = time.time()

        # Run for the given number of epochs.
        for epoch in range(n_epochs):
            # For SGD it's important to randomize the order in which we visit the data points,
            # so for each epoch we shuffle the index order.
            order = list(range(n_data))  # list() so np.random.shuffle can shuffle in place
            np.random.shuffle(order)

            loss = 0.0
            for i in order:
                x = x_train[i:i + 1]  # Slicing (instead of x_train[i]) keeps the (1 x input) dimensionality that model.learn expects, instead of just (input,).
                y = y_train[i:i + 1]

                # Ask the model to update its parameters given the current example (it uses the model.optimizer rule to update the parameters).
                curr_loss = self.learn(x, y)
                loss += 1.0 / n_data * curr_loss

                n_instances += 1

                # Print something every second so that we keep the frustration low ;)
                if time.time() - last_print_t > 1.0:
                    last_print_t = time.time()

                    a, p, r = self.eval(x_valid, y_valid)

                    print('> t(%.1f) train_loss(%.3f) examples(%d) valid{acc(%.3f) prec(%.3f) recall(%.3f)}'
                          % (last_print_t - begin_t, loss, n_instances, a, p, r))

        # Compute the metrics and show evaluation on the test set.
        a, p, r = self.eval(x_test, y_test)
        print('# acc(%.3f) prec(%.3f) recall(%.3f)' % (a, p, r))

        self.plot_eval(x_test, y_test)
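A hypothetical driver for this class on synthetic, linearly separable data, avoiding the external train._prepare_data helper used by the train method above:

    import numpy as np

    np.random.seed(0)
    x = np.random.randn(500, 2).astype(np.float32)
    y = (x[:, 0] + x[:, 1] > 0).astype(np.int32)  # linearly separable toy labels

    clf = LogisticRegression(n_features=2)
    clf.optimizer.lr = 0.1
    for epoch in range(10):
        for i in np.random.permutation(len(x)):
            clf.learn(x[i:i + 1], y[i:i + 1])

    print(clf.eval(x, y))  # (accuracy, precision, recall)
    clf.plot_eval(x, y)    # scatter plot plus the learned decision boundary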