Example #1
class TwoLayerNet:
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        W1 = 0.01 * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        W2 = 0.01 * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x: np.ndarray) -> np.ndarray:
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x: np.ndarray, t: np.ndarray) -> np.ndarray:
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout: int = 1) -> np.ndarray:
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
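
A minimal training sketch for the class above, assuming the same Affine/Sigmoid/SoftmaxWithLoss layers plus a book-style SGD optimizer exposing update(params, grads) (the optimizer name and the data shapes are assumptions for illustration, not part of the example):

import numpy as np

model = TwoLayerNet(input_size=2, hidden_size=10, output_size=3)
optimizer = SGD(lr=1.0)  # assumed optimizer class with an update(params, grads) method

x = np.random.randn(30, 2)                  # dummy inputs
t = np.eye(3)[np.random.randint(0, 3, 30)]  # dummy one-hot targets

for epoch in range(100):
    loss = model.forward(x, t)   # forward pass returns the loss
    model.backward()             # backward pass fills model.grads in place
    optimizer.update(model.params, model.grads)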
Example #2
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
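
A quick call sketch for the CBOW model above, assuming a window size of 1 so that contexts is a (batch, 2, vocab_size) one-hot array and target is the one-hot center word (the tiny arrays below are made up for illustration):

import numpy as np

contexts = np.array([[[1, 0, 0],     # word to the left of the target
                      [0, 0, 1]]],   # word to the right of the target
                    dtype='f')
target = np.array([[0, 1, 0]], dtype='f')  # one-hot center word

model = SimpleCBOW(vocab_size=3, hidden_size=2)
loss = model.forward(contexts, target)  # cross-entropy loss for this batch
model.backward()                        # gradients are written into model.grads
print(loss)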
Example #3
class TwoLayersNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        w1 = np.random.randn(I, H) * 0.01
        b1 = np.zeros(H)  #np.random.randn(H)
        w2 = np.random.randn(H, O) * 0.01
        b2 = np.zeros(O)  #np.random.randn(O)

        self.layers = [Affine(w1, b1), Sigmoid(), Affine(w2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for l in self.layers:
            self.params += l.params
            self.grads += l.grads  # gradients are gathered only here -> each layer must update its grads in place (never rebind the reference)

    def predict(self, x):
        for l in self.layers:
            x = l.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        #print('t:', t)  # test
        #print('score:', score)  # test
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dl=1):
        dl = self.loss_layer.backward(dl)
        for l in self.layers[::-1]:
            dl = l.backward(dl)
        return dl
Example #4
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize the weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.random.randn(O)

        # create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # collect all the weights into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
Example #5
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect the weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()
        # In the limit each context word gets probability 0.5.
        # A single softmax -> *2 -> loss would just subtract a constant ln 2, so it is probably equivalent.

        # distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l0 = self.loss_layer0.forward(s, contexts[:, 0])
        l1 = self.loss_layer1.forward(s, contexts[:, 1])
        loss = l0 + l1
        return loss

    def backward(self, dl=1):
        ds0 = self.loss_layer0.backward(dl)
        ds1 = self.loss_layer1.backward(dl)
        ds = ds0 + ds1
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
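
The skip-gram model is driven the other way around: the one-hot target word is the input and the two surrounding context words supply the two losses. A hedged call sketch with made-up arrays (window size 1 assumed):

import numpy as np

contexts = np.array([[[1, 0, 0],     # word to the left of the target
                      [0, 0, 1]]],   # word to the right of the target
                    dtype='f')
target = np.array([[0, 1, 0]], dtype='f')  # one-hot center word

model = SimpleSkipGram(vocab_size=3, hidden_size=2)
loss = model.forward(contexts, target)  # sum of the two context losses
model.backward()
print(loss)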
Example #6
class TwoLayersNet():
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['w1'] = np.random.randn(input_size,
                                            hidden_size) * weight_init_std
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = np.random.randn(hidden_size,
                                            output_size) * weight_init_std
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastlayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(y.shape[0])

    def gradient(self, x, t):
        dout = 1
        self.loss(x, t)
        dout = self.lastlayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db
        return grads
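
A hedged usage sketch for the dict-keyed variant above, with dummy data and a plain SGD-style update (shapes and learning rate are assumptions for illustration):

import numpy as np

net = TwoLayersNet(input_size=4, hidden_size=8, output_size=3)
x = np.random.randn(20, 4)                  # dummy inputs
t = np.eye(3)[np.random.randint(0, 3, 20)]  # dummy one-hot labels

for _ in range(50):
    grads = net.gradient(x, t)              # backprop gradients
    for key in ('w1', 'b1', 'w2', 'b2'):
        net.params[key] -= 0.1 * grads[key]

print(net.accuracy(x, t))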
Example #7
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer0 = MatMul(w_in)
        self.in_layer1 = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect the weights and gradients
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer = SoftmaxWithLoss()

        # distributed representations of the words
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dl=1):
        ds = self.loss_layer.backward(dl)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
Example #8
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weight and bias
        W1 = 0.01 * cp.random.randn(I, H)
        b1 = cp.zeros(H)
        W2 = 0.01 * cp.random.randn(H, O)
        b2 = cp.zeros(O)

        # create layer
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # combine all weight and grads into list
        self.params, self.grads = [], []

        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
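
This variant is the same network as Example #1 except that the arrays are allocated with cp (CuPy) so the model runs on the GPU. A common guard, shown here as an assumption rather than part of the original snippet, keeps the same code usable on machines without CUDA:

try:
    import cupy as cp   # GPU arrays when CuPy and a CUDA device are available
except ImportError:
    import numpy as cp  # fall back to NumPy so the class still runs on the CPU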
Example #9
class SimpleCBOW:
    def __init__(self, vocab_size: int, hidden_size: int) -> None:
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype(float)

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params = []
        self.grads = []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts: np.ndarray, target: np.ndarray) -> float:
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout: int = 1) -> None:
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example #10
class TwoLayerNet:
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # f = open("./db/param_result/784x50x10-0.99162.json", 'r')
        # self.params = json.load(f)
        # f.close()
        # for key in ('W1', 'b1', 'W2', 'b2'):
        #     self.params[key] = np.array(self.params[key])

        # create the layer objects
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = np.sum(y == t) / float(x.shape[0])
        return acc

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers_list = list(self.layers.values())
        layers_list.reverse()
        for layer in layers_list:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers[
            'Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers[
            'Affine2'].db

        return grads
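
Because this class exposes both numerical_gradient and the backprop-based gradient, a gradient check is the natural smoke test. A hedged sketch with dummy data (the numerical_gradient helper and the layer classes are assumed to come from the same book-style common library):

import numpy as np

net = TwoLayerNet(input_size=4, hidden_size=5, output_size=3)
x = np.random.randn(3, 4)                 # tiny dummy batch
t = np.eye(3)[np.random.randint(0, 3, 3)]

grad_bp = net.gradient(x, t)              # backprop gradients
grad_num = net.numerical_gradient(x, t)   # numerical gradients (slow)

for key in ('W1', 'b1', 'W2', 'b2'):
    diff = np.average(np.abs(grad_bp[key] - grad_num[key]))
    print(key, diff)                      # should be on the order of 1e-10 or smaller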
Example #11
class MultiLayerNet:
    """
    多层神经网络
    """
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ration=0.5,
                 use_batchnorm=False):
        """
        :param input_size: 输入层大小
        :param hidden_size_list: 隐藏层神经元数量的列表 (e.g. [50, 50, 50])
        :param output_size: 输出层大小
        :param activation: 激活函数 ('relu' or 'sigmod')
        :param weight_init_std: 指定权重标准差 (e.g. 0.01)
                指定 'relu' 或 'he': 使用 "He 初始值"
                指定 'sigmoid' 或 'xavier: 使用 "Xavier 初始值"
        :param weight_decay_lambda: Weight Decay (L2 范数) 的强度
        :param use_dropout: 是否使用 Dropout
        :param dropout_ration: Dropout 比例
        :param use_batchnorm: 是否使用 Batch Normalization
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.activation = activation
        self.weight_decay_lambda = weight_decay_lambda
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        self.dropout_ration = dropout_ration
        self.weight_init_std = weight_init_std
        self.params = {}

        # initialize the weights
        self.__init_weight()

        # initialize the layers
        self.__init_layer()

    def __init_layer(self):
        activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batchnorm:
                self.params['gamma' + str(idx)] = np.ones(self.hidden_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.zeros(self.hidden_size_list[idx - 1])
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                         self.params['beta' + str(idx)])

            self.layers['activation_function' + str(idx)] = activation_layer[self.activation]()

            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ration)

        idx += 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self):
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = self.weight_init_std
            if str(self.weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])
            elif str(self.weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])

            self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx - 1], all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x, train_flag=False):
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:  # Dropout[0-9]* or BatchNorm[0-9]*
                x = layer.forward(x, train_flag)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t, train_flag=False):
        y = self.predict(x, train_flag)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, x, t):
        y = self.predict(x, train_flag=False)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = np.sum(y == t) / float(x.shape[0])
        return acc

    def gradient(self, x, t):
        # forward
        self.loss(x, t, train_flag=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers_list = list(self.layers.values())
        layers_list.reverse()
        for layer in layers_list:
            dout = layer.backward(dout)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta

        return grads

    def set_dropout(self, flag):
        self.use_dropout = flag
        self.__init_weight()
        self.__init_layer()
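
Dropout and BatchNormalization behave differently at training and inference time, which is why loss and predict take a train_flag. A minimal usage sketch with dummy data (shapes are assumptions for illustration):

import numpy as np

net = MultiLayerNet(input_size=10, hidden_size_list=[20, 20], output_size=3,
                    use_dropout=True, use_batchnorm=True)
x = np.random.randn(8, 10)
t = np.eye(3)[np.random.randint(0, 3, 8)]

train_loss = net.loss(x, t, train_flag=True)  # dropout/batchnorm in training mode
grads = net.gradient(x, t)                    # gradient() also forwards with train_flag=True
test_acc = net.accuracy(x, t)                 # accuracy() forwards with train_flag=False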
Example #12
class MulLayerNet:
    """
    Parameters
    ----------
    input_size : 输入大小(MNIST的情况下为784)
    hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100])
    output_size : 输出大小(MNIST的情况下为10)
    activation : 'relu' or 'sigmoid'
    weight_init_std : 指定权重的标准差(e.g. 0.01)
        指定'relu'或'he'的情况下设定“He的初始值”
        指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值”
    weight_decay_lambda : Weight Decay(L2范数)的强度
    """
    def __init__(self,
                 input_size,
                 output_size,
                 hidden_size_list,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0):
        self.input_size = input_size
        self.hidden_size_list = hidden_size_list
        self.output_size = output_size
        self.activation = activation
        self.weight_init_std = weight_init_std
        self.weight_decay_lambda = weight_decay_lambda

        self._init_weight()
        self._init_layers()

    def _init_weight(self):
        self.params = {}
        all_layers = [self.input_size
                      ] + self.hidden_size_list + [self.output_size]
        weight_init_std = self.weight_init_std
        for i in range(1, len(self.hidden_size_list) + 2):
            if weight_init_std == 'relu' or weight_init_std == 'he':
                scalar = np.sqrt(2 / all_layers[i - 1])
            elif weight_init_std == 'sigmoid' or weight_init_std == 'xavier':
                scalar = np.sqrt(1 / all_layers[i - 1])
            else:
                scalar = weight_init_std

            self.params['W%d' % i] = np.random.randn(all_layers[i - 1],
                                                     all_layers[i]) * scalar
            self.params['b%d' % i] = np.zeros(all_layers[i], dtype='float')

    def _init_layers(self):
        self.layers = OrderedDict()
        activation_dict = {'relu': Relu, 'sigmoid': Sigmoid}
        for i in range(1, len(self.hidden_size_list) + 2):
            self.layers['Affine%d' % i] = Affine(self.params['W%d' % i],
                                                 self.params['b%d' % i])
            # no activation after the final Affine; SoftmaxWithLoss follows it
            if i <= len(self.hidden_size_list):
                self.layers['Activation%d' % i] = activation_dict[self.activation]()

        self.last_layers = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        weight_decay = 0

        for param_index in range(1, len(self.hidden_size_list) + 2):
            param = self.params['W%d' % param_index]
            weight_decay += .5 * self.weight_decay_lambda * np.sum(param**2)

        return self.last_layers.forward(y, t) + weight_decay

    def accuracy(self, x, t):
        p = self.predict(x)
        y = np.argmax(p, axis=1)

        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        return np.sum(y == t, dtype='float') / float(y.shape[0])

    # Compute the gradients of the loss with respect to the parameters.

    def gradient(self, x, t):
        self.loss(x, t)
        dout = 1.
        dout = self.last_layers.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()

        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        for i in range(1, len(self.hidden_size_list) + 2):
            grads['W%d' % i] = self.layers[
                'Affine%d' %
                i].dW + self.weight_decay_lambda * self.layers['Affine%d' %
                                                               i].W
            grads['b%d' % i] = self.layers['Affine%d' % i].db
        return grads
Example #13
class DeepConvNet:
    def __init__(self,
                 input_dim,
                 conv_params=[
                     {
                         'filter_num': 32,
                         'filter_size': 9,
                         'pad': 0,
                         'stride': 3
                     },
                     {
                         'filter_num': 64,
                         'filter_size': 5,
                         'pad': 2,
                         'stride': 1
                     },
                     {
                         'filter_num': 128,
                         'filter_size': 7,
                         'pad': 0,
                         'stride': 1
                     },
                 ],
                 hidden_size=128,
                 dropout_ratio=[0.2, 0.5],
                 output_size=5):
        self.params = {}
        self.layers = {}
        pre_shape = input_dim
        for idx, conv_param in enumerate(conv_params):
            # init parameters
            self.params['W' + str(idx + 1)] = init_he(pre_shape[0] * conv_param['filter_size']**2) *\
                np.random.randn(
                    conv_param['filter_num'],
                    pre_shape[0],
                    conv_param['filter_size'],
                    conv_param['filter_size'])
            self.params['b' + str(idx + 1)] = np.zeros(
                conv_param['filter_num'])

            # set layers
            self.layers['Conv' + str(idx + 1)] = Convolution(
                self.params['W' + str(idx + 1)],
                self.params['b' + str(idx + 1)], conv_param['stride'],
                conv_param['pad'])
            self.layers['Relu' + str(idx + 1)] = Relu()

            # calc output image size of conv layers
            pre_shape = self.layers['Conv' +
                                    str(idx + 1)].output_size(pre_shape)

        idx = len(conv_params)

        # init parameters and set layers Affine
        self.params['W' + str(idx + 1)] = init_he(pre_shape[0] * pre_shape[1]**2) *\
            np.random.randn(pre_shape[0] * pre_shape[1]**2, hidden_size)
        self.params['b' + str(idx + 1)] = np.zeros(hidden_size)
        self.layers['Affine' + str(idx + 1)] = Affine(
            self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)])
        self.layers['Relu' + str(idx + 1)] = Relu()
        idx += 1

        # init parameters and set layers output
        self.params['W' +
                    str(idx + 1)] = init_he(hidden_size) * np.random.randn(
                        hidden_size, output_size)
        self.params['b' + str(idx + 1)] = np.zeros(output_size)
        self.layers['Affine' + str(idx + 1)] = Affine(
            self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)])

        # set loss function layer
        self.loss_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        for layer in self.layers.values():
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x, train_flg=True)
        return self.loss_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0

        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx, train_flg=False)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.loss_layer.backward(dout)

        tmp_layers = list(self.layers.values())
        tmp_layers.reverse()
        for layer in tmp_layers:
            dout = layer.backward(dout)

        # setting
        grads = {}
        for i, layer_name in enumerate(self.get_layer_names()):
            grads['W' + str(i + 1)] = self.layers[layer_name].dW
            grads['b' + str(i + 1)] = self.layers[layer_name].db

        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, layer_name in enumerate(self.get_layer_names()):
            self.layers[layer_name].W = self.params['W' + str(i + 1)]
            self.layers[layer_name].b = self.params['b' + str(i + 1)]

    def get_layer_names(self):
        lst = []
        for layer_name in self.layers.keys():
            if 'Conv' in layer_name or 'Affine' in layer_name:
                lst.append(layer_name)

        return np.array(lst)
Example #14
class TwoLayerNet:
	def __init__(self, input_size, hidden_size, output_size,
		weight_init_std=0.01):
		# Initialize weights.
		self.params = {}
		self.params['W1'] = weight_init_std * \
												np.random.randn(input_size, hidden_size)
		self.params['b1'] = np.zeros(hidden_size)
		self.params['W2'] = weight_init_std * \
												np.random.randn(hidden_size, output_size)
		self.params['b2'] = np.zeros(output_size)

		# Generate layers.
		self.layers = OrderedDict() # Ordered dictionary
		self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
		self.layers['Relu'] = Relu()
		self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

		self.lastLayer = SoftmaxWithLoss()

	# Forward propagation.
	def predict(self, x):
		for layer in self.layers.values():
			x = layer.forward(x)
		
		return x

	def loss(self, x, t):
		y = self.predict(x)

		return self.lastLayer.forward(y, t)

	# Calculate accuracy.
	def accuracy(self, x, t):
		y = self.predict(x)
		y = np.argmax(y, axis=1)
		t = np.argmax(t, axis=1)
		accuracy = np.sum(y == t) / float(x.shape[0])

		return accuracy

	# # Numerical method to calculate gradient.
	# def numerical_gradient(self, x, t):
	# 	loss_W = lambda W: self.loss(x, t)

	# 	grads = {}
	# 	grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
	# 	grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
	# 	grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
	# 	grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

	# 	return grads

	# BP method to calculate gradient.
	def gradient(self, x, t):
		# FP.
		self.loss(x, t)

		# BP.
		dout = 1
		dout = self.lastLayer.backward(dout)

		# Reverse the order of elements in list layers.
		layers = list(self.layers.values())
		layers.reverse()

		for layer in layers:
			dout = layer.backward(dout)

		# Settings.
		grads = {}
		grads['W1'] = self.layers['Affine1'].dW
		grads['b1'] = self.layers['Affine1'].db
		grads['W2'] = self.layers['Affine2'].dW
		grads['b2'] = self.layers['Affine2'].db

		return grads
Example #15
import os
import sys
from pathlib import Path

import numpy as np

try:
    sys.path.append(os.path.join(Path(os.getcwd()).parent, 'lib'))
    from layers import SoftmaxWithLoss
    from common import softmax
    import twolayernet as network
except ImportError:
    print('Library Module Can Not Found')

# 1. load training/test data
_x, _t = np.array([2.6, 3.9, 5.6]), np.array([0, 0, 1])

# 2. hyperparameter

# 3. initialize layer
layer = SoftmaxWithLoss()

# Test
loss = layer.forward(_x, _t)
dout = layer.backward(1)
print(loss, dout)

# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


def forward_propagation(x):
    y = softmax(x)
    return y


network.forward_propagation = forward_propagation
loss = network.loss(_x, _t)
print(loss)
Example #16
class SimpleConvNet:
    """简单ConvNet

    conv - relu - pool - affine - relu - affine - softmax
    
    """
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={
                     'filter_num': 30,
                     'filter_size': 5,
                     'pad': 0,
                     'stride': 1
                 },
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) *
                               (conv_output_size / 2))

        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # create the layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):

        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        acc = 0.0

        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.predict(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)

        return acc / x.shape[0]

    def numerical_gradient(self, x, t):
        loss_w = lambda w: self.loss(x, t)

        grads = {}
        for idx in (1, 2, 3):
            grads['W' + str(idx)] = numerical_gradient(
                loss_w, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(
                loss_w, self.params['b' + str(idx)])

        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # store the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers[
            'Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers[
            'Affine1'].db
        grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers[
            'Affine2'].db

        return grads

    def save_params(self, file_name="params.pkl"):
        params = {}
        for key, val in self.params.items():
            params[key] = val
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
            self.layers[key].W = self.params['W' + str(i + 1)]
            self.layers[key].b = self.params['b' + str(i + 1)]
Example #17
class TwoLayerNet(object):
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        # self.params['W1'] = np.ones((input_size, hidden_size))
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) 
        # self.params['W2'] = np.ones((hidden_size, output_size))
        self.params['b2'] = np.zeros(output_size)
        
        # create the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        
        self.lastLayer = SoftmaxWithLoss()
        
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x
    
    # x: input data, t: labels
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)
    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
    
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads
    
    def gradient(self, x, t):
        # forward
        self.loss(x, t)
        
        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
            
        # store the gradients
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        
        return grads
Example #18
class MultiLayerNet:
    """

    Parameters
    ----------
    input_size : 
    hidden_size_list : (e.g. [100, 100, 100])
    output_size : 
    activation : 'relu' or 'sigmoid'
    weight_init_std : 权重初始化标准差(e.g. 0.01)
                      'relu'「He的初始化」
                      'sigmoid'「Xavier的初始化」
    weight_decay_lambda : Weight Decay(L2 Norm)
    use_dropout: 是否使用Dropout层
    drop_out_ratio: dropout的比例
    use_batchnorm: 是否使用BatchNormalization层
    """
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ratio=0.5,
                 use_batchnorm=False):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.weight_decay_lambda = weight_decay_lambda
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # initialize the weights
        self.__init_weight(weight_init_std)

        # create the layers
        activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(
                self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batchnorm:
                self.params['gamma' + str(idx)] = np.ones(
                    hidden_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.zeros(
                    hidden_size_list[idx - 1])
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                    self.params['gamma' + str(idx)],
                    self.params['beta' + str(idx)])

            self.layers['Activation_function' +
                        str(idx)] = activation_layer[activation]()

            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self, weight_init_std):
        """权重初始值设定

        Parameters
        ----------
        weight_init_std : 权重初始化标准差(e.g. 0.01)
                          'relu'「He的初始化」    np.sqrt(2/n)
                          'sigmoid'「Xavier的初始化」  np.sqrt(1/n)
        """
        all_size_list = [self.input_size
                         ] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])  # recommended scale when using ReLU
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])  # recommended scale when using Sigmoid

            self.params['W' + str(idx)] = scale * np.random.randn(
                all_size_list[idx - 1], all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x, train_flg=False):
        for key, layer in self.layers.items():
            if "Dropout" in key or "BatchNorm" in key:
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, t, train_flg=False):

        y = self.predict(x, train_flg)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, x, t):
        y = self.predict(x, train_flg=False)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):

        loss_W = lambda W: self.loss(x, t, train_flg=True)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = numerical_gradient(
                loss_W, self.params['W' + str(idx)])
            grads['b' + str(idx)] = numerical_gradient(
                loss_W, self.params['b' + str(idx)])

            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + str(idx)] = numerical_gradient(
                    loss_W, self.params['gamma' + str(idx)])
                grads['beta' + str(idx)] = numerical_gradient(
                    loss_W, self.params['beta' + str(idx)])

        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t, train_flg=True)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = self.layers['Affine' + str(
                idx)].dW + self.weight_decay_lambda * self.layers['Affine' +
                                                                  str(idx)].W
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batchnorm and idx != self.hidden_layer_num + 1:
                grads['gamma' + str(idx)] = self.layers['BatchNorm' +
                                                        str(idx)].dgamma
                grads['beta' + str(idx)] = self.layers['BatchNorm' +
                                                       str(idx)].dbeta

        return grads


#     """拡張版の全結合による多層ニューラルネットワーク

#     Weiht Decay、Dropout、Batch Normalizationの機能を持つ

#     Parameters
#     ----------
#     input_size : 入力サイズ(MNISTの場合は784)
#     hidden_size_list : 隠れ層のニューロンの数のリスト(e.g. [100, 100, 100])
#     output_size : 出力サイズ(MNISTの場合は10)
#     activation : 'relu' or 'sigmoid'
#     weight_init_std : 重みの標準偏差を指定(e.g. 0.01)
#         'relu'または'he'を指定した場合は「Heの初期値」を設定
#         'sigmoid'または'xavier'を指定した場合は「Xavierの初期値」を設定
#     weight_decay_lambda : Weight Decay(L2ノルム)の強さ
#     use_dropout: Dropoutを使用するかどうか
#     dropout_ration : Dropoutの割り合い
#     use_batchNorm: Batch Normalizationを使用するかどうか
#     """
#     def __init__(self, input_size, hidden_size_list, output_size,
#                  activation='relu', weight_init_std='relu', weight_decay_lambda=0,
#                  use_dropout = False, dropout_ration = 0.5, use_batchnorm=False):
#         self.input_size = input_size
#         self.output_size = output_size
#         self.hidden_size_list = hidden_size_list
#         self.hidden_layer_num = len(hidden_size_list)
#         self.use_dropout = use_dropout
#         self.weight_decay_lambda = weight_decay_lambda
#         self.use_batchnorm = use_batchnorm
#         self.params = {}

#         # 重みの初期化
#         self.__init_weight(weight_init_std)

#         # レイヤの生成
#         activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
#         self.layers = OrderedDict()
#         for idx in range(1, self.hidden_layer_num+1):
#             self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
#                                                       self.params['b' + str(idx)])
#             if self.use_batchnorm:
#                 self.params['gamma' + str(idx)] = np.ones(hidden_size_list[idx-1])
#                 self.params['beta' + str(idx)] = np.zeros(hidden_size_list[idx-1])
#                 self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], self.params['beta' + str(idx)])

#             self.layers['Activation_function' + str(idx)] = activation_layer[activation]()

#             if self.use_dropout:
#                 self.layers['Dropout' + str(idx)] = Dropout(dropout_ration)

#         idx = self.hidden_layer_num + 1
#         self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])

#         self.last_layer = SoftmaxWithLoss()

#     def __init_weight(self, weight_init_std):
#         """重みの初期値設定

#         Parameters
#         ----------
#         weight_init_std : 重みの標準偏差を指定(e.g. 0.01)
#             'relu'または'he'を指定した場合は「Heの初期値」を設定
#             'sigmoid'または'xavier'を指定した場合は「Xavierの初期値」を設定
#         """
#         all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
#         for idx in range(1, len(all_size_list)):
#             scale = weight_init_std
#             if str(weight_init_std).lower() in ('relu', 'he'):
#                 scale = np.sqrt(2.0 / all_size_list[idx - 1])  # ReLUを使う場合に推奨される初期値
#             elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
#                 scale = np.sqrt(1.0 / all_size_list[idx - 1])  # sigmoidを使う場合に推奨される初期値
#             self.params['W' + str(idx)] = scale * np.random.randn(all_size_list[idx-1], all_size_list[idx])
#             self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

#     def predict(self, x, train_flg=False):
#         for key, layer in self.layers.items():
#             if "Dropout" in key or "BatchNorm" in key:
#                 x = layer.forward(x, train_flg)
#             else:
#                 x = layer.forward(x)

#         return x

#     def loss(self, x, t, train_flg=False):
#         """損失関数を求める
#         引数のxは入力データ、tは教師ラベル
#         """
#         y = self.predict(x, train_flg)

#         weight_decay = 0
#         for idx in range(1, self.hidden_layer_num + 2):
#             W = self.params['W' + str(idx)]
#             weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

#         return self.last_layer.forward(y, t) + weight_decay

#     def accuracy(self, X, T):
#         Y = self.predict(X, train_flg=False)
#         Y = np.argmax(Y, axis=1)
#         if T.ndim != 1 : T = np.argmax(T, axis=1)

#         accuracy = np.sum(Y == T) / float(X.shape[0])
#         return accuracy

#     def numerical_gradient(self, X, T):
#         """勾配を求める(数値微分)

#         Parameters
#         ----------
#         X : 入力データ
#         T : 教師ラベル

#         Returns
#         -------
#         各層の勾配を持ったディクショナリ変数
#             grads['W1']、grads['W2']、...は各層の重み
#             grads['b1']、grads['b2']、...は各層のバイアス
#         """
#         loss_W = lambda W: self.loss(X, T, train_flg=True)

#         grads = {}
#         for idx in range(1, self.hidden_layer_num+2):
#             grads['W' + str(idx)] = numerical_gradient(loss_W, self.params['W' + str(idx)])
#             grads['b' + str(idx)] = numerical_gradient(loss_W, self.params['b' + str(idx)])

#             if self.use_batchnorm and idx != self.hidden_layer_num+1:
#                 grads['gamma' + str(idx)] = numerical_gradient(loss_W, self.params['gamma' + str(idx)])
#                 grads['beta' + str(idx)] = numerical_gradient(loss_W, self.params['beta' + str(idx)])

#         return grads

#     def gradient(self, x, t):
#         # forward
#         self.loss(x, t, train_flg=True)

#         # backward
#         dout = 1
#         dout = self.last_layer.backward(dout)

#         layers = list(self.layers.values())
#         layers.reverse()
#         for layer in layers:
#             dout = layer.backward(dout)

#         # 設定
#         grads = {}
#         for idx in range(1, self.hidden_layer_num+2):
#             grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]
#             grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

#             if self.use_batchnorm and idx != self.hidden_layer_num+1:
#                 grads['gamma' + str(idx)] = self.layers['BatchNorm' + str(idx)].dgamma
#                 grads['beta' + str(idx)] = self.layers['BatchNorm' + str(idx)].dbeta

#         return grads
Example #19
class TwoLayerNet(DeepLearn):
    def __init__(self, hidden_size, weight_init_std=0.01):
        super().__init__()
        self.params = dict()
        self.params['W1'] = weight_init_std * np.random.randn(
            self.x_train.shape[1], hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(
            hidden_size, self.t_train.shape[1])
        self.params['b2'] = np.zeros(self.t_train.shape[1])

        # create the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        # w1, w2 = self.params['W1'], self.params['W2']
        # b1, b2 = self.params['b1'], self.params['b2']
        #
        # a1 = np.dot(x, w1) + b1
        # z1 = fun.sigmoid(a1)
        # a2 = np.dot(z1, w2) + b2
        # return fun.softmax(a2)

        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def cross_entropy_loss(self, x, t):
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def numerical_gradient(self, x, t):
        """数值微分求梯度,速度慢,有误差,但实现简单"""
        loss_w = lambda _: self.cross_entropy_loss(x, t)

        grads = dict()
        grads['W1'] = fun.numerical_gradient(loss_w, self.params['W1'])
        grads['b1'] = fun.numerical_gradient(loss_w, self.params['b1'])
        grads['W2'] = fun.numerical_gradient(loss_w, self.params['W2'])
        grads['b2'] = fun.numerical_gradient(loss_w, self.params['b2'])

        return grads

    def gradient(self, x, t):
        """误差反向传播求梯度,用的是解析式求微分,速度快"""
        # forward
        self.cross_entropy_loss(x, t)

        # backward
        dout = self.last_layer.backward()

        layers = list(self.layers.values())
        layers.reverse()

        for layer in layers:
            dout = layer.backward(dout)

        grads = dict()
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

    def test(self):
        self.train_acc_list.append(self.accuracy(self.x_train, self.t_train))
        self.test_acc_list.append(self.accuracy(self.x_test, self.t_test))

    def start(self,
              iters_num=10000,
              batch_size=100,
              learning_rate=0.1,
              epoch=0,
              record=False,
              numerical=False):
        for i in range(iters_num):
            print('learn:', i)
            batch_mask = np.random.choice(self.x_train.shape[0], batch_size)
            x_batch = self.x_train[batch_mask]
            t_batch = self.t_train[batch_mask]

            print('computing gradients')
            if numerical:
                grad = self.numerical_gradient(x_batch, t_batch)
            else:
                grad = self.gradient(x_batch, t_batch)

            print('updating the weights')
            for k in ('W1', 'b1', 'W2', 'b2'):
                self.params[k] -= learning_rate * grad[k]

            if epoch and (i + 1) % epoch == 0:
                self.test()
            if record:
                loss = self.cross_entropy_loss(x_batch, t_batch)
                yield loss
Example #20
class SimpleConvNet:
    def __init__(self,
                 input_dim=(1, 28, 28),  # (C, W, H)
                 filter_num=30,
                 filter_size=5,
                 filter_pad=0,
                 filter_stride=1,
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01
                 ):
        # input(N, C, W, H)
        # -> Conv(N, FN, conv_out_h, conv_out_w) -> ReLu
        # -> Pooling(N, FN, pool_out_h, pool_out_w)
        # -> Affine [flattens the input] (N, hidden_layer) -> ReLu
        # -> Affine(N, output_layer) -> SoftMax

        # input_size is determined dynamically (a square input is assumed)
        input_size = input_dim[1]
        conv_output_size = (input_size + 2 * filter_pad - filter_size) / filter_stride + 1
        # FN * pool_out_h * pool_out_w
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        self.params = {}

        # Conv
        # (input_size, C, W, H) -> (N, FilterNum, out_h, out_w)
        self.params['W1'] = \
            weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)

        # ReLu
        # Pool
        # Affine
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)

        # Relu
        # Affine
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()

        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad)
        self.layers['ReLu1'] = ReLu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLu2'] = ReLu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        pred_y = self.predict(x)
        return self.last_layer.forward(pred_y, t)

    def gradient(self, x, t):
        self.loss(x, t)

        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()

        for layer in layers:
            dout = layer.backward(dout)

        grads = {
            'W1': self.layers['Conv1'].dW,
            'b1': self.layers['Conv1'].db,
            'W2': self.layers['Affine1'].dW,
            'b2': self.layers['Affine1'].db,
            'W3': self.layers['Affine2'].dW,
            'b3': self.layers['Affine2'].db
        }

        return grads

    def accuracy(self, x: np.ndarray, t: np.ndarray):
        pred_y = self.predict(x)
        y = np.argmax(pred_y, axis=1)
        if t.ndim != 1:
            # for one-hot labels
            t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
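
A hedged smoke-test sketch for the ConvNet above, using a dummy MNIST-shaped batch (the Convolution, Pooling, Affine, ReLu and SoftmaxWithLoss layer classes are assumed to be importable from the surrounding project):

import numpy as np

net = SimpleConvNet(input_dim=(1, 28, 28), hidden_size=100, output_size=10)
x = np.random.randn(2, 1, 28, 28)  # two fake 28x28 grayscale images
t = np.array([3, 7])               # fake class labels

loss = net.loss(x, t)
grads = net.gradient(x, t)
for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
    net.params[key] -= 0.01 * grads[key]  # one plain SGD step
print(loss)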
Example #21
class MultiLayernet:
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0):
        self.input_size = input_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.output_size = output_size  # needed by __init_weight below
        self.weight_decay_lambda = weight_decay_lambda
        self.params = {}

        # Initialize weights
        self.__init_weight(weight_init_std)

        # Generate layers
        activation_layer = {'sigmoid': Sigmoid, 'relu': ReLU}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(
                self.params['W' + str(idx)], self.params['b' + str(idx)])
            self.layers['Activation_function' + str(idx)] = \
                activation_layer[activation]()

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithLoss()

    def __init_weight(self, weight_init_std):
        all_size_list = \
            [self.input_size] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])

            self.params['W' + str(idx)] = scale * np.random.randn(
                all_size_list[idx - 1], all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)

        weight_decay = 0
        for idx in range(1, self.hidden_layer_num + 2):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        # Forward
        self.loss(x, t)

        # Backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Settings
        grads = {}
        for idx in range(1, self.hidden_layer_num + 2):
            grads['W' + str(idx)] = \
                self.layers['Affine' + str(idx)].dW \
                + self.weight_decay_lambda * self.layers['Affine' + str(idx)].W
            grads['b' + str(idx)] = \
                self.layers['Affine' + str(idx)].db

        return grads
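
A short usage sketch for MultiLayernet above, assuming the Affine/Sigmoid/ReLU/SoftmaxWithLoss layer classes it references are available; the data and hyperparameters are made up for illustration.

import numpy as np

net = MultiLayernet(input_size=784, hidden_size_list=[100, 100], output_size=10,
                    weight_decay_lambda=0.1)

x = np.random.rand(32, 784)                        # dummy mini-batch
t = np.eye(10)[np.random.randint(0, 10, size=32)]  # dummy one-hot labels

grads = net.gradient(x, t)   # gradient() adds the L2 (weight decay) term to each dW
for key in net.params:
    net.params[key] -= 0.01 * grads[key]           # plain SGD step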
Example #22
class NLayerNet:
    def __init__(self,
                 layer_num,
                 input_size,
                 output_size,
                 hidden_size,
                 weight_init_std=0.01):
        # Initialize the weights
        self.layer_num = layer_num
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.weight_init_std = weight_init_std
        self.weights = []
        self._make_weights()
        self.bias = []
        self._make_bias()
        self._make_layers()

    def _make_layers(self):
        self.layers = []
        for i in range(self.layer_num):
            self.layers.append(Affine(self.weights[i], self.bias[i]))

            if i == self.layer_num - 1:
                pass
            else:
                self.layers.append(Relu())

        self.lastLayer = SoftmaxWithLoss()

    def _make_weights(self):
        """
        make wights list
        """
        for i in range(self.layer_num):
            if i == 0:
                #input -> hidden
                self.weights.append(
                    self.weight_init_std *
                    np.random.randn(self.input_size, self.hidden_size))
            elif i == self.layer_num - 1:
                #hidden -> output
                self.weights.append(
                    self.weight_init_std *
                    np.random.randn(self.hidden_size, self.output_size))
            else:
                #hidden -> hidden
                self.weights.append(
                    self.weight_init_std *
                    np.random.randn(self.hidden_size, self.hidden_size))

    def _make_bias(self):
        """
        make bias list
        """
        for i in range(self.layer_num):
            if i == self.layer_num - 1:
                self.bias.append(np.zeros(self.output_size))
            else:
                self.bias.append(np.zeros(self.hidden_size))

    def predict(self, x):
        """ predict.
        :param x:input_size matrix
        :return: output_size matrix
        """
        for layer in self.layers:
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """損失関数の値を求める

        :param x: 画像データ
        :param t:正解ラベル
        :return:損失関数
        """
        y = self.predict(x)

        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """認識精度を求める.

        :param x: input data
        :param t: label
        :return:認識精度
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def gradient(self, x, t):
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        # collect the gradients from each Affine layer
        weight_grads = []
        bias_grads = []
        for i in range(self.layer_num):
            # Affine layers sit at even indices: each one is followed by a
            # Relu except the last.
            affine_layer = self.layers[2 * i]
            weight_grads.append(affine_layer.dW)
            bias_grads.append(affine_layer.db)

        return weight_grads, bias_grads
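
Because NLayerNet.gradient returns two lists rather than a dict, a parameter update iterates over net.weights and net.bias directly. A minimal sketch with made-up data (note that accuracy() assumes one-hot labels), assuming the Affine/Relu/SoftmaxWithLoss layer classes are available:

import numpy as np

net = NLayerNet(layer_num=3, input_size=784, output_size=10, hidden_size=50)

x = np.random.rand(32, 784)
t = np.eye(10)[np.random.randint(0, 10, size=32)]  # one-hot labels

w_grads, b_grads = net.gradient(x, t)
for i in range(net.layer_num):
    net.weights[i] -= 0.01 * w_grads[i]   # in-place, so the Affine layers see the update
    net.bias[i] -= 0.01 * b_grads[i]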
Example #23
class TwoLayerNet:
  def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    self.params = {}
    self.params['W1'] = weight_init_std*np.random.randn(input_size, hidden_size)
    self.params['W2'] = weight_init_std*np.random.randn(hidden_size, output_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['b2'] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    
    self.lastLayer = SoftmaxWithLoss()

  def predict(self, x):
    for layer in self.layers.values():
      x = layer.forward(x)

    return x

  def loss(self, x, t):
    y = self.predict(x)
    return self.lastLayer.forward(y, t)
  
  def accuracy(self, x, t):
    y = self.predict(x)

    y = np.argmax(y, axis=1)
    t = np.argmax(t, axis=1)

    return np.sum(y==t)/float(t.shape[0])

  def numerical_gradient(self, x, t):
    loss_w = lambda W : self.loss(x, t)

    grads = {}
    grads['W1'] = numerical_gradient(loss_w, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_w, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_w, self.params['b2'])

    return grads

  def gradient(self, x, t):
    self.loss(x, t)

    dout = 1.0
    dout = self.lastLayer.backward(dout)
    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
      dout = layer.backward(dout)
    
    grads = {}
    grads['W1'] = self.layers['Affine1'].dW
    grads['b1'] = self.layers['Affine1'].db
    grads['W2'] = self.layers['Affine2'].dW
    grads['b2'] = self.layers['Affine2'].db

    return grads
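
Since this TwoLayerNet defines both numerical_gradient and the backprop-based gradient, the natural usage is a gradient check comparing the two. A small sketch with dummy data, assuming the standalone numerical_gradient function referenced above is importable:

import numpy as np

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

x = np.random.rand(3, 784)                        # keep the batch tiny: numerical gradients are slow
t = np.eye(10)[np.random.randint(0, 10, size=3)]  # one-hot labels

grad_backprop = net.gradient(x, t)
grad_numerical = net.numerical_gradient(x, t)

for key in grad_backprop:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key, diff)   # should be tiny, e.g. on the order of 1e-10 to 1e-7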
Example #24
class MultiLayerNet:
    def __init__(self, input_size, hidden_size_list, output_size,
                 activation='relu', weight_init_std="relu", 
                 weight_decay_lambda=0, 
                 use_batchnorm=False):
        self.input_size = input_size
        self.hidden_layer_list = hidden_size_list
        self.output_size = output_size
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm

        activations = {'relu':Relu, 'sigmoid': Sigmoid}
        self.activation_func = activations[activation]

        layer_size_list = [input_size] + hidden_size_list + [output_size]
        self._init_weight_params(layer_size_list, weight_init_std)
        self._init_layers(layer_size_list)

    def _init_weight_params(self, layer_size_list, weight_init_std):
        self.params = {}
        for i in range(1, len(layer_size_list)):
            front_layer_size = layer_size_list[i-1]
            back_layer_size = layer_size_list[i]
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / front_layer_size)  # He init: recommended when using ReLU
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / front_layer_size)  # Xavier init: recommended when using sigmoid
            self.params['W'+str(i)] = scale * \
                np.random.randn(front_layer_size, back_layer_size)
            self.params['b'+str(i)] = np.zeros(back_layer_size)

    def _init_layers(self, layer_size_list):
        self.layers = OrderedDict()
        for i in range(1, len(layer_size_list)-1):
            self.layers['Affine'+str(i)] = Affine(
                self.params['W' + str(i)], self.params['b'+str(i)])
            if self.use_batchnorm:
                self.params['gamma'+str(i)] = np.ones(layer_size_list[i])
                self.params['beta'+str(i)] = np.zeros(layer_size_list[i])
                self.layers['BatchNorm'+str(i)] = BatchNormalization(
                    self.params['gamma'+str(i)], self.params['beta'+str(i)])

            self.layers['Activation'+str(i)] = self.activation_func()

        output_layer_idx = len(layer_size_list)-1
        self.layers['Affine'+str(output_layer_idx)] = Affine(
            self.params['W'+str(output_layer_idx)], self.params['b'+str(output_layer_idx)])

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        
        weight_decay = 0
        for idx in range(1, len(self.hidden_layer_list) + 2):
            W = self.params['W' + str(idx)]
            weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)

        return self.last_layer.forward(y, t) + weight_decay

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        def loss_w(W): return self.loss(x, t)

        grads = {}
        layer_count = len(self.hidden_layer_list) + 1
        for i in range(1, layer_count+1):
            grads['W'+str(i)] = numerical_gradient(loss_w,
                                                   self.params['W'+str(i)])
            grads['b'+str(i)] = numerical_gradient(loss_w,
                                                   self.params['b'+str(i)])
            
            if self.use_batchnorm and i < layer_count:
                grads['gamma'+str(i)] = numerical_gradient(loss_w,
                                                   self.params['gamma'+str(i)])
                grads['beta'+str(i)] = numerical_gradient(loss_w,
                                                   self.params['beta'+str(i)])


        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # collect gradients (weight decay term added to each dW)
        grads = {}
        layer_count = len(self.hidden_layer_list) + 1
        for idx in range(1, layer_count+1):
            grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda*self.params['W' + str(idx)]
            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db

            if self.use_batchnorm and idx < layer_count:                
                grads['gamma'+str(idx)] = self.layers['BatchNorm'+str(idx)].dgamma
                grads['beta'+str(idx)] = self.layers['BatchNorm'+str(idx)].dbeta

        return grads
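
A usage sketch with batch normalization turned on; because gamma/beta are added to self.params inside _init_layers and gradient() returns dgamma/dbeta, they are updated exactly like W and b. The data and hyperparameters are illustrative, and the BatchNormalization layer class is assumed to be available alongside the other common layers.

import numpy as np

net = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100], output_size=10,
                    weight_decay_lambda=0.01, use_batchnorm=True)

x = np.random.rand(32, 784)
t = np.eye(10)[np.random.randint(0, 10, size=32)]

grads = net.gradient(x, t)
for key in grads:
    net.params[key] -= 0.01 * grads[key]   # covers W, b, gamma, and beta keys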