Example #1
class TwoLayersNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        w1 = np.random.randn(I, H) * 0.01
        b1 = np.zeros(H)  #np.random.randn(H)
        w2 = np.random.randn(H, O) * 0.01
        b2 = np.zeros(O)  #np.random.randn(O)

        self.layers = [Affine(w1, b1), Sigmoid(), Affine(w2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for l in self.layers:
            self.params += l.params
            self.grads += l.grads  # gradients are gathered only here -> each layer must update its grads in place, never rebind the arrays

    def predict(self, x):
        for l in self.layers:
            x = l.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        #print('t:', t)  # test
        #print('score:', score)  # test
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dl=1):
        dl = self.loss_layer.backward(dl)
        for l in self.layers[::-1]:
            dl = l.backward(dl)
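
The comment on `self.grads` above matters because the list only aliases each layer's gradient arrays once, in the constructor. A minimal, hypothetical training step that relies on that aliasing, assuming the book's `Affine`, `Sigmoid`, and `SoftmaxWithLoss` layers are importable and using a made-up mini-batch:

import numpy as np

# Plain SGD: updates every parameter array in place through the aliased lists.
class SGD:
    def __init__(self, lr=0.1):
        self.lr = lr

    def update(self, params, grads):
        for param, grad in zip(params, grads):
            param -= self.lr * grad  # in-place, so the layers see the change

# Hypothetical mini-batch: 10 samples, 2 features, 3 classes (integer labels).
x_batch = np.random.randn(10, 2)
t_batch = np.random.randint(0, 3, size=10)

model = TwoLayersNet(input_size=2, hidden_size=10, output_size=3)
optimizer = SGD(lr=0.1)

loss = model.forward(x_batch, t_batch)
model.backward(1)
optimizer.update(model.params, model.grads)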
Example #2
class TwoLayerNet:
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        W1 = 0.01 * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        W2 = 0.01 * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x: np.ndarray) -> np.ndarray:
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x: np.ndarray, t: np.ndarray) -> np.ndarray:
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout: int = 1) -> np.ndarray:
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
Example #3
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
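
For context, a short usage sketch: with a window size of 1, `contexts` holds the left and right neighbor of each target word, which is why the model has exactly two input layers sharing `W_in`. This assumes the book's `common.util` helpers (`preprocess`, `create_contexts_target`, `convert_one_hot`) are available on the path:

from common.util import preprocess, create_contexts_target, convert_one_hot

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size=1)
contexts = convert_one_hot(contexts, vocab_size)  # shape (N, 2, vocab_size)
target = convert_one_hot(target, vocab_size)      # shape (N, vocab_size)

model = SimpleCBOW(vocab_size, hidden_size=5)
loss = model.forward(contexts, target)  # contexts[:, 0] / [:, 1] = left / right word
model.backward()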
Example #4
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.random.randn(O)

        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all the weights into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
Example #5
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0):
        self.input_size = input_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.weight_decay_lambda = weight_decay_lambda
        self.params = {}

        # Initialize weights
        self.__init_weight(weight_init_std)

        # Generate layers
        activation_layer = {'sigmoid': Sigmoid, 'relu': ReLU}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(
                self.params['W' + str(idx)], self.params['b' + str(idx)])
            self.layers['Activation_function' + str(idx)] = \
                activation_layer[activation]()

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithLoss()
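
The constructor above calls a private `__init_weight` that is not shown in this excerpt. The following is only a sketch of what such an initializer usually looks like in this style of network (He scaling for 'relu', Xavier for 'sigmoid'); it is an assumption about the omitted method and also assumes the class stores `self.output_size`, which the excerpt does not show:

    def __init_weight(self, weight_init_std):
        # Assumed sketch of the omitted initializer, not part of the excerpt.
        all_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]
        for idx in range(1, len(all_size_list)):
            scale = weight_init_std
            if str(weight_init_std).lower() in ('relu', 'he'):
                scale = np.sqrt(2.0 / all_size_list[idx - 1])   # He initialization
            elif str(weight_init_std).lower() in ('sigmoid', 'xavier'):
                scale = np.sqrt(1.0 / all_size_list[idx - 1])   # Xavier initialization
            self.params['W' + str(idx)] = scale * np.random.randn(
                all_size_list[idx - 1], all_size_list[idx])
            self.params['b' + str(idx)] = np.zeros(all_size_list[idx])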
Example #6
    def _make_layers(self):
        self.layers = []
        for i in range(self.layer_num):
            self.layers.append(Affine(self.weights[i], self.bias[i]))

            if i != self.layer_num - 1:
                self.layers.append(Relu())

        self.lastLayer = SoftmaxWithLoss()
Example #7
    def _init_layers(self):
        self.layers = OrderedDict()
        all_layers = [self.input_size] + self.hidden_size_list + [self.output_size]
        activation_dict = {'relu': Relu, 'sigmoid': Sigmoid}
        for i in range(1, len(self.hidden_size_list) + 1):
            self.layers['Affine%d' % i] = Affine(self.params['W%d' % i],
                                                 self.params['b%d' % i])
            self.layers['Activation%d' % i] = activation_dict[self.activation]()

        # output layer: Affine only, no activation before SoftmaxWithLoss
        i = len(self.hidden_size_list) + 1
        self.layers['Affine%d' % i] = Affine(self.params['W%d' % i],
                                             self.params['b%d' % i])

        self.last_layers = SoftmaxWithLoss()
Example #8
    def __init__(self, input_size: int, hidden_size: int, output_size: int):
        W1 = 0.01 * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        W2 = 0.01 * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
Example #9
    def __init__(self,
                 input_dim=(1, 28, 28),  # (C, W, H)
                 filter_num=30,
                 filter_size=5,
                 filter_pad=0,
                 filter_stride=1,
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01
                 ):
        # input(N, C, W, H)
        # -> Conv(N, FN, conv_out_h, conv_out_w) -> ReLu
        # -> Pooling(N, FN , pool_out_h, pool_out_w)
        # -> Affine (flattens the pooled maps) (N, hidden_size) -> ReLu
        # -> Affine(N, output_layer) -> SoftMax

        # input_size is determined dynamically (a square input is assumed)
        input_size = input_dim[1]
        conv_output_size = (input_size + 2 * filter_pad - filter_size) / filter_stride + 1
        # FN * pool_out_h * pool_out_w
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        self.params = {}

        # Conv
        # (N, C, W, H) -> (N, FilterNum, out_h, out_w)
        self.params['W1'] = \
            weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)

        # ReLu
        # Pool
        # Affine
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)

        # Relu
        # Affine
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        self.layers = OrderedDict()

        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad)
        self.layers['ReLu1'] = ReLu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLu2'] = ReLu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
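
With the default arguments, the size arithmetic above works out as follows (a worked check of the expressions already in the constructor, not extra functionality):

conv_output_size = (28 + 2 * 0 - 5) / 1 + 1       # = 24.0
pool_output_size = int(30 * (24 / 2) * (24 / 2))  # = 30 * 12 * 12 = 4320
# so W1 is (30, 1, 5, 5), W2 is (4320, 100), W3 is (100, 10):
# Affine1 flattens the (N, 30, 12, 12) pooled maps into (N, 4320).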
Example #10
class TwoLayersNet():
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastlayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(y.shape[0])

    def gradient(self, x, t):
        dout = 1
        self.loss(x, t)
        dout = self.lastlayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db
        return grads
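
A minimal, hypothetical training step for the class above, showing how `gradient()` and the `params` dict fit together; the in-place `-=` keeps the `Affine` layers pointing at the updated arrays:

import numpy as np

net = TwoLayersNet(input_size=784, hidden_size=50, output_size=10)

# Hypothetical batch: 100 flattened 28x28 images with one-hot labels.
x = np.random.randn(100, 784)
t = np.eye(10)[np.random.randint(0, 10, size=100)]

grads = net.gradient(x, t)
for key in ('w1', 'b1', 'w2', 'b2'):
    net.params[key] -= 0.1 * grads[key]  # in-place SGD step, lr = 0.1

print(net.accuracy(x, t))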
Example #11
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        w1 = np.random.randn(I, H) * 0.01
        b1 = np.zeros(H)  #np.random.randn(H)
        w2 = np.random.randn(H, O) * 0.01
        b2 = np.zeros(O)  #np.random.randn(O)

        self.layers = [Affine(w1, b1), Sigmoid(), Affine(w2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for l in self.layers:
            self.params += l.params
            self.grads += l.grads  # gradients are gathered only here -> each layer must update its grads in place, never rebind the arrays
Example #12
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ratio=0.5,
                 use_batchnorm=False):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.weight_decay_lambda = weight_decay_lambda
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Initialize weights
        self.__init_weight(weight_init_std)

        # Create the layers
        activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(
                self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batchnorm:
                self.params['gamma' + str(idx)] = np.ones(
                    hidden_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.zeros(
                    hidden_size_list[idx - 1])
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                    self.params['gamma' + str(idx)],
                    self.params['beta' + str(idx)])

            self.layers['Activation_function' +
                        str(idx)] = activation_layer[activation]()

            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(dropout_ratio)

        idx = self.hidden_layer_num + 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)],
                                                  self.params['b' + str(idx)])

        self.last_layer = SoftmaxWithLoss()
Example #13
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]

        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
Example #14
def initialize(input_size,
               hidden_size,
               output_size,
               init_weight=0.01,
               init_params=None):
    # `params` (dict) and `layers` (list) are assumed to be module-level globals
    hidden_count = len(hidden_size)  # hidden_size: list of hidden-layer widths
    if init_params is None:
        params['w1'] = init_weight * np.random.randn(input_size,
                                                     hidden_size[0])
        params['b1'] = np.zeros(hidden_size[0])
        for idx in range(1, hidden_count):
            params[f'w{idx+1}'] = init_weight * np.random.randn(
                hidden_size[idx - 1], hidden_size[idx])
            params[f'b{idx+1}'] = np.zeros(hidden_size[idx])
        params[f'w{hidden_count+1}'] = init_weight * np.random.randn(
            hidden_size[hidden_count - 1], output_size)
        params[f'b{hidden_count+1}'] = np.zeros(output_size)
    else:
        globals()['params'] = init_params

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    for idx in range(1, hidden_count):
        layers.append(Affine(params[f'w{idx+1}'], params[f'b{idx+1}']))
        layers.append(ReLU())
    layers.append(
        Affine(params[f'w{hidden_count+1}'], params[f'b{hidden_count+1}']))
    layers.append(SoftmaxWithLoss())
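
A sketch of how this module-level `initialize` would be called, under the assumption that `params` and `layers` are globals defined in the same module as the function:

params = {}   # assumed module-level globals used by initialize()
layers = []

initialize(input_size=784, hidden_size=[100, 50], output_size=10)
# layers is now: Affine, ReLU, Affine, ReLU, Affine, SoftmaxWithLoss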
Example #15
    def __init__(self, hidden_size, weight_init_std=0.01):
        super().__init__()
        self.params = dict()
        self.params['W1'] = weight_init_std * np.random.randn(
            self.x_train.shape[1], hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(
            hidden_size, self.t_train.shape[1])
        self.params['b2'] = np.zeros(self.t_train.shape[1])

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()
Example #16
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        # self.params['W1'] = np.ones((input_size, hidden_size))
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        # self.params['W2'] = np.ones((hidden_size, output_size))
        self.params['b2'] = np.zeros(output_size)

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()
Example #17
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={
                     'filter_num': 30,
                     'filter_size': 5,
                     'pad': 0,
                     'stride': 1
                 },
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) *
                               (conv_output_size / 2))

        # Initialize weights
        self.params = {}
        self.params['W1'] = weight_init_std * \
                            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * \
                            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
                            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create the layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
Example #18
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize weights and biases
        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.random.randn(O)

        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all the weights into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
Example #19
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in
Example #20
	def __init__(self, input_size, hidden_size, output_size,
		weight_init_std=0.01):
		# Initialize weights.
		self.params = {}
		self.params['W1'] = weight_init_std * \
												np.random.randn(input_size, hidden_size)
		self.params['b1'] = np.zeros(hidden_size)
		self.params['W2'] = weight_init_std * \
												np.random.randn(hidden_size, output_size)
		self.params['b2'] = np.zeros(output_size)

		# Generate layers.
		self.layers = OrderedDict() # Ordered dictionary
		self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
		self.layers['Relu'] = Relu()
		self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

		self.lastLayer = SoftmaxWithLoss()
Example #21
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 weight_init_std=0.01):
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
        self.lastlayer = SoftmaxWithLoss()
Example #22
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # initialize weight and bias (cp is assumed to be CuPy, i.e. "import cupy as cp")
        W1 = 0.01 * cp.random.randn(I, H)
        b1 = cp.zeros(H)
        W2 = 0.01 * cp.random.randn(H, O)
        b2 = cp.zeros(O)

        # create layer
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # combine all weight and grads into list
        self.params, self.grads = [], []

        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
Example #23
    def __init_layer(self):
        activation_layer = {'sigmoid': Sigmoid, 'relu': Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
            if self.use_batchnorm:
                self.params['gamma' + str(idx)] = np.ones(self.hidden_size_list[idx - 1])
                self.params['beta' + str(idx)] = np.zeros(self.hidden_size_list[idx - 1])
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)],
                                                                         self.params['beta' + str(idx)])

            self.layers['activation_function' + str(idx)] = activation_layer[self.activation]()

            if self.use_dropout:
                self.layers['Dropout' + str(idx)] = Dropout(self.dropout_ration)

        idx += 1
        self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])
        self.last_layer = SoftmaxWithLoss()
Example #24
def initialize(input_size, hidden_size, output_size, init_weight=0.01):
    # `params` (dict) and `layers` (list) are assumed to be module-level globals
    params['w1'] = init_weight * np.random.randn(input_size, hidden_size)
    params['b1'] = np.zeros(hidden_size)
    params['w2'] = init_weight * np.random.randn(hidden_size, output_size)
    params['b2'] = np.zeros(output_size)

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    layers.append(Affine(params['w2'], params['b2']))
    layers.append(SoftmaxWithLoss())
Example #25
    def __init__(self, vocab_size: int, hidden_size: int) -> None:
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)
        W_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype(float)

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params = []
        self.grads = []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in
Example #26
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()
        # In the limit, each context word's probability is 0.5.
        # Doing softmax -> *2 -> loss would just subtract a uniform ln 2, so it is probably the same.

        # distributed word representations
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l0 = self.loss_layer0.forward(s, contexts[:, 0])
        l1 = self.loss_layer1.forward(s, contexts[:, 1])
        loss = l0 + l1
        return loss

    def backward(self, dl=1):
        ds0 = self.loss_layer0.backward(dl)
        ds1 = self.loss_layer1.backward(dl)
        ds = ds0 + ds1
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
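
A usage sketch mirroring the CBOW example: skip-gram feeds the target word in and treats the two context words as the labels of the two loss layers. As before, this assumes the book's `common.util` helpers are available on the path:

from common.util import preprocess, create_contexts_target, convert_one_hot

text = 'You say goodbye and I say hello.'
corpus, word_to_id, id_to_word = preprocess(text)
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size=1)
contexts = convert_one_hot(contexts, vocab_size)
target = convert_one_hot(target, vocab_size)

model = SimpleSkipGram(vocab_size, hidden_size=5)
loss = model.forward(contexts, target)  # loss_layer0 / loss_layer1 score contexts[:, 0] / [:, 1]
model.backward()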
Example #27
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer0 = MatMul(w_in)
        self.in_layer1 = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect weights and gradients
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer = SoftmaxWithLoss()

        # distributed word representations
        self.word_vecs = w_in
Example #28
def initialize(input_size, hidden_size, output_size, init_weight=0.01, init_params=None):
    # `params` (dict) and `layers` (list) are assumed to be module-level globals
    if init_params is None:
        params['w1'] = init_weight * np.random.randn(input_size, hidden_size)
        params['b1'] = np.zeros(hidden_size)
        params['w2'] = init_weight * np.random.randn(hidden_size, output_size)
        params['b2'] = np.zeros(output_size)
    else:
        globals()['params'] = init_params

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    layers.append(Affine(params['w2'], params['b2']))
    layers.append(SoftmaxWithLoss())
Example #29
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer0 = MatMul(w_in)
        self.in_layer1 = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect weights and gradients
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer = SoftmaxWithLoss()

        # distributed word representations
        self.word_vecs = w_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dl=1):
        ds = self.loss_layer.backward(dl)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
Example #30
    def __init__(self, vocab_size, hidden_size):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # layers
        self.in_layer = MatMul(w_in)
        self.out_layer = MatMul(w_out)

        # collect weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # loss
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()
        # In the limit, each context word's probability is 0.5.
        # Doing softmax -> *2 -> loss would just subtract a uniform ln 2, so it is probably the same.

        # distributed word representations
        self.word_vecs = w_in