Example #1
    def __init__(self, dim_in=(1, 28, 28),
                 par={'num_filter': 30, 'size_filter': 5, 'pad': 0, 'stride': 1},
                 s_hidden=100, s_out=10, std_w_init=0.01):
        n_f = par['num_filter']
        s_f = par['size_filter']
        pad = par['pad']
        stride = par['stride']
        size_in = dim_in[1]

        size_out_conv = int((size_in + 2 * pad - s_f) / stride) + 1
        size_out_pool = int(n_f * (size_out_conv / 2) ** 2)

        self.params = {}
        self.params['W1'] =\
            std_w_init * np.random.randn(n_f, dim_in[0], s_f, s_f)
        self.params['b1'] = np.zeros(n_f)
        self.params['W2'] = std_w_init * np.random.randn(size_out_pool, s_hidden)
        self.params['b2'] = np.zeros(s_hidden)
        self.params['W3'] = std_w_init * np.random.randn(s_hidden, s_out)
        self.params['b3'] = np.zeros(s_out)

        self.layers = OrderedDict()
        self.layers['Conv'] = Convolution(self.params['W1'], self.params['b1'],
                                          stride, pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool'] = Pooling(2, 2, 2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
        self.last_layer = SoftmaxWithLoss()
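For the default MNIST settings above, the output-size arithmetic works out as follows (a worked check, not part of the original snippet):

size_out_conv = (28 + 2 * 0 - 5) // 1 + 1        # = 24
size_out_pool = 30 * (size_out_conv // 2) ** 2   # = 30 * 12 * 12 = 4320, the fan-in of W2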
Example #2
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        # Initialize weights and biases
        W1 = 0.01 * np.random.randn(I, H)  # maps the number of input features to the number of hidden neurons
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)  # maps the number of hidden neurons to the number of classes
        b2 = np.zeros(O)

        # Create the layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
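A minimal training-step sketch for this TwoLayerNet (the toy shapes and the plain SGD update are assumptions; Affine, Sigmoid and SoftmaxWithLoss are the layer classes used throughout these examples):

import numpy as np

# hypothetical toy batch: 10 samples, 4 features, 3 classes
x = np.random.randn(10, 4)
t = np.random.randint(0, 3, size=10)   # integer labels (one-hot may also work,
                                       # depending on the SoftmaxWithLoss implementation)

model = TwoLayerNet(input_size=4, hidden_size=5, output_size=3)
loss = model.forward(x, t)   # forward pass: scores, then softmax cross-entropy loss
model.backward()             # backward pass fills model.grads

lr = 0.1                     # assumed plain-SGD update over the collected lists
for param, grad in zip(model.params, model.grads):
    param -= lr * grad       # in-place so the layers keep referencing the same arrays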
Example #3
    def __init__(self, input_size, hidden_size, output_size):
        """
        class initializer

        Parameters
        --------
        input_size : int
            the number of input neurons
        hidden_size : int
            the number of hidden neurons
        output_size : int
            the number of output neurons
        """
        I, H, O = input_size, hidden_size, output_size
        # Initialize Weight & Bias
        W1 = np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = np.random.randn(H, O)
        b2 = np.random.randn(O)
        # Generate Layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()
        # Store all layers' parameters (differs from the original)
        self.params_list, self.grads_list = [], []
        for layer in self.layers:
            self.params_list.append(layer.params)
            self.grads_list.append(layer.grads)
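Because this variant keeps per-layer parameter lists (params_list / grads_list) rather than one flat list, a parameter update has to walk both levels; a minimal SGD sketch under that assumption, with `net` standing for an instance of the class above:

lr = 0.1
for layer_params, layer_grads in zip(net.params_list, net.grads_list):
    for p, g in zip(layer_params, layer_grads):
        p -= lr * g   # update in place so each layer keeps its array references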
Example #4
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
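A usage sketch for this SimpleSkipGram (the one-hot toy data and shapes are assumptions): with window size 1, target has shape (batch, vocab_size) and contexts has shape (batch, 2, vocab_size), and forward returns the sum of the two context losses.

import numpy as np

vocab_size, hidden_size = 7, 5
model = SimpleSkipGram(vocab_size, hidden_size)

# hypothetical one-hot encoded batch of 3 (target, context-pair) samples
target = np.eye(vocab_size, dtype='f')[[0, 2, 4]]                    # (3, 7)
contexts = np.eye(vocab_size, dtype='f')[[[1, 3], [1, 5], [3, 5]]]   # (3, 2, 7)

loss = model.forward(contexts, target)
model.backward()   # gradients accumulate into model.grads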
Example #5
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Initialize weights
        self.params = {
            'W1': weight_init_std * np.random.randn(filter_num, input_dim[0],
                                                    filter_size, filter_size),
            'b1': np.zeros(filter_num),
            'W2': weight_init_std * np.random.randn(pool_output_size, hidden_size),
            'b2': np.zeros(hidden_size),
            'W3': weight_init_std * np.random.randn(hidden_size, output_size),
            'b3': np.zeros(output_size)}

        # Create layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'], conv_param['pad'])
        self.layers['Relu1'] = ReLU()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
Example #6
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create layers
        # layer0 and layer1 share the same weights
        self.in_layer0 = MatMul(W_in)  # one input layer is needed per context window position
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in an instance variable
        self.word_vecs = W_in
Example #7
    def __init__(self, vocab_size, hidden_size):

        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # Create each layer.
        # One in_layer must be created per word used in the context
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all layers, weights, and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in a member variable
        self.word_vecs = W_in
Example #8
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create layers
        # one input layer
        self.in_layer = MatMul(W_in)
        # one output layer
        self.out_layer = MatMul(W_out)
        # one loss layer per context word
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in an instance variable
        self.word_vecs = W_in
Example #9
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]

        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []

        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
Example #10
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')
        W_in = np.array(
            [[-1.0655735, 1.3231287, -1.1051644, -1.1049938, -1.0685176],
             [1.1559865, 0.08719956, 1.1672966, 1.1607609, 1.1567391],
             [-0.7532327, 0.6444376, -0.76896185, -0.71775854, -0.7918966],
             [0.9111972, 1.9940354, 0.6837302, 0.89859486, 0.87255],
             [-0.78328615, 0.6444221, -0.7729693, -0.7400077, -0.80646306],
             [-1.058986, 1.3268483, -1.1123687, -1.1059289, -1.0616288],
             [1.1203294, -1.6394324, 1.2104743, 1.1509397,
              1.1612827]]).astype('f')

        # Create layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in a member variable
        self.word_vecs = W_in
Example #11
    def __init__(self, input_dim=(1, 28, 28), conv_param=None, hidden_size=100,
                 output_size=10, weight_init_std=0.01, regularizer_lambda=0.1):
        # Default convolution parameters: 30 filters of size 5x5, no padding, stride 1
        if conv_param is None:
            conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0,
                          'stride': 1}
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]  # spatial size of the input: width/height of the single-channel 2-D matrix
        conv_output_size = int((input_size + 2 * filter_pad - filter_size) /
                               filter_stride + 1)  # size of one feature map output by the conv layer
        # Output size of the max-pooling layer: pooling keeps the number of feature
        # maps; with 2x2 max pooling the width/height are halved.
        # Total number of output elements: num_feature_maps * (conv_output / 2) * (conv_output / 2)

        # In this simple CNN the pooling layer is followed by a fully connected layer,
        # so the pooled nodes must be flattened into a one-dimensional array
        pool_output_size = int(filter_num * (conv_output_size / 2) ** 2)

        self.regularizer_lambda = regularizer_lambda  # regularization strength

        # Initialize the parameters of each layer: convolution, (pooling), fully connected, fully connected.
        # The pooling layer has no trainable parameters, so it needs no initialization.
        self.params = {}
        # Layer 1 (convolution): filter parameters (weights) + bias parameters
        # A filter has 4 dimensions: number of filters, channels, height, width
        self.params['W1'] = weight_init_std * np.random.randn(filter_num,
                                                              input_dim[0],
                                                              filter_size,
                                                              filter_size)
        # Conv-layer biases: one bias per filter, so filter_num biases in total
        self.params['b1'] = np.zeros(filter_num)
        # Fully connected layer (here, the hidden layer) weights:
        # the number of input nodes is the total number of pooled nodes, the output is hidden_size
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size,
                                                              hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        # Fully connected layer (here, the output layer) weights:
        # the number of input nodes is hidden_size, the output is output_size
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size,
                                                              output_size)
        self.params['b3'] = np.zeros(output_size)

        # Build the network:
        # convolution, activation (ReLU), max pooling,
        # affine (hidden layer), activation (ReLU), affine (output layer)
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLU2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        # Finally add a SoftmaxWithLoss layer to compute the cross-entropy loss used for training
        self.last_layer = SoftmaxWithLoss()
Example #12
    def __init__(self, input_size, hidden_sizes, output_size):
        I, O = input_size, output_size
        previous_H = I
        current_H = hidden_sizes[0]
        h_length = len(hidden_sizes)
        self.layers = []
        for i in range(h_length):
            current_H = hidden_sizes[i]
            W = 0.01 * np.random.randn(previous_H, current_H)
            b = np.zeros(current_H)
            self.layers.append(Affine(W, b))
            self.layers.append(Sigmoid())
            #self.layers.append(ReLU())
            previous_H = current_H
        W = 0.01 * np.random.randn(previous_H, O)
        b = np.zeros(O)
        self.layers.append(Affine(W, b))

        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
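Only __init__ is shown and the class name is omitted; assuming a hypothetical class (called DeepSigmoidNet here) built around this initializer, two hidden layers would be requested like this:

# hypothetical class name; hidden_sizes controls how many Affine+Sigmoid pairs are built
model = DeepSigmoidNet(input_size=2, hidden_sizes=[10, 10], output_size=3)
# resulting layers: Affine(2->10), Sigmoid, Affine(10->10), Sigmoid, Affine(10->3)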
Example #13
    def __init__(self,
                 n_features,
                 n_output,
                 n_hidden=30,
                 l2=0.0,
                 l1=0.0,
                 epochs=50,
                 eta=0.001,
                 decrease_const=0.0,
                 shuffle=True,
                 n_minibatches=1,
                 random_state=None):
        np.random.seed(random_state)
        self.n_features = n_features
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.l2 = l2
        self.l1 = l1
        self.epochs = epochs
        self.eta = eta
        self.decrease_const = decrease_const
        self.shuffle = shuffle
        self.n_minibatches = n_minibatches

        self.params = {}
        self._init_weights()

        self.layers = {}
        self.layers['Affine_1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Sigmoid'] = Sigmoid()
        self.layers['Affine_2'] = Affine(self.params['W2'], self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

        self._loss = []
        self._iter_t = 0
Example #14
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
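A usage sketch for SimpleCBOW (the one-hot toy data is an assumption): with window size 1, contexts has shape (batch, 2, vocab_size) and target (batch, vocab_size); forward averages the two context projections before scoring.

import numpy as np

vocab_size, hidden_size = 7, 5
model = SimpleCBOW(vocab_size, hidden_size)

# hypothetical one-hot batch: left/right context words and the center word
contexts = np.eye(vocab_size, dtype='f')[[[0, 2], [1, 3], [2, 4]]]   # (3, 2, 7)
target = np.eye(vocab_size, dtype='f')[[1, 2, 3]]                    # (3, 7)

loss = model.forward(contexts, target)
model.backward()                 # accumulates gradients into model.grads
word_vecs = model.word_vecs      # (vocab_size, hidden_size) distributed representations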
Example #15
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation="relu",
                 weight_init_std="relu",
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ratio=0.5,
                 use_batchnorm=False):
        """
        :param input_size: 输入的大小
        :param hidden_size_list: 隐藏层的神经元数量列表
        :param output_size: 输出的大小
        :param activation: "relu" or "sigmoid"
        :param weight_init_std: 指定权重的标准差,
        指定"relu" 或者 "he" 是定为"He"的初始值
        指定"sigmoid" 或者 "xavier" 是定为"Xauver"的初始值
        :param weight_decay_lambda: Weight Decay(L2范数)的强度
        :param use_dropout: 是否使用Dropout
        :param dropout_ratio: Dropout比例
        :param use_batchnorm: 是否只用Batch Normalization
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Initialize weights
        self.__init_weight(weight_init_std)

        # Create layers
        activation_layer = {"sigmoid": Sigmoid, "relu": ReLU}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            self.layers["Affine" + str(idx)] = Affine(
                self.params["W" + str(idx)], self.params["b" + str(idx)])
            if self.use_batchnorm:
                self.params["gamma" + str(idx)] = np.ones(
                    hidden_size_list[idx - 1])
                self.params["beta" + str(idx)] = np.zeros(
                    hidden_size_list[idx - 1])
                self.layers['BatchNorm' + str(idx)] = BatchNormalization(
                    self.params['gamma' + str(idx)],
                    self.params['beta' + str(idx)])

            self.layers["Activation_function" +
                        str(idx)] = activation_layer[activation]()

            if self.use_dropout:
                self.layers["Dropout" + str(idx)] = Dropout(dropout_ratio)
        idx = self.hidden_layer_num + 1
        self.layers["Affine" + str(idx)] = Affine(self.params["W" + str(idx)],
                                                  self.params["b" + str(idx)])
        self.last_layer = SoftmaxWithLoss()
Example #16
class TwoLayerNet:
    "Affineを二層つなげたネットワーク"

    def __init__(self, input_size, hidden_size, output_size):
        """
        class initializer

        Parameters
        --------
        input_size : int
            the number of input neurons
        hidden_size : int
            the number of hidden neurons
        output_size : int
            the number of output neurons
        """
        I, H, O = input_size, hidden_size, output_size
        # Initialize Weight & Bias
        W1 = np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = np.random.randn(H, O)
        b2 = np.random.randn(O)
        # Generate Layers
        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()
        # Store all layers' parameters (differs from the original)
        self.params_list, self.grads_list = [], []
        for layer in self.layers:
            self.params_list.append(layer.params)
            self.grads_list.append(layer.grads)

    def predict(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        """
        Parameters
        --------
        x : ndarray
            input to the first layer
        t : ndarray
            teacher data

        Returns
        --------
        loss : ndarray
            loss of the prediction result
        """
        score = self.predict(x)
        loss = self.loss_layer.forward(score, t)
        return loss

    def backward(self, dout=1):
        dout = self.loss_layer.backward(dout)
        for layer in reversed(self.layers):
            dout = layer.backward(dout)
        return dout
Example #17
    def __init__(self, input_dim=(1, 28, 28),
                 conv_params={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """Initialize the instance (set initial values) - build the CNN and initialize its variables.
        input_dim: dimensions of the input data; (1, 28, 28) for MNIST
        conv_params: values needed to create the Convolution layer's parameters (filter, bias):
            number of filters (filter_num),
            filter size (filter_size = filter_height = filter_width),
            amount of padding (pad),
            stride (stride)
        hidden_size: number of neurons used in the Affine layer -> size of the W matrix
        output_size: number of output elements; 10 for MNIST
        weight_init_std: standard deviation used when initializing the weight matrices with random numbers
        """
        filter_num = conv_params['filter_num']
        filter_size = conv_params['filter_size']
        filter_pad = conv_params['pad']
        filter_stride = conv_params['stride']
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / \
                           filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

        # Parameters needed by the CNN layers
        self.params = dict()
        self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create and connect the CNN layers
        self.layers = OrderedDict()

        # Option 1: define __init__(self, W, b) and set self.W = W, self.b = b
        # self.W = W  # even if generated from random numbers, the data size must be known to build the filter
        # self.b = b  # the bias count equals the filter count; its size must likewise be known => fixes the dimensions

        # Option 2:
        # input_dim = (1, 28, 28) = a class intended for MNIST
        # pass in the dimensions plus the number of filters
        # the filters applied during convolution can then be generated as random numbers

        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_params['stride'],
                                           conv_params['pad'])  # declare W and b
        self.layers['Relu1'] = Relu()  # x -> the value handed over from the Convolution layer
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'],
                                        self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'],
                                        self.params['b3'])
        self.last_layer = SoftmaxWithLoss()
Example #18
class SimpleCBOW:
    """
    Simple continuous bag-of-words.
    """
    def __init__(self, vocabulary_size, hidden_size):
        V, H = vocabulary_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # generate layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # list all weights and gradient layers
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []

        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set distributed representation of words to variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """
        :param contexts: 3-D numpy array
        :param target: 2-D numpy array
        """
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        """
        Continuous bag-of-words (CBOW)
                0.5*da
        MatMul <-+                  vector  ----+
         W_in    |                              v
                 |     0.5*da                  Softmax
                 +-- [+] <- [x] <-- MatMul <-- With    <-- Loss
                 |           ^  da  W_out   ds Loss     1
                 |   0.5 ----+
        MatMul <-+
         W_in   0.5*da
        """
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example #19
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_2={'filter_num': 16, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_3={'filter_num': 32, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_4={'filter_num': 32, 'filter_size': 3, 'pad': 2, 'stride': 1},
                 conv_param_5={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 conv_param_6={'filter_num': 64, 'filter_size': 3, 'pad': 1, 'stride': 1},
                 hidden_size=50, output_size=10):
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scale = np.sqrt(2.0 / pre_node_nums)

        # weights init
        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3,
                                          conv_param_4, conv_param_5, conv_param_6]):
            self.params['w'+str(idx+1)] = weight_init_scale[idx] *\
                    np.random.randn(
                        conv_param['filter_num'],
                        pre_channel_num, conv_param['filter_size'],
                        conv_param['filter_size'])
            self.params['b'+str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['w7'] = weight_init_scale[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['w8'] = weight_init_scale[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # gen layers
        self.layers = []
        self.layers.append(Convolution(self.params['w1'], self.params['b1'], conv_param_1['stride'],
                                       conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w2'], self.params['b2'], conv_param_2['stride'],
                                       conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['w3'], self.params['b3'], conv_param_3['stride'],
                                       conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w4'], self.params['b4'], conv_param_4['stride'],
                                       conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['w5'], self.params['b5'], conv_param_5['stride'],
                                       conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['w6'], self.params['b6'], conv_param_6['stride'],
                                       conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['w7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['w8'], self.params['b8']))
        self.layers.append(Dropout(0.5))
        self.last_layer = SoftmaxWithLoss()
Example #20
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1},
                 conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
                 hidden_size=50, output_size=10):
        # Weight initialization ===========
        # How many connections each neuron of a layer has to the neurons of the previous layer (TODO: compute automatically)
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)  # recommended initial values when using ReLU
        
        self.params = {} 
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # Layer creation ===========
        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'], 
                           conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W2'], self.params['b2'], 
                           conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W3'], self.params['b3'], 
                           conv_param_3['stride'], conv_param_3['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W4'], self.params['b4'],
                           conv_param_4['stride'], conv_param_4['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W5'], self.params['b5'],
                           conv_param_5['stride'], conv_param_5['pad']))
        self.layers.append(Relu())
        self.layers.append(Convolution(self.params['W6'], self.params['b6'],
                           conv_param_6['stride'], conv_param_6['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W7'], self.params['b7']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W8'], self.params['b8']))
        self.layers.append(Dropout(0.5))
        
        self.last_layer = SoftmaxWithLoss()
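The 64*4*4 fan-in of W7 is consistent with the default 28x28 input; a quick size check (a sketch using the usual (size + 2*pad - filter)/stride + 1 formula, not part of the original code):

size = 28
for pad in (1, 1, None, 1, 2, None, 1, 1, None):   # None marks a 2x2, stride-2 pooling step
    size = size // 2 if pad is None else (size + 2 * pad - 3) // 1 + 1
print(size)   # 4 -> the flattened output before Affine W7 is 64 * 4 * 4 = 1024 values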
Example #21
    def make_layers(self):
        # Create layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(
            self.params['W1'], self.params['b1'], 1,
            0)  # W1 holds the convolution filter weights, b1 the filter biases
        self.layers['ReLU1'] = ReLU()
        self.layers['Pool1'] = MaxPooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['ReLU2'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
Example #22
    def forward(self, xs, ts):
        _, T, _ = xs.shape
        layers = []
        loss = 0

        for t in range(T):
            layer = SoftmaxWithLoss()
            loss += layer.forward(xs[:, t, :], ts[:, t])
            layers.append(layer)
        loss /= T

        self.cache = (layers, xs)
        return loss
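From the unpacking of xs.shape, xs holds per-time-step scores and ts the matching labels; a shape sketch (the concrete sizes are assumptions):

import numpy as np

N, T, V = 2, 3, 5                       # batch size, time steps, vocabulary size
xs = np.random.randn(N, T, V)           # a score vector for every time step
ts = np.random.randint(0, V, (N, T))    # an integer label per time step (one-hot may also
                                        # work, depending on SoftmaxWithLoss)
# forward(xs, ts) then returns the mean of T per-step SoftmaxWithLoss values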
Example #23
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        # Initialize weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) 
        self.params['b2'] = np.zeros(output_size)

        # Create layers
        self.layers = OrderedDict() # ordered dict
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = ReLU()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss() # output layer
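Only __init__ is shown in this example; the prediction and loss methods that usually accompany it (a sketch following the pattern of the other examples here, not taken from this snippet) would walk the OrderedDict in insertion order:

    def predict(self, x):
        # run x through each layer in insertion order
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        # score the batch, then apply the final SoftmaxWithLoss layer
        y = self.predict(x)
        return self.lastLayer.forward(y, t)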
Example #24
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create layers
        # layer0 and layer1 share the same weights
        self.in_layer0 = MatMul(W_in)  # one input layer is needed per context window position
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        # Compute x * W_in for the words on both sides, over the whole batch
        # -> expresses how much each context word says about the center word (its distributed representation).
        # Because the inputs are one-hot, the matmul simply picks the corresponding row of the
        # weight matrix, and that row is the distributed-representation vector.
        h0 = self.in_layer0.forward(
            contexts[:, 0])  # (batch, 7) * (vocab_size(7), hidden)
        h1 = self.in_layer1.forward(
            contexts[:, 1])  # (batch, 7) * (vocab_size, hidden)
        h = (h0 + h1) * 0.5  # average of the distributed representations of both context words
        score = self.out_layer.forward(
            h)  # (batch, hidden) * (hidden, vocab_size)
        # print(score)
        # print(target)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example #25
    def __init__(self, input_size, hidden_size, output_size):
        I, H, O = input_size, hidden_size, output_size

        W1 = 0.01 * np.random.randn(I, H)
        b1 = np.zeros(H)
        W2 = 0.01 * np.random.randn(H, O)
        b2 = np.zeros(O)

        self.layers = [Affine(W1, b1), Sigmoid(), Affine(W2, b2)]
        self.loss_layer = SoftmaxWithLoss()

        self.params, self.grads = [], []
        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads
Example #26
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')
        W_in = np.array(
            [[-1.0655735, 1.3231287, -1.1051644, -1.1049938, -1.0685176],
             [1.1559865, 0.08719956, 1.1672966, 1.1607609, 1.1567391],
             [-0.7532327, 0.6444376, -0.76896185, -0.71775854, -0.7918966],
             [0.9111972, 1.9940354, 0.6837302, 0.89859486, 0.87255],
             [-0.78328615, 0.6444221, -0.7729693, -0.7400077, -0.80646306],
             [-1.058986, 1.3268483, -1.1123687, -1.1059289, -1.0616288],
             [1.1203294, -1.6394324, 1.2104743, 1.1509397,
              1.1612827]]).astype('f')

        # Create layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example #27
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create layers
        # one input layer
        self.in_layer = MatMul(W_in)
        # one output layer
        self.out_layer = MatMul(W_out)
        # one loss layer per context word
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed word representations in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)

        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])

        loss = l1 + l2

        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)

        ds = dl1 + dl2

        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)

        return None
Example #28
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={
                     'filter_num': 30,
                     'filter_size': 5,
                     'pad': 0,
                     'stride': 1
                 },
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]

        # Compute the output size of the convolution layer
        conv_output_size = (input_size - filter_size +
                            2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) *
                               (conv_output_size / 2))

        # Initialize weight parameters (1: convolution layer, 2: fully connected, 3: fully connected)
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(
            pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # Create layers
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()
Example #29
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights
        # self.params = {"W1": np.random.randn(input_size, hidden_size) / np.sqrt(input_size),
        #                "b1": np.zeros(hidden_size),
        #                "W2": np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size),
        #                "b2": np.zeros(output_size)}
        self.params = {"W1": weight_init_std * np.random.randn(input_size, hidden_size),
                       "b1": np.zeros(hidden_size),
                       "W2": weight_init_std * np.random.randn(hidden_size, output_size),
                       "b2": np.zeros(output_size)}
        # Create layers
        self.layers = OrderedDict()
        self.layers["Affine1"] = Affine(self.params["W1"], self.params["b1"])
        self.layers["ReLU1"] = ReLU()
        self.layers["Affine2"] = Affine(self.params["W2"], self.params["b2"])
        self.lastLayer = SoftmaxWithLoss()
Example #30
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        # Store the distributed word representations in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        print(contexts[:, 0])
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss