Example 1
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
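
For reference, here is a minimal training sketch for the SimpleCBOW class above. It assumes the MatMul and SoftmaxWithLoss layers come from common.layers, as in the later examples, and uses made-up one-hot toy data with a plain SGD loop instead of a Trainer class; treat it as an illustration of the interface, not as the original author's training code.

import numpy as np
from common.layers import MatMul, SoftmaxWithLoss  # layers used by the examples in this listing

# toy one-hot data: 3 samples, 2 context words each, vocabulary of 7
contexts = np.zeros((3, 2, 7), dtype='f')
contexts[0, 0, 0] = contexts[0, 1, 2] = 1
contexts[1, 0, 1] = contexts[1, 1, 3] = 1
contexts[2, 0, 2] = contexts[2, 1, 4] = 1
target = np.zeros((3, 7), dtype='f')
target[0, 1] = target[1, 2] = target[2, 3] = 1

model = SimpleCBOW(vocab_size=7, hidden_size=5)
lr = 1.0
for epoch in range(100):
    loss = model.forward(contexts, target)
    model.backward()
    # plain SGD over the collected parameter list; note that W_in appears twice in
    # model.params (it is shared by both input layers), so it receives both gradients
    for p, g in zip(model.params, model.grads):
        p -= lr * g
print(loss)                 # should drop well below its initial value on this tiny dataset
print(model.word_vecs)      # learned distributed representations (rows of W_in)
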
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        # Store the distributed representation of words in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        print(contexts[:, 0])
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss
Example 3
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
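
A quick check of the SimpleSkipGram interface above, again assuming MatMul and SoftmaxWithLoss from common.layers; the one-hot toy batch is made up for illustration. It shows that the returned loss is the sum of one SoftmaxWithLoss term per context position, and that backward() accumulates both gradients at the shared score node.

import numpy as np
from common.layers import MatMul, SoftmaxWithLoss  # assumed to match the layers used above

# toy one-hot batch: 2 samples over a 7-word vocabulary
target = np.zeros((2, 7), dtype='f')
target[0, 1] = target[1, 3] = 1
contexts = np.zeros((2, 2, 7), dtype='f')
contexts[0, 0, 0] = contexts[0, 1, 2] = 1   # left / right context of sample 0
contexts[1, 0, 2] = contexts[1, 1, 4] = 1   # left / right context of sample 1

model = SimpleSkipGram(vocab_size=7, hidden_size=5)
loss = model.forward(contexts, target)      # l1 + l2: one loss term per context word
model.backward()                            # dl1 + dl2 add up before the out_layer
print(loss)
print([g.shape for g in model.grads])       # gradients for W_in and W_out
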
class SimpleCBOW:
    """
    Simple continuous bag-of-words.
    """
    def __init__(self, vocabulary_size, hidden_size):
        V, H = vocabulary_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # generate layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # list all weights and gradients
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []

        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set distributed representation of words to variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        """
        :param contexts: 3-dimensional numpy array of one-hot vectors, shape (batch, context, vocab)
        :param target: 2-dimensional numpy array of one-hot vectors, shape (batch, vocab)
        """
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        """
        Continuous bag-of-words (CBOW)
                0.5*da
        MatMul <-+                  vector  ----+
         W_in    |                              v
                 |     0.5*da                  Softmax
                 +-- [+] <- [x] <-- MatMul <-- With    <-- Loss
                 |           ^  da  W_out   ds Loss     1
                 |   0.5 ----+
        MatMul <-+
         W_in   0.5*da
        """
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example 5
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        # layer0 and layer1 share the same weights (weight sharing)
        self.in_layer0 = MatMul(W_in)  # need one input layer per context word (window size); create the instance
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        # Compute x @ W_in for the words on either side of the target, for the whole batch.
        # -> this gives each context word's distributed representation with respect to the target word:
        # since the input is one-hot, the matmul simply selects the corresponding row of the weight matrix.
        h0 = self.in_layer0.forward(
            contexts[:, 0])  # (batch, 7) * (vocab_size(7), hidden)
        h1 = self.in_layer1.forward(
            contexts[:, 1])  # (batch, 7) * (vocab_size, hidden)
        h = (h0 + h1) * 0.5  # average of the two context-word representations
        score = self.out_layer.forward(
            h)  # (batch,hidden) * ( hidden, vocab_size )
        # print(score)
        # print(target)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example 6
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')
        W_in = np.array(
            [[-1.0655735, 1.3231287, -1.1051644, -1.1049938, -1.0685176],
             [1.1559865, 0.08719956, 1.1672966, 1.1607609, 1.1567391],
             [-0.7532327, 0.6444376, -0.76896185, -0.71775854, -0.7918966],
             [0.9111972, 1.9940354, 0.6837302, 0.89859486, 0.87255],
             [-0.78328615, 0.6444221, -0.7729693, -0.7400077, -0.80646306],
             [-1.058986, 1.3268483, -1.1123687, -1.1059289, -1.0616288],
             [1.1203294, -1.6394324, 1.2104743, 1.1509397,
              1.1612827]]).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example 7
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        # one input layer
        self.in_layer = MatMul(W_in)
        # one output layer
        self.out_layer = MatMul(W_out)
        # one loss layer per context word
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)

        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])

        loss = l1 + l2

        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)

        ds = dl1 + dl2

        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)

        return None
Example 8
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):

        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # Create each layer.
        self.in_layer = MatMul(W_in)
        # one loss_layer is needed for each context word to be predicted
        self.out_layer = MatMul(W_out)
        self.loss_layer0 = SoftmaxWithLoss()
        self.loss_layer1 = SoftmaxWithLoss()

        # Collect all layers, weights, and gradients into lists
        layers = [
            self.in_layer,
            self.out_layer,
            self.loss_layer0,
            self.loss_layer1,
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        score = self.out_layer.forward(h)
        loss0 = self.loss_layer0.forward(score, contexts[:, 0])
        loss1 = self.loss_layer1.forward(score, contexts[:, 1])
        loss = loss0 + loss1
        return loss

    def backward(self, dout=1):
        dl0 = self.loss_layer0.backward(dout)
        dl1 = self.loss_layer1.backward(dout)
        ds = dl0 + dl1
        da = self.out_layer.backward(ds)
        self.in_layer.backward(da)

        return None
Example 9
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):

        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # Create each layer.
        # one in_layer is needed for each word used as context
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all layers, weights, and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        # because the forward pass multiplies by 0.5 when averaging
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)

        return None
Example 10
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)  # need one input layer per context word (window size)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [
            self.in_layer0, self.in_layer1, self.out_layer, self.loss_layer
        ]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        # print(score)
        # print(target)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
Example 11
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        W_in = tf.Variable(
            tf.random.normal((V, H), mean=0.0, stddev=0.01, dtype='float'))
        W_out = tf.Variable(
            tf.random.normal((H, V), mean=0.0, stddev=0.01, dtype='float'))

        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        layers = [
            self.in_layer, self.out_layer, self.loss_layer1, self.loss_layer2
        ]

        self.params = []
        self.grads = []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)

        return None
Example 12
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        # takes the vocabulary size and the number of hidden-layer neurons as arguments
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')  # 32-bit floating point
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)  # one MatMul(W_in) per context word (i.e., window_size * 2 of them)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists.
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in an instance variable.
        self.word_vecs = W_in

    def forward(self, contexts, target):  # takes the contexts and the target and returns the loss
        # contexts.shape = (6, 2, 7), target.shape = (6, 7)
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weight
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create layer
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # combine all weights and grads into list
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []

        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set word vector to member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create layer
        self.in_layer_0 = MatMul(W_in)
        self.in_layer_1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # combine all weights and grads into list
        layers = [self.in_layer_0, self.in_layer_1, self.out_layer]
        self.params, self.grads = [], []

        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set word vector into member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer_0.forward(contexts[:, 0])
        h1 = self.in_layer_1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer_1.backward(da)
        self.in_layer_0.backward(da)
        return None
class SimpleSkipGram:
    def __init__(self, vocabulary_size, hidden_size):
        V, H = vocabulary_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # generate layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # list all weights and gradients
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set distributed representation of words to variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
Example 16
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype("f")
        W_out = 0.01 * np.random.randn(H, V).astype("f")

        # make layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # collect all weights & grads.
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # set word representations @ member vars.
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = 0.5 * (h0 + h1)
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
        return None
Example 17
class SimpleSkipGram:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create the layers
        self.in_layer = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer1 = SoftmaxWithLoss()
        self.loss_layer2 = SoftmaxWithLoss()

        # put all weights and gradients in one list
        layers = [self.in_layer, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # store the word vectors (distributed representations) in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h = self.in_layer.forward(target)
        s = self.out_layer.forward(h)
        l1 = self.loss_layer1.forward(s, contexts[:, 0])
        l2 = self.loss_layer2.forward(s, contexts[:, 1])
        loss = l1 + l2
        return loss

    def backward(self, dout=1):
        dl1 = self.loss_layer1.backward(dout)
        dl2 = self.loss_layer2.backward(dout)
        ds = dl1 + dl2
        dh = self.out_layer.backward(ds)
        self.in_layer.backward(dh)
        return None
Example 18
class SimpleCBOW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # create the layers
        self.in_layer0 = MatMul(W_in)
        self.in_layer1 = MatMul(W_in)
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # put all weights and gradients in one list
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # store the word vectors (distributed representations) in an instance variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) * 0.5
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer1.backward(da)
        self.in_layer0.backward(da)
        return None
class SimpleCBoW:
    def __init__(self, vocab_size, hidden_size):
        V, H = vocab_size, hidden_size

        # Initialize weights
        W_in = 0.01 * np.random.randn(V, H).astype('f')
        W_out = 0.01 * np.random.randn(H, V).astype('f')

        # Create the layers
        self.in_layer0 = MatMul(W_in)  # depends on the window size: 1 here
        self.in_layer1 = MatMul(W_in)  # depends on the window size: 1 here
        self.out_layer = MatMul(W_out)
        self.loss_layer = SoftmaxWithLoss()

        # Collect all weights and gradients into lists
        layers = [self.in_layer0, self.in_layer1, self.out_layer]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads

        # Store the distributed representation of words in a member variable
        self.word_vecs = W_in

    def forward(self, contexts, target):
        h0 = self.in_layer0.forward(contexts[:, 0])
        h1 = self.in_layer1.forward(contexts[:, 1])
        h = (h0 + h1) / 2
        score = self.out_layer.forward(h)
        loss = self.loss_layer.forward(score, target)
        return loss

    def backward(self, dout=1):
        ds = self.loss_layer.backward(dout)
        da = self.out_layer.backward(ds)
        da *= 0.5
        self.in_layer0.backward(da)
        self.in_layer1.backward(da)
        return None
def test_affine():

    # A simple fully connected layer
    c = np.array([[1, 0, 0, 0, 0, 0, 0]])  # input
    W = np.random.randn(7, 3)  # weights
    # @Note: the word vector is one-hot, so this dot product just extracts one row of the
    # weight matrix W (see the quick check after this function)
    h = np.dot(c, W)  # hidden node
    print(f"h: {h}")

    # Process the same input with the MatMul layer built in Chapter 1
    layer = MatMul(W)
    h2 = layer.forward(c)
    print(f"h2: {h2}")

    print("-" * 10)
Example 21
import sys, os
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
import numpy as np
from common.layers import MatMul

c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])
c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])

W_in = np.random.randn(7, 3)
W_out = np.random.randn(3, 7)

in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)
print(s)
Example 22
class Transformer(BaseModel):
    def __init__(self,
                 vocab_size,
                 wordvec_size,
                 head_size,
                 num_heads,
                 num_encoders=3,
                 num_decoders=3):
        S, D, H = vocab_size, wordvec_size, head_size
        rn = np.random.randn

        self.num_encoders = num_encoders
        self.num_decoders = num_decoders
        self.params, self.grads = [], []

        # Double embed (encoder, decoder)
        embed_W1 = (rn(S, D) / 100).astype('f')
        self.e_embed = PositionalEmbedding(embed_W1)
        self.params += self.e_embed.params
        self.grads += self.e_embed.grads

        self.encoders, self.decoders = [], []
        for _ in range(num_encoders):
            te = TransformerEncoder(wordvec_size=D,
                                    head_size=H,
                                    num_heads=num_heads)
            self.encoders.append(te)
            self.params += te.params
            self.grads += te.grads

        for _ in range(num_decoders):
            td = TransformerDecoder(wordvec_size=D,
                                    head_size=H,
                                    num_heads=num_heads)
            self.decoders.append(td)
            self.params += td.params
            self.grads += td.grads

        # For convenience, store the output-projection weight separately in the linear attribute
        self.linear = MatMul((rn(D, S) / np.sqrt(D)).astype('f'))
        self.params += self.linear.params
        self.grads += self.linear.grads

        # TimeSoftmaxWithLoss also has params and grads, but they are not used, so they are omitted here
        self.softmax = TimeSoftmaxWithLoss(ignore_label=-1)

    def forward(self, xs, ts):
        # xs->(N,T) / eout, dout, ts->N,(T,D)
        eout = self.e_embed.forward(xs)
        dout = self.e_embed.forward(ts)
        N, T, D = eout.shape

        for encoder in self.encoders:
            eout = encoder.forward(eout)
        for decoder in self.decoders:
            ts = decoder.forward(dout, eout)

        ts = ts.reshape(N * T, D)
        # score->(N*T,S)
        score = self.linear.forward(ts)
        _, S = score.shape
        # Note the order: score is the 2-D matrix after the linear projection, xs is the 2-D matrix before embedding
        # loss->(N*T,1)
        score = score.reshape(N, T, S)
        loss = self.softmax.forward(score, xs)
        return loss

    def backward(self, dout=1):
        # dout->N,(T,S)
        dout = self.softmax.backward(dout)
        N, T, S = dout.shape
        dout = dout.reshape(N * T, S)
        # dout->(N*T,S) / self.linear.W->(D,S)
        dout = self.linear.backward(dout)
        # dout->(N*T,D)
        _, D = dout.shape
        dout = dout.reshape(N, T, D)

        # ddout->N,(T,D)
        for i in range(self.num_decoders - 1, 0, -1):
            _, dout = self.decoders[i].backward(dout)
        ddout, dout = self.decoders[0].backward(dout)

        # dout->N,(T,D)
        for i in range(self.num_encoders - 1, -1, -1):
            ddout = self.encoders[i].backward(ddout)

        self.e_embed.backward(ddout)

    def generate(self, xs, type='GPT'):
        sampled = []
        # 'GPT' uses only the transformer decoder
        if type == 'GPT':
            # xs->(T,), out->(T,D)
            out = self.e_embed.forward(xs)
            # out->(1,T,D)
            # out = out[np.newaxis,:]
            for i in range(self.num_decoders):
                out = self.decoders[i].generate(out)
            # out->(1,T,D)
            N, T, D = out.shape
            out = out.reshape(N * T, D)
            # score->(1,T,S)
            score = self.linear.forward(out)

            sampled = np.argmax(score, axis=-1).flatten()

        # 'BERT' uses only the transformer encoder,
        # but masking is not implemented yet, and a segment embedding would have to be
        # added on top of the positional embedding; so in this code the BERT branch is
        # not really useful and the GPT branch should be used instead
        elif type == 'BERT':
            # xs->(T,), out->(T,D)
            out = self.e_embed.forward(xs)
            # out->(1,T,D)
            out = out[np.newaxis, :]
            for i in range(self.num_encoders):
                out = self.encoders[i].generate(out)

            # reuse the decoder's linear projection as is
            N, T, D = out.shape
            out = out.reshape(N * T, D)
            # score->(1,T,S)
            score = self.linear.forward(out)

            sampled = np.argmax(score, axis=-1).flatten()
        else:
            print('invalid generate type')

        return sampled
Example 23
import sys
sys.path.append('..')

import numpy as np
from common.layers import MatMul

c = np.array([1, 0, 0, 0, 0, 0, 0])
W = np.random.randn(7, 3)
layer = MatMul(W)
h = layer.forward(c)
print(h)
def test_word2vec():
    """word2vecにつかうNNモデルとして、
    continuous bag-of-wards(CBOW)が有名.

    有名なword2vec用NNモデル
    ①CBOWモデル
    ②skip-gramモデル

    ポイント
    ・分散表現を獲得したい単語周辺のコンテキストの数だけ、入力層がある
    e.g 注目単語の前後1単語を使って中間の注目単語を推論する場合、入力層は2つある.

    `goodbye`の分散表現ベクトルを獲得したい場合
    corpus = [`you`, `say`, `goodbye`, `and`, `I`, `hello`, `.`]
    単語ベクトル(one-hot) = [○, ○, ○, ○, ○, ○, ○]
    You say [goodbye] and I say hello .
    コンテキスト=[`say`, `and`]
    invec = [[0, 1, 0, 0, 0, 0, 0],
             [ 0, 0, 0, 1, 0, 0, 0]]

    中間層の出力 = invec @ W_73 = out_23 = ([[h1],[h2]]) (2, 3) --- 行方向に平均する ---> (1, 3) = 0.5 * (h1 + h2)

    出力層 = 中間層の出力 @ W_37 (1, 7) = 各単語(コーパス)の出現確率(スコア) // これは注目単語に対応している (※単語の分散表現ではない)
    教師データは`goodbye`の単語ベクトル(one-hot) = [0, 1, 0, 0, 0, 0, 0]

    注目単語に対応する重みW_73の行ベクトルが分散表現ベクトルとなる
    invec = [[0, 1, 0, 0, 0, 0, 0],  -> `say`
             [ 0, 0, 0, 1, 0, 0, 0]] -> `and`

    W_73 = [[○, ○, ○],
            [□, □, □],
            [△, △, △],
            [☆, ☆, ☆],
            [※, ※, ※],
            [◎, ◎, ◎],
            [✕, ✕, ✕]]

    コーパスに存在する単語が人間にとって理解できないベクトルに変換された(エンコード)
    `say`の分散表現ベクトル = [□, □, □]
    `and`の分散表現ベクトル = [☆, ☆, ☆]

    中間層の出力から出力層の表現(単語ベクトル)に変換する作業もある(デコード)

    出力側の重みも単語の意味をエンコードしてると考える
    W_37 = [[○, □, △, ☆, ※, ◎, ✕],
            [○, □, △, ☆, ※, ◎, ✕],
            [○, □, △, ☆, ※, ◎, ✕]]

    単語の分散表現重みを使うパターン
    ①W_73(入力側重み)のみ使う -> skip-gramモデル, CBOWモデル
    ②W_37(出力側重み)のみ使う 
    ③W_73とW_37両方使う
    """

    # Sample context data (the two words surrounding the target word)
    c0 = np.array([1, 0, 0, 0, 0, 0, 0])
    c1 = np.array([0, 0, 1, 0, 0, 0, 0])

    # Initialize weights
    W_in = np.random.randn(7, 3)
    W_out = np.random.randn(3, 7)

    # Create the layers
    in_layer0 = MatMul(W_in)
    in_layer1 = MatMul(W_in)
    out_layer = MatMul(W_out)

    # Forward pass
    h0 = in_layer0.forward(c0)
    h1 = in_layer1.forward(c1)
    h = 0.5 * (h0 + h1)
    s = out_layer.forward(h)

    print(f"s: {s}")
Example 25
import sys

sys.path.append('..')
import numpy as np
from common.layers import MatMul

# Sample context data
c0 = np.array([[1, 0, 0, 0, 0, 0, 0]])  #you
c1 = np.array([[0, 0, 1, 0, 0, 0, 0]])  #goodbye

# Initialize weights
W_in = np.random.randn(7, 3)
W_out = np.random.randn(3, 7)

# Initialize the layers
in_layer0 = MatMul(W_in)
in_layer1 = MatMul(W_in)
out_layer = MatMul(W_out)

# Forward pass
h0 = in_layer0.forward(c0)
h1 = in_layer1.forward(c1)
h = 0.5 * (h0 + h1)
s = out_layer.forward(h)

print(s)