Example no. 1
 def visActProb(self):
     # negative phase
     super(DiscriminativeRBM,self).visActProb()
     self.v.apply_sigmoid()
     cm.dot(self.cW, self.h, target = self.c)
     self.c.add_col_vec(self.cb)
     softmax(self.c)
Example no. 2
 def test_softmax(self):
     np.testing.assert_array_almost_equal(
         softmax(np.array([[1001, 1002], [3, 4]])),
         np.array([[0.26894142, 0.73105858], [0.26894142, 0.73105858]]))
     np.testing.assert_array_almost_equal(
         softmax(np.array([[-1001, -1002]])),
         np.array([[0.73105858, 0.26894142]]))
     np.testing.assert_array_almost_equal(
         softmax(np.array([3, 4])), np.array([0.26894142, 0.73105858]))
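These tests only pass for a softmax that is invariant to adding a constant to each row (note the 1001/1002 and -1001/-1002 cases). The implementation under test is not shown; below is a minimal NumPy sketch of a numerically stable softmax that would satisfy them, an assumption rather than the original project's code.

import numpy as np

def stable_softmax(x):
    # Shift by the row-wise maximum so large inputs such as 1001/1002 do not
    # overflow np.exp; the result is unchanged because
    # softmax(x) == softmax(x - c) for any constant c.
    x = np.asarray(x, dtype=float)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=-1, keepdims=True)

# stable_softmax(np.array([3, 4])) -> approximately [0.26894142, 0.73105858],
# matching the last assertion above.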
Example no. 3
    def getError(self, data, labels, batch):
#        print 'get error'
        batchdata = data[:,batch*self.batchsize:(batch+1)*self.batchsize]
        batchlabels = labels[batch*self.batchsize:(batch+1)*self.batchsize]
        batchtargets = self.getTargets(batchlabels)
        results = self.forwardProp(batchdata)
        softmax(results)
        targets = cm.CUDAMatrix(batchtargets)
        results.subtract(targets)
        return results
Example no. 4
 def __call__(self, X):
     #A = T.exp(T.dot(T.exp(X[:-1]), self.w_trans)) + self.eps
     #B = T.sum(T.gammaln(A), axis=-1) - T.gammaln(T.sum(A, axis=-1))
     #L = T.sum((A-1)*X[1:], axis=-1) - B
     A = softmax(T.dot(T.exp(X[:-1]), self.w_trans))
     L = T.sum(A*X[1:], axis=-1)
     #A_init = T.exp(self.w_init) + self.eps
     #B_init = T.sum(T.gammaln(A_init)) - T.gammaln(T.sum(A_init))
     #L_init = T.sum((A_init-1)*X[0], axis=-1) - B_init
     A_init = softmax(self.w_init)
     L_init = T.sum(A_init*X[0], axis=-1)
     return T.concatenate([T.shape_padleft(L_init), L], axis=0)
Example no. 5
    def build(self):
        print 'building rnn cell...'
        hidden_layer = RNN(self.rng,
                           self.n_input,
                           self.n_hidden,
                           self.n_batch,
                           self.x,
                           self.Er,
                           self.Ec,
                           self.x_mask_r,
                           self.x_mask_c,
                           is_train=self.is_train,
                           p=self.p)
        print 'building softmax output layer...'
        [h_r, h_c] = hidden_layer.activation

        output_layer = softmax(self.n_hidden, self.cluster_num,
                               self.in_cluster_num, h_r, h_c)
        cost_r = self.categorical_crossentropy(output_layer.activation_r,
                                               self.y[:, :, 0])
        cost_c = self.categorical_crossentropy(output_layer.activation_c,
                                               self.y[:, :, 1])
        cost = cost_r + cost_c
        self.params = [
            self.Er,
            self.Ec,
        ]
        self.params += hidden_layer.params
        self.params += output_layer.params

        lr = T.scalar('lr')
        gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
        updates = self.optimizer(self.params, gparams, lr)

        self.train = theano.function(
            inputs=[
                self.x, self.x_mask_r, self.x_mask_c, self.y, self.y_mask,
                self.n_batch, lr
            ],
            outputs=[cost],
            updates=updates,
            givens={self.is_train: np.cast['int32'](1)})

        self.getNLL = theano.function(
            inputs=[self.x, self.x_mask_r, self.x_mask_c, self.n_batch],
            outputs=[output_layer.activation_r, output_layer.activation_c],
            givens={self.is_train: np.cast['int32'](0)})

        self.predict = theano.function(
            inputs=[self.x, self.x_mask_r, self.x_mask_c, self.n_batch],
            outputs=[output_layer.predict_r, output_layer.predict_c],
            givens={self.is_train: np.cast['int32'](0)})

        self.test = theano.function(
            inputs=[
                self.x, self.x_mask_r, self.x_mask_c, self.y, self.y_mask,
                self.n_batch
            ],
            outputs=cost,
            givens={self.is_train: np.cast['int32'](0)})
Example no. 6
def forward(data, label, params, dimensions):
    """
    runs a forward pass and returns the probability of the correct word for eval.
    label here is an integer for the index of the label.
    This function is used for model evaluation.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    params[ofs:ofs + Dx * H]
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Compute the probability
    ### YOUR CODE HERE: forward propagation
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    y_hat = softmax(z2)
    # J = -np.sum(label*np.log(y_hat))
    J = -np.sum(np.log(y_hat) * label)
    return {"z1": z1, "h": h, "z2": z2, "y_hat": y_hat, "J": J}
Example no. 7
    def predict(self, X):
        """
    Use the trained weights of this two-layer network to predict labels for
    data points. For each data point we predict scores for each of the C
    classes, and assign each data point to the class with the highest score.

    Inputs:
    - X: A numpy array of shape (N, D) giving N D-dimensional data points to
      classify.

    Returns:
    - y_pred: A numpy array of shape (N,) giving predicted labels for each of
      the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
      to have class c, where 0 <= c < C.
    """
        y_pred = None

        ###########################################################################
        # TODO: Implement this function; it should be VERY simple!                #
        ###########################################################################
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        h1 = np.dot(X, self.params['W1']) + self.params['b1']

        # Compute the forward pass
        scores_1 = np.maximum(0, h1)

        scores = np.dot(scores_1, self.params['W2']) + self.params['b2']
        ###########################################################################
        #                              END OF YOUR CODE                           #
        ###########################################################################

        return np.argmax(softmax(scores), axis=1)
Example no. 8
def forward_backward_prop(data, labels, params, dimensions):
    ofs = 0
    Dx, H, Dy = dimensions

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    z1 = np.dot(
        data,
        W1) + b1  # according to broadcast rule, b1 will extend to M dimensions
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    y_hat = softmax(z2)

    cost = -np.sum(np.log(y_hat[labels == 1])) / data.shape[0]
    d3 = (y_hat - labels) / data.shape[0]
    gradW2 = np.dot(h.T, d3)
    gradb2 = np.sum(d3, 0, keepdims=True)
    dh = np.dot(d3, W2.T)
    grad_h = sigmoid_grad(h) * dh
    gradW1 = np.dot(data.T, grad_h)
    gradb1 = np.sum(grad_h, 0)

    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return y_hat, cost, grad
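Analytic gradients like the one stacked above are usually verified against a centred finite difference. The sketch below is a generic spot check, not part of the original assignment code; the wrapper f, the tolerance, and the number of probes are assumptions.

import numpy as np

def spot_check_gradient(f, params, eps=1e-6, n_checks=5):
    # f(params) is assumed to return (cost, grad). For the example above one
    # could wrap it as
    #   f = lambda p: forward_backward_prop(data, labels, p, dimensions)[1:]
    # since that version returns (y_hat, cost, grad).
    _, grad = f(params)
    rng = np.random.default_rng(0)
    for ix in rng.integers(0, params.size, size=n_checks):
        step = np.zeros_like(params)
        step[ix] = eps
        cost_hi, _ = f(params + step)
        cost_lo, _ = f(params - step)
        numeric = (cost_hi - cost_lo) / (2 * eps)
        assert abs(numeric - grad[ix]) < 1e-5, (ix, numeric, grad[ix])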
Example no. 9
  def predict(self, X):
    """
    Use the trained weights of this two-layer network to predict labels for
    data points. For each data point we predict scores for each of the C
    classes, and assign each data point to the class with the highest score.

    Inputs:
    - X: A numpy array of shape (N, D) giving N D-dimensional data points to
      classify.

    Returns:
    - y_pred: A numpy array of shape (N,) giving predicted labels for each of
      the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
      to have class c, where 0 <= c < C.
    """
    y_pred = None

    ###########################################################################
    # TODO: Implement this function; it should be VERY simple!                #
    ###########################################################################
    temp1 = np.maximum(X.dot(self.params['W1'])+self.params['b1'],0)
    temp2 = temp1.dot(self.params['W2']) + self.params['b2']  # add the bias after the matrix product
    y_pred = np.argmax(softmax.softmax(temp2),axis=1)
    ###########################################################################
    #                              END OF YOUR CODE                           #
    ###########################################################################

    return y_pred
Example no. 10
def forward(data, label, params, dimensions):
    """
    runs a forward pass and returns the probability of the correct word for eval.
    label here is an integer for the index of the label.
    This function is used for model evaluation.
    """
    # Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    params[ofs:ofs + Dx * H]
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Compute the probability
    # YOUR CODE HERE: forward propagation
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    y_guess = softmax(z2)
    # END YOUR CODE

    return y_guess.T[label]
Example no. 11
def forward(data, label, params, dimensions):
    """
    runs a forward pass and returns the probability of the correct word for eval.
    label here is an integer for the index of the label.
    This function is used for model evaluation.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    params[ofs:ofs + Dx * H]
    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # Compute the probability
    z = data.dot(W1) + b1
    h = sigmoid(z)
    theta = h.dot(W2) + b2
    y_hat = softmax(theta)

    return y_hat[0, label]
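For context, here is a hedged sketch of how the flat params vector consumed by these forward functions could be laid out and called; the sizes, random values, and variable names are illustrative assumptions only.

import numpy as np

Dx, H, Dy = 10, 5, 10
dimensions = (Dx, H, Dy)
# One flat vector holding W1, b1, W2 and b2 in the order the unpacking code expects.
params = np.random.randn(Dx * H + H + H * Dy + Dy)
data = np.random.randn(1, Dx)    # a single input row
label = 3                        # index of the correct class
# prob = forward(data, label, params, dimensions)   # probability assigned to class 3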
Example no. 12
def SoftMaxLoss(x, y):
    """
  Computes the loss and gradient with the respect to the input for softmax classfier.

  Args:
  x: Input data.
    y: Labels of data. 

  Returns:
    loss: Scalar softmax loss.
    dx: Gradient of the loss with the respect to the input x.

  """
    ########################################################################################
    # TODO:                                                                                #
    # Compute softmax loss on input x and y and store it in loss variable. Compute gradient#
    # of the loss with respect to the input and store it in dx variable.                   #
    ########################################################################################
    logp = softmax.softmax(x)

    T = np.zeros(x.shape)

    for i in range(len(y)):

        T[i, y[i]] = 1

    loss = -(T * logp).sum() / x.shape[0]

    dx = -(T - np.exp(logp))

    ########################################################################################
    #                              END OF YOUR CODE                                        #
    ########################################################################################

    return loss, dx
Example no. 13
def apply_cnn(images, labels):

    analyzed = 0
    detected = 0

    for idx, img in enumerate(images):
        conv_one_out = c_r.conv_relu_forward(img, c.conv1_weights,
                                             c.conv1_biases)

        mp_one_out = mp.maxpool_forward(conv_one_out)

        conv_two_out = c_r.conv_relu_forward(mp_one_out, c.conv2_weights,
                                             c.conv2_biases)

        mp_two_out = mp.maxpool_forward(conv_two_out)

        conv_three_out = c_r.conv_relu_forward(mp_two_out, c.conv3_weights,
                                               c.conv3_biases)

        mp_three_out = mp.maxpool_forward(conv_three_out)

        reshaped_out = mp_three_out.reshape(1, -1)

        fc_output = fc.fully_connected_forward(reshaped_out, c.local3_weights,
                                               c.local3_biases)
        output = s.softmax(fc_output)
        analyzed = analyzed + 1
        assumption = np.nanargmax(output)
        if (assumption == labels[idx]):
            detected = detected + 1

        print('# Analyzed: ', analyzed)
        print('# Detected: ', detected)
        print('# Rate: ', (detected / analyzed) * 100, '%')
Example no. 14
def forward_backward_prop(data, labels, params, dimensions):
    """
    激活函数为sigmoid的两层神经网络的前向和后向传播

    前向传播,代价函数为交叉熵函数,后向传播计算所有参数的梯度.

    参数:
    data -- 维度为(M x Dx)的矩阵, 每行代表一个样本.
    labels -- 维度为(M x Dy)的矩阵, 每行是一个one-hot向量.
    params -- 模型的权重
    dimensions -- 元组数据包括,输入维度, 隐藏层神经元的数量,输出维度
    """

    ### 设置网络权重
    ofs = 0  # 用于提取权重,初始化为0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2]
                 )  #输入维度, 隐藏层神经元的数量,输出维度

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))  # 输入层权重W1,维度(Dx, H)
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))  # 输入层权重b1,维度(1, H)
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))  # 隐藏层权重W2,维度(H, Dy)
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))  # 隐藏层权重b2,维度(1, Dy)

    ### 前向传播
    M = np.shape(data)[0]

    z1 = np.zeros([M, H])  # 初始化z1,维度(M, H)
    z1 = np.dot(data, W1) + b1  # 计算z1, 维度(M, H)
    g1 = sigmoid(z1)  # 计算g1, 维度(M, H)

    z2 = np.zeros([M, Dy])  # 初始化z2, 维度(M, Dy)
    z2 = np.dot(g1, W2) + b2  # 计算z2, 维度(M, Dy)
    g2 = softmax(z2)  # 计算g2也就是输出, 维度(M, Dy)

    cost = -np.sum(labels * np.log(g2))  # 计算代价函数,交叉熵

    ###后向传播
    dW1 = data.T  # z1对于W1的梯度
    db1 = np.ones([1, M])  # z1对于b1的梯度
    dz1 = sigmoid_grad(g1)  # g1对于z1的梯度
    dg1 = W2.T  # z2对于g1的梯度
    dz2 = g2 - labels  # 代价函数对于z2的导数

    dW2 = g1.T  # z2对于W2的导数
    db2 = np.ones([1, M])  # z2对于b2的导数

    gradW1 = np.dot(dW1, np.dot(dz2, dg1) * dz1)  # 利用链式法则计算代价对于W1的导数
    gradb1 = np.dot(db1, np.dot(dz2, dg1) * dz1)  # 利用链式法则计算代价对于b1的导数

    gradW2 = np.dot(dW2, dz2)  # 利用链式法则计算代价对于W2的导数
    gradb2 = np.dot(db2, dz2)  # 利用链式法则计算代价对于b2的导数

    ### 保存梯度
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example no. 15
File: charrnn.py Project: hhcho/rnn
 def __init__(self, n_in, n_out, layers, decoder=linear.Linear, itype='int32'
              , solver=solvers.RMSprop(0.01)):
     self.data = T.matrix(dtype=itype)
     self.x = self.data[:-1] # T.matrix(dtype=itype)
     self.y = self.data[1:] # T.matrix(dtype=itype)
     self.mask = T.matrix(dtype='int32')
     self.weights = []
     k,b = self.x.shape
     y_layer = self.x
     self.y_layers = []
     m = n_in
     for n in layers:
         layer = lstm.LSTM(m, n)
         self.weights.append(layer.weights)
         y0 = T.zeros((b, n))
         c0 = T.zeros((b, n))
         y_layer, _ = layer.scanl(y0, c0, y_layer)
         self.y_layers.append(y_layer)
         m = n
     decode = decoder(m, n_out)
     self.weights.append(decode.weights)
     yh = decode(y_layer)
     self.yh = softmax.softmax(yh)
     self.loss_t = T.sum(crossent.crossent(self.yh, self.y)*self.mask[1:])
     self.correct = T.sum(T.eq(T.argmax(self.yh, axis=2), self.y)*self.mask[1:])
     self.count = T.sum(self.mask[1:])
     self.solver = solver
     #compile theano functions
     self._loss = theano.function([self.data, self.mask], [self.loss_t, self.correct, self.count])
     self._activations = theano.function([self.data], self.y_layers+[self.yh], givens={self.x:self.data})
Example no. 16
def forward_prop(X_in, W1, b1, W2, b2):
    X_in = activate(X_in, W1, b1)  # Select Row 0 i.e. Data Point 0  => 1*H
    X_in = sigmoid(
        X_in)  # Compute the Output of the Hidden Nodes of the Layer => 1*H
    X_in = activate(X_in, W2,
                    b2)  # Computes next Layer (in this Case Final Layer)
    return softmax(X_in)
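This example calls an activate helper that is not shown. Judging from how it is used (an affine map whose output is then passed through sigmoid or softmax), a plausible one-line sketch is the following; it is an assumption, not the original project's definition.

def activate(X, W, b):
    # Affine transform for one layer: X of shape (1, D) times W of shape (D, H),
    # plus the bias b, broadcast across the row.
    return X.dot(W) + b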
Example no. 17
    def evaluate(self):
        print("evaluating...")
        total = 0
        correct = 0
        num = 0
        for datas, labels in self.test_loader:
            num += 1
            datas = Variable(datas.type(dtype1))
            outputs = self.model(datas)
            outputs = outputs.type(torch.LongTensor)
            temp = outputs.data.numpy()
            s = softmax(temp)
            # print(s)

            if abs(s[0][0] - s[0][1]) > 0.5:
                labels = labels.type(torch.LongTensor)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            if num % 1000 == 0:
                print("%d test samples have been evaluated." % num)
        acc = 100 * correct / total
        print("total: %d" % total)
        self.writer.add_scalar('data/accuracy', acc, self.step)
        if self.save_log == True:
            with open(record_file, "a+") as f:
                print("Accuracy: %.4f %%" % acc, file=f)
        else:
            print("Accuracy: %.4f %%" % acc)
        self.save_model(self.interval)
Example no. 18
    def forward(self, x, t, batch_size):
        self.t = t
        self.y = Softmax.softmax(x, batch_size)
        self.loss = Min_batch_cross_entropy_error.cross_entropy_error(
            self.y, self.t)

        return self.loss  # this is a single per-example loss, not the loss for the whole batch_size
Example no. 19
def forwardpass(X, W1, b1, W2, b2, activation = "sigmoid"):
	if(activation == "sigmoid"):
		z1 = 1 / (1 + exp(-X.dot(W1) - b1))
	else:
		z1 = tanh(X.dot(W1) + b1)
	z2 = z1.dot(W2) + b2
	ret = softmax(z2)
	return ret, z1
Example no. 20
 def predict(self,x):
     w1,w2=self.params['W1'],self.params['W2']
     b1,b2=self.params['b1'],self.params['b2']
     
     a1=np.dot(x,w1)+b1
     z1=sigmoid(a1)
     a2=np.dot(z1,w2)+b2
     y=softmax(a2)
     return y
Example no. 21
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation
    x = data
    y = labels
    h = sigmoid(np.matmul(x, W1) + b1)
    y_hat = softmax(np.matmul(h, W2) + b2)
    cost = -np.sum(y * np.log(y_hat))  # natural log, consistent with the (y_hat - y) gradient below
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation

    #calc grads
    y_diff = y_hat - y

    gradW2 = np.transpose(np.matmul(np.transpose(y_diff), h))
    gradb2 = np.expand_dims(np.sum((y_diff), axis=0), axis=0)

    gradW1 = np.matmul(np.transpose(x),
                       np.matmul((y_diff), np.transpose(W2)) * sigmoid_grad(h))
    gradb1 = np.expand_dims(np.sum(np.matmul(
        (y_diff), np.transpose(W2)) * sigmoid_grad(h),
                                   axis=0),
                            axis=0)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example no. 22
File: neural.py Project: oriyor/NLP
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation

    z1 = np.dot(data, W1) + b1  # z is an M x H matrix, row for each batch sample
    h = sigmoid(z1)  # h is also an M x H matrix, apply sigmoid on each matrix element
    z2 = np.dot(h, W2) + b2  # z2 is an M x Dy matrix
    y_hat = softmax(z2)  # y_pred is also an M x Dy matrix

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation

    cost = -np.sum(labels * np.log(y_hat))

    delta_2 = y_hat - labels  # an M x Dy matrix
    delta_1 = np.dot(delta_2, W2.T) * sigmoid_grad(h)  # an M x H matrix

    gradb1 = np.sum(delta_1, axis=0)  # 1 x H vector
    gradb2 = np.sum(delta_2, axis=0)  # 1 X Dy vector

    gradW1 = np.dot(data.T, delta_1)  # Dx x H matrix
    gradW2 = np.dot(h.T, delta_2)  # H x Dy matrix

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example no. 23
def predict(network,x):
    w1,w2,w3=network['W1'],network['W2'],network['W3']
    b1,b2,b3=network['b1'],network['b2'],network['b3']
    a1=np.dot(x,w1)+b1
    z1=sigmoid(a1)
    a2=np.dot(z1,w2)+b2
    z2=sigmoid(a2)
    a3=np.dot(z2,w3)+b3
    y=softmax(a3)
    return y
Example no. 24
 def predict(self,x):
   W1, W2 = self.params['W1'], self.params['W2']
   b1, b2 = self.params['b1'], self.params['b2']
   # functions covered earlier
   a1 = np.dot(x, W1)+b1
   z1 = sigmoid(a1)
   a2 = np.dot(z1, W2)+b2
   y = softmax(a2)
   
   return y
Example no. 25
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """

    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    ### YOUR CODE HERE: forward propagation

    z = data.dot(W1) + b1
    h = sigmoid(z)
    theta = h.dot(W2) + b2
    y_hat = softmax(theta)

    cost = -np.sum(labels * np.log(y_hat))  # cross entropy (natural log, consistent with the gradient below)

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta_1 = y_hat - labels
    delta_2 = np.dot(delta_1, W2.transpose())
    delta_3 = np.multiply(delta_2, sigmoid_grad(h))

    gradW2 = np.dot(h.T, delta_1)
    gradb2 = np.sum(delta_1, axis=0)
    gradW1 = np.dot(data.T, delta_3)
    gradb1 = np.sum(delta_3, axis=0)

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example no. 26
def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sig.sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sig.sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = sm.softmax(a3)

    return y
Example no. 27
def predict(network, X):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    A1 = np.dot(X, W1) + b1
    Z1 = sigmoid(A1)
    A2 = np.dot(Z1, W2) + b2
    Z2 = sigmoid(A2)
    A3 = np.dot(Z2, W3) + b3
    Y = softmax(A3)
    return Y
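Several of the three-layer predict examples above index a dict with keys 'W1'...'W3' and 'b1'...'b3'. A minimal sketch of building such a dict with small random weights follows; the layer sizes and initialisation scale are illustrative assumptions, not the original projects' trained parameters.

import numpy as np

def init_network(sizes=(784, 50, 100, 10), seed=0):
    # sizes = (input, hidden1, hidden2, output); the keys match the
    # network['W1'] ... network['b3'] lookups used in the predict examples.
    rng = np.random.default_rng(seed)
    network = {}
    for i in range(3):
        network['W%d' % (i + 1)] = 0.01 * rng.standard_normal((sizes[i], sizes[i + 1]))
        network['b%d' % (i + 1)] = np.zeros(sizes[i + 1])
    return network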
Example no. 28
def softmaxCostAndGradient(predicted, target, outputVectors, dataset):
    """ Softmax cost function for word2vec models """

    # Implement the cost and gradients for one predicted word vector
    # and one target word vector as a building block for word2vec
    # models, assuming the softmax prediction function and cross
    # entropy loss.

    # Inputs:
    # - predicted: numpy ndarray, predicted word vector (\hat{v} in
    #   the written component or \hat{r} in an earlier version)
    # - target: integer, the index of the target word
    # - outputVectors: "output" vectors (as rows) for all tokens
    # - dataset: needed for negative sampling, unused here.

    # Outputs:
    # - cost: cross entropy cost for the softmax word prediction
    # - gradPred: the gradient with respect to the predicted word
    #        vector
    # - grad: the gradient with respect to all the other word
    #        vectors

    # We will not provide starter code for this function, but feel
    # free to reference the code you previously wrote for this
    # assignment!

    ### YOUR CODE HERE
    # softmax(uv^) See Assignment 1 3a. Cost function is nothing but softmax of dot product
    softmax_prob = softmax(predicted.dot(outputVectors.T))
    cost = -np.log(softmax_prob[target])  #cross entropy cost

    # In both gradients, the value of target softmax_prob is reduced by 1. (check solution and expand)

    y_cap = softmax_prob
    y_cap[target] -= 1

    N = outputVectors.shape[0]  # No. of output vectors
    D = outputVectors.shape[1]  # No. of output dimensions

    # We need N X D Matrix for output vectors
    # y_cap : Probabilities of each of N  vectors
    # predicted : 1 vector with D dimensions (Vc)
    # grad : ( y_cap -1 )*Vc for target
    #        ( y_cap )*Vc for other dimensions
    # We subtracted 1 already, so vectorially this is now ( y_cap )*Vc

    grad = y_cap.reshape(N, 1) * predicted.reshape(1, D)

    # We need 1 X D vector for predicted vector gradient
    gradPred = (y_cap.reshape(1, N).dot(outputVectors)).flatten()

    ### END YOUR CODE

    return cost, gradPred, grad
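A hedged usage sketch for softmaxCostAndGradient with toy shapes; in the real word2vec code, predicted and outputVectors come from learned embedding matrices, so the values below are illustrative only.

import numpy as np

predicted = np.random.randn(3)          # center-word vector \hat{v}, D = 3
outputVectors = np.random.randn(5, 3)   # one "output" vector per token, N = 5
target = 2                              # index of the target word
# cost, gradPred, grad = softmaxCostAndGradient(predicted, target, outputVectors, None)
# gradPred has shape (3,) and grad has shape (5, 3), matching the comments above.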
Example no. 29
def click_softmax():  # event handler for clicking the softmax button
    print("softmax")
    learn, bat, epo = get_paramiters()
    X, Y, Y_one_hot, hypothesis, cost, optimizer = sf.softmax(learn)
    printbar.delete(1.0, END)
    sess = learning(epo, bat, mnist_train_x, mnist_train_y, cost, optimizer, X,
                    Y, printbar)
    prediction = accuracy(hypothesis, Y_one_hot, mnist_test_x, mnist_test_y,
                          sess, X, Y, printbar)
    #if click_details():
    details(sess, prediction, X, mnist_test_x, mnist_test_y, printbar)
Example no. 30
def forward_backward_prop(data, labels, params, dimensions=[10, 5, 10]):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and for the cross entropy cost, #
    # and backward propagation for the gradients for all parameters.  #
    ###################################################################

    # Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t:t + dimensions[0] * dimensions[1]],
                    (dimensions[0], dimensions[1]))
    t += dimensions[0] * dimensions[1]
    b1 = np.reshape(params[t:t + dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t:t + dimensions[1] * dimensions[2]],
                    (dimensions[1], dimensions[2]))
    t += dimensions[1] * dimensions[2]
    b2 = np.reshape(params[t:t + dimensions[2]], (1, dimensions[2]))

    # YOUR CODE HERE: forward propagation
    Z1 = np.dot(data, W1) + b1  # broadcasting on b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    Yhat = softmax(Z2)  # Y output

    # cost = ...
    index = labels == 1
    logYhat = np.log(Yhat)
    cost = -np.sum(logYhat[index])
    # END YOUR CODE

    # YOUR CODE HERE: backward propagation
    targets = np.zeros(np.shape(Yhat))
    targets[index] = 1
    dZ2 = Yhat - targets
    db2 = sum(dZ2)
    dW2 = np.dot(A1.T, dZ2)

    dA1 = dZ2.dot(W2.T)
    dZ1 = np.multiply(sigmoid_grad(A1), dA1)
    db1 = sum(dZ1)
    dW1 = np.dot(data.T, dZ1)

    gradb1 = db1
    gradW1 = dW1
    gradb2 = db2
    gradW2 = dW2
    # END YOUR CODE

    # Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example no. 31
def predict(NN, x):
    W1, W2, W3 = NN['W1'], NN['W2'], NN['W3']
    b1, b2, b3 = NN['b1'], NN['b2'], NN['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)

    return y
Example no. 32
def forward(network, X):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    a1 = transport_signal(X, W1, b1)
    z1 = sigmoid(a1)

    a2 = transport_signal(z1, W2, b2)
    z2 = sigmoid(a2)

    a3 = transport_signal(z2, W3, b3)
    return softmax(a3)
Example no. 33
def predict(network, x):
    w1, w2, w3 = network["W1"], network["W2"], network["W3"]
    b1, b2, b3 = network["b1"], network["b2"], network["b3"]

    a1 = np.dot(x, w1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, w2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, w3) + b3
    y = softmax(a3)

    return y
Example no. 34
def predict(network, X):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    B1, B2, B3 = network['b1'], network['b2'], network['b3']

    A1 = np.dot(X, W1) + B1
    Z1 = sigmoid(A1)
    A2 = np.dot(Z1, W2) + B2
    Z2 = sigmoid(A2)
    A3 = np.dot(Z2, W3) + B3
    Y = softmax(A3)

    return Y
Example no. 35
 def postFeature(self):
     featurelist = os.listdir(self.featurefolder)
     for featurefile in featurelist:
         filename = os.path.join(self.featurefolder,featurefile)
         feature = scipy.io.loadmat(filename)
         featuremat = feature['data'].T  # each column is a feature
         pre_softmax = self.forwardProp(featuremat)
         postfeature_gpu = softmax(pre_softmax)
         postfeature_gpu.copy_to_host()
         postfeature = postfeature_gpu.numpy_array
         d = {}
         d['data']=postfeature
         savefilename = os.path.join(self.postfeaturefolder, featurefile)
         scipy.io.savemat(savefilename, d)
Example no. 36
    def forward(self,states,ground_truth,final_label=False):
        err=0.0
        outputs=[]
        num=len(states)

        hidden_states=copy.copy(self.s)

        for idx,token in enumerate(states):
            hidden_states[0]=self.activation[0](np.dot(self.U[0],token)+np.dot(self.W[0],hidden_states[0]))
            for i in xrange(1,self.hidden_layers):
                hidden_states[i]=self.activation[0](np.dot(self.U[i],hidden_states[i-1])+np.dot(self.W[i],hidden_states[i]))
            if idx==len(states)-1 or not final_label:
                proj=np.dot(self.V,hidden_states[-1])
                soft=softmax.softmax(proj)
                logsoft=np.log(soft)

                err-=np.dot(ground_truth[0],logsoft) if final_label else np.dot(ground_truth[idx],logsoft)
                outputs.append(soft)
        
        err=err if final_label else err/num
        return err,outputs
Example no. 37
    def __init__(self, models, itype='int32', solver=solvers.RMSprop(0.01)):
        self.x = T.matrix(dtype=itype)
        self.mask = T.matrix(dtype='int32')
        self.y = T.vector(dtype=itype)
        self.weights = []
        self.logprobs = []
        self.labels = []
        for label, model in models:
            yh = theano.clone(model.yh, {model.x: self.x[:-1], model.y: self.x[1:]})
            logprob = -T.sum(crossent.crossent(yh, self.x[1:])*self.mask[1:], axis=0)
            self.weights.extend(model.weights)
            self.logprobs.append(logprob)
            self.labels.append(label)
        self.logprobs = T.stack(self.logprobs, axis=1)
        self.yh = softmax.softmax(self.logprobs)
        self.loss_t = T.sum(crossent.crossent(self.yh, self.y))
        self.correct = T.sum(T.eq(T.argmax(self.yh, axis=1), self.y))
        self.count = self.y.size
        self.solver = solver
        #compile theano functions
        self._loss = theano.function([self.x, self.mask, self.y]
                                     , [self.loss_t, self.correct, self.count])
Example no. 38
def gradient(model,states,ground_truth,final_label=False):
    hidden_layers=model.hidden_layers
    neurons=model.size

    # Global states
    hidden_states=copy.copy(model.s)            #hidden states, should be deep copy
    lamb=copy.copy(model.s)                     #lambda, size shape of hidden states
    err=0.0

    # gradient used by iteration
    dSdU=[[] for i in xrange(hidden_layers)]    #dSdU[n1,n2]=dS[n1]/dU[n2], n1>=n2
    dSdW=[[] for i in xrange(hidden_layers)]    #dSdW[n1,n2]=dS[n1]/dW[n2], n1>=n2
    dSds=[[] for i in xrange(hidden_layers)]    #dSds[n1,n2]=dS[n1]/ds[n2], n1>=n2

    for n1 in xrange(hidden_layers):
        for n2 in xrange(n1+1):
            dSdU_n1n2=np.zeros([neurons[n2+1],neurons[n2],neurons[n1+1]])       #dSdU[n1,n2][i,j,k]=dS[n1][k]/dU[n2][i,j]
            dSdW_n1n2=np.zeros([neurons[n2+1],neurons[n2+1],neurons[n1+1]])      #dSdW[n1,n2][i,j,k]=dS[n1][k]/dW[n2][i,j]
            dSds_n1n2=np.zeros([neurons[n1+1],neurons[n2+1]]) if n1!=n2 else np.eye(neurons[n1+1])       #dSds[n1,n2][i,j]=dS[n1][i]/ds[n2][j]
            dSdU[n1].append(dSdU_n1n2)
            dSdW[n1].append(dSdW_n1n2)
            dSds[n1].append(dSds_n1n2)

    weight=1.0 if final_label else 1.0/len(states)
    for idx,token in enumerate(states):
        # Save the value of old hidden states, useful to update the gradient
        hidden_states_old=copy.copy(hidden_states)

        # Forward Propagation
        linear_comb=np.dot(model.U[0],token)+np.dot(model.W[0],hidden_states[0])
        hidden_states[0]=model.activation[0](linear_comb)
        lamb[0]=model.dactivation[0](linear_comb)
        for i in xrange(1,hidden_layers):
            linear_comb=np.dot(model.U[i],hidden_states[i-1])+np.dot(model.W[i],hidden_states[i])
            hidden_states[i]=model.activation[i](linear_comb)
            lamb[i]=model.dactivation[i](linear_comb)
        
        # R[n][i,j]=dS[n][i]_t/dS[n-1][j]_t
        # S[n][i,j]=dS[n][i]_t/dS[n][j]_{t-1}
        R=[];S=[]
        for i in xrange(hidden_layers):
            Ri=np.dot(np.diag(lamb[i]),model.U[i])
            Si=np.dot(np.diag(lamb[i]),model.W[i])
            R.append(Ri)
            S.append(Si)

        for n1 in xrange(hidden_layers):
            for n2 in xrange(n1):
                dSdU[n1][n2]=batchProduct.nXone(dSdU[n1-1][n2],R[n1].T)+batchProduct.nXone(dSdU[n1][n2],S[n1].T)
                dSdW[n1][n2]=batchProduct.nXone(dSdW[n1-1][n2],R[n1].T)+batchProduct.nXone(dSdW[n1][n2],S[n1].T)
                dSds[n1][n2]=np.dot(dSds[n1-1][n2],R[n1].T)+np.dot(dSds[n1][n2],S[n1].T)

            dSdU[n1][n1]=batchProduct.nXone(dSdU[n1][n1],model.W[n1])
            dSdW[n1][n1]=batchProduct.nXone(dSdW[n1][n1],model.W[n1])
            for i in xrange(neurons[n1+1]):
                for j in xrange(neurons[n1+1]):
                    dSdW[n1][n1][i,j,i]+=hidden_states_old[n1][j]
                for j in xrange(neurons[n1]):
                    if n1>0:
                        dSdU[n1][n1][i,j,i]+=hidden_states[n1-1][j]
                    else:
                        dSdU[n1][n1][i,j,i]+=token[j]
            dSdU[n1][n1]=batchProduct.nXone(dSdU[n1][n1],np.diag(lamb[n1]))
            dSdW[n1][n1]=batchProduct.nXone(dSdW[n1][n1],np.diag(lamb[n1]))
            dSds[n1][n1]=np.dot(np.diag(lamb[n1]),np.dot(model.W[n1],dSds[n1][n1]))

        # Have supervised signal -> update the gradient
        if idx==len(states)-1 or not final_label:
            proj=np.dot(model.V,hidden_states[-1])
            soft=softmax.softmax(proj)
            logsoft=np.log(soft)

            token_truth=ground_truth[0] if final_label else ground_truth[idx]
            # !! to avoid inf*0=nan
            for dim_idx,token_truth_value in enumerate(token_truth):
                if token_truth_value>1e-8:
                    err-=token_truth_value*logsoft[dim_idx]*weight
           # err-=np.dot(token_truth,logsoft)*weight

            # Update V
            dEdV=np.dot((soft-token_truth).reshape(neurons[-1],1),hidden_states[-1].reshape(1,neurons[-2]))
            model.gV+=dEdV*weight

            # Update U,W,s
            dEdS=np.dot(model.V.T,(soft-token_truth).reshape(neurons[-1],1))
            for n in xrange(hidden_layers):
                dEdUi=batchProduct.nXone(dSdU[-1][n],dEdS).squeeze()
                dEdWi=batchProduct.nXone(dSdW[-1][n],dEdS).squeeze()
                dEdsi=np.dot(dSds[-1][n].T,dEdS).squeeze()

                model.gU[n]+=dEdUi*weight
                model.gW[n]+=dEdWi*weight
                model.gs[n]+=dEdsi*weight

    model.buffer+=1
    return err
Example no. 39
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# initializing
init = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    step = 1
    i = 0
    while step*batch_size < training_iter:
        batch_x = train[i*batch_size*num_steps:((i+1)*batch_size*num_steps)]
        batch_x = batch_x.reshape((batch_size, num_steps, dim_input))
        batch_y = label[i*batch_size:((i+1)*batch_size)]
        if (i+1)*batch_size >= len(train):
            i = 0
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

        if step % display_step == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
            print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
        step += 1

    test = test.reshape((1, num_steps, dim_input))
    test_pred = sess.run(pred, feed_dict={x: test})
    print softmax(test_pred)
Example no. 40
 def getPostProb(self,data):
     # get the posterior prob. of a feature file
     results = self.forwarProp(data)  # pre-softmax results
     softmax(results)
     return results
Example no. 41
def forward_backward_prop(dimensions, data, labels, params):
    """ Forward and backward propagation for a two-layer sigmoidal network """
    ###################################################################
    # Compute the forward propagation and for the cross entropy cost, #
    # and backward propagation for the gradients for all parameters.  #
    ###################################################################

    ### Unpack network parameters (do not modify)
    t = 0
    W1 = np.reshape(params[t : t + dimensions[0] * dimensions[1]], (dimensions[0], dimensions[1]))
    t += dimensions[0] * dimensions[1]
    b1 = np.reshape(params[t : t + dimensions[1]], (1, dimensions[1]))
    t += dimensions[1]
    W2 = np.reshape(params[t : t + dimensions[1] * dimensions[2]], (dimensions[1], dimensions[2]))
    t += dimensions[1] * dimensions[2]
    b2 = np.reshape(params[t : t + dimensions[2]], (1, dimensions[2]))

    ### YOUR CODE HERE: forward propagation

    # cost = ...
    # labels is (20, 10) (20 1-hot vectors) - this is y
    # data is (20, 10) - this is x
    # W1 is (10, 5)
    # W2 is (5, 10)
    # b1 is (1, 5)
    # b2 is (1, 10)

    a = data.dot(W1) + b1
    h = sigmoid(a)  # hidden layer
    y_hat = softmax(h.dot(W2) + b2)  # Top classifier layer
    N, D = data.shape
    (Dx, H) = W1.shape

    # TODO: may need to change this to sum over rows and then sum up rows?
    # cost = np.sum(-np.sum(np.multiply(labels, np.log(y_hat)), axis=1).reshape((N, 1)))
    cost_per_datapoint = -np.sum(labels * np.log(y_hat), axis=1).reshape((N, 1))  # sum over rows
    cost = np.sum(cost_per_datapoint)

    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation

    # gradW1 = ...
    # gradb1 = ...
    # gradW2 = ...
    # gradb2 = ...

    # d_y_hat/d_W2
    J_theta = y_hat - labels
    # theta_W2 = h
    # theta_h = W2
    h_a = h * (1.0 - h)
    a_W1 = data
    y_hathw = J_theta.dot(W2.T) * h_a

    gradW2 = h.T.dot(J_theta)
    gradW1 = data.T.dot(y_hathw)
    # gradW1 = np.dot(data.T, np.dot(J_theta, theta_h.T) * h_a)
    gradb1 = np.sum(y_hathw, axis=0).reshape((1, H))
    gradb2 = np.sum(J_theta, axis=0).reshape((1, D))

    assert gradW1.shape == W1.shape
    assert gradb1.shape == b1.shape
    assert W2.shape == gradW2.shape
    assert gradb2.shape == b2.shape

    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(), gradW2.flatten(), gradb2.flatten()))

    return cost, grad
Example no. 42
 def test_softmax(self):
     np.testing.assert_array_almost_equal(
         softmax(np.array([[1001, 1002], [3, 4]])), np.array([[0.26894142, 0.73105858], [0.26894142, 0.73105858]])
     )
     np.testing.assert_array_almost_equal(softmax(np.array([[-1001, -1002]])), np.array([[0.73105858, 0.26894142]]))
     np.testing.assert_array_almost_equal(softmax(np.array([3, 4])), np.array([0.26894142, 0.73105858]))
Example no. 43
 def hidActProb(self,vis, target):
     cm.dot(self.W.T, vis, target = target)
     target.add_col_vec(self.hb)
     softmax(target)
Example no. 44
import sys
sys.path.insert(0,'util/')

import softmax
import numpy as np

vectors=np.random.random([100,100])
vector1=vectors[0]

print softmax.softmax(vector1)
for i in xrange(100):
    vector=softmax.softmax(vectors[i])
    assert(np.sum(vector)>0.9999 and np.sum(vector)<1.0001)