Example #1
def generator_net(x, training, opts, name='Generator'):
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        sz = opts.img_size // 16
        # x is input. 1 x 1 x nz
        # state size. sz x sz x (ngf*8)
        y = deconv(x, opts.ngf*8, sz, 1, 'valid', 'deconv1')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*2) x (sz*2) x (ngf*4)
        y = deconv(y, opts.ngf*4, 4, 2, 'same', 'deconv2')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*4) x (sz*4) x (ngf*2)
        y = deconv(y, opts.ngf*2, 4, 2, 'same', 'deconv3')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*8) x (sz*8) x ngf
        y = deconv(y, opts.ngf, 4, 2, 'same', 'deconv4')
        y = batch_norm(y, training)
        y = relu(y)

        # state size. (sz*16) x (sz*16) x nc
        y = deconv(y, opts.nc, 4, 2, 'same', 'deconv5')
        y = tf.nn.tanh(y)
        tf.logging.info("Generator output: {}".format(y))

        return y
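The deconv, batch_norm and relu helpers are not shown in this snippet; a minimal sketch of what they might look like with TF 1.x tf.layers, under that assumption:

import tensorflow as tf  # TF 1.x style, matching tf.variable_scope above

def deconv(x, filters, kernel_size, stride, padding, name):
    # Transposed convolution; bias is omitted because batch norm follows.
    return tf.layers.conv2d_transpose(x, filters, kernel_size, strides=stride,
                                      padding=padding, use_bias=False, name=name)

def batch_norm(x, training):
    return tf.layers.batch_normalization(x, training=training)

def relu(x):
    return tf.nn.relu(x)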
Example #2
def build_cnn(states, reuse=False, init=None):
    """
    Builds ACER network, returns the shared layer
    """
    #init = tf.initializers.random_normal(0,0.1)

    with tf.variable_scope("shared_policy_net"):
        with tf.variable_scope("conv_layer_1"):
            conv_l1 = relu(conv2d(states, 8, 4, 32, init))
            print(conv_l1)
        with tf.variable_scope("conv_layer_2"):
            conv_l2 = relu(conv2d(conv_l1, 4, 2, 64, init))
            print(conv_l2)
        with tf.variable_scope("conv_layer_3"):
            conv_l3 = relu(conv2d(conv_l2, 3, 1, 64, init))
            print(conv_l3)
        with tf.name_scope("flatten"):
            conv_l3_flat = tf.layers.flatten(conv_l3)
        with tf.variable_scope("shared_fully_connected"):
            shared_fc = tf.layers.dense(
                inputs=conv_l3_flat,
                units=512,
                activation=tf.nn.relu,
                kernel_initializer=init,
            )
    return shared_fc
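conv2d here is a project helper rather than tf.nn.conv2d; judging from the call sites its argument order is (input, kernel_size, stride, filters, initializer). A hedged sketch of such a wrapper using TF 1.x tf.layers:

import tensorflow as tf

def conv2d(x, kernel_size, stride, filters, init=None):
    # Assumed helper: plain 2-D convolution matching the call order above.
    return tf.layers.conv2d(x, filters=filters, kernel_size=kernel_size,
                            strides=stride, padding="valid",
                            kernel_initializer=init)

def relu(x):
    return tf.nn.relu(x)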
Example #3
def forwardPropagation(input, weights, bias, originalOutput,
                       binarizedTruePrediction, prediction, numberOfSamples,
                       numberOfNeuronsInLayers, classes, optimizer):
    #computing output of first hidden layer
    h1In = numpy.dot(input[:, :3], weights[0]) + numpy.repeat(
        numpy.array([bias[0]]), repeats=[numberOfSamples], axis=0)
    h1Output = utils.relu(h1In)

    #computing output of second hidden layer
    h2In = numpy.dot(h1Output, weights[1]) + numpy.repeat(
        numpy.array([bias[1]]), repeats=[numberOfSamples], axis=0)
    h2Output = utils.relu(h2In)

    #computing output of the output layer
    OIn = numpy.dot(h2Output, weights[2]) + numpy.repeat(
        numpy.array([bias[2]]), repeats=[numberOfSamples], axis=0)
    OOutput = utils.softmax(OIn)

    myPredictedValueListAsIntegers = numpy.argmax(OOutput, axis=1)
    # Computing overall error only for plotting graph
    if prediction == False:
        errorForGraph = utils.log_loss(binarizedTruePrediction,
                                       OOutput[:] + 0.00001)
        errorForPlottingGraphList.append(errorForGraph)
        accuracyScoreForGraph.append(
            utils.accuracy_score(originalOutput,
                                 myPredictedValueListAsIntegers))
        backPropagation(input, OOutput, originalOutput,
                        binarizedTruePrediction, h1Output, h2Output,
                        numberOfNeuronsInLayers, classes, h1In, h2In, OIn,
                        optimizer)
    else:
        return myPredictedValueListAsIntegers
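Most of the NumPy examples on this page lean on a small utils module that is not shown. A minimal sketch of the pieces used here (relu, softmax, log_loss, accuracy_score), written as assumptions rather than the original module:

import numpy as np

def relu(x):
    # Element-wise rectified linear unit.
    return np.maximum(0, x)

def softmax(x):
    # Row-wise softmax with max-subtraction for numerical stability.
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)

def log_loss(y_true_one_hot, y_prob, eps=1e-12):
    # Cross-entropy averaged over samples; y_true_one_hot is the binarized labels.
    p = np.clip(y_prob, eps, 1.0 - eps)
    return -np.mean(np.sum(y_true_one_hot * np.log(p), axis=1))

def accuracy_score(y_true, y_pred):
    return np.mean(np.asarray(y_true) == np.asarray(y_pred))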
Example #4
    def forward(self, x):
        """Forwardpropagation pass"""

        # Layer_in
        a_in = np.matmul(x, self.w_in.T) + self.b_in
        z_in = relu(a_in)
        # Layer_rec
        a_rec = np.matmul(z_in, self.w_rec.T) + self.b_rec
        z_rec = relu(a_rec)
        # Layer_link
        a_link = np.matmul(np.matmul(x, self.w_reduce), self.w_link.T)
        z_link = identity(a_link)
        # Layer_out
        a_out = np.matmul(z_rec+z_link, self.w_out.T) + self.b_out
        y = relu(a_out)

        # Set internal state
        self._state.update({
            'x': x,
            'y': y,
            'a_out': a_out,
            'z_link': z_link,
            'a_link': a_link,
            'z_rec': z_rec,
            'a_rec': a_rec,
            'z_in': z_in,
            'a_in': a_in,
        })

        return y
Example #5
    def forward(self, x):
        #Layer_in forward pass
        self.x = x
        B_in = np.matmul(np.ones((BATCH_SIZE, 1)),
                         self.b_in.reshape(1, P))  # [20, 75]
        self.a_in = np.matmul(self.x, self.w_in.transpose()) + B_in  # [20, 75]
        z_in_numpy = relu(self.a_in)

        # Layer_rec forward pass
        B_rec = np.matmul(np.ones((BATCH_SIZE, 1)),
                          self.b_rec.reshape(1, P))  # [20, 75]
        self.a_rec = np.matmul(z_in_numpy,
                               self.w_rec.transpose()) + B_rec  # [20, 75]
        self.z_rec = relu(self.a_rec)

        # Layer_link forward pass
        self.x_reduce = x[:, 0::3]
        a_link = np.matmul(self.x_reduce, self.w_link.transpose())
        self.z_link = identity_func(a_link)

        # Layer_out forward pass
        self.z_rec_link = self.z_rec + self.z_link
        B_out = np.matmul(np.ones((BATCH_SIZE, 1)),
                          self.b_out.reshape(1, D))  # [20, 225]
        self.a_out = np.matmul(self.z_rec_link,
                               self.w_out.transpose()) + B_out  # [20, 225]
        y = relu(self.a_out)
        return y
Example #6
 def branch1(self, x, numOut, s):
     with tf.variable_scope("conv1"):
         conv1 = utils.relu(utils.Bn(utils.conv2d(x, numOut // 4, d_h=s, d_w=s), training=self.is_training))
     with tf.variable_scope("conv2"):
         conv2 = utils.relu(utils.Bn(utils.conv2d(conv1, numOut // 4, 3, 3), training=self.is_training))
     with tf.variable_scope("conv3"):
         conv3 = utils.Bn(utils.conv2d(conv2, numOut), training=self.is_training)
     return conv3
Example #7
 def residual(self, x, numOut, stride=1, name='res'):
     with tf.variable_scope(name):
         block = self.branch1(x, numOut, stride)
         if x.get_shape().as_list()[3] == numOut:
             return utils.relu(tf.add_n([x, block]))
         else:
             skip = self.branch2(x, numOut, stride)
             return utils.relu(tf.add_n([block, skip]))
Example #8
def gradient(x, y, w1, w2, w3, w4, b1, b2, b3, b4, learning_rate, iterr):
    for i in range(iterr):
        z1 = np.dot(w1, x) + b1

        #print("w1 shape{fgh} x shape{yj}".format(fgh=w1.shape,yj = x.shape))
        a1 = relu(z1)

        z2 = np.dot(w2, a1) + b2
        a2 = relu(z2)
        z3 = np.dot(w3, a2) + b3
        a3 = relu(z3)
        z4 = np.dot(w4, a3) + b4
        a4 = sigmoid(z4)
        a4 = preds(a4)

        dz4 = a4 - y
        dw4 = 1 / m * np.dot(dz4, a3.T)

        db4 = 1 / m * np.sum(dz4)
        da3 = np.dot(w4.T, dz4)
        dz3 = da3 * backward_relu(z3)
        dw3 = 1 / m * np.dot(dz3, a2.T)
        db3 = 1 / m * np.sum(dz3)
        da2 = np.dot(w3.T, dz3)
        dz2 = da2 * backward_relu(z2)
        dw2 = 1 / m * np.dot(dz2, a1.T)
        db2 = 1 / m * np.sum(dz2)
        da1 = np.dot(w2.T, dz2)
        dz1 = da1 * backward_relu(z1)
        dw1 = 1 / m * np.dot(dz1, x.T)
        db1 = 1 / m * np.sum(dz1)
        #print(db1,dw4,db4,dw1)
        w1 = w1 - learning_rate * dw1
        b1 = b1 - learning_rate * db1
        w2 = w2 - learning_rate * dw2
        b2 = b2 - learning_rate * db2
        w3 = w3 - learning_rate * dw3
        b3 = b3 - learning_rate * db3
        w4 = w4 - learning_rate * dw4
        b4 = b4 - learning_rate * db4

        if (i % 100) == 0:
            error = a4 - y
            print("Accuracy: " + str(np.sum((a4 == y) / m)))
            print("Error:" + str(np.mean(np.abs(error))))

    d = {
        'w1': w1,
        'b1': b1,
        'w2': w2,
        'b2': b2,
        'w3': w3,
        'b3': b3,
        'w4': w4,
        'b4': b4
    }

    return d
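The helpers used above (relu, backward_relu, sigmoid, preds) and the global m (number of training examples) are assumed rather than shown; one plausible NumPy version:

import numpy as np

def relu(z):
    return np.maximum(0, z)

def backward_relu(z):
    # Derivative of ReLU with respect to its pre-activation.
    return (z > 0).astype(float)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def preds(a):
    # Threshold sigmoid outputs into hard 0/1 predictions.
    return (a > 0.5).astype(float)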
Example #9
def f_O2(L, p_O2):
    l, a, b, c, du, dd = tuple(L)
    if p_O2 == 1:
        return relu(1 - rat_tdown(L))
    elif p_O2 == 2:
        return relu(1 - rat_tup(L))
    elif p_O2 == 3:
        return relu(1 - 2 / (1 / (rat_tdown(L) + 10**-6) + 1 / rat_tup(L)))
    else:
        return 1
Example #10
def test(x, w1, w2, w3, w4, b1, b2, b3, b4):
    z1 = np.dot(w1, x) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = relu(z2)
    z3 = np.dot(w3, a2) + b3
    a3 = relu(z3)
    z4 = np.dot(w4, a3) + b4
    a4 = sigmoid(z4)
    a4 = preds(a4)
    return a4
Example #11
def connvolution_process(img):
    '''----------------------Reading the image-------------------------------'''
    # # print(img.shape) # 3D image

    # Converting the image into gray.
    img = color.rgb2gray(img)
    # # print(img.shape) # 2D image
    # io.imshow(img)
    # plt.show()
    '''----------------------Preparing Filter-------------------------------'''
    l1_filter = numpy.zeros((2, 3, 3))
    # Vertical edge-detector filter
    l1_filter[0, :, :] = numpy.array([[[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]])
    # Horizontal edge-detector filter
    l1_filter[1, :, :] = numpy.array([[[1, 1, 1], [0, 0, 0], [-1, -1, -1]]])
    # # print(l1_filter)
    '''---------------------- Convolutional Layer 1 ---------------------------'''
    l1_feature_map = conv(img, l1_filter)
    # print("l1_feature_map", l1_feature_map.shape)
    l1_feature_map_relu = relu(l1_feature_map)
    # print("l1_feature_map_relu", l1_feature_map_relu.shape)
    l1_feature_map_relu_pool = pooling(l1_feature_map_relu, 2, 2)
    # print("l1_feature_map_relu_pool", l1_feature_map_relu_pool.shape)
    # print("**End of conv layer 1**\n\n")
    '''---------------------- Convolutional Layer 2 ---------------------------'''
    l2_filter = numpy.random.rand(3, 5, 5, l1_feature_map_relu_pool.shape[-1])
    l2_feature_map = conv(l1_feature_map_relu_pool, l2_filter)
    # print("l2_feature_map", l2_feature_map.shape)
    l2_feature_map_relu = relu(l2_feature_map)
    # print("l2_feature_map_relu", l2_feature_map_relu.shape)
    l2_feature_map_relu_pool = pooling(l2_feature_map_relu, 2, 2)
    # print("l2_feature_map_relu_pool", l2_feature_map_relu_pool.shape)
    # print("**End of conv layer 2**\n\n")
    '''---------------------- Convolutional Layer 3 ---------------------------'''
    l3_filter = numpy.random.rand(1, 7, 7, l2_feature_map_relu_pool.shape[-1])
    l3_feature_map = conv(l2_feature_map_relu_pool, l3_filter)
    # print("l3_feature_map", l3_feature_map.shape)
    l3_feature_map_relu = relu(l3_feature_map)
    # print("l3_feature_map_relu", l3_feature_map_relu.shape)
    l3_feature_map_relu_pool = pooling(l3_feature_map_relu, 2, 2)
    # print("l3_feature_map_relu_pool", l3_feature_map_relu_pool.shape)
    # print("**End of conv layer 3**\n\n")
    '''---------------------- Graphing results of convolution ---------------------------'''
    draw_layer(img, l1_feature_map, l1_feature_map_relu,
               l1_feature_map_relu_pool, l2_feature_map, l2_feature_map_relu,
               l2_feature_map_relu_pool, l3_feature_map, l3_feature_map_relu,
               l3_feature_map_relu_pool)
    '''---------------------- Fully Connected layer ---------------------------'''
    # print("**Fully connected layer(Convolutional layer to Fully connected layer)**")
    fc = l3_feature_map_relu_pool.reshape(-1)
    ## print(fc.shape)

    return fc
Example #12
def forward_propagation_with_dropout(X, parameters, keep_prob=0.5):
    """
    带有dropout的前向传播
    """

    np.random.seed(1)

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    W3 = parameters["W3"]
    b3 = parameters["b3"]

    # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID
    # Compute the first-layer output
    Z1 = np.dot(W1, X) + b1
    A1 = relu(Z1)

    # dropout: start
    # initialize a random matrix
    D1 = np.random.rand(A1.shape[0], A1.shape[1])
    # threshold to 0s and 1s using keep_prob
    D1 = D1 < keep_prob
    # drop part of the activations in A1
    A1 = np.multiply(A1, D1)
    # rescale so the expected value stays the same (inverted dropout)
    A1 /= keep_prob
    # dropout: end

    # Compute the second-layer output
    Z2 = np.dot(W2, A1) + b2
    A2 = relu(Z2)

    # dropout: start
    D2 = np.random.rand(A2.shape[0], A2.shape[1])
    D2 = D2 < keep_prob
    A2 = np.multiply(A2, D2)
    A2 /= keep_prob
    # dropout: end

    # Output of the last layer
    Z3 = np.dot(W3, A2) + b3
    A3 = sigmoid(Z3)

    cache = (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)

    return A3, cache
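A small self-contained call of the function above, with hypothetical relu/sigmoid helpers and a toy 3-4-3-1 parameter set (all of it illustrative, not from the original notebook):

import numpy as np

def relu(z):
    return np.maximum(0, z)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

np.random.seed(0)
parameters = {
    "W1": np.random.randn(4, 3) * 0.01, "b1": np.zeros((4, 1)),
    "W2": np.random.randn(3, 4) * 0.01, "b2": np.zeros((3, 1)),
    "W3": np.random.randn(1, 3) * 0.01, "b3": np.zeros((1, 1)),
}
X = np.random.randn(3, 5)  # 3 features, 5 examples (columns)
A3, cache = forward_propagation_with_dropout(X, parameters, keep_prob=0.8)
print(A3.shape)  # (1, 5)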
Example #13
    def predict(self, X, y, show_probs=True):
        """
        """
        # forward prop through layers
        for layer_index in np.arange(1, len(self.structure)):
            layer = self.structure[layer_index]
            prev_layer = self.structure[layer_index - 1]

            if prev_layer['bias_node']:
                X = np.concatenate((X, np.ones((X.shape[0], 1))),
                                   axis=1)  # change 1. to self.bias?

            X = (np.dot(layer['weight_matrix'], X.T)).T

            # activation functions
            if layer['activation'] == 'relu':
                X = relu(X)
            elif layer['activation'] == 'softmax':
                X = softmax(X.T).T
            elif layer['activation'] == 'sigmoid':
                X = sigmoid(X)
            else:
                raise Exception('Activation function not recognised')

        predictions = np.argmax(X, axis=1)
        self.predict_cm = createConfusionMatrix(predictions=predictions,
                                                targets=np.argmax(y, axis=1))

        if show_probs == True:
            return X
        else:
            return predictions
Example #14
 def forward(self, X):
     if (self.activationFunction == "relu"):
         hiddenLayerOutput = relu(X.dot(self.W1) + self.b1)
     else:
         hiddenLayerOutput = tanh(X.dot(self.W1) + self.b1)
     return softmax(hiddenLayerOutput.dot(self.W2) +
                    self.b2), hiddenLayerOutput
Example #15
    def lossAndGrads(self, batchAndTargets, computeGrads=True):
        nb = len(batchAndTargets)
        ctxtoks = [ctxtok for ctxtok,tgt in batchAndTargets]
        tgts = [tgt for ctxtok,tgt in batchAndTargets]
        context = self.wvec[ctxtoks,:].reshape((nb,-1))
        z = np.dot(context, self.Wz)
        a = relu(z)
        scores = np.dot(a, self.Wa)
        preds = np.argmax(scores, axis=1)

        loss, probs, dscores = softmaxLossAndGrads(scores, tgts)
        if not computeGrads:
            return preds, probs, loss

        dWa = np.dot(a.T, dscores)
        da = np.dot(dscores, self.Wa.T)

        dz = drelu(da, a)

        dWz = np.dot(context.T, dz)
        dcontext = np.dot(dz, self.Wz.T).reshape((nb,-1,self.wdim))
        dwvec = np.zeros_like(self.wvec)
        for b,ctxtok in enumerate(ctxtoks):
            for w,tok in enumerate(ctxtok):
                dwvec[tok] += dcontext[b,w]
        grads = {}
        grads['Wa'] = dWa + self.reg * self.Wa
        grads['Wz'] = dWz + self.reg * self.Wz
        grads['wvec'] = dwvec + self.reg * self.wvec * (dwvec != 0)

        return preds, probs, loss, grads
Example #16
 def backpropagate(self, x, y):
     gradients_b = [np.zeros(b.shape) for b in self.biases]
     gradients_w = [np.zeros(w.shape) for w in self.weights]
     #feedforward
     a = x
     a_list = [x]
     z_list = []
     for b, w in zip(self.biases[0:-1], self.weights[0:-1]):
         z = np.dot(w, a) + b
         a = utils.relu(z)
         a_list.append(a)
         z_list.append(z)
     z = np.dot(self.weights[-1], a_list[-1]) + self.biases[-1]
     a = utils.softmax(z)
     a_list.append(a)
     z_list.append(z)
     # backward
     # for softmax-cross-entropy layer: delta in last layer = result - ground truth
     delta = a_list[-1] - y
     # update b and w for the last layer L
     gradients_b[-1] = delta
     gradients_w[-1] = np.dot(delta, a_list[-2].transpose())
     # update b and w for the rest of layers L-1, L-2, ...
     for l in range(2, self.num_layers):
         z = z_list[-l]  # lth last layer of z
         r_derivative = utils.relu_derivative(z)
         # update delta based on delta(l) = transpose of w(l+1) * delta(l+1)
         delta = np.dot(self.weights[-l + 1].transpose(),
                        delta) * r_derivative
         gradients_b[-l] = delta
         gradients_w[-l] = np.dot(delta, a_list[-l - 1].transpose())
     return (gradients_b, gradients_w)
Example #17
    def prediction_baysian_dropout(self, x, k=10):
        """
        Runs test-time prediction k times to get a variance on the output.
        Essentially Bayesian (Monte Carlo) dropout.

        Input: 
            x: A numpy array of input data, shape (N, D)
        Return:
            mean: The mean prediction from the dropout ensemble, shape (N, M)
            var: The variance on the prediction from the ensemble, shape (N, M)
        """
        N, D = x.shape
        outputs = np.zeros((k, N, self.M))

        for i in range(k):
            h = x  # Input into the next layer or previous hidden activation
            for l in range(self.n_hidden):
                w, b = self.params["w" + str(l)], self.params["b" + str(l)]
                h, _ = affine(h, w, b)  # Affine layer
                h, _ = relu(h)  # Activation (ReLU)
                h, _ = dropout(h, self.dropout)  # keep dropout active for MC sampling

            # Output layer, simply an affine
            outputs[i], _ = affine(h, self.params["w_out"],
                                   self.params["b_out"])

        mean = np.mean(outputs, axis=0)
        var = np.var(outputs, axis=0)

        return mean, var
Example #18
def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python dictionary containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """

    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.sigmoid(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = utils.relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
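linear_forward and the (value, cache) style of utils.sigmoid / utils.relu are assumed by the example above; a minimal sketch consistent with those call sites:

import numpy as np

def linear_forward(A_prev, W, b):
    # Z = W . A_prev + b; cache the inputs for the backward pass.
    Z = np.dot(W, A_prev) + b
    return Z, (A_prev, W, b)

def relu(Z):
    return np.maximum(0, Z), Z       # activation plus its cache

def sigmoid(Z):
    return 1.0 / (1.0 + np.exp(-Z)), Z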
Example #19
    def inLayerForward(self, x):

        B_in = np.ones((BATCH_SIZE, 1)) @ self.b_in  # [20, 1] * [1, 75]
        self.a_in = x @ self.w_in + B_in  # [20, 225] * [225, 75] + [20, 75]
        self.z_in = relu(self.a_in)  # [20, 75]

        return self.z_in
Example #20
 def scalar_input_layer(self, x):
     # scalar-form method for performance comparison
     z_in = np.zeros((x.shape[0], P))
     for i in range(x.shape[0]):
         a_in = np.dot(self.w_in, x[i, :].T) + self.b_in.T.flatten()
         z_in[i, :] = relu(a_in)
     return z_in
Example #21
    def forward_prop(self, data, parameters, layers):

        """
        Forward propagate values
        :param data: Input data (train and then test)
        :param parameters: Dictionary with initialized weights and biases
        :param layers: Number of layers and neurons
        :return:
        """
        """
        Z[l] = A[l-1] * W[l] + B[l]
        A[l] = G[l](Z[l])
        """
        # A0 initialized here and not in init function, so we can run the training data through forward prop
        parameters['A' + str(0)] = data

        for i in range(1, len(layers)):
            parameters['Z' + str(i)] = np.add(np.dot(parameters['A' + str(i - 1)], parameters['W' + str(i)]),
                                              parameters['B' + str(i)])

            if i != len(layers) - 1:
                parameters['A' + str(i)] = ut.relu(parameters['Z' + str(i)])
            else:
                # Final Activation is Softmax
                parameters['A' + str(i)] = ut.softmax(parameters['Z' + str(i)])

        return parameters['A' + str(len(layers) - 1)], parameters
Example #22
 def fprop(self, X):
     X = np.array([np.array([float(x) for x in j]) for j in X])
     X = X.transpose()
     self._ha = np.dot(self._w1, X) + np.repeat(self._b1, len(X[0]), axis=1)  # synapse values between x and the hidden layer
     self._hs = utils.relu(self._ha)  # hidden-layer values
     self._oa = np.dot(self._w2, self._hs) + np.repeat(self._b2, len(X[0]), axis=1)  # values between the hidden layer and the output
     self._os = utils.softmax(self._oa)  # output values
Example #24
    def prediction(self, x, z):
        """
        Compute prediction for the fully-connected net at test time (without
        saving cache and no-dropout).

        Input: 
            x: A numpy array of input data, shape (N, D)
            z: Diff between y-y_des for the func approx (N, M) (1,2) in this case
        Return:
            output: Output prediction/prediction of label, shape (N, M)
        """
        h = x  # Input into the next layer or previous hidden activation
        for l in range(self.n_hidden):
            l = str(l)
            w = self.params["w" + l]
            b = self.params["b" + l]
            h, _ = affine(h, w, b)  # Affine layer
            h, _ = relu(h)  # Activation (ReLU)
        # Output layer, simply an affine
        output, cache = affine(h, self.params["w_out"], self.params["b_out"])

        # Technically this is not the real z but the 1/N term only scales z (we
        # can think of this as equivalent to scaling β by 1/N).
        # This is to match how dout works in NeuralNetOffline (see line: 190)
        N, D = x.shape
        z = z / N

        # Only trainable parameters in the adaptive case are the last-layer weights
        # So we only update the output layer weights (using e-mod)
        _, dw, db = self.w_hat_dot_e_mod(z, cache)

        # Update the weights
        self.params["w_out"] -= self.beta * dw
        self.params["b_out"] -= self.beta * db
        return output
Example #25
 def vector_input_layer(self, x):
     # vector-form method for performance comparison
     B_in = np.matmul(np.ones((BATCH_SIZE, 1)),
                      self.b_in.reshape(1, P))  # [20, 75]
     a_in = np.matmul(x, self.w_in.transpose()) + B_in  # [20, 75]
     z_in_numpy = relu(a_in)
     return z_in_numpy
Example #26
 def inference(self, x):
     with tf.variable_scope("conv0"):
         conv1 = utils.relu(utils.Bn(utils.conv2d(x, 64, 7, 7, 2, 2, bias=True), training=self.is_training))
     with tf.name_scope("pool1"):
         pool1 = utils.max_pool(conv1, 3, 3, 2, 2)
     with tf.variable_scope("group0"):
         res2a = self.residual(pool1, 256, name='block0')
         res2b = self.residual(res2a, 256, name='block1')
         res2c = self.residual(res2b, 256, name='block2')
     with tf.variable_scope("group1"):
         res3a = self.residual(res2c, 512, 2, name='block0')
         res3b = self.residual(res3a, 512, name='block1')
         res3c = self.residual(res3b, 512, name='block2')
         res3d = self.residual(res3c, 512, name='block3')
     with tf.variable_scope("group2"):
         res4a = self.residual(res3d, 1024, 2, name='block0')
         res4b = self.residual(res4a, 1024, name='block1')
         res4c = self.residual(res4b, 1024, name='block2')
         res4d = self.residual(res4c, 1024, name='block3')
         res4e = self.residual(res4d, 1024, name='block4')
         res4f = self.residual(res4e, 1024, name='block5')
     with tf.variable_scope("group3"):
         res5a = self.residual(res4f, 2048, 2, name='block0')
         res5b = self.residual(res5a, 2048, name='block1')
         res5c = self.residual(res5b, 2048, name='block2')
     with tf.name_scope("pool5"):
         pool5 = utils.global_pool(res5c)
     with tf.variable_scope("linear"):
         dropout = tf.nn.dropout(pool5, keep_prob=self.keep_prob)
         out = utils.linear(dropout, 1000)
     return out
Example #27
    def prediction_save_cache(self, x):
        """
        Compute prediction for the fully-connected net and save intermediate 
        activations.

        N samples, D dims per sample, each sample is a row vec, M is the dims of
        y/prediction

        Input: 
            x: A numpy array of input data, shape (N, D)
        Return:
            output: Output prediction/prediction of label, shape (N, M)
            caches: Saved intermediate activations for use in backprop
        """
        caches = {}
        h = x  # Input into the next layer or previous hidden activation
        for l in range(self.n_hidden):
            l = str(l)
            w, b = self.params["w" + l], self.params["b" + l]
            h, caches["affine" + l] = affine(h, w, b)  # Affine layer
            h, caches["relu" + l] = relu(h)  # Activation (ReLU)
            # Dropout layer (train-time dropout)
            h, caches["dropout" + l] = dropout(h, self.dropout)

        # Output layer, simply an affine
        output, cache = affine(h, self.params["w_out"], self.params["b_out"])
        caches["affine_out"] = cache
        return output, caches
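affine, relu and dropout are again module-level helpers that return a (value, cache) pair. A hedged sketch of that convention, assuming rows are samples and that self.dropout is the drop probability:

import numpy as np

def affine(x, w, b):
    # Fully connected layer: x has shape (N, D_in), w has shape (D_in, D_out).
    out = x.dot(w) + b
    return out, (x, w, b)

def relu(x):
    return np.maximum(0, x), x

def dropout(x, p_drop):
    # Inverted dropout: zero out units and rescale the survivors.
    mask = (np.random.rand(*x.shape) >= p_drop) / (1.0 - p_drop)
    return x * mask, mask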
Example #28
def forward_propagation(X, parameters):
    """正向传播
    """

    cache = dict()
    L = len(parameters) // 2
    cache['A{}'.format(0)] = X  # A0 = X

    # Layers 1 to (L-1): LINEAR -> RELU -> LINEAR -> RELU ...
    for l in range(1, L):
        W = parameters['W{}'.format(l)]
        b = parameters['b{}'.format(l)]
        Z = np.dot(W, cache['A{}'.format(l - 1)]) + b  # Zl = Wl * A(l-1) + bl
        A = relu(Z)  # activation value for this layer

        cache['W{}'.format(l)] = W
        cache['b{}'.format(l)] = b
        cache['Z{}'.format(l)] = Z
        cache['A{}'.format(l)] = A

    # Layer L: LINEAR -> SIGMOID
    WL = parameters['W{}'.format(L)]
    bL = parameters['b{}'.format(L)]
    ZL = np.dot(WL, cache['A{}'.format(L - 1)]) + bL
    AL = sigmoid(ZL)

    cache['W{}'.format(L)] = WL
    cache['b{}'.format(L)] = bL
    cache['Z{}'.format(L)] = ZL
    cache['A{}'.format(L)] = AL

    return AL, cache
Example #29
    def __init__(self, rng, input, filter_shape, image_shape, 
                 poolsize=(2,2), params_W=None, params_b=None, 
                 mu=0, sigma=0.1, bias_val=0.5, activation_mode=0):
        assert image_shape[1] == filter_shape[1]
        self.input = input

        # if params_W is not given, generate random params_W        
        if params_W is None:
            self.W = theano.shared(
                numpy.asarray(
                    rng.normal(mu, sigma, filter_shape),
                    dtype=theano.config.floatX
                ),
                borrow=True
            )    
        else:
            self.W = theano.shared(
                numpy.asarray(params_W,dtype=theano.config.floatX), borrow=True)

        # if params_b is not given, generate random params_b
        if params_b is None:
            b_values = bias_val * numpy.ones((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)
        else:
            self.b = theano.shared(
                numpy.asarray(params_b,dtype=theano.config.floatX), borrow=True
            )

        self.momentum_W = theano.shared(
            numpy.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True
        )
        self.momentum_b = theano.shared(
            numpy.zeros((filter_shape[0],), dtype=theano.config.floatX),
            borrow=True
        )  

        # feature maps after convolution
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )
        
        # feature maps after pooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # output of layer, activated pooled feature maps
        if activation_mode == 0: 
            self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        elif activation_mode == 1:
            self.output = relu(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        self.params = [self.W, self.b]
Example #30
 def _process_layers(self, weights, data, learning=True):
     for W, mean, std in self._generate_layers(weights):
         data = normalize(data, mean, std)
         data = relu(data)
         if learning and self.dropout is not None:
             data = dropout(data, self.dropout)
         data = np.dot(data, W)
     return data
Example #31
 def fprop(self, X):
     X = np.array([[float(x)] for x in X])
     self._ha = np.dot(
         self._w1, X) + self._b1  # synapse values between x and the hidden layer
     self._hs = utils.relu(self._ha)  # hidden-layer values
     self._oa = np.dot(self._w2,
                       self._hs) + self._b2  # values between the hidden layer and the output
     self._os = utils.softmax(self._oa)  # output values
Example #32
def linear_activation_forward(a_prev, w, b, activation):
    z, linear_cache = linear_forward(a_prev, w, b)
    if activation == "sigmoid":
        a, activation_cache = sigmoid(z)
    else:
        a, activation_cache = relu(z)
    cache = (linear_cache, activation_cache)
    return a, cache
Example #33
 def predict(self, X, learning):
     hiddenLayerOutput = relu(X.dot(self.W1) + self.b1)
     output = hiddenLayerOutput.dot(self.W2) + self.b2
     if (learning):
         probs = softmax(output)
         action = np.where(probs == np.random.choice(probs, 1, p=probs))
         return (int(action[0]))
     return np.argmax(output)
Example #34
    def f(X):
        n_samples = X.shape[0]

        input    = X.reshape((n_samples, n_feats[0], image_row, image_col))
        conv_out = T.nnet.conv2d(input, P.W_input_conv)
        pool_out_= max_pool_2d(conv_out, (pool_row, pool_col))
        pool_out = pool_out_.flatten(2) + P.b_pool_out
        hidden   = relu(T.dot(pool_out, P.W_pool_out_hidden) + P.b_hidden)
        output   = T.dot(hidden, P.W_hidden_output) + P.b_output
        return output.astype(theano.config.floatX)
Example #35
	def activate(self, activation):
		
		if(self.visible_type == "SIGMOID"):
			m_output = utils.sigmoid(activation)
		elif(self.visible_type == "RELU"):
			m_output = utils.relu(activation)
		elif(self.visible_type == "LEAKY_RELU"):
			m_output = utils.leaky_relu(activation)
		elif(self.visible_type == "LINEAR"):
			m_output = activation
		else:
			raise NotImplementedError("Unrecognised visible type")
	
		return m_output
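utils.sigmoid, utils.relu and utils.leaky_relu are not shown; minimal NumPy stand-ins, assuming the conventional definitions:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def relu(x):
    return np.maximum(0.0, x)

def leaky_relu(x, alpha=0.01):
    # Small negative slope instead of a hard zero for x < 0.
    return np.where(x > 0, x, alpha * x)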
Example #36
def layers(x, window, dim_emb, dim_hidden, n_layers, activation=tanh):
    params = []
    zero = T.zeros((1, 1, dim_emb * window), dtype=theano.config.floatX)

    def zero_pad_gate(matrix):
        return T.neq(T.sum(T.eq(matrix, zero), 2, keepdims=True), dim_emb * window)

    for i in xrange(n_layers):
        if i == 0:
            W = theano.shared(sample_weights(dim_emb * window, dim_hidden))
            h = T.max(zero_pad_gate(x) * relu(T.dot(x, W)), 1)
#            h = T.max(T.dot(x, W), 1)
        else:
            W = theano.shared(sample_weights(dim_hidden, dim_hidden))
            h = activation(T.dot(h, W))
        params.append(W)

    return h, params
Example #38
	def train(self, data_train, initialmomentum, finalmomentum, learn_rate_w,
			learn_rate_visb, learn_rate_hidb, weightcost):
		
		learning_rate = learn_rate_w
		
		self.weightcost = weightcost
		
		N = np.shape(data_train)[0]
		num_visible = np.shape(data_train)[1]

		total_batches = int(np.ceil(N / self.mini_batch))

		m_visible_biases = np.zeros(num_visible)
		m_hidden_biases = np.zeros(self.hidden_nodes)

		m_weights = np.random.randn(num_visible, self.hidden_nodes) * 0.01
		momentum_weights = np.zeros_like(m_weights)
		momentum_visible_biases = np.zeros_like(m_visible_biases)
		momentum_hidden_biases = np.zeros_like(m_hidden_biases)
		
		momentum_rate = finalmomentum
		
		best_rmsd = None
		rmsd_logger=[]
		
		for i in range(self.iterations):
			start = (i % total_batches) * self.mini_batch
			end = start + self.mini_batch			
			if end >= N : end = N - 1
			
			#print start, end
			
			batch = data_train[start:end]
			batch = batch
			
			weights, visible_biases, hidden_biases = self._CD1(batch, 
				m_weights, m_visible_biases, m_hidden_biases)
			
			momentum_weights = momentum_rate * momentum_weights + weights
			momentum_visible_biases = momentum_rate * momentum_visible_biases + visible_biases
			momentum_hidden_biases = momentum_rate * momentum_hidden_biases + hidden_biases
			
			m_weights += momentum_weights * learning_rate
			m_visible_biases += momentum_visible_biases * learning_rate
			m_hidden_biases += momentum_hidden_biases * learning_rate
			
			# Reconstruction error, no sampling done, using raw probability
			output = np.dot(data_train, m_weights) \
										+ np.tile(hidden_biases, (N,1))
			if self.visible_type == "SIGMOID":
				hidden_state = u.sigmoid(output)
			elif self.visible_type == "RELU":
				hidden_state = u.relu(output)
			elif self.visible_type == "LINEAR":
				hidden_state = output
				
			
			reconstruction = np.dot(hidden_state, m_weights.T) + np.tile(visible_biases, (N,1))

			if self.visible_type == "SIGMOID":
				reconstruction = u.sigmoid(reconstruction)
			elif self.visible_type == "RELU":
				reconstruction = u.relu(reconstruction)
				
			##################################################
				
			err = (data_train-reconstruction)*(data_train-reconstruction)
			rmsd = np.sqrt(np.mean(err*err))
			rmsd_logger.append(rmsd)
			print "Epoch %4d/%4d, RMS deviation = %7.4f" % (i + 1, self.iterations, rmsd)
			
			if self.early_stop:			
				if best_rmsd is None or (rmsd - best_rmsd) / best_rmsd < -1e-3 :
					best_weights = m_weights
					best_hidden_biases = hidden_biases
					best_visible_biases = visible_biases
					best_rmsd = rmsd
					iter_since_best = 0
				else :
					iter_since_best += 1
					
				if iter_since_best >= self.max_epoch_without_improvement :
					print "Early stop -- best epoch %d / %d, RMS deviation = %7.4f" % (
						i + 1 - iter_since_best, self.iterations, best_rmsd)
					break
				
			else: 
				best_weights = m_weights
				best_hidden_biases = hidden_biases
				best_visible_biases = visible_biases
				best_rmsd = rmsd
				iter_since_best = 0
			
			#print ' rmsd = ', rmsd
		self.weights = best_weights
		self.hidden_biases = best_hidden_biases
		self.visible_biases = best_visible_biases
		rmsd_history = np.asarray(rmsd_logger)
		if iter_since_best > 0 :
			self.train_history = rmsd_history[:-1*iter_since_best]
		else :
			self.train_history = rmsd_history
Example #39
	def train(self, data_train, initialmomentum, finalmomentum, learn_rate_w,
			learn_rate_visb, learn_rate_hidb, weightcost):

		Nd, numdims = np.shape(data_train)
		N = self.mini_batch
		n_tot_batches = int(np.ceil(Nd / self.mini_batch))
		
		vishid = 0.01 * np.random.randn(numdims, self.hidden_nodes)
		
		hidbiases = np.zeros(self.hidden_nodes)
		visbiases = np.zeros(numdims)
		
		vishidinc = np.zeros_like(vishid)
		hidbiasinc = np.zeros_like(hidbiases)
		visbiasinc = np.zeros_like(visbiases)
		"""
		pos_hidprobs = np.zeros([N, self.hidden_nodes])
		neg_hidprobs = np.zeros_like(pos_hidprobs)
		pos_prods = np.zeros_like(vishid)
		neg_prods = np.zeros_like(vishid)

		"""
		batchposidprobs = np.empty([N, self.hidden_nodes, n_tot_batches])
		
		rmsd_logger = []
		best_weights = np.zeros_like(vishid)
		best_hidden_biases = np.zeros_like(hidbiases)
		best_rmsd = None
		iter_since_best = 0
		
		for epoch in range(self.iterations):
			errsum = 0
			rmsd = 0
			##print "Epoch %d / %d" % (epoch + 1, self.iterations)
			for batch in range(n_tot_batches):
				#print "  epoch %d / %d -- batch %d / %d" % (epoch + 1, self.iterations,\
				#										 batch + 1, n_tot_batches)
				
				start = (batch % n_tot_batches) * self.mini_batch
				end = start + self.mini_batch			
				if end >= Nd : end = Nd
				data = data_train[start:end]

				## START POSITIVE PHASE ##################################################
				nw = np.dot(data, vishid) + np.tile(hidbiases, (N, 1))
				if self.hidden_type == "SIGMOID":
					pos_hidprobs = utils.sigmoid(nw)
				if self.hidden_type == "RELU":
					pos_hidprobs = utils.relu(nw)
				elif self.hidden_type == "LINEAR": 
					pos_hidprobs = nw
				
				if epoch >= self.iterations - 1:
					batchposidprobs[:,:,batch] = pos_hidprobs			
				pos_prods = np.dot(data.T, pos_hidprobs)
				pos_hidact = np.sum(pos_hidprobs, 0)
				pos_visact = np.sum(data, 0)
				## END OF POSITIVE PHASE ################################################
				if self.hidden_type == "SIGMOID" or self.hidden_type == "RELU":
					ran = np.random.rand(N, self.hidden_nodes)
					pos_hidstates = pos_hidprobs > ran
				elif self.hidden_type == "LINEAR":
					ran = np.random.randn(N, self.hidden_nodes)
					pos_hidstates = pos_hidprobs + ran
					
				## START NEGATIVE PHASE #################################################
				nw = np.dot(pos_hidstates,vishid.T) + np.tile(visbiases, (N,1))
				
				# TODO: Do this only if visible type is sigmoid see C++ line 262 and next
				if self.visible_type == "SIGMOID":
					neg_data = utils.sigmoid(nw)
				if self.visible_type == "RELU":
					neg_data = utils.relu(nw)
				else:
					neg_data = nw
				
				nw = np.dot(neg_data, vishid) + np.tile(hidbiases, (N, 1))

				if self.hidden_type == "SIGMOID":
					neg_hidprobs = utils.sigmoid(nw)
				if self.hidden_type == "RELU":
					neg_hidprobs = utils.relu(nw)
				else: 
					neg_hidprobs = nw
				
				neg_prods = np.dot(neg_data.T, neg_hidprobs)
				neg_hidact = np.sum(neg_hidprobs, 0)
				neg_visact = np.sum(neg_data, 0)

				## END OF NEGATIVE PHASE ################################################
				
				errsum += np.sum((data-neg_data)*(data-neg_data))
				rmsd += np.sqrt(np.mean((data-neg_data)*(data-neg_data)))
				#print rmsd; exit()
				
				if epoch > 5:
					momentum = finalmomentum
				else:
					momentum = initialmomentum 
					
				## UPDATE WEIGHTS AND BIASES ############################################
				vishidinc = momentum * vishidinc + learn_rate_w * \
					((pos_prods - neg_prods)/N - weightcost * vishid)
				visbiasinc = momentum * visbiasinc + learn_rate_visb/N * (pos_visact - neg_visact)
				hidbiasinc = momentum * hidbiasinc + learn_rate_hidb/N * (pos_hidact - neg_hidact)
				
				vishid += vishidinc
				visbiases += visbiasinc
				hidbiases += hidbiasinc
				
			print "Epoch %4d/%4d, RMS deviation = %7.4f" % (epoch + 1, self.iterations, rmsd)
			rmsd_logger.append(rmsd)
			
			if self.early_stop:			
				if best_rmsd is None or (rmsd - best_rmsd) / best_rmsd < -1e-3 :
					best_weights = vishid
					best_hidden_biases = hidbiases
					best_visible_biases = visbiases
					best_rmsd = rmsd
					iter_since_best = 0
				else :
					iter_since_best += 1
					
				if iter_since_best >= self.max_epoch_without_improvement :
					print "Early stop -- best epoch %d / %d, RMS deviation = %7.4f" % (
											epoch + 1 - iter_since_best, self.iterations, best_rmsd)
					break
			else: 
				best_weights = vishid
				best_hidden_biases = hidbiases
				best_visible_biases = visbiases
				best_rmsd = rmsd
				iter_since_best = 0
			
			#print ' rmsd = ', rmsd
		self.weights = best_weights
		self.hidden_biases = best_hidden_biases
		self.visible_biases = best_visible_biases
		rmsd_history = np.asarray(rmsd_logger)
		if iter_since_best > 0 :
			self.train_history = rmsd_history[:-1*iter_since_best]
		else :
			self.train_history = rmsd_history