Example #1
def net(X):
    X = X.reshape((-1, num_inputs))
    h = relu(nd.dot(X, W1) + b1)
    h1 = relu(nd.dot(h, W2) + b2)
    h2 = relu(nd.dot(h1, W3) + b3)
    output = nd.dot(h2, W4) + b4
    return output
Example #2
    def symeig_svd(self, matrix, n_eigenvecs=None):
        """Computes a truncated SVD on `matrix` using symeig

            Uses symeig on matrix.T.dot(matrix) or its transpose

        Parameters
        ----------
        matrix : 2D-array
        n_eigenvecs : int, optional, default is None
            if specified, number of eigen[vectors-values] to return

        Returns
        -------
        U : 2D-array
            of shape (matrix.shape[0], n_eigenvecs)
            contains the left singular vectors
        S : 1D-array
            of shape (n_eigenvecs, )
            contains the singular values of `matrix`
        V : 2D-array
            of shape (n_eigenvecs, matrix.shape[1])
            contains the right singular vectors
        """
        # Check that matrix is... a matrix!
        if self.ndim(matrix) != 2:
            raise ValueError('matrix should be 2-dimensional. matrix.ndim is %d != 2' %
                             self.ndim(matrix))

        dim_1, dim_2 = self.shape(matrix)
        if dim_1 <= dim_2:
            min_dim = dim_1
            max_dim = dim_2
        else:
            min_dim = dim_2
            max_dim = dim_1

        if n_eigenvecs is None:
            n_eigenvecs = max_dim

        if min_dim <= n_eigenvecs:
            if n_eigenvecs > max_dim:
                warnings.warn(
                    'Trying to compute SVD with n_eigenvecs={0}, which '
                    'is larger than max(matrix.shape)={1}. Setting '
                    'n_eigenvecs to {1}'.format(n_eigenvecs, max_dim))
                n_eigenvecs = max_dim
            # we compute decomposition on the largest of the two to keep more eigenvecs
            dim_1, dim_2 = dim_2, dim_1

        if dim_1 < dim_2:
            U, S = nd.linalg.syevd(dot(matrix, transpose(matrix)))
            S = self.sqrt(S)
            V = dot(transpose(matrix), U / reshape(S, (1, -1)))
        else:
            V, S = nd.linalg.syevd(dot(transpose(matrix), matrix))
            S = self.sqrt(S)
            U = dot(matrix, V) / reshape(S, (1, -1))

        U, S, V = U[:, ::-1], S[::-1], transpose(V)[::-1, :]
        return U[:, :n_eigenvecs], S[:n_eigenvecs], V[:n_eigenvecs, :]
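A minimal sketch (toy data, not from the original project) of the identity the method above exploits: the singular values of M are the square roots of the eigenvalues of M.T.dot(M), which syevd returns in ascending order.

from mxnet import nd

# Illustration only: sqrt of the eigenvalues of M^T M equals the singular values of M.
M = nd.random.normal(shape=(6, 4))
_, eigvals = nd.linalg.syevd(nd.dot(M.T, M))
print(nd.sqrt(nd.maximum(eigvals, 0)))  # singular values of M, ascending order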
Example #3
    def function_set(self):
        # First convolutional layer
        # Convolution
        h1_conv = nd.Convolution(
            data=self.__batch_X, weight=self.__W1, bias=self.__b1, kernel=self.__W1.shape[2:], num_filter=self.__W1.shape[0])
        # Activation
        h1_activation = nd.relu(h1_conv)
        # Pooling
        h1 = nd.Pooling(data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
        # Second convolutional layer
        h2_conv = nd.Convolution(
            data=h1, weight=self.__W2, bias=self.__b2, kernel=self.__W2.shape[2:], num_filter=self.__W2.shape[0])
        h2_activation = nd.relu(h2_conv)
        h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
        h2 = nd.flatten(h2)
        # First fully connected layer
        h3_linear = nd.dot(h2, self.__W3) + self.__b3
        h3 = nd.relu(h3_linear)
        # Second fully connected layer
        h4_linear = nd.dot(h3, self.__W4) + self.__b4

        # print("1st conv block:", h1.shape)
        # print("2nd conv block:", h2.shape)
        # print("1st dense:", h3.shape)
        # print("2nd dense:", h4_linear.shape)
        # print("output:", h4_linear)

        return h4_linear
Example #4
def def_grads(prims):
    """ Define gradient function for primitives """
    identity = lambda x: x
    # dot
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(g, b.T))
    prims('dot').def_grad(
        lambda ans, a, b: lambda g: ndarray.dot(a.T, g), argnum=1)
    # non-linear
    prims('tanh').def_grad(lambda ans, x: lambda g: g * (1 - ans ** 2))
    prims('exp').def_grad(lambda ans, x: lambda g: g * ans)
    prims('log').def_grad(lambda ans, x: lambda g: g / x)
    # reduce
    prims('sum').def_grad(_sum_grad)
    # + - * /
    prims('multiply').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g * y))
    prims('multiply').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: x * g), argnum=1)
    prims('add').def_grad(lambda ans, x, y: _unbroadcast(ans, x, identity))
    prims('add').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, identity), argnum=1)
    prims('subtract').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, identity))
    prims('subtract').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, operator.neg), argnum=1)
    prims('divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g / y))
    prims('divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: -g * x / (y * y)),
        argnum=1)
    prims('true_divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g / y))
    prims('true_divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: -g * x / (y * y)),
        argnum=1)
    prims('maximum').def_grad(_maximum_grad_gen0)
    prims('maximum').def_grad(_maximum_grad_gen1, argnum=1)
    # TODO: minjie
    prims('max').def_grad_zero()
    # negate
    prims('negative').def_grad(lambda ans, x: operator.neg)
    prims('transpose').def_grad(lambda ans, x: mxnet.nd.transpose)
    prims('abs').def_grad(lambda ans, x: lambda g: mxnet.nd.sign(x) * g)
    prims('sign').def_grad_zero()
    prims('round').def_grad_zero()
    prims('ceil').def_grad_zero()
    prims('floor').def_grad_zero()
    prims('sqrt').def_grad(lambda ans, x: lambda g: g * 0.5 / mxnet.nd.sqrt(x))
    prims('sin').def_grad(lambda ans, x: lambda g: g * mxnet.nd.cos(x))
    prims('cos').def_grad(lambda ans, x: lambda g: -g * mxnet.nd.sin(x))
    prims('power').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g * y * mxnet.nd.power(x, y - 1))
    )
    prims('power').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: g * mxnet.nd.log(x) * ans),
        argnum=1)
    prims('reshape').def_grad(
        lambda _0, x, _1: lambda g: NDArray.reshape(g, x.shape))
    prims('expand_dims').def_grad(
        lambda ans, x, axis: lambda g: NDArray.reshape(g, x.shape))
Example #5
    def function_set(self):
        def dropout(batch_X, drop_probability):
            keep_probability = 1 - drop_probability
            assert 0 <= keep_probability <= 1
            if keep_probability == 0:
                return batch_X.zeros_like()

            # Only entries drawn below keep_probability retain this neuron's output for this sample
            mask = nd.random_uniform(
                0, 1.0, batch_X.shape, ctx=batch_X.context) < keep_probability
            # Ensure E[dropout(batch_X)] == batch_X
            scale = 1 / keep_probability

            return mask * batch_X * scale

        # Dense layers need dropout; conv layers generally don't, since they already share weights
        h1 = dropout(
            nd.relu(
                nd.dot(self.__batch_X.reshape(
                    (-1, self.__num_inputs)), self.__W1) + self.__b1),
            self.__drop_prob1)
        h2 = dropout(nd.relu(nd.dot(h1, self.__W2) + self.__b2),
                     self.__drop_prob2)

        return nd.dot(h2, self.__W3) + self.__b3
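A quick standalone check (not part of the original class) of the inverted-dropout scaling used in the helper above: zeroing entries with probability drop_probability and scaling the survivors by 1/keep_probability keeps the expected value roughly unchanged.

from mxnet import nd

X = nd.ones((1000, 1000))
keep_probability = 0.6
# Same masking-and-scaling trick as dropout() above, restated outside the class.
mask = nd.random_uniform(0, 1.0, X.shape) < keep_probability
print((mask * X / keep_probability).mean())  # close to 1.0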
Example #6
def net(X):
    # -1 means the number of rows is inferred automatically; the number of columns is num_inputs
    X = X.reshape((-1, num_inputs))
    # Hidden layer output
    hidden1 = relu(nd.dot(X, W1) + b1)
    output = nd.dot(hidden1, W2) + b2
    return output
Example #7
    def check_KL(self):
        ph_act = nd.dot(self.enum_states, self.W) + self.hb
        vt = nd.dot(self.enum_states, self.vb)
        ht = nd.sum(-nd.log(nd.sigmoid(-ph_act)), axis=1)
        p_th = nd.softmax(vt + ht)
        KL = nd.sum(self.prob_states * nd.log(self.prob_states / p_th))
        return KL.asnumpy()[0]
Example #8
def net(X, verbose=False):
    X = X.as_in_context(W1.context)
    # First convolutional layer
    h1_conv = nd.Convolution(data=X, weight=W1, bias=b1, kernel=W1.shape[2:], num_filter=W1.shape[0])
    h1_activation = nd.relu(h1_conv)
    h1 = nd.Pooling(data=h1_activation, pool_type='max', kernel=(2, 2), stride=(2, 2))

    # Second convolutional layer
    h2_conv = nd.Convolution(data=h1, weight=W2, bias=b2, kernel=W2.shape[2:], num_filter=W2.shape[0])
    h2_activation = nd.relu(h2_conv)
    h2 = nd.Pooling(h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
    h2 = nd.flatten(h2)

    # First fully connected layer
    h3_linear = nd.dot(h2, W3) + b3
    h3 = nd.relu(h3_linear)

    # Second fully connected layer
    h4_linear = nd.dot(h3, W4) + b4
    if verbose:
        print('1st conv block', h1.shape)
        print('2nd conv block', h2.shape)
        print('1st dense', h3.shape)
        print('2nd dense', h4_linear.shape)
        print('output:', h4_linear)
    return h4_linear
Example #9
File: mxnet_core.py Project: Vanova/minpy
def def_grads(reg, prims):
    def identity(x):
        return x
    # dot
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(g, b.T))
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(a.T, g), argnum=1)
    # non-linear
    #prims.tanh.def_grad(lambda ans, x: lambda g: g / np.cosh(x) ** 2)
    prims('exp').def_grad(lambda ans, x: lambda g: g * ans)
    prims('log').def_grad(lambda ans, x: lambda g: g / x)
    # reduce
    prims('sum').def_grad(lambda ans, x, axis=None, keepdims=False: gen_sum_grad(ans, x, axis, keepdims))
    # + - * /
    prims('multiply').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g * y))
    prims('multiply').def_grad(lambda ans, x, y: unbroadcast(ans, y, lambda g: x * g), argnum=1)
    prims('add').def_grad(lambda ans, x, y: unbroadcast(ans, x, identity))
    prims('add').def_grad(lambda ans, x, y: unbroadcast(ans, y, identity), argnum=1)
    prims('subtract').def_grad(lambda ans, x, y: unbroadcast(ans, x, identity))
    prims('subtract').def_grad(lambda ans, x, y: unbroadcast(ans, y, operator.neg), argnum=1)
    prims('divide').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g / y))
    prims('divide').def_grad(
            lambda ans, x, y: unbroadcast(ans, y, lambda g: - g * x / (y * y)),
            argnum=1)
    prims('true_divide').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g / y))
    prims('true_divide').def_grad(
            lambda ans, x, y: unbroadcast(ans, y, lambda g: - g * x / (y * y)),
            argnum=1)
    # power
    #prims.power.def_grad(lambda ans, x, y : unbroadcast(ans, x, lambda g : g * y * x ** (y - 1)))
    #prims.power.def_grad(lambda ans, x, y : unbroadcast(ans, y, lambda g : g * ndarray.log(x) * x ** y), argnum=1)
    # mod
    #prims.mod.def_grad(lambda ans, x, y : unbroadcast(ans, x, identity))
    #prims.mod.def_grad(lambda ans, x, y : unbroadcast(ans, y, lambda g : - g * ndarray.floor(x/y)), argnum=1)
    # negate
    prims('negative').def_grad(lambda ans, x: operator.neg)
Example #10
File: mxnet_core.py Project: mufeili/minpy
def def_grads(prims):
    """ Define gradient function for primitives """
    identity = lambda x: x
    # dot
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(g, b.T))
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(a.T, g),
                          argnum=1)
    # non-linear
    #prims.tanh.def_grad(lambda ans, x: lambda g: g / np.cosh(x) ** 2)
    prims('exp').def_grad(lambda ans, x: lambda g: g * ans)
    prims('log').def_grad(lambda ans, x: lambda g: g / x)
    # reduce
    prims('sum').def_grad(_sum_grad)
    # + - * /
    prims('multiply').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g * y))
    prims('multiply').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: x * g), argnum=1)
    prims('add').def_grad(lambda ans, x, y: _unbroadcast(ans, x, identity))
    prims('add').def_grad(lambda ans, x, y: _unbroadcast(ans, y, identity),
                          argnum=1)
    prims('subtract').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, identity))
    prims('subtract').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, operator.neg), argnum=1)
    prims('divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g / y))
    prims('divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: -g * x / (y * y)),
        argnum=1)
    prims('true_divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, x, lambda g: g / y))
    prims('true_divide').def_grad(
        lambda ans, x, y: _unbroadcast(ans, y, lambda g: -g * x / (y * y)),
        argnum=1)
    prims('maximum').def_grad(_maximum_grad_gen0)
    prims('maximum').def_grad(_maximum_grad_gen1, argnum=1)
    # TODO: minjie
    prims('max').def_grad_zero()
    # negate
    prims('negative').def_grad(lambda ans, x: operator.neg)
    prims('transpose').def_grad(lambda ans, x: mxnet.nd.transpose)
    prims('abs').def_grad(lambda ans, x: lambda g: mxnet.nd.sign(x) * g)
    prims('sign').def_grad_zero()
    prims('round').def_grad_zero()
    prims('ceil').def_grad_zero()
    prims('floor').def_grad_zero()
    prims('sqrt').def_grad(lambda ans, x: lambda g: g * 0.5 / mxnet.nd.sqrt(x))
    prims('sin').def_grad(lambda ans, x: lambda g: g * mxnet.nd.cos(x))
    prims('cos').def_grad(lambda ans, x: lambda g: -g * mxnet.nd.sin(x))
    prims('power').def_grad(lambda ans, x, y: _unbroadcast(
        ans, x, lambda g: g * y * mxnet.nd.power(x, y - 1)))
    prims('power').def_grad(lambda ans, x, y: _unbroadcast(
        ans, y, lambda g: g * mxnet.nd.log(x) * ans),
                            argnum=1)
    prims('reshape').def_grad(
        lambda _0, x, _1: lambda g: NDArray.reshape(g, x.shape))
    prims('expand_dims').def_grad(
        lambda ans, x, axis: lambda g: NDArray.reshape(g, x.shape))
Example #11
def net(x):
    x = x.reshape((-1, num_inputs))
    h1 = relu(nd.dot(x, w1) + b1)
    h1 = dropout(h1, drou_prop1)
    h10 = relu(nd.dot(h1, w10) + b10)
    h10 = dropout(h10, drou_prop2)
    output = nd.dot(h10, w2) + b2
    return output
Example #12
    def function_set(self):
        def batch_norm(X, gamma, beta, is_training, moving_mean, moving_variance, eps=1e-5, moving_momentum=0.9):
            assert len(X.shape) in (2, 4)
            # Fully connected: batch_size x feature
            if len(X.shape) == 2:
                # Mean and variance of each input dimension over the batch
                mean = X.mean(axis=0)
                variance = ((X - mean) ** 2).mean(axis=0)
            # 2D convolution: batch_size x channel x height x width
            else:
                # Compute mean and variance per channel; keep the 4D shape so broadcasting works correctly
                mean = X.mean(axis=(0, 2, 3), keepdims=True)
                variance = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
                # Reshape so broadcasting works correctly
                moving_mean = moving_mean.reshape(mean.shape)
                moving_variance = moving_variance.reshape(mean.shape)
            # Normalize
            if is_training:
                X_hat = (X - mean) / nd.sqrt(variance + eps)
                # !!! Update the global (moving) mean and variance
                # Each batch uses 0.9 of the previous moving statistic and 0.1 of the current batch
                moving_mean[:] = moving_momentum * moving_mean + (1.0 - moving_momentum) * mean
                moving_variance[:] = moving_momentum * moving_variance + (1.0 - moving_momentum) * variance
            else:
                # !!! At test time, use the global (moving) mean and variance
                X_hat = (X - moving_mean) / nd.sqrt(moving_variance + eps)

            # Scale and shift
            return gamma.reshape(mean.shape) * X_hat + beta.reshape(mean.shape)

        # First convolutional layer
        h1_conv = nd.Convolution(
            data=self.__batch_X, weight=self.__W1, bias=self.__b1, kernel=(5, 5), num_filter=20)
        # First BN
        h1_bn = batch_norm(
            h1_conv, self.__gamma1, self.__beta1, self.__is_training, self.__moving_mean1, self.__moving_variance1)
        h1_activation = nd.relu(h1_bn)
        h1 = nd.Pooling(
            data=h1_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))

        # Second convolutional layer
        h2_conv = nd.Convolution(
            data=h1, weight=self.__W2, bias=self.__b2, kernel=(3, 3), num_filter=50)
        # Second BN
        h2_bn = batch_norm(
            h2_conv, self.__gamma2, self.__beta2, self.__is_training, self.__moving_mean2, self.__moving_variance2)
        h2_activation = nd.relu(h2_bn)
        h2 = nd.Pooling(data=h2_activation, pool_type="max", kernel=(2, 2), stride=(2, 2))
        h2 = nd.flatten(h2)

        # First fully connected layer
        h3_linear = nd.dot(h2, self.__W3) + self.__b3
        h3 = nd.relu(h3_linear)

        # Second fully connected layer
        h4_linear = nd.dot(h3, self.__W4) + self.__b4

        return h4_linear
Example #13
File: main.py Project: z01nl1o02/tests
def rnn(inputs, state, *params):
    H = state
    W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H)
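An illustrative way to drive the rnn() above (all names and sizes below are invented for the sketch, not taken from the project): inputs is a list of (batch_size, input_dim) matrices, one per time step, and state is the initial hidden state.

from mxnet import nd

# Hypothetical sizes, for illustration only.
batch_size, input_dim, hidden_dim, output_dim, num_steps = 2, 5, 4, 3, 6
W_xh = nd.random.normal(shape=(input_dim, hidden_dim)) * 0.01
W_hh = nd.random.normal(shape=(hidden_dim, hidden_dim)) * 0.01
b_h = nd.zeros(hidden_dim)
W_hy = nd.random.normal(shape=(hidden_dim, output_dim)) * 0.01
b_y = nd.zeros(output_dim)
inputs = [nd.random.normal(shape=(batch_size, input_dim)) for _ in range(num_steps)]
state = nd.zeros((batch_size, hidden_dim))
outputs, H = rnn(inputs, state, W_xh, W_hh, b_h, W_hy, b_y)
print(len(outputs), outputs[0].shape, H.shape)  # 6 (2, 3) (2, 4)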
Example #14
def rnn(inputs, state, *params):
    H = state
    W_xh, W_hh, b_h, W_hy, b_y = params
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        Y = nd.dot(H, W_hy) + b_y
        outputs.append(Y)
    return (outputs, H)
Example #15
    def network(self, X=None, debug=False,):
                
        filters, kernels, stride, padding, dilate = self.conv_params['num_filter'], self.conv_params['kernel'], \
                                                    self.conv_params['stride'], self.conv_params['padding'], self.conv_params['dilate']
        type_pool, kernels_pool, stride_pool, padding_pool, dilate_pool =  self.pool_params['pool_type'], \
                                                                           self.pool_params['kernel'], self.pool_params['stride'], \
                                                                           self.pool_params['padding'], self.pool_params['dilate']
        act_type = self.act_params['act_type']
        hidden_dim = self.fc_params['hidden_dim']
        
        
        # CNN ##########################################################################################################
        convlayer_out = X
        interlayer = []
        for i, (nf, k, S, P, D, t_p, k_p, S_p, P_p, D_p, a) in enumerate(zip(filters, kernels, stride, padding, dilate, 
                                                                     type_pool, kernels_pool, stride_pool, padding_pool, dilate_pool,
                                                                     act_type)):
            W, b = self.params['W{:d}'.format(i+1,)], self.params['b{:d}'.format(i+1,)]
            convlayer_out = nd.Convolution(data = convlayer_out, weight=W, bias=b, kernel=k, num_filter=nf, stride=S, dilate=D)
            convlayer_out = activation(convlayer_out, act_type = a)
            convlayer_out = nd.Pooling(data=convlayer_out, pool_type=t_p, kernel=k_p, stride=S_p, pad=P_p)

            interlayer.append(convlayer_out)
            i_out = i
            if debug:
                print("layer{:d} shape: {}".format(i+1, convlayer_out.shape))
        
        # MLP ##########################################################################################################
        FClayer_out = nd.flatten(convlayer_out)
        interlayer.append(FClayer_out)
        if debug:
            print("After Flattened, Data shape: {}".format(FClayer_out.shape))

        for j, (hd, a) in enumerate(zip(hidden_dim, act_type[-len(hidden_dim):])):
            W, b = self.params['W{:d}'.format(j+i_out+2,)], self.params['b{:d}'.format(j+i_out+2,)]
            FClayer_out = nd.dot(FClayer_out, W) + b
            FClayer_out = activation(FClayer_out, act_type = a)
            
            if autograd.is_training():
                # Apply dropout to the activation output
                FClayer_out = dropout(FClayer_out, self.drop_prob)
            if debug:
                print("layer{:d} shape: {}".format(j+i_out+2, FClayer_out.shape))
            interlayer.append(FClayer_out)            
            j_out = j
            
        # OUTPUT ##########################################################################################################
        W, b = self.params['W{:d}'.format(j_out+i_out+3,)], self.params['b{:d}'.format(j_out+i_out+3,)]            
        yhat = nd.dot(FClayer_out, W) + b

        if debug:
            print("Output shape: {}".format(yhat.shape))
            print('------------')
        interlayer.append(yhat)       

        return yhat, interlayer
Example #16
File: dropout.py Project: wk738126046/ML
def net(x, is_training=False):
    # w1, b1, w2, b2, w3, b3 = params = initParam(verbose=True)
    x = x.reshape(shape=(-1, num_input))  # (256,784)
    # print(x.shape)
    x1 = nd.relu(nd.dot(x, w1) + b1)
    if is_training: x1 = dropout(x1, 0.8)
    x2 = nd.relu(nd.dot(x1, w2) + b2)
    if is_training: x2 = dropout(x2, 0.5)
    out = nd.dot(x2, w3) + b3
    return out
Example #17
def net(X):
    X = X.reshape((-1, num_inputs))
    h1 = nd.dot(X, w1) + b1
    h1 = nd.relu(h1)
    h1 = dropout(h1, dropout_prob_1)
    h2 = nd.dot(h1, w2) + b2
    h2 = nd.relu(h2)
    h2 = dropout(h2, dropout_prob_2)
    y = nd.dot(h2, w3) + b3
    return y
Example #18
def rnn(inputs, H):
    # inputs: a list of seq_len matrices, each of shape batch_size x vocab_size
    # H: hidden state of shape batch_size x num_hidden
    # outputs: a list of seq_len matrices, each of shape batch_size x vocab_size
    outputs = []
    for X in inputs:
        H = nd.tanh(nd.dot(X, Wxh) + nd.dot(H, Whh) + bh)
        Y = nd.dot(H, Why) + by
        outputs.append(Y)
    return (outputs, H)
Example #19
def net(X):
    X = X.reshape((-1, num_inputs))
    # First fully connected layer.
    h1 = nd.relu(nd.dot(X, W1) + b1)
    # Add a dropout layer after the first fully connected layer.
    h1 = dropout(h1, drop_prob1)
    # Second fully connected layer.
    h2 = nd.relu(nd.dot(h1, W2) + b2)
    # Add a dropout layer after the second fully connected layer.
    h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, W3) + b3
Example #20
    def net(self, X):
        X = X.reshape(-1, self.num_inputs)

        H1 = (nd.dot(X, self.W1) + self.b1).relu()
        if autograd.is_training():
            H1 = dropout(H1, self.drop_prob1)

        H2 = (nd.dot(H1, self.W2) + self.b2).relu()
        if autograd.is_training():
            H2 = dropout(H2, self.drop_prob2)

        return nd.dot(H2, self.W3) + self.b3
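For context on the autograd.is_training() guard above: MXNet reports training mode only inside autograd.record() (with its default train_mode=True), so dropout is applied during the training forward pass and skipped at inference. A small sketch:

from mxnet import autograd

print(autograd.is_training())      # False -> the dropout branches above are skipped
with autograd.record():
    print(autograd.is_training())  # True  -> the dropout branches above are taken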
Example #21
    def function_set(self):
        # relu = lambda x: nd.maximum(x, 0)

        def relu(x):
            return nd.maximum(x, 0)

        hidden_layer_before_act = nd.dot(
            self.__batch_X.reshape(
                (-1, self.__num_input)), self.__w1) + self.__b1
        hidden_layer_after_act = relu(hidden_layer_before_act)
        output_layer_before_act = nd.dot(hidden_layer_after_act,
                                         self.__w2) + self.__b2

        return output_layer_before_act
Example #22
    def contrastive_divergence(self,
                               input,
                               lr=0.1,
                               cdk=1,
                               batch_size=None,
                               shuffle=False):
        n_sample = input.shape[0]
        if batch_size is None or batch_size == 0: batch_size = n_sample

        labels = nd.ones([n_sample, 1], ctx=self.ctx)
        dataiter = mx.io.NDArrayIter(input,
                                     labels,
                                     batch_size,
                                     shuffle,
                                     last_batch_handle='discard')

        for batch in dataiter:
            sub = batch.data[0]

            ph_prob, ph_sample = self.sample_h_given_v(sub)
            chain_start = ph_sample

            for step in range(cdk):
                if step == 0:
                    nv_prob, nv_sample, nh_prob, nh_sample = self.gibbs_hvh(
                        chain_start)
                else:
                    nv_prob, nv_sample, nh_prob, nh_sample = self.gibbs_hvh(
                        nh_sample)

            if self.M_coeff > 0:
                self.dW *= self.M_coeff
                self.dv *= self.M_coeff
                self.dh *= self.M_coeff
                self.dW += (nd.dot(sub.T, ph_prob) -
                            nd.dot(nv_sample.T, nh_prob)) * lr / batch_size
                self.dv += nd.mean(sub - nv_sample, axis=0) * lr
                self.dh += nd.mean(ph_prob - nh_prob, axis=0) * lr
            else:
                self.dW = (nd.dot(sub.T, ph_prob) -
                           nd.dot(nv_sample.T, nh_prob)) * lr / batch_size
                self.dv = nd.mean(sub - nv_sample, axis=0) * lr
                self.dh = nd.mean(ph_prob - nh_prob, axis=0) * lr

            self.W += self.dW
            self.vb += self.dv
            self.hb += self.dh

            self.W_decay(lr)
        return
Example #23
File: lstm.py Project: ylxdzsw/xi-rnn
def lstm(x, h, c, Wxi, Wxf, Wxo, Whi, Whf, Who, Wxc, Whc, bi, bf, bo, bc):
    i = nd.sigmoid(nd.dot(x, Wxi) + nd.dot(h, Whi) + bi)
    f = nd.sigmoid(nd.dot(x, Wxf) + nd.dot(h, Whf) + bf)
    o = nd.sigmoid(nd.dot(x, Wxo) + nd.dot(h, Who) + bo)
    c̃ = nd.tanh(nd.dot(x, Wxc) + nd.dot(h, Whc) + bc)
    c = f * c + i * c̃
    h = o * nd.tanh(c)
    return h, c
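A hedged single-step usage sketch for lstm() above (all sizes and the helper below are invented; parameter names follow the signature): each gate takes an input-to-hidden and a hidden-to-hidden weight matrix plus a bias.

from mxnet import nd

batch, n_in, n_hid = 2, 5, 4
def rand_param(rows, cols):  # helper for this sketch only
    return nd.random.normal(shape=(rows, cols)) * 0.01
Wxi, Wxf, Wxo, Wxc = (rand_param(n_in, n_hid) for _ in range(4))
Whi, Whf, Who, Whc = (rand_param(n_hid, n_hid) for _ in range(4))
bi, bf, bo, bc = (nd.zeros(n_hid) for _ in range(4))
x = nd.random.normal(shape=(batch, n_in))
h, c = nd.zeros((batch, n_hid)), nd.zeros((batch, n_hid))
h, c = lstm(x, h, c, Wxi, Wxf, Wxo, Whi, Whf, Who, Wxc, Whc, bi, bf, bo, bc)
print(h.shape, c.shape)  # (2, 4) (2, 4)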
Example #24
File: hack_mabdqn.py Project: WowCZ/strac
    def bayes_forward(self,
                      x,
                      dense,
                      loss,
                      activation_fn=None,
                      is_target=False):
        weight = self.get_sample(mu=dense.weight_mu.data(),
                                 rho=dense.weight_rho.data(),
                                 is_target=is_target)
        bias = self.get_sample(mu=dense.bias_mu.data(),
                               rho=dense.bias_rho.data(),
                               is_target=is_target)

        loss = loss + log_gaussian(x=weight,
                                   mu=dense.weight_mu.data(),
                                   sigma=softplus(dense.weight_rho.data()))
        loss = loss + log_gaussian(x=bias,
                                   mu=dense.bias_mu.data(),
                                   sigma=softplus(dense.bias_rho.data()))
        loss = loss - log_gaussian(x=weight, mu=0., sigma=self.sigma_prior)
        loss = loss - log_gaussian(x=bias, mu=0., sigma=self.sigma_prior)

        result = nd.dot(x, weight) + bias
        if activation_fn is None:
            return result
        elif activation_fn == 'relu':
            return nd.relu(result)
Example #25
    def forward(self, graph, ufeat, ifeat):
        """Forward function.

        Parameters
        ----------
        graph : DGLHeteroGraph
            "Flattened" user-movie graph with only one edge type.
        ufeat : mx.nd.NDArray
            User embeddings. Shape: (|V_u|, D)
        ifeat : mx.nd.NDArray
            Movie embeddings. Shape: (|V_m|, D)

        Returns
        -------
        mx.nd.NDArray
            Predicting scores for each user-movie edge.
        """
        graph = graph.local_var()
        ufeat = self.dropout(ufeat)
        ifeat = self.dropout(ifeat)
        graph.nodes['movie'].data['h'] = ifeat
        basis_out = []
        for i in range(self._num_basis_functions):
            graph.nodes['user'].data['h'] = F.dot(ufeat, self.Ps[i].data())
            graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
            basis_out.append(graph.edata['sr'])
        out = F.concat(*basis_out, dim=1)
        out = self.rate_out(out)
        return out
Example #26
def main():
    t1 = time.time()
    # generating dataset
    num_features = 5
    total = 10000
    weights = [1.5, -3.4, -2.6, 7.2, -3.0]
    biases = 2.6
    X = nd.random_normal(shape=(total, num_features),ctx=ctx)
    Y = (weights[0] * X[:, 0] + weights[1] * X[:, 1] + weights[2] * X[:, 2]
         + weights[3] * X[:, 3] + weights[4] * X[:, 4] + biases)
    # Y +=  nd.random_normal(shape=Y.shape)

    # initialize the parameters
    W_hat = nd.random_normal(shape=(num_features, 1),ctx=ctx)
    b_hat = nd.random_normal(shape=(1,),ctx=ctx)
    for i in [W_hat, b_hat]:
        i.attach_grad()

    # training
    epochs = 10
    lr = 0.001
    total_loss = 0
    for epoch in range(epochs):
        for x_, y_ in data_iter(X, Y):
            with ad.record():
                loss = compute_loss(nd.dot(x_ , W_hat) + b_hat, y_)
            loss.backward()

            SGD([W_hat, b_hat], lr)
            total_loss += nd.sum(loss).asscalar()
            # print("Epoch %d, average loss: %f" % (epoch, total_loss / total))
    print(W_hat, b_hat)
    print(time.time() - t1)
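The snippet above relies on helpers (ctx, data_iter, compute_loss, SGD) defined elsewhere in that script; the following is only a plausible sketch of them, with the CPU context, batch size, and squared-error loss being assumptions rather than the original definitions.

import random
import mxnet as mx
from mxnet import ndarray as nd

ctx = mx.cpu()  # assumption: the original may use a GPU context

def data_iter(X, Y, batch_size=64):  # hypothetical batch size
    idx = list(range(X.shape[0]))
    random.shuffle(idx)
    for i in range(0, len(idx), batch_size):
        j = nd.array(idx[i:i + batch_size], ctx=ctx)
        yield X.take(j), Y.take(j)

def compute_loss(y_hat, y):
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2  # plain squared error

def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad  # in-place update keeps the attached grad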
Example #27
    def train_model(self):
        for param in self.__params:
            param.attach_grad()

        for e in range(self.__epochs):
            mean_train_loss = 0
            mean_test_loss = 0
            for self.__batch_X, self.__batch_y in self.train_iter():
                with autograd.record():
                    self.__batch_y_hat = self.function_set()
                    train_loss = self.goodness_of_function_loss_function()
                train_loss.backward()
                self.goodness_of_function_optimizer_function()

                mean_train_loss += nd.mean(train_loss).asscalar()

                test_y_hat = nd.dot(self.__X_test, self.__w) + self.__b
                test_loss = ((test_y_hat - self.__y_test)**2 / 2 +
                             self.__lamda *
                             ((self.__w**2).sum() + self.__b**2) / 2)
                mean_test_loss += nd.mean(test_loss).asscalar()

            print("Epoch %d, train average loss: %f" %
                  (e, mean_train_loss / self.__num_train))
            print("Epoch %d, test  average loss: %f" %
                  (e, mean_test_loss / self.__num_test))
Example #28
def getDate():
    true_w = nd.random_normal(shape=(num_input, 1)) * 0.01
    true_b = 0.05
    x = nd.zeros(shape=(num_train + num_test, num_input))
    y = nd.dot(x, true_w) + true_b
    y += nd.random_normal(shape=y.shape) * 0.01
    return x, y
Example #29
File: hack_bdqn.py Project: WowCZ/strac
    def forward(self, inputs, is_target=False):
        result = None
        loss = 0.
        for _ in range(self.n_samples):
            tmp = inputs

            weights = []
            biases = []
            for i in range(len(self.weight_mus)):
                weights.append(self.get_sample(
                    mu=self.weight_mus[i].data(), rho=self.weight_rhos[i].data(), is_target=is_target))
                biases.append(self.get_sample(mu=self.bias_mus[i].data(), rho=self.bias_rhos[i].data(), is_target=is_target))
                loss = loss + log_gaussian(
                    x=weights[-1], mu=self.weight_mus[i].data(), sigma=softplus(self.weight_rhos[i].data()))
                loss = loss + log_gaussian(x=biases[-1], mu=self.bias_mus[i].data(), sigma=softplus(self.bias_rhos[i].data()))
                loss = loss - log_gaussian(x=weights[-1], mu=0., sigma=self.sigma_prior)
                loss = loss - log_gaussian(x=biases[-1], mu=0., sigma=self.sigma_prior)
            for i in range(len(weights)):
                tmp = nd.dot(tmp, weights[i]) + biases[i]
                if i != len(weights) - 1:
                    tmp = nd.relu(tmp)
            if result is None:
                result = nd.zeros_like(tmp)
            result = result + tmp
        result = result / float(self.n_samples)
        loss = loss / float(self.n_samples)
        return result, loss
Example #30
def bilinear(x, W, y, input_size, seq_len, batch_size, num_outputs=1, bias_x=False, bias_y=False):
    """
    Do xWy

    :param x: (input_size x seq_len) x batch_size
    :param W: (num_outputs x ny) x nx
    :param y: (input_size x seq_len) x batch_size
    :param input_size: input dimension
    :param seq_len: sequence length
    :param batch_size: batch size
    :param num_outputs: number of outputs
    :param bias_x: whether concat bias vector to input x
    :param bias_y: whether concat bias vector to input y
    :return: [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)

    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])  # May cause performance issues
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin, (seq_len, num_outputs, seq_len, batch_size))
    return blin
Example #31
    def __init__(self, true_w, true_b, num_inputs: int, num_examples: int,
                 batch_size: int):
        self.features = nd.random.normal(scale=1,
                                         shape=(num_examples, num_inputs))
        self.labels = nd.dot(self.features, true_w) + true_b
        self.labels += nd.random.normal(scale=0.01, shape=self.labels.shape)
        self.batch_size = batch_size
Example #32
File: utils.py Project: xihao-1223/GaAN
def get_global_norm(arrays):
    ctx = arrays[0].context
    total_norm = nd.add_n(*[
        nd.dot(x, x).as_in_context(ctx)
        for x in (arr.reshape((-1, )) for arr in arrays)
    ])
    total_norm = nd.sqrt(total_norm).asscalar()
    return total_norm
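A quick illustrative check (toy arrays, not from the project): the result of get_global_norm() should equal the L2 norm of all parameters flattened into one long vector.

from mxnet import nd

arrays = [nd.array([[3.0, 0.0]]), nd.array([4.0])]
print(get_global_norm(arrays))  # 5.0
print(nd.norm(nd.concat(*[a.reshape((1, -1)) for a in arrays], dim=1)).asscalar())  # 5.0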
Example #33
def getData():
    true_w = nd.ones((num_input, 1)) * 0.01
    true_b = 0.05
    x = nd.random_normal(shape=(num_train + num_test, num_input))
    # y = nd.sun([0.01*x[i] for i in range(num_train+num_test)])
    y = nd.dot(x, true_w) + true_b
    y += 0.01 * nd.random_normal(shape=y.shape)
    return x, y
Example #34
def rnn(_inputs, initial_state, *parameters):
    # _inputs: a list with length num_steps,
    # corresponding element: batch_size * input_dim matrix

    H = initial_state

    W_xh, W_hh, b_h, W_hy, b_y = parameters

    _outputs = []

    for X in _inputs:
        # compute hidden state from input and last/initial hidden state
        H = nd.tanh(nd.dot(X, W_xh) + nd.dot(H, W_hh) + b_h)
        # compute output from hidden state
        Y = nd.dot(H, W_hy) + b_y
        _outputs.append(Y)

    return _outputs, H
Example #35
File: mxnet_core.py Project: dsqx71/minpy
def def_grads(reg, prims):
    def identity(x):
        return x
    # dot
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(g, b.T))
    prims('dot').def_grad(lambda ans, a, b: lambda g: ndarray.dot(a.T, g), argnum=1)
    # non-linear
    #prims.tanh.def_grad(lambda ans, x: lambda g: g / np.cosh(x) ** 2)
    prims('exp').def_grad(lambda ans, x: lambda g: g * ans)
    prims('log').def_grad(lambda ans, x: lambda g: g / x)
    # reduce
    prims('sum').def_grad(lambda ans, x, axis=None, keepdims=False: gen_sum_grad(ans, x, axis, keepdims))
    # + - * /
    prims('multiply').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g * y))
    prims('multiply').def_grad(lambda ans, x, y: unbroadcast(ans, y, lambda g: x * g), argnum=1)
    prims('add').def_grad(lambda ans, x, y: unbroadcast(ans, x, identity))
    prims('add').def_grad(lambda ans, x, y: unbroadcast(ans, y, identity), argnum=1)
    prims('subtract').def_grad(lambda ans, x, y: unbroadcast(ans, x, identity))
    prims('subtract').def_grad(lambda ans, x, y: unbroadcast(ans, y, operator.neg), argnum=1)
    prims('divide').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g / y))
    prims('divide').def_grad(
            lambda ans, x, y: unbroadcast(ans, y, lambda g: - g * x / (y * y)),
            argnum=1)
    prims('true_divide').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g / y))
    prims('true_divide').def_grad(
            lambda ans, x, y: unbroadcast(ans, y, lambda g: - g * x / (y * y)),
            argnum=1)
    # mod
    #prims.mod.def_grad(lambda ans, x, y : unbroadcast(ans, x, identity))
    #prims.mod.def_grad(lambda ans, x, y : unbroadcast(ans, y, lambda g : - g * ndarray.floor(x/y)), argnum=1)
    # negate
    prims('negative').def_grad(lambda ans, x: operator.neg)
    prims('transpose').def_grad(lambda ans, x: mxnet.nd.transpose)
    prims('abs').def_grad(lambda ans, x: lambda g: mxnet.nd.sign(x) * g)
    prims('sign').def_grad_zero()
    prims('round').def_grad_zero()
    prims('ceil').def_grad_zero()
    prims('floor').def_grad_zero()
    prims('sqrt').def_grad(lambda ans, x: lambda g: g * 0.5 / mxnet.nd.sqrt(x))
    prims('sin').def_grad(lambda ans, x: lambda g: g * mxnet.nd.cos(x))
    prims('cos').def_grad(lambda ans, x: lambda g: -g * mxnet.nd.sin(x))
    prims('power').def_grad(lambda ans, x, y: unbroadcast(ans, x, lambda g: g * y * mxnet.nd.NDArray._power(x, y - 1)))
    prims('power').def_grad(lambda ans, x, y: unbroadcast(ans, y, lambda g: g * mxnet.nd.log(x) * ans), argnum=1)
    prims('reshape').def_grad(lambda _0, x, _1: lambda g: mxnet.nd.NDArray.reshape(g, x.shape))
Example #36
def bilinear(x, W, y, input_size, seq_len, batch_size, num_outputs=1, bias_x=False, bias_y=False):
    """Do xWy

    Parameters
    ----------
    x : NDArray
        (input_size x seq_len) x batch_size
    W : NDArray
        (num_outputs x ny) x nx
    y : NDArray
        (input_size x seq_len) x batch_size
    input_size : int
        input dimension
    seq_len : int
        sequence length
    batch_size : int
        batch size
    num_outputs : int
        number of outputs
    bias_x : bool
        whether concat bias vector to input x
    bias_y : bool
        whether concat bias vector to input y

    Returns
    -------
    output : NDArray
        [seq_len_y x seq_len_x if output_size == 1 else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)

    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])  # May cause performance issues
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin, (seq_len, num_outputs, seq_len, batch_size))
    return blin
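A hedged usage sketch for bilinear() above (shapes and values are illustrative only): with num_outputs=1 and no bias terms it scores every pair of positions, producing x_i^T W y_j for all i, j within each batch element.

from mxnet import nd

input_size, seq_len, batch_size = 4, 7, 2
x = nd.random.normal(shape=(input_size, seq_len, batch_size))
y = nd.random.normal(shape=(input_size, seq_len, batch_size))
W = nd.random.normal(shape=(input_size, input_size))
scores = bilinear(x, W, y, input_size, seq_len, batch_size)
print(scores.shape)  # (7, 7, 2)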
Example #37
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon

num_train = 20
num_test = 100
num_inputs = 200

true_w = nd.ones((num_inputs, 1)) * 0.01
true_b = 0.05

X = nd.random.normal(shape=(num_train + num_test, num_inputs))
y = nd.dot(X, true_w)
y += .01 * nd.random.normal(shape=y.shape)

X_train, X_test = X[:num_train, :], X[num_train:, :]
y_train, y_test = y[:num_train], y[num_train:]

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 120
import matplotlib.pyplot as plt

batch_size = 1
dataset_train = gluon.data.ArrayDataset(X_train, y_train)
data_iter_train = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)

square_loss = gluon.loss.L2Loss()

def test(net, X, y):
    return square_loss(net(X), y).mean().asscalar()
Example #38
def net(X):
    return nd.dot(X, w) + b # return the prediction value
Example #39
    def __init__(self):
        self.num_inputs = 3
        self.training_size = 1000
        self.batch_size = 10
        self.learning_rate = 1e-2
        self.num_epochs = 5


config = Config()


true_w = [2.5, 4.7, -3.2]
true_b = 2.9

X = nd.random_normal(shape=(config.training_size, config.num_inputs))
y = nd.dot(X, nd.array(true_w)) + true_b
y += 0.01 * nd.random_normal(shape=y.shape)


def data_generator(batch_size):
    index = list(range(config.training_size))
    random.shuffle(index)

    for i in range(0, config.training_size, batch_size):
        j = nd.array(index[i:min(i + batch_size, config.training_size)])
        yield nd.take(X, j), nd.take(y, j)


w = nd.random_normal(shape=(config.num_inputs, 1))
b = nd.zeros((1,))
parameters = [w, b]
Example #40
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet import gluon
import mxnet as mx

num_train = 20  # training set size
num_test = 100  # test set size
num_inputs = 200  # number of input features (the x_i)
# true model parameters
true_w = nd.ones((num_inputs, 1)) * 0.01  # weights
true_b = 0.05  # bias

# generate the dataset
X = nd.random.normal(shape=(num_train + num_test, num_inputs))  # inputs
y = nd.dot(X, true_w) + true_b  # y = 0.05 + sum(0.01*xi)
y += .01 * nd.random.normal(shape=y.shape)  # noise: y = 0.05 + sum(0.01*xi) + noise

X_train, X_test = X[:num_train, :], X[num_train:, :]  # rows 0-19 and rows 20-119
y_train, y_test = y[:num_train], y[num_train:]

# read the data in mini-batches
import random
batch_size = 1
def data_iter(num_examples):
    idx = list(range(num_examples))
    random.shuffle(idx)  # shuffle
    for i in range(0, num_examples, batch_size):
        j = nd.array(idx[i:min(i+batch_size,num_examples)])
        yield X.take(j), y.take(j)
Example #41
def net(X):
    return nd.dot(X, w) + b
Example #42
def net(X):
    X = X.reshape((-1, num_inputs))
    h1 = relu(nd.dot(X, W1) + b1)  # hidden layer output with nonlinear activation
    output = nd.dot(h1, W2) + b2
    return output
Example #43
def net(X):
    X = X.reshape((-1, num_inputs))
    h1 = relu(nd.dot(X, W1) + b1)
    output = nd.dot(h1, W2) + b2
    return output
Example #44
def model(_input):
    return nd.dot(_input, w) + b
Example #45
def net(X):
    return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b)  # y = softmax(X*W + b)
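The softmax helper used in the last snippet is not shown here; a common from-scratch definition (with a max-subtraction added for numerical stability, which the original may or may not include) looks like this:

from mxnet import nd

def softmax(X):
    # Subtracting the row-wise max keeps exp() from overflowing; it does not change the result.
    exp = nd.exp(X - X.max(axis=1, keepdims=True))
    return exp / exp.sum(axis=1, keepdims=True)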