Code Example #1
 def __init__(self, inshape, hidshape, noutputs):
     ninput = np.prod(inshape)
     nhid = np.prod(hidshape)
     nparams = (ninput + 1) * nhid + (nhid * noutputs)
     # TODO: 
     self.params = np.empty(nparams)
     self._grad = np.empty(nparams)
     inhidwts = ninput * nhid
     hidoutwts = nhid * noutputs
     self.layers = [
         LinearLayer(
             inshape,
             hidshape,
             params=self.params[0:inhidwts],
             grad=self._grad[0:inhidwts]
         ),
         LogisticLayer(
             hidshape,
             params=self.params[inhidwts:(inhidwts + nhid)],
             grad=self._grad[inhidwts:(inhidwts + nhid)]
         ),
         LinearLayer(
             hidshape,
             noutputs,
             params=self.params[(inhidwts + nhid):],
             grad=self._grad[(inhidwts + nhid):]
         ),
         SoftmaxLayer()
     ]
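
The constructor above allocates one flat parameter vector and one flat gradient vector and hands each layer a slice of them. The pattern relies on NumPy basic slices being views, so anything a layer writes through its slice is visible in the shared buffer (presumably so an optimizer can update every parameter through self.params at once). A minimal standalone sketch of that behavior, plain NumPy with no LinearLayer dependency:

import numpy as np

params = np.empty(6)
w_hidden = params[0:4]   # a view into params, not a copy
w_out = params[4:6]      # likewise a view

w_hidden[:] = 1.0        # in-place writes go straight into the shared buffer
w_out[:] = 2.0
print(params)            # [1. 1. 1. 1. 2. 2.]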
Code Example #2
File: dynamic_quant_layers.py Project: LZHgrla/OQA
 def get_active_subnet(self, in_features, preserve_weight=True):
     sub_layer = LinearLayer(in_features, self.out_features, self.bias, dropout_rate=self.dropout_rate)
     sub_layer = sub_layer.to(get_net_device(self))
     if not preserve_weight:
         return sub_layer
     
     sub_layer.linear.weight.data.copy_(self.linear.linear.weight.data[:self.out_features, :in_features])
     if self.bias:
         sub_layer.linear.bias.data.copy_(self.linear.linear.bias.data[:self.out_features])
     return sub_layer
Code Example #3
def test_LinearLayer():
    """
    Test LinearLayer by comparing the network's computation to basic matrix multiplication.

    Here we generate a set of random inputs and weights, and compare
    the output of the feedforward pass as well as the computed
    gradient for the weights between our own implementation and
    PyTorch's native one.
    """

    # Generate input data.
    images = torch.randn(2, 10)

    # Generate initial weights and masks.
    weights_mask = torch.randn(10, 10)
    weights_mask[weights_mask < 0] = 0
    weights_mask[weights_mask > 0] = 1
    initial_weights = torch.randn(10, 10) * weights_mask
    assert initial_weights.grad is None

    ############## our method ################
    weights_ours = initial_weights.clone()
    weights_ours.requires_grad = True

    # Create the layer and compute the output.
    layer = LinearLayer(weights_ours, weights_mask)
    output_ours = layer.forward(images)
    loss_ours = ((output_ours - 1)**2).mean()
    loss_ours.backward()
    grad_ours = weights_ours.grad

    ############## pytorch's ################
    weights_py = initial_weights.clone()
    weights_py.requires_grad = True

    # Create output without using our own layer implementation.
    def make_backward_hook(weight_mask):
        """ Helper function to create a backward hook for masking 
        gradients.
        """
        return lambda grad: grad * weight_mask

    weights_py.register_hook(make_backward_hook(weights_mask))
    output_py = torch.mm(images, weights_py.t())
    loss_py = ((output_py - 1)**2).mean()
    loss_py.backward()
    grad_py = weights_py.grad

    ############# compare ################
    assert torch.all(torch.eq(output_ours, output_py))
    assert torch.all(torch.eq(loss_ours, loss_py))
    assert torch.all(torch.eq(grad_ours, grad_py))
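
The PyTorch reference path above uses Tensor.register_hook to zero out the gradients of masked weights during the backward pass. A minimal standalone sketch of that mechanism, independent of the LinearLayer under test:

import torch

w = torch.randn(3, 3, requires_grad=True)
mask = (torch.randn(3, 3) > 0).float()
w.register_hook(lambda grad: grad * mask)  # applied to the gradient during backward

loss = ((w - 1) ** 2).mean()
loss.backward()
print(torch.all((w.grad == 0) | (mask == 1)))  # masked entries receive zero gradient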
Code Example #4
File: model_ws.py Project: kcyu2014/SemiNAS
    def __init__(self, width_stages, n_cell_stages, stride_stages, dropout=0):
        super(NASNet, self).__init__()

        self.width_stages = width_stages
        self.n_cell_stages = n_cell_stages
        self.stride_stages = stride_stages

        in_channels = 32
        first_cell_width = 16

        # first conv layer
        self.first_conv = ConvLayer(3, in_channels, 3, 2, 1, 1, False, False,
                                    True, 'relu6', 0, 'weight_bn_act')

        # first block
        first_block_config = {
            "name": "MobileInvertedResidualBlock",
            "mobile_inverted_conv": {
                "name": "MBInvertedConvLayer",
                "in_channels": in_channels,
                "out_channels": first_cell_width,
                "kernel_size": 3,
                "stride": 1,
                "expand_ratio": 1
            },
            "shortcut": None
        }
        self.first_block = MobileInvertedResidualBlock.build_from_config(
            first_block_config)
        in_channels = first_cell_width

        # blocks
        self.blocks = nn.ModuleList()
        for width, n_cell, s in zip(self.width_stages, self.n_cell_stages,
                                    self.stride_stages):
            for i in range(n_cell):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                block = WSMobileInvertedResidualBlock(in_channels, width,
                                                      stride)
                in_channels = width
                self.blocks.append(block)

        self.feature_mix_layer = ConvLayer(in_channels, 1280, 1, 1, 1, 1,
                                           False, False, True, 'relu6', 0,
                                           'weight_bn_act')
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = LinearLayer(1280, 1000, True, False, None, dropout,
                                      'weight_bn_act')
Code Example #5
    def test_numeric_gradient(self):
        l = LinearLayer(2, 3, 'random')
        weights = l.W.get()
        x = np.random.rand(2)
        grad = l.numeric_gradient(x)
        assert_almost_equal(grad, weights[:, 0:-1])

        in_delta = np.random.rand(3)
        for i, d in enumerate(in_delta):
            aux = np.zeros(in_delta.size)
            aux[i] = in_delta[i]
            delta = l.backward(aux)
            gradient = l.numeric_gradient(x)
            assert_almost_equal(in_delta[i] * gradient[i, :], delta, decimal=5)
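
numeric_gradient here presumably computes a finite-difference Jacobian of the layer output with respect to its input, which for a linear layer with a trailing bias column should equal the weight matrix without that column (the first assertion above). A generic sketch of such a check; the helper name below is illustrative, not PyNet's API:

import numpy as np

def numeric_jacobian(f, x, eps=1e-6):
    # Central-difference Jacobian of f at x (illustrative helper).
    y = f(x)
    jac = np.zeros((y.size, x.size))
    for j in range(x.size):
        dx = np.zeros_like(x)
        dx[j] = eps
        jac[:, j] = (f(x + dx) - f(x - dx)) / (2 * eps)
    return jac

# For y = W @ [x, 1], the Jacobian w.r.t. x is W without its bias column.
W = np.random.rand(3, 3)
f = lambda v: W @ np.append(v, 1.0)
x = np.random.rand(2)
assert np.allclose(numeric_jacobian(f, x), W[:, :-1], atol=1e-5)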
Code Example #6
File: model.py Project: leeyang/deepcontact
def make_neural_network(incoming, hidden_number=None):
    input_shape = lasagne.layers.get_output_shape(incoming)
    input_feature_number = input_shape[1]
    max_size = input_shape[2]
    last_layer = incoming
    res = []
    last_input_number = input_feature_number
    for id, now_hidden_number in enumerate(hidden_number):
        now_layer = LinearLayer(incoming=last_layer,
                                max_size=max_size,
                                deepth=last_input_number,
                                num_output=now_hidden_number)
        now_layer = lasagne.layers.BatchNormLayer(now_layer)
        if id == len(hidden_number) - 1:
            now_layer = lasagne.layers.NonlinearityLayer(
                incoming=now_layer,
                nonlinearity=lasagne.nonlinearities.sigmoid)
        else:
            now_layer = lasagne.layers.NonlinearityLayer(
                incoming=now_layer,
                nonlinearity=lasagne.nonlinearities.rectify)
        last_layer = now_layer
        last_input_number = now_hidden_number
        res.append(now_layer)
    return res
Code Example #7
def test_LinearFunctionFA():
    """Make sure that LinearLayerFA with symmetric weights behaves in the same
    way as LinearLayer."""

    num_inputs = 10

    # Generate weights and masks.
    weights_mask = torch.randn(10, 10)
    weights_mask[weights_mask < 0] = 0
    weights_mask[weights_mask > 0] = 1

    weights_ff = torch.randn(10, 10) * weights_mask
    weights_fb = weights_ff.clone()
    weights_ff.requires_grad = True
    weights_ff_initial = weights_ff.clone().detach()

    # Create the layer and compute the output.
    layerFA = LinearLayerFA(weights_ff, weights_fb, weights_mask)
    optFA = torch.optim.SGD([layerFA.weight_matrix], lr=0.01)

    layer = LinearLayer(weights_ff, weights_mask)
    opt = torch.optim.SGD([layer.weight_matrix], lr=0.01)

    for i in range(num_inputs):
        opt.zero_grad()
        optFA.zero_grad()

        # Generate input data
        images = torch.randn(2, 10)

        output = layer.forward(images)
        outputFA = layerFA.forward(images)

        loss = ((output - 1)**2).mean()
        lossFA = ((outputFA - 1)**2).mean()

        loss.backward()
        lossFA.backward()

        opt.step()
        optFA.step()

        assert torch.all(torch.eq(output, outputFA))
        assert loss == lossFA
        assert torch.all(torch.eq(layerFA.weight_matrix, layer.weight_matrix))
        assert torch.all(torch.eq(layerFA.weight_matrix.grad, \
                                            layer.weight_matrix.grad))
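
The test suggests LinearLayerFA implements feedback alignment: the forward pass uses the trained weights, while errors are backpropagated through a separate, fixed feedback matrix, so with weights_fb equal to weights_ff it must match LinearLayer exactly. A minimal sketch of that idea with a custom autograd Function (an assumption about the mechanism, not LinearLayerFA's actual code):

import torch

class LinearFAFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, weight, backward_weight):
        # The forward pass uses the trained weight matrix as usual.
        ctx.save_for_backward(x, backward_weight)
        return x @ weight.t()

    @staticmethod
    def backward(ctx, grad_output):
        x, backward_weight = ctx.saved_tensors
        grad_x = grad_output @ backward_weight  # error routed through the fixed feedback matrix
        grad_w = grad_output.t() @ x            # weight gradient computed as usual
        return grad_x, grad_w, None             # the feedback matrix itself is never updated

When backward_weight equals weight this reduces to standard backpropagation, which is the equivalence the test above verifies.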
Code Example #8
 def __init__(self, num_neurons: List[int]):
     self.layers = []
     for index, neuron in enumerate(num_neurons[:-1]):
         print(neuron, num_neurons[index + 1])
         layer = LinearLayer((neuron, num_neurons[index + 1]))
         self.layers.append(layer)
         layer = SigmoidLayer()
         self.layers.append(layer)
     self.loss_layer = None
Code Example #9
File: working.py Project: tianguangjian/PyNet
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.n1 = SumGroup(
            MulGroup(
                Sequential(LinearLayer(input_size + output_size, output_size),
                           SigmoidLayer), GenericLayer),
            MulGroup(
                Sequential(LinearLayer(input_size + output_size, output_size),
                           SigmoidLayer),
                Sequential(LinearLayer(input_size + output_size, output_size),
                           TanhLayer)))
        self.n2 = MulGroup(
            Sequential(GenericLayer, TanhLayer),
            Sequential(LinearLayer(input_size + output_size, output_size),
                       SigmoidLayer))

        self.ct = np.zeros(output_size)
        self.ht = np.zeros(output_size)
Code Example #10
File: model.py Project: stadlerism/quasiNewton_ML
    def __init__(self, widths=[2, 2, 3, 2], lr=0.05, loss=L2Loss()):
        sigmoid = Sigmoid()
        self._layers = []
        self._lr = lr

        for n_in, n_out in zip(widths[:-1], widths[1:]):
            linearLayer = LinearLayer(n_in,
                                      n_out,
                                      bias=True,
                                      scale=1 / np.sqrt(n_in))
            self._layers.append(ActivatedLayer(linearLayer, sigmoid))

        self._loss = loss
Code Example #11
    def test_update(self):
        l = LinearLayer(2, 6, 'ones')
        y = l.forward(np.array([2.0, 2.0]))
        dJdW = l.dJdW_gradient(np.array([1.0, 2.0, 3.0, 4.0, 1.0, 1.0]))
        self.assertEqual(l.W.get().shape, (6, 3))
        self.assertEqual(dJdW.shape, (6, 3))

        l = LinearLayer(2, 3, 'ones')
        y = l.forward(np.array([2.0, 2.0]))
        dJdW = l.dJdW_gradient(np.array([1.0, 2.0, 3.0]))
        self.assertEqual(l.W.get().shape, (3, 3))
        self.assertEqual(dJdW.shape, (3, 3))
        assert_array_equal(
            dJdW, np.matrix([[2.0, 2.0, 1.0], [4.0, 4.0, 2.0], [6.0, 6.0,
                                                                3.0]]))
Code Example #12
    def test_LinearLayer(self):
        l1 = LinearLayer(5, 6, 'ones')
        n = Sequential([l1])
        y = n.forward(np.array([2.0, 1.0, 2.0, 3.0, 4.0]))
        self.assertEqual(y.shape, (6, ))
        assert_array_equal(y, np.array([
            13.0,
            13.0,
            13.0,
            13.0,
            13.0,
            13.0,
        ]))

        l2 = LinearLayer(6, 2, 'ones')
        n.add(l2)
        y = n.forward(np.array([2.0, 1.0, 2.0, 3.0, 4.0]))
        self.assertEqual(y.shape, (2, ))
        assert_array_equal(y, np.array([79.0, 79.0]))

        d = n.backward(np.array([2.0, 3.0]))
        self.assertEqual(d.shape, (5, ))
        assert_array_equal(d, np.array([30., 30., 30., 30., 30.]))
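
For reference, with the 'ones' initialization (every weight and the bias equal to 1, as the other forward tests confirm) each first-layer output is 2 + 1 + 2 + 3 + 4 + 1 = 13, each second-layer output is 6 * 13 + 1 = 79, and on the backward pass every hidden unit receives 2 + 3 = 5 and every input receives 6 * 5 = 30, matching the assertions above.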
Code Example #13
def main():
    mnist_path = os.path.join(os.getcwd(), "MNIST")
    (train_images, train_labels), (test_images,
                                   test_labels) = load_data(mnist_path)

    layers = [
        LinearLayer(32, 28**2, xavier),
        SigmoidLayer(),
        LinearLayer(32, 32, xavier),
        SigmoidLayer(),
        LinearLayer(10, 32, xavier),
        SigmoidLayer()
    ]
    net = NeuralNet(layers)

    np.seterr(over='ignore')
    train(net,
          train_images,
          train_labels,
          flatten_mnist_input,
          mnist_label_as_one_hot,
          epoch_count=1000,
          batch_size=1)

    confusion_matrix = DataFrame(np.zeros((10, 10)),
                                 index=range(10),
                                 columns=range(10))
    evaluator = test(net,
                     test_images,
                     test_labels,
                     confusion_matrix,
                     flatten_mnist_input,
                     highest_output_neuron,
                     mnist_label_as_one_hot,
                     title="POST-TRAIN")
    evaluator.plot()
Code Example #14
    def test_neuron_one_input(self):
        xv = np.array([0.5, 0.1, 0.5])
        x = Input(['x'], 'x')
        Wv = np.array([[0.1, 0.1, 0.2], [0.5, 0.2, 0.2]])
        W = MWeight(3, 2, weights=Wv)
        bv = np.array([0.3, 0.1])
        b = VWeight(2, weights=bv)
        net = ComputationalGraphLayer(Sigmoid(W.dot(x) + b))
        out = net.forward(xv)
        self.assertEqual(out.shape, (2, ))
        check_out = 1.0 / (1.0 + np.exp(-Wv.dot(xv) - bv))
        assert_almost_equal(out, check_out)
        dJdy = net.backward(np.array([1.0, 1.0]))
        self.assertEqual(dJdy.shape, (3, ))
        assert_almost_equal(dJdy, np.sum(net.numeric_gradient(xv), 0))
        assert_almost_equal(dJdy, (check_out * (1 - check_out)).dot(Wv))

        net2 = Sequential(
            LinearLayer(3, 2, weights=np.hstack([Wv, bv.reshape(2, 1)])),
            SigmoidLayer)
        out2 = net2.forward(xv)
        assert_almost_equal(out, out2)
        dJdy2 = net2.backward(np.array([1.0, 1.0]))
        assert_almost_equal(dJdy, dJdy2)
Code Example #15
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):

        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

        final_expand_width = [
            make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        self.final_expand_width = final_expand_width
        last_channel = [
            make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        self.last_channel = last_channel

        # stride_stages = [1, 2, 2, 2, 1, 2]
        stride_stages = [1, 2, 2, 2, 1, 1]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        se_stages = [False, False, True, False, True, True]
        if depth_list is None:
            n_block_list = [1, 2, 3, 4, 2, 3]
            self.depth_list = [4, 4]
            print('Use MobileNetV3 Depth Setting')
        else:
            n_block_list = [1] + [max(self.depth_list)] * 5
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = [
                make_divisible(base_width * width_mult, 8)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        input_channel = width_list[0]
        # first conv layer

        # if width_mult_list has only one elem
        if len(set(input_channel)) == 1:
            first_conv = ConvLayer(3,
                                   max(input_channel),
                                   kernel_size=3,
                                   stride=2,
                                   act_func='h_swish')
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel),
                out_channels=max(input_channel),
                kernel_size=3,
                stride=stride_stages[0],
                expand_ratio=1,
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        else:
            first_conv = DynamicConvLayer(
                in_channel_list=int2list(3, len(input_channel)),
                out_channel_list=input_channel,
                kernel_size=3,
                stride=2,
                act_func='h_swish',
            )
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel,
                out_channel_list=input_channel,
                kernel_size_list=3,
                expand_ratio_list=1,
                stride=stride_stages[0],
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        first_block = MobileInvertedResidualBlock(
            first_block_conv, IdentityLayer(input_channel, input_channel))

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = input_channel

        for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                       n_block_list[1:],
                                                       stride_stages[1:],
                                                       act_stages[1:],
                                                       se_stages[1:]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim,
                    out_channel_list=output_channel,
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        if len(final_expand_width) == 1:
            final_expand_layer = ConvLayer(max(feature_dim),
                                           max(final_expand_width),
                                           kernel_size=1,
                                           act_func='h_swish')
            feature_mix_layer = ConvLayer(
                max(final_expand_width),
                max(last_channel),
                kernel_size=1,
                bias=False,
                use_bn=False,
                act_func='h_swish',
            )
        else:
            final_expand_layer = DynamicConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=final_expand_width,
                kernel_size=1,
                act_func='h_swish')
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=final_expand_width,
                out_channel_list=last_channel,
                kernel_size=1,
                use_bn=False,
                act_func='h_swish',
            )
        if len(set(last_channel)) == 1:
            classifier = LinearLayer(max(last_channel),
                                     n_classes,
                                     dropout_rate=dropout_rate)
        else:
            classifier = DynamicLinearLayer(in_features_list=last_channel,
                                            out_features=n_classes,
                                            bias=True,
                                            dropout_rate=dropout_rate)
        super(OFAMobileNetV3,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
Code Example #16
    for (img, target) in test:
        #print str(np.argmax(model.forward(test_data[ind])))+' '+str(np.argmax(test_targets[ind]))
        if np.argmax(model.forward(img)) != np.argmax(target):
            err += 1
    print(1.0 - err / float(len(test))) * 100.0


if load_net:
    print "Load Network"
    model = StoreNetwork.load(name_net)
else:
    print "New Network"
    #Two layer network
    model = Sequential([
        NormalizationLayer(0, 255, -0.1, 0.1),
        LinearLayer(784, 10, weights='norm_random'),
        # TanhLayer,
        # LinearLayer(50, 10, weights='norm_random'),
        # TanhLayer,
        # NormalizationLayer(0,10,0,1),
        # SigmoidLayer()
    ])

# display = ShowTraining(epochs_num = epochs)

trainer = Trainer(show_training=False)  #, show_function = display.show)

J_list, dJdy_list, J_test = trainer.learn(
    model=model,
    train=train,
    test=test,
Code Example #17
 def test_backward(self):
     l = LinearLayer(2, 6, 'ones')
     d = l.backward(np.array([1.0, 2.0, 3.0, 4.0, 1.0, 1.0]))
     self.assertEqual(d.shape, (2, ))
     assert_array_equal(d, np.array([12.0, 12.0]))
Code Example #18
 def test_forward(self):
     l = LinearLayer(2, 6, 'ones')
     y = l.forward(np.array([2.0, 5.0]))
     assert_array_equal(y, np.array([8.0, 8.0, 8.0, 8.0, 8.0, 8.0]))
Code Example #19
 def test_dim(self):
     l = LinearLayer(5, 6)
     y = l.forward(np.random.rand(5))
     self.assertEqual(y.shape, (6, ))
Code Example #20
File: ofa_mbv3.py Project: leiwang1023/once-for-all
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):
        """
        Args:
            n_classes: 分类类数
            bn_param: bn参数
            dropout_rate: 用在哪些层里面呢
            width_mult_list: 在单层layer重复一些操作[~~网络基础宽度缩放 X 并不是~~]
            ks_list: 卷积核的候选大小
            expand_ratio_list: 网络宽度/channel数的扩大倍数
            depth_list: 网络深度/layer的重复/堆叠次数
        """

        # int2list turns a list, tuple, or int into a list
        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
        # make_divisible rounds the conv channel count to the nearest multiple of 8
        final_expand_width = [
            make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        last_channel = [
            make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        # strides (control downsampling); activation functions; 'se' means squeeze-and-excitation
        stride_stages = [1, 2, 2, 2, 1, 2]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        se_stages = [False, False, True, False, True, True]
        # depth configuration: apart from the first conv stage, the other five stages can expand
        if depth_list is None:
            n_block_list = [1, 2, 3, 4, 2, 3]
            self.depth_list = [4, 4]
            print('Use MobileNetV3 Depth Setting')
        else:
            n_block_list = [1] + [max(self.depth_list)] * 5
        # width / channel-count configuration
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = [
                make_divisible(base_width * width_mult, 8)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        # width_list does not work quite as I expected; I assumed it held expansion factors for the initial channels
        input_channel = width_list[0]
        # first conv layer
        if len(set(input_channel)) == 1:
            first_conv = ConvLayer(3,
                                   max(input_channel),
                                   kernel_size=3,
                                   stride=2,
                                   act_func='h_swish')
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel),
                out_channels=max(input_channel),
                kernel_size=3,
                stride=stride_stages[0],
                expand_ratio=1,
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        else:
            first_conv = DynamicConvLayer(
                in_channel_list=int2list(3, len(input_channel)),
                out_channel_list=input_channel,
                kernel_size=3,
                stride=2,
                act_func='h_swish',
            )
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel,
                out_channel_list=input_channel,
                kernel_size_list=3,
                expand_ratio_list=1,
                stride=stride_stages[0],
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        first_block = MobileInvertedResidualBlock(
            first_block_conv, IdentityLayer(input_channel, input_channel))

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = input_channel

        for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                       n_block_list[1:],
                                                       stride_stages[1:],
                                                       act_stages[1:],
                                                       se_stages[1:]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim,
                    out_channel_list=output_channel,
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        if len(final_expand_width) == 1:
            final_expand_layer = ConvLayer(max(feature_dim),
                                           max(final_expand_width),
                                           kernel_size=1,
                                           act_func='h_swish')
            feature_mix_layer = ConvLayer(
                max(final_expand_width),
                max(last_channel),
                kernel_size=1,
                bias=False,
                use_bn=False,
                act_func='h_swish',
            )
        else:
            final_expand_layer = DynamicConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=final_expand_width,
                kernel_size=1,
                act_func='h_swish')
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=final_expand_width,
                out_channel_list=last_channel,
                kernel_size=1,
                use_bn=False,
                act_func='h_swish',
            )
        if len(set(last_channel)) == 1:
            classifier = LinearLayer(max(last_channel),
                                     n_classes,
                                     dropout_rate=dropout_rate)
        else:
            classifier = DynamicLinearLayer(in_features_list=last_channel,
                                            out_features=n_classes,
                                            bias=True,
                                            dropout_rate=dropout_rate)
        super(OFAMobileNetV3,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
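
The translated comments above mention the helper int2list, which normalizes a scalar, list, or tuple of candidate values into a list. A rough sketch of that behavior as described (an illustrative re-implementation, not the library's exact code):

def int2list(val, repeat_time=1):
    # Lists and tuples pass through as lists; a scalar is repeated repeat_time times.
    if isinstance(val, (list, tuple)):
        return list(val)
    return [val] * repeat_time

print(int2list(3, 1))       # [3]   e.g. ks_list=3 becomes a one-element candidate list
print(int2list([3, 5, 7]))  # [3, 5, 7]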
Code Example #21
 #     np.array([-1.0,-1.0,-1.0,-1.0,-1.0]),
 #     np.array([1.0,1.0,1.0,1.0,1.0])
 # )
 # norm = NormalizationLayer(
 #     np.array([0.0,0.0,0.0,-3.0]),
 #     np.array([5.0,5.0,5.0,3.0]),
 #     np.array([-1.0,-1.0,-1.0,-1.0]),
 #     np.array([1.0,1.0,1.0,1.0])
 # )
 norm = NormalizationLayer(np.array([0.0, 0.0]), np.array([5.0, 5.0]),
                           np.array([0.0, 0.0]), np.array([1.0, 1.0]))
 W1 = utils.SharedWeights('gaussian', 2 + 1, 2)
 W2 = utils.SharedWeights('gaussian', 2 + 1, 3)
 Q = Sequential(
     norm,
     LinearLayer(2, 2, weights=W1),
     TanhLayer,
     LinearLayer(2, 3, weights=W2),
     # TanhLayer
 )
 W3 = utils.SharedWeights('gaussian', 2 + 1, 2)
 W4 = utils.SharedWeights('gaussian', 2 + 1, 3)
 # W3 = utils.SharedWeights(np.array([[10.0,-10.0,0.0],[-10.0,10.0,0.0]]),2+1,2)
 #W2 = utils.SharedWeights('gaussian',2+1,2)
 Q_hat = Sequential(
     norm,
     LinearLayer(2, 2, weights=W3),
     ReluLayer,
     LinearLayer(2, 3, weights=W4),
     # TanhLayer
 )
Code Example #22
 def _createFinalLayer(self, ni, nu):
     return LinearLayer(ni, nu)
Code Example #23
else:
    # norm = NormalizationLayer(
    #     np.array([0.0,0.0,-10.0,-10.0]),
    #     np.array([5.0,5.0,10.0,10.0]),
    #     np.array([-1.0,-1.0,-1.0,-1.0]),
    #     np.array([1.0,1.0,1.0,1.0])
    # )
    norm = NormalizationLayer(np.array([0.0, 0.0]), np.array([5.0, 5.0]),
                              np.array([-1.0, -1.0]), np.array([1.0, 1.0]))

    n = Sequential(
        norm,
        # LinearLayer(2,5,weights='gaussian'),
        # TanhLayer,
        #AddGaussian(1),
        LinearLayer(2, 4, weights='gaussian'),
        RandomGaussianLayer(1),
        SoftMaxLayer)
    agent = GenericAgent(n, 4, 40, 5.0)
    agent.set_training_options(
        Trainer(),
        NegativeLogLikelihoodLoss(),
        GradientDescentMomentum(
            learning_rate=0.1,
            momentum=0.7)  #GradientDescent(learning_rate=0.2)
    )

start = np.array([3.5, 3.5])
obstacles = [
    # np.array([2.5,2.5,1.0])
]
Code Example #24
    def create_torch_layers(self, device=None):
        """
        Create torch layers in self.torch_layers (with weight matrices and masks) based on
        self.connections and self.weights.

        Each layer has one weight matrix/weight mask. This function also deletes self.weights (the
        list of connection strengths), because it might get out of sync with the weight matrices
        once training starts. To get self.weights back (and get rid of torch layers), call
        delete_torch_layers.

        Args:
            device (str or torch.device, optional): The device to put weight matrices and masks on.
        """
        if self.torch_layers is not None:
            raise RuntimeError(
                'Torch layers already exist. If you want to re-create them '
                '(e.g. on a different device), call delete_torch_layers before'
            )

        # Create weight matrices and masks.
        self.torch_layers = []
        for i in range(1, len(
                self.neurons_in_layer)):  # no torch layer for input neurons

            # Find all neurons connecting to this layer.
            neurons_connecting_to_layer = set()  # avoid duplicates
            for from_neuron, to_neuron in self.connections:
                to_layer = self.find_layer(to_neuron)
                if to_layer == i:
                    neurons_connecting_to_layer.add(from_neuron)
            neurons_connecting_to_layer = sorted(
                list(neurons_connecting_to_layer))

            weight_matrix, weight_mask = self.create_weight_matrix(
                self.connections,
                self.weights,
                from_neurons=neurons_connecting_to_layer,
                to_neurons=self.neurons_in_layer[i],
                device=device)
            if not self.train_only_outputs or i == len(
                    self.neurons_in_layer) - 1:
                weight_matrix.requires_grad = True

            if self.use_random_feedback:  # feedback alignment
                backward_weight_matrix = torch.randn_like(weight_matrix,
                                                          requires_grad=False)
                backward_weight_matrix *= weight_mask
                self.torch_layers.append(
                    LinearLayerFA(weight_matrix,
                                  backward_weight_matrix,
                                  weight_mask,
                                  from_neurons=neurons_connecting_to_layer,
                                  to_neurons=self.neurons_in_layer[i]))
            else:  # normal backpropagation
                self.torch_layers.append(
                    LinearLayer(weight_matrix,
                                weight_mask,
                                from_neurons=neurons_connecting_to_layer,
                                to_neurons=self.neurons_in_layer[i]))

        # Delete self.weights so that it doesn't get out of sync with weight matrices during
        # training.
        self.weights = None
Code Example #25
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):

        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        if base_stage_width == 'google':
            base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
        else:
            # ProxylessNAS Stage Width
            base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

        input_channel = [
            make_divisible(base_stage_width[0] * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        first_block_width = [
            make_divisible(base_stage_width[1] * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        last_channel = [
            make_divisible(base_stage_width[-1] * width_mult, 8)
            if width_mult > 1.0 else base_stage_width[-1]
            for width_mult in self.width_mult_list
        ]

        # first conv layer
        if len(input_channel) == 1:
            first_conv = ConvLayer(3,
                                   max(input_channel),
                                   kernel_size=3,
                                   stride=2,
                                   use_bn=True,
                                   act_func='relu6',
                                   ops_order='weight_bn_act')
        else:
            first_conv = DynamicConvLayer(in_channel_list=int2list(
                3, len(input_channel)),
                                          out_channel_list=input_channel,
                                          kernel_size=3,
                                          stride=2,
                                          act_func='relu6')
        # first block
        if len(first_block_width) == 1:
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel),
                out_channels=max(first_block_width),
                kernel_size=3,
                stride=1,
                expand_ratio=1,
                act_func='relu6',
            )
        else:
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel,
                out_channel_list=first_block_width,
                kernel_size_list=3,
                expand_ratio_list=1,
                stride=1,
                act_func='relu6',
            )
        first_block = MobileInvertedResidualBlock(first_block_conv, None)

        input_channel = first_block_width

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1

        stride_stages = [2, 2, 2, 1, 2, 1]
        if depth_list is None:
            n_block_list = [2, 3, 4, 3, 3, 1]
            self.depth_list = [4, 4]
            print('Use MobileNetV2 Depth Setting')
        else:
            n_block_list = [max(self.depth_list)] * 5 + [1]

        width_list = []
        for base_width in base_stage_width[2:-1]:
            width = [
                make_divisible(base_width * width_mult, 8)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        for width, n_block, s in zip(width_list, n_block_list, stride_stages):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1

                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=int2list(input_channel, 1),
                    out_channel_list=int2list(output_channel, 1),
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func='relu6',
                )

                if stride == 1 and input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None

                mb_inverted_block = MobileInvertedResidualBlock(
                    mobile_inverted_conv, shortcut)

                blocks.append(mb_inverted_block)
                input_channel = output_channel
        # 1x1_conv before global average pooling
        if len(last_channel) == 1:
            feature_mix_layer = ConvLayer(
                max(input_channel),
                max(last_channel),
                kernel_size=1,
                use_bn=True,
                act_func='relu6',
            )
            classifier = LinearLayer(max(last_channel),
                                     n_classes,
                                     dropout_rate=dropout_rate)
        else:
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=input_channel,
                out_channel_list=last_channel,
                kernel_size=1,
                stride=1,
                act_func='relu6',
            )
            classifier = DynamicLinearLayer(in_features_list=last_channel,
                                            out_features=n_classes,
                                            bias=True,
                                            dropout_rate=dropout_rate)

        super(OFAProxylessNASNets,
              self).__init__(first_conv, blocks, feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
Code Example #26
    #     np.array([-1.0,-1.0,-1.0,-1.0,-1.0]),
    #     np.array([1.0,1.0,1.0,1.0,1.0])
    # )
    # norm = NormalizationLayer(
    #     np.array([0.0,0.0,0.0,-3.0]),
    #     np.array([5.0,5.0,5.0,3.0]),
    #     np.array([-1.0,-1.0,-1.0,-1.0]),
    #     np.array([1.0,1.0,1.0,1.0])
    # )
    norm = NormalizationLayer(np.array([0.0, 0.0]), np.array([5.0, 5.0]),
                              np.array([-1.0, -1.0]), np.array([1.0, 1.0]))
    W1 = utils.SharedWeights('gaussian', 2 + 1, 4)
    # W2 = utils.SharedWeights('gaussian',3+1,2)
    n = Sequential(
        norm,
        LinearLayer(2, 4, weights=W1),
        # TanhLayer,
        # #AddGaussian(1),
        # LinearLayer(3,2,weights=W2),
        RandomGaussianLayer(1),
        SoftMaxLayer)
    agent = GenericAgent(n, 4, 25, 0.0)
    agent.set_training_options(
        Trainer(show_training=True),
        NegativeLogLikelihoodLoss(),
        GradientDescentMomentum(
            learning_rate=0.1,
            momentum=0.5)  #GradientDescent(learning_rate=0.2)
    )

Code Example #27
File: model_ws.py Project: kcyu2014/SemiNAS
class NASNet(BasicUnit):
    def __init__(self, width_stages, n_cell_stages, stride_stages, dropout=0):
        super(NASNet, self).__init__()

        self.width_stages = width_stages
        self.n_cell_stages = n_cell_stages
        self.stride_stages = stride_stages

        in_channels = 32
        first_cell_width = 16

        # first conv layer
        self.first_conv = ConvLayer(3, in_channels, 3, 2, 1, 1, False, False,
                                    True, 'relu6', 0, 'weight_bn_act')

        # first block
        first_block_config = {
            "name": "MobileInvertedResidualBlock",
            "mobile_inverted_conv": {
                "name": "MBInvertedConvLayer",
                "in_channels": in_channels,
                "out_channels": first_cell_width,
                "kernel_size": 3,
                "stride": 1,
                "expand_ratio": 1
            },
            "shortcut": None
        }
        self.first_block = MobileInvertedResidualBlock.build_from_config(
            first_block_config)
        in_channels = first_cell_width

        # blocks
        self.blocks = nn.ModuleList()
        for width, n_cell, s in zip(self.width_stages, self.n_cell_stages,
                                    self.stride_stages):
            for i in range(n_cell):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                block = WSMobileInvertedResidualBlock(in_channels, width,
                                                      stride)
                in_channels = width
                self.blocks.append(block)

        self.feature_mix_layer = ConvLayer(in_channels, 1280, 1, 1, 1, 1,
                                           False, False, True, 'relu6', 0,
                                           'weight_bn_act')
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.classifier = LinearLayer(1280, 1000, True, False, None, dropout,
                                      'weight_bn_act')

    def forward(self, x, arch, bn_train=False):
        if bn_train:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm1d):
                    m.train()
        x = self.first_conv(x)
        x = self.first_block(x)
        for i, block in enumerate(self.blocks):
            x = block(x, arch[i])
        #x = self.last_block(x)
        if self.feature_mix_layer:
            x = self.feature_mix_layer(x)
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.classifier(x)
        return x

    def get_flops(self, x):
        flop, x = self.first_conv.get_flops(x)

        for block in self.blocks:
            delta_flop, x = block.get_flops(x)
            flop += delta_flop
        if self.feature_mix_layer:
            delta_flop, x = self.feature_mix_layer.get_flops(x)
            flop += delta_flop
        x = self.global_avg_pooling(x)
        x = x.view(x.size(0), -1)  # flatten

        delta_flop, x = self.classifier.get_flops(x)
        flop += delta_flop
        return flop, x

    def set_bn_param(self, bn_momentum, bn_eps):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.momentum = bn_momentum
                m.eps = bn_eps
        return

    def get_bn_param(self):
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                return {
                    'momentum': m.momentum,
                    'eps': m.eps,
                }
        return None

    def init_model(self, model_init, init_div_groups=True):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if model_init == 'he_fout':
                    n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                    if init_div_groups:
                        n /= m.groups
                    m.weight.data.normal_(0, math.sqrt(2. / n))
                elif model_init == 'he_fin':
                    n = m.kernel_size[0] * m.kernel_size[1] * m.in_channels
                    if init_div_groups:
                        n /= m.groups
                    m.weight.data.normal_(0, math.sqrt(2. / n))
                else:
                    raise NotImplementedError
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm1d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def weight_parameters(self):
        return self.parameters()

    @staticmethod
    def _make_divisible(v, divisor, min_val=None):
        """
        This function is taken from the original tf repo.
        It ensures that all layers have a channel number that is divisible by 8
        It can be seen here:
        https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
        :param v:
        :param divisor:
        :param min_val:
        :return:
        """
        if min_val is None:
            min_val = divisor
        new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v
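
The _make_divisible helper above rounds a channel count to a multiple of the divisor while never dropping more than 10% below the requested value. A quick standalone check of a few values, with the function body copied from the static method above and divisor 8:

def make_divisible(v, divisor, min_val=None):
    # Same logic as NASNet._make_divisible above, reproduced for a standalone check.
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

print(make_divisible(16, 8))   # 16  (already a multiple of 8)
print(make_divisible(100, 8))  # 104 (rounded up to a multiple of 8)
print(make_divisible(67, 8))   # 64  (rounds down; 64 is still within 10% of 67)
print(make_divisible(3, 8))    # 8   (never below min_val)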
Code Example #28
File: testsort_lstm.py Project: tianguangjian/PyNet
if load:
    lstm = GenericLayer.load('lstm.net')
else:
    l = LSTMNet(vocab_size,
                hidden_size,
                Wi=Wi,
                Wf=Wf,
                Wc=Wc,
                Wo=Wo,
                bi=bi,
                bf=bf,
                bc=bc,
                bo=bo)
    lstm = Sequential(
        l,
        LinearLayer(hidden_size, vocab_size),
    )

sm = SoftMaxLayer()

# lstm.on_message('init_nodes',20)
#
# x = to_one_hot_vect(char_to_ix['b'],vocab_size)
# print len(x)
# for i in range(20):
#     print lstm.forward(x,update = True)
#
# print lstm.backward(x)

epochs = 100
Code Example #29
    targets = [to_one_hot_vect(target, num_classes) for target in targets]

    train = zip(
        np.array(data[:n * 9 / 10]).astype(np.float),
        np.array(targets[:n * 9 / 10]).astype(np.float))
    test = zip(
        np.array(data[n / 10:]).astype(np.float),
        np.array(targets[n / 10:]).astype(np.float))

    return train, test


train, test = gen_data()

model = Sequential([
    LinearLayer(2, 20, weights='random'),
    TanhLayer(),
    #SigmoidLayer(),
    # HeavisideLayer(),
    # LinearLayer(10, 20, weights='random'),
    # SigmoidLayer(),
    LinearLayer(20, num_classes, weights='random', L1=0.001),
    # ReluLayer(),
    # SigmoidLayer()
    SoftMaxLayer()
])

# model = Sequential([
#     LinearLayer(2, 5, weights='random'),
#     SigmoidLayer(),
#     #LinearLayer(3, 3, weights='random'),
Code Example #30
File: C02.py Project: del18687058912/TTS-DNN-models
    def __init__(self, numpy_rng = numpy.random.RandomState(2**30), theano_rng=None, n_ins=601,
                 n_outs=259, l1_reg = None, l2_reg = None, 
                 hidden_layers_sizes= [256, 256, 256, 256, 256], 
                 hidden_activation='tanh', output_activation='sigmoid'):
        
        print "DNN Initialisation"
        #logger = logging.getLogger("DNN initialization")

        self.sigmoid_layers = []
        self.params = []
        self.delta_params   = []
        self.n_layers = len(hidden_layers_sizes)
        
        self.n_ins = n_ins
        self.n_outs = n_outs
        #self.speaker_ID = []
        
        self.output_activation = output_activation

        self.l1_reg = l1_reg
        self.l2_reg = l2_reg       
        #vctk_class = Code_01.VCTK_feat_collection()
        
        assert self.n_layers > 0
        
        if not theano_rng:
            theano_rng = RandomStreams(numpy.random.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.matrix('y') 
        
        
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)  ##T.nnet.sigmoid)  # 
           
           
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params) 
            self.delta_params.extend(sigmoid_layer.delta_params)
         
     
        # add final layer
        if self.output_activation == 'linear':
            self.final_layer = LinearLayer(rng = numpy_rng,
                                           input=self.sigmoid_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
            
        elif self.output_activation == 'sigmoid':
            self.final_layer = SigmoidLayer(
                 rng = numpy_rng,
                 input=self.sigmoid_layers[-1].output,
                 n_in=hidden_layers_sizes[-1],
                 n_out=n_outs, activation=T.nnet.sigmoid)
        else:
            print ("This output activation function: %s is not supported right now!" %(self.output_activation))
            sys.exit(1)

        self.params.extend(self.final_layer.params)
        self.delta_params.extend(self.final_layer.delta_params)
    
        ### MSE
        self.finetune_cost = T.mean(T.sum( (self.final_layer.output-self.y)*(self.final_layer.output-self.y), axis=1 ))
        
        self.errors = T.mean(T.sum( (self.final_layer.output-self.y)*(self.final_layer.output-self.y), axis=1 ))
        
        ### L1-norm
        if self.l1_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        ### L2-norm
        if self.l2_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()