Example #1
0
    def network_definition(self):
        """Build the symbolic graph: 2D conv encoder -> 3D conv GRU -> 3D decoder.

        Reads ``self.batch_size``, ``self.img_w``, ``self.img_h`` and ``self.y``.
        Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
        ``self.params``, ``self.output``, ``self.activations`` and
        ``self.t_x_s_update``.

        The layer classes used here are defined elsewhere in the project; the
        comments below describe only how this method wires them together.
        """

        # (multi_views, self.batch_size, 3, self.img_h, self.img_w),
        self.x = tensor5()
        self.is_x_tensor4 = False

        img_w = self.img_w
        img_h = self.img_h
        n_gru_vox = 4
        # n_vox = self.n_vox

        # Number of output filters per stage: six 2D conv stages, one fully
        # connected layer, and the 3D stages (index 0 is the GRU state width).
        n_convfilter = [96, 128, 256, 256, 256, 256]
        n_fc_filters = [1024]
        n_deconvfilter = [128, 128, 128, 64, 32, 2]
        # NOTE(review): the comment on self.x lists (img_h, img_w) while this
        # shape uses (img_w, img_h); the two agree only for square images -
        # confirm the intended axis order.
        input_shape = (self.batch_size, 3, img_w, img_h)

        # To define weights, define the network structure first
        # These template layers carry no input tensor and no activations; their
        # ``.params`` are reused by the layers rebuilt inside ``recurrence``.
        x = InputLayer(input_shape)
        conv1 = ConvLayer(x, (n_convfilter[0], 7, 7))
        pool1 = PoolLayer(conv1)

        conv2 = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        pool2 = PoolLayer(conv2)

        conv3 = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        pool3 = PoolLayer(conv3)

        conv4 = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        pool4 = PoolLayer(conv4)

        conv5 = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        pool5 = PoolLayer(conv5)

        conv6 = ConvLayer(pool5, (n_convfilter[5], 3, 3))
        pool6 = PoolLayer(conv6)

        flat6 = FlattenLayer(pool6)

        fc7 = TensorProductLayer(flat6, n_fc_filters[0])

        # Hidden-state shape (batch_size, n_gru_vox, n_deconvfilter[0],
        # n_gru_vox, n_gru_vox), i.e. (batch_size, 4, 128, 4, 4) with the
        # values set above.
        s_shape = (self.batch_size, n_gru_vox, n_deconvfilter[0], n_gru_vox,
                   n_gru_vox)

        # Dummy 3D grid hidden representations
        prev_s = InputLayer(s_shape)

        # Template layers for the update gate, the reset gate and the
        # candidate state; only their ``.params`` are used later.
        t_x_s_update = FCConv3DLayer(
            prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
        t_x_s_reset = FCConv3DLayer(
            prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        reset_gate = SigmoidLayer(t_x_s_reset)

        rs = EltwiseMultiplyLayer(reset_gate, prev_s)
        t_x_rs = FCConv3DLayer(rs, fc7,
                               (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
            """Scan step: encode one entry of ``self.x`` and update the state.

            ``x_curr`` is the current element of the scanned sequence and
            ``prev_s_tensor`` the previous hidden state. ``prev_in_gate_tensor``
            is not used in the body; it is accepted because ``outputs_info``
            below has two recurrent entries and scan passes the previous value
            of each. Returns the new hidden state and the update-gate output.
            """
            # Scan function cannot use compiled function.
            # Rebuild the encoder on the actual input, sharing the template
            # layers' parameters via ``params=``.
            input_ = InputLayer(input_shape, x_curr)
            conv1_ = ConvLayer(input_, (n_convfilter[0], 7, 7),
                               params=conv1.params)
            pool1_ = PoolLayer(conv1_)
            rect1_ = LeakyReLU(pool1_)
            conv2_ = ConvLayer(rect1_, (n_convfilter[1], 3, 3),
                               params=conv2.params)
            pool2_ = PoolLayer(conv2_)
            rect2_ = LeakyReLU(pool2_)
            conv3_ = ConvLayer(rect2_, (n_convfilter[2], 3, 3),
                               params=conv3.params)
            pool3_ = PoolLayer(conv3_)
            rect3_ = LeakyReLU(pool3_)
            conv4_ = ConvLayer(rect3_, (n_convfilter[3], 3, 3),
                               params=conv4.params)
            pool4_ = PoolLayer(conv4_)
            rect4_ = LeakyReLU(pool4_)
            conv5_ = ConvLayer(rect4_, (n_convfilter[4], 3, 3),
                               params=conv5.params)
            pool5_ = PoolLayer(conv5_)
            rect5_ = LeakyReLU(pool5_)
            conv6_ = ConvLayer(rect5_, (n_convfilter[5], 3, 3),
                               params=conv6.params)
            pool6_ = PoolLayer(conv6_)
            rect6_ = LeakyReLU(pool6_)
            flat6_ = FlattenLayer(rect6_)
            fc7_ = TensorProductLayer(flat6_,
                                      n_fc_filters[0],
                                      params=fc7.params)
            rect7_ = LeakyReLU(fc7_)

            prev_s_ = InputLayer(s_shape, prev_s_tensor)

            # Gate pre-activations from the previous state and the encoded
            # image feature.
            t_x_s_update_ = FCConv3DLayer(
                prev_s_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_update.params)
            t_x_s_reset_ = FCConv3DLayer(
                prev_s_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_reset.params)

            update_gate_ = SigmoidLayer(t_x_s_update_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_)

            # Candidate state computed from the reset-gated previous state.
            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
            t_x_rs_ = FCConv3DLayer(
                rs_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_rs.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_)

            # New state: update_gate * previous state
            #            + ComplementLayer(update_gate) * candidate state.
            gru_out_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_.output, update_gate_.output

        # Iterate ``recurrence`` over the leading axis of self.x; both
        # recurrent outputs start from zeros of shape ``s_shape``.
        s_update, _ = theano.scan(
            recurrence,
            sequences=[
                self.x
            ],  # only the image sequence is fed; no frame index is passed
            outputs_info=[
                tensor.zeros_like(np.zeros(s_shape),
                                  dtype=theano.config.floatX),
                tensor.zeros_like(np.zeros(s_shape),
                                  dtype=theano.config.floatX)
            ])

        # s_update holds the two scan outputs: hidden states first, update
        # gates second (so [-1] is the update-gate sequence).
        update_all = s_update[-1]
        s_all = s_update[0]
        # Hidden state after the final scan step; this is what gets decoded.
        s_last = s_all[-1]

        gru_s = InputLayer(s_shape, s_last)

        # Decoder: three unpool + 3D conv + LeakyReLU stages, then two 3D conv
        # stages without unpooling.
        unpool7 = Unpool3DLayer(gru_s)
        conv7 = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
        rect7 = LeakyReLU(conv7)

        unpool8 = Unpool3DLayer(rect7)
        conv8 = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
        rect8 = LeakyReLU(conv8)

        unpool9 = Unpool3DLayer(rect8)
        conv9 = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
        rect9 = LeakyReLU(conv9)

        # unpool10 = Unpool3DLayer(rect9)
        conv10 = Conv3DLayer(rect9, (n_deconvfilter[4], 3, 3, 3))
        rect10 = LeakyReLU(conv10)

        # Final 3D conv has n_deconvfilter[5] == 2 output filters, fed to the
        # softmax loss below.
        conv11 = Conv3DLayer(rect10, (n_deconvfilter[5], 3, 3, 3))

        softmax_loss = SoftmaxWithLoss3D(conv11.output)
        self.loss = softmax_loss.loss(self.y)
        self.error = softmax_loss.error(self.y)
        self.params = get_trainable_params()
        self.output = softmax_loss.prediction()
        # Update-gate outputs for every scan step.
        self.activations = [update_all]
        # Template update-gate layer (the one owning the shared parameters).
        self.t_x_s_update = t_x_s_update
Example #2
0
    def network_definition(self):
        """Build the symbolic graph of the multi-resolution recurrent network.

        A 2D encoder with SE/residual blocks is scanned over ``self.x`` and
        yields four flattened feature maps. Each one drives a 3D conv GRU at
        its own resolution (5 -> 2); the final state of one GRU is unpooled,
        passed through a 3D residual block and used as the initial state of
        the next. Every resolution gets a 2-filter prediction head, and the
        loss is the mean of the four softmax losses against strided copies of
        ``self.y``.

        Reads ``self.batch_size``, ``self.img_w``, ``self.img_h`` and ``self.y``.
        Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
        ``self.output``, ``self.params`` and ``self.activations``.

        The many ``print`` calls report layer shapes while the graph is built.
        """
        self.x = tensor5()
        self.is_x_tensor4 = False

        img_w = self.img_w
        img_h = self.img_h

        # Per-resolution grid sizes and filter counts. Index k of n_gru_vox /
        # n_deconvfilter is used for "resolution k" below (indices 1..4).
        n_gru_vox = [32, 32, 16, 8, 4]
        n_convfilter = [16, 32, 64, 128, 256, 512]
        n_deconvfilter = [2, 2, 8, 32, 128]
        n_middlefilter = [1, 4, 16, 64]
        input_shape = (self.batch_size, 3, img_w, img_h)

        # Template encoder layers: built without an input tensor so that their
        # ``.params`` can be shared with the layers rebuilt inside ``encoder``.
        # NOTE(review): the ``padding`` arguments of the template PoolLayers do
        # not match the ones used in ``encoder`` (e.g. pool1 passes
        # padding=(0, 0) here but encoder's pool1_ uses the default, and pool2
        # is the other way round) - confirm this has no effect on the shared
        # parameters.
        x = InputLayer(input_shape)
        conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
        conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
        pool1 = PoolLayer(conv1b, padding=(0, 0))  # H/2->64

        conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
        conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
        se2 = SELayer(conv2b)
        pool2 = PoolLayer(conv2c)  # H/4->32

        conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
        conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
        se3 = SELayer(conv3b)
        pool3 = PoolLayer(conv3c, padding=(0, 0))  # H/8->16

        conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
        conv4c = ConvLayer(pool3, (n_convfilter[3], 1, 1))
        se4 = SELayer(conv4b)
        pool4 = PoolLayer(conv4c, padding=(0, 0))  # H/16->8

        conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
        conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))  # H/32->4
        se5 = SELayer(conv5b)
        pool5 = PoolLayer(conv5c, padding=(0, 0))

        conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
        conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
        conv6c = ConvLayer(pool5, (n_convfilter[5], 1, 1))  # H/32->4
        se6 = SELayer(conv6b)

        def encoder(x):
            """Scan step: encode one entry of ``self.x``.

            Rebuilds the encoder on ``x`` with the template layers' parameters
            and returns the flattened outputs of the residual blocks 3, 4, 5
            and 6, in that order. The ``print`` calls run every time this
            Python function is called.
            """
            input_ = InputLayer(input_shape, x)
            conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
            rect1a_ = LeakyReLU(conv1a_)
            conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
            rect1b_ = LeakyReLU(conv1b_)
            pool1_ = PoolLayer(rect1b_)

            print("pool1: ", pool1_.output_shape)

            # Block 2: two 3x3 convs followed by an SE layer on the main path,
            # a 1x1 conv on the shortcut, summed and then pooled. Blocks 3-5
            # repeat this pattern; block 6 does the same without pooling.
            conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
            rect2a_ = LeakyReLU(conv2a_)
            conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
            rect2b_ = LeakyReLU(conv2b_)
            serect2b_ = SELayer(rect2b_, params=se2.params)
            conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
            res2_ = AddLayer(conv2c_, serect2b_)
            pool2_ = PoolLayer(res2_, padding=(0, 0))

            print("rect2b_: ", rect2b_.output_shape)
            print("serect2b_: ", serect2b_.output_shape)
            print("conv2c_: ", conv2c_.output_shape)

            conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
            rect3a_ = LeakyReLU(conv3a_)
            conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
            rect3b_ = LeakyReLU(conv3b_)
            serect3b_ = SELayer(rect3b_, params=se3.params)
            conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
            # res3_ = AddLayer(conv3c_, rect3b_)
            res3_ = AddLayer(conv3c_, serect3b_)
            pool3_ = PoolLayer(res3_, padding=(0, 0))

            print("rect3b_: ", rect3b_.output_shape)
            print("serect3b_: ", serect3b_.output_shape)
            print("conv3c_: ", conv3c_.output_shape)

            conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
            rect4a_ = LeakyReLU(conv4a_)
            conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
            rect4b_ = LeakyReLU(conv4b_)
            serect4b_ = SELayer(rect4b_, params=se4.params)
            conv4c_ = ConvLayer(pool3_, (n_convfilter[3], 1, 1), params=conv4c.params)
            res4_ = AddLayer(conv4c_, serect4b_)
            pool4_ = PoolLayer(res4_, padding=(0, 0))

            print("rect4b_: ", rect4b_.output_shape)
            # print("serect4b_: ", serect4b_.output_shape)
            print("conv4c_: ", conv4c_.output_shape)

            conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
            rect5a_ = LeakyReLU(conv5a_)
            conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
            rect5b_ = LeakyReLU(conv5b_)
            serect5b_ = SELayer(rect5b_, params=se5.params)
            conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
            res5_ = AddLayer(conv5c_, serect5b_)
            pool5_ = PoolLayer(res5_, padding=(0, 0))

            print("rect5b_: ", rect5b_.output_shape)
            # print("serect5b_: ", serect5b_.output_shape)
            print("conv5c_: ", conv5c_.output_shape)

            conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
            rect6a_ = LeakyReLU(conv6a_)
            conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
            rect6b_ = LeakyReLU(conv6b_)
            serect6b_ = SELayer(rect6b_, params=se6.params)
            conv6c_ = ConvLayer(pool5_, (n_convfilter[5], 1, 1), params=conv6c.params)
            res6_ = AddLayer(conv6c_, serect6b_)

            print("rect6b_: ", rect6b_.output_shape)
            # print("serect6b_: ", serect6b_.output_shape)
            print("conv6c_: ", conv6c_.output_shape)

            # The un-pooled residual outputs (not the pooled ones) are what
            # gets flattened and returned.
            flat3_ = FlattenLayer(res3_)
            flat4_ = FlattenLayer(res4_)
            flat5_ = FlattenLayer(res5_)
            flat6_ = FlattenLayer(res6_)

            # print("serect2: ", serect2b_.output_shape)
            # print("serect3: ", serect3b_.output_shape)
            # print("serect4: ", serect4b_.output_shape)
            # print("serect5: ", serect5b_.output_shape)
            # print("serect6: ", serect6b_.output_shape)

            print("res3: ", res3_.output_shape)
            print("res4: ", res4_.output_shape)
            print("res5: ", res5_.output_shape)
            print("res6: ", res6_.output_shape)
            # pool6_ = PoolLayer(res6_)

            return flat3_.output, flat4_.output, flat5_.output, flat6_.output

        # Set the shape of each resolution
        # Layout: (batch_size, grid, filters, grid, grid).
        s_shape5 = (self.batch_size, n_gru_vox[4], n_deconvfilter[4], n_gru_vox[4], n_gru_vox[4])
        s_shape4 = (self.batch_size, n_gru_vox[3], n_deconvfilter[3], n_gru_vox[3], n_gru_vox[3])
        s_shape3 = (self.batch_size, n_gru_vox[2], n_deconvfilter[2], n_gru_vox[2], n_gru_vox[2])
        s_shape2 = (self.batch_size, n_gru_vox[1], n_deconvfilter[1], n_gru_vox[1], n_gru_vox[1])

        # Template GRU layers per resolution (update gate, reset gate and
        # candidate state); only their ``.params`` are used by gru5..gru2.

        ## resolution 5
        prev_s5 = InputLayer(s_shape5)
        curr_s5 = InputLayer(s_shape5)
        t_x_s_update5 = CConv3DLayer(prev_s5, curr_s5, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))
        t_x_s_reset5 = CConv3DLayer(prev_s5, curr_s5, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))
        reset_gate5 = SigmoidLayer(t_x_s_reset5)
        rs5 = EltwiseMultiplyLayer(reset_gate5, prev_s5)
        t_x_rs5 = CConv3DLayer(rs5, curr_s5, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))

        ## resolution 4
        prev_s4 = InputLayer(s_shape4)
        curr_s4 = InputLayer(s_shape4)
        t_x_s_update4 = CConv3DLayer(prev_s4, curr_s4, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))
        t_x_s_reset4 = CConv3DLayer(prev_s4, curr_s4, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))
        reset_gate4 = SigmoidLayer(t_x_s_reset4)
        rs4 = EltwiseMultiplyLayer(reset_gate4, prev_s4)
        t_x_rs4 = CConv3DLayer(rs4, curr_s4, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))

        # resolution 3
        prev_s3 = InputLayer(s_shape3)
        curr_s3 = InputLayer(s_shape3)
        t_x_s_update3 = CConv3DLayer(prev_s3, curr_s3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))
        t_x_s_reset3 = CConv3DLayer(prev_s3, curr_s3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))
        reset_gate3 = SigmoidLayer(t_x_s_reset3)
        rs3 = EltwiseMultiplyLayer(reset_gate3, prev_s3)
        t_x_rs3 = CConv3DLayer(rs3, curr_s3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))

        # resolution 2
        prev_s2 = InputLayer(s_shape2)
        curr_s2 = InputLayer(s_shape2)
        t_x_s_update2 = CConv3DLayer(prev_s2, curr_s2, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))
        t_x_s_reset2 = CConv3DLayer(prev_s2, curr_s2, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))
        reset_gate2 = SigmoidLayer(t_x_s_reset2)
        rs2 = EltwiseMultiplyLayer(reset_gate2, prev_s2)
        t_x_rs2 = CConv3DLayer(rs2, curr_s2, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))

        def gru5(curr_s5, prev_s5):
            """Scan step of the resolution-5 GRU.

            ``curr_s5`` (the encoder feature for this step) is reshaped to
            ``s_shape5``; ``prev_s5`` is the previous hidden state. Both
            parameter names shadow the template layers of the same name in the
            enclosing scope. Returns the new hidden state.
            """
            curr_s5 = tensor.reshape(curr_s5, s_shape5)
            curr_s5_ = InputLayer(s_shape5, curr_s5)

            prev_s5_ = InputLayer(s_shape5, prev_s5)

            print("curr_s5: ", curr_s5_.output_shape)
            print("prev_s5: ", prev_s5_.output_shape)
            t_x_s_update5_ = CConv3DLayer(prev_s5_, curr_s5_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
                                          params=t_x_s_update5.params)
            t_x_s_reset5_ = CConv3DLayer(prev_s5_, curr_s5_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
                                         params=t_x_s_reset5.params)

            update5_ = SigmoidLayer(t_x_s_update5_)
            comp_udpate_gate5_ = ComplementLayer(update5_)
            reset_gate5_ = SigmoidLayer(t_x_s_reset5_)

            # Candidate state from the reset-gated previous state.
            rs5_ = EltwiseMultiplyLayer(reset_gate5_, prev_s5_)
            t_x_rs5_ = CConv3DLayer(rs5_, curr_s5_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
                                    params=t_x_rs5.params)
            tanh_t_x_rs5_ = TanhLayer(t_x_rs5_)

            # print("t_x_s_update5: ", t_x_s_update5_.output_shape)
            # print("t_x_s_reset5: ", t_x_s_reset5_.output_shape)

            # New state: update * previous + ComplementLayer(update) * candidate.
            gru_out5_ = AddLayer(
                EltwiseMultiplyLayer(update5_, prev_s5_),
                EltwiseMultiplyLayer(comp_udpate_gate5_, tanh_t_x_rs5_))

            print("gru_out5: ", gru_out5_.output_shape)
            return gru_out5_.output

        def gru4(curr_s4, prev_s4):
            """Scan step of the resolution-4 GRU; same structure as ``gru5``.

            The parameter names shadow the template layers in the enclosing
            scope. Returns the new hidden state.
            """
            curr_s4 = tensor.reshape(curr_s4, s_shape4)
            curr_s4_ = InputLayer(s_shape4, curr_s4)
            prev_s4_ = InputLayer(s_shape4, prev_s4)

            t_x_s_update4_ = CConv3DLayer(prev_s4_, curr_s4_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
                                          params=t_x_s_update4.params)
            t_x_s_reset4_ = CConv3DLayer(prev_s4_, curr_s4_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
                                         params=t_x_s_reset4.params)

            update4_ = SigmoidLayer(t_x_s_update4_)
            comp_udpate_gate4_ = ComplementLayer(update4_)
            reset_gate4_ = SigmoidLayer(t_x_s_reset4_)

            rs4_ = EltwiseMultiplyLayer(reset_gate4_, prev_s4_)
            t_x_rs4_ = CConv3DLayer(rs4_, curr_s4_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
                                    params=t_x_rs4.params)
            tanh_t_x_rs4_ = TanhLayer(t_x_rs4_)

            gru_out4_ = AddLayer(
                EltwiseMultiplyLayer(update4_, prev_s4_),
                EltwiseMultiplyLayer(comp_udpate_gate4_, tanh_t_x_rs4_))

            print("gru_out4: ", gru_out4_.output_shape)

            return gru_out4_.output

        def gru3(curr_s3, prev_s3):
            """Scan step of the resolution-3 GRU; same structure as ``gru5``.

            The parameter names shadow the template layers in the enclosing
            scope. Returns the new hidden state.
            """
            curr_s3 = tensor.reshape(curr_s3, s_shape3)
            curr_s3_ = InputLayer(s_shape3, curr_s3)
            prev_s3_ = InputLayer(s_shape3, prev_s3)

            t_x_s_update3_ = CConv3DLayer(prev_s3_, curr_s3_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
                                          params=t_x_s_update3.params)
            t_x_s_reset3_ = CConv3DLayer(prev_s3_, curr_s3_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
                                         params=t_x_s_reset3.params)

            update3_ = SigmoidLayer(t_x_s_update3_)
            comp_udpate_gate3_ = ComplementLayer(update3_)
            reset_gate3_ = SigmoidLayer(t_x_s_reset3_)

            rs3_ = EltwiseMultiplyLayer(reset_gate3_, prev_s3_)
            t_x_rs3_ = CConv3DLayer(rs3_, curr_s3_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
                                    params=t_x_rs3.params)
            tanh_t_x_rs3_ = TanhLayer(t_x_rs3_)

            gru_out3_ = AddLayer(
                EltwiseMultiplyLayer(update3_, prev_s3_),
                EltwiseMultiplyLayer(comp_udpate_gate3_, tanh_t_x_rs3_))

            print("gru_out3: ", gru_out3_.output_shape)

            return gru_out3_.output

        def gru2(curr_s2, prev_s2):
            """Scan step of the resolution-2 GRU; same structure as ``gru5``.

            The parameter names shadow the template layers in the enclosing
            scope. Returns the new hidden state.
            """
            curr_s2 = tensor.reshape(curr_s2, s_shape2)
            curr_s2_ = InputLayer(s_shape2, curr_s2)
            prev_s2_ = InputLayer(s_shape2, prev_s2)

            t_x_s_update2_ = CConv3DLayer(prev_s2_, curr_s2_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
                                          params=t_x_s_update2.params)
            t_x_s_reset2_ = CConv3DLayer(prev_s2_, curr_s2_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
                                         params=t_x_s_reset2.params)

            update2_ = SigmoidLayer(t_x_s_update2_)
            comp_udpate_gate2_ = ComplementLayer(update2_)
            reset_gate2_ = SigmoidLayer(t_x_s_reset2_)

            rs2_ = EltwiseMultiplyLayer(reset_gate2_, prev_s2_)
            t_x_rs2_ = CConv3DLayer(rs2_, curr_s2_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
                                    params=t_x_rs2.params)
            tanh_t_x_rs2_ = TanhLayer(t_x_rs2_)

            gru_out2_ = AddLayer(
                EltwiseMultiplyLayer(update2_, prev_s2_),
                EltwiseMultiplyLayer(comp_udpate_gate2_, tanh_t_x_rs2_))

            print("gru_out2: ", gru_out2_.output_shape)
            return gru_out2_.output

        # Run the encoder over the leading axis of self.x.
        s_encoder, _ = theano.scan(encoder,
                                sequences=[self.x])

        # print("self.x: ", self.x)
        # encoder returns (flat3, flat4, flat5, flat6), so out_encoderK holds
        # the flattened output of residual block K + 1.
        out_encoder5 = s_encoder[3]
        out_encoder4 = s_encoder[2]
        out_encoder3 = s_encoder[1]
        out_encoder2 = s_encoder[0]

        # Resolution 5: the GRU state starts from zeros.
        s_gru5, _ = theano.scan(gru5,
                                sequences=[out_encoder5],
                                outputs_info=[tensor.zeros_like(np.zeros(s_shape5),
                                                                dtype=theano.config.floatX)])

        # Take the state after the last step, attach a 2-filter prediction
        # head, then unpool and run a 3D SE/residual block whose output seeds
        # the next resolution's GRU.
        input_5 = InputLayer(s_shape5, s_gru5[-1])
        # print("input_5: ", input_5.output_shape)
        pred5 = Conv3DLayer(input_5, (2, 3, 3, 3))
        unpool5 = Unpool3DLayer(input_5)
        conv3d5a = Conv3DLayer(unpool5, (n_middlefilter[3], 3, 3, 3))
        rect3d5a = LeakyReLU(conv3d5a)
        conv3d5b = Conv3DLayer(rect3d5a, (n_deconvfilter[3], 3, 3, 3))
        rect3d5b = LeakyReLU(conv3d5b)
        se3d5    = SE3DLayer(rect3d5b)
        conv3d5c = Conv3DLayer(unpool5, (n_deconvfilter[3], 1, 1, 1))
        res3d5 = AddLayer(conv3d5c, se3d5)

        print("se3d5b: ", se3d5.output_shape)
        print("rect3d5c: ", conv3d5c.output_shape)

        print("res3d5: ", res3d5.output_shape)

        # Resolution 4: initial state is the upsampled resolution-5 block output.
        s_gru4, _ = theano.scan(gru4,
                                sequences=[out_encoder4],
                                outputs_info=[res3d5.output]
                                )

        input_4 = InputLayer(s_shape4, s_gru4[-1])
        pred4 = Conv3DLayer(input_4, (2, 3, 3, 3))
        unpool4 = Unpool3DLayer(input_4)
        conv3d4a = Conv3DLayer(unpool4, (n_middlefilter[2], 3, 3, 3))
        rect3d4a = LeakyReLU(conv3d4a)
        conv3d4b = Conv3DLayer(rect3d4a, (n_deconvfilter[2], 3, 3, 3))
        rect3d4b = LeakyReLU(conv3d4b)
        se3d4    = SE3DLayer(rect3d4b)
        conv3d4c = Conv3DLayer(unpool4, (n_deconvfilter[2], 1, 1, 1))
        res3d4 = AddLayer(conv3d4c, se3d4)

        print("se3d4: ", se3d4.output_shape)
        print("rect3d4c: ", conv3d4c.output_shape)
        print("res3d4c: ", res3d4.output_shape)

        # Resolution 3: initial state is the upsampled resolution-4 block output.
        s_gru3, _ = theano.scan(gru3,
                                sequences=[out_encoder3],
                                outputs_info=[res3d4.output])

        input_3 = InputLayer(s_shape3, s_gru3[-1])
        pred3 = Conv3DLayer(input_3, (2, 3, 3, 3))
        unpool3 = Unpool3DLayer(input_3)
        conv3d3a = Conv3DLayer(unpool3, (n_middlefilter[1], 3, 3, 3))
        rect3d3a = LeakyReLU(conv3d3a)
        conv3d3b = Conv3DLayer(rect3d3a, (n_deconvfilter[1], 3, 3, 3))
        rect3d3b = LeakyReLU(conv3d3b)
        se3d3    = SE3DLayer(rect3d3b)
        conv3d3c = Conv3DLayer(unpool3, (n_deconvfilter[1], 1, 1, 1))
        res3d3 = AddLayer(conv3d3c, se3d3)

        print("se3d3: ", se3d3.output_shape)
        print("rect3d3c: ", conv3d3c.output_shape)
        print("res3d3c: ", res3d3.output_shape)

        # Resolution 2: initial state is the upsampled resolution-3 block output.
        s_gru2, _ = theano.scan(gru2,
                                sequences=[out_encoder2],
                                outputs_info=[res3d3.output])

        input_2 = InputLayer(s_shape2, s_gru2[-1])
        pred2 = Conv3DLayer(input_2, (2, 3, 3, 3))

        # Targets for the coarser heads: self.y subsampled with stride 2, 4
        # and 8 along axes 1, 3 and 4 (axis 2 is kept whole).
        labele_shape = self.y.shape
        label3 = self.y[:, 0:labele_shape[1]:2, :, 0:labele_shape[3]:2, 0:labele_shape[4]:2]
        label4 = self.y[:, 0:labele_shape[1]:4, :, 0:labele_shape[3]:4, 0:labele_shape[4]:4]
        label5 = self.y[:, 0:labele_shape[1]:8, :, 0:labele_shape[3]:8, 0:labele_shape[4]:8]

        print("label2: ", self.y.shape)
        print("label3: ", label3.shape)
        print("label4: ", label4.shape)
        print("label5: ", label5.shape)

        print("pred5: ", pred5.output_shape)
        print("pred4: ", pred4.output_shape)
        print("pred3: ", pred3.output_shape)
        print("pred2: ", pred2.output_shape)

        softmax_loss5 = SoftmaxWithLoss3D(pred5.output)
        softmax_loss4 = SoftmaxWithLoss3D(pred4.output)
        softmax_loss3 = SoftmaxWithLoss3D(pred3.output)
        softmax_loss2 = SoftmaxWithLoss3D(pred2.output)

        # self.loss = softmax_loss2.loss(self.y)
        # Training loss: unweighted mean of the four per-resolution losses.
        self.loss = (softmax_loss5.loss(label5)  + softmax_loss4.loss(label4)
                     + softmax_loss3.loss(label3) + softmax_loss2.loss(self.y)) / 4.
        # self.loss = self.loss /
        # Error and prediction are taken from the resolution-2 head only,
        # against the unsubsampled self.y.
        self.error = softmax_loss2.error(self.y)
        self.output = softmax_loss2.prediction()
        self.params = get_trainable_params()
        self.activations = []
Example #3
0
    def network_definition(self):
        """Build the symbolic graph: residual 2D encoder -> 3D conv GRU -> residual 3D decoder.

        Reads ``self.batch_size``, ``self.img_w``, ``self.img_h`` and ``self.y``.
        Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
        ``self.params``, ``self.output`` and ``self.activations``.

        The layer classes used here are defined elsewhere in the project; the
        comments below describe only how this method wires them together.
        """

        # (multi_views, self.batch_size, 3, self.img_h, self.img_w),
        self.x = tensor5()
        self.is_x_tensor4 = False

        img_w = self.img_w
        img_h = self.img_h
        n_gru_vox = 4
        # n_vox = self.n_vox

        # Number of output filters per stage: six 2D conv stages, one fully
        # connected layer, and the 3D stages (index 0 is the GRU state width).
        n_convfilter = [96, 128, 256, 256, 256, 256]
        n_fc_filters = [1024]
        n_deconvfilter = [128, 128, 128, 64, 32, 2]
        # NOTE(review): the comment on self.x lists (img_h, img_w) while this
        # shape uses (img_w, img_h); the two agree only for square images -
        # confirm the intended axis order.
        input_shape = (self.batch_size, 3, img_w, img_h)

        # To define weights, define the network structure first
        # These template layers carry no input tensor and no activations or
        # residual additions; their ``.params`` are reused by the layers
        # rebuilt inside ``recurrence``.
        x = InputLayer(input_shape)
        conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
        conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
        pool1 = PoolLayer(conv1b)

        conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
        conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
        pool2 = PoolLayer(conv2c)

        conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
        conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
        pool3 = PoolLayer(conv3b)

        conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
        pool4 = PoolLayer(conv4b)

        conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
        conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))
        pool5 = PoolLayer(conv5b)

        conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
        conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
        pool6 = PoolLayer(conv6b)

        flat6 = FlattenLayer(pool6)
        fc7 = TensorProductLayer(flat6, n_fc_filters[0])

        # Hidden-state shape (batch_size, n_gru_vox, n_deconvfilter[0],
        # n_gru_vox, n_gru_vox), i.e. (batch_size, 4, 128, 4, 4) with the
        # values set above.
        s_shape = (self.batch_size, n_gru_vox, n_deconvfilter[0], n_gru_vox,
                   n_gru_vox)

        # Dummy 3D grid hidden representations
        prev_s = InputLayer(s_shape)

        # Template layers for the update gate, the reset gate and the
        # candidate state; only their ``.params`` are used later.
        t_x_s_update = FCConv3DLayer(
            prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
        t_x_s_reset = FCConv3DLayer(
            prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        reset_gate = SigmoidLayer(t_x_s_reset)

        rs = EltwiseMultiplyLayer(reset_gate, prev_s)
        t_x_rs = FCConv3DLayer(rs, fc7,
                               (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
            """Scan step: encode one entry of ``self.x`` and update the state.

            ``x_curr`` is the current element of the scanned sequence and
            ``prev_s_tensor`` the previous hidden state. ``prev_in_gate_tensor``
            is not used in the body; it is accepted because ``outputs_info``
            below has two recurrent entries and scan passes the previous value
            of each. Returns the new hidden state and the update-gate output.
            """
            # Scan function cannot use compiled function.
            # Rebuild the encoder on the actual input, sharing the template
            # layers' parameters via ``params=``.
            input_ = InputLayer(input_shape, x_curr)
            conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7),
                                params=conv1a.params)
            rect1a_ = LeakyReLU(conv1a_)
            conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3),
                                params=conv1b.params)
            rect1_ = LeakyReLU(conv1b_)
            pool1_ = PoolLayer(rect1_)

            # Blocks 2, 3 and 5 add a 1x1-conv shortcut to the two-conv main
            # path; block 4 has no shortcut; block 6 adds its input directly.
            conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3),
                                params=conv2a.params)
            rect2a_ = LeakyReLU(conv2a_)
            conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3),
                                params=conv2b.params)
            rect2_ = LeakyReLU(conv2b_)
            conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1),
                                params=conv2c.params)
            res2_ = AddLayer(conv2c_, rect2_)
            pool2_ = PoolLayer(res2_)

            conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3),
                                params=conv3a.params)
            rect3a_ = LeakyReLU(conv3a_)
            conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3),
                                params=conv3b.params)
            rect3_ = LeakyReLU(conv3b_)
            conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1),
                                params=conv3c.params)
            res3_ = AddLayer(conv3c_, rect3_)
            pool3_ = PoolLayer(res3_)

            conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3),
                                params=conv4a.params)
            rect4a_ = LeakyReLU(conv4a_)
            conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3),
                                params=conv4b.params)
            rect4_ = LeakyReLU(conv4b_)
            pool4_ = PoolLayer(rect4_)

            conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3),
                                params=conv5a.params)
            rect5a_ = LeakyReLU(conv5a_)
            conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3),
                                params=conv5b.params)
            rect5_ = LeakyReLU(conv5b_)
            conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1),
                                params=conv5c.params)
            res5_ = AddLayer(conv5c_, rect5_)
            pool5_ = PoolLayer(res5_)

            conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3),
                                params=conv6a.params)
            rect6a_ = LeakyReLU(conv6a_)
            conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3),
                                params=conv6b.params)
            rect6_ = LeakyReLU(conv6b_)
            res6_ = AddLayer(pool5_, rect6_)
            pool6_ = PoolLayer(res6_)

            flat6_ = FlattenLayer(pool6_)
            fc7_ = TensorProductLayer(flat6_,
                                      n_fc_filters[0],
                                      params=fc7.params)
            rect7_ = LeakyReLU(fc7_)

            prev_s_ = InputLayer(s_shape, prev_s_tensor)

            # Gate pre-activations from the previous state and the encoded
            # image feature.
            t_x_s_update_ = FCConv3DLayer(
                prev_s_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_update.params)

            t_x_s_reset_ = FCConv3DLayer(
                prev_s_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_reset.params)

            update_gate_ = SigmoidLayer(t_x_s_update_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_)

            # Candidate state computed from the reset-gated previous state.
            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
            t_x_rs_ = FCConv3DLayer(
                rs_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_rs.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_)

            # New state: update_gate * previous state
            #            + ComplementLayer(update_gate) * candidate state.
            gru_out_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_.output, update_gate_.output

        # Iterate ``recurrence`` over the leading axis of self.x; both
        # recurrent outputs start from zeros of shape ``s_shape``.
        s_update, _ = theano.scan(
            recurrence,
            sequences=[
                self.x
            ],  # only the image sequence is fed; no frame index is passed
            outputs_info=[
                tensor.zeros_like(np.zeros(s_shape),
                                  dtype=theano.config.floatX),
                tensor.zeros_like(np.zeros(s_shape),
                                  dtype=theano.config.floatX)
            ])

        # s_update holds the two scan outputs: hidden states first, update
        # gates second (so [-1] is the update-gate sequence).
        update_all = s_update[-1]
        s_all = s_update[0]
        # Hidden state after the final scan step; this is what gets decoded.
        s_last = s_all[-1]
        gru_s = InputLayer(s_shape, s_last)
        # Decoder blocks 7 and 8: unpool, two 3D convs, then add the unpooled
        # input back as an identity shortcut.
        unpool7 = Unpool3DLayer(gru_s)
        conv7a = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
        rect7a = LeakyReLU(conv7a)
        conv7b = Conv3DLayer(rect7a, (n_deconvfilter[1], 3, 3, 3))
        rect7 = LeakyReLU(conv7b)
        res7 = AddLayer(unpool7, rect7)

        unpool8 = Unpool3DLayer(res7)
        conv8a = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
        rect8a = LeakyReLU(conv8a)
        conv8b = Conv3DLayer(rect8a, (n_deconvfilter[2], 3, 3, 3))
        rect8 = LeakyReLU(conv8b)
        res8 = AddLayer(unpool8, rect8)

        # Block 9: the filter count changes (n_deconvfilter[3]), so the
        # shortcut goes through a 1x1x1 conv on the unpooled input.
        unpool9 = Unpool3DLayer(res8)
        conv9a = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
        rect9a = LeakyReLU(conv9a)
        conv9b = Conv3DLayer(rect9a, (n_deconvfilter[3], 3, 3, 3))
        rect9 = LeakyReLU(conv9b)

        conv9c = Conv3DLayer(unpool9, (n_deconvfilter[3], 1, 1, 1))
        res9 = AddLayer(conv9c, rect9)

        # Block 10: no unpooling.
        conv10a = Conv3DLayer(res9, (n_deconvfilter[4], 3, 3, 3))
        rect10a = LeakyReLU(conv10a)
        conv10b = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
        rect10 = LeakyReLU(conv10b)

        # NOTE(review): unlike conv9c (1x1x1 conv on the block input), this
        # shortcut is a 3x3x3 conv applied to rect10a rather than res9 -
        # confirm this is intended.
        conv10c = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
        res10 = AddLayer(conv10c, rect10)

        # Final 3D conv has n_deconvfilter[5] == 2 output filters, fed to the
        # softmax loss below.
        conv11 = Conv3DLayer(res10, (n_deconvfilter[5], 3, 3, 3))

        # printing
        # x = tensor.dvector('x')
        # printing_op = printing.Print('vector----------------------',attrs = ['shape'])(x)
        # printed_x = printing_op(x)

        # f = function([x],printed_x)
        # result = f(conv11.output)

        # print('------------------- conv11 output shape', conv11.output.shape.eval())

        softmax_loss = SoftmaxWithLoss3D(conv11.output)

        self.loss = softmax_loss.loss(self.y)
        self.error = softmax_loss.error(self.y)
        self.params = get_trainable_params()
        self.output = softmax_loss.prediction()
        # Update-gate outputs for every scan step.
        self.activations = [update_all]
Example #4
0
    def network_definition(self):
        """Build the symbolic graph of the multi-scale recurrent network.

        The method works in three stages:

        1. A "template" network is instantiated without input tensors so
           that the layer parameters exist; every layer built later inside
           a scan step reuses them through ``params=<template>.params``.
        2. ``encode_recurrence`` is scanned over ``self.x`` and yields four
           fully connected feature sequences (taken after pool5, pool4,
           pool3 and pool2).
        3. Four GRU-style recurrences (levels 5, 4, 3, 2) are scanned over
           those feature sequences.  The final hidden state of each level
           is unpooled and convolved, and the result seeds the hidden state
           of the next level.  A last 3D convolution feeds the softmax loss.

        Reads ``self.img_w``, ``self.img_h``, ``self.batch_size`` and
        ``self.y``.  Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``,
        ``self.error``, ``self.params``, ``self.output`` and
        ``self.activations`` (the update-gate sequences of the four levels).

        Layer shapes are printed to stdout while the graph is built.
        """
        # (views, batch_size, 3, img_h, img_w)
        self.x = tensor5()
        self.is_x_tensor4 = False

        img_w = self.img_w
        img_h = self.img_h
        # Edge length of the hidden-state grid at levels 5, 4, 3 and 2.
        n_gru_vox = [4, 8, 16, 32]

        n_convfilter = [8, 16, 32, 64, 128]
        n_fc_filters = [256]
        # Channel counts of the hidden states at levels 5, 4, 3, 2 and of the
        # final prediction volume.
        n_deconvfilter = [128, 64, 32, 16, 2]
        input_shape = (self.batch_size, 3, img_w, img_h)
        fc_shape = (self.batch_size, n_fc_filters[0])

        # To define the weights, define the net structure first.  These
        # template layers carry no input tensor; only their parameters and
        # output shapes are used below.
        x = InputLayer(input_shape)
        conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
        conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
        pool1 = PoolLayer(conv1b)  # H/2

        conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
        conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
        pool2 = PoolLayer(conv2c)  # H/4

        conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
        conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
        pool3 = PoolLayer(conv3c)  # H/8

        conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
        pool4 = PoolLayer(conv4b)  # H/16

        conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
        conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))  # H/32
        pool5 = PoolLayer(conv5b)

        # One fully connected projection per encoder scale; each one feeds
        # the recurrence of the same level.
        flat5 = FlattenLayer(pool5)
        fc5 = TensorProductLayer(flat5, n_fc_filters[0])

        flat4 = FlattenLayer(pool4)
        fc4 = TensorProductLayer(flat4, n_fc_filters[0])

        flat3 = FlattenLayer(pool3)
        fc3 = TensorProductLayer(flat3, n_fc_filters[0])

        flat2 = FlattenLayer(pool2)
        fc2 = TensorProductLayer(flat2, n_fc_filters[0])

        # flat1 = FlattenLayer(pool1)
        # fc1 = TensorProductLayer(flat1, n_fc_filters[0])

        # ==================== recurrence 5 ========================#
        # Template gates (update, reset, candidate) for the coarsest level.
        s_shape_5 = (self.batch_size, n_gru_vox[0], n_deconvfilter[0],
                     n_gru_vox[0], n_gru_vox[0])
        # s_shape_5 = (self.batch_size, n_gru_vox[4], n_deconvfilter[4], n_gru_vox[4], n_gru_vox[4])
        prev_s_5 = InputLayer(s_shape_5)

        t_x_s_update_5 = FCConv3DLayer(
            prev_s_5, fc5, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
        t_x_s_reset_5 = FCConv3DLayer(
            prev_s_5, fc5, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        reset_gate_5 = SigmoidLayer(t_x_s_reset_5)
        rs_5 = EltwiseMultiplyLayer(reset_gate_5, prev_s_5)
        t_x_rs_5 = FCConv3DLayer(
            rs_5, fc5, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        # ==================== recurrence 4 ========================#
        s_shape_4 = (self.batch_size, n_gru_vox[1], n_deconvfilter[1],
                     n_gru_vox[1], n_gru_vox[1])
        prev_s_4 = InputLayer(s_shape_4)

        t_x_s_update_4 = FCConv3DLayer(
            prev_s_4, fc4, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))
        t_x_s_reset_4 = FCConv3DLayer(
            prev_s_4, fc4, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))

        reset_gate_4 = SigmoidLayer(t_x_s_reset_4)
        rs_4 = EltwiseMultiplyLayer(reset_gate_4, prev_s_4)
        t_x_rs_4 = FCConv3DLayer(
            rs_4, fc4, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))

        # =================== recurrence 3 =======================#
        s_shape_3 = (self.batch_size, n_gru_vox[2], n_deconvfilter[2],
                     n_gru_vox[2], n_gru_vox[2])
        prev_s_3 = InputLayer(s_shape_3)

        t_x_s_update_3 = FCConv3DLayer(
            prev_s_3, fc3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))
        t_x_s_reset_3 = FCConv3DLayer(
            prev_s_3, fc3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))

        reset_gate_3 = SigmoidLayer(t_x_s_reset_3)
        rs_3 = EltwiseMultiplyLayer(reset_gate_3, prev_s_3)
        t_x_rs_3 = FCConv3DLayer(
            rs_3, fc3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))

        # ================== recurrence 2 =======================#
        s_shape_2 = (self.batch_size, n_gru_vox[3], n_deconvfilter[3],
                     n_gru_vox[3], n_gru_vox[3])
        prev_s_2 = InputLayer(s_shape_2)

        t_x_s_update_2 = FCConv3DLayer(
            prev_s_2, fc2, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))
        t_x_s_reset_2 = FCConv3DLayer(
            prev_s_2, fc2, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))

        reset_gate_2 = SigmoidLayer(t_x_s_reset_2)
        rs_2 = EltwiseMultiplyLayer(reset_gate_2, prev_s_2)
        t_x_rs_2 = FCConv3DLayer(
            rs_2, fc2, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))

        # # ================= recurrence 1 ========================#
        # s_shape_1 = (self.batch_size, n_gru_vox[4], n_deconvfilter[4], n_gru_vox[4], n_gru_vox[4])
        # prev_s_1 = InputLayer(s_shape_1)
        #
        # t_x_s_update_1 = FCConv3DLayer(prev_s_1, fc1, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))
        # t_x_s_reset_1 = FCConv3DLayer(prev_s_1, fc1, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))
        #
        # reset_gate_1 = SigmoidLayer(t_x_s_reset_1)
        # rs_1 = EltwiseMultiplyLayer(reset_gate_1, prev_s_1)
        # t_x_rs_1 = FCConv3DLayer(rs_1, fc1, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))

        def encode_recurrence(x_curr):
            """Encode one element of ``self.x`` into four FC feature outputs.

            Rebuilds the encoder on ``x_curr`` with the template parameters
            and returns the LeakyReLU-activated FC outputs taken after
            pool5, pool4, pool3 and pool2 (in that order).
            """
            input_ = InputLayer(input_shape, x_curr)
            conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7),
                                params=conv1a.params)
            rect1a_ = LeakyReLU(conv1a_)
            conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3),
                                params=conv1b.params)
            rect1_ = LeakyReLU(conv1b_)
            pool1_ = PoolLayer(rect1_)

            # flat1_ = FlattenLayer(pool1_)
            # fc1_ = TensorProductLayer(flat1_, n_fc_filters[0], params=fc1.params)
            # out1_ = LeakyReLU(fc1_)

            # Stage 2: two 3x3 convolutions plus a 1x1 shortcut from pool1_.
            conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3),
                                params=conv2a.params)
            rect2a_ = LeakyReLU(conv2a_)
            conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3),
                                params=conv2b.params)
            rect2_ = LeakyReLU(conv2b_)
            conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1),
                                params=conv2c.params)
            res2_ = AddLayer(conv2c_, rect2_)
            pool2_ = PoolLayer(res2_)

            flat2_ = FlattenLayer(pool2_)
            fc2_ = TensorProductLayer(flat2_,
                                      n_fc_filters[0],
                                      params=fc2.params)
            out2_ = LeakyReLU(fc2_)

            # Stage 3: same residual pattern as stage 2.
            conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3),
                                params=conv3a.params)
            rect3a_ = LeakyReLU(conv3a_)
            conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3),
                                params=conv3b.params)
            rect3_ = LeakyReLU(conv3b_)
            conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1),
                                params=conv3c.params)
            res3_ = AddLayer(conv3c_, rect3_)
            pool3_ = PoolLayer(res3_)

            flat3_ = FlattenLayer(pool3_)
            fc3_ = TensorProductLayer(flat3_,
                                      n_fc_filters[0],
                                      params=fc3.params)
            out3_ = LeakyReLU(fc3_)

            # Stage 4: plain (non-residual) pair of convolutions.
            conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3),
                                params=conv4a.params)
            rect4a_ = LeakyReLU(conv4a_)
            conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3),
                                params=conv4b.params)
            rect4_ = LeakyReLU(conv4b_)
            pool4_ = PoolLayer(rect4_)

            flat4_ = FlattenLayer(pool4_)
            fc4_ = TensorProductLayer(flat4_,
                                      n_fc_filters[0],
                                      params=fc4.params)
            out4_ = LeakyReLU(fc4_)

            # Stage 5: residual pattern with a 1x1 shortcut from pool4_.
            conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3),
                                params=conv5a.params)
            rect5a_ = LeakyReLU(conv5a_)
            conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3),
                                params=conv5b.params)
            rect5_ = LeakyReLU(conv5b_)
            conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1),
                                params=conv5c.params)
            res5_ = AddLayer(conv5c_, rect5_)
            pool5_ = PoolLayer(res5_)

            flat5_ = FlattenLayer(pool5_)
            fc5_ = TensorProductLayer(flat5_,
                                      n_fc_filters[0],
                                      params=fc5.params)
            out5_ = LeakyReLU(fc5_)

            return out5_.output, out4_.output, out3_.output, out2_.output  # , out1_.output

        # Run the encoder over the leading axis of self.x; the result holds
        # one feature sequence per returned output.
        s_encoder, _ = theano.scan(encode_recurrence, sequences=[self.x])
        out_5 = s_encoder[0]
        out_4 = s_encoder[1]
        out_3 = s_encoder[2]
        out_2 = s_encoder[3]

        # out_1 = s_encoder[4]

        def decode_recurrence_5(x_curr, prev_s_tensor, prev_in_gate_tensor):
            """One GRU-style step at level 5.

            ``prev_in_gate_tensor`` is unused; it is only present because the
            scan carries two recurrent outputs (hidden state, update gate).
            Returns the new hidden state and the update gate.
            """
            x_curr_ = InputLayer(fc_shape, x_curr)
            prev_s_5_ = InputLayer(s_shape_5, prev_s_tensor)
            t_x_s_update_5_ = FCConv3DLayer(
                prev_s_5_,
                x_curr_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_update_5.params)

            t_x_s_reset_5_ = FCConv3DLayer(
                prev_s_5_,
                x_curr_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_reset_5.params)

            update_gate_ = SigmoidLayer(t_x_s_update_5_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_5_)

            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_5_)
            t_x_rs_5_ = FCConv3DLayer(
                rs_,
                x_curr_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_rs_5.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_5_)

            # new state = update * previous + complement(update) * candidate
            gru_out_5_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_5_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_5_.output, update_gate_.output

        s_update_5_, _ = theano.scan(
            decode_recurrence_5,
            sequences=[out_5],
            outputs_info=[
                tensor.zeros_like(np.zeros(s_shape_5),
                                  dtype=theano.config.floatX),
                tensor.zeros_like(np.zeros(s_shape_5),
                                  dtype=theano.config.floatX)
            ])
        # s_update_5_ is [hidden-state sequence, update-gate sequence].
        update_all_5 = s_update_5_[-1]
        # Final hidden state: last step of the hidden-state sequence.  (The
        # update-gate sequence must not be used here; it is only kept for
        # self.activations.)
        s_out_5 = s_update_5_[0][-1]
        input_5 = InputLayer(s_shape_5, s_out_5)
        # Unpooling s_out_5; the convolution maps it to the channel count of
        # level 4 so that it can seed that level's hidden state.
        unpool5 = Unpool3DLayer(input_5)
        conv_out5 = Conv3DLayer(unpool5, (n_deconvfilter[1], 3, 3, 3))

        print("conv_out5", conv_out5.output_shape)

        def decode_recurrence_4(x_curr, prev_s_tensor, prev_in_gate_tensor):
            """One GRU-style step at level 4 (see ``decode_recurrence_5``)."""

            x_curr_ = InputLayer(fc_shape, x_curr)
            prev_s_4_ = InputLayer(s_shape_4, prev_s_tensor)
            t_x_s_update_4_ = FCConv3DLayer(
                prev_s_4_,
                x_curr_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
                params=t_x_s_update_4.params)

            t_x_s_reset_4_ = FCConv3DLayer(
                prev_s_4_,
                x_curr_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
                params=t_x_s_reset_4.params)

            # Debug output of the static layer shapes.
            print("x_curr: ", x_curr_.output_shape)
            print("prev_s_4_: ", prev_s_4_.output_shape)
            print("t_x_s_update_4_: ", t_x_s_update_4_.output_shape)
            print("t_x_s_reset_4_: ", t_x_s_reset_4_.output_shape)

            update_gate_ = SigmoidLayer(t_x_s_update_4_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_4_)

            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_4_)
            t_x_rs_4_ = FCConv3DLayer(
                rs_,
                x_curr_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
                params=t_x_rs_4.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_4_)

            gru_out_4_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_4_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_4_.output, update_gate_.output

        # Level 4 starts from the upsampled level-5 result instead of zeros.
        s_update_4_, _ = theano.scan(decode_recurrence_4,
                                     sequences=[out_4],
                                     outputs_info=[
                                         conv_out5.output,
                                         tensor.zeros_like(
                                             np.zeros(s_shape_4),
                                             dtype=theano.config.floatX)
                                     ])
        update_all_4 = s_update_4_[-1]
        # Final hidden state of level 4 (last step of the first scan output).
        s_out_4 = s_update_4_[0][-1]
        input_4 = InputLayer(s_shape_4, s_out_4)
        # Unpooling s_out_4
        unpool4 = Unpool3DLayer(input_4)
        conv_out4 = Conv3DLayer(unpool4, (n_deconvfilter[2], 3, 3, 3))

        print("conv_out_4: ", conv_out4.output_shape)
        print("conv_out_4: ", conv_out4.output)

        def decode_recurrence_3(x_curr, prev_s_tensor, prev_in_gate_tensor):
            """One GRU-style step at level 3 (see ``decode_recurrence_5``)."""
            x_curr_ = InputLayer(fc_shape, x_curr)
            prev_s_3_ = InputLayer(s_shape_3, prev_s_tensor)
            t_x_s_update_3_ = FCConv3DLayer(
                prev_s_3_,
                x_curr_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
                params=t_x_s_update_3.params)

            t_x_s_reset_3_ = FCConv3DLayer(
                prev_s_3_,
                x_curr_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
                params=t_x_s_reset_3.params)

            update_gate_ = SigmoidLayer(t_x_s_update_3_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_3_)

            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_3_)
            t_x_rs_3_ = FCConv3DLayer(
                rs_,
                x_curr_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
                params=t_x_rs_3.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_3_)

            gru_out_3_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_3_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_3_.output, update_gate_.output

        # Level 3 starts from the upsampled level-4 result.
        s_update_3_, _ = theano.scan(decode_recurrence_3,
                                     sequences=[out_3],
                                     outputs_info=[
                                         conv_out4.output,
                                         tensor.zeros_like(
                                             np.zeros(s_shape_3),
                                             dtype=theano.config.floatX)
                                     ])
        update_all_3 = s_update_3_[-1]
        # Final hidden state of level 3 (last step of the first scan output).
        s_out_3 = s_update_3_[0][-1]
        input_3 = InputLayer(s_shape_3, s_out_3)
        # Unpooling s_out_3
        unpool3 = Unpool3DLayer(input_3)
        conv_out3 = Conv3DLayer(unpool3, (n_deconvfilter[3], 3, 3, 3))

        print("conv_out_3: ", conv_out3.output_shape)
        print("conv_out_3: ", conv_out3.output)

        def decode_recurrence_2(x_curr, prev_s_tensor, prev_in_gate_tensor):
            """One GRU-style step at level 2 (see ``decode_recurrence_5``)."""
            x_curr_ = InputLayer(fc_shape, x_curr)
            prev_s_2_ = InputLayer(s_shape_2, prev_s_tensor)
            t_x_s_update_2_ = FCConv3DLayer(
                prev_s_2_,
                x_curr_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
                params=t_x_s_update_2.params)

            t_x_s_reset_2_ = FCConv3DLayer(
                prev_s_2_,
                x_curr_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
                params=t_x_s_reset_2.params)

            update_gate_ = SigmoidLayer(t_x_s_update_2_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_2_)

            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_2_)
            t_x_rs_2_ = FCConv3DLayer(
                rs_,
                x_curr_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
                params=t_x_rs_2.params)
            tanh_t_x_rs_ = TanhLayer(t_x_rs_2_)

            gru_out_2_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_2_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_2_.output, update_gate_.output

        # Level 2 starts from the upsampled level-3 result.
        s_update_2_, _ = theano.scan(decode_recurrence_2,
                                     sequences=[out_2],
                                     outputs_info=[
                                         conv_out3.output,
                                         tensor.zeros_like(
                                             np.zeros(s_shape_2),
                                             dtype=theano.config.floatX)
                                     ])
        update_all_2 = s_update_2_[-1]
        # Final hidden state of level 2 (last step of the first scan output).
        s_out_2 = s_update_2_[0][-1]
        input_2 = InputLayer(s_shape_2, s_out_2)
        # Level 2 is not unpooled any further (level 1 is disabled below).
        # unpool2 = Unpool3DLayer(input_2)
        # conv_out2 = Unpool3DLayer(unpool2, (n_deconvfilter[4], 3, 3, 3))

        # def decode_recurrence_1(x_curr, prev_s_tensor, prev_in_gate_tensor):
        #     x_curr_ = InputLayer(fc_shape, x_curr)
        #     prev_s_1_ = InputLayer(s_shape_1, prev_s_tensor)
        #     t_x_s_update_1_ = FCConv3DLayer(prev_s_1_,
        #                                     x_curr_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
        #                                     params=t_x_s_update_1.params)
        #
        #     t_x_s_reset_1_ = FCConv3DLayer(prev_s_1_, x_curr_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
        #                                    params=t_x_s_reset_1.params)
        #
        #     update_gate_ = SigmoidLayer(t_x_s_update_1_)
        #     comp_update_gate_ = ComplementLayer(update_gate_)
        #     reset_gate_ = SigmoidLayer(t_x_s_reset_1_)
        #
        #     rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_1_)
        #     t_x_rs_1_ = FCConv3DLayer(rs_, x_curr_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
        #                               params=t_x_rs_1.params)
        #     tanh_t_x_rs_ = TanhLayer(t_x_rs_1_)
        #
        #     gru_out_1_ = AddLayer(
        #         EltwiseMultiplyLayer(update_gate_, prev_s_1_),
        #         EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))
        #
        #     return gru_out_1_.output, update_gate_.output
        #
        # s_update_1_, _ = theano.scan(decode_recurrence_1,
        #                              sequences=[out_1],
        #                              outputs_info=[conv_out2.output,
        #                                            tensor.zeros_like(np.zeros(s_shape_1),
        #                                                              dtype=theano.config.floatX)])
        # update_all_1 = s_update_1_[-1]
        # s_out_1 = update_all_1[0][-1]
        #
        # s_out_1_input = InputLayer(s_shape_1, s_out_1)

        # Final convolution down to n_deconvfilter[4] output channels, which
        # the softmax loss consumes.
        conv_out2 = Conv3DLayer(input_2, (n_deconvfilter[4], 3, 3, 3))
        softmax_loss = SoftmaxWithLoss3D(conv_out2.output)

        print("conv_out_2: ", conv_out2.output_shape)
        print("conv_out_2: ", conv_out2.output)

        self.loss = softmax_loss.loss(self.y)
        self.error = softmax_loss.error(self.y)
        self.params = get_trainable_params()
        self.output = softmax_loss.prediction()
        # Update-gate sequences of every level, coarsest first.
        self.activations = [
            update_all_5, update_all_4, update_all_3, update_all_2
        ]
Example #5
0
    def network_definition(self):
        """Build the symbolic graph of the LSTM-style reconstruction network.

        A template encoder and three template gate layers (forget, input,
        cell) are created first so that their parameters exist; the
        ``recurrence`` step function then rebuilds the same layers on real
        tensors with ``params=<template>.params``.  The step is scanned over
        ``self.x``, the last hidden state is passed through three
        unpool/conv stages plus two further 3D convolutions, and the result
        feeds a softmax loss.

        Reads ``self.img_w``, ``self.img_h``, ``self.batch_size``,
        ``self.y``, ``self.hidden_last`` and ``self.cell_last`` (the last two
        are created as zero-filled shared variables when they are ``None``).

        Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``,
        ``self.error``, ``self.params``, ``self.output``,
        ``self.activations``, ``self.new_hidden_last`` and
        ``self.new_cell_last``.
        """

        # (multi_views, self.batch_size, 3, self.img_h, self.img_w),
        self.x = tensor5()
        self.is_x_tensor4 = False

        img_w = self.img_w
        img_h = self.img_h
        # Edge length of the recurrent hidden-state grid.
        n_gru_vox = 4
        # n_vox = self.n_vox

        n_convfilter = [96, 128, 256, 256, 256, 256]
        n_fc_filters = [1024]
        n_deconvfilter = [128, 128, 128, 64, 32, 2]
        input_shape = (self.batch_size, 3, img_w, img_h)

        # To define weights, define the network structure first.  These
        # template layers receive no input tensor; only their parameters are
        # reused inside recurrence().
        x = InputLayer(input_shape)
        conv1 = ConvLayer(x, (n_convfilter[0], 7, 7))
        pool1 = PoolLayer(conv1)

        conv2 = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        pool2 = PoolLayer(conv2)

        conv3 = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        pool3 = PoolLayer(conv3)

        conv4 = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        pool4 = PoolLayer(conv4)

        conv5 = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        pool5 = PoolLayer(conv5)

        conv6 = ConvLayer(pool5, (n_convfilter[5], 3, 3))
        pool6 = PoolLayer(conv6)

        flat6 = FlattenLayer(pool6)

        fc7 = TensorProductLayer(flat6, n_fc_filters[0])

        # LSTM
        # Hidden/cell state shape: (batch_size, n_gru_vox, n_deconvfilter[0],
        # n_gru_vox, n_gru_vox).
        h_shape = (self.batch_size, n_gru_vox, n_deconvfilter[0], n_gru_vox,
                   n_gru_vox)

        # Dummy 3D grid hidden representations
        prev_h = InputLayer(h_shape)

        # Template layers for the forget gate, input gate and cell candidate.
        t_x_s_forget = FCConv3DLayer(
            prev_h, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
        t_x_s_input = FCConv3DLayer(
            prev_h, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
        t_x_s_cell = FCConv3DLayer(
            prev_h, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

        # Initialize the hidden state and cell state with zeros unless they
        # were already set on the instance.
        if self.hidden_last is None:
            self.hidden_last = theano.shared(
                np.zeros(h_shape, dtype=theano.config.floatX))
        if self.cell_last is None:
            self.cell_last = theano.shared(
                np.zeros(h_shape, dtype=theano.config.floatX))

        def recurrence(x_curr, prev_h_tensor, prev_s_tensor):
            """Scan step: encode ``x_curr`` and advance the recurrent state.

            prev_h_tensor: previous hidden state output tensor
            prev_s_tensor: previous cell state output tensor

            Returns the new hidden state and the new cell state.
            """

            # Scan function cannot use compiled function.
            # Encoder: six conv -> pool -> LeakyReLU stages on shared weights.
            input_ = InputLayer(input_shape, x_curr)
            conv1_ = ConvLayer(input_, (n_convfilter[0], 7, 7),
                               params=conv1.params)
            pool1_ = PoolLayer(conv1_)
            rect1_ = LeakyReLU(pool1_)
            conv2_ = ConvLayer(rect1_, (n_convfilter[1], 3, 3),
                               params=conv2.params)
            pool2_ = PoolLayer(conv2_)
            rect2_ = LeakyReLU(pool2_)
            conv3_ = ConvLayer(rect2_, (n_convfilter[2], 3, 3),
                               params=conv3.params)
            pool3_ = PoolLayer(conv3_)
            rect3_ = LeakyReLU(pool3_)
            conv4_ = ConvLayer(rect3_, (n_convfilter[3], 3, 3),
                               params=conv4.params)
            pool4_ = PoolLayer(conv4_)
            rect4_ = LeakyReLU(pool4_)
            conv5_ = ConvLayer(rect4_, (n_convfilter[4], 3, 3),
                               params=conv5.params)
            pool5_ = PoolLayer(conv5_)
            rect5_ = LeakyReLU(pool5_)
            conv6_ = ConvLayer(rect5_, (n_convfilter[5], 3, 3),
                               params=conv6.params)
            pool6_ = PoolLayer(conv6_)
            rect6_ = LeakyReLU(pool6_)
            flat6_ = FlattenLayer(rect6_)
            fc7_ = TensorProductLayer(flat6_,
                                      n_fc_filters[0],
                                      params=fc7.params)
            rect7_ = LeakyReLU(fc7_)

            # LSTM
            # Dummy 3D grid hidden representations for previous hidden state and cell state
            prev_h_ = InputLayer(h_shape, prev_h_tensor)
            prev_s_ = InputLayer(h_shape, prev_s_tensor)

            # All three gate layers take the previous hidden state and the
            # encoded image feature as inputs.
            t_x_s_forget_ = FCConv3DLayer(
                prev_h_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_forget.params)

            t_x_s_input_ = FCConv3DLayer(
                prev_h_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_input.params)

            t_x_s_cell_ = FCConv3DLayer(
                prev_h_,
                rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
                params=t_x_s_cell.params)

            forget_gate_ = SigmoidLayer(t_x_s_forget_)
            input_gate_ = SigmoidLayer(t_x_s_input_)
            tanh_t_x_s_cell_ = TanhLayer(t_x_s_cell_)
            # Current cell state: forget * previous cell + input * candidate.
            cell_state_ = AddLayer(
                EltwiseMultiplyLayer(forget_gate_, prev_s_),
                EltwiseMultiplyLayer(input_gate_, tanh_t_x_s_cell_))
            # Current hidden state, i.e. the output of the LSTM; there is no
            # separate output gate, it is tanh of the cell state.
            hidden_state_ = TanhLayer(cell_state_)

            return hidden_state_.output, cell_state_.output

        # NOTE(review): get_value() hands scan the numeric contents of the
        # shared variables at graph-construction time, not the variables
        # themselves -- confirm that later changes to hidden_last/cell_last
        # are not expected to reach this graph.
        s_update, _ = theano.scan(
            recurrence,
            sequences=[
                self.x
            ],  # only the image sequence is scanned over
            outputs_info=[
                self.hidden_last.get_value(),
                self.cell_last.get_value()
            ])
        # s_update holds the hidden-state sequence first and the cell-state
        # sequence last.
        cell_all = s_update[-1]
        h_all = s_update[0]
        h_last = h_all[-1]

        # Decoder: starts from the hidden state of the last scan step.
        lstm_s = InputLayer(h_shape, h_last)

        unpool7 = Unpool3DLayer(lstm_s)
        conv7 = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
        rect7 = LeakyReLU(conv7)

        unpool8 = Unpool3DLayer(rect7)
        conv8 = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
        rect8 = LeakyReLU(conv8)

        unpool9 = Unpool3DLayer(rect8)
        conv9 = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
        rect9 = LeakyReLU(conv9)

        # No fourth unpooling step: conv10 works directly on rect9.
        # unpool10 = Unpool3DLayer(rect9)
        conv10 = Conv3DLayer(rect9, (n_deconvfilter[4], 3, 3, 3))
        rect10 = LeakyReLU(conv10)

        # Final convolution down to n_deconvfilter[5] channels for the loss.
        conv11 = Conv3DLayer(rect10, (n_deconvfilter[5], 3, 3, 3))

        softmax_loss = SoftmaxWithLoss3D(conv11.output)
        self.loss = softmax_loss.loss(self.y)
        self.error = softmax_loss.error(self.y)
        self.params = get_trainable_params()
        self.output = softmax_loss.prediction()
        # Activation of all cell states
        self.activations = [cell_all]
        # Symbolic hidden and cell state after the last scan step.
        self.new_hidden_last = h_last
        self.new_cell_last = cell_all[-1]