def network_definition(self):
    """Build the symbolic graph of the recurrent encoder / 3D decoder network.

    The method works in three stages:

    1. "Template" layers are instantiated once so that their weights exist.
    2. ``recurrence`` rebuilds the same layers for one time step, passing
       ``params=<template>.params`` so that the step uses the template
       weights, and is iterated over ``self.x`` with ``theano.scan``.
    3. The last hidden state is decoded by a stack of 3D unpooling and
       convolution layers and fed to ``SoftmaxWithLoss3D``.

    Reads ``self.img_w``, ``self.img_h``, ``self.batch_size`` and ``self.y``
    (none of them is defined in this method).

    Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
    ``self.params``, ``self.output``, ``self.activations`` and
    ``self.t_x_s_update``.
    """
    # Original author's note on the expected input layout:
    # (multi_views, self.batch_size, 3, self.img_h, self.img_w)
    # NOTE(review): input_shape below is built as (batch, 3, img_w, img_h),
    # i.e. width before height -- confirm which order is intended.
    self.x = tensor5()
    self.is_x_tensor4 = False

    img_w = self.img_w
    img_h = self.img_h
    n_gru_vox = 4
    # n_vox = self.n_vox

    n_convfilter = [96, 128, 256, 256, 256, 256]
    n_fc_filters = [1024]
    n_deconvfilter = [128, 128, 128, 64, 32, 2]
    input_shape = (self.batch_size, 3, img_w, img_h)

    # To define weights, define the network structure first.
    # These template layers carry no activations; the per-step graph in
    # `recurrence` inserts LeakyReLU after each pooling layer.
    x = InputLayer(input_shape)
    conv1 = ConvLayer(x, (n_convfilter[0], 7, 7))
    pool1 = PoolLayer(conv1)
    conv2 = ConvLayer(pool1, (n_convfilter[1], 3, 3))
    pool2 = PoolLayer(conv2)
    conv3 = ConvLayer(pool2, (n_convfilter[2], 3, 3))
    pool3 = PoolLayer(conv3)
    conv4 = ConvLayer(pool3, (n_convfilter[3], 3, 3))
    pool4 = PoolLayer(conv4)
    conv5 = ConvLayer(pool4, (n_convfilter[4], 3, 3))
    pool5 = PoolLayer(conv5)
    conv6 = ConvLayer(pool5, (n_convfilter[5], 3, 3))
    pool6 = PoolLayer(conv6)
    flat6 = FlattenLayer(pool6)
    fc7 = TensorProductLayer(flat6, n_fc_filters[0])

    # Hidden-state shape: (batch, n_gru_vox, n_deconvfilter[0], n_gru_vox,
    # n_gru_vox), i.e. (batch, 4, 128, 4, 4) with the constants above.
    s_shape = (self.batch_size, n_gru_vox, n_deconvfilter[0], n_gru_vox, n_gru_vox)

    # Dummy 3D grid hidden representations (template for the recurrent state)
    prev_s = InputLayer(s_shape)

    # Template gate layers: update gate, reset gate and candidate state.
    t_x_s_update = FCConv3DLayer(
        prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
    t_x_s_reset = FCConv3DLayer(
        prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

    reset_gate = SigmoidLayer(t_x_s_reset)

    rs = EltwiseMultiplyLayer(reset_gate, prev_s)
    t_x_rs = FCConv3DLayer(rs, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

    def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
        """One scan step: encode ``x_curr`` and update the 3D hidden state.

        Returns the new hidden state and the update-gate activation.
        ``prev_in_gate_tensor`` is unused; it is present because the scan
        below declares two recurrent outputs in ``outputs_info``.
        """
        # Scan function cannot use compiled function.
        input_ = InputLayer(input_shape, x_curr)
        conv1_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1.params)
        pool1_ = PoolLayer(conv1_)
        rect1_ = LeakyReLU(pool1_)
        conv2_ = ConvLayer(rect1_, (n_convfilter[1], 3, 3), params=conv2.params)
        pool2_ = PoolLayer(conv2_)
        rect2_ = LeakyReLU(pool2_)
        conv3_ = ConvLayer(rect2_, (n_convfilter[2], 3, 3), params=conv3.params)
        pool3_ = PoolLayer(conv3_)
        rect3_ = LeakyReLU(pool3_)
        conv4_ = ConvLayer(rect3_, (n_convfilter[3], 3, 3), params=conv4.params)
        pool4_ = PoolLayer(conv4_)
        rect4_ = LeakyReLU(pool4_)
        conv5_ = ConvLayer(rect4_, (n_convfilter[4], 3, 3), params=conv5.params)
        pool5_ = PoolLayer(conv5_)
        rect5_ = LeakyReLU(pool5_)
        conv6_ = ConvLayer(rect5_, (n_convfilter[5], 3, 3), params=conv6.params)
        pool6_ = PoolLayer(conv6_)
        rect6_ = LeakyReLU(pool6_)
        flat6_ = FlattenLayer(rect6_)
        fc7_ = TensorProductLayer(flat6_, n_fc_filters[0], params=fc7.params)
        rect7_ = LeakyReLU(fc7_)

        prev_s_ = InputLayer(s_shape, prev_s_tensor)

        t_x_s_update_ = FCConv3DLayer(
            prev_s_,
            rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_s_update.params)

        t_x_s_reset_ = FCConv3DLayer(
            prev_s_,
            rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_s_reset.params)

        update_gate_ = SigmoidLayer(t_x_s_update_)
        # presumably 1 - update_gate_; verify against ComplementLayer
        comp_update_gate_ = ComplementLayer(update_gate_)
        reset_gate_ = SigmoidLayer(t_x_s_reset_)

        rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
        t_x_rs_ = FCConv3DLayer(
            rs_, rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_rs.params)
        tanh_t_x_rs_ = TanhLayer(t_x_rs_)

        # GRU-style blend:
        # update_gate * prev_state + comp_update_gate * tanh(candidate)
        gru_out_ = AddLayer(
            EltwiseMultiplyLayer(update_gate_, prev_s_),
            EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

        return gru_out_.output, update_gate_.output

    # Iterate `recurrence` over the leading axis of self.x. Only the images
    # are passed as a sequence (no frame index is fed in). Both recurrent
    # outputs start from zero tensors of shape s_shape.
    s_update, _ = theano.scan(
        recurrence,
        sequences=[
            self.x
        ],
        outputs_info=[
            tensor.zeros_like(np.zeros(s_shape), dtype=theano.config.floatX),
            tensor.zeros_like(np.zeros(s_shape), dtype=theano.config.floatX)
        ])

    # s_update holds one sequence per value returned by `recurrence`:
    # index 0 is the hidden state, the last index is the update gate.
    update_all = s_update[-1]
    s_all = s_update[0]
    s_last = s_all[-1]  # hidden state after the final step
    gru_s = InputLayer(s_shape, s_last)

    # 3D decoder: three unpool + conv stages, then two plain conv stages.
    unpool7 = Unpool3DLayer(gru_s)
    conv7 = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
    rect7 = LeakyReLU(conv7)
    unpool8 = Unpool3DLayer(rect7)
    conv8 = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
    rect8 = LeakyReLU(conv8)
    unpool9 = Unpool3DLayer(rect8)
    conv9 = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
    rect9 = LeakyReLU(conv9)
    # unpool10 = Unpool3DLayer(rect9)
    conv10 = Conv3DLayer(rect9, (n_deconvfilter[4], 3, 3, 3))
    rect10 = LeakyReLU(conv10)
    # Final layer has n_deconvfilter[5] == 2 output filters.
    conv11 = Conv3DLayer(rect10, (n_deconvfilter[5], 3, 3, 3))
    softmax_loss = SoftmaxWithLoss3D(conv11.output)

    self.loss = softmax_loss.loss(self.y)
    self.error = softmax_loss.error(self.y)
    # Collected after every layer above has been constructed.
    self.params = get_trainable_params()
    self.output = softmax_loss.prediction()
    self.activations = [update_all]
    # Keeps a handle on the template update-gate layer.
    self.t_x_s_update = t_x_s_update
def network_definition(self):
    """Build the symbolic graph of the multi-resolution SE / GRU network.

    Structure, as visible in the code below:

    * Template 2D layers (conv, SE, pool) are created once to hold weights.
    * ``encoder`` rebuilds them for a single view with
      ``params=<template>.params`` and returns the flattened outputs of the
      residual stages 3 to 6; it is iterated over ``self.x`` with
      ``theano.scan``.
    * Four GRU-style recurrences (``gru5`` .. ``gru2``) run one after the
      other, from the coarsest grid (size 4) to the finest (size 32). Each
      one except ``gru5`` is initialised with the upsampled output of the
      previous, coarser stage.
    * A 2-filter ``Conv3DLayer`` prediction head is attached to the last
      state of every GRU; the loss is the mean of the four softmax losses,
      each computed against a strided sub-sampling of ``self.y``.

    Reads ``self.img_w``, ``self.img_h``, ``self.batch_size`` and ``self.y``
    (none of them is defined in this method).

    Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
    ``self.output``, ``self.params`` and ``self.activations``.

    The ``print`` calls are debug output of layer shapes emitted while the
    graph is being constructed.
    """
    self.x = tensor5()
    self.is_x_tensor4 = False

    img_w = self.img_w
    img_h = self.img_h
    # Only indices 1..4 of n_gru_vox and n_deconvfilter are used below.
    n_gru_vox = [32, 32, 16, 8, 4]

    n_convfilter = [16, 32, 64, 128, 256, 512]
    n_deconvfilter = [2, 2, 8, 32, 128]
    # Filter counts of the first conv of each upsampling stage (indices 1..3).
    n_middlefilter = [1, 4, 16, 64]
    input_shape = (self.batch_size, 3, img_w, img_h)

    # ------------------------------------------------------------------
    # Template 2D encoder layers (weight holders only).
    # The template pooling layers are fed by the 1x1 shortcut convs; the
    # real residual additions happen in `encoder`.
    # NOTE(review): the `padding` arguments of pool1/pool2 here differ from
    # those of pool1_/pool2_ in `encoder` -- confirm this is harmless.
    # ------------------------------------------------------------------
    x = InputLayer(input_shape)
    conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
    conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
    pool1 = PoolLayer(conv1b, padding=(0, 0))  # H/2->64

    conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
    conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
    conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
    se2 = SELayer(conv2b)
    pool2 = PoolLayer(conv2c)  # H/4->32

    conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
    conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
    conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
    se3 = SELayer(conv3b)
    pool3 = PoolLayer(conv3c, padding=(0, 0))  # H/8->16

    conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
    conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
    conv4c = ConvLayer(pool3, (n_convfilter[3], 1, 1))
    se4 = SELayer(conv4b)
    pool4 = PoolLayer(conv4c, padding=(0, 0))  # H/16->8

    conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
    conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
    conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))  # H/32->4
    se5 = SELayer(conv5b)
    pool5 = PoolLayer(conv5c, padding=(0, 0))

    conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
    conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
    conv6c = ConvLayer(pool5, (n_convfilter[5], 1, 1))  # H/32->4
    se6 = SELayer(conv6b)

    def encoder(x):
        """Encode one view; return flattened outputs of residual stages 3-6."""
        input_ = InputLayer(input_shape, x)
        conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
        rect1a_ = LeakyReLU(conv1a_)
        conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
        rect1b_ = LeakyReLU(conv1b_)
        pool1_ = PoolLayer(rect1b_)
        print("pool1: ", pool1_.output_shape)

        # Stage 2: conv-conv-SE branch added to a 1x1 shortcut conv.
        conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
        rect2a_ = LeakyReLU(conv2a_)
        conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
        rect2b_ = LeakyReLU(conv2b_)
        serect2b_ = SELayer(rect2b_, params=se2.params)
        conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
        res2_ = AddLayer(conv2c_, serect2b_)
        pool2_ = PoolLayer(res2_, padding=(0, 0))
        print("rect2b_: ", rect2b_.output_shape)
        print("serect2b_: ", serect2b_.output_shape)
        print("conv2c_: ", conv2c_.output_shape)

        # Stage 3 (same pattern).
        conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
        rect3a_ = LeakyReLU(conv3a_)
        conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
        rect3b_ = LeakyReLU(conv3b_)
        serect3b_ = SELayer(rect3b_, params=se3.params)
        conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
        # res3_ = AddLayer(conv3c_, rect3b_)
        res3_ = AddLayer(conv3c_, serect3b_)
        pool3_ = PoolLayer(res3_, padding=(0, 0))
        print("rect3b_: ", rect3b_.output_shape)
        print("serect3b_: ", serect3b_.output_shape)
        print("conv3c_: ", conv3c_.output_shape)

        # Stage 4.
        conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
        rect4a_ = LeakyReLU(conv4a_)
        conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
        rect4b_ = LeakyReLU(conv4b_)
        serect4b_ = SELayer(rect4b_, params=se4.params)
        conv4c_ = ConvLayer(pool3_, (n_convfilter[3], 1, 1), params=conv4c.params)
        res4_ = AddLayer(conv4c_, serect4b_)
        pool4_ = PoolLayer(res4_, padding=(0, 0))
        print("rect4b_: ", rect4b_.output_shape)
        # print("serect4b_: ", serect4b_.output_shape)
        print("conv4c_: ", conv4c_.output_shape)

        # Stage 5.
        conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
        rect5a_ = LeakyReLU(conv5a_)
        conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
        rect5b_ = LeakyReLU(conv5b_)
        serect5b_ = SELayer(rect5b_, params=se5.params)
        conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
        res5_ = AddLayer(conv5c_, serect5b_)
        pool5_ = PoolLayer(res5_, padding=(0, 0))
        print("rect5b_: ", rect5b_.output_shape)
        # print("serect5b_: ", serect5b_.output_shape)
        print("conv5c_: ", conv5c_.output_shape)

        # Stage 6 (no pooling afterwards).
        conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
        rect6a_ = LeakyReLU(conv6a_)
        conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
        rect6b_ = LeakyReLU(conv6b_)
        serect6b_ = SELayer(rect6b_, params=se6.params)
        conv6c_ = ConvLayer(pool5_, (n_convfilter[5], 1, 1), params=conv6c.params)
        res6_ = AddLayer(conv6c_, serect6b_)
        print("rect6b_: ", rect6b_.output_shape)
        # print("serect6b_: ", serect6b_.output_shape)
        print("conv6c_: ", conv6c_.output_shape)

        # The un-pooled residual outputs are what gets flattened and returned.
        flat3_ = FlattenLayer(res3_)
        flat4_ = FlattenLayer(res4_)
        flat5_ = FlattenLayer(res5_)
        flat6_ = FlattenLayer(res6_)

        # print("serect2: ", serect2b_.output_shape)
        # print("serect3: ", serect3b_.output_shape)
        # print("serect4: ", serect4b_.output_shape)
        # print("serect5: ", serect5b_.output_shape)
        # print("serect6: ", serect6b_.output_shape)
        print("res3: ", res3_.output_shape)
        print("res4: ", res4_.output_shape)
        print("res5: ", res5_.output_shape)
        print("res6: ", res6_.output_shape)
        # pool6_ = PoolLayer(res6_)

        return flat3_.output, flat4_.output, flat5_.output, flat6_.output

    # Set the shape of each resolution:
    # (batch, grid, filters, grid, grid). With the constants above:
    #   s_shape5 = (batch,  4, 128,  4,  4)
    #   s_shape4 = (batch,  8,  32,  8,  8)
    #   s_shape3 = (batch, 16,   8, 16, 16)
    #   s_shape2 = (batch, 32,   2, 32, 32)
    s_shape5 = (self.batch_size, n_gru_vox[4], n_deconvfilter[4], n_gru_vox[4], n_gru_vox[4])
    s_shape4 = (self.batch_size, n_gru_vox[3], n_deconvfilter[3], n_gru_vox[3], n_gru_vox[3])
    s_shape3 = (self.batch_size, n_gru_vox[2], n_deconvfilter[2], n_gru_vox[2], n_gru_vox[2])
    s_shape2 = (self.batch_size, n_gru_vox[1], n_deconvfilter[1], n_gru_vox[1], n_gru_vox[1])

    # Template gate layers, resolution 5 (coarsest)
    prev_s5 = InputLayer(s_shape5)
    curr_s5 = InputLayer(s_shape5)
    t_x_s_update5 = CConv3DLayer(prev_s5, curr_s5, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))
    t_x_s_reset5 = CConv3DLayer(prev_s5, curr_s5, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))
    reset_gate5 = SigmoidLayer(t_x_s_reset5)
    rs5 = EltwiseMultiplyLayer(reset_gate5, prev_s5)
    t_x_rs5 = CConv3DLayer(rs5, curr_s5, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3))

    # Template gate layers, resolution 4
    prev_s4 = InputLayer(s_shape4)
    curr_s4 = InputLayer(s_shape4)
    t_x_s_update4 = CConv3DLayer(prev_s4, curr_s4, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))
    t_x_s_reset4 = CConv3DLayer(prev_s4, curr_s4, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))
    reset_gate4 = SigmoidLayer(t_x_s_reset4)
    rs4 = EltwiseMultiplyLayer(reset_gate4, prev_s4)
    t_x_rs4 = CConv3DLayer(rs4, curr_s4, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3))

    # Template gate layers, resolution 3
    prev_s3 = InputLayer(s_shape3)
    curr_s3 = InputLayer(s_shape3)
    t_x_s_update3 = CConv3DLayer(prev_s3, curr_s3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))
    t_x_s_reset3 = CConv3DLayer(prev_s3, curr_s3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))
    reset_gate3 = SigmoidLayer(t_x_s_reset3)
    rs3 = EltwiseMultiplyLayer(reset_gate3, prev_s3)
    t_x_rs3 = CConv3DLayer(rs3, curr_s3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3))

    # Template gate layers, resolution 2 (finest)
    prev_s2 = InputLayer(s_shape2)
    curr_s2 = InputLayer(s_shape2)
    t_x_s_update2 = CConv3DLayer(prev_s2, curr_s2, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))
    t_x_s_reset2 = CConv3DLayer(prev_s2, curr_s2, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))
    reset_gate2 = SigmoidLayer(t_x_s_reset2)
    rs2 = EltwiseMultiplyLayer(reset_gate2, prev_s2)
    t_x_rs2 = CConv3DLayer(rs2, curr_s2, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3))

    def gru5(curr_s5, prev_s5):
        """One GRU-style step at resolution 5; returns the new hidden state."""
        # The encoder output arrives flattened; reshape it to the state shape.
        curr_s5 = tensor.reshape(curr_s5, s_shape5)
        curr_s5_ = InputLayer(s_shape5, curr_s5)
        prev_s5_ = InputLayer(s_shape5, prev_s5)
        print("curr_s5: ", curr_s5_.output_shape)
        print("prev_s5: ", prev_s5_.output_shape)

        t_x_s_update5_ = CConv3DLayer(
            prev_s5_, curr_s5_,
            (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
            params=t_x_s_update5.params)
        t_x_s_reset5_ = CConv3DLayer(
            prev_s5_, curr_s5_,
            (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
            params=t_x_s_reset5.params)

        update5_ = SigmoidLayer(t_x_s_update5_)
        # presumably 1 - update5_; verify against ComplementLayer
        comp_udpate_gate5_ = ComplementLayer(update5_)
        reset_gate5_ = SigmoidLayer(t_x_s_reset5_)

        rs5_ = EltwiseMultiplyLayer(reset_gate5_, prev_s5_)
        t_x_rs5_ = CConv3DLayer(
            rs5_, curr_s5_,
            (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3),
            params=t_x_rs5.params)
        tanh_t_x_rs5_ = TanhLayer(t_x_rs5_)
        # print("t_x_s_update5: ", t_x_s_update5_.output_shape)
        # print("t_x_s_reset5: ", t_x_s_reset5_.output_shape)

        # update * prev_state + comp_update * tanh(candidate)
        gru_out5_ = AddLayer(
            EltwiseMultiplyLayer(update5_, prev_s5_),
            EltwiseMultiplyLayer(comp_udpate_gate5_, tanh_t_x_rs5_))
        print("gru_out5: ", gru_out5_.output_shape)

        return gru_out5_.output

    def gru4(curr_s4, prev_s4):
        """One GRU-style step at resolution 4; returns the new hidden state."""
        curr_s4 = tensor.reshape(curr_s4, s_shape4)
        curr_s4_ = InputLayer(s_shape4, curr_s4)
        prev_s4_ = InputLayer(s_shape4, prev_s4)

        t_x_s_update4_ = CConv3DLayer(
            prev_s4_, curr_s4_,
            (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
            params=t_x_s_update4.params)
        t_x_s_reset4_ = CConv3DLayer(
            prev_s4_, curr_s4_,
            (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
            params=t_x_s_reset4.params)

        update4_ = SigmoidLayer(t_x_s_update4_)
        comp_udpate_gate4_ = ComplementLayer(update4_)
        reset_gate4_ = SigmoidLayer(t_x_s_reset4_)

        rs4_ = EltwiseMultiplyLayer(reset_gate4_, prev_s4_)
        t_x_rs4_ = CConv3DLayer(
            rs4_, curr_s4_,
            (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3),
            params=t_x_rs4.params)
        tanh_t_x_rs4_ = TanhLayer(t_x_rs4_)

        gru_out4_ = AddLayer(
            EltwiseMultiplyLayer(update4_, prev_s4_),
            EltwiseMultiplyLayer(comp_udpate_gate4_, tanh_t_x_rs4_))
        print("gru_out4: ", gru_out4_.output_shape)

        return gru_out4_.output

    def gru3(curr_s3, prev_s3):
        """One GRU-style step at resolution 3; returns the new hidden state."""
        curr_s3 = tensor.reshape(curr_s3, s_shape3)
        curr_s3_ = InputLayer(s_shape3, curr_s3)
        prev_s3_ = InputLayer(s_shape3, prev_s3)

        t_x_s_update3_ = CConv3DLayer(
            prev_s3_, curr_s3_,
            (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
            params=t_x_s_update3.params)
        t_x_s_reset3_ = CConv3DLayer(
            prev_s3_, curr_s3_,
            (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
            params=t_x_s_reset3.params)

        update3_ = SigmoidLayer(t_x_s_update3_)
        comp_udpate_gate3_ = ComplementLayer(update3_)
        reset_gate3_ = SigmoidLayer(t_x_s_reset3_)

        rs3_ = EltwiseMultiplyLayer(reset_gate3_, prev_s3_)
        t_x_rs3_ = CConv3DLayer(
            rs3_, curr_s3_,
            (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3),
            params=t_x_rs3.params)
        tanh_t_x_rs3_ = TanhLayer(t_x_rs3_)

        gru_out3_ = AddLayer(
            EltwiseMultiplyLayer(update3_, prev_s3_),
            EltwiseMultiplyLayer(comp_udpate_gate3_, tanh_t_x_rs3_))
        print("gru_out3: ", gru_out3_.output_shape)

        return gru_out3_.output

    def gru2(curr_s2, prev_s2):
        """One GRU-style step at resolution 2; returns the new hidden state."""
        curr_s2 = tensor.reshape(curr_s2, s_shape2)
        curr_s2_ = InputLayer(s_shape2, curr_s2)
        prev_s2_ = InputLayer(s_shape2, prev_s2)

        t_x_s_update2_ = CConv3DLayer(
            prev_s2_, curr_s2_,
            (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
            params=t_x_s_update2.params)
        t_x_s_reset2_ = CConv3DLayer(
            prev_s2_, curr_s2_,
            (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
            params=t_x_s_reset2.params)

        update2_ = SigmoidLayer(t_x_s_update2_)
        comp_udpate_gate2_ = ComplementLayer(update2_)
        reset_gate2_ = SigmoidLayer(t_x_s_reset2_)

        rs2_ = EltwiseMultiplyLayer(reset_gate2_, prev_s2_)
        t_x_rs2_ = CConv3DLayer(
            rs2_, curr_s2_,
            (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3),
            params=t_x_rs2.params)
        tanh_t_x_rs2_ = TanhLayer(t_x_rs2_)

        gru_out2_ = AddLayer(
            EltwiseMultiplyLayer(update2_, prev_s2_),
            EltwiseMultiplyLayer(comp_udpate_gate2_, tanh_t_x_rs2_))
        print("gru_out2: ", gru_out2_.output_shape)

        return gru_out2_.output

    # Run the 2D encoder over every view (leading axis of self.x).
    s_encoder, _ = theano.scan(encoder, sequences=[self.x])
    # print("self.x: ", self.x)

    # `encoder` returns (flat3_, flat4_, flat5_, flat6_), so the index is
    # reversed relative to the resolution number: index 3 (stage 6) feeds the
    # coarsest GRU, index 0 (stage 3) feeds the finest.
    out_encoder5 = s_encoder[3]
    out_encoder4 = s_encoder[2]
    out_encoder3 = s_encoder[1]
    out_encoder2 = s_encoder[0]

    # ---- Resolution 5: recurrence starts from an all-zero state ----------
    s_gru5, _ = theano.scan(
        gru5,
        sequences=[out_encoder5],
        outputs_info=[tensor.zeros_like(np.zeros(s_shape5), dtype=theano.config.floatX)])
    input_5 = InputLayer(s_shape5, s_gru5[-1])  # state after the last view
    # print("input_5: ", input_5.output_shape)
    pred5 = Conv3DLayer(input_5, (2, 3, 3, 3))  # 2-filter prediction head

    # Upsample to the next resolution: unpool, conv-conv-SE branch plus a
    # 1x1x1 shortcut conv, added together.
    unpool5 = Unpool3DLayer(input_5)
    conv3d5a = Conv3DLayer(unpool5, (n_middlefilter[3], 3, 3, 3))
    rect3d5a = LeakyReLU(conv3d5a)
    conv3d5b = Conv3DLayer(rect3d5a, (n_deconvfilter[3], 3, 3, 3))
    rect3d5b = LeakyReLU(conv3d5b)
    se3d5 = SE3DLayer(rect3d5b)
    conv3d5c = Conv3DLayer(unpool5, (n_deconvfilter[3], 1, 1, 1))
    res3d5 = AddLayer(conv3d5c, se3d5)
    print("se3d5b: ", se3d5.output_shape)
    print("rect3d5c: ", conv3d5c.output_shape)
    print("res3d5: ", res3d5.output_shape)

    # ---- Resolution 4: initial state is the upsampled resolution-5 output
    s_gru4, _ = theano.scan(
        gru4,
        sequences=[out_encoder4],
        outputs_info=[res3d5.output] )
    input_4 = InputLayer(s_shape4, s_gru4[-1])
    pred4 = Conv3DLayer(input_4, (2, 3, 3, 3))

    unpool4 = Unpool3DLayer(input_4)
    conv3d4a = Conv3DLayer(unpool4, (n_middlefilter[2], 3, 3, 3))
    rect3d4a = LeakyReLU(conv3d4a)
    conv3d4b = Conv3DLayer(rect3d4a, (n_deconvfilter[2], 3, 3, 3))
    rect3d4b = LeakyReLU(conv3d4b)
    se3d4 = SE3DLayer(rect3d4b)
    conv3d4c = Conv3DLayer(unpool4, (n_deconvfilter[2], 1, 1, 1))
    res3d4 = AddLayer(conv3d4c, se3d4)
    print("se3d4: ", se3d4.output_shape)
    print("rect3d4c: ", conv3d4c.output_shape)
    print("res3d4c: ", res3d4.output_shape)

    # ---- Resolution 3: initial state is the upsampled resolution-4 output
    s_gru3, _ = theano.scan(
        gru3,
        sequences=[out_encoder3],
        outputs_info=[res3d4.output])
    input_3 = InputLayer(s_shape3, s_gru3[-1])
    pred3 = Conv3DLayer(input_3, (2, 3, 3, 3))

    unpool3 = Unpool3DLayer(input_3)
    conv3d3a = Conv3DLayer(unpool3, (n_middlefilter[1], 3, 3, 3))
    rect3d3a = LeakyReLU(conv3d3a)
    conv3d3b = Conv3DLayer(rect3d3a, (n_deconvfilter[1], 3, 3, 3))
    rect3d3b = LeakyReLU(conv3d3b)
    se3d3 = SE3DLayer(rect3d3b)
    conv3d3c = Conv3DLayer(unpool3, (n_deconvfilter[1], 1, 1, 1))
    res3d3 = AddLayer(conv3d3c, se3d3)
    print("se3d3: ", se3d3.output_shape)
    print("rect3d3c: ", conv3d3c.output_shape)
    print("res3d3c: ", res3d3.output_shape)

    # ---- Resolution 2: initial state is the upsampled resolution-3 output
    s_gru2, _ = theano.scan(
        gru2,
        sequences=[out_encoder2],
        outputs_info=[res3d3.output])
    input_2 = InputLayer(s_shape2, s_gru2[-1])
    pred2 = Conv3DLayer(input_2, (2, 3, 3, 3))

    # Sub-sample the label volume with strides 2, 4 and 8 along axes 1, 3
    # and 4 to obtain targets for the coarser prediction heads. Axis 2 is
    # kept whole (presumably the class axis -- TODO confirm).
    labele_shape = self.y.shape
    label3 = self.y[:, 0:labele_shape[1]:2, :, 0:labele_shape[3]:2, 0:labele_shape[4]:2]
    label4 = self.y[:, 0:labele_shape[1]:4, :, 0:labele_shape[3]:4, 0:labele_shape[4]:4]
    label5 = self.y[:, 0:labele_shape[1]:8, :, 0:labele_shape[3]:8, 0:labele_shape[4]:8]
    print("label2: ", self.y.shape)
    print("label3: ", label3.shape)
    print("label4: ", label4.shape)
    print("label5: ", label5.shape)

    print("pred5: ", pred5.output_shape)
    print("pred4: ", pred4.output_shape)
    print("pred3: ", pred3.output_shape)
    print("pred2: ", pred2.output_shape)

    softmax_loss5 = SoftmaxWithLoss3D(pred5.output)
    softmax_loss4 = SoftmaxWithLoss3D(pred4.output)
    softmax_loss3 = SoftmaxWithLoss3D(pred3.output)
    softmax_loss2 = SoftmaxWithLoss3D(pred2.output)

    # Training loss: unweighted mean of the four per-resolution losses.
    # self.loss = softmax_loss2.loss(self.y)
    self.loss = (softmax_loss5.loss(label5) +
                 softmax_loss4.loss(label4) +
                 softmax_loss3.loss(label3) +
                 softmax_loss2.loss(self.y)) / 4.
    # self.loss = self.loss /

    # Error and prediction come from the finest-resolution head only.
    self.error = softmax_loss2.error(self.y)
    self.output = softmax_loss2.prediction()
    # Collected after every layer above has been constructed.
    self.params = get_trainable_params()
    self.activations = []
def network_definition(self):
    """Build the symbolic graph of the residual encoder / GRU / decoder network.

    The method works in three stages:

    1. "Template" layers are instantiated once so that their weights exist.
    2. ``recurrence`` rebuilds the encoder and gate layers for one time step
       with ``params=<template>.params``, adding LeakyReLU activations and
       residual ``AddLayer`` connections, and is iterated over ``self.x``
       with ``theano.scan``.
    3. The last hidden state is decoded by 3D unpooling / convolution stages
       with residual additions and fed to ``SoftmaxWithLoss3D``.

    Reads ``self.img_w``, ``self.img_h``, ``self.batch_size`` and ``self.y``
    (none of them is defined in this method).

    Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
    ``self.params``, ``self.output`` and ``self.activations``.
    """
    # Original author's note on the expected input layout:
    # (multi_views, self.batch_size, 3, self.img_h, self.img_w)
    # NOTE(review): input_shape below is built as (batch, 3, img_w, img_h),
    # i.e. width before height -- confirm which order is intended.
    self.x = tensor5()
    self.is_x_tensor4 = False

    img_w = self.img_w
    img_h = self.img_h
    n_gru_vox = 4
    # n_vox = self.n_vox

    n_convfilter = [96, 128, 256, 256, 256, 256]
    n_fc_filters = [1024]
    n_deconvfilter = [128, 128, 128, 64, 32, 2]
    input_shape = (self.batch_size, 3, img_w, img_h)

    # To define weights, define the network structure first.
    # The template graph is only a weight holder: it has no activations and
    # no residual additions, and its pooling inputs differ from block to
    # block (e.g. pool2 takes conv2c while pool3 takes conv3b).
    x = InputLayer(input_shape)
    conv1a = ConvLayer(x, (n_convfilter[0], 7, 7))
    conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
    pool1 = PoolLayer(conv1b)

    conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
    conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
    conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
    pool2 = PoolLayer(conv2c)

    conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
    conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
    conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
    pool3 = PoolLayer(conv3b)

    conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
    conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
    pool4 = PoolLayer(conv4b)

    conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
    conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
    conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))
    pool5 = PoolLayer(conv5b)

    conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
    conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
    pool6 = PoolLayer(conv6b)

    flat6 = FlattenLayer(pool6)
    fc7 = TensorProductLayer(flat6, n_fc_filters[0])

    # Hidden-state shape: (batch, n_gru_vox, n_deconvfilter[0], n_gru_vox,
    # n_gru_vox), i.e. (batch, 4, 128, 4, 4) with the constants above.
    s_shape = (self.batch_size, n_gru_vox, n_deconvfilter[0], n_gru_vox, n_gru_vox)

    # Dummy 3D grid hidden representations (template for the recurrent state)
    prev_s = InputLayer(s_shape)

    # Template gate layers: update gate, reset gate and candidate state.
    t_x_s_update = FCConv3DLayer(
        prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))
    t_x_s_reset = FCConv3DLayer(
        prev_s, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

    reset_gate = SigmoidLayer(t_x_s_reset)

    rs = EltwiseMultiplyLayer(reset_gate, prev_s)
    t_x_rs = FCConv3DLayer(rs, fc7, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3))

    def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
        """One scan step: encode ``x_curr`` and update the 3D hidden state.

        Returns the new hidden state and the update-gate activation.
        ``prev_in_gate_tensor`` is unused; it is present because the scan
        below declares two recurrent outputs in ``outputs_info``.
        """
        # Scan function cannot use compiled function.
        input_ = InputLayer(input_shape, x_curr)
        conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params)
        rect1a_ = LeakyReLU(conv1a_)
        conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params)
        rect1_ = LeakyReLU(conv1b_)
        pool1_ = PoolLayer(rect1_)

        # Block 2: conv-conv branch added to a 1x1 shortcut conv.
        conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params)
        rect2a_ = LeakyReLU(conv2a_)
        conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params)
        rect2_ = LeakyReLU(conv2b_)
        conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params)
        res2_ = AddLayer(conv2c_, rect2_)
        pool2_ = PoolLayer(res2_)

        # Block 3: same pattern as block 2.
        conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params)
        rect3a_ = LeakyReLU(conv3a_)
        conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params)
        rect3_ = LeakyReLU(conv3b_)
        conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params)
        res3_ = AddLayer(conv3c_, rect3_)
        pool3_ = PoolLayer(res3_)

        # Block 4: plain conv-conv, no shortcut.
        conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), params=conv4a.params)
        rect4a_ = LeakyReLU(conv4a_)
        conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params)
        rect4_ = LeakyReLU(conv4b_)
        pool4_ = PoolLayer(rect4_)

        # Block 5: conv-conv branch added to a 1x1 shortcut conv.
        conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params)
        rect5a_ = LeakyReLU(conv5a_)
        conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params)
        rect5_ = LeakyReLU(conv5b_)
        conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params)
        res5_ = AddLayer(conv5c_, rect5_)
        pool5_ = PoolLayer(res5_)

        # Block 6: identity shortcut (pool5_ is added directly).
        conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3), params=conv6a.params)
        rect6a_ = LeakyReLU(conv6a_)
        conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3), params=conv6b.params)
        rect6_ = LeakyReLU(conv6b_)
        res6_ = AddLayer(pool5_, rect6_)
        pool6_ = PoolLayer(res6_)

        flat6_ = FlattenLayer(pool6_)
        fc7_ = TensorProductLayer(flat6_, n_fc_filters[0], params=fc7.params)
        rect7_ = LeakyReLU(fc7_)

        prev_s_ = InputLayer(s_shape, prev_s_tensor)

        t_x_s_update_ = FCConv3DLayer(
            prev_s_,
            rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_s_update.params)

        t_x_s_reset_ = FCConv3DLayer(
            prev_s_,
            rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_s_reset.params)

        update_gate_ = SigmoidLayer(t_x_s_update_)
        # presumably 1 - update_gate_; verify against ComplementLayer
        comp_update_gate_ = ComplementLayer(update_gate_)
        reset_gate_ = SigmoidLayer(t_x_s_reset_)

        rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
        t_x_rs_ = FCConv3DLayer(
            rs_, rect7_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3),
            params=t_x_rs.params)
        tanh_t_x_rs_ = TanhLayer(t_x_rs_)

        # GRU-style blend:
        # update_gate * prev_state + comp_update_gate * tanh(candidate)
        gru_out_ = AddLayer(
            EltwiseMultiplyLayer(update_gate_, prev_s_),
            EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

        return gru_out_.output, update_gate_.output

    # Iterate `recurrence` over the leading axis of self.x. Only the images
    # are passed as a sequence (no frame index is fed in). Both recurrent
    # outputs start from zero tensors of shape s_shape.
    s_update, _ = theano.scan(
        recurrence,
        sequences=[
            self.x
        ],
        outputs_info=[
            tensor.zeros_like(np.zeros(s_shape), dtype=theano.config.floatX),
            tensor.zeros_like(np.zeros(s_shape), dtype=theano.config.floatX)
        ])

    # s_update holds one sequence per value returned by `recurrence`:
    # index 0 is the hidden state, the last index is the update gate.
    update_all = s_update[-1]
    s_all = s_update[0]
    s_last = s_all[-1]  # hidden state after the final step
    gru_s = InputLayer(s_shape, s_last)

    # Decoder stage 7: unpool, conv-conv, identity shortcut from unpool7.
    unpool7 = Unpool3DLayer(gru_s)
    conv7a = Conv3DLayer(unpool7, (n_deconvfilter[1], 3, 3, 3))
    rect7a = LeakyReLU(conv7a)
    conv7b = Conv3DLayer(rect7a, (n_deconvfilter[1], 3, 3, 3))
    rect7 = LeakyReLU(conv7b)
    res7 = AddLayer(unpool7, rect7)

    # Decoder stage 8: same pattern as stage 7.
    unpool8 = Unpool3DLayer(res7)
    conv8a = Conv3DLayer(unpool8, (n_deconvfilter[2], 3, 3, 3))
    rect8a = LeakyReLU(conv8a)
    conv8b = Conv3DLayer(rect8a, (n_deconvfilter[2], 3, 3, 3))
    rect8 = LeakyReLU(conv8b)
    res8 = AddLayer(unpool8, rect8)

    # Decoder stage 9: the filter count changes (128 -> 64), so the shortcut
    # goes through a 1x1x1 conv instead of being the identity.
    unpool9 = Unpool3DLayer(res8)
    conv9a = Conv3DLayer(unpool9, (n_deconvfilter[3], 3, 3, 3))
    rect9a = LeakyReLU(conv9a)
    conv9b = Conv3DLayer(rect9a, (n_deconvfilter[3], 3, 3, 3))
    rect9 = LeakyReLU(conv9b)

    conv9c = Conv3DLayer(unpool9, (n_deconvfilter[3], 1, 1, 1))
    res9 = AddLayer(conv9c, rect9)

    # Decoder stage 10: no unpooling.
    # NOTE(review): the shortcut conv10c takes rect10a (not res9) as input
    # and uses a 3x3x3 kernel, unlike the 1x1x1 shortcut conv9c -- confirm
    # this is intentional before changing it, since it determines the
    # parameter set.
    conv10a = Conv3DLayer(res9, (n_deconvfilter[4], 3, 3, 3))
    rect10a = LeakyReLU(conv10a)
    conv10b = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
    rect10 = LeakyReLU(conv10b)

    conv10c = Conv3DLayer(rect10a, (n_deconvfilter[4], 3, 3, 3))
    res10 = AddLayer(conv10c, rect10)

    # Final layer has n_deconvfilter[5] == 2 output filters.
    conv11 = Conv3DLayer(res10, (n_deconvfilter[5], 3, 3, 3))

    # Disabled debugging aid for inspecting the conv11 output shape:
    # x = tensor.dvector('x')
    # printing_op = printing.Print('vector----------------------',attrs = ['shape'])(x)
    # printed_x = printing_op(x)
    # f = function([x],printed_x)
    # result = f(conv11.output)
    # print('------------------- conv11 output shape', conv11.output.shape.eval())

    softmax_loss = SoftmaxWithLoss3D(conv11.output)

    self.loss = softmax_loss.loss(self.y)
    self.error = softmax_loss.error(self.y)
    # Collected after every layer above has been constructed.
    self.params = get_trainable_params()
    self.output = softmax_loss.prediction()
    self.activations = [update_all]
def network_definition(self): # (views, batch_size, 3, img_h, img_w) self.x = tensor5() self.is_x_tensor4 = False img_w = self.img_w img_h = self.img_h n_gru_vox = [4, 8, 16, 32] n_convfilter = [8, 16, 32, 64, 128] n_fc_filters = [256] n_deconvfilter = [128, 64, 32, 16, 2] input_shape = (self.batch_size, 3, img_w, img_h) fc_shape = (self.batch_size, n_fc_filters[0]) # To define the weights, define the net structure first x = InputLayer(input_shape) conv1a = ConvLayer(x, (n_convfilter[0], 7, 7)) conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3)) pool1 = PoolLayer(conv1b) # H/2 conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3)) conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3)) conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1)) pool2 = PoolLayer(conv2c) # H/4 conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3)) conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3)) conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1)) pool3 = PoolLayer(conv3c) # H/8 conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3)) conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3)) pool4 = PoolLayer(conv4b) # H/16 conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3)) conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3)) conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1)) # H/32 pool5 = PoolLayer(conv5b) flat5 = FlattenLayer(pool5) fc5 = TensorProductLayer(flat5, n_fc_filters[0]) flat4 = FlattenLayer(pool4) fc4 = TensorProductLayer(flat4, n_fc_filters[0]) flat3 = FlattenLayer(pool3) fc3 = TensorProductLayer(flat3, n_fc_filters[0]) flat2 = FlattenLayer(pool2) fc2 = TensorProductLayer(flat2, n_fc_filters[0]) # flat1 = FlattenLayer(pool1) # fc1 = TensorProductLayer(flat1, n_fc_filters[0]) # ==================== recurrence 5 ========================# s_shape_5 = (self.batch_size, n_gru_vox[0], n_deconvfilter[0], n_gru_vox[0], n_gru_vox[0]) # s_shape_5 = (self.batch_size, n_gru_vox[4], n_deconvfilter[4], n_gru_vox[4], n_gru_vox[4]) prev_s_5 = InputLayer(s_shape_5) t_x_s_update_5 = FCConv3DLayer( prev_s_5, fc5, 
(n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3)) t_x_s_reset_5 = FCConv3DLayer( prev_s_5, fc5, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3)) reset_gate_5 = SigmoidLayer(t_x_s_reset_5) rs_5 = EltwiseMultiplyLayer(reset_gate_5, prev_s_5) t_x_rs_5 = FCConv3DLayer( rs_5, fc5, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3)) # ==================== recurrence 4 ========================# s_shape_4 = (self.batch_size, n_gru_vox[1], n_deconvfilter[1], n_gru_vox[1], n_gru_vox[1]) prev_s_4 = InputLayer(s_shape_4) t_x_s_update_4 = FCConv3DLayer( prev_s_4, fc4, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3)) t_x_s_reset_4 = FCConv3DLayer( prev_s_4, fc4, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3)) reset_gate_4 = SigmoidLayer(t_x_s_reset_4) rs_4 = EltwiseMultiplyLayer(reset_gate_4, prev_s_4) t_x_rs_4 = FCConv3DLayer( rs_4, fc4, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3)) # =================== recurrence 3 =======================# s_shape_3 = (self.batch_size, n_gru_vox[2], n_deconvfilter[2], n_gru_vox[2], n_gru_vox[2]) prev_s_3 = InputLayer(s_shape_3) t_x_s_update_3 = FCConv3DLayer( prev_s_3, fc3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3)) t_x_s_reset_3 = FCConv3DLayer( prev_s_3, fc3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3)) reset_gate_3 = SigmoidLayer(t_x_s_reset_3) rs_3 = EltwiseMultiplyLayer(reset_gate_3, prev_s_3) t_x_rs_3 = FCConv3DLayer( rs_3, fc3, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3)) # ================== recurrence 2 =======================# s_shape_2 = (self.batch_size, n_gru_vox[3], n_deconvfilter[3], n_gru_vox[3], n_gru_vox[3]) prev_s_2 = InputLayer(s_shape_2) t_x_s_update_2 = FCConv3DLayer( prev_s_2, fc2, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3)) t_x_s_reset_2 = FCConv3DLayer( prev_s_2, fc2, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3)) reset_gate_2 = SigmoidLayer(t_x_s_reset_2) rs_2 = EltwiseMultiplyLayer(reset_gate_2, prev_s_2) t_x_rs_2 = FCConv3DLayer( rs_2, fc2, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3)) # # 
================= recurrence 1 ========================# # s_shape_1 = (self.batch_size, n_gru_vox[4], n_deconvfilter[4], n_gru_vox[4], n_gru_vox[4]) # prev_s_1 = InputLayer(s_shape_1) # # t_x_s_update_1 = FCConv3DLayer(prev_s_1, fc1, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3)) # t_x_s_reset_1 = FCConv3DLayer(prev_s_1, fc1, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3)) # # reset_gate_1 = SigmoidLayer(t_x_s_reset_1) # rs_1 = EltwiseMultiplyLayer(reset_gate_1, prev_s_1) # t_x_rs_1 = FCConv3DLayer(rs_1, fc1, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3)) def encode_recurrence(x_curr): input_ = InputLayer(input_shape, x_curr) conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7), params=conv1a.params) rect1a_ = LeakyReLU(conv1a_) conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3), params=conv1b.params) rect1_ = LeakyReLU(conv1b_) pool1_ = PoolLayer(rect1_) # flat1_ = FlattenLayer(pool1_) # fc1_ = TensorProductLayer(flat1_, n_fc_filters[0], params=fc1.params) # out1_ = LeakyReLU(fc1_) conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3), params=conv2a.params) rect2a_ = LeakyReLU(conv2a_) conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3), params=conv2b.params) rect2_ = LeakyReLU(conv2b_) conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1), params=conv2c.params) res2_ = AddLayer(conv2c_, rect2_) pool2_ = PoolLayer(res2_) flat2_ = FlattenLayer(pool2_) fc2_ = TensorProductLayer(flat2_, n_fc_filters[0], params=fc2.params) out2_ = LeakyReLU(fc2_) conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3), params=conv3a.params) rect3a_ = LeakyReLU(conv3a_) conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3), params=conv3b.params) rect3_ = LeakyReLU(conv3b_) conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1), params=conv3c.params) res3_ = AddLayer(conv3c_, rect3_) pool3_ = PoolLayer(res3_) flat3_ = FlattenLayer(pool3_) fc3_ = TensorProductLayer(flat3_, n_fc_filters[0], params=fc3.params) out3_ = LeakyReLU(fc3_) conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3), 
params=conv4a.params) rect4a_ = LeakyReLU(conv4a_) conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3), params=conv4b.params) rect4_ = LeakyReLU(conv4b_) pool4_ = PoolLayer(rect4_) flat4_ = FlattenLayer(pool4_) fc4_ = TensorProductLayer(flat4_, n_fc_filters[0], params=fc4.params) out4_ = LeakyReLU(fc4_) conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3), params=conv5a.params) rect5a_ = LeakyReLU(conv5a_) conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3), params=conv5b.params) rect5_ = LeakyReLU(conv5b_) conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1), params=conv5c.params) res5_ = AddLayer(conv5c_, rect5_) pool5_ = PoolLayer(res5_) flat5_ = FlattenLayer(pool5_) fc5_ = TensorProductLayer(flat5_, n_fc_filters[0], params=fc5.params) out5_ = LeakyReLU(fc5_) return out5_.output, out4_.output, out3_.output, out2_.output # , out1_.output s_encoder, _ = theano.scan(encode_recurrence, sequences=[self.x]) out_5 = s_encoder[0] out_4 = s_encoder[1] out_3 = s_encoder[2] out_2 = s_encoder[3] # out_1 = s_encoder[4] def decode_recurrence_5(x_curr, prev_s_tensor, prev_in_gate_tensor): x_curr_ = InputLayer(fc_shape, x_curr) prev_s_5_ = InputLayer(s_shape_5, prev_s_tensor) t_x_s_update_5_ = FCConv3DLayer( prev_s_5_, x_curr_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3), params=t_x_s_update_5.params) t_x_s_reset_5_ = FCConv3DLayer( prev_s_5_, x_curr_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3), params=t_x_s_reset_5.params) update_gate_ = SigmoidLayer(t_x_s_update_5_) comp_update_gate_ = ComplementLayer(update_gate_) reset_gate_ = SigmoidLayer(t_x_s_reset_5_) rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_5_) t_x_rs_5_ = FCConv3DLayer( rs_, x_curr_, (n_deconvfilter[0], n_deconvfilter[0], 3, 3, 3), params=t_x_rs_5.params) tanh_t_x_rs_ = TanhLayer(t_x_rs_5_) gru_out_5_ = AddLayer( EltwiseMultiplyLayer(update_gate_, prev_s_5_), EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_)) return gru_out_5_.output, update_gate_.output s_update_5_, _ = theano.scan( 
decode_recurrence_5, sequences=[out_5], outputs_info=[ tensor.zeros_like(np.zeros(s_shape_5), dtype=theano.config.floatX), tensor.zeros_like(np.zeros(s_shape_5), dtype=theano.config.floatX) ]) update_all_5 = s_update_5_[-1] s_out_5 = update_all_5[0][-1] input_5 = InputLayer(s_shape_5, s_out_5) # Unpooling s_out_5 unpool5 = Unpool3DLayer(input_5) conv_out5 = Conv3DLayer(unpool5, (64, 3, 3, 3)) print("conv_out5", conv_out5.output_shape) def decode_recurrence_4(x_curr, prev_s_tensor, prev_in_gate_tensor): x_curr_ = InputLayer(fc_shape, x_curr) prev_s_4_ = InputLayer(s_shape_4, prev_s_tensor) t_x_s_update_4_ = FCConv3DLayer( prev_s_4_, x_curr_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3), params=t_x_s_update_4.params) t_x_s_reset_4_ = FCConv3DLayer( prev_s_4_, x_curr_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3), params=t_x_s_reset_4.params) print("x_curr: ", x_curr_.output_shape) print("prev_s_4_: ", prev_s_4_.output_shape) print("t_x_s_update_4_: ", t_x_s_update_4_.output_shape) print("t_x_s_reset_4_: ", t_x_s_reset_4_.output_shape) update_gate_ = SigmoidLayer(t_x_s_update_4_) comp_update_gate_ = ComplementLayer(update_gate_) reset_gate_ = SigmoidLayer(t_x_s_reset_4_) rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_4_) t_x_rs_4_ = FCConv3DLayer( rs_, x_curr_, (n_deconvfilter[1], n_deconvfilter[1], 3, 3, 3), params=t_x_rs_4.params) tanh_t_x_rs_ = TanhLayer(t_x_rs_4_) gru_out_4_ = AddLayer( EltwiseMultiplyLayer(update_gate_, prev_s_4_), EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_)) return gru_out_4_.output, update_gate_.output s_update_4_, _ = theano.scan(decode_recurrence_4, sequences=[out_4], outputs_info=[ conv_out5.output, tensor.zeros_like( np.zeros(s_shape_4), dtype=theano.config.floatX) ]) update_all_4 = s_update_4_[-1] s_out_4 = update_all_4[0][-1] input_4 = InputLayer(s_shape_4, s_out_4) # Unpooling s_out_4 unpool4 = Unpool3DLayer(input_4) conv_out4 = Conv3DLayer(unpool4, (n_deconvfilter[2], 3, 3, 3)) print("conv_out_4: ", 
conv_out4.output_shape) print("conv_out_4: ", conv_out4.output) def decode_recurrence_3(x_curr, prev_s_tensor, prev_in_gate_tensor): x_curr_ = InputLayer(fc_shape, x_curr) prev_s_3_ = InputLayer(s_shape_3, prev_s_tensor) t_x_s_update_3_ = FCConv3DLayer( prev_s_3_, x_curr_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3), params=t_x_s_update_3.params) t_x_s_reset_3_ = FCConv3DLayer( prev_s_3_, x_curr_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3), params=t_x_s_reset_3.params) update_gate_ = SigmoidLayer(t_x_s_update_3_) comp_update_gate_ = ComplementLayer(update_gate_) reset_gate_ = SigmoidLayer(t_x_s_reset_3_) rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_3_) t_x_rs_3_ = FCConv3DLayer( rs_, x_curr_, (n_deconvfilter[2], n_deconvfilter[2], 3, 3, 3), params=t_x_rs_3.params) tanh_t_x_rs_ = TanhLayer(t_x_rs_3_) gru_out_3_ = AddLayer( EltwiseMultiplyLayer(update_gate_, prev_s_3_), EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_)) return gru_out_3_.output, update_gate_.output s_update_3_, _ = theano.scan(decode_recurrence_3, sequences=[out_3], outputs_info=[ conv_out4.output, tensor.zeros_like( np.zeros(s_shape_3), dtype=theano.config.floatX) ]) update_all_3 = s_update_3_[-1] s_out_3 = update_all_3[0][-1] input_3 = InputLayer(s_shape_3, s_out_3) # Unpooling s_out_4 unpool3 = Unpool3DLayer(input_3) conv_out3 = Conv3DLayer(unpool3, (n_deconvfilter[3], 3, 3, 3)) print("conv_out_3: ", conv_out3.output_shape) print("conv_out_3: ", conv_out3.output) def decode_recurrence_2(x_curr, prev_s_tensor, prev_in_gate_tensor): x_curr_ = InputLayer(fc_shape, x_curr) prev_s_2_ = InputLayer(s_shape_2, prev_s_tensor) t_x_s_update_2_ = FCConv3DLayer( prev_s_2_, x_curr_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3), params=t_x_s_update_2.params) t_x_s_reset_2_ = FCConv3DLayer( prev_s_2_, x_curr_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3), params=t_x_s_reset_2.params) update_gate_ = SigmoidLayer(t_x_s_update_2_) comp_update_gate_ = ComplementLayer(update_gate_) reset_gate_ = 
SigmoidLayer(t_x_s_reset_2_) rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_2_) t_x_rs_2_ = FCConv3DLayer( rs_, x_curr_, (n_deconvfilter[3], n_deconvfilter[3], 3, 3, 3), params=t_x_rs_2.params) tanh_t_x_rs_ = TanhLayer(t_x_rs_2_) gru_out_2_ = AddLayer( EltwiseMultiplyLayer(update_gate_, prev_s_2_), EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_)) return gru_out_2_.output, update_gate_.output s_update_2_, _ = theano.scan(decode_recurrence_2, sequences=[out_2], outputs_info=[ conv_out3.output, tensor.zeros_like( np.zeros(s_shape_2), dtype=theano.config.floatX) ]) update_all_2 = s_update_2_[-1] s_out_2 = update_all_2[0][-1] input_2 = InputLayer(s_shape_2, s_out_2) # Unpooling s_out_4 # unpool2 = Unpool3DLayer(input_2) # conv_out2 = Unpool3DLayer(unpool2, (n_deconvfilter[4], 3, 3, 3)) # def decode_recurrence_1(x_curr, prev_s_tensor, prev_in_gate_tensor): # x_curr_ = InputLayer(fc_shape, x_curr) # prev_s_1_ = InputLayer(s_shape_1, prev_s_tensor) # t_x_s_update_1_ = FCConv3DLayer(prev_s_1_, # x_curr_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3), # params=t_x_s_update_1.params) # # t_x_s_reset_1_ = FCConv3DLayer(prev_s_1_, x_curr_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3), # params=t_x_s_reset_1.params) # # update_gate_ = SigmoidLayer(t_x_s_update_1_) # comp_update_gate_ = ComplementLayer(update_gate_) # reset_gate_ = SigmoidLayer(t_x_s_reset_1_) # # rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_1_) # t_x_rs_1_ = FCConv3DLayer(rs_, x_curr_, (n_deconvfilter[4], n_deconvfilter[4], 3, 3, 3), # params=t_x_rs_1.params) # tanh_t_x_rs_ = TanhLayer(t_x_rs_1_) # # gru_out_1_ = AddLayer( # EltwiseMultiplyLayer(update_gate_, prev_s_1_), # EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_)) # # return gru_out_1_.output, update_gate_.output # # s_update_1_, _ = theano.scan(decode_recurrence_1, # sequences=[out_1], # outputs_info=[conv_out2.output, # tensor.zeros_like(np.zeros(s_shape_1), # dtype=theano.config.floatX)]) # update_all_1 = s_update_1_[-1] # s_out_1 = 
update_all_1[0][-1] # # s_out_1_input = InputLayer(s_shape_1, s_out_1) conv_out2 = Conv3DLayer(input_2, (n_deconvfilter[4], 3, 3, 3)) softmax_loss = SoftmaxWithLoss3D(conv_out2.output) print("conv_out_2: ", conv_out2.output_shape) print("conv_out_2: ", conv_out2.output) self.loss = softmax_loss.loss(self.y) self.error = softmax_loss.error(self.y) self.params = get_trainable_params() self.output = softmax_loss.prediction() self.activations = [ update_all_5, update_all_4, update_all_3, update_all_2 ]
def network_definition(self):
    """Build the symbolic graph: 2D conv encoder, 3D conv-LSTM, 3D decoder.

    The layers are first instantiated once on placeholder inputs so that
    their parameters exist; the ``recurrence`` step handed to
    ``theano.scan`` then rebuilds the same layers around the per-step
    tensors, reusing those parameters through ``params=``.

    Reads ``self.batch_size``, ``self.img_w``, ``self.img_h``, ``self.y``,
    ``self.hidden_last`` and ``self.cell_last``.

    Sets ``self.x``, ``self.is_x_tensor4``, ``self.loss``, ``self.error``,
    ``self.params``, ``self.output``, ``self.activations``,
    ``self.new_hidden_last`` and ``self.new_cell_last``; it also creates
    ``self.hidden_last`` / ``self.cell_last`` as zero-filled shared
    variables when they are ``None``.
    """
    # Input layout noted by the original author:
    # (multi_views, self.batch_size, 3, self.img_h, self.img_w)
    self.x = tensor5()
    self.is_x_tensor4 = False

    width = self.img_w
    height = self.img_h
    grid_size = 4

    enc_filters = [96, 128, 256, 256, 256, 256]
    # First encoder convolution is 7x7, the remaining five are 3x3.
    enc_kernels = [7, 3, 3, 3, 3, 3]
    fc_units = [1024]
    dec_filters = [128, 128, 128, 64, 32, 2]
    input_shape = (self.batch_size, 3, width, height)

    # ---- Template pass: build each layer once so its weights exist ----
    template_convs = []
    layer = InputLayer(input_shape)
    for n_out, k in zip(enc_filters, enc_kernels):
        conv = ConvLayer(layer, (n_out, k, k))
        template_convs.append(conv)
        layer = PoolLayer(conv)
    flat6 = FlattenLayer(layer)
    fc7 = TensorProductLayer(flat6, fc_units[0])

    # ---- LSTM templates ----
    # State grid: dec_filters[0] channels on a grid_size^3 volume.
    h_shape = (self.batch_size, grid_size, dec_filters[0], grid_size,
               grid_size)
    # All three gate convolutions share the same filter shape.
    gate_filter = (dec_filters[0], dec_filters[0], 3, 3, 3)

    # Placeholder hidden state used only to instantiate the gate weights.
    prev_h = InputLayer(h_shape)
    t_x_s_forget = FCConv3DLayer(prev_h, fc7, gate_filter)
    t_x_s_input = FCConv3DLayer(prev_h, fc7, gate_filter)
    t_x_s_cell = FCConv3DLayer(prev_h, fc7, gate_filter)

    # Zero-initialise the persistent hidden and cell states when absent.
    if self.hidden_last is None:
        self.hidden_last = theano.shared(
            np.zeros(h_shape, dtype=theano.config.floatX))
    if self.cell_last is None:
        self.cell_last = theano.shared(
            np.zeros(h_shape, dtype=theano.config.floatX))

    def recurrence(x_curr, prev_h_tensor, prev_s_tensor):
        """Run one scan step: encode ``x_curr`` and update the LSTM state.

        Args:
            x_curr: current slice of ``self.x`` supplied by scan.
            prev_h_tensor: hidden state output of the previous step.
            prev_s_tensor: cell state output of the previous step.

        Returns:
            Tuple ``(hidden_state, cell_state)`` of symbolic outputs.
        """
        # Scan cannot call a compiled function, so the encoder is rebuilt
        # symbolically here on top of the template parameters.
        feat = InputLayer(input_shape, x_curr)
        for template, n_out, k in zip(template_convs, enc_filters,
                                      enc_kernels):
            conv_ = ConvLayer(feat, (n_out, k, k), params=template.params)
            pool_ = PoolLayer(conv_)
            feat = LeakyReLU(pool_)
        flat6_ = FlattenLayer(feat)
        fc7_ = TensorProductLayer(flat6_, fc_units[0], params=fc7.params)
        rect7_ = LeakyReLU(fc7_)

        # Wrap the previous hidden and cell states as layers.
        prev_h_ = InputLayer(h_shape, prev_h_tensor)
        prev_s_ = InputLayer(h_shape, prev_s_tensor)

        t_x_s_forget_ = FCConv3DLayer(
            prev_h_, rect7_, gate_filter, params=t_x_s_forget.params)
        t_x_s_input_ = FCConv3DLayer(
            prev_h_, rect7_, gate_filter, params=t_x_s_input.params)
        t_x_s_cell_ = FCConv3DLayer(
            prev_h_, rect7_, gate_filter, params=t_x_s_cell.params)

        forget_gate_ = SigmoidLayer(t_x_s_forget_)
        input_gate_ = SigmoidLayer(t_x_s_input_)
        candidate_ = TanhLayer(t_x_s_cell_)

        # New cell state: forget * previous cell + input * candidate.
        kept_ = EltwiseMultiplyLayer(forget_gate_, prev_s_)
        written_ = EltwiseMultiplyLayer(input_gate_, candidate_)
        cell_state_ = AddLayer(kept_, written_)

        # New hidden state (the LSTM output) is tanh of the cell state;
        # no separate output gate is applied.
        hidden_state_ = TanhLayer(cell_state_)

        return hidden_state_.output, cell_state_.output

    # NOTE(review): get_value() hands scan the current numpy contents of
    # the shared variables rather than the shared variables themselves --
    # confirm this is the intended way to seed the recurrence.
    initial_states = [
        self.hidden_last.get_value(),
        self.cell_last.get_value(),
    ]
    # self.x is the only sequence scan iterates over.
    s_update, _ = theano.scan(
        recurrence, sequences=[self.x], outputs_info=initial_states)

    # s_update holds the per-step hidden states and cell states, in the
    # order returned by recurrence.
    h_all, cell_all = s_update
    h_last = h_all[-1]

    # ---- Decoder on the final hidden state ----
    layer = InputLayer(h_shape, h_last)
    # Three unpool -> 3x3x3 conv -> LeakyReLU stages.
    for n_out in dec_filters[1:4]:
        layer = Unpool3DLayer(layer)
        layer = Conv3DLayer(layer, (n_out, 3, 3, 3))
        layer = LeakyReLU(layer)
    # The last two convolutions run without unpooling (unpool10 disabled).
    conv10 = Conv3DLayer(layer, (dec_filters[4], 3, 3, 3))
    rect10 = LeakyReLU(conv10)
    conv11 = Conv3DLayer(rect10, (dec_filters[5], 3, 3, 3))
    softmax_loss = SoftmaxWithLoss3D(conv11.output)

    self.loss = softmax_loss.loss(self.y)
    self.error = softmax_loss.error(self.y)
    self.params = get_trainable_params()
    self.output = softmax_loss.prediction()
    # Activations exposed to callers: the cell states of every step.
    self.activations = [cell_all]
    self.new_hidden_last = h_last
    self.new_cell_last = cell_all[-1]