def _pad_tensors_to_same_length(x, y):
    """Pad ``x`` and ``y`` at the end of axis 1 so both share the same length.

    ``x`` is padded with a 3-entry paddings list (3-D input) and ``y`` with a
    2-entry paddings list (2-D target). Only the trailing side of the second
    dimension grows; the shorter tensor is extended to the longer one's length.

    Returns:
        The ``(x, y)`` pair after padding.
    """
    with flow.scope.namespace("pad_to_same_length"):
        len_x, len_y = x.shape[1], y.shape[1]
        # Plain Python comparison on the shape entries is used here rather
        # than flow.math.maximum.
        target_len = len_x if len_x >= len_y else len_y

        # input dimension is 3D
        x = flow.pad(x, [[0, 0], [0, target_len - len_x], [0, 0]])
        # target dimension is 2D
        y = flow.pad(y, [[0, 0], [0, target_len - len_y]])
    return x, y
def resnet50(images, args, trainable=True, training=True):
    """Assemble the ResNet-50 graph and return the output of the "fc1001" layer.

    Args:
        images: input image blob.
        args: configuration object; this function reads ``wd``,
            ``channel_last``, ``fuse_bn_relu``, ``fuse_bn_add_relu`` and
            ``pad_output`` from it.
        trainable: forwarded to ``ResnetBuilder`` and to the final dense layer.
        training: forwarded to ``ResnetBuilder``.

    Returns:
        The 1000-unit dense output named "fc1001".
    """
    # L2 regularisation is only enabled for a weight decay strictly in (0, 1).
    use_l2 = 0.0 < args.wd < 1.0
    weight_regularizer = flow.regularizers.l2(args.wd) if use_l2 else None

    builder = ResnetBuilder(
        weight_regularizer,
        trainable,
        training,
        args.channel_last,
        args.fuse_bn_relu,
        args.fuse_bn_add_relu,
    )

    if args.pad_output:
        # One extra trailing element on axis 3 when channel_last, else axis 1.
        paddings = (
            ((0, 0), (0, 0), (0, 0), (0, 1))
            if args.channel_last
            else ((0, 0), (0, 1), (0, 0), (0, 0))
        )
        images = flow.pad(images, paddings=paddings)

    with flow.scope.namespace("Resnet"):
        stem = builder.resnet_stem(images)
        body = builder.resnet_conv_x_body(stem)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format=builder.data_format,
            name="pool5",
        )
        # Flatten everything but the batch axis before the classifier.
        flattened = flow.reshape(pool5, (pool5.shape[0], -1))
        fc1001 = flow.layers.dense(
            flattened,
            units=1000,
            use_bias=True,
            kernel_initializer=flow.variance_scaling_initializer(2, 'fan_in', 'random_normal'),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=weight_regularizer,
            bias_regularizer=weight_regularizer,
            trainable=trainable,
            name="fc1001",
        )
    return fc1001
def _conv2d(inputs, filters, kernel_size, strides=1, padding="VALID", groups=1,
            use_bias=False, trainable=True, name=None):
    """NCHW 2-D convolution wrapper that also accepts explicit paddings.

    Args:
        inputs: input blob.
        filters, kernel_size, strides, groups, use_bias, trainable: forwarded
            to ``flow.layers.conv2d``.
        padding: ``"SAME"``, ``"VALID"``, or a list/tuple of explicit paddings.
            Explicit paddings are applied with ``flow.pad`` first and the
            convolution then runs with ``"VALID"``.
        name: layer name. The weight and bias names are built as
            ``name + "-weight"`` / ``name + "-bias"``, so a string must be
            supplied in practice; the ``None`` default raises ``TypeError``.

    Raises:
        ValueError: if ``padding`` is neither of the two keywords nor a
            list/tuple.
    """
    is_keyword = padding == "SAME" or padding == "VALID"
    if not is_keyword:
        if not isinstance(padding, (list, tuple)):
            raise ValueError("padding must be SAME, VALID or a list/tuple.")
        # Apply the explicit paddings up front, then convolve without padding.
        inputs = flow.pad(inputs, padding)
        padding = "VALID"

    return flow.layers.conv2d(
        inputs,
        filters,
        kernel_size,
        strides,
        padding,
        data_format="NCHW",
        dilation_rate=1,
        groups=groups,
        activation=None,
        use_bias=use_bias,
        kernel_initializer=flow.random_normal_initializer(),
        bias_initializer=flow.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        trainable=trainable,
        name=name,
        weight_name=name + "-weight",
        bias_name=name + "-bias",
    )
def insightface(images):
    """Build the face-embedding network selected by ``args.network``.

    ``args`` is read from the enclosing scope (it is not a parameter).
    Supported values are "mobilefacenet", "resnet100" and "resnet50".

    Raises:
        NotImplementedError: for any other ``args.network`` value.
    """
    network = args.network
    print("args.network", network)

    if network == "mobilefacenet":
        return MobileFacenet(images, embedding_size=128, bn_is_training=True)

    if network == "resnet100":
        return Resnet100(images, embedding_size=512, fc_type="E")

    if network != "resnet50":
        raise NotImplementedError

    if args.use_fp16 and args.pad_output:
        # One extra trailing element on axis 3 when channel_last, else axis 1.
        paddings = (
            ((0, 0), (0, 0), (0, 0), (0, 1))
            if args.channel_last
            else ((0, 0), (0, 1), (0, 0), (0, 0))
        )
        images = flow.pad(images, paddings=paddings)

    return Resnet50(
        images,
        embedding_size=512,
        fc_type="E",
        channel_last=args.channel_last,
    )
def discriminator(self, inputs, targets, trainable=True, reuse=False, const_init=False):
    """Build the discriminator on the channel-wise concatenation of two images.

    Args:
        inputs: first image blob.
        targets: second image blob, concatenated with ``inputs`` on axis 1.
        trainable: forwarded to every sub-layer. Previously the "d_d2" and
            "d_d3" stages did not receive it and fell back to whatever
            default ``_downsample`` uses.
        reuse: forwarded to every sub-layer.
        const_init: forwarded to the down-sampling and convolution layers.

    Returns:
        The output of the final 1-filter convolution ("d_conv2").

    The shape comments below are the original author's annotations; they
    presumably assume (n, 3, 256, 256) inputs and targets.
    """
    # (n, 6, 256, 256)
    d0 = flow.concat([inputs, targets], axis=1)
    # (n, 64, 128, 128)
    d1 = self._downsample(
        d0,
        64,
        4,
        name="d_d1",
        apply_batchnorm=False,
        reuse=reuse,
        const_init=const_init,
        trainable=trainable,
    )
    # (n, 128, 64, 64)
    d2 = self._downsample(
        d1, 128, 4, name="d_d2", reuse=reuse, const_init=const_init, trainable=trainable
    )
    # (n, 256, 32, 32)
    d3 = self._downsample(
        d2, 256, 4, name="d_d3", reuse=reuse, const_init=const_init, trainable=trainable
    )

    # (n, 256, 34, 34)
    pad1 = flow.pad(d3, [[0, 0], [0, 0], [1, 1], [1, 1]])
    # (n, 512, 31, 31)
    conv1 = layers.conv2d(
        pad1,
        512,
        4,
        strides=1,
        padding="valid",
        name="d_conv1",
        trainable=trainable,
        reuse=reuse,
        const_init=const_init,
        use_bias=False,
    )
    bn1 = layers.batchnorm(conv1, name="d_bn", reuse=reuse, trainable=trainable)
    leaky_relu = flow.nn.leaky_relu(bn1, alpha=0.3)

    # (n, 512, 33, 33)
    pad2 = flow.pad(leaky_relu, [[0, 0], [0, 0], [1, 1], [1, 1]])
    # (n, 1, 30, 30)
    conv2 = layers.conv2d(
        pad2,
        1,
        4,
        strides=1,
        padding="valid",
        name="d_conv2",
        trainable=trainable,
        reuse=reuse,
        const_init=const_init,
    )
    return conv2
def pad(x=flow.FixedTensorDef((3, 5))):
    """Pad the (3, 5) input by (1, 2) on axis 0 and (3, 4) on axis 1.

    The literal 1 is passed as ``flow.pad``'s third positional argument
    (presumably the constant fill value -- confirm against the ``flow.pad``
    signature).
    """
    paddings = [(1, 2), (3, 4)]
    third_arg = 1
    return flow.pad(x, paddings, third_arg)
def build_network(self):
    """Build the classification network and return the output of the "fc" layer.

    Reads ``self.images``, ``self.need_transpose``, ``self.trainable``,
    ``self.training``, ``self.layers``, ``self.num_group`` and
    ``self.num_classes``.

    Pipeline: optional transpose -> 7x7/2 conv ("conv1") -> batch norm ->
    ReLU -> pad -> 3x3/2 max-pool -> four ``_make_layer`` stages (64, 128,
    256, 512) -> 7x7 average pool -> flatten -> dense.

    Returns:
        The dense output with ``self.num_classes`` units.
    """
    if self.need_transpose:
        # presumably NHWC -> NCHW; the rest of the network uses data_format="NCHW"
        images = flow.transpose(self.images, name="transpose", perm=[0, 3, 1, 2])
    else:
        images = self.images

    conv1 = _conv2d(
        images,
        64,
        kernel_size=7,
        strides=2,
        padding=([0, 0], [0, 0], [3, 3], [3, 3]),
        groups=1,
        use_bias=False,
        trainable=self.trainable,
        name="conv1",
    )
    bn1 = _batch_norm(conv1, trainable=self.trainable, training=self.training, name="bn1")
    relu = flow.nn.relu(bn1, name="relu1")

    # Pad one element on each side of the last two axes, then pool the PADDED
    # tensor. Previously the pool consumed `relu` directly, which left this
    # padding op unused.
    pad_before_max_pool = flow.pad(relu, ([0, 0], [0, 0], [1, 1], [1, 1]))
    max_pool = flow.nn.max_pool2d(
        pad_before_max_pool,
        ksize=3,
        strides=2,
        padding="VALID",
        data_format="NCHW",
        name="max_pool",
    )

    layer1 = self._make_layer(max_pool, 64, self.layers[0], self.num_group, layer_num="layer1")
    layer2 = self._make_layer(
        layer1[-1], 128, self.layers[1], self.num_group, strides=2, layer_num="layer2"
    )
    layer3 = self._make_layer(
        layer2[-1], 256, self.layers[2], self.num_group, strides=2, layer_num="layer3"
    )
    layer4 = self._make_layer(
        layer3[-1], 512, self.layers[3], self.num_group, strides=2, layer_num="layer4"
    )

    # debug mode: dump data for debugging
    # with flow.watch_scope(blob_watcher=blob_watched,
    #    diff_blob_watcher=diff_blob_watched):
    #    bn1_identity = flow.identity(layer4[-1], name="layer4_last_out")

    avg_pool = flow.nn.avg_pool2d(
        layer4[-1], 7, strides=1, padding="VALID", data_format="NCHW", name="avg_pool"
    )
    # Flatten everything but the batch axis before the classifier.
    reshape = flow.reshape(avg_pool, (avg_pool.shape[0], -1))

    fc = flow.layers.dense(
        reshape,
        units=self.num_classes,
        use_bias=True,
        kernel_initializer=flow.xavier_uniform_initializer(),
        bias_initializer=flow.zeros_initializer(),
        trainable=self.trainable,
        name="fc",
    )
    return fc
def build_network(self, inputs):
    """Align each frame's temporal neighbours to it and interleave the result.

    ``inputs`` is unpacked as a 5-D blob ``(b, c, t, h, w)``. With
    ``N = self.time_dim``, every frame gets ``N - 1`` neighbour frames (window
    offsets ``0..N-1`` except the centre ``N // 2``, taken from the time-padded
    tensor). Neighbours are re-weighted by a softmax similarity computed on
    L2-normalised 1x1x1-conv features, optionally gated by a contrastive
    attention map, and finally interleaved with the original frames along the
    time axis.

    Reads ``self.time_dim``, ``self.out_channels``, ``self.trainable``,
    ``self.temperature`` and ``self.contrastive_att``.

    Returns:
        A blob of shape ``(b, c, N * t, h, w)`` in which positions with
        ``i % N == N // 2`` hold the original frames and the others hold the
        aligned neighbours, each added onto the "input_offset_*" variable.

    Notes:
        * The neighbour indices depend only on the static ``t`` and ``N``, so
          they are computed as plain Python ints. They used to be built as a
          flow tensor that was then iterated and converted with ``int()``.
        * The reshapes use ``c // 16`` channels, which presumably requires
          ``self.out_channels == c // 16`` -- confirm against the constructor.
        * Layer/variable names embed a timestamp, so they differ between runs.
    """
    b, c, t, h, w = inputs.shape
    N = self.time_dim
    half_window = (N - 1) // 2
    time_paddings = [(0, 0), (0, 0), (half_window, half_window), (0, 0), (0, 0)]

    def _timestamp():
        # Timestamp used as a name suffix for layers and variables.
        return datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')

    def _time_step(blob, index):
        # Length-1 slice of `blob` at position `index` on the time axis (axis 2).
        return flow.slice(
            blob,
            begin=[None, None, int(index), None, None],
            size=[None, None, 1, None, None],
        )

    def _repeat_per_neighbor(blob):
        # Insert a new axis 3 and repeat the blob N - 1 times along it.
        expanded = flow.expand_dims(blob, axis=3)
        repeated = expanded
        for _ in range(N - 2):
            repeated = flow.concat(inputs=[repeated, expanded], axis=3)
        return repeated

    # For frame f, the neighbours sit at padded positions f + i for every
    # window offset i except the centre; frame-major order.
    neighbor_time_index = [
        frame + offset
        for frame in range(t)
        for offset in range(N)
        if offset != N // 2
    ]

    # feature map registration
    tempname = _timestamp()
    init = flow.kaiming_initializer(shape=inputs.shape, mode="fan_out", nonlinearity="relu")
    semantic = conv3d_layer(
        "conv_semantic_" + tempname, inputs, self.out_channels,
        kernel_size=1, use_bias=False, padding="VALID", trainable=self.trainable,
        weight_initializer=init,
    )
    inputs_norm = flow.math.l2_normalize(semantic, axis=1)
    inputs_norm_padding = flow.pad(inputs_norm, paddings=time_paddings)

    inputs_norm_expand = _repeat_per_neighbor(inputs_norm)
    inputs_norm_expand = flow.transpose(inputs_norm_expand, perm=[0, 2, 3, 4, 5, 1])
    inputs_norm_expand = flow.reshape(inputs_norm_expand, shape=[-1, h * w, c // 16])

    neighbor_norm = flow.concat(
        [_time_step(inputs_norm_padding, index) for index in neighbor_time_index],
        axis=2,
    )
    neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
    neighbor_norm = flow.reshape(neighbor_norm, shape=[-1, c // 16, h * w])

    similarity = flow.matmul(inputs_norm_expand, neighbor_norm) * self.temperature
    similarity = nn.softmax(similarity, axis=-1)

    inputs_padding = flow.pad(inputs, paddings=time_paddings)
    neighbor = flow.concat(
        [_time_step(inputs_padding, index) for index in neighbor_time_index],
        axis=2,
    )
    neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
    neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

    neighbor_new = flow.matmul(similarity, neighbor)
    neighbor_new = flow.reshape(neighbor_new, shape=[b, t * (N - 1), h, w, c])
    neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

    # contrastive attention
    if self.contrastive_att:
        temp_input = _repeat_per_neighbor(inputs)
        temp_input = flow.reshape(temp_input, shape=[b, c, (N - 1) * t, h, w])
        input_att = conv3d_layer(
            "conv3d_inputmapping_" + tempname, temp_input, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False,
            weight_initializer=flow.kaiming_initializer(
                shape=temp_input.shape, mode="fan_out", nonlinearity="relu"),
        )
        n_att = conv3d_layer(
            "conv3d_nmapping_" + tempname, neighbor_new, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False,
            weight_initializer=flow.kaiming_initializer(
                shape=neighbor_new.shape, mode="fan_out", nonlinearity="relu"),
        )
        temp_input = input_att * n_att
        contrastive_att_net = conv3d_layer(
            "conv3d_att_net_" + tempname, temp_input, 1,
            kernel_size=1, use_bias=False, trainable=self.trainable,
            weight_initializer=flow.kaiming_initializer(
                shape=temp_input.shape, mode="fan_out", nonlinearity="relu"),
        )
        contrastive_att_net = flow.math.sigmoid(contrastive_att_net)
        # Single-channel gate applied to every neighbour channel.
        neighbor_new = flow.math.multiply(neighbor_new, contrastive_att_net)

    # integrating feature maps
    init = flow.zeros_initializer()
    tempname = _timestamp()
    input_offset = flow.get_variable(
        "input_offset_" + tempname,
        shape=(b, c, N * t, h, w),
        initializer=init,
        dtype=inputs.dtype,
        trainable=self.trainable,
    )

    # NOTE(review): the source formatting was lost; everything up to the final
    # concat is assumed to belong under this CPU placement scope -- confirm.
    with flow.scope.placement("cpu", "0:0"):
        # Output slots along the time axis: the centre of each window of N
        # receives the original frame, the remaining slots the neighbours.
        input_index = [i for i in range(t * N) if i % N == N // 2]
        neighbor_index = [i for i in range(t * N) if i % N != N // 2]

        input_offset_list = [
            _time_step(input_offset, index) for index in range(input_offset.shape[2])
        ]
        inputs_list = [_time_step(inputs, index) for index in range(inputs.shape[2])]
        neighbor_new_list = [
            _time_step(neighbor_new, index) for index in range(neighbor_new.shape[2])
        ]

        for slot, frame in zip(input_index, inputs_list):
            input_offset_list[slot] += frame
        for slot, frame in zip(neighbor_index, neighbor_new_list):
            input_offset_list[slot] += frame

        input_offset = flow.concat(input_offset_list, axis=2)
    return input_offset
def build_network(self, inputs):
    """Align each frame's temporal neighbours to it and interleave the result.

    ``inputs`` is unpacked as a 5-D blob ``(b, c, t, h, w)``. With
    ``N = self.time_dim``, every frame gets ``N - 1`` neighbour frames (window
    offsets ``0..N-1`` except the centre ``N // 2``, taken from the time-padded
    tensor). Neighbours are re-weighted by a softmax similarity computed on
    L2-normalised 1x1x1-conv features, optionally gated by a contrastive
    attention map, and finally interleaved with the original frames along the
    time axis.

    Reads ``self.time_dim``, ``self.out_channels``, ``self.temperature`` and
    ``self.contrastive_att``.

    Returns:
        A blob of shape ``(b, c, N * t, h, w)``.

    Fixes relative to the previous revision:
        * The neighbours are multiplied by the sigmoid attention map; they
          used to be multiplied by the boolean flag ``self.contrastive_att``
          while the attention map was left unused.
        * The attention convolution emits a single channel so that it can
          gate all ``c`` channels of the neighbours.
        * The input is repeated ``N - 1`` times (was hard-coded to 2, which is
          only right for ``N == 3``).
        * A dead ``np.zeros(..., dtype=np.float)`` assignment was removed;
          ``np.float`` no longer exists in NumPy >= 1.24.
    """
    b, c, t, h, w = inputs.shape
    N = self.time_dim
    half_window = (N - 1) // 2
    time_paddings = [(0, 0), (0, 0), (half_window, half_window), (0, 0), (0, 0)]

    def _time_step(blob, index):
        # Length-1 slice of `blob` at position `index` on the time axis (axis 2).
        return flow.slice(
            blob,
            begin=[None, None, int(index), None, None],
            size=[None, None, 1, None, None],
        )

    # One row per neighbour offset (arange(t) + i, centre offset skipped);
    # transposing before flattening yields frame-major order. int64 stands in
    # for the 64-bit "long" index tensor this was ported from.
    offset_rows = [np.arange(0, t) + i for i in range(N) if i != N // 2]
    neighbor_time_index = np.array(offset_rows, dtype=np.int64).T.flatten()

    semantic = conv3d_layer(
        "conv_semantic_", inputs, self.out_channels,
        kernel_size=1, use_bias=False, padding="SAME",
    )
    inputs_norm = flow.math.l2_normalize(semantic, axis=1)
    inputs_norm_padding = flow.pad(inputs_norm, paddings=time_paddings)

    inputs_norm_expand = flow.expand_dims(inputs_norm, axis=3)
    temp_inputs_norm_expand = inputs_norm_expand
    for _ in range(N - 2):
        inputs_norm_expand = flow.concat(
            inputs=[inputs_norm_expand, temp_inputs_norm_expand], axis=3)

    # NOTE(review): a reshape to the permuted shape is used here in place of
    # the commented-out transpose. The two are not equivalent in general --
    # confirm this is intended.
    # inputs_norm_expand=flow.transpose(inputs_norm_expand,perm=[0, 2, 3, 4, 5, 1])
    print("inputs_norm_expand", inputs_norm_expand.shape)
    expand_shape = inputs_norm_expand.shape
    inputs_norm_expand = flow.reshape(
        inputs_norm_expand,
        (expand_shape[0], expand_shape[2], expand_shape[3],
         expand_shape[4], expand_shape[5], expand_shape[1]))
    inputs_norm_expand = flow.reshape(inputs_norm_expand, shape=[-1, h * w, c // 16])

    neighbor_norm = flow.concat(
        [_time_step(inputs_norm_padding, index) for index in neighbor_time_index],
        axis=2)
    neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
    neighbor_norm = flow.reshape(neighbor_norm, shape=[-1, c // 16, h * w])

    similarity = flow.matmul(inputs_norm_expand, neighbor_norm) * self.temperature
    similarity = nn.softmax(similarity, axis=-1)

    inputs_padding = flow.pad(inputs, paddings=time_paddings)
    # Equivalent of inputs_padding[:, :, neighbor_time_index, :, :]
    neighbor = flow.concat(
        [_time_step(inputs_padding, index) for index in neighbor_time_index],
        axis=2)
    neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
    neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

    neighbor_new = flow.matmul(similarity, neighbor)
    neighbor_new = flow.reshape(neighbor_new, shape=[b, t * (N - 1), h, w, c])
    neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

    if self.contrastive_att:
        # Repeat the input N - 1 times so it lines up with the neighbours.
        temp_input = flow.expand_dims(inputs, axis=3)
        repeated_input = temp_input
        for _ in range(N - 2):
            repeated_input = flow.concat(inputs=[repeated_input, temp_input], axis=3)
        temp_input = flow.reshape(repeated_input, shape=[b, c, (N - 1) * t, h, w])

        input_att = conv3d_layer(
            "conv3d_inputmapping", temp_input, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False)
        n_att = conv3d_layer(
            "conv3d_nmapping", neighbor_new, self.out_channels,
            kernel_size=1, use_bias=False, trainable=False)
        # Single-channel gate applied to every neighbour channel.
        contrastive_att_net = conv3d_layer(
            "conv3d_att_net", input_att * n_att, 1,
            kernel_size=1, use_bias=False)
        contrastive_att = flow.math.sigmoid(contrastive_att_net)
        neighbor_new = flow.math.multiply(neighbor_new, contrastive_att)

    # Device placement is left unset for now.
    init = flow.zeros_initializer()
    input_offset = flow.get_variable(
        "input_offset",
        shape=(b, c, N * t, h, w),
        initializer=init,
        dtype=inputs.dtype,
        trainable=True)

    # Output slots along the time axis: the centre of each window of N
    # receives the original frame, the remaining slots the neighbours.
    input_index = np.array([i for i in range(t * N) if i % N == N // 2])
    neighbor_index = np.array([i for i in range(t * N) if i % N != N // 2])
    print("input_offset:", input_offset.shape)

    input_offset_list = [
        _time_step(input_offset, index) for index in range(input_offset.shape[2])
    ]
    inputs_list = [_time_step(inputs, index) for index in range(inputs.shape[2])]
    neighbor_new_list = [
        _time_step(neighbor_new, index) for index in range(neighbor_new.shape[2])
    ]

    for slot, frame in zip(input_index, inputs_list):
        input_offset_list[slot] += frame
    for slot, frame in zip(neighbor_index, neighbor_new_list):
        input_offset_list[slot] += frame

    input_offset = flow.concat(input_offset_list, axis=2)
    print("after", input_offset.shape)
    return input_offset
def pad(x: tp.Numpy.Placeholder((3, 5))):
    """Pad the (3, 5) input by (1, 2) on axis 0 and (3, 4) on axis 1.

    The literal 1 is passed as ``flow.pad``'s third positional argument
    (presumably the constant fill value -- confirm against the ``flow.pad``
    signature).
    """
    paddings = [(1, 2), (3, 4)]
    third_arg = 1
    return flow.pad(x, paddings, third_arg)