def gap_layer(input_tensor, layer_name):
    """Global average pooling over the two trailing axes of an NCHW tensor.

    Reference: Min Lin, Qiang Chen, Shuicheng Yan. Network In Network.
    arXiv:1312.4400

    Args:
        input_tensor: tensor whose static shape has exactly two entries
            after the first two (they are read as height and width).
        layer_name: name of the variable scope the ops are created in.

    Returns:
        The pooled tensor after being passed through ``flatten``.
    """
    height, width = input_tensor.get_shape().as_list()[2:]
    # One pooling window (and one stride) that spans the whole feature map.
    window = [1, 1, height, width]

    with tf.variable_scope(layer_name):
        pooled = tf.nn.avg_pool(
            input_tensor,
            ksize=window,
            strides=window,
            padding='SAME',
            data_format='NCHW')
        return flatten(pooled)
def inference(images, keep_prob):
    """Build the classification graph and return the two-way logits.

    The network is four stages, each made of two ``conv_layer`` calls
    followed by one ``max_pooling_layer``, then three ``fc_layer`` calls,
    dropout, and a final ``fc_layer`` with identity activation.

    Args:
        images: input batch; the original author notes [N, 3, 33, 33].
        keep_prob: keep probability handed to ``tf.nn.dropout``.

    Returns:
        The output of the 'logits' ``fc_layer`` (``output_dim=2``).
    """
    # (output channels, conv layer names, pooling layer name) for each stage.
    conv_stages = (
        (64, ('conv1', 'conv2'), 'max_pool_1'),
        (128, ('conv3', 'conv4'), 'max_pool_2'),
        (256, ('conv5', 'conv6'), 'max_pool_3'),
        (512, ('conv7', 'conv8'), 'max_pool_4'),
    )

    features = images
    for channels, conv_names, pool_name in conv_stages:
        for conv_name in conv_names:
            features = conv_layer(features, channels, conv_name)
        features = max_pooling_layer(features, layer_name=pool_name)

    # Disabled alternative head, kept from the original as an inert string
    # literal. `conv8` in it denotes the output of the 'conv8' layer, i.e.
    # the features just before 'max_pool_4'.
    '''
    Global average pooling
    gap = gap_layer(conv8, layer_name='global_avg_pool')
    fc = fc_layer(gap, 128, 'fc', use_DW=True)
    with tf.name_scope('dropout'):
        dropped = tf.nn.dropout(fc, keep_prob)
    logits = fc_layer(input_tensor=dropped, output_dim=2, layer_name='logits', act_ftn=tf.identity, use_DW=True)
    return logits
    '''

    hidden = flatten(features)
    for width, fc_name in ((512, 'fc1'), (256, 'fc2'), (128, 'fc3')):
        hidden = fc_layer(input_tensor=hidden,
                          output_dim=width,
                          layer_name=fc_name)

    with tf.name_scope('dropout'):
        dropped = tf.nn.dropout(hidden, keep_prob)

    return fc_layer(input_tensor=dropped,
                    output_dim=2,
                    layer_name='logits',
                    act_ftn=tf.identity)
def forward(self,
            inputs,
            initial_states=None,
            sequence_length=None,
            **kwargs):
    """
    Run `self.cell` over every time step of `inputs`.

    In dygraph mode the recurrence is unrolled with a Python loop;
    otherwise the call is delegated to `L.rnn`.

    Parameters:
        inputs: A (possibly nested structure of) tensor variable[s]. The
            number of time steps is read from axis `self.time_step_index`
            of the first flattened input.
        initial_states: A (possibly nested structure of) initial state[s].
            If None, `self.cell.get_initial_states` is called with
            `batch_ref=inputs` and `batch_dim_idx=self.batch_index`.
            The default value is None.
        sequence_length: Per-example lengths used to build a step mask.
            Where the mask is 0 the previous state is carried over instead
            of the newly computed one. If None, no masking is applied.
            The default value is None.
        **kwargs: Extra keyword arguments forwarded to `self.cell` (or to
            `L.rnn` outside dygraph mode).

    Returns:
        tuple: `(final_outputs, final_states)`. In dygraph mode
            `final_outputs` has the structure of the cell's step output
            with the steps stacked along axis `self.time_step_index`, and
            `final_states` is the state after the last processed step
            (the initial states when there are no time steps).
    """
    if F.in_dygraph_mode():

        class OutputArray(object):
            """Collects the per-step tensors of one output leaf."""

            def __init__(self, x):
                self.array = [x]

            def append(self, x):
                self.array.append(x)

        def _maybe_copy(state, new_state, step_mask):
            # TODO: use where_op
            # Equals new_state * mask + state * (1 - mask): where the mask
            # is 1 the new state is taken, where it is 0 the old state is
            # kept.
            new_state = L.elementwise_mul(new_state, step_mask, axis=0) - \
                L.elementwise_mul(state, (step_mask - 1), axis=0)
            return new_state

        flat_inputs = U.flatten(inputs)
        time_steps = flat_inputs[0].shape[self.time_step_index]

        if initial_states is None:
            initial_states = self.cell.get_initial_states(
                batch_ref=inputs, batch_dim_idx=self.batch_index)

        if not self.time_major:
            # Swap the first two axes so that the time step comes first.
            inputs = U.map_structure(
                lambda x: L.transpose(x, [1, 0] + list(
                    range(2, len(x.shape)))), inputs)

        if sequence_length is not None:
            mask = L.sequence_mask(
                sequence_length,
                maxlen=time_steps,
                dtype=U.flatten(initial_states)[0].dtype)
            # Make the mask time-first as well.
            mask = L.transpose(mask, [1, 0])

        if self.is_reverse:
            # Process the sequence backwards by reversing the time axis.
            inputs = U.map_structure(lambda x: L.reverse(x, axis=[0]),
                                     inputs)
            mask = L.reverse(
                mask, axis=[0]) if sequence_length is not None else None

        states = initial_states
        outputs = []
        for i in range(time_steps):
            # Slice out the inputs of the current time step.
            step_inputs = U.map_structure(lambda x: x[i], inputs)
            step_outputs, new_states = self.cell(step_inputs, states,
                                                 **kwargs)
            if sequence_length is not None:
                new_states = U.map_structure(
                    partial(_maybe_copy, step_mask=mask[i]), states,
                    new_states)
            states = new_states

            if i == 0:
                # First step: create one accumulator per output leaf.
                outputs = U.map_structure(OutputArray, step_outputs)
            else:
                # Later steps: append to the matching accumulator.
                U.map_structure(lambda x, x_array: x_array.append(x),
                                step_outputs, outputs)

        # Stack the collected steps along the time-step axis.
        final_outputs = U.map_structure(
            lambda x: L.stack(x.array, axis=self.time_step_index), outputs)

        if self.is_reverse:
            # Undo the reversal so outputs line up with the given inputs.
            final_outputs = U.map_structure(
                lambda x: L.reverse(x, axis=self.time_step_index),
                final_outputs)

        # `states` equals the last `new_states` after any iteration and,
        # unlike `new_states`, is also defined when the loop never runs.
        final_states = states
    else:
        final_outputs, final_states = L.rnn(
            self.cell,
            inputs,
            initial_states=initial_states,
            sequence_length=sequence_length,
            time_major=self.time_major,
            is_reverse=self.is_reverse,
            **kwargs)
    return final_outputs, final_states
def get_initial_states(self,
                       batch_ref,
                       shape=None,
                       dtype=None,
                       init_value=0,
                       batch_dim_idx=0):
    """
    Generate initialized states according to provided shape, data type and
    value.

    Parameters:
        batch_ref: A (possibly nested structure of) tensor variable[s].
            The first tensor of the flattened structure is used as the
            batch-size reference when the states are created.
        shape: A (possibly nested structure of) shape[s], where a shape is
            represented as a list/tuple of integers. -1 (for batch size)
            will be automatically inserted if a shape does not start with
            it. If None, property `state_shape` will be used. The default
            value is None.
        dtype: A (possibly nested structure of) data type[s]. The structure
            must be the same as that of `shape`, except when all tensors in
            states have the same data type, in which case a single data
            type can be used. If None, property `state_dtype` is used; if
            that raises NotImplementedError, float32 is used. The default
            value is None.
        init_value: A float value used to initialize states.
        batch_dim_idx: Forwarded as `input_dim_idx` to
            `L.fill_constant_batch_size_like`. The default value is 0.

    Returns:
        Variable: tensor variable[s] packed in the same structure provided \
            by shape, representing the initialized states.
    """
    # Imported locally: the `collections.Sequence` alias was removed in
    # Python 3.10, while Python 2 has no `collections.abc`.
    try:
        from collections.abc import Sequence
    except ImportError:  # Python 2
        from collections import Sequence

    # TODO: use inputs and batch_size
    batch_ref = U.flatten(batch_ref)[0]

    def _is_shape_sequence(seq):
        """For shape, list/tuple of integer is the finest-grained object."""
        # A list/tuple made only of integers is one shape, i.e. a leaf.
        if isinstance(seq, (list, tuple)):
            if all(isinstance(x, six.integer_types) for x in seq):
                return False
        # TODO: Add check for the illegal
        if isinstance(seq, dict):
            return True
        # Any other non-string sequence is a nested container.
        return (isinstance(seq, Sequence) and
                not isinstance(seq, six.string_types))

    class Shape(object):
        """Wraps one shape so that `U.map_structure` treats it as a leaf."""

        def __init__(self, shape):
            # Prepend -1 for the batch size unless it is already there;
            # the length check keeps an empty shape from raising.
            has_batch_dim = len(shape) > 0 and shape[0] == -1
            self.shape = shape if has_batch_dim else ([-1] + list(shape))

    # nested structure of shapes
    states_shapes = self.state_shape if shape is None else shape
    # Temporarily swap in the shape-aware predicate, and always restore the
    # original one, even if mapping fails, because `U.is_sequence` is
    # module-level state shared with other callers.
    is_sequence_ori = U.is_sequence
    U.is_sequence = _is_shape_sequence
    try:
        states_shapes = U.map_structure(Shape, states_shapes)
    finally:
        U.is_sequence = is_sequence_ori

    # nested structure of dtypes
    try:
        states_dtypes = self.state_dtype if dtype is None else dtype
    except NotImplementedError:  # use fp32 as default
        states_dtypes = "float32"
    flat_dtypes = U.flatten(states_dtypes)
    if len(flat_dtypes) == 1:
        # A single dtype is broadcast to every state.
        single_dtype = flat_dtypes[0]
        states_dtypes = U.map_structure(lambda _: single_dtype,
                                        states_shapes)

    init_states = U.map_structure(
        lambda state_shape, state_dtype: L.fill_constant_batch_size_like(
            input=batch_ref,
            shape=state_shape.shape,
            dtype=state_dtype,
            value=init_value,
            input_dim_idx=batch_dim_idx), states_shapes, states_dtypes)
    return init_states