def yolo_head(graph, feats, anchors, num_classes):
    with graph.as_default():
        num_anchors = len(anchors)
        # Reshape to batch, height, width, num_anchors, box_params.
        anchors_tensor = K.reshape(K.variable(anchors),
                                   [1, 1, 1, num_anchors, 2])

        # Build a (1, height, width, 1, 2) tensor of per-cell grid offsets.
        conv_dims = K.shape(feats)[1:3]
        conv_height_index = K.arange(0, stop=conv_dims[0])
        conv_width_index = K.arange(0, stop=conv_dims[1])
        conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
        conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                                  [conv_dims[0], 1])
        conv_width_index = K.flatten(K.transpose(conv_width_index))
        conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
        conv_index = K.reshape(conv_index,
                               [1, conv_dims[0], conv_dims[1], 1, 2])
        conv_index = K.cast(conv_index, K.dtype(feats))

        feats = K.reshape(
            feats,
            [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
        conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]),
                           K.dtype(feats))

        # Decode raw network outputs into box parameters.
        box_xy = K.sigmoid(feats[..., :2])
        box_wh = K.exp(feats[..., 2:4])
        box_confidence = K.sigmoid(feats[..., 4:5])
        box_class_probs = K.softmax(feats[..., 5:])

        # Offset box centers by grid cell location and normalize to [0, 1].
        box_xy = (box_xy + conv_index) / conv_dims
        box_wh = box_wh * anchors_tensor / conv_dims

        return box_xy, box_wh, box_confidence, box_class_probs
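# A minimal usage sketch for the graph-based yolo_head above, assuming a
# TensorFlow 1.x / standalone-Keras setup with `keras.backend as K`. The
# 13x13 grid, the 5 anchor boxes, and the 80-class count are hypothetical
# illustration values, not taken from the original code.
import numpy as np
import tensorflow as tf
from keras import backend as K

anchors = np.array([[0.57, 0.68], [1.87, 2.06], [3.34, 5.47],
                    [7.88, 3.53], [9.77, 9.17]])
num_classes = 80
graph = tf.get_default_graph()
# Raw head output: 5 anchors * (80 classes + 5 box params) = 425 channels.
feats = K.placeholder(shape=(None, 13, 13, len(anchors) * (num_classes + 5)))
box_xy, box_wh, box_confidence, box_class_probs = yolo_head(
    graph, feats, anchors, num_classes)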
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
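# A NumPy sketch of the grid built inside yolo_head above, to make the
# indexing concrete: for a 2x3 (height x width) feature map, grid[y, x, 0]
# holds (x, y). It mirrors the K.reshape/K.tile/K.concatenate calls but is
# an illustration, not part of the original code.
import numpy as np

def numpy_grid(grid_h, grid_w):
    grid_y = np.tile(np.arange(grid_h).reshape(-1, 1, 1, 1), [1, grid_w, 1, 1])
    grid_x = np.tile(np.arange(grid_w).reshape(1, -1, 1, 1), [grid_h, 1, 1, 1])
    return np.concatenate([grid_x, grid_y], axis=-1)  # (h, w, 1, 2)

assert numpy_grid(2, 3)[1, 2, 0].tolist() == [2, 1]  # cell at x=2, y=1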
def get_initial_state(self, inputs):
    if type(self.model.input) is not list:
        return []
    try:
        batch_size = K.int_shape(inputs)[0]
    except Exception:
        batch_size = None
    state_shapes = list(map(K.int_shape, self.model.input[1:]))
    states = []
    if self.readout:
        state_shapes.pop()
        # The default value for initial_readout is handled in call().
    for shape in state_shapes:
        if None in shape[1:]:
            raise Exception(
                'Only the batch dimension of a state can be left '
                'unspecified. Got state with shape ' + str(shape))
        if shape[0] is None:
            # Batch size is unknown: slice a (batch_size,) vector of zeros
            # out of the inputs, then expand it to the full state shape.
            ndim = K.ndim(inputs)
            z = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (ndim - 1)
            z = z[slices]  # (batch_size,)
            state_ndim = len(shape)
            z = K.reshape(z, (-1,) + (1,) * (state_ndim - 1))
            z = K.tile(z, (1,) + tuple(shape[1:]))
            states.append(z)
        else:
            states.append(K.zeros(shape))
    state_initializer = self.state_initializer
    if state_initializer:
        # Some initializers don't accept symbolic shapes.
        for i in range(len(state_shapes)):
            if state_shapes[i][0] is None:
                if hasattr(self, 'batch_size'):
                    state_shapes[i] = (self.batch_size,) + state_shapes[i][1:]
            if None in state_shapes[i]:
                state_shapes[i] = K.shape(states[i])
        num_state_init = len(state_initializer)
        num_state = self.num_states
        assert num_state_init == num_state, (
            'RNN has ' + str(num_state) + ' states, but was provided ' +
            str(num_state_init) + ' state initializers.')
        for i in range(len(states)):
            init = state_initializer[i]
            shape = state_shapes[i]
            try:
                if not isinstance(init, initializers.Zeros):
                    states[i] = init(shape)
            except Exception:
                raise Exception(
                    'Seems the initializer ' + init.__class__.__name__ +
                    ' does not support symbolic shapes (' + str(shape) +
                    '). Try providing the full input shape (including the '
                    'batch dimension) for your RecurrentModel.')
    return states
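# A standalone sketch of the zero-state trick used in get_initial_state
# above: when the batch dimension is symbolic, slice the inputs down to a
# (batch_size,) vector of zeros and tile it out to the state shape. Assumes
# `keras.backend as K`; zero_state_like and state_dim are hypothetical names.
from keras import backend as K

def zero_state_like(inputs, state_dim):
    ndim = K.ndim(inputs)
    z = K.zeros_like(inputs)  # carries the same symbolic batch size
    z = z[tuple([slice(None)] + [0] * (ndim - 1))]  # -> (batch_size,)
    z = K.reshape(z, (-1, 1))                       # -> (batch_size, 1)
    return K.tile(z, (1, state_dim))                # -> (batch_size, state_dim)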
def test_tile(self):
    shape = (3, 4)
    arr = np.arange(np.prod(shape)).reshape(shape)
    arr_th = KTH.variable(arr)
    arr_tf = KTF.variable(arr)
    n = (2, 1)
    th_rep = KTH.eval(KTH.tile(arr_th, n))
    tf_rep = KTF.eval(KTF.tile(arr_tf, n))
    assert_allclose(tf_rep, th_rep, atol=1e-05)
def test_tile(self):
    shape = (3, 4)
    arr = np.arange(np.prod(shape)).reshape(shape)
    arr_th = KTH.variable(arr)
    arr_tf = KTF.variable(arr)
    n = (2, 1)
    th_z = KTH.tile(arr_th, n)
    th_rep = KTH.eval(th_z)
    tf_rep = KTF.eval(KTF.tile(arr_tf, n))
    assert_allclose(tf_rep, th_rep, atol=1e-05)
    if hasattr(th_z, '_keras_shape'):
        assert th_z._keras_shape == th_rep.shape
def test_tile(self):
    shape = (3, 4)
    arr = np.arange(np.prod(shape)).reshape(shape)
    arr_th = KTH.variable(arr)
    arr_tf = KTF.variable(arr)
    n = (2, 1)
    th_z = KTH.tile(arr_th, n)
    th_rep = KTH.eval(th_z)
    tf_rep = KTF.eval(KTF.tile(arr_tf, n))
    assert_allclose(tf_rep, th_rep, atol=1e-05)
    if hasattr(th_z, '_keras_shape'):
        assert th_z._keras_shape == th_rep.shape

    # Test Theano shape inference when the input shape has None entries.
    if K.backend() == 'theano':
        x = K.placeholder(shape=(None, 4))
        n = 2
        y = KTH.tile(x, n)
        assert y._keras_shape == (None, 8)
        n = (4, 3)
        y = K.tile(x, n)
        assert y._keras_shape == (None, 12)
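# For reference, the NumPy semantics the tests above compare against:
# n=(2, 1) stacks the (3, 4) array twice along axis 0, and a scalar n
# repeats along the last axis (which matches the (None, 8) shape the Theano
# inference test expects for n=2). Illustration only, not part of the suite.
import numpy as np

arr = np.arange(12).reshape(3, 4)
assert np.tile(arr, (2, 1)).shape == (6, 4)
assert np.tile(arr, 2).shape == (3, 8)  # scalar n tiles the last axis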
def call(self,
         inputs,
         initial_state=None,
         initial_readout=None,
         ground_truth=None,
         mask=None,
         training=None):
    # Input shape: `(samples, time (padded with zeros), input_dim)`.
    # Note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if type(mask) is list:
        mask = mask[0]
    if self.model is None:
        raise Exception('Empty RecurrentModel.')
    num_req_states = self.num_states
    if self.readout:
        num_actual_states = num_req_states - 1
    else:
        num_actual_states = num_req_states
    if type(inputs) is list:
        inputs_list = inputs[:]
        inputs = inputs_list.pop(0)
        initial_states = inputs_list[:num_actual_states]
        if len(initial_states) > 0:
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        inputs_list = inputs_list[num_actual_states:]
        if self.readout:
            initial_readout = inputs_list.pop(0)
            if self.teacher_force:
                ground_truth = inputs_list.pop()
    else:
        if initial_state is not None:
            if not isinstance(initial_state, (list, tuple)):
                initial_states = [initial_state]
            else:
                initial_states = list(initial_state)
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        elif self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_state(inputs)
    if self.readout:
        if initial_readout is None or self._is_optional_input_placeholder(
                initial_readout):
            # Build a zero readout with the model's output shape and the
            # (symbolic) batch size of the inputs.
            output_shape = K.int_shape(_to_list(self.model.output)[0])
            output_ndim = len(output_shape)
            input_ndim = K.ndim(inputs)
            initial_readout = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (input_ndim - 1)
            initial_readout = initial_readout[slices]  # (batch_size,)
            initial_readout = K.reshape(initial_readout,
                                        (-1,) + (1,) * (output_ndim - 1))
            initial_readout = K.tile(initial_readout,
                                     (1,) + tuple(output_shape[1:]))
        initial_states.append(initial_readout)
        if self.teacher_force:
            if ground_truth is None or self._is_optional_input_placeholder(
                    ground_truth):
                raise Exception('ground_truth must be provided for '
                                'RecurrentModel with teacher_force=True.')
            if K.backend() == 'tensorflow':
                with tf.control_dependencies(None):
                    counter = K.zeros((1,))
            else:
                counter = K.zeros((1,))
            counter = K.cast(counter, 'int32')
            initial_states.insert(-1, counter)
            initial_states.insert(-1, ground_truth)
            num_req_states += 2
    if len(initial_states) != num_req_states:
        raise ValueError('Layer requires ' + str(num_req_states) +
                         ' states but was passed ' +
                         str(len(initial_states)) + ' initial states.')
    input_shape = K.int_shape(inputs)
    if self.unroll and input_shape[1] is None:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')
    preprocessed_input = self.preprocess_input(inputs, training=None)
    constants = self.get_constants(inputs, training=None)
    if self.decode:
        initial_states.insert(0, inputs)
        preprocessed_input = K.zeros((1, self.output_length, 1))
        input_length = self.output_length
    else:
        input_length = input_shape[1]
    if self.uses_learning_phase:
        # Build both phase graphs and let K.in_train_phase pick between them.
        with learning_phase_scope(0):
            last_output_test, outputs_test, states_test, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        with learning_phase_scope(1):
            last_output_train, outputs_train, states_train, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        last_output = K.in_train_phase(last_output_train,
                                       last_output_test,
                                       training=training)
        outputs = K.in_train_phase(outputs_train,
                                   outputs_test,
                                   training=training)
        states = []
        for state_train, state_test in zip(states_train, states_test):
            states.append(K.in_train_phase(state_train,
                                           state_test,
                                           training=training))
    else:
        last_output, outputs, states, updates = rnn(
            self.step,
            preprocessed_input,
            initial_states,
            go_backwards=self.go_backwards,
            mask=mask,
            constants=constants,
            unroll=self.unroll,
            input_length=input_length)
    states = list(states)
    if self.decode:
        states.pop(0)
    if self.readout:
        states.pop()
        if self.teacher_force:
            states.pop()
            states.pop()
    if len(updates) > 0:
        self.add_update(updates)
    if self.stateful:
        updates = []
        for i in range(len(states)):
            updates.append((self.states[i], states[i]))
        self.add_update(updates, inputs)
    # Properly set learning phase.
    if 0 < self.dropout + self.recurrent_dropout:
        last_output._uses_learning_phase = True
        outputs._uses_learning_phase = True
    if self.return_sequences:
        y = outputs
    else:
        y = last_output
    if self.return_states:
        return [y] + states
    else:
        return y
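# A minimal sketch of the K.in_train_phase pattern used in call() above: the
# layer builds separate train-phase and test-phase graphs and lets the
# backend select one per learning phase. Assumes standalone Keras; the
# scaling factor is an arbitrary stand-in, for illustration only.
from keras import backend as K

x = K.placeholder(shape=(None, 10))
train_branch = x * 0.5  # stand-in for the learning_phase_scope(1) rnn() result
test_branch = x         # stand-in for the learning_phase_scope(0) rnn() result
y = K.in_train_phase(train_branch, test_branch)  # resolved by learning phase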