def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: Raw output of the final layer. Dimensions 1 and 2 are read as
            the grid height and width, and the tensor is reshaped here to
            ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
        anchors: Sequence of anchor sizes; reshaped to
            ``[1, 1, 1, num_anchors, 2]`` (two values per anchor).
        num_classes: Number of class scores predicted per anchor.
        input_shape: Shape used to normalise the box sizes. It is reversed
            before the division (presumably given as height, width --
            confirm against callers).
        calc_loss: When truthy, return the intermediate tensors used by the
            loss instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshaping does not change the dtype, so look it up once and reuse it.
    feats_dtype = tf.dtype(feats)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = tf.reshape(
        tf.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = tf.shape(feats)[1:3]  # height, width
    grid_y = tf.tile(
        tf.reshape(tf.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = tf.tile(
        tf.reshape(tf.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    # Per-cell (x, y) index, cast so it can be added to the sigmoid output.
    grid = tf.cast(tf.concatenate([grid_x, grid_y]), feats_dtype)

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # `grid_shape` is (height, width); reversing it lines it up with (x, y).
    box_xy = (tf.sigmoid(feats[..., :2]) + grid) / tf.cast(
        grid_shape[::-1], feats_dtype)
    box_wh = tf.exp(feats[..., 2:4]) * anchors_tensor / tf.cast(
        input_shape[::-1], feats_dtype)
    box_confidence = tf.sigmoid(feats[..., 4:5])
    box_class_probs = tf.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def get_cell_sampling_probas(attractivity_cells, square_ids_cells):
    """Build one row of cell-sampling probabilities per distinct square id.

    Cells are grouped by their square id. Each output row holds the
    attractivity values of one square's cells, left-aligned and padded with
    zeros up to the size of the largest square, then divided by the row's
    L1 norm.

    Args:
        attractivity_cells: Attractivity value of every cell (indexed along
            its first axis).
        square_ids_cells: Square id of every cell, aligned with
            ``attractivity_cells``.

    Returns:
        ``(cell_sampling_probas, cell_index_shift)`` where
        ``cell_index_shift[k]`` is the number of cells in all squares that
        come before square ``k``.
    """
    unique_square_ids, inverse, counts = tf.unique(
        square_ids_cells, return_inverse=True, return_counts=True
    )
    # `inverse` renumbers `square_ids_cells` by rank of the id:
    # 3, 4, 6 => 0, 1, 2.

    width_sample = tf.max(counts)
    print(f'width_sample: {width_sample}')

    # Running total of `counts`, shifted right by one with a leading 0:
    # [0, n0, n0 + n1, ...], i.e. where each square starts once the cells
    # are grouped by square.
    cell_index_shift = tf.cumsum(tf.insert(counts, 0, 0)[:-1])

    # Position of each cell inside its own square: the global position
    # minus the start offset of the square it belongs to.
    square_start_per_cell = tf.repeat(cell_index_shift, counts)
    inds_cells_in_square = tf.subtract(
        tf.arange(0, attractivity_cells.shape[0]), square_start_per_cell
    )

    # Group the cells by square so they line up with the positions above.
    order = tf.argsort(inverse)
    row_of_cell = inverse[order]
    grouped_attractivity = attractivity_cells[order]

    # One row per square; the leading entries of a row are the attractivity
    # of its cells, the remainder stays 0.
    n_squares = unique_square_ids.shape[0]
    cell_sampling_probas = tf.zeros((n_squares, width_sample))
    cell_sampling_probas[row_of_cell, inds_cells_in_square] = grouped_attractivity

    # Divide each row by its L1 norm so the rows are probability
    # distributions.
    row_l1_norm = tf.linalg.norm(
        cell_sampling_probas, ord=1, axis=1, keepdims=True
    )
    cell_sampling_probas /= row_l1_norm.astype(tf.float32)

    return cell_sampling_probas, cell_index_shift
def cost(self, X, Y, XXM, YYM, batch_sz=None, num_steps=None, lam=0.0005):
    '''Returns the perplexity-based loss, averaged over the batch.

    X         - source indice
    Y         - target indice
    XXM       - forwarded to `self.fp` together with X
    YYM       - multiplied into the per-step loss and summed along axis 1
                to obtain the per-sequence normaliser
    batch_sz  - defaults to the first dimension of Y; the batch size is
                not fixed per update
    num_steps - defaults to `self.TT`
    lam       - currently unused: the L2 term it would weight is disabled

    Side effects: registers a "prediction_error" histogram summary and a
    "Cost" scalar summary, and stores the merged summary op in
    `self.summarize`.
    '''
    if batch_sz is None:
        batch_sz = tf.shape(Y)[0]
    if num_steps is None:
        num_steps = self.TT

    preds = self.fp(
        X, XXM, batch_sz, num_esteps=num_steps, num_dsteps=num_steps)
    preds = tf.transpose(preds, perm=[1, 0, 2])

    # Measured based on perplexity - measures how surprised the network
    # is to see the next character in a sequence.
    flat_probs = preds.reshape((batch_sz * num_steps, self.D))
    seq_lengths = tf.cast(tf.sum(YYM, 1), 'float32')

    # Negative log-probability of the target at every (sequence, step)
    # position, multiplied by the flattened YYM.
    row_index = tf.arange(batch_sz * num_steps)
    step_nll = -tf.log(flat_probs)[row_index, Y.flatten()] * YYM.flatten()

    # Normalise per sequence, exponentiate the per-sequence sum, then
    # average over the batch.
    per_step = step_nll.reshape((batch_sz, num_steps)) / seq_lengths.dimshuffle(0, 'x')
    per_sequence = tf.exp(tf.sum(per_step, axis=1))
    cost = tf.sum(per_sequence) / tf.cast(batch_sz, 'float32')

    # L2 regularisation (weighted by `lam`) is intentionally not applied.

    with tf.variable_scope('summary'):
        tf.histogram_summary("prediction_error", preds)
        tf.scalar_summary("Cost", cost)
        self.summarize = tf.merge_all_summaries()

    return cost
def generate_anchors(scale, ratios, feature_shape, feature_stride, anchor_stride):
    """Generate anchors that are relative to the backbone's feature maps.

    Inputs:
        - scale : number, or sequence of numbers
            Scale(s) of the anchor.
        - ratios : list of float
            Ratios of one side to the other side of an anchor.
        - feature_shape : list
            Shape of the given backbone feature. ``feature_shape[0]`` bounds
            the x grid and ``feature_shape[1]`` bounds the y grid.
        - feature_stride : number
            Factor applied to the grid indices.
        - anchor_stride : int
            Step between consecutive grid indices.

    Outputs:
        - anchors : the concatenation along axis 1 of
          ``box_xy - 0.5 * box_wh`` and ``box_xy + 0.5 * box_wh``.
    """
    # Get all combinations of scales and ratios. `scale` may be a single
    # number, so normalise both inputs to flat arrays before meshing.
    scales = np.array(scale).flatten()
    ratios = np.array(ratios).flatten()
    scales, ratios = tf.meshgrid(scales, ratios)

    # Enumerate heights and widths from scales and ratios.
    heights = scales / ratios
    widths = scales * ratios

    # Enumerate shifts in feature space.
    grid_x = tf.tile(
        tf.reshape(
            tf.arange(start=0, limit=feature_shape[0], delta=anchor_stride),
            [1, -1, 1]),
        [feature_shape[1], 1, 1]) * feature_stride
    grid_y = tf.tile(
        tf.reshape(
            tf.arange(start=0, limit=feature_shape[1], delta=anchor_stride),
            [-1, 1, 1]),
        [1, feature_shape[0], 1]) * feature_stride

    # NOTE(review): the tile multiples below embed a whole `.shape` object
    # where a per-axis repeat count is expected, and the resulting sizes are
    # not obviously compatible with the grid tensors above -- confirm the
    # intended repeat counts.
    heights = tf.tile(tf.reshape(heights, [1, -1, 1]), [heights.shape, 1, 1])
    widths = tf.tile(tf.reshape(widths, [-1, 1, 1]), [1, widths.shape, 1])

    # Enumerate combinations of shifts, widths, and heights.
    # NOTE(review): box_xy is ordered (x, y) while box_wh is ordered
    # (height, width) -- confirm this pairing is intended.
    box_xy = tf.concat([grid_x, grid_y], axis=2).reshape([-1, 2])
    box_wh = tf.concat([heights, widths], axis=2).reshape([-1, 2])

    return tf.concat(
        [box_xy - 0.5 * box_wh, box_xy + 0.5 * box_wh], axis=1)
def lifter(cepstra, L=22):
    """Apply a cepstral lifter to the matrix of cepstra.

    This has the effect of increasing the magnitude of the high frequency
    DCT coeffs.

    :param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
    :returns: the liftered cepstra, or ``cepstra`` itself when the lifter is disabled.
    """
    if not L > 0:
        # Lifter disabled: hand the input back untouched.
        return cepstra

    # Only the coefficient count is needed; unpacking two values also
    # requires `cepstra` to be two-dimensional.
    _, ncoeff = np.shape(cepstra)
    coeff_index = tf.arange(ncoeff)
    weights = 1 + (L / 2.) * tf.sin(tf.pi * coeff_index / L)
    return weights * cepstra
def _pull_values_offsets(self, values_offset):
    """Turn a tensor into its sparse representation.

    ``values_offset`` is either a tuple ``(values, offsets)`` or just
    ``values``, where values is a tensor.

    * Tuple input: element 0 is flattened into ``values``; element 1 is
      flattened, cast to int64 and returned as ``diff_offsets``, and its
      cumulative sum is returned as ``offsets``.
    * Plain tensor input: the tensor is flattened into ``values``,
      ``offsets`` counts from 0 to ``len(values) - 1``, and
      ``diff_offsets`` holds the differences between consecutive offsets.

    Returns:
        ``(values, offsets, diff_offsets, num_rows)`` with
        ``num_rows == len(offsets)``.
    """
    if isinstance(values_offset, tuple):
        raw_values = values_offset[0]
        raw_diffs = values_offset[1]
        values = tf.reshape(raw_values, [-1])
        diff_offsets = tf.cast(tf.reshape(raw_diffs, [-1]), dtype=tf.int64)
        offsets = tf.math.cumsum(diff_offsets)
    else:
        values = tf.reshape(values_offset, [-1])
        n_values = tf.shape(values)[0]
        offsets = tf.arange(n_values, dtype=tf.int64)
        diff_offsets = offsets[1:] - offsets[:-1]

    return values, offsets, diff_offsets, len(offsets)
def get_mask_i_float(i, n):
    """Create a 1D array of zeros with one element at one, with floating type.

    Parameters
    ----------
    i : int
        Index of the non-zero element.
    n : int
        Length of the created array.

    Returns
    -------
    mask_i_float : array-like, shape=[n,]
        1D array of zeros except at index i, where it is one.
    """
    positions = arange(n)
    # Route the index through an int32 array so it is compared as int32.
    index_int32 = cast(array([i]), int32)[0]
    is_position_i = equal(positions, index_int32)
    return cast(is_position_i, float32)
def arange(start=0, limit=None, step=1):
    """Return evenly spaced values by delegating to ``tf.arange``.

    Parameters
    ----------
    start : number
        Forwarded as ``start``. With ``limit=None`` the interpretation is
        left to the backend; ``tf.range``-style semantics treat a lone
        ``start`` as the limit and count up from 0.
    limit : number, optional
        Forwarded as ``limit``.
    step : number
        Forwarded as ``delta``.

    Returns
    -------
    Whatever ``tf.arange`` returns for these arguments.
    """
    # NOTE(review): stock TensorFlow names this op `tf.range`; `tf.arange`
    # is assumed to exist on whatever `tf` is bound to in this file -- confirm.
    # Forward `start` as given so that both `arange(n)` and `arange(a, b)`
    # reach the backend with the caller's value rather than a fixed 0.
    return tf.arange(start=start, limit=limit, delta=step)