def autodetect_shotname(frame_w, frame_h, num_shots_hint):
    # Pick the shot layout whose aspect ratio wastes the least frame area when
    # the frame is centre-cropped to fit it; ties are broken by the layout
    # whose shot count is closest to num_shots_hint.
    best_shot_names = None
    best_loss = None
    for shot_name in shots:
        aspect_ratio = shots[shot_name]["aspect_ratio"]
        # c is the height of the largest centred crop with this aspect ratio.
        c = min(frame_h, frame_w / aspect_ratio)
        slice_h_shift = r((frame_h - c) / 2)
        slice_w_shift = r((frame_w - c * aspect_ratio) / 2)
        if slice_w_shift != 0 and slice_h_shift == 0:
            # Two vertical stripes, each slice_w_shift wide and frame_h tall,
            # are cropped away.
            loss = slice_w_shift * frame_h * 2
        elif slice_w_shift == 0 and slice_h_shift != 0:
            # Two horizontal stripes, each slice_h_shift tall and frame_w wide,
            # are cropped away.
            loss = slice_h_shift * frame_w * 2
        elif slice_w_shift != 0 and slice_h_shift != 0:
            # A centred crop can only trim one dimension; both shifts being
            # non-zero means the geometry above is inconsistent.
            raise ErrorSignal(math_is_wrong_error)
        else:
            loss = 0
        if best_loss is None or loss < best_loss:
            best_loss = loss
            best_shot_names = [shot_name]
        elif loss == best_loss:
            best_shot_names.append(shot_name)
    # Among the least-lossy layouts, prefer the shot count closest to the hint
    # (no hint means "as few shots as possible").
    if num_shots_hint is None:
        num_shots_hint = 0
    best_num_shots_name = None
    best_num_shots_diff = None
    for shot_name in best_shot_names:
        num_shots = len(shots[shot_name]['shots'])
        if best_num_shots_name is None or abs(num_shots - num_shots_hint) < best_num_shots_diff:
            best_num_shots_name = shot_name
            best_num_shots_diff = abs(num_shots - num_shots_hint)
    return best_num_shots_name
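# Hedged usage sketch for autodetect_shotname(). The module-level `shots`
# table is defined elsewhere; the hypothetical entries below only mirror the
# two fields the function reads: an "aspect_ratio" float and a "shots" list
# whose length is matched against num_shots_hint.
#
#     shots = {
#         "wide_1": {"aspect_ratio": 16 / 9,
#                    "shots": [[0.0, 0.0, 1.0, 1.0, 1.0]]},
#         "std_2": {"aspect_ratio": 4 / 3,
#                   "shots": [[0.0, 0.0, 0.5, 1.0, 1.0],
#                             [0.5, 0.0, 0.5, 1.0, 1.0]]},
#     }
#     autodetect_shotname(1920, 1080, num_shots_hint=1)  # -> "wide_1",
#     # since the 16:9 layout needs no cropping of a 1920x1080 frame.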
def inverted_res_block(input_tensor, expansion, stride, alpha, filters):
    # MobileNetV2-style inverted residual: 1x1 expansion, 3x3 depthwise,
    # 1x1 linear projection, with an identity shortcut when shapes allow.
    in_channels = input_tensor.shape.as_list()[-1]
    filters = r(filters * alpha)
    output_tensor = input_tensor
    # 1x1 pointwise expansion.
    output_tensor = Conv2D(expansion * in_channels, kernel_size=(1, 1), use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    # ReLU's first argument is max_value, so this is a capped ReLU.
    output_tensor = ReLU(relu_threshold)(output_tensor)
    # 3x3 depthwise convolution; the padding keeps stride-1 blocks
    # spatial-size-preserving.
    output_tensor = ZeroPadding2D()(output_tensor)
    output_tensor = DepthwiseConv2D(kernel_size=(3, 3), strides=stride, use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    output_tensor = ReLU(relu_threshold)(output_tensor)
    # 1x1 linear projection (no activation after the final normalization).
    output_tensor = Conv2D(filters, kernel_size=(1, 1), use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    if in_channels == filters and stride == 1:
        # Identity shortcut only when depth and spatial size are unchanged.
        output_tensor = Add()([input_tensor, output_tensor])
    return output_tensor
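# A minimal sanity check for inverted_res_block(), assuming a tf.keras setup
# (swap the import if this project uses standalone keras). It exercises the
# rule encoded above: the identity shortcut fires only for stride 1 with
# matching channel counts. The 56x56 input size is an arbitrary assumption.
def _check_inverted_res_block(alpha=1.0):
    from tensorflow.keras.layers import Input
    input_tensor = Input(shape=(56, 56, r(64 * alpha)))
    output_tensor = inverted_res_block(input_tensor, expansion=2, stride=1,
                                       alpha=alpha, filters=64)
    # Stride 1 with equal channel counts preserves both spatial size and
    # depth, so the Add() shortcut must have been applied.
    assert output_tensor.shape.as_list()[1:] == [56, 56, r(64 * alpha)]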
def FDMobileNet(input_tensor, alpha=1.0):
    # FD-MobileNet-style backbone: a stride-2 stem followed by
    # depthwise-separable blocks; alpha scales every channel count.
    def SepConvBlock(input_tensor, filters, strides):
        # Depthwise 3x3 then pointwise 1x1; the nonlinearity is applied only
        # after the pointwise projection.
        output_tensor = input_tensor
        output_tensor = ZeroPadding2D()(output_tensor)
        output_tensor = DepthwiseConv2D(kernel_size=(3, 3), strides=strides)(output_tensor)
        output_tensor = BatchNormalization()(output_tensor)
        output_tensor = Conv2D(kernel_size=(1, 1), filters=filters)(output_tensor)
        output_tensor = BatchNormalization()(output_tensor)
        output_tensor = LeakyReLU(alpha=0.1)(output_tensor)
        return output_tensor

    output_tensor = input_tensor
    # Stem: 3x3 stride-2 convolution.
    output_tensor = Conv2D(kernel_size=(3, 3), strides=(2, 2), filters=r(32 * alpha))(output_tensor)
    output_tensor = BatchNormalization()(output_tensor)
    output_tensor = LeakyReLU(alpha=0.1)(output_tensor)
    # Separable blocks; each stride-2 block halves the spatial resolution.
    output_tensor = SepConvBlock(output_tensor, r(64 * alpha), (2, 2))
    output_tensor = SepConvBlock(output_tensor, r(128 * alpha), (2, 2))
    output_tensor = SepConvBlock(output_tensor, r(128 * alpha), (1, 1))
    output_tensor = SepConvBlock(output_tensor, r(256 * alpha), (2, 2))
    output_tensor = SepConvBlock(output_tensor, r(256 * alpha), (1, 1))
    output_tensor = SepConvBlock(output_tensor, r(512 * alpha), (2, 2))
    for i in range(4):
        output_tensor = SepConvBlock(output_tensor, r(512 * alpha), (1, 1))
    output_tensor = SepConvBlock(output_tensor, r(1024 * alpha), (1, 1))
    return output_tensor
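# Hedged wrapper sketch: builds FDMobileNet into an inspectable Keras model.
# The 224x224 RGB input size and the tf.keras imports are assumptions of this
# example, not something the function above fixes.
def build_fd_mobilenet(input_size=224, alpha=1.0):
    from tensorflow.keras.layers import Input
    from tensorflow.keras.models import Model
    input_tensor = Input(shape=(input_size, input_size, 3))
    return Model(input_tensor, FDMobileNet(input_tensor, alpha=alpha))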
def recognise(self, frame, faces):
    resized_images = []
    for face in faces:
        # Blank square canvas; the face crop is letterboxed into it below.
        resized_image = np.ones((self.image_size, self.image_size, 1), dtype=np.uint8) * fill_color
        # Convert the normalised [x1, y1, x2, y2] box to pixel coordinates.
        face = np.array(face)
        face[0] *= self.frame_w
        face[2] *= self.frame_w
        face[1] *= self.frame_h
        face[3] *= self.frame_h
        face_w = face[2] - face[0]
        face_h = face[3] - face[1]
        # Grow the box by `padding` on every side.
        face[0] -= face_w * padding
        face[1] -= face_h * padding
        face[2] += face_w * padding
        face[3] += face_h * padding
        face_w = face[2] - face[0]
        face_h = face[3] - face[1]
        # Expand the shorter side so the box becomes square, sliding it back
        # inside the frame if the expansion pushed it over an edge.
        if face_w > face_h:
            face[1] -= (face_w - face_h) / 2
            face[3] += (face_w - face_h) / 2
            if face[1] < 0 and face[3] < self.frame_h:
                face[3] -= face[1]
                face[1] = 0
            elif face[1] > 0 and face[3] > self.frame_h:
                face[1] -= face[3] - self.frame_h
                face[3] = self.frame_h
        else:
            face[0] -= (face_h - face_w) / 2
            face[2] += (face_h - face_w) / 2
            if face[0] < 0 and face[2] < self.frame_w:
                face[2] -= face[0]
                face[0] = 0
            elif face[0] > 0 and face[2] > self.frame_w:
                face[0] -= face[2] - self.frame_w
                face[2] = self.frame_w
        # Back to normalised coordinates, clamped to [0, 1].
        face[0] /= self.frame_w
        face[2] /= self.frame_w
        face[1] /= self.frame_h
        face[3] /= self.frame_h
        face = np.minimum(np.maximum(face, 0), 1)
        # Final pixel crop rectangle.
        crop_x1 = r(face[0] * self.frame_w)
        crop_y1 = r(face[1] * self.frame_h)
        crop_x2 = r(face[2] * self.frame_w)
        crop_y2 = r(face[3] * self.frame_h)
        crop_w = crop_x2 - crop_x1
        crop_h = crop_y2 - crop_y1
        # Fit the crop into the square canvas, preserving its aspect ratio.
        if crop_w > crop_h:
            crop_h = r(crop_h / crop_w * self.image_size)
            crop_w = self.image_size
        else:
            crop_w = r(crop_w / crop_h * self.image_size)
            crop_h = self.image_size
        shift_x = r((self.image_size - crop_w) / 2)
        shift_y = r((self.image_size - crop_h) / 2)
        # Resize, reduce to a single luma channel via Y_coefs, and paste
        # centred into the canvas.
        resized = cv2.resize(frame[crop_y1:crop_y2, crop_x1:crop_x2],
                             (crop_w, crop_h), interpolation=cv2.INTER_NEAREST)
        resized_image[shift_y:(shift_y + crop_h), shift_x:(shift_x + crop_w)] = \
            np.dot(resized, Y_coefs).reshape((crop_h, crop_w, 1))
        resized_images.append(resized_image)
    resized_images = np.array(resized_images, dtype=np.uint8)
    if len(resized_images) > 0:
        predictions = self.model.predict(resized_images,
                                         batch_size=min(batch_size, len(resized_images)),
                                         verbose=0)
        predictions = softmax(predictions)
    else:
        predictions = []
    return np.array(predictions)
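# Hedged usage sketch for recognise(). From the arithmetic above, `faces` is a
# list of [x1, y1, x2, y2] boxes normalised to [0, 1], and the result is one
# softmax probability row per input face. The surrounding object names are
# hypothetical; only the method contract comes from the code:
#
#     boxes = detector.detect(frame)               # hypothetical detector
#     probs = recogniser.recognise(frame, boxes)   # shape: (len(boxes), n_classes)
#     for box, p in zip(boxes, probs):
#         print(box, int(np.argmax(p)))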
def FaceMobileNet(input_tensor, alpha=1.0):
    # MobileFaceNet-style embedding backbone built from inverted residuals.
    output_tensor = input_tensor
    # Stem: 3x3 stride-2 convolution followed by a 3x3 depthwise layer.
    output_tensor = ZeroPadding2D()(output_tensor)
    output_tensor = Conv2D(filters=r(64 * alpha), kernel_size=(3, 3), strides=(2, 2), use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    output_tensor = ReLU(relu_threshold)(output_tensor)
    output_tensor = ZeroPadding2D()(output_tensor)
    output_tensor = DepthwiseConv2D(kernel_size=(3, 3), use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    output_tensor = ReLU(relu_threshold)(output_tensor)
    # Inverted residual stages as (filters, stride, expansion, repeats):
    # each stride-2 entry halves the resolution, the stride-1 runs keep it.
    stages = [
        (64, 2, 2, 1), (64, 1, 2, 4),
        (128, 2, 4, 1), (128, 1, 2, 6),
        (128, 2, 4, 1), (128, 1, 2, 2),
    ]
    for filters, stride, expansion, repeats in stages:
        for _ in range(repeats):
            output_tensor = inverted_res_block(output_tensor, filters=filters,
                                               alpha=alpha, stride=stride,
                                               expansion=expansion)
    # Head: 1x1 expansion, a "global" depthwise convolution whose kernel spans
    # the whole feature map, and a final 1x1 projection to the embedding width.
    output_tensor = Conv2D(filters=r(512 * alpha), kernel_size=(1, 1), use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    output_tensor = ReLU(relu_threshold)(output_tensor)
    output_tensor = DepthwiseConv2D(kernel_size=(output_tensor.shape.as_list()[1],
                                                 output_tensor.shape.as_list()[2]),
                                    use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    output_tensor = Conv2D(filters=r(128 * alpha), kernel_size=(1, 1), use_bias=False)(output_tensor)
    output_tensor = BatchNormalization(epsilon=batch_norm_eps, momentum=batch_norm_momentum)(output_tensor)
    return output_tensor
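# Hedged wrapper sketch for FaceMobileNet. The 112x112 RGB input is the usual
# MobileFaceNet choice but is an assumption here, as are the tf.keras imports.
# With that input the head's global depthwise convolution reduces the map to
# 1x1 with r(128 * alpha) channels, so it is flattened into an embedding.
def build_face_mobilenet(input_size=112, alpha=1.0):
    from tensorflow.keras.layers import Flatten, Input
    from tensorflow.keras.models import Model
    input_tensor = Input(shape=(input_size, input_size, 3))
    embedding = Flatten()(FaceMobileNet(input_tensor, alpha=alpha))
    return Model(input_tensor, embedding)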
def detect(self, frame):
    original_frame_shape = frame.shape
    # Centre-crop the frame to the aspect ratio the shot layout expects.
    aspect_ratio = self.shots["aspect_ratio"]
    c = min(frame.shape[0], frame.shape[1] / aspect_ratio)
    slice_h_shift = r((frame.shape[0] - c) / 2)
    slice_w_shift = r((frame.shape[1] - c * aspect_ratio) / 2)
    if slice_w_shift != 0 and slice_h_shift == 0:
        frame = frame[:, slice_w_shift:-slice_w_shift]
    elif slice_w_shift == 0 and slice_h_shift != 0:
        frame = frame[slice_h_shift:-slice_h_shift, :]
    elif slice_w_shift != 0 and slice_h_shift != 0:
        # A centred crop can only trim one dimension.
        raise ErrorSignal(math_is_wrong_error)
    # Cut the configured shots out of the frame and resize each to the
    # network input size.
    frames = []
    for s in self.shots["shots"]:
        frames.append(cv2.resize(
            frame[r(s[1] * frame.shape[0]):r((s[1] + s[3]) * frame.shape[0]),
                  r(s[0] * frame.shape[1]):r((s[0] + s[2]) * frame.shape[1])],
            (self.image_size, self.image_size),
            interpolation=cv2.INTER_NEAREST))
    frames = np.array(frames)
    predictions = self.model.predict(frames, batch_size=min(len(frames), batch_size), verbose=0)
    boxes = []
    shots = self.shots['shots']
    # Decode the YOLO-style grid output of every shot.
    for i in range(len(shots)):
        slice_boxes = []
        slice_prob = []
        for j in range(predictions.shape[1]):
            for k in range(predictions.shape[2]):
                # Objectness score for this grid cell.
                p = sigmoid(predictions[i][j][k][4])
                if p is not None and p > self.prob_threshold:
                    # Cell-relative centre and log-scale box size.
                    px = sigmoid(predictions[i][j][k][0])
                    py = sigmoid(predictions[i][j][k][1])
                    pw = min(math.exp(predictions[i][j][k][2] / self.grids), self.grids)
                    ph = min(math.exp(predictions[i][j][k][3] / self.grids), self.grids)
                    if px is not None and py is not None and pw > eps and ph > eps:
                        cx = (px + j) / self.grids
                        cy = (py + k) / self.grids
                        wx = pw / self.grids
                        wy = ph / self.grids
                        # Discard boxes larger than this shot's size limit.
                        if wx <= shots[i][4] and wy <= shots[i][4]:
                            # Corner coordinates clamped to the shot, then
                            # mapped from shot space to cropped-frame space.
                            lx = min(max(cx - wx / 2, 0), 1) * shots[i][2] + shots[i][0]
                            ly = min(max(cy - wy / 2, 0), 1) * shots[i][3] + shots[i][1]
                            rx = min(max(cx + wx / 2, 0), 1) * shots[i][2] + shots[i][0]
                            ry = min(max(cy + wy / 2, 0), 1) * shots[i][3] + shots[i][1]
                            slice_boxes.append([lx, ly, rx, ry])
                            slice_prob.append(p)
        # Per-shot non-maximum suppression.
        slice_boxes = non_max_suppression(np.array(slice_boxes),
                                          np.array(slice_prob),
                                          self.iou_threshold)
        for sb in slice_boxes:
            boxes.append(sb)
    # Merge overlapping detections coming from different shots.
    boxes = np.array(boxes)
    boxes = union_suppression(boxes, self.union_threshold)
    # Map the boxes from the cropped frame back to the original frame.
    for i in range(len(boxes)):
        boxes[i][0] /= original_frame_shape[1] / frame.shape[1]
        boxes[i][1] /= original_frame_shape[0] / frame.shape[0]
        boxes[i][2] /= original_frame_shape[1] / frame.shape[1]
        boxes[i][3] /= original_frame_shape[0] / frame.shape[0]
        boxes[i][0] += slice_w_shift / original_frame_shape[1]
        boxes[i][1] += slice_h_shift / original_frame_shape[0]
        boxes[i][2] += slice_w_shift / original_frame_shape[1]
        boxes[i][3] += slice_h_shift / original_frame_shape[0]
    return list(boxes)
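# Hedged end-to-end sketch for detect(): load a frame with OpenCV, detect, and
# draw the returned normalised [x1, y1, x2, y2] boxes back in pixel space. The
# owner object and the file name are hypothetical; r() is the same rounding
# helper used throughout this module.
#
#     frame = cv2.imread("group.jpg")              # hypothetical input
#     h, w = frame.shape[:2]
#     for x1, y1, x2, y2 in detector.detect(frame):
#         cv2.rectangle(frame, (r(x1 * w), r(y1 * h)),
#                       (r(x2 * w), r(y2 * h)), (0, 255, 0), 2)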