def main():
    """Write a copy of the positive-sample list with every box enlarged by 20%.

    Reads ``positive_samples.txt`` from ``~/BigDatas/car`` and writes
    ``scaled_positive_samples.txt`` next to it.  Each input line is split
    into ``<image filename> <count> <x y width height ...>``; the filename
    and count are copied through unchanged.

    A box is grown by 10% of its size on every side.  If the grown box does
    not lie completely inside the image, the original box is kept instead.
    """
    directory = Path(os.path.expanduser('~')) / 'BigDatas/car'
    info_pathname = directory / 'positive_samples.txt'
    new_info_pathname = directory / 'scaled_positive_samples.txt'
    with info_pathname.open() as info_file, \
            new_info_pathname.open('w') as new_info_file:
        for line in info_file:
            frame_filename_str, num_str, rects_str = (
                line.rstrip('\n').split(' ', 2))
            image_pathname = directory / frame_filename_str
            # The image is loaded only to learn its dimensions.
            # NOTE(review): cv2.imread yields None for an unreadable file,
            # which makes the next line fail with AttributeError.
            image = cv2.imread(str(image_pathname))
            image_height, image_width, _ = image.shape
            rects_strs = rects_str.split()
            rects = []
            # Coordinates come as a flat run of integers, four per box.
            for index in range(0, len(rects_strs), 4):
                x, y, width, height = map(int, rects_strs[index:index + 4])
                new_width = int(width * 1.2)
                new_x = int(x - width * 0.1)
                new_height = int(height * 1.2)
                new_y = int(y - height * 0.1)
                # Check the far edges, not just the size: a box that is
                # narrower than the image can still stick out on the right
                # or at the bottom once its origin is taken into account.
                fits_in_image = (new_x >= 0 and new_y >= 0
                                 and new_x + new_width <= image_width
                                 and new_y + new_height <= image_height)
                if fits_in_image:
                    rects.append(Rect(new_x, new_y, new_width, new_height))
                else:
                    rects.append(Rect(x, y, width, height))
            new_rects_str = ' '.join(
                ' '.join(map(str, tuple(rect))) for rect in rects)
            new_info_file.write('{} {} {}\n'.format(frame_filename_str,
                                                    num_str, new_rects_str))
def __init__(self, tile_width, tile_height, cols, rows):
    """Create a tilemap of ``cols`` x ``rows`` tiles with no layers.

    Args:
        tile_width: Horizontal size of one tile.
        tile_height: Vertical size of one tile.
        cols: Number of tile columns.
        rows: Number of tile rows.
    """
    self.tile_width = tile_width
    self.tile_height = tile_height
    self.cols = cols
    self.rows = rows
    self.layers = []
    self.object_layers = []
    # One scanline object per tile row.
    self.scanlines = [TilemapScanline() for _ in range(rows)]
    self.sprite_layer_index = 6

    # Tiles are stored row by row.  Each Rect receives the near corner and
    # the far corner (x + tile_width, y + tile_height), not a width/height.
    self.tiles = []
    add_tile = self.tiles.append
    top = 0
    for row_index in range(rows):
        bottom = top + tile_height
        left = 0
        for col_index in range(cols):
            right = left + tile_width
            add_tile(Tile(col_index, row_index,
                          Rect(left, top, right, bottom)))
            left = right
        top = bottom

    # Default tile: stored after the grid, at the end of the list, with
    # index (-1, -1), an empty rect and every capability switched off.
    default_tile = Tile(-1, -1, Rect(0, 0, 0, 0),
                        walkable=False, accept_items=False)
    default_tile.can_target = False
    add_tile(default_tile)
def main():
    """Run a cascade classifier on every frame of a video and print the hits.

    Command line:
        -v         path handed to ``video_generator``
        -c         path of the cascade model file
        --noshow   do not draw or display frames

    For each frame one line ``<frame filename> <x,y,w,h> <x,y,w,h> ...`` is
    written to stdout.  Unless ``--noshow`` is given, the detections are
    drawn on the frame, the frame is displayed, and the program waits for a
    key press before moving on.
    """
    parser = argparse.ArgumentParser(
        description='读取 cascade_model, 并直接调用 detectMultiScale 做全图多目标识别')
    parser.add_argument('-v', action='store', dest='video_pathname_str')
    parser.add_argument('-c', action='store', dest='cascade_pathname_str')
    parser.add_argument('--noshow', action='store_true', default=False)

    # The six options below are currently unused by this script; they are
    # accepted only for compatibility with makefile/phodopus.
    special_help = '无用参数,仅为兼容 makefile/phodopus 现有的 parameter.'
    for ignored_option in ('--proto', '--model', '--mean',
                           '--lf_proto', '--lf_model', '--lf_mean'):
        parser.add_argument(ignored_option, action='store', help=special_help)

    args = parser.parse_args(sys.argv[1:])
    show_frames = not args.noshow

    video = video_generator(Path(args.video_pathname_str))
    cascade = cv2.CascadeClassifier(args.cascade_pathname_str)

    for frame_filename_str, frame in video:
        detections = cascade.detectMultiScale(frame, 1.2, 3, 0, (20, 20))
        new_rects = []
        for x, y, width, height in detections:
            if show_frames:
                # Outline the detection in green on the displayed frame.
                frame = cv2.rectangle(frame, (x, y),
                                      (x + width, y + height), (0, 255, 0))
            new_rects.append(Rect(1 * x, 1 * y, 1 * width, 1 * height))

        rects_str = ' '.join(
            ','.join(str(value) for value in rect) for rect in new_rects)
        sys.stdout.write('{} {}\n'.format(frame_filename_str, rects_str))

        if show_frames:
            cv2.imshow('fight!', frame)
            # Block until a key is pressed before showing the next frame.
            cv2.waitKey(0)
class KaiRecognizer:
    """Detect the ``kai.png`` template inside a fixed region of an image.

    The template and its mask are loaded from the current working directory
    when an instance is created.
    """

    _kai_img = None
    _kai_mask_img = None
    # Region of the input image that is searched (read via .x, .y, .w, .h).
    _roi = Rect(5, 90, 40, 24)
    _method = cv2.TM_SQDIFF_NORMED
    # Upper bound on the combined score for a position to count as a match.
    _threshold = 0.05

    def __init__(self):
        """Load the template image and its mask as colour images."""
        self._kai_img = cv2.imread('kai.png', cv2.IMREAD_COLOR)
        self._kai_mask_img = cv2.imread('kai_mask.png', cv2.IMREAD_COLOR)

    def recognize(self, input_img):
        """Return whether the template matches anywhere inside the ROI.

        The template is matched separately on each of the three channels;
        the squared per-channel results are summed, and the result is true
        when at least one position has a sum at or below the threshold.
        """
        roi = self._roi
        template = self._kai_img
        mask = self._kai_mask_img

        # Only the region of interest is searched.
        input_roi_img = input_img[roi.y:roi.y + roi.h,
                                  roi.x:roi.x + roi.w,
                                  :]

        # One score per possible template position inside the ROI.
        kai_h, kai_w, _ = template.shape
        scores = np.zeros((roi.h - kai_h + 1, roi.w - kai_w + 1))
        for channel in range(3):
            channel_result = cv2.matchTemplate(
                input_roi_img[:, :, channel],
                template[:, :, channel],
                self._method,
                mask=mask[:, :, channel])
            scores = scores + np.square(channel_result)

        # A single position under the threshold is enough.
        return np.any(scores.flatten() <= self._threshold)
def _file2dict(pathname):
    """Load a ``<frame filename> <rect> <rect> ...`` file into a mapping.

    Args:
        pathname: Object with an ``open()`` method (for example a
            ``pathlib.Path``), or ``None``.

    Returns:
        A ``defaultdict(list)`` mapping each frame filename to the list of
        ``Rect`` objects built from the whitespace-separated tokens that
        follow it.  Empty when ``pathname`` is ``None``.

    Raises:
        ValueError: If a line contains no space after the filename.
    """
    frame_rects = defaultdict(list)
    if pathname is None:
        return frame_rects

    with pathname.open() as lines:
        for raw_line in lines:
            frame_name, rects_field = raw_line.rstrip('\n').split(' ', 1)
            frame_rects[frame_name] = [
                Rect(token) for token in rects_field.split()
            ]
    return frame_rects
def detect(video_pathname, cascade_pathname, scale_down_ratio, log_file):
    """Run a cascade classifier on down-scaled frames and log the detections.

    Every frame is shrunk by ``scale_down_ratio`` before detection, and the
    resulting rectangles are multiplied by the same ratio so that the logged
    coordinates refer to the full-size frame.

    Args:
        video_pathname: Passed to ``video_generator``, which yields
            ``(frame filename, frame)`` pairs.
        cascade_pathname: Path of the cascade model file.
        scale_down_ratio: Factor by which each frame is shrunk.
        log_file: Writable text file; receives one
            ``<frame filename> <x,y,w,h> <x,y,w,h> ...`` line per frame.
    """
    video = video_generator(video_pathname)
    cascade = cv2.CascadeClassifier(str(cascade_pathname))
    for frame_filename_str, frame in video:
        # Shrink by the ratio that is used below to map detections back.
        # Resizing to a fixed size would make that mapping meaningless.
        frame_height, frame_width = frame.shape[:2]
        small_size = (max(1, int(frame_width / scale_down_ratio)),
                      max(1, int(frame_height / scale_down_ratio)))
        small_frame = cv2.resize(frame, small_size)
        rects = cascade.detectMultiScale(small_frame, 1.2, 3, 0, (20, 20))
        # Scale detections back up to full-frame coordinates.
        new_rects = [
            Rect(scale_down_ratio * x, scale_down_ratio * y,
                 scale_down_ratio * width, scale_down_ratio * height)
            for x, y, width, height in rects
        ]
        rects_str = ' '.join(','.join(map(str, rect)) for rect in new_rects)
        log_file.write('{} {}\n'.format(frame_filename_str, rects_str))
def parse(log_pathname,
          label_pathname,
          tp_pathname=None,
          fp_pathname=None,
          fn_pathname=None,
          overlap_rate=kitti_rate):
    """Compare logged detections with labels and count TP / FP / FN.

    Both input files hold one ``<frame filename> <rect> <rect> ...`` line per
    frame.  For every labelled frame the logged rectangles are compared with
    the label rectangles by ``_compare``, and the three resulting groups are
    written out through ``_log_pos_samples``.

    Args:
        log_pathname: Detection log; must provide ``open()``.
        label_pathname: Ground-truth labels; must provide ``open()``.
        tp_pathname: Destination for true positives, handed to
            ``_unwrap_or_tempfile`` (presumably a temporary file is used
            when it is ``None`` -- verify against that helper).
        fp_pathname: Same, for false positives.
        fn_pathname: Same, for false negatives.
        overlap_rate: Overlap criterion forwarded to ``_compare``.

    Returns:
        Tuple ``(tp_count, fp_count, fn_count)`` summed over all labelled
        frames.

    Raises:
        ValueError: If a line contains no space after the filename.
    """
    # Local import keeps this block self-contained.
    from contextlib import ExitStack

    log_dict = defaultdict(list)
    tp_count, fp_count, fn_count = 0, 0, 0

    # Register each file for closing as soon as it is opened, so a failure
    # while opening a later file (or while closing one) cannot leak the
    # others.  Only ``close()`` is required of the file objects.
    with ExitStack() as stack:
        log_file = log_pathname.open()
        stack.callback(log_file.close)
        label_file = label_pathname.open()
        stack.callback(label_file.close)
        tp_file = _unwrap_or_tempfile(tp_pathname, 'w')
        stack.callback(tp_file.close)
        fp_file = _unwrap_or_tempfile(fp_pathname, 'w')
        stack.callback(fp_file.close)
        fn_file = _unwrap_or_tempfile(fn_pathname, 'w')
        stack.callback(fn_file.close)

        # Index all detections by frame first.
        for line in log_file:
            frame_filename_str, rects_strs = line.rstrip('\n').split(' ', 1)
            log_dict[frame_filename_str] = [
                Rect(rect_str) for rect_str in rects_strs.split()
            ]

        # Frames without a log entry compare against an empty list, courtesy
        # of the defaultdict.
        for line in label_file:
            frame_filename_str, rects_strs = line.rstrip('\n').split(' ', 1)
            label_rects = [Rect(rect_str) for rect_str in rects_strs.split()]
            tp, fp, fn = _compare(log_dict[frame_filename_str], label_rects,
                                  overlap_rate)
            _log_pos_samples(tp_file, frame_filename_str, tp)
            _log_pos_samples(fp_file, frame_filename_str, fp)
            _log_pos_samples(fn_file, frame_filename_str, fn)
            tp_count += len(tp)
            fp_count += len(fp)
            fn_count += len(fn)

    return tp_count, fp_count, fn_count
def update(self, image):
    """Run the detection network on ``image`` and refresh the result lists.

    After the call ``self.people`` holds one ``Rect`` per detection whose
    class name is ``'person'``, and ``self.other_objects`` holds the class
    names of all other accepted detections.  Detections whose confidence is
    below ``self.confidence`` or whose class id is not covered by
    ``classNames`` are dropped.
    """
    # Results from the previous frame are discarded.
    self.people = []
    self.other_objects = []

    frame = image.copy()

    # Convert the frame into the blob the network takes as input.
    blob = cv.dnn.blobFromImage(frame, inScaleFactor, (inWidth, inHeight),
                                (meanVal, meanVal, meanVal), False, False)

    # Forward pass.
    self.net.setInput(blob)
    detections = self.net.forward()

    # Frame size, used to scale the box coordinates the network reports.
    rows, cols = frame.shape[0], frame.shape[1]

    # Per detection i: index 1 is the class id, index 2 the confidence and
    # indices 3..6 the box corners.
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        class_id = int(detections[0, 0, i, 1])

        # Skip weak detections and classes without a known name.
        if confidence < self.confidence or class_id >= len(classNames):
            continue

        x_start = int(detections[0, 0, i, 3] * cols)
        y_start = int(detections[0, 0, i, 4] * rows)
        x_end = int(detections[0, 0, i, 5] * cols)
        y_end = int(detections[0, 0, i, 6] * rows)
        roi = Rect((x_start, y_start), (x_end, y_end))

        # People keep their bounding box; anything else only its name.
        label = classNames[class_id]
        if label == 'person':
            self.people.append(roi)
        else:
            self.other_objects.append(label)
def _find_neg_rect(image, pos_rects):
    """Pick a random rectangle that matches none of the positive rectangles.

    Up to 15 random rectangles of at least ``min_width`` x ``min_height``
    are drawn inside the image.  The first one for which ``is_same_target``
    reports no match against any rectangle in ``pos_rects`` is returned.

    Args:
        image: Array whose ``shape`` is ``(height, width, channels)``.
        pos_rects: Rectangles the result must not match.

    Returns:
        A ``Rect(x, y, width, height)``, or ``None`` when the image is
        smaller than the minimum size or every attempt matched a positive.
    """
    image_height, image_width, _ = image.shape
    if image_width < min_width or image_height < min_height:
        return None

    max_attempts = 15
    for _attempt in range(max_attempts):
        # Draw the size first, then an origin that keeps the box inside.
        width = random.randint(min_width, image_width)
        x = random.randint(0, image_width - width)
        height = random.randint(min_height, image_height)
        y = random.randint(0, image_height - height)
        candidate = Rect(x, y, width, height)

        match_count = sum(
            1 for positive in pos_rects
            if is_same_target(candidate, positive, overlap_rate))
        if match_count == 0:
            return candidate

    return None
def generate_background(cascade_pathname, scale_down_ratio):
    """Rebuild the background image set from the cascade's false positives.

    Steps:
      1. Delete every file in ``background_directory``.
      2. For each entry of ``trainset_directory`` that has no suffix and
         contains ``0.jpg``: run ``detect`` into ``<entry>.log``, compare it
         with ``<entry>.txt`` via ``phodopus.parse`` (false positives go to
         ``<entry>.fp``), then crop every false-positive rectangle out of
         its frame, shrink it by ``scale_down_ratio`` and save it as
         ``<entry>-<frame>-<index>.jpg`` in ``background_directory``.
      3. List the saved images in ``background.txt`` and shuffle that file
         in place with the external ``shuf`` command.

    Args:
        cascade_pathname: Cascade model forwarded to ``detect``.
        scale_down_ratio: Forwarded to ``detect``; also the factor by which
            each crop is shrunk before saving.
    """
    background_pathname = directory / 'background.txt'
    for image_pathname in background_directory.iterdir():
        image_pathname.unlink()

    with background_pathname.open('w') as background_file:
        for pathname in trainset_directory.iterdir():
            if pathname.suffix != '' or not (pathname / '0.jpg').exists():
                continue

            logging.debug('detect {}'.format(pathname))
            log_pathname = pathname.with_suffix('.log')
            label_pathname = pathname.with_suffix('.txt')
            fp_pathname = pathname.with_suffix('.fp')

            with log_pathname.open('w') as log_file:
                detect(pathname, cascade_pathname, scale_down_ratio, log_file)
            # The log is closed (and flushed) before it is read back.
            phodopus.parse(log_pathname, label_pathname,
                           fp_pathname=fp_pathname)

            with fp_pathname.open() as fp_file:
                for line in fp_file:
                    image_filename_str, rects_str = line.split(' ', 1)
                    rect_strs = rects_str.split()
                    if not rect_strs:
                        continue
                    # Decode the frame once per line; every rectangle on the
                    # line is cropped from this same array.
                    frame = cv2.imread(str(pathname / image_filename_str))
                    frame_stem = image_filename_str.split('.')[0]
                    for index, rect_str in enumerate(rect_strs):
                        rect = Rect(rect_str)
                        crop = frame[rect.y:rect.y + rect.height,
                                     rect.x:rect.x + rect.width]
                        width = int(crop.shape[1] / scale_down_ratio)
                        height = int(crop.shape[0] / scale_down_ratio)
                        crop = cv2.resize(crop, (width, height))
                        new_image_filename_str = '{}-{}-{}.jpg'.format(
                            pathname.name, frame_stem, index)
                        new_image_pathname = (background_directory /
                                              new_image_filename_str)
                        cv2.imwrite(str(new_image_pathname), crop)
                        background_file.write('background/{}\n'.format(
                            new_image_filename_str))

    # Shuffle only after the list file is closed so shuf sees all of it.
    shuf_command = [
        'shuf',
        str(background_pathname), '-o',
        str(background_pathname)
    ]
    subprocess.call(shuf_command)
def _handle_video(label_pathname, sample_pathname, caffe_dataset_directory,
                  caffe_label_file):
    """Cut positive and negative training crops out of one labelled video.

    For every frame that appears in the label file, each labelled rectangle
    is saved as ``<sample stem>-<frame>-<index>.jpg`` and listed with
    ``pos_label``.  In addition, at most one random non-matching rectangle
    (from ``_find_neg_rect``) is saved under the next free index and listed
    with ``neg_label``.

    Args:
        label_pathname: Label file with ``<frame filename> <rect> ...``
            lines; must provide ``open()``.
        sample_pathname: Passed to ``video_generator``; its ``stem`` prefixes
            the output filenames.
        caffe_dataset_directory: Directory that receives the crops.
        caffe_label_file: Writable text file; receives one
            ``<image filename> <label>`` line per crop.
    """
    label_dict = {}
    # Open under ``with`` so the label file is closed once it is parsed.
    with label_pathname.open() as label_file:
        for line in label_file:
            frame_filename_str, rects_str = line.rstrip('\n').split(' ', 1)
            # Frames are keyed by filename without extension.
            frame_filename_str = frame_filename_str.split('.')[0]
            # NOTE(review): split(' ') yields empty tokens for repeated or
            # trailing spaces -- confirm label files never contain them.
            rects = [Rect(rect_str) for rect_str in rects_str.split(' ')]
            label_dict[frame_filename_str] = rects

    video = video_generator(sample_pathname)
    for frame_filename_str, frame in video:
        frame_filename_str = frame_filename_str.split('.')[0]
        if frame_filename_str not in label_dict:
            continue
        rects = label_dict[frame_filename_str]

        # Positive samples: one crop per labelled rectangle.
        for rect_index, rect in enumerate(rects):
            image_filename = '{}-{}-{}.jpg'.format(sample_pathname.stem,
                                                   frame_filename_str,
                                                   rect_index)
            x, y, width, height = tuple(rect)
            image = frame[y:y + height, x:x + width]
            cv2.imwrite(
                '{}/{}'.format(caffe_dataset_directory, image_filename),
                image)
            caffe_label_file.write('{} {}\n'.format(image_filename,
                                                    pos_label))
            logging.debug('handle image: {}'.format(image_filename))

        # Negative sample: at most one per frame, numbered after the
        # positives so the filenames cannot collide.
        neg_rect = _find_neg_rect(frame, rects)
        if neg_rect is not None:
            x, y, width, height = tuple(neg_rect)
            neg_image = frame[y:y + height, x:x + width]
            neg_image_filename = '{}-{}-{}.jpg'.format(
                sample_pathname.stem, frame_filename_str, len(rects))
            cv2.imwrite(
                '{}/{}'.format(caffe_dataset_directory, neg_image_filename),
                neg_image)
            caffe_label_file.write('{} {}\n'.format(neg_image_filename,
                                                    neg_label))
            logging.debug('handle neg_image: {}'.format(neg_image_filename))
class RarityRecognizer:
    """Count star icons inside a fixed region of an image.

    The ``star.png`` template is matched inside the ROI, hits are grouped
    into individual stars by their x position, and the measured positions
    are compared with a table of expected positions to estimate how far the
    whole row is shifted.
    """

    _star_img = None
    _star_mask_img = None
    # Region of the input image that is searched (read via .x, .y, .w, .h).
    _roi = Rect(30, 83, 90, 16)
    _method = cv2.TM_SQDIFF_NORMED
    # Upper bound on the combined score for a position to count as a hit.
    _threshold = 0.05
    # Hits closer than this (in x) to the first hit of a group join it.
    _min_interval = 6
    # Expected x of every star inside the ROI; entry k is used when k + 1
    # stars were found.
    _precise_star_xs = [[41],
                        [36, 46],
                        [30, 41, 51],
                        [25, 36, 46, 57],
                        [20, 30, 41, 51, 61],
                        [15, 25, 36, 46, 57, 67],
                        [10, 20, 30, 41, 51, 61, 71],
                        [7, 17, 26, 36, 45, 55, 64, 74]]
    # Expected y of the stars inside the ROI.
    _precise_star_y = 6

    def __init__(self):
        """Load the star template and its mask as colour images."""
        self._star_img = cv2.imread('star.png', cv2.IMREAD_COLOR)
        self._star_mask_img = cv2.imread('star_mask.png', cv2.IMREAD_COLOR)

    def recognize(self, input_img):
        """Return ``(star count, offset)`` for ``input_img``.

        ``offset`` is a ``Point`` whose x/y give the displacement of the
        detected stars from their expected positions.  When no star or more
        than eight stars are found, ``(0, Point())`` is returned.
        """
        roi = self._roi
        template = self._star_img
        mask = self._star_mask_img

        # Only the region of interest is searched.
        input_roi_img = input_img[roi.y:roi.y + roi.h,
                                  roi.x:roi.x + roi.w,
                                  :]

        # Match each of the three channels and sum the squared results.
        star_h, star_w, _ = template.shape
        scores = np.zeros((roi.h - star_h + 1, roi.w - star_w + 1))
        for channel in range(3):
            channel_result = cv2.matchTemplate(
                input_roi_img[:, :, channel],
                template[:, :, channel],
                self._method,
                mask=mask[:, :, channel])
            scores = scores + np.square(channel_result)

        # Positions at or below the threshold are hits.
        ys, xs = np.where(scores <= self._threshold)

        # Group hits by x so that a star is not counted twice: a new group
        # starts once a hit lies at least _min_interval past the first hit
        # of the current group.
        xs.sort()
        group_start = -999999
        star_groups = []
        for hit_x in xs:
            if hit_x - group_start >= self._min_interval:
                group_start = hit_x
                star_groups.append([])
            star_groups[-1].append(hit_x)

        star_count = len(star_groups)
        if star_count == 0 or star_count > 8:
            return (0, Point())

        # Try every hit of the first star as the anchor and keep the x
        # shift under which the most hits land exactly on their expected
        # position.
        expected_xs = self._precise_star_xs[star_count - 1]
        best_offset = Point()
        best_score = 0
        for candidate in star_groups[0]:
            dx = candidate - expected_xs[0]
            score = sum(
                1
                for group, expected_x in zip(star_groups, expected_xs)
                for hit_x in group
                if hit_x - expected_x == dx)
            if score > best_score:
                best_score = score
                best_offset.x = dx

        # The y shift comes from the most frequent y among all hits.
        ys = ys.tolist()
        best_offset.y = max(set(ys), key=ys.count) - self._precise_star_y

        return (star_count, best_offset)
def get_bounds(self):
    """Return a ``Rect`` from (0, 0) to the far corner of the whole map.

    The far corner is ``(cols * tile_width, rows * tile_height)``.
    """
    total_width = self.cols * self.tile_width
    total_height = self.rows * self.tile_height
    return Rect(0, 0, total_width, total_height)
def get_tile_rect(self, x, y):
    """Return the ``Rect`` of the tile cell that contains point (x, y).

    The point is snapped down to the nearest multiple of the tile size; the
    returned rect spans from that corner to the corner one tile further.
    """
    tile_w, tile_h = self.tile_width, self.tile_height
    col_index = floor(x / tile_w)
    row_index = floor(y / tile_h)
    left = col_index * tile_w
    top = row_index * tile_h
    return Rect(left, top, left + tile_w, top + tile_h)