def predict(inputImg):
    """Run the module-level ``east_detect`` model on a PIL image and draw the result.

    Args:
        inputImg: PIL image to analyse. It is not modified; all drawing happens
            on resized RGB copies.

    Returns:
        A tuple ``(im, quad_im, quads, y)`` where ``im`` is the resized image
        with every activated pixel outlined, ``quad_im`` is the resized image
        with the accepted quads outlined in blue, ``quads`` is a list of
        8-element coordinate lists scaled back to the size of ``inputImg``,
        and ``y`` is the model output with its first three channels passed
        through ``sigmoid``.
    """
    pixel_threshold = 0.7  # hard-coded here instead of cfg.pixel_threshold
    pixel_size = 4

    d_wight, d_height = resize_image(inputImg, cfg.max_predict_img_size)
    scale_ratio_w = d_wight / inputImg.width
    scale_ratio_h = d_height / inputImg.height

    # One resized RGB image serves both as network input and as the canvas.
    # NOTE(review): the previous version copied and resized the input a second
    # time for drawing, which yields the same pixels provided resize_image()
    # is deterministic - confirm.
    im = inputImg.resize((d_wight, d_height), Image.NEAREST).convert('RGB')

    x = np.expand_dims(
        preprocess_input(image.img_to_array(im), mode='tf'), axis=0)
    y = np.squeeze(east_detect.predict(x), axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    activation_pixels = np.where(
        np.greater_equal(y[:, :, 0], pixel_threshold))
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    quad_im = im.copy()
    draw = ImageDraw.Draw(im)
    half = 0.5 * pixel_size
    for i, j in zip(activation_pixels[0], activation_pixels[1]):
        # Centre of the output cell (i, j) in resized-image coordinates.
        px = (j + 0.5) * pixel_size
        py = (i + 0.5) * pixel_size
        line_width, line_color = 1, 'red'
        if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
            if y[i, j, 2] < cfg.trunc_threshold:
                line_width, line_color = 2, 'yellow'
            elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                line_width, line_color = 2, 'green'
        draw.line([(px - half, py - half),
                   (px + half, py - half),
                   (px + half, py + half),
                   (px - half, py + half),
                   (px - half, py - half)],
                  width=line_width, fill=line_color)

    quad_draw = ImageDraw.Draw(quad_im)
    quads = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            quad_draw.line([tuple(geo[0]),
                            tuple(geo[1]),
                            tuple(geo[2]),
                            tuple(geo[3]),
                            tuple(geo[0])], width=3, fill='blue')
            # Map the quad from resized coordinates back to the input size.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            quads.append(np.reshape(rescaled_geo, (8, )).tolist())
    return im, quad_im, quads, y
def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and optionally write them to a text file.

    Args:
        east_detect: model object exposing ``predict(batch)``.
        img_path: path of the image to load.
        txt_path: destination file; written only when ``cfg.predict_write2txt``
            is set and at least one quad was accepted.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
    """
    source = image.load_img(img_path)
    d_wight, d_height = resize_image(source, cfg.max_predict_img_size)
    # Per-axis factors used to map quads back to the source image size.
    ratio = [d_wight / source.width, d_height / source.height]

    resized = source.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    pixels = imagenet_utils.preprocess_input(
        image.img_to_array(resized), mode='tf')
    y = np.squeeze(east_detect.predict(np.expand_dims(pixels, axis=0)), axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    activation_pixels = np.where(np.greater_equal(y[:, :, 0], pixel_threshold))
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if not np.amin(score) > 0:
            if not quiet:
                print('quad invalid with vertex num less then 4.')
            continue
        coords = np.reshape(geo / ratio, (8,)).tolist()
        txt_items.append(','.join(str(value) for value in coords) + '\n')

    if cfg.predict_write2txt and txt_items:
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False):
    """Detect text quads with a torch model and optionally write them to a file.

    Args:
        east_detect: callable torch model; called with a ``1 x C x H x W``
            tensor.
        img_path: path of the image to load.
        txt_path: destination file; written only when ``cfg.predict_write2txt``
            is set and at least one quad was accepted.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
    """
    # Open through a context manager so the file handle is always released.
    with Image.open(img_path) as img:
        d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / img.width
        scale_ratio_h = d_height / img.height
        transform = transforms.Compose([
            # torchvision's Resize takes (height, width); passing
            # (width, height) transposes every non-square image.
            transforms.Resize((d_height, d_wight), interpolation=2),
            transforms.ToTensor()
        ])
        # Force three channels: palette, grey or RGBA files are not RGB.
        x = transform(img.convert('RGB'))
    x = torch.unsqueeze(x, 0)  # add the batch dimension
    with torch.no_grad():  # inference only, no autograd graph needed
        y = east_detect(x)
    y = torch.squeeze(y, 0)  # drop the batch dimension
    print(y.shape)
    y = y.detach().numpy()
    if y.shape[0] == 7:
        y = y.transpose((1, 2, 0))  # CHW -> HWC
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            # Map the quad from resized coordinates back to the source size.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
            txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
        elif not quiet:
            print('quad invalid with vertex num less then 4.')
    if cfg.predict_write2txt and txt_items:
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
def test(model_name, plot_file=None, test_directory=DEFAULT_TEST_DIRECTORY, model_directory=DEFAULT_MODEL_DIRECTORY):
    """Run the named model over a directory of test images and time the prediction.

    Args:
        model_name: name passed to ``deepconvnet``.
        plot_file: when truthy, predictions are handed to ``plot_predictions``
            together with the unprocessed images and this value.
        test_directory: directory given to ``load_images_with_labels``.
        model_directory: forwarded to ``deepconvnet`` as ``model_directory``.
    """
    target_shape = (IMG_SIZE, IMG_SIZE)
    model = deepconvnet(model_name, LR, target_shape,
                        model_directory=model_directory)

    def _to_target_shape(img):
        return resize_image(img, target_shape)

    # The labels returned alongside the images are not used here.
    X_orig, _ = load_images_with_labels(test_directory)
    prepared = preprocess(X_orig, [conv_gray_scale, _to_target_shape])
    X = np.array(prepared).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

    start = time.time()
    predictions = model.predict(X)
    elapsed = time.time() - start
    print("\nTime taken to predict outcomes: {} secs \n".format(elapsed))

    if plot_file:
        plot_predictions(X_orig, predictions, plot_file)
def capture():
    """Grab a fixed screen region and return it as a single-sample batch.

    The screenshot is written to ``./play/1.png``, read back with ``imread``,
    passed through ``resize_image`` and given a leading batch axis. The
    temporary PNG is removed before returning, including when reading or
    resizing fails.

    Returns:
        The result of ``resize_image`` with an extra axis inserted at
        position 0.
    """
    name = "./play/1.png"
    monitor = {"top": 272, "left": 570, "width": 800, "height": 600}
    sct_img = sct.grab(monitor)
    mss.tools.to_png(sct_img.rgb, sct_img.size, output=name)
    try:
        vec = resize_image(imread(name))
    finally:
        # Never leave the temporary screenshot behind, even on error.
        os.remove(name)
    return np.expand_dims(vec, axis=0)
def train(model_name, train_directory=DEFAULT_TRAIN_DIRECTORY, epoch=DEFAULT_EPOCH, learning_rate=LR, model_save_path=DEFAULT_MODEL_DIRECTORY):
    """Train the named model on a directory of labelled images and save it.

    Args:
        model_name: name passed to ``deepconvnet``; also used as the run id
            and as the file name under ``model_save_path``.
        train_directory: directory given to ``load_images_with_labels``.
        epoch: number of epochs passed to ``fit`` as ``n_epoch``.
        learning_rate: learning rate passed to ``deepconvnet``.
        model_save_path: directory the trained model is saved into.
    """
    target_shape = (IMG_SIZE, IMG_SIZE)
    model = deepconvnet(model_name, learning_rate, target_shape)

    def _to_target_shape(img):
        return resize_image(img, target_shape)

    raw_images, labels = load_images_with_labels(train_directory)
    prepared = preprocess(raw_images, [conv_gray_scale, _to_target_shape])
    samples = np.array(prepared).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

    # Fixed seed keeps the 80/20 train/validation split reproducible.
    X_train, X_validate, y_train, y_validate = train_test_split(
        samples, labels, test_size=0.2, random_state=42)

    validation = ({'input': X_validate}, {'targets': y_validate})
    model.fit(
        {'input': X_train},
        {'targets': y_train},
        validation_set=validation,
        n_epoch=epoch,
        snapshot_step=500,
        show_metric=True,
        run_id=model_name
    )

    destination = os.path.join(model_save_path, model_name)
    model.save(destination)
def path_to_tensor(self, image_path):
    """Load an image and return it as a float array with a leading batch axis.

    When ``self.face_crop`` equals ``True`` the image goes through
    ``preprocess_image`` with ``crop_dim=224``; if that returns ``None`` an
    empty list is returned instead of an array. Otherwise the whole picture
    goes through ``resize_image`` with ``size=224`` and
    ``random_padding_border_color=True``.
    """
    if self.face_crop == True:  # noqa: E712 - equality test kept as written
        prepared = preprocess_image(image_path, None, crop_dim=224)
        if prepared is None:
            return []
    else:
        # Use the whole picture: resize it while keeping the width/height
        # ratio, padding when one side is shorter than 224.
        prepared = resize_image(image_path, None, size=224,
                                random_padding_border_color=True)

    logger.info("Processed image {} shape {}".format(
        image_path, prepared.shape))
    as_float = np.array(prepared, dtype='float')
    return np.expand_dims(as_float, axis=0)
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and save visualisations next to it.

    Writes ``<img_path>_act.jpg`` (activated pixels outlined) and
    ``<img_path>_predict.jpg`` (accepted quads outlined). When
    ``cfg.predict_cut_text_line`` is set, ``cut_text_line`` is called for each
    accepted quad. When ``cfg.predict_write2txt`` is set and at least one quad
    was accepted, the quads are written to a ``.txt`` file that replaces the
    image's extension.

    Args:
        east_detect: model object exposing ``predict(batch)``.
        img_path: path of the image to process.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
    """
    import os  # local import: only needed to build the .txt path

    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    # Scale the pixel values to [-1, 1].
    img = imagenet_utils.preprocess_input(img, mode='tf')
    # Add the batch dimension to get a 4-D tensor.
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)
    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        # Full-size pixels, handed to cut_text_line below.
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        half = 0.5 * cfg.pixel_size
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            # Centre of the output cell (i, j) in resized-image coordinates.
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - half, py - half),
                       (px + half, py - half),
                       (px + half, py + half),
                       (px - half, py + half),
                       (px - half, py - half)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')

        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]),
                                tuple(geo[1]),
                                tuple(geo[2]),
                                tuple(geo[3]),
                                tuple(geo[0])], width=2, fill='red')
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                  im_array, img_path, s)
                # Map the quad back to the original image size.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
            elif not quiet:
                print('quad invalid with vertex num less then 4.')
        quad_im.save(img_path + '_predict.jpg')

        if cfg.predict_write2txt and txt_items:
            # splitext handles any extension length; slicing off four
            # characters produced wrong names for e.g. ".jpeg" files.
            txt_path = os.path.splitext(img_path)[0] + '.txt'
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)
def predict_quad(model, img, pixel_threshold=cfg.pixel_threshold, quiet=False, img_name=None):
    """Detect text quads in a PIL image.

    Args:
        model: detection model with its weights already loaded.
        img: PIL image to analyse.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
        img_name: name of the image. When given, the activation and quad
            visualisations are saved as ``root_temp + img_name + '.jpg'`` and
            ``root_predict + img_name + '.jpg'``.

    Returns:
        text_recs_all: list of 8-element coordinate lists, one per accepted
            quad, in the coordinates of the resized image.
        text_recs_len: list with the number of accepted quads per image
            (a single entry, since one image is processed).
        img_all: 4-D array whose element 0 is the ``img_to_array`` result of
            the resized image.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(root_temp, exist_ok=True)
    os.makedirs(root_predict, exist_ok=True)

    # Target size for prediction.
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.BILINEAR).convert('RGB')
    img = image.img_to_array(img)

    num_img = 1
    # Keep a copy of the unprocessed pixels BEFORE preprocessing.
    # NOTE(review): preprocess_input may modify its argument in place, so the
    # order of the next two statements matters - confirm before reordering.
    img_all = np.zeros((num_img, d_height, d_wight, 3))
    img_all[0] = img
    # Scale the pixel values to [-1, 1].
    img_ori = imagenet_utils.preprocess_input(img, mode='tf')
    x = np.zeros((num_img, d_height, d_wight, 3))
    x[0] = img_ori

    # (sample, h, w, channels)
    y_pred = model.predict(x)

    text_recs_all = []
    text_recs_len = []
    half = 0.5 * cfg.pixel_size
    for n in range(num_img):
        # (rows, cols, 7 predicted values)
        y = y_pred[n]
        y[:, :, :3] = sigmoid(y[:, :, :3])
        cond = np.greater_equal(y[:, :, 0], pixel_threshold)
        # Tuple of index arrays: [0] holds row indices, [1] column indices.
        activation_pixels = np.where(cond)
        quad_scores, quad_after_nms = nms(y, activation_pixels)

        text_recs = []
        # NOTE(review): x[n] holds the preprocessed values at this point, so
        # this uint8 cast does not recover the image pixels; img_all[n] looks
        # like the intended source for im_array - confirm before changing.
        x[n] = np.uint8(x[n])
        with image.array_to_img(img_all[n]) as im:
            im_array = x[n]
            # The quads stay in resized-image coordinates, hence ratio 1.
            scale_ratio_w = 1
            scale_ratio_h = 1
            quad_im = im.copy()
            draw = ImageDraw.Draw(im)
            # Outline every activated pixel on the first copy.
            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'blue'
                if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                    if y[i, j, 2] < cfg.trunc_threshold:
                        line_width, line_color = 2, 'yellow'
                    elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                        line_width, line_color = 2, 'green'
                draw.line([(px - half, py - half),
                           (px + half, py - half),
                           (px + half, py + half),
                           (px - half, py + half),
                           (px - half, py - half)],
                          width=line_width, fill=line_color)
            if img_name is not None:
                im.save(root_temp + img_name + '.jpg')

            quad_draw = ImageDraw.Draw(quad_im)
            for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
                if np.amin(score) > 0:
                    quad_draw.line([tuple(geo[0]),
                                    tuple(geo[1]),
                                    tuple(geo[2]),
                                    tuple(geo[3]),
                                    tuple(geo[0])], width=2, fill='blue')
                    if cfg.predict_cut_text_line:
                        cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                      im_array, img_name, s)
                    rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                    text_recs.append(np.reshape(rescaled_geo, (8,)).tolist())
                elif not quiet:
                    print('quad invalid with vertex num less then 4.')
            if img_name is not None:
                quad_im.save(root_predict + img_name + '.jpg')

        text_recs_all.extend(text_recs)
        text_recs_len.append(len(text_recs))
    return text_recs_all, text_recs_len, img_all
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and save visualisations next to it.

    Writes ``<img_path>_act.jpg`` (activated pixels outlined) and
    ``<img_path>_predict.jpg`` (accepted quads outlined). When
    ``cfg.predict_cut_text_line`` is set, ``cut_text_line`` is called for each
    accepted quad. When ``cfg.predict_write2txt`` is set and at least one quad
    was accepted, the quad coordinates are written to a ``.txt`` file that
    replaces the image's extension.

    :param east_detect: model object exposing ``predict(batch)``
    :param img_path: path of the image to process
    :param pixel_threshold: minimum channel-0 score for an activated pixel
    :param quiet: when true, no message is printed for rejected quads
    :return: None
    """
    import os  # local import: only needed to build the .txt path

    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    img = preprocess_input(img, mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)
    # Drop the batch dimension.
    y = np.squeeze(y, axis=0)
    # Squash the first three values of every vector into the 0-1 range.
    y[:, :, :3] = sigmoid(y[:, :, :3])
    # A pixel counts as text when its first value reaches the threshold.
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    # Coordinates of the pixels that satisfy the condition.
    activation_pixels = np.where(cond)
    # Non-maximum suppression.
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        # Re-read the image at full size for cut_text_line.
        im_array = image.img_to_array(im.convert('RGB'))
        # Resize factors between the original and the prediction size.
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        # Separate canvas for the quad outlines.
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        half = 0.5 * cfg.pixel_size
        # Outline every activated pixel.
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            # At or above the side-vertex threshold: a head or tail pixel.
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                # Low values are drawn yellow (head), high values green
                # (tail); anything in between stays red.
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - half, py - half),
                       (px + half, py - half),
                       (px + half, py + half),
                       (px - half, py + half),
                       (px - half, py - half)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')

        # Outline the accepted quads.
        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]),
                                tuple(geo[1]),
                                tuple(geo[2]),
                                tuple(geo[3]),
                                tuple(geo[0])], width=2, fill='red')
                # Optionally cut out the image of each text line.
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                  im_array, img_path, s)
                # Convert the coordinates back to the original image size.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
            elif not quiet:
                print('quad invalid with vertex num less then 4.')
        quad_im.save(img_path + '_predict.jpg')

        # Write out the coordinates.
        if cfg.predict_write2txt and txt_items:
            # splitext handles any extension length; slicing off four
            # characters produced wrong names for e.g. ".jpeg" files.
            txt_path = os.path.splitext(img_path)[0] + '.txt'
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and write results to output folders.

    Outputs are named after the image's base name up to its first dot:

    * ``output/<name>_predict.jpg``: resized image with the accepted quads
      outlined, saved only when at least one quad was accepted;
    * ``output_crop/<name>.jpg``: result of ``crop_rectangle`` for accepted
      quads, written when ``cfg.detection_box_crop`` is set;
    * ``output_txt/<name>.txt``: one comma-separated quad per line, written
      when ``cfg.predict_write2txt`` is set and a quad was accepted.

    Args:
        east_detect: model object exposing ``predict(batch)``.
        img_path: path of the image to process.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
    """
    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.image_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    img = preprocess_input(img, mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)
    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    # Computed once; os.path.basename also copes with platform separators,
    # unlike splitting on '/'.
    stem = os.path.basename(img_path).split('.')[0]

    with Image.open(img_path) as im:
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.image_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for score, geo in zip(quad_scores, quad_after_nms):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]),
                                tuple(geo[1]),
                                tuple(geo[2]),
                                tuple(geo[3]),
                                tuple(geo[0])], width=2, fill='blue')
                # Map the quad back to the original image size.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
                if cfg.detection_box_crop:
                    # NOTE(review): every accepted quad writes the same file
                    # name, so only the last crop survives - confirm intent.
                    img_crop = crop_rectangle(im_array, rescaled_geo)
                    cv2.imwrite(
                        os.path.join('output_crop', stem + '.jpg'), img_crop)
            elif not quiet:
                print('quad invalid with vertex num less then 4.')

        # txt_items gains one entry per accepted quad, so it doubles as the
        # "anything detected" flag.
        if txt_items:
            quad_im.save(os.path.join('output', stem + '_predict.jpg'))
        if cfg.predict_write2txt and txt_items:
            with open(os.path.join("output_txt", stem + '.txt'),
                      'w') as f_txt:
                f_txt.writelines(txt_items)
def predict(east_detect,
            img_path,
            text_pixel_threshold=cfg.text_pixel_threshold,
            text_side_threshold=cfg.text_side_vertex_pixel_threshold,
            text_trunc_threshold=cfg.text_trunc_threshold,
            action_pixel_threshold=cfg.action_pixel_threshold,
            action_side_vertex_pixel_threshold=cfg.action_side_vertex_pixel_threshold,
            arrow_trunc_threshold=cfg.arrow_trunc_threshold,
            nock_trunc_threshold=cfg.nock_trunc_threshold,
            quiet=False):
    """Detect text and arrow quads in an image file.

    The model output is read as: channel 0 an activation score (sigmoid),
    channels 1-3 class scores (softmax) handled as class 1 = text,
    class 2 = nock, class 3 = arrow, and channels 4-5 side-vertex values
    (sigmoid). Classes are processed in the order 3, 2, 1. A pixel belongs to
    a class when its activation reaches that class's pixel threshold and the
    class score is strictly greater than both other class scores.

    Saves ``<img_path>_act.jpg`` (activated pixels outlined) and
    ``<img_path>_predict.jpg`` (text quads in red, arrow quads in green).
    Nock quads are computed but neither drawn nor reported.

    Args:
        east_detect: model object exposing ``predict(batch)``.
        img_path: path of the image to process.
        text_pixel_threshold: activation threshold for text pixels.
        text_side_threshold: side-vertex threshold for text pixels.
        text_trunc_threshold: head/tail threshold for text pixels.
        action_pixel_threshold: activation threshold for nock/arrow pixels.
        action_side_vertex_pixel_threshold: side-vertex threshold for
            nock/arrow pixels.
        arrow_trunc_threshold: threshold applied to channel 5 for arrows.
        nock_trunc_threshold: threshold applied to channel 5 for nocks.
        quiet: when true, no message is printed for rejected quads.

    Returns:
        List of lines of the form ``"<label>\\t<x1,y1,...,x4,y4>\\n"`` with
        label ``text`` or ``arrow`` and integer coordinates in the original
        image size.
    """
    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    img = preprocess_input(img, mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)
    y = np.squeeze(y, axis=0)
    y[:, :, :1] = sigmoid(y[:, :, :1])
    y[:, :, 1:4] = softmax(y[:, :, 1:4])
    y[:, :, 4:6] = sigmoid(y[:, :, 4:6])

    # Label and outline colour of the classes that are reported; class 2
    # (nock) is intentionally absent.
    quad_styles = {1: ('text', 'red'), 3: ('arrow', 'green')}
    half = 0.5 * cfg.pixel_size
    txt_items = []

    with Image.open(img_path) as im:
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        quad_draw = ImageDraw.Draw(quad_im)

        for idx in range(3, 0, -1):
            pixel_threshold = (text_pixel_threshold
                               if idx == 1 else action_pixel_threshold)
            cond_act = np.greater_equal(y[:, :, 0], pixel_threshold)
            beats_others = [
                y[:, :, idx] > y[:, :, other]
                for other in (1, 2, 3) if other != idx
            ]
            # np.logical_and is binary: a third positional argument is the
            # ``out`` buffer, not another operand. Passing three arrays
            # silently dropped the second class comparison and overwrote it.
            # reduce() ANDs all conditions together.
            activation_pixels = np.where(
                np.logical_and.reduce([cond_act] + beats_others))
            quad_scores, quad_after_nms = nms(
                y, activation_pixels, idx, text_side_threshold,
                text_trunc_threshold, action_side_vertex_pixel_threshold,
                nock_trunc_threshold, arrow_trunc_threshold)

            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                # Centre of the output cell (i, j) in resized coordinates.
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'red'
                if idx == 1 and y[i, j, 4] >= text_side_threshold and \
                        y[i, j, 5] < text_trunc_threshold:
                    line_width, line_color = 2, 'orange'
                elif idx == 1 and y[i, j, 4] >= text_side_threshold and \
                        y[i, j, 5] >= 1 - text_trunc_threshold:
                    line_width, line_color = 2, 'blue'
                elif idx == 2 and \
                        y[i, j, 4] >= action_side_vertex_pixel_threshold and \
                        y[i, j, 5] >= 1 - nock_trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif idx == 3 and \
                        y[i, j, 4] >= action_side_vertex_pixel_threshold and \
                        y[i, j, 5] >= arrow_trunc_threshold:
                    line_width, line_color = 2, 'purple'
                draw.line([(px - half, py - half),
                           (px + half, py - half),
                           (px + half, py + half),
                           (px - half, py + half),
                           (px - half, py - half)],
                          width=line_width, fill=line_color)

            style = quad_styles.get(idx)
            for score, geo in zip(quad_scores, quad_after_nms):
                if np.amin(score) > 0:
                    if style is None:
                        continue  # nock: nothing is drawn or reported
                    label, color = style
                    # Called for its effect on ``geo``; the return value was
                    # never used.
                    convert_bounding_box(geo)
                    rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                    quad_draw.line([tuple(geo[0]),
                                    tuple(geo[1]),
                                    tuple(geo[2]),
                                    tuple(geo[3]),
                                    tuple(geo[0])], width=3, fill=color)
                    rescaled_geo_list = np.reshape(
                        rescaled_geo.astype(np.int32), (8, )).tolist()
                    txt_item = ','.join(map(str, rescaled_geo_list))
                    txt_items.append(label + '\t' + txt_item + '\n')
                elif not quiet:
                    print('quad invalid with vertex num less then 4.')

        im.save(img_path + '_act.jpg')
        quad_im.save(img_path + '_predict.jpg')
    return txt_items
break countStuck = 0 preDistance = 0 f = open('model_play.txt', 'w') while (True): name = "./play/" + str(id) + ".png" #monitor = {"top": 272, "left": 570, "width": 800, "height": 600} monitor = {"top": 272, "left": 570, "width": 798, "height": 300} output = name.format(**monitor) sct_img = sct.grab(monitor) mss.tools.to_png(sct_img.rgb, sct_img.size, output=output) img = imread(name) vec = resize_image(img) vec = np.expand_dims(vec, axis=0) joystick = model.predict(vec, batch_size=1)[0] output = [ int(TransformAxisValue(joystick[0])), int(TransformAxisValue(joystick[1])), joystick[2], joystick[3], joystick[4], ] print(output) f.write("{}\n".format(output[0])) id += 1 output[0], Lfirst, Rfirst, state, count = check_x(output[0], Lfirst, Rfirst, state, count) button = check_button(np.argmax(joystick[-3:]), output[0])
def detect(img_path, model, device, pixel_threshold, quiet=True):
    """Detect text quads in an image file with a torch model.

    Writes ``<img_path>_act.jpg`` (activated pixels outlined) and
    ``<img_path>_predict.jpg`` (accepted quads outlined). When
    ``cfg.predict_write2txt`` is set and at least one quad was accepted, the
    quad coordinates are written to a ``.txt`` file that replaces the image's
    extension.

    Args:
        img_path: path of the image to process.
        model: callable torch model; its output is handled channel-first.
        device: device the input tensor is moved to before the forward pass.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
    """
    import os  # local import: only needed to build the .txt path

    img = Image.open(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    with torch.no_grad():
        east_detect = model(load_pil(img).to(device))
    y = np.squeeze(east_detect.cpu().numpy(), axis=0)  # c, h, w
    y[:3, :, :] = sigmoid(y[:3, :, :])
    cond = np.greater_equal(y[0, :, :], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        half = 0.5 * cfg.pixel_size
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            # Centre of the output cell (i, j) in resized-image coordinates.
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            if y[1, i, j] >= cfg.side_vertex_pixel_threshold:
                if y[2, i, j] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[2, i, j] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - half, py - half),
                       (px + half, py - half),
                       (px + half, py + half),
                       (px - half, py + half),
                       (px - half, py - half)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')

        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for score, geo in zip(quad_scores, quad_after_nms):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]),
                                tuple(geo[1]),
                                tuple(geo[2]),
                                tuple(geo[3]),
                                tuple(geo[0])], width=2, fill='red')
                # Map the quad back to the original image size.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
            elif not quiet:
                print('quad invalid with vertex num less then 4.')
        quad_im.save(img_path + '_predict.jpg')

        if cfg.predict_write2txt and txt_items:
            # splitext handles any extension length; slicing off four
            # characters produced wrong names for e.g. ".jpeg" files.
            txt_path = os.path.splitext(img_path)[0] + '.txt'
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file with a torch model.

    Writes ``<img_path>_act.jpg`` (activated pixels outlined) and
    ``<img_path>_predict.jpg`` (accepted quads outlined). When
    ``cfg.predict_cut_text_line`` is set, ``cut_text_line`` is called for each
    accepted quad. When ``cfg.predict_write2txt`` is set and at least one quad
    was accepted, the quad coordinates are written to a ``.txt`` file that
    replaces the image's extension.

    Args:
        east_detect: callable torch model; called with a ``1 x C x H x W``
            tensor.
        img_path: path of the image to process.
        pixel_threshold: minimum channel-0 score for a pixel to be activated.
        quiet: when true, no message is printed for rejected quads.
    """
    import os  # local import: only needed to build the .txt path

    # Open through a context manager so the file handle is always released.
    with Image.open(img_path) as img:
        d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
        transform = transforms.Compose([
            # torchvision's Resize takes (height, width); passing
            # (width, height) transposes every non-square image.
            transforms.Resize((d_height, d_wight), interpolation=2),
            transforms.ToTensor()
        ])
        # Force three channels: palette, grey or RGBA files are not RGB.
        x = transform(img.convert('RGB'))
    x = torch.unsqueeze(x, 0)  # add the batch dimension
    with torch.no_grad():  # inference only, no autograd graph needed
        y = east_detect(x)
    y = torch.squeeze(y, 0)  # drop the batch dimension
    print(y.shape)
    y = y.detach().numpy()
    if y.shape[0] == 7:
        y = y.transpose((1, 2, 0))  # CHW -> HWC
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        # Full-size pixels as a numpy array, handed to cut_text_line below.
        im_array = np.array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        half = 0.5 * cfg.pixel_size
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            # Centre of the output cell (i, j) in resized-image coordinates.
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - half, py - half),
                       (px + half, py - half),
                       (px + half, py + half),
                       (px - half, py + half),
                       (px - half, py - half)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')

        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]),
                                tuple(geo[1]),
                                tuple(geo[2]),
                                tuple(geo[3]),
                                tuple(geo[0])], width=2, fill='red')
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h,
                                  im_array, img_path, s)
                # Map the quad back to the original image size.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
            elif not quiet:
                print('quad invalid with vertex num less then 4.')
        quad_im.save(img_path + '_predict.jpg')

        if cfg.predict_write2txt and txt_items:
            # splitext handles any extension length; slicing off four
            # characters produced wrong names for e.g. ".jpeg" files.
            txt_path = os.path.splitext(img_path)[0] + '.txt'
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)