예제 #1
0
def predict(inputImg, pixel_threshold=0.7, pixel_size=4):
    """Detect text quads in a PIL image and render two debug overlays.

    The image is resized to the dimensions returned by ``resize_image``,
    passed through the module-level ``east_detect`` model, and the raw
    output is post-processed with ``sigmoid`` and ``nms``.

    Args:
        inputImg: PIL image to analyse; it is not modified.
        pixel_threshold: minimum value of output channel 0 (after sigmoid)
            for an output cell to count as activated. Defaults to the
            previously hard-coded 0.7.
        pixel_size: edge length, in resized-image pixels, of the square
            drawn for each activated output cell. Defaults to the
            previously hard-coded 4.

    Returns:
        A tuple ``(im, quad_im, quads, y)`` where ``im`` is the resized image
        with one square per activated cell, ``quad_im`` is the resized image
        with the accepted quads outlined in blue, ``quads`` is a list of
        8-element coordinate lists divided by the resize ratios (i.e. mapped
        back to the input image size), and ``y`` is the post-processed
        network output.
    """
    d_wight, d_height = resize_image(inputImg, cfg.max_predict_img_size)
    scale_ratio_w = d_wight / inputImg.width
    scale_ratio_h = d_height / inputImg.height
    # Resize once and reuse the result for the network input and both overlays.
    resized = inputImg.resize((d_wight, d_height), Image.NEAREST).convert('RGB')

    img = preprocess_input(image.img_to_array(resized), mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)

    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    im = resized.copy()
    quad_im = resized.copy()

    # Overlay 1: a square around every activated cell, colour-coded by the
    # side-vertex (channel 1) and truncation (channel 2) scores.
    draw = ImageDraw.Draw(im)
    half = 0.5 * pixel_size
    for i, j in zip(activation_pixels[0], activation_pixels[1]):
        px = (j + 0.5) * pixel_size
        py = (i + 0.5) * pixel_size
        line_width, line_color = 1, 'red'
        if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
            if y[i, j, 2] < cfg.trunc_threshold:
                line_width, line_color = 2, 'yellow'
            elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                line_width, line_color = 2, 'green'
        draw.line([(px - half, py - half),
                   (px + half, py - half),
                   (px + half, py + half),
                   (px - half, py + half),
                   (px - half, py - half)],
                  width=line_width,
                  fill=line_color)

    # Overlay 2: outline every quad whose scores are all positive.
    quad_draw = ImageDraw.Draw(quad_im)
    quads = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            quad_draw.line([tuple(geo[k]) for k in (0, 1, 2, 3, 0)],
                           width=3,
                           fill='blue')
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            quads.append(np.reshape(rescaled_geo, (8, )).tolist())
    return im, quad_im, quads, y
예제 #2
0
def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False):
    """Run the detector on one image file and write the quads to a text file.

    Args:
        east_detect: model object exposing ``predict``.
        img_path: path of the image to load.
        txt_path: destination file; written only when
            ``cfg.predict_write2txt`` is truthy and at least one quad passed.
        pixel_threshold: minimum channel-0 value (after sigmoid) for an
            output cell to count as activated.
        quiet: when true, suppress the message printed for rejected quads.
    """
    source = image.load_img(img_path)
    d_wight, d_height = resize_image(source, cfg.max_predict_img_size)
    # Per-axis resize factors, used later to undo the resize on coordinates.
    ratios = [d_wight / source.width, d_height / source.height]

    resized = source.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    pixels = imagenet_utils.preprocess_input(image.img_to_array(resized),
                                             mode='tf')
    y = np.squeeze(east_detect.predict(np.expand_dims(pixels, axis=0)), axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    activation_pixels = np.where(np.greater_equal(y[:, :, 0], pixel_threshold))
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if not np.amin(score) > 0:
            # A non-positive score marks a quad that nms could not complete.
            if not quiet:
                print('quad invalid with vertex num less then 4.')
            continue
        coords = np.reshape(geo / ratios, (8,)).tolist()
        txt_items.append(','.join(map(str, coords)) + '\n')

    if cfg.predict_write2txt and txt_items:
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
예제 #3
0
def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False):
    """Run a PyTorch detector on one image file and write the quads to text.

    Args:
        east_detect: callable model taking a ``(1, C, H, W)`` tensor.
        img_path: path of the image to load.
        txt_path: destination file; written only when
            ``cfg.predict_write2txt`` is truthy and at least one quad passed.
        pixel_threshold: minimum channel-0 value (after sigmoid) for an
            output cell to count as activated.
        quiet: when true, suppress the shape printout and the message
            printed for rejected quads.
    """
    # Close the file handle promptly and force three channels; PIL does not
    # guarantee RGB for palette, greyscale or RGBA files.
    with Image.open(img_path) as opened:
        img = opened.convert('RGB')
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    scale_ratio_w = d_wight / img.width
    scale_ratio_h = d_height / img.height
    transform = transforms.Compose([
        # torchvision's Resize expects (height, width); passing (width,
        # height) transposed the target size for non-square images.
        transforms.Resize((d_height, d_wight), interpolation=2),
        transforms.ToTensor()
    ])
    x = torch.unsqueeze(transform(img), 0)  # add the batch dimension
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        y = east_detect(x)
    y = torch.squeeze(y, 0)  # drop the batch dimension
    if not quiet:
        print(y.shape)
    y = y.detach().numpy()
    if y.shape[0] == 7:
        y = y.transpose((1, 2, 0))  # CHW -> HWC
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            # Undo the resize so coordinates refer to the original image.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
            txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
        elif not quiet:
            print('quad invalid with vertex num less then 4.')
    if cfg.predict_write2txt and txt_items:
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
예제 #4
0
def test(model_name, plot_file=None, test_directory=DEFAULT_TEST_DIRECTORY, model_directory=DEFAULT_MODEL_DIRECTORY):
    """Load a model, predict on the images of a directory and time the call.

    Args:
        model_name: name handed to ``deepconvnet``.
        plot_file: when truthy, passed to ``plot_predictions`` together with
            the original images and the predictions.
        test_directory: directory read by ``load_images_with_labels``.
        model_directory: forwarded to ``deepconvnet``.
    """
    model = deepconvnet(model_name, LR, (IMG_SIZE, IMG_SIZE), model_directory=model_directory)
    # The labels are loaded but not used by this function.
    X_orig, _labels = load_images_with_labels(test_directory)

    def to_model_size(picture):
        return resize_image(picture, (IMG_SIZE, IMG_SIZE))

    prepared = preprocess(X_orig, [conv_gray_scale, to_model_size])
    batch = np.array(prepared).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

    started_at = time.time()
    predictions = model.predict(batch)
    elapsed = time.time() - started_at
    print("\nTime taken to predict outcomes: {} secs \n".format(elapsed))

    if not plot_file:
        return
    plot_predictions(X_orig, predictions, plot_file)
예제 #5
0
def capture():
    """Grab a fixed screen region and return it as a one-element batch.

    The region is written to a temporary PNG, read back with ``imread``,
    passed through ``resize_image`` and given a leading axis of size 1.
    The temporary file is removed even when reading or resizing fails.

    Returns:
        The result of ``resize_image`` with an extra leading axis.
    """
    name = "./play/1.png"
    monitor = {"top": 272, "left": 570, "width": 800, "height": 600}
    sct_img = sct.grab(monitor)
    mss.tools.to_png(sct_img.rgb, sct_img.size, output=name)

    try:
        vec = resize_image(imread(name))
        return np.expand_dims(vec, axis=0)
    finally:
        # Previously skipped when imread/resize_image raised, leaking the file.
        os.remove(name)
예제 #6
0
def train(model_name, train_directory=DEFAULT_TRAIN_DIRECTORY, epoch=DEFAULT_EPOCH, learning_rate=LR,
          model_save_path=DEFAULT_MODEL_DIRECTORY):
    """Train a ``deepconvnet`` model on a labelled image directory and save it.

    Args:
        model_name: name handed to ``deepconvnet``, used as the run id and
            as the file name under ``model_save_path``.
        train_directory: directory read by ``load_images_with_labels``.
        epoch: number of epochs passed to ``model.fit``.
        learning_rate: learning rate handed to ``deepconvnet``.
        model_save_path: directory in which the trained model is saved.
    """
    model = deepconvnet(model_name, learning_rate, (IMG_SIZE, IMG_SIZE))
    images, labels = load_images_with_labels(train_directory)

    def to_model_size(picture):
        return resize_image(picture, (IMG_SIZE, IMG_SIZE))

    images = preprocess(images, [conv_gray_scale, to_model_size])
    images = np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

    # Fixed seed keeps the 80/20 train/validation split reproducible.
    X_train, X_validate, y_train, y_validate = train_test_split(
        images, labels, test_size=0.2, random_state=42)

    validation = ({"input": X_validate}, {"targets": y_validate})
    model.fit(
        {'input': X_train},
        {'targets': y_train},
        validation_set=validation,
        n_epoch=epoch,
        snapshot_step=500,
        show_metric=True,
        run_id=model_name,
    )

    destination = os.path.join(model_save_path, model_name)
    model.save(destination)
예제 #7
0
 def path_to_tensor(self, image_path):
     """Prepare one image and return it as a float array with a batch axis.

     When ``self.face_crop`` equals ``True`` the image goes through
     ``preprocess_image`` (``crop_dim=224``); an empty list is returned if
     that helper yields ``None``. Otherwise the image goes through
     ``resize_image`` with ``size=224`` and a random padding border colour.

     Returns:
         ``[]`` when ``preprocess_image`` returned ``None``, else the
         prepared image as a float array with a leading axis of size 1.
     """
     # Equality test kept as-is: only values equal to True take this branch.
     if self.face_crop == True:
         prepared = preprocess_image(image_path, None, crop_dim=224)
         if prepared is None:
             return []
     else:
         # Whole picture; presumably keeps the aspect ratio and pads a short
         # side up to 224 -- see resize_image for the exact behaviour.
         prepared = resize_image(image_path,
                                 None,
                                 size=224,
                                 random_padding_border_color=True)

     message = "Processed image  {} shape {}".format(image_path,
                                                     prepared.shape)
     logger.info(message)

     as_float = np.array(prepared, dtype='float')
     return np.expand_dims(as_float, axis=0)
예제 #8
0
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and save overlays next to it.

    Side effects: writes ``<img_path>_act.jpg`` (activated cells) and
    ``<img_path>_predict.jpg`` (accepted quads). When
    ``cfg.predict_write2txt`` is truthy and at least one quad passed, the
    coordinates are written to ``<img_path without extension>.txt``. When
    ``cfg.predict_cut_text_line`` is truthy, ``cut_text_line`` is called for
    each accepted quad.

    Args:
        east_detect: model object exposing ``predict``.
        img_path: path of the image to analyse.
        pixel_threshold: minimum channel-0 value (after sigmoid) for an
            output cell to count as activated.
        quiet: when true, suppress the message printed for rejected quads.
    """
    import os  # local import: only needed to derive the .txt path below

    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    # Scale the pixel values into [-1, 1].
    img = imagenet_utils.preprocess_input(img, mode='tf')
    # Add the batch axis to obtain a 4-D tensor.
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)

    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)
    with Image.open(img_path) as im:
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()

        # Overlay 1: one square per activated cell, colour-coded by the
        # side-vertex (channel 1) and truncation (channel 2) scores.
        draw = ImageDraw.Draw(im)
        half = 0.5 * cfg.pixel_size
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - half, py - half),
                       (px + half, py - half),
                       (px + half, py + half),
                       (px - half, py + half),
                       (px - half, py - half)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')

        # Overlay 2: outline each quad whose scores are all positive.
        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[k]) for k in (0, 1, 2, 3, 0)],
                               width=2, fill='red')
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array,
                                  img_path, s)
                # Undo the resize so coordinates refer to the original image.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
            elif not quiet:
                print('quad invalid with vertex num less then 4.')
        quad_im.save(img_path + '_predict.jpg')
        if cfg.predict_write2txt and txt_items:
            # splitext handles any extension length; slicing off the last
            # four characters mangled names such as 'photo.jpeg'.
            txt_path = os.path.splitext(img_path)[0] + '.txt'
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)
예제 #9
0
def predict_quad(model, img, pixel_threshold=cfg.pixel_threshold, quiet=False, img_name=None):
    """
    Detect text quads in one image and optionally save two debug overlays.

    Creates the ``root_temp`` and ``root_predict`` directories when missing.

    Args:
        model: detection model exposing ``predict`` (weights presumably
            already loaded -- confirm with the caller)
        img: image object exposing ``resize`` and ``convert`` (a PIL image)
        pixel_threshold: minimum channel-0 value (after sigmoid) for an
            output cell to count as activated
        quiet: when true, suppress the message printed for rejected quads
        img_name: base name for the saved overlays; nothing is saved when None

    Returns:
        text_recs_all: list with one 8-element coordinate list per accepted quad
        text_recs_len: list holding the number of accepted quads per image
            (a single entry, since num_img is 1)
        img_all: 4-D array; img_all[0] is the img_to_array result of the
            resized image

    """

    if not os.path.exists(root_temp):
        os.makedirs(root_temp)
    if not os.path.exists(root_predict):
        os.makedirs(root_predict)

    # Target width and height computed by resize_image.
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    # Resize the image for prediction.
    img = img.resize((d_wight, d_height), Image.BILINEAR).convert('RGB')
    img = image.img_to_array(img)
    num_img = 1
    # 4-D array that holds a single 3-D image.
    img_all = np.zeros((num_img, d_height, d_wight, 3))
    img_all[0] = img

    # Scale the pixel values into [-1, 1].
    img_ori = imagenet_utils.preprocess_input(img, mode='tf')  # suit tf tensor

    # Second batch array of the same shape, this time for the network input.
    x = np.zeros((num_img, d_height, d_wight, 3))
    x[0] = img_ori

    # (sample, h, w, channels)
    y_pred = model.predict(x)

    text_recs_all = []
    text_recs_len = []
    for n in range(num_img):
        # (sample, rows, cols, 7_points_pred)
        y = y_pred[n]
        y[:, :, :3] = sigmoid(y[:, :, :3])
        cond = np.greater_equal(y[:, :, 0], pixel_threshold)
        # np.where returns a tuple: [0] holds row indices, [1] column indices.
        activation_pixels = np.where(cond)
        quad_scores, quad_after_nms = nms(y, activation_pixels)

        text_recs = []
        # NOTE(review): x[n] holds the preprocessed input, so this uint8 cast
        # acts on scaled values and is stored back into the float array x --
        # confirm this is what cut_text_line should receive as im_array.
        x[n] = np.uint8(x[n])
        with image.array_to_img(img_all[n]) as im:     # Image.fromarray(x[n]) error ?
            im_array = x[n]

            # Note: the image handed on for CRNN recognition is the resized
            # one, so the ratios are 1 and coordinates are left unscaled.
            scale_ratio_w = 1
            scale_ratio_h = 1

            quad_im = im.copy()
            draw = ImageDraw.Draw(im)
            # Draw one square per activated cell on im; quad_im stays clean
            # for the quad outlines drawn further below.
            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'blue'
                if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                    if y[i, j, 2] < cfg.trunc_threshold:
                        line_width, line_color = 2, 'yellow'
                    elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                        line_width, line_color = 2, 'green'
                draw.line([(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                           (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                           (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                           (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                           (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                          width=line_width, fill=line_color)

            # Save the activation overlay only when a name was supplied.
            if not img_name is None:
                im.save(root_temp + img_name + '.jpg')

            quad_draw = ImageDraw.Draw(quad_im)
            for score, geo, s in zip(quad_scores, quad_after_nms,
                                     range(len(quad_scores))):
                # Only quads whose scores are all positive are accepted.
                if np.amin(score) > 0:
                    quad_draw.line([tuple(geo[0]),
                                    tuple(geo[1]),
                                    tuple(geo[2]),
                                    tuple(geo[3]),
                                    tuple(geo[0])], width=2, fill='blue')

                    # NOTE(review): img_name may be None here -- confirm that
                    # cut_text_line copes with it.
                    if cfg.predict_cut_text_line:
                        cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array,
                                      img_name, s)

                    rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                    text_rec = np.reshape(rescaled_geo, (8,)).tolist()
                    text_recs.append(text_rec)
                elif not quiet:
                    print('quad invalid with vertex num less then 4.')

            # Save the quad overlay only when a name was supplied.
            if not img_name is None:
                quad_im.save(root_predict + img_name + '.jpg' )

        for t in range(len(text_recs)):
            text_recs_all.append(text_recs[t])

        text_recs_len.append(len(text_recs))

    return text_recs_all, text_recs_len, img_all
예제 #10
0
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and save overlays next to it.

    Side effects: writes ``<img_path>_act.jpg`` (activated cells) and
    ``<img_path>_predict.jpg`` (accepted quads). When
    ``cfg.predict_write2txt`` is truthy and at least one quad passed, the
    coordinates are written to ``<img_path without extension>.txt``. When
    ``cfg.predict_cut_text_line`` is truthy, ``cut_text_line`` is called for
    each accepted quad.

    :param east_detect: model object exposing ``predict``
    :param img_path: path of the image to analyse
    :param pixel_threshold: minimum channel-0 value (after sigmoid) for an
        output cell to count as activated
    :param quiet: when true, suppress the message printed for rejected quads
    :return: None
    """
    import os  # local import: only needed to derive the .txt path below

    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    img = preprocess_input(img, mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)
    # Drop the batch axis.
    y = np.squeeze(y, axis=0)
    # Map the first three channels of every cell into (0, 1).
    y[:, :, :3] = sigmoid(y[:, :, :3])
    # A cell is activated when channel 0 reaches the threshold.
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    # Row/column indices of the activated cells.
    activation_pixels = np.where(cond)
    # Non-maximum suppression.
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        # Re-read the image; the full-size array is used for text-line crops.
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')

        # Separate copy for the quad outlines.
        quad_im = im.copy()

        # Overlay 1: one square per activated cell.
        draw = ImageDraw.Draw(im)
        half = 0.5 * cfg.pixel_size
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'

            # Channel 1 at or above the side-vertex threshold marks a head
            # or tail cell; channel 2 then decides which of the two.
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - half, py - half),
                       (px + half, py - half),
                       (px + half, py + half),
                       (px - half, py + half),
                       (px - half, py - half)],
                      width=line_width,
                      fill=line_color)
        im.save(img_path + '_act.jpg')

        # Overlay 2: outline each quad whose scores are all positive.
        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[k]) for k in (0, 1, 2, 3, 0)],
                               width=2,
                               fill='red')

                # Optionally crop the text line out of the original image.
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array,
                                  img_path, s)

                # Undo the resize so coordinates refer to the original image.
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
                txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
            elif not quiet:
                print('quad invalid with vertex num less then 4.')
        quad_im.save(img_path + '_predict.jpg')

        # Write the coordinates. splitext handles any extension length;
        # slicing off four characters mangled names such as 'photo.jpeg'.
        if cfg.predict_write2txt and txt_items:
            txt_path = os.path.splitext(img_path)[0] + '.txt'
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Detect text quads in an image file and write results to output dirs.

    Side effects, all keyed on the part of the file name before its first
    ``.``: ``output/<stem>_predict.jpg`` when at least one quad passed,
    ``output_txt/<stem>.txt`` when ``cfg.predict_write2txt`` is truthy and at
    least one quad passed, and ``output_crop/<stem>.jpg`` for each accepted
    quad when ``cfg.detection_box_crop`` is truthy.

    Args:
        east_detect: model object exposing ``predict``.
        img_path: '/'-separated path of the image to analyse.
        pixel_threshold: minimum channel-0 value (after sigmoid) for an
            output cell to count as activated.
        quiet: when true, suppress the message printed for rejected quads.
    """
    def file_stem():
        # Text before the first '.' of the last '/'-separated path component.
        return img_path.split('/')[-1].split('.')[0]

    loaded = image.load_img(img_path)
    d_wight, d_height = resize_image(loaded, cfg.image_size)
    net_input = loaded.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    net_input = preprocess_input(image.img_to_array(net_input), mode='tf')
    y = east_detect.predict(np.expand_dims(net_input, axis=0))
    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    activation_pixels = np.where(np.greater_equal(y[:, :, 0], pixel_threshold))
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    with Image.open(img_path) as im:
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.image_size)
        ratios = [d_wight / im.width, d_height / im.height]
        quad_im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_draw = ImageDraw.Draw(quad_im)

        txt_items = []
        drew_any = False
        for score, geo in zip(quad_scores, quad_after_nms):
            if not np.amin(score) > 0:
                if not quiet:
                    print('quad invalid with vertex num less then 4.')
                continue

            drew_any = True
            outline = [tuple(geo[k]) for k in (0, 1, 2, 3, 0)]
            quad_draw.line(outline, width=2, fill='blue')

            # Undo the resize so coordinates refer to the original image.
            rescaled_geo = geo / ratios
            coords = np.reshape(rescaled_geo, (8, )).tolist()
            txt_items.append(','.join(map(str, coords)) + '\n')

            if cfg.detection_box_crop:
                # NOTE(review): every quad of an image is written to the
                # same file name, so later crops replace earlier ones.
                img_crop = crop_rectangle(im_array, rescaled_geo)
                crop_path = os.path.join('output_crop', file_stem() + '.jpg')
                cv2.imwrite(crop_path, img_crop)

        if drew_any:
            quad_im.save(os.path.join('output', file_stem() + '_predict.jpg'))
        if cfg.predict_write2txt and txt_items:
            txt_path = os.path.join("output_txt", file_stem() + '.txt')
            with open(txt_path, 'w') as f_txt:
                f_txt.writelines(txt_items)
def predict(east_detect,
            img_path,
            text_pixel_threshold=cfg.text_pixel_threshold,
            text_side_threshold=cfg.text_side_vertex_pixel_threshold,
            text_trunc_threshold=cfg.text_trunc_threshold,
            action_pixel_threshold=cfg.action_pixel_threshold,
            action_side_vertex_pixel_threshold=cfg.
            action_side_vertex_pixel_threshold,
            arrow_trunc_threshold=cfg.arrow_trunc_threshold,
            nock_trunc_threshold=cfg.nock_trunc_threshold,
            quiet=False):
    """Detect text and arrow quads in an image file.

    Output channel 0 is the activation score, channels 1-3 are per-class
    scores (softmax) and channels 4-5 are the side-vertex and truncation
    scores. Classes are processed in the order 3 (arrow), 2 (nock),
    1 (text). A cell belongs to a class when its activation reaches that
    class's pixel threshold AND its class score beats both other classes.

    Side effects: writes ``<img_path>_act.jpg`` (activated cells) and
    ``<img_path>_predict.jpg`` (text quads in red, arrow quads in green).
    Nock quads are computed but neither drawn nor returned.

    Args:
        east_detect: model object exposing ``predict``.
        img_path: path of the image to analyse.
        text_pixel_threshold: activation threshold for class 1.
        text_side_threshold: channel-4 threshold used for class 1.
        text_trunc_threshold: channel-5 threshold used for class 1.
        action_pixel_threshold: activation threshold for classes 2 and 3.
        action_side_vertex_pixel_threshold: channel-4 threshold used for
            classes 2 and 3.
        arrow_trunc_threshold: channel-5 threshold used for class 3.
        nock_trunc_threshold: channel-5 threshold used for class 2.
        quiet: when true, suppress the message printed for rejected quads.

    Returns:
        List of lines ``"<label>\\t<x1>,<y1>,...,<x4>,<y4>\\n"`` with label
        ``text`` or ``arrow`` and integer coordinates divided by the resize
        ratios (i.e. mapped back to the original image size).
    """
    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    img = preprocess_input(img, mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)

    y = np.squeeze(y, axis=0)
    y[:, :, :1] = sigmoid(y[:, :, :1])
    y[:, :, 1:4] = softmax(y[:, :, 1:4])
    y[:, :, 4:6] = sigmoid(y[:, :, 4:6])

    # Label and outline colour of the classes that are drawn and exported.
    exported = {1: ('text', 'red'), 3: ('arrow', 'green')}
    half = 0.5 * cfg.pixel_size

    txt_items = []
    with Image.open(img_path) as im:
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        quad_draw = ImageDraw.Draw(quad_im)
        for idx in range(3, 0, -1):
            act_threshold = (text_pixel_threshold
                             if idx == 1 else action_pixel_threshold)
            conditions = [np.greater_equal(y[:, :, 0], act_threshold)]
            conditions.extend(y[:, :, idx] > y[:, :, other]
                              for other in (1, 2, 3) if other != idx)
            # np.logical_and takes two operands; its third positional
            # argument is `out`, so the former three-argument call silently
            # ignored the second class comparison. Reduce over all three.
            activation_pixels = np.where(np.logical_and.reduce(conditions))

            quad_scores, quad_after_nms = nms(
                y, activation_pixels, idx, text_side_threshold,
                text_trunc_threshold, action_side_vertex_pixel_threshold,
                nock_trunc_threshold, arrow_trunc_threshold)

            # Overlay 1: one square per activated cell, colour-coded by
            # class and by the side-vertex / truncation scores.
            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'red'
                if idx == 1 and y[i, j, 4] >= text_side_threshold and y[
                        i, j, 5] < text_trunc_threshold:
                    line_width, line_color = 2, 'orange'
                elif idx == 1 and y[i, j, 4] >= text_side_threshold and y[
                        i, j, 5] >= 1 - text_trunc_threshold:
                    line_width, line_color = 2, 'blue'
                elif idx == 2 and y[i, j, 4] >= \
                    action_side_vertex_pixel_threshold and y[i, j, 5] >= 1 - \
                    nock_trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif idx == 3 and y[i, j, 4] >= action_side_vertex_pixel_threshold and \
                    y[i, j, 5] >= arrow_trunc_threshold:
                    line_width, line_color = 2, 'purple'
                draw.line(
                    [(px - half, py - half),
                     (px + half, py - half),
                     (px + half, py + half),
                     (px - half, py + half),
                     (px - half, py - half)],
                    width=line_width,
                    fill=line_color)

            # Overlay 2 and text output: quads whose scores are all positive.
            for score, geo in zip(quad_scores, quad_after_nms):
                if not np.amin(score) > 0:
                    if not quiet:
                        print('quad invalid with vertex num less then 4.')
                    continue
                if idx not in exported:
                    # Class 2 (nock) quads are not drawn or exported.
                    continue
                label, outline_color = exported[idx]

                # Return value is ignored; presumably this reshapes geo in
                # place into a bounding box -- confirm in convert_bounding_box.
                convert_bounding_box(geo)
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]

                quad_draw.line([tuple(geo[k]) for k in (0, 1, 2, 3, 0)],
                               width=3,
                               fill=outline_color)

                rescaled_geo_list = np.reshape(
                    rescaled_geo.astype(np.int32), (8, )).tolist()
                txt_item = ','.join(map(str, rescaled_geo_list))
                txt_items.append(label + '\t' + txt_item + '\n')
        im.save(img_path + '_act.jpg')
        quad_im.save(img_path + '_predict.jpg')
    return txt_items
예제 #13
0
            break
# Counters initialised here; they are not used in the visible part of the script.
countStuck = 0
preDistance = 0

# Log file that receives the first value of every predicted command.
# NOTE(review): the handle is never closed in the visible code.
f = open('model_play.txt', 'w')

# Capture -> predict -> post-process loop. No exit condition is visible here.
while (True):
    # NOTE(review): `id` is not assigned in the visible code and shadows the
    # builtin of the same name -- presumably a frame counter set earlier.
    name = "./play/" + str(id) + ".png"
    #monitor = {"top": 272, "left": 570, "width": 800, "height": 600}
    monitor = {"top": 272, "left": 570, "width": 798, "height": 300}
    # name contains no format placeholders, so output equals name.
    output = name.format(**monitor)
    sct_img = sct.grab(monitor)
    mss.tools.to_png(sct_img.rgb, sct_img.size, output=output)

    # Read the screenshot back and add a leading axis of size 1 for the model.
    img = imread(name)
    vec = resize_image(img)
    vec = np.expand_dims(vec, axis=0)
    joystick = model.predict(vec, batch_size=1)[0]
    # `output` is reused here for the command: the first two values go
    # through TransformAxisValue and int(); the next three are kept as-is.
    output = [
        int(TransformAxisValue(joystick[0])),
        int(TransformAxisValue(joystick[1])),
        joystick[2],
        joystick[3],
        joystick[4],
    ]
    print(output)
    f.write("{}\n".format(output[0]))
    id += 1
    # check_x returns an adjusted first value plus updated state variables;
    # Lfirst, Rfirst, state and count are defined outside the visible code.
    output[0], Lfirst, Rfirst, state, count = check_x(output[0], Lfirst,
                                                      Rfirst, state, count)
    # Button choice is based on the arg-max of the last three predicted values.
    button = check_button(np.argmax(joystick[-3:]), output[0])
예제 #14
0
def detect(img_path, model, device, pixel_threshold, quiet=True):
    """Detect text quads in one image and save the visualisations.

    The image is resized to the dimensions returned by ``resize_image``,
    passed through ``model`` and the raw output is post-processed with
    ``sigmoid`` and ``nms``. Two JPEGs are written next to the input:
    ``<img_path>_act.jpg`` (activated pixels) and ``<img_path>_predict.jpg``
    (accepted quads). When ``cfg.predict_write2txt`` is set and at least one
    quad was accepted, the quad coordinates (rescaled to the original image
    size) are also written to a ``.txt`` file that replaces the image
    extension.

    Args:
        img_path: Path of the image file, as a string.
        model: Callable network; invoked on ``load_pil(image).to(device)``.
        device: Device the input tensor is moved to before inference.
        pixel_threshold: Minimum value of channel 0 (after sigmoid) for a
            pixel to count as activated.
        quiet: When false, print a message for every rejected quad.

    Returns:
        None. All results are written to disk.
    """
    import os  # function-scope import: only used to derive the .txt path

    # Open the source file once and release the handle as soon as the
    # in-memory resized RGB copy exists.
    with Image.open(img_path) as src:
        d_wight, d_height = resize_image(src, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / src.width
        scale_ratio_h = d_height / src.height
        resized = src.resize((d_wight, d_height),
                             Image.NEAREST).convert('RGB')

    # Independent canvases, taken before inference so the drawings never
    # share pixels with the network input.
    im = resized.copy()
    quad_im = resized.copy()

    with torch.no_grad():
        east_detect = model(load_pil(resized).to(device))
    y = np.squeeze(east_detect.cpu().numpy(), axis=0)  # c, h, w
    y[:3, :, :] = sigmoid(y[:3, :, :])
    cond = np.greater_equal(y[0, :, :], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    # Outline every activated cell: red by default, yellow/green when
    # channel 1 passes the side-vertex threshold and channel 2 falls at
    # either end of the truncation range.
    draw = ImageDraw.Draw(im)
    half = 0.5 * cfg.pixel_size
    for i, j in zip(activation_pixels[0], activation_pixels[1]):
        px = (j + 0.5) * cfg.pixel_size
        py = (i + 0.5) * cfg.pixel_size
        line_width, line_color = 1, 'red'
        if y[1, i, j] >= cfg.side_vertex_pixel_threshold:
            if y[2, i, j] < cfg.trunc_threshold:
                line_width, line_color = 2, 'yellow'
            elif y[2, i, j] >= 1 - cfg.trunc_threshold:
                line_width, line_color = 2, 'green'
        draw.line([(px - half, py - half),
                   (px + half, py - half),
                   (px + half, py + half),
                   (px - half, py + half),
                   (px - half, py - half)],
                  width=line_width,
                  fill=line_color)
    im.save(img_path + '_act.jpg')

    quad_draw = ImageDraw.Draw(quad_im)
    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            quad_draw.line([
                tuple(geo[0]),
                tuple(geo[1]),
                tuple(geo[2]),
                tuple(geo[3]),
                tuple(geo[0])
            ],
                           width=2,
                           fill='red')
            # Map the quad back to the coordinates of the original image.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
            txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
        elif not quiet:
            print('quad invalid with vertex num less then 4.')
    quad_im.save(img_path + '_predict.jpg')

    if cfg.predict_write2txt and txt_items:
        # splitext handles extensions of any length ('.jpeg', none, ...),
        # unlike slicing off the last four characters.
        txt_path = os.path.splitext(img_path)[0] + '.txt'
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
예제 #15
0
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    """Run the detector on one image and save the visualisations.

    The image is resized to the dimensions returned by ``resize_image``,
    converted to a tensor and passed through ``east_detect``. The output is
    post-processed with ``sigmoid`` and ``nms``. Two JPEGs are written next
    to the input: ``<img_path>_act.jpg`` (activated pixels) and
    ``<img_path>_predict.jpg`` (accepted quads). Depending on ``cfg``:

    * ``cfg.predict_cut_text_line``: ``cut_text_line`` is called for every
      accepted quad with the full-resolution RGB array.
    * ``cfg.predict_write2txt``: if at least one quad was accepted, the quad
      coordinates (rescaled to the original image size) are written to a
      ``.txt`` file that replaces the image extension.

    Args:
        east_detect: Callable network; invoked on a ``(1, C, H, W)`` tensor.
        img_path: Path of the image file, as a string.
        pixel_threshold: Minimum value of channel 0 (after sigmoid) for a
            pixel to count as activated.
        quiet: When false, print the raw output shape and a message for
            every rejected quad.

    Returns:
        None. All results are written to disk.
    """
    import os  # function-scope import: only used to derive the .txt path

    # Open the source file once; everything needed later is copied into
    # memory before the handle is released.
    with Image.open(img_path) as src:
        d_wight, d_height = resize_image(src, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / src.width
        scale_ratio_h = d_height / src.height
        rgb = src.convert('RGB')  # force 3 channels for the network
        im_array = np.array(rgb)  # full-resolution pixels as a numpy array
        im = src.resize((d_wight, d_height), Image.NEAREST).convert('RGB')

    transform = transforms.Compose([
        # torchvision expects (height, width); PIL's resize above takes
        # (width, height). Passing them in PIL order here would transpose
        # the score map relative to the canvas for non-square images.
        transforms.Resize((d_height, d_wight), interpolation=2),
        transforms.ToTensor()
    ])
    x = torch.unsqueeze(transform(rgb), 0)  # add the batch dimension
    with torch.no_grad():  # inference only, no autograd graph needed
        y = east_detect(x)
    y = torch.squeeze(y, 0)  # drop the batch dimension
    if not quiet:
        print(y.shape)
    y = y.detach().numpy()
    if y.shape[0] == 7:
        y = y.transpose((1, 2, 0))  # CHW -> HWC
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)

    quad_im = im.copy()
    # Outline every activated cell: red by default, yellow/green when
    # channel 1 passes the side-vertex threshold and channel 2 falls at
    # either end of the truncation range.
    draw = ImageDraw.Draw(im)
    half = 0.5 * cfg.pixel_size
    for i, j in zip(activation_pixels[0], activation_pixels[1]):
        px = (j + 0.5) * cfg.pixel_size
        py = (i + 0.5) * cfg.pixel_size
        line_width, line_color = 1, 'red'
        if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
            if y[i, j, 2] < cfg.trunc_threshold:
                line_width, line_color = 2, 'yellow'
            elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                line_width, line_color = 2, 'green'
        draw.line([(px - half, py - half),
                   (px + half, py - half),
                   (px + half, py + half),
                   (px - half, py + half),
                   (px - half, py - half)],
                  width=line_width,
                  fill=line_color)
    im.save(img_path + '_act.jpg')

    quad_draw = ImageDraw.Draw(quad_im)
    txt_items = []
    for s, (score, geo) in enumerate(zip(quad_scores, quad_after_nms)):
        if np.amin(score) > 0:
            quad_draw.line([
                tuple(geo[0]),
                tuple(geo[1]),
                tuple(geo[2]),
                tuple(geo[3]),
                tuple(geo[0])
            ],
                           width=2,
                           fill='red')
            if cfg.predict_cut_text_line:
                cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array,
                              img_path, s)
            # Map the quad back to the coordinates of the original image.
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8, )).tolist()
            txt_items.append(','.join(map(str, rescaled_geo_list)) + '\n')
        elif not quiet:
            print('quad invalid with vertex num less then 4.')
    quad_im.save(img_path + '_predict.jpg')

    if cfg.predict_write2txt and txt_items:
        # splitext handles extensions of any length ('.jpeg', none, ...),
        # unlike slicing off the last four characters.
        txt_path = os.path.splitext(img_path)[0] + '.txt'
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)