Code Example #1
def run_nms(test_images_dict: dict) -> dict:
    """ Runs non-maximum suppression on all of the predicted boxes/scores for each of the class-categories.
    It does this...
    (1) for each of the images
    (2) for each image's windows

    Stores the results in test_images dict

    This should prune any boxes that have the same class-category and that overlap considerably and it 
    should favor the higher-scoring boxes.
    """
    # run non-maximum suppression per image
    for image_id, image_info in test_images_dict.items():
        image_indices = []  # the total list of indices to keep
        # for each class-category, run non-maximum suppression and add resulting indices to total list of indices
        for (predicted_boxes,
             predicted_scores) in get_same_class_annotations(image_info):
            image_class_indices = nms.boxes(predicted_boxes, predicted_scores)
            image_class_indices = [
                index for index in image_class_indices
                if predicted_scores[index] != 0.0
            ]  # filter out placeholders
            image_indices.extend(image_class_indices)

        # only keep specified indices of annotations
        test_images_dict[image_id]['predicted_boxes'] = \
            [box for index, box in enumerate(image_info['predicted_boxes']) if index in image_indices]
        test_images_dict[image_id]['predicted_classes'] = \
            [category for index, category in enumerate(image_info['predicted_classes']) if index in image_indices]
        test_images_dict[image_id]['predicted_scores'] = \
            [score for index, score in enumerate(image_info['predicted_scores']) if index in image_indices]

        # run non-maximum suppression per-window
        for window_id, window_info in image_info['windows'].items():
            window_indices = []  # the total list of indices to keep
            # for each class-category, run non-maximum suppression and add resulting indices to total list of indices
            for (predicted_boxes,
                 predicted_scores) in get_same_class_annotations(window_info):
                window_class_indices = nms.boxes(predicted_boxes,
                                                 predicted_scores)
                window_class_indices = [
                    index for index in window_class_indices
                    if predicted_scores[index] != 0.0
                ]  # filter out placeholders
                window_indices.extend(window_class_indices)

            test_images_dict[image_id]['windows'][window_id]['predicted_boxes'] = \
                [box for index, box in enumerate(window_info['predicted_boxes']) if index in window_indices]
            test_images_dict[image_id]['windows'][window_id]['predicted_classes'] = \
                [category for index, category in enumerate(window_info['predicted_classes']) if index in window_indices]
            test_images_dict[image_id]['windows'][window_id]['predicted_scores'] = \
                [score for index, score in enumerate(window_info['predicted_scores']) if index in window_indices]

    return test_images_dict
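
Note: the example above relies on a get_same_class_annotations helper that is not shown. Below is a minimal sketch of what such a helper could look like, assuming the per-image/per-window dicts hold parallel 'predicted_boxes', 'predicted_classes' and 'predicted_scores' lists and that entries belonging to other classes are replaced by zero-score placeholders (which is what the "filter out placeholders" step above implies). This is an illustration, not the original helper.

def get_same_class_annotations(info: dict):
    """Yield one (boxes, scores) pair per predicted class-category.

    The yielded lists keep the original length and ordering, so the indices
    returned by nms.boxes() still refer to positions in info's lists; entries
    belonging to other classes are replaced by zero-score placeholders.
    """
    boxes = info['predicted_boxes']
    classes = info['predicted_classes']
    scores = info['predicted_scores']
    for category in sorted(set(classes)):
        class_boxes = [box if cls == category else [0, 0, 0, 0]
                       for box, cls in zip(boxes, classes)]
        class_scores = [score if cls == category else 0.0
                        for score, cls in zip(scores, classes)]
        yield class_boxes, class_scores
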
Code Example #2
def doubleDetect(darknet_image, darknet_image_long, frame_rgb):
    global netMain, metaMain
    detections = []
    nmsdet = []

    # Sequential Processing
    # run detections
    nmsdet += detect(netMain, metaMain, darknet_image, 0)
    detections += detect(netMain, metaMain, darknet_image_long, 1)

    # convert detections to numpy array
    if len(detections):
        dets = np.array(detections)
        # run non-maximum suppression to eliminate overlapping bounding boxes
        # returns indexes
        idx = nms.boxes(dets[:, :4], dets[:, -1:], nms_algorithm=fast.nms)
        # get best bounding boxes from nms
        for i in idx:
            nmsdet.append(detections[i])

    # draw all bounding boxes
    image = cvDrawBoxes(nmsdet, img=frame_rgb, color="g")
    # image = cvDrawBoxes(detections, img=frame_rgb, color="r")

    return image
Code Example #3
    def get_boxes(self, image_path):
        image = cv2.imread(image_path)
        (origHeight, origWidth) = image.shape[:2]

        # set the new width and height and then determine the ratio in change
        # for both the width and height
        (newW, newH) = (self.width, self.height)
        ratioWidth = origWidth / float(newW)
        ratioHeight = origHeight / float(newH)

        # resize the image and grab the new image dimensions
        image = cv2.resize(image, (newW, newH))
        (imageHeight, imageWidth) = image.shape[:2]

        blob = cv2.dnn.blobFromImage(image, 1.0, (imageWidth, imageHeight), (123.68, 116.78, 103.94), swapRB=True,
                                     crop=False)

        start = time.time()
        self.net.setInput(blob)
        (scores, geometry) = self.model_downloader.forward(self.net)
        end = time.time()
        print(f"time for forward pass:{end-start}")
        (boxes, confidences, baggage) = decode(scores, geometry, self.min_confidence)

        functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

        indicies = nms.boxes(boxes, confidences, nms_function=functions[0], confidence_threshold=self.min_confidence,
                             nsm_threshold=self.min_nms_treshold)

        return [scale_box(boxes[i], ratioWidth, ratioHeight) for i in indicies]
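
The example above, like the EAST-based examples #14 and #22 below, relies on a decode(scores, geometry, min_confidence) helper that is not part of the nms package. The following is a minimal sketch of such a decoder, following the widely shared EAST post-processing loop that yields (x, y, w, h) rects, their confidences, and per-box "baggage" carrying the feature-map offset and rotation angle; the actual decode() used by these projects may differ in its details.

import numpy as np


def decode(scores, geometry, min_confidence):
    """Turn EAST score/geometry maps into rects, confidences and baggage.

    scores:   (1, 1, rows, cols) text/no-text probabilities
    geometry: (1, 5, rows, cols) distances to the four box edges plus an angle
    Returns (rects, confidences, baggage); rects are (x, y, w, h) and each
    baggage entry holds the feature-map offset and angle of its rect.
    """
    (num_rows, num_cols) = scores.shape[2:4]
    rects, confidences, baggage = [], [], []

    for y in range(num_rows):
        scores_row = scores[0, 0, y]
        d_top = geometry[0, 0, y]
        d_right = geometry[0, 1, y]
        d_bottom = geometry[0, 2, y]
        d_left = geometry[0, 3, y]
        angles = geometry[0, 4, y]

        for x in range(num_cols):
            if scores_row[x] < min_confidence:
                continue
            # each feature-map cell corresponds to a 4x4 pixel region of the input
            (offset_x, offset_y) = (x * 4.0, y * 4.0)
            angle = angles[x]
            cos, sin = np.cos(angle), np.sin(angle)
            h = d_top[x] + d_bottom[x]
            w = d_right[x] + d_left[x]
            # project the edge distances back to image coordinates to get one corner
            end_x = int(offset_x + cos * d_right[x] + sin * d_bottom[x])
            end_y = int(offset_y - sin * d_right[x] + cos * d_bottom[x])
            rects.append((int(end_x - w), int(end_y - h), int(w), int(h)))
            confidences.append(float(scores_row[x]))
            baggage.append({'offset': (offset_x, offset_y), 'angle': angle})

    return rects, confidences, baggage
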
Code Example #4
File: eval_test.py Project: 4g/kuzushiji-ocr
def get_predictions(image):
    original_shape = image.shape
    image_mask_pred = cv2.resize(image, img_size)
    display.show(image_mask_pred, "image")
    image_mask_pred = np.expand_dims(image_mask_pred, axis=0) / 255.
    pred_masks = mask_cnn.predict(image_mask_pred, batch_size=8)
    boxes, box_scores = get_boxes(pred_masks[0:1], num_boxes, box_threshold)
    boxes = rescale_coords(boxes, original_shape, img_size)
    box_image = draw_boxes(image, boxes)
    display.show(box_image, "boxes")

    char_images = get_chars_from_image(image, boxes)
    char_prediction_scores = []
    if len(boxes) != 0:
        char_prediction_scores = char_cnn.predict(np.asarray(char_images))

    #display.show(join_images(char_images), "chars")

    cindices = [np.argmax(score) for score in char_prediction_scores]
    char_scores = [score[i] for i, score in zip(cindices, char_prediction_scores)]
    chars = [index2char[cindex] for cindex in cindices]
    lists = [chars, boxes, char_scores, char_images]
    chars, boxes, char_scores, char_images = filter_by_threshold(10.0/n_chars, char_scores, lists)
    nms_indices = nms.boxes(boxes, char_scores)
    chars, boxes, char_scores, char_images = filter_by_indices(nms_indices, lists)
    display.show(box_image, "missing boxes")
    return chars, boxes, char_scores, char_images
Code Example #5
File: maskrcnn.py Project: dpsnewailab/adou
    def _analyze(self, img=None, *args, **kwargs):
        assert img is not None, ValueError('img can not be None')

        img = F.to_tensor(img)

        output = self.model([img])[0]
        for key, item in output.items():
            if self.device == 'cuda':
                item = item.cpu()
            output[key] = item.detach().numpy()

        boxes = [[x1, y1, x2 - x1, y2 - y1]
                 for x1, y1, x2, y2 in output['boxes']]
        scores = output['scores']
        rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)
        output['boxes'] = [
            output['boxes'][id] for id in rects if output['scores'][id] > 0.5
        ]
        output['labels'] = [
            output['labels'][id] for id in rects if output['scores'][id] > 0.5
        ]
        output['scores'] = [
            output['scores'][id] for id in rects if output['scores'][id] > 0.5
        ]

        return output
Code Example #6
File: detect.py Project: prantik1998/EAST
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    score = score[0, :, :]
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None

    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None

    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
    rect = [[
        boxes[i, 0], boxes[i, 1],
        abs(boxes[i, 2] - boxes[i, 0]),
        abs(boxes[i, 3] - boxes[i, 1])
    ] for i in range(boxes.shape[0])]
    score = boxes[:, 8]
    ind = fast.boxes(rect, score)
    to_return = [boxes[i] for i in ind]
    return np.array(to_return)
Code Example #7
def non_maximum_suppression(json_file, save_json_file):
    from nms import nms

    data = json.load(open(json_file, "r"))

    annotations = dict()
    for annotation in data['annotations']:
        if annotation['image_id'] in annotations:
            annotations[annotation['image_id']] += [annotation]
        else:
            annotations[annotation['image_id']] = [annotation]

    image_ids = annotations.keys()

    new_annotations = list()
    for image_id in image_ids:
        boxes = [[int(x) for x in box["bbox"]]
                 for box in annotations[image_id]]
        scores = [box["score"] for box in annotations[image_id]]
        indices = nms.boxes(boxes, scores)
        new_annotations += [annotations[image_id][i] for i in indices]

    new_data = dict()
    new_data["images"] = data["images"]
    new_data["categories"] = data["categories"]
    new_data["annotations"] = new_annotations

    with open(save_json_file, "w") as outfile:
        json.dump(new_data, outfile)
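
A short usage note for the example above: it reads a COCO-style results file, groups the detections by image_id, keeps only the per-image indices returned by nms.boxes, and writes a filtered copy. COCO 'bbox' entries are already in [x, y, width, height] order, which lines up with the (x, y, w, h) rects used in the other examples here. A hypothetical invocation (the file names are made up):

non_maximum_suppression("detections.json", "detections_nms.json")

The grouping loop could equally be written with collections.defaultdict(list), which avoids the explicit membership test.
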
Code Example #8
File: yoloDetectorGPU.py Project: tomstrain92/SLAM
def run_inference(net, img):
	# run inference of an image - eventually I will have to change
	#print("running inference")
	show=False
	vehicle_classes = ['car', 'vehicle', 'truck', 'bus', 'bike', 'motorbike']
	# not sure what this does
	img2 = Image(img)
	# running inference:
	start = timer()
	results = net.detect(img2)
	end = timer()

	results_labels = [x[0].decode("utf-8") for x in results]

	# finding vehicles
	vehicle_boxes =  np.empty((0,4), int)
	buffer = 0

	# running non-max suppression
	scores = [score for cat, score, bound in results]
	bounds = [bound for cat, score, bound in results]
	indices = boxes(bounds, scores)
	results = [results[ind] for ind in indices]

	# box buffer
	buffer = 10
	for cat, score, bounds in results:
		x, y, w, h = bounds # x,y are centre.
		if cat.decode("utf-8") in vehicle_classes:
			vehicle_boxes = np.append(vehicle_boxes,
								np.array([[int(x - w / 2) - buffer,
										   int(y - h / 2) - buffer,
								 		   int(w) + buffer, int(h) + buffer]]))
		# plotting results if you so wish
		if show:
			cv2.rectangle(img, (int(x - w / 2), int(y - h / 2)),
				(int(x + w / 2), int(y + h / 2)), (0, 0, 255), 5)

		#cv2.putText(img, str(cat.decode("utf-8")),(int(x),int(y)),
			#cv2.FONT_HERSHEY_COMPLEX,1,(255,255,0))
	if show:
		cv2.imshow("output", img)
		cv2.waitKey(0)

	#num_vehicles = int(len(vehicle_boxes)/4)
	#print("detection took {:f} seconds, {:d} vehicles found".format(end-start,num_vehicles))
	return vehicle_boxes
Code Example #9
def count_doggo(img_path: str) -> int:
    face_cascade = cv2.CascadeClassifier(r'..\data\catface.xml')
    img = cv2.imread(img_path)
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    objs = []
    reject_levels = []
    level_weights = []
    objs, rej, scores = face_cascade.detectMultiScale3(grayscale,
                                                       outputRejectLevels=True)
    indices = boxes([[x, y, x + w, y + h] for (x, y, w, h) in objs], scores)
    detections = [objs[i] for i in indices]
    scores = [scores[i] for i in indices]
    i = 0
    for (x, y, w, h) in detections:
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 10)
        cv2.putText(img, f"{scores[i]}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    2, (36, 255, 12), 4)
        i += 1
    cv_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(cv_rgb)
    return len(detections)
Code Example #10
def testing_images(img, file, output_clfs, output_alphas):
    classification = list()
    img1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img1.shape
    box_height = 100
    box_width = 100
    while (box_height < height / 2 and box_width < width / 2):
        for x in range(0, height, 20):
            for y in range(0, width, 20):
                block = img1[x:x + box_height, y:y + box_width]
                window = cv2.resize(block, (19, 19))
                flag, total = detect_face(window, output_clfs, output_alphas)
                if flag == 1:
                    classification.append([(y, x),
                                           (y + block.shape[1],
                                            x + block.shape[0]), total])
        box_height += 20
        box_width += 20
    coord = list()
    scores = list()
    for res in classification:
        coord.append((res[0][0], res[0][1], res[1][0], res[1][1]))
        scores.append(res[2])
    output = nms.boxes(coord, scores)
    output = output[:2]
    for x in output:
        if classification[x][2] > 205:
            json_dict = dict()
            cv2.rectangle(img, classification[x][0], classification[x][1],
                          (255, 255, 0), 2)
            print(file)
            json_dict['iname'] = str(file)
            width = classification[x][1][0] - classification[x][0][0]
            height = classification[x][1][1] - classification[x][0][1]
            json_dict['bbox'] = [
                classification[x][0][0], classification[x][0][1], width, height
            ]
            # print(json_dict)
            json_list.append(json_dict)
    cv2.imwrite("output/" + file, img)
Code Example #11
def doubleDetect(darknet_image, darknet_image_crop, frame_rgb):
    global netMain, metaMain
    detections = []
    nmsdet = []
# Sequential processing
    detections += detect_sequential(netMain, metaMain, darknet_image, 0)
    detections += detect_sequential(netMain, metaMain, darknet_image_crop, 1)
    dets = np.array(detections)
    if len(dets) > 0:
        boxes = dets[:,:4]
        scores = dets[:,-1:]
        idx = nms.boxes(boxes, scores, nms_algorithm=nms.fast.nms)
        for i in idx:
            nmsdet.append(detections[i])
    # print("detection done in:",round(finish-start, 3),"s")

# Multiprocessing
    # q = Queue()
    # processes = []
    # p1 = Process(target=detect, args=(q, netMain, metaMain, darknet_image, 0))
    # p2 = Process(target=detect, args=(q, netMain, metaMain, darknet_image_crop, 1))
    # processes.append(p1)
    # processes.append(p2)
    # start = time.perf_counter()
    # for process in processes:
    #     process.start()

    # for process in processes:
    #     process.join()

    # finish = time.perf_counter()
    # print(round(finish-start,2))
    # while not q.empty():
    #     detections += (q.get())
#
    # image = cvDrawBoxes(dets, img=frame_rgb, color="r")
    image = cvDrawBoxes(nmsdet, img=frame_rgb, color="g")

    return image,np.array(dets)
Code Example #12
File: demo.py Project: s-hijiri0311/M2Det
    out = net(img)
    boxes, scores = detector.forward(out, priors)
    boxes = (boxes[0] * scale).cpu().numpy()
    scores = scores[0].cpu().numpy()
    allboxes = []
    # j is class index
    for j in range(1, cfg.model.m2det_config.num_classes):
        inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:,
                                               np.newaxis])).astype(np.float32,
                                                                    copy=False)
        keep_idx = nms.boxes(c_bboxes, c_scores)
        keep_idx = keep_idx[:cfg.test_cfg.keep_per_class]
        c_dets = c_dets[keep_idx, :]
        allboxes.extend([c.tolist() + [j] for c in c_dets])

    loop_time = time.time() - loop_start
    allboxes = np.array(allboxes)
    boxes = allboxes[:, :4]
    scores = allboxes[:, 4]
    cls_inds = allboxes[:, 5]

    print('\n'.join(['pos:{}, ids:{}, score:{:.3f}'.format('(%.1f,%.1f,%.1f,%.1f)' % (o[0],o[1],o[2],o[3]) \
            ,labels[int(oo)],ooo) for o,oo,ooo in zip(boxes,cls_inds,scores)]))

    fps = 1.0 / float(loop_time) if cam >= 0 or video else -1
    im2show = draw_detection(image, boxes, scores, cls_inds, fps)
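
Examples #1 and #12 above, and #13 below, all apply NMS class by class so that boxes of different categories never suppress one another. The pattern can be distilled into a small standalone helper; the following is a sketch assuming from nms import nms (as in example #7) and (x, y, w, h) rects, with made-up toy data rather than anything from the projects above.

import numpy as np
from nms import nms


def per_class_nms(rects, scores, class_ids):
    """Return the indices (into rects) kept after running NMS separately per class."""
    keep = []
    class_ids = np.asarray(class_ids)
    for cls in np.unique(class_ids):
        idx = np.where(class_ids == cls)[0]
        kept_local = nms.boxes([rects[i] for i in idx],
                               [scores[i] for i in idx])
        keep.extend(int(idx[k]) for k in kept_local)
    return keep


# toy usage: two heavily overlapping class-0 boxes and one class-1 box
rects = [(10, 10, 50, 80), (12, 12, 50, 80), (15, 15, 40, 90)]
scores = [0.9, 0.7, 0.8]
class_ids = [0, 0, 1]
print(per_class_nms(rects, scores, class_ids))  # the weaker class-0 box should be dropped
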
Code Example #13
File: yolact.py Project: yxpandjay/ncnn
    def detect(self, conf_preds, loc_data, prior_data, mask_data, img_w,
               img_h):
        """ Perform nms for only the max scoring class that isn't background (class 0) """
        cur_scores = conf_preds[:, 1:]
        num_class = cur_scores.shape[1]

        classes = np.argmax(cur_scores, axis=1)
        conf_scores = cur_scores[range(cur_scores.shape[0]), classes]

        # filter by confidence_threshold
        keep = conf_scores > self.confidence_threshold
        conf_scores = conf_scores[keep]
        classes = classes[keep]
        loc_data = loc_data[keep, :]
        prior_data = prior_data[keep, :]
        masks = mask_data[keep, :]

        # decode x, y, w, h
        boxes = self.decode(loc_data, prior_data, img_w, img_h)

        # nms for every class
        boxes_result = []
        masks_result = []
        classes_result = []
        conf_scores_result = []
        for i in range(num_class):
            where = np.where(classes == i)
            if len(where[0]) == 0:
                continue

            boxes_tmp = boxes[where]
            masks_tmp = masks[where]
            classes_tmp = classes[where]
            conf_scores_tmp = conf_scores[where]

            indexes = nms.boxes(
                boxes_tmp,
                conf_scores_tmp,
                nms_threshold=self.nms_threshold,
                score_threshold=self.confidence_threshold,
                top_k=self.keep_top_k,
            )

            for index in indexes:
                boxes_result.append(boxes_tmp[index])
                masks_result.append(masks_tmp[index])
                classes_result.append(classes_tmp[index] + 1)
                conf_scores_result.append(conf_scores_tmp[index])

        # keep only the top-k highest-scoring detections
        if len(conf_scores_result) > self.keep_top_k:
            indexes = np.argsort(conf_scores_result)[::-1]
            indexes = indexes[:self.keep_top_k]

            boxes_result = [boxes_result[i] for i in indexes]
            masks_result = [masks_result[i] for i in indexes]
            classes_result = [classes_result[i] for i in indexes]
            conf_scores_result = [conf_scores_result[i] for i in indexes]

        return (
            np.array(boxes_result),
            np.array(masks_result),
            np.array(classes_result),
            np.array(conf_scores_result),
        )
Code Example #14
def text_detection(image, east, min_confidence, width, height):
    # load the input image and grab the image dimensions
    image = cv2.imread(image)
    orig = image.copy()
    (origHeight, origWidth) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(east)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)

    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection took {:.6f} seconds".format(end - start))

    # NMS on the unrotated rects
    confidenceThreshold = min_confidence
    nmsThreshold = 0.4

    # decode the blob info
    (rects, confidences, baggage) = decode(scores, geometry,
                                           confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    ##########################################################

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

    print("[INFO] Running nms.boxes . . .")

    for i, function in enumerate(functions):

        start = time.time()
        indicies = nms.boxes(rects,
                             confidences,
                             nms_function=function,
                             confidence_threshold=confidenceThreshold,
                             nsm_threshold=nmsThreshold)
        end = time.time()

        indicies = np.array(indicies).reshape(-1)

        drawrects = np.array(rects)[indicies]

        name = function.__module__.split('.')[-1].title()
        print("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
            name, end - start, len(drawrects)))

        drawOn = orig.copy()
        drawBoxes(drawOn, drawrects, ratioWidth, ratioHeight, (0, 255, 0), 2)

        title = "nms.boxes {}".format(name)
        cv2.imshow(title, drawOn)
        cv2.moveWindow(title, 150 + i * 300, 150)

    #cv2.waitKey(0)

    # convert rects to polys
    polygons = utils.rects2polys(rects, thetas, offsets, ratioWidth,
                                 ratioHeight)

    print("[INFO] Running nms.polygons . . .")

    for i, function in enumerate(functions):

        start = time.time()
        indicies = nms.polygons(polygons,
                                confidences,
                                nms_function=function,
                                confidence_threshold=confidenceThreshold,
                                nsm_threshold=nmsThreshold)
        end = time.time()

        indicies = np.array(indicies).reshape(-1)

        drawpolys = np.array(polygons)[indicies]

        name = function.__module__.split('.')[-1].title()

        print("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
            name, end - start, len(drawpolys)))

        drawOn = orig.copy()
        drawPolygons(drawOn, drawpolys, ratioWidth, ratioHeight, (0, 255, 0),
                     2)

        title = "nms.polygons {}".format(name)
        cv2.imshow(title, drawOn)
        cv2.moveWindow(title, 150 + i * 300, 150)

    cv2.waitKey(0)
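
Example #14 above (and the fragments in #17 and #23) draws the surviving rects with a drawBoxes helper that is not shown. A minimal sketch of such a helper is given here, assuming (x, y, w, h) rects from the resized image that get scaled back to the original image by the width/height ratios; this is not the original implementation.

import cv2


def drawBoxes(image, rects, ratio_width, ratio_height, color, thickness):
    """Scale (x, y, w, h) rects back to the original image size and draw them."""
    for (x, y, w, h) in rects:
        top_left = (int(x * ratio_width), int(y * ratio_height))
        bottom_right = (int((x + w) * ratio_width), int((y + h) * ratio_height))
        cv2.rectangle(image, top_left, bottom_right, color, thickness)
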
Code Example #15
def main():
    #Training commented out:
    ##################################################################

    # clf, alpha = train(images, 2429, 4548)
    # print(clf)
    # print(alpha)

    # final = zip(clf, alpha)

    # pickle.dump(final, open('./final_classifier.pickle', 'wb'), protocol=2)

    #########################################
    args = parse_args()
    print("loading images...")
    imagePaths = (os.listdir(args.dir_path))

    with open(r"./final_classifier.pickle", "rb") as f:
        file = pickle.load(f)

    clf = []
    alpha = []
    for i, j in file:
        #     print(i,j)
        clf.append(i)
        alpha.append(j)

    #Weak Classifier List
    # clf = [([[(5, 18, 17, 3)], [(8, 18, 17, 3)]], -1050.0, 1), ([[(5, 13, 9, 6)], [(11, 13, 9, 6)]], -216.0, -1), ([[(5, 13, 10, 6)], [(11, 13, 10, 6)]], -365.0, 1), ([[(4, 13, 11, 5)], [(9, 13, 11, 5)]], -312.0, -1), ([[(8, 13, 10, 6)], [(14, 13, 10, 6)]], -367.0, -1), ([[(5, 14, 13, 5)], [(10, 14, 13, 5)]], -1042.0, 1), ([[(7, 17, 17, 3)], [(10, 17, 17, 3)]], -49.0, -1), ([[(7, 17, 11, 3)], [(10, 17, 11, 3)]], -127.0, 1), ([[(7, 16, 10, 3)], [(10, 16, 10, 3)]], -74.0, -1), ([[(7, 9, 4, 2)], [(9, 9, 4, 2)]], -21.0, 1)]

    #Alpha List
    # alpha = [6.575151471691274, 7.266783069631535, 9.342094529936636, 12.262314436203779, 9.807209897582606, 11.805431834748102, 11.677685482457234, 10.758513212206026, 14.769595937010674, 10.26293208110991]
    json_list = []

    print(len(imagePaths))
    for imagePath in imagePaths:
        print(imagePath)
        image = cv2.imread(os.path.join(args.dir_path, imagePath), 0)
        ogimg = cv2.imread(os.path.join(args.dir_path, imagePath))

        #     ogimg = cv2.imread(os.path.join(path, imagePath))
        img = image.copy()
        faces = list()
        scores = list()
        for i in range(0, len(ogimg), 19):
            for j in range(0, len(ogimg[0]), 19):
                t_img = (img[i:i + 19, j:j + 19])

                if t_img.shape[0] != 19 or t_img.shape[1] != 19:
                    t_img = cv2.resize(t_img, (19, 19))

                t, res = strong_classifier(t_img, alpha, clf)

                if res == 1:

                    faces.append((j, i, j + 60, i + 60))
                    scores.append(t)

        # print(len(faces))

        z = sorted(zip(faces, scores), key=lambda x: x[1], reverse=True)
        faces = [i[0] for i in z]
        scores = [i[1] for i in z]

        #     print((scores))
        indicies = nms.boxes(faces, scores)
        #     print(len(indicies))
        # print(indicies)
        final_faces = []
        final_scores = []
        for c, i in enumerate(indicies[:4]):
            face = faces[i]
            final_faces.append(
                [face[0], face[1], face[2] + 200, face[3] + 200])
            final_scores.append(scores[i])

            # print(faces[i], scores[i])

        index = nms.boxes(final_faces, final_scores)
        # print(index)
        for i in index:
            face = final_faces[i]
            if (face[0] < ogimg.shape[1] and face[2] + 50 < ogimg.shape[1]
                    and face[1] < ogimg.shape[0]
                    and face[3] + 50 < ogimg.shape[0]):
                cv2.rectangle(ogimg, (face[0], face[1]),
                              (face[2] - 100, face[3] - 100), (0, 255, 255), 2)
                element = {
                    "iname": imagePath,
                    "bbox": [face[0], face[1], face[2] - 100, face[3] - 100]
                }
                json_list.append(element)
        print(len(json_list))
    # cv2.imwrite(os.path.join(respath, (imagePath)), ogimg)

    # print(json_list)
    #the result json file name
    output_json = "results.json"
    #dump json_list to result.json
    with open(output_json, 'w') as f:
        json.dump(json_list, f)
Code Example #16
File: evaluation.py Project: Sergigb/yolo-phoc-lstm
        for frame in range(num_frames):
            predicted_bboxes_frame = []
            predicted_scores = []
            for j in range(descriptors.shape[1]):
                if predictions[frame, j] > 0.5:
                    center_x, center_y, w, h, p, _ = descriptors[frame, j]
                    center_x *= width
                    w *= width
                    center_y *= height
                    h *= height
                    x = center_x - w / 2.
                    y = center_y - h / 2.
                    predicted_scores.append(predictions[frame, j])
                    predicted_bboxes_frame.append([x, y, x+w, y+h])

            indices = nms.boxes(predicted_bboxes_frame, predicted_scores)  # non-maximum suppression
            for index in indices:
                predicted_bboxes_word[frame].append(predicted_bboxes_frame[index])

        tracked_predictions_word = assign_ids(predicted_bboxes_word)
        for i, tracked_predictions_frame in enumerate(tracked_predictions_word):
            for tracked_prediction in tracked_predictions_frame:
                predicted_bboxes[i].append(list(tracked_prediction[0:4]))
                predicted_ids[i].append(int(tracked_prediction[4]))

    get_metrics(predicted_bboxes, predicted_ids, annotations_path, acc)
    mh = mm.metrics.create()

    try:
        summary = mh.compute_many(
            [acc],
Code Example #17
offsets = []
thetas = []
for b in baggage:
    offsets.append(b['offset'])
    thetas.append(b['angle'])

##########################################################

functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

print("[INFO] Running nms.boxes . . .")

for i, function in enumerate(functions):

    start = time.time()
    indicies = nms.boxes(rects, confidences, nms_function=function, confidence_threshold=confidenceThreshold,
                             nsm_threshold=nmsThreshold)
    end = time.time()

    indicies = np.array(indicies).reshape(-1)

    drawrects = np.array(rects)[indicies]

    name = function.__module__.split('.')[-1].title()
    print("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(name, end - start, len(drawrects)))

    drawOn = orig.copy()
    drawBoxes(drawOn, drawrects, ratioWidth, ratioHeight, (0, 255, 0), 2)

    ############## My part ################
    print(drawrects, indicies, ratioHeight, ratioWidth)
    # drawrects contains the coordinates of the detected text boxes in (x, y, w, h) format
Code Example #18
File: eval_video.py Project: Sergigb/yolo-phoc-lstm
            scores = []
            for i in range(descriptors.shape[1]):
                if predictions[0, frame + 1, i] > 0.25:
                    center_x, center_y, w, h, _, _ = descriptors[frame + 1, i]
                    center_x *= inp.shape[1]
                    w *= inp.shape[1]
                    center_y *= inp.shape[0]
                    h *= inp.shape[0]
                    x = center_x - w / 2.
                    y = center_y - h / 2.
                    scores.append(predictions[0, frame + 1, i])
                    rectongles.append(np.array(
                        [x, y, x + w,
                         y + h]))  # sort requires x1, y1, x2, y2, score=1???

            indices = nms.boxes(rectongles, scores)  # non-maximum suppression
            for index in indices:
                detections_word[frame].append(rectongles[index])
            frame += 1

        tracked_detections_word = assign_ids(
            detections_word)  # there might be id collisions
        for i, tracked_detections_frame in enumerate(tracked_detections_word):
            for tracked_detection in tracked_detections_frame:
                tracked_detections[i].append([tracked_detection, word])

    cap.set(cv2.CAP_PROP_POS_FRAMES, 1)
    fig, ax = plt.subplots(1, figsize=(15, 15))
    ret, inp = cap.read()
    frame = 0
Code Example #19
def cli(ctx, sink, opt_disk, opt_net, opt_conf_thresh, opt_nms_thresh):
    """Generates scene text ROIs (CV DNN)"""

    # ----------------------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    import click
    import cv2 as cv
    import numpy as np
    from nms import nms

    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.utils import scenetext_utils
    from vframe.models.metadata_item import ROIMetadataItem, ROIDetectResult
    from vframe.settings.paths import Paths
    from vframe.models.bbox import BBox

    # ----------------------------------------------------------------
    # init

    log = logger_utils.Logger.getLogger()

    metadata_type = types.Metadata.TEXT_ROI

    # initialize dnn
    if opt_net == types.SceneTextNet.EAST:
        # TODO externalize
        dnn_size = (320, 320)  # fixed
        dnn_mean_clr = (123.68, 116.78, 103.94)  # fixed
        dnn_scale = 1.0  # fixed
        dnn_layer_names = [
            "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"
        ]
        fp_model = join(cfg.DIR_MODELS_TF, 'east',
                        'frozen_east_text_detection.pb')
        log.debug('fp_model: {}'.format(fp_model))
        net = cv.dnn.readNet(fp_model)
        #net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        #net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

    elif opt_net == types.SceneTextNet.DEEPSCENE:
        dnn_size = (320, 320)  # fixed
        fp_model = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext',
                        "TextBoxes_icdar13.caffemodel")
        fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext',
                           'textbox.prototxt')
        net = cv.text.TextDetectorCNN_create(fp_prototxt, fp_model)

    # ----------------------------------------------------------------
    # process

    # iterate sink
    while True:

        chair_item = yield

        metadata = {}

        for frame_idx, frame in chair_item.keyframes.items():

            if opt_net == types.SceneTextNet.DEEPSCENE:
                # DeepScene scene text detector (opencv contrib)
                frame = im_utils.resize(frame,
                                        width=dnn_size[0],
                                        height=dnn_size[1])
                frame_dim = frame.shape[:2][::-1]
                rects, probs = net.detect(frame)
                det_results = []
                for r in range(np.shape(rects)[0]):
                    prob = float(probs[r])
                    if prob > opt_conf_thresh:
                        rect = BBox.from_xywh_dim(
                            *rects[r], frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(prob, rect))

                metadata[frame_idx] = det_results

            elif opt_net == types.SceneTextNet.EAST:
                # EAST scene text detector
                frame = im_utils.resize(frame,
                                        width=dnn_size[0],
                                        height=dnn_size[1])
                # frame = im_utils.resize(frame, width=dnn_size[0], he)
                frame_dim = frame.shape[:2][::-1]
                frame_dim = dnn_size

                # blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_mean_clr, swapRB=True, crop=False)
                blob = cv.dnn.blobFromImage(frame,
                                            dnn_scale,
                                            dnn_size,
                                            dnn_mean_clr,
                                            swapRB=True,
                                            crop=False)
                net.setInput(blob)
                (scores, geometry) = net.forward(dnn_layer_names)
                (rects, confidences,
                 baggage) = scenetext_utils.east_text_decode(
                     scores, geometry, opt_conf_thresh)

                det_results = []
                if rects:
                    offsets = []
                    thetas = []
                    for b in baggage:
                        offsets.append(b['offset'])
                        thetas.append(b['angle'])

                    # functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]
                    indicies = nms.boxes(rects,
                                         confidences,
                                         nms_function=nms.fast.nms,
                                         confidence_threshold=opt_conf_thresh,
                                         nsm_threshold=opt_nms_thresh)

                    indicies = np.array(indicies).reshape(-1)
                    rects = np.array(rects)[indicies]
                    scores = np.array(confidences)[indicies]
                    for rect, score in zip(rects, scores):
                        rect = BBox.from_xywh_dim(
                            *rect, frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(score, rect))

                metadata[frame_idx] = det_results

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, ROIMetadataItem(metadata))

        # ----------------------------------------------------------------
        # yield back to the processor pipeline

        # send back to generator
        sink.send(chair_item)
Code Example #20
        cv2.namedWindow(seq_name, cv2.WINDOW_NORMAL)

    # init (reset) the identifier
    id_num = 0

    # tracking process in each frame
    for nn, im_path in enumerate(images):
        frame = nn + 1
        img = cv2.imread(im_path)
        print('Frame {} is loaded'.format(frame))

        # load the detection results of this frame
        pre_frame_det_results = det_results[det_results[:, 0] == frame]

        # non-maximum suppression [frame, id, x, y, w, h, score]
        indices = nms.boxes(pre_frame_det_results[:, 2:6],
                            pre_frame_det_results[:, 6])
        frame_det_results = pre_frame_det_results[indices, :]

        # extract the bbox [fr, id, (x, y, w, h), score]
        bboxes = frame_det_results[:, 2:6]

        ############################################
        # ***multiple tracking and associating***  #
        ############################################

        # 1. sort trackers
        index1, index2 = sort_trackers(trackers)

        # 2. save the processed index of trackers
        index_processed = []
        for k in range(2):
Code Example #21
File: nms_test.py Project: pengge/SWDT
def main():
	np.random.seed(37)

	max_x, max_y = 10000, 10000
	num_boxes = 100000

	#iou_threshold, score_threshold = 0.5, float('-inf')
	iou_threshold, score_threshold = 0.5, 0.001
	max_output_size = num_boxes

	#--------------------
	print('Start generating data...')
	start_time = time.time()
	boxes = list()
	for _ in range(num_boxes):
		xs = np.random.randint(max_x, size=2)
		ys = np.random.randint(max_y, size=2)
		boxes.append([np.min(xs), np.min(ys), np.max(xs), np.max(ys)])
	boxes = np.array(boxes)
	scores = np.random.rand(num_boxes)
	boxes_scores = np.hstack([boxes, scores.reshape([-1, 1])]).astype(np.float32)
	#boxes2 = np.zeros((num_boxes,), dtype=py_parallel_nms_cpu.PyBox)
	#for idx in range(num_boxes):
	#	boxes2[idx] = py_parallel_nms_cpu.PyBox(*boxes_scores[idx,:])
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('End generating data.')

	"""
	#--------------------
	# REF [site] >> https://github.com/rbgirshick/py-faster-rcnn
	#	Too slow.
	print('Start py_cpu_nms...')
	start_time = time.time()
	selected_indices = py_cpu_nms.py_cpu_nms(boxes_scores, iou_threshold)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', len(selected_indices))
	#print('\tSelected indices =', selected_indices)
	print('End py_cpu_nms.')

	#--------------------
	# REF [site] >> https://github.com/rbgirshick/py-faster-rcnn
	#	Too slow.
	print('Start cpu_nms...')
	start_time = time.time()
	selected_indices = cpu_nms.cpu_nms(boxes_scores, iou_threshold)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', len(selected_indices))
	#print('\tSelected indices =', selected_indices)
	print('End cpu_nms.')
	"""

	#--------------------
	# REF [site] >> https://github.com/rbgirshick/py-faster-rcnn
	print('Start gpu_nms...')
	start_time = time.time()
	selected_indices = gpu_nms.gpu_nms(boxes_scores, iou_threshold)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', len(selected_indices))
	#print('\tSelected indices =', selected_indices)
	print('End gpu_nms.')

	"""
	#--------------------
	# REF [site] >> https://github.com/jeetkanjani7/Parallel_NMS
	#	Not good implementation.
	#	Too slow.
	print('Start parallel_nms_cpu...')
	start_time = time.time()
	is_kept_list = py_parallel_nms_cpu.py_parallel_nms_cpu(boxes_scores, iou_threshold)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', np.count_nonzero(is_kept_list))
	#print('\tSelected indices =', np.nonzero(is_kept_list))
	print('End parallel_nms_cpu.')

	#--------------------
	# REF [site] >> https://github.com/jeetkanjani7/Parallel_NMS
	#	Not good implementation.
	print('Start parallel_nms_gpu...')
	start_time = time.time()
	is_kept_list = py_parallel_nms_gpu.py_parallel_nms_gpu(boxes_scores, iou_threshold)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', np.count_nonzero(is_kept_list))
	#print('\tSelected indices =', np.nonzero(is_kept_list))
	print('End parallel_nms_gpu.')
	"""

	#--------------------
	# REF [site] >>
	#	https://bitbucket.org/tomhoag/nms
	#	https://nms.readthedocs.io/en/latest/index.html
	print('Start nms...')
	nms_function = nms.fast.nms
	#nms_function = nms.felzenszwalb.nms
	#nms_function = nms.malisiewicz.nms
	start_time = time.time()
	selected_indices = nms.boxes(boxes, scores, score_threshold=score_threshold, nms_threshold=iou_threshold, nms_function=nms_function)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', len(selected_indices))
	#print('\tSelected indices =', selected_indices)
	print('End nms.')

	#--------------------
	# REF [site] >> https://docs.opencv.org/3.4/d6/d0f/group__dnn.html
	print('Start opencv...')
	start_time = time.time()
	selected_indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores, score_threshold=score_threshold, nms_threshold=iou_threshold)
	print('\tElapsed time = {}'.format(time.time() - start_time))
	print('\t#selected boxes =', len(selected_indices))
	#print('\tSelected indices =', selected_indices.ravel().tolist())
	print('End opencv.')

	#--------------------
	# REF [site] >> https://www.tensorflow.org/api_docs/python/tf/image/non_max_suppression
	#	Slow, not as expected.
	print('Start tf...')
	with tf.Session() as sess:
		# For CUDA context initialization.
		#A = tf.constant([1])
		#A.eval(session=sess)
		#sess.run(tf.global_variables_initializer())

		tf_nms = tf.image.non_max_suppression(boxes, scores, max_output_size, iou_threshold=iou_threshold, score_threshold=score_threshold)
		start_time = time.time()
		selected_indices = tf_nms.eval(session=sess)
		print('\tElapsed time = {}'.format(time.time() - start_time))
		print('\t#selected boxes =', len(selected_indices))
		#print('\tSelected indices =', selected_indices)
	print('End tf.')
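
For reference, the nms.boxes call benchmarked in example #21 can be exercised in isolation. The sketch below reuses the same keyword arguments on a small random box set; it assumes from nms import nms as in example #7, mirrors example #19's use of nms.fast.nms as the nms_function, and generates (xmin, ymin, xmax, ymax) boxes the same way the benchmark above does.

import numpy as np
from nms import nms

np.random.seed(37)
boxes = []
for _ in range(200):
    xs = np.random.randint(1000, size=2)
    ys = np.random.randint(1000, size=2)
    boxes.append([np.min(xs), np.min(ys), np.max(xs), np.max(ys)])
scores = np.random.rand(len(boxes))

kept = nms.boxes(boxes, scores,
                 score_threshold=0.001,
                 nms_threshold=0.5,
                 nms_function=nms.fast.nms)
print('{} of {} boxes kept'.format(len(kept), len(boxes)))
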
Code Example #22
    def detect_text(self, min_confidence=0.01, width=320, height=320):
        """
        Detects text fields using the EAST algorithm: loads the EAST model and marks the detected text fields.
        Width and height should be multiples of 32; this is a precondition for the OpenCV (dnn) operations.
        """
        (origHeight, origWidth) = self.image.shape[:2]

        (newW, newH) = (width, height)
        ratioWidth = origWidth / float(newW)
        ratioHeight = origHeight / float(newH)

        self.image = cv2.resize(self.image, (newW, newH))
        (imageHeight, imageWidth) = self.image.shape[:2]

        layerNames = [
            "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"
        ]
        try:
            path = os.path.abspath(os.path.dirname(__file__))
            net = cv2.dnn.readNet(path +
                                  '/resources/frozen_east_text_detection.pb')
        except FileNotFoundError:
            raise ResourceNotFound(
                """Frozen East Text Detector couldn't find in resources.
                For download to resource:
                >>> from skew_correction.data import download
                >>> download()
                
                or 
                
                $ python -c 'from skew_correction.data import download; download();'
                """)

        blob = cv2.dnn.blobFromImage(self.image,
                                     1.0, (imageWidth, imageHeight),
                                     (123.68, 116.78, 103.94),
                                     swapRB=True,
                                     crop=False)

        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)

        confidenceThreshold = min_confidence
        nmsThreshold = 0.4
        (rects, confidences, baggage) = decode(scores, geometry,
                                               confidenceThreshold)

        offsets = []
        thetas = []
        for b in baggage:
            offsets.append(b['offset'])
            thetas.append(b['angle'])

        indicies = nms.boxes(rects,
                             confidences,
                             nms_function=nms.malisiewicz.nms,
                             confidence_threshold=confidenceThreshold,
                             nsm_threshold=nmsThreshold)

        indicies = np.array(indicies).reshape(-1)

        drawrects = np.array(rects)[indicies]

        self.draw_boxes(self.drawOn, drawrects, ratioWidth, ratioHeight,
                        (0, 0, 0), 2)
Code Example #23
def get_cropped_image(image,
                      east='frozen_east_text_detection.pb',
                      min_confidence=0.5,
                      width=320,
                      height=320):

    log('[LM] Cropping Begun:')

    if TESTING:
        cv.imshow('INPUT', image)
        cv.waitKey(0)

    image = normalizeImageSize(image)
    (h, w) = image.shape[:2]
    orig = image.copy()

    # Constants
    confidenceThreshold = min_confidence
    nmsThreshold = 0.4
    tb_padding = 0.1
    bb_padding = [0.005, 0.05]
    theta_thresh = 2
    m = 3

    log('[LM] Get scores 1')

    image = normalizeImageSize(image)

    (rects, confidences, baggage) = get_scores(image, east)

    log('[LM] Get scores 1 END')

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    incl = np.array(thetas)

    log('[DATA] Initial mean: ', degrees(np.mean(incl)))

    if incl.size == 0:
        log('[ERROR] No theta found in included', thetas)
        return None

    theta, std = np.mean(incl), np.std(incl)
    incl = incl[abs(incl - theta) / std <= m]
    theta = np.mean(incl)

    if degrees(theta) < theta_thresh:
        theta = 0

    log('[DATA] Updated Theta:', degrees(theta))
    log('[DATA] Inclinations and theta: ')
    log('THETAS:', len(np.array(thetas)))
    log('INCL: ', len(incl))

    res = rotate_image(orig.copy(), theta)

    if TESTING:
        cv.imshow('TEST', res)
        cv.waitKey(0)

    drawOn = orig.copy()

    # TODO: Delete this
    function = nms.felzenszwalb.nms
    indicies = nms.boxes(rects,
                         confidences,
                         nms_function=function,
                         confidence_threshold=confidenceThreshold,
                         nsm_threshold=nmsThreshold)

    drawrects = np.array(rects)[indicies]

    drawBoxes(drawOn, drawrects, 1, 1, (0, 255, 0), 2)

    if TESTING:
        cv.imshow('DRAW', drawOn)
        cv.waitKey(0)

    log('[LM] Get scores 2')

    res = normalizeImageSize(res)

    (rects, confidences, baggage) = get_scores(res, east)

    log('[LM] Get scores 2 END')

    function = nms.felzenszwalb.nms
    start = time.time()
    indicies = nms.boxes(rects,
                         confidences,
                         nms_function=function,
                         confidence_threshold=confidenceThreshold,
                         nsm_threshold=nmsThreshold)
    end = time.time()

    indicies = np.array(indicies).reshape(-1)
    drawrects = np.array(rects)[indicies]
    name = function.__module__.split('.')[-1].title()
    log("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
        name, end - start, len(drawrects)))

    drawOn = res.copy()
    drawBoxes(drawOn, drawrects, 1, 1, (0, 255, 0), 2)

    if TESTING:
        cv.imshow('DRAWON AFTER ROTATION', drawOn)
        cv.waitKey(0)

    h, w = res.shape[:2]

    uLim = lambda x, m: x if x < m else m
    lLim = lambda x: x if x > 0 else 0

    drawrects = removeExtremes(drawrects)

    bb_coords = [
        int(min(drawrects[:, 0])),
        int(min(drawrects[:, 1])),
        int(max(drawrects[:, 0] + (1 + tb_padding) * drawrects[:, 2])),
        int(max(drawrects[:, 1] + (1 + tb_padding) * drawrects[:, 3]))
    ]
    bb_w = bb_coords[3] - bb_coords[1]
    bb_h = bb_coords[2] - bb_coords[0]

    text_box = res[lLim(bb_coords[1] -
                        int(h * bb_padding[1] /
                            2)):uLim(bb_coords[3] +
                                     int(h * bb_padding[1] / 2), h),
                   lLim(bb_coords[0] -
                        int(w * bb_padding[0] /
                            2)):uLim(bb_coords[2] +
                                     int(w * bb_padding[0] / 2), w)]

    if text_box.size == 0:
        log('[ERROR] Textbox not found')
        return None

    if TESTING:
        cv.imshow("Final Cropping", text_box)
        cv.waitKey(0)

    return text_box
Code Example #24
def main(args):
    seq_name = args.seq_name
    # the packages of trackers
    from pysot.core.config import cfg  # use the modified config file to reset the tracking system
    from pysot.models.model_builder import ModelBuilder
    # modified single tracker with warpper
    from mot_zj.MUST_sot_builder import build_tracker
    from mot_zj.MUST_utils import draw_bboxes, find_candidate_detection, handle_conflicting_trackers, sort_trackers
    from mot_zj.MUST_ASSO.MUST_asso_model import AssociationModel
    from mot_zj.MUST_utils import traj_interpolate

    dataset_dir = os.path.join(root, 'result')
    seq_type = 'img'
    # set the path of config parameters and
    config_path = os.path.join(track_dir, "mot_zj", "MUST_config_file",
                               "alex_config.yaml")
    model_params = os.path.join(params_dir, "alex_model.pth")
    # enable the visualisation or not
    is_visualisation = False
    # print the information of the tracking process or not
    is_print = True

    results_dir = os.path.join(dataset_dir, 'track')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    img_traj_dir = os.path.join(track_dir, "img_traj")
    if os.path.exists(os.path.join(img_traj_dir, seq_name)):
        shutil.rmtree(os.path.join(img_traj_dir, seq_name))

    seq_dir = os.path.join(dataset_dir, seq_type)
    seq_names = os.listdir(seq_dir)
    seq_num = len(seq_names)

    # record the processing time
    start_point = time.time()

    # load config
    # load the config information from other variables
    cfg.merge_from_file(config_path)

    # set the flag that CUDA is available
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create the tracker model (Resnet50)
    track_model = ModelBuilder()
    # load tracker model
    track_model.load_state_dict(
        torch.load(model_params,
                   map_location=lambda storage, loc: storage.cpu()))
    track_model.eval().to(device)
    # create association model
    asso_model = AssociationModel(args)

    seq_det_path = os.path.join(seq_dir, seq_name, 'det')
    seq_img_path = os.path.join(seq_dir, seq_name, 'img1')

    # print path and dataset information
    if is_print:
        print('preparing for the sequence: {}'.format(seq_name))
        print('-----------------------------------------------')
        print("detection result path: {}".format(seq_det_path))
        print("image files path: {}".format(seq_img_path))
        print('-----------------------------------------------')

    # read the detection results
    det_results = np.loadtxt(os.path.join(seq_det_path, 'det.txt'),
                             dtype=float,
                             delimiter=',')

    # read images from each sequence
    images = sorted(glob.glob(os.path.join(seq_img_path, '*.jpg')))
    img_num = len(images)

    # the container of trackers
    trackers = []

    # visualisation settings
    if is_visualisation:
        cv2.namedWindow(seq_name, cv2.WINDOW_NORMAL)

    # init (reset) the identifier
    id_num = 0

    # tracking process in each frame
    for nn, im_path in enumerate(images):
        each_start = time.time()
        frame = nn + 1
        img = cv2.imread(im_path)
        print('Frame {} is loaded'.format(frame))

        # load the detection results of this frame
        pre_frame_det_results = det_results[det_results[:, 0] == frame]

        # non-maximum suppression [frame, id, x, y, w, h, score]
        indices = nms.boxes(pre_frame_det_results[:, 2:6],
                            pre_frame_det_results[:, 6])
        frame_det_results = pre_frame_det_results[indices, :]

        # extract the bbox [fr, id, (x, y, w, h), score]
        bboxes = frame_det_results[:, 2:6]

        ############################################
        # ***multiple tracking and associating***  #
        ############################################

        # 1. sort trackers
        index1, index2 = sort_trackers(trackers)

        # 2. save the processed index of trackers
        index_processed = []
        track_time = 0
        asso_time = 0
        for k in range(2):
            # process trackers in the first or the second class
            if k == 0:
                index_track = index1
            else:
                index_track = index2
            track_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.TRACKED or trackers[
                        ind].track_state == cfg.STATE.ACTIVATED:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_track_bboxes = bboxes[
                        indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track(tracking process)
                    trackers[ind].track(img, to_track_bboxes, frame)
                    # if the tracker keep its previous tracking state (tracked or activated)
                    if trackers[
                            ind].track_state == cfg.STATE.TRACKED or trackers[
                                ind].track_state == cfg.STATE.ACTIVATED:
                        index_processed.append(ind)
            track_time += time.time() - track_start
            asso_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.LOST:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_associate_bboxes = bboxes[
                        indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track(association process)
                    trackers[ind].track(img, to_associate_bboxes, frame)
                    # add process flag
                    index_processed.append(ind)
            asso_time += time.time() - asso_start
        ############################################
        #        ***init new trackers ***          #
        ############################################

        # find the candidate bboxes to init new trackers
        indices = find_candidate_detection(trackers, bboxes)

        # process the tracker: init (1st frame) and track method (the other frames)
        for index in indices:
            id_num += 1
            new_tracker = build_tracker(track_model)
            new_tracker.init(img, bboxes[index, :], id_num, frame, seq_name,
                             asso_model)
            trackers.append(new_tracker)

        # find conflict of trackers (I need to know what conflict)
        trackers = handle_conflicting_trackers(trackers, bboxes)

        # interpolate the tracklet results
        for tracker in trackers:
            if tracker.track_state == cfg.STATE.TRACKED or tracker.track_state == cfg.STATE.ACTIVATED:
                bbox = tracker.tracking_bboxes[-1, :]
                traj_interpolate(tracker, bbox, tracker.frames[-1], 30)

        ############################################
        #    ***collect tracking results***        #
        ############################################

        # collect the tracking results (all the results, without selected)
        if frame == len(images):
            results_bboxes = np.array([])
            for tracker in trackers:
                if results_bboxes.size == 0:
                    results_bboxes = tracker.results_return()
                else:
                    res = tracker.results_return()
                    if not res.size == 0:
                        results_bboxes = np.concatenate(
                            (results_bboxes, res), axis=0)
            # write the results (sorted by frame) to a text file
            filename = '{}.txt'.format(seq_name)
            results_bboxes = results_bboxes[np.argsort(results_bboxes[:, 0])]
            print(results_bboxes.shape[0])
            # filter detections for sequence 'b1': drop small boxes (area < 10000) located below y = 540
            indices = []
            if seq_name == 'b1':
                for ind, result in enumerate(results_bboxes):
                    if result[3] > 540:
                        if result[4] * result[5] < 10000:
                            indices.append(ind)
                results_bboxes = np.delete(results_bboxes, indices, axis=0)
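            # each row written out below is (frame, id, x, y, w, h) --
            # layout inferred from the format spec and the 'b1' filter above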
            np.savetxt(os.path.join(results_dir, filename),
                       results_bboxes,
                       fmt='%d,%d,%.1f,%.1f,%.1f,%.1f')
        ############################################
        #        ***crop tracklet image***         #
        ############################################

        for tracker in trackers:
            if tracker.track_state == cfg.STATE.START or tracker.track_state == cfg.STATE.TRACKED or tracker.track_state == cfg.STATE.ACTIVATED:
                bbox = tracker.tracking_bboxes[-1, :]
                x1 = int(np.floor(np.maximum(1, bbox[0])))
                y1 = int(np.ceil(np.maximum(1, bbox[1])))
                x2 = int(np.ceil(np.minimum(img.shape[1], bbox[0] + bbox[2])))
                y2 = int(np.ceil(np.minimum(img.shape[0], bbox[1] + bbox[3])))
                img_traj = img[y1:y2, x1:x2, :]
                traj_path = os.path.join(img_traj_dir, seq_name,
                                         str(tracker.id_num))
                if not os.path.exists(traj_path):
                    os.makedirs(traj_path)
                tracklet_img_path = os.path.join(traj_path,
                                                 str(tracker.frames[-1]))
                cv2.imwrite("{}.jpg".format(tracklet_img_path), img_traj)
        each_time = time.time() - each_start
        print("period: {}s, track: {}s({:.2f}), asso: {}s({:.2f})".format(
            each_time, track_time, (track_time / each_time) * 100, asso_time,
            (asso_time / each_time) * 100))
        if is_visualisation:
            ##########################################
            # information print and visualisation    #
            ##########################################
            # print("The number of new trackers: {}".format(len(indices)))
            active_trackers = [
                trackers[i].id_num for i in range(len(trackers))
                if trackers[i].track_state == cfg.STATE.ACTIVATED
                or trackers[i].track_state == cfg.STATE.TRACKED
                or trackers[i].track_state == cfg.STATE.LOST
            ]
            print("The number of active trackers: {}".format(
                len(active_trackers)))
            print(active_trackers)
            anno_img = draw_bboxes(img, bboxes)
            cv2.imshow(seq_name, anno_img)
            cv2.waitKey(1)
        print("The running time is: {} s".format(time.time() - start_point))

    print("The total processing time is: {} s".format(time.time() -
                                                      start_point))
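The tracking loop above leans on a find_candidate_detection() helper to pick out detections that are not already claimed by the trackers processed so far, both when tracking/associating and when initialising new trackers. That helper is not part of this excerpt; the sketch below shows one plausible IoU-gating version, assuming detections and tracker boxes are stored as (x, y, w, h) as the cropping step suggests. The names iou_xywh and find_candidate_detection_sketch are illustrative, not the project's actual code.

import numpy as np

def iou_xywh(a, b):
    # IoU of two (x, y, w, h) boxes
    ax2, ay2 = a[0] + a[2], a[1] + a[3]
    bx2, by2 = b[0] + b[2], b[1] + b[3]
    iw = max(0.0, min(ax2, bx2) - max(a[0], b[0]))
    ih = max(0.0, min(ay2, by2) - max(a[1], b[1]))
    inter = iw * ih
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0.0

def find_candidate_detection_sketch(trackers, bboxes, iou_thresh=0.5):
    # keep only detections whose overlap with every processed tracker's
    # latest box stays below iou_thresh
    if len(bboxes) == 0:
        return []
    claimed = [t.tracking_bboxes[-1, :4] for t in trackers]
    return [i for i, det in enumerate(np.asarray(bboxes))
            if all(iou_xywh(det[:4], box) < iou_thresh for box in claimed)]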
Code Example #25
0
File: maskrcnn.py Project: dpsnewailab/adou
    def batch_analyze(self, images=None, *args, **kwargs):
        """
        Analyze for a batch of images
        :param images:
        :return:
        """
        if images is None:
            raise ValueError('images cannot be None')

        with torch.no_grad():
            if self.device == 'cuda':
                torch.cuda.synchronize()

            _images = []
            for image in images:
                _images.append(F.to_tensor(image).to(self.device))
                del image

            l_images = len(images)
            del images

            output = self.model(_images)
            del _images

            if 'use_listmemmap' in kwargs:
                f_out = ListMemMap()
            else:
                f_out = List()

            for id in range(len(output)):
                for key, item in output[id].items():
                    if self.device == 'cuda':
                        item = item.cpu()
                    output[id][key] = item.detach().numpy()
                del item

                boxes = [[x1, y1, x2 - x1, y2 - y1]
                         for x1, y1, x2, y2 in output[id]['boxes']]
                scores = output[id]['scores']
                rects = nms.boxes(rects=boxes,
                                  scores=scores,
                                  nms_threshold=0.25)

                tmp = list()
                tmp.append([
                    output[id]['boxes'][idx] for idx in rects
                    if output[id]['scores'][idx] > 0.5
                ])
                tmp.append([
                    output[id]['labels'][idx] for idx in rects
                    if output[id]['scores'][idx] > 0.5
                ])
                tmp.append([
                    output[id]['scores'][idx] for idx in rects
                    if output[id]['scores'][idx] > 0.5
                ])

                f_out.append(tmp)
                del tmp

            del output, l_images, boxes, scores, rects
            if self.device == 'cuda':
                torch.cuda.empty_cache()

            gc.collect()
            return f_out
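Code Example #25 converts the Mask R-CNN corner boxes to (x, y, w, h) before handing them, together with the per-box scores, to nms.boxes. A tiny standalone check of that call pattern, using made-up boxes and the same keyword arguments used above, could look like this (the import line assumes the PyPI nms package that these examples appear to use):

from nms import nms

rects = [[10, 10, 50, 80],   # x, y, w, h
         [12, 12, 50, 80],   # heavily overlaps the first box
         [200, 40, 60, 60]]
scores = [0.90, 0.60, 0.80]

keep = nms.boxes(rects=rects, scores=scores, nms_threshold=0.25)
print(keep)  # indices of the boxes that survive suppression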
Code Example #26
0
def text_detection(image, east, min_confidence, width, height):
    image = cv2.imread(image)
    orig = image.copy()
    (origHeight, origWidth) = image.shape[:2]

    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # show the original image
    cv2.imshow("Original", orig)

    print("[INFO] Učitavanje EAST text detektora...")
    net = cv2.dnn.readNet(east)

    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)

    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    print("[INFO] Trajanje detekcije: {:.6f} sekundi".format(end - start))

    confidenceThreshold = min_confidence
    nmsThreshold = 0.4

    (rects, confidences, baggage) = decode(scores, geometry,
                                           confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

    #print("[INFO] Running nms.boxes . . .")

    for i, function in enumerate(functions):

        start = time.time()
        indicies = nms.boxes(rects,
                             confidences,
                             nms_function=function,
                             confidence_threshold=confidenceThreshold,
                             nms_threshold=nmsThreshold)
        end = time.time()

        indicies = np.array(indicies).reshape(-1)

        drawrects = np.array(rects)[indicies]

        name = function.__module__.split('.')[-1].title()
        print(
            "[INFO] The {} method took {:.6f} seconds and found {} boxes"
            .format(name, end - start, len(drawrects)))

        drawOn = orig.copy()
        drawBoxes(drawOn, drawrects, ratioWidth, ratioHeight, (0, 255, 0), 2)

        title = "nms.boxes {}".format(name)
        cv2.imshow(title, drawOn)
        cv2.moveWindow(title, 150 + i * 300, 350)

    cv2.waitKey(0)
    """
Code Example #27
0
File: yolov5.py Project: zzzzt634/ncnn
def non_max_suppression(prediction,
                        conf_thres=0.1,
                        iou_thres=0.6,
                        merge=False,
                        classes=None,
                        agnostic=False):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    nc = prediction[0].shape[1] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero()
            x = np.concatenate(
                (box[i], x[i, j + 5, None], j[:, None].astype(np.float32)),
                axis=1)
        else:  # best class only
            conf = x[:, 5:].max(1, keepdims=True)
            j = x[:, 5:].argmax(1).reshape(-1, 1).astype(np.float32)
            x = np.concatenate((box, conf, j),
                               axis=1)[conf.reshape(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == np.array(classes)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:,
                                        4]  # boxes (offset by class), scores
        boxes = xyxy2xywh(boxes)
        i = nms.boxes(boxes, scores, nms_threshold=iou_thres)
        if len(i) > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n <
                      3e3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = np.matmul(weights, x[:, :4]) / weights.sum(
                    1, keepdims=True)  # merged boxes
                if redundant:
                    i = np.asarray(i)[iou.sum(1) > 1]  # require redundancy
            except:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
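This NumPy port of YOLOv5's NMS relies on xywh2xyxy and xyxy2xywh helpers that are outside the excerpt. Minimal NumPy versions consistent with the in-code comment (center x, center y, width, height to corner coordinates and back) might look like the following; the originals in the project may be written differently.

import numpy as np

def xywh2xyxy(x):
    # (center x, center y, w, h) -> (x1, y1, x2, y2)
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2
    return y

def xyxy2xywh(x):
    # (x1, y1, x2, y2) -> (center x, center y, w, h)
    y = np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # center x
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # center y
    y[:, 2] = x[:, 2] - x[:, 0]        # width
    y[:, 3] = x[:, 3] - x[:, 1]        # height
    return y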
Code Example #28
0
def text_detect(image, min_confidence, width, height):
    # load the input image and grab the image dimensions
    image = cv2.imread(image)
    orig = image.copy()
    (origHeight, origWidth) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    #     print("[INFO] loading EAST text detector...")
    net = text_model

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image,
                                 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)

    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # NMS on the unrotated rects
    confidenceThreshold = min_confidence
    nmsThreshold = 0.4

    # decode the blob info
    (rects, confidences, baggage) = decode(scores, geometry,
                                           confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    ##########################################################

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

    rects_count, polys_count = 0, 0  # boxes kept by rect-NMS and by polygon-NMS

    for i, function in enumerate(functions):

        indicies = nms.boxes(rects,
                             confidences,
                             nms_function=function,
                             confidence_threshold=confidenceThreshold,
                             nms_threshold=nmsThreshold)

        indicies = np.array(indicies).reshape(-1)

        if len(indicies) != 0:
            drawrects = np.array(rects)[indicies]
            rects_count = len(drawrects)

    # convert rects to polys
    polygons = utils.rects2polys(rects, thetas, offsets, ratioWidth,
                                 ratioHeight)

    for i, function in enumerate(functions):

        start = time.time()
        indicies = nms.polygons(polygons,
                                confidences,
                                nms_function=function,
                                confidence_threshold=confidenceThreshold,
                                nms_threshold=nmsThreshold)
        end = time.time()

        indicies = np.array(indicies).reshape(-1)

        if len(indicies) != 0:
            drawpolys = np.array(polygons)[indicies]
            polys_count = len(drawpolys)

    return [rects_count, polys_count]
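Every example in this collection delegates the actual suppression to a library call (nms.boxes, nms.polygons, or a framework routine). For reference, a self-contained greedy IoU-based NMS over (x, y, w, h) boxes, in the spirit of the Malisiewicz-style routine referenced above, fits in a few lines of NumPy; the default threshold and the box layout here are assumptions, not taken from any of the projects.

import numpy as np

def greedy_nms_xywh(rects, scores, iou_thresh=0.4):
    # keep the highest-scoring box, drop every remaining box whose IoU with
    # it exceeds iou_thresh, then repeat with the next best survivor
    rects = np.asarray(rects, dtype=np.float32)
    scores = np.asarray(scores, dtype=np.float32)
    if rects.size == 0:
        return []
    x1, y1 = rects[:, 0], rects[:, 1]
    x2, y2 = rects[:, 0] + rects[:, 2], rects[:, 1] + rects[:, 3]
    areas = rects[:, 2] * rects[:, 3]
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]
    return keep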