Example #1
    def detect_and_rectify(self, detector, image, angle, dest_img_shape, rgb_rotated=None):
        #do the detection
        detections = detector.detectMultiScale(image, scaleFactor=self.scale, minNeighbors=self.min_neighbors)
        '''
        print "we're about to save the rotated images; if you don't want this, quit and remove this block (line 232 in viola_detector.py)"
        pdb.set_trace()
        for d in detections:
            cv2.rectangle(rgb_rotated, (d[0], d[1]), (d[0]+d[2], d[1]+d[3]), (255,255,255), 4) 
        cv2.imwrite('rotated.jpg', rgb_rotated)
        pdb.set_trace()
        '''
        #convert to proper coordinate system
        polygons = utils.convert_detections_to_polygons(detections)

        if angle > 0:
            #rotate back to original image coordinates
            print 'rotating...'
            rectified_detections = utils.rotate_detection_polygons(polygons, image, angle, dest_img_shape, remove_off_img=self.remove_off_img)
        else:
            rectified_detections = polygons
        print 'done rotating'

        if self.group:
            bounding_boxes = utils.get_bounding_boxes(np.array(rectified_detections))
        else:
            bounding_boxes = None
        return rectified_detections, bounding_boxes
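Note: utils.convert_detections_to_polygons is not shown on this page. A minimal sketch of what it is assumed to do, turning the (x, y, w, h) rows returned by detectMultiScale into 4-corner polygons so that the later rotation can act on exact corner coordinates (the helper layout and corner order are assumptions):

import numpy as np

def convert_detections_to_polygons(detections):
    # detections: iterable of (x, y, w, h) rectangles from detectMultiScale;
    # each becomes a 4-corner polygon, clockwise from the top-left corner
    polygons = np.zeros((len(detections), 4, 2), dtype=np.float32)
    for i, (x, y, w, h) in enumerate(detections):
        polygons[i] = [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
    return polygons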
Example #2
    def non_max_suppression(self, polygons):
        #we start with polygons and take their bounding boxes
        rects = utils.get_bounding_boxes(np.array(polygons))
        #convert the bounding boxes to the format expected by non_max_suppression
        boxes = utils.rects2boxes(rects)
        boxes_suppressed = suppression.non_max_suppression(boxes, overlapThresh=self.overlapThresh)
        polygons_suppressed = utils.boxes2polygons(boxes_suppressed)
        return polygons_suppressed
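Note: the suppression module itself is not shown. A minimal sketch of the Malisiewicz-style formulation of the Felzenszwalb et al. criterion that suppression.non_max_suppression is assumed to follow, given (x1, y1, x2, y2) boxes: keep the box sorted last, discard everything overlapping it beyond overlapThresh, repeat.

import numpy as np

def non_max_suppression(boxes, overlapThresh=0.3):
    # boxes: array of (x1, y1, x2, y2) rows (assumed to be what rects2boxes produces)
    if len(boxes) == 0:
        return boxes
    boxes = np.asarray(boxes, dtype=np.float64)
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    pick = []
    while len(idxs) > 0:
        # keep the box sorted last, then measure its overlap with the rest
        last = idxs[-1]
        pick.append(last)
        xx1 = np.maximum(x1[last], x1[idxs[:-1]])
        yy1 = np.maximum(y1[last], y1[idxs[:-1]])
        xx2 = np.minimum(x2[last], x2[idxs[:-1]])
        yy2 = np.minimum(y2[last], y2[idxs[:-1]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:-1]]
        # discard the kept box and everything overlapping it too much
        idxs = idxs[:-1][overlap <= overlapThresh]
    return boxes[pick].astype(int)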
Example #3
    def run(self):
        '''
        1. Find proposals using ViolaJones
        2. Resize the window and classify it
        3. Net returns a list of the roof coordinates of each type - saved in roof_coords
        '''
        neural_time = 0
        for i, img_name in enumerate(self.img_names):
            print '***************** Image {0}: {1}/{2} *****************'.format(img_name, i, len(self.img_names)-1)

            #VIOLA
            if self.pickle_viola is None:
                self.viola.detect_roofs(img_name=img_name)
                current_viola_detections = self.viola.viola_detections
                viola_time = self.viola.evaluation.detections.total_time
            else:
                #use the pickled detections for speed when testing the neural network
                current_viola_detections = self.viola_evaluation.detections
                viola_time = self.viola_evaluation.detections.total_time
            proposal_patches, proposal_coords, img_shape = self.find_viola_proposals(current_viola_detections, img_name=img_name)
            if self.pickle_viola is None:
                #score only now that img_shape is known; scoring any earlier would fail
                self.viola.evaluation.score_img(img_name, img_shape[:2])
                self.viola.evaluation.save_images(img_name, fname='beforeNeural')

            #NEURALNET
            with Timer() as t:
                classified_detections = self.neural_classification(proposal_patches, proposal_coords)
                #set detections and score
                for roof_type in utils.ROOF_TYPES:
                    if self.groupThreshold > 0 and roof_type == 'metal':
                        #convert the polygons to rectangles, group them,
                        #then convert back to polygons
                        boxes = utils.get_bounding_boxes(np.array(classified_detections[roof_type]))
                        grouped_boxes, weights = cv2.groupRectangles(np.array(boxes).tolist(), self.groupThreshold)
                        classified_detections[roof_type] = utils.convert_detections_to_polygons(grouped_boxes)

                    elif self.groupBounds and roof_type == 'metal':
                        #grouping with the minimal bound of all overlapping rects
                        classified_detections[roof_type] = self.group_min_bound(classified_detections[roof_type], img_shape[:2], erosion=self.erosion)

                    elif self.suppress and roof_type == 'metal':
                        #proper non-max suppression from Felzenszwalb et al.
                        classified_detections[roof_type] = self.non_max_suppression(classified_detections[roof_type])

                    self.detections_after_neural.set_detections(img_name=img_name, 
                                                        roof_type=roof_type, 
                                                        detection_list=classified_detections[roof_type])
            neural_time += t.secs 

            self.evaluation_after_neural.score_img(img_name, img_shape[:2], contours=self.groupBounds)
            self.evaluation_after_neural.save_images(img_name, 'posNeural')
        
        if self.pickle_viola is None:
            self.viola.evaluation.print_report(print_header=True, stage='viola')
        else:
            self.viola_evaluation.print_report(print_header=True, stage='viola')

        self.evaluation_after_neural.detections.total_time = neural_time
        self.evaluation_after_neural.print_report(print_header=False, stage='neural')
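Note on the cv2.groupRectangles call above: it keeps only clusters containing more than groupThreshold rectangles, so isolated single detections are silently dropped, which is why the branch is only taken when self.groupThreshold > 0 was deliberately chosen. A minimal standalone illustration:

import cv2

rects = [[10, 12, 50, 48], [12, 10, 48, 50], [300, 300, 40, 40]]
# with groupThreshold=1, a cluster needs at least two rectangles to survive,
# so the two overlapping boxes merge and the isolated one is discarded
grouped, weights = cv2.groupRectangles(rects, 1, 0.2)
print(grouped)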
Example #4
    def detect_and_rectify(self,
                           detector,
                           image,
                           angle,
                           dest_img_shape,
                           rgb_rotated=None):
        #do the detection
        detections = detector.detectMultiScale(image,
                                               scaleFactor=self.scale,
                                               minNeighbors=self.min_neighbors)
        '''
        print "we're about to save the rotated images; if you don't want this, quit and remove this block (line 232 in viola_detector.py)"
        pdb.set_trace()
        for d in detections:
            cv2.rectangle(rgb_rotated, (d[0], d[1]), (d[0]+d[2], d[1]+d[3]), (255,255,255), 4) 
        cv2.imwrite('rotated.jpg', rgb_rotated)
        pdb.set_trace()
        '''
        #convert to proper coordinate system
        polygons = utils.convert_detections_to_polygons(detections)

        if angle > 0:
            #rotate back to original image coordinates
            print 'rotating...'
            rectified_detections = utils.rotate_detection_polygons(
                polygons,
                image,
                angle,
                dest_img_shape,
                remove_off_img=self.remove_off_img)
        else:
            rectified_detections = polygons
        print 'done rotating'

        if self.group:
            bounding_boxes = utils.get_bounding_boxes(
                np.array(rectified_detections))
        else:
            bounding_boxes = None
        return rectified_detections, bounding_boxes
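Note: utils.rotate_detection_polygons is not shown either. A plausible sketch under the assumption that the rotated image was produced by a centered rotation with symmetric padding; the inverse affine is applied to every polygon corner (the helper name and the padding convention are hypothetical, not the project's actual code):

import cv2
import numpy as np

def rotate_polygons_back(polygons, rotated_shape, angle, dest_shape):
    # invert the rotation about the rotated image's center...
    (h, w) = rotated_shape[:2]
    M = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), -angle, 1.0)
    # ...then shift that center onto the destination image's center
    M[0, 2] += (dest_shape[1] - w) / 2.0
    M[1, 2] += (dest_shape[0] - h) / 2.0
    # cv2.transform applies the 2x3 affine to an (N, 4, 2) corner array
    return cv2.transform(polygons.astype(np.float32), M)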
Example #5
    def run(self):
        '''
        1. Sliding window proposals
        2. Resize the window and classify it
        3. Net returns a list of the roof coordinates of each type - saved in roof_coords
        '''
        neural_time = 0
        for i, img_name in enumerate(self.img_names):
            print '***************** Image {0}: {1}/{2} *****************'.format(img_name, i, len(self.img_names)-1)

            #NEURALNET
            #proposal_patches, proposal_coords and img_shape come from the
            #sliding window step (step 1), which this excerpt omits
            with Timer() as t:
                classified_detections = self.neural_classification(proposal_patches, proposal_coords)
                #set detections and score
                for roof_type in utils.ROOF_TYPES:
                    if self.groupThreshold > 0 and roof_type == 'metal':
                        #convert the polygons to rectangles, group them,
                        #then convert back to polygons
                        boxes = utils.get_bounding_boxes(np.array(classified_detections[roof_type]))
                        grouped_boxes, weights = cv2.groupRectangles(np.array(boxes).tolist(), self.groupThreshold)
                        classified_detections[roof_type] = utils.convert_detections_to_polygons(grouped_boxes)

                    elif self.groupBounds and roof_type == 'metal':
                        #grouping with the minimal bound of all overlapping rects
                        classified_detections[roof_type] = self.group_min_bound(classified_detections[roof_type], img_shape[:2], erosion=self.erosion)

                    elif self.suppress and roof_type == 'metal':
                        #proper non-max suppression from Felzenszwalb et al.
                        classified_detections[roof_type] = self.non_max_suppression(classified_detections[roof_type])

                    self.detections_after_neural.set_detections(img_name=img_name, 
                                                        roof_type=roof_type, 
                                                        detection_list=classified_detections[roof_type])
            neural_time += t.secs 

            self.evaluation.score_img(img_name, img_shape[:2], contours=self.groupBounds)
            self.evaluation.save_images(img_name, 'posNeural')

        self.evaluation_after_neural.detections.total_time = neural_time
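Note: this excerpt never builds proposal_patches and proposal_coords; step 1 of its docstring is missing. A minimal sketch of what a sliding-window proposal step could look like (the window size, stride, and generator name are assumptions):

def sliding_window_proposals(img, win=40, step=20):
    # walk a regular grid over the image, yielding each square patch
    # together with its (x, y, w, h) rectangle in image coordinates
    h, w = img.shape[:2]
    for y in range(0, h - win + 1, step):
        for x in range(0, w - win + 1, step):
            yield img[y:y + win, x:x + win], (x, y, win, win)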
Example #6
def main(argv: list):
    # argument check
    if len(argv) == 1:
        print("NO VIDEO FILE", flush=True)
        exit(0)
    # set videopath
    video = argv[1]
    # get file extension
    extension = utils.get_file_extension(video).lower()
    # extension check
    if extension not in SUPPORTED_EXTENSIONS:
        print("NOT SUPPORTED EXTENSION")
        exit(0)
    rotating_angle = 0
    # set rotating angle if second parameter given
    if len(argv) > 2:
        rotating_angle = int(argv[2])
    # construct VideoCapture object
    cap = cv2.VideoCapture(video)
    # initializing base white image
    image_to_save = np.zeros([200, 200, 3], dtype=np.uint8)
    image_to_save.fill(255)
    # 4-character code of codec for writing video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    datetime_stamp = datetime.datetime.now()\
        .strftime("%Y_%m%d_%H%M%S")
    # temporary video file
    temp_video = "temp_video.mp4"
    filename = 'enarvideo' + datetime_stamp + '.mp4'
    fps = cap.get(cv2.CAP_PROP_FPS)
    # construct VideoWriter object, used to build a video
    # from the cropped ENAR number patches
    out = cv2.VideoWriter(temp_video, fourcc, float(fps), (200, 200))
    # one dict of ENAR candidate counts per second of video
    dict_array = []
    # processing the video
    while True:
        # get one frame from VideoCapture object
        ret, frame = cap.read()
        # if no more frame, stop processing
        if not ret:
            break

        # getting current_time in sec
        current_time = int(cap.get(cv2.CAP_PROP_POS_MSEC) / 1000)
        # this print needed for the wrapper app
        print(str(
            round(
                cap.get(cv2.CAP_PROP_POS_FRAMES) /
                cap.get(cv2.CAP_PROP_FRAME_COUNT), 3)),
              flush=True)
        # append new dict
        if len(dict_array) == current_time:
            dict_array.append(dict())
        # rotating frame
        frame = rotate(frame, angle=rotating_angle)
        # converting image to HSV colorspace
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # creating mask for filtering; lower/upper are module-level
        # HSV bounds not shown in this excerpt
        mask = cv2.inRange(hsv, lower, upper)
        # filtering frame with mask
        res = cv2.bitwise_and(frame, frame, mask=mask)

        # converting to grayscale
        gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
        # searching connected components
        output = cv2.connectedComponentsWithStats(gray, 8)
        # number of labels
        nlabels = output[0]
        # stats of the components (width,height etc.)
        stats = output[2]
        modframe = frame.copy()
        # getting the bounding boxes
        bboxes = utils.get_bounding_boxes(nlabels, stats)
        # iterating over the bounding boxes
        for bbox in bboxes:
            # drawing rectangle to displayed frame
            cv2.rectangle(modframe, (bbox[0][0], bbox[0][1]),
                          (bbox[1][0], bbox[1][1]), (0, 0, 255), 1)
            # cropping image
            crop_img = utils.crop_image(frame, bbox)
            # getting string from image
            ocr_string = utils.image_to_string(crop_img, "digits", 1, 6)
            # keep only non-empty OCR results
            if ocr_string:
                # saving cropped image to write out later
                image_to_save = cv2.resize(crop_img, (200, 200))
                # splitting str
                splitted_str = ocr_string.split("\n")
                # ocr_string = 'Recognized:\n' + ocr_string + '\nLength: ' + str(len(ocr_string)) + '  \n END'

                # iterating over the lines
                for _str in splitted_str:
                    # if the line is a single word longer than 3 characters
                    if len(_str.split(' ')) == 1 and len(_str) > 3:
                        # adding to dict
                        if _str[:4] not in dict_array[current_time]:
                            dict_array[current_time][_str[:4]] = 1
                        else:
                            dict_array[current_time][
                                _str[:4]] = dict_array[current_time].get(
                                    _str[:4]) + 1
                    # if the line is a single word longer than 4 characters
                    if len(_str.split(' ')) == 1 and len(_str) > 4:
                        # adding to dict
                        if _str[:5] not in dict_array[current_time]:
                            dict_array[current_time][_str[:5]] = 1
                        else:
                            dict_array[current_time][
                                _str[:5]] = dict_array[current_time].get(
                                    _str[:5]) + 1
        out.write(image_to_save)
        # if current_time > 3:
        #     md = utils.merge_dicts(dict_array[current_time], dict_array[current_time - 1], dict_array[current_time - 2],
        #                            dict_array[current_time - 3], dict_array[current_time - 4])
        #     enar5 = utils.get_enar_from_dict(md)
        #
        #     if enar5 != "":
        #         cv2.putText(modframe, "Enar: " + enar5, (40, 200), cv2.FONT_HERSHEY_SIMPLEX, 1,
        #                     (200, 200, 255))

        cv2.imshow('video', resize(modframe, 1200, 1200))
        k = cv2.waitKey(5) & 0xFF
        if k == 27:
            break
    cv2.destroyAllWindows()
    cap.release()
    out.release()

    # writing to file

    # checking size
    if len(dict_array) > 1:
        # opening file to write
        with open("enar" + datetime_stamp + ".txt", "w") as f:
            # iterate over the seconds; i runs from 1 to len(dict_array)
            for i in range(1, len(dict_array) + 1):
                # merge the neighbouring per-second dicts
                # and extract the recognized ENAR number
                if i == 1:
                    m = utils.merge_dicts(dict_array[i], dict_array[i - 1])
                elif i == len(dict_array):
                    m = utils.merge_dicts(dict_array[i - 1], dict_array[i - 2])
                else:
                    m = utils.merge_dicts(dict_array[i], dict_array[i - 1],
                                          dict_array[i - 2])
                enar5 = utils.get_enar_from_dict(m)
                # writing time and recognized enar to file
                f.write(str(i) + " " + (enar5 or "") + "\n")
        # converting video with ffmpeg
        command = "ffmpeg -i {} {}".format(temp_video, filename)
        try:
            # running command
            output = subprocess.check_output(command,
                                             stderr=subprocess.STDOUT,
                                             shell=True)
        except subprocess.CalledProcessError as e:
            print('FAIL:\ncmd:{}\noutput:{}'.format(e.cmd, e.output))
    # and deleting temporary video file
    delete_command = "del" if platform.system() == "Windows" else "rm"
    subprocess.call("{} {}".format(delete_command, temp_video), shell=True)
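Note: in this example utils.get_bounding_boxes works on the stats matrix from cv2.connectedComponentsWithStats, whose row i holds [left, top, width, height, area] for component i, with label 0 being the background. A sketch consistent with how the boxes are indexed above (the minimum-area filter is an assumption):

import cv2

def get_bounding_boxes(nlabels, stats, min_area=100):
    # skip label 0 (background); keep components above min_area and
    # return ((x1, y1), (x2, y2)) corner pairs, matching the bbox[0]/bbox[1]
    # indexing used by the drawing and cropping code above
    boxes = []
    for i in range(1, nlabels):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            x, y = stats[i, cv2.CC_STAT_LEFT], stats[i, cv2.CC_STAT_TOP]
            w, h = stats[i, cv2.CC_STAT_WIDTH], stats[i, cv2.CC_STAT_HEIGHT]
            boxes.append(((x, y), (x + w, y + h)))
    return boxes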
Example #7
            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()

            # Saving predicted boxes for evaluation
            for i in range(0, batch_size):
                index = step * batch_size + i
                allBoundingBoxes = get_bounding_boxes(
                    rois[i].unsqueeze(0), cls_prob[i].unsqueeze(0),
                    bbox_pred[i].unsqueeze(0), im_info[i].unsqueeze(0),
                    allBoundingBoxes, index)

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                .format(step + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()

    # Evaluation
    print('Evaluating detections')
    metricsPerClass = evaluator.GetPascalVOCMetrics(allBoundingBoxes,
                                                    IOUThreshold=0.5)
    for mc in metricsPerClass:
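        # the snippet is cut off here; assuming the evaluator is Padilla's
        # Object-Detection-Metrics library (which matches the
        # GetPascalVOCMetrics call above), each mc is a per-class dict and a
        # plausible loop body would be:
        print('class: {}  AP: {:.4f}  TP: {}  FP: {}'.format(
            mc['class'], mc['AP'], mc['total TP'], mc['total FP']))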