Example 1
def face(image_np):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    (boxes, scores, classes,
     num) = face_session.run([
         face_detection_boxes, face_detection_scores, face_detection_classes,
         face_num_detections
     ],
                             feed_dict={face_image_tensor: image_np_expanded})
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        face_category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.85)
    # Get coordinates of detected boxes: ymin, ymax, xmin, xmax
    coordinates_face = vis_util.return_coordinates(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        face_category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.85)

    print("face: ", *coordinates_face)
Example 2
def detection(inputImg, image_path):

    global predictions

    print(inputImg.size)
    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    output_dict = run_inference_for_single_image(image_np_expanded,
                                                 detection_graph)

    coordinates = vis_util.return_coordinates(
        image_np,
        np.squeeze(output_dict['detection_boxes']),
        np.squeeze(output_dict['detection_classes']).astype(np.int32),
        np.squeeze(output_dict['detection_scores']),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.30)

    a = os.path.basename(image_path)
    #b=os.path.splitext(a)[0]

    print(a, ':', coordinates)
    newImage = np.copy(inputImg)
    predictions = []
    for i in range(len(coordinates)):
        label = coordinates[i][5]
        #confidence=coordinates[i][4]
        top_x = coordinates[i][2]
        top_y = coordinates[i][0]
        btm_x = coordinates[i][3]
        btm_y = coordinates[i][1]

        newImage = cv2.rectangle(newImage, (top_x - 5, top_y - 5),
                                 (btm_x + 5, btm_y + 5), (255, 0, 0), 1)
        newImage = cv2.putText(newImage, label, (top_x, top_y + 20),
                               cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), 1,
                               cv2.LINE_AA)

        predictions.append({
            'label': label,
            'topleft': {
                'x': top_x,
                'y': top_y
            },
            'bottomright': {
                'x': btm_x,
                'y': btm_y
            }
        })
    cv2.imwrite('newImage.jpg', newImage)

    return (newImage, predictions)
Example 3
        count = 0
        for i in range(100):
            if scores is None or final_score[i] > 0.5:
                count = count + 1
        print('count', count)
        printcount = 0

        for i in classes[0]:
            printcount = printcount + 1
            if (category_index[i]['name'] == "serialreadable"):
                checkingSerial = True
                coordinates = vis_util.return_coordinates(
                    frame,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8,
                    min_score_thresh=0.90)
                print(category_index[i]['name'])
                #if(coordinates.count==4):
                print(coordinates)
                print(i)
                it += 1
                if (it > 10):
                    it = 0
                if coordinates:
                    x = int(coordinates[2])
                    y = int(coordinates[0])
                    w = int(coordinates[3])
Example 4
        def SabKuch(en):
            global action, deque
            alphabet_drawn = ""
            global prediction1, prediction2, fingertip_tracking_list, letters, black_image, character_recognition_counter, label_map, categories, category_index
            image_np = cv2.imdecode(np.fromstring(en, np.uint8), 1)
            #image_np = cv2.flip(image_np, 1)

            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represent how level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            # Visualization of the results of a detection.
            """vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          np.squeeze(boxes),
          np.squeeze(classes).astype(np.int32),
          np.squeeze(scores),
          category_index,
          use_normalized_coordinates=True,
          line_thickness=8)
      """
            image_np = cv2.resize(image_np, (640, 360))
            #image_np = cv2.warpAffine(image_np, M, (640,360))
            coordinates = vis_util.return_coordinates(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=1,
                min_score_thresh=0.80)

            finger_count = len(coordinates)
            # print( action )

            if deque[-1] == finger_count:
                deque.append(finger_count)
            else:
                deque = [finger_count]

            if len(deque) == 5 and action[0] != 3:
                action[0] = deque[-1]
            elif len(deque) == 5 and action[0] == 3:
                action.append(deque[-1])

            if action[0] == 1:
                if len(coordinates) != 0:

                    coordinates = coordinates[0]
                    column = (int(coordinates[0]) + int(coordinates[1])) / 2
                    row = (int(coordinates[2]) + int(coordinates[3])) / 2
                    #print ( row, column)
                    #cv2.circle(image_np,(int(coordinates[2]), int(coordinates[0])), 5, (0,255,0), -1)
                    #cv2.circle(image_np,(int(coordinates[3]), int(coordinates[1])), 5, (0,0,255), -1)
                    fingertip_tracking_list.append((row, column))

            elif action[0] == 2:
                # cv2.imwrite(chr(key-32) + "_" + str(character_recognition_counter) + "haha.jpg", black_image)
                #########################
                blackboard_gray = black_image
                blur1 = cv2.medianBlur(blackboard_gray, 5)
                blur1 = cv2.GaussianBlur(blur1, (5, 5), 0)
                thresh1 = cv2.threshold(blur1, 0, 255,
                                        cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
                #cv2.imshow("thresh", thresh1)
                _, blackboard_cnts, _ = cv2.findContours(
                    thresh1.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
                if len(blackboard_cnts) >= 1:
                    cnt = sorted(blackboard_cnts,
                                 key=cv2.contourArea,
                                 reverse=True)[0]
                    # areas = [cv2.contourArea(c) for c in blackboard_cnts]
                    # max_index = np.argmax(areas)
                    # cnt=contours[max_index]
                    # print(cv2.contourArea(cnt))

                    if True:
                        x, y, w, h = cv2.boundingRect(cnt)
                        alphabet = blackboard_gray[y - 10:y + h + 10,
                                                   x - 10:x + w + 10]
                        newImage = cv2.resize(alphabet, (28, 28))
                        newImage = np.array(newImage)
                        newImage = newImage.astype('float32') / 255

                        # prediction1 = mlp_model.predict(newImage.reshape(1,28,28))[0]
                        # prediction1 = np.argmax(prediction1)

                        prediction2 = cnn_model.predict(
                            newImage.reshape(1, 28, 28, 1))[0]
                        prediction2 = np.argmax(prediction2)
                        alphabet_drawn = str(letters[int(prediction2) + 1])
                        # print("prediction1 : ",str(letters[int(prediction1)+1]))
                        # print("prediction : ", str(letters[int(prediction2)+1]))

                # Empty the points deque and the blackboard
                # points = deque(maxlen=512)
                blackboard = np.zeros((480, 640, 3), dtype=np.uint8)

                #########################
                character_recognition_counter += 1
                fingertip_tracking_list = []
                black_image = np.zeros((360, 640), dtype=np.uint8)

                # print ("Saving" + chr(key-32) + "_" + str(character_recognition_counter) )

            #cv2.imshow('window', image_np)
            # elif action == 3:
            #   break

            if action[0] == 3:
                if len(action) == 1:
                    pass
                else:
                    print(action[-1])
                    deque, action = [-1], [-1]

            if action[0] == 4:
                fingertip_tracking_list = []
                black_image = np.zeros((360, 640), dtype=np.uint8)

            if action[0] == 5:
                pass
            #print (coordinates if not None else "")

            # Put the result on the screen
            # cv2.putText(image_np, "Multilayer Perceptron : " + str(letters[int(prediction1)+1]), (10, 410), cv2.FONT_HERSHEY_SIMPLEX, 0.7,(255, 255, 255), 2)
            #cv2.putText(image_np, "Convolution Neural Network:  ", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(image_np, "" + alphabet_drawn, (550, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
            # cv2.imshow ("Saving this", black_image)

            for z in range(1, len(fingertip_tracking_list)):
                #cv2.circle(image_np,(int(fingertip_tracking_list[z][0]), int(fingertip_tracking_list[z][1])), 5, (0,0,255), -1)
                cv2.line(image_np, (int(fingertip_tracking_list[z - 1][0]),
                                    int(fingertip_tracking_list[z - 1][1])),
                         (int(fingertip_tracking_list[z][0]),
                          int(fingertip_tracking_list[z][1])), (0, 0, 255), 5)
                cv2.line(black_image, (int(fingertip_tracking_list[z - 1][0]),
                                       int(fingertip_tracking_list[z - 1][1])),
                         (int(fingertip_tracking_list[z][0]),
                          int(fingertip_tracking_list[z][1])), (255, 255, 255),
                         5)

            # cv2.imshow("Image",image_np)
            en = cv2.imencode('.jpg', image_np)[1].tostring()

            return en, alphabet_drawn, action[0]
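SabKuch relies on a set of module-level globals (action, deque, fingertip_tracking_list, letters, black_image, cnn_model, and others) that the fragment never initializes. The block below is a hypothetical initialization, inferred from how those globals are used above; the letters mapping and the model file name are assumptions.

import numpy as np
from tensorflow.keras.models import load_model  # assumed Keras model loader

# Hypothetical module-level state inferred from the global statements in SabKuch
action = [-1]                       # matches the reset in the action[0] == 3 branch
deque = [-1]                        # plain list used as a small history buffer
fingertip_tracking_list = []
black_image = np.zeros((360, 640), dtype=np.uint8)
character_recognition_counter = 0
prediction1, prediction2 = 0, 0
letters = {i + 1: chr(ord('a') + i) for i in range(26)}  # assumed mapping 1..26 -> a..z
cnn_model = load_model('emnist_cnn_model.h5')            # hypothetical file name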
Example 5
def num_detection(name):
    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'
    ID_NAME = name
    FILE = '_num.jpg'
    IMAGE_FOLDER = 'image_test'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph_number.pb')
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training',
                                  'labelmap_number.pbtxt')
    PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_FOLDER, ID_NAME + FILE)

    #Path JSON firebase
    # Number of classes the object detector can identify
    NUM_CLASSES = 12
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    image = cv2.imread(PATH_TO_IMAGE)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_expanded = np.expand_dims(image_rgb, axis=0)
    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})

    # Draw the results of the detection (aka 'visualize the results')

    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.7)

    coordinates = vis_util.return_coordinates(image,
                                              np.squeeze(boxes),
                                              np.squeeze(classes).astype(
                                                  np.int32),
                                              np.squeeze(scores),
                                              category_index,
                                              use_normalized_coordinates=True,
                                              line_thickness=8,
                                              min_score_thresh=0.7)

    for coordinate in coordinates:
        print(coordinate)
        #ymin,ymax,xmin,xmax
        (y1, y2, x1, x2, accuracy, classification) = coordinate

    output = image.copy()
    gray = cv2.cvtColor(output, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 100)

    circle_data = []

    # ensure at least some circles were found
    if circles is not None:
        #convert the (x, y) coordinates and radius of the circles to integers
        circles = np.round(circles[0, :]).astype("int")
        # loop over the (x, y) coordinates and radius of the circles
        for (x, y, r) in circles:
            cv2.rectangle(output, (x - 5, y - 5), (x + 5, y + 5),
                          (0, 128, 255), -1)
            # line vertical
            start_ver, stop_ver = (x - r, y), (x + r, y)
            # line horizontal
            start_hor, stop_hor = (x, y - r), (x, y + r)
            #draw a line cv2.line(img, Point pt1, Point pt2, color[,thickness[,lineType[,shift]]])
            cv2.line(output, start_ver, stop_ver, (0, 0, 0), (3))
            cv2.line(output, start_hor, stop_hor, (0, 0, 0), (3))
            circle_data = [int(x), int(y), int(r)]

    with open(os.path.join(
            CWD_PATH,
            "json_num/" + "script" + ID_NAME.split(".")[0] + ".json"),
              "w",
              encoding='utf-8') as f:
        data = {'coordinate': coordinates, 'circle': circle_data}
        json.dump(data, f, ensure_ascii=False)
        f.write('\n')

    #cv2.imshow('Object detector', output)
    Image.fromarray(output).show()

    # Press any key to close the image
    cv2.waitKey(0)

    # Clean up
    cv2.destroyAllWindows()
Example 6
def hand_detection(name):
    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'
    ID_NAME = name
    FILE = '_hands.jpg'
    DETECT_FOLDER = 'detectcircle'
    IMAGE_FOLDER = 'image_test'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph.pb')
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')
    PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_FOLDER, ID_NAME + FILE)
    NUM_CLASSES = 2

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load image using OpenCV and expand image dimensions to have shape: [1, None, None, 3]
    image = cv2.imread(PATH_TO_IMAGE)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_expanded = np.expand_dims(image_rgb, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})
    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.70)

    coordinates = vis_util.return_coordinates(image,
                                              np.squeeze(boxes),
                                              np.squeeze(classes).astype(
                                                  np.int32),
                                              np.squeeze(scores),
                                              category_index,
                                              use_normalized_coordinates=True,
                                              line_thickness=8,
                                              min_score_thresh=0.70)

    for coordinate in coordinates:
        print(coordinate)
        #ymin,ymax,xmin,xmax
        (y1, y2, x1, x2, accuracy, classification) = coordinate

    with open(os.path.join(
            CWD_PATH,
            "json_hand/" + "script" + ID_NAME.split(".")[0] + ".json"),
              "w",
              encoding='utf-8') as f:
        json.dump(coordinates, f, ensure_ascii=False, indent=4)
        f.write('\n')

    output = image.copy()
    cv2.imshow('Object detector', output)
    # Press any key to close the image
    cv2.waitKey(0)
    # Clean up
    cv2.destroyAllWindows()
Example 7
def detection():
    while(True):
        try:
            data = str(sock.recv(1024),'utf-8')
        except:
            data = ""
        if (data == 'start_object_detection;#'):
            try:
                percentage = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "confidency_threshold")
            except:
                percentage = "0.9"

            try:
                index = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "camera_index")
            except:
                index = "0"

            if os.path.isfile(CWD_TXT):
                pass
            else:
                with open(CWD_TXT, 'a+') as f:
                    f.write('[]' + '\n')

            if (percentage == ""):
                percentage = "0.9"
            percentage = float(percentage)
            print (index)
            if index == "":
                index = 0
            f = open(CWD_TXT, 'r+')
            f.truncate(0)
            try:
                video = cv2.VideoCapture(int(index))
                width  = video.get(cv2.cv2.CAP_PROP_FRAME_WIDTH)
                height = video.get(cv2.cv2.CAP_PROP_FRAME_HEIGHT)
                cam_fps = video.get(cv2.cv2.CAP_PROP_FPS)
            except:
                pass

            elapsed_timer = QElapsedTimer()
            elapsed_timer.start()

            if video is None or not video.isOpened():
                MessageBox("Vendron","Error No such Camera exist", 64)
                detection()
            else :
                sent = True
                while(True):
                    try:
                        data = str(sock.recv(1024),'utf-8')
                    except:
                        pass
                
                    if (data == 'end_object_detection;#'):
                        sent = False
                        cv2.destroyWindow('Object detector')
                        video.release()
                        socketSend("object_detection_ended;#")
                        break
                    else:
                        data = []
                        myList = []
                        myScore = []
                        result_list = []
                        name = []

                        try:
                            fps = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "frame_rate")
                        except:
                            fps = cam_fps
                        try:
                            ymin_1 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "y_min_threshold_1")
                        except:
                            ymin_1 = "80"
                        try:
                            ymax_1 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "y_max_threshold_1")
                        except:
                            ymax_1 = "240"
                        try:
                            ymin_2 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "y_min_threshold_2")
                        except:
                            ymin_2 = "240"
                        try:
                            ymax_2 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "y_max_threshold_2")
                        except:
                            ymax_2 = "400"
                        try:
                            places = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "is_camera_reversed")
                        except:
                            places = "false"
                        try:
                            w2 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "x")
                        except:
                            w2 = "0"
                        try:
                            h2 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "y")
                        except:
                            h2 = "0"
                        try:
                            w1 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "width")
                        except:
                            w1 = "640"
                        try:
                            h1 = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "height")
                        except:
                            h1 = "480"


                        if video is None:
                            pass
                        else:
                            ret, frame = video.read()
                            if(w1 == ""):
                                w1 = "640"
                            if(w2 == ""):
                                w2 = "0"
                            if(h1 == ""):
                                h1 = "480"
                            if(h2 == ""):
                                h2 = "0"

                            w1 = int(w1) + int(w2)
                            h1 = int(h1) + int(h2)
                            frame = frame[int(h2):int(h1),int(w2):int(w1)]
                            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                            frame_expanded = np.expand_dims(frame, axis=0)

                            # Perform the actual detection by running the model with the image as input
                            try:
                                (boxes, scores, classes, num) = sess.run(
                                    [detection_boxes, detection_scores, detection_classes, num_detections],
                                    feed_dict={image_tensor: frame_expanded})
                            except:
                                pass

                            # Draw the results of the detection (aka 'visualize the results')
                            try:
                                vis_util.visualize_boxes_and_labels_on_image_array(
                                    frame,
                                    np.squeeze(boxes),
                                    np.squeeze(classes).astype(np.int32),
                                    np.squeeze(scores),
                                    category_index,
                                    use_normalized_coordinates=True,
                                    line_thickness=3,
                                    min_score_thresh= percentage)

                                data = [category_index.get(value) for index,value in enumerate(classes[0]) if scores[0,index] > percentage]
                                for cl in data:
                                   if cl != None :
                                        myList.append(str(cl['name']))

                                objects = []
                                for index, value in enumerate(classes[0]):
                                    object_dict = {}
                                    if scores[0, index] > percentage:
                                        object_dict[(category_index.get(value)).get('name').encode('utf8')] = \
                                                                                                        scores[0, index]
                                        objects.append(object_dict)

                                coordinates = vis_util.return_coordinates(
                                                frame,
                                                np.squeeze(boxes),
                                                np.squeeze(classes).astype(np.int32),
                                                np.squeeze(scores),
                                                category_index,
                                                use_normalized_coordinates=True,
                                                line_thickness=3,
                                                min_score_thresh= percentage)
                            except:
                                pass
                        
                            if(sent == True):
                                socketSend("object_detection_started;#")
                                sent = False

                            if(places == ""):
                                places = "false"
                            if(ymin_1 == ""):
                                ymin_1 = "80"
                            if(ymin_2 == ""):
                                ymin_2 = "240"
                            if(ymax_1 == ""):
                                ymax_1 = "240"
                            if(ymax_2 == ""):
                                ymax_2 = "400"

                            try:
                                if(places == "true"):
                                    alpha = 0.3;
                                    overlay = frame.copy()
                                    cv2.rectangle(overlay, (0, int(ymin_1)), (int(width), int(ymin_2)),(0, 0, 255), -1)
                                    cv2.addWeighted(overlay, alpha, frame, 1 - alpha,
                                                    0, frame)
                                    overlay_blue = frame.copy()
                                    cv2.rectangle(overlay_blue, (0, int(ymax_1)), (int(width), int(ymax_2)),(255, 0, 0), -1)
                                    cv2.addWeighted(overlay_blue, alpha, frame, 1 - alpha,
                                                    0, frame)
                            
                                elif(places == "false"):
                                    alpha = 0.3;
                                    overlay = frame.copy()
                                    cv2.rectangle(overlay, (0, int(ymax_1)), (int(width), int(ymax_2)),(0, 0, 255), -1)
                                    cv2.addWeighted(overlay, alpha, frame, 1 - alpha,
                                                0, frame)
                                    overlay_blue = frame.copy()
                                    cv2.rectangle(overlay_blue, (0, int(ymin_1)), (int(width), int(ymin_2)),(255, 0, 0), -1)
                                    cv2.addWeighted(overlay_blue, alpha, frame, 1 - alpha,
                                                    0, frame)
                            except:
                                pass

                            if(fps == ""):
                                fps = cam_fps
                            
                            fps = 1/int(fps)

                            print (type(fps))
        
                            while(elapsed_timer.hasExpired(fps)):
                                if coordinates is None:
                                    print("nothing")
                                else:
                                    if video is None:
                                        sent = False
                                        cv2.destroyWindow('Object detector')
                                        socketSend("object_detection_ended;#")
                                        break
                                    
                                    list_1stesult = myList
                                    coordinates_result = coordinates
                                    for ea_list,ea_coor,score in zip(list_1stesult,coordinates_result,objects):
                                        score = str(score)
                                        score = score.split(":")[1]
                                        score = score.replace("}","")
                                        score = score.replace("]","")
                                        score = float(score) * 100
                                        score = str(round(score))
                                        result = os.path.join(ea_list,",",str(ea_coor),",",score)
                                        result = result.replace("[[","[")
                                        result = result.replace("\\","")
                                        result = result.replace("[","")
                                        result = result.replace("]","")
                                        name.append(ea_list)
                                        result_list.append(result)

                                    print (result_list)
                                    result_list = str(result_list).replace("', '","];[")
                                    result_list = result_list.replace("'","")
                                    result_list = result_list.replace("'","")
                                    result_list = result_list.replace(", ",",")
                                    if result_list:
                                        with open(CWD_TXT, "a") as text_file:
                                            text_file.write(str(result_list) + "\n")

                                    if result_list:
                                        with open(CWD_HISTORY,"a") as text_file:
                                            text_file.write(str(result_list) + "\n")
                            
                                    elapsed_timer.start()
                    

                    # All the results have been drawn on the frame, so it's time to display it.
                            try:
                                path_debug = regkey_value(r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge", "debug")
                            except:
                                path_debug = "false"

                            if (path_debug == "true"):
                                try:
                                    cv2.imshow('Object detector', frame)
                                except:
                                    sent = False
                                    cv2.destroyWindow('Object detector')
                                    video.release()
                                    socketSend("object_detection_ended;#")
                                    break
                            else:
                                pass

                            if cv2.waitKey(1) == ord ("q"):
                                pass
Example 8
def run(path):
  	
	# Path to the images from which to extract the objects
	PATH_TO_IMAGE = os.path.join(path) 
	# Path where the extracted objects are saved
	SUPERRES_PATH = os.path.join("superres/") 
	# Load the label map.
	label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 
	categories = label_map_util.convert_label_map_to_categories( 
		label_map, max_num_classes = NUM_CLASSES, use_display_name = True) 
	category_index = label_map_util.create_category_index(categories) 
	  
	# Load the TensorFlow model
	detection_graph = tf.Graph() 
	with detection_graph.as_default(): 
	    od_graph_def = tf.GraphDef() 
	    with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: 
	        serialized_graph = fid.read() 
	        od_graph_def.ParseFromString(serialized_graph) 
	        tf.import_graph_def(od_graph_def, name ='') 
	  
	    sess = tf.Session(graph = detection_graph) 
	  
	# Define the input and output tensors
	  
	# The input tensor is the image
	image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 
	  
	# Output tensors: boxes, confidence scores and classes of the objects
	# Each box represents the area of the image where a detected object was found
	detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 
	detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') 
	detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') 
	  
	# Number of detected objects
	num_detections = detection_graph.get_tensor_by_name('num_detections:0') 
	  
	# Load the image with OpenCV and expand its dimensions to shape [1, None, None, 3], i.e. a single-column array where each item holds the pixel RGB values
	image = cv2.imread(PATH_TO_IMAGE) 
	image_expanded = np.expand_dims(image, axis = 0) 
	  
	# Run the detection
	(boxes, scores, classes, num) = sess.run( 
	    [detection_boxes, detection_scores, detection_classes, num_detections], 
	    feed_dict ={image_tensor: image_expanded}) 
	  
	# Visualize the detection results
	  
	vis_util.visualize_boxes_and_labels_on_image_array( 
	    image, 
	    np.squeeze(boxes), 
	    np.squeeze(classes).astype(np.int32), 
	    np.squeeze(scores), 
	    category_index, 
	    use_normalized_coordinates = True, 
	    line_thickness = 1, 
	    min_score_thresh = 0.20) 

	coordinates = vis_util.return_coordinates(
		image,
		np.squeeze(boxes),
		np.squeeze(classes).astype(np.int32),
		np.squeeze(scores),
		category_index,
		use_normalized_coordinates=True,
		line_thickness=8,
		min_score_thresh=0.20)

	if not coordinates:
		raise Exception("No license plate detected")

	i = 0
	# For each set of coordinates, save the corresponding portion of the image
	for coordinate in coordinates:
		crop_img = image[coordinate[0]+1:coordinate[1], coordinate[2]+1:coordinate[3]]
		try:
		  i += 1
		  cv2.imwrite(SUPERRES_PATH + str(i)+'.jpg', crop_img)
		except:
		  continue

	if i == 0:
		raise Exception("Error while saving the extracted objects")
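run() above also depends on module-level PATH_TO_CKPT, PATH_TO_LABELS and NUM_CLASSES that the fragment never defines. A hypothetical setup and invocation could look like the following; the folder names, the single-class label map and the test image are assumptions, not values from the original project.

import os
import cv2
import numpy as np
import tensorflow as tf
from utils import label_map_util
from utils import visualization_utils as vis_util

# Assumed module-level configuration consumed by run()
CWD_PATH = os.getcwd()
PATH_TO_CKPT = os.path.join(CWD_PATH, 'inference_graph', 'frozen_inference_graph.pb')
PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')
NUM_CLASSES = 1  # e.g. a single 'plate' class

os.makedirs('superres', exist_ok=True)   # run() writes its crops here
run('plates/car01.jpg')                  # hypothetical input image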
Example 9
def Predict(img,
            detection_graph,
            sess,
            MODEL_FOLDER,
            labels2show,
            threshold=0.7):

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Number of classes the object detector can identify
    NUM_CLASSES = 90

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_FOLDER,
                                  'mscoco_complete_label_map.pbtxt')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # color scheme for different classes
    color_map = {
        'person': 'DeepSkyBlue',
        'dog': 'IndianRed',
        'cat': 'yellow',
        'chair': 'Cyan',
        'bottle': 'Orange'
    }

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # threshold of detection
    thresh = threshold

    items = []
    coordinates = []
    #if you want to resize to tune inference
    #img = cv2.resize(img_org, (300,300))
    img_expanded = np.expand_dims(img, axis=0)
    #print(img_expanded.shape)
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: img_expanded})

    objects = []
    for index, value in enumerate(classes[0]):
        object_dict = {}
        if scores[0, index] > thresh:
            object_dict[(
                category_index.get(value)).get('name')] = scores[0, index]
            objects.append(object_dict)
            #print (objects)

    #Get all the detected class labels in one list
    for y in objects:
        for keys in y.keys():
            m = list(y.keys())[0]
            items.append(m)

    # Get coordinates of the detected classes
    coordinates = vis_util.return_coordinates(img,
                                              np.squeeze(boxes),
                                              np.squeeze(classes).astype(
                                                  np.int32),
                                              np.squeeze(scores),
                                              category_index,
                                              use_normalized_coordinates=True,
                                              line_thickness=10,
                                              min_score_thresh=thresh)

    new_items = []
    new_coordinates = []
    display_str_list = []
    for i, item in enumerate(items):
        if item.lower() in labels2show:
            new_items.append(item.lower())
            new_coordinates.append(coordinates[i][:-1])
            display_str_list.append(
                [item + ' : ' + str(int(coordinates[i][-1])) + '%'])

    for i, box in enumerate(new_coordinates):

        ymin, ymax, xmin, xmax = box[0], box[1], box[2], box[3]
        display_str = display_str_list[i]
        color = color_map[new_items[i]]
        vis_util.draw_bounding_box_on_image_array(
            img,
            ymin,
            xmin,
            ymax,
            xmax,
            color=color,
            thickness=4,
            display_str_list=display_str,
            use_normalized_coordinates=False)

    return new_coordinates, new_items, img
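Predict() expects a frozen COCO graph and an open session built by the caller. A hypothetical caller might look like this; the model folder name and the test image are assumptions, and the folder must also contain the mscoco_complete_label_map.pbtxt file that Predict loads.

import cv2
import tensorflow as tf

MODEL_FOLDER = 'ssd_mobilenet_v1_coco'  # assumed; must hold the frozen graph and label map
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(MODEL_FOLDER + '/frozen_inference_graph.pb', 'rb') as fid:
        od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)

img = cv2.imread('test.jpg')
boxes, labels, annotated = Predict(img, detection_graph, sess, MODEL_FOLDER,
                                   labels2show=['person', 'dog'], threshold=0.7)
cv2.imwrite('annotated.jpg', annotated)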
Example 10
def detectObjects():
    # Import packages
    import os
    import cv2
    import numpy as np
    import tensorflow as tf
    import sys

    # This is needed since the notebook is stored in the object_detection folder.
    sys.path.append("..")

    # Import utilites
    from utils import label_map_util
    from utils import visualization_utils as vis_util

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'
    # IMAGE_NAME = 'test1.jpg'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph .pb file, which contains the model that is used
    # for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph.pb')

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

    # Path to image
    # PATH_TO_IMAGE = os.path.join(CWD_PATH,IMAGE_NAME)

    # Number of classes the object detector can identify
    NUM_CLASSES = 4

    # Load the label map.
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `king`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Load image using OpenCV and
    # expand image dimensions to have shape: [1, None, None, 3]
    # i.e. a single-column array, where each item in the column has the pixel RGB value
    # image = cv2.imread(PATH_TO_IMAGE)

    # -------------------
    cap = cv2.VideoCapture(1)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    ret, image = cap.read()

    image_expanded = np.expand_dims(image, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})
    print(boxes.shape)
    # Draw the results of the detection (aka 'visualize the results')

    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.989)
    coordinates = vis_util.return_coordinates(image,
                                              np.squeeze(boxes),
                                              np.squeeze(classes).astype(
                                                  np.int32),
                                              np.squeeze(scores),
                                              category_index,
                                              use_normalized_coordinates=True,
                                              line_thickness=8,
                                              min_score_thresh=0.989)
    # All the results have been drawn on image. Now display the image.
    return image, coordinates

    # Press any key to close the image
    cv2.waitKey(0)

    # Clean up
    cv2.destroyAllWindows()
Example 11
            feed_dict={image_tensor: image_np_expanded})

        vis_util.visualize_boxes_and_labels_on_image_array(
            display_image,
            np.squeeze(detection_boxes),
            np.squeeze(detection_classes).astype(np.int32),
            np.squeeze(detection_scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.80)
        coordinates = vis_util.return_coordinates(
            display_image,
            np.squeeze(detection_boxes),
            np.squeeze(detection_classes).astype(np.int32),
            np.squeeze(detection_scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.80)
        scale_percent = 20  # percent of original size
        width = int(image_np.shape[1] * scale_percent / 100)
        height = int(image_np.shape[0] * scale_percent / 100)
        dim = (width, height)

        # Display output
        objects = []
        isim = []

        for index, value in enumerate(classes[0]):
            object_dict = {}
Example 12
 if whichh in image_path:
     continue
 image = Image.open(image_path)
 # the array based representation of the image will be used later in order to prepare the
 # result image with boxes and labels on it.
 image_np = load_image_into_numpy_array(image)
 # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
 image_np_expanded = np.expand_dims(image_np, axis=0)
 # Actual detection.
 output_dict = run_inference_for_single_image(image_np, detection_graph)
 # Visualization of the results of a detection.
 #print(len(output_dict['detection_boxes']),"<<<<<<<<<",image_path.split('/')[-1])
 coordinates = vis_util.return_coordinates(image_np,
                                           output_dict['detection_boxes'],
                                           output_dict['detection_classes'],
                                           output_dict['detection_scores'],
                                           category_index,
                                           use_normalized_coordinates=True,
                                           line_thickness=8,
                                           min_score_thresh=0.5)
 #[ymin, ymax, xmin, xmax, (box_to_score_map[box]*100)] y->height x->width
 #print(coordinates,"<<<<<<<<<",image_path.split('/')[-1])
 vis_util.visualize_boxes_and_labels_on_image_array(
     image_np,
     output_dict['detection_boxes'],
     output_dict['detection_classes'],
     output_dict['detection_scores'],
     category_index,
     instance_masks=output_dict.get('detection_masks'),
     use_normalized_coordinates=True,
     line_thickness=8)
 #plt.figure(figsize=IMAGE_SIZE)
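Examples 2 and 12 also call load_image_into_numpy_array and run_inference_for_single_image, two helpers from the TensorFlow Object Detection API tutorial notebook that neither snippet includes. A condensed TF 1.x sketch is given below; it produces the output_dict keys the examples index into, but session handling and mask support are simplified, so treat it as an approximation rather than the exact code those projects used.

import numpy as np
import tensorflow as tf

def load_image_into_numpy_array(image):
    # Convert a PIL image into a (height, width, 3) uint8 array.
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

def run_inference_for_single_image(image, graph):
    # Run one image through the frozen graph and return numpy results.
    with graph.as_default():
        with tf.Session() as sess:
            tensor_dict = {
                key: graph.get_tensor_by_name(key + ':0')
                for key in ['num_detections', 'detection_boxes',
                            'detection_scores', 'detection_classes']
            }
            image_tensor = graph.get_tensor_by_name('image_tensor:0')
            if image.ndim == 3:
                # The model expects a batch, so add a leading dimension.
                image = np.expand_dims(image, axis=0)
            output_dict = sess.run(tensor_dict, feed_dict={image_tensor: image})
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
    return output_dict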
Example 13
    def loopimg(name="Picture 150.png"):
        IMAGE_NAME = str(name)

        print(name)
        PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_NAME)

        # Number of classes the object detector can identify
        NUM_CLASSES = 4

        # Load the label map.
        #
        # Here we use internal utility functions, but anything that returns a
        # dictionary mapping integers to appropriate string labels would be fine
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)

        # Load the Tensorflow model into memory.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            sess = tf.Session(graph=detection_graph)

        # Define input and output tensors (i.e. data) for the object detection classifier

        # Input tensor is the image
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

        # Output tensors are the detection boxes, scores, and classes
        # Each box represents a part of the image where a particular object was detected
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')

        # Each score represents level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')

        # Number of objects detected
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # Load image using OpenCV and
        # expand image dimensions to have shape: [1, None, None, 3]
        # i.e. a single-column array, where each item in the column has the pixel RGB value
        image = cv2.imread(PATH_TO_IMAGE)
        image_expanded = np.expand_dims(image, axis=0)

        # Perform the actual detection by running the model with the image as input
        (boxes, scores, classes,
         num) = sess.run([
             detection_boxes, detection_scores, detection_classes,
             num_detections
         ],
                         feed_dict={image_tensor: image_expanded})
        printcount = 0
        it = 0
        boxesFiltered = []
        final_score = np.squeeze(scores)
        count = 0
        for i in range(100):
            if scores is None or final_score[i] > 0.5:
                count = count + 1

        for i in classes[0]:
            printcount = printcount + 1
            if (category_index[i]['name'] == "serialreadable"):
                print("cheese")
                checkingSerial = True
                coordinates = vis_util.return_coordinates(
                    image,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8,
                    min_score_thresh=0.90)
                print(category_index[i]['name'])
                print("cheese")
                if (coordinates != None):
                    print(coordinates)
                    print(i)
                    it += 1

                    x = int(coordinates[2])
                    y = int(coordinates[0])
                    w = int(coordinates[3])
                    h = int(coordinates[1])
                    #im2, contours, hierarchy = cv2.findContours(frame,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
                    #x, y, w, h = cv2.boundingRect(contours[i])
                    roi = image[y:y + h, x:x + w]
                    #gaussian_3 = cv2.GaussianBlur(roi, (9,9), 10.0)
                    #unsharp_image = cv2.addWeighted(roi, 1.5, gaussian_3, -0.5, 0, roi)

                    roi = cv2.resize(roi,
                                     None,
                                     fx=4,
                                     fy=4,
                                     interpolation=cv2.INTER_CUBIC)
                    cv2.fastNlMeansDenoisingColored(roi, None, 15, 15, 7, 21)
                    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                    kernel = np.zeros((3, 3), np.uint8)
                    #roi = cv2.dilate(roi, kernel, iterations=1)
                    roi = cv2.erode(roi, kernel, iterations=3)
                    #roi = cv2.bilateralFilter(roi,9,75,75)
                    roi = cv2.medianBlur(roi, 3)
                    #edges = cv2.Canny(roi,100,200)
                    #img_dilation = c_m.dilate(edges,N=3,iterations=2)
                    #kernel = np.ones((5,5), np.uint8)
                    #img_dilation = cv2.dilate(roi, kernel, iterations=2)
                    roi = cv2.adaptiveThreshold(roi, 245,
                                                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                                cv2.THRESH_BINARY_INV, 115, 1)
                    #ret,roi = cv2.threshold(roi,127,255,cv2.THRESH_BINARY_INV)

                    #plt.subplot(121),plt.imshow(roi,cmap = 'gray')
                    #plt.title('Original Image'), plt.xticks([]), plt.yticks([])
                    #plt.subplot(122),plt.imshow(edges,cmap = 'gray')
                    #plt.title('Edge Image'), plt.xticks([]), plt.yticks([])
                    roi = cv2.medianBlur(roi, 3)
                    #roi = cv2.bilateralFilter(roi,9,75,75)

                    cv2.imwrite(str(name) + "zaca.png", roi)
                    print("zaca")
                    #contours=c_m.find_components(edges)
                    #c_m.process_image(str(it)+"roi.jpg",str(it)+"roi.jpg")
                    #API.SetVariable("classify_enable_learning","0");
                    #API.SetVariable("classify_enable_adaptive_matcher","0")
                    #API.
                    r = 0
                    text = readtext.readtext(roi, r)

                    print("{}\n".format(text))
                    if (text == None):
                        text = str('')
                    else:
                        img2 = np.zeros((512, 512, 3), np.uint8)
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cv2.putText(img2, text, (10, 500), font, 1,
                                    (255, 255, 255), 2, cv2.LINE_AA)
                        cv2.imshow("Results", img2)
                        checkingSerial = False
                else:
                    break
                return name
                if (printcount == count):
                    break
                return name
        # Draw the results of the detection (aka 'visualize the results')
        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.80)
Example 14
def detection():

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Load image using OpenCV and
    # expand image dimensions to have shape: [1, None, None, 3]
    # i.e. a single-column array, where each item in the column has the pixel RGB value
    image = cv2.imread(PATH_TO_IMAGE)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_expanded = np.expand_dims(image_rgb, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})

    # Draw the results of the detection (aka 'visualize the results')

    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.70)

    coordinates = vis_util.return_coordinates(image,
                                              np.squeeze(boxes),
                                              np.squeeze(classes).astype(
                                                  np.int32),
                                              np.squeeze(scores),
                                              category_index,
                                              use_normalized_coordinates=True,
                                              line_thickness=8,
                                              min_score_thresh=0.70)

    for coordinate in coordinates:
        print(coordinate)
        #ymin,ymax,xmin,xmax
        (y1, y2, x1, x2, accuracy, classification) = coordinate

    with open(os.path.join(
            CWD_PATH,
            "json_hand/" + "script" + IMAGE_NAME.split(".")[0] + ".json"),
              "w",
              encoding='utf-8') as f:
        json.dump(coordinates, f, ensure_ascii=False, indent=4)
        f.write('\n')

    output = image.copy()
    cv2.imshow('Object detector', output)

    # Press any key to close the image
    cv2.waitKey(0)

    # Clean up
    cv2.destroyAllWindows()