def face(image_np):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    (boxes, scores, classes, num) = face_session.run(
        [face_detection_boxes, face_detection_scores,
         face_detection_classes, face_num_detections],
        feed_dict={face_image_tensor: image_np_expanded})
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        face_category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.85)
    # Get coordinates of the detected boxes: ymin, ymax, xmin, xmax
    coordinates_face = vis_util.return_coordinates(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        face_category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.85)
    print("face:", *coordinates_face)

def detection(inputImg, image_path):
    global predictions
    print(inputImg.size)
    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
    coordinates = vis_util.return_coordinates(
        image_np,
        np.squeeze(output_dict['detection_boxes']),
        np.squeeze(output_dict['detection_classes']).astype(np.int32),
        np.squeeze(output_dict['detection_scores']),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.30)
    a = os.path.basename(image_path)
    # b = os.path.splitext(a)[0]
    print(a, ':', coordinates)
    newImage = np.copy(inputImg)
    predictions = []
    for i in range(len(coordinates)):
        label = coordinates[i][5]
        # confidence = coordinates[i][4]
        top_x = coordinates[i][2]
        top_y = coordinates[i][0]
        btm_x = coordinates[i][3]
        btm_y = coordinates[i][1]
        newImage = cv2.rectangle(newImage, (top_x - 5, top_y - 5),
                                 (btm_x + 5, btm_y + 5), (255, 0, 0), 1)
        newImage = cv2.putText(newImage, label, (top_x, top_y + 20),
                               cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                               (0, 0, 0), 1, cv2.LINE_AA)
        predictions.append({
            'label': label,
            'topleft': {'x': top_x, 'y': top_y},
            'bottomright': {'x': btm_x, 'y': btm_y}
        })
    cv2.imwrite('newImage.jpg', newImage)
    return (newImage, predictions)

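# --- Usage sketch (added): a minimal, hypothetical driver for detection() above.
# It assumes the globals the function relies on (detection_graph, category_index,
# load_image_into_numpy_array, run_inference_for_single_image) are already set up;
# 'test.jpg' is an assumed sample path, not part of the original code.
import numpy as np
from PIL import Image

img_np = np.array(Image.open('test.jpg'))
annotated, preds = detection(img_np, 'test.jpg')
for p in preds:
    print(p['label'], p['topleft'], p['bottomright'])
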
count = 0
for i in range(100):
    if scores is None or final_score[i] > 0.5:
        count = count + 1
print('count', count)
printcount = 0
for i in classes[0]:
    printcount = printcount + 1
    if category_index[i]['name'] == "serialreadable":
        checkingSerial = True
        coordinates = vis_util.return_coordinates(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.90)
        print(category_index[i]['name'])
        # if (coordinates.count == 4):
        print(coordinates)
        print(i)
        it += 1
        if it > 10:
            it = 0
        if coordinates:
            x = int(coordinates[2])
            y = int(coordinates[0])
            w = int(coordinates[3])

def SabKuch(en):
    global action, deque
    alphabet_drawn = ""
    global prediction1, prediction2, fingertip_tracking_list, letters, black_image, character_recognition_counter, label_map, categories, category_index
    image_np = cv2.imdecode(np.frombuffer(en, np.uint8), 1)
    # image_np = cv2.flip(image_np, 1)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})
    # Visualization of the results of a detection.
    """vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    """
    image_np = cv2.resize(image_np, (640, 360))
    # image_np = cv2.warpAffine(image_np, M, (640, 360))
    coordinates = vis_util.return_coordinates(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=1,
        min_score_thresh=0.80)
    finger_count = len(coordinates)
    # print(action)
    if deque[-1] == finger_count:
        deque.append(finger_count)
    else:
        deque = [finger_count]
    if len(deque) == 5 and action[0] != 3:
        action[0] = deque[-1]
    elif len(deque) == 5 and action[0] == 3:
        action.append(deque[-1])
    if action[0] == 1:
        if len(coordinates) != 0:
            coordinates = coordinates[0]
            column = (int(coordinates[0]) + int(coordinates[1])) / 2
            row = (int(coordinates[2]) + int(coordinates[3])) / 2
            # print(row, column)
            # cv2.circle(image_np, (int(coordinates[2]), int(coordinates[0])), 5, (0, 255, 0), -1)
            # cv2.circle(image_np, (int(coordinates[3]), int(coordinates[1])), 5, (0, 0, 255), -1)
            fingertip_tracking_list.append((row, column))
    elif action[0] == 2:
        # cv2.imwrite(chr(key-32) + "_" + str(character_recognition_counter) + "haha.jpg", black_image)
        #########################
        blackboard_gray = black_image
        blur1 = cv2.medianBlur(blackboard_gray, 5)
        blur1 = cv2.GaussianBlur(blur1, (5, 5), 0)
        thresh1 = cv2.threshold(blur1, 0, 255,
                                cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        # cv2.imshow("thresh", thresh1)
        _, blackboard_cnts, _ = cv2.findContours(
            thresh1.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        if len(blackboard_cnts) >= 1:
            cnt = sorted(blackboard_cnts, key=cv2.contourArea, reverse=True)[0]
            # areas = [cv2.contourArea(c) for c in blackboard_cnts]
            # max_index = np.argmax(areas)
            # cnt = contours[max_index]
            # print(cv2.contourArea(cnt))
            if True:
                x, y, w, h = cv2.boundingRect(cnt)
                alphabet = blackboard_gray[y - 10:y + h + 10, x - 10:x + w + 10]
                newImage = cv2.resize(alphabet, (28, 28))
                newImage = np.array(newImage)
                newImage = newImage.astype('float32') / 255
                # prediction1 = mlp_model.predict(newImage.reshape(1, 28, 28))[0]
                # prediction1 = np.argmax(prediction1)
                prediction2 = cnn_model.predict(newImage.reshape(1, 28, 28, 1))[0]
                prediction2 = np.argmax(prediction2)
                alphabet_drawn = str(letters[int(prediction2) + 1])
                # print("prediction1 : ", str(letters[int(prediction1) + 1]))
                # print("prediction : ", str(letters[int(prediction2) + 1]))
                # Empty the points deque and the blackboard
                # points = deque(maxlen=512)
                blackboard = np.zeros((480, 640, 3), dtype=np.uint8)
        #########################
        character_recognition_counter += 1
        fingertip_tracking_list = []
        black_image = np.zeros((360, 640), dtype=np.uint8)
        # print("Saving" + chr(key-32) + "_" + str(character_recognition_counter))
        # cv2.imshow('window', image_np)
    # elif action == 3:
    #     break
    if action[0] == 3:
        if len(action) == 1:
            pass
        else:
            print(action[-1])
            deque, action = [-1], [-1]
    if action[0] == 4:
        fingertip_tracking_list = []
        black_image = np.zeros((360, 640), dtype=np.uint8)
    if action[0] == 5:
        pass
    # print(coordinates if not None else "")
    # Put the result on the screen
    # cv2.putText(image_np, "Multilayer Perceptron : " + str(letters[int(prediction1) + 1]),
    #             (10, 410), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    # cv2.putText(image_np, "Convolution Neural Network: ", (50, 50),
    #             cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(image_np, "" + alphabet_drawn, (550, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 2)
    # cv2.imshow("Saving this", black_image)
    for z in range(1, len(fingertip_tracking_list)):
        # cv2.circle(image_np, (int(fingertip_tracking_list[z][0]),
        #            int(fingertip_tracking_list[z][1])), 5, (0, 0, 255), -1)
        cv2.line(image_np,
                 (int(fingertip_tracking_list[z - 1][0]),
                  int(fingertip_tracking_list[z - 1][1])),
                 (int(fingertip_tracking_list[z][0]),
                  int(fingertip_tracking_list[z][1])),
                 (0, 0, 255), 5)
        cv2.line(black_image,
                 (int(fingertip_tracking_list[z - 1][0]),
                  int(fingertip_tracking_list[z - 1][1])),
                 (int(fingertip_tracking_list[z][0]),
                  int(fingertip_tracking_list[z][1])),
                 (255, 255, 255), 5)
    # cv2.imshow("Image", image_np)
    en = cv2.imencode('.jpg', image_np)[1].tobytes()
    return en, alphabet_drawn, action[0]

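# --- Usage sketch (added): SabKuch() expects a JPEG-encoded byte string (e.g. a
# frame received over a socket) and returns the annotated JPEG bytes, the last
# recognized letter, and the current action code. This assumes the module globals
# it depends on (deque, action, fingertip_tracking_list, black_image, letters,
# cnn_model, sess, detection_graph, category_index) are already initialized;
# 'frame.jpg' is an assumed sample path.
with open('frame.jpg', 'rb') as fh:
    encoded = fh.read()
out_bytes, letter, current_action = SabKuch(encoded)
with open('annotated_frame.jpg', 'wb') as fh:
    fh.write(out_bytes)
print(letter, current_action)
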
def num_detection(name):
    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'
    ID_NAME = name
    FILE = '_num.jpg'
    IMAGE_FOLDER = 'image_test'
    # Grab path to current working directory
    CWD_PATH = os.getcwd()
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph_number.pb')
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap_number.pbtxt')
    PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_FOLDER, ID_NAME + FILE)
    # Path to the JSON output for Firebase
    # Number of classes the object detector can identify
    NUM_CLASSES = 12
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)
    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    image = cv2.imread(PATH_TO_IMAGE)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_expanded = np.expand_dims(image_rgb, axis=0)
    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})
    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.7)
    coordinates = vis_util.return_coordinates(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.7)
    for coordinate in coordinates:
        print(coordinate)
        # ymin, ymax, xmin, xmax
        (y1, y2, x1, x2, accuracy, classification) = coordinate
    output = image.copy()
    gray = cv2.cvtColor(output, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 100)
    circle_data = []
    # Ensure at least some circles were found
    if circles is not None:
        # Convert the (x, y) coordinates and radius of the circles to integers
        circles = np.round(circles[0, :]).astype("int")
        # Loop over the (x, y) coordinates and radius of the circles
        for (x, y, r) in circles:
            cv2.rectangle(output, (x - 5, y - 5), (x + 5, y + 5),
                          (0, 128, 255), -1)
            # Vertical line through the circle centre
            start_ver, stop_ver = (x - r, y), (x + r, y)
            # Horizontal line through the circle centre
            start_hor, stop_hor = (x, y - r), (x, y + r)
            # Draw the crosshair: cv2.line(img, pt1, pt2, color[, thickness[, lineType[, shift]]])
            cv2.line(output, start_ver, stop_ver, (0, 0, 0), 3)
            cv2.line(output, start_hor, stop_hor, (0, 0, 0), 3)
            # Note: this keeps only the last circle found
            circle_data = [int(x), int(y), int(r)]
    with open(os.path.join(
            CWD_PATH,
            "json_num/" + "script" + ID_NAME.split(".")[0] + ".json"),
            "w", encoding='utf-8') as f:
        data = {'coordinate': coordinates, 'circle': circle_data}
        json.dump(data, f, ensure_ascii=False)
        f.write('\n')
    # cv2.imshow('Object detector', output)
    Image.fromarray(output).show()
    # Press any key to close the image
    cv2.waitKey(0)
    # Clean up
    cv2.destroyAllWindows()

def hand_detection(name):
    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'
    ID_NAME = name
    FILE = '_hands.jpg'
    DETECT_FOLDER = 'detectcircle'
    IMAGE_FOLDER = 'image_test'
    # Grab path to current working directory
    CWD_PATH = os.getcwd()
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph.pb')
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')
    PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_FOLDER, ID_NAME + FILE)
    NUM_CLASSES = 2
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)
    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Load image using OpenCV and expand image dimensions
    # to have shape: [1, None, None, 3]
    image = cv2.imread(PATH_TO_IMAGE)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_expanded = np.expand_dims(image_rgb, axis=0)
    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})
    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.70)
    coordinates = vis_util.return_coordinates(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.70)
    for coordinate in coordinates:
        print(coordinate)
        # ymin, ymax, xmin, xmax
        (y1, y2, x1, x2, accuracy, classification) = coordinate
    with open(os.path.join(
            CWD_PATH,
            "json_hand/" + "script" + ID_NAME.split(".")[0] + ".json"),
            "w", encoding='utf-8') as f:
        json.dump(coordinates, f, ensure_ascii=False, indent=4)
        f.write('\n')
    output = image.copy()
    cv2.imshow('Object detector', output)
    # Press any key to close the image
    cv2.waitKey(0)
    # Clean up
    cv2.destroyAllWindows()

def detection():
    # Registry key used for all configuration reads below
    REG_PATH = r"HKEY_CURRENT_USER\Software\Silkron\Vendron\ai_vision_fridge"
    while True:
        try:
            data = str(sock.recv(1024), 'utf-8')
        except:
            data = ""
        if data == 'start_object_detection;#':
            try:
                percentage = regkey_value(REG_PATH, "confidency_threshold")
            except:
                percentage = "0.9"
            try:
                index = regkey_value(REG_PATH, "camera_index")
            except:
                index = "0"
            if not os.path.isfile(CWD_TXT):
                with open(CWD_TXT, 'a+') as f:
                    f.write('[]' + '\n')
            if percentage == "":
                percentage = "0.9"
            percentage = float(percentage)
            print(index)
            if index == "":
                index = 0
            f = open(CWD_TXT, 'r+')
            f.truncate(0)
            try:
                video = cv2.VideoCapture(int(index))
                width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
                height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
                cam_fps = video.get(cv2.CAP_PROP_FPS)
            except:
                pass
            elapsed_timer = QElapsedTimer()
            elapsed_timer.start()
            if video is None or not video.isOpened():
                MessageBox("Vendron", "Error: No such camera exists", 64)
                detection()
            else:
                sent = True
                while True:
                    try:
                        data = str(sock.recv(1024), 'utf-8')
                    except:
                        pass
                    if data == 'end_object_detection;#':
                        sent = False
                        cv2.destroyWindow('Object detector')
                        video.release()
                        socketSend("object_detection_ended;#")
                        break
                    else:
                        data = []
                        myList = []
                        myScore = []
                        result_list = []
                        name = []
                        try:
                            fps = regkey_value(REG_PATH, "frame_rate")
                        except:
                            fps = cam_fps
                        try:
                            ymin_1 = regkey_value(REG_PATH, "y_min_threshold_1")
                        except:
                            ymin_1 = "80"
                        try:
                            ymax_1 = regkey_value(REG_PATH, "y_max_threshold_1")
                        except:
                            ymax_1 = "240"
                        try:
                            ymin_2 = regkey_value(REG_PATH, "y_min_threshold_2")
                        except:
                            ymin_2 = "240"
                        try:
                            ymax_2 = regkey_value(REG_PATH, "y_max_threshold_2")
                        except:
                            ymax_2 = "400"
                        try:
                            places = regkey_value(REG_PATH, "is_camera_reversed")
                        except:
                            places = "false"
                        try:
                            w2 = regkey_value(REG_PATH, "x")
                        except:
                            w2 = "0"
                        try:
                            h2 = regkey_value(REG_PATH, "y")
                        except:
                            h2 = "0"
                        try:
                            w1 = regkey_value(REG_PATH, "width")
                        except:
                            w1 = "640"
                        try:
                            h1 = regkey_value(REG_PATH, "height")
                        except:
                            h1 = "480"
                        if video is None:
                            pass
                        else:
                            ret, frame = video.read()
                            if w1 == "":
                                w1 = "640"
                            if w2 == "":
                                w2 = "0"
                            if h1 == "":
                                h1 = "480"
                            if h2 == "":
                                h2 = "0"
                            w1 = int(w1) + int(w2)
                            h1 = int(h1) + int(h2)
                            frame = frame[int(h2):int(h1), int(w2):int(w1)]
                            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                            frame_expanded = np.expand_dims(frame, axis=0)
                            # Perform the actual detection by running the model with the image as input
                            try:
                                (boxes, scores, classes, num) = sess.run(
                                    [detection_boxes, detection_scores,
                                     detection_classes, num_detections],
                                    feed_dict={image_tensor: frame_expanded})
                            except:
                                pass
                            # Draw the results of the detection (aka 'visualize the results')
                            try:
                                vis_util.visualize_boxes_and_labels_on_image_array(
                                    frame,
                                    np.squeeze(boxes),
                                    np.squeeze(classes).astype(np.int32),
                                    np.squeeze(scores),
                                    category_index,
                                    use_normalized_coordinates=True,
                                    line_thickness=3,
                                    min_score_thresh=percentage)
                                data = [category_index.get(value)
                                        for index, value in enumerate(classes[0])
                                        if scores[0, index] > percentage]
                                for cl in data:
                                    if cl is not None:
                                        myList.append(str(cl['name']))
                                objects = []
                                for index, value in enumerate(classes[0]):
                                    object_dict = {}
                                    if scores[0, index] > percentage:
                                        object_dict[(category_index.get(value)).get('name').encode('utf8')] = \
                                            scores[0, index]
                                        objects.append(object_dict)
                                coordinates = vis_util.return_coordinates(
                                    frame,
                                    np.squeeze(boxes),
                                    np.squeeze(classes).astype(np.int32),
                                    np.squeeze(scores),
                                    category_index,
                                    use_normalized_coordinates=True,
                                    line_thickness=3,
                                    min_score_thresh=percentage)
                            except:
                                pass
                            if sent == True:
                                socketSend("object_detection_started;#")
                                sent = False
                            if places == "":
                                places = "false"
                            if ymin_1 == "":
                                ymin_1 = "80"
                            if ymin_2 == "":
                                ymin_2 = "240"
                            if ymax_1 == "":
                                ymax_1 = "240"
                            if ymax_2 == "":
                                ymax_2 = "400"
                            try:
                                if places == "true":
                                    alpha = 0.3
                                    overlay = frame.copy()
                                    cv2.rectangle(overlay, (0, int(ymin_1)),
                                                  (int(width), int(ymin_2)), (0, 0, 255), -1)
                                    cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)
                                    overlay_blue = frame.copy()
                                    cv2.rectangle(overlay_blue, (0, int(ymax_1)),
                                                  (int(width), int(ymax_2)), (255, 0, 0), -1)
                                    cv2.addWeighted(overlay_blue, alpha, frame, 1 - alpha, 0, frame)
                                elif places == "false":
                                    alpha = 0.3
                                    overlay = frame.copy()
                                    cv2.rectangle(overlay, (0, int(ymax_1)),
                                                  (int(width), int(ymax_2)), (0, 0, 255), -1)
                                    cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)
                                    overlay_blue = frame.copy()
                                    cv2.rectangle(overlay_blue, (0, int(ymin_1)),
                                                  (int(width), int(ymin_2)), (255, 0, 0), -1)
                                    cv2.addWeighted(overlay_blue, alpha, frame, 1 - alpha, 0, frame)
                            except:
                                pass
                            if fps == "":
                                fps = cam_fps
                            fps = 1 / int(fps)
                            print(type(fps))
                            while elapsed_timer.hasExpired(fps):
                                if coordinates is None:
                                    print("nothing")
                                else:
                                    if video is None:
                                        sent = False
                                        cv2.destroyWindow('Object detector')
                                        socketSend("object_detection_ended;#")
                                        break
                                    list_result = myList
                                    coordinates_result = coordinates
                                    for ea_list, ea_coor, score in zip(
                                            list_result, coordinates_result, objects):
                                        score = str(score)
                                        score = score.split(":")[1]
                                        score = score.replace("}", "")
                                        score = score.replace("]", "")
                                        score = float(score) * 100
                                        score = str(round(score))
                                        result = os.path.join(ea_list, ",", str(ea_coor), ",", score)
                                        result = result.replace("[[", "[")
                                        result = result.replace("\\", "")
                                        result = result.replace("[", "")
                                        result = result.replace("]", "")
                                        name.append(ea_list)
                                        result_list.append(result)
                                    print(result_list)
                                    result_list = str(result_list).replace("', '", "];[")
                                    result_list = result_list.replace("'", "")
                                    result_list = result_list.replace(", ", ",")
                                    if result_list:
                                        with open(CWD_TXT, "a") as text_file:
                                            text_file.write(str(result_list) + "\n")
                                        with open(CWD_HISTORY, "a") as text_file:
                                            text_file.write(str(result_list) + "\n")
                                elapsed_timer.start()
                            # All the results have been drawn on the frame, so it's time to display it.
                            try:
                                path_debug = regkey_value(REG_PATH, "debug")
                            except:
                                path_debug = "false"
                            if path_debug == "true":
                                try:
                                    cv2.imshow('Object detector', frame)
                                except:
                                    sent = False
                                    cv2.destroyWindow('Object detector')
                                    video.release()
                                    socketSend("object_detection_ended;#")
                                    break
                            else:
                                pass
                            if cv2.waitKey(1) == ord("q"):
                                pass

def run(path):
    # Path to the image from which objects will be extracted
    PATH_TO_IMAGE = os.path.join(path)
    # Path where the extracted objects are saved
    SUPERRES_PATH = os.path.join("superres/")
    # Load the label map.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Load the TensorFlow model
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)
    # Input and output tensors
    # The input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors: boxes, confidence scores and classes of the detected objects
    # A box marks the area of the image where a recognized object sits
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of detected objects
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Load the image with OpenCV and expand its dimensions into a single-column
    # array in which each item holds the RGB values of a pixel
    image = cv2.imread(PATH_TO_IMAGE)
    image_expanded = np.expand_dims(image, axis=0)
    # Run the detection
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})
    # Visualize the detection result
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=1,
        min_score_thresh=0.20)
    coordinates = vis_util.return_coordinates(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.20)
    if not coordinates:
        raise Exception("No license plate detected")
    i = 0
    # For each set of coordinates, save the corresponding portion of the image
    for coordinate in coordinates:
        crop_img = image[coordinate[0] + 1:coordinate[1],
                         coordinate[2] + 1:coordinate[3]]
        try:
            i += 1
            cv2.imwrite(SUPERRES_PATH + str(i) + '.jpg', crop_img)
        except:
            continue
    if i == 0:
        raise Exception("Error while saving the extracted objects")

def Predict(img, detection_graph, sess, MODEL_FOLDER, labels2show, threshold=0.7):
    # Grab path to current working directory
    CWD_PATH = os.getcwd()
    # Number of classes the object detector can identify
    NUM_CLASSES = 90
    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_FOLDER,
                                  'mscoco_complete_label_map.pbtxt')
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Color scheme for the different classes
    color_map = {
        'person': 'DeepSkyBlue',
        'dog': 'IndianRed',
        'cat': 'yellow',
        'chair': 'Cyan',
        'bottle': 'Orange'
    }
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    # Threshold of detection
    thresh = threshold
    items = []
    coordinates = []
    # If you want to resize to tune inference:
    # img = cv2.resize(img_org, (300, 300))
    img_expanded = np.expand_dims(img, axis=0)
    # print(img_expanded.shape)
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: img_expanded})
    objects = []
    for index, value in enumerate(classes[0]):
        object_dict = {}
        if scores[0, index] > thresh:
            object_dict[(category_index.get(value)).get('name')] = scores[0, index]
            objects.append(object_dict)
    # print(objects)
    # Get all the detected class labels in one list
    for y in objects:
        for keys in y.keys():
            m = list(y.keys())[0]
            items.append(m)
    # Get coordinates of the detected classes
    coordinates = vis_util.return_coordinates(
        img,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=10,
        min_score_thresh=thresh)
    new_items = []
    new_coordinates = []
    display_str_list = []
    for i, item in enumerate(items):
        if item.lower() in labels2show:
            new_items.append(item.lower())
            new_coordinates.append(coordinates[i][:-1])
            display_str_list.append(
                [item + ' : ' + str(int(coordinates[i][-1])) + '%'])
    for i, box in enumerate(new_coordinates):
        ymin, ymax, xmin, xmax = box[0], box[1], box[2], box[3]
        display_str = display_str_list[i]
        color = color_map[new_items[i]]
        vis_util.draw_bounding_box_on_image_array(
            img, ymin, xmin, ymax, xmax,
            color=color,
            thickness=4,
            display_str_list=display_str,
            use_normalized_coordinates=False)
    return new_coordinates, new_items, img

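# --- Usage sketch (added): one possible call site for Predict(). It assumes
# detection_graph and sess were created by the graph-loading code shown in the
# other snippets in this file; 'model' (the MODEL_FOLDER) and 'sample.jpg' are
# assumptions, not part of the original code.
labels2show = ['person', 'dog', 'cat']
frame = cv2.imread('sample.jpg')
boxes_out, names_out, annotated = Predict(frame, detection_graph, sess,
                                          'model', labels2show, threshold=0.6)
print(names_out)  # e.g. ['person']
cv2.imwrite('annotated.jpg', annotated)
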
def detectObjects():
    # Import packages
    import os
    import cv2
    import numpy as np
    import tensorflow as tf
    import sys

    # This is needed since the notebook is stored in the object_detection folder.
    sys.path.append("..")

    # Import utilities
    from utils import label_map_util
    from utils import visualization_utils as vis_util

    # Name of the directory containing the object detection module we're using
    MODEL_NAME = 'inference_graph'
    # IMAGE_NAME = 'test1.jpg'

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph .pb file, which contains the model that is used
    # for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')

    # Path to image
    # PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_NAME)

    # Number of classes the object detector can identify
    NUM_CLASSES = 4

    # Load the label map.
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `king`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Grab a frame from the camera (instead of loading an image from disk) and
    # expand its dimensions to have shape: [1, None, None, 3], i.e. a
    # single-column array, where each item in the column has the pixel RGB value
    # image = cv2.imread(PATH_TO_IMAGE)
    cap = cv2.VideoCapture(1)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    ret, image = cap.read()
    image_expanded = np.expand_dims(image, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})
    print(boxes.shape)

    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.989)
    coordinates = vis_util.return_coordinates(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.989)

    # All the results have been drawn on the image; hand it back to the caller,
    # which is responsible for displaying it and cleaning up.
    return image, coordinates

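# --- Usage sketch (added): detectObjects() grabs one frame from camera index 1
# and returns the annotated frame plus the detected coordinates, so display and
# cleanup belong at the call site, as shown here.
import cv2

frame, coords = detectObjects()
print(coords)
cv2.imshow('Object detector', frame)
# Press any key to close the image
cv2.waitKey(0)
cv2.destroyAllWindows()
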
    feed_dict={image_tensor: image_np_expanded})
vis_util.visualize_boxes_and_labels_on_image_array(
    display_image,
    np.squeeze(detection_boxes),
    np.squeeze(detection_classes).astype(np.int32),
    np.squeeze(detection_scores),  # scores (not classes) must drive min_score_thresh
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.80)
coordinates = vis_util.return_coordinates(
    display_image,
    np.squeeze(detection_boxes),
    np.squeeze(detection_classes).astype(np.int32),
    np.squeeze(detection_scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.80)
scale_percent = 20  # percent of original size
width = int(image_np.shape[1] * scale_percent / 100)
height = int(image_np.shape[0] * scale_percent / 100)
dim = (width, height)
# Display output
objects = []
isim = []
for index, value in enumerate(classes[0]):
    object_dict = {}

if whichh in image_path:
    continue
image = Image.open(image_path)
# The array-based representation of the image will be used later in order to
# prepare the result image with boxes and labels on it.
image_np = load_image_into_numpy_array(image)
# Expand dimensions since the model expects images to have shape: [1, None, None, 3]
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np, detection_graph)
# Visualization of the results of a detection.
# print(len(output_dict['detection_boxes']), "<<<<<<<<<", image_path.split('/')[-1])
coordinates = vis_util.return_coordinates(
    image_np,
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.5)
# [ymin, ymax, xmin, xmax, (box_to_score_map[box]*100)]  y -> height, x -> width
# print(coordinates, "<<<<<<<<<", image_path.split('/')[-1])
vis_util.visualize_boxes_and_labels_on_image_array(
    image_np,
    output_dict['detection_boxes'],
    output_dict['detection_classes'],
    output_dict['detection_scores'],
    category_index,
    instance_masks=output_dict.get('detection_masks'),
    use_normalized_coordinates=True,
    line_thickness=8)
# plt.figure(figsize=IMAGE_SIZE)

def loopimg(name="Picture 150.png"):
    IMAGE_NAME = str(name)
    print(name)
    PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_NAME)

    # Number of classes the object detector can identify
    NUM_CLASSES = 4

    # Load the label map.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Load image using OpenCV and expand image dimensions to have
    # shape: [1, None, None, 3], i.e. a single-column array, where each item
    # in the column has the pixel RGB value
    image = cv2.imread(PATH_TO_IMAGE)
    image_expanded = np.expand_dims(image, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})

    printcount = 0
    it = 0
    boxesFiltered = []
    final_score = np.squeeze(scores)
    count = 0
    for i in range(100):
        if scores is None or final_score[i] > 0.5:
            count = count + 1
    for i in classes[0]:
        printcount = printcount + 1
        if category_index[i]['name'] == "serialreadable":
            print("cheese")
            checkingSerial = True
            coordinates = vis_util.return_coordinates(
                image,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=0.90)
            print(category_index[i]['name'])
            print("cheese")
            if coordinates is not None:
                print(coordinates)
                print(i)
                it += 1
                x = int(coordinates[2])
                y = int(coordinates[0])
                w = int(coordinates[3])
                h = int(coordinates[1])
                # im2, contours, hierarchy = cv2.findContours(frame, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                # x, y, w, h = cv2.boundingRect(contours[i])
                roi = image[y:y + h, x:x + w]
                # gaussian_3 = cv2.GaussianBlur(roi, (9, 9), 10.0)
                # unsharp_image = cv2.addWeighted(roi, 1.5, gaussian_3, -0.5, 0, roi)
                roi = cv2.resize(roi, None, fx=4, fy=4, interpolation=cv2.INTER_CUBIC)
                roi = cv2.fastNlMeansDenoisingColored(roi, None, 15, 15, 7, 21)
                roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                kernel = np.zeros((3, 3), np.uint8)
                # roi = cv2.dilate(roi, kernel, iterations=1)
                roi = cv2.erode(roi, kernel, iterations=3)
                # roi = cv2.bilateralFilter(roi, 9, 75, 75)
                roi = cv2.medianBlur(roi, 3)
                # edges = cv2.Canny(roi, 100, 200)
                # img_dilation = c_m.dilate(edges, N=3, iterations=2)
                # kernel = np.ones((5, 5), np.uint8)
                # img_dilation = cv2.dilate(roi, kernel, iterations=2)
                roi = cv2.adaptiveThreshold(roi, 245,
                                            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                            cv2.THRESH_BINARY_INV, 115, 1)
                # ret, roi = cv2.threshold(roi, 127, 255, cv2.THRESH_BINARY_INV)
                # plt.subplot(121), plt.imshow(roi, cmap='gray')
                # plt.title('Original Image'), plt.xticks([]), plt.yticks([])
                # plt.subplot(122), plt.imshow(edges, cmap='gray')
                # plt.title('Edge Image'), plt.xticks([]), plt.yticks([])
                roi = cv2.medianBlur(roi, 3)
                # roi = cv2.bilateralFilter(roi, 9, 75, 75)
                cv2.imwrite(str(name) + "zaca.png", roi)
                print("zaca")
                # contours = c_m.find_components(edges)
                # c_m.process_image(str(it) + "roi.jpg", str(it) + "roi.jpg")
                # API.SetVariable("classify_enable_learning", "0")
                # API.SetVariable("classify_enable_adaptive_matcher", "0")
                r = 0
                text = readtext.readtext(roi, r)
                print("{}\n".format(text))
                if text is None:
                    text = str('')
                else:
                    img2 = np.zeros((512, 512, 3), np.uint8)
                    font = cv2.FONT_HERSHEY_SIMPLEX
                    cv2.putText(img2, text, (10, 500), font, 1,
                                (255, 255, 255), 2, cv2.LINE_AA)
                    cv2.imshow("Results", img2)
                checkingSerial = False
            else:
                break
            return name
        if printcount == count:
            break

    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.80)
    return name

def detection():
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)

    # Define input and output tensors (i.e. data) for the object detection classifier
    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Load image using OpenCV and expand image dimensions to have
    # shape: [1, None, None, 3], i.e. a single-column array, where each item
    # in the column has the pixel RGB value
    image = cv2.imread(PATH_TO_IMAGE)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_expanded = np.expand_dims(image_rgb, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_expanded})

    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.70)
    coordinates = vis_util.return_coordinates(
        image,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.70)
    for coordinate in coordinates:
        print(coordinate)
        # ymin, ymax, xmin, xmax
        (y1, y2, x1, x2, accuracy, classification) = coordinate
    with open(os.path.join(
            CWD_PATH,
            "json_hand/" + "script" + IMAGE_NAME.split(".")[0] + ".json"),
            "w", encoding='utf-8') as f:
        json.dump(coordinates, f, ensure_ascii=False, indent=4)
        f.write('\n')

    output = image.copy()
    cv2.imshow('Object detector', output)
    # Press any key to close the image
    cv2.waitKey(0)
    # Clean up
    cv2.destroyAllWindows()

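# --- Reference sketch (added): every snippet above calls vis_util.return_coordinates(),
# which is a community patch to the TF Object Detection API's visualization_utils,
# not a stock function. This hedged reconstruction is inferred from how the callers
# unpack its result ([ymin, ymax, xmin, xmax, score_percent, class_name]); the real
# patch used by these projects may differ in argument handling and return shape.
import numpy as np

def return_coordinates(image, boxes, classes, scores, category_index,
                       use_normalized_coordinates=True,
                       max_boxes_to_draw=20, min_score_thresh=.5,
                       line_thickness=4):
    im_height, im_width = image.shape[:2]
    coordinates_list = []
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores[i] > min_score_thresh:
            # Boxes come out of the model as (ymin, xmin, ymax, xmax), normalized.
            ymin, xmin, ymax, xmax = boxes[i].tolist()
            if use_normalized_coordinates:
                ymin, ymax = int(ymin * im_height), int(ymax * im_height)
                xmin, xmax = int(xmin * im_width), int(xmax * im_width)
            class_name = category_index.get(int(classes[i]), {}).get('name', 'N/A')
            coordinates_list.append(
                [ymin, ymax, xmin, xmax, float(scores[i]) * 100, class_name])
    return coordinates_list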