def _saveImage(self, saveAfter=2000):
    if len(self.imageList) >= saveAfter:
        dirName = './{}/'.format(self.dirName)
        # In future, change to a more performant save-image algorithm
        for i, image in enumerate(self.imageList):
            # dirName is a plain string, so join with '+' rather than the '/'
            # operator (which only works on pathlib.Path objects)
            Image.fromarray(image).save(
                dirName + 'camera_image_{}.jpeg'.format(datetime.now()))
        self.imageList.clear()
def frame2base64(frame):
    img = Image.fromarray(frame)               # convert the frame to a PIL Image
    output_buffer = BytesIO()                  # create an in-memory BytesIO buffer
    img.save(output_buffer, format='JPEG')     # write the JPEG into the buffer
    byte_data = output_buffer.getvalue()       # read the raw bytes back from memory
    base64_data = base64.b64encode(byte_data)  # encode the bytes as base64
    return base64_data                         # return the base64-encoded data
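# A minimal sketch of the inverse operation (base64 data back to a numpy frame),
# assuming the same imports used by frame2base64 above (base64, BytesIO,
# PIL.Image) plus numpy as np; the function name base642frame is hypothetical.
def base642frame(base64_data):
    byte_data = base64.b64decode(base64_data)  # decode base64 back to raw JPEG bytes
    buffer = BytesIO(byte_data)                # wrap the bytes in a file-like object
    img = Image.open(buffer)                   # let PIL decode the JPEG
    return np.array(img)                       # return the frame as a numpy array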
def classify(self):
    # ***************************************************************
    # Use the PyTorch model to make a prediction
    # ***************************************************************
    scale_tensor = [
        torch.FloatTensor([[math.log(i) * 2]]).cuda() for i in self.scale
    ]
    # cv_img = cv2.resize(self.image, self.dim)
    pil_img = Image.fromarray(self.image.astype('uint8'))
    torch_img = self.data_transform(pil_img)
    torch_img = np.expand_dims(torch_img, axis=0)
    input_data = torch.tensor(torch_img).type('torch.FloatTensor').cuda()

    # time.clock() was removed in Python 3.8; perf_counter() is the replacement
    t_start = time.perf_counter()
    output = self.model(input_data, scale_tensor)
    pred_y = int(torch.max(output, 1)[1].cpu().data.numpy())
    t_duration = time.perf_counter() - t_start

    self.t_sum += t_duration
    self.count += 1
    print(self.count)
    # report the running average over all predictions so far
    print("average prediction time =", self.t_sum / self.count)
    # print("Predict: ", self.labels[output_max_class])
    # print(output_prob[output_max_class])
    # if output_prob[output_max_class] < 0.7:
    #     return "None"
    return self.labels[pred_y]
def process_bag(bag_file_path, out_path):
    global EXPORT_CTR
    tl_state_set = False
    tl_state = 0
    csv_list = []
    bag = rosbag.Bag(bag_file_path)
    for topic, msg, t in bag.read_messages(topics=[IMG_TOPIC, TL_TOPIC]):
        if topic == TL_TOPIC:
            tl_state = msg.lights[0].state
            tl_state_set = True
        elif topic == IMG_TOPIC and tl_state_set:
            img_list = process_image_msg(msg)
            # Save images
            if len(img_list) > 0:
                for img in img_list:
                    img = cv.cvtColor(img, cv.COLOR_RGB2BGR)
                    image = Image.fromarray(img)
                    export_name = IMG_PREFIX + '{:06d}'.format(
                        EXPORT_CTR) + IMG_SUFFIX
                    image.save(join(out_path, export_name))
                    EXPORT_CTR += 1
                    # Save name and label for export to csv
                    csv_list.append((export_name, tl_state))
    bag.close()
    return csv_list
def image_callback(self, data):
    robot_stop = False
    img = self.bridge.imgmsg_to_cv2(data, 'bgr8')
    probability, current_class = self.find_dino(img)
    # Send data/stop robot only if you see a dino that you haven't before
    if probability > DinoDetect.probability_threshold and current_class != self.prev_class:
        rospy.loginfo("Found %s...", self.config['dino_names'][current_class])
        # base64.b64encode() expects bytes, not a PIL Image, so encode the
        # frame as JPEG first (assumes `from io import BytesIO` is imported)
        buffer = BytesIO()
        Image.fromarray(img).save(buffer, format='JPEG')
        message = {
            "dinosaur": self.config['dino_names'][current_class],
            "confidence": probability,
            "image": base64.b64encode(buffer.getvalue()).decode('utf-8')
        }
        # Send the data to the IoT cloud when you find a dino
        self.iotClient.publish(
            rospy.get_param("robot_name").lower() + "/dinos",
            json.dumps(message), 1)
        self.prev_class = current_class
        # Stop the robot every time you see a new dino
        robot_stop = True
    # Send the image to road following afterwards
    self.roadfollow_to_move(img, robot_stop)
def find_best_match_for_single_rgb(self, rgb_image_numpy, img_num):
    """
    :param rgb_image_numpy: should be R, G, B [H,W,3]
    """
    rgb_image_pil = Image.fromarray(rgb_image_numpy)
    print("rgb_image_pil", rgb_image_pil)

    # compute dense descriptors
    rgb_tensor = self.dataset.rgb_image_to_tensor(rgb_image_pil)

    # these are Variables holding torch.FloatTensors, first grab the data, then convert to numpy
    res = self.dcn.forward_single_image_tensor(rgb_tensor).data.cpu().numpy()

    # descriptor_target = self.get_descriptor_target_from_yaml()
    descriptor_target = np.array(self.pick_point_config["descriptor"])

    best_match_uv, best_match_diff, norm_diffs = self.dcn.find_best_match_for_descriptor(
        res, descriptor_target)

    print("best match diff: ", best_match_diff)

    if self.debug_visualize:
        cv2_img = rgb_image_numpy[:, :, ::-1].copy()
        self.draw_best_match(cv2_img, best_match_uv[0], best_match_uv[1])
        cv2.imshow('img_rgb_labeled' + str(img_num), cv2_img)
        cv2.moveWindow('img_rgb_labeled' + str(img_num), 0, 0)
        cv2.moveWindow('img_rgb_labeled' + str(img_num), 350, 0 + img_num * 500)
        cv2.waitKey(1000)

    return best_match_uv, best_match_diff
def detect(self):
    """
    Detect objects in an image with a trained SSD300, and visualize the results.

    :param original_image: image, a PIL Image
    :param min_score: minimum threshold for a detected box to be considered a match for a certain class
    :param max_overlap: maximum overlap two boxes can have so that the one with the lower score is not suppressed via Non-Maximum Suppression (NMS)
    :param top_k: if there are a lot of resulting detections across all classes, keep only the top 'k'
    :param suppress: classes that you know for sure cannot be in the image or you do not want in the image, a list
    :return: annotated image, a PIL Image
    """
    original_image = Image.fromarray(self.photo)
    original_image = original_image.convert('RGB')

    # Transform
    image = self.normalize(self.to_tensor(self.resize(original_image))).to(
        self.model.device)

    # Forward prop.
    predicted_locs, predicted_scores = self.model.model(image.unsqueeze(0))

    # Detect objects in SSD output
    det_boxes, det_labels, det_scores = self.model.model.detect_objects(
        predicted_locs,
        predicted_scores,
        min_score=self.min_score,
        max_overlap=self.max_overlap,
        top_k=self.top_k)

    # Move detections to the CPU
    det_boxes = det_boxes[0].to('cpu')

    # Transform to original image dimensions
    original_dims = torch.FloatTensor([
        original_image.width, original_image.height, original_image.width,
        original_image.height
    ]).unsqueeze(0)
    det_boxes = det_boxes * original_dims

    # Decode class integer labels
    det_labels = [
        rev_label_map[l] for l in det_labels[0].to('cpu').tolist()
    ]

    self.annot_image = original_image

    # If no objects found, the detected labels will be set to ['0.'], i.e. ['background'] in SSD300.detect_objects() in model.py
    if det_labels == ['background']:
        # Provides the original image. No box outline for the cat since none was found
        return None

    # Annotates the image and returns the location of the box around the cat
    box_location = self.annotate_Image(det_scores[0], det_labels, det_boxes)
    return box_location
def img_callback(self, selected_data):
    t = time.time()
    image_msg = selected_data.raw_image
    bridge = cv_bridge.CvBridge()
    cv_img = bridge.imgmsg_to_cv2(image_msg, 'passthrough')
    # print("image shape is ", cv_img.shape)
    image_np = cv_img
    image_np = cv2.cvtColor(image_np, cv2.COLOR_BAYER_BG2BGR, 3)
    selected_roi = selected_data.selected_box
    print('new data', selected_roi)

    # crop_img = img[y:y+h, x:x+w]
    crop_img = image_np[selected_roi.y_offset:selected_roi.y_offset +
                        selected_roi.height,
                        selected_roi.x_offset:selected_roi.x_offset +
                        selected_roi.width]
    img0 = Image.fromarray(crop_img)
    img0 = img0.convert("RGB")
    # img0 = img0.convert("BGR")
    transform = transforms.Compose(
        [transforms.Resize((32, 32)),
         transforms.ToTensor()])
    img = transform(img0)
    img = img.unsqueeze(0)
    img = img.to(self.device)

    output = self.model(img)
    print(output)
    data = torch.argmax(output, dim=1)
    # print(output)
    data = data.type(torch.cuda.FloatTensor)
    light_color = self.traffic_light[int(data)]

    c1 = (int(selected_roi.x_offset), int(selected_roi.y_offset))
    c2 = (int(selected_roi.x_offset + selected_roi.width),
          int(selected_roi.y_offset + selected_roi.height))
    # data=2
    if data == 0:  # black
        cv2.rectangle(image_np, c1, c2, color=(255, 255, 255), thickness=2)
    elif data == 1:  # green
        cv2.rectangle(image_np, c1, c2, color=(0, 255, 0), thickness=2)
    elif data == 2:  # red
        cv2.rectangle(image_np, c1, c2, color=(0, 0, 255), thickness=2)

    cv2.imshow('color detector', image_np)
    ch = cv2.waitKey(1)

    thorth = time.time()
    time_cost = round((thorth - t) * 1000)
    print("Inference Time", time_cost)
    # print("publishing bbox:- ", self.tl_bbox.data)
    # print("publishing bbox:- ", ppros_data.detections)
    another_time_msg = self.get_clock().now().to_msg()
    selected_data.selected_box.header.stamp = another_time_msg
    selected_data.selected_box.color = int(data)
    self._pub.publish(selected_data)
def classify(self, image):
    clas = 15
    image = Image.fromarray(image)
    # image = image.crop(crop_area)
    image = image.resize(self.image_size, Image.LANCZOS)
    image = np.asarray(image).reshape(1, self.image_size[0],
                                      self.image_size[1], 3)
    # image = image/127.5  # CHANGE ACCORDING TO NETWORK !!
    image = preprocess_input(image)
    conf, clas_net = self.find_pred(self.model.predict(image))
    clas = self.dict.get(clas_net, 15)
    print(clas)
    return conf * 100, clas
def show_diffeomorphisms(self):
    for i in range(len(self.estimators)):  # estr in self.estimators:
        D = self.estimators[i].summarize()
        cmd = self.command_list[i]
        save_path = '/home/adam/'
        # Image.fromarray(diffeo_to_rgb_angle(D.d)).show()
        Image.fromarray(diffeo_to_rgb_angle(D.d)).save(
            save_path + 'cmd' + str(cmd).replace(' ', '') + 'angle.png')
        Image.fromarray(diffeo_to_rgb_norm(D.d)).save(
            save_path + 'cmd' + str(cmd).replace(' ', '') + 'norm.png')
        Image.fromarray((D.variance * 255).astype(np.uint8)).save(
            save_path + 'cmd' + str(cmd).replace(' ', '') + 'variance.png')
def resize_keep_ratio(self, img, width, height, fill_color=(0, 0, 0, 0)):
    # ======= Make sure image is smaller than background =======
    h, w, channel = img.shape
    h_ratio = float(float(height) / float(h))
    w_ratio = float(float(width) / float(w))
    # if h_ratio <= 1 or w_ratio <= 1:
    ratio = h_ratio
    if h_ratio > w_ratio:
        ratio = w_ratio
    img = cv2.resize(img, (int(ratio * w), int(ratio * h)))

    # ======== Paste image to background =======
    im = Image.fromarray(np.uint8(img))
    x, y = im.size
    new_im = Image.new('RGBA', (width, height), fill_color)
    # Use integer division so the paste offset stays an int under Python 3
    new_im.paste(im, ((width - x) // 2, (height - y) // 2))
    return np.asarray(new_im)
def draw_bboxes(self, image, b_boxes, scores, classes):
    array = np.uint8((image))
    image = Image.fromarray(array)

    # draw the bounding boxes
    for i, c in reversed(list(enumerate(classes))):
        predicted_class = self.class_names[c]
        box = b_boxes[i]
        score = scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)
        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, self.font)

        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # a good redistributable image drawing library.
        for i in range(self.thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i],
                           outline=self.colors[c])
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=self.colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=self.font)
        del draw

    image = np.asarray(image)
    return image
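# Note: ImageDraw.textsize() was removed in Pillow 10, so on newer Pillow the
# label size used in draw_bboxes above can be computed with textbbox() instead.
# A minimal sketch, assuming the same `draw`, `label`, and `self.font` objects:
#
#   left, top, right, bottom = draw.textbbox((0, 0), label, font=self.font)
#   label_size = (right - left, bottom - top)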
def listener_callback(self, request, response):
    # Use OpenCV to visualize the images being classified from webcam
    start = timer()
    try:
        cv_image = self.bridge.imgmsg_to_cv2(request.img, "bgr8")
        cv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
        im_pil = Image.fromarray(cv_image)
    except CvBridgeError as e:
        print(e)
    # im_pil = np.frombuffer(image_data.data, dtype=np.uint8).reshape(image_data.height, image_data.width, -1)
    # img_data = np.asarray(request.img.data)
    # img_reshaped = np.reshape(img_data,(request.img.height, request.img.width, 3))

    classified, confidence = self.classify_image(im_pil)
    end = timer()
    time = str(end - start)
    to_display = "Classification: " + classified + " ,confidence: " + str(
        confidence) + " time: " + time
    self.get_logger().info(to_display)

    # Definition of Classification2D message
    response.classification.header = request.img.header
    result = ObjectHypothesis()
    result.id = classified
    result.score = confidence
    response.classification.results.append(result)
    print('Returning the response')
    # cv2.imwrite(os.path.join('/home/khalid/Downloads/temp/cvimages', classified + str(confidence)) + '.jpeg', cv_image)

    # Use OpenCV to visualize the images being classified from webcam
    # try:
    #     cv_image = self.bridge.imgmsg_to_cv2(request.img, "bgr8")
    #     cv2.imwrite(os.path.join('/home/khalid/Downloads/temp/cvimages', classified + str(confidence)) + '.jpeg', cv_image)
    # except CvBridgeError as e:
    #     print(e)
    # cv2.imshow('webcam_window', cv_image)
    # cv2.waitKey(0)

    # Publish Classification results
    return response
def classify(self):
    # ***************************************************************
    # Use the PyTorch model to make a prediction
    # ***************************************************************
    # scale_tensor = [torch.FloatTensor([[i]]).cuda() for i in self.scale]
    # cv_img = cv2.resize(self.image, self.dim)
    pil_img = Image.fromarray(self.img.astype('uint8'))
    torch_img = self.data_transform(pil_img)
    torch_img = np.expand_dims(torch_img, axis=0)
    input_data = torch.tensor(torch_img).type('torch.FloatTensor').cuda()

    # time.clock() was removed in Python 3.8; perf_counter() is the replacement
    t_start = time.perf_counter()
    input_data = input_data.repeat(1, 3, 1, 1)  # from 1-channel image to 3-channel image
    output = self.model(input_data)
    pred_y = int(torch.max(output, 1)[1].cpu().data.numpy())
    t_duration = float(time.perf_counter() - t_start)
    # self.count += 1
    # print("prediction time taken = ", t_duration)
    # print("Predict: ", self.labels[pred_y])
    # print(output_prob[output_max_class])
    # if output_prob[output_max_class] < 0.7:
    #     return "None"
    return self.labels[pred_y]
pub = rospy.Publisher('konum', String, queue_size=10)
rate = rospy.Rate(20)
time.sleep(4.0)

cap = cv2.VideoCapture(0)
yolo = YOLO(params)

while True:
    ret, frame = cap.read()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(frame)
    r_image, data = yolo.detect_image(image)
    print(data)
    open_cv_image = np.array(r_image)
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    cv2.imshow('frame', open_cv_image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
def print_road_map(image, left_line, right_line): """ print simple road map """ img = cv2.imread('images/top_view_car.png', -1) img = cv2.resize(img, (120, 246)) rows, cols = image.shape[:2] window_img = np.zeros_like(image) window_margin = left_line.window_margin left_plotx, right_plotx = left_line.allx, right_line.allx ploty = left_line.ally lane_width = right_line.startx - left_line.startx lane_center = (right_line.startx + left_line.startx) / 2 lane_offset = cols / 2 - (2 * left_line.startx + lane_width) / 2 car_offset = int(lane_center - 360) # Generate a polygon to illustrate the search window area # And recast the x and y points into usable format for cv2.fillPoly() left_pts_l = np.array([ np.transpose( np.vstack([ right_plotx - lane_width + lane_offset - window_margin / 4, ploty ])) ]) left_pts_r = np.array([ np.flipud( np.transpose( np.vstack([ right_plotx - lane_width + lane_offset + window_margin / 4, ploty ]))) ]) left_pts = np.hstack((left_pts_l, left_pts_r)) right_pts_l = np.array([ np.transpose( np.vstack([right_plotx + lane_offset - window_margin / 4, ploty])) ]) right_pts_r = np.array([ np.flipud( np.transpose( np.vstack( [right_plotx + lane_offset + window_margin / 4, ploty]))) ]) right_pts = np.hstack((right_pts_l, right_pts_r)) # Draw the lane onto the warped blank image cv2.fillPoly(window_img, np.int_([left_pts]), (140, 0, 170)) cv2.fillPoly(window_img, np.int_([right_pts]), (140, 0, 170)) # Recast the x and y points into usable format for cv2.fillPoly() pts_left = np.array([ np.transpose( np.vstack([ right_plotx - lane_width + lane_offset + window_margin / 4, ploty ])) ]) pts_right = np.array([ np.flipud( np.transpose( np.vstack( [right_plotx + lane_offset - window_margin / 4, ploty]))) ]) pts = np.hstack((pts_left, pts_right)) # Draw the lane onto the warped blank image cv2.fillPoly(window_img, np.int_([pts]), (0, 160, 0)) #window_img[10:133,300:360] = img road_map = Image.new('RGBA', image.shape[:2], (0, 0, 0, 0)) window_img = Image.fromarray(window_img) img = Image.fromarray(img) road_map.paste(window_img, (0, 0)) road_map.paste(img, (300 - car_offset, 590), mask=img) road_map = np.array(road_map) road_map = cv2.resize(road_map, (95, 95)) road_map = cv2.cvtColor(road_map, cv2.COLOR_BGRA2BGR) return road_map
def get_pil_image(self):
    self.lock_rgb.acquire()
    img = np.copy(self.rgb_img)
    self.lock_rgb.release()
    return Image.fromarray(img)
def cdp4_loop_random_uniform(
        t, image, joints, logical_image, horizontal_eye_pos_pub,
        vertical_eye_pos_pub, initialization, bridge, np, saliency_model, tf,
        global_salmap, global_weight, decay_strength, ts, T_SIM, Nrc,
        target_selection_idx, last_horizontal, last_vertical, previous_count,
        saccade_generator, horizontal_joint_limit, vertical_joint_limit,
        field_of_view, loop_counter, label, labels, layout, layouts,
        icub_position, icub_positions, ts_output, timestamps, eye_positions,
        sequence, sequences, dataset_path, global_dataset_counter):
    # initialize variables to persist
    if initialization.value is None:
        import os
        import sys
        sys.path.insert(
            0, '/home/bbpnrsoa/cdp4_venv/lib/python3.8/site-packages')
        import numpy
        np.value = numpy
        bridge.value = CvBridge()

        # Create directory to save data if it doesn't exist
        if 'cdp4_dataset' not in os.listdir(dataset_path.value):
            os.mkdir(dataset_path.value + 'cdp4_dataset')
            labels.value = np.value.array([], dtype=int)
            eye_positions.value = np.value.array([], dtype=float)
            sequences.value = np.value.array([], dtype=int)
            layouts.value = np.value.array([], dtype=int)
            # assign to .value so the array persists like the other variables
            icub_positions.value = np.value.array([], dtype=int)
        else:
            # Load previous labels list and dataset_counter
            existing_data = os.listdir(dataset_path.value + 'cdp4_dataset')
            existing_data = [e for e in existing_data if 'png' in e]
            global_dataset_counter.value = len(existing_data)
            labels.value = np.value.load(dataset_path.value +
                                         'cdp4_dataset/labels.npy',
                                         allow_pickle=True)
            eye_positions.value = np.value.load(
                dataset_path.value + 'cdp4_dataset/eye_positions.npy',
                allow_pickle=True)
            sequences.value = np.value.load(dataset_path.value +
                                            'cdp4_dataset/sequences.npy',
                                            allow_pickle=True)
            layouts.value = np.value.load(dataset_path.value +
                                          'cdp4_dataset/layouts.npy',
                                          allow_pickle=True)
            icub_positions.value = np.value.load(
                dataset_path.value + 'cdp4_dataset/icub_positions.npy',
                allow_pickle=True)

        clientLogger.info("Initialization ... Done!")
        initialization.value = True
        return

    if joints.value is not None and image.value is not None and logical_image.value is not None:
        import cv2
        import json
        from PIL import Image

        cv2_img_original = bridge.value.imgmsg_to_cv2(image.value, 'rgb8')

        horizontal_eye_joint_name = 'eye_version'
        vertical_eye_joint_name = 'eye_tilt'
        horizontal_index = joints.value.name.index(horizontal_eye_joint_name)
        vertical_index = joints.value.name.index(vertical_eye_joint_name)
        current_eye_pos = (joints.value.position[horizontal_index],
                           joints.value.position[vertical_index])

        # Generate random eye positions
        std = 0.698131 / 9
        new_eye_pos_h = np.value.random.normal(scale=std)
        new_eye_pos_v = np.value.random.normal(scale=std)
        horizontal_eye_pos_pub.send_message(new_eye_pos_h)
        vertical_eye_pos_pub.send_message(new_eye_pos_v)

        loop_counter.value = loop_counter.value + 1
        clientLogger.info("Counter: {}".format(loop_counter.value))

        # Save every second image
        if np.value.mod(loop_counter.value, 2) == 0:
            labels_dict = {
                'bed_room': 0,
                'kitchen': 1,
                'living_room': 2,
                'office': 3
            }
            labels.value = np.value.append(labels.value,
                                           labels_dict[label.value])
            eye_positions.value = np.value.append(eye_positions.value,
                                                  current_eye_pos)
            sequences.value = np.value.append(sequences.value,
                                              int(sequence.value))
            layouts.value = np.value.append(layouts.value, int(layout.value))
            icub_positions.value = np.value.append(icub_positions.value,
                                                   int(icub_position.value))

            im = Image.fromarray(cv2_img_original)
            name = str(global_dataset_counter.value).zfill(5)
            im_name = "{}.png".format(name)
            im.save(dataset_path.value + "cdp4_dataset/" + im_name)
            np.value.save(dataset_path.value + "cdp4_dataset/eye_positions",
                          eye_positions.value)
            np.value.save(dataset_path.value + "cdp4_dataset/labels",
                          labels.value)
            np.value.save(dataset_path.value + "cdp4_dataset/sequences",
                          sequences.value)
            np.value.save(dataset_path.value + "cdp4_dataset/layouts",
                          layouts.value)
            np.value.save(dataset_path.value + "cdp4_dataset/icub_positions",
                          icub_positions.value)

            # save visible objects (logical camera output)
            visible_objects = {}
            for item in logical_image.value.models:
                visible_objects[item.type] = [
                    item.pose.position.x, item.pose.position.y,
                    item.pose.position.z, item.pose.orientation.x,
                    item.pose.orientation.y, item.pose.orientation.z,
                    item.pose.orientation.w
                ]

            # save camera position
            visible_objects['camera'] = [
                logical_image.value.pose.position.x,
                logical_image.value.pose.position.y,
                logical_image.value.pose.position.z,
                logical_image.value.pose.orientation.x,
                logical_image.value.pose.orientation.y,
                logical_image.value.pose.orientation.z,
                logical_image.value.pose.orientation.w
            ]

            # write json file to disk
            with open(dataset_path.value + "cdp4_dataset/{}.json".format(name),
                      'w') as outfile:
                json.dump(visible_objects, outfile)

            global_dataset_counter.value = global_dataset_counter.value + 1
def test_diffeo(argv): # Find diffeomorphisms file try: dfile = argv[argv.index('-dl')+1] except ValueError: dfile = '/media/data/learned_diffeo/camera_ptz.dynamics.pickle' print 'Using diffeomorphism from file: ',dfile # Find diffeomorphisms file try: outpath = argv[argv.index('-o')+1] except ValueError: outpath = '/media/data/tempimages/' print 'Saving images to path: ',outpath # Find output prefix file try: prefix = argv[argv.index('-p')+1] except ValueError: prefix = 'logitech_cam_ptz' print 'Using prefix for output files: ',prefix # Find Input image try: infile = argv[argv.index('-im')+1] except ValueError: infile = '/home/adam/git/surf12adam/diffeo_experiments/lighttower640.jpg' print 'Using image file: ',infile # Number of levels to apply try: levels = int(argv[argv.index('-l')+1]) except ValueError: levels = 5 print 'Applying diffeomorphism ',levels,' times' # Image size try: size = eval(argv[argv.index('-size')+1]) except ValueError: size = [160,120] print 'Image size: ',size logfile = open(outpath+prefix+'log.html','w') logfile.write('<html><body><h1>Diffeomorphism test log: '+dfile+'</h1>') logfile.write('Diffeomorphism file: '+dfile+'<br/>') logfile.write('Test image: <br/><img src="'+infile+'.png"/><br/><br/>') logfile.write('<table>\n') logfile.write('<tr align="center">\n <td>Command</td>\n') logfile.write('<td>Diffeomorphism<br/>Angle</td>') logfile.write('<td>Diffeomorphism<br/>Norm</td>') logfile.write('<td>Diffeomorphism<br/>Variance</td>') for i in range(levels+1): logfile.write('<td>'+str(i)+'</td>') logfile.write('</tr>') im = np.array(Image.open(infile).resize(size).getdata(),np.uint8).reshape((size[1],size[0],3)) #Y0,Y1 = get_Y_pair((30,30),(0,0),(160,120),im) # pdb.set_trace() diffeo_list = pickle.load(open(dfile,'rb')) for D in diffeo_list: logfile.write('<tr>') cmdstr = str(D.command).replace(' ','') # outpath+prefix+'diffeo'+cmdstr+'angle'+'.png' logfile.write('<td>') logfile.write(str(D.command)) logfile.write('</td>') Image.fromarray(diffeo_to_rgb_angle(D.d)).save(outpath+prefix+'diffeo'+cmdstr+'angle'+'.png') logfile.write('<td>') logfile.write('<img src="'+outpath+prefix+'diffeo'+cmdstr+'angle'+'.png'+'"/>') logfile.write('</td>') Image.fromarray(diffeo_to_rgb_norm(D.d)).save(outpath+prefix+'diffeo'+cmdstr+'norm'+'.png') logfile.write('<td>') logfile.write('<img src="'+outpath+prefix+'diffeo'+cmdstr+'norm'+'.png'+'"/>') logfile.write('</td>') Image.fromarray((D.variance*255).astype(np.uint8)).save(outpath+prefix+'diffeo'+cmdstr+'variance'+'.png') logfile.write('<td>') logfile.write('<img src="'+outpath+prefix+'diffeo'+cmdstr+'variance'+'.png'+'"/>') logfile.write('</td>') Y = im Image.fromarray(Y).save(outpath+prefix+cmdstr+str(0)+'.png') logfile.write('<td>') logfile.write('<img src="'+outpath+prefix+cmdstr+str(0)+'.png'+'"/>') logfile.write('</td>') for i in range(levels): Y, var = D.apply(Y) Image.fromarray(Y).save(outpath+prefix+cmdstr+str(i+1)+'.png') logfile.write('<td>') logfile.write('<img src="'+outpath+prefix+cmdstr+str(i+1)+'.png'+'"/>') logfile.write('</td>') logfile.write('</tr>') logfile.write('</table></body></html>') logfile.flush() logfile.close()
def refresh_filter(): # grab a reference to the image panels global original_image_widget, thres_image_widget, output_image_widget global confidence if original_image is None: return roi_height_y = roi_desc.get_pixel_height(original_image.shape[0]) roi_height_y = int(roi_height_y) roi_frame = roi_desc.mask_frame_resize(original_image) if current_mode is not None: filtered_img = current_mode.apply_filter(roi_frame, cf_desc.transform) filling = cf_desc.estimate_filling(filtered_img) else: filtered_img = np.zeros_like(roi_frame) filling = 0 try: confidence = int(confidenceValue.get()) except: pass filtered_img_full, filling_full = cf_desc.apply_filters( roi_frame, confidence) rgb_original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB) cv2.line(rgb_original_image, (0, roi_height_y), (rgb_original_image.shape[1], roi_height_y), thickness=2, color=(255, 255, 0)) # convert the images to PIL format and then to ImageTk format orig_img = ImageTk.PhotoImage(Image.fromarray(rgb_original_image)) bin_img = ImageTk.PhotoImage(Image.fromarray(filtered_img)) res_img = ImageTk.PhotoImage(Image.fromarray(filtered_img_full)) # if the panels are None, initialize them if original_image_widget is None or thres_image_widget is None or output_image_widget is None: original_image_widget = Label(image=orig_img) original_image_widget.image = orig_img original_image_widget.grid(row=1, column=0, columnspan=2, padx=5, pady=5) thres_image_widget = Label(image=bin_img) thres_image_widget.image = bin_img thres_image_widget.grid(row=1, column=2, columnspan=2, padx=5, pady=5) output_image_widget = Label(image=res_img) output_image_widget.image = res_img output_image_widget.grid(row=1, column=4, columnspan=2, padx=5, pady=5) # otherwise, update the image panels else: # update the pannels original_image_widget.configure(image=orig_img) original_image_widget.image = orig_img thres_image_widget.configure(image=bin_img) thres_image_widget.image = bin_img output_image_widget.configure(image=res_img) output_image_widget.image = res_img filling_var.set('Filling: {} / {} %'.format(filling, filling_full))
scale_factor = [1.0 / 50, 2.0 / 50, 1.0 / 50]
# command_list = [[200,0,0],[-200,0,0],[0,150,0],[0,-150,0],[0,0,5],[0,0,-5],[0,0,0]]
# command_list = [[0,0,5],[0,0,-5]]

# Keep track of the actual zoom for the last command and the current zoom
zoom_last = 1.0
zoom = 1.0

out_bag = rosbag.Bag(outfile, 'w')

# Load image to crop subimages from
if image_str == 'random':
    M = np.random.randint(0, 255, (748, 1024, 3)).astype(np.uint8)
    im = Image.fromarray(M)
else:
    im = Image.open(image_str + '.png')

cam_sim = logitech_cam_simulation(size, im, scale_factor)
# Y0,Y1,cmd = cam_sim.simulate_Y_pair(size,[0,0,50])
print('simulated')


def show(Y0, Y1):
    Image.fromarray(Y0).show()
    Image.fromarray(Y1).show()


# pdb.set_trace()
for i in range(duration):
    print(i)
    command = command_list[np.random.randint(0, len(command_list))]
    Y0, Y1, cmd = cam_sim.simulate_Y_pair(size, command)
def show(Y0, Y1):
    Image.fromarray(Y0).show()
    Image.fromarray(Y1).show()
def prep_data(org_image,cv_image,f_points): global count global lane_not_detect_count_left global lane_not_detect_count_right global right_lane_cov global left_lane_cov # msg_cov = [] msg_cov = "" # pub = rospy.Publisher('/chatter', String) # rospy.init_node('prep_data', anonymous=True) # rate = rospy.Rate(8) img = cv_image ymax,xmax = img.shape[:2] # cv2.imshow("Image window", cv_image) # cv2.waitKey(3) org = img.copy() point1 = [] point2 = [] for i in f_points: point1.append([i[0],i[1]]) point2.append([i[2],i[3]]) # print (point1,point2) # count = 0 start_points = [] end_points = [] lane_type = [] image_vec = [] for i,j in zip(point1,point2): i,j = process_point(i,j,xmax,ymax) start_points.append(i) end_points.append(j) offset = 15 # offset = 25 a = (i[0]-offset,i[1]) b = (i[0]+offset,i[1]) c = (j[0]+offset,j[1]) d = (j[0]-offset,j[1]) mask = np.zeros(img.shape, dtype=np.uint8) roi_corners = np.array([[a,b,c,d]], dtype=np.int32) # fill the ROI so it doesn't get wiped out when the mask is applied channel_count = img.shape[2] # i.e. 3 or 4 depending on your img ignore_mask_color = (255,)*channel_count cv2.fillPoly(mask, roi_corners, ignore_mask_color) # from Masterfool: use cv2.fillConvexPoly if you know it's convex # apply the mask masked_img = cv2.bitwise_and(img, mask) # print masked_img.shape # plt.imshow(masked_img) # plt.show() #for curved lane # masked_img_cop = masked_img[60:,:,:] # plt.imshow(masked_img) # plt.show() # print i,j k1 = 70 k2 = 30 z = [0,0] if i[1] < j[1] : z[0] = (k1*i[0] + k2*j[0])/(k1+k2) z[1] = (k1*i[1] + k2*j[1])/(k1+k2) if i[0]<j[0]: # crop_img = masked_img[i[1]:j[1], i[0]:j[0]] crop_img = masked_img[z[1]:j[1], z[0]:j[0]] elif i[0]>j[0]: # crop_img = masked_img[i[1]:j[1], j[0]:i[0]] crop_img = masked_img[z[1]:j[1], j[0]:z[0]] elif i[0] == j[0]: # crop_img = masked_img[i[1]:j[1], j[0]-offset:j[0]+offset] crop_img = masked_img[z[1]:j[1], j[0]-offset:j[0]+offset] elif i[1] > j[1]: z[0] = (k1*j[0] + k2*i[0])/(k1+k2) z[1] = (k1*j[1] + k2*i[1])/(k1+k2) if i[0]<j[0]: # crop_img = masked_img[j[1]:i[1], i[0]:j[0]] crop_img = masked_img[z[1]:i[1], i[0]:z[0]] elif i[0] > j[0]: #right lane # crop_img = masked_img[j[1]:i[1], j[0]:i[0]] crop_img = masked_img[z[1]:i[1], z[0]:i[0]] elif i[0] == j[0]: # crop_img = masked_img[j[1]:i[1], j[0]-offset:j[0]+offset] crop_img = masked_img[z[1]:i[1], j[0]-offset:j[0]+offset] # crop_img = masked_img[z[1]:i[1], cdj[0]-offset:j[0]+offset] # elif i[0] == j[0]: # print "true" # if i[1]<j[1]: # crop_img = masked_img[i[1]:j[1], i[0]-offset:i[0]+offset] # else: # crop_img = masked_img[j[1]:i[1], j[0]-offset:j[0]+offset] # print masked_img_cop.shape # plt.imshow((cv2.cvtColor(masked_img, cv2.COLOR_BGR2RGB))) # cv2.imwrite("results/curve_data/"+str(n)+".jpg",masked_img) # plt.show() crop_img = cv2.resize(crop_img,(224,224)) # print "crop img",crop_img.shape pil_im = Image.fromarray(crop_img).convert('RGB') inputs = trans(pil_im).view(3, 224, 224) # print inputs # print (pil_im) image_vec.append(inputs) # plt.imshow((cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))) # cv2.imwrite("result_aug/"+str(n)+".jpg",crop_img) # cv2_im = cv2.cv # tColor(crop_img,cv2.COLOR_BGR2RGB) if len(image_vec)!=0: inputs = torch.stack(image_vec) # pil_im = Image.fromarray(crop_img).convert('RGB') # inputs = trans(pil_im).view(1, 3, 224, 224) inputs = Variable(inputs.cuda()) # print (inputs) outputs = alexnet_model(inputs) o = outputs.data.cpu().numpy() # print (o) preds = np.argmax(o,axis=1) # print ("pred",preds) # e_x = np.exp(o[0] - np.max(o[0])) # score = e_x / 
e_x.sum(axis=0) # print "pred",(pred) # # print "score",(score) # sky = 260 sky = 185 f_r.predict() f_l.predict() pred_r = f_r.x pred_l = f_l.x pred_r_cov = f_r.P pred_l_cov = f_l.P left_lane_cov.append(np.diagonal(pred_l)) right_lane_cov.append(np.diagonal(pred_r)) hello_str = "" # if list(np.diagonal(pred_l_cov)) <= [29.0,34.5,37.0,41.0]: # if list(np.diagonal(pred_l_cov)) <= [29.0,29.0,29.0,29.0]: # # hello_str = str([i[0] for i in pred_l]) # for i in pred_l: # hello_str = hello_str + str(i[0]) + " " # # msg_cov.append(hello_str) # msg_cov += hello_str # else: # # msg_cov.append("False") # msg_cov += "False " # hello_str = "" # # if list(np.diagonal(pred_r_cov)) <= [57.0,60.5,64.0,67.0]: # if list(np.diagonal(pred_r_cov)) <= [56.0,56.0,56.0,56.0]: # # msg_cov.append("True") # # hello_str = str([i[0] for i in pred_r]) # for i in pred_l: # hello_str = hello_str + str(i[0]) + " " # msg_cov += hello_str # # msg_cov.append(hello_str) # else: # # msg_cov.append("False") # msg_cov += "False " # pub.publish(str(msg_cov)) # rate.sleep() # print pred_r # print pred_l # print left_lane_cov # print right_lane_cov l_slope = [] r_slope = [] for i,j,pred in zip(point1,point2,preds): i[1]+=sky j[1]+=sky x_scale = 964/480.0 y_scale = 1288/640.0 i[0] = int(i[0]*x_scale) i[1] = int(i[1]*y_scale) j[0] = int(j[0]*x_scale) j[1] = int(j[1]*y_scale) # for line in file: # if pred == 0 and np.max(score)>0.85 : if pred == 0 : # print "before process",i,j i,j = process_point(i,j,1288,964) # print "processed",i,j k1 = 40 k2 = 60 z = [0,0] if j[1] < i[1] : j[0] = (k1*j[0] + k2*i[0])/(k1+k2) j[1] = (k1*j[1] + k2*i[1])/(k1+k2) else: i[0] = (k1*i[0] + k2*j[0])/(k1+k2) i[1] = (k1*i[1] + k2*j[1])/(k1+k2) if i[0]-j[0] !=0: slope = (i[1]-j[1])/float((i[0]-j[0])) m = np.degrees(np.arctan(math.fabs(slope))) else: slope = 1 m = 90 # print (m) # print (slope) # if slope > 0 : if slope > 0 and (i[0]<=1288 and j[0]<=1288): r_slope.append((slope,i,j)) # elif slope < 0 : elif slope < 0 and (i[0]>=0 and j[0]>=0): l_slope.append((slope,i,j)) # cv2.line(org,tuple(i),tuple(j),color=(0,255,0),thickness=2) # cv2.line(org_image,tuple(i)+tuple(),tuple(j),color=(0,255,0),thickness=2) # print "score",(score) # if pred == 0 and np.max(score)>0.85 : # # if pred == 0 : # cv2.line(org,tuple(i),tuple(j),color=(0,255,0),thickness=2) # else: # cv2.line(img,tuple(i),tuple(j),color=(0,0,255)) # plt.imshow((cv2.cvtColor(img, cv2.COLOR_BGR2RGB))) # plt.imshow((cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))) # plt.show() # SaveFigureAsImage("/home/ayush/Documents/swarath_confidence_new/model_test/"+str(n), plt.gcf() , orig_size=(h,w)) # cv2.imshow("Image window", org_image) # cv2.waitKey(3) ###########################################3 r_slope.sort(key=lambda x: x[0],reverse=True) l_slope.sort(key=lambda x: x[0],reverse=False) if len(r_slope)!=0: if ([r_slope[0][1][1] > r_slope[0][2][1]]): f_r.update(np.array([[r_slope[0][1][0]],[r_slope[0][1][1]],[r_slope[0][2][0]],[r_slope[0][2][1]]])) lane_not_detect_count_right = 0 else: f_r.update(np.array([[r_slope[0][2][0]],[r_slope[0][2][1]],[r_slope[0][1][0]],[r_slope[0][1][1]]])) lane_not_detect_count_right = 0 else: # f_r.update(f_r.x) lane_not_detect_count_right+=1 if len(l_slope)!=0: if ([l_slope[0][1][1] > l_slope[0][2][1]]): f_l.update(np.array([[l_slope[0][1][0]],[l_slope[0][1][1]],[l_slope[0][2][0]],[l_slope[0][2][1]]])) lane_not_detect_count_left = 0 else: f_l.update(np.array([[l_slope[0][2][0]],[l_slope[0][2][1]],[l_slope[0][1][0]],[l_slope[0][1][1]]])) lane_not_detect_count_left = 0 else: # 
f_l.update(f_l.x) lane_not_detect_count_left += 1 # print "r_slope",r_slope # print "l_slope",l_slope if len(r_slope) != 0 : cv2.line(org_image,tuple([r_slope[0][1][0],r_slope[0][1][1]]),tuple([r_slope[0][2][0],r_slope[0][2][1]]),color=(0,0,255),thickness=2) if len(l_slope) !=0: cv2.line(org_image,tuple([l_slope[0][1][0],l_slope[0][1][1]]),tuple([l_slope[0][2][0],l_slope[0][2][1]]),color=(0,0,255),thickness=2) if lane_not_detect_count_left < 5: cv2.line(org_image,tuple([int(pred_l[0][0]),int(pred_l[1][0])]),tuple([int(pred_l[2][0]),int(pred_l[3][0])]),color=(0,255,0),thickness=2) if lane_not_detect_count_right < 5: cv2.line(org_image,tuple([int(pred_r[0][0]),int(pred_r[1][0])]),tuple([int(pred_r[2][0]),int(pred_r[3][0])]),color=(0,255,0),thickness=2) # out.write(org_image) cv2.imshow('Frame',org_image) cv2.waitKey(3) # cv2.imwrite("./result/dnd_kal/"+str(file_name)+".jpg",img) # cv2.imwrite("./result/india_gate_kal/"+str(file_name)+".jpg",img) # cv2.imwrite("./result/india_kal/"+str(file_name)+".jpg",img) # Press Q on keyboard to exit # if cv2.waitKey(50) & 0xFF == ord('q'): # break ################################3 cv2.imwrite("/home/ayush/Documents/swarath_lane/swarath/src/lanedetection_shubhamIITK/src/kalman_image_test/"+str(count)+".jpg", org_image) # val_file.write("%s\n" %(str(n)+"_"+str(count)+".jpg")) count+=1 # plt.show() # Gradients else: cv2.imshow('Frame',org_image) cv2.waitKey(3) cv2.imwrite("/home/ayush/Documents/swarath_lane/swarath/src/lanedetection_shubhamIITK/src/kalman_image_test/"+str(count)+".jpg", org_image) count+=1
def find_affordance(self, data): aff_list = list() if self.affordance[0] == 'pcb': try: tmp_img = self.bridge.imgmsg_to_cv2(data.assos_img, 'rgb8') tmp_img2 = ImageEnhance.Color( Image.fromarray(tmp_img)).enhance(2) self.curr_img = image.img_to_array(tmp_img2) except CvBridgeError as e: print(e) for part in data.part_array: partname = part.part_id[:-1] if partname == 'pcb': pcb = part aff = Affordance() aff.object_name = pcb.part_id pcb_bounding_values = self.returnPcbBoundingValues(pcb) rospy.set_param('pcb_bounding_values', pcb_bounding_values) img_w = self.curr_img.shape[0] img_h = self.curr_img.shape[1] aff_mask = self.model.predict(self.curr_img) leverup_points, confidences = self.sample_leverup_points(aff_mask) if len(leverup_points) == 0: return aff_list aff.effect_name = 'levered' aff.affordance_name = 'leverable' leverup_points_converted = ConvertPixel2WorldRequest() for i in range(len(leverup_points)): lever_up_point = geometry_msgs.msg.Point() lever_up_point.y = leverup_points[i][0] * img_w / 256.0 lever_up_point.x = leverup_points[i][1] * img_h / 256.0 lever_up_point.z = 0 leverup_points_converted.pixels.append(lever_up_point) resp = self.pixel_world_srv(leverup_points_converted) affordance_vis_image = cv2.resize(tmp_img, (256, 256)) tmp_img = self.bridge.imgmsg_to_cv2( pcb.part_outline.part_mask, pcb.part_outline.part_mask.encoding) tmp_img = cv2.resize(tmp_img, (256, 256)) markerArray = MarkerArray() for i in range(len(leverup_points)): marker = Marker() ap = ActionParameters() ap.confidence = confidences[i] lever_up_point = AscPair() lever_up_point.key = 'start_pose' lever_up_point.value_type = 2 lever_up_point.value_pose.position.x = resp.points[i].x lever_up_point.value_pose.position.y = resp.points[i].y lever_up_point.value_pose.position.z = resp.points[i].z x = leverup_points[i][0] y = leverup_points[i][1] w_size = 9 temp = tmp_img[max(0, x - w_size):min(256, x + w_size), max(0, y - w_size):min(256, y + w_size)] x1, y1 = np.where(temp == 255) x2, y2 = np.where(temp == 0) direction = math.pi + math.atan2( np.mean(y2) - np.mean(y1), np.mean(x2) - np.mean(x1)) if len(y2) == 0 or len(y1) == 0 or len(x2) == 0 or len( x1) == 0 or np.isnan(direction) or np.isinf(direction): continue import tf point_inverted = np.array(leverup_points[i][::-1]) _ = cv2.arrowedLine( affordance_vis_image, tuple(point_inverted.astype(np.int16)), tuple((point_inverted + [np.sin(direction) * 15, np.cos(direction) * 15]).astype(np.int16)), (0, 0, 255), 1, tipLength=0.5) _ = cv2.arrowedLine( affordanceWrapper.affordance_vis_image, tuple(point_inverted.astype(np.int16)), tuple((point_inverted + [np.sin(direction) * 15, np.cos(direction) * 15]).astype(np.int16)), (0, 0, 255), 1, tipLength=0.5) quaternion = tf.transformations.quaternion_from_euler( 0, 0, direction) lever_up_point.value_pose.orientation.x = quaternion[0] lever_up_point.value_pose.orientation.y = quaternion[1] lever_up_point.value_pose.orientation.z = quaternion[2] lever_up_point.value_pose.orientation.w = quaternion[3] ap.parameters.append(lever_up_point) h = std_msgs.msg.Header() h.stamp = rospy.Time.now() h.frame_id = resp.header.frame_id marker.header = h marker.ns = "lever_up_points" marker.id = i marker.type = 0 # arrow marker.action = 0 marker.scale.x = 0.01 marker.scale.y = 0.001 marker.scale.z = 0.002 marker.lifetime = rospy.Duration(150) marker.color.r = 1.0 marker.color.g = 0.0 marker.color.b = 0.0 marker.color.a = 1.0 marker.pose = lever_up_point.value_pose markerArray.markers.append(marker) aff.action_parameters_array.append(ap) 
aff_list.append(aff) rate = rospy.Rate(10) comp_img = self.bridge.cv2_to_compressed_imgmsg( affordance_vis_image) for _ in range(5): self.lever_up_rviz.publish(markerArray) self.lever_up_image_viz.publish(comp_img) rate.sleep() elif self.affordance[0] == 'magnet': for part in data.part_array: partname = part.part_id[:-1] if partname == 'magnet': aff = Affordance() aff.object_name = part.part_id aff.effect_name = 'levered' aff.affordance_name = 'leverable' ap = ActionParameters() ap.confidence = 0.8 # Dummy Value asc_pair = AscPair() asc_pair.key = 'start_pose' asc_pair.value_type = 2 asc_pair.value_pose = part.pose ap.parameters.append(asc_pair) aff.action_parameters_array.append(ap) aff_list.append(aff) return aff_list
def detect(self, image, lang='eng'):
    im = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    return pytesseract.image_to_string(im, lang)
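# A minimal usage sketch for the OCR helper above, assuming the enclosing class
# is instantiated as `reader` (a hypothetical name) and that cv2 and
# pytesseract are importable as in the method itself.
#
#   frame = cv2.imread('sign.png')           # BGR image, as OpenCV loads it
#   text = reader.detect(frame, lang='eng')  # OCR on the RGB-converted frame
#   print(text)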
def main():
    global message
    message = message()
    random.seed()
    vel_pub = rospy.Publisher('/jackal0/jackal_velocity_controller/cmd_vel',
                              Twist, queue_size=10)
    rospy.init_node('jackal_turn', anonymous=True)
    rospy.Subscriber("/jackal0/global_pos", Pose, globalCallback)
    rospy.Subscriber("/jackal0/imu/data", Imu, imuCallback)
    rospy.Subscriber("/jackal0/front/scan", LaserScan, laserCallback)
    rospy.Subscriber("/jackal0/goal_pos", Pose, goalCallback)
    rate = rospy.Rate(10)  # rate = 10 Hz
    dt = 0.1  # follow the rate
    vel = Twist()
    pf = potential_field()
    solution = race()
    rate.sleep()
    previous_range = [message.range, min(message.range)]
    rate.sleep()

    goal = position()
    goal.x = message.goal_pos.x
    goal.y = message.goal_pos.y
    solution.update_goal(goal)
    robot_pos = position()
    robot_pos.x = message.posx
    robot_pos.y = message.posy
    # rospy.loginfo("goalx:%f, goaly:%f", goalx, goaly)

    map_size = 2000
    prior_pos = np.array([message.posx, message.posy, message.theta])
    map = maps(prior_pos, map_size)

    while solution.reached(robot_pos) != 0:
        if solution.reached(robot_pos) == 1:
            linearx, steering = solution.going(robot_pos, message.theta,
                                               message.laser_data)
        else:
            linearx, steering = solution.goto(robot_pos, message.theta)
        # linearx, steering = pf.potential_field_force(message.posx, message.posy, message.theta, goalx, goaly, message.laser_data)
        # rospy.loginfo("a0: %.4f, a1: %.4f, r0: %.4f, r1: %.4f, v: %.4f, s: %.4f, d: %.4f, t: %.4f", attractive[0], attractive[1], repulsive[0], repulsive[1], linearx, steering, distance, angle)
        robot_pos.x = message.posx
        robot_pos.y = message.posy
        prior_pos = np.array([message.posx, message.posy, message.theta])
        map.update(prior_pos, message.laser_data)
        vel.linear.x = linearx
        vel.angular.z = steering
        vel_pub.publish(vel)
        rate.sleep()
        plt.cla()
        plt.imshow(map.grid_map)
        plt.pause(0.0001)

    # Save the occupancy grid as a grayscale image; use the `map` instance
    # built above rather than the `maps` class itself.
    image_map = map.grid_map * 255
    im = Image.fromarray(image_map)
    im = im.convert('L')
    im.save('outfile.png')
def prep_data(org_image, cv_image, f_points):
    global count
    global lane_not_detect_count_left
    global lane_not_detect_count_right
    global right_lane_cov
    global left_lane_cov
    # msg_cov = []
    msg_cov = ""
    img = cv_image
    ymax, xmax = img.shape[:2]
    # cv2.imshow("Image window", cv_image)
    # cv2.waitKey(3)
    org = img.copy()
    point1 = []
    point2 = []
    for i in f_points:
        point1.append([i[0], i[1]])
        point2.append([i[2], i[3]])
    # print (point1,point2)
    # count = 0
    start_points = []
    end_points = []
    lane_type = []
    image_vec = []
    for i, j in zip(point1, point2):
        i, j = process_point(i, j, xmax, ymax)
        start_points.append(i)
        end_points.append(j)
        offset = 15
        # offset = 25
        a = (i[0] - offset, i[1])
        b = (i[0] + offset, i[1])
        c = (j[0] + offset, j[1])
        d = (j[0] - offset, j[1])
        mask = np.zeros(img.shape, dtype=np.uint8)
        roi_corners = np.array([[a, b, c, d]], dtype=np.int32)
        # fill the ROI so it doesn't get wiped out when the mask is applied
        channel_count = img.shape[2]  # i.e. 3 or 4 depending on your img
        ignore_mask_color = (255, ) * channel_count
        cv2.fillPoly(mask, roi_corners, ignore_mask_color)
        # from Masterfool: use cv2.fillConvexPoly if you know it's convex
        # apply the mask
        masked_img = cv2.bitwise_and(img, mask)
        k1 = 70
        k2 = 30
        z = [0, 0]
        if i[1] < j[1]:
            z[0] = (k1 * i[0] + k2 * j[0]) / (k1 + k2)
            z[1] = (k1 * i[1] + k2 * j[1]) / (k1 + k2)
            if i[0] < j[0]:
                # crop_img = masked_img[i[1]:j[1], i[0]:j[0]]
                crop_img = masked_img[z[1]:j[1], z[0]:j[0]]
            elif i[0] > j[0]:
                # crop_img = masked_img[i[1]:j[1], j[0]:i[0]]
                crop_img = masked_img[z[1]:j[1], j[0]:z[0]]
            elif i[0] == j[0]:
                # crop_img = masked_img[i[1]:j[1], j[0]-offset:j[0]+offset]
                crop_img = masked_img[z[1]:j[1], j[0] - offset:j[0] + offset]
        elif i[1] > j[1]:
            z[0] = (k1 * j[0] + k2 * i[0]) / (k1 + k2)
            z[1] = (k1 * j[1] + k2 * i[1]) / (k1 + k2)
            if i[0] < j[0]:
                # crop_img = masked_img[j[1]:i[1], i[0]:j[0]]
                crop_img = masked_img[z[1]:i[1], i[0]:z[0]]
            elif i[0] > j[0]:  # right lane
                # crop_img = masked_img[j[1]:i[1], j[0]:i[0]]
                crop_img = masked_img[z[1]:i[1], z[0]:i[0]]
            elif i[0] == j[0]:
                # crop_img = masked_img[j[1]:i[1], j[0]-offset:j[0]+offset]
                crop_img = masked_img[z[1]:i[1], j[0] - offset:j[0] + offset]
                # crop_img = masked_img[z[1]:i[1], cdj[0]-offset:j[0]+offset]
        crop_img = cv2.resize(crop_img, (224, 224))
        # print "crop img",crop_img.shape
        pil_im = Image.fromarray(crop_img).convert('RGB')
        inputs = trans(pil_im).view(3, 224, 224)
        # print inputs
        # print (pil_im)
        image_vec.append(inputs)

    if len(image_vec) != 0:
        inputs = torch.stack(image_vec)
        # pil_im = Image.fromarray(crop_img).convert('RGB')
        # inputs = trans(pil_im).view(1, 3, 224, 224)
        inputs = Variable(inputs.cuda())
        # print (inputs)
        outputs = alexnet_model(inputs)
        o = outputs.data.cpu().numpy()
        # print (o)
        preds = np.argmax(o, axis=1)

        sky = 185
        f_r.predict()
        f_l.predict()
        pred_r = f_r.x
        pred_l = f_l.x
        pred_r_cov = f_r.P
        pred_l_cov = f_l.P
        left_lane_cov.append(np.diagonal(pred_l))
        right_lane_cov.append(np.diagonal(pred_r))
        hello_str = ""
        l_slope = []
        r_slope = []
        for i, j, pred in zip(point1, point2, preds):
            i[1] += sky
            j[1] += sky
            x_scale = 964 / 480.0
            y_scale = 1288 / 640.0
            i[0] = int(i[0] * x_scale)
            i[1] = int(i[1] * y_scale)
            j[0] = int(j[0] * x_scale)
            j[1] = int(j[1] * y_scale)
            # for line in file:
            # if pred == 0 and np.max(score)>0.85 :
            if pred == 0:
                # print "before process",i,j
                i, j = process_point(i, j, 1288, 964)
                # print "processed",i,j
                k1 = 40
                k2 = 60
                z = [0, 0]
                if j[1] < i[1]:
                    j[0] = (k1 * j[0] + k2 * i[0]) / (k1 + k2)
                    j[1] = (k1 * j[1] + k2 * i[1]) / (k1 + k2)
                else:
                    i[0] = (k1 * i[0] + k2 * j[0]) / (k1 + k2)
                    i[1] = (k1 * i[1] + k2 * j[1]) / (k1 + k2)
                if i[0] - j[0] != 0:
                    slope = (i[1] - j[1]) / float((i[0] - j[0]))
                    m = np.degrees(np.arctan(math.fabs(slope)))
                else:
                    slope = 1
                    m = 90
                # print (m)
                # print (slope)
                # if slope > 0 :
                if slope > 0 and (i[0] <= 1288 and j[0] <= 1288):
                    r_slope.append((slope, i, j))
                # elif slope < 0 :
                elif slope < 0 and (i[0] >= 0 and j[0] >= 0):
                    l_slope.append((slope, i, j))

        ###########################################3
        r_slope.sort(key=lambda x: x[0], reverse=True)
        l_slope.sort(key=lambda x: x[0], reverse=False)
        if len(r_slope) != 0:
            if ([r_slope[0][1][1] > r_slope[0][2][1]]):
                f_r.update(
                    np.array([[r_slope[0][1][0]], [r_slope[0][1][1]],
                              [r_slope[0][2][0]], [r_slope[0][2][1]]]))
                lane_not_detect_count_right = 0
            else:
                f_r.update(
                    np.array([[r_slope[0][2][0]], [r_slope[0][2][1]],
                              [r_slope[0][1][0]], [r_slope[0][1][1]]]))
                lane_not_detect_count_right = 0
        else:
            # f_r.update(f_r.x)
            lane_not_detect_count_right += 1
        if len(l_slope) != 0:
            if ([l_slope[0][1][1] > l_slope[0][2][1]]):
                f_l.update(
                    np.array([[l_slope[0][1][0]], [l_slope[0][1][1]],
                              [l_slope[0][2][0]], [l_slope[0][2][1]]]))
                lane_not_detect_count_left = 0
            else:
                f_l.update(
                    np.array([[l_slope[0][2][0]], [l_slope[0][2][1]],
                              [l_slope[0][1][0]], [l_slope[0][1][1]]]))
                lane_not_detect_count_left = 0
        else:
            # f_l.update(f_l.x)
            lane_not_detect_count_left += 1

        if len(r_slope) != 0:
            cv2.line(org_image,
                     tuple([r_slope[0][1][0], r_slope[0][1][1]]),
                     tuple([r_slope[0][2][0], r_slope[0][2][1]]),
                     color=(0, 0, 255),
                     thickness=2)
        if len(l_slope) != 0:
            cv2.line(org_image,
                     tuple([l_slope[0][1][0], l_slope[0][1][1]]),
                     tuple([l_slope[0][2][0], l_slope[0][2][1]]),
                     color=(0, 0, 255),
                     thickness=2)
        if lane_not_detect_count_left < 5:
            cv2.line(org_image,
                     tuple([int(pred_l[0][0]), int(pred_l[1][0])]),
                     tuple([int(pred_l[2][0]), int(pred_l[3][0])]),
                     color=(0, 255, 0),
                     thickness=2)
        if lane_not_detect_count_right < 5:
            cv2.line(org_image,
                     tuple([int(pred_r[0][0]), int(pred_r[1][0])]),
                     tuple([int(pred_r[2][0]), int(pred_r[3][0])]),
                     color=(0, 255, 0),
                     thickness=2)
        # out.write(org_image)
        cv2.imshow('Frame', org_image)
        cv2.waitKey(3)
        # cv2.imwrite("./result/dnd_kal/"+str(file_name)+".jpg",img)
        # cv2.imwrite("./result/india_gate_kal/"+str(file_name)+".jpg",img)
        # cv2.imwrite("./result/india_kal/"+str(file_name)+".jpg",img)
        # Press Q on keyboard to exit
        # if cv2.waitKey(50) & 0xFF == ord('q'):
        #     break
        ################################3
        cv2.imwrite("" + str(count) + ".jpg", org_image)
        # val_file.write("%s\n" %(str(n)+"_"+str(count)+".jpg"))
        count += 1
        # plt.show()
        # Gradients
    else:
        cv2.imshow('Frame', org_image)
        cv2.waitKey(3)
        cv2.imwrite(
            "/home/ayush/Documents/swarath_lane/kalman_image_test/" +
            str(count) + ".jpg", org_image)
        count += 1
def detect_video(yolo, video_path, garbage_in_can, emergency_stop): from PIL import Image, ImageFont, ImageDraw #Start ROS node pub, pub_flag, pub_track, pub_frame1, pub_frame2 = start_node() vid = RealsenseCapture() vid.start() bridge = CvBridge() accum_time = 0 curr_fps = 0 fps = "FPS: ??" prev_time = timer() worldy = 0.0 while True: pub_track.publish(0) ret, frames, _ = vid.read() frame = frames[0] depth_frame = frames[1] image = Image.fromarray(frame) image, bottle, person, right, left, bottom, top, right2, left2, bottom2, top2 = yolo.detect_image( image, pub) result = np.asarray(image) curr_time = timer() exec_time = curr_time - prev_time prev_time = curr_time accum_time = accum_time + exec_time curr_fps = curr_fps + 1 if accum_time > 1: accum_time = accum_time - 1 fps = "FPS: " + str(curr_fps) curr_fps = 0 cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50, color=(255, 0, 0), thickness=2) cv2.imshow("result", result) yolo_frame = bridge.cv2_to_imgmsg(result, "bgr8") # yolo_frame = result pub_frame1.publish(yolo_frame) if cv2.waitKey(1) & 0xFF == ord('q'): break if (bottle == False) or (person == False): continue # ------------------------------Tracking----------------------------------- # tracker_types = ['BOOSTING', 'MIL','KCF', 'TLD', 'MEDIANFLOW', 'GOTURN', 'MOSSE', 'CSRT'] # tracker_type = tracker_types[7] tracker = cv2.TrackerCSRT_create() tracker2 = cv2.TrackerCSRT_create() # setup initial location of window left, right, top, bottom = left, right, top, bottom r, h, ci, w = top, bottom - top, left, right - left # simply hardcoded the values r, h, c, w frame_b, frame_g, frame_r = frame[:, :, 0], frame[:, :, 1], frame[:, :, 2] hist_b = cv2.calcHist([frame_b[top:bottom, left:right]], [0], None, [256], [0, 256]) hist_g = cv2.calcHist([frame_g[top:bottom, left:right]], [0], None, [256], [0, 256]) hist_r = cv2.calcHist([frame_r[top:bottom, left:right]], [0], None, [256], [0, 256]) cv2.normalize(hist_b, hist_b, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_g, hist_g, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_r, hist_r, 0, 255, cv2.NORM_MINMAX) track_window = (ci, r, w, h) r2, h2, ci2, w2 = top2, bottom2 - top2, left2, right2 - left2 # simply hardcoded the values r, h, c, w hist_bp = cv2.calcHist([frame_b[top2:bottom2, left2:right2]], [0], None, [256], [0, 256]) hist_gp = cv2.calcHist([frame_g[top2:bottom2, left2:right2]], [0], None, [256], [0, 256]) hist_rp = cv2.calcHist([frame_r[top2:bottom2, left2:right2]], [0], None, [256], [0, 256]) cv2.normalize(hist_bp, hist_bp, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_gp, hist_gp, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_rp, hist_rp, 0, 255, cv2.NORM_MINMAX) track_window2 = (ci2, r2, w2, h2) # print(left2, top2, right2-left2, bottom2-top2) cv2.imwrite('bottledetect.jpg', frame[r:r + h, ci:ci + w]) cv2.imwrite('persondetect.jpg', frame[r2:r2 + h2, ci2:ci2 + w2]) # set up the ROI for tracking roi = frame[r:r + h, ci:ci + w] hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.))) roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180]) cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX) # Setup the termination criteria, either 10 iteration or move by atleast 1 pt term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1) ok = tracker.init(frame, track_window) ok2 = tracker2.init(frame, track_window2) track_thing = 0 #bottle pts = Point() pts2 = Point() untrack = 0 while (1): ret, frames, depth = 
vid.read() frame = frames[0] depth_frame = frames[1] if ret == True: hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1) # apply meanshift to get the new location ok, track_window = tracker.update(frame) x, y, w, h = track_window ok, track_window2 = tracker2.update(frame) x2, y2, w2, h2 = track_window2 # Draw it on image img2 = cv2.rectangle(frame, (x, y), (x + w, y + h), 255, 2) if not track_thing: img2 = cv2.rectangle(img2, (x2, y2), (x2 + w2, y2 + h2), 255, 2) else: img2 = cv2.rectangle(img2, (x2, y2), (x2 + w2, y2 + h2), (0, 0, 255), 2) cv2.imshow('Tracking', img2) tracking_frame = bridge.cv2_to_imgmsg(img2, "bgr8") # tracking_frame = img2 pub_frame2.publish(tracking_frame) # https://www.intelrealsense.com/wp-content/uploads/2020/06/Intel-RealSense-D400-Series-Datasheet-June-2020.pdf total, cnt = 0, 0 for i in range(3): for j in range(3): dep = depth.get_distance( np.maximum(0, np.minimum(i + x + w // 2, 639)), np.maximum(0, np.minimum(j + y + h // 2, 479))) if (dep) != 0: total += dep cnt += 1 if cnt != 0: worldz = total / cnt # (x-w//2) # 人にぶつからないように距離を確保するため if (worldz < 1.0) or (worldz > 3.0): worldz = 0 else: worldz = 0 total2, cnt2 = 0, 0 for i in range(3): for j in range(3): dep2 = depth.get_distance( np.maximum(0, np.minimum(i + x2 + w2 // 2, 639)), np.maximum(0, np.minimum(j + y2 + h2 // 2, 479))) if dep2 != 0: total2 += dep2 cnt2 += 1 if cnt2 != 0: worldz2 = total2 / cnt2 if worldz2 < 1.0: worldz2 = 0 else: worldz2 = 0 # print('worldz', worldz) # print('worldz2', worldz2) if (worldz == 0) or (worldz2 == 0): # break worldx, worldy = 0, 0 worldx = 0 pts.x, pts.y, pts.z = 0.0, 0.0, 0.0 worldx2, worldy2 = 0, 0 pts2.x, pts2.y, pts2.z = 0.0, 0.0, 0.0 else: # focus length = 1.93mm, distance between depth cameras = about 5cm, a pixel size = 3um if (track_thing == 0): #bottle Tracking u_ud = (0.05 * 1.88 * 10**(-3)) / (3 * 10**(-6) * worldz) # print('u_ud', u_ud) # print('x, y =', (x+w//2)-(img2.shape[1]//2), (img2.shape[0]//2)-(y+h//2)) # 深度カメラとカラーカメラの物理的な距離を考慮した項(-0.3*u_ud) # これらの座標は物体を見たときの左の深度カメラを基準とする worldx = 0.05 * (x + w // 2 - (img2.shape[1] // 2) - 0.3 * u_ud) / u_ud worldy = 0.05 * ((img2.shape[0] // 2) - (y + h)) / u_ud print('x,y,z = ', worldx, worldy, worldz - 1.0) pts.y, pts.z, pts.x = float(worldx), float( worldy), float(worldz) - 1.0 else: #human Tracking u_ud = (0.05 * 1.88 * 10**(-3)) / (3 * 10**(-6) * worldz2) worldx2 = 0.05 * (x2 + w2 // 2 - (img2.shape[1] // 2) - 0.3 * u_ud) / u_ud worldy2 = 0.05 * ((img2.shape[0] // 2) - (y2 + h2)) / u_ud print('x2,y2,z2 = ', worldx2, worldy2, worldz2 - 1.0) pts2.x, pts2.y, pts.z = float(worldx2), float( worldy2), float(worldz2) - 1.0 print("track_thing = ", track_thing) frame_b, frame_g, frame_r = frame[:, :, 0], frame[:, :, 1], frame[:, :, 2] hist_b2 = cv2.calcHist([frame_b[y:y + h, x:x + w]], [0], None, [256], [0, 256]) hist_g2 = cv2.calcHist([frame_g[y:y + h, x:x + w]], [0], None, [256], [0, 256]) hist_r2 = cv2.calcHist([frame_r[y:y + h, x:x + w]], [0], None, [256], [0, 256]) cv2.normalize(hist_b2, hist_b2, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_g2, hist_g2, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_r2, hist_r2, 0, 255, cv2.NORM_MINMAX) hist_bp2 = cv2.calcHist([frame_b[y2:y2 + h2, x2:x2 + w2]], [0], None, [256], [0, 256]) hist_gp2 = cv2.calcHist([frame_g[y2:y2 + h2, x2:x2 + w2]], [0], None, [256], [0, 256]) hist_rp2 = cv2.calcHist([frame_r[y2:y2 + h2, x2:x2 + w2]], [0], None, [256], [0, 256]) cv2.normalize(hist_bp2, hist_bp2, 0, 255, cv2.NORM_MINMAX) 
cv2.normalize(hist_gp2, hist_gp2, 0, 255, cv2.NORM_MINMAX) cv2.normalize(hist_rp2, hist_rp2, 0, 255, cv2.NORM_MINMAX) comp_b = cv2.compareHist(hist_b, hist_b2, cv2.HISTCMP_CORREL) comp_g = cv2.compareHist(hist_g, hist_g2, cv2.HISTCMP_CORREL) comp_r = cv2.compareHist(hist_r, hist_r2, cv2.HISTCMP_CORREL) comp_bp = cv2.compareHist(hist_bp, hist_bp2, cv2.HISTCMP_CORREL) comp_gp = cv2.compareHist(hist_gp, hist_gp2, cv2.HISTCMP_CORREL) comp_rp = cv2.compareHist(hist_rp, hist_rp2, cv2.HISTCMP_CORREL) # print('compareHist(b)', comp_b) # print('compareHist(g)', comp_g) # print('compareHist(r)', comp_r) # print('compareHist(bp)', comp_bp) # print('compareHist(gp)', comp_gp) # print('compareHist(rp)', comp_rp) # print("garbage_in_can", garbage_in_can) # 追跡を止める条件は,bottle追跡中にヒストグラムが大きく変化するか枠が無くなるまたはpersonを見失う,もしくはperson追跡中にヒストグラムが大きく変化するか枠が無くなるまたはゴミがゴミ箱に入れられた, if ((track_thing == 0 and ((comp_b <= 0.1) or (comp_g <= 0.1) or (comp_r <= 0.1) or track_window == (0, 0, 0, 0))) or (track_window2 == (0, 0, 0, 0)) or (track_thing == 1 and ((comp_bp <= 0.) or (comp_gp <= 0.) or (comp_rp <= 0.)))): untrack += 1 print("untrack = ", untrack) if untrack >= 30: print("追跡が外れた!\n") break elif (track_thing == 0 and (x + w > 640 or x < 0) and (y + h > 480 or y < 0)) or (track_thing == 1 and (x2 + w2 > 640 or x2 < 0) and (y2 + h2 > 480 or y2 < 0)): untrack += 1 print("untrack = ", untrack) if untrack >= 50: print("枠が画面外で固まった") break elif (track_thing == 1 and garbage_in_can == 1): print("ゴミを捨てたため追跡を終えます") break else: untrack = 0 # ポイ捨ての基準はbottleを追跡していて,地面から10cmのところまで落ちたか,bottleを見失ったかつ見失う前のフレームでの高さがカメラの10cmより下 print('track_window = ', track_window) if (((worldy <= -0.10) or (track_window == (0, 0, 0, 0) and (worldy < 0.5))) and (not track_thing)): print("ポイ捨てした!\n") track_thing = 1 #human if track_thing == 0: tracking_point = pts if not (pts.x == 0.0 and pts.y == 0.0 and pts.z == 0.0): pub.publish(tracking_point) flag = 0 #bottle else: tracking_point = pts2 if not (pts2.x == 0.0 and pts2.y == 0.0 and pts2.z == 0.0): pub.publish(tracking_point) flag = 1 #person pub_flag.publish(flag) k = cv2.waitKey(1) & 0xff print("emergency_stop", emergency_stop) if (k == 27) or emergency_stop == 1: # dev # if emergency_stop: # ops print("program is stoped!") sys.exit(0) else: break pub_track.publish(1) yolo.close_session()
def video_image():
    # grab a reference to the image panels
    global panelV1
    try:
        while not stopEvent.is_set():
            # Capture a still image from the video stream
            ret, frame = cap.read()  # Read a new frame
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)

            ''' Keep for debug '''
            # Canny test code, only called w/ imshow() below
            # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # edged = cv2.Canny(gray, 50, 100)

            # Convert image to HSV
            hsvframe = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

            # Set up the min and max HSV settings
            lower = np.array([barH.get(), barS.get(), barV.get()])
            upper = np.array([barH2.get(), barS2.get(), barV2.get()])
            mask = cv2.inRange(hsvframe, lower, upper)
            mask = Image.fromarray(mask)
            mask = ImageTk.PhotoImage(mask)

            ''' Keep for debug '''
            # cv2.imshow("Original",frame)
            # cv2.imshow("Edged",edged)
            # cv2.imshow("HSV",hsvframe)
            # cv2.imshow("Mask", mask)

            # convert the images to PIL format...
            # pilframe=cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            # pilframe=Image.fromarray(pilframe)
            # pilframe=ImageTk.PhotoImage(pilframe)

            # If the panels are None, initialize them
            if panelV1 is None:
                print("enter panelV1 None")
                # the first panel will store our original image
                panelV1 = Label(master, image=mask)
                panelV1.image = mask
                # panelV1.pack(side="right", padx=10,pady=10)
                panelV1.pack()
            # otherwise, update the image panels
            else:
                # update the panels
                # print("enter panelV1 update")
                panelV1.configure(image=mask)
                panelV1.image = mask
                panelV1.pack()

            ch = cv2.waitKey(10)  # 1 == capture every 10 millisec
            if ch & 0xFF == ord('q'):  # "q" to exit loop
                break

        # Required to release the device and prevent having to reset system
        cap.release()
        cv2.destroyAllWindows()
        print("exit video_image()")
    except RuntimeError as e:
        print("runtime error()")