def CreateDataset(opt):
    # Lazy import so the COCO dependencies are only loaded when needed.
    from data.coco_dataset import CocoDataset
    dataset = CocoDataset()
    print("dataset [%s] was created" % dataset.name())
    dataset.initialize(opt)
    return dataset
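# Usage sketch (hypothetical; not part of the original file). CreateDataset
# only needs an options object exposing the attributes that
# CocoDataset.initialize() reads; the attribute names below are assumptions
# modeled on SPADE-style option objects, not confirmed by this repo.
def _create_dataset_example():
    from types import SimpleNamespace
    opt = SimpleNamespace(label_dir="datasets/coco_stuff/val_label",  # assumed field
                          image_dir="datasets/coco_stuff/val_img",    # assumed field
                          label_nc=182)                               # assumed field
    return CreateDataset(opt)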
def run(self):
    # Setup
    CONFIG = self.deeplab_opt['CONFIG']
    self.CONFIG = CONFIG
    model_path = self.deeplab_opt['model_path']
    cuda = self.deeplab_opt['cuda']
    crf = self.deeplab_opt['crf']
    camera_id = self.deeplab_opt['camera_id']
    device = get_device(cuda)
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = True

    classes = get_classtable(CONFIG)
    postprocessor = setup_postprocessor(CONFIG) if crf else None

    # Instantiate the model class named in the config (e.g. "DeepLabV2_ResNet101_MSC").
    model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    print("Model:", CONFIG.MODEL.NAME)

    # SPADE model
    spade_model = Pix2PixModel(self.spade_opt)
    spade_model.eval()
    print("Spade!")
    print(spade_model)

    coco_dataset = CocoDataset()
    coco_dataset.initialize(self.spade_opt)
    print(coco_dataset)

    #cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    #np.set_printoptions(threshold=sys.maxsize)

    while True:
        if len(self.queue) > 0:
            frame = self.queue.pop()
            print("Original image shape {}".format(frame.shape))
            image, raw_image = preprocessing(frame, device, CONFIG)
            print("Preprocessed image shape {}".format(raw_image.shape))
            labelmap = inference(model, image, raw_image, postprocessor)

            # Merge table variants (164, 109) into 66 ("dining table") ...
            labelmap[labelmap == 164] = 66
            labelmap[labelmap == 109] = 66
            #not_dining_mask = ((labelmap < 43) | (labelmap > 50)) & (labelmap != 66) & (labelmap != 0)
            #labelmap[not_dining_mask] = 156  # Sky

            # ... then send tables to 154 ("sea").
            labelmap[labelmap == 66] = 154

            # Dining objects (43-50, 138, 142) and appliances (71-79) all map to 149.
            dining_objects_mask = ((labelmap >= 43) & (labelmap <= 50)) | (labelmap == 138) | (labelmap == 142)
            appliances_mask = (labelmap >= 71) & (labelmap <= 79)
            labelmap[dining_objects_mask] = 149
            labelmap[appliances_mask] = 149

            # Everything else becomes 156 ("sky").
            not_sea_mask = (labelmap != 149) & (labelmap != 154)
            labelmap[not_sea_mask] = 156

            # Earlier remapping experiments, kept for reference:
            #colormap = self.colorize(labelmap)
            #labelmap[labelmap == 66] = 154
            # Frisbee and more to sea?
            #labelmap[labelmap == 33] = 154
            #labelmap[labelmap == 66] = 154
            #labelmap[labelmap == 80] = 154
            # Bottle to flower?
            #labelmap[labelmap == 43] = 118
            # Person to rock?
            #labelmap[labelmap == 0] = 168
            # Dog to person
            #labelmap[labelmap == 17] = 0
            # Sky, grass, and bottle to flower
            #bottle_mask = (labelmap == 43)
            #labelmap[0:193, :] = 156
            #labelmap[:, :] = 123
            #labelmap[bottle_mask] = 118
            #print(labelmap.shape)
            # Bottle to potted plant
            #labelmap[labelmap == 43] = 63
            #dining_stuff = [43, 44, 45, 46, 47, 48, 49, 50, 66]
            #dining_mask = np.isin(labelmap, dining_stuff, invert=True)
            #not_dining_mask = (labelmap < 43) | (labelmap > 50) & (labelmap != 66)
            #labelmap[dining_objects_mask] = 63

            labelimg = Image.fromarray(np.uint8(labelmap), 'L')
            label_resized = np.array(labelimg.resize((256, 256), Image.NEAREST))

            # Build a crude instance map: one instance per unique label id.
            uniques = np.unique(labelmap)
            print(uniques)
            instancemap = np.zeros(labelmap.shape)
            for instance_counter, label_id in enumerate(uniques):
                instancemap[labelmap == label_id] = instance_counter
            instanceimg = Image.fromarray(np.uint8(instancemap), 'L')

            #colormap[not_dining_mask] = [0, 0, 0]

            item = coco_dataset.get_item_from_images(labelimg, instanceimg)
            generated = spade_model(item, mode='inference')
            generated_np = util.tensor2im(generated[0])
            #color_resized = cv2.cvtColor(np.array(Image.fromarray(colormap).resize((256, 256), Image.NEAREST)), cv2.COLOR_BGR2RGB)

            # Masking: black out everything that was remapped to 156 ("sky").
            generated_np[label_resized == 156] = [0, 0, 0]
            #print("Generated image shape {} label resize shape {}".format(generated_np.shape, label_resized.shape))

            # Earlier masking/compositing experiments, kept for reference:
            #not_dining_mask = (label_resized < 43) | (label_resized > 50) & (label_resized != 66)
            #generated_np[not_dining_mask] = [0, 0, 0]
            # Only people
            #people_mask = np.isin(label_resized, [0, 66], invert=True)
            #color_resized[label_resized != 0] = [0, 0, 0]
            #generated_rgb = cv2.cvtColor(generated_np, cv2.COLOR_BGR2RGB)
            #color_gray = cv2.cvtColor(color_resized, cv2.COLOR_BGR2GRAY)
            #color_gray_rgb = cv2.cvtColor(color_gray, cv2.COLOR_GRAY2RGB)
            #not_dining_resized = (label_resized < 43) | (label_resized > 50) & (label_resized != 66)
            #color_gray_rgb[label_resized != 154, :] = [0, 0, 0]
            #generated_np[label_resized == 154, :] = [0, 0, 0]
            #raw_image_resized = cv2.cvtColor(np.array(Image.fromarray(raw_image).resize((256, 256), Image.NEAREST)), cv2.COLOR_BGR2RGB)
            #raw_image_resized[people_mask] = [0, 0, 0]
            #cv2.addWeighted(color_resized, 0.5, raw_image_resized, 0.5, 0.0, raw_image_resized)
            #self.push_frame(raw_image_resized)
            #raw_rgb = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB)
            #final = np.concatenate((generated_np, color_resized), axis=1)
            #print("Gans shape {}, colormap shape {}, Final shape {}".format(generated_np.shape, color_resized.shape, final.shape))
            #final[:, :256, :] = generated_np
            #final[:, 256:, :] = color_resized
            #map_rgb = cv2.cvtColor(colormap, cv2.COLOR_BGR2RGB)
            #cv2.addWeighted(map_rgb, 0.5, raw_rgb, 0.5, 0.0, raw_rgb)
            #self.push_frame(raw_rgb)
            #print("raw image shape {}".format(raw_image.shape))
            #print("Generated image {}".format(generated_np))
            #print("raw image {}".format(raw_image))
            # Register mouse callback function
            #cv2.setMouseCallback(window_name, self.mouse_event, labelmap)
            # Overlay prediction
            #cv2.addWeighted(colormap, 1.0, raw_image, 0.0, 0.0, raw_image)
            # Quit by pressing "q" key
            #cv2.imshow(window_name, raw_image)
            #cv2.resizeWindow(window_name, 1024, 1024)
            #if cv2.waitKey(10) == ord("q"):
            #    break

            self.push_frame(generated_np)
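# The unique-label -> instance-index loop above recurs in several of these
# entry points. A minimal refactor sketch (a hypothetical helper, not part of
# the original code) that produces the same instance map in one call:
import numpy as np

def build_instance_map(labelmap):
    """Assign one instance index (0, 1, 2, ...) per unique label id."""
    instancemap = np.zeros(labelmap.shape, dtype=np.uint8)
    for idx, label_id in enumerate(np.unique(labelmap)):
        instancemap[labelmap == label_id] = idx
    return instancemap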
def main():
    args = parser.parse_args()
    print(args)

    # For now, batch_size should match the number of GPUs.
    assert args.batch_size == torch.cuda.device_count()

    # Create model.
    model = detector(arch=args.cnn_arch,
                     base_cnn_pkl_file=args.cnn_pkl,
                     mapping_file=args.cnn_mapping,
                     output_prob=False,
                     return_rois=False,
                     return_img_features=False)
    model = model.cuda()

    # Freeze the early layers of the backbone.
    stop_grad = ['conv1', 'bn1', 'relu', 'maxpool', 'layer1']
    model_no_grad = torch.nn.Sequential(*[getattr(model.model, l) for l in stop_grad])
    for param in model_no_grad.parameters():
        param.requires_grad = False

    # Define optimizer (only over parameters that still require gradients).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.wd)

    # Create dataset.
    train_dataset = CocoDataset(ann_file=args.dset_ann,
                                img_dir=args.dset_path,
                                proposal_file=args.dset_rois,
                                mode='train',
                                sample_transform=preprocess_sample(target_sizes=[800],
                                                                   sample_proposals_for_training=True))
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.workers,
                              collate_fn=collate_custom)

    training_stats = TrainingStats(losses=['loss_cls', 'loss_bbox'],
                                   metrics=['accuracy_cls'],
                                   solver_max_iters=args.max_iter)

    iter = args.start_iter
    print('starting training')
    while iter < args.max_iter:
        for i, batch in enumerate(train_loader):
            if args.batch_size == 1:
                batch = to_cuda_variable(batch, volatile=False)
            else:
                # When using multiple GPUs, convert to cuda later in
                # data_parallel and list_to_tensor.
                batch = to_variable(batch, volatile=False)

            # Update lr.
            lr = get_lr_at_iter(iter)
            adjust_learning_rate(optimizer, lr)

            # Start measuring time.
            training_stats.IterTic()

            # Forward pass.
            if args.batch_size == 1:
                cls_score, bbox_pred = model(batch['image'], batch['rois'])
                list_to_tensor = lambda x: x
            else:
                # Run the model distributed over GPUs and concatenate the
                # outputs for the whole batch.
                cls_score, bbox_pred = data_parallel(model, (batch['image'], batch['rois']))
                # Convert ground-truth data from lists to concatenated tensors.
                list_to_tensor = lambda x: torch.cat(tuple([i.cuda() for i in x]), 0)

            cls_labels = list_to_tensor(batch['labels_int32']).long()
            bbox_targets = list_to_tensor(batch['bbox_targets'])
            bbox_inside_weights = list_to_tensor(batch['bbox_inside_weights'])
            bbox_outside_weights = list_to_tensor(batch['bbox_outside_weights'])

            # Compute loss.
            loss_cls = cross_entropy(cls_score, cls_labels)
            loss_bbox = smooth_L1(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)

            # Compute classification accuracy (for stats reporting).
            acc = accuracy(cls_score, cls_labels)

            # Get final loss.
            loss = loss_cls + loss_bbox

            # Update.
            optimizer.zero_grad()
            loss.backward()

            # Without gradient clipping we get infs and NaNs; the Caffe SGD
            # solver appears to perform gradient clipping by default:
            # https://github.com/BVLC/caffe/blob/master/src/caffe/solvers/sgd_solver.cpp
            # Matterport's Mask R-CNN also required gradient clipping
            # (see the README in https://github.com/matterport/Mask_RCNN).
            # The value max_norm=35 was taken from
            # https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto
            clip_grad_norm(filter(lambda p: p.requires_grad, model.parameters()),
                           max_norm=35, norm_type=2)
            optimizer.step()

            # Stats.
            training_stats.IterToc()
            training_stats.UpdateIterStats(
                losses_dict={'loss_cls': loss_cls.data.cpu().numpy().item(),
                             'loss_bbox': loss_bbox.data.cpu().numpy().item()},
                metrics_dict={'accuracy_cls': acc.data.cpu().numpy().item()})
            training_stats.LogIterStats(iter, lr)

            # Save checkpoint.
            if (iter + 1) % args.checkpoint_period == 0:
                save_checkpoint({
                    'iter': iter,
                    'args': args,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, args.checkpoint_fn)

            if iter == args.start_iter + 20:
                # Reset the iteration timer to remove outliers from the first
                # few SGD iterations.
                training_stats.ResetIterTimer()

            # Allow finishing in the middle of an epoch.
            if iter > args.max_iter:
                break

            # Advance iteration.
            iter += 1
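# get_lr_at_iter() is defined elsewhere in this repo. A minimal sketch of the
# Detectron-style warmup + step-decay schedule it is assumed to implement is
# below; the warmup length, decay factor, and step boundaries are illustrative
# assumptions, not values taken from this codebase.
def get_lr_at_iter_sketch(it, base_lr=0.01, warmup_iters=500,
                          warmup_factor=1.0 / 3.0, gamma=0.1,
                          steps=(240000, 320000)):
    if it < warmup_iters:
        # Linearly ramp from base_lr * warmup_factor up to base_lr.
        alpha = it / warmup_iters
        return base_lr * (warmup_factor * (1.0 - alpha) + alpha)
    # Multiply by gamma at each step boundary already passed.
    return base_lr * gamma ** sum(1 for s in steps if it >= s)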
def live(config_path, model_path, cuda, crf, camera_id):
    """
    Inference from camera stream
    """
    # Setup
    # safe_load avoids the unsafe default loader; config_path is the open
    # YAML config (a click.File handle in the upstream deeplab-pytorch demo).
    CONFIG = Dict(yaml.safe_load(config_path))
    device = get_device(cuda)
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = True

    classes = get_classtable(CONFIG)
    postprocessor = setup_postprocessor(CONFIG) if crf else None

    model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    print("Model:", CONFIG.MODEL.NAME)

    # SPADE model
    opt = TestOptions().parse()
    opt.use_vae = False
    spade_model = Pix2PixModel(opt)
    spade_model.eval()
    print("Spade!")
    print(spade_model)

    coco_dataset = CocoDataset()
    coco_dataset.initialize(opt)
    print(coco_dataset)

    # UVC camera stream
    cap = cv2.VideoCapture(camera_id)
    cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*"YUYV"))

    def colorize(labelmap):
        print(labelmap.shape)
        # Assign a unique color to each label.
        labelmap = labelmap.astype(np.float32) / CONFIG.DATASET.N_CLASSES
        colormap = cm.jet_r(labelmap)[..., :-1] * 255.0
        return np.uint8(colormap)

    def mouse_event(event, x, y, flags, labelmap):
        # Show the class name of the pixel under the mouse.
        label = labelmap[y, x]
        name = classes[label]
        print(name)

    window_name = "{} + {}".format(CONFIG.MODEL.NAME, CONFIG.DATASET.NAME)
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    np.set_printoptions(threshold=sys.maxsize)

    while True:
        _, frame = cap.read()
        image, raw_image = preprocessing(frame, device, CONFIG)
        #print("Image shape {}".format(image.shape))
        labelmap = inference(model, image, raw_image, postprocessor)

        # Bottle to flower
        labelmap[labelmap == 43] = 118

        # Earlier remapping experiments, kept for reference:
        # Frisbee and more to sea?
        #labelmap[labelmap == 33] = 154
        #labelmap[labelmap == 66] = 154
        #labelmap[labelmap == 80] = 154
        # Person to rock?
        #labelmap[labelmap == 0] = 168
        # Dog to person
        #labelmap[labelmap == 17] = 0
        # Sky, grass, and bottle to flower
        #bottle_mask = (labelmap == 43)
        #labelmap[0:193, :] = 156
        #labelmap[:, :] = 123
        #labelmap[bottle_mask] = 118
        #print(labelmap.shape)
        #colormap = colorize(labelmap)

        # One instance per unique label id.
        uniques = np.unique(labelmap)
        print(uniques)
        instancemap = np.zeros(labelmap.shape)
        for instance_counter, label_id in enumerate(uniques):
            instancemap[labelmap == label_id] = instance_counter

        labelimg = Image.fromarray(np.uint8(labelmap), 'L')
        instanceimg = Image.fromarray(np.uint8(instancemap), 'L')
        #labelimg.show()

        item = coco_dataset.get_item_from_images(labelimg, instanceimg)
        generated = spade_model(item, mode='inference')
        generated_np = util.tensor2im(generated[0])

        # Masking experiments, kept for reference:
        #print("Generated image shape {} label resize shape {}".format(generated_np.shape, label_resized.shape))
        #label_resized = np.array(labelimg.resize((256, 256), Image.NEAREST))
        #generated_np[label_resized != 118, :] = [0, 0, 0]

        generated_rgb = cv2.cvtColor(generated_np, cv2.COLOR_BGR2RGB)
        #print("raw image shape {}".format(raw_image.shape))
        #print("Generated image {}".format(generated_np))
        #print("raw image {}".format(raw_image))

        # Register mouse callback function.
        cv2.setMouseCallback(window_name, mouse_event, labelmap)

        # Overlay prediction.
        #cv2.addWeighted(colormap, 1.0, raw_image, 0.0, 0.0, raw_image)

        cv2.imshow(window_name, generated_rgb)
        cv2.resizeWindow(window_name, 1024, 1024)

        # Quit by pressing the "q" key.
        if cv2.waitKey(10) == ord("q"):
            break
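# preprocessing() is imported from the deeplab-pytorch demo code. A minimal
# sketch of what it is assumed to do (resize, BGR mean subtraction, CHW tensor
# with a batch axis) is below; CONFIG.IMAGE.SIZE.TEST and CONFIG.IMAGE.MEAN.*
# follow deeplab-pytorch's config schema. Treat this as an approximation, not
# the repo's actual implementation.
def preprocessing_sketch(image, device, CONFIG):
    import numpy as np
    import cv2
    import torch

    # Scale so the longer side matches the configured test size.
    scale = CONFIG.IMAGE.SIZE.TEST / max(image.shape[:2])
    image = cv2.resize(image, dsize=None, fx=scale, fy=scale)
    raw_image = image.astype(np.uint8)

    # Subtract the per-channel BGR mean.
    image = image.astype(np.float32)
    image -= np.array([CONFIG.IMAGE.MEAN.B, CONFIG.IMAGE.MEAN.G, CONFIG.IMAGE.MEAN.R])

    # HWC -> CHW, add a batch axis, move to the target device.
    image = torch.from_numpy(image.transpose(2, 0, 1)).float().unsqueeze(0)
    return image.to(device), raw_image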
def run(self):
    # Setup
    CONFIG = self.deeplab_opt['CONFIG']
    self.CONFIG = CONFIG
    model_path = self.deeplab_opt['model_path']
    cuda = self.deeplab_opt['cuda']
    crf = self.deeplab_opt['crf']
    camera_id = self.deeplab_opt['camera_id']
    device = get_device(cuda)
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = True

    classes = get_classtable(CONFIG)
    postprocessor = setup_postprocessor(CONFIG) if crf else None

    model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    print("Model:", CONFIG.MODEL.NAME)

    # SPADE model
    spade_model = Pix2PixModel(self.spade_opt)
    spade_model.eval()
    spade_model.to(device)
    print("Spade!")
    print(spade_model)

    coco_dataset = CocoDataset()
    coco_dataset.initialize(self.spade_opt)
    print(coco_dataset)

    while True:
        # Drain pending OSC control messages first.
        while not self.osc_queue.empty():
            item = self.osc_queue.get()
            self.process_queue(item)

        if len(self.queue) > 0:
            frame = self.queue.pop()
            #print("Original Image shape {}".format(frame.shape))
            image, raw_image = preprocessing(frame, device, CONFIG)
            raw_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB)
            #print("Image shape {}".format(raw_image.shape))
            labelmap = inference(model, image, raw_image, postprocessor)

            if self.current_state == "test-bowl":
                self.test_bowl(labelmap)

            uniques = np.unique(labelmap)
            print([ID_TO_LABEL[unique] for unique in uniques])

            # Report the bowl's center point over OSC if one is visible.
            if self.send_bowl and LABEL_TO_ID['bowl'] in uniques:
                box = self.get_bounding_box_of(LABEL_TO_ID['bowl'], labelmap)
                (rmin, cmin, rmax, cmax) = box
                coords = (int((cmin + cmax) / 2), int((rmin + rmax) / 2))
                print("Bowl coords {} out of {}".format(coords, labelmap.shape))
                self.t2i_client.send_message("/deeplab/bowl", [coords[0], coords[1]])

            # Colorize before remapping if requested ...
            if not self.map_deeplab:
                colormap = self.colorize(labelmap)
                for masking in self.deeplab_masks:
                    mask = np.isin(labelmap, masking['items'], invert=masking['invert'])
                    colormap[mask, :] = [0, 0, 0]

            # Apply the configured label remappings.
            for mapping in self.maps:
                mask = np.isin(labelmap, mapping['from'], invert=mapping['invert'])
                if mapping['expand'] > 0:
                    mask = self.expand_mask(mask, mapping['expand'])
                labelmap[mask] = mapping['to']

            # ... or after remapping.
            if self.map_deeplab:
                colormap = self.colorize(labelmap)
                for masking in self.deeplab_masks:
                    mask = np.isin(labelmap, masking['items'], invert=masking['invert'])
                    colormap[mask, :] = [0, 0, 0]

            if self.show_raw:
                for masking in self.deeplab_masks:
                    mask = np.isin(labelmap, masking['items'], invert=masking['invert'])
                    raw_image[mask, :] = [0, 0, 0]

            if self.show_labels:
                for unique in uniques:
                    box = self.get_bounding_box_of(unique, labelmap)
                    self.put_text_in_center(colormap, box, ID_TO_LABEL[unique])

            #color_resized = cv2.cvtColor(np.array(Image.fromarray(colormap).resize((256, 256), Image.NEAREST)), cv2.COLOR_BGR2RGB)

            if self.show_gaugan:
                # One instance per unique label id.
                uniques = np.unique(labelmap)
                instancemap = np.zeros(labelmap.shape)
                for instance_counter, label_id in enumerate(uniques):
                    instancemap[labelmap == label_id] = instance_counter
                instanceimg = Image.fromarray(np.uint8(instancemap), 'L')

                labelimg = Image.fromarray(np.uint8(labelmap), 'L')
                label_resized = np.array(labelimg.resize((256, 256), Image.NEAREST))

                item = coco_dataset.get_item_from_images(labelimg, instanceimg)
                generated = spade_model(item, mode='inference')
                generated_np = util.tensor2im(generated[0])

                for masking in self.gaugan_masks:
                    mask = np.isin(label_resized, masking['items'], invert=masking['invert'])
                    generated_np[mask, :] = [0, 0, 0]
                print("SPADE shape {}".format(generated_np.shape))
            else:
                generated_np = np.uint8(np.zeros((256, 256, 3)))

            final = np.concatenate((generated_np, colormap, raw_image), axis=1)
            #final = np.concatenate((generated_np, colormap), axis=1)
            self.push_frame(final)
def live(config_path, model_path, cuda, crf, camera_id):
    """
    Inference from camera stream
    """
    # Setup
    # safe_load avoids the unsafe default loader; config_path is the open
    # YAML config (a click.File handle in the upstream deeplab-pytorch demo).
    CONFIG = Dict(yaml.safe_load(config_path))
    device = get_device(cuda)
    torch.set_grad_enabled(False)
    torch.backends.cudnn.benchmark = True

    classes = get_classtable(CONFIG)
    postprocessor = setup_postprocessor(CONFIG) if crf else None

    model = eval(CONFIG.MODEL.NAME)(n_classes=CONFIG.DATASET.N_CLASSES)
    state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    print("Model:", CONFIG.MODEL.NAME)

    # SPADE model
    opt = TestOptions().parse()
    opt.use_vae = False
    spade_model = Pix2PixModel(opt)
    spade_model.eval()
    print("Spade!")
    print(spade_model)

    coco_dataset = CocoDataset()
    coco_dataset.initialize(opt)
    print(coco_dataset)

    def colorize(labelmap):
        print(labelmap.shape)
        # Assign a unique color to each label.
        labelmap = labelmap.astype(np.float32) / CONFIG.DATASET.N_CLASSES
        colormap = cm.jet_r(labelmap)[..., :-1] * 255.0
        return np.uint8(colormap)

    def mouse_event(event, x, y, flags, labelmap):
        # Show the class name of the pixel under the mouse.
        label = labelmap[y, x]
        name = classes[label]
        print(name)

    window_name = "{} + {}".format(CONFIG.MODEL.NAME, CONFIG.DATASET.NAME)
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    np.set_printoptions(threshold=sys.maxsize)

    # RICOH THETA live preview: an MJPEG stream over HTTP. Each JPEG frame is
    # delimited by the SOI (0xFFD8) and EOI (0xFFD9) markers.
    url = THETA_URL + 'commands/execute'
    payload = {"name": "camera.getLivePreview"}
    buffer = bytes()
    with requests.post(url, json=payload,
                       auth=HTTPDigestAuth(THETA_ID, THETA_PASSWORD),
                       stream=True) as r:
        for chunk in r.iter_content(chunk_size=1024):
            buffer += chunk
            a = buffer.find(b'\xff\xd8')  # JPEG start-of-image
            b = buffer.find(b'\xff\xd9')  # JPEG end-of-image
            if a != -1 and b != -1:
                jpg = buffer[a:b + 2]
                buffer = buffer[b + 2:]
                # np.fromstring is deprecated; np.frombuffer is the drop-in replacement.
                frame = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8), cv2.IMREAD_COLOR)

                image, raw_image = preprocessing(frame, device, CONFIG)
                labelmap = inference(model, image, raw_image, postprocessor)
                colormap = colorize(labelmap)

                # One instance per unique label id.
                uniques = np.unique(labelmap)
                print(uniques)
                instancemap = np.zeros(labelmap.shape)
                for instance_counter, label_id in enumerate(uniques):
                    instancemap[labelmap == label_id] = instance_counter

                labelimg = Image.fromarray(np.uint8(labelmap), 'L')
                instanceimg = Image.fromarray(np.uint8(instancemap), 'L')
                #labelimg.show()

                item = coco_dataset.get_item_from_images(labelimg, instanceimg)
                generated = spade_model(item, mode='inference')
                generated_np = util.tensor2im(generated[0])

                # Masking experiments, kept for reference:
                #print("Generated image shape {} label resize shape {}".format(generated_np.shape, label_resized.shape))
                #label_resized = np.array(labelimg.resize((256, 256), Image.NEAREST))
                #generated_np[label_resized != 50, :] = [0, 0, 0]

                generated_rgb = cv2.cvtColor(generated_np, cv2.COLOR_BGR2RGB)

                # Register mouse callback function.
                cv2.setMouseCallback(window_name, mouse_event, labelmap)

                # Overlay prediction.
                cv2.addWeighted(colormap, 0.5, raw_image, 0.5, 0.0, raw_image)

                cv2.imshow(window_name, generated_rgb)
                cv2.resizeWindow(window_name, 1024, 1024)

                # Quit by pressing the "q" key.
                if cv2.waitKey(10) == ord("q"):
                    break
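# A hypothetical refactor of the MJPEG parsing above into a reusable
# generator (not part of the original script). Searching for the EOI marker
# only after the SOI guards against pairing a stale EOI with a later SOI,
# and the inner loop drains multiple complete frames from one chunk.
def iter_mjpeg_frames(response, chunk_size=1024):
    buffer = b''
    for chunk in response.iter_content(chunk_size=chunk_size):
        buffer += chunk
        while True:
            soi = buffer.find(b'\xff\xd8')
            if soi == -1:
                break
            eoi = buffer.find(b'\xff\xd9', soi + 2)
            if eoi == -1:
                break
            yield buffer[soi:eoi + 2]
            buffer = buffer[eoi + 2:]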