class PiCar:
    """Camera-equipped car steered by remote keyboard input or a loaded model.

    The camera frames are read in ``drive``; each frame either goes through
    the model (when one was given) or keyboard events are polled instead.
    """

    # Camera Info
    SCREEN_WIDTH = 200
    SCREEN_HEIGHT = 66
    FRAME_RATE = 24

    # Drive instruction name -> name of the DriveController method to call
    _DRIVE_METHODS = {
        'forward': 'forward',
        'right': 'pivot_right',
        'left': 'pivot_left',
    }

    def __init__(self, record_training_data=False, model=None):
        """Create the controllers, the camera and the pygame input window.

        record_training_data: collect frames and drive labels while driving;
            ignored when ``model`` is given.
        model: value handed to ``ModelManager.load_model``; when falsy the
            car is steered from keyboard events.
        """
        print('Setting up PiCar...')

        # Recording only happens when no model is doing the steering
        self.record_training_data = bool(record_training_data and not model)
        if self.record_training_data:
            print('Recording training data...')

        self.model_manager = None
        if model:
            self.model_manager = ModelManager()
            self.model_manager.load_model(model)

        self.dc = DriveController()
        self.aws_manager = AWSManager()
        self.setup_camera()

        # Structs for collecting training images and labels
        self.training_images = []
        self.training_labels = []

        # Stores the current user drive instruction
        self.current_drive_input = 'forward'

        # pygame is used to process remote keyboard inputs
        # (the small window is presumably what lets pygame receive key
        # events - TODO confirm)
        pygame.init()
        pygame.display.set_mode((100, 100))

    def setup_camera(self):
        """Create the PiCamera and the raw capture buffer used by ``drive``."""
        print('Initializing camera...')
        frame_size = (self.SCREEN_WIDTH, self.SCREEN_HEIGHT)

        camera = PiCamera()
        camera.resolution = frame_size
        camera.framerate = self.FRAME_RATE
        self.camera = camera
        self.raw_capture = PiRGBArray(camera, size=frame_size)

        # allow the camera to warm up
        time.sleep(1)
        print('Camera initialization complete...')

    def _steer(self, command):
        """Issue ``command`` to the drive controller and remember it."""
        getattr(self.dc, self._DRIVE_METHODS[command])()
        self.current_drive_input = command

    def process_user_inputs(self):
        """Turn pending pygame key events into drive commands.

        Releasing any key drives forward; pressing the right / left arrow
        pivots; pressing any other key drives forward. Other event types
        are ignored.
        """
        pivot_keys = {pygame.K_RIGHT: 'right', pygame.K_LEFT: 'left'}
        for event in pygame.event.get():
            if event.type == pygame.KEYUP:
                command = 'forward'
            elif event.type == pygame.KEYDOWN:
                command = pivot_keys.get(event.key, 'forward')
            else:
                continue
            self._steer(command)

    def convert_model_output_to_drive_command(self, model_output):
        """Drive according to the arg-max of ``model_output``.

        Index 0 means forward, 1 means right, 2 means left; any other index
        leaves the current drive instruction untouched.
        """
        print(str(model_output))
        commands_by_index = ('forward', 'right', 'left')
        max_index = np.argmax(model_output)
        if 0 <= max_index < len(commands_by_index):
            self._steer(commands_by_index[max_index])
        print(self.current_drive_input)

    def drive(self):
        """Main loop: steer on every camera frame until 'q' is pressed.

        On exit the car is stopped and, when recording, the collected
        images and labels are handed to the AWS manager.
        """
        frames = self.camera.capture_continuous(
            self.raw_capture, format="bgr", use_video_port=True)

        for frame in frames:
            image = frame.array

            if self.model_manager:
                # Use model to steer PiCar
                image = image / 255.0
                output = self.model_manager.run_inference(image)
                self.convert_model_output_to_drive_command(output)
            else:
                # Use remote keyboard inputs to steer PiCar
                self.process_user_inputs()

            # Record training data
            if self.record_training_data:
                self.training_images.append(image)
                self.training_labels.append(self.current_drive_input)

            # Display camera feed
            cv2.imshow("Feed", image)
            key = cv2.waitKey(1) & 0xFF

            # Reset the capture buffer before the next frame arrives
            self.raw_capture.truncate(0)

            if key != ord("q"):
                continue

            # User pressed 'q': stop the car and leave the loop
            self.dc.stop()

            # Upload training data to AWS before exiting
            if self.record_training_data:
                self.aws_manager.upload_training_data(
                    self.training_images, self.training_labels)
            break
class InferenceManager:
    """
    Inference / evaluation driver.

    Loads trained weights through a ``ModelManager``, builds one test
    ``DataLoader`` per entry of ``options.test_data_types`` and, in
    ``run_inference``, runs the model over each of them, collecting error
    metrics and (optionally) the resized predicted disparities.
    """

    def __init__(self, options):
        """Load the model and build the test loaders.

        options: object whose attributes configure the run. Read here:
            ``load_path``, ``config_path``, ``test_data_types``, ``network``,
            ``disable_normalisation`` and ``num_workers``;
            ``save_disparities`` is read later.

        Raises AssertionError when ``options.load_path`` is None.
        """
        print('---------------')
        self.opt = options

        # Create network and load the weights to evaluate
        self.model_manager = ModelManager(self.opt)
        assert self.opt.load_path is not None, \
            'load_path must be set to run inference'
        self.model_manager.load_model(weights_path=self.opt.load_path,
                                      load_optimiser=False)

        # extract model for easier access and switch it to eval mode
        self.model = self.model_manager.model
        self.model.eval()

        path_info = load_config(self.opt.config_path)

        self.test_loaders = {}
        for test_data_type in self.opt.test_data_types:
            data_path = path_info[test_data_type]
            width, height = sizes_lookup[self.opt.network][test_data_type]

            # every kitti variant shares the 'kitti' split folder but has
            # its own '<type>.txt' file list
            is_kitti = 'kitti' in test_data_type
            folder = 'kitti' if is_kitti else test_data_type
            textfile = test_data_type + '.txt' if is_kitti else 'test_files.txt'
            test_filenames = readlines(os.path.join('splits', folder, textfile))

            # create dataloaders
            dataset_class = data_type_lookup[test_data_type]
            test_dataset = dataset_class(
                data_path, test_filenames, height, width,
                is_train=False,
                disable_normalisation=self.opt.disable_normalisation,
                kitti2012=test_data_type == 'kitti2012',
                load_gt=test_data_type != 'kitti2015submission')

            self.test_loaders[test_data_type] = DataLoader(
                test_dataset, shuffle=False, drop_last=False,
                num_workers=self.opt.num_workers, batch_size=1)

        self.error_metrics = defaultdict(list)
        self.resized_disps = []

    def run_inference(self):
        """Evaluate the model on every test loader.

        Prints the averaged metrics per data type and writes them to
        ``eval_results.json`` inside ``opt.load_path``. When
        ``opt.save_disparities`` is set, the resized predictions are also
        saved as ``.npy`` files, and for 'kitti2015submission' additionally
        as 16-bit PNGs holding ``disparity * 256``.
        """
        all_errors = {}

        for data_type, loader in self.test_loaders.items():
            print('---------------')
            print('running evaluation on:')
            print(data_type)

            # start from a clean slate for every data type
            self.error_metrics = defaultdict(list)
            self.resized_disps = []

            # these data types are run for their predictions only
            compute_errors = data_type not in ['flicker', 'kitti2015submission']
            with torch.no_grad():
                for inputs in tqdm(loader, ncols=60, position=0, leave=True):
                    _ = self.process_batch(inputs, compute_errors=compute_errors)

            # collapse the per-image lists into one rounded mean, stored as
            # a string so it can be printed and dumped to JSON as-is
            for key, val in self.error_metrics.items():
                self.error_metrics[key] = str(np.round(np.mean(val), 5))
            all_errors[data_type] = self.error_metrics

            if self.opt.save_disparities:
                # also save resized disparities for visualisation
                _savepath = os.path.join(self.opt.load_path, data_type, 'npys')
                os.makedirs(_savepath, exist_ok=True)
                for idx, disp in enumerate(self.resized_disps):
                    np.save(os.path.join(_savepath,
                                         '{}.npy'.format(str(idx).zfill(3))),
                            disp)

                if data_type == 'kitti2015submission':
                    _savepath = os.path.join(_savepath, 'disp_0')
                    os.makedirs(_savepath, exist_ok=True)
                    for idx, disp in enumerate(self.resized_disps):
                        disp = (disp * 256).astype(np.uint16)
                        print(disp.shape)
                        io.imsave(os.path.join(
                            _savepath,
                            '{}_10.png'.format(str(idx).zfill(6))), disp)

        print('Finished inference!')
        print('---------------')
        for data_type, errors in all_errors.items():
            print('Metrics for {}:'.format(data_type))
            for key, error in errors.items():
                print('{} -- {}'.format(key, error))
            print('---------------')

        results_path = os.path.join(self.opt.load_path, 'eval_results.json')
        with open(results_path, 'w') as file_handler:
            json.dump(all_errors, file_handler, indent=2)

    def process_batch(self, inputs, compute_errors=True):
        """Run the model on one batch and record metrics / predictions.

        inputs: dict with at least 'image', 'stereo_image' and 'disparity'
            entries; every value is moved to the GPU when one is available.
        compute_errors: when True, append this batch's d1/d2/d3/EPE values
            to ``self.error_metrics``.

        Returns the raw model outputs.
        """
        # move to GPU
        if torch.cuda.is_available():
            for key, val in inputs.items():
                inputs[key] = val.cuda()

        outputs = self.model(inputs['image'], inputs['stereo_image'])
        preds = outputs[('raw', 0)][:, 0].cpu().numpy()

        # The ground truth is needed even when no errors are computed: its
        # shape is the size each prediction is resized to.
        gts = inputs['disparity'].cpu().numpy()

        for gt_disp, pred in zip(gts, preds):
            # resize prediction to the gt size and rescale its values by
            # the same horizontal factor
            height, width = gt_disp.shape
            pred_disp = cv2.resize(pred, dsize=(width, height)) * width / pred.shape[1]

            if compute_errors:
                d1, d2, d3, EPE = self.compute_errors(gt_disp, pred_disp)
                self.error_metrics['d1'].append(d1)
                self.error_metrics['d2'].append(d2)
                self.error_metrics['d3'].append(d3)
                self.error_metrics['EPE'].append(EPE)

            if self.opt.save_disparities:
                self.resized_disps.append(pred_disp)

        return outputs

    def compute_errors(self, gt_disp, pred_disp):
        """Compare a prediction with its ground truth on valid pixels.

        Pixels are valid where ``gt_disp > 0``.

        Returns ``(d1, d2, d3, EPE)``: the fractions of valid pixels whose
        absolute error is at least 1, 2 and 3, followed by the mean absolute
        error. When there is no valid pixel all four values are NaN.
        """
        mask = gt_disp > 0
        num_valid = mask.sum()
        if num_valid == 0:
            # Nothing to compare against: answer NaN explicitly instead of
            # triggering empty-mean / zero-division warnings.
            return np.nan, np.nan, np.nan, np.nan

        abs_diff = np.abs(gt_disp[mask] - pred_disp[mask])
        EPE = abs_diff.mean()
        d1 = (abs_diff >= 1).sum() / num_valid
        d2 = (abs_diff >= 2).sum() / num_valid
        d3 = (abs_diff >= 3).sum() / num_valid
        return d1, d2, d3, EPE
class TrainManager:
    """
    Main training script called from main.py.

    Builds the model, the training / validation data loaders and the
    tensorboard writers, then runs the optimisation loop in ``train``.
    """

    def __init__(self, options):
        """Set up model, datasets, loaders and loggers from ``options``.

        options: object whose attributes configure the run (model and
            dataset settings, ``log_path``, ``model_name``, ``batch_size``,
            ``num_workers``, ``start_step``, ``training_steps``, ...).
        """
        print('---------------')
        print('setting up...')
        self.opt = options

        # Create network and optimiser
        self.model_manager = ModelManager(self.opt)
        if self.opt.load_path is not None:
            self.model_manager.load_model(weights_path=self.opt.load_path,
                                          load_optimiser=False)

        # extract model, optimiser and scheduler for easier access
        self.model = self.model_manager.model
        self.optimiser = self.model_manager.optimiser
        self.scheduler = self.model_manager.scheduler
        self.scales = self.model_manager.scales
        print('models done!')

        path_info = load_config(self.opt.config_path)

        train_datasets = []
        val_datasets = []
        for dataset_type in self.opt.training_datasets:
            dataset_path = path_info[dataset_type]

            train_filenames = readlines(
                os.path.join('splits', dataset_type, 'train_files_all.txt'))
            # sceneflow validates on its test split
            val_split_file = ('val_files_all.txt' if dataset_type != 'sceneflow'
                              else 'test_files.txt')
            val_filenames = readlines(
                os.path.join('splits', dataset_type, val_split_file))

            dataset_class = dataset_lookup[dataset_type]

            # subsample data optionally
            if self.opt.data_sampling != 1.0:
                sampling = self.opt.data_sampling
                assert sampling > 0
                assert sampling < 1.0
                train_filenames = list(
                    np.random.choice(np.array(train_filenames),
                                     int(sampling * len(train_filenames)),
                                     replace=False))

            # train and val datasets only differ in file list and is_train
            shared_kwargs = dict(
                disable_normalisation=self.opt.disable_normalisation,
                max_disparity=self.opt.max_disparity,
                keep_aspect_ratio=True,
                disable_synthetic_augmentation=self.opt.disable_synthetic_augmentation,
                disable_sharpening=self.opt.disable_sharpening,
                monodepth_model=self.opt.monodepth_model,
                disable_background=self.opt.disable_background)

            train_datasets.append(dataset_class(
                dataset_path, train_filenames, self.opt.height, self.opt.width,
                is_train=True, **shared_kwargs))
            val_datasets.append(dataset_class(
                dataset_path, val_filenames, self.opt.height, self.opt.width,
                is_train=False, **shared_kwargs))

        self.train_dataset = ConcatDataset(train_datasets)
        self.val_dataset = ConcatDataset(val_datasets)

        # use custom sampler so we can continue from specific step
        my_sampler = MyRandomSampler(self.train_dataset,
                                     start_step=self.opt.start_step)
        self.train_loader = DataLoader(self.train_dataset, sampler=my_sampler,
                                       drop_last=True,
                                       num_workers=self.opt.num_workers,
                                       batch_size=self.opt.batch_size)
        self.val_loader = DataLoader(self.val_dataset, shuffle=True,
                                     drop_last=True, num_workers=1,
                                     batch_size=self.opt.batch_size)
        self.val_iter = iter(self.val_loader)

        print('datasets done!')
        print('dataset info:')
        print('training on {} images, validating on {} images'.format(
            len(self.train_dataset), len(self.val_dataset)))

        # Set up tensorboard writers and logger
        self.train_writer = SummaryWriter(
            os.path.join(self.opt.log_path, self.opt.model_name, 'train'))
        self.val_writer = SummaryWriter(
            os.path.join(self.opt.log_path, self.opt.model_name, 'val'))
        os.makedirs(self.opt.log_path, exist_ok=True)

        self.step = 0
        self.epoch = 0
        self.training_complete = False

        print('training setup complete!')
        print('---------------')

    def train(self):
        """Run epochs until ``opt.training_steps`` steps have been taken."""
        print('training...')
        while not self.training_complete:
            self.run_epoch()
            self.epoch += 1
        print('training complete!')

    def run_epoch(self):
        """Run one pass over the training loader.

        Every ``opt.log_freq`` steps the current batch is logged and one
        validation batch is evaluated; every 10000 steps, and at the end of
        the epoch, the weights are saved. The scheduler is stepped once per
        epoch.
        """
        if self.step < self.opt.start_step:
            print('skipping up to step {}'.format(self.opt.start_step))

        for inputs in self.train_loader:
            start_time = time.time()
            outputs, losses = self.process_batch(inputs, compute_loss=True)

            # Update weights
            loss = losses['loss']
            self.model.zero_grad()
            loss.backward()
            self.optimiser.step()

            # remember the learning rate for logging (the last parameter
            # group wins when there are several)
            for group in self.optimiser.param_groups:
                self.lr = group['lr']

            print('step {} - time {}'.format(
                self.step, round(time.time() - start_time, 3)))

            # validate and log
            if self.step % self.opt.log_freq == 0:
                self.log(self.train_writer, inputs, outputs, losses)
                self.model.eval()
                self.val()
                self.model.train()

            if self.step % 10000 == 0:
                self.model_manager.save_model(
                    folder_name='weights_{}'.format(self.step))

            self.step += 1
            if self.step >= self.opt.training_steps:
                self.training_complete = True
                break

        print('Epoch {} complete!'.format(self.epoch))
        self.model_manager.save_model(
            folder_name='weights_{}'.format(self.step))
        self.scheduler.step()

    def val(self):
        """Evaluate and log a single validation batch without gradients.

        The validation iterator is restarted once it is exhausted.
        """
        with torch.no_grad():
            # Use the built-in next(): iterators are only guaranteed to
            # implement __next__, not a Python-2 style .next() method.
            try:
                inputs = next(self.val_iter)
            except StopIteration:
                self.val_iter = iter(self.val_loader)
                inputs = next(self.val_iter)

            outputs, losses = self.process_batch(inputs, compute_loss=True)
            self.log(self.val_writer, inputs, outputs, losses)

    def process_batch(self, inputs, compute_loss=False):
        """Run the model on a batch and upsample its predictions.

        inputs: dict with at least 'image' and 'stereo_image' (plus
            'disparity' when ``compute_loss`` is True); every value is moved
            to the GPU when one is available.
        compute_loss: also compute the losses for this batch.

        Returns ``(outputs, losses)``; ``outputs`` gains a ``('disp', scale)``
        entry per scale and ``losses`` is empty when no loss was requested.
        """
        # move to GPU
        if torch.cuda.is_available():
            for key, val in inputs.items():
                inputs[key] = val.cuda()

        outputs = self.model(inputs['image'], inputs['stereo_image'])

        for scale in range(self.scales):
            # upsample to full resolution and keep channel 0 as disparity
            pred = F.interpolate(outputs[('raw', scale)], mode='bilinear',
                                 size=(self.opt.height, self.opt.width),
                                 align_corners=True)
            outputs[('disp', scale)] = pred[:, 0]

        # get losses
        losses = self.compute_losses(inputs, outputs) if compute_loss else {}
        return outputs, losses

    def compute_losses(self, inputs, outputs):
        """Masked L1 disparity loss, averaged over all scales.

        The target is ``inputs['disparity']`` clamped to
        ``opt.max_disparity``; pixels whose target is not positive contribute
        zero error (they still count in the mean's denominator).

        Returns a dict with one 'disp_loss/<scale>' entry per scale and the
        combined 'loss'.
        """
        losses = {}
        total_loss = 0

        # the target does not depend on the scale, so build it once
        target_disp = torch.clamp(inputs['disparity'],
                                  max=self.opt.max_disparity)
        valid = (target_disp > 0).float()

        for scale in range(self.scales):
            pred_disp = outputs[('disp', scale)]
            disparity_loss = (torch.abs(pred_disp - target_disp) * valid).mean()
            total_loss += disparity_loss
            losses['disp_loss/{}'.format(scale)] = disparity_loss

        total_loss /= self.scales
        losses['loss'] = total_loss
        return losses

    def warp_stereo_image(self, stereo_image, disparity):
        """Note - for logging only

        Resample ``stereo_image`` at x-coordinates shifted left by
        ``disparity`` (a 2-D height x width tensor) and return the warped
        image.
        """
        height, width = disparity.shape

        xs, ys = np.meshgrid(range(width), range(height))
        xs, ys = torch.from_numpy(xs).float(), torch.from_numpy(ys).float()

        xs = xs - disparity

        # grid_sample expects coordinates normalised to [-1, 1]
        xs = ((xs / (width - 1)) - 0.5) * 2
        ys = ((ys / (height - 1)) - 0.5) * 2

        sample_pix = torch.stack([xs, ys], 2)
        warped_image = F.grid_sample(stereo_image.unsqueeze(0),
                                     sample_pix.unsqueeze(0),
                                     padding_mode='border',
                                     align_corners=True)
        return warped_image[0]

    def log(self, writer, inputs, outputs, losses):
        """Write the learning rate, losses and up to four samples to ``writer``.

        Optional inputs ('disparity', 'mono_disparity', 'occlusion_mask')
        are only logged when present.
        """
        print('logging')
        writer.add_scalar('lr', self.lr, self.step)

        # write to tensorboard
        for loss_type, loss in losses.items():
            writer.add_scalar('{}'.format(loss_type), loss, self.step)

        for i in range(min(4, len(inputs['image']))):
            writer.add_image('image_l/{}'.format(i),
                             normalise_image(inputs['image'][i]), self.step)
            writer.add_image('image_r/{}'.format(i),
                             normalise_image(inputs['stereo_image'][i]),
                             self.step)

            if inputs.get('disparity') is not None:
                writer.add_image('disp_target/{}'.format(i),
                                 normalise_image(inputs['disparity'][i]),
                                 self.step)
                warped_image = self.warp_stereo_image(
                    inputs['stereo_image'][i].cpu(),
                    inputs['disparity'][i].cpu())
                writer.add_image('warped_gt_image/{}'.format(i),
                                 normalise_image(warped_image), self.step)

            if inputs.get('mono_disparity') is not None:
                writer.add_image('mono_disparity/{}'.format(i),
                                 normalise_image(inputs['mono_disparity'][i]),
                                 self.step)

            if inputs.get('occlusion_mask') is not None:
                writer.add_image('occlusion_mask/{}'.format(i),
                                 normalise_image(inputs['occlusion_mask'][i]),
                                 self.step)

            writer.add_image('disp_pred/{}'.format(i),
                             normalise_image(outputs[('disp', 0)][i]),
                             self.step)
            warped_image = self.warp_stereo_image(
                inputs['stereo_image'][i].cpu(),
                outputs[('disp', 0)][i].cpu())
            writer.add_image('warped_image/{}'.format(i),
                             normalise_image(warped_image), self.step)