def preprocess_video(process_args: List) -> None:
    """Run the object detector over a single video and pickle its outputs."""
    video_path, od_weights, results_dir = process_args
    print(video_path)

    # assign an available GPU to this worker process, round-robin over the
    # visible CUDA devices
    num_gpu_devices = torch.cuda.device_count()
    assigned_gpu = multiprocessing.current_process().ident % num_gpu_devices
    device = torch.device(f"cuda:{assigned_gpu}")

    # load object detector
    factory = ModelsFactory()
    detector: CaterObjectDetector = factory.get_detector_model(
        "object_detector", od_weights)
    detector.load_model(device)

    # perform predictions on the entire video and retrieve the results
    bb_predictions, labels = output_video_predictions(video_path, detector,
                                                      device)

    # save the predictions as a pickle file, but only for complete videos
    # (CATER videos are 300 frames long)
    video_path = Path(video_path)
    results_dir = Path(results_dir)
    video_name = video_path.stem
    output_path = results_dir / (video_name + ".pkl")
    output_data = {"bb": bb_predictions, "labels": labels}
    if len(output_data["bb"]) == 300 and len(output_data["labels"]) == 300:
        with open(output_path, "wb") as f:
            pickle.dump(output_data, f, pickle.HIGHEST_PROTOCOL)

    print(f"Finished writing object detection outputs for video {video_name}")
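
# A minimal sketch of how preprocess_video might be fanned out over a worker
# pool. This usage is an assumption (not part of the original file), and the
# video list, weights path and results directory are hypothetical
# placeholders. Note that CUDA in forked workers usually requires the "spawn"
# start method, and the pool should be created under an
# `if __name__ == "__main__":` guard.
def _run_detection_pool_example() -> None:
    videos = ["videos/CATER_new_000001.avi", "videos/CATER_new_000002.avi"]
    od_weights = "checkpoints/object_detector.pth"
    results_dir = "results/detections"
    process_args = [(vid, od_weights, results_dir) for vid in videos]
    with multiprocessing.Pool(processes=2) as pool:
        pool.map(preprocess_video, process_args)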

def trackers_inference_main(model_type: str, results_dir: str,
                            config_path: str) -> None:
    # load configuration dict
    with open(config_path, "rb") as f:
        config: Dict[str, str] = json.load(f)

    # extract paths to video files for the experiment
    experiment_videos = get_experiment_videos(config)
    experiment_video_names = {
        str(Path(vid_path).stem): str(vid_path)
        for vid_path in experiment_videos
    }

    # define global parameters; missing keys fall back to empty strings
    samples_dir = config["sample_dir"]
    labels_dir = config["labels_dir"]
    device = torch.device(config["device"]) if "device" in config else ""
    model_weights = config["model_weights"] if "model_weights" in config else ""

    # load a tracking model
    reasoner: AbstractReasoner = ModelsFactory.get_tracker_model(
        model_type, model_weights, device)

    for video_name, video_path in tqdm(experiment_video_names.items()):
        predictions_path = Path(samples_dir) / (video_name + ".pkl")
        labels_path = Path(labels_dir) / (video_name + "_bb.json")
        snitch_bb_prediction = track_and_predict(video_name, video_path,
                                                 reasoner, predictions_path,
                                                 labels_path, results_dir)
        DataHelper.write_bb_predictions_to_file(video_path, results_dir,
                                                snitch_bb_prediction)
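
# Hedged example of the inference configuration consumed above. Only the keys
# read directly by trackers_inference_main are shown; the paths are
# hypothetical, and any extra keys expected by get_experiment_videos are
# project-specific and omitted here.
def _write_example_tracker_config(config_path: str) -> None:
    example_config = {
        "sample_dir": "results/detections",  # per-video detector pickles
        "labels_dir": "labels",  # per-video *_bb.json label files
        "device": "cuda:0",
        "model_weights": "checkpoints/tracker.pth",
    }
    with open(config_path, "w") as f:
        json.dump(example_config, f, indent=2)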

def reasoning_inference_main(model_name: str, results_dir: str,
                             inference_config_path: str,
                             model_config_path: str) -> None:
    # load inference and model configuration dicts
    with open(inference_config_path, "rb") as f:
        config: Dict[str, str] = json.load(f)
    with open(model_config_path, "rb") as f:
        model_config: Dict[str, int] = json.load(f)

    samples_dir = config["sample_dir"]
    labels_dir = config["labels_dir"]
    batch_size = int(config["batch_size"])
    num_workers = int(config["num_workers"])
    model_path = config["model_path"]
    device = torch.device(config["device"])

    dataset: data.Dataset = DatasetsFactory.get_inference_dataset(
        model_name, samples_dir, labels_dir)
    data_loader = data.DataLoader(dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers)
    dataset_length = len(dataset)

    # load model
    model: nn.Module = ModelsFactory.get_model(model_name, model_config,
                                               model_path)

    # predict model results
    dataset_videos_indices: Dict[str, int] = {}
    dataset_predictions: List[np.ndarray] = []
    dataset_labels: List[np.ndarray] = []
    current_sample_idx = 0
    model.eval()
    model.to(device)
    with torch.no_grad():
        # (width, height, width, height) scale for mapping normalized
        # coordinates back to pixels
        frame_shapes = np.array([320, 240, 320, 240])
        for batch_idx, sample in enumerate(data_loader):
            x, y, video_names = sample
            boxes, index_to_track_labels = x
            labels, _ = y
            current_batch_size = len(labels)
            boxes = boxes.to(device)

            if model_name in DOUBLE_OUTPUT_MODELS:
                output, index_to_track_prediction = model(boxes)
            else:
                output = model(boxes)

            # move outputs to cpu and flatten output and labels
            batch_videos = {
                video_names[i]: i + current_sample_idx
                for i in range(current_batch_size)
            }
            batch_predictions = output.cpu().numpy().reshape(-1, 4)
            batch_labels = labels.numpy().reshape(-1, 4)

            dataset_videos_indices.update(batch_videos)
            dataset_predictions.extend(batch_predictions)
            dataset_labels.extend(batch_labels)
            current_sample_idx += current_batch_size

    # scale the normalized boxes back to pixels, one (300, 4) array per video
    dataset_predictions = (np.array(dataset_predictions) *
                           frame_shapes).reshape(
                               (dataset_length, 300, 4)).astype(np.int32)
    dataset_labels = (np.array(dataset_labels) * frame_shapes).reshape(
        (dataset_length, 300, 4)).astype(np.int32)

    # extract paths to video files for the experiment
    experiment_videos = get_experiment_videos(config)
    experiment_video_names = {
        str(Path(vid_path).stem): str(vid_path)
        for vid_path in experiment_videos
    }

    # write debug videos with predictions and ground truth overlaid
    for video_name, video_path in tqdm(experiment_video_names.items()):
        out_vid_path = str(Path(results_dir) / (video_name + "_results.avi"))
        video_idx = dataset_videos_indices.get(video_name, None)
        if video_idx is None:
            continue
        video_predictions = dataset_predictions[video_idx]
        video_labels = dataset_labels[video_idx]
        video_handler = VideoHandling(video_path, out_vid_path)

        # start reading video frames and draw the per-frame boxes
        video_handler.read_next_frame()
        video_still_active = video_handler.check_video_still_active()
        while video_still_active:
            current_frame_index = video_handler.get_current_frame_index()
            frame_pred = video_predictions[current_frame_index]
            frame_gt = video_labels[current_frame_index]
            video_handler.write_bb_to_frame(list(frame_pred),
                                            color=(0, 255, 255))
            video_handler.write_bb_to_frame(list(frame_gt), color=(255, 0, 0))
            video_handler.write_debug_frame()
            # read the next frame
            video_handler.read_next_frame()
            video_still_active = video_handler.check_video_still_active()
        video_handler.complete_video_writing()

        # write bb results to file for future offline analysis
        DataHelper.write_bb_predictions_to_file(video_path, results_dir,
                                                video_predictions)
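
# Tiny self-contained check of the de-normalization step above: model outputs
# are normalized (x1, y1, x2, y2) boxes that get scaled back to the fixed
# 320x240 CATER frame. The box values here are made up for illustration.
def _denormalize_example() -> None:
    frame_shapes = np.array([320, 240, 320, 240])
    normalized_box = np.array([0.25, 0.5, 0.5, 0.75])  # hypothetical output
    pixel_box = (normalized_box * frame_shapes).astype(np.int32)
    print(pixel_box)  # [ 80 120 160 180]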

def cater_setup_inference(model_name: str, results_dir: str,
                          inference_config_path: str,
                          model_config_path: str) -> None:
    # load inference and model configuration dicts
    with open(inference_config_path, "rb") as f:
        config: Dict[str, str] = json.load(f)
    with open(model_config_path, "rb") as f:
        model_config: Dict[str, int] = json.load(f)

    samples_dir = config["sample_dir"]
    labels_dir = config["labels_dir"]
    batch_size = int(config["batch_size"])
    num_workers = int(config["num_workers"])
    model_path = config["model_path"]
    device = torch.device(config["device"])

    dataset: data.Dataset = DatasetsFactory.get_inference_dataset(
        model_name, samples_dir, labels_dir)
    data_loader = data.DataLoader(dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers)
    dataset_length = len(dataset)

    # load model
    model: nn.Module = ModelsFactory.get_model(model_name, model_config,
                                               model_path)

    # predict model results
    dataset_videos_indices: Dict[str, int] = {}
    dataset_predictions: List[np.ndarray] = []
    current_sample_idx = 0
    model.eval()
    model.to(device)
    with torch.no_grad():
        frame_shapes = np.array([320, 240, 320, 240])
        for batch_idx, sample in enumerate(data_loader):
            x, y, video_names = sample
            boxes, index_to_track_labels = x
            labels, _ = y
            current_batch_size = len(labels)
            boxes = boxes.to(device)

            output, _ = model(boxes)
            # keep only the prediction for the last frame
            output = output[:, -1, :]

            # move outputs to cpu and record per-video sample indices
            batch_videos = {
                video_names[i]: i + current_sample_idx
                for i in range(current_batch_size)
            }
            batch_predictions = output.cpu().numpy().reshape(-1, 4)

            dataset_videos_indices.update(batch_videos)
            dataset_predictions.extend(batch_predictions)
            current_sample_idx += current_batch_size

    # scale the normalized last-frame boxes back to pixel coordinates
    dataset_predictions = (np.array(dataset_predictions) *
                           frame_shapes).reshape(
                               (dataset_length, 4)).astype(np.int32)

    # convert boxes to center format and derive class predictions from them
    cx_cy_output = transform_xyxy_to_w_h(dataset_predictions)
    pred_classes = get_classes_predictions(cx_cy_output)

    results = {"video_names": [], "class_predictions": []}
    for video_name, video_index in dataset_videos_indices.items():
        results["video_names"].append(f"{video_name}.avi")
        results["class_predictions"].append(pred_classes[video_index])

    results_df = pd.DataFrame(results)
    results_file = f"{results_dir}/class_pred_results.csv"
    results_df.to_csv(results_file, index=False)
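
# transform_xyxy_to_w_h is project code that is not shown in this file; a
# plausible sketch, assuming it maps corner boxes (x1, y1, x2, y2) to center
# format (cx, cy, w, h) as the cx_cy_output name suggests:
def _transform_xyxy_to_w_h_sketch(boxes: np.ndarray) -> np.ndarray:
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    centers_x = (x1 + x2) / 2
    centers_y = (y1 + y2) / 2
    widths = x2 - x1
    heights = y2 - y1
    return np.stack([centers_x, centers_y, widths, heights], axis=1)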

def training_main(model_name: str, train_config: Dict[str, Any],
                  model_config: Dict[str, int]) -> None:
    # create train and dev datasets using the files specified in the
    # training configuration
    train_samples_dir = train_config["train_sample_dir"]
    train_labels_dir = train_config["train_labels_dir"]
    train_containment_file = train_config["train_containment_file"]
    dev_samples_dir = train_config["dev_sample_dir"]
    dev_labels_dir = train_config["dev_labels_dir"]
    dev_containment_file = train_config["dev_containment_file"]
    train_dataset: data.Dataset = DatasetsFactory.get_training_dataset(
        model_name, train_samples_dir, train_labels_dir,
        train_containment_file)
    dev_dataset: data.Dataset = DatasetsFactory.get_training_dataset(
        model_name, dev_samples_dir, dev_labels_dir, dev_containment_file)

    # training hyper-parameters and configuration
    batch_size = train_config["batch_size"]
    num_workers = train_config["num_workers"]
    num_epochs = train_config["num_epochs"]
    learning_rate = train_config["learning_rate"]
    print_batch_step = train_config["print_step"]
    inference_batch_size = train_config["inference_batch_size"]
    scheduler_patience = train_config["lr_scheduler_patience"]
    scheduler_factor = train_config["lr_scheduler_factor"]
    checkpoints_path = train_config["checkpoints_path"]
    device = torch.device(train_config["device"])

    # model, loss and optimizer
    model: nn.Module = ModelsFactory.get_model(model_name, model_config)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=scheduler_factor,
                                  patience=scheduler_patience,
                                  verbose=True)
    # per-element loss so visibility masking can be applied before reduction
    loss_function = nn.L1Loss(reduction="none")

    # create data loaders
    train_config_dict = {"batch_size": batch_size, "num_workers": num_workers}
    inference_config_dict = {
        "batch_size": inference_batch_size,
        "num_workers": num_workers
    }
    training_loader = data.DataLoader(train_dataset, **train_config_dict)
    train_inference_loader = data.DataLoader(train_dataset,
                                             **inference_config_dict)
    dev_loader = data.DataLoader(dev_dataset, **inference_config_dict)

    # start training
    model = model.to(device)
    highest_dev_iou: float = 0
    train_start_time = time.time()
    for epoch in range(num_epochs):
        model.train(mode=True)
        epoch_num = epoch + 1

        # running loss statistics
        batches_running_loss = 0
        batches_running_pred_loss = 0
        batches_running_const_loss = 0
        for batch_idx, sample in enumerate(training_loader, 1):
            x, y, _ = sample
            boxes, _ = x
            labels, mask = y
            boxes = boxes.to(device)
            labels, mask = labels.to(device), mask.to(device)

            optimizer.zero_grad()
            if model_name in DOUBLE_OUTPUT_MODELS:
                output, index_to_track_prediction = model(boxes)
            else:
                output = model(boxes)

            # prediction loss
            pred_loss = loss_function(output, labels)

            # consistency loss: L2 distance between the predictions of
            # consecutive frames
            next_output_frames = output[:, 1:, :]
            current_output_frames = output[:, :-1, :]
            consistency_loss = torch.mean(
                torch.norm(next_output_frames - current_output_frames,
                           p=2,
                           dim=-1))

            if model_name in NO_LABELS_MODELS:
                # the mask keeps only visible objects
                pred_loss = pred_loss * mask
            pred_loss = torch.mean(pred_loss)

            if model_name in NO_LABELS_MODELS:
                loss = pred_loss + 0.5 * consistency_loss  # fixed weight
            else:
                loss = pred_loss

            batches_running_loss += loss.item()
            batches_running_pred_loss += pred_loss.item()
            batches_running_const_loss += consistency_loss.item()

            loss.backward()
            optimizer.step()

            # print intra-epoch statistics
            if batch_idx % print_batch_step == 0:
                num_samples_seen = batch_idx * batch_size
                num_samples_total = len(train_dataset)
                epoch_complete_ratio = 100 * batch_idx / len(training_loader)
                average_running_loss = batches_running_loss / print_batch_step
                average_pred_loss = batches_running_pred_loss / print_batch_step
                average_consist_loss = (batches_running_const_loss /
                                        print_batch_step)
                time_since_beginning = int(time.time() - train_start_time)
                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)]\t"
                    "Average Loss: Total {:.4f}, Pred {:.4f}, "
                    "Consistency {:.4f}. Training began {} seconds ago".format(
                        epoch_num, num_samples_seen, num_samples_total,
                        epoch_complete_ratio, average_running_loss,
                        average_pred_loss, average_consist_loss,
                        time_since_beginning))
                batches_running_loss = 0
                batches_running_pred_loss = 0
                batches_running_const_loss = 0

        # end of epoch - compute mean IoU over train and dev
        train_loss, train_miou, train_containment_miou = inference_and_iou_comp(
            model_name, model, device, train_inference_loader,
            len(train_dataset), loss_function)
        dev_loss, dev_miou, dev_containment_miou = inference_and_iou_comp(
            model_name, model, device, dev_loader, len(dev_dataset),
            loss_function)
        print("Epoch {} Training Set: Loss {:.4f}, Mean IoU {:.6f}, "
              "Mask Mean IoU {:.6f}".format(epoch_num, train_loss, train_miou,
                                            train_containment_miou))
        print("Epoch {} Dev Set: Loss {:.4f}, Mean IoU {:.6f}, "
              "Mask Mean IoU {:.6f}".format(epoch_num, dev_loss, dev_miou,
                                            dev_containment_miou))

        # learning rate scheduling
        scheduler.step(train_loss)

        # save a checkpoint whenever the dev mean IoU improves
        if dev_miou > highest_dev_iou:
            highest_dev_iou = dev_miou
            save_checkpoint(model, model_name, round(highest_dev_iou, 3),
                            checkpoints_path)
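
# The consistency term in training_main penalizes jitter between consecutive
# frame predictions. A minimal self-contained sketch of that computation on a
# toy tensor (the [batch, frames, 4] shape matches the training loop; the
# random values are only for illustration):
def _consistency_loss_sketch() -> torch.Tensor:
    output = torch.rand(2, 300, 4)  # hypothetical model output
    frame_diffs = output[:, 1:, :] - output[:, :-1, :]
    # mean L2 norm of the frame-to-frame box deltas
    return torch.mean(torch.norm(frame_diffs, p=2, dim=-1))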