def preprocess_video(process_args: List):

    video_path, od_weights, results_dir = process_args
    print(video_path)
    # assign a GPU for this process; the per-process assignment below is kept
    # for reference, but a fixed device is currently hard-coded
    # num_possible_gpu_devices = torch.cuda.device_count()
    # assigned_gpu = multiprocessing.current_process().ident % num_possible_gpu_devices
    # device = torch.device(f'cuda:{assigned_gpu}')
    device = torch.device("cuda:2")

    # load object detector
    factory = ModelsFactory()
    detector: CaterObjectDetector = factory.get_detector_model(
        "object_detector", od_weights)
    detector.load_model(device)

    # perform predictions on the entire video and retrieve the results
    bb_predictions, labels = output_video_predictions(video_path, detector,
                                                      device)

    # save the predictions as a pickle file
    video_path = Path(video_path)
    results_dir = Path(results_dir)
    video_name = video_path.stem
    output_path = results_dir / (video_name + ".pkl")
    output_data = {"bb": bb_predictions, "labels": labels}
    if (len(output_data["bb"]) == 300) and (len(output_data["labels"]) == 300):
        with open(output_path, "wb") as f:
            pickle.dump(output_data, f, pickle.HIGHEST_PROTOCOL)

        print(
            f"Finished writing object detection outputs for video {video_name}"
        )
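
The commented-out device-assignment lines above hint that preprocess_video is meant to be fanned out over a pool of worker processes, one per video. A minimal driver sketch along those lines: run_detection_pool, videos_dir and num_workers are hypothetical names introduced here, and the *.avi glob matches the video naming used elsewhere in these examples.

import multiprocessing
from pathlib import Path


def run_detection_pool(videos_dir: str, od_weights: str, results_dir: str,
                       num_workers: int = 4) -> None:
    # build one [video_path, od_weights, results_dir] argument list per video,
    # matching the process_args unpacked inside preprocess_video
    video_paths = sorted(str(p) for p in Path(videos_dir).glob("*.avi"))
    process_args = [[path, od_weights, results_dir] for path in video_paths]

    # "spawn" avoids sharing CUDA state between the parent and worker processes
    ctx = multiprocessing.get_context("spawn")
    with ctx.Pool(processes=num_workers) as pool:
        pool.map(preprocess_video, process_args)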
Example #2
def trackers_inference_main(model_type: str, results_dir: str,
                            config_path: str) -> None:

    # load configuration dict
    with open(config_path, "rb") as f:
        config: Dict[str, str] = json.load(f)

    # extract paths to video files for the experiment
    experiment_videos = get_experiment_videos(config)
    experiment_video_names = {
        str(Path(vid_path).stem): str(vid_path)
        for vid_path in experiment_videos
    }

    # define global parameters
    samples_dir = config["sample_dir"]
    labels_dir = config["labels_dir"]
    device = torch.device(config["device"]) if "device" in config else ""
    model_weights = config["model_weights"] if "model_weights" in config else ""

    # load a tracking model
    reasoner: AbstractReasoner = ModelsFactory.get_tracker_model(
        model_type, model_weights, device)

    for video_name, video_path in tqdm(experiment_video_names.items()):
        predictions_path = Path(samples_dir) / (video_name + ".pkl")
        labels_path = Path(labels_dir) / (video_name + "_bb.json")

        snitch_bb_prediction = track_and_predict(video_name, video_path,
                                                 reasoner, predictions_path,
                                                 labels_path, results_dir)
        DataHelper.write_bb_predictions_to_file(video_path, results_dir,
                                                snitch_bb_prediction)
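
For reference, a sketch of the JSON that config_path might point to, based only on the keys trackers_inference_main reads; all paths are placeholders, and any extra keys needed by get_experiment_videos are not shown.

# hypothetical inference config for trackers_inference_main (placeholder paths)
tracker_inference_config = {
    "sample_dir": "/path/to/detection_pickles",       # per-video "<name>.pkl" files, e.g. from preprocess_video
    "labels_dir": "/path/to/bb_labels",               # per-video "<name>_bb.json" ground-truth files
    "device": "cuda:0",                               # optional; "" is passed to the factory when missing
    "model_weights": "/path/to/tracker_weights.pth",  # optional; "" is passed to the factory when missing
}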
Example #3
def reasoning_inference_main(model_name: str, results_dir: str,
                             inference_config_path: str,
                             model_config_path: str):
    with open(inference_config_path, "rb") as f:
        config: Dict[str, str] = json.load(f)

    with open(model_config_path, "rb") as f:
        model_config: Dict[str, int] = json.load(f)

    samples_dir = config["sample_dir"]
    labels_dir = config["labels_dir"]
    batch_size = int(config["batch_size"])
    num_workers = int(config["num_workers"])
    model_path = config["model_path"]
    device = torch.device(config["device"])

    dataset: data.Dataset = DatasetsFactory.get_inference_dataset(
        model_name, samples_dir, labels_dir)
    data_loader = data.DataLoader(dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers)
    dataset_length = len(dataset)

    # load model
    model: nn.Module = ModelsFactory.get_model(model_name, model_config,
                                               model_path)

    # predict model results
    dataset_videos_indices: Dict[str, int] = {}
    dataset_predictions: List[np.ndarray] = []
    dataset_labels: List[np.ndarray] = []
    current_sample_idx = 0

    model.eval()
    model.to(device)
    with torch.no_grad():
        # frame (width, height, width, height) used to scale normalized xyxy boxes back to pixels
        frame_shapes = np.array([320, 240, 320, 240])

        for batch_idx, sample in enumerate(data_loader):

            x, y, video_names = sample
            boxes, index_to_track_labels = x
            labels, _ = y
            current_batch_size = len(labels)

            boxes = boxes.to(device)

            if model_name in DOUBLE_OUTPUT_MODELS:
                output, index_to_track_prediction = model(boxes)

            else:
                output = model(boxes)

            # move outputs to cpu and flatten output and labels
            batch_videos = {
                video_names[i]: i + current_sample_idx
                for i in range(current_batch_size)
            }
            batch_predictions = output.cpu().numpy().reshape(-1, 4)
            batch_labels = labels.numpy().reshape(-1, 4)

            dataset_videos_indices.update(batch_videos)
            dataset_predictions.extend(batch_predictions)
            dataset_labels.extend(batch_labels)
            current_sample_idx += current_batch_size

    dataset_predictions = (np.array(dataset_predictions) *
                           frame_shapes).reshape(
                               (dataset_length, 300, 4)).astype(np.int32)
    dataset_labels = (np.array(dataset_labels) * frame_shapes).reshape(
        (dataset_length, 300, 4)).astype(np.int32)

    # extract paths to video files for the experiment
    experiment_videos = get_experiment_videos(config)
    experiment_video_names = {
        str(Path(vid_path).stem): str(vid_path)
        for vid_path in experiment_videos
    }

    # write debug videos
    for video_name, video_path in tqdm(experiment_video_names.items()):
        out_vid_path = str(Path(results_dir) / (video_name + "_results.avi"))
        video_idx = dataset_videos_indices.get(video_name, None)

        if video_idx is not None:
            video_predictions = dataset_predictions[video_idx]
            video_labels = dataset_labels[video_idx]

            video_handler = VideoHandling(video_path, out_vid_path)

            # start reading video frames and predict
            video_handler.read_next_frame()
            video_still_active = video_handler.check_video_still_active()

            while video_still_active:
                current_frame_index = video_handler.get_current_frame_index()
                frame_pred = video_predictions[current_frame_index]
                frame_gt = video_labels[current_frame_index]

                video_handler.write_bb_to_frame(list(frame_pred),
                                                color=(0, 255, 255))
                video_handler.write_bb_to_frame(list(frame_gt),
                                                color=(255, 0, 0))
                video_handler.write_debug_frame()

                # read the next frame
                video_handler.read_next_frame()
                video_still_active = video_handler.check_video_still_active()

            video_handler.complete_video_writing()

            # write bb results to file for future offline analysis
            DataHelper.write_bb_predictions_to_file(video_path, results_dir,
                                                    video_predictions)
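
The boxes written out above are integer pixel boxes in (x1, y1, x2, y2) order, obtained by scaling the normalized model outputs by the 320x240 frame size. For the offline analysis mentioned in the last comment, a per-video mean IoU could be computed with a sketch like the following; frame_iou and video_mean_iou are hypothetical helpers, not part of the project.

import numpy as np


def frame_iou(pred_box: np.ndarray, gt_box: np.ndarray) -> float:
    # both boxes are [x1, y1, x2, y2] in pixels
    inter_x1 = max(pred_box[0], gt_box[0])
    inter_y1 = max(pred_box[1], gt_box[1])
    inter_x2 = min(pred_box[2], gt_box[2])
    inter_y2 = min(pred_box[3], gt_box[3])

    intersection = max(0, inter_x2 - inter_x1) * max(0, inter_y2 - inter_y1)
    pred_area = (pred_box[2] - pred_box[0]) * (pred_box[3] - pred_box[1])
    gt_area = (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1])
    union = pred_area + gt_area - intersection
    return float(intersection / union) if union > 0 else 0.0


def video_mean_iou(video_predictions: np.ndarray,
                   video_labels: np.ndarray) -> float:
    # both arrays have shape (300, 4): one box per frame of the video
    ious = [frame_iou(pred, gt) for pred, gt in zip(video_predictions, video_labels)]
    return float(np.mean(ious))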
def cater_setup_inference(model_name: str, results_dir: str,
                          inference_config_path: str, model_config_path: str):
    with open(inference_config_path, "rb") as f:
        config: Dict[str, str] = json.load(f)

    with open(model_config_path, "rb") as f:
        model_config: Dict[str, int] = json.load(f)

    samples_dir = config["sample_dir"]
    labels_dir = config["labels_dir"]
    batch_size = int(config["batch_size"])
    num_workers = int(config["num_workers"])
    model_path = config["model_path"]
    device = torch.device(config["device"])

    dataset: data.Dataset = DatasetsFactory.get_inference_dataset(
        model_name, samples_dir, labels_dir)
    data_loader = data.DataLoader(dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers)
    dataset_length = len(dataset)

    # load model
    model: nn.Module = ModelsFactory.get_model(model_name, model_config,
                                               model_path)

    # predict model results
    dataset_videos_indices: Dict[str, int] = {}
    dataset_predictions: List[np.ndarray] = []
    current_sample_idx = 0

    model.eval()
    model.to(device)
    with torch.no_grad():
        frame_shapes = np.array([320, 240, 320, 240])

        for batch_idx, sample in enumerate(data_loader):

            x, y, video_names = sample
            boxes, index_to_track_labels = x
            labels, _ = y
            current_batch_size = len(labels)

            boxes = boxes.to(device)

            output, _ = model(boxes)

            # keep only the prediction for the last frame
            output = output[:, -1, :]

            # move outputs to cpu and flatten the predictions
            batch_videos = {
                video_names[i]: i + current_sample_idx
                for i in range(current_batch_size)
            }
            batch_predictions = output.cpu().numpy().reshape(-1, 4)

            dataset_videos_indices.update(batch_videos)
            dataset_predictions.extend(batch_predictions)
            current_sample_idx += current_batch_size

    dataset_predictions = (np.array(dataset_predictions) *
                           frame_shapes).reshape(
                               (dataset_length, 4)).astype(np.int32)
    cx_cy_output = transform_xyxy_to_w_h(dataset_predictions)
    pred_classes = get_classes_predictions(cx_cy_output)

    results = {"video_names": [], "class_predictions": []}

    for video_name, video_index in dataset_videos_indices.items():
        results["video_names"].append(f"{video_name}.avi")
        results["class_predictions"].append(pred_classes[video_index])

    results_df = pd.DataFrame(results)
    results_file = f"{results_dir}/class_pred_results.csv"
    results_df.to_csv(results_file, index=False)
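
transform_xyxy_to_w_h and get_classes_predictions are project helpers whose definitions are not shown here. Going by the cx_cy_output variable name, the first presumably turns (x1, y1, x2, y2) boxes into center coordinates plus width and height; a hedged sketch of that conversion follows (the real helper may differ, and get_classes_predictions, which maps the predicted snitch position to a class index, is project-specific and not reproduced).

import numpy as np


def xyxy_to_cx_cy_w_h(boxes: np.ndarray) -> np.ndarray:
    # boxes: (N, 4) array of [x1, y1, x2, y2] pixel boxes
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    cx = (x1 + x2) / 2.0   # box center, x
    cy = (y1 + y2) / 2.0   # box center, y
    w = x2 - x1            # box width
    h = y2 - y1            # box height
    return np.stack([cx, cy, w, h], axis=1)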
Example #5
def training_main(model_name: str, train_config: Dict[str, Any],
                  model_config: Dict[str, int]):

    # create train and dev datasets using the files specified in the training configuration
    train_samples_dir = train_config["train_sample_dir"]
    train_labels_dir = train_config["train_labels_dir"]
    train_containment_file = train_config["train_containment_file"]

    dev_samples_dir = train_config["dev_sample_dir"]
    dev_labels_dir = train_config["dev_labels_dir"]
    dev_containment_file = train_config["dev_containment_file"]

    train_dataset: data.Dataset = DatasetsFactory.get_training_dataset(
        model_name, train_samples_dir, train_labels_dir,
        train_containment_file)
    dev_dataset: data.Dataset = DatasetsFactory.get_training_dataset(
        model_name, dev_samples_dir, dev_labels_dir, dev_containment_file)

    # training hyperparameters and configuration
    batch_size = train_config["batch_size"]
    num_workers = train_config["num_workers"]
    num_epochs = train_config["num_epochs"]
    learning_rate = train_config["learning_rate"]
    print_batch_step = train_config["print_step"]
    inference_batch_size = train_config["inference_batch_size"]
    scheduler_patience = train_config["lr_scheduler_patience"]
    scheduler_factor = train_config["lr_scheduler_factor"]
    checkpoints_path = train_config["checkpoints_path"]
    device = torch.device(train_config["device"])
    # consistency_rate = train_config["consistency_rate"]

    # model, loss and optimizer
    model: nn.Module = ModelsFactory.get_model(model_name, model_config)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=scheduler_factor,
                                  patience=scheduler_patience,
                                  verbose=True)
    loss_function = nn.L1Loss(reduction="none")

    # create data loaders
    train_config_dict = {"batch_size": batch_size, "num_workers": num_workers}
    inference_config_dict = {
        "batch_size": inference_batch_size,
        "num_workers": num_workers
    }
    training_loader = data.DataLoader(train_dataset, **train_config_dict)
    train_inference_loader = data.DataLoader(train_dataset,
                                             **inference_config_dict)
    dev_loader = data.DataLoader(dev_dataset, **inference_config_dict)

    # Start training
    model = model.to(device)
    highest_dev_iou: float = 0
    train_start_time = time.time()

    for epoch in range(num_epochs):
        model.train(mode=True)
        epoch_num = epoch + 1

        # loss statistics
        batches_running_loss = 0
        batches_running_pred_loss = 0
        batches_running_const_loss = 0

        for batch_idx, sample in enumerate(training_loader, 1):

            x, y, _ = sample
            boxes, _ = x
            labels, mask = y
            boxes = boxes.to(device)
            labels, mask = labels.to(device), mask.to(device)

            optimizer.zero_grad()

            if model_name in DOUBLE_OUTPUT_MODELS:
                output, index_to_track_prediction = model(boxes)

            else:
                output = model(boxes)

            # prediction loss
            pred_loss = loss_function(output, labels)

            # consistency loss: L2 distance between predictions for consecutive
            # frames, encouraging temporally smooth box trajectories
            next_output_frames = output[:, 1:, :]
            current_output_frames = output[:, :-1, :]
            consistency_loss = torch.mean(
                torch.norm(next_output_frames - current_output_frames,
                           p=2,
                           dim=-1))

            if model_name in NO_LABELS_MODELS:
                pred_loss = pred_loss * mask  # mask contains only visible objects
                pred_loss = torch.mean(pred_loss)
                loss = pred_loss + 0.5 * consistency_loss

            else:
                pred_loss = torch.mean(pred_loss)
                loss = pred_loss

            batches_running_loss += loss.item()
            batches_running_pred_loss += pred_loss.item()
            batches_running_const_loss += consistency_loss.item()

            loss.backward()
            optimizer.step()

            # print inter epoch statistics
            if batch_idx % print_batch_step == 0:

                num_samples_seen = batch_idx * batch_size
                num_samples_total = len(train_dataset)
                epoch_complete_ratio = 100 * batch_idx / len(training_loader)
                average_running_loss = batches_running_loss / print_batch_step
                average_pred_loss = batches_running_pred_loss / print_batch_step
                average_consist_loss = batches_running_const_loss / print_batch_step
                time_since_beginning = int(time.time() - train_start_time)

                print(
                    "Train Epoch: {} [{}/{} ({:.0f}%)]\t Average Loss: Total {:.4f}, Pred {:.4f}, Consistency {:.4f}; training began {} seconds ago"
                    .format(epoch_num, num_samples_seen, num_samples_total,
                            epoch_complete_ratio, average_running_loss,
                            average_pred_loss, average_consist_loss,
                            time_since_beginning))

                batches_running_loss = 0
                batches_running_pred_loss = 0
                batches_running_const_loss = 0

        # end of epoch - compute mean iou over train and dev
        train_loss, train_miou, train_containment_miou = inference_and_iou_comp(
            model_name, model, device, train_inference_loader,
            len(train_dataset), loss_function)
        dev_loss, dev_miou, dev_containment_miou = inference_and_iou_comp(
            model_name, model, device, dev_loader, len(dev_dataset),
            loss_function)

        print(
            "Epoch {} Training Set: Loss {:.4f}, Mean IoU {:.6f}, Mask Mean IoU {:.6f}"
            .format(epoch_num, train_loss, train_miou, train_containment_miou))
        print(
            "Epoch {} Dev Set: Loss {:.4f}, Mean IoU {:.6f}, Mask Mean IoU {:.6f}"
            .format(epoch_num, dev_loss, dev_miou, dev_containment_miou))

        # learning rate scheduling
        scheduler.step(train_loss)

        # check if it is the best performing model so far and save it
        if dev_miou > highest_dev_iou:
            highest_dev_iou = dev_miou
            save_checkpoint(model, model_name, round(highest_dev_iou, 3),
                            checkpoints_path)
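
For completeness, a hypothetical train_config covering every key that training_main reads (the commented-out consistency_rate key is omitted); all paths and values below are placeholders rather than the project's actual settings.

example_train_config = {
    "train_sample_dir": "/path/to/train_samples",
    "train_labels_dir": "/path/to/train_labels",
    "train_containment_file": "/path/to/train_containment.json",
    "dev_sample_dir": "/path/to/dev_samples",
    "dev_labels_dir": "/path/to/dev_labels",
    "dev_containment_file": "/path/to/dev_containment.json",
    "batch_size": 32,
    "num_workers": 4,
    "num_epochs": 50,
    "learning_rate": 1e-3,
    "print_step": 50,
    "inference_batch_size": 64,
    "lr_scheduler_patience": 3,
    "lr_scheduler_factor": 0.5,
    "checkpoints_path": "/path/to/checkpoints",
    "device": "cuda:0",
}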