Example 1
def saveFrames(indices, label):
    best_rgb = rgb_frame_seq[indices]
    imageprocessing.displayImages(
        *best_rgb,
        num_rows=1,
        file_path=os.path.join(fig_dir,
                               f'{trial_str}_best-frames-{label}.png'))
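
All of these examples funnel into imageprocessing.displayImages, which is
project-specific and not shown here. As a rough mental model only, here is a
minimal matplotlib stand-in whose signature is inferred from the call sites
in these examples (the name display_images and its defaults are assumptions,
not the library's actual API; some call sites also pass num_cols, which this
sketch omits):

import math

import matplotlib.pyplot as plt


def display_images(*images, num_rows=1, figsize=None, file_path=None):
    """Tile images row-major into a num_rows-by-N grid, then show or save.

    Hypothetical sketch of an imageprocessing.displayImages-style helper.
    """
    num_cols = math.ceil(len(images) / num_rows)
    fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize,
                             squeeze=False)
    for axis in axes.ravel():
        axis.axis('off')  # hide ticks on used and unused cells alike
    for axis, image in zip(axes.ravel(), images):
        axis.imshow(image)
    plt.tight_layout()
    if file_path is None:
        plt.show()
    else:
        fig.savefig(file_path)
        plt.close(fig)

Under this reading, Example 1 lays the selected RGB frames out in a single
row and writes the grid to fig_dir.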
Example 2
    def plotBatches(self,
                    io_batches,
                    fig_dir,
                    dataset=None,
                    images_per_fig=None):
        for i, batch in enumerate(io_batches):
            preds, scores, inputs, labels, seq_id = batch

            batch_size = preds.shape[0]

            inputs = np.moveaxis(inputs.cpu().numpy(), -3, -1)
            inputs[inputs > 1] = 1
            # Concatenate each sample's crops side by side into one wide image
            flat_inputs = np.stack(tuple(
                np.hstack(tuple(c for c in crops)) for crops in inputs),
                                   axis=0)

            scores = scores.view(batch_size,
                                 *dataset.target_shape).cpu().numpy()
            preds = preds.view(batch_size, *dataset.target_shape).cpu().numpy()
            labels = labels.view(batch_size,
                                 *dataset.target_shape).cpu().numpy()

            imageprocessing.displayImages(*flat_inputs,
                                          *scores,
                                          *preds,
                                          *labels,
                                          num_rows=4,
                                          file_path=os.path.join(
                                              fig_dir, f"batch={i}.png"))
Example 3
def plot_topk(model, test_io_history, num_disp_imgs, file_path):
    inputs = np.moveaxis(
        torch.cat(tuple(batches[2][0] for batches in test_io_history)).numpy(),
        1, -1)
    outputs = torch.cat(tuple(batches[1][0] for batches in test_io_history))

    if inputs.shape[0] > num_disp_imgs:
        idxs = np.arange(inputs.shape[0])
        np.random.shuffle(idxs)
        idxs = np.sort(idxs[:num_disp_imgs])
    else:
        idxs = slice(None, None, None)

    inputs = inputs[idxs]
    outputs = outputs[idxs]

    def make_templates(preds):
        pred_templates = np.moveaxis(model.templates[preds, 0].numpy(), 1, -1)
        pred_templates[pred_templates > 1] = 1
        return pred_templates

    k = 5
    # Interleave rank-major: after the transpose and flatten below, all rank-1
    # matches come first, then all rank-2 matches, and so on, so each rank
    # fills one row of the figure.
    __, topk_preds = torch.topk(outputs, k, dim=-1)
    topk_preds = topk_preds.transpose(0, 1).contiguous().view(-1)
    topk_templates = make_templates(topk_preds)

    imageprocessing.displayImages(*inputs,
                                  *topk_templates,
                                  num_rows=1 + k,
                                  file_path=file_path)
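
The rank-major interleave in plot_topk is easy to get backwards, so here is a
hedged, self-contained check of the reshape logic (the scores are made-up
values chosen to avoid ties):

import torch

outputs = torch.tensor([[0.10, 0.90, 0.00, 0.00],
                        [0.80, 0.15, 0.05, 0.00],
                        [0.00, 0.20, 0.30, 0.50]])
_, topk = torch.topk(outputs, k=2, dim=-1)         # (3, 2): ranks per sample
flat = topk.transpose(0, 1).contiguous().view(-1)
print(flat)  # tensor([1, 0, 3, 0, 1, 2])

All three rank-1 predictions (1, 0, 3) come before the rank-2 predictions
(0, 1, 2), which is exactly the ordering displayImages needs when given
num_rows=1 + k: one row of inputs followed by one row per rank.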
Example 4
def viz_model_params(model, templates_dir):
    templates = model.templates.cpu().numpy()
    # FIXME: SOME RENDERED IMAGES HAVE CHANNEL VALUES > 1.0
    templates[templates > 1] = 1

    for i, assembly_templates in enumerate(templates):
        imageprocessing.displayImages(*assembly_templates,
                                      num_rows=6,
                                      figsize=(15, 15),
                                      file_path=os.path.join(
                                          templates_dir, f"{i}.png"))
Example 5
def saveFrames(indices, label):
    best_rgb = rgb_frame_seq[indices]
    best_seg = segment_frame_seq[indices]
    rgb_quantized = np.stack(
        tuple(
            videoprocessing.quantizeImage(keyframe_model, rgb_img,
                                          segment_img)
            for rgb_img, segment_img in zip(best_rgb, best_seg)))
    imageprocessing.displayImages(
        *best_rgb,
        *best_seg,
        *rgb_quantized,
        num_rows=3,
        file_path=os.path.join(fig_dir,
                               f'{trial_str}_best-frames-{label}.png'))
Example 6
    def plotBatches(self, io_batches, fig_dir, dataset=None):
        for i, batch in enumerate(io_batches):
            preds, scores, inputs, labels, seq_id = batch

            inputs = np.moveaxis(inputs.cpu().numpy(), 1, -1)
            inputs[inputs > 1] = 1

            scores = np.moveaxis(scores.cpu().numpy(), 1, -1)
            scores[scores > 1] = 1

            imageprocessing.displayImages(*inputs,
                                          *scores,
                                          num_rows=2,
                                          file_path=os.path.join(
                                              fig_dir, f"{i}.png"))
Example 7
    def viz(self, inputs, outputs):
        self._index += 1
        i = self._index

        inputs = np.moveaxis(inputs.cpu().numpy(), 1, -1)
        inputs[inputs > 1] = 1

        outputs = np.moveaxis(outputs.detach().cpu().numpy(), 1, -1)
        outputs[outputs > 1] = 1

        imageprocessing.displayImages(*inputs,
                                      *outputs,
                                      num_rows=2,
                                      file_path=os.path.join(
                                          self._debug_fig_dir, f"{i}.png"))
Example 8
    def plotBatches(self,
                    io_batches,
                    fig_dir,
                    dataset=None,
                    images_per_fig=None):
        for i, batch in enumerate(io_batches):
            preds, scores, inputs, labels, seq_id = batch
            num_batch = preds.shape[0]

            if images_per_fig is None:
                images_per_fig = num_batch

            num_batches = math.ceil(num_batch / images_per_fig)
            for j in range(num_batches):
                start = j * images_per_fig
                end = start + images_per_fig

                b_scores = scores[start:end]
                b_preds = preds[start:end]
                b_labels = labels[start:end]
                b_inputs = inputs[start:end]

                b_size = b_scores.shape[0]
                if not b_size:
                    continue

                b_inputs = np.moveaxis(b_inputs.cpu().numpy(), 1, -1)
                b_inputs[b_inputs > 1] = 1

                b_scores = b_scores.view(b_size,
                                         *dataset.target_shape).cpu().numpy()
                b_preds = b_preds.view(b_size,
                                       *dataset.target_shape).cpu().numpy()
                b_labels = b_labels.view(b_size,
                                         *dataset.target_shape).cpu().numpy()

                imageprocessing.displayImages(*b_inputs,
                                              *b_scores,
                                              *b_preds,
                                              *b_labels,
                                              num_rows=4,
                                              file_path=os.path.join(
                                                  fig_dir,
                                                  f"batch({i},{j}).png"))
Example 9
def main(out_dir=None,
         data_dir=None,
         preprocess_dir=None,
         classifier_fn=None,
         display_summary_img=None,
         write_video=None,
         start_from=None,
         stop_after=None):

    if start_from is None:
        start_from = 0

    if stop_after is None:
        stop_after = float("Inf")

    data_dir = os.path.expanduser(data_dir)
    preprocess_dir = os.path.expanduser(preprocess_dir)
    out_dir = os.path.expanduser(out_dir)
    classifier_fn = os.path.expanduser(classifier_fn)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDataDir(var_name):
        return joblib.load(os.path.join(data_dir, f'{var_name}.pkl'))

    def loadFromPreprocessDir(var_name):
        return joblib.load(os.path.join(preprocess_dir, f'{var_name}.pkl'))

    def saveVariable(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f'{var_name}.pkl'))

    classifier = joblib.load(classifier_fn)

    trial_ids = getUniqueTrialIds(preprocess_dir)
    for i, trial_id in enumerate(trial_ids):

        if i < start_from:
            continue

        if i > stop_after:
            break

        trial_str = f"trial-{trial_id}"

        logger.info(
            f"Processing video {i + 1} / {len(trial_ids)}  (trial {trial_id})")
        rgb_frame_seq = loadFromDataDir(f"{trial_str}_rgb-frame-seq")
        # depth_frame_seq = loadFromDataDir(f"{trial_str}_depth-frame-seq")
        # foreground_mask_seq = loadFromPreprocessDir(f'{trial_str}_foreground-mask-seq')
        segment_frame_seq = loadFromPreprocessDir(
            f'{trial_str}_segment-frame-seq')
        # block_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_block-segment-frame-seq')
        # skin_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_skin-segment-frame-seq')
        # color_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_color-label-frame-seq')
        # class_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_class-label-frame-seq')

        segment_features_seq, feature_frame_seq = utils.batchProcess(
            extractSegmentFeatures,
            rgb_frame_seq,
            segment_frame_seq,
            static_args=(classifier, ),
            unzip=True)

        saveVariable(segment_features_seq, f'{trial_str}_segment-features-seq')

        if display_summary_img:
            if utils.in_ipython_console():
                file_path = None
            else:
                trial_str = f"trial-{trial_id}"
                file_path = os.path.join(fig_dir,
                                         f'{trial_str}_best-frames.png')
            imageprocessing.displayImages(*rgb_frame_seq,
                                          *feature_frame_seq,
                                          num_rows=2,
                                          file_path=file_path)

        if write_video:
            video_dir = os.path.join(out_dir, 'detection-videos')
            if not os.path.exists(video_dir):
                os.makedirs(video_dir)
            fn = os.path.join(video_dir, f"{trial_str}.gif")
            writer = imageio.get_writer(fn, mode='I')
            for rgb_frame, feature_frame in zip(rgb_frame_seq,
                                                feature_frame_seq):
                feature_frame = feature_frame.astype(float)
                max_val = feature_frame.max()
                if max_val:
                    feature_frame = feature_frame / max_val
                feature_frame = np.stack((feature_frame, ) * 3, axis=-1)
                rgb_frame = img_as_float(rgb_frame)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    image = img_as_ubyte(np.hstack((rgb_frame, feature_frame)))
                writer.append_data(image)
            writer.close()
Example 10
def main(out_dir=None,
         data_dir=None,
         segs_dir=None,
         scores_dir=None,
         vocab_dir=None,
         label_type='edge',
         gpu_dev_id=None,
         start_from=None,
         stop_at=None,
         num_disp_imgs=None,
         results_file=None,
         sweep_param_name=None,
         model_params={},
         cv_params={}):

    data_dir = os.path.expanduser(data_dir)
    segs_dir = os.path.expanduser(segs_dir)
    scores_dir = os.path.expanduser(scores_dir)
    vocab_dir = os.path.expanduser(vocab_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir_images = os.path.join(fig_dir, 'model-io_images')
    if not os.path.exists(io_dir_images):
        os.makedirs(io_dir_images)

    io_dir_plots = os.path.join(fig_dir, 'model-io_plots')
    if not os.path.exists(io_dir_plots):
        os.makedirs(io_dir_plots)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    seq_ids = utils.getUniqueIds(scores_dir,
                                 prefix='trial=',
                                 suffix='score-seq.*',
                                 to_array=True)

    logger.info(
        f"Loaded scores for {len(seq_ids)} sequences from {scores_dir}")

    link_vocab = {}
    joint_vocab = {}
    joint_type_vocab = {}
    vocab, parts_vocab, part_labels = load_vocab(link_vocab, joint_vocab,
                                                 joint_type_vocab, vocab_dir)
    pred_vocab = []  # FIXME

    if label_type == 'assembly':
        logger.info("Converting assemblies -> edges")
        state_pred_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_pred-label-seq", scores_dir)
            for seq_id in seq_ids)
        state_true_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_true-label-seq", scores_dir)
            for seq_id in seq_ids)
        edge_pred_seqs = tuple(part_labels[seq] for seq in state_pred_seqs)
        edge_true_seqs = tuple(part_labels[seq] for seq in state_true_seqs)
    elif label_type == 'edge':
        logger.info("Converting edges -> assemblies (will take a few minutes)")
        edge_pred_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_pred-label-seq", scores_dir)
            for seq_id in seq_ids)
        edge_true_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_true-label-seq", scores_dir)
            for seq_id in seq_ids)
        state_pred_seqs = tuple(
            edges_to_assemblies(seq, pred_vocab, parts_vocab, part_labels)
            for seq in edge_pred_seqs)
        state_true_seqs = tuple(
            edges_to_assemblies(seq, vocab, parts_vocab, part_labels)
            for seq in edge_true_seqs)
    else:
        err_str = f"Unrecognized label_type: {label_type}"
        raise AssertionError(err_str)

    device = torchutils.selectDevice(gpu_dev_id)
    dataset = sim2real.LabeledConnectionDataset(
        utils.loadVariable('parts-vocab', vocab_dir),
        utils.loadVariable('part-labels', vocab_dir),
        utils.loadVariable('vocab', vocab_dir),
        device=device)

    all_metrics = collections.defaultdict(list)

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    for cv_index, cv_fold in enumerate(cv_folds):
        train_indices, val_indices, test_indices = cv_fold
        logger.info(
            f"CV FOLD {cv_index + 1} / {len(cv_folds)}: "
            f"{len(train_indices)} train, {len(val_indices)} val, {len(test_indices)} test"
        )

        train_states = np.hstack(
            tuple(state_true_seqs[i] for i in train_indices))
        train_edges = part_labels[train_states]
        # state_train_vocab = np.unique(train_states)
        # edge_train_vocab = part_labels[state_train_vocab]
        train_freq_bigram, train_freq_unigram = edge_joint_freqs(train_edges)
        # state_probs = utils.makeHistogram(len(vocab), train_states, normalize=True)

        test_states = np.hstack(
            tuple(state_true_seqs[i] for i in test_indices))
        test_edges = part_labels[test_states]
        # state_test_vocab = np.unique(test_states)
        # edge_test_vocab = part_labels[state_test_vocab]
        test_freq_bigram, test_freq_unigram = edge_joint_freqs(test_edges)

        f, axes = plt.subplots(1, 2)
        axes[0].matshow(train_freq_bigram)
        axes[0].set_title('Train')
        axes[1].matshow(test_freq_bigram)
        axes[1].set_title('Test')
        plt.tight_layout()
        plt.savefig(
            os.path.join(fig_dir, f"edge-freqs-bigram_cvfold={cv_index}.png"))

        f, axis = plt.subplots(1)
        axis.stem(train_freq_unigram,
                  label='Train',
                  linefmt='C0-',
                  markerfmt='C0o')
        axis.stem(test_freq_unigram,
                  label='Test',
                  linefmt='C1--',
                  markerfmt='C1o')
        plt.legend()
        plt.tight_layout()
        plt.savefig(
            os.path.join(fig_dir, f"edge-freqs-unigram_cvfold={cv_index}.png"))

        for i in test_indices:
            seq_id = seq_ids[i]
            logger.info(f"  Processing sequence {seq_id}...")

            trial_prefix = f"trial={seq_id}"
            # I include the '.' to differentiate between 'rgb-frame-seq' and
            # 'rgb-frame-seq-before-first-touch'
            # rgb_seq = utils.loadVariable(f"{trial_prefix}_rgb-frame-seq.", data_dir)
            # seg_seq = utils.loadVariable(f"{trial_prefix}_seg-labels-seq", segs_dir)
            score_seq = utils.loadVariable(f"{trial_prefix}_score-seq",
                                           scores_dir)
            # if score_seq.shape[0] != rgb_seq.shape[0]:
            #     err_str = f"scores shape {score_seq.shape} != data shape {rgb_seq.shape}"
            #     raise AssertionError(err_str)

            edge_pred_seq = edge_pred_seqs[i]
            edge_true_seq = edge_true_seqs[i]
            state_pred_seq = state_pred_seqs[i]
            state_true_seq = state_true_seqs[i]

            num_types = np.unique(state_pred_seq).shape[0]
            num_samples = state_pred_seq.shape[0]
            num_total = len(pred_vocab)
            logger.info(
                f"    {num_types} assemblies predicted ({num_total} total); "
                f"{num_samples} samples")

            # edge_freq_bigram, edge_freq_unigram = edge_joint_freqs(edge_true_seq)
            # dist_shift = np.linalg.norm(train_freq_unigram - edge_freq_unigram)
            metric_dict = {
                # 'State OOV rate': oov_rate_state(state_true_seq, state_train_vocab),
                # 'Edge OOV rate': oov_rate_edges(edge_true_seq, edge_train_vocab),
                # 'State avg prob, true': state_probs[state_true_seq].mean(),
                # 'State avg prob, pred': state_probs[state_pred_seq].mean(),
                # 'Edge distribution shift': dist_shift
            }
            metric_dict = eval_edge_metrics(edge_pred_seq,
                                            edge_true_seq,
                                            append_to=metric_dict)
            metric_dict = eval_state_metrics(state_pred_seq,
                                             state_true_seq,
                                             append_to=metric_dict)
            for name, value in metric_dict.items():
                logger.info(f"    {name}: {value * 100:.2f}%")
                all_metrics[name].append(value)

            utils.writeResults(results_file, metric_dict, sweep_param_name,
                               model_params)

            if num_disp_imgs is not None:
                pred_images = tuple(
                    render(dataset, vocab[seg_label])
                    for seg_label in utils.computeSegments(state_pred_seq)[0])
                imageprocessing.displayImages(
                    *pred_images,
                    file_path=os.path.join(
                        io_dir_images,
                        f"seq={seq_id:03d}_pred-assemblies.png"),
                    num_rows=None,
                    num_cols=5)
                true_images = tuple(
                    render(dataset, vocab[seg_label])
                    for seg_label in utils.computeSegments(state_true_seq)[0])
                imageprocessing.displayImages(
                    *true_images,
                    file_path=os.path.join(
                        io_dir_images,
                        f"seq={seq_id:03d}_true-assemblies.png"),
                    num_rows=None,
                    num_cols=5)

                utils.plot_array(score_seq.T,
                                 (edge_true_seq.T, edge_pred_seq.T),
                                 ('true', 'pred'),
                                 fn=os.path.join(io_dir_plots,
                                                 f"seq={seq_id:03d}.png"))
Example 11
def main(out_dir=None,
         data_dir=None,
         person_masks_dir=None,
         bg_masks_dir=None,
         sat_thresh=1,
         start_from=None,
         stop_at=None,
         num_disp_imgs=None):

    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    person_masks_dir = os.path.expanduser(person_masks_dir)
    bg_masks_dir = os.path.expanduser(bg_masks_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDir(var_name, dir_name):
        return joblib.load(os.path.join(dir_name, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    for seq_idx, trial_id in enumerate(trial_ids):

        if start_from is not None and seq_idx < start_from:
            continue

        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"

        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)}  (trial {trial_id})"
        )

        logger.info("  Loading data...")
        rgb_frame_seq = loadFromDir(f'{trial_str}_rgb-frame-seq', data_dir)
        person_mask_seq = loadFromDir(f'{trial_str}_person-mask-seq',
                                      person_masks_dir)
        bg_mask_seq_depth = loadFromDir(f'{trial_str}_bg-mask-seq-depth',
                                        bg_masks_dir)
        # bg_mask_seq_rgb = loadFromDir(f'{trial_str}_bg-mask-seq-rgb', bg_masks_dir)

        logger.info("  Making segment labels...")
        fg_mask_seq = ~bg_mask_seq_depth
        seg_labels_seq = np.stack(tuple(
            map(makeCoarseSegmentLabels, fg_mask_seq)),
                                  axis=0)

        hsv_frame_seq = np.stack(tuple(map(makeHsvFrame, rgb_frame_seq)),
                                 axis=0)
        sat_frame_seq = hsv_frame_seq[..., 1]
        bg_mask_seq_sat = sat_frame_seq < sat_thresh

        seg_labels_seq[person_mask_seq] = 0
        seg_labels_seq = np.stack(tuple(
            makeFineSegmentLabels(segs, sat)
            for segs, sat in zip(seg_labels_seq, bg_mask_seq_sat)),
                                  axis=0)

        logger.info("  Saving output...")
        saveToWorkingDir(seg_labels_seq.astype(np.uint8),
                         f'{trial_str}_seg-labels-seq')

        plotHsvHist(hsv_frame_seq,
                    seg_labels_seq,
                    file_path=os.path.join(fig_dir,
                                           f'{trial_str}_hsv-hists.png'))

        if num_disp_imgs is not None:
            if rgb_frame_seq.shape[0] > num_disp_imgs:
                idxs = np.arange(rgb_frame_seq.shape[0])
                np.random.shuffle(idxs)
                idxs = idxs[:num_disp_imgs]
            else:
                idxs = slice(None, None, None)
            imageprocessing.displayImages(*(rgb_frame_seq[idxs]),
                                          *(bg_mask_seq_sat[idxs]),
                                          *(bg_mask_seq_depth[idxs]),
                                          *(person_mask_seq[idxs]),
                                          *(seg_labels_seq[idxs]),
                                          num_rows=5,
                                          file_path=os.path.join(
                                              fig_dir,
                                              f'{trial_str}_best-frames.png'))
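
The shuffle-then-truncate pattern used to cap the figure at num_disp_imgs
frames recurs in several of these scripts. A small helper (a sketch, not part
of the original codebase) that also restores temporal order, as Example 3
does with np.sort:

import numpy as np


def subsample_indices(num_items, num_samples):
    """Pick at most num_samples indices uniformly at random, in order."""
    if num_samples is None or num_items <= num_samples:
        return slice(None)
    idxs = np.random.choice(num_items, size=num_samples, replace=False)
    return np.sort(idxs)

Because it returns either a slice or an index array, the same value can
index every parallel array (rgb_frame_seq, the mask sequences, and
seg_labels_seq) consistently.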
Example 12
def main(out_dir=None,
         data_dir=None,
         background_data_dir=None,
         learn_bg_model=False,
         gpu_dev_id=None,
         start_from=None,
         stop_at=None,
         num_disp_imgs=None,
         depth_bg_detection_kwargs={},
         rgb_bg_detection_kwargs={}):

    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    background_data_dir = os.path.expanduser(background_data_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDir(var_name, dir_name):
        return joblib.load(os.path.join(dir_name, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    device = torchutils.selectDevice(gpu_dev_id)

    camera_pose = render.camera_pose
    camera_params = render.intrinsic_matrix

    for seq_idx, trial_id in enumerate(trial_ids):

        if start_from is not None and seq_idx < start_from:
            continue

        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"

        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)}  (trial {trial_id})"
        )

        logger.info("  Loading data...")
        try:
            rgb_frame_seq = loadFromDir(f"{trial_str}_rgb-frame-seq", data_dir)
            depth_frame_seq = loadFromDir(f"{trial_str}_depth-frame-seq",
                                          data_dir)
            rgb_train = loadFromDir(
                f"{trial_str}_rgb-frame-seq-before-first-touch",
                background_data_dir)
            depth_train = loadFromDir(
                f"{trial_str}_depth-frame-seq-before-first-touch",
                background_data_dir)

            if isinstance(depth_train, tuple) and isinstance(
                    depth_frame_seq, tuple):
                logger.info("  Skipping video: depth frames missing")
                continue

            rgb_frame_seq = np.stack(tuple(
                skimage.img_as_float(f) for f in rgb_frame_seq),
                                     axis=0)
            rgb_train = np.stack(tuple(
                skimage.img_as_float(f) for f in rgb_train),
                                 axis=0)

        except FileNotFoundError as e:
            logger.info(e)
            continue

        logger.info("  Removing background...")

        try:
            bg_mask_depth_train = loadFromDir(
                f'{trial_str}_bg-mask-depth-train', out_data_dir)
            bg_mask_seq_depth = loadFromDir(f'{trial_str}_bg-mask-seq-depth',
                                            out_data_dir)
        except FileNotFoundError:
            bg_model_depth, bg_mask_depth_train = fitBackgroundDepth(
                depth_train,
                camera_params=camera_params,
                camera_pose=camera_pose,
                **depth_bg_detection_kwargs)

            bg_mask_seq_depth = detectBackgroundDepth(
                bg_model_depth,
                depth_frame_seq,
                camera_params=camera_params,
                camera_pose=camera_pose,
                **depth_bg_detection_kwargs)

            __, bg_model_depth_image, __ = render.renderPlane(
                bg_model_depth,
                camera_pose=camera_pose,
                camera_params=camera_params)
            imageprocessing.displayImage(
                bg_model_depth_image,
                file_path=os.path.join(fig_dir,
                                       f'{trial_str}_bg-image-depth.png'))

        bg_model_rgb = fitBackgroundRgb(
            np.vstack((rgb_train, rgb_frame_seq)),
            np.vstack((bg_mask_depth_train, bg_mask_seq_depth)))

        if learn_bg_model:
            model = RgbBackgroundModel(bg_model_rgb,
                                       update_bg=True,
                                       device=device)
            losses, metrics = model.fit(np.vstack((rgb_train, rgb_frame_seq)),
                                        np.vstack((bg_mask_depth_train,
                                                   bg_mask_seq_depth)),
                                        num_epochs=100)

            outputs = model.forward(
                torch.tensor(rgb_frame_seq, dtype=torch.float,
                             device=device).permute(0, -1, 1, 2))
            bg_mask_seq_rgb = model.predict(outputs).cpu().numpy().squeeze()
            plot_dict = {'Loss': losses, 'Accuracy': metrics}
        else:

            def f1(preds, targets):
                true_positives = np.sum((targets == 1) * (preds == 1))
                false_positives = np.sum((targets == 0) * (preds == 1))
                false_negatives = np.sum((targets == 1) * (preds == 0))

                precision = true_positives / (true_positives + false_positives)
                recall = true_positives / (true_positives + false_negatives)

                f1 = 2 * (precision * recall) / (precision + recall)
                return f1

            def acc(preds, targets):
                matches = preds == targets
                return matches.mean()

            bg_dists = np.linalg.norm(rgb_frame_seq - bg_model_rgb[None, ...],
                                      axis=-1)
            thresh_vals = np.linspace(0, 1, num=50)
            scores = np.array(
                [acc(bg_dists < t, bg_mask_seq_depth) for t in thresh_vals])
            best_index = scores.argmax()
            best_thresh = thresh_vals[best_index]
            bg_mask_seq_rgb = bg_dists < best_thresh
            plot_dict = {'Accuracy': scores}

        torchutils.plotEpochLog(plot_dict,
                                subfig_size=(10, 2.5),
                                title='Training performance',
                                fn=os.path.join(fig_dir,
                                                f'{trial_str}_train-plot.png'))

        logger.info("  Saving output...")
        saveToWorkingDir(bg_mask_depth_train.astype(bool),
                         f'{trial_str}_bg-mask-depth-train')
        saveToWorkingDir(bg_mask_seq_depth.astype(bool),
                         f'{trial_str}_bg-mask-seq-depth')
        saveToWorkingDir(bg_mask_seq_rgb.astype(bool),
                         f'{trial_str}_bg-mask-seq-rgb')

        if num_disp_imgs is not None:
            if rgb_frame_seq.shape[0] > num_disp_imgs:
                idxs = np.arange(rgb_frame_seq.shape[0])
                np.random.shuffle(idxs)
                idxs = idxs[:num_disp_imgs]
            else:
                idxs = slice(None, None, None)
            imageprocessing.displayImages(*(rgb_frame_seq[idxs]),
                                          *(bg_mask_seq_rgb[idxs]),
                                          *(depth_frame_seq[idxs]),
                                          *(bg_mask_seq_depth[idxs]),
                                          num_rows=4,
                                          file_path=os.path.join(
                                              fig_dir,
                                              f'{trial_str}_best-frames.png'))
            imageprocessing.displayImage(bg_model_rgb,
                                         file_path=os.path.join(
                                             fig_dir,
                                             f'{trial_str}_bg-image-rgb.png'))
Example 13
def main(out_dir=None,
         scores_dir=None,
         preprocessed_data_dir=None,
         keyframe_model_name=None,
         subsample_period=None,
         window_size=None,
         corpus_name=None,
         default_annotator=None,
         cv_scheme=None,
         max_trials_per_fold=None,
         model_name=None,
         numeric_backend=None,
         gpu_dev_id=None,
         visualize=False,
         model_config={},
         camera_params_config={}):

    out_dir = os.path.expanduser(out_dir)
    scores_dir = os.path.expanduser(scores_dir)
    preprocessed_data_dir = os.path.expanduser(preprocessed_data_dir)

    m.set_backend('numpy')

    def loadFromWorkingDir(var_name):
        return joblib.load(os.path.join(scores_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_dir, f"{var_name}.pkl"))

    # Load camera parameters from external file and add them to model config kwargs
    model_config['init_kwargs'].update(
        render.loadCameraParams(**camera_params_config, as_dict=True))

    trial_ids = joblib.load(
        os.path.join(preprocessed_data_dir, 'trial_ids.pkl'))

    corpus = duplocorpus.DuploCorpus(corpus_name)
    assembly_seqs = tuple(
        labels.parseLabelSeq(
            corpus.readLabels(trial_id, default_annotator)[0])
        for trial_id in trial_ids)

    logger.info(f"Selecting keyframes...")
    keyframe_idx_seqs = []
    rgb_keyframe_seqs = []
    depth_keyframe_seqs = []
    seg_keyframe_seqs = []
    background_keyframe_seqs = []
    assembly_keyframe_seqs = []
    for seq_idx, trial_id in enumerate(trial_ids):
        trial_str = f"trial-{trial_id}"
        rgb_frame_seq = loadFromWorkingDir(f'{trial_str}_rgb-frame-seq')
        depth_frame_seq = loadFromWorkingDir(f'{trial_str}_depth-frame-seq')
        segment_seq = loadFromWorkingDir(f'{trial_str}_segment-seq')
        frame_scores = loadFromWorkingDir(f'{trial_str}_frame-scores')
        background_plane_seq = loadFromWorkingDir(
            f'{trial_str}_background-plane-seq')

        assembly_seq = assembly_seqs[seq_idx]
        # FIXME: Get the real frame index numbers instead of approximating
        assembly_seq[-1].end_idx = len(rgb_frame_seq) * subsample_period

        keyframe_idxs = videoprocessing.selectSegmentKeyframes(
            frame_scores, score_thresh=0, prepend_first=True)

        selectKeyframes = functools.partial(utils.select, keyframe_idxs)
        rgb_keyframe_seq = selectKeyframes(rgb_frame_seq)
        depth_keyframe_seq = selectKeyframes(depth_frame_seq)
        seg_keyframe_seq = selectKeyframes(segment_seq)
        background_keyframe_seq = selectKeyframes(background_plane_seq)

        # FIXME: Get the real frame index numbers instead of approximating
        keyframe_idxs_orig = keyframe_idxs * subsample_period
        assembly_keyframe_seq = labels.resampleStateSeq(
            keyframe_idxs_orig, assembly_seq)

        # Store all keyframe sequences in memory
        keyframe_idx_seqs.append(keyframe_idxs)
        rgb_keyframe_seqs.append(rgb_keyframe_seq)
        depth_keyframe_seqs.append(depth_keyframe_seq)
        seg_keyframe_seqs.append(seg_keyframe_seq)
        background_keyframe_seqs.append(background_keyframe_seq)
        assembly_keyframe_seqs.append(assembly_keyframe_seq)

    # Split into train and test sets
    if cv_scheme == 'leave one out':
        num_seqs = len(trial_ids)
        cv_folds = []
        for i in range(num_seqs):
            test_fold = (i, )
            train_fold = tuple(range(0, i)) + tuple(range(i + 1, num_seqs))
            cv_folds.append((train_fold, test_fold))
    elif cv_scheme == 'train on child':
        child_corpus = duplocorpus.DuploCorpus('child')
        child_trial_ids = utils.loadVariable('trial_ids',
                                             'preprocess-all-data', 'child')
        child_assembly_seqs = [
            labels.parseLabelSeq(
                child_corpus.readLabels(trial_id, 'Cathryn')[0])
            for trial_id in child_trial_ids
        ]
        num_easy = len(assembly_keyframe_seqs)
        num_child = len(child_assembly_seqs)
        cv_folds = [(tuple(range(num_easy, num_easy + num_child)),
                     tuple(range(num_easy)))]
        assembly_keyframe_seqs = assembly_keyframe_seqs + child_assembly_seqs
    else:
        err_str = f"Unrecognized cv_scheme: {cv_scheme}"
        raise AssertionError(err_str)

    rgb_keyframe_seqs = tuple(
        tuple(
            imageprocessing.saturateImage(rgb_image,
                                          background_mask=segment_image == 0)
            for rgb_image, segment_image in zip(rgb_frame_seq, seg_frame_seq))
        for rgb_frame_seq, seg_frame_seq in zip(rgb_keyframe_seqs,
                                                seg_keyframe_seqs))

    depth_keyframe_seqs = tuple(
        tuple(depth_image.astype(float) for depth_image in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)

    device = torchutils.selectDevice(gpu_dev_id)
    m.set_backend('torch')
    m.set_default_device(device)

    assembly_keyframe_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_keyframe_seqs)
    assembly_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_seqs)

    rgb_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.float) for frame in rgb_frame_seq)
        for rgb_frame_seq in rgb_keyframe_seqs)
    depth_keyframe_seqs = tuple(
        tuple(
            m.np.array(frame, dtype=torch.float) for frame in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)
    seg_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.int) for frame in seg_frame_seq)
        for seg_frame_seq in seg_keyframe_seqs)

    num_cv_folds = len(cv_folds)
    saveToWorkingDir(cv_folds, 'cv-folds')
    for fold_index, (train_idxs, test_idxs) in enumerate(cv_folds):
        logger.info(f"CV FOLD {fold_index + 1} / {num_cv_folds}")

        # Initialize and train model
        utils.validateCvFold(train_idxs, test_idxs)
        selectTrain = functools.partial(utils.select, train_idxs)
        train_assembly_seqs = selectTrain(assembly_keyframe_seqs)
        model = getattr(models, model_name)(**model_config['init_kwargs'])
        logger.info(
            f"  Training {model_name} on {len(train_idxs)} sequences...")
        model.fit(train_assembly_seqs, **model_config['fit_kwargs'])
        logger.info(
            f'    Model trained on {model.num_states} unique assembly states')
        # saveToWorkingDir(model, f'model-fold{fold_index}')

        # Decode on the test set
        selectTest = functools.partial(utils.select, test_idxs)
        test_trial_ids = selectTest(trial_ids)
        test_rgb_keyframe_seqs = selectTest(rgb_keyframe_seqs)
        test_depth_keyframe_seqs = selectTest(depth_keyframe_seqs)
        test_seg_keyframe_seqs = selectTest(seg_keyframe_seqs)
        test_background_keyframe_seqs = selectTest(background_keyframe_seqs)
        test_assembly_keyframe_seqs = selectTest(assembly_keyframe_seqs)
        test_assembly_seqs = selectTest(assembly_seqs)

        logger.info(f"  Testing model on {len(test_idxs)} sequences...")
        for i, trial_id in enumerate(test_trial_ids):
            if max_trials_per_fold is not None and i >= max_trials_per_fold:
                break

            rgb_frame_seq = test_rgb_keyframe_seqs[i]
            depth_frame_seq = test_depth_keyframe_seqs[i]
            seg_frame_seq = test_seg_keyframe_seqs[i]
            background_plane_seq = test_background_keyframe_seqs[i]
            true_assembly_seq = test_assembly_keyframe_seqs[i]
            true_assembly_seq_orig = test_assembly_seqs[i]

            rgb_background_seq, depth_background_seq = utils.batchProcess(
                model.renderPlane, background_plane_seq, unzip=True)

            logger.info(f'    Decoding video {trial_id}...')
            start_time = time.process_time()
            out = model.predictSeq(rgb_frame_seq, depth_frame_seq,
                                   seg_frame_seq, rgb_background_seq,
                                   depth_background_seq,
                                   **model_config['decode_kwargs'])
            pred_assembly_seq, pred_idx_seq, max_log_probs, log_likelihoods, poses_seq = out
            end_time = time.process_time()
            logger.info(utils.makeProcessTimeStr(end_time - start_time))

            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq)
            logger.info(f'    ACCURACY: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_true=True)
            logger.info(f'    RECALL: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_pred=True)
            logger.info(f'    PRECISION: {num_correct} / {num_total}')

            # Save intermediate results
            logger.info(f"Saving output...")
            saveToWorkingDir(segment_seq, f'segment_seq-{trial_id}')
            saveToWorkingDir(true_assembly_seq_orig,
                             f'true_state_seq_orig-{trial_id}')
            saveToWorkingDir(true_assembly_seq, f'true_state_seq-{trial_id}')
            saveToWorkingDir(pred_assembly_seq, f'pred_state_seq-{trial_id}')
            saveToWorkingDir(poses_seq, f'poses_seq-{trial_id}')
            saveToWorkingDir(background_plane_seq,
                             f'background_plane_seq-{trial_id}')
            saveToWorkingDir(max_log_probs, f'max_log_probs-{trial_id}')
            saveToWorkingDir(log_likelihoods, f'log_likelihoods-{trial_id}')

            # Save figures
            if visualize:
                rgb_rendered_seq, depth_rendered_seq, label_rendered_seq = utils.batchProcess(
                    model.renderScene,
                    pred_assembly_seq,
                    poses_seq,
                    rgb_background_seq,
                    depth_background_seq,
                    unzip=True,
                    static_kwargs={'as_numpy': True})
                if utils.in_ipython_console():
                    file_path = None
                else:
                    trial_str = f"trial-{trial_id}"
                    file_path = os.path.join(out_dir,
                                             f'{trial_str}_best-frames.png')
                rgb_frame_seq = tuple(img.cpu().numpy()
                                      for img in rgb_frame_seq)
                imageprocessing.displayImages(*rgb_frame_seq,
                                              *rgb_rendered_seq,
                                              num_rows=2,
                                              file_path=file_path)
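
As an aside, the 'leave one out' branch above builds its folds by hand; the
same folds could come from scikit-learn, assuming it is an acceptable
dependency here:

from sklearn.model_selection import LeaveOneOut

num_seqs = 4
cv_folds = [(tuple(map(int, train)), tuple(map(int, test)))
            for train, test in LeaveOneOut().split(list(range(num_seqs)))]
print(cv_folds[0])  # ((1, 2, 3), (0,))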
Example 14
def main(out_dir=None,
         data_dir=None,
         gpu_dev_id=None,
         batch_size=None,
         start_from=None,
         stop_at=None,
         num_disp_imgs=None):

    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDir(var_name, dir_name):
        return joblib.load(os.path.join(dir_name, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    device = torchutils.selectDevice(gpu_dev_id)

    # Build the pretrained detector once, instead of re-creating it per video
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True)
    model = model.to(device=device)
    model.device = device
    model.eval()

    for seq_idx, trial_id in enumerate(trial_ids):

        if start_from is not None and seq_idx < start_from:
            continue

        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"

        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)}  (trial {trial_id})"
        )

        logger.info("  Loading data...")
        try:
            rgb_frame_seq = loadFromDir(f"{trial_str}_rgb-frame-seq", data_dir)
            rgb_frame_seq = np.stack(tuple(
                skimage.img_as_float(f) for f in rgb_frame_seq),
                                     axis=0)
        except FileNotFoundError as e:
            logger.info(e)
            continue

        logger.info("  Detecting objects...")
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True)
        model = model.to(device=device)
        model.device = device
        model.eval()

        inputs = np.moveaxis(rgb_frame_seq, 3, 1)

        if batch_size is None:
            batch_size = inputs.shape[0]

        def detectBatch(batch_index):
            start = batch_size * batch_index
            end = start + batch_size
            in_batch = torch.tensor(inputs[start:end], dtype=torch.float)
            out_batches = detectCategories(model, in_batch)
            return tuple(batch.numpy().squeeze(axis=1)
                         for batch in out_batches)

        num_batches = math.ceil(inputs.shape[0] / batch_size)
        person_mask_seq, bg_mask_seq = map(
            np.vstack, zip(*(detectBatch(i) for i in range(num_batches))))
        person_mask_seq = person_mask_seq.astype(bool)

        logger.info("  Saving output...")
        saveToWorkingDir(person_mask_seq, f'{trial_str}_person-mask-seq')

        if num_disp_imgs is not None:
            if rgb_frame_seq.shape[0] > num_disp_imgs:
                idxs = np.arange(rgb_frame_seq.shape[0])
                np.random.shuffle(idxs)
                idxs = idxs[:num_disp_imgs]
            else:
                idxs = slice(None, None, None)
            imageprocessing.displayImages(*(rgb_frame_seq[idxs]),
                                          *(person_mask_seq[idxs]),
                                          *(bg_mask_seq[idxs]),
                                          num_rows=3,
                                          file_path=os.path.join(
                                              fig_dir,
                                              f'{trial_str}_best-frames.png'))
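
detectCategories is not shown in these examples; the call site only tells us
it maps a float image batch to per-category mask batches shaped
(batch, 1, height, width). A hedged sketch of what such a helper might look
like on top of torchvision's Mask R-CNN output format (the person/background
split and both thresholds are assumptions, not the original helper):

import torch

PERSON_LABEL = 1  # 'person' in torchvision's COCO category indexing


def detect_categories(model, batch, score_thresh=0.5, mask_thresh=0.5):
    """Union each frame's confident person masks; return (person, background).

    Hypothetical stand-in for detectCategories; both outputs have shape
    (batch, 1, height, width), matching the squeeze(axis=1) above.
    """
    with torch.no_grad():
        outputs = model([img.to(model.device) for img in batch])
    person_masks = []
    for img, out in zip(batch, outputs):
        keep = (out['labels'] == PERSON_LABEL) & (out['scores'] > score_thresh)
        masks = (out['masks'][keep] > mask_thresh).cpu()  # (N, 1, H, W) bool
        person = (masks.any(dim=0) if masks.shape[0]
                  else torch.zeros_like(img[:1], dtype=torch.bool))
        person_masks.append(person)
    person_masks = torch.stack(person_masks)  # (B, 1, H, W)
    return person_masks, ~person_masks

Whether the real helper derives the background mask as the complement of the
person mask is a guess from the variable names in Example 14.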