Example #1
def estimateKeyframeFnSeqs(frame_fn_seqs, frame_timestamp_seqs,
                           keyframe_timestamp_seqs):
    keyframe_fn_seqs = utils.batchProcess(estimateKeyframeFnSeq, frame_fn_seqs,
                                          frame_timestamp_seqs,
                                          keyframe_timestamp_seqs)

    return keyframe_fn_seqs
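
The examples in this section are all call sites of `utils.batchProcess`. Its implementation isn't reproduced here, but the call sites pin down its behavior: positional argument sequences are iterated in parallel, `static_args` and `static_kwargs` are passed unchanged to every call, and `unzip=True` transposes tuple-valued results. A minimal sketch consistent with that usage (a hypothetical reconstruction, not the project's actual code):

def batchProcess(f, *arg_seqs, static_args=None, static_kwargs=None, unzip=False):
    # Hypothetical reconstruction inferred from the call sites in these examples.
    static_args = tuple(static_args) if static_args is not None else ()
    static_kwargs = dict(static_kwargs) if static_kwargs is not None else {}

    # Apply f to corresponding items of the input sequences, appending the
    # per-call-constant positional and keyword arguments.
    results = tuple(
        f(*varying_args, *static_args, **static_kwargs)
        for varying_args in zip(*arg_seqs)
    )

    if unzip:
        # Transpose tuple-valued outputs:
        # ((a1, b1), (a2, b2)) -> ((a1, a2), (b1, b2))
        results = tuple(zip(*results))

    return results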
Example #2
def main(
        out_dir=None, data_dir=None, model_name=None,
        gpu_dev_id=None, batch_size=None, learning_rate=None, independent_signals=None,
        model_params={}, cv_params={}, train_params={}, viz_params={}):

    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadVariable(var_name):
        return joblib.load(os.path.join(data_dir, f'{var_name}.pkl'))

    def saveVariable(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f'{var_name}.pkl'))

    # Load data
    trial_ids = loadVariable('trial_ids')
    imu_sample_seqs = loadVariable('imu_sample_seqs')
    imu_label_seqs = loadVariable('imu_label_seqs')
    assembly_seqs = loadVariable('assembly_seqs')

    imu_sample_seqs = tuple(map(np.squeeze, imu_sample_seqs))
    errors = utils.batchProcess(makeErrorSignal, imu_sample_seqs, assembly_seqs)
    state_labels = utils.batchProcess(
        makeLabelSignal, imu_sample_seqs, assembly_seqs,
        static_kwargs={'action': False}
    )
    action_labels = utils.batchProcess(
        makeLabelSignal, imu_sample_seqs, assembly_seqs,
        static_kwargs={'action': True}
    )

    plot_args = zip(
        trial_ids, imu_sample_seqs, errors,
        imu_label_seqs, state_labels, action_labels
    )
    for args in plot_args:
        plotError(*args, fn=os.path.join(fig_dir, f"{args[0]}.png"))
Example #3
def scoreFrames(pixel_classifier,
                rgb_frame_seq,
                segment_seq,
                score_kwargs=None):
    """ Score each video frame. Higher scores means more blocks pixels.

    Parameters
    ----------
    pixel_classifier : models.FrameScorer
        A FrameScorer mixture model. This is used to classify each pixel in the
        RGB frames as blocks or as nuisance (hands or specularity).
    rgb_frame_seq : iterable(numpy array of float, shape (img_height, img_width, 3))
        RGB frames to process.
    segment_seq : iterable(numpy array of int, shape (img_height, img_width))
        Segmentation of each RGB frame. Background pixels are labeled 0 and
        foreground segments are enumerated from 1.
    score_kwargs : dict, optional
        Keyword arguments passed to ``pixel_classifier.averageSnr``. Defaults
        to ``{'log_domain': True, 'hard_assign_clusters': True}``.

    Returns
    -------
    scores : numpy array of float, shape (num_frames,)
        Average (log) likelihood ratio of 'blocks' class vs. 'skin' class
    """

    if score_kwargs is None:
        score_kwargs = {'log_domain': True, 'hard_assign_clusters': True}

    fg_px_seq = utils.batchProcess(
        imageprocessing.foregroundPixels,
        rgb_frame_seq,
        segment_seq,
        static_kwargs={
            'image_transform': lambda x: skimage.color.rgb2hsv(skimage.img_as_float(x)),
            'background_class_index': 0,
        })

    scores = utils.batchProcess(pixel_classifier.averageSnr,
                                fg_px_seq,
                                static_kwargs=score_kwargs)

    return np.array(scores)
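
The per-frame scores returned here are what Example #15 later thresholds to pick keyframes. A hedged usage sketch; `pixel_classifier`, `rgb_frame_seq`, and `segment_seq` are placeholders for objects loaded elsewhere:

# Hypothetical usage.
frame_scores = scoreFrames(pixel_classifier, rgb_frame_seq, segment_seq)
keyframe_idxs = videoprocessing.selectSegmentKeyframes(
    frame_scores, score_thresh=0, prepend_first=True)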
Example #4
def loadDepthFrameSeq(frame_fn_seq,
                      frame_timestamp_seq,
                      stack_frames=False,
                      **load_kwargs):
    if not any(frame_fn_seq):
        return tuple()

    mirror = checkSeqTime(frame_timestamp_seq)
    f = functools.partial(loadDepthFrame, mirror=mirror, **load_kwargs)
    depth_frame_seq = utils.batchProcess(f, frame_fn_seq)

    if stack_frames:
        depth_frame_seq = np.stack(depth_frame_seq)

    return depth_frame_seq
Example #5
    def scoreSamples(self, input_seq):
        """ Score a sample.

        Parameters
        ----------
        input_seq : ???

        Returns
        -------
        scores : torch.Tensor of float, shape (batch_size, seq_len, num_classes)
        """

        seq_scores = utils.batchProcess(
            super(LinearChainScorer, self).forward, input_seq)

        return torch.stack(seq_scores, dim=1)
Example #6
    def predictSeq(self,
                   *feat_seqs,
                   decode_method='MAP',
                   viz_predictions=False,
                   **kwargs):

        input_seq = zip(*feat_seqs)
        scores, component_poses = utils.batchProcess(
            self.obsv_model.forward,
            input_seq,
            static_kwargs={'return_poses': True},
            unzip=True)
        scores = torch.stack(scores, dim=1)

        outputs = super().forward(scores, scores_as_input=True)
        pred_idxs = super().predict(outputs)[0]

        pred_states = self.integerizer.deintegerizeSequence(pred_idxs)
        pred_poses = tuple(component_poses[t][i]
                           for t, i in enumerate(pred_idxs))

        return pred_states, pred_idxs, None, scores, pred_poses
Example #7
def loadRgbFrameSeq(frame_fn_seq,
                    frame_timestamp_seq=None,
                    stack_frames=False):
    """ Load a sequence of video frames.

    NOTE: Since the first videos were recorded when the camera was in 'mirror'
        mode, this function un-mirrors any videos recorded before the time of
        the last mirrored video.

    Parameters
    ----------
    frame_fn_seq : iterable(string)
        The filename of each video frame.
    frame_timestamp_seq : numpy array of float, shape (num_frames,), optional
        The time each video frame was received by the data collection computer.
        This is used to decide if the video should be un-mirrored. If no
        value is passed, this function doesn't un-mirror the video.
    stack_frames : bool, optional
        If True, the frames are stacked into a single numpy array along a new
        first axis. Default is False.

    Returns
    -------
    frame_seq : tuple( numpy array of float, shape (img_height, img_width, 3) )
    """

    if not any(frame_fn_seq):
        return tuple()

    if frame_timestamp_seq is None:
        mirror = False
    else:
        mirror = checkSeqTime(frame_timestamp_seq)

    frame_seq = utils.batchProcess(loadRgbFrame,
                                   frame_fn_seq,
                                   static_kwargs={'mirror': mirror})

    if stack_frames:
        frame_seq = np.stack(frame_seq)

    return frame_seq
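
For reference, a small usage sketch of the un-mirroring behavior described in the docstring; the file list and timestamp array are placeholders:

# Hypothetical usage.
frame_seq = loadRgbFrameSeq(frame_fn_seq, frame_timestamp_seq, stack_frames=True)
# Omitting the timestamps skips the un-mirroring check entirely:
frame_seq_as_recorded = loadRgbFrameSeq(frame_fn_seq)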
Example #8
    def _create_data_scores(self,
                            sample,
                            W=None,
                            return_poses=False,
                            **kwargs):
        """
        Parameters
        ----------
        sample :
            (rgb_image, depth_image, segment_image, rgb_background, depth_background)
        **kwargs : optional

        Returns
        -------
        error :
        """

        if W is None:
            W = m.np.ones(2)

        kwargs['W'] = W

        assemblies = tuple(self.integerizer[i] for i in range(self.num_states))

        errors, component_poses = utils.batchProcess(self.fitScene,
                                                     assemblies,
                                                     static_args=sample,
                                                     static_kwargs=kwargs,
                                                     unzip=True)

        error = (m.np.vstack(errors) @ W)[None, :]

        if not return_poses:
            return error

        return error, component_poses
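
As a sanity check on the shape bookkeeping above, a small numeric sketch of the weighted reduction (plain numpy standing in for the `m.np` backend):

import numpy as np

# Two assembly states, each contributing an (rgb_error, depth_error) pair.
errors = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]
W = np.ones(2)

error = (np.vstack(errors) @ W)[None, :]   # shape (1, num_states)
assert error.shape == (1, 2)
assert np.allclose(error, [[3.0, 7.0]])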
Example #9
def loadRgbFrameSeqs(frame_fn_seqs, frame_timestamp_seqs):
    return utils.batchProcess(loadRgbFrameSeq, frame_fn_seqs,
                              frame_timestamp_seqs)
Example #10
def unmirrorStateSeqs(timestamps, state_seqs, path_ids):
    return utils.batchProcess(unmirrorStateSeq, timestamps, state_seqs,
                              path_ids)
Example #11
def resampleFrameFnSeqs(frame_fn_seqs, frame_timestamp_seqs, seq_bounds):
    resampled_pairs = utils.batchProcess(resampleFrameFnSeq, frame_fn_seqs,
                                         frame_timestamp_seqs, seq_bounds)
    return tuple(zip(*resampled_pairs))
Example #12
def main(
        out_dir=None, data_dir=None, use_vid_ids_from=None,
        output_data=None, magnitude_centering=None, resting_from_gt=None,
        remove_before_first_touch=None, include_signals=None, fig_type=None):

    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    logger.info(f"Reading from: {data_dir}")
    logger.info(f"Writing to: {out_dir}")

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadAll(seq_ids, var_name, from_dir=data_dir, prefix='trial='):
        all_data = tuple(
            utils.loadVariable(f"{prefix}{seq_id}_{var_name}", from_dir)
            for seq_id in seq_ids
        )
        return all_data

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    if fig_type is None:
        fig_type = 'multi'

    # Load data
    if use_vid_ids_from is None:
        trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)
    else:
        use_vid_ids_from = os.path.expanduser(use_vid_ids_from)
        trial_ids = utils.getUniqueIds(use_vid_ids_from, prefix='trial-', to_array=True)

    accel_seqs = loadAll(trial_ids, 'accel-samples.pkl')
    gyro_seqs = loadAll(trial_ids, 'gyro-samples.pkl')
    action_seqs = loadAll(trial_ids, 'action-seq.pkl')
    rgb_timestamp_seqs = loadAll(trial_ids, 'rgb-frame-timestamp-seq.pkl')

    def validate_imu(seqs):
        def is_valid(d):
            return not any(np.isnan(x).any() for x in d.values())
        return np.array([is_valid(d) for d in seqs])

    imu_is_valid = validate_imu(accel_seqs) & validate_imu(gyro_seqs)
    logger.info(
        f"Ignoring {(~imu_is_valid).sum()} IMU sequences with NaN-valued samples "
        f"(of {len(imu_is_valid)} total)"
    )

    def chooseValid(seq):
        return tuple(x for x, is_valid in zip(seq, imu_is_valid) if is_valid)
    trial_ids = np.array(list(chooseValid(trial_ids)))
    accel_seqs = chooseValid(accel_seqs)
    gyro_seqs = chooseValid(gyro_seqs)
    action_seqs = chooseValid(action_seqs)
    rgb_timestamp_seqs = chooseValid(rgb_timestamp_seqs)

    vocab = []
    metadata = utils.loadMetadata(data_dir, rows=trial_ids)
    utils.saveMetadata(metadata, out_data_dir)
    utils.saveVariable(vocab, 'vocab', out_data_dir)

    def norm(x):
        return np.linalg.norm(imu.getImuSamples(x), axis=1)[:, None]
    accel_mag_seqs = tuple(map(lambda x: dictToArray(x, transform=norm), accel_seqs))
    gyro_mag_seqs = tuple(map(lambda x: dictToArray(x, transform=norm), gyro_seqs))

    imu_timestamp_seqs = utils.batchProcess(makeTimestamps, accel_seqs, gyro_seqs)

    if remove_before_first_touch:
        before_first_touch_seqs = utils.batchProcess(
            beforeFirstTouch, action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs
        )

        num_ignored = sum(b is None for b in before_first_touch_seqs)
        logger.info(
            f"Ignoring {num_ignored} sequences without first-touch annotations "
            f"(of {len(before_first_touch_seqs)} total)"
        )
        trials_missing_first_touch = [
            i for b, i in zip(before_first_touch_seqs, trial_ids)
            if b is None
        ]
        logger.info(f"Trials without first touch: {trials_missing_first_touch}")

        def clip(signal, bool_array):
            return signal[~bool_array, ...]
        accel_mag_seqs = tuple(
            clip(signal, b) for signal, b in zip(accel_mag_seqs, before_first_touch_seqs)
            if b is not None
        )
        gyro_mag_seqs = tuple(
            clip(signal, b) for signal, b in zip(gyro_mag_seqs, before_first_touch_seqs)
            if b is not None
        )
        imu_timestamp_seqs = tuple(
            clip(signal, b) for signal, b in zip(imu_timestamp_seqs, before_first_touch_seqs)
            if b is not None
        )
        trial_ids = tuple(
            x for x, b in zip(trial_ids, before_first_touch_seqs)
            if b is not None
        )
        action_seqs = tuple(
            x for x, b in zip(action_seqs, before_first_touch_seqs)
            if b is not None
        )
        rgb_timestamp_seqs = tuple(
            x for x, b in zip(rgb_timestamp_seqs, before_first_touch_seqs)
            if b is not None
        )

    assembly_seqs = utils.batchProcess(
        parseActions,
        action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs
    )

    if output_data == 'components':
        accel_feat_seqs = accel_mag_seqs
        gyro_feat_seqs = gyro_mag_seqs
        unique_components = {frozenset(): 0}
        imu_label_seqs = zip(
            *tuple(
                labels.componentLabels(*args, unique_components)
                for args in zip(action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs)
            )
        )
        saveVariable(unique_components, 'unique_components')
    elif output_data == 'pairwise components':
        imu_label_seqs = utils.batchProcess(
            labels.pairwiseComponentLabels, assembly_seqs,
            static_kwargs={'lower_tri_only': True, 'include_action_labels': False}
        )
        accel_feat_seqs = tuple(map(imu.pairwiseFeats, accel_mag_seqs))
        gyro_feat_seqs = tuple(map(imu.pairwiseFeats, gyro_mag_seqs))
    else:
        raise AssertionError(f"Unrecognized output_data: {output_data}")

    signals = {'accel': accel_feat_seqs, 'gyro': gyro_feat_seqs}
    if include_signals is None:
        include_signals = tuple(signals.keys())
    signals = tuple(signals[key] for key in include_signals)
    imu_feature_seqs = tuple(np.stack(x, axis=-1).squeeze(axis=-1) for x in zip(*signals))

    video_seqs = tuple(zip(imu_feature_seqs, imu_label_seqs, trial_ids))
    imu.plot_prediction_eg(video_seqs, fig_dir, fig_type=fig_type, output_data=output_data)

    video_seqs = tuple(
        zip(assembly_seqs, imu_feature_seqs, imu_timestamp_seqs, imu_label_seqs, trial_ids)
    )
    for assembly_seq, feature_seq, timestamp_seq, label_seq, trial_id in video_seqs:
        id_string = f"trial={trial_id}"
        saveVariable(assembly_seq, f'{id_string}_assembly-seq')
        saveVariable(feature_seq, f'{id_string}_feature-seq')
        saveVariable(timestamp_seq, f'{id_string}_timestamp-seq')
        saveVariable(label_seq, f'{id_string}_label-seq')
Example #13
    def fitScene(self,
                 rgb_image,
                 depth_image,
                 segment_image,
                 rgb_background,
                 depth_background,
                 assembly,
                 camera_params=None,
                 camera_pose=None,
                 block_colors=None,
                 W=None,
                 error_func=None,
                 bias=None,
                 scale=None,
                 ignore_background=False,
                 legacy_mode=False):
        """ Fit a spatial assembly and a background plane to an RGBD image.

        Parameters
        ----------

        Returns
        -------
        error_vec : m.np.array of float, shape (2,)
            RGB and depth errors of the rendered final scene.
        component_poses : tuple
            The pose matched to each connected component of ``assembly``.
        """

        if camera_params is None:
            camera_params = self.camera_params

        if camera_pose is None:
            camera_pose = self.camera_pose

        if block_colors is None:
            block_colors = self.block_colors

        if W is None:
            W = m.np.ones(2)

        if error_func is None:
            error_func = sse

        if bias is None:
            bias = m.np.zeros(2)

        if scale is None:
            scale = m.np.ones(2)

        # Estimate initial poses from each detected image segment
        segment_labels = m.np.unique(segment_image[segment_image != 0])

        object_masks = tuple(segment_image == i for i in segment_labels)
        object_poses_est = utils.batchProcess(
            imageprocessing.estimateSegmentPose,
            object_masks,
            static_args=(camera_params, camera_pose, depth_image),
            static_kwargs={'estimate_orientation': False})

        num_components = len(assembly.connected_components)
        num_segments = len(segment_labels)

        # Find the best pose for each component of the spatial assembly, assuming
        # we try to match it to a particular segment.
        errors = m.np.zeros((num_components, num_segments))
        poses = m.np.zeros((num_components, num_segments, 3, 4))
        for component_index, component_key in enumerate(
                assembly.connected_components.keys()):
            for segment_index in range(num_segments):
                object_mask = object_masks[segment_index]
                init_pose = object_poses_est[segment_index]
                error, pose = self.refineComponentPose(
                    rgb_image,
                    depth_image,
                    segment_image,
                    assembly,
                    rgb_background=rgb_background,
                    depth_background=depth_background,
                    component_index=component_key,
                    init_pose=init_pose,
                    object_mask=object_mask,
                    W=W,
                    error_func=error_func,
                    bias=bias,
                    scale=scale)
                errors[component_index, segment_index] = error
                poses[component_index, segment_index] = pose

        # Match components to segments by solving the linear sum assignment problem
        # (ie data association)
        # FIXME: set greedy=False
        _, component_poses, _ = matchComponentsToSegments(errors,
                                                          poses,
                                                          greedy=True)

        # Render the complete final scene
        rgb_render, depth_render, label_render = self.renderScene(
            assembly,
            component_poses,
            depth_background=depth_background,
            rgb_background=rgb_background)

        # Subtract background from all depth images. This gives distances relative
        # to the background plane instead of the camera, so RGB and depth models
        # are closer to the same scale.
        depth_render = depth_render - depth_background
        depth_image = depth_image - depth_background

        # Compute the total error of the final scene
        image_background = segment_image == 0
        render_background = label_render == 0

        rgb_error = error_func(rgb_image,
                               rgb_render,
                               bias=bias[0],
                               scale=scale[0],
                               true_mask=image_background,
                               est_mask=render_background)

        depth_error = error_func(depth_image,
                                 depth_render,
                                 bias=bias[1],
                                 scale=scale[1],
                                 true_mask=image_background,
                                 est_mask=render_background)

        error_vec = m.np.array([rgb_error, depth_error])
        return error_vec, component_poses
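
The data-association step above is a component-to-segment assignment problem; the FIXME notes that the greedy shortcut should eventually be dropped. A minimal non-greedy sketch using SciPy's Hungarian solver (this is not the project's `matchComponentsToSegments`, whose implementation isn't shown here):

from scipy.optimize import linear_sum_assignment


def assignComponentsToSegments(errors):
    # errors: cost matrix of shape (num_components, num_segments).
    # Returns the globally optimal component -> segment assignment and its cost.
    component_idxs, segment_idxs = linear_sum_assignment(errors)
    total_cost = errors[component_idxs, segment_idxs].sum()
    return dict(zip(component_idxs, segment_idxs)), total_cost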
Example #14
def segmentVideo(pixel_classifier,
                 goal_state,
                 rgb_frame_seq,
                 depth_frame_seq,
                 filter_segments=True,
                 **segmentimage_kwargs):
    """ Segment each frame in an RGB-D video and remove nuisance objects

    This pre-processing procedure consists of the following steps:
        1. Identify the scene background
            a. Fit a plane to the tabletop
            b. Mask some areas in the image
        2. Segment the foreground image
            a. Find connected components in the foreground
            b. Remove the segment that best fits the goal state template image
            c. Segment the remaining foreground into SLIC superpixels
            d. Intersect the foreground and SLIC segmentations
        3. Filter image segments (optional)
            a. Remove segments smaller than 64 pixels
            b. Classify nuisance pixels from RGB and depth images
            c. Remove segments that are mostly nuisance pixels

    Parameters
    ----------
    pixel_classifier : models.FrameScorer
        A FrameScorer mixture model. This is used to classify each pixel in the
        RGB frames as blocks or as nuisance (hands or specularity).
    goal_state : blockassembly.BlockAssembly
        The builder's goal state for this video. This is used to match and
        remove the reference model from the frame.
    rgb_frame_seq : iterable(numpy array of float, shape (img_height, img_width, 3))
        RGB frames to process.
    depth_frame_seq : iterable(numpy array of float, shape (img_height, img_width))
        Depth frames to process.
    filter_segments : bool, optional
        If True, segments are removed if they contain too many nuisance pixels
        or if they are too close to the camera. Default is True.
    **segmentimage_kwargs: optional
        Any extra keyword arguments are passed to ``videoprocessing.segmentImage``.

    Returns
    -------
    segment_seq : tuple(numpy array of int, shape (img_height, img_width))
        Each image is a segmentation of the corresponding (rgb, depth) pair.
        The background is assigned label 0, and foreground segments are
        enumerated sequentially from 1.
    pixel_class_seq : tuple(numpy array of int, shape (img_height, img_width))
        Each image contains `pixel_classifier`'s predictions for the pixels in
        the corresponding RGB image.
    background_plane_seq : tuple(geometry.Plane)
        Each element is the plane that best fits the corresponding depth image.
        This plane should represent the tabletop, which supports the planar
        scene recorded by the camera (if we're lucky).
    """

    # Detect the background
    foreground_mask_seq, background_plane_seq = utils.batchProcess(
        foregroundPixels, depth_frame_seq, unzip=True)

    # Create an initial image segmentation
    segment_seq = utils.batchProcess(segmentImage,
                                     rgb_frame_seq,
                                     depth_frame_seq,
                                     background_plane_seq,
                                     foreground_mask_seq,
                                     static_args=(goal_state.copy(), ),
                                     static_kwargs=segmentimage_kwargs)

    # Identify nuisance or outlying objects in images
    pixel_class_seq = utils.batchProcess(pixelClasses,
                                         rgb_frame_seq,
                                         segment_seq,
                                         static_args=(pixel_classifier, ))
    depth_class_seq = utils.batchProcess(depthClasses, depth_frame_seq)

    # Remove segments that are mostly nuisance pixels
    if filter_segments:
        segment_seq = utils.batchProcess(filterSegments, segment_seq,
                                         pixel_class_seq, depth_class_seq)

    return segment_seq, pixel_class_seq, background_plane_seq
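
`segmentVideo`'s outputs plug directly into Example #3's frame scoring; a hedged end-to-end sketch, with all inputs as placeholders for data loaded elsewhere:

# Hypothetical pipeline sketch.
segment_seq, pixel_class_seq, background_plane_seq = segmentVideo(
    pixel_classifier, goal_state, rgb_frame_seq, depth_frame_seq)
frame_scores = scoreFrames(pixel_classifier, rgb_frame_seq, segment_seq)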
Example #15
def main(out_dir=None,
         scores_dir=None,
         preprocessed_data_dir=None,
         keyframe_model_name=None,
         subsample_period=None,
         window_size=None,
         corpus_name=None,
         default_annotator=None,
         cv_scheme=None,
         max_trials_per_fold=None,
         model_name=None,
         numeric_backend=None,
         gpu_dev_id=None,
         visualize=False,
         model_config={},
         camera_params_config={}):

    out_dir = os.path.expanduser(out_dir)
    scores_dir = os.path.expanduser(scores_dir)
    preprocessed_data_dir = os.path.expanduser(preprocessed_data_dir)

    m.set_backend('numpy')

    def loadFromWorkingDir(var_name):
        return joblib.load(os.path.join(scores_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_dir, f"{var_name}.pkl"))

    # Load camera parameters from external file and add them to model config kwargs
    model_config['init_kwargs'].update(
        render.loadCameraParams(**camera_params_config, as_dict=True))

    trial_ids = joblib.load(
        os.path.join(preprocessed_data_dir, 'trial_ids.pkl'))

    corpus = duplocorpus.DuploCorpus(corpus_name)
    assembly_seqs = tuple(
        labels.parseLabelSeq(
            corpus.readLabels(trial_id, default_annotator)[0])
        for trial_id in trial_ids)

    logger.info(f"Selecting keyframes...")
    keyframe_idx_seqs = []
    rgb_keyframe_seqs = []
    depth_keyframe_seqs = []
    seg_keyframe_seqs = []
    background_keyframe_seqs = []
    assembly_keyframe_seqs = []
    for seq_idx, trial_id in enumerate(trial_ids):
        trial_str = f"trial-{trial_id}"
        rgb_frame_seq = loadFromWorkingDir(f'{trial_str}_rgb-frame-seq')
        depth_frame_seq = loadFromWorkingDir(f'{trial_str}_depth-frame-seq')
        segment_seq = loadFromWorkingDir(f'{trial_str}_segment-seq')
        frame_scores = loadFromWorkingDir(f'{trial_str}_frame-scores')
        background_plane_seq = loadFromWorkingDir(
            f'{trial_str}_background-plane-seq')

        assembly_seq = assembly_seqs[seq_idx]
        # FIXME: Get the real frame index numbers instead of approximating
        assembly_seq[-1].end_idx = len(rgb_frame_seq) * subsample_period

        keyframe_idxs = videoprocessing.selectSegmentKeyframes(
            frame_scores, score_thresh=0, prepend_first=True)

        selectKeyframes = functools.partial(utils.select, keyframe_idxs)
        rgb_keyframe_seq = selectKeyframes(rgb_frame_seq)
        depth_keyframe_seq = selectKeyframes(depth_frame_seq)
        seg_keyframe_seq = selectKeyframes(segment_seq)
        background_keyframe_seq = selectKeyframes(background_plane_seq)

        # FIXME: Get the real frame index numbers instead of approximating
        keyframe_idxs_orig = keyframe_idxs * subsample_period
        assembly_keyframe_seq = labels.resampleStateSeq(
            keyframe_idxs_orig, assembly_seq)

        # Store all keyframe sequences in memory
        keyframe_idx_seqs.append(keyframe_idxs)
        rgb_keyframe_seqs.append(rgb_keyframe_seq)
        depth_keyframe_seqs.append(depth_keyframe_seq)
        seg_keyframe_seqs.append(seg_keyframe_seq)
        background_keyframe_seqs.append(background_keyframe_seq)
        assembly_keyframe_seqs.append(assembly_keyframe_seq)

    # Split into train and test sets
    if cv_scheme == 'leave one out':
        num_seqs = len(trial_ids)
        cv_folds = []
        for i in range(num_seqs):
            test_fold = (i, )
            train_fold = tuple(range(0, i)) + tuple(range(i + 1, num_seqs))
            cv_folds.append((train_fold, test_fold))
    elif cv_scheme == 'train on child':
        child_corpus = duplocorpus.DuploCorpus('child')
        child_trial_ids = utils.loadVariable('trial_ids',
                                             'preprocess-all-data', 'child')
        child_assembly_seqs = [
            labels.parseLabelSeq(
                child_corpus.readLabels(trial_id, 'Cathryn')[0])
            for trial_id in child_trial_ids
        ]
        num_easy = len(assembly_keyframe_seqs)
        num_child = len(child_assembly_seqs)
        cv_folds = [(tuple(range(num_easy, num_easy + num_child)),
                     tuple(range(num_easy)))]
        assembly_keyframe_seqs = assembly_keyframe_seqs + child_assembly_seqs

    rgb_keyframe_seqs = tuple(
        tuple(
            imageprocessing.saturateImage(rgb_image,
                                          background_mask=segment_image == 0)
            for rgb_image, segment_image in zip(rgb_frame_seq, seg_frame_seq))
        for rgb_frame_seq, seg_frame_seq in zip(rgb_keyframe_seqs,
                                                seg_keyframe_seqs))

    depth_keyframe_seqs = tuple(
        tuple(depth_image.astype(float) for depth_image in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)

    device = torchutils.selectDevice(gpu_dev_id)
    m.set_backend('torch')
    m.set_default_device(device)

    assembly_keyframe_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_keyframe_seqs)
    assembly_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_seqs)

    rgb_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.float) for frame in rgb_frame_seq)
        for rgb_frame_seq in rgb_keyframe_seqs)
    depth_keyframe_seqs = tuple(
        tuple(
            m.np.array(frame, dtype=torch.float) for frame in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)
    seg_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.int) for frame in seg_frame_seq)
        for seg_frame_seq in seg_keyframe_seqs)

    num_cv_folds = len(cv_folds)
    saveToWorkingDir(cv_folds, f'cv-folds')
    for fold_index, (train_idxs, test_idxs) in enumerate(cv_folds):
        logger.info(f"CV FOLD {fold_index + 1} / {num_cv_folds}")

        # Initialize and train model
        utils.validateCvFold(train_idxs, test_idxs)
        selectTrain = functools.partial(utils.select, train_idxs)
        train_assembly_seqs = selectTrain(assembly_keyframe_seqs)
        model = getattr(models, model_name)(**model_config['init_kwargs'])
        logger.info(
            f"  Training {model_name} on {len(train_idxs)} sequences...")
        model.fit(train_assembly_seqs, **model_config['fit_kwargs'])
        logger.info(
            f'    Model trained on {model.num_states} unique assembly states')
        # saveToWorkingDir(model, f'model-fold{fold_index}')

        # Decode on the test set
        selectTest = functools.partial(utils.select, test_idxs)
        test_trial_ids = selectTest(trial_ids)
        test_rgb_keyframe_seqs = selectTest(rgb_keyframe_seqs)
        test_depth_keyframe_seqs = selectTest(depth_keyframe_seqs)
        test_seg_keyframe_seqs = selectTest(seg_keyframe_seqs)
        test_background_keyframe_seqs = selectTest(background_keyframe_seqs)
        test_assembly_keyframe_seqs = selectTest(assembly_keyframe_seqs)
        test_assembly_seqs = selectTest(assembly_seqs)

        logger.info(f"  Testing model on {len(test_idxs)} sequences...")
        for i, trial_id in enumerate(test_trial_ids):
            if max_trials_per_fold is not None and i >= max_trials_per_fold:
                break

            rgb_frame_seq = test_rgb_keyframe_seqs[i]
            depth_frame_seq = test_depth_keyframe_seqs[i]
            seg_frame_seq = test_seg_keyframe_seqs[i]
            background_plane_seq = test_background_keyframe_seqs[i]
            true_assembly_seq = test_assembly_keyframe_seqs[i]
            true_assembly_seq_orig = test_assembly_seqs[i]

            rgb_background_seq, depth_background_seq = utils.batchProcess(
                model.renderPlane, background_plane_seq, unzip=True)

            logger.info(f'    Decoding video {trial_id}...')
            start_time = time.process_time()
            out = model.predictSeq(rgb_frame_seq, depth_frame_seq,
                                   seg_frame_seq, rgb_background_seq,
                                   depth_background_seq,
                                   **model_config['decode_kwargs'])
            pred_assembly_seq, pred_idx_seq, max_log_probs, log_likelihoods, poses_seq = out
            end_time = time.process_time()
            logger.info(utils.makeProcessTimeStr(end_time - start_time))

            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq)
            logger.info(f'    ACCURACY: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_true=True)
            logger.info(f'    RECALL: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_pred=True)
            logger.info(f'    PRECISION: {num_correct} / {num_total}')

            # Save intermediate results
            logger.info(f"Saving output...")
            saveToWorkingDir(segment_seq, f'segment_seq-{trial_id}')
            saveToWorkingDir(true_assembly_seq_orig,
                             f'true_state_seq_orig-{trial_id}')
            saveToWorkingDir(true_assembly_seq, f'true_state_seq-{trial_id}')
            saveToWorkingDir(pred_assembly_seq, f'pred_state_seq-{trial_id}')
            saveToWorkingDir(poses_seq, f'poses_seq-{trial_id}')
            saveToWorkingDir(background_plane_seq,
                             f'background_plane_seq-{trial_id}')
            saveToWorkingDir(max_log_probs, f'max_log_probs-{trial_id}')
            saveToWorkingDir(log_likelihoods, f'log_likelihoods-{trial_id}')

            # Save figures
            if visualize:
                rgb_rendered_seq, depth_rendered_seq, label_rendered_seq = utils.batchProcess(
                    model.renderScene,
                    pred_assembly_seq,
                    poses_seq,
                    rgb_background_seq,
                    depth_background_seq,
                    unzip=True,
                    static_kwargs={'as_numpy': True})
                if utils.in_ipython_console():
                    file_path = None
                else:
                    trial_str = f"trial-{trial_id}"
                    file_path = os.path.join(out_dir,
                                             f'{trial_str}_best-frames.png')
                rgb_frame_seq = tuple(img.cpu().numpy()
                                      for img in rgb_frame_seq)
                imageprocessing.displayImages(*rgb_frame_seq,
                                              *rgb_rendered_seq,
                                              num_rows=2,
                                              file_path=file_path)
Example #16
    def refineComponentPose(self,
                            rgb_image,
                            depth_image,
                            segment_image,
                            assembly,
                            component_index=None,
                            init_pose=None,
                            theta_samples=None,
                            object_mask=None,
                            W=None,
                            error_func=None,
                            bias=None,
                            scale=None,
                            **render_kwargs):
        """ Refine a component's initial pose estimate using a simple registration routine.

        Parameters
        ----------

        Returns
        -------
        best_error : float
        best_pose : m.np.array of float, shape (3, 4)
        """

        if error_func is None:
            error_func = sse

        if W is None:
            W = m.np.ones(2)

        if bias is None:
            bias = m.np.zeros(2)

        if scale is None:
            scale = m.np.ones(2)

        if theta_samples is None:
            theta_samples = m.np.linspace(0, 1.5 * m.np.pi, 4)

        R_init, t_init = init_pose
        # pose_candidates = tuple(
        #     (geometry.rotationMatrix(z_angle=theta, x_angle=0) @ R_init, t_init)
        #     for theta in theta_samples
        # )

        rotation_candidates = geometry.zRotations(theta_samples) @ R_init
        pose_candidates = tuple((R, t_init) for R in rotation_candidates)

        rgb_renders, depth_renders, label_renders = utils.batchProcess(
            self.renderComponent,
            pose_candidates,
            static_args=(assembly, component_index),
            static_kwargs=render_kwargs,
            unzip=True)

        # Subtract background from all depth images. This gives distances relative
        # to the background plane instead of the camera, so RGB and depth models
        # are closer to the same scale.
        if 'depth_background' in render_kwargs:
            depth_renders = tuple(d - render_kwargs['depth_background']
                                  for d in depth_renders)
            depth_image = depth_image - render_kwargs['depth_background']

        object_background_mask = ~object_mask
        label_background_masks = tuple(label_render == 0
                                       for label_render in label_renders)

        rgb_errors = [
            error_func(rgb_image,
                       rgb_render,
                       true_mask=object_background_mask,
                       est_mask=label_mask,
                       bias=bias[0],
                       scale=scale[0]) for rgb_render, label_mask in zip(
                           rgb_renders, label_background_masks)
        ]

        depth_errors = [
            error_func(depth_image,
                       depth_render,
                       true_mask=object_background_mask,
                       est_mask=label_mask,
                       bias=bias[1],
                       scale=scale[1]) for depth_render, label_mask in zip(
                           depth_renders, label_background_masks)
        ]

        errors = m.np.column_stack(
            (m.np.array(rgb_errors), m.np.array(depth_errors))) @ W

        best_idx = errors.argmin()
        best_error = errors[best_idx]
        best_pose = geometry.homogeneousMatrix(pose_candidates[best_idx])

        return best_error, best_pose
def main(out_dir=None,
         data_dir=None,
         preprocess_dir=None,
         classifier_fn=None,
         display_summary_img=None,
         write_video=None,
         start_from=None,
         stop_after=None):

    if start_from is None:
        start_from = 0

    if stop_after is None:
        stop_after = float("Inf")

    data_dir = os.path.expanduser(data_dir)
    preprocess_dir = os.path.expanduser(preprocess_dir)
    out_dir = os.path.expanduser(out_dir)
    classifier_fn = os.path.expanduser(classifier_fn)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDataDir(var_name):
        return joblib.load(os.path.join(data_dir, f'{var_name}.pkl'))

    def loadFromPreprocessDir(var_name):
        return joblib.load(os.path.join(preprocess_dir, f'{var_name}.pkl'))

    def saveVariable(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f'{var_name}.pkl'))

    classifier = joblib.load(classifier_fn)

    trial_ids = getUniqueTrialIds(preprocess_dir)
    for i, trial_id in enumerate(trial_ids):

        if i < start_from:
            continue

        if i > stop_after:
            break

        trial_str = f"trial-{trial_id}"

        logger.info(
            f"Processing video {i + 1} / {len(trial_ids)}  (trial {trial_id})")
        rgb_frame_seq = loadFromDataDir(f"{trial_str}_rgb-frame-seq")
        # depth_frame_seq = loadFromDataDir(f"{trial_str}_depth-frame-seq")
        # foreground_mask_seq = loadFromPreprocessDir(f'{trial_str}_foreground-mask-seq')
        segment_frame_seq = loadFromPreprocessDir(
            f'{trial_str}_segment-frame-seq')
        # block_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_block-segment-frame-seq')
        # skin_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_skin-segment-frame-seq')
        # color_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_color-label-frame-seq')
        # class_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_class-label-frame-seq')

        segment_features_seq, feature_frame_seq = utils.batchProcess(
            extractSegmentFeatures,
            rgb_frame_seq,
            segment_frame_seq,
            static_args=(classifier, ),
            unzip=True)

        saveVariable(segment_features_seq, f'{trial_str}_segment-features-seq')

        if display_summary_img:
            if utils.in_ipython_console():
                file_path = None
            else:
                trial_str = f"trial-{trial_id}"
                file_path = os.path.join(fig_dir,
                                         f'{trial_str}_best-frames.png')
            imageprocessing.displayImages(*rgb_frame_seq,
                                          *feature_frame_seq,
                                          num_rows=2,
                                          file_path=file_path)

        if write_video:
            video_dir = os.path.join(out_dir, 'detection-videos')
            if not os.path.exists(video_dir):
                os.makedirs(video_dir)
            fn = os.path.join(video_dir, f"{trial_str}.gif")
            writer = imageio.get_writer(fn, mode='I')
            for rgb_frame, feature_frame in zip(rgb_frame_seq,
                                                feature_frame_seq):
                feature_frame = feature_frame.astype(float)
                max_val = feature_frame.max()
                if max_val:
                    feature_frame = feature_frame / max_val
                feature_frame = np.stack((feature_frame, ) * 3, axis=-1)
                rgb_frame = img_as_float(rgb_frame)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    image = img_as_ubyte(np.hstack((rgb_frame, feature_frame)))
                writer.append_data(image)
            writer.close()
def main(out_dir=None,
         data_dir=None,
         corpus_name=None,
         start_from=None,
         stop_at=None,
         display_summary_img=None,
         background_removal_options={},
         segment_image_options={}):

    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromWorkingDir(var_name):
        return joblib.load(os.path.join(data_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    # corpus = duplocorpus.DuploCorpus(corpus_name)

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    camera_pose = render.camera_pose
    camera_params = render.intrinsic_matrix

    for seq_idx, trial_id in enumerate(trial_ids):

        if start_from is not None and seq_idx < start_from:
            continue

        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"

        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)}  (trial {trial_id})"
        )
        # task_id = corpus.getTaskIndex(trial_id)
        # goal_state = labels.constructGoalState(task_id)
        goal_state = None

        logger.info(f"  Loading data...")
        rgb_frame_seq = loadFromWorkingDir(f"{trial_str}_rgb-frame-seq")
        depth_frame_seq = loadFromWorkingDir(f"{trial_str}_depth-frame-seq")

        logger.info(f"  Removing background...")
        foreground_mask_seq, background_plane_seq = utils.batchProcess(
            videoprocessing.foregroundPixels,
            depth_frame_seq,
            static_args=(camera_params, camera_pose),
            static_kwargs=background_removal_options,
            unzip=True)
        foreground_mask_seq = np.stack(foreground_mask_seq)

        logger.info(f"  Segmenting foreground...")
        segment_frame_seq = utils.batchProcess(
            videoprocessing.segmentImage,
            rgb_frame_seq,
            depth_frame_seq,
            foreground_mask_seq,
            static_args=(goal_state, ),
            static_kwargs=segment_image_options)
        segment_frame_seq = np.stack(segment_frame_seq)

        foreground_mask_seq_no_ref_model = segment_frame_seq != 0

        logger.info(f"  Saving output...")
        saveToWorkingDir(background_plane_seq,
                         f'{trial_str}_background-plane-seq')
        saveToWorkingDir(foreground_mask_seq,
                         f'{trial_str}_foreground-mask-seq')
        saveToWorkingDir(segment_frame_seq, f'{trial_str}_segment-frame-seq')
        saveToWorkingDir(foreground_mask_seq_no_ref_model,
                         f'{trial_str}_foreground-mask-seq_no-ref-model')

        if display_summary_img:
            if utils.in_ipython_console():
                file_path = None
            else:
                trial_str = f"trial={trial_id}"
                file_path = os.path.join(fig_dir,
                                         f'{trial_str}_best-frames.png')
            imageprocessing.displayImages(*rgb_frame_seq,
                                          *depth_frame_seq,
                                          *segment_frame_seq,
                                          num_rows=3,
                                          file_path=file_path)