def estimateKeyframeFnSeqs(frame_fn_seqs, frame_timestamp_seqs, keyframe_timestamp_seqs):
    keyframe_fn_seqs = utils.batchProcess(
        estimateKeyframeFnSeq,
        frame_fn_seqs, frame_timestamp_seqs, keyframe_timestamp_seqs)
    return keyframe_fn_seqs
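# The helper below is a minimal sketch of how `utils.batchProcess` appears to behave,
# inferred from its call sites in this code; it is NOT the project's implementation.
# It maps a function over zipped argument sequences, forwards fixed positional and
# keyword arguments, and optionally unzips tuple-valued results. Placing `static_args`
# after the per-item arguments is an assumption.
def _batchProcess_sketch(f, *arg_seqs, static_args=(), static_kwargs=None, unzip=False):
    if static_kwargs is None:
        static_kwargs = {}
    results = tuple(f(*args, *static_args, **static_kwargs) for args in zip(*arg_seqs))
    if unzip:
        # Turn a sequence of result tuples into a tuple of result sequences
        results = tuple(zip(*results))
    return results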
def main(
        out_dir=None, data_dir=None, model_name=None,
        gpu_dev_id=None, batch_size=None, learning_rate=None,
        independent_signals=None,
        model_params={}, cv_params={}, train_params={}, viz_params={}):
    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadVariable(var_name):
        return joblib.load(os.path.join(data_dir, f'{var_name}.pkl'))

    def saveVariable(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f'{var_name}.pkl'))

    # Load data
    trial_ids = loadVariable('trial_ids')
    imu_sample_seqs = loadVariable('imu_sample_seqs')
    imu_label_seqs = loadVariable('imu_label_seqs')
    assembly_seqs = loadVariable('assembly_seqs')

    imu_sample_seqs = tuple(map(np.squeeze, imu_sample_seqs))

    errors = utils.batchProcess(makeErrorSignal, imu_sample_seqs, assembly_seqs)
    state_labels = utils.batchProcess(
        makeLabelSignal, imu_sample_seqs, assembly_seqs,
        static_kwargs={'action': False})
    action_labels = utils.batchProcess(
        makeLabelSignal, imu_sample_seqs, assembly_seqs,
        static_kwargs={'action': True})

    plot_args = zip(
        trial_ids, imu_sample_seqs, errors, imu_label_seqs, state_labels, action_labels)
    for args in plot_args:
        plotError(*args, fn=os.path.join(fig_dir, f"{args[0]}.png"))
def scoreFrames(pixel_classifier, rgb_frame_seq, segment_seq, score_kwargs=None):
    """ Score each video frame. Higher scores mean more block pixels.

    Parameters
    ----------
    pixel_classifier : models.FrameScorer
        A FrameScorer mixture model. This is used to classify each pixel in the
        RGB frames as blocks or as nuisance (hands or specularity).
    rgb_frame_seq : iterable(numpy array of float, shape (img_height, img_width, 3))
        RGB frames to process.
    segment_seq : iterable(numpy array of int, shape (img_height, img_width))
        Segmentation of each RGB frame; background pixels have label 0.

    Returns
    -------
    scores : numpy array of float, shape (num_frames / sample_rate,)
        Average (log) likelihood ratio of 'blocks' class vs. 'skin' class
    """

    if score_kwargs is None:
        score_kwargs = {'log_domain': True, 'hard_assign_clusters': True}

    fg_px_seq = utils.batchProcess(
        imageprocessing.foregroundPixels, rgb_frame_seq, segment_seq,
        static_kwargs={
            'image_transform': lambda x: skimage.color.rgb2hsv(skimage.img_as_float(x)),
            'background_class_index': 0})

    scores = utils.batchProcess(
        pixel_classifier.averageSnr, fg_px_seq, static_kwargs=score_kwargs)

    return np.array(scores)
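# Usage sketch (illustrative, not part of the original module): frames whose average
# log likelihood ratio favors the 'blocks' class (score > 0) are the candidates that
# keyframe selection later works with (cf. `videoprocessing.selectSegmentKeyframes`
# called with `score_thresh=0` in the decoding script).
def _example_candidate_keyframes(pixel_classifier, rgb_frame_seq, segment_seq):
    frame_scores = scoreFrames(pixel_classifier, rgb_frame_seq, segment_seq)
    return np.nonzero(frame_scores > 0)[0]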
def loadDepthFrameSeq(frame_fn_seq, frame_timestamp_seq, stack_frames=False, **load_kwargs):
    if not any(frame_fn_seq):
        return tuple()

    mirror = checkSeqTime(frame_timestamp_seq)
    f = functools.partial(loadDepthFrame, mirror=mirror, **load_kwargs)
    depth_frame_seq = utils.batchProcess(f, frame_fn_seq)

    if stack_frames:
        depth_frame_seq = np.stack(depth_frame_seq)

    return depth_frame_seq
def scoreSamples(self, input_seq):
    """ Score a sequence of samples.

    Parameters
    ----------
    input_seq : ???

    Returns
    -------
    scores : torch.Tensor of float, shape (batch_size, seq_len, num_classes)
    """

    seq_scores = utils.batchProcess(
        super(LinearChainScorer, self).forward, input_seq)
    return torch.stack(seq_scores, dim=1)
def predictSeq(self, *feat_seqs, decode_method='MAP', viz_predictions=False, **kwargs):
    input_seq = zip(*feat_seqs)

    scores, component_poses = utils.batchProcess(
        self.obsv_model.forward, input_seq,
        static_kwargs={'return_poses': True},
        unzip=True)
    scores = torch.stack(scores, dim=1)

    outputs = super().forward(scores, scores_as_input=True)
    pred_idxs = super().predict(outputs)[0]
    pred_states = self.integerizer.deintegerizeSequence(pred_idxs)
    pred_poses = tuple(component_poses[t][i] for t, i in enumerate(pred_idxs))

    return pred_states, pred_idxs, None, scores, pred_poses
def loadRgbFrameSeq(frame_fn_seq, frame_timestamp_seq=None, stack_frames=False):
    """ Load a sequence of video frames.

    NOTE: Since the first videos were recorded when the camera was in 'mirror'
    mode, this function un-mirrors any videos recorded before the time of the
    last mirrored video.

    Parameters
    ----------
    frame_fn_seq : iterable(string)
        The filename of each video frame.
    frame_timestamp_seq : numpy array of float, shape (num_frames,), optional
        The time each video frame was received by the data collection computer.
        This is used to decide if the video should be un-mirrored. If no value
        is passed, this function doesn't un-mirror the video.

    Returns
    -------
    frame_seq : tuple( numpy array of float, shape (img_height, img_width) )
    """

    if not any(frame_fn_seq):
        return tuple()

    if frame_timestamp_seq is None:
        mirror = False
    else:
        mirror = checkSeqTime(frame_timestamp_seq)

    frame_seq = utils.batchProcess(
        loadRgbFrame, frame_fn_seq, static_kwargs={'mirror': mirror})

    if stack_frames:
        frame_seq = np.stack(frame_seq)

    return frame_seq
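# Usage sketch (illustrative only): when timestamps are supplied, the loader decides
# whether to un-mirror the clip; `stack_frames=True` returns a single stacked array
# instead of a tuple of frames.
def _example_load_clip(frame_fn_seq, frame_timestamp_seq=None):
    return loadRgbFrameSeq(frame_fn_seq, frame_timestamp_seq, stack_frames=True)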
def _create_data_scores(self, sample, W=None, return_poses=False, **kwargs):
    """
    Parameters
    ----------
    sample : (rgb_image, depth_image, segment_image, rgb_background, depth_background)
    **kwargs : optional

    Returns
    -------
    error :
    """

    if W is None:
        W = m.np.ones(2)
    kwargs['W'] = W

    assemblies = tuple(self.integerizer[i] for i in range(self.num_states))
    errors, component_poses = utils.batchProcess(
        self.fitScene, assemblies,
        static_args=sample, static_kwargs=kwargs,
        unzip=True)
    error = (m.np.vstack(errors) @ W)[None, :]

    if not return_poses:
        return error
    return error, component_poses
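# Worked example of the weighted error combination above, in plain numpy rather than
# the project's `m.np` backend (illustrative only): each candidate assembly contributes
# an (rgb, depth) error pair, and multiplying by W collapses each pair into a single
# weighted score per state.
def _example_weighted_error():
    import numpy as np
    errors = np.array([[0.5, 2.0],   # candidate state 0: (rgb error, depth error)
                       [1.5, 0.1]])  # candidate state 1
    W = np.array([1.0, 1.0])
    return errors @ W  # array([2.5, 1.6]); state 1 fits the observation best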
def loadRgbFrameSeqs(frame_fn_seqs, frame_timestamp_seqs):
    return utils.batchProcess(loadRgbFrameSeq, frame_fn_seqs, frame_timestamp_seqs)
def unmirrorStateSeqs(timestamps, state_seqs, path_ids):
    return utils.batchProcess(unmirrorStateSeq, timestamps, state_seqs, path_ids)
def resampleFrameFnSeqs(frame_fn_seqs, frame_timestamp_seqs, seq_bounds):
    resampled_pairs = utils.batchProcess(
        resampleFrameFnSeq, frame_fn_seqs, frame_timestamp_seqs, seq_bounds)
    return tuple(zip(*resampled_pairs))
def main(
        out_dir=None, data_dir=None, use_vid_ids_from=None,
        output_data=None, magnitude_centering=None, resting_from_gt=None,
        remove_before_first_touch=None, include_signals=None, fig_type=None):
    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))
    logger.info(f"Reading from: {data_dir}")
    logger.info(f"Writing to: {out_dir}")

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadAll(seq_ids, var_name, from_dir=data_dir, prefix='trial='):
        all_data = tuple(
            utils.loadVariable(f"{prefix}{seq_id}_{var_name}", from_dir)
            for seq_id in seq_ids
        )
        return all_data

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    if fig_type is None:
        fig_type = 'multi'

    # Load data
    if use_vid_ids_from is None:
        trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)
    else:
        use_vid_ids_from = os.path.expanduser(use_vid_ids_from)
        trial_ids = utils.getUniqueIds(use_vid_ids_from, prefix='trial-', to_array=True)
    accel_seqs = loadAll(trial_ids, 'accel-samples.pkl')
    gyro_seqs = loadAll(trial_ids, 'gyro-samples.pkl')
    action_seqs = loadAll(trial_ids, 'action-seq.pkl')
    rgb_timestamp_seqs = loadAll(trial_ids, 'rgb-frame-timestamp-seq.pkl')

    def validate_imu(seqs):
        def is_valid(d):
            return not any(np.isnan(x).any() for x in d.values())
        return np.array([is_valid(d) for d in seqs])

    imu_is_valid = validate_imu(accel_seqs) & validate_imu(gyro_seqs)
    logger.info(
        f"Ignoring {(~imu_is_valid).sum()} IMU sequences with NaN-valued samples "
        f"(of {len(imu_is_valid)} total)"
    )

    def chooseValid(seq):
        return tuple(x for x, is_valid in zip(seq, imu_is_valid) if is_valid)
    trial_ids = np.array(list(chooseValid(trial_ids)))
    accel_seqs = chooseValid(accel_seqs)
    gyro_seqs = chooseValid(gyro_seqs)
    action_seqs = chooseValid(action_seqs)
    rgb_timestamp_seqs = chooseValid(rgb_timestamp_seqs)

    vocab = []
    metadata = utils.loadMetadata(data_dir, rows=trial_ids)
    utils.saveMetadata(metadata, out_data_dir)
    utils.saveVariable(vocab, 'vocab', out_data_dir)

    def norm(x):
        norm = np.linalg.norm(imu.getImuSamples(x), axis=1)[:, None]
        return norm
    accel_mag_seqs = tuple(map(lambda x: dictToArray(x, transform=norm), accel_seqs))
    gyro_mag_seqs = tuple(map(lambda x: dictToArray(x, transform=norm), gyro_seqs))

    imu_timestamp_seqs = utils.batchProcess(makeTimestamps, accel_seqs, gyro_seqs)

    if remove_before_first_touch:
        before_first_touch_seqs = utils.batchProcess(
            beforeFirstTouch, action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs
        )

        num_ignored = sum(b is None for b in before_first_touch_seqs)
        logger.info(
            f"Ignoring {num_ignored} sequences without first-touch annotations "
            f"(of {len(before_first_touch_seqs)} total)"
        )
        trials_missing_first_touch = [
            i for b, i in zip(before_first_touch_seqs, trial_ids)
            if b is None
        ]
        logger.info(f"Trials without first touch: {trials_missing_first_touch}")

        def clip(signal, bool_array):
            return signal[~bool_array, ...]
        accel_mag_seqs = tuple(
            clip(signal, b) for signal, b in zip(accel_mag_seqs, before_first_touch_seqs)
            if b is not None
        )
        gyro_mag_seqs = tuple(
            clip(signal, b) for signal, b in zip(gyro_mag_seqs, before_first_touch_seqs)
            if b is not None
        )
        imu_timestamp_seqs = tuple(
            clip(signal, b) for signal, b in zip(imu_timestamp_seqs, before_first_touch_seqs)
            if b is not None
        )
        trial_ids = tuple(
            x for x, b in zip(trial_ids, before_first_touch_seqs)
            if b is not None
        )
        action_seqs = tuple(
            x for x, b in zip(action_seqs, before_first_touch_seqs)
            if b is not None
        )
        rgb_timestamp_seqs = tuple(
            x for x, b in zip(rgb_timestamp_seqs, before_first_touch_seqs)
            if b is not None
        )

    assembly_seqs = utils.batchProcess(
        parseActions, action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs
    )

    if output_data == 'components':
        accel_feat_seqs = accel_mag_seqs
        gyro_feat_seqs = gyro_mag_seqs
        unique_components = {frozenset(): 0}
        # Materialize the transposed labels so they can be iterated more than once below
        imu_label_seqs = tuple(zip(
            *tuple(
                labels.componentLabels(*args, unique_components)
                for args in zip(action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs)
            )
        ))
        saveVariable(unique_components, 'unique_components')
    elif output_data == 'pairwise components':
        imu_label_seqs = utils.batchProcess(
            labels.pairwiseComponentLabels, assembly_seqs,
            static_kwargs={'lower_tri_only': True, 'include_action_labels': False}
        )
        accel_feat_seqs = tuple(map(imu.pairwiseFeats, accel_mag_seqs))
        gyro_feat_seqs = tuple(map(imu.pairwiseFeats, gyro_mag_seqs))
    else:
        raise AssertionError()

    signals = {'accel': accel_feat_seqs, 'gyro': gyro_feat_seqs}
    if include_signals is None:
        include_signals = tuple(signals.keys())
    signals = tuple(signals[key] for key in include_signals)
    imu_feature_seqs = tuple(np.stack(x, axis=-1).squeeze(axis=-1) for x in zip(*signals))

    video_seqs = tuple(zip(imu_feature_seqs, imu_label_seqs, trial_ids))
    imu.plot_prediction_eg(video_seqs, fig_dir, fig_type=fig_type, output_data=output_data)

    video_seqs = tuple(
        zip(assembly_seqs, imu_feature_seqs, imu_timestamp_seqs, imu_label_seqs, trial_ids)
    )
    for assembly_seq, feature_seq, timestamp_seq, label_seq, trial_id in video_seqs:
        id_string = f"trial={trial_id}"
        saveVariable(assembly_seq, f'{id_string}_assembly-seq')
        saveVariable(feature_seq, f'{id_string}_feature-seq')
        saveVariable(timestamp_seq, f'{id_string}_timestamp-seq')
        saveVariable(label_seq, f'{id_string}_label-seq')
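# Illustrative sketch (not part of the pipeline) of what `clip` does above: samples
# flagged as occurring before the first touch are dropped with a boolean mask.
def _example_clip_before_first_touch():
    import numpy as np
    signal = np.arange(6).reshape(6, 1)   # 6 IMU samples, 1 channel
    before_first_touch = np.array([True, True, False, False, False, False])
    return signal[~before_first_touch, ...]   # keeps the last 4 samples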
def fitScene(
        self, rgb_image, depth_image, segment_image,
        rgb_background, depth_background, assembly,
        camera_params=None, camera_pose=None, block_colors=None,
        W=None, error_func=None, bias=None, scale=None,
        ignore_background=False, legacy_mode=False):
    """ Fit a spatial assembly and a background plane to an RGBD image.

    Parameters
    ----------

    Returns
    -------
    """

    if camera_params is None:
        camera_params = self.camera_params
    if camera_pose is None:
        camera_pose = self.camera_pose
    if block_colors is None:
        block_colors = self.block_colors
    if W is None:
        W = m.np.ones(2)
    if error_func is None:
        error_func = sse
    if bias is None:
        bias = m.np.zeros(2)
    if scale is None:
        scale = m.np.ones(2)

    # Estimate initial poses from each detected image segment
    segment_labels = m.np.unique(segment_image[segment_image != 0])
    num_segments = len(segment_labels)
    num_components = len(assembly.connected_components)
    object_masks = tuple(segment_image == i for i in segment_labels)
    object_poses_est = utils.batchProcess(
        imageprocessing.estimateSegmentPose, object_masks,
        static_args=(camera_params, camera_pose, depth_image),
        static_kwargs={'estimate_orientation': False})

    # Find the best pose for each component of the spatial assembly, assuming
    # we try to match it to a particular segment.
    errors = m.np.zeros((num_components, num_segments))
    poses = m.np.zeros((num_components, num_segments, 3, 4))
    for component_index, component_key in enumerate(assembly.connected_components.keys()):
        for segment_index in range(num_segments):
            object_mask = object_masks[segment_index]
            init_pose = object_poses_est[segment_index]
            error, pose = self.refineComponentPose(
                rgb_image, depth_image, segment_image, assembly,
                rgb_background=rgb_background, depth_background=depth_background,
                component_index=component_key, init_pose=init_pose,
                object_mask=object_mask,
                W=W, error_func=error_func, bias=bias, scale=scale)
            errors[component_index, segment_index] = error
            poses[component_index, segment_index] = pose

    # Match components to segments by solving the linear sum assignment problem
    # (ie data association)
    # FIXME: set greedy=False
    _, component_poses, _ = matchComponentsToSegments(errors, poses, greedy=True)

    # Render the complete final scene
    rgb_render, depth_render, label_render = self.renderScene(
        assembly, component_poses,
        depth_background=depth_background, rgb_background=rgb_background)

    # Subtract background from all depth images. This gives distances relative
    # to the background plane instead of the camera, so RGB and depth models
    # are closer to the same scale.
    depth_render = depth_render - depth_background
    depth_image = depth_image - depth_background

    # Compute the total error of the final scene
    image_background = segment_image == 0
    render_background = label_render == 0
    rgb_error = error_func(
        rgb_image, rgb_render, bias=bias[0], scale=scale[0],
        true_mask=image_background, est_mask=render_background)
    depth_error = error_func(
        depth_image, depth_render, bias=bias[1], scale=scale[1],
        true_mask=image_background, est_mask=render_background)
    error_vec = m.np.array([rgb_error, depth_error])

    return error_vec, component_poses
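# A minimal sketch of the non-greedy matching that the FIXME above refers to, using
# scipy's linear-sum-assignment solver. This is an illustration under that assumption,
# not the project's `matchComponentsToSegments` implementation; `errors` is the
# (num_components, num_segments) cost matrix computed in `fitScene`.
def _example_optimal_assignment(errors):
    import numpy as np
    from scipy.optimize import linear_sum_assignment
    errors = np.asarray(errors)
    component_idxs, segment_idxs = linear_sum_assignment(errors)   # minimizes total cost
    total_error = errors[component_idxs, segment_idxs].sum()
    return total_error, list(zip(component_idxs, segment_idxs))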
def segmentVideo(
        pixel_classifier, goal_state, rgb_frame_seq, depth_frame_seq,
        filter_segments=True, **segmentimage_kwargs):
    """ Segment each frame in an RGB-D video and remove nuisance objects.

    This pre-processing procedure consists of the following steps:
      1. Identify the scene background
         a. Fit a plane to the tabletop
         b. Mask some areas in the image
      2. Segment the foreground image
         a. Find connected components in the foreground
         b. Remove the segment that best fits the goal state template image
         c. Segment the remaining foreground into SLIC superpixels
         d. Intersect the foreground and SLIC segmentations
      3. Filter image segments (optional)
         a. Remove segments smaller than 64 pixels
         b. Classify nuisance pixels from RGB and depth images
         c. Remove segments that are mostly nuisance pixels

    Parameters
    ----------
    pixel_classifier : models.FrameScorer
        A FrameScorer mixture model. This is used to classify each pixel in the
        RGB frames as blocks or as nuisance (hands or specularity).
    goal_state : blockassembly.BlockAssembly
        The builder's goal state for this video. This is used to match and
        remove the reference model from the frame.
    rgb_frame_seq : iterable(numpy array of float, shape (img_height, img_width, 3))
        RGB frames to process.
    depth_frame_seq : iterable(numpy array of float, shape (img_height, img_width))
        Depth frames to process.
    filter_segments : bool, optional
        If True, segments are removed if they contain too many nuisance pixels
        or if they are too close to the camera. Default is True.
    **segmentimage_kwargs : optional
        Any extra keyword arguments are passed to ``videoprocessing.segmentImage``.

    Returns
    -------
    segment_seq : tuple(numpy array of int, shape (img_height, img_width))
        Each image is a segmentation of the corresponding (rgb, depth) pair.
        The background is assigned label 0, and foreground segments are
        enumerated sequentially from 1.
    px_class_seq : tuple(numpy array of int, shape (img_height, img_width))
        Each image contains `frame_scorer`'s predictions for the pixels in the
        corresponding RGB image.
    background_model_seq : tuple(geometry.Plane)
        Each element is the plane that best fits the corresponding depth image.
        This plane should represent the tabletop, which supports the planar
        scene recorded by the camera (if we're lucky).
    """

    # Detect the background
    foreground_mask_seq, background_plane_seq = utils.batchProcess(
        foregroundPixels, depth_frame_seq, unzip=True)

    # Create an initial image segmentation
    segment_seq = utils.batchProcess(
        segmentImage,
        rgb_frame_seq, depth_frame_seq, background_plane_seq, foreground_mask_seq,
        static_args=(goal_state.copy(),),
        static_kwargs=segmentimage_kwargs)

    # Identify nuisance or outlying objects in images
    pixel_class_seq = utils.batchProcess(
        pixelClasses, rgb_frame_seq, segment_seq,
        static_args=(pixel_classifier,))
    depth_class_seq = utils.batchProcess(depthClasses, depth_frame_seq)

    # Remove segments that are mostly nuisance pixels
    if filter_segments:
        segment_seq = utils.batchProcess(
            filterSegments, segment_seq, pixel_class_seq, depth_class_seq)

    return segment_seq, pixel_class_seq, background_plane_seq
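# Usage sketch (argument names are hypothetical, illustrative only): the three pipeline
# stages described in the docstring run in a single call.
def _example_segment_video(pixel_classifier, goal_state, rgb_frames, depth_frames):
    segment_seq, px_class_seq, plane_seq = segmentVideo(
        pixel_classifier, goal_state, rgb_frames, depth_frames, filter_segments=True)
    return segment_seq, px_class_seq, plane_seq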
def main(
        out_dir=None, scores_dir=None, preprocessed_data_dir=None,
        keyframe_model_name=None, subsample_period=None, window_size=None,
        corpus_name=None, default_annotator=None,
        cv_scheme=None, max_trials_per_fold=None,
        model_name=None, numeric_backend=None, gpu_dev_id=None,
        visualize=False, model_config={}, camera_params_config={}):
    out_dir = os.path.expanduser(out_dir)
    scores_dir = os.path.expanduser(scores_dir)
    preprocessed_data_dir = os.path.expanduser(preprocessed_data_dir)

    m.set_backend('numpy')

    def loadFromWorkingDir(var_name):
        return joblib.load(os.path.join(scores_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_dir, f"{var_name}.pkl"))

    # Load camera parameters from external file and add them to model config kwargs
    model_config['init_kwargs'].update(
        render.loadCameraParams(**camera_params_config, as_dict=True))

    trial_ids = joblib.load(os.path.join(preprocessed_data_dir, 'trial_ids.pkl'))

    corpus = duplocorpus.DuploCorpus(corpus_name)
    assembly_seqs = tuple(
        labels.parseLabelSeq(corpus.readLabels(trial_id, default_annotator)[0])
        for trial_id in trial_ids)

    logger.info(f"Selecting keyframes...")
    keyframe_idx_seqs = []
    rgb_keyframe_seqs = []
    depth_keyframe_seqs = []
    seg_keyframe_seqs = []
    background_keyframe_seqs = []
    assembly_keyframe_seqs = []
    for seq_idx, trial_id in enumerate(trial_ids):
        trial_str = f"trial-{trial_id}"
        rgb_frame_seq = loadFromWorkingDir(f'{trial_str}_rgb-frame-seq')
        depth_frame_seq = loadFromWorkingDir(f'{trial_str}_depth-frame-seq')
        segment_seq = loadFromWorkingDir(f'{trial_str}_segment-seq')
        frame_scores = loadFromWorkingDir(f'{trial_str}_frame-scores')
        background_plane_seq = loadFromWorkingDir(f'{trial_str}_background-plane-seq')

        assembly_seq = assembly_seqs[seq_idx]
        # FIXME: Get the real frame index numbers instead of approximating
        assembly_seq[-1].end_idx = len(rgb_frame_seq) * subsample_period

        keyframe_idxs = videoprocessing.selectSegmentKeyframes(
            frame_scores, score_thresh=0, prepend_first=True)

        selectKeyframes = functools.partial(utils.select, keyframe_idxs)
        rgb_keyframe_seq = selectKeyframes(rgb_frame_seq)
        depth_keyframe_seq = selectKeyframes(depth_frame_seq)
        seg_keyframe_seq = selectKeyframes(segment_seq)
        background_keyframe_seq = selectKeyframes(background_plane_seq)

        # FIXME: Get the real frame index numbers instead of approximating
        keyframe_idxs_orig = keyframe_idxs * subsample_period
        assembly_keyframe_seq = labels.resampleStateSeq(keyframe_idxs_orig, assembly_seq)

        # Store all keyframe sequences in memory
        keyframe_idx_seqs.append(keyframe_idxs)
        rgb_keyframe_seqs.append(rgb_keyframe_seq)
        depth_keyframe_seqs.append(depth_keyframe_seq)
        seg_keyframe_seqs.append(seg_keyframe_seq)
        background_keyframe_seqs.append(background_keyframe_seq)
        assembly_keyframe_seqs.append(assembly_keyframe_seq)

    # Split into train and test sets
    if cv_scheme == 'leave one out':
        num_seqs = len(trial_ids)
        cv_folds = []
        for i in range(num_seqs):
            test_fold = (i,)
            train_fold = tuple(range(0, i)) + tuple(range(i + 1, num_seqs))
            cv_folds.append((train_fold, test_fold))
    elif cv_scheme == 'train on child':
        child_corpus = duplocorpus.DuploCorpus('child')
        child_trial_ids = utils.loadVariable('trial_ids', 'preprocess-all-data', 'child')
        child_assembly_seqs = [
            labels.parseLabelSeq(child_corpus.readLabels(trial_id, 'Cathryn')[0])
            for trial_id in child_trial_ids
        ]
        num_easy = len(assembly_keyframe_seqs)
        num_child = len(child_assembly_seqs)
        cv_folds = [(
            tuple(range(num_easy, num_easy + num_child)),
            tuple(range(num_easy))
        )]
        assembly_keyframe_seqs = assembly_keyframe_seqs + child_assembly_seqs

    rgb_keyframe_seqs = tuple(
        tuple(
            imageprocessing.saturateImage(rgb_image, background_mask=segment_image == 0)
            for rgb_image, segment_image in zip(rgb_frame_seq, seg_frame_seq))
        for rgb_frame_seq, seg_frame_seq in zip(rgb_keyframe_seqs, seg_keyframe_seqs))

    depth_keyframe_seqs = tuple(
        tuple(depth_image.astype(float) for depth_image in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)

    device = torchutils.selectDevice(gpu_dev_id)
    m.set_backend('torch')
    m.set_default_device(device)

    assembly_keyframe_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_keyframe_seqs)
    assembly_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_seqs)

    rgb_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.float) for frame in rgb_frame_seq)
        for rgb_frame_seq in rgb_keyframe_seqs)
    depth_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.float) for frame in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)
    seg_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.int) for frame in seg_frame_seq)
        for seg_frame_seq in seg_keyframe_seqs)

    num_cv_folds = len(cv_folds)
    saveToWorkingDir(cv_folds, f'cv-folds')
    for fold_index, (train_idxs, test_idxs) in enumerate(cv_folds):
        logger.info(f"CV FOLD {fold_index + 1} / {num_cv_folds}")

        # Initialize and train model
        utils.validateCvFold(train_idxs, test_idxs)
        selectTrain = functools.partial(utils.select, train_idxs)
        train_assembly_seqs = selectTrain(assembly_keyframe_seqs)
        model = getattr(models, model_name)(**model_config['init_kwargs'])
        logger.info(f" Training {model_name} on {len(train_idxs)} sequences...")
        model.fit(train_assembly_seqs, **model_config['fit_kwargs'])
        logger.info(f' Model trained on {model.num_states} unique assembly states')
        # saveToWorkingDir(model, f'model-fold{fold_index}')

        # Decode on the test set
        selectTest = functools.partial(utils.select, test_idxs)
        test_trial_ids = selectTest(trial_ids)
        test_rgb_keyframe_seqs = selectTest(rgb_keyframe_seqs)
        test_depth_keyframe_seqs = selectTest(depth_keyframe_seqs)
        test_seg_keyframe_seqs = selectTest(seg_keyframe_seqs)
        test_background_keyframe_seqs = selectTest(background_keyframe_seqs)
        test_assembly_keyframe_seqs = selectTest(assembly_keyframe_seqs)
        test_assembly_seqs = selectTest(assembly_seqs)

        logger.info(f" Testing model on {len(test_idxs)} sequences...")
        for i, trial_id in enumerate(test_trial_ids):
            if max_trials_per_fold is not None and i >= max_trials_per_fold:
                break

            rgb_frame_seq = test_rgb_keyframe_seqs[i]
            depth_frame_seq = test_depth_keyframe_seqs[i]
            seg_frame_seq = test_seg_keyframe_seqs[i]
            background_plane_seq = test_background_keyframe_seqs[i]
            true_assembly_seq = test_assembly_keyframe_seqs[i]
            true_assembly_seq_orig = test_assembly_seqs[i]

            rgb_background_seq, depth_background_seq = utils.batchProcess(
                model.renderPlane, background_plane_seq, unzip=True)

            logger.info(f' Decoding video {trial_id}...')
            start_time = time.process_time()
            out = model.predictSeq(
                rgb_frame_seq, depth_frame_seq, seg_frame_seq,
                rgb_background_seq, depth_background_seq,
                **model_config['decode_kwargs'])
            pred_assembly_seq, pred_idx_seq, max_log_probs, log_likelihoods, poses_seq = out
            end_time = time.process_time()
            logger.info(utils.makeProcessTimeStr(end_time - start_time))

            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq)
            logger.info(f' ACCURACY: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_true=True)
            logger.info(f' RECALL: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_pred=True)
            logger.info(f' PRECISION: {num_correct} / {num_total}')

            # Save intermediate results
            logger.info(f"Saving output...")
            saveToWorkingDir(segment_seq, f'segment_seq-{trial_id}')
            saveToWorkingDir(true_assembly_seq_orig, f'true_state_seq_orig-{trial_id}')
            saveToWorkingDir(true_assembly_seq, f'true_state_seq-{trial_id}')
            saveToWorkingDir(pred_assembly_seq, f'pred_state_seq-{trial_id}')
            saveToWorkingDir(poses_seq, f'poses_seq-{trial_id}')
            saveToWorkingDir(background_plane_seq, f'background_plane_seq-{trial_id}')
            saveToWorkingDir(max_log_probs, f'max_log_probs-{trial_id}')
            saveToWorkingDir(log_likelihoods, f'log_likelihoods-{trial_id}')

            # Save figures
            if visualize:
                rgb_rendered_seq, depth_rendered_seq, label_rendered_seq = utils.batchProcess(
                    model.renderScene,
                    pred_assembly_seq, poses_seq, rgb_background_seq, depth_background_seq,
                    unzip=True, static_kwargs={'as_numpy': True})
                if utils.in_ipython_console():
                    file_path = None
                else:
                    trial_str = f"trial-{trial_id}"
                    file_path = os.path.join(out_dir, f'{trial_str}_best-frames.png')
                rgb_frame_seq = tuple(img.cpu().numpy() for img in rgb_frame_seq)
                imageprocessing.displayImages(
                    *rgb_frame_seq, *rgb_rendered_seq, num_rows=2, file_path=file_path)
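# Illustrative sketch of the 'leave one out' fold layout built above: each sequence is
# held out once while all of the others form the training fold.
def _example_leave_one_out_folds(num_seqs=3):
    folds = []
    for i in range(num_seqs):
        test_fold = (i,)
        train_fold = tuple(range(0, i)) + tuple(range(i + 1, num_seqs))
        folds.append((train_fold, test_fold))
    return folds  # [((1, 2), (0,)), ((0, 2), (1,)), ((0, 1), (2,))]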
def refineComponentPose(
        self, rgb_image, depth_image, segment_image, assembly,
        component_index=None, init_pose=None, theta_samples=None,
        object_mask=None, W=None, error_func=None, bias=None, scale=None,
        **render_kwargs):
    """ Refine a component's initial pose estimate using a simple registration routine.

    Parameters
    ----------

    Returns
    -------
    best_error : float
    best_pose : m.np.array of float, shape (3, 4)
    """

    if error_func is None:
        error_func = sse
    if W is None:
        W = m.np.ones(2)
    if theta_samples is None:
        theta_samples = m.np.linspace(0, 1.5 * m.np.pi, 4)
    if bias is None:
        bias = m.np.zeros(2)
    if scale is None:
        scale = m.np.ones(2)

    R_init, t_init = init_pose
    # pose_candidates = tuple(
    #     (geometry.rotationMatrix(z_angle=theta, x_angle=0) @ R_init, t_init)
    #     for theta in theta_samples
    # )
    rotation_candidates = geometry.zRotations(theta_samples) @ R_init
    pose_candidates = tuple((R, t_init) for R in rotation_candidates)

    rgb_renders, depth_renders, label_renders = utils.batchProcess(
        self.renderComponent, pose_candidates,
        static_args=(assembly, component_index),
        static_kwargs=render_kwargs,
        unzip=True)

    # Subtract background from all depth images. This gives distances relative
    # to the background plane instead of the camera, so RGB and depth models
    # are closer to the same scale.
    if 'depth_background' in render_kwargs:
        depth_renders = tuple(d - render_kwargs['depth_background'] for d in depth_renders)
        depth_image = depth_image - render_kwargs['depth_background']

    object_background_mask = ~object_mask
    label_background_masks = tuple(label_render == 0 for label_render in label_renders)
    rgb_errors = [
        error_func(
            rgb_image, rgb_render,
            true_mask=object_background_mask, est_mask=label_mask,
            bias=bias[0], scale=scale[0])
        for rgb_render, label_mask in zip(rgb_renders, label_background_masks)
    ]
    depth_errors = [
        error_func(
            depth_image, depth_render,
            true_mask=object_background_mask, est_mask=label_mask,
            bias=bias[1], scale=scale[1])
        for depth_render, label_mask in zip(depth_renders, label_background_masks)
    ]
    errors = m.np.column_stack((m.np.array(rgb_errors), m.np.array(depth_errors))) @ W

    best_idx = errors.argmin()
    best_error = errors[best_idx]
    best_pose = geometry.homogeneousMatrix(pose_candidates[best_idx])

    return best_error, best_pose
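# Illustrative sketch in plain numpy (not the project's `geometry.zRotations`): the
# in-plane rotation candidates scored by `refineComponentPose` are rotations about the
# z-axis at evenly spaced angles, composed with the initial rotation estimate.
def _example_rotation_candidates(R_init, num_samples=4):
    import numpy as np
    thetas = np.linspace(0, 1.5 * np.pi, num_samples)
    z_rotations = np.stack([
        np.array([
            [np.cos(t), -np.sin(t), 0.0],
            [np.sin(t),  np.cos(t), 0.0],
            [0.0,        0.0,       1.0],
        ])
        for t in thetas
    ])
    return z_rotations @ R_init   # shape (num_samples, 3, 3)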
def main(
        out_dir=None, data_dir=None, preprocess_dir=None, classifier_fn=None,
        display_summary_img=None, write_video=None,
        start_from=None, stop_after=None):
    if start_from is None:
        start_from = 0
    if stop_after is None:
        stop_after = float("Inf")

    data_dir = os.path.expanduser(data_dir)
    preprocess_dir = os.path.expanduser(preprocess_dir)
    out_dir = os.path.expanduser(out_dir)
    classifier_fn = os.path.expanduser(classifier_fn)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDataDir(var_name):
        return joblib.load(os.path.join(data_dir, f'{var_name}.pkl'))

    def loadFromPreprocessDir(var_name):
        return joblib.load(os.path.join(preprocess_dir, f'{var_name}.pkl'))

    def saveVariable(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f'{var_name}.pkl'))

    classifier = joblib.load(classifier_fn)

    trial_ids = getUniqueTrialIds(preprocess_dir)
    for i, trial_id in enumerate(trial_ids):
        if i < start_from:
            continue
        if i > stop_after:
            break

        trial_str = f"trial-{trial_id}"
        logger.info(f"Processing video {i + 1} / {len(trial_ids)} (trial {trial_id})")

        rgb_frame_seq = loadFromDataDir(f"{trial_str}_rgb-frame-seq")
        # depth_frame_seq = loadFromDataDir(f"{trial_str}_depth-frame-seq")
        # foreground_mask_seq = loadFromPreprocessDir(f'{trial_str}_foreground-mask-seq')
        segment_frame_seq = loadFromPreprocessDir(f'{trial_str}_segment-frame-seq')
        # block_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_block-segment-frame-seq')
        # skin_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_skin-segment-frame-seq')
        # color_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_color-label-frame-seq')
        # class_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_class-label-frame-seq')

        segment_features_seq, feature_frame_seq = utils.batchProcess(
            extractSegmentFeatures, rgb_frame_seq, segment_frame_seq,
            static_args=(classifier,),
            unzip=True)

        saveVariable(segment_features_seq, f'{trial_str}_segment-features-seq')

        if display_summary_img:
            if utils.in_ipython_console():
                file_path = None
            else:
                trial_str = f"trial-{trial_id}"
                file_path = os.path.join(fig_dir, f'{trial_str}_best-frames.png')
            imageprocessing.displayImages(
                *rgb_frame_seq, *feature_frame_seq, num_rows=2, file_path=file_path)

        if write_video:
            video_dir = os.path.join(out_dir, 'detection-videos')
            if not os.path.exists(video_dir):
                os.makedirs(video_dir)
            fn = os.path.join(video_dir, f"{trial_str}.gif")
            writer = imageio.get_writer(fn, mode='I')
            for rgb_frame, feature_frame in zip(rgb_frame_seq, feature_frame_seq):
                feature_frame = feature_frame.astype(float)
                max_val = feature_frame.max()
                if max_val:
                    feature_frame = feature_frame / max_val
                feature_frame = np.stack((feature_frame,) * 3, axis=-1)
                rgb_frame = img_as_float(rgb_frame)
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    image = img_as_ubyte(np.hstack((rgb_frame, feature_frame)))
                writer.append_data(image)
            writer.close()
def main(
        out_dir=None, data_dir=None, corpus_name=None,
        start_from=None, stop_at=None, display_summary_img=None,
        background_removal_options={}, segment_image_options={}):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromWorkingDir(var_name):
        return joblib.load(os.path.join(data_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    # corpus = duplocorpus.DuploCorpus(corpus_name)
    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    camera_pose = render.camera_pose
    camera_params = render.intrinsic_matrix

    for seq_idx, trial_id in enumerate(trial_ids):
        if start_from is not None and seq_idx < start_from:
            continue
        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"
        logger.info(f"Processing video {seq_idx + 1} / {len(trial_ids)} (trial {trial_id})")

        # task_id = corpus.getTaskIndex(trial_id)
        # goal_state = labels.constructGoalState(task_id)
        goal_state = None

        logger.info(f" Loading data...")
        rgb_frame_seq = loadFromWorkingDir(f"{trial_str}_rgb-frame-seq")
        depth_frame_seq = loadFromWorkingDir(f"{trial_str}_depth-frame-seq")

        logger.info(f" Removing background...")
        foreground_mask_seq, background_plane_seq = utils.batchProcess(
            videoprocessing.foregroundPixels, depth_frame_seq,
            static_args=(camera_params, camera_pose),
            static_kwargs=background_removal_options,
            unzip=True)
        foreground_mask_seq = np.stack(foreground_mask_seq)

        logger.info(f" Segmenting foreground...")
        segment_frame_seq = utils.batchProcess(
            videoprocessing.segmentImage,
            rgb_frame_seq, depth_frame_seq, foreground_mask_seq,
            static_args=(goal_state,),
            static_kwargs=segment_image_options)
        segment_frame_seq = np.stack(segment_frame_seq)
        foreground_mask_seq_no_ref_model = segment_frame_seq != 0

        logger.info(f" Saving output...")
        saveToWorkingDir(background_plane_seq, f'{trial_str}_background-plane-seq')
        saveToWorkingDir(foreground_mask_seq, f'{trial_str}_foreground-mask-seq')
        saveToWorkingDir(segment_frame_seq, f'{trial_str}_segment-frame-seq')
        saveToWorkingDir(
            foreground_mask_seq_no_ref_model,
            f'{trial_str}_foreground-mask-seq_no-ref-model')

        if display_summary_img:
            if utils.in_ipython_console():
                file_path = None
            else:
                trial_str = f"trial={trial_id}"
                file_path = os.path.join(fig_dir, f'{trial_str}_best-frames.png')
            imageprocessing.displayImages(
                *rgb_frame_seq, *depth_frame_seq, *segment_frame_seq,
                num_rows=3, file_path=file_path)