def saveFrames(indices, label):
    """Save the RGB frames selected by `indices` as a single-row summary image.

    Closure: reads ``rgb_frame_seq``, ``fig_dir`` and ``trial_str`` from the
    enclosing scope.
    """
    out_path = os.path.join(fig_dir, f'{trial_str}_best-frames-{label}.png')
    chosen_frames = rgb_frame_seq[indices]
    imageprocessing.displayImages(*chosen_frames, num_rows=1, file_path=out_path)
def plotBatches(self, io_batches, fig_dir, dataset=None, images_per_fig=None):
    """Write one summary figure per batch: inputs, scores, preds, labels.

    Each sample's crops are tiled horizontally into one wide image; the four
    rows of the figure are (flattened inputs, scores, predictions, labels).
    `images_per_fig` is accepted for signature compatibility but unused here.
    """
    for i, batch in enumerate(io_batches):
        preds, scores, inputs, labels, seq_id = batch
        num_samples = preds.shape[0]

        # Channels-last layout for plotting; clamp rendered values above 1.
        input_imgs = np.moveaxis(inputs.cpu().numpy(), -3, -1)
        input_imgs[input_imgs > 1] = 1

        # Tile each sample's crops side by side into a single wide image.
        flat_inputs = np.stack(
            tuple(np.hstack(tuple(crop for crop in sample)) for sample in input_imgs),
            axis=0)

        def to_target_arr(tensor):
            # Reshape a flat tensor to the dataset's target image shape.
            return tensor.view(num_samples, *dataset.target_shape).cpu().numpy()

        score_imgs = to_target_arr(scores)
        pred_imgs = to_target_arr(preds)
        label_imgs = to_target_arr(labels)

        imageprocessing.displayImages(
            *flat_inputs, *score_imgs, *pred_imgs, *label_imgs, num_rows=4,
            file_path=os.path.join(fig_dir, f"batch={i}.png"))
def plot_topk(model, test_io_history, num_disp_imgs, file_path):
    """Plot a sample of inputs with the model's top-5 predicted templates.

    The figure has 1 + k rows: the input images on top, then one row per
    rank of the top-k template renderings.
    """
    # Gather inputs (channels-last) and raw outputs across all batches.
    all_inputs = np.moveaxis(
        torch.cat(tuple(batches[2][0] for batches in test_io_history)).numpy(),
        1, -1)
    all_outputs = torch.cat(tuple(batches[1][0] for batches in test_io_history))

    # Subsample (sorted, without replacement) when there are too many images.
    num_total = all_inputs.shape[0]
    if num_total > num_disp_imgs:
        sample_idxs = np.arange(num_total)
        np.random.shuffle(sample_idxs)
        sample_idxs = np.sort(sample_idxs[:num_disp_imgs])
    else:
        sample_idxs = slice(None, None, None)
    all_inputs = all_inputs[sample_idxs]
    all_outputs = all_outputs[sample_idxs]

    def make_templates(preds):
        # Render template images for the predicted labels; clamp values above 1.
        rendered = np.moveaxis(model.templates[preds, 0].numpy(), 1, -1)
        rendered[rendered > 1] = 1
        return rendered

    k = 5
    __, topk_preds = torch.topk(all_outputs, k, dim=-1)
    # Interleave so each rank's predictions form one contiguous row.
    topk_preds = topk_preds.transpose(0, 1).contiguous().view(-1)
    topk_templates = make_templates(topk_preds)

    imageprocessing.displayImages(
        *all_inputs, *topk_templates, num_rows=1 + k, file_path=file_path)
def viz_model_params(model, templates_dir):
    """Write one figure per assembly showing that assembly's template images."""
    template_arr = model.templates.cpu().numpy()
    # FIXME: SOME RENDERED IMAGES HAVE CHANNEL VALUES > 1.0
    template_arr[template_arr > 1] = 1
    for i, assembly_imgs in enumerate(template_arr):
        out_path = os.path.join(templates_dir, f"{i}.png")
        imageprocessing.displayImages(
            *assembly_imgs, num_rows=6, figsize=(15, 15), file_path=out_path)
def saveFrames(indices, label):
    """Save selected RGB, segment, and color-quantized frames as a 3-row image.

    Closure: reads ``rgb_frame_seq``, ``segment_frame_seq``, ``keyframe_model``,
    ``fig_dir`` and ``trial_str`` from the enclosing scope.
    """
    chosen_rgb = rgb_frame_seq[indices]
    chosen_seg = segment_frame_seq[indices]
    quantized = np.stack(tuple(
        videoprocessing.quantizeImage(keyframe_model, rgb_img, segment_img)
        for rgb_img, segment_img in zip(chosen_rgb, chosen_seg)))
    out_path = os.path.join(fig_dir, f'{trial_str}_best-frames-{label}.png')
    imageprocessing.displayImages(
        *chosen_rgb, *chosen_seg, *quantized, num_rows=3, file_path=out_path)
def plotBatches(self, io_batches, fig_dir, dataset=None):
    """Write a two-row (inputs over scores) figure for each batch."""
    for batch_idx, batch in enumerate(io_batches):
        preds, scores, inputs, labels, seq_id = batch
        # Convert to channels-last numpy arrays and clamp values above 1.
        input_imgs = np.moveaxis(inputs.cpu().numpy(), 1, -1)
        input_imgs[input_imgs > 1] = 1
        score_imgs = np.moveaxis(scores.cpu().numpy(), 1, -1)
        score_imgs[score_imgs > 1] = 1
        imageprocessing.displayImages(
            *input_imgs, *score_imgs, num_rows=2,
            file_path=os.path.join(fig_dir, f"{batch_idx}.png"))
def viz(self, inputs, outputs):
    """Dump a two-row debug figure (inputs over outputs) and bump the counter."""
    self._index += 1
    fig_num = self._index
    # Channels-last for plotting; clamp values above 1.
    in_imgs = np.moveaxis(inputs.cpu().numpy(), 1, -1)
    in_imgs[in_imgs > 1] = 1
    out_imgs = np.moveaxis(outputs.detach().cpu().numpy(), 1, -1)
    out_imgs[out_imgs > 1] = 1
    imageprocessing.displayImages(
        *in_imgs, *out_imgs, num_rows=2,
        file_path=os.path.join(self._debug_fig_dir, f"{fig_num}.png"))
def plotBatches(self, io_batches, fig_dir, dataset=None, images_per_fig=None):
    """Plot model I/O for each batch, splitting large batches across figures.

    Each figure has four rows: inputs, scores, predictions, labels, with at
    most `images_per_fig` columns.  When `images_per_fig` is None, the whole
    batch goes into one figure.

    BUG FIX: the sub-batch offset was previously computed as
    ``start = j * num_batches`` (the sub-figure COUNT) instead of
    ``start = j * images_per_fig`` (the sub-figure SIZE), so whenever a batch
    needed more than one figure, pages overlapped or skipped images.
    """
    for i, batch in enumerate(io_batches):
        preds, scores, inputs, labels, seq_id = batch
        num_batch = preds.shape[0]

        if images_per_fig is None:
            images_per_fig = num_batch
        num_figs = math.ceil(num_batch / images_per_fig)

        for j in range(num_figs):
            # Offset advances by the page size, not the page count.
            start = j * images_per_fig
            end = start + images_per_fig
            b_scores = scores[start:end]
            b_preds = preds[start:end]
            b_labels = labels[start:end]
            b_inputs = inputs[start:end]

            b_size = b_scores.shape[0]
            if not b_size:
                continue

            # Channels-last for plotting; clamp rendered values above 1.
            b_inputs = np.moveaxis(b_inputs.cpu().numpy(), 1, -1)
            b_inputs[b_inputs > 1] = 1
            # Reshape flat targets to the dataset's target image shape.
            b_scores = b_scores.view(b_size, *dataset.target_shape).cpu().numpy()
            b_preds = b_preds.view(b_size, *dataset.target_shape).cpu().numpy()
            b_labels = b_labels.view(b_size, *dataset.target_shape).cpu().numpy()

            imageprocessing.displayImages(
                *b_inputs, *b_scores, *b_preds, *b_labels, num_rows=4,
                file_path=os.path.join(fig_dir, f"batch({i},{j}).png"))
def main(
        out_dir=None, data_dir=None, preprocess_dir=None, classifier_fn=None,
        display_summary_img=None, write_video=None,
        start_from=None, stop_after=None):
    """Extract per-segment features for each trial and optionally visualize.

    For every trial found in `preprocess_dir`, loads the RGB frames and segment
    frames, runs `extractSegmentFeatures` with a pre-trained classifier, saves
    the resulting feature sequences, and (optionally) writes a summary figure
    and a side-by-side GIF.

    Parameters (all paths are ~-expanded):
        out_dir: output root; 'figures' and 'data' subdirs are created here.
        data_dir: directory holding per-trial RGB frame sequences.
        preprocess_dir: directory holding per-trial segment frame sequences.
        classifier_fn: path to a pickled classifier passed to the extractor.
        display_summary_img: if truthy, save (or show, in IPython) a summary.
        write_video: if truthy, write a GIF of RGB frames next to features.
        start_from / stop_after: inclusive index bounds on the trial loop.
    """
    if start_from is None:
        start_from = 0
    if stop_after is None:
        stop_after = float("Inf")

    data_dir = os.path.expanduser(data_dir)
    preprocess_dir = os.path.expanduser(preprocess_dir)
    out_dir = os.path.expanduser(out_dir)
    classifier_fn = os.path.expanduser(classifier_fn)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDataDir(var_name):
        # Load a pickled variable from the raw-data directory.
        return joblib.load(os.path.join(data_dir, f'{var_name}.pkl'))

    def loadFromPreprocessDir(var_name):
        # Load a pickled variable from the preprocessing directory.
        return joblib.load(os.path.join(preprocess_dir, f'{var_name}.pkl'))

    def saveVariable(var, var_name):
        # Persist a variable into this script's output data directory.
        joblib.dump(var, os.path.join(out_data_dir, f'{var_name}.pkl'))

    classifier = joblib.load(classifier_fn)
    trial_ids = getUniqueTrialIds(preprocess_dir)
    for i, trial_id in enumerate(trial_ids):
        if i < start_from:
            continue
        if i > stop_after:
            break

        trial_str = f"trial-{trial_id}"
        logger.info(
            f"Processing video {i + 1} / {len(trial_ids)} (trial {trial_id})")

        rgb_frame_seq = loadFromDataDir(f"{trial_str}_rgb-frame-seq")
        # depth_frame_seq = loadFromDataDir(f"{trial_str}_depth-frame-seq")
        # foreground_mask_seq = loadFromPreprocessDir(f'{trial_str}_foreground-mask-seq')
        segment_frame_seq = loadFromPreprocessDir(
            f'{trial_str}_segment-frame-seq')
        # block_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_block-segment-frame-seq')
        # skin_segment_frame_seq = loadFromDetectionsDir(f'{trial_str}_skin-segment-frame-seq')
        # color_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_color-label-frame-seq')
        # class_label_frame_seq = loadFromDetectionsDir(f'{trial_str}_class-label-frame-seq')

        # Run the feature extractor frame-by-frame; yields one feature record
        # and one visualization frame per input frame.
        segment_features_seq, feature_frame_seq = utils.batchProcess(
            extractSegmentFeatures, rgb_frame_seq, segment_frame_seq,
            static_args=(classifier, ), unzip=True)
        saveVariable(segment_features_seq, f'{trial_str}_segment-features-seq')

        if display_summary_img:
            # In an IPython console displayImages shows the figure instead of
            # writing a file, so no path is supplied there.
            if utils.in_ipython_console():
                file_path = None
            else:
                trial_str = f"trial-{trial_id}"
                file_path = os.path.join(fig_dir, f'{trial_str}_best-frames.png')
            imageprocessing.displayImages(
                *rgb_frame_seq, *feature_frame_seq, num_rows=2,
                file_path=file_path)

        if write_video:
            video_dir = os.path.join(out_dir, 'detection-videos')
            if not os.path.exists(video_dir):
                os.makedirs(video_dir)
            fn = os.path.join(video_dir, f"{trial_str}.gif")
            writer = imageio.get_writer(fn, mode='I')
            for rgb_frame, feature_frame in zip(rgb_frame_seq, feature_frame_seq):
                # Normalize the feature frame to [0, 1] (guard against an
                # all-zero frame) and replicate to 3 channels for stacking.
                feature_frame = feature_frame.astype(float)
                max_val = feature_frame.max()
                if max_val:
                    feature_frame = feature_frame / max_val
                feature_frame = np.stack((feature_frame, ) * 3, axis=-1)
                rgb_frame = img_as_float(rgb_frame)
                # img_as_ubyte warns about precision loss; suppress it here.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    image = img_as_ubyte(np.hstack((rgb_frame, feature_frame)))
                writer.append_data(image)
            writer.close()
def main(
        out_dir=None, data_dir=None, segs_dir=None, scores_dir=None,
        vocab_dir=None, label_type='edges', gpu_dev_id=None,
        start_from=None, stop_at=None, num_disp_imgs=None, results_file=None,
        sweep_param_name=None, model_params={}, cv_params={}):
    """Evaluate predicted label sequences as both edges and assembly states.

    Loads per-sequence predictions/ground truth from `scores_dir`, converts
    between the 'assembly' and 'edge' label spaces, computes edge- and
    state-level metrics per cross-validation fold, and writes metric rows,
    frequency plots, and (optionally) rendered assembly images.

    NOTE(review): the default ``label_type='edges'`` matches neither the
    'assembly' nor the 'edge' branch below, which would leave
    ``state_pred_seqs``/``edge_pred_seqs`` undefined — presumably callers
    always pass an explicit value; confirm before relying on the default.
    """
    data_dir = os.path.expanduser(data_dir)
    segs_dir = os.path.expanduser(segs_dir)
    scores_dir = os.path.expanduser(scores_dir)
    vocab_dir = os.path.expanduser(vocab_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir_images = os.path.join(fig_dir, 'model-io_images')
    if not os.path.exists(io_dir_images):
        os.makedirs(io_dir_images)

    io_dir_plots = os.path.join(fig_dir, 'model-io_plots')
    if not os.path.exists(io_dir_plots):
        os.makedirs(io_dir_plots)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    seq_ids = utils.getUniqueIds(
        scores_dir, prefix='trial=', suffix='score-seq.*', to_array=True)
    logger.info(
        f"Loaded scores for {len(seq_ids)} sequences from {scores_dir}")

    link_vocab = {}
    joint_vocab = {}
    joint_type_vocab = {}
    vocab, parts_vocab, part_labels = load_vocab(
        link_vocab, joint_vocab, joint_type_vocab, vocab_dir)
    pred_vocab = []  # FIXME

    if label_type == 'assembly':
        # Assembly-state labels on disk; derive edge labels via part_labels.
        logger.info("Converting assemblies -> edges")
        state_pred_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_pred-label-seq", scores_dir)
            for seq_id in seq_ids)
        state_true_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_true-label-seq", scores_dir)
            for seq_id in seq_ids)
        edge_pred_seqs = tuple(part_labels[seq] for seq in state_pred_seqs)
        edge_true_seqs = tuple(part_labels[seq] for seq in state_true_seqs)
    elif label_type == 'edge':
        # Edge labels on disk; reconstruct assembly states from them.
        logger.info("Converting edges -> assemblies (will take a few minutes)")
        edge_pred_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_pred-label-seq", scores_dir)
            for seq_id in seq_ids)
        edge_true_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_true-label-seq", scores_dir)
            for seq_id in seq_ids)
        state_pred_seqs = tuple(
            edges_to_assemblies(seq, pred_vocab, parts_vocab, part_labels)
            for seq in edge_pred_seqs)
        state_true_seqs = tuple(
            edges_to_assemblies(seq, vocab, parts_vocab, part_labels)
            for seq in edge_true_seqs)

    device = torchutils.selectDevice(gpu_dev_id)
    dataset = sim2real.LabeledConnectionDataset(
        utils.loadVariable('parts-vocab', vocab_dir),
        utils.loadVariable('part-labels', vocab_dir),
        utils.loadVariable('vocab', vocab_dir),
        device=device)

    all_metrics = collections.defaultdict(list)

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    for cv_index, cv_fold in enumerate(cv_folds):
        train_indices, val_indices, test_indices = cv_fold
        logger.info(
            f"CV FOLD {cv_index + 1} / {len(cv_folds)}: "
            f"{len(train_indices)} train, {len(val_indices)} val, {len(test_indices)} test"
        )

        # Empirical edge-label frequencies on the training split...
        train_states = np.hstack(
            tuple(state_true_seqs[i] for i in (train_indices)))
        train_edges = part_labels[train_states]
        # state_train_vocab = np.unique(train_states)
        # edge_train_vocab = part_labels[state_train_vocab]
        train_freq_bigram, train_freq_unigram = edge_joint_freqs(train_edges)
        # state_probs = utils.makeHistogram(len(vocab), train_states, normalize=True)

        # ...and on the test split, for side-by-side comparison plots.
        test_states = np.hstack(
            tuple(state_true_seqs[i] for i in (test_indices)))
        test_edges = part_labels[test_states]
        # state_test_vocab = np.unique(test_states)
        # edge_test_vocab = part_labels[state_test_vocab]
        test_freq_bigram, test_freq_unigram = edge_joint_freqs(test_edges)

        f, axes = plt.subplots(1, 2)
        axes[0].matshow(train_freq_bigram)
        axes[0].set_title('Train')
        axes[1].matshow(test_freq_bigram)
        axes[1].set_title('Test')
        plt.tight_layout()
        plt.savefig(
            os.path.join(fig_dir, f"edge-freqs-bigram_cvfold={cv_index}.png"))

        f, axis = plt.subplots(1)
        axis.stem(train_freq_unigram, label='Train', linefmt='C0-', markerfmt='C0o')
        axis.stem(test_freq_unigram, label='Test', linefmt='C1--', markerfmt='C1o')
        plt.legend()
        plt.tight_layout()
        plt.savefig(
            os.path.join(fig_dir, f"edge-freqs-unigram_cvfold={cv_index}.png"))

        for i in test_indices:
            seq_id = seq_ids[i]
            logger.info(f"  Processing sequence {seq_id}...")

            trial_prefix = f"trial={seq_id}"
            # I include the '.' to differentiate between 'rgb-frame-seq' and
            # 'rgb-frame-seq-before-first-touch'
            # rgb_seq = utils.loadVariable(f"{trial_prefix}_rgb-frame-seq.", data_dir)
            # seg_seq = utils.loadVariable(f"{trial_prefix}_seg-labels-seq", segs_dir)
            score_seq = utils.loadVariable(f"{trial_prefix}_score-seq", scores_dir)

            # if score_seq.shape[0] != rgb_seq.shape[0]:
            #     err_str = f"scores shape {score_seq.shape} != data shape {rgb_seq.shape}"
            #     raise AssertionError(err_str)

            edge_pred_seq = edge_pred_seqs[i]
            edge_true_seq = edge_true_seqs[i]
            state_pred_seq = state_pred_seqs[i]
            state_true_seq = state_true_seqs[i]

            num_types = np.unique(state_pred_seq).shape[0]
            num_samples = state_pred_seq.shape[0]
            num_total = len(pred_vocab)
            logger.info(
                f"    {num_types} assemblies predicted ({num_total} total); "
                f"{num_samples} samples")

            # edge_freq_bigram, edge_freq_unigram = edge_joint_freqs(edge_true_seq)
            # dist_shift = np.linalg.norm(train_freq_unigram - edge_freq_unigram)
            metric_dict = {
                # 'State OOV rate': oov_rate_state(state_true_seq, state_train_vocab),
                # 'Edge OOV rate': oov_rate_edges(edge_true_seq, edge_train_vocab),
                # 'State avg prob, true': state_probs[state_true_seq].mean(),
                # 'State avg prob, pred': state_probs[state_pred_seq].mean(),
                # 'Edge distribution shift': dist_shift
            }
            metric_dict = eval_edge_metrics(
                edge_pred_seq, edge_true_seq, append_to=metric_dict)
            metric_dict = eval_state_metrics(
                state_pred_seq, state_true_seq, append_to=metric_dict)
            for name, value in metric_dict.items():
                logger.info(f"    {name}: {value * 100:.2f}%")
                all_metrics[name].append(value)

            utils.writeResults(
                results_file, metric_dict, sweep_param_name, model_params)

            if num_disp_imgs is not None:
                # Render one image per predicted segment and per true segment,
                # then plot the raw scores against true/pred edge sequences.
                pred_images = tuple(
                    render(dataset, vocab[seg_label])
                    for seg_label in utils.computeSegments(state_pred_seq)[0])
                imageprocessing.displayImages(
                    *pred_images,
                    file_path=os.path.join(
                        io_dir_images, f"seq={seq_id:03d}_pred-assemblies.png"),
                    num_rows=None, num_cols=5)
                true_images = tuple(
                    render(dataset, vocab[seg_label])
                    for seg_label in utils.computeSegments(state_true_seq)[0])
                imageprocessing.displayImages(
                    *true_images,
                    file_path=os.path.join(
                        io_dir_images, f"seq={seq_id:03d}_true-assemblies.png"),
                    num_rows=None, num_cols=5)
                utils.plot_array(
                    score_seq.T, (edge_true_seq.T, edge_pred_seq.T),
                    ('true', 'pred'),
                    fn=os.path.join(io_dir_plots, f"seq={seq_id:03d}.png"))
def main(
        out_dir=None, data_dir=None, person_masks_dir=None, bg_masks_dir=None,
        sat_thresh=1, start_from=None, stop_at=None, num_disp_imgs=None):
    """Build per-frame segment-label maps from RGB frames and precomputed masks.

    For each trial: coarse labels come from the depth-based foreground mask,
    person pixels are zeroed out, then labels are refined using a saturation
    threshold on the HSV saturation channel. Results are saved as uint8 arrays
    and optionally visualized.

    Parameters (all paths ~-expanded):
        sat_thresh: pixels with HSV saturation below this count as background.
        start_from / stop_at: inclusive index bounds on the trial loop.
        num_disp_imgs: if set, sample this many frames for a summary figure.
    """
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    person_masks_dir = os.path.expanduser(person_masks_dir)
    bg_masks_dir = os.path.expanduser(bg_masks_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDir(var_name, dir_name):
        # Load a pickled variable from an arbitrary directory.
        return joblib.load(os.path.join(dir_name, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        # Persist a variable into this script's output data directory.
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    for seq_idx, trial_id in enumerate(trial_ids):
        if start_from is not None and seq_idx < start_from:
            continue
        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"
        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)} (trial {trial_id})"
        )

        logger.info("  Loading data...")
        rgb_frame_seq = loadFromDir(f'{trial_str}_rgb-frame-seq', data_dir)
        person_mask_seq = loadFromDir(
            f'{trial_str}_person-mask-seq', person_masks_dir)
        bg_mask_seq_depth = loadFromDir(
            f'{trial_str}_bg-mask-seq-depth', bg_masks_dir)
        # bg_mask_seq_rgb = loadFromDir(f'{trial_str}_bg-mask-seq-rgb', bg_masks_dir)

        logger.info("  Making segment labels...")
        # Coarse labels: connected components of the depth-based foreground.
        fg_mask_seq = ~bg_mask_seq_depth
        seg_labels_seq = np.stack(tuple(
            map(makeCoarseSegmentLabels, fg_mask_seq)), axis=0)

        # Low-saturation pixels are treated as background during refinement.
        hsv_frame_seq = np.stack(tuple(map(makeHsvFrame, rgb_frame_seq)), axis=0)
        sat_frame_seq = hsv_frame_seq[..., 1]
        bg_mask_seq_sat = sat_frame_seq < sat_thresh

        # Remove person pixels, then refine labels frame-by-frame.
        seg_labels_seq[person_mask_seq] = 0
        seg_labels_seq = np.stack(tuple(
            makeFineSegmentLabels(segs, sat)
            for segs, sat in zip(seg_labels_seq, bg_mask_seq_sat)), axis=0)

        logger.info("  Saving output...")
        saveToWorkingDir(
            seg_labels_seq.astype(np.uint8), f'{trial_str}_seg-labels-seq')

        plotHsvHist(
            hsv_frame_seq, seg_labels_seq,
            file_path=os.path.join(fig_dir, f'{trial_str}_hsv-hists.png'))

        if num_disp_imgs is not None:
            # Randomly sample frames when there are too many to display.
            if rgb_frame_seq.shape[0] > num_disp_imgs:
                idxs = np.arange(rgb_frame_seq.shape[0])
                np.random.shuffle(idxs)
                idxs = idxs[:num_disp_imgs]
            else:
                idxs = slice(None, None, None)
            imageprocessing.displayImages(
                *(rgb_frame_seq[idxs]), *(bg_mask_seq_sat[idxs]),
                *(bg_mask_seq_depth[idxs]), *(person_mask_seq[idxs]),
                *(seg_labels_seq[idxs]), num_rows=5,
                file_path=os.path.join(fig_dir, f'{trial_str}_best-frames.png'))
def main(
        out_dir=None, data_dir=None, background_data_dir=None,
        learn_bg_model=False, gpu_dev_id=None, start_from=None, stop_at=None,
        num_disp_imgs=None, depth_bg_detection_kwargs={},
        rgb_bg_detection_kwargs={}):
    """Estimate background masks for each trial from depth and RGB frames.

    Per trial: fit a plane-based depth background model on pre-touch frames,
    detect depth background across the sequence, fit an RGB background image,
    then produce an RGB background mask either by training an
    `RgbBackgroundModel` (when `learn_bg_model`) or by sweeping a distance
    threshold and picking the most accurate one against the depth mask.

    BUG FIX: in the `learn_bg_model` branch, `plot_dict` was assigned with a
    trailing comma (`... = {...},`), making it a 1-tuple instead of a dict and
    breaking the subsequent `torchutils.plotEpochLog(plot_dict, ...)` call.

    Parameters (all paths ~-expanded):
        learn_bg_model: train a learned RGB background model vs. threshold sweep.
        start_from / stop_at: inclusive index bounds on the trial loop.
        num_disp_imgs: if set, sample this many frames for a summary figure.
        rgb_bg_detection_kwargs: accepted for interface compatibility (unused).
    """
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    background_data_dir = os.path.expanduser(background_data_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDir(var_name, dir_name):
        # Load a pickled variable from an arbitrary directory.
        return joblib.load(os.path.join(dir_name, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        # Persist a variable into this script's output data directory.
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    device = torchutils.selectDevice(gpu_dev_id)
    camera_pose = render.camera_pose
    camera_params = render.intrinsic_matrix

    for seq_idx, trial_id in enumerate(trial_ids):
        if start_from is not None and seq_idx < start_from:
            continue
        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"
        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)} (trial {trial_id})"
        )

        logger.info("  Loading data...")
        try:
            rgb_frame_seq = loadFromDir(f"{trial_str}_rgb-frame-seq", data_dir)
            depth_frame_seq = loadFromDir(
                f"{trial_str}_depth-frame-seq", data_dir)
            rgb_train = loadFromDir(
                f"{trial_str}_rgb-frame-seq-before-first-touch",
                background_data_dir)
            depth_train = loadFromDir(
                f"{trial_str}_depth-frame-seq-before-first-touch",
                background_data_dir)
            # Depth sequences stored as (empty) tuples signal missing data.
            if isinstance(depth_train, tuple) and isinstance(
                    depth_frame_seq, tuple):
                logger.info("  Skipping video: depth frames missing")
                continue
            rgb_frame_seq = np.stack(tuple(
                skimage.img_as_float(f) for f in rgb_frame_seq), axis=0)
            rgb_train = np.stack(tuple(
                skimage.img_as_float(f) for f in rgb_train), axis=0)
        except FileNotFoundError as e:
            logger.info(e)
            continue

        logger.info("  Removing background...")
        # Reuse cached depth masks when present; otherwise fit and detect.
        try:
            bg_mask_depth_train = loadFromDir(
                f'{trial_str}_bg-mask-depth-train', out_data_dir)
            bg_mask_seq_depth = loadFromDir(
                f'{trial_str}_bg-mask-seq-depth', out_data_dir)
        except FileNotFoundError:
            bg_model_depth, bg_mask_depth_train = fitBackgroundDepth(
                depth_train, camera_params=camera_params,
                camera_pose=camera_pose, **depth_bg_detection_kwargs)
            bg_mask_seq_depth = detectBackgroundDepth(
                bg_model_depth, depth_frame_seq, camera_params=camera_params,
                camera_pose=camera_pose, **depth_bg_detection_kwargs)
            __, bg_model_depth_image, __ = render.renderPlane(
                bg_model_depth, camera_pose=camera_pose,
                camera_params=camera_params)
            imageprocessing.displayImage(
                bg_model_depth_image,
                file_path=os.path.join(
                    fig_dir, f'{trial_str}_bg-image-depth.png'))

        bg_model_rgb = fitBackgroundRgb(
            np.vstack((rgb_train, rgb_frame_seq)),
            np.vstack((bg_mask_depth_train, bg_mask_seq_depth)))

        if learn_bg_model:
            model = RgbBackgroundModel(
                bg_model_rgb, update_bg=True, device=device)
            losses, metrics = model.fit(
                np.vstack((rgb_train, rgb_frame_seq)),
                np.vstack((bg_mask_depth_train, bg_mask_seq_depth)),
                num_epochs=100)
            outputs = model.forward(
                torch.tensor(
                    rgb_frame_seq, dtype=torch.float,
                    device=device).permute(0, -1, 1, 2))
            bg_mask_seq_rgb = model.predict(outputs).cpu().numpy().squeeze()
            # FIX: no trailing comma — this must be a dict, not a 1-tuple.
            plot_dict = {'Loss': losses, 'Accuracy': metrics}
        else:
            def f1(preds, targets):
                # F1 score of a binary prediction against binary targets.
                true_positives = np.sum((targets == 1) * (preds == 1))
                false_positives = np.sum((targets == 0) * (preds == 1))
                false_negatives = np.sum((targets == 1) * (preds == 0))
                precision = true_positives / (true_positives + false_positives)
                recall = true_positives / (true_positives + false_negatives)
                f1 = 2 * (precision * recall) / (precision + recall)
                return f1

            def acc(preds, targets):
                # Plain pixelwise accuracy.
                matches = preds == targets
                return matches.mean()

            # Sweep a distance-to-background threshold; score each candidate
            # mask against the depth-derived mask and keep the best.
            bg_dists = np.linalg.norm(
                rgb_frame_seq - bg_model_rgb[None, ...], axis=-1)
            thresh_vals = np.linspace(0, 1, num=50)
            scores = np.array(
                [acc(bg_dists < t, bg_mask_seq_depth) for t in thresh_vals])
            best_index = scores.argmax()
            best_thresh = thresh_vals[best_index]
            bg_mask_seq_rgb = bg_dists < best_thresh
            plot_dict = {'Accuracy': scores}

        torchutils.plotEpochLog(
            plot_dict, subfig_size=(10, 2.5), title='Training performance',
            fn=os.path.join(fig_dir, f'{trial_str}_train-plot.png'))

        logger.info("  Saving output...")
        saveToWorkingDir(
            bg_mask_depth_train.astype(bool), f'{trial_str}_bg-mask-depth-train')
        saveToWorkingDir(
            bg_mask_seq_depth.astype(bool), f'{trial_str}_bg-mask-seq-depth')
        saveToWorkingDir(
            bg_mask_seq_rgb.astype(bool), f'{trial_str}_bg-mask-seq-rgb')

        if num_disp_imgs is not None:
            # Randomly sample frames when there are too many to display.
            if rgb_frame_seq.shape[0] > num_disp_imgs:
                idxs = np.arange(rgb_frame_seq.shape[0])
                np.random.shuffle(idxs)
                idxs = idxs[:num_disp_imgs]
            else:
                idxs = slice(None, None, None)
            imageprocessing.displayImages(
                *(rgb_frame_seq[idxs]), *(bg_mask_seq_rgb[idxs]),
                *(depth_frame_seq[idxs]), *(bg_mask_seq_depth[idxs]),
                num_rows=4,
                file_path=os.path.join(
                    fig_dir, f'{trial_str}_best-frames.png'))
            imageprocessing.displayImage(
                bg_model_rgb,
                file_path=os.path.join(
                    fig_dir, f'{trial_str}_bg-image-rgb.png'))
def main(
        out_dir=None, scores_dir=None, preprocessed_data_dir=None,
        keyframe_model_name=None, subsample_period=None, window_size=None,
        corpus_name=None, default_annotator=None, cv_scheme=None,
        max_trials_per_fold=None, model_name=None, numeric_backend=None,
        gpu_dev_id=None, visualize=False, model_config={},
        camera_params_config={}):
    """Select keyframes, train an assembly-state model per CV fold, and decode.

    Pipeline: load per-trial frames/scores, pick keyframes, resample the
    assembly-label sequences onto the keyframes, build cross-validation folds
    according to `cv_scheme`, move everything to the chosen torch device, then
    for each fold fit the model named by `model_name` on the training split
    and decode each test video, logging accuracy/recall/precision and saving
    intermediate results (and figures when `visualize` is set).

    NOTE(review): `keyframe_model_name`, `window_size` and `numeric_backend`
    are accepted but not referenced in this body — confirm whether they are
    consumed elsewhere or vestigial.
    """
    out_dir = os.path.expanduser(out_dir)
    scores_dir = os.path.expanduser(scores_dir)
    preprocessed_data_dir = os.path.expanduser(preprocessed_data_dir)

    # Keyframe selection happens on the CPU/numpy backend; switched to torch
    # further below once data is moved to the device.
    m.set_backend('numpy')

    def loadFromWorkingDir(var_name):
        # Load a pickled variable from the scores directory.
        return joblib.load(os.path.join(scores_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        # Persist a variable into the output directory.
        joblib.dump(var, os.path.join(out_dir, f"{var_name}.pkl"))

    # Load camera parameters from external file and add them to model config kwargs
    model_config['init_kwargs'].update(
        render.loadCameraParams(**camera_params_config, as_dict=True))

    trial_ids = joblib.load(
        os.path.join(preprocessed_data_dir, 'trial_ids.pkl'))

    corpus = duplocorpus.DuploCorpus(corpus_name)
    assembly_seqs = tuple(
        labels.parseLabelSeq(
            corpus.readLabels(trial_id, default_annotator)[0])
        for trial_id in trial_ids)

    logger.info(f"Selecting keyframes...")
    keyframe_idx_seqs = []
    rgb_keyframe_seqs = []
    depth_keyframe_seqs = []
    seg_keyframe_seqs = []
    background_keyframe_seqs = []
    assembly_keyframe_seqs = []
    for seq_idx, trial_id in enumerate(trial_ids):
        trial_str = f"trial-{trial_id}"
        rgb_frame_seq = loadFromWorkingDir(f'{trial_str}_rgb-frame-seq')
        depth_frame_seq = loadFromWorkingDir(f'{trial_str}_depth-frame-seq')
        segment_seq = loadFromWorkingDir(f'{trial_str}_segment-seq')
        frame_scores = loadFromWorkingDir(f'{trial_str}_frame-scores')
        background_plane_seq = loadFromWorkingDir(
            f'{trial_str}_background-plane-seq')

        assembly_seq = assembly_seqs[seq_idx]
        # FIXME: Get the real frame index numbers instead of approximating
        assembly_seq[-1].end_idx = len(rgb_frame_seq) * subsample_period

        keyframe_idxs = videoprocessing.selectSegmentKeyframes(
            frame_scores, score_thresh=0, prepend_first=True)

        selectKeyframes = functools.partial(utils.select, keyframe_idxs)
        rgb_keyframe_seq = selectKeyframes(rgb_frame_seq)
        depth_keyframe_seq = selectKeyframes(depth_frame_seq)
        seg_keyframe_seq = selectKeyframes(segment_seq)
        background_keyframe_seq = selectKeyframes(background_plane_seq)

        # FIXME: Get the real frame index numbers instead of approximating
        keyframe_idxs_orig = keyframe_idxs * subsample_period
        assembly_keyframe_seq = labels.resampleStateSeq(
            keyframe_idxs_orig, assembly_seq)

        # Store all keyframe sequences in memory
        keyframe_idx_seqs.append(keyframe_idxs)
        rgb_keyframe_seqs.append(rgb_keyframe_seq)
        depth_keyframe_seqs.append(depth_keyframe_seq)
        seg_keyframe_seqs.append(seg_keyframe_seq)
        background_keyframe_seqs.append(background_keyframe_seq)
        assembly_keyframe_seqs.append(assembly_keyframe_seq)

    # Split into train and test sets
    if cv_scheme == 'leave one out':
        num_seqs = len(trial_ids)
        cv_folds = []
        for i in range(num_seqs):
            test_fold = (i, )
            train_fold = tuple(range(0, i)) + tuple(range(i + 1, num_seqs))
            cv_folds.append((train_fold, test_fold))
    elif cv_scheme == 'train on child':
        # Train on the 'child' corpus, test on every sequence loaded above.
        child_corpus = duplocorpus.DuploCorpus('child')
        child_trial_ids = utils.loadVariable(
            'trial_ids', 'preprocess-all-data', 'child')
        child_assembly_seqs = [
            labels.parseLabelSeq(
                child_corpus.readLabels(trial_id, 'Cathryn')[0])
            for trial_id in child_trial_ids
        ]
        num_easy = len(assembly_keyframe_seqs)
        num_child = len(child_assembly_seqs)
        cv_folds = [(tuple(range(num_easy, num_easy + num_child)),
                     tuple(range(num_easy)))]
        assembly_keyframe_seqs = assembly_keyframe_seqs + child_assembly_seqs

    # Saturate keyframe colors outside the background, and cast depth to float.
    rgb_keyframe_seqs = tuple(
        tuple(
            imageprocessing.saturateImage(
                rgb_image, background_mask=segment_image == 0)
            for rgb_image, segment_image in zip(rgb_frame_seq, seg_frame_seq))
        for rgb_frame_seq, seg_frame_seq in zip(
            rgb_keyframe_seqs, seg_keyframe_seqs))
    depth_keyframe_seqs = tuple(
        tuple(depth_image.astype(float) for depth_image in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)

    # Move all data to the torch backend on the selected device.
    device = torchutils.selectDevice(gpu_dev_id)
    m.set_backend('torch')
    m.set_default_device(device)
    assembly_keyframe_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_keyframe_seqs)
    assembly_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_seqs)
    rgb_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.float) for frame in rgb_frame_seq)
        for rgb_frame_seq in rgb_keyframe_seqs)
    depth_keyframe_seqs = tuple(
        tuple(
            m.np.array(frame, dtype=torch.float) for frame in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)
    seg_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.int) for frame in seg_frame_seq)
        for seg_frame_seq in seg_keyframe_seqs)

    num_cv_folds = len(cv_folds)
    saveToWorkingDir(cv_folds, f'cv-folds')
    for fold_index, (train_idxs, test_idxs) in enumerate(cv_folds):
        logger.info(f"CV FOLD {fold_index + 1} / {num_cv_folds}")

        # Initialize and train model
        utils.validateCvFold(train_idxs, test_idxs)
        selectTrain = functools.partial(utils.select, train_idxs)
        train_assembly_seqs = selectTrain(assembly_keyframe_seqs)
        model = getattr(models, model_name)(**model_config['init_kwargs'])
        logger.info(
            f"  Training {model_name} on {len(train_idxs)} sequences...")
        model.fit(train_assembly_seqs, **model_config['fit_kwargs'])
        logger.info(
            f'  Model trained on {model.num_states} unique assembly states')
        # saveToWorkingDir(model, f'model-fold{fold_index}')

        # Decode on the test set
        selectTest = functools.partial(utils.select, test_idxs)
        test_trial_ids = selectTest(trial_ids)
        test_rgb_keyframe_seqs = selectTest(rgb_keyframe_seqs)
        test_depth_keyframe_seqs = selectTest(depth_keyframe_seqs)
        test_seg_keyframe_seqs = selectTest(seg_keyframe_seqs)
        test_background_keyframe_seqs = selectTest(background_keyframe_seqs)
        test_assembly_keyframe_seqs = selectTest(assembly_keyframe_seqs)
        test_assembly_seqs = selectTest(assembly_seqs)

        logger.info(f"  Testing model on {len(test_idxs)} sequences...")
        for i, trial_id in enumerate(test_trial_ids):
            if max_trials_per_fold is not None and i >= max_trials_per_fold:
                break

            rgb_frame_seq = test_rgb_keyframe_seqs[i]
            depth_frame_seq = test_depth_keyframe_seqs[i]
            seg_frame_seq = test_seg_keyframe_seqs[i]
            background_plane_seq = test_background_keyframe_seqs[i]
            true_assembly_seq = test_assembly_keyframe_seqs[i]
            true_assembly_seq_orig = test_assembly_seqs[i]

            rgb_background_seq, depth_background_seq = utils.batchProcess(
                model.renderPlane, background_plane_seq, unzip=True)

            logger.info(f'    Decoding video {trial_id}...')
            start_time = time.process_time()
            out = model.predictSeq(
                rgb_frame_seq, depth_frame_seq, seg_frame_seq,
                rgb_background_seq, depth_background_seq,
                **model_config['decode_kwargs'])
            pred_assembly_seq, pred_idx_seq, max_log_probs, log_likelihoods, poses_seq = out
            end_time = time.process_time()
            logger.info(utils.makeProcessTimeStr(end_time - start_time))

            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq)
            logger.info(f'    ACCURACY: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_true=True)
            logger.info(f'    RECALL: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_pred=True)
            logger.info(f'    PRECISION: {num_correct} / {num_total}')

            # Save intermediate results
            logger.info(f"Saving output...")
            saveToWorkingDir(segment_seq, f'segment_seq-{trial_id}')
            saveToWorkingDir(
                true_assembly_seq_orig, f'true_state_seq_orig-{trial_id}')
            saveToWorkingDir(true_assembly_seq, f'true_state_seq-{trial_id}')
            saveToWorkingDir(pred_assembly_seq, f'pred_state_seq-{trial_id}')
            saveToWorkingDir(poses_seq, f'poses_seq-{trial_id}')
            saveToWorkingDir(
                background_plane_seq, f'background_plane_seq-{trial_id}')
            saveToWorkingDir(max_log_probs, f'max_log_probs-{trial_id}')
            saveToWorkingDir(log_likelihoods, f'log_likelihoods-{trial_id}')

            # Save figures
            if visualize:
                rgb_rendered_seq, depth_rendered_seq, label_rendered_seq = utils.batchProcess(
                    model.renderScene, pred_assembly_seq, poses_seq,
                    rgb_background_seq, depth_background_seq,
                    unzip=True, static_kwargs={'as_numpy': True})
                if utils.in_ipython_console():
                    file_path = None
                else:
                    trial_str = f"trial-{trial_id}"
                    file_path = os.path.join(
                        out_dir, f'{trial_str}_best-frames.png')
                rgb_frame_seq = tuple(
                    img.cpu().numpy() for img in rgb_frame_seq)
                imageprocessing.displayImages(
                    *rgb_frame_seq, *rgb_rendered_seq, num_rows=2,
                    file_path=file_path)
def main(
        out_dir=None, data_dir=None, gpu_dev_id=None, batch_size=None,
        start_from=None, stop_at=None, num_disp_imgs=None):
    """Detect person/background masks per trial with a pretrained Mask R-CNN.

    For each trial: load RGB frames, run batched Mask R-CNN inference, save
    the person mask sequence, and optionally write a summary figure.

    PERF FIX: the pretrained detector was previously constructed inside the
    per-trial loop, re-instantiating (and potentially re-downloading) the
    network for every video; it is loop-invariant, so it is now built once.

    Parameters (paths ~-expanded):
        batch_size: frames per inference batch; defaults to a whole sequence.
        start_from / stop_at: inclusive index bounds on the trial loop.
        num_disp_imgs: if set, sample this many frames for a summary figure.
    """
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadFromDir(var_name, dir_name):
        # Load a pickled variable from an arbitrary directory.
        return joblib.load(os.path.join(dir_name, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        # Persist a variable into this script's output data directory.
        joblib.dump(var, os.path.join(out_data_dir, f"{var_name}.pkl"))

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)
    device = torchutils.selectDevice(gpu_dev_id)

    # Loop-invariant: build the pretrained detector once, in eval mode.
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    model = model.to(device=device)
    model.device = device
    model.eval()

    for seq_idx, trial_id in enumerate(trial_ids):
        if start_from is not None and seq_idx < start_from:
            continue
        if stop_at is not None and seq_idx > stop_at:
            break

        trial_str = f"trial={trial_id}"
        logger.info(
            f"Processing video {seq_idx + 1} / {len(trial_ids)} (trial {trial_id})"
        )

        logger.info("  Loading data...")
        try:
            rgb_frame_seq = loadFromDir(f"{trial_str}_rgb-frame-seq", data_dir)
            rgb_frame_seq = np.stack(tuple(
                skimage.img_as_float(f) for f in rgb_frame_seq), axis=0)
        except FileNotFoundError as e:
            logger.info(e)
            continue

        logger.info("  Detecting objects...")
        # Channels-first layout for torch inference.
        inputs = np.moveaxis(rgb_frame_seq, 3, 1)
        if batch_size is None:
            batch_size = inputs.shape[0]

        def detectBatch(batch_index):
            # Run the detector on one slice of frames and return numpy masks.
            start = batch_size * batch_index
            end = start + batch_size
            in_batch = torch.tensor(inputs[start:end], dtype=torch.float)
            out_batches = detectCategories(model, in_batch)
            return tuple(
                batch.numpy().squeeze(axis=1) for batch in out_batches)

        num_batches = math.ceil(inputs.shape[0] / batch_size)
        person_mask_seq, bg_mask_seq = map(
            np.vstack, zip(*(detectBatch(i) for i in range(num_batches))))
        person_mask_seq = person_mask_seq.astype(bool)

        logger.info("  Saving output...")
        # NOTE(review): only the person masks are persisted; bg_mask_seq is
        # used solely for visualization below — confirm that is intentional.
        saveToWorkingDir(person_mask_seq, f'{trial_str}_person-mask-seq')

        if num_disp_imgs is not None:
            # Randomly sample frames when there are too many to display.
            if rgb_frame_seq.shape[0] > num_disp_imgs:
                idxs = np.arange(rgb_frame_seq.shape[0])
                np.random.shuffle(idxs)
                idxs = idxs[:num_disp_imgs]
            else:
                idxs = slice(None, None, None)
            imageprocessing.displayImages(
                *(rgb_frame_seq[idxs]), *(person_mask_seq[idxs]),
                *(bg_mask_seq[idxs]), num_rows=3,
                file_path=os.path.join(
                    fig_dir, f'{trial_str}_best-frames.png'))