def main(
        out_dir=None, data_dir=None,
        feature_fn_format='feature-seq.pkl', label_fn_format='label_seq.pkl',
        cv_params={}):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    # Load data
    trial_ids = utils.getUniqueIds(
        data_dir, prefix='trial=', suffix=feature_fn_format,
        to_array=True)
    dataset = utils.CvDataset(
        trial_ids, data_dir,
        feature_fn_format=feature_fn_format, label_fn_format=label_fn_format,
        vocab=[])
    utils.saveMetadata(dataset.metadata, out_data_dir)

    # Make folds
    dataset_size = len(trial_ids)
    cv_folds = utils.makeDataSplits(dataset_size, metadata=dataset.metadata, **cv_params)
    save_cv_folds(cv_folds, os.path.join(out_data_dir, 'cv-folds.json'))

    # Check folds
    for cv_index, cv_fold in enumerate(cv_folds):
        train_data, val_data, test_data = dataset.getFold(cv_fold)
        train_feats, train_labels, train_ids = train_data
        test_feats, test_labels, test_ids = test_data
        val_feats, val_labels, val_ids = val_data
        logger.info(
            f'CV fold {cv_index + 1} / {len(cv_folds)}: {len(trial_ids)} total '
            f'({len(train_ids)} train, {len(val_ids)} val, {len(test_ids)} test)'
        )
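# For reference, a minimal sketch of save_cv_folds (the real definition lives
# elsewhere in this script; this is an assumed implementation). It assumes each
# fold is a (train, val, test) triple of index arrays -- the same structure
# getFold unpacks above -- and writes the indices as plain lists so the folds
# can be reloaded or audited later.
import json

import numpy as np


def save_cv_folds(cv_folds, json_path):
    serializable = [
        {
            split_name: np.asarray(split_indices).tolist()
            for split_name, split_indices in zip(('train', 'val', 'test'), fold)
        }
        for fold in cv_folds
    ]
    with open(json_path, 'wt') as json_file:
        json.dump(serializable, json_file, indent=2)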
def main(out_dir=None, data_dir=None, labels_dir=None):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    labels_dir = os.path.expanduser(labels_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    filenames = [
        utils.stripExtension(fn)
        for fn in glob.glob(os.path.join(labels_dir, '*.csv'))
    ]

    metadata = utils.loadMetadata(data_dir)
    metadata['seq_id'] = metadata.index
    metadata = metadata.set_index('dir_name', drop=False).loc[filenames].set_index('seq_id')

    seq_ids = np.sort(metadata.index.to_numpy())
    logger.info(f"Loaded {len(seq_ids)} sequences from {labels_dir}")

    vocab = []
    for i, seq_id in enumerate(seq_ids):
        seq_id_str = f"seq={seq_id}"
        seq_dir_name = metadata['dir_name'].loc[seq_id]
        labels_fn = os.path.join(labels_dir, f'{seq_dir_name}.csv')

        event_labels = utils.loadVariable(f'{seq_id_str}_labels', data_dir)
        assembly_actions = pd.read_csv(labels_fn)
        label_seq = parseActions(assembly_actions, event_labels.shape[0], vocab)

        utils.saveVariable(label_seq, f'{seq_id_str}_label-seq', out_data_dir)
        plotLabels(os.path.join(fig_dir, f'{seq_id_str}_labels.png'), label_seq)
        writeLabels(os.path.join(fig_dir, f'{seq_id_str}_labels.csv'), label_seq, vocab)

    utils.saveMetadata(metadata, out_data_dir)
    utils.saveVariable(vocab, 'vocab', out_data_dir)
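# A hedged sketch of writeLabels as used above (the real helper is defined
# elsewhere in this script). It assumes label_seq is a 1-D array of integer
# label indices and that vocab maps each index to a human-readable name.
import csv


def writeLabels(csv_path, label_seq, vocab):
    with open(csv_path, 'wt', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(('frame_index', 'label_index', 'label_name'))
        for frame_index, label_index in enumerate(label_seq):
            writer.writerow((frame_index, label_index, vocab[label_index]))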
def main(
        out_dir=None, data_dir=None, annotation_dir=None, frames_dir=None,
        col_format='standard', win_params={}, slowfast_csv_params={},
        label_types=('event', 'action', 'part')):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    annotation_dir = os.path.expanduser(annotation_dir)
    frames_dir = os.path.expanduser(frames_dir)
    annotation_dir = os.path.join(annotation_dir, 'action_annotations')

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_labels_dir = os.path.join(out_dir, 'labels')
    if not os.path.exists(out_labels_dir):
        os.makedirs(out_labels_dir)

    data_dirs = {name: os.path.join(out_dir, f"{name}-dataset") for name in label_types}
    for name, dir_ in data_dirs.items():
        if not os.path.exists(dir_):
            os.makedirs(dir_)

    event_vocab_df, part_vocab, action_vocab = load_vocabs(
        os.path.join(data_dir, 'ANU_ikea_dataset', 'indexing_files', 'atomic_action_list.txt'))
    event_vocab_df.to_csv(os.path.join(out_labels_dir, 'event-vocab.csv'))
    event_vocab = event_vocab_df.index.tolist()
    vocabs = {'event': event_vocab, 'action': action_vocab, 'part': part_vocab}
    vocabs = {label_name: vocabs[label_name] for label_name in label_types}
    for name, vocab in vocabs.items():
        utils.saveVariable(vocab, 'vocab', data_dirs[name])

    label_fn = os.path.join(annotation_dir, 'gt_segments.json')
    seq_ids, event_labels, metadata = load_action_labels(label_fn, event_vocab_df)
    utils.saveMetadata(metadata, out_labels_dir)
    for name, dir_ in data_dirs.items():
        utils.saveMetadata(metadata, dir_)

    logger.info(f"Loaded {len(seq_ids)} sequences from {label_fn}")

    part_names = [name for name in part_vocab if name != '']
    col_names = [f"{name}_active" for name in part_names]
    integerizers = {
        label_name: {name: i for i, name in enumerate(label_vocab)}
        for label_name, label_vocab in vocabs.items()
    }

    all_slowfast_labels_seg = collections.defaultdict(list)
    all_slowfast_labels_win = collections.defaultdict(list)
    counts = np.zeros((len(action_vocab), len(part_vocab)), dtype=int)
    for i, seq_id in enumerate(seq_ids):
        seq_id_str = f"seq={seq_id}"
        seq_dir_name = metadata['dir_name'].loc[seq_id]

        event_segs = event_labels[i]
        if not event_segs.any(axis=None):
            logger.warning(f"No event labels for sequence {seq_id}")
            continue

        event_data = make_event_data(
            event_segs,
            sorted(glob.glob(os.path.join(frames_dir, seq_dir_name, '*.jpg'))),
            integerizers['event'], integerizers['action'], integerizers['part'],
            event_vocab.index('NA'), action_vocab.index('NA'), False)
        event_wins = make_window_clips(event_data, vocabs['event'], vocabs['action'], **win_params)

        event_data.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_data.csv"), index=False)
        event_segs.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_segs.csv"), index=False)

        filenames = event_data['fn'].to_list()
        label_indices = {}
        for name in label_types:
            if name == 'part':
                label_indices[name] = event_data[col_names].to_numpy()
                seg_labels_slowfast = make_slowfast_labels(
                    event_segs[['start', 'end']],
                    getActivePart(event_segs[col_names], part_names),
                    event_data['fn'], integerizers[name], col_format=col_format)
                win_labels_slowfast = make_slowfast_labels(
                    event_wins[['start', 'end']],
                    getActivePart(event_wins[col_names], part_names),
                    event_data['fn'], integerizers[name], col_format=col_format)
            else:
                label_indices[name] = event_data[name].to_numpy()
                seg_labels_slowfast = make_slowfast_labels(
                    event_segs[['start', 'end']], event_segs[name], event_data['fn'],
                    integerizers[name], col_format=col_format)
                win_labels_slowfast = make_slowfast_labels(
                    event_wins[['start', 'end']], event_wins[name], event_data['fn'],
                    integerizers[name], col_format=col_format)
            utils.saveVariable(filenames, f'{seq_id_str}_frame-fns', data_dirs[name])
            utils.saveVariable(label_indices[name], f'{seq_id_str}_labels', data_dirs[name])
            seg_labels_slowfast.to_csv(
                os.path.join(data_dirs[name], f'{seq_id_str}_slowfast-labels.csv'),
                **slowfast_csv_params)
            # NOTE: this writes to the same path as the segment CSV above, so the
            # window labels overwrite it; only the aggregate files written below
            # keep the two clip types separate.
            win_labels_slowfast.to_csv(
                os.path.join(data_dirs[name], f'{seq_id_str}_slowfast-labels.csv'),
                **slowfast_csv_params)
            all_slowfast_labels_seg[name].append(seg_labels_slowfast)
            all_slowfast_labels_win[name].append(win_labels_slowfast)

        plot_event_labels(
            os.path.join(fig_dir, f"{seq_id_str}.png"),
            label_indices['event'], label_indices['action'], label_indices['part'],
            event_vocab, action_vocab, part_names)

        for part_activity_row, action_index in zip(label_indices['part'], label_indices['action']):
            # Use a distinct loop variable so the outer sequence index isn't shadowed
            for part_col, is_active in enumerate(part_activity_row):
                part_index = integerizers['part'][part_names[part_col]]
                counts[action_index, part_index] += int(is_active)

    for name, labels in all_slowfast_labels_seg.items():
        pd.concat(labels, axis=0).to_csv(
            os.path.join(data_dirs[name], 'slowfast-labels_seg.csv'),
            **slowfast_csv_params)
    for name, labels in all_slowfast_labels_win.items():
        pd.concat(labels, axis=0).to_csv(
            os.path.join(data_dirs[name], 'slowfast-labels_win.csv'),
            **slowfast_csv_params)

    utils.saveVariable(counts, 'action-part-counts', out_labels_dir)

    plt.matshow(counts)
    plt.xticks(ticks=range(len(part_vocab)), labels=part_vocab, rotation='vertical')
    plt.yticks(ticks=range(len(action_vocab)), labels=action_vocab)
    plt.savefig(os.path.join(fig_dir, 'action-part-coocurrence.png'), bbox_inches='tight')
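# Plausible sketch of getActivePart (assumed; not shown in this script): it
# collapses the boolean '<part>_active' columns of each segment DataFrame into
# a single part name per row, returning '' when no part is active.
def getActivePart(part_activity, part_names):
    def row_to_name(row):
        active = [name for name, flag in zip(part_names, row) if flag]
        return active[0] if active else ''
    return part_activity.apply(lambda row: row_to_name(row.to_numpy()), axis=1)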
def main(
        out_dir=None, data_dir=None, prefix='trial=',
        feature_fn_format='feature-seq.pkl', label_fn_format='label_seq.pkl',
        slowfast_labels_path=None, cv_params={}, slowfast_csv_params={}):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    # Load data
    trial_ids = utils.getUniqueIds(
        data_dir, prefix=prefix, suffix=feature_fn_format,
        to_array=True)
    dataset = utils.CvDataset(
        trial_ids, data_dir,
        feature_fn_format=feature_fn_format, label_fn_format=label_fn_format,
        vocab=[], prefix=prefix)
    utils.saveMetadata(dataset.metadata, out_data_dir)

    # Make folds
    dataset_size = len(trial_ids)
    cv_folds = utils.makeDataSplits(dataset_size, metadata=dataset.metadata, **cv_params)
    save_cv_folds(cv_folds, os.path.join(out_data_dir, 'cv-folds.json'))

    split_names = ('train', 'val', 'test')

    # Check folds
    for cv_index, cv_fold in enumerate(cv_folds):
        train_data, val_data, test_data = dataset.getFold(cv_fold)
        train_feats, train_labels, train_ids = train_data
        test_feats, test_labels, test_ids = test_data
        val_feats, val_labels, val_ids = val_data
        logger.info(
            f'CV fold {cv_index + 1} / {len(cv_folds)}: {len(trial_ids)} total '
            f'({len(train_ids)} train, {len(val_ids)} val, {len(test_ids)} test)'
        )

        slowfast_labels_pattern = os.path.join(data_dir, 'slowfast-labels*.csv')
        # NOTE: the loop variable shadows the (otherwise unused) slowfast_labels_path argument
        for slowfast_labels_path in glob.glob(slowfast_labels_pattern):
            cv_str = f"cvfold={cv_index}"
            fn = os.path.basename(slowfast_labels_path)
            slowfast_labels = pd.read_csv(
                slowfast_labels_path, index_col=0, keep_default_na=False,
                **slowfast_csv_params)
            for split_indices, split_name in zip(cv_fold, split_names):
                matches = tuple(
                    slowfast_labels.loc[slowfast_labels['video_name'] == vid_id]
                    for vid_id in dataset.metadata.iloc[split_indices]['dir_name'].to_list()
                )
                if matches:
                    split = pd.concat(matches, axis=0)
                    split.to_csv(
                        os.path.join(out_data_dir, f"{cv_str}_{split_name}_{fn}"),
                        **slowfast_csv_params)
                else:
                    logger.info(f'  Skipping empty slowfast split: {split_name}')
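# For orientation, a minimal sketch of what utils.getUniqueIds might do,
# assuming files are named like 'trial=123_feature-seq.pkl' (matching the
# prefix and suffix arguments used above) with integer sequence ids:
import glob
import os

import numpy as np


def getUniqueIds(data_dir, prefix='trial=', suffix='', to_array=False):
    ids = set()
    for path in glob.glob(os.path.join(data_dir, f'{prefix}*{suffix}')):
        basename = os.path.basename(path)
        ids.add(int(basename[len(prefix):].split('_')[0]))
    ids = sorted(ids)
    return np.array(ids) if to_array else ids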
def main(
        out_dir=None, data_dir=None, use_vid_ids_from=None,
        output_data=None, magnitude_centering=None, resting_from_gt=None,
        remove_before_first_touch=None, include_signals=None, fig_type=None):
    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))
    logger.info(f"Reading from: {data_dir}")
    logger.info(f"Writing to: {out_dir}")

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def loadAll(seq_ids, var_name, from_dir=data_dir, prefix='trial='):
        all_data = tuple(
            utils.loadVariable(f"{prefix}{seq_id}_{var_name}", from_dir)
            for seq_id in seq_ids
        )
        return all_data

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    if fig_type is None:
        fig_type = 'multi'

    # Load data
    if use_vid_ids_from is None:
        trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)
    else:
        use_vid_ids_from = os.path.expanduser(use_vid_ids_from)
        trial_ids = utils.getUniqueIds(use_vid_ids_from, prefix='trial-', to_array=True)
    accel_seqs = loadAll(trial_ids, 'accel-samples.pkl')
    gyro_seqs = loadAll(trial_ids, 'gyro-samples.pkl')
    action_seqs = loadAll(trial_ids, 'action-seq.pkl')
    rgb_timestamp_seqs = loadAll(trial_ids, 'rgb-frame-timestamp-seq.pkl')

    def validate_imu(seqs):
        def is_valid(d):
            return not any(np.isnan(x).any() for x in d.values())
        return np.array([is_valid(d) for d in seqs])

    imu_is_valid = validate_imu(accel_seqs) & validate_imu(gyro_seqs)
    logger.info(
        f"Ignoring {(~imu_is_valid).sum()} IMU sequences with NaN-valued samples "
        f"(of {len(imu_is_valid)} total)"
    )

    def chooseValid(seq):
        return tuple(x for x, is_valid in zip(seq, imu_is_valid) if is_valid)
    trial_ids = np.array(list(chooseValid(trial_ids)))
    accel_seqs = chooseValid(accel_seqs)
    gyro_seqs = chooseValid(gyro_seqs)
    action_seqs = chooseValid(action_seqs)
    rgb_timestamp_seqs = chooseValid(rgb_timestamp_seqs)

    vocab = []
    metadata = utils.loadMetadata(data_dir, rows=trial_ids)
    utils.saveMetadata(metadata, out_data_dir)
    utils.saveVariable(vocab, 'vocab', out_data_dir)

    def norm(x):
        return np.linalg.norm(imu.getImuSamples(x), axis=1)[:, None]
    accel_mag_seqs = tuple(map(lambda x: dictToArray(x, transform=norm), accel_seqs))
    gyro_mag_seqs = tuple(map(lambda x: dictToArray(x, transform=norm), gyro_seqs))

    imu_timestamp_seqs = utils.batchProcess(makeTimestamps, accel_seqs, gyro_seqs)

    if remove_before_first_touch:
        before_first_touch_seqs = utils.batchProcess(
            beforeFirstTouch, action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs
        )

        num_ignored = sum(b is None for b in before_first_touch_seqs)
        logger.info(
            f"Ignoring {num_ignored} sequences without first-touch annotations "
            f"(of {len(before_first_touch_seqs)} total)"
        )
        trials_missing_first_touch = [
            i for b, i in zip(before_first_touch_seqs, trial_ids)
            if b is None
        ]
        logger.info(f"Trials without first touch: {trials_missing_first_touch}")

        def clip(signal, bool_array):
            return signal[~bool_array, ...]
        accel_mag_seqs = tuple(
            clip(signal, b)
            for signal, b in zip(accel_mag_seqs, before_first_touch_seqs)
            if b is not None
        )
        gyro_mag_seqs = tuple(
            clip(signal, b)
            for signal, b in zip(gyro_mag_seqs, before_first_touch_seqs)
            if b is not None
        )
        imu_timestamp_seqs = tuple(
            clip(signal, b)
            for signal, b in zip(imu_timestamp_seqs, before_first_touch_seqs)
            if b is not None
        )
        trial_ids = tuple(
            x for x, b in zip(trial_ids, before_first_touch_seqs)
            if b is not None
        )
        action_seqs = tuple(
            x for x, b in zip(action_seqs, before_first_touch_seqs)
            if b is not None
        )
        rgb_timestamp_seqs = tuple(
            x for x, b in zip(rgb_timestamp_seqs, before_first_touch_seqs)
            if b is not None
        )

    assembly_seqs = utils.batchProcess(
        parseActions,
        action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs
    )

    if output_data == 'components':
        accel_feat_seqs = accel_mag_seqs
        gyro_feat_seqs = gyro_mag_seqs
        unique_components = {frozenset(): 0}
        imu_label_seqs = zip(
            *tuple(
                labels.componentLabels(*args, unique_components)
                for args in zip(action_seqs, rgb_timestamp_seqs, imu_timestamp_seqs)
            )
        )
        saveVariable(unique_components, 'unique_components')
    elif output_data == 'pairwise components':
        imu_label_seqs = utils.batchProcess(
            labels.pairwiseComponentLabels, assembly_seqs,
            static_kwargs={'lower_tri_only': True, 'include_action_labels': False}
        )
        accel_feat_seqs = tuple(map(imu.pairwiseFeats, accel_mag_seqs))
        gyro_feat_seqs = tuple(map(imu.pairwiseFeats, gyro_mag_seqs))
    else:
        raise AssertionError(f"Unrecognized output_data: {output_data}")

    signals = {'accel': accel_feat_seqs, 'gyro': gyro_feat_seqs}
    if include_signals is None:
        include_signals = tuple(signals.keys())
    signals = tuple(signals[key] for key in include_signals)
    imu_feature_seqs = tuple(np.stack(x, axis=-1).squeeze(axis=-1) for x in zip(*signals))

    video_seqs = tuple(zip(imu_feature_seqs, imu_label_seqs, trial_ids))
    imu.plot_prediction_eg(video_seqs, fig_dir, fig_type=fig_type, output_data=output_data)

    video_seqs = tuple(
        zip(assembly_seqs, imu_feature_seqs, imu_timestamp_seqs, imu_label_seqs, trial_ids)
    )
    for assembly_seq, feature_seq, timestamp_seq, label_seq, trial_id in video_seqs:
        id_string = f"trial={trial_id}"
        saveVariable(assembly_seq, f'{id_string}_assembly-seq')
        saveVariable(feature_seq, f'{id_string}_feature-seq')
        saveVariable(timestamp_seq, f'{id_string}_timestamp-seq')
        saveVariable(label_seq, f'{id_string}_label-seq')
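# dictToArray (assumed; defined elsewhere) is sketched here as applying a
# transform to each IMU device's samples and concatenating the results
# column-wise, so accel_mag_seqs ends up with one magnitude column per device.
# Sorting by device key keeps the column order consistent across sequences.
import numpy as np


def dictToArray(sample_dict, transform=None):
    columns = tuple(
        transform(samples) if transform is not None else samples
        for _, samples in sorted(sample_dict.items())
    )
    return np.hstack(columns)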
def main(
        out_dir=None, rgb_data_dir=None, rgb_attributes_dir=None, rgb_vocab_dir=None,
        imu_data_dir=None, imu_attributes_dir=None, modalities=['rgb', 'imu'],
        gpu_dev_id=None, plot_predictions=None, results_file=None, sweep_param_name=None,
        model_params={}, cv_params={}, train_params={}, viz_params={}):
    out_dir = os.path.expanduser(out_dir)
    rgb_data_dir = os.path.expanduser(rgb_data_dir)
    rgb_attributes_dir = os.path.expanduser(rgb_attributes_dir)
    rgb_vocab_dir = os.path.expanduser(rgb_vocab_dir)
    imu_data_dir = os.path.expanduser(imu_data_dir)
    imu_attributes_dir = os.path.expanduser(imu_attributes_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    # Load data
    if modalities == ['rgb']:
        trial_ids = utils.getUniqueIds(rgb_data_dir, prefix='trial=', to_array=True)
        logger.info(f"Processing {len(trial_ids)} videos")
    else:
        rgb_trial_ids = utils.getUniqueIds(rgb_data_dir, prefix='trial=', to_array=True)
        imu_trial_ids = utils.getUniqueIds(imu_data_dir, prefix='trial=', to_array=True)
        trial_ids = np.array(sorted(set(rgb_trial_ids.tolist()) & set(imu_trial_ids.tolist())))
        logger.info(
            f"Processing {len(trial_ids)} videos common to "
            f"RGB ({len(rgb_trial_ids)} total) and IMU ({len(imu_trial_ids)} total)"
        )

    device = torchutils.selectDevice(gpu_dev_id)
    dataset = FusionDataset(
        trial_ids, rgb_attributes_dir, rgb_data_dir, imu_attributes_dir, imu_data_dir,
        device=device, modalities=modalities)
    utils.saveMetadata(dataset.metadata, out_data_dir)
    saveVariable(dataset.vocab, 'vocab')

    # parts_vocab = loadVariable('parts-vocab')
    edge_labels = {
        'rgb': utils.loadVariable('part-labels', rgb_vocab_dir),
        'imu': np.stack([
            labels.inSameComponent(a, lower_tri_only=True)
            for a in dataset.vocab
        ])
    }
    # edge_labels = revise_edge_labels(edge_labels, input_seqs)
    attribute_labels = tuple(edge_labels[name] for name in modalities)

    logger.info('Making transition probs...')
    transition_probs = make_transition_scores(dataset.vocab)
    saveVariable(transition_probs, 'transition-probs')

    model = AttributeModel(*attribute_labels, device=device)

    if plot_predictions:
        figsize = (12, 3)
        fig, axis = plt.subplots(1, figsize=figsize)
        axis.imshow(edge_labels['rgb'].T, interpolation='none', aspect='auto')
        plt.savefig(os.path.join(fig_dir, "edge-labels.png"))
        plt.close()

    for i, trial_id in enumerate(trial_ids):
        logger.info(f"Processing sequence {trial_id}...")
        trial_prefix = f"trial={trial_id}"

        true_label_seq = dataset.loadTargets(trial_id)
        attribute_feats = dataset.loadInputs(trial_id)

        score_seq = model(attribute_feats)
        pred_label_seq = model.predict(score_seq)

        attribute_feats = attribute_feats.cpu().numpy()
        score_seq = score_seq.cpu().numpy()
        true_label_seq = true_label_seq.cpu().numpy()
        pred_label_seq = pred_label_seq.cpu().numpy()

        saveVariable(score_seq.T, f'{trial_prefix}_score-seq')
        saveVariable(true_label_seq.T, f'{trial_prefix}_label-seq')

        if plot_predictions:
            fn = os.path.join(fig_dir, f'{trial_prefix}.png')
            utils.plot_array(
                attribute_feats.T,
                (true_label_seq, pred_label_seq, score_seq),
                ('gt', 'pred', 'scores'),
                fn=fn)

        metric_dict = eval_metrics(pred_label_seq, true_label_seq)
        for name, value in metric_dict.items():
            logger.info(f"  {name}: {value * 100:.2f}%")

        utils.writeResults(results_file, metric_dict, sweep_param_name, model_params)
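# A hedged sketch of eval_metrics: the loop above formats each value as a
# percentage, so the metrics are assumed to lie in [0, 1]. This version
# computes element-wise accuracy, precision, and recall for binary label
# arrays; the real helper may compute more.
import numpy as np


def eval_metrics(pred_seq, true_seq):
    tp = np.sum((pred_seq == 1) & (true_seq == 1))
    fp = np.sum((pred_seq == 1) & (true_seq == 0))
    fn = np.sum((pred_seq == 0) & (true_seq == 1))
    return {
        'Accuracy': float(np.mean(pred_seq == true_seq)),
        'Precision': float(tp / (tp + fp)) if (tp + fp) else 0.0,
        'Recall': float(tp / (tp + fn)) if (tp + fn) else 0.0,
    }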
def main(
        out_dir=None, data_dir=None, model_name=None, predict_mode='classify',
        gpu_dev_id=None, batch_size=None, learning_rate=None,
        independent_signals=None, active_only=None, output_dim_from_vocab=False,
        prefix='trial=', feature_fn_format='feature-seq.pkl', label_fn_format='label_seq.pkl',
        dataset_params={}, model_params={}, cv_params={}, train_params={}, viz_params={},
        metric_names=['Loss', 'Accuracy', 'Precision', 'Recall', 'F1'],
        plot_predictions=None, results_file=None, sweep_param_name=None):
    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def saveVariable(var, var_name, to_dir=out_data_dir):
        return utils.saveVariable(var, var_name, to_dir)

    # Load data
    device = torchutils.selectDevice(gpu_dev_id)
    trial_ids = utils.getUniqueIds(
        data_dir, prefix=prefix, suffix=feature_fn_format,
        to_array=True)
    dataset = utils.CvDataset(
        trial_ids, data_dir,
        prefix=prefix,
        feature_fn_format=feature_fn_format, label_fn_format=label_fn_format,
    )
    utils.saveMetadata(dataset.metadata, out_data_dir)
    utils.saveVariable(dataset.vocab, 'vocab', out_data_dir)

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(trial_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    if predict_mode == 'binary multiclass':
        # criterion = torchutils.BootstrappedCriterion(
        #     0.25, base_criterion=torch.nn.functional.binary_cross_entropy_with_logits,
        # )
        criterion = torch.nn.BCEWithLogitsLoss()
        labels_dtype = torch.float
    elif predict_mode == 'multiclass':
        criterion = torch.nn.CrossEntropyLoss()
        labels_dtype = torch.long
    elif predict_mode == 'classify':
        criterion = torch.nn.CrossEntropyLoss()
        labels_dtype = torch.long
    else:
        raise AssertionError(f"Unrecognized predict_mode: {predict_mode}")

    def make_dataset(feats, labels, ids, shuffle=True):
        dataset = torchutils.SequenceDataset(
            feats, labels,
            device=device, labels_dtype=labels_dtype, seq_ids=ids,
            **dataset_params)
        # FIX: honor the shuffle argument (it was previously hard-coded to True,
        # so the test loader was shuffled despite shuffle=False)
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
        return dataset, loader

    for cv_index, cv_fold in enumerate(cv_folds):
        train_data, val_data, test_data = dataset.getFold(cv_fold)

        if independent_signals:
            train_data = splitSeqs(*train_data, active_only=active_only)
            val_data = splitSeqs(*val_data, active_only=active_only)
            test_data = splitSeqs(*test_data, active_only=False)

        train_set, train_loader = make_dataset(*train_data, shuffle=True)
        test_set, test_loader = make_dataset(*test_data, shuffle=False)
        val_set, val_loader = make_dataset(*val_data, shuffle=True)

        logger.info(
            f'CV fold {cv_index + 1} / {len(cv_folds)}: {len(dataset.trial_ids)} total '
            f'({len(train_set)} train, {len(val_set)} val, {len(test_set)} test)')
        logger.info(
            f'{train_set.num_label_types} unique labels in train set; '
            f'vocab size is {len(dataset.vocab)}')

        input_dim = train_set.num_obsv_dims
        output_dim = train_set.num_label_types
        if output_dim_from_vocab:
            output_dim = len(dataset.vocab)

        if model_name == 'linear':
            model = torchutils.LinearClassifier(
                input_dim, output_dim, **model_params).to(device=device)
        elif model_name == 'conv':
            model = ConvClassifier(input_dim, output_dim, **model_params).to(device=device)
        elif model_name == 'TCN':
            if predict_mode == 'multiclass':
                num_multiclass = train_set[0][1].shape[-1]
                output_dim = max([
                    train_set.num_label_types,
                    test_set.num_label_types,
                    val_set.num_label_types
                ])
            else:
                num_multiclass = None
            model = TcnClassifier(
                input_dim, output_dim, num_multiclass=num_multiclass,
                **model_params).to(device=device)
        elif model_name == 'LSTM':
            if predict_mode == 'multiclass':
                num_multiclass = train_set[0][1].shape[-1]
                output_dim = max([
                    train_set.num_label_types,
                    test_set.num_label_types,
                    val_set.num_label_types
                ])
            else:
                num_multiclass = None
            model = LstmClassifier(
                input_dim, output_dim, num_multiclass=num_multiclass,
                **model_params).to(device=device)
        else:
            raise AssertionError(f"Unrecognized model_name: {model_name}")

        optimizer_ft = torch.optim.Adam(
            model.parameters(), lr=learning_rate,
            betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft, step_size=1, gamma=1.00)

        train_epoch_log = collections.defaultdict(list)
        val_epoch_log = collections.defaultdict(list)
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}

        model, last_model_wts = torchutils.trainModel(
            model, criterion, optimizer_ft, lr_scheduler,
            train_loader, val_loader,
            device=device,
            metrics=metric_dict,
            train_epoch_log=train_epoch_log,
            val_epoch_log=val_epoch_log,
            **train_params)

        # Test model
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        test_io_history = torchutils.predictSamples(
            model.to(device=device), test_loader,
            criterion=criterion, device=device,
            metrics=metric_dict, data_labeled=True, update_model=False,
            seq_as_batch=train_params['seq_as_batch'],
            return_io_history=True)
        if independent_signals:
            test_io_history = tuple(joinSeqs(test_io_history))

        logger.info('[TST]  ' + '  '.join(str(m) for m in metric_dict.values()))
        utils.writeResults(
            results_file, {k: v.value for k, v in metric_dict.items()},
            sweep_param_name, model_params)

        if plot_predictions:
            io_fig_dir = os.path.join(fig_dir, 'model-io')
            if not os.path.exists(io_fig_dir):
                os.makedirs(io_fig_dir)

            label_names = ('gt', 'pred')
            for batch in test_io_history:
                batch = tuple(
                    x.cpu().numpy() if isinstance(x, torch.Tensor) else x
                    for x in batch)
                for preds, _, inputs, gt_labels, seq_id in zip(*batch):
                    fn = os.path.join(io_fig_dir, f"{prefix}{seq_id}_model-io.png")
                    utils.plot_array(inputs, (gt_labels.T, preds.T), label_names, fn=fn)

        for batch in test_io_history:
            batch = tuple(
                x.cpu().numpy() if isinstance(x, torch.Tensor) else x
                for x in batch)
            for pred_seq, score_seq, feat_seq, label_seq, trial_id in zip(*batch):
                saveVariable(pred_seq, f'{prefix}{trial_id}_pred-label-seq')
                saveVariable(score_seq, f'{prefix}{trial_id}_score-seq')
                saveVariable(label_seq, f'{prefix}{trial_id}_true-label-seq')
        saveVariable(model, f'cvfold={cv_index}_{model_name}-best')

        train_fig_dir = os.path.join(fig_dir, 'train-plots')
        if not os.path.exists(train_fig_dir):
            os.makedirs(train_fig_dir)

        if train_epoch_log:
            torchutils.plotEpochLog(
                train_epoch_log,
                subfig_size=(10, 2.5),
                title='Training performance',
                fn=os.path.join(train_fig_dir, f'cvfold={cv_index}_train-plot.png'))

        if val_epoch_log:
            torchutils.plotEpochLog(
                val_epoch_log,
                subfig_size=(10, 2.5),
                title='Heldout performance',
                fn=os.path.join(train_fig_dir, f'cvfold={cv_index}_val-plot.png'))
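# splitSeqs / joinSeqs (assumed; defined elsewhere) handle the
# independent_signals option above. A minimal sketch of the split side,
# assuming features are shaped (time, channels) with one label column per
# channel: each channel becomes its own training example.
def splitSeqs(feats, labels, ids, active_only=False):
    out_feats, out_labels, out_ids = [], [], []
    for feat_seq, label_seq, seq_id in zip(feats, labels, ids):
        for channel in range(feat_seq.shape[1]):
            channel_labels = label_seq[:, channel]
            if active_only and not channel_labels.any():
                continue  # drop channels that are never active
            out_feats.append(feat_seq[:, channel:channel + 1])
            out_labels.append(channel_labels)
            out_ids.append((seq_id, channel))
    return out_feats, out_labels, out_ids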
def main(
        out_dir=None, data_dir=None, actions_dir=None, parts_dir=None,
        events_dir=None, edges_dir=None, prefix='seq=',
        feature_fn_format='score-seq', label_fn_format='true-label-seq',
        stop_after=None, only_fold=None, plot_io=None,
        dataset_params={}, model_params={}, cv_params={}):
    data_dir = os.path.expanduser(data_dir)
    actions_dir = os.path.expanduser(actions_dir)
    parts_dir = os.path.expanduser(parts_dir)
    events_dir = os.path.expanduser(events_dir)
    edges_dir = os.path.expanduser(edges_dir)
    out_dir = os.path.expanduser(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    # vocab = utils.loadVariable(
    #     'assembly-action-vocab',
    #     os.path.join(data_dir, 'event-dataset')
    # )
    # vocab = [BlockAssembly()] + list(abs(x) for x in vocab)
    dataset = FusionDataset(
        actions_dir, parts_dir, events_dir, edges_dir,
        prefix=prefix,
        **dataset_params,
        # vocab=vocab,
    )
    utils.saveMetadata(dataset.metadata, out_data_dir)
    utils.saveVariable(dataset.vocab, 'vocab', out_data_dir)

    seq_ids = dataset.trial_ids
    logger.info(f"Loaded scores for {len(seq_ids)} sequences from {data_dir}")

    # Define cross-validation folds
    # cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    # utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    for i, seq_id in enumerate(seq_ids):
        try:
            labels = dataset.loadTargets(seq_id)
            features = dataset.loadInputs(seq_id)
        except AssertionError as e:
            logger.warning(f'Skipping sequence {seq_id}: {e}')
            continue

        logger.info(f"Processing sequence {seq_id}")

        if labels.shape[0] != features.shape[0]:
            message = f'Label shape {labels.shape} != feature shape {features.shape}'
            raise AssertionError(message)

        seq_prefix = f"seq={seq_id}"
        utils.saveVariable(features, f'{seq_prefix}_feature-seq', out_data_dir)
        utils.saveVariable(labels, f'{seq_prefix}_label-seq', out_data_dir)

        if plot_io:
            fn = os.path.join(fig_dir, f'{seq_prefix}.png')
            utils.plot_array(features.T, (labels.T,), ('gt',), fn=fn)
def main(
        out_dir=None, data_dir=None, results_file=None, cv_file=None,
        take_log=False, col_format=None, win_params={}, slowfast_csv_params={}):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    results_file = os.path.expanduser(results_file)
    cv_file = os.path.expanduser(cv_file)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    vocab = utils.loadVariable('vocab', data_dir)
    metadata = utils.loadMetadata(data_dir)

    slowfast_labels = pd.read_csv(
        cv_file, keep_default_na=False, index_col=0,
        **slowfast_csv_params)
    seg_ids = slowfast_labels.index.to_numpy()
    vid_names = slowfast_labels['video_name'].unique().tolist()
    metadata['seq_id'] = metadata.index
    vid_ids = metadata.set_index('dir_name').loc[vid_names].set_index('seq_id').index
    metadata = metadata.drop('seq_id', axis=1)

    with open(results_file, 'rb') as file_:
        model_probs, gt_labels = pickle.load(file_)
        model_probs = model_probs.numpy()
        gt_labels = gt_labels.numpy()

    if len(model_probs) != len(seg_ids):
        err_str = f"{len(model_probs)} segment scores != {slowfast_labels.shape[0]} CSV rows"
        raise AssertionError(err_str)

    logger.info(f"Loaded {len(seg_ids)} segments, {len(vid_ids)} videos")

    for vid_id, vid_name in zip(vid_ids, vid_names):
        matches_video = (slowfast_labels['video_name'] == vid_name).to_numpy()
        win_labels = gt_labels[matches_video]
        win_probs = model_probs[matches_video, :]

        # Multi-label scores share the labels' shape (threshold each class);
        # otherwise treat the scores as multiclass and take the argmax.
        if win_labels.shape == win_probs.shape:
            win_preds = (win_probs > 0.5).astype(int)
        else:
            win_preds = win_probs.argmax(axis=1)

        if take_log:
            win_probs = np.log(win_probs)

        seq_id_str = f"seq={vid_id}"
        utils.saveVariable(win_probs, f'{seq_id_str}_score-seq', out_data_dir)
        utils.saveVariable(win_labels, f'{seq_id_str}_true-label-seq', out_data_dir)
        utils.saveVariable(win_preds, f'{seq_id_str}_pred-label-seq', out_data_dir)

        utils.plot_array(
            win_probs.T, (win_labels.T, win_preds.T), ('true', 'pred'),
            tick_names=vocab,
            fn=os.path.join(fig_dir, f"{seq_id_str}.png"),
            subplot_width=12, subplot_height=5)

    utils.saveVariable(vocab, 'vocab', out_data_dir)
    utils.saveMetadata(metadata, out_data_dir)
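# Worked example of the prediction dispatch above: when the ground-truth labels
# share the scores' shape the task is treated as multi-label and each class is
# thresholded independently; otherwise the scores are (num_windows, num_classes)
# and argmax picks a single class per window.
def _demo_prediction_dispatch():
    probs = np.array([[0.9, 0.2], [0.1, 0.7]])
    multilabel_gt = np.array([[1, 0], [0, 1]])  # same shape -> threshold per class
    multiclass_gt = np.array([0, 1])            # 1-D -> argmax per window
    assert ((probs > 0.5).astype(int) == multilabel_gt).all()
    assert (probs.argmax(axis=1) == multiclass_gt).all()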
def main(
        out_dir=None, modalities=['rgb', 'imu'], gpu_dev_id=None, plot_io=None,
        rgb_data_dir=None, rgb_attributes_dir=None,
        imu_data_dir=None, imu_attributes_dir=None):
    out_dir = os.path.expanduser(out_dir)
    rgb_data_dir = os.path.expanduser(rgb_data_dir)
    rgb_attributes_dir = os.path.expanduser(rgb_attributes_dir)
    imu_data_dir = os.path.expanduser(imu_data_dir)
    imu_attributes_dir = os.path.expanduser(imu_attributes_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    # Load data
    if modalities == ['rgb']:
        trial_ids = utils.getUniqueIds(rgb_data_dir, prefix='trial=', to_array=True)
        logger.info(f"Processing {len(trial_ids)} videos")
    else:
        rgb_trial_ids = utils.getUniqueIds(rgb_data_dir, prefix='trial=', to_array=True)
        imu_trial_ids = utils.getUniqueIds(imu_data_dir, prefix='trial=', to_array=True)
        trial_ids = np.array(sorted(set(rgb_trial_ids.tolist()) & set(imu_trial_ids.tolist())))
        logger.info(
            f"Processing {len(trial_ids)} videos common to "
            f"RGB ({len(rgb_trial_ids)} total) and IMU ({len(imu_trial_ids)} total)"
        )

    device = torchutils.selectDevice(gpu_dev_id)
    dataset = FusionDataset(
        trial_ids, rgb_attributes_dir, rgb_data_dir, imu_attributes_dir, imu_data_dir,
        device=device, modalities=modalities,
    )
    utils.saveMetadata(dataset.metadata, out_data_dir)
    utils.saveVariable(dataset.vocab, 'vocab', out_data_dir)

    for i, trial_id in enumerate(trial_ids):
        logger.info(f"Processing sequence {trial_id}...")

        true_label_seq = dataset.loadTargets(trial_id)
        attribute_feats = dataset.loadInputs(trial_id)

        # (Process the samples here if we need to)

        attribute_feats = attribute_feats.cpu().numpy()
        true_label_seq = true_label_seq.cpu().numpy()

        trial_prefix = f"trial={trial_id}"
        utils.saveVariable(attribute_feats, f'{trial_prefix}_feature-seq', out_data_dir)
        utils.saveVariable(true_label_seq, f'{trial_prefix}_label-seq', out_data_dir)

        if plot_io:
            fn = os.path.join(fig_dir, f'{trial_prefix}.png')
            utils.plot_array(
                attribute_feats.T, (true_label_seq,), ('gt',), fn=fn
            )
def main(
        out_dir=None, metadata_file=None, corpus_name=None, default_annotator=None,
        metadata_criteria={}, win_params={}, slowfast_csv_params={},
        label_types=('event', 'action', 'part')):
    out_dir = os.path.expanduser(out_dir)
    metadata_file = os.path.expanduser(metadata_file)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_labels_dir = os.path.join(out_dir, 'labels')
    if not os.path.exists(out_labels_dir):
        os.makedirs(out_labels_dir)

    data_dirs = {name: os.path.join(out_dir, f"{name}-dataset") for name in label_types}
    for name, dir_ in data_dirs.items():
        if not os.path.exists(dir_):
            os.makedirs(dir_)

    assembly_data_dir = os.path.join(out_dir, 'assembly-dataset')
    if not os.path.exists(assembly_data_dir):
        os.makedirs(assembly_data_dir)

    (seq_ids, event_labels, assembly_seqs,
     frame_fn_seqs, frame_fn_idx_seqs,
     vocabs, assembly_action_vocab, metadata) = load_all_labels(
        corpus_name, default_annotator, metadata_file, metadata_criteria,
        start_video_from_first_touch=True, subsample_period=None,
        use_coarse_actions=True)

    assembly_vocab = []
    assembly_idx_seqs = tuple(
        tuple(labels.gen_eq_classes(state_seq, assembly_vocab))
        for state_seq in assembly_seqs)
    utils.saveVariable(assembly_vocab, 'vocab', assembly_data_dir)

    utils.saveVariable(assembly_action_vocab, 'assembly-action-vocab', data_dirs['event'])

    vocabs = {label_name: vocabs[label_name] for label_name in label_types}
    for name, vocab in vocabs.items():
        utils.saveVariable(vocab, 'vocab', data_dirs[name])

    utils.saveMetadata(metadata, out_labels_dir)
    for name, dir_ in data_dirs.items():
        utils.saveMetadata(metadata, dir_)

    assembly_attrs = labels.blockConnectionsSeq(assembly_vocab)
    utils.saveVariable(assembly_attrs, 'assembly-attrs', assembly_data_dir)

    plt.matshow(assembly_attrs.T)
    plt.savefig(os.path.join(fig_dir, 'assembly-attrs.png'))

    logger.info(f"Loaded {len(seq_ids)} sequence labels from {corpus_name} dataset")

    part_names = [name for name in vocabs['part'] if name != '']
    col_names = [f"{name}_active" for name in part_names]
    integerizers = {
        label_name: {name: i for i, name in enumerate(label_vocab)}
        for label_name, label_vocab in vocabs.items()
    }

    all_slowfast_labels_seg = collections.defaultdict(list)
    all_slowfast_labels_win = collections.defaultdict(list)
    counts = np.zeros((len(vocabs['action']), len(vocabs['part'])), dtype=int)
    for i, seq_id in enumerate(seq_ids):
        logger.info(f"Processing sequence {i + 1} / {len(seq_ids)}")
        seq_id_str = f"seq={seq_id}"

        event_segs = event_labels[i]
        frame_fns = frame_fn_seqs[i]
        frame_fn_idxs = frame_fn_idx_seqs[i]
        assembly_seq = assembly_seqs[i]
        assembly_label_seq = make_assembly_labels(
            assembly_seq, assembly_idx_seqs[i], **win_params)

        # video_dir = os.path.dirname(frame_fns[0]).split('/')[-1]
        video_dir = f"{seq_id}"

        event_data = make_event_data(
            event_segs, frame_fns, frame_fn_idxs,
            integerizers['event'], integerizers['action'], integerizers['part'],
            vocabs['event'].index(''), vocabs['action'].index(''), False)

        # Redefining event segments from the sequence catches background segments
        # that are not annotated in the source labels
        event_segs = make_clips(event_data, vocabs['event'], vocabs['action'], clip_type='segment')
        event_wins = make_clips(
            event_data, vocabs['event'], vocabs['action'],
            clip_type='window', **win_params)

        for name in ('event', 'action'):
            event_segs[f'{name}_id'] = [integerizers[name][n] for n in event_segs[name]]
            event_wins[f'{name}_id'] = [integerizers[name][n] for n in event_wins[name]]

        event_data.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_data.csv"), index=False)
        event_segs.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_segs.csv"), index=False)
        event_wins.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_wins.csv"), index=False)

        utils.saveVariable(assembly_label_seq, f'seq={seq_id}_label-seq', assembly_data_dir)

        filenames = event_data['fn'].to_list()
        label_indices = {}
        bound_keys = ['start', 'end']
        for name in label_types:
            if name == 'part':
                label_indices[name] = event_data[col_names].to_numpy()
                label_keys = col_names
            else:
                label_indices[name] = event_data[name].to_numpy()
                label_keys = [f'{name}_id']
            seg_labels_slowfast = make_slowfast_labels(
                event_segs[bound_keys], event_segs[label_keys],
                [video_dir for _ in range(event_segs.shape[0])])
            win_labels_slowfast = make_slowfast_labels(
                event_wins[bound_keys], event_wins[label_keys],
                [video_dir for _ in range(event_wins.shape[0])])
            utils.saveVariable(filenames, f'{seq_id_str}_frame-fns', data_dirs[name])
            utils.saveVariable(label_indices[name], f'{seq_id_str}_labels', data_dirs[name])
            seg_labels_slowfast.to_csv(
                os.path.join(data_dirs[name], f'{seq_id_str}_slowfast-labels.csv'),
                index=False, **slowfast_csv_params)
            # NOTE: same path as the segment CSV above, so the window labels
            # overwrite it; the aggregate files written below keep both clip types.
            win_labels_slowfast.to_csv(
                os.path.join(data_dirs[name], f'{seq_id_str}_slowfast-labels.csv'),
                index=False, **slowfast_csv_params)
            all_slowfast_labels_seg[name].append(seg_labels_slowfast)
            all_slowfast_labels_win[name].append(win_labels_slowfast)

        plot_event_labels(
            os.path.join(fig_dir, f"{seq_id_str}.png"),
            label_indices['event'], label_indices['action'], label_indices['part'],
            vocabs['event'], vocabs['action'], part_names)

        plot_assembly_labels(
            os.path.join(fig_dir, f"{seq_id_str}_assembly.png"),
            assembly_label_seq, label_indices['event'], vocabs['event'])

        for part_activity_row, action_index in zip(label_indices['part'], label_indices['action']):
            # Use a distinct loop variable so the outer sequence index isn't shadowed
            for part_col, is_active in enumerate(part_activity_row):
                part_index = integerizers['part'][part_names[part_col]]
                counts[action_index, part_index] += int(is_active)

    for name, sf_labels in all_slowfast_labels_seg.items():
        pd.concat(sf_labels, axis=0).to_csv(
            os.path.join(data_dirs[name], 'slowfast-labels_seg.csv'),
            **slowfast_csv_params)
    for name, sf_labels in all_slowfast_labels_win.items():
        pd.concat(sf_labels, axis=0).to_csv(
            os.path.join(data_dirs[name], 'slowfast-labels_win.csv'),
            **slowfast_csv_params)

    utils.saveVariable(counts, 'action-part-counts', out_labels_dir)

    plt.matshow(counts)
    plt.xticks(ticks=range(len(vocabs['part'])), labels=vocabs['part'], rotation='vertical')
    plt.yticks(ticks=range(len(vocabs['action'])), labels=vocabs['action'])
    plt.savefig(os.path.join(fig_dir, 'action-part-coocurrence.png'), bbox_inches='tight')
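# Plausible sketch of make_slowfast_labels as called above (assumed; the real
# helper is shared with the other label scripts). It assembles the
# video_name / start / end / label columns that SlowFast-style training CSVs
# expect, one row per clip.
import pandas as pd


def make_slowfast_labels(bounds, label_cols, video_names):
    slowfast_labels = pd.DataFrame({'video_name': list(video_names)})
    slowfast_labels['start'] = bounds['start'].to_numpy()
    slowfast_labels['end'] = bounds['end'].to_numpy()
    labels_df = pd.DataFrame(label_cols).reset_index(drop=True)
    return pd.concat([slowfast_labels, labels_df], axis=1)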
def main(
        out_dir=None, annotation_dir=None, frames_dir=None,
        win_params={}, slowfast_csv_params={},
        label_types=('event', 'action', 'part')):
    out_dir = os.path.expanduser(out_dir)
    annotation_dir = os.path.expanduser(annotation_dir)
    frames_dir = os.path.expanduser(frames_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_labels_dir = os.path.join(out_dir, 'labels')
    if not os.path.exists(out_labels_dir):
        os.makedirs(out_labels_dir)

    data_dirs = {name: os.path.join(out_dir, f"{name}-dataset") for name in label_types}
    for name, dir_ in data_dirs.items():
        if not os.path.exists(dir_):
            os.makedirs(dir_)

    seq_ids, event_labels, vocabs, metadata = load_all_labels(annotation_dir)

    vocabs = {label_name: vocabs[label_name] for label_name in label_types}
    for name, vocab in vocabs.items():
        utils.saveVariable(vocab, 'vocab', data_dirs[name])

    utils.saveMetadata(metadata, out_labels_dir)
    for name, dir_ in data_dirs.items():
        utils.saveMetadata(metadata, dir_)

    logger.info(f"Loaded {len(seq_ids)} sequence labels from {annotation_dir}")

    part_names = [name for name in vocabs['part'] if name != '']
    col_names = [f"{name}_active" for name in part_names]
    integerizers = {
        label_name: {name: i for i, name in enumerate(label_vocab)}
        for label_name, label_vocab in vocabs.items()
    }

    all_slowfast_labels_seg = collections.defaultdict(list)
    all_slowfast_labels_win = collections.defaultdict(list)
    counts = np.zeros((len(vocabs['action']), len(vocabs['part'])), dtype=int)
    for i, seq_id in enumerate(seq_ids):
        logger.info(f"Processing sequence {i + 1} / {len(seq_ids)}")
        seq_id_str = f"seq={seq_id}"

        event_segs = event_labels[i]
        # Ignore 'check booklet' events because they don't have an impact on construction
        event_segs = event_segs.loc[event_segs['event'] != 'check_booklet']

        event_data = make_event_data(
            event_segs,
            sorted(glob.glob(os.path.join(frames_dir, f'{seq_id}', '*.jpg'))),
            integerizers['event'], integerizers['action'], integerizers['part'],
            vocabs['event'].index(''), vocabs['action'].index(''), False)

        # Redefining event segments from the sequence catches background segments
        # that are not annotated in the source labels
        event_segs = make_clips(event_data, vocabs['event'], vocabs['action'], clip_type='segment')
        event_wins = make_clips(
            event_data, vocabs['event'], vocabs['action'],
            clip_type='window', **win_params)

        for name in ('event', 'action'):
            event_segs[f'{name}_id'] = [integerizers[name][n] for n in event_segs[name]]
            event_wins[f'{name}_id'] = [integerizers[name][n] for n in event_wins[name]]

        event_data.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_data.csv"), index=False)
        event_segs.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_segs.csv"), index=False)
        event_wins.to_csv(os.path.join(out_labels_dir, f"{seq_id_str}_wins.csv"), index=False)

        filenames = event_data['fn'].to_list()
        label_indices = {}
        bound_keys = ['start', 'end']
        for name in label_types:
            if name == 'part':
                label_indices[name] = event_data[col_names].to_numpy()
                label_keys = col_names
            else:
                label_indices[name] = event_data[name].to_numpy()
                label_keys = [f'{name}_id']
            seg_labels_slowfast = make_slowfast_labels(
                event_segs[bound_keys], event_segs[label_keys], event_data['fn'])
            win_labels_slowfast = make_slowfast_labels(
                event_wins[bound_keys], event_wins[label_keys], event_data['fn'])
            utils.saveVariable(filenames, f'{seq_id_str}_frame-fns', data_dirs[name])
            utils.saveVariable(label_indices[name], f'{seq_id_str}_labels', data_dirs[name])
            seg_labels_slowfast.to_csv(
                os.path.join(data_dirs[name], f'{seq_id_str}_slowfast-labels.csv'),
                index=False, **slowfast_csv_params)
            # NOTE: same path as the segment CSV above, so the window labels
            # overwrite it; the aggregate files written below keep both clip types.
            win_labels_slowfast.to_csv(
                os.path.join(data_dirs[name], f'{seq_id_str}_slowfast-labels.csv'),
                index=False, **slowfast_csv_params)
            all_slowfast_labels_seg[name].append(seg_labels_slowfast)
            all_slowfast_labels_win[name].append(win_labels_slowfast)

        plot_event_labels(
            os.path.join(fig_dir, f"{seq_id_str}.png"),
            label_indices['event'], label_indices['action'], label_indices['part'],
            vocabs['event'], vocabs['action'], part_names)

        for part_activity_row, action_index in zip(label_indices['part'], label_indices['action']):
            # Use a distinct loop variable so the outer sequence index isn't shadowed
            for part_col, is_active in enumerate(part_activity_row):
                part_index = integerizers['part'][part_names[part_col]]
                counts[action_index, part_index] += int(is_active)

    for name, labels in all_slowfast_labels_seg.items():
        pd.concat(labels, axis=0).to_csv(
            os.path.join(data_dirs[name], 'slowfast-labels_seg.csv'),
            **slowfast_csv_params)
    for name, labels in all_slowfast_labels_win.items():
        pd.concat(labels, axis=0).to_csv(
            os.path.join(data_dirs[name], 'slowfast-labels_win.csv'),
            **slowfast_csv_params)

    utils.saveVariable(counts, 'action-part-counts', out_labels_dir)

    plt.matshow(counts)
    plt.xticks(ticks=range(len(vocabs['part'])), labels=vocabs['part'], rotation='vertical')
    plt.yticks(ticks=range(len(vocabs['action'])), labels=vocabs['action'])
    plt.savefig(os.path.join(fig_dir, 'action-part-coocurrence.png'), bbox_inches='tight')