Example #1
    def loadInputs(self, seq_id):
        if self.modalities == ['rgb']:
            return self.loadInputsRgb(seq_id)

        trial_prefix = f"trial={seq_id}"
        rgb_attribute_seq = torch.tensor(
            utils.loadVariable(f"{trial_prefix}_score-seq", self.rgb_attributes_dir),
            dtype=torch.float, device=self.device
        )
        rgb_timestamp_seq = utils.loadVariable(
            f"{trial_prefix}_rgb-frame-timestamp-seq",
            from_dir=self.rgb_data_dir
        )
        imu_attribute_seq = torch.tensor(
            utils.loadVariable(f"{trial_prefix}_score-seq", self.imu_attributes_dir),
            dtype=torch.float, device=self.device
        )
        imu_timestamp_seq = utils.loadVariable(f"{trial_prefix}_timestamp-seq", self.imu_data_dir)
        rgb_attribute_seq, imu_attribute_seq = resample(
            rgb_attribute_seq, rgb_timestamp_seq,
            imu_attribute_seq, imu_timestamp_seq
        )

        attribute_feats = {
            'rgb': make_attribute_features(rgb_attribute_seq),
            'imu': make_attribute_features(imu_attribute_seq)
        }

        attribute_feats = torch.cat(
            tuple(attribute_feats[name] for name in self.modalities),
            dim=1
        )

        return attribute_feats
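The `resample` call above aligns the two modality streams before fusion. A minimal sketch of such a helper, assuming nearest-timestamp alignment of IMU samples to RGB frames (the actual implementation may differ):

import numpy as np

def resample(rgb_seq, rgb_timestamps, imu_seq, imu_timestamps):
    # For each RGB timestamp, take the IMU sample nearest in time, so both
    # sequences end up with one row per RGB frame.
    nearest = np.argmin(
        np.abs(np.asarray(imu_timestamps)[None, :]
               - np.asarray(rgb_timestamps)[:, None]),
        axis=1)
    return rgb_seq, imu_seq[nearest]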
Example #2
    def loadInputs(self, seq_id, prefix=None, stride=None):
        if prefix is None:
            prefix = self.prefix

        actions_seq = utils.loadVariable(
            f"{prefix}{seq_id}_{self.feature_fn_format}", self.actions_dir)
        actions_seq = scipy.special.softmax(actions_seq, axis=1)

        events_seq = utils.loadVariable(
            f"{prefix}{seq_id}_{self.feature_fn_format}", self.events_dir)
        events_seq = scipy.special.softmax(events_seq, axis=1)

        parts_seq = utils.loadVariable(
            f"{prefix}{seq_id}_{self.feature_fn_format}", self.parts_dir)
        parts_seq = scipy.special.expit(parts_seq)

        # Edge features are stored under a 'trial=' prefix (cf. Example #14),
        # so override whatever prefix was passed in.
        prefix = 'trial='
        edges_seq = utils.loadVariable(
            f"{prefix}{seq_id}_{self.feature_fn_format}", self.edges_dir)
        edges_seq = scipy.special.softmax(edges_seq, axis=1)
        edges_seq = np.reshape(edges_seq, (edges_seq.shape[0], -1))
        # Keep every 5th frame, matching the [::5] stride applied to edge
        # labels in loadTargets (Example #6).
        edges_seq = edges_seq[::5]

        attribute_feats = {
            'actions': make_attribute_features(actions_seq),
            'parts': make_attribute_features(parts_seq),
            'events': make_attribute_features(events_seq),
            'edges': make_attribute_features(edges_seq)
        }

        attribute_feats = np.concatenate(tuple(attribute_feats[name]
                                               for name in self.modalities),
                                         axis=1)

        return attribute_feats
Example #3
def loadPartInfo(assembly_data_dir, event_attr_fn, connection_attr_fn, background_action=''):
    assembly_vocab = utils.loadVariable('vocab', assembly_data_dir)
    assembly_attrs = utils.loadVariable('assembly-attrs', assembly_data_dir)
    # joint_vocab = tuple(tuple(sorted(joint)) for joint in data['joint_vocab'])
    # part_vocab = tuple(data['part_vocab'])
    joint_vocab = None  # FIXME
    part_vocab = None  # FIXME
    part_categories = {}

    final_assembly_attrs = assembly_attrs  # FIXME

    connection_probs, action_vocab, connection_vocab = connection_attrs_to_probs(
        pd.read_csv(connection_attr_fn, index_col=False, keep_default_na=False),
        # normalize=True
    )

    with open(event_attr_fn, 'rt') as file_:
        event_probs, event_vocab = event_attrs_to_probs(
            json.load(file_),
            part_categories, action_vocab, joint_vocab,
            background_action=background_action,
            # normalize=True
        )

    assembly_probs, assembly_vocab = assembly_attrs_to_probs(
        assembly_attrs, joint_vocab, connection_vocab,
        # normalize=True
    )

    num_assemblies = len(assembly_attrs)
    transition_probs = np.zeros((num_assemblies + 1, num_assemblies + 1), dtype=float)
    transition_probs[0, :-1] = prior_probs(assembly_attrs)
    transition_probs[1:, -1] = final_probs(assembly_attrs, final_assembly_attrs)
    transition_probs[1:, :-1] = assembly_transition_probs(
        assembly_attrs,
        allow_self_transitions=True
    )

    probs = (event_probs, connection_probs, assembly_probs, transition_probs)
    vocabs = {
        'event_vocab': event_vocab,
        'part_vocab': part_vocab,
        'action_vocab': action_vocab,
        'joint_vocab': joint_vocab,
        'connection_vocab': connection_vocab,
        'assembly_vocab': assembly_vocab
    }
    return probs, vocabs
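The transition matrix above packs start and end bookkeeping around the N assemblies; my reading of the indexing (not confirmed by the source), written out as a comment:

# transition_probs has shape (N + 1, N + 1) for N assemblies:
#   row 0, cols 0..N-1     -> prior probability of starting in each assembly
#   rows 1..N, col N       -> probability that each assembly is final
#   rows 1..N, cols 0..N-1 -> assembly-to-assembly transition probabilities
# i.e. row i + 1 corresponds to assembly i, and the last column is the
# end state.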
Example #4
    def loadTargets(self, seq_id):
        trial_prefix = f"trial={seq_id}"
        true_label_seq = torch.tensor(
            utils.loadVariable(f'{trial_prefix}_true-label-seq',
                               self.rgb_attributes_dir),
            dtype=torch.long, device=self.device
        )
        return true_label_seq
Example #5
    def loadAssemblies(self, seq_id, var_name, vocab, prefix='trial='):
        assembly_seq = utils.loadVariable(f"{prefix}{seq_id}_{var_name}",
                                          self.data_dir)
        labels = np.zeros(assembly_seq[-1].end_idx, dtype=int)
        for assembly in assembly_seq:
            i = utils.getIndex(assembly, vocab)
            labels[assembly.start_idx:assembly.end_idx] = i
        return labels
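This method (and the vocab-building loop in Example #22) relies on utils.getIndex to map an assembly to a vocabulary index. A find-or-append sketch consistent with that usage; the real helper may differ:

def getIndex(item, vocab):
    # Return the index of item in vocab, appending it first if unseen.
    try:
        return vocab.index(item)
    except ValueError:
        vocab.append(item)
        return len(vocab) - 1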
Example #6
    def loadTargets(self, seq_id, prefix=None):
        if prefix is None:
            prefix = self.prefix

        if self.labels == 'edges':
            # Edge labels use a 'trial=' prefix and are downsampled by 5,
            # mirroring the edge features in loadInputs (Example #2).
            prefix = 'trial='
            true_label_seq = utils.loadVariable(
                f'{prefix}{seq_id}_{self.label_fn_format}',
                self.data_dirs[self.labels])
            true_label_seq = true_label_seq[::5]
        else:
            true_label_seq = utils.loadVariable(
                f'{prefix}{seq_id}_{self.label_fn_format}',
                self.data_dirs[self.labels])
            # true_label_seq = self.part_labels[true_label_seq]

        return true_label_seq
Example #7
    def loadTargets(self, seq_id):
        trial_prefix = f"trial={seq_id}"
        assembly_seq = utils.loadVariable(f'{trial_prefix}_assembly-seq',
                                          self.rgb_data_dir)
        true_label_seq = torch.tensor(make_labels(assembly_seq, self.vocab),
                                      dtype=torch.long,
                                      device=self.device)
        return true_label_seq
Example #8
    def __init__(
            self, trial_ids, rgb_attributes_dir, rgb_data_dir, imu_attributes_dir, imu_data_dir,
            device=None, modalities=None):
        self.trial_ids = trial_ids

        self.metadata = utils.loadMetadata(rgb_data_dir, rows=trial_ids)
        self.vocab = utils.loadVariable('vocab', rgb_attributes_dir)

        self.rgb_attributes_dir = rgb_attributes_dir
        self.rgb_data_dir = rgb_data_dir
        self.imu_attributes_dir = imu_attributes_dir
        self.imu_data_dir = imu_data_dir
        self.device = device
        self.modalities = modalities
Example #9
    def loadInputsRgb(self, seq_id):
        trial_prefix = f"trial={seq_id}"
        rgb_attribute_seq = torch.tensor(
            utils.loadVariable(f"{trial_prefix}_score-seq", self.rgb_attributes_dir),
            dtype=torch.float, device=self.device
        )

        attribute_feats = {'rgb': make_attribute_features(rgb_attribute_seq)}
        attribute_feats = torch.cat(
            tuple(attribute_feats[name] for name in self.modalities),
            dim=1
        )

        return attribute_feats
Example #10
    def __init__(self, data_dirs, scores_dirs, prefix='seq='):
        self.prefix = prefix

        self.data_dirs = data_dirs
        self.scores_dirs = scores_dirs

        self.seq_ids = utils.getUniqueIds(self.data_dirs['event'],
                                          prefix=prefix,
                                          suffix='labels.*',
                                          to_array=True)

        self.vocabs = {
            name: utils.loadVariable('vocab', dir_)
            for name, dir_ in self.data_dirs.items()
        }
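Several of these classes lean on utils.getUniqueIds to discover sequence ids from filenames. A sketch of its assumed behavior, inferred from the call sites (files named like '<prefix><id>_<suffix>'); the real utility may differ:

import glob
import os
import re

import numpy as np

def getUniqueIds(dir_, prefix='trial=', suffix='*', to_array=False):
    # Extract the integer id from each matching filename and deduplicate.
    pattern = re.compile(re.escape(prefix) + r'(\d+)')
    filenames = glob.glob(os.path.join(dir_, f'{prefix}*_{suffix}'))
    ids = sorted({
        int(pattern.search(os.path.basename(fn)).group(1))
        for fn in filenames})
    return np.array(ids) if to_array else ids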
Example #11
    def _getScores(self, index, label_name='event'):
        if isinstance(index, collections.abc.Iterable):
            return tuple(
                self._getScores(i, label_name=label_name) for i in index)

        if isinstance(label_name, collections.abc.Iterable) and not isinstance(
                label_name, str):
            return tuple(
                self._getScores(index, label_name=ln) for ln in label_name)

        trial_prefix = f"{self.prefix}{index}"
        score_seq = utils.loadVariable(f"{trial_prefix}_score-seq",
                                       self.scores_dirs[label_name])

        return score_seq
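The two isinstance checks above let _getScores fan out over iterables of indices and iterables of label names. A usage sketch, assuming `loader` is an instance of the surrounding class (the index values and the 'action' label name are hypothetical):

# One index, one label name -> a single score array
score_seq = loader._getScores(3)
# A tuple of indices -> a tuple of score arrays, one per index
score_seqs = loader._getScores((0, 1, 2))
# One index, several label names -> a tuple of arrays, one per label
event_scores, action_scores = loader._getScores(3, label_name=('event', 'action'))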
Example #12
def load_vocab(link_vocab, joint_vocab, joint_type_vocab, vocab_dir):
    assembly_vocab = utils.loadVariable('vocab', vocab_dir)
    # FIXME: Convert keys from vertex pairs to edge indices
    edge_vocab = utils.loadVariable('parts-vocab', vocab_dir)
    edge_labels = utils.loadVariable('part-labels', vocab_dir)

    assembly_vocab = tuple(
        lib_assembly.Assembly.from_blockassembly(
            a,
            link_vocab=link_vocab,
            joint_vocab=joint_vocab,
            joint_type_vocab=joint_type_vocab) for a in assembly_vocab)

    edge_vocab = {
        edge_key: tuple(
            lib_assembly.Assembly.from_blockassembly(
                a,
                link_vocab=link_vocab,
                joint_vocab=joint_vocab,
                joint_type_vocab=joint_type_vocab) for a in assemblies)
        for edge_key, assemblies in edge_vocab.items()
    }

    return assembly_vocab, edge_vocab, edge_labels
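The FIXME above wants edge_vocab keys converted from vertex pairs to edge indices. One way to do that, assuming edges enumerate the strict lower triangle as in Example #22 (np.column_stack(np.tril_indices(num_vertices, k=-1))):

import numpy as np

def edge_key_to_index(edge_key, num_vertices):
    # Map an (i, j) vertex pair to its position in the lower-triangle
    # edge enumeration used elsewhere in these examples.
    edges = np.column_stack(np.tril_indices(num_vertices, k=-1))
    i, j = max(edge_key), min(edge_key)
    return int(np.flatnonzero((edges[:, 0] == i) & (edges[:, 1] == j))[0])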
Example #13
def main(out_dir=None, data_dir=None, labels_dir=None):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    labels_dir = os.path.expanduser(labels_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    filenames = [
        utils.stripExtension(fn)
        for fn in glob.glob(os.path.join(labels_dir, '*.csv'))
    ]

    metadata = utils.loadMetadata(data_dir)
    metadata['seq_id'] = metadata.index
    metadata = metadata.set_index(
        'dir_name', drop=False).loc[filenames].set_index('seq_id')

    seq_ids = np.sort(metadata.index.to_numpy())
    logger.info(f"Loaded {len(seq_ids)} sequences from {labels_dir}")

    vocab = []
    for i, seq_id in enumerate(seq_ids):
        seq_id_str = f"seq={seq_id}"
        seq_dir_name = metadata['dir_name'].loc[seq_id]
        labels_fn = os.path.join(labels_dir, f'{seq_dir_name}.csv')
        event_labels = utils.loadVariable(f'{seq_id_str}_labels', data_dir)

        assembly_actions = pd.read_csv(labels_fn)
        label_seq = parseActions(assembly_actions, event_labels.shape[0],
                                 vocab)
        utils.saveVariable(label_seq, f'{seq_id_str}_label-seq', out_data_dir)

        plotLabels(os.path.join(fig_dir, f'{seq_id_str}_labels.png'),
                   label_seq)
        writeLabels(os.path.join(fig_dir, f'{seq_id_str}_labels.csv'),
                    label_seq, vocab)

    utils.saveMetadata(metadata, out_data_dir)
    utils.saveVariable(vocab, 'vocab', out_data_dir)
Example #14
    def __init__(self,
                 actions_dir,
                 parts_dir,
                 events_dir,
                 edges_dir,
                 vocab=None,
                 modalities=('actions', 'parts', 'edges'),
                 labels='edges',
                 prefix='seq=',
                 feature_fn_format='score-seq',
                 label_fn_format='true-label-seq'):
        self.modalities = modalities
        self.labels = labels

        self.actions_dir = actions_dir
        self.parts_dir = parts_dir
        self.events_dir = events_dir
        self.edges_dir = edges_dir

        self.data_dirs = {
            'actions': self.actions_dir,
            'parts': self.parts_dir,
            'events': self.events_dir,
            'edges': self.edges_dir
        }

        labels_dir = self.data_dirs[self.labels]
        self.trial_ids = utils.getUniqueIds(
            labels_dir,
            prefix='trial=' if self.labels == 'edges' else prefix,
            suffix=f'{label_fn_format}.*',
            to_array=True)
        if vocab is None:
            vocab = utils.loadVariable('vocab', labels_dir)
        self.vocab = vocab
        self.metadata = utils.loadMetadata(labels_dir, rows=self.trial_ids)

        self.prefix = prefix
        self.feature_fn_format = feature_fn_format
        self.label_fn_format = label_fn_format
Example #15
    def _getLabels(self, index, label_name='event', load_from_data=False):
        if isinstance(index, collections.abc.Iterable):
            return tuple(
                self._getLabels(
                    i, label_name=label_name, load_from_data=load_from_data)
                for i in index)

        if isinstance(label_name, collections.abc.Iterable) and not isinstance(
                label_name, str):
            return tuple(
                self._getLabels(
                    index, label_name=ln, load_from_data=load_from_data)
                for ln in label_name)

        trial_prefix = f"{self.prefix}{index}"

        if load_from_data:
            dir_ = self.data_dirs[label_name]
            fn = f"{trial_prefix}_labels"
        else:
            dir_ = self.scores_dirs[label_name]
            fn = f"{trial_prefix}_true-label-seq"

        return utils.loadVariable(fn, dir_)
Example #16
def main(out_dir=None,
         data_dir=None,
         scores_dir=None,
         frames_dir=None,
         vocab_from_scores_dir=None,
         only_fold=None,
         plot_io=None,
         prefix='seq=',
         results_file=None,
         sweep_param_name=None,
         model_params={},
         cv_params={}):

    data_dir = os.path.expanduser(data_dir)
    scores_dir = os.path.expanduser(scores_dir)
    frames_dir = os.path.expanduser(frames_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir_images = os.path.join(fig_dir, 'model-io_images')
    if not os.path.exists(io_dir_images):
        os.makedirs(io_dir_images)

    io_dir_plots = os.path.join(fig_dir, 'model-io_plots')
    if not os.path.exists(io_dir_plots):
        os.makedirs(io_dir_plots)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    seq_ids = utils.getUniqueIds(scores_dir,
                                 prefix=prefix,
                                 suffix='pred-label-seq.*',
                                 to_array=True)

    logger.info(
        f"Loaded scores for {len(seq_ids)} sequences from {scores_dir}")

    if vocab_from_scores_dir:
        vocab = utils.loadVariable('vocab', scores_dir)
    else:
        vocab = utils.loadVariable('vocab', data_dir)

    all_metrics = collections.defaultdict(list)

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    all_pred_seqs = []
    all_true_seqs = []

    for cv_index, cv_fold in enumerate(cv_folds):
        if only_fold is not None and cv_index != only_fold:
            continue

        train_indices, val_indices, test_indices = cv_fold
        logger.info(
            f"CV FOLD {cv_index + 1} / {len(cv_folds)}: "
            f"{len(train_indices)} train, {len(val_indices)} val, {len(test_indices)} test"
        )

        for i in test_indices:
            seq_id = seq_ids[i]
            logger.info(f"  Processing sequence {seq_id}...")

            trial_prefix = f"{prefix}{seq_id}"
            score_seq = utils.loadVariable(f"{trial_prefix}_score-seq",
                                           scores_dir)
            pred_seq = utils.loadVariable(f"{trial_prefix}_pred-label-seq",
                                          scores_dir)
            true_seq = utils.loadVariable(f"{trial_prefix}_true-label-seq",
                                          scores_dir)

            metric_dict = eval_metrics(pred_seq, true_seq)
            for name, value in metric_dict.items():
                logger.info(f"    {name}: {value * 100:.2f}%")
                all_metrics[name].append(value)

            utils.writeResults(results_file, metric_dict, sweep_param_name,
                               model_params)

            all_pred_seqs.append(pred_seq)
            all_true_seqs.append(true_seq)

            if plot_io:
                utils.plot_array(score_seq.T, (true_seq, pred_seq),
                                 ('true', 'pred'),
                                 fn=os.path.join(io_dir_plots,
                                                 f"seq={seq_id:03d}.png"))

    # Disabled: aggregate confusion-matrix summary over all sequences.
    if False:
        confusions = metrics.confusionMatrix(all_pred_seqs, all_true_seqs,
                                             len(vocab))
        utils.saveVariable(confusions, "confusions", out_data_dir)

        per_class_acc, class_counts = metrics.perClassAcc(confusions,
                                                          return_counts=True)
        class_preds = confusions.sum(axis=1)
        logger.info(f"MACRO ACC: {np.nanmean(per_class_acc) * 100:.2f}%")

        metrics.plotConfusions(os.path.join(fig_dir, 'confusions.png'),
                               confusions, vocab)
        metrics.plotPerClassAcc(os.path.join(fig_dir,
                                             'per-class-results.png'), vocab,
                                per_class_acc, class_preds, class_counts)
Example #17
def main(out_dir=None,
         data_dir=None,
         segs_dir=None,
         scores_dir=None,
         vocab_dir=None,
         label_type='edge',
         gpu_dev_id=None,
         start_from=None,
         stop_at=None,
         num_disp_imgs=None,
         results_file=None,
         sweep_param_name=None,
         model_params={},
         cv_params={}):

    data_dir = os.path.expanduser(data_dir)
    segs_dir = os.path.expanduser(segs_dir)
    scores_dir = os.path.expanduser(scores_dir)
    vocab_dir = os.path.expanduser(vocab_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir_images = os.path.join(fig_dir, 'model-io_images')
    if not os.path.exists(io_dir_images):
        os.makedirs(io_dir_images)

    io_dir_plots = os.path.join(fig_dir, 'model-io_plots')
    if not os.path.exists(io_dir_plots):
        os.makedirs(io_dir_plots)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    seq_ids = utils.getUniqueIds(scores_dir,
                                 prefix='trial=',
                                 suffix='score-seq.*',
                                 to_array=True)

    logger.info(
        f"Loaded scores for {len(seq_ids)} sequences from {scores_dir}")

    link_vocab = {}
    joint_vocab = {}
    joint_type_vocab = {}
    vocab, parts_vocab, part_labels = load_vocab(link_vocab, joint_vocab,
                                                 joint_type_vocab, vocab_dir)
    pred_vocab = []  # FIXME

    if label_type == 'assembly':
        logger.info("Converting assemblies -> edges")
        state_pred_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_pred-label-seq", scores_dir)
            for seq_id in seq_ids)
        state_true_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_true-label-seq", scores_dir)
            for seq_id in seq_ids)
        edge_pred_seqs = tuple(part_labels[seq] for seq in state_pred_seqs)
        edge_true_seqs = tuple(part_labels[seq] for seq in state_true_seqs)
    elif label_type == 'edge':
        logger.info("Converting edges -> assemblies (will take a few minutes)")
        edge_pred_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_pred-label-seq", scores_dir)
            for seq_id in seq_ids)
        edge_true_seqs = tuple(
            utils.loadVariable(f"trial={seq_id}_true-label-seq", scores_dir)
            for seq_id in seq_ids)
        state_pred_seqs = tuple(
            edges_to_assemblies(seq, pred_vocab, parts_vocab, part_labels)
            for seq in edge_pred_seqs)
        state_true_seqs = tuple(
            edges_to_assemblies(seq, vocab, parts_vocab, part_labels)
            for seq in edge_true_seqs)
    else:
        raise ValueError(f"Unrecognized label_type: {label_type}")

    device = torchutils.selectDevice(gpu_dev_id)
    dataset = sim2real.LabeledConnectionDataset(
        utils.loadVariable('parts-vocab', vocab_dir),
        utils.loadVariable('part-labels', vocab_dir),
        utils.loadVariable('vocab', vocab_dir),
        device=device)

    all_metrics = collections.defaultdict(list)

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    for cv_index, cv_fold in enumerate(cv_folds):
        train_indices, val_indices, test_indices = cv_fold
        logger.info(
            f"CV FOLD {cv_index + 1} / {len(cv_folds)}: "
            f"{len(train_indices)} train, {len(val_indices)} val, {len(test_indices)} test"
        )

        train_states = np.hstack(
            tuple(state_true_seqs[i] for i in train_indices))
        train_edges = part_labels[train_states]
        # state_train_vocab = np.unique(train_states)
        # edge_train_vocab = part_labels[state_train_vocab]
        train_freq_bigram, train_freq_unigram = edge_joint_freqs(train_edges)
        # state_probs = utils.makeHistogram(len(vocab), train_states, normalize=True)

        test_states = np.hstack(
            tuple(state_true_seqs[i] for i in test_indices))
        test_edges = part_labels[test_states]
        # state_test_vocab = np.unique(test_states)
        # edge_test_vocab = part_labels[state_test_vocab]
        test_freq_bigram, test_freq_unigram = edge_joint_freqs(test_edges)

        f, axes = plt.subplots(1, 2)
        axes[0].matshow(train_freq_bigram)
        axes[0].set_title('Train')
        axes[1].matshow(test_freq_bigram)
        axes[1].set_title('Test')
        plt.tight_layout()
        plt.savefig(
            os.path.join(fig_dir, f"edge-freqs-bigram_cvfold={cv_index}.png"))

        f, axis = plt.subplots(1)
        axis.stem(train_freq_unigram,
                  label='Train',
                  linefmt='C0-',
                  markerfmt='C0o')
        axis.stem(test_freq_unigram,
                  label='Test',
                  linefmt='C1--',
                  markerfmt='C1o')
        plt.legend()
        plt.tight_layout()
        plt.savefig(
            os.path.join(fig_dir, f"edge-freqs-unigram_cvfold={cv_index}.png"))

        for i in test_indices:
            seq_id = seq_ids[i]
            logger.info(f"  Processing sequence {seq_id}...")

            trial_prefix = f"trial={seq_id}"
            # I include the '.' to differentiate between 'rgb-frame-seq' and
            # 'rgb-frame-seq-before-first-touch'
            # rgb_seq = utils.loadVariable(f"{trial_prefix}_rgb-frame-seq.", data_dir)
            # seg_seq = utils.loadVariable(f"{trial_prefix}_seg-labels-seq", segs_dir)
            score_seq = utils.loadVariable(f"{trial_prefix}_score-seq",
                                           scores_dir)
            # if score_seq.shape[0] != rgb_seq.shape[0]:
            #     err_str = f"scores shape {score_seq.shape} != data shape {rgb_seq.shape}"
            #     raise AssertionError(err_str)

            edge_pred_seq = edge_pred_seqs[i]
            edge_true_seq = edge_true_seqs[i]
            state_pred_seq = state_pred_seqs[i]
            state_true_seq = state_true_seqs[i]

            num_types = np.unique(state_pred_seq).shape[0]
            num_samples = state_pred_seq.shape[0]
            num_total = len(pred_vocab)
            logger.info(
                f"    {num_types} assemblies predicted ({num_total} total); "
                f"{num_samples} samples")

            # edge_freq_bigram, edge_freq_unigram = edge_joint_freqs(edge_true_seq)
            # dist_shift = np.linalg.norm(train_freq_unigram - edge_freq_unigram)
            metric_dict = {
                # 'State OOV rate': oov_rate_state(state_true_seq, state_train_vocab),
                # 'Edge OOV rate': oov_rate_edges(edge_true_seq, edge_train_vocab),
                # 'State avg prob, true': state_probs[state_true_seq].mean(),
                # 'State avg prob, pred': state_probs[state_pred_seq].mean(),
                # 'Edge distribution shift': dist_shift
            }
            metric_dict = eval_edge_metrics(edge_pred_seq,
                                            edge_true_seq,
                                            append_to=metric_dict)
            metric_dict = eval_state_metrics(state_pred_seq,
                                             state_true_seq,
                                             append_to=metric_dict)
            for name, value in metric_dict.items():
                logger.info(f"    {name}: {value * 100:.2f}%")
                all_metrics[name].append(value)

            utils.writeResults(results_file, metric_dict, sweep_param_name,
                               model_params)

            if num_disp_imgs is not None:
                pred_images = tuple(
                    render(dataset, vocab[seg_label])
                    for seg_label in utils.computeSegments(state_pred_seq)[0])
                imageprocessing.displayImages(
                    *pred_images,
                    file_path=os.path.join(
                        io_dir_images,
                        f"seq={seq_id:03d}_pred-assemblies.png"),
                    num_rows=None,
                    num_cols=5)
                true_images = tuple(
                    render(dataset, vocab[seg_label])
                    for seg_label in utils.computeSegments(state_true_seq)[0])
                imageprocessing.displayImages(
                    *true_images,
                    file_path=os.path.join(
                        io_dir_images,
                        f"seq={seq_id:03d}_true-assemblies.png"),
                    num_rows=None,
                    num_cols=5)

                utils.plot_array(score_seq.T,
                                 (edge_true_seq.T, edge_pred_seq.T),
                                 ('true', 'pred'),
                                 fn=os.path.join(io_dir_plots,
                                                 f"seq={seq_id:03d}.png"))
Example #18
    def loadVariable(var_name, from_dir=scores_dir):
        return utils.loadVariable(var_name, from_dir)
Example #19
def main(out_dir=None,
         rgb_data_dir=None,
         rgb_attributes_dir=None,
         rgb_vocab_dir=None,
         imu_data_dir=None,
         imu_attributes_dir=None,
         modalities=['rgb', 'imu'],
         gpu_dev_id=None,
         plot_predictions=None,
         results_file=None,
         sweep_param_name=None,
         model_params={},
         cv_params={},
         train_params={},
         viz_params={}):

    out_dir = os.path.expanduser(out_dir)
    rgb_data_dir = os.path.expanduser(rgb_data_dir)
    rgb_attributes_dir = os.path.expanduser(rgb_attributes_dir)
    rgb_vocab_dir = os.path.expanduser(rgb_vocab_dir)
    imu_data_dir = os.path.expanduser(imu_data_dir)
    imu_attributes_dir = os.path.expanduser(imu_attributes_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    # Load data
    if modalities == ['rgb']:
        trial_ids = utils.getUniqueIds(rgb_data_dir,
                                       prefix='trial=',
                                       to_array=True)
        logger.info(f"Processing {len(trial_ids)} videos")
    else:
        rgb_trial_ids = utils.getUniqueIds(rgb_data_dir,
                                           prefix='trial=',
                                           to_array=True)
        imu_trial_ids = utils.getUniqueIds(imu_data_dir,
                                           prefix='trial=',
                                           to_array=True)
        trial_ids = np.array(
            sorted(set(rgb_trial_ids.tolist()) & set(imu_trial_ids.tolist())))
        logger.info(
            f"Processing {len(trial_ids)} videos common to "
            f"RGB ({len(rgb_trial_ids)} total) and IMU ({len(imu_trial_ids)} total)"
        )

    device = torchutils.selectDevice(gpu_dev_id)
    dataset = FusionDataset(trial_ids,
                            rgb_attributes_dir,
                            rgb_data_dir,
                            imu_attributes_dir,
                            imu_data_dir,
                            device=device,
                            modalities=modalities)
    utils.saveMetadata(dataset.metadata, out_data_dir)
    saveVariable(dataset.vocab, 'vocab')

    # parts_vocab = loadVariable('parts-vocab')
    edge_labels = {
        'rgb': utils.loadVariable('part-labels', rgb_vocab_dir),
        'imu': np.stack([
            labels.inSameComponent(a, lower_tri_only=True)
            for a in dataset.vocab
        ]),
    }
    # edge_labels = revise_edge_labels(edge_labels, input_seqs)

    attribute_labels = tuple(edge_labels[name] for name in modalities)

    logger.info('Making transition probs...')
    transition_probs = make_transition_scores(dataset.vocab)
    saveVariable(transition_probs, 'transition-probs')

    model = AttributeModel(*attribute_labels, device=device)

    if plot_predictions:
        figsize = (12, 3)
        fig, axis = plt.subplots(1, figsize=figsize)
        axis.imshow(edge_labels['rgb'].T, interpolation='none', aspect='auto')
        plt.savefig(os.path.join(fig_dir, "edge-labels.png"))
        plt.close()

    for i, trial_id in enumerate(trial_ids):
        logger.info(f"Processing sequence {trial_id}...")

        trial_prefix = f"trial={trial_id}"

        true_label_seq = dataset.loadTargets(trial_id)
        attribute_feats = dataset.loadInputs(trial_id)

        score_seq = model(attribute_feats)
        pred_label_seq = model.predict(score_seq)

        attribute_feats = attribute_feats.cpu().numpy()
        score_seq = score_seq.cpu().numpy()
        true_label_seq = true_label_seq.cpu().numpy()
        pred_label_seq = pred_label_seq.cpu().numpy()

        saveVariable(score_seq.T, f'{trial_prefix}_score-seq')
        saveVariable(true_label_seq.T, f'{trial_prefix}_label-seq')

        if plot_predictions:
            fn = os.path.join(fig_dir, f'{trial_prefix}.png')
            utils.plot_array(attribute_feats.T,
                             (true_label_seq, pred_label_seq, score_seq),
                             ('gt', 'pred', 'scores'),
                             fn=fn)

        metric_dict = eval_metrics(pred_label_seq, true_label_seq)
        for name, value in metric_dict.items():
            logger.info(f"  {name}: {value * 100:.2f}%")

        utils.writeResults(results_file, metric_dict, sweep_param_name,
                           model_params)
Example #20
def main(out_dir=None,
         data_dir=None,
         scores_dirs={},
         vocab_from_scores_dir=None,
         only_fold=None,
         plot_io=None,
         prefix='seq=',
         results_file=None,
         sweep_param_name=None,
         model_params={},
         cv_params={}):

    data_dir = os.path.expanduser(data_dir)
    scores_dirs = {
        name: os.path.expanduser(dir_)
        for name, dir_ in scores_dirs.items()
    }
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if len(scores_dirs) != 2:
        err_str = (
            f"scores_dirs has {len(scores_dirs)} entries, but this script "
            "compares exactly 2")
        raise NotImplementedError(err_str)

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    seq_ids = utils.getUniqueIds(data_dir,
                                 prefix=prefix,
                                 suffix='labels.*',
                                 to_array=True)

    logger.info(f"Loaded scores for {len(seq_ids)} sequences from {data_dir}")

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    vocabs = {}
    confusions = {}
    accs = {}
    counts = {}
    for expt_name, scores_dir in scores_dirs.items():
        if vocab_from_scores_dir:
            vocabs[expt_name] = utils.loadVariable('vocab', scores_dir)
        else:
            vocabs[expt_name] = utils.loadVariable('vocab', data_dir)

        confusions[expt_name] = utils.loadVariable("confusions", scores_dir)
        per_class_accs, class_counts = metrics.perClassAcc(
            confusions[expt_name], return_counts=True)

        accs[expt_name] = per_class_accs
        counts[expt_name] = class_counts

    vocab = utils.reduce_all_equal(tuple(vocabs.values()))
    class_counts = utils.reduce_all_equal(tuple(counts.values()))

    first_name, second_name = scores_dirs.keys()
    confusions_diff = confusions[first_name] - confusions[second_name]
    acc_diff = accs[first_name] - accs[second_name]

    metrics.plotConfusions(
        os.path.join(fig_dir,
                     f'confusions_{first_name}-minus-{second_name}.png'),
        confusions_diff, vocab)
    metrics.plotPerClassAcc(
        os.path.join(fig_dir, f'accs_{first_name}-minus-{second_name}.png'),
        vocab, acc_diff, confusions_diff.sum(axis=1), class_counts)
Example #21
def main(out_dir=None,
         data_dir=None,
         assembly_data_dir=None,
         scores_dir=None,
         event_attr_fn=None,
         connection_attr_fn=None,
         assembly_attr_fn=None,
         only_fold=None,
         plot_io=None,
         prefix='seq=',
         stop_after=None,
         background_action='',
         model_params={},
         cv_params={},
         stride=None,
         results_file=None,
         sweep_param_name=None):

    data_dir = os.path.expanduser(data_dir)
    assembly_data_dir = os.path.expanduser(assembly_data_dir)
    scores_dir = os.path.expanduser(scores_dir)
    event_attr_fn = os.path.expanduser(event_attr_fn)
    connection_attr_fn = os.path.expanduser(connection_attr_fn)
    assembly_attr_fn = os.path.expanduser(assembly_attr_fn)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    misc_dir = os.path.join(out_dir, 'misc')
    if not os.path.exists(misc_dir):
        os.makedirs(misc_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    seq_ids = utils.getUniqueIds(data_dir,
                                 prefix=prefix,
                                 suffix='labels.*',
                                 to_array=True)

    dataset = utils.FeaturelessCvDataset(seq_ids,
                                         data_dir,
                                         prefix=prefix,
                                         label_fn_format='labels')

    logger.info(
        f"Loaded scores for {len(seq_ids)} sequences from {scores_dir}")

    # Define cross-validation folds
    cv_folds = utils.makeDataSplits(len(seq_ids), **cv_params)
    utils.saveVariable(cv_folds, 'cv-folds', out_data_dir)

    # Load event, connection attributes
    assembly_vocab = tuple(
        tuple(sorted(tuple(sorted(joint)) for joint in a))
        for a in utils.loadVariable('vocab', assembly_data_dir))
    (*probs, assembly_transition_probs), vocabs = loadPartInfo(
        event_attr_fn,
        connection_attr_fn,
        assembly_attr_fn,
        background_action=background_action,
        assembly_vocab=assembly_vocab)
    event_assembly_scores = event_to_assembly_scores(*probs, vocabs)
    assembly_transition_scores = np.log(assembly_transition_probs)
    viz_transition_probs(os.path.join(fig_dir, 'action-transitions'),
                         np.exp(event_assembly_scores), vocabs['event_vocab'])
    write_transition_probs(os.path.join(misc_dir, 'action-transitions'),
                           np.exp(event_assembly_scores),
                           vocabs['event_vocab'], vocabs['assembly_vocab'])

    for cv_index, cv_fold in enumerate(cv_folds):
        if only_fold is not None and cv_index != only_fold:
            continue

        train_indices, val_indices, test_indices = cv_fold
        logger.info(
            f"CV FOLD {cv_index + 1} / {len(cv_folds)}: "
            f"{len(train_indices)} train, {len(val_indices)} val, {len(test_indices)} test"
        )

        train_data, val_data, test_data = dataset.getFold(cv_fold)

        cv_str = f'cvfold={cv_index}'

        class_priors, event_dur_probs = count_priors(train_data[0],
                                                     len(dataset.vocab),
                                                     stride=stride,
                                                     approx_upto=0.95,
                                                     support_only=True)
        event_dur_scores = np.log(event_dur_probs)
        # Overwrite with zeros: duration scores are disabled here.
        event_dur_scores = np.zeros_like(event_dur_scores)
        scores = (event_dur_scores, event_assembly_scores,
                  assembly_transition_scores)

        model = decode.AssemblyActionRecognizer(scores, vocabs, model_params)

        viz_priors(os.path.join(fig_dir, f'{cv_str}_priors'), class_priors,
                   event_dur_probs)
        model.write_fsts(os.path.join(misc_dir, f'{cv_str}_fsts'))
        model.save_vocabs(os.path.join(out_data_dir, f'{cv_str}_model-vocabs'))

        for i, (_, seq_id) in enumerate(zip(*test_data)):
            if stop_after is not None and i >= stop_after:
                break

            trial_prefix = f"{prefix}{seq_id}"

            if model_params['return_label'] == 'input':
                true_seq = utils.loadVariable(f"{trial_prefix}_true-label-seq",
                                              scores_dir)
            elif model_params['return_label'] == 'output':
                try:
                    true_seq = utils.loadVariable(f"{trial_prefix}_label-seq",
                                                  assembly_data_dir)
                    true_seq = true_seq[::stride]
                except AssertionError:
                    # logger.info(f'  Skipping sequence {seq_id}: {e}')
                    continue
            else:
                raise ValueError(
                    f"Unrecognized return_label: "
                    f"{model_params['return_label']}")

            logger.info(f"  Processing sequence {seq_id}...")

            event_score_seq = utils.loadVariable(f"{trial_prefix}_score-seq",
                                                 scores_dir)

            if event_score_seq.shape[0] != true_seq.shape[0]:
                err_str = (f'Event scores shape {event_score_seq.shape} '
                           f'!= labels shape {true_seq.shape}')
                raise AssertionError(err_str)

            # FIXME: the serialized variables are probs, not log-probs
            # event_score_seq = suppress_nonmax(event_score_seq)
            # event_score_seq = np.log(event_score_seq)

            decode_score_seq = model.forward(event_score_seq)
            pred_seq = model.predict(decode_score_seq)

            metric_dict = eval_metrics(pred_seq, true_seq)
            for name, value in metric_dict.items():
                logger.info(f"    {name}: {value * 100:.2f}%")
            utils.writeResults(results_file, metric_dict, sweep_param_name,
                               model_params)

            utils.saveVariable(decode_score_seq, f'{trial_prefix}_score-seq',
                               out_data_dir)
            utils.saveVariable(pred_seq, f'{trial_prefix}_pred-label-seq',
                               out_data_dir)
            utils.saveVariable(true_seq, f'{trial_prefix}_true-label-seq',
                               out_data_dir)

            if plot_io:
                utils.plot_array(event_score_seq.T, (pred_seq.T, true_seq.T),
                                 ('pred', 'true'),
                                 fn=os.path.join(fig_dir,
                                                 f"seq={seq_id:03d}.png"))
                write_labels(
                    os.path.join(misc_dir, f"seq={seq_id:03d}_pred-seq.txt"),
                    pred_seq, model.output_vocab.as_raw())
                write_labels(
                    os.path.join(misc_dir, f"seq={seq_id:03d}_true-seq.txt"),
                    true_seq, model.output_vocab.as_raw())
Example #22
def main(out_dir=None,
         data_dir=None,
         model_name=None,
         gpu_dev_id=None,
         batch_size=None,
         learning_rate=None,
         model_params={},
         cv_params={},
         train_params={},
         viz_params={},
         load_masks_params={},
         kornia_tfs={},
         only_edge=None,
         num_disp_imgs=None,
         results_file=None,
         sweep_param_name=None):

    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir = os.path.join(fig_dir, 'model-io')
    if not os.path.exists(io_dir):
        os.makedirs(io_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def saveVariable(var, var_name, to_dir=out_data_dir):
        return utils.saveVariable(var, var_name, to_dir)

    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)

    vocab = [BlockAssembly()
             ] + [make_single_block_state(i) for i in range(len(defn.blocks))]
    for seq_id in trial_ids:
        assembly_seq = utils.loadVariable(f"trial={seq_id}_assembly-seq",
                                          data_dir)
        for assembly in assembly_seq:
            utils.getIndex(assembly, vocab)
    parts_vocab, part_labels = labels_lib.make_parts_vocab(
        vocab, lower_tri_only=True, append_to_vocab=True)

    if only_edge is not None:
        part_labels = part_labels[:, only_edge:only_edge + 1]

    logger.info(
        f"Loaded {len(trial_ids)} sequences; {len(vocab)} unique assemblies")

    saveVariable(vocab, 'vocab')
    saveVariable(parts_vocab, 'parts-vocab')
    saveVariable(part_labels, 'part-labels')

    device = torchutils.selectDevice(gpu_dev_id)

    if model_name == 'AAE':
        Dataset = sim2real.DenoisingDataset
    elif model_name == 'Resnet':
        Dataset = sim2real.RenderDataset
    elif model_name == 'Connections':
        Dataset = sim2real.ConnectionDataset
    elif model_name == 'Labeled Connections':
        Dataset = sim2real.LabeledConnectionDataset
    else:
        raise ValueError(f"Unrecognized model_name: {model_name}")

    occlusion_masks = loadMasks(**load_masks_params)
    if occlusion_masks is not None:
        logger.info(f"Loaded {occlusion_masks.shape[0]} occlusion masks")

    def make_data(shuffle=True):
        dataset = Dataset(
            parts_vocab,
            part_labels,
            vocab,
            device=device,
            occlusion_masks=occlusion_masks,
            kornia_tfs=kornia_tfs,
        )
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=batch_size,
                                                  shuffle=shuffle)
        return dataset, data_loader

    # Single placeholder fold (no real cross-validation splits here).
    for cv_index, cv_splits in enumerate(range(1)):
        cv_str = f"cvfold={cv_index}"

        train_set, train_loader = make_data(shuffle=True)
        test_set, test_loader = make_data(shuffle=False)
        val_set, val_loader = make_data(shuffle=True)

        if model_name == 'AAE':
            model = sim2real.AugmentedAutoEncoder(train_set.data_shape,
                                                  train_set.num_classes)
            criterion = torchutils.BootstrappedCriterion(
                0.25,
                base_criterion=torch.nn.functional.mse_loss,
            )
            metric_names = ('Reciprocal Loss', )
        elif model_name == 'Resnet':
            model = sim2real.ImageClassifier(train_set.num_classes,
                                             **model_params)
            criterion = torch.nn.CrossEntropyLoss()
            metric_names = ('Loss', 'Accuracy')
        elif model_name == 'Connections':
            model = sim2real.ConnectionClassifier(train_set.label_shape[0],
                                                  **model_params)
            criterion = torch.nn.BCEWithLogitsLoss()
            metric_names = ('Loss', 'Accuracy', 'Precision', 'Recall', 'F1')
        elif model_name == 'Labeled Connections':
            out_dim = int(part_labels.max()) + 1
            num_vertices = len(defn.blocks)
            edges = np.column_stack(np.tril_indices(num_vertices, k=-1))
            if only_edge is not None:
                edges = edges[only_edge:only_edge + 1]
            model = sim2real.LabeledConnectionClassifier(
                out_dim, num_vertices, edges, **model_params)
            if only_edge is not None:
                logger.info(f"Class freqs: {train_set.class_freqs}")
                # criterion = torch.nn.CrossEntropyLoss(weight=1 / train_set.class_freqs[:, 0])
                criterion = torch.nn.CrossEntropyLoss()
            else:
                criterion = torch.nn.CrossEntropyLoss()
            # criterion = torchutils.BootstrappedCriterion(
            #     0.25, base_criterion=torch.nn.functional.cross_entropy,
            # )
            metric_names = ('Loss', 'Accuracy', 'Precision', 'Recall', 'F1')

        model = model.to(device=device)

        optimizer_ft = torch.optim.Adam(model.parameters(),
                                        lr=learning_rate,
                                        betas=(0.9, 0.999),
                                        eps=1e-08,
                                        weight_decay=0,
                                        amsgrad=False)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft,
                                                       step_size=1,
                                                       gamma=1.00)

        train_epoch_log = collections.defaultdict(list)
        val_epoch_log = collections.defaultdict(list)
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        model, last_model_wts = torchutils.trainModel(
            model,
            criterion,
            optimizer_ft,
            lr_scheduler,
            train_loader,
            val_loader,
            device=device,
            metrics=metric_dict,
            train_epoch_log=train_epoch_log,
            val_epoch_log=val_epoch_log,
            **train_params)

        # Test model
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        test_io_batches = torchutils.predictSamples(
            model.to(device=device),
            test_loader,
            criterion=criterion,
            device=device,
            metrics=metric_dict,
            data_labeled=True,
            update_model=False,
            seq_as_batch=train_params['seq_as_batch'],
            return_io_history=True)
        metric_str = '  '.join(str(m) for m in metric_dict.values())
        logger.info('[TST]  ' + metric_str)
        utils.writeResults(results_file, metric_dict, sweep_param_name,
                           model_params)

        for pred_seq, score_seq, feat_seq, label_seq, trial_id in test_io_batches:
            trial_str = f"trial={trial_id}"
            saveVariable(pred_seq.cpu().numpy(), f'{trial_str}_pred-label-seq')
            saveVariable(score_seq.cpu().numpy(), f'{trial_str}_score-seq')
            saveVariable(label_seq.cpu().numpy(),
                         f'{trial_str}_true-label-seq')

        saveVariable(model, f'{cv_str}_model-best')

        if train_epoch_log:
            torchutils.plotEpochLog(train_epoch_log,
                                    subfig_size=(10, 2.5),
                                    title='Training performance',
                                    fn=os.path.join(
                                        fig_dir, f'{cv_str}_train-plot.png'))

        if val_epoch_log:
            torchutils.plotEpochLog(val_epoch_log,
                                    subfig_size=(10, 2.5),
                                    title='Heldout performance',
                                    fn=os.path.join(fig_dir,
                                                    f'{cv_str}_val-plot.png'))

        if num_disp_imgs is not None:
            model.plotBatches(test_io_batches, io_dir, dataset=test_set)
Example #23
def main(
        out_dir=None, data_dir=None, results_file=None, cv_file=None,
        take_log=False, col_format=None, win_params={}, slowfast_csv_params={}):
    out_dir = os.path.expanduser(out_dir)
    data_dir = os.path.expanduser(data_dir)
    results_file = os.path.expanduser(results_file)
    cv_file = os.path.expanduser(cv_file)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    vocab = utils.loadVariable('vocab', data_dir)
    metadata = utils.loadMetadata(data_dir)
    slowfast_labels = pd.read_csv(
        cv_file, keep_default_na=False, index_col=0,
        **slowfast_csv_params
    )
    seg_ids = slowfast_labels.index.to_numpy()
    vid_names = slowfast_labels['video_name'].unique().tolist()
    metadata['seq_id'] = metadata.index
    vid_ids = metadata.set_index('dir_name').loc[vid_names].set_index('seq_id').index
    metadata = metadata.drop('seq_id', axis=1)

    with open(results_file, 'rb') as file_:
        model_probs, gt_labels = pickle.load(file_)
        model_probs = model_probs.numpy()
        gt_labels = gt_labels.numpy()

    if len(model_probs) != len(seg_ids):
        err_str = f"{len(model_probs)} segment scores != {slowfast_labels.shape[0]} CSV rows"
        raise AssertionError(err_str)

    logger.info(f"Loaded {len(seg_ids)} segments, {len(vid_ids)} videos")

    for vid_id, vid_name in zip(vid_ids, vid_names):
        matches_video = (slowfast_labels['video_name'] == vid_name).to_numpy()
        win_labels = gt_labels[matches_video]
        win_probs = model_probs[matches_video, :]

        if win_labels.shape == win_probs.shape:
            # Matching shapes => per-class (multi-label) scores; threshold
            # each class at 0.5. Otherwise take the argmax over classes.
            win_preds = (win_probs > 0.5).astype(int)
        else:
            win_preds = win_probs.argmax(axis=1)

        if take_log:
            win_probs = np.log(win_probs)

        seq_id_str = f"seq={vid_id}"
        utils.saveVariable(win_probs, f'{seq_id_str}_score-seq', out_data_dir)
        utils.saveVariable(win_labels, f'{seq_id_str}_true-label-seq', out_data_dir)
        utils.saveVariable(win_preds, f'{seq_id_str}_pred-label-seq', out_data_dir)
        utils.plot_array(
            win_probs.T, (win_labels.T, win_preds.T), ('true', 'pred'),
            tick_names=vocab,
            fn=os.path.join(fig_dir, f"{seq_id_str}.png"),
            subplot_width=12, subplot_height=5
        )
    utils.saveVariable(vocab, 'vocab', out_data_dir)
    utils.saveMetadata(metadata, out_data_dir)
Example #24
    def loadData(self, seq_id):
        # The trailing '.' distinguishes 'rgb-frame-seq' from
        # 'rgb-frame-seq-before-first-touch' (cf. the note in Example #17).
        rgb_frames = utils.loadVariable(f"trial={seq_id}_rgb-frame-seq.",
                                        self.data_dir)
        seg_frames = utils.loadVariable(f"trial={seq_id}_seg-labels-seq",
                                        self.seg_dir).astype(int)
        return rgb_frames, seg_frames
Example #25
def main(out_dir=None,
         scores_dir=None,
         preprocessed_data_dir=None,
         keyframe_model_name=None,
         subsample_period=None,
         window_size=None,
         corpus_name=None,
         default_annotator=None,
         cv_scheme=None,
         max_trials_per_fold=None,
         model_name=None,
         numeric_backend=None,
         gpu_dev_id=None,
         visualize=False,
         model_config={},
         camera_params_config={}):

    out_dir = os.path.expanduser(out_dir)
    scores_dir = os.path.expanduser(scores_dir)
    preprocessed_data_dir = os.path.expanduser(preprocessed_data_dir)

    m.set_backend('numpy')

    def loadFromWorkingDir(var_name):
        return joblib.load(os.path.join(scores_dir, f"{var_name}.pkl"))

    def saveToWorkingDir(var, var_name):
        joblib.dump(var, os.path.join(out_dir, f"{var_name}.pkl"))

    # Load camera parameters from external file and add them to model config kwargs
    model_config['init_kwargs'].update(
        render.loadCameraParams(**camera_params_config, as_dict=True))

    trial_ids = joblib.load(
        os.path.join(preprocessed_data_dir, 'trial_ids.pkl'))

    corpus = duplocorpus.DuploCorpus(corpus_name)
    assembly_seqs = tuple(
        labels.parseLabelSeq(
            corpus.readLabels(trial_id, default_annotator)[0])
        for trial_id in trial_ids)

    logger.info(f"Selecting keyframes...")
    keyframe_idx_seqs = []
    rgb_keyframe_seqs = []
    depth_keyframe_seqs = []
    seg_keyframe_seqs = []
    background_keyframe_seqs = []
    assembly_keyframe_seqs = []
    for seq_idx, trial_id in enumerate(trial_ids):
        trial_str = f"trial-{trial_id}"
        rgb_frame_seq = loadFromWorkingDir(f'{trial_str}_rgb-frame-seq')
        depth_frame_seq = loadFromWorkingDir(f'{trial_str}_depth-frame-seq')
        segment_seq = loadFromWorkingDir(f'{trial_str}_segment-seq')
        frame_scores = loadFromWorkingDir(f'{trial_str}_frame-scores')
        background_plane_seq = loadFromWorkingDir(
            f'{trial_str}_background-plane-seq')

        assembly_seq = assembly_seqs[seq_idx]
        # FIXME: Get the real frame index numbers instead of approximating
        assembly_seq[-1].end_idx = len(rgb_frame_seq) * subsample_period

        keyframe_idxs = videoprocessing.selectSegmentKeyframes(
            frame_scores, score_thresh=0, prepend_first=True)

        selectKeyframes = functools.partial(utils.select, keyframe_idxs)
        rgb_keyframe_seq = selectKeyframes(rgb_frame_seq)
        depth_keyframe_seq = selectKeyframes(depth_frame_seq)
        seg_keyframe_seq = selectKeyframes(segment_seq)
        background_keyframe_seq = selectKeyframes(background_plane_seq)

        # FIXME: Get the real frame index numbers instead of approximating
        keyframe_idxs_orig = keyframe_idxs * subsample_period
        assembly_keyframe_seq = labels.resampleStateSeq(
            keyframe_idxs_orig, assembly_seq)

        # Store all keyframe sequences in memory
        keyframe_idx_seqs.append(keyframe_idxs)
        rgb_keyframe_seqs.append(rgb_keyframe_seq)
        depth_keyframe_seqs.append(depth_keyframe_seq)
        seg_keyframe_seqs.append(seg_keyframe_seq)
        background_keyframe_seqs.append(background_keyframe_seq)
        assembly_keyframe_seqs.append(assembly_keyframe_seq)

    # Split into train and test sets
    if cv_scheme == 'leave one out':
        num_seqs = len(trial_ids)
        cv_folds = []
        for i in range(num_seqs):
            test_fold = (i, )
            train_fold = tuple(range(0, i)) + tuple(range(i + 1, num_seqs))
            cv_folds.append((train_fold, test_fold))
    elif cv_scheme == 'train on child':
        child_corpus = duplocorpus.DuploCorpus('child')
        child_trial_ids = utils.loadVariable('trial_ids',
                                             'preprocess-all-data', 'child')
        child_assembly_seqs = [
            labels.parseLabelSeq(
                child_corpus.readLabels(trial_id, 'Cathryn')[0])
            for trial_id in child_trial_ids
        ]
        num_easy = len(assembly_keyframe_seqs)
        num_child = len(child_assembly_seqs)
        cv_folds = [(tuple(range(num_easy, num_easy + num_child)),
                     tuple(range(num_easy)))]
        assembly_keyframe_seqs = assembly_keyframe_seqs + child_assembly_seqs
    else:
        raise ValueError(f"Unrecognized cv_scheme: {cv_scheme}")

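    # Saturate the RGB keyframes, masking out background pixels (segment label 0)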
    rgb_keyframe_seqs = tuple(
        tuple(
            imageprocessing.saturateImage(rgb_image,
                                          background_mask=segment_image == 0)
            for rgb_image, segment_image in zip(rgb_frame_seq, seg_frame_seq))
        for rgb_frame_seq, seg_frame_seq in zip(rgb_keyframe_seqs,
                                                seg_keyframe_seqs))

    depth_keyframe_seqs = tuple(
        tuple(depth_image.astype(float) for depth_image in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)

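    # Switch to the torch backend and move all keyframe data onto the selected device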
    device = torchutils.selectDevice(gpu_dev_id)
    m.set_backend('torch')
    m.set_default_device(device)

    assembly_keyframe_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_keyframe_seqs)
    assembly_seqs = tuple(
        tuple(a.to(device=device, in_place=False) for a in seq)
        for seq in assembly_seqs)

    rgb_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.float) for frame in rgb_frame_seq)
        for rgb_frame_seq in rgb_keyframe_seqs)
    depth_keyframe_seqs = tuple(
        tuple(
            m.np.array(frame, dtype=torch.float) for frame in depth_frame_seq)
        for depth_frame_seq in depth_keyframe_seqs)
    seg_keyframe_seqs = tuple(
        tuple(m.np.array(frame, dtype=torch.int) for frame in seg_frame_seq)
        for seg_frame_seq in seg_keyframe_seqs)

    num_cv_folds = len(cv_folds)
    saveToWorkingDir(cv_folds, 'cv-folds')
    for fold_index, (train_idxs, test_idxs) in enumerate(cv_folds):
        logger.info(f"CV FOLD {fold_index + 1} / {num_cv_folds}")

        # Initialize and train model
        utils.validateCvFold(train_idxs, test_idxs)
        selectTrain = functools.partial(utils.select, train_idxs)
        train_assembly_seqs = selectTrain(assembly_keyframe_seqs)
        model = getattr(models, model_name)(**model_config['init_kwargs'])
        logger.info(
            f"  Training {model_name} on {len(train_idxs)} sequences...")
        model.fit(train_assembly_seqs, **model_config['fit_kwargs'])
        logger.info(
            f'    Model trained on {model.num_states} unique assembly states')
        # saveToWorkingDir(model, f'model-fold{fold_index}')

        # Decode on the test set
        selectTest = functools.partial(utils.select, test_idxs)
        test_trial_ids = selectTest(trial_ids)
        test_rgb_keyframe_seqs = selectTest(rgb_keyframe_seqs)
        test_depth_keyframe_seqs = selectTest(depth_keyframe_seqs)
        test_seg_keyframe_seqs = selectTest(seg_keyframe_seqs)
        test_background_keyframe_seqs = selectTest(background_keyframe_seqs)
        test_assembly_keyframe_seqs = selectTest(assembly_keyframe_seqs)
        test_assembly_seqs = selectTest(assembly_seqs)

        logger.info(f"  Testing model on {len(test_idxs)} sequences...")
        for i, trial_id in enumerate(test_trial_ids):
            if max_trials_per_fold is not None and i >= max_trials_per_fold:
                break

            rgb_frame_seq = test_rgb_keyframe_seqs[i]
            depth_frame_seq = test_depth_keyframe_seqs[i]
            seg_frame_seq = test_seg_keyframe_seqs[i]
            background_plane_seq = test_background_keyframe_seqs[i]
            true_assembly_seq = test_assembly_keyframe_seqs[i]
            true_assembly_seq_orig = test_assembly_seqs[i]

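            # Render RGB and depth images of the estimated background plane for each keyframe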
            rgb_background_seq, depth_background_seq = utils.batchProcess(
                model.renderPlane, background_plane_seq, unzip=True)

            logger.info(f'    Decoding video {trial_id}...')
            start_time = time.process_time()
            out = model.predictSeq(rgb_frame_seq, depth_frame_seq,
                                   seg_frame_seq, rgb_background_seq,
                                   depth_background_seq,
                                   **model_config['decode_kwargs'])
            pred_assembly_seq, pred_idx_seq, max_log_probs, log_likelihoods, poses_seq = out
            end_time = time.process_time()
            logger.info(utils.makeProcessTimeStr(end_time - start_time))

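            # Score the predicted assembly sequence against the ground truth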
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq)
            logger.info(f'    ACCURACY: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_true=True)
            logger.info(f'    RECALL: {num_correct} / {num_total}')
            num_correct, num_total = metrics.numberCorrect(
                true_assembly_seq, pred_assembly_seq, ignore_empty_pred=True)
            logger.info(f'    PRECISION: {num_correct} / {num_total}')

            # Save intermediate results
            logger.info("Saving output...")
            saveToWorkingDir(seg_frame_seq, f'segment_seq-{trial_id}')
            saveToWorkingDir(true_assembly_seq_orig,
                             f'true_state_seq_orig-{trial_id}')
            saveToWorkingDir(true_assembly_seq, f'true_state_seq-{trial_id}')
            saveToWorkingDir(pred_assembly_seq, f'pred_state_seq-{trial_id}')
            saveToWorkingDir(poses_seq, f'poses_seq-{trial_id}')
            saveToWorkingDir(background_plane_seq,
                             f'background_plane_seq-{trial_id}')
            saveToWorkingDir(max_log_probs, f'max_log_probs-{trial_id}')
            saveToWorkingDir(log_likelihoods, f'log_likelihoods-{trial_id}')

            # Save figures
            if visualize:
                rgb_rendered_seq, depth_rendered_seq, label_rendered_seq = utils.batchProcess(
                    model.renderScene,
                    pred_assembly_seq,
                    poses_seq,
                    rgb_background_seq,
                    depth_background_seq,
                    unzip=True,
                    static_kwargs={'as_numpy': True})
                if utils.in_ipython_console():
                    file_path = None
                else:
                    trial_str = f"trial-{trial_id}"
                    file_path = os.path.join(out_dir,
                                             f'{trial_str}_best-frames.png')
                rgb_frame_seq = tuple(img.cpu().numpy()
                                      for img in rgb_frame_seq)
                imageprocessing.displayImages(*rgb_frame_seq,
                                              *rgb_rendered_seq,
                                              num_rows=2,
                                              file_path=file_path)
Example #26
def main(out_dir=None,
         data_dir=None,
         prefix='trial=',
         model_name=None,
         gpu_dev_id=None,
         batch_size=None,
         learning_rate=None,
         file_fn_format=None,
         label_fn_format=None,
         start_from=None,
         stop_at=None,
         model_params={},
         cv_params={},
         train_params={},
         viz_params={},
         num_disp_imgs=None,
         viz_templates=None,
         results_file=None,
         sweep_param_name=None):

    data_dir = os.path.expanduser(data_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir = os.path.join(fig_dir, 'model-io')
    if not os.path.exists(io_dir):
        os.makedirs(io_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    # Load data
    trial_ids = utils.getUniqueIds(data_dir, prefix=prefix, to_array=True)
    vocab = utils.loadVariable('vocab', data_dir)
    saveVariable(vocab, 'vocab')

    # Define cross-validation folds
    data_loader = utils.CvDataset(trial_ids,
                                  data_dir,
                                  vocab=vocab,
                                  prefix=prefix,
                                  feature_fn_format=file_fn_format,
                                  label_fn_format=label_fn_format)
    cv_folds = utils.makeDataSplits(len(data_loader.trial_ids), **cv_params)

    device = torchutils.selectDevice(gpu_dev_id)
    labels_dtype = torch.long
    criterion = torch.nn.CrossEntropyLoss()
    metric_names = ('Loss', 'Accuracy')

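    # Wrap one fold's file names and labels in a VideoDataset and a DataLoader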
    def make_dataset(fns, labels, ids, batch_mode='sample', shuffle=True):
        dataset = VideoDataset(fns,
                               labels,
                               device=device,
                               labels_dtype=labels_dtype,
                               seq_ids=ids,
                               batch_size=batch_size,
                               batch_mode=batch_mode)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=shuffle)
        return dataset, loader

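    # Train and evaluate one model per cross-validation fold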
    for cv_index, cv_fold in enumerate(cv_folds):
        if start_from is not None and cv_index < start_from:
            continue

        if stop_at is not None and cv_index > stop_at:
            break

        train_data, val_data, test_data = data_loader.getFold(cv_fold)
        train_set, train_loader = make_dataset(*train_data,
                                               batch_mode='flatten',
                                               shuffle=True)
        test_set, test_loader = make_dataset(*test_data,
                                             batch_mode='flatten',
                                             shuffle=False)
        val_set, val_loader = make_dataset(*val_data,
                                           batch_mode='flatten',
                                           shuffle=True)

        logger.info(
            f'CV fold {cv_index + 1} / {len(cv_folds)}: {len(data_loader.trial_ids)} total '
            f'({len(train_set)} train, {len(val_set)} val, {len(test_set)} test)'
        )

        model = ImageClassifier(len(vocab), **model_params)

        optimizer_ft = torch.optim.Adam(model.parameters(),
                                        lr=learning_rate,
                                        betas=(0.9, 0.999),
                                        eps=1e-08,
                                        weight_decay=0,
                                        amsgrad=False)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft,
                                                       step_size=1,
                                                       gamma=1.00)

        train_epoch_log = collections.defaultdict(list)
        val_epoch_log = collections.defaultdict(list)
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        model, last_model_wts = torchutils.trainModel(
            model,
            criterion,
            optimizer_ft,
            lr_scheduler,
            train_loader,
            val_loader,
            device=device,
            metrics=metric_dict,
            train_epoch_log=train_epoch_log,
            val_epoch_log=val_epoch_log,
            **train_params)

        # Test model
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        test_io_history = torchutils.predictSamples(
            model.to(device=device),
            test_loader,
            criterion=criterion,
            device=device,
            metrics=metric_dict,
            data_labeled=True,
            update_model=False,
            seq_as_batch=train_params['seq_as_batch'],
            return_io_history=True)
        metric_str = '  '.join(str(m) for m in metric_dict.values())
        logger.info('[TST]  ' + metric_str)

        utils.writeResults(results_file,
                           {name: m.value
                            for name, m in metric_dict.items()},
                           sweep_param_name, model_params)

        for pred_seq, score_seq, feat_seq, label_seq, batch_id in test_io_history:
            prefix = f'cvfold={cv_index}_batch={batch_id}'
            saveVariable(pred_seq.cpu().numpy(), f'{prefix}_pred-label-seq')
            saveVariable(score_seq.cpu().numpy(), f'{prefix}_score-seq')
            saveVariable(label_seq.cpu().numpy(), f'{prefix}_true-label-seq')
        saveVariable(test_set.unflatten,
                     f'cvfold={cv_index}_test-set-unflatten')
        saveVariable(model, f'cvfold={cv_index}_{model_name}-best')

        if train_epoch_log:
            torchutils.plotEpochLog(train_epoch_log,
                                    subfig_size=(10, 2.5),
                                    title='Training performance',
                                    fn=os.path.join(
                                        fig_dir,
                                        f'cvfold={cv_index}_train-plot.png'))

        if val_epoch_log:
            torchutils.plotEpochLog(val_epoch_log,
                                    subfig_size=(10, 2.5),
                                    title='Heldout performance',
                                    fn=os.path.join(
                                        fig_dir,
                                        f'cvfold={cv_index}_val-plot.png'))
Example #27
    def loadAll(seq_ids, var_name, from_dir=data_dir, prefix='trial='):
        # Load one variable for every sequence ID and return the results as a tuple
        all_data = tuple(
            utils.loadVariable(f"{prefix}{seq_id}_{var_name}", from_dir)
            for seq_id in seq_ids
        )
        return all_data
Example #28
def main(out_dir=None,
         data_dir=None,
         segs_dir=None,
         pretrained_model_dir=None,
         model_name=None,
         gpu_dev_id=None,
         batch_size=None,
         learning_rate=None,
         start_from=None,
         stop_at=None,
         model_params={},
         cv_params={},
         train_params={},
         viz_params={},
         num_disp_imgs=None,
         viz_templates=None,
         results_file=None,
         sweep_param_name=None):

    data_dir = os.path.expanduser(data_dir)
    segs_dir = os.path.expanduser(segs_dir)
    pretrained_model_dir = os.path.expanduser(pretrained_model_dir)
    out_dir = os.path.expanduser(out_dir)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger = utils.setupRootLogger(filename=os.path.join(out_dir, 'log.txt'))

    if results_file is None:
        results_file = os.path.join(out_dir, 'results.csv')
    else:
        results_file = os.path.expanduser(results_file)

    fig_dir = os.path.join(out_dir, 'figures')
    if not os.path.exists(fig_dir):
        os.makedirs(fig_dir)

    io_dir = os.path.join(fig_dir, 'model-io')
    if not os.path.exists(io_dir):
        os.makedirs(io_dir)

    out_data_dir = os.path.join(out_dir, 'data')
    if not os.path.exists(out_data_dir):
        os.makedirs(out_data_dir)

    def saveVariable(var, var_name, to_dir=out_data_dir):
        utils.saveVariable(var, var_name, to_dir)

    # Load data
    trial_ids = utils.getUniqueIds(data_dir, prefix='trial=', to_array=True)
    vocab = utils.loadVariable('vocab', pretrained_model_dir)
    parts_vocab = utils.loadVariable('parts-vocab', pretrained_model_dir)
    edge_labels = utils.loadVariable('part-labels', pretrained_model_dir)
    saveVariable(vocab, 'vocab')
    saveVariable(parts_vocab, 'parts-vocab')
    saveVariable(edge_labels, 'part-labels')

    # Define cross-validation folds
    data_loader = VideoLoader(trial_ids,
                              data_dir,
                              segs_dir,
                              vocab=vocab,
                              label_fn_format='assembly-seq')
    cv_folds = utils.makeDataSplits(len(data_loader.trial_ids), **cv_params)

    Dataset = sim2real.BlocksConnectionDataset
    device = torchutils.selectDevice(gpu_dev_id)
    label_dtype = torch.long
    labels_dtype = torch.long  # FIXME
    criterion = torch.nn.CrossEntropyLoss()
    metric_names = ('Loss', 'Accuracy')  # extended below for the pretrained model

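    # Wrap one fold's labels in a BlocksConnectionDataset and a DataLoader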
    def make_dataset(labels, ids, batch_mode='sample', shuffle=True):
        dataset = Dataset(vocab,
                          edge_labels,
                          label_dtype,
                          data_loader.loadData,
                          labels,
                          device=device,
                          labels_dtype=labels_dtype,
                          seq_ids=ids,
                          batch_size=batch_size,
                          batch_mode=batch_mode)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=shuffle)
        return dataset, loader

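    # Run training and evaluation for each cross-validation fold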
    for cv_index, cv_fold in enumerate(cv_folds):
        if start_from is not None and cv_index < start_from:
            continue

        if stop_at is not None and cv_index > stop_at:
            break

        train_data, val_data, test_data = data_loader.getFold(cv_fold)
        train_set, train_loader = make_dataset(*train_data,
                                               batch_mode='sample',
                                               shuffle=True)
        test_set, test_loader = make_dataset(*test_data,
                                             batch_mode='flatten',
                                             shuffle=False)
        val_set, val_loader = make_dataset(*val_data,
                                           batch_mode='sample',
                                           shuffle=True)

        logger.info(
            f'CV fold {cv_index + 1} / {len(cv_folds)}: {len(data_loader.trial_ids)} total '
            f'({len(train_set)} train, {len(val_set)} val, {len(test_set)} test)'
        )

        logger.info(
            f"Class freqs (train): {np.squeeze(train_set.class_freqs)}")
        logger.info(f"Class freqs   (val): {np.squeeze(val_set.class_freqs)}")
        logger.info(f"Class freqs  (test): {np.squeeze(test_set.class_freqs)}")

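        # Build either a template-matching classifier or one backed by a pretrained model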
        if model_name == 'template':
            model = sim2real.AssemblyClassifier(vocab, **model_params)
        elif model_name == 'pretrained':
            pretrained_model = utils.loadVariable("cvfold=0_model-best",
                                                  pretrained_model_dir)
            model = sim2real.SceneClassifier(pretrained_model, **model_params)
            metric_names = ('Loss', 'Accuracy', 'Precision', 'Recall', 'F1')
            criterion = torch.nn.CrossEntropyLoss()
            # criterion = torchutils.BootstrappedCriterion(
            #     0.25, base_criterion=torch.nn.functional.cross_entropy,
            # )
        else:
            raise AssertionError(f"Unrecognized model name: {model_name}")

        optimizer_ft = torch.optim.Adam(model.parameters(),
                                        lr=learning_rate,
                                        betas=(0.9, 0.999),
                                        eps=1e-08,
                                        weight_decay=0,
                                        amsgrad=False)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_ft,
                                                       step_size=1,
                                                       gamma=1.00)

        train_epoch_log = collections.defaultdict(list)
        val_epoch_log = collections.defaultdict(list)
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        model, last_model_wts = torchutils.trainModel(
            model,
            criterion,
            optimizer_ft,
            lr_scheduler,
            train_loader,
            val_loader,
            device=device,
            metrics=metric_dict,
            train_epoch_log=train_epoch_log,
            val_epoch_log=val_epoch_log,
            **train_params)

        # Test model
        metric_dict = {name: metrics.makeMetric(name) for name in metric_names}
        test_io_history = torchutils.predictSamples(
            model.to(device=device),
            test_loader,
            criterion=criterion,
            device=device,
            metrics=metric_dict,
            data_labeled=True,
            update_model=False,
            seq_as_batch=train_params['seq_as_batch'],
            return_io_history=True)
        metric_str = '  '.join(str(m) for m in metric_dict.values())
        logger.info('[TST]  ' + metric_str)

        utils.writeResults(results_file,
                           {name: m.value
                            for name, m in metric_dict.items()},
                           sweep_param_name, model_params)

        for pred_seq, score_seq, feat_seq, label_seq, batch_id in test_io_history:
            prefix = f'cvfold={cv_index}_batch={batch_id}'
            saveVariable(pred_seq.cpu().numpy(), f'{prefix}_pred-label-seq')
            saveVariable(score_seq.cpu().numpy(), f'{prefix}_score-seq')
            saveVariable(label_seq.cpu().numpy(), f'{prefix}_true-label-seq')
        saveVariable(test_set.unflatten,
                     f'cvfold={cv_index}_test-set-unflatten')
        saveVariable(model, f'cvfold={cv_index}_{model_name}-best')

        if train_epoch_log:
            torchutils.plotEpochLog(train_epoch_log,
                                    subfig_size=(10, 2.5),
                                    title='Training performance',
                                    fn=os.path.join(
                                        fig_dir,
                                        f'cvfold={cv_index}_train-plot.png'))

        if val_epoch_log:
            torchutils.plotEpochLog(val_epoch_log,
                                    subfig_size=(10, 2.5),
                                    title='Heldout performance',
                                    fn=os.path.join(
                                        fig_dir,
                                        f'cvfold={cv_index}_val-plot.png'))

        if model_name == 'pretrained' and num_disp_imgs is not None:
            cvfold_dir = os.path.join(io_dir, f'cvfold={cv_index}')
            if not os.path.exists(cvfold_dir):
                os.makedirs(cvfold_dir)
            model.plotBatches(test_io_history,
                              cvfold_dir,
                              images_per_fig=num_disp_imgs,
                              dataset=test_set)

        if model_name == 'template' and num_disp_imgs is not None:
            plot_topk(model, test_io_history, num_disp_imgs,
                      os.path.join(io_dir, f"cvfold={cv_index}.png"))

        if viz_templates:
            sim2real.viz_model_params(model, templates_dir=None)