Example #1
def train(net, dl, start_epoch, end_epoch, save_frequency):
    # Relies on module-level globals: device, maxScrambles, modelDir, getModelPath.
    optim = torch.optim.Adam(net.parameters())
    # reduction='none' keeps per-sample losses so per-scramble stats can be tracked.
    criterion = utils.CubeLoss('none').to(device)

    for e in range(start_epoch + 1, end_epoch + 1):
        stats = utils.Stats()
        perClass = utils.PerClassStats(maxScrambles)

        for input, target, scrambles in dl:
            optim.zero_grad()
            input, target = input.to(device), target.to(device)
            output = net(input)
            loss, acc = criterion(output, target)
            torch.mean(loss).backward()  # reduce per-sample losses before backprop
            optim.step()
            stats.accumulate(len(target), loss, acc)
            perClass.accumulate(scrambles, loss, acc)

        print(f'Epoch {e}/{end_epoch}:')
        print(f'acc={100*stats.getAcc():.2f}%, loss={stats.getLoss():.3f}')
        print(f'per-class acc: {perClass.accStr()}')
        print()
        if e % save_frequency == 0:
            os.makedirs(modelDir, exist_ok=True)
            filePath = getModelPath(e)
            print(f'Saving to {filePath}')
            torch.save(net, filePath)
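A minimal invocation sketch, assuming the module-level globals the function relies on (device, maxScrambles, modelDir, getModelPath) are already defined; CubeNet and make_dataloader are hypothetical stand-ins for the project's actual model and data pipeline:

# Hypothetical setup: CubeNet and make_dataloader are illustrative names only;
# the loader must yield (input, target, scrambles) batches.
net = CubeNet().to(device)
dl = make_dataloader(batch_size=256)

# Train from scratch to epoch 50, checkpointing every 10 epochs.
train(net, dl, start_epoch=0, end_epoch=50, save_frequency=10)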
Example #2
def train(
    model,
    train_loader,
    device,
    tile_size,
    epochs=10,
    batch_size=1,
    learning_rate=1e-4,
    momentum=0.9,  # unused with Adam; kept in the signature for an SGD variant
    weight_decay=5e-3,
):

    writer = SummaryWriter(
        comment=f'LR_{learning_rate}_BS_{batch_size}_Epochs_{epochs}')

    since = time.time()
    criterion = CrossEntropyLoss2d()  # per-pixel cross entropy (see the sketch after this example)

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay,
    )
    model.train()
    model = model.to(device=device)

    summary(model, (3, tile_size[0], tile_size[1]))

    criterion = criterion.to(device=device)
    training_stats = utils.Stats()

    for n in range(epochs):
        epoch_stats = utils.Stats()
        loader_with_progress = utils.loader_with_progress(train_loader,
                                                          epoch_n=n,
                                                          epoch_total=epochs,
                                                          stats=epoch_stats,
                                                          leave=True)
        progress_bar_output = io.StringIO()
        with redirect_stderr(progress_bar_output):
            for i, (x, y) in enumerate(loader_with_progress):
                y = y.to(device=device)
                x = x.to(device=device)
                y_pred = model(x)
                loss = criterion(y_pred, y)
                epoch_stats.append_loss(loss.item())
                training_stats.append_loss(loss.item())

                loader_with_progress.set_postfix(epoch_stats.fmt_dict())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                writer.add_scalar("training loss", loss.item(),
                                  n * len(train_loader) + i)

    time_elapsed = time.time() - since
    print("Training complete in {:.0f}m {:.0f}s".format(
        time_elapsed // 60, time_elapsed % 60))

    writer.add_graph(model, x)  # logs the graph using the last training batch as sample input
    writer.close()

    return model, training_stats
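CrossEntropyLoss2d is not defined in this example. A common minimal implementation for semantic segmentation applies log_softmax over the channel dimension followed by a per-pixel NLL loss; the sketch below follows that pattern and may differ from the author's actual class:

import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    """Per-pixel cross entropy for (N, C, H, W) logits and (N, H, W) targets."""

    def __init__(self, weight=None):
        super().__init__()
        self.nll_loss = nn.NLLLoss(weight)

    def forward(self, logits, targets):
        # log_softmax over channels, then negative log-likelihood over all pixels.
        return self.nll_loss(F.log_softmax(logits, dim=1), targets)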
Example #3
def do_evaluation(config,
                  datasets,
                  len_past,
                  len_future,
                  save_predictions=False,
                  verbose=0):
    """
    Evaluate the given model on all given datasets.
    :param config: Config to create model.
    :param datasets: List of tuples specifying name and batch size per dataset.
    :param len_past: Number of past frames to use (BiRNN only).
    :param len_future: Number of future frames to use (BiRNN only).
    :param save_predictions: Whether or not to save predictions as pkl files.
    :param verbose: Verbosity level.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    birnn_eval_chunks = False
    model_stamp = config.get('model_id').split("-")[1]
    eval_str = ""
    if config.get("model_type") == C.MODEL_BIRNN:
        if len_past >= 0:
            print("\nBiRNN is evaluated on chunks: " + str(len_past) + "_" +
                  str(len_future))
            birnn_eval_chunks = True
        else:
            print("\nBiRNN is evaluated on the whole sequence.")

        if birnn_eval_chunks:
            eval_str = "past_{}_future_{}_frames".format(len_past, len_future)
            model_stamp += "_p{}_f{}".format(len_past, len_future)
        else:
            eval_str = "all_frames"
            model_stamp += "_all"

    model_cls = config.model_cls
    dataset_cls = config.dataset_cls

    # Data preprocessing configuration.
    preprocessing_ops = config.get_preprocessing_ops()

    # Logger object.
    logger = Logger(os.path.join(config.get('eval_dir'), "evaluation.txt"),
                    sys.stdout)
    performance_text_format = "*** {} (SIP error): {:.4f} (+/- {:.3f})\n"
    performance_text_over_datasets = "\nSummary of model " + config.get(
        'model_id') + "\n"

    for eval_key, batch_size in datasets:
        logger.print('------------------------------------------')
        logger.print('\nEvaluation on ' + eval_key)
        logger.print('\n------------------------------------------\n')

        # Clean slate.
        tf.reset_default_graph()

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            queue_threads = []

            prediction_list = []
            gt_list = []
            if config.get(eval_key, None) is None:
                print("Eval Key {} not found, continue.".format(eval_key))
                continue

            eval_dataset = dataset_cls(config.get(eval_key),
                                       var_len_seq=True,
                                       preprocessing_ops=preprocessing_ops)
            assert eval_dataset.num_samples % batch_size == 0, 'number of samples ({}) must be divisible by batch size ({})'.format(
                eval_dataset.num_samples, batch_size)
            num_eval_iterations = int(eval_dataset.num_samples / batch_size)

            with tf.name_scope(eval_key):
                eval_data_feeder = DataFeederTF(eval_dataset,
                                                1,
                                                batch_size,
                                                queue_capacity=1024,
                                                shuffle=False)
                data_placeholders = eval_data_feeder.batch_queue(
                    dynamic_pad=eval_dataset.is_dynamic,
                    queue_capacity=512,
                    queue_threads=2)
                eval_model = model_cls(config=config,
                                       session=sess,
                                       reuse=False,
                                       mode="validation",
                                       placeholders=data_placeholders,
                                       input_dims=eval_dataset.input_dims,
                                       target_dims=eval_dataset.target_dims,
                                       data_stats=None)
                eval_model.build_graph()

                # Restore the trained weights, either from the requested
                # checkpoint id or from the latest checkpoint in model_dir.
                try:
                    saver = tf.train.Saver()
                    if config.get('checkpoint_id') is None:
                        checkpoint_path = tf.train.latest_checkpoint(
                            config.get("model_dir"))
                    else:
                        checkpoint_path = os.path.join(
                            config.get("model_dir"),
                            config.get("checkpoint_id"))

                    print("Loading model " + checkpoint_path)
                    saver.restore(sess, checkpoint_path)
                except Exception as e:
                    raise Exception("Could not load variables.") from e

            # In case we want to use feed dictionary.
            tf_mask = tf.expand_dims(
                tf.sequence_mask(lengths=data_placeholders[C.PL_SEQ_LEN],
                                 dtype=tf.float32), -1)
            tf_data_fetch = dict()
            tf_data_fetch['targets'] = data_placeholders[C.PL_TARGET]
            tf_data_fetch['mask'] = tf_mask
            tf_data_fetch['inputs'] = data_placeholders[C.PL_INPUT]

            eval_data_feeder.init(sess, coord)
            queue_threads.extend(
                tf.train.start_queue_runners(coord=coord, sess=sess))
            queue_threads.append(eval_data_feeder.enqueue_threads)

            total_loss = 0.0
            total_loss_l2 = 0.0  # never accumulated in this snippet; reported as 0.0 below
            n_data = 0
            dof = 9  # values per joint: 9 for rotation matrices, 4 for quaternions

            # where the sensors are attached
            tracking_sensors = [4, 5, 18, 19, 0, 15]
            sip_eval_sensors = [1, 2, 16, 17]

            # the remaining "sensors" are evaluation sensors
            all_sensors = utils.SMPL_MAJOR_JOINTS
            remaining_eval_sensors = [
                s for s in all_sensors
                if s not in tracking_sensors and s not in sip_eval_sensors
            ]

            with utils.Stats(tracking_sensors, sip_eval_sensors,
                             remaining_eval_sensors, logger) as stats:
                model_evaluation_ops = dict()
                model_evaluation_ops['loss'] = eval_model.ops_loss
                model_evaluation_ops['mask'] = eval_model.seq_loss_mask
                model_evaluation_ops['targets'] = eval_model.pl_targets
                model_evaluation_ops['prediction'] = eval_model.output_sample
                model_evaluation_ops['orientation'] = eval_model.orientation
                model_evaluation_ops['acceleration'] = eval_model.acceleration

                for i in range(num_eval_iterations):
                    if verbose > 0 and (
                        (i + 1) % max(int((num_eval_iterations / 5)), 1) == 0):
                        print(str(i + 1) + "/" + str(num_eval_iterations))

                    if birnn_eval_chunks:
                        np_batch = sess.run(tf_data_fetch)
                        eval_out = eval_model.model.reconstruct_chunks(
                            input_sequence=np_batch['inputs'],
                            target_sequence=np_batch['targets'],
                            len_past=len_past,
                            len_future=len_future)
                        eval_out['mask'] = np_batch['mask']
                        eval_out['targets'] = np_batch['targets']
                        eval_out['prediction'] = eval_out['sample']
                    else:
                        eval_out = sess.run(model_evaluation_ops)

                    total_loss += eval_out['loss']['total_loss'] * batch_size
                    n_data += batch_size

                    pred = undo_smpl(eval_dataset, eval_out['prediction'],
                                     eval_out['mask'][:, :, 0])
                    targ = undo_smpl(eval_dataset, eval_out['targets'],
                                     eval_out['mask'][:, :, 0])

                    if save_predictions:
                        prediction_list.extend(pred)
                        gt_list.extend(targ)

                    # Replace the root joint with the identity orientation:
                    # a flattened 3x3 identity when dof == 9 (rotation matrices),
                    # otherwise the unit quaternion.
                    imu_root = (np.reshape(np.eye(3), [-1]) if dof == 9
                                else np.array([1.0, 0.0, 0.0, 0.0]))
                    for j in range(batch_size):
                        pred[j][:, :dof] = imu_root
                        targ[j][:, :dof] = imu_root

                        ja_diffs, euc_diffs = utils.compute_metrics(
                            prediction=pred[j:j + 1],
                            target=targ[j:j + 1],
                            compute_positional_error=False)
                        stats.add(ja_diffs, euc_diffs)

                total_loss = total_loss / float(n_data) if n_data > 0 else 0.0
                total_loss_l2 = total_loss_l2 / float(
                    n_data) if n_data > 0 else 0.0

                logger.print('\n*** Loss ***\n')
                logger.print(
                    'average main loss per time step: {}\n'.format(total_loss))
                logger.print('average l2 loss per time step  : {}\n'.format(
                    total_loss_l2))
                sip_stats = stats.get_sip_stats()

            performance_text_over_datasets += performance_text_format.format(
                eval_key, sip_stats[0], sip_stats[1])

            if save_predictions:
                out = {"prediction": prediction_list, "gt": gt_list}
                file_name = eval_key + "_" + eval_str if eval_str else eval_key
                np.savez_compressed(
                    os.path.join(config.get("eval_dir"), file_name), **out)

            sess.run(
                eval_data_feeder.input_queue.close(
                    cancel_pending_enqueues=True))
            coord.request_stop()
            coord.join(queue_threads,
                       ignore_live_threads=True,
                       stop_grace_period_secs=1)

    logger.print(performance_text_over_datasets)
    logger.close()
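A hedged invocation sketch, assuming a config object that exposes the accessors used above (get, model_cls, dataset_cls, get_preprocessing_ops). The dataset keys and batch sizes are illustrative only, and each batch size must divide the corresponding dataset's sample count:

# Hypothetical call: "validation" and "test" must exist as keys in config,
# and len_past/len_future only take effect for BiRNN models (chunked evaluation).
datasets = [("validation", 16), ("test", 16)]
do_evaluation(config,
              datasets,
              len_past=20,
              len_future=5,
              save_predictions=True,
              verbose=1)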