Example 1
def main(config):
    experiment_name, current_time = setup_experiment(config.title, config)

    # normalization (creating t1_landmarks.npy file)
    create_normalization_file(
        use_controls=config.use_controls,
        use_nofcd=config.use_ae,
        mods=config.nb_of_modalities,
    )
    print('Normalization is finished')

    # patch extraction
    get_patch_list(use_controls=config.use_controls,
                   use_fcd=config.use_ae,
                   use_coronal=config.use_coronal,
                   use_sagital=config.use_sagital,
                   augment=config.augment,
                   h=config.height,
                   w=config.width,
                   hard_labeling=config.hard_labeling,
                   mods=config.nb_of_modalities,
                   batch_size=config.batch_size)
    print('Patch extraction is finished')

    # cnn model
    top_k_scores = train_model(mods=config.nb_of_modalities,
                               use_ae=config.use_ae,
                               h=config.height,
                               w=config.width,
                               use_coronal=config.use_coronal,
                               use_sagital=config.use_sagital,
                               use_controls=config.use_controls,
                               latent_dim=config.latent_size,
                               batch_size=config.batch_size,
                               lr=config.lr,
                               weight_decay=config.weight_decay,
                               weight_of_class=config.weight_of_class,
                               n_epochs=config.nb_epochs,
                               n_epochs_ae=config.nb_epochs_ae,
                               p=config.dropout_rate,
                               save_masks=config.save_masks,
                               parallel=config.parallel,
                               experiment_name=experiment_name,
                               temporal_division=config.temporal_division,
                               seed=config.seed)

    print(top_k_scores)
    print('LOO mean top-k score:', top_k_scores.mean())

    # logging
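    # Logs the fraction of cases with a positive top-k score rather than the
    # mean top-k score printed above.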
    log_experiment(config, current_time, (top_k_scores > 0).mean())
Example 2
def run(dispatch_type):
    '''test all dispatchers whose type is dispatch_type'''
    assert dispatch_type in [
        'Tuner', 'Assessor'
    ], 'Unsupported dispatcher type: %s' % (dispatch_type)
    dispatcher_list = TUNER_LIST if dispatch_type == 'Tuner' else ASSESSOR_LIST
    for dispatcher_name in dispatcher_list:
        try:
            # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict
            time.sleep(6)
            test_builtin_dispatcher(dispatch_type, dispatcher_name)
            print(GREEN + 'Test %s %s: TEST PASS' %
                  (dispatcher_name, dispatch_type) + CLEAR)
        except Exception as error:
            print(RED + 'Test %s %s: TEST FAIL' %
                  (dispatcher_name, dispatch_type) + CLEAR)
            print('%r' % error)
            traceback.print_exc()
            raise error
        finally:
            subprocess.run(['nnictl', 'stop'])


if __name__ == '__main__':
    installed = (sys.argv[-1] != '--preinstall')
    setup_experiment(installed)

    run('Tuner')
    run('Assessor')
Example 3
    # test cmd `nnictl stop --port`
    proc = subprocess.run(['nnictl', 'stop', '--port', '8990'])
    assert proc.returncode == 0, '`nnictl stop --port 8990` failed with code %d' % proc.returncode
    snooze()
    assert not detect_port(8990), '`nnictl stop --port 8990` failed to stop the experiment'

    # test cmd `nnictl stop --all`
    proc = subprocess.run(['nnictl', 'stop', '--all'])
    assert proc.returncode == 0, '`nnictl stop --all` failed with code %d' % proc.returncode
    snooze()
    assert not detect_port(8888) and not detect_port(8989), '`nnictl stop --all` failed to stop experiments'


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, required=True)
    parser.add_argument("--preinstall", action='store_true')
    args = parser.parse_args()
    setup_experiment(not args.preinstall)
    try:
        naive_test(args)
        stop_experiment_test(args)
        # TODO: check the output of rest server
        print(GREEN + 'PASS' + CLEAR)
    except Exception as error:
        print(RED + 'FAIL' + CLEAR)
        print('%r' % error)
        traceback.print_exc()
        sys.exit(1)
Example 4
def main(_):
    # Make sure we have a valid config that inherits all the keys defined in the
    # base config.
    validate_config(FLAGS.config, mode="pretrain")

    config = FLAGS.config
    exp_dir = osp.join(config.root_dir, FLAGS.experiment_name)
    setup_experiment(exp_dir, config, FLAGS.resume)

    # No need to do any pretraining if we're loading the raw pretrained
    # ImageNet baseline.
    if FLAGS.raw_imagenet:
        return

    # Setup compute device.
    if torch.cuda.is_available():
        device = torch.device(FLAGS.device)
    else:
        logging.info("No GPU device found. Falling back to CPU.")
        device = torch.device("cpu")
    logging.info("Using device: %s", device)

    # Set RNG seeds.
    if config.seed is not None:
        logging.info("Pretraining experiment seed: %d", config.seed)
        experiment.seed_rngs(config.seed)
        experiment.set_cudnn(config.cudnn_deterministic,
                             config.cudnn_benchmark)
    else:
        logging.info(
            "No RNG seed has been set for this pretraining experiment.")

    logger = Logger(osp.join(exp_dir, "tb"), FLAGS.resume)

    # Load factories.
    (
        model,
        optimizer,
        pretrain_loaders,
        downstream_loaders,
        trainer,
        eval_manager,
    ) = common.get_factories(config, device)

    # Create checkpoint manager.
    checkpoint_dir = osp.join(exp_dir, "checkpoints")
    checkpoint_manager = CheckpointManager(
        checkpoint_dir,
        model=model,
        optimizer=optimizer,
    )

    global_step = checkpoint_manager.restore_or_initialize()
    total_batches = max(1, len(pretrain_loaders["train"]))
    epoch = int(global_step / total_batches)
    complete = False
    stopwatch = Stopwatch()
    try:
        while not complete:
            for batch in pretrain_loaders["train"]:
                train_loss = trainer.train_one_iter(batch)

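                # `not step % freq` is truthy when the step is a multiple of the frequency (including step 0).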
                if not global_step % config.logging_frequency:
                    for k, v in train_loss.items():
                        logger.log_scalar(v, global_step, k, "pretrain")
                    logger.flush()

                if not global_step % config.eval.eval_frequency:
                    # Evaluate the model on the pretraining validation dataset.
                    valid_loss = trainer.eval_num_iters(
                        pretrain_loaders["valid"],
                        config.eval.val_iters,
                    )
                    for k, v in valid_loss.items():
                        logger.log_scalar(v, global_step, k, "pretrain")

                    # Evaluate the model on the downstream datasets.
                    for split, downstream_loader in downstream_loaders.items():
                        eval_to_metric = eval_manager.evaluate(
                            model,
                            downstream_loader,
                            device,
                            config.eval.val_iters,
                        )
                        for eval_name, eval_out in eval_to_metric.items():
                            eval_out.log(
                                logger,
                                global_step,
                                eval_name,
                                f"downstream/{split}",
                            )

                # Save model checkpoint.
                if not global_step % config.checkpointing_frequency:
                    checkpoint_manager.save(global_step)

                # Exit if complete.
                global_step += 1
                if global_step > config.optim.train_max_iters:
                    complete = True
                    break

                time_per_iter = stopwatch.elapsed()
                logging.info(
                    "Iter[{}/{}] (Epoch {}), {:.6f}s/iter, Loss: {:.3f}".
                    format(
                        global_step,
                        config.optim.train_max_iters,
                        epoch,
                        time_per_iter,
                        train_loss["train/total_loss"].item(),
                    ))
                stopwatch.reset()
            epoch += 1

    except KeyboardInterrupt:
        logging.info(
            "Caught keyboard interrupt. Saving model before quitting.")

    finally:
        checkpoint_manager.save(global_step)
        logger.close()
Example 5
def main(_):
    # Make sure we have a valid config that inherits all the keys defined in the
    # base config.
    validate_config(FLAGS.config, mode="rl")

    config = FLAGS.config
    exp_dir = osp.join(
        config.save_dir,
        FLAGS.experiment_name,
        str(FLAGS.seed),
    )
    utils.setup_experiment(exp_dir, config, FLAGS.resume)

    # Setup compute device.
    if torch.cuda.is_available():
        device = torch.device(FLAGS.device)
    else:
        logging.info("No GPU device found. Falling back to CPU.")
        device = torch.device("cpu")
    logging.info("Using device: %s", device)

    # Set RNG seeds.
    if FLAGS.seed is not None:
        logging.info("RL experiment seed: %d", FLAGS.seed)
        experiment.seed_rngs(FLAGS.seed)
        experiment.set_cudnn(config.cudnn_deterministic,
                             config.cudnn_benchmark)
    else:
        logging.info("No RNG seed has been set for this RL experiment.")

    # Load env.
    env = utils.make_env(
        FLAGS.env_name,
        FLAGS.seed,
        action_repeat=config.action_repeat,
        frame_stack=config.frame_stack,
    )
    eval_env = utils.make_env(
        FLAGS.env_name,
        FLAGS.seed + 42,
        action_repeat=config.action_repeat,
        frame_stack=config.frame_stack,
        save_dir=osp.join(exp_dir, "video", "eval"),
    )

    # Dynamically set observation and action space values.
    config.sac.obs_dim = env.observation_space.shape[0]
    config.sac.action_dim = env.action_space.shape[0]
    config.sac.action_range = [
        float(env.action_space.low.min()),
        float(env.action_space.high.max()),
    ]

    # Resave the config since the dynamic values have been updated at this point
    # and make it immutable for safety :)
    utils.dump_config(exp_dir, config)
    config = config_dict.FrozenConfigDict(config)

    policy = agent.SAC(device, config.sac)

    buffer = utils.make_buffer(env, device, config)

    # Create checkpoint manager.
    checkpoint_dir = osp.join(exp_dir, "checkpoints")
    checkpoint_manager = CheckpointManager(
        checkpoint_dir,
        policy=policy,
        **policy.optim_dict(),
    )

    logger = Logger(osp.join(exp_dir, "tb"), FLAGS.resume)

    try:
        start = checkpoint_manager.restore_or_initialize()
        observation, done = env.reset(), False
        for i in tqdm(range(start, config.num_train_steps), initial=start):
            if i < config.num_seed_steps:
                action = env.action_space.sample()
            else:
                policy.eval()
                action = policy.act(observation, sample=True)
            next_observation, reward, done, info = env.step(action)

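            # mask=1.0 keeps bootstrapping from the next state when the episode is still
            # running or was only cut off by the time limit; mask=0.0 marks a true terminal state.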
            if not done or "TimeLimit.truncated" in info:
                mask = 1.0
            else:
                mask = 0.0

            if not config.reward_wrapper.pretrained_path:
                buffer.insert(observation, action, reward, next_observation,
                              mask)
            else:
                buffer.insert(
                    observation,
                    action,
                    reward,
                    next_observation,
                    mask,
                    env.render(mode="rgb_array"),
                )
            observation = next_observation

            if done:
                observation, done = env.reset(), False
                for k, v in info["episode"].items():
                    logger.log_scalar(v, info["total"]["timesteps"], k,
                                      "training")

            if i >= config.num_seed_steps:
                policy.train()
                train_info = policy.update(buffer, i)

                if (i + 1) % config.log_frequency == 0:
                    for k, v in train_info.items():
                        logger.log_scalar(v, info["total"]["timesteps"], k,
                                          "training")
                    logger.flush()

            if (i + 1) % config.eval_frequency == 0:
                eval_stats = evaluate(policy, eval_env,
                                      config.num_eval_episodes)
                for k, v in eval_stats.items():
                    logger.log_scalar(
                        v,
                        info["total"]["timesteps"],
                        f"average_{k}s",
                        "evaluation",
                    )
                logger.flush()

            if (i + 1) % config.checkpoint_frequency == 0:
                checkpoint_manager.save(i)

    except KeyboardInterrupt:
        print("Caught keyboard interrupt. Saving before quitting.")

    finally:
        checkpoint_manager.save(i)  # pylint: disable=undefined-loop-variable
        logger.close()
Example 6
def main(args):
    # gpu or cpu
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    args = utils.setup_experiment(args)
    utils.init_logging(args)

    # Loading models
    MODEL_PATH_LOAD = "../lidar_experiments/2d/lidar_unet2d/lidar-unet2d-Nov-08-16:29:49/checkpoints/checkpoint_best.pt"

    train_new_model = True

    # Build data loaders, a model and an optimizer
    if train_new_model:
        model = models.build_model(args).to(device)
    else:
        model = models.build_model(args)
        model.load_state_dict(torch.load(MODEL_PATH_LOAD)['model'][0])
        model.to(device)

    print(model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[5, 15, 30, 50, 100, 250], gamma=0.5)
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters()):,} parameters"
    )

    if args.resume_training:
        state_dict = utils.load_checkpoint(args, model, optimizer, scheduler)
        global_step = state_dict['last_step']
        start_epoch = int(state_dict['last_step'] /
                          (403200 / state_dict['args'].batch_size)) + 1
    else:
        global_step = -1
        start_epoch = 0

    ## Load the pts files
    # Loads as a list of numpy arrays
    scan_line_tensor = torch.load(args.data_path + 'scan_line_tensor.pts')
    train_idx_list = torch.load(args.data_path + 'train_idx_list.pts')
    valid_idx_list = torch.load(args.data_path + 'valid_idx_list.pts')
    sc = torch.load(args.data_path + 'sc.pts')

    # Dataloaders
    train_dataset = LidarLstmDataset(scan_line_tensor, train_idx_list,
                                     args.seq_len, args.mask_pts_per_seq)
    valid_dataset = LidarLstmDataset(scan_line_tensor, valid_idx_list,
                                     args.seq_len, args.mask_pts_per_seq)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=4,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=4,
                                               shuffle=True)

    # Track moving average of loss values
    train_meters = {
        name: utils.RunningAverageMeter(0.98)
        for name in (["train_loss"])
    }
    valid_meters = {name: utils.AverageMeter() for name in (["valid_loss"])}
    writer = SummaryWriter(
        log_dir=args.experiment_dir) if not args.no_visual else None

    ##################################################
    # TRAINING
    for epoch in range(start_epoch, args.num_epochs):
        if args.resume_training:
            if epoch % 1 == 0:
                optimizer.param_groups[0]["lr"] /= 2
                print('learning rate reduced by factor of 2')

        train_bar = utils.ProgressBar(train_loader, epoch)
        for meter in train_meters.values():
            meter.reset()

    #     epoch_loss_sum = 0
        for batch_id, (clean, mask) in enumerate(train_bar):
            # dataloader returns [clean, mask] list
            model.train()
            global_step += 1
            inputs = clean.to(device)
            mask_inputs = mask.to(device)
            # only use the mask part of the outputs
            raw_outputs = model(inputs, mask_inputs)
            outputs = (
                1 - mask_inputs[:, :3, :, :]
            ) * raw_outputs + mask_inputs[:, :3, :, :] * inputs[:, :3, :, :]

            if args.wtd_loss:
                loss = weighted_MSELoss(outputs, inputs[:, :3, :, :],
                                        sc) / (inputs.size(0) *
                                               (args.mask_pts_per_seq**2))
                # Regularization?

            else:
                # normalized by the number of masked points
                loss = F.mse_loss(outputs, inputs[:,:3,:,:], reduction="sum") / \
                       (inputs.size(0) * (args.mask_pts_per_seq**2))

            model.zero_grad()
            loss.backward()
            optimizer.step()
            #         epoch_loss_sum += loss * inputs.size(0)
            train_meters["train_loss"].update(loss)
            train_bar.log(dict(**train_meters,
                               lr=optimizer.param_groups[0]["lr"]),
                          verbose=True)

            if writer is not None and global_step % args.log_interval == 0:
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"],
                                  global_step)
                writer.add_scalar("loss/train", loss.item(), global_step)
                gradients = torch.cat([
                    p.grad.view(-1)
                    for p in model.parameters() if p.grad is not None
                ],
                                      dim=0)
                writer.add_histogram("gradients", gradients, global_step)
                sys.stdout.flush()
    #     epoch_loss = epoch_loss_sum / len(train_loader.dataset)

        if epoch % args.valid_interval == 0:
            model.eval()
            for meter in valid_meters.values():
                meter.reset()

            valid_bar = utils.ProgressBar(valid_loader)
            val_loss = 0
            for sample_id, (clean, mask) in enumerate(valid_bar):
                with torch.no_grad():
                    inputs = clean.to(device)
                    mask_inputs = mask.to(device)
                    # only use the mask part of the outputs
                    raw_output = model(inputs, mask_inputs)
                    output = ((1 - mask_inputs[:, :3, :, :]) * raw_output
                              + mask_inputs[:, :3, :, :] * inputs[:, :3, :, :])

                    # TO DO, only run loss on masked part of output

                    if args.wtd_loss:
                        val_loss = weighted_MSELoss(
                            output, inputs[:, :3, :, :],
                            sc) / (inputs.size(0) * (args.mask_pts_per_seq**2))
                    else:
                        # normalized by the number of masked points
                        val_loss = (F.mse_loss(output, inputs[:, :3, :, :], reduction="sum")
                                    / (inputs.size(0) * (args.mask_pts_per_seq**2)))

                    valid_meters["valid_loss"].update(val_loss.item())

            if writer is not None:
                writer.add_scalar("loss/valid", valid_meters['valid_loss'].avg,
                                  global_step)
                sys.stdout.flush()

            logging.info(
                train_bar.print(
                    dict(**train_meters,
                         **valid_meters,
                         lr=optimizer.param_groups[0]["lr"])))
            utils.save_checkpoint(args,
                                  global_step,
                                  model,
                                  optimizer,
                                  score=valid_meters["valid_loss"].avg,
                                  mode="min")
        scheduler.step()

    logging.info(
        f"Done training! Best Loss {utils.save_checkpoint.best_score:.3f} obtained after step {utils.save_checkpoint.best_step}."
    )
Example 7
def main(unused_args):
    assert len(unused_args) == 1, unused_args
    setup_experiment(logging, FLAGS, "critic_model")

    if FLAGS.validation:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=0)
        val_ds = mnist_ds.test
    else:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=FLAGS.validation_size)
        val_ds = mnist_ds.validation
    train_ds = mnist_ds.train
    test_ds = mnist_ds.test
    num_classes = FLAGS.num_classes

    img_shape = [None, 1, 28, 28]
    X = tf.placeholder(tf.float32, shape=img_shape, name='X')
    # placeholder to avoid recomputation of adversarial images for critic
    X_hat_h = tf.placeholder(tf.float32, shape=img_shape, name='X_hat')
    y = tf.placeholder(tf.int32, shape=[None], name='y')
    y_onehot = tf.one_hot(y, num_classes)
    reduce_ind = list(range(1, X.get_shape().ndims))
    # test/validation inputs
    X_v = tf.placeholder(tf.float32, shape=img_shape, name='X_v')
    y_v = tf.placeholder(tf.int32, shape=[None], name='y_v')
    y_v_onehot = tf.one_hot(y_v, num_classes)

    # classifier model
    model = create_model(FLAGS, name=FLAGS.model_name)

    def test_model(x, **kwargs):
        return model(x, train=False, **kwargs)

    # generator
    def generator(inputs, confidence, targets=None):
        return high_confidence_attack_unrolled(
            lambda x: model(x)['logits'],
            inputs,
            targets=targets,
            confidence=confidence,
            max_iter=FLAGS.attack_iter,
            over_shoot=FLAGS.attack_overshoot,
            attack_random=FLAGS.attack_random,
            attack_uniform=FLAGS.attack_uniform,
            attack_label_smoothing=FLAGS.attack_label_smoothing)

    def test_generator(inputs, confidence, targets=None):
        return high_confidence_attack(lambda x: test_model(x)['logits'],
                                      inputs,
                                      targets=targets,
                                      confidence=confidence,
                                      max_iter=FLAGS.df_iter,
                                      over_shoot=FLAGS.df_overshoot,
                                      random=FLAGS.attack_random,
                                      uniform=FLAGS.attack_uniform,
                                      clip_dist=FLAGS.df_clip)

    # discriminator
    critic = create_model(FLAGS, prefix='critic_', name='critic')

    # classifier outputs
    outs_x = model(X)
    outs_x_v = test_model(X_v)
    params = tf.trainable_variables()
    model_weights = [param for param in params if "weights" in param.name]
    vars = tf.model_variables()
    target_conf_v = [None]

    if FLAGS.attack_confidence == "same":
        # set the target confidence to the confidence of the original prediction
        target_confidence = outs_x['conf']
        target_conf_v[0] = target_confidence
    elif FLAGS.attack_confidence == "class_running_mean":
        # set the target confidence to the mean confidence of the specific target
        # use running mean estimate
        class_conf_mean = tf.Variable(np.ones(num_classes, dtype=np.float32))
        batch_conf_mean = tf.unsorted_segment_mean(outs_x['conf'],
                                                   outs_x['pred'], num_classes)
        # if batch does not contain predictions for the specific target
        # (zeroes), replace zeroes with stored class mean (previous batch)
        batch_conf_mean = tf.where(tf.not_equal(batch_conf_mean, 0),
                                   batch_conf_mean, class_conf_mean)
        # update class confidence mean
        class_conf_mean = assign_moving_average(class_conf_mean,
                                                batch_conf_mean, 0.5)
        # init class confidence during pre-training
        tf.add_to_collection("PREINIT_OPS", class_conf_mean)

        def target_confidence(targets_onehot):
            targets = tf.argmax(targets_onehot, axis=1)
            check_conf = tf.Assert(
                tf.reduce_all(tf.not_equal(class_conf_mean, 0)),
                [class_conf_mean])
            with tf.control_dependencies([check_conf]):
                t = tf.gather(class_conf_mean, targets)
            target_conf_v[0] = t
            return tf.stop_gradient(t)
    else:
        target_confidence = float(FLAGS.attack_confidence)
        target_conf_v[0] = target_confidence

    X_hat = generator(X, target_confidence)
    outs_x_hat = model(X_hat)
    # select examples for which attack succeeded (changed the prediction)
    X_hat_filter = tf.not_equal(outs_x['pred'], outs_x_hat['pred'])
    X_hat_f = tf.boolean_mask(X_hat, X_hat_filter)
    X_f = tf.boolean_mask(X, X_hat_filter)

    outs_x_f = model(X_f)
    outs_x_hat_f = model(X_hat_f)
    X_hatd = tf.stop_gradient(X_hat)
    X_rec = generator(X_hatd, outs_x['conf'], outs_x['pred'])
    X_rec_f = tf.boolean_mask(X_rec, X_hat_filter)

    # validation/test adversarial examples
    X_v_hat = test_generator(X_v, FLAGS.val_attack_confidence)
    X_v_hatd = tf.stop_gradient(X_v_hat)
    X_v_rec = test_generator(X_v_hatd,
                             outs_x_v['conf'],
                             targets=outs_x_v['pred'])
    X_v_hat_df = deepfool(lambda x: test_model(x)['logits'],
                          X_v,
                          y_v,
                          max_iter=FLAGS.df_iter,
                          clip_dist=FLAGS.df_clip)
    X_v_hat_df_all = deepfool(lambda x: test_model(x)['logits'],
                              X_v,
                              max_iter=FLAGS.df_iter,
                              clip_dist=FLAGS.df_clip)

    y_hat = outs_x['pred']
    y_adv = outs_x_hat['pred']
    y_adv_f = outs_x_hat_f['pred']
    tf.summary.histogram('y_data', y, collections=["model_summaries"])
    tf.summary.histogram('y_hat', y_hat, collections=["model_summaries"])
    tf.summary.histogram('y_adv', y_adv, collections=["model_summaries"])

    # critic outputs
    critic_outs_x = critic(X)
    critic_outs_x_hat = critic(X_hat_f)
    critic_params = list(set(tf.trainable_variables()) - set(params))
    critic_vars = list(set(tf.trainable_variables()) - set(vars))

    # binary logits for a specific target
    logits_data = critic_outs_x['logits']
    logits_data_flt = tf.reshape(logits_data, (-1, ))
    z_data = tf.gather(logits_data_flt,
                       tf.range(tf.shape(X)[0]) * num_classes + y)
    logits_adv = critic_outs_x_hat['logits']
    logits_adv_flt = tf.reshape(logits_adv, (-1, ))
    z_adv = tf.gather(logits_adv_flt,
                      tf.range(tf.shape(X_hat_f)[0]) * num_classes + y_adv_f)

    # classifier/generator losses
    nll = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_onehot, outs_x['logits']))
    nll_v = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_v_onehot, outs_x_v['logits']))
    # gan losses
    gan = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_adv), z_adv)
    rec_l1 = tf.reduce_mean(
        tf.reduce_sum(tf.abs(X_f - X_rec_f), axis=reduce_ind))
    rec_l2 = tf.reduce_mean(tf.reduce_sum((X_f - X_rec_f)**2, axis=reduce_ind))

    weight_decay = slim.apply_regularization(slim.l2_regularizer(1.0),
                                             model_weights[:-1])
    pretrain_loss = nll + 5e-6 * weight_decay
    loss = nll + FLAGS.lmbd * gan
    if FLAGS.lmbd_rec_l1 > 0:
        loss += FLAGS.lmbd_rec_l1 * rec_l1
    if FLAGS.lmbd_rec_l2 > 0:
        loss += FLAGS.lmbd_rec_l2 * rec_l2
    if FLAGS.weight_decay > 0:
        loss += FLAGS.weight_decay * weight_decay

    # critic loss
    critic_gan_data = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_data),
                                                      z_data)
    # use placeholder for X_hat to avoid recomputation of adversarial noise
    y_adv_h = model(X_hat_h)['pred']
    logits_adv_h = critic(X_hat_h)['logits']
    logits_adv_flt_h = tf.reshape(logits_adv_h, (-1, ))
    z_adv_h = tf.gather(logits_adv_flt_h,
                        tf.range(tf.shape(X_hat_h)[0]) * num_classes + y_adv_h)
    critic_gan_adv = tf.losses.sigmoid_cross_entropy(tf.zeros_like(z_adv_h),
                                                     z_adv_h)
    critic_gan = critic_gan_data + critic_gan_adv

    # Gulrajani discriminator regularizer (we do not interpolate)
    critic_grad_data = tf.gradients(z_data, X)[0]
    critic_grad_adv = tf.gradients(z_adv_h, X_hat_h)[0]
    critic_grad_penalty = norm_penalty(critic_grad_adv) + norm_penalty(
        critic_grad_data)
    critic_loss = critic_gan + FLAGS.lmbd_grad * critic_grad_penalty

    # classifier model_metrics
    err = 1 - slim.metrics.accuracy(outs_x['pred'], y)
    conf = tf.reduce_mean(outs_x['conf'])
    err_hat = 1 - slim.metrics.accuracy(
        test_model(X_hat)['pred'], outs_x['pred'])
    err_hat_f = 1 - slim.metrics.accuracy(
        test_model(X_hat_f)['pred'], outs_x_f['pred'])
    err_rec = 1 - slim.metrics.accuracy(
        test_model(X_rec)['pred'], outs_x['pred'])
    conf_hat = tf.reduce_mean(test_model(X_hat)['conf'])
    conf_hat_f = tf.reduce_mean(test_model(X_hat_f)['conf'])
    conf_rec = tf.reduce_mean(test_model(X_rec)['conf'])
    err_v = 1 - slim.metrics.accuracy(outs_x_v['pred'], y_v)
    conf_v_hat = tf.reduce_mean(test_model(X_v_hat)['conf'])
    l2_hat = tf.sqrt(tf.reduce_sum((X_f - X_hat_f)**2, axis=reduce_ind))
    tf.summary.histogram('l2_hat', l2_hat, collections=["model_summaries"])

    # critic model_metrics
    critic_err_data = 1 - binary_accuracy(
        z_data, tf.ones(tf.shape(z_data), tf.bool), 0.0)
    critic_err_adv = 1 - binary_accuracy(
        z_adv, tf.zeros(tf.shape(z_adv), tf.bool), 0.0)

    # validation model_metrics
    err_df = 1 - slim.metrics.accuracy(test_model(X_v_hat_df)['pred'], y_v)
    err_df_all = 1 - slim.metrics.accuracy(
        test_model(X_v_hat_df_all)['pred'], outs_x_v['pred'])
    l2_v_hat = tf.sqrt(tf.reduce_sum((X_v - X_v_hat)**2, axis=reduce_ind))
    l2_v_rec = tf.sqrt(tf.reduce_sum((X_v - X_v_rec)**2, axis=reduce_ind))
    l1_v_rec = tf.reduce_sum(tf.abs(X_v - X_v_rec), axis=reduce_ind)
    l2_df = tf.sqrt(tf.reduce_sum((X_v - X_v_hat_df)**2, axis=reduce_ind))
    l2_df_norm = l2_df / tf.sqrt(tf.reduce_sum(X_v**2, axis=reduce_ind))
    l2_df_all = tf.sqrt(
        tf.reduce_sum((X_v - X_v_hat_df_all)**2, axis=reduce_ind))
    l2_df_norm_all = l2_df_all / tf.sqrt(tf.reduce_sum(X_v**2,
                                                       axis=reduce_ind))
    tf.summary.histogram('l2_df', l2_df, collections=["adv_summaries"])
    tf.summary.histogram('l2_df_norm',
                         l2_df_norm,
                         collections=["adv_summaries"])

    # model_metrics
    pretrain_model_metrics = OrderedDict([('nll', nll),
                                          ('weight_decay', weight_decay),
                                          ('err', err)])
    model_metrics = OrderedDict([('loss', loss), ('nll', nll),
                                 ('l2_hat', tf.reduce_mean(l2_hat)),
                                 ('gan', gan), ('rec_l1', rec_l1),
                                 ('rec_l2', rec_l2),
                                 ('weight_decay', weight_decay), ('err', err),
                                 ('conf', conf), ('err_hat', err_hat),
                                 ('err_hat_f', err_hat_f),
                                 ('conf_t', tf.reduce_mean(target_conf_v[0])),
                                 ('conf_hat', conf_hat),
                                 ('conf_hat_f', conf_hat_f),
                                 ('err_rec', err_rec), ('conf_rec', conf_rec)])
    critic_metrics = OrderedDict([('c_loss', critic_loss),
                                  ('c_gan', critic_gan),
                                  ('c_gan_data', critic_gan_data),
                                  ('c_gan_adv', critic_gan_adv),
                                  ('c_grad_norm', critic_grad_penalty),
                                  ('c_err_adv', critic_err_adv),
                                  ('c_err_data', critic_err_data)])
    val_metrics = OrderedDict([('nll', nll_v), ('err', err_v)])
    adv_metrics = OrderedDict([('l2_df', tf.reduce_mean(l2_df)),
                               ('l2_df_norm', tf.reduce_mean(l2_df_norm)),
                               ('l2_df_all', tf.reduce_mean(l2_df_all)),
                               ('l2_df_all_norm',
                                tf.reduce_mean(l2_df_norm_all)),
                               ('l2_hat', tf.reduce_mean(l2_v_hat)),
                               ('conf_hat', conf_v_hat),
                               ('l1_rec', tf.reduce_mean(l1_v_rec)),
                               ('l2_rec', tf.reduce_mean(l2_v_rec)),
                               ('err_df', err_df), ('err_df_all', err_df_all)])

    pretrain_metric_mean, pretrain_metric_upd = register_metrics(
        pretrain_model_metrics, collections="pretrain_model_summaries")
    metric_mean, metric_upd = register_metrics(model_metrics,
                                               collections="model_summaries")
    critic_metric_mean, critic_metric_upd = register_metrics(
        critic_metrics, collections="critic_summaries")
    val_metric_mean, val_metric_upd = register_metrics(
        val_metrics, prefix="val_", collections="val_summaries")
    adv_metric_mean, adv_metric_upd = register_metrics(
        adv_metrics, collections="adv_summaries")
    metrics_reset = tf.variables_initializer(tf.local_variables())

    # training ops
    lr = tf.Variable(FLAGS.lr, trainable=False)
    critic_lr = tf.Variable(FLAGS.critic_lr, trainable=False)
    tf.summary.scalar('lr', lr, collections=["model_summaries"])
    tf.summary.scalar('critic_lr', critic_lr, collections=["critic_summaries"])

    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)

    preinit_ops = tf.get_collection("PREINIT_OPS")
    with tf.control_dependencies(preinit_ops):
        pretrain_solver = optimizer.minimize(pretrain_loss, var_list=params)
    solver = optimizer.minimize(loss, var_list=params)
    critic_solver = (tf.train.AdamOptimizer(
        learning_rate=critic_lr, beta1=0.5).minimize(critic_loss,
                                                     var_list=critic_params))

    # train
    summary_images, summary_labels = select_balanced_subset(
        train_ds.images, train_ds.labels, num_classes, num_classes)
    summary_images = summary_images.transpose((0, 3, 1, 2))
    save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
    save_images(summary_images, save_path)

    if FLAGS.gpu_memory < 1.0:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory)
        config = tf.ConfigProto(gpu_options=gpu_options)
    else:
        config = None
    with tf.Session(config=config) as sess:
        try:
            # summaries
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
            summaries = tf.summary.merge_all("model_summaries")
            critic_summaries = tf.summary.merge_all("critic_summaries")
            val_summaries = tf.summary.merge_all("val_summaries")
            adv_summaries = tf.summary.merge_all("adv_summaries")

            # initialization
            tf.local_variables_initializer().run()
            tf.global_variables_initializer().run()

            # pretrain model
            if FLAGS.pretrain_niter > 0:
                logging.info("Model pretraining")
                for epoch in range(1, FLAGS.pretrain_niter + 1):
                    train_iterator = batch_iterator(train_ds.images,
                                                    train_ds.labels,
                                                    FLAGS.batch_size,
                                                    shuffle=True)
                    sess.run(metrics_reset)

                    start_time = time.time()
                    for ind, (images, labels) in enumerate(train_iterator):
                        sess.run([pretrain_solver, pretrain_metric_upd],
                                 feed_dict={
                                     X: images,
                                     y: labels
                                 })

                    str_bfr = six.StringIO()
                    str_bfr.write("Pretrain epoch [{}, {:.2f}s]:".format(
                        epoch,
                        time.time() - start_time))
                    print_results_str(str_bfr, pretrain_model_metrics.keys(),
                                      sess.run(pretrain_metric_mean))
                    print_results_str(str_bfr, critic_metrics.keys(),
                                      sess.run(critic_metric_mean))
                    logging.info(str_bfr.getvalue()[:-1])

            # training
            for epoch in range(1, FLAGS.niter + 1):
                train_iterator = batch_iterator(train_ds.images,
                                                train_ds.labels,
                                                FLAGS.batch_size,
                                                shuffle=True)
                sess.run(metrics_reset)

                start_time = time.time()
                for ind, (images, labels) in enumerate(train_iterator):
                    batch_index = (epoch - 1) * (train_ds.images.shape[0] //
                                                 FLAGS.batch_size) + ind
                    # train critic for several steps
                    X_hat_np = sess.run(X_hat, feed_dict={X: images})
                    for _ in range(FLAGS.critic_steps - 1):
                        sess.run([critic_solver],
                                 feed_dict={
                                     X: images,
                                     y: labels,
                                     X_hat_h: X_hat_np
                                 })
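                    # The for-loop's `else` clause runs once after the loop completes,
                    # so the final critic step also updates the critic metrics and summaries.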
                    else:
                        summary = sess.run([
                            critic_solver, critic_metric_upd, critic_summaries
                        ],
                                           feed_dict={
                                               X: images,
                                               y: labels,
                                               X_hat_h: X_hat_np
                                           })[-1]
                        summary_writer.add_summary(summary, batch_index)
                    # train model
                    summary = sess.run([solver, metric_upd, summaries],
                                       feed_dict={
                                           X: images,
                                           y: labels
                                       })[-1]
                    summary_writer.add_summary(summary, batch_index)

                str_bfr = six.StringIO()
                str_bfr.write("Train epoch [{}, {:.2f}s]:".format(
                    epoch,
                    time.time() - start_time))
                print_results_str(str_bfr, model_metrics.keys(),
                                  sess.run(metric_mean))
                print_results_str(str_bfr, critic_metrics.keys(),
                                  sess.run(critic_metric_mean))
                logging.info(str_bfr.getvalue()[:-1])

                val_iterator = batch_iterator(val_ds.images,
                                              val_ds.labels,
                                              100,
                                              shuffle=False)
                for images, labels in val_iterator:
                    summary = sess.run([val_metric_upd, val_summaries],
                                       feed_dict={
                                           X_v: images,
                                           y_v: labels
                                       })[-1]
                    summary_writer.add_summary(summary, epoch)
                str_bfr = six.StringIO()
                str_bfr.write("Valid epoch [{}]:".format(epoch))
                print_results_str(str_bfr, val_metrics.keys(),
                                  sess.run(val_metric_mean))
                logging.info(str_bfr.getvalue()[:-1])

                # learning rate decay
                update_lr = lr_decay(lr, epoch)
                if update_lr is not None:
                    sess.run(update_lr)
                    logging.debug(
                        "learning rate was updated to: {:.10f}".format(
                            lr.eval()))
                critic_update_lr = lr_decay(critic_lr, epoch, prefix='critic_')
                if critic_update_lr is not None:
                    sess.run(critic_update_lr)
                    logging.debug(
                        "critic learning rate was updated to: {:.10f}".format(
                            critic_lr.eval()))

                if epoch % FLAGS.summary_frequency == 0:
                    samples_hat, samples_rec, samples_df, summary = sess.run(
                        [
                            X_v_hat, X_v_rec, X_v_hat_df, adv_summaries,
                            adv_metric_upd
                        ],
                        feed_dict={
                            X_v: summary_images,
                            y_v: summary_labels
                        })[:-1]
                    summary_writer.add_summary(summary, epoch)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_orig-%d.png' % epoch)
                    save_images(summary_images, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch-%d.png' % epoch)
                    save_images(samples_hat, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_rec-%d.png' % epoch)
                    save_images(samples_rec, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_df-%d.png' % epoch)
                    save_images(samples_df, save_path)

                    str_bfr = six.StringIO()
                    str_bfr.write("Summary epoch [{}]:".format(epoch))
                    print_results_str(str_bfr, adv_metrics.keys(),
                                      sess.run(adv_metric_mean))
                    logging.info(str_bfr.getvalue()[:-1])

                if FLAGS.checkpoint_frequency != -1 and epoch % FLAGS.checkpoint_frequency == 0:
                    save_checkpoint(sess, vars, epoch=epoch)
                    save_checkpoint(sess,
                                    critic_vars,
                                    name="critic_model",
                                    epoch=epoch)
        except KeyboardInterrupt:
            logging.debug("Keyboard interrupt. Stopping training...")
        except NanError as e:
            logging.info(e)
        finally:
            sess.run(metrics_reset)
            save_checkpoint(sess, vars)
            save_checkpoint(sess, critic_vars, name="critic_model")

        # final accuracy
        test_iterator = batch_iterator(test_ds.images,
                                       test_ds.labels,
                                       100,
                                       shuffle=False)
        for images, labels in test_iterator:
            sess.run([val_metric_upd], feed_dict={X_v: images, y_v: labels})
        str_bfr = six.StringIO()
        str_bfr.write("Final epoch [{}]:".format(epoch))
        for metric_name, metric_value in zip(val_metrics.keys(),
                                             sess.run(val_metric_mean)):
            str_bfr.write(" {}: {:.6f},".format(metric_name, metric_value))
        logging.info(str_bfr.getvalue()[:-1])
Example 8
def main(args):
    if not torch.cuda.is_available():
        raise NotImplementedError("Training on CPU is not supported.")
    utils.setup_experiment(args)
    utils.init_logging(args)

    train_loaders, valid_loaders = data.build_dataset(
        args.dataset, args.data_path, batch_size=args.batch_size)
    model = models.build_model(args).cuda()
    optimizer = optim.build_optimizer(args, model.parameters())
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters() if p.requires_grad):,} parameters"
    )

    meters = {
        name: utils.RunningAverageMeter(0.98)
        for name in (["loss", "context", "graph", "target"])
    }
    acc_names = ["overall"
                 ] + [f"task{idx}" for idx in range(len(valid_loaders))]
    acc_meters = {name: utils.AverageMeter() for name in acc_names}
    writer = SummaryWriter(
        log_dir=args.experiment_dir) if not args.no_visual else None

    global_step = -1
    for epoch in range(args.num_epochs):
        acc_tasks = {f"task{idx}": None for idx in range(len(valid_loaders))}
        for task_id, train_loader in enumerate(train_loaders):
            for repeat in range(args.num_repeats_per_task):
                train_bar = utils.ProgressBar(train_loader,
                                              epoch,
                                              prefix=f"task {task_id}")
                for meter in meters.values():
                    meter.reset()

                for batch_id, (images, labels) in enumerate(train_bar):
                    model.train()
                    global_step += 1
                    images, labels = images.cuda(), labels.cuda()
                    outputs = model(images, labels, task_id=task_id)

                    if global_step == 0:
                        continue
                    loss = outputs["loss"]
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                    meters["loss"].update(loss.item())
                    meters["context"].update(outputs["context_loss"].item())
                    meters["target"].update(outputs["target_loss"].item())
                    meters["graph"].update(outputs["graph_loss"].item())
                    train_bar.log(dict(
                        **meters,
                        lr=optimizer.get_lr(),
                    ))

                if writer is not None:
                    writer.add_scalar("loss/train", loss.item(), global_step)
                    gradients = torch.cat([
                        p.grad.view(-1)
                        for p in model.parameters() if p.grad is not None
                    ],
                                          dim=0)
                    writer.add_histogram("gradients", gradients, global_step)

            model.eval()
            for meter in acc_meters.values():
                meter.reset()
            for idx, valid_loader in enumerate(valid_loaders):
                valid_bar = utils.ProgressBar(valid_loader,
                                              epoch,
                                              prefix=f"task {task_id}")
                for batch_id, (images, labels) in enumerate(valid_bar):
                    model.eval()
                    with torch.no_grad():
                        images, labels = images.cuda(), labels.cuda()
                        outputs = model.predict(images, labels, task_id=idx)
                        correct = outputs["preds"].eq(labels).sum().item()
                        acc_meters[f"task{idx}"].update(100 * correct,
                                                        n=len(images))
                acc_meters["overall"].update(acc_meters[f"task{idx}"].avg)

            acc_tasks[f"task{task_id}"] = acc_meters[f"task{task_id}"].avg
            if writer is not None:
                for name, meter in acc_meters.items():
                    writer.add_scalar(f"accuracy/{name}", meter.avg,
                                      global_step)
            logging.info(
                train_bar.print(
                    dict(**meters, **acc_meters, lr=optimizer.get_lr())))
            utils.save_checkpoint(args,
                                  global_step,
                                  model,
                                  optimizer,
                                  score=acc_meters["overall"].avg,
                                  mode="max")

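    # Backward transfer: average difference between each task's final accuracy and its
    # accuracy right after it was learned (negative values indicate forgetting).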
    bwt = sum(acc_meters[task].avg - acc
              for task, acc in acc_tasks.items()) / (len(valid_loaders) - 1)
    logging.info(
        f"Done training! Final accuracy {acc_meters['overall'].avg:.4f}, backward transfer {bwt:.4f}."
    )
Example 9
def main(args):
	device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
	utils.setup_experiment(args)
	utils.init_logging(args)

	# Build data loaders, a model and an optimizer
	model = models.build_model(args).to(device)
	print(model)
	optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
	scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 60, 70, 80, 90, 100], gamma=0.5)
	logging.info(f"Built a model consisting of {sum(p.numel() for p in model.parameters()):,} parameters")
	
	if args.resume_training:
		state_dict = utils.load_checkpoint(args, model, optimizer, scheduler)
		global_step = state_dict['last_step']
		start_epoch = int(state_dict['last_step']/(403200/state_dict['args'].batch_size))+1
	else:
		global_step = -1
		start_epoch = 0
		
	train_loader, valid_loader, _ = data.build_dataset(args.dataset, args.data_path, batch_size=args.batch_size)
	
	# Track moving average of loss values
	train_meters = {name: utils.RunningAverageMeter(0.98) for name in (["train_loss", "train_psnr", "train_ssim"])}
	valid_meters = {name: utils.AverageMeter() for name in (["valid_psnr", "valid_ssim"])}
	writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

	for epoch in range(start_epoch, args.num_epochs):
		if args.resume_training:
			if epoch %10 == 0:
				optimizer.param_groups[0]["lr"] /= 2
				print('learning rate reduced by factor of 2')
				
		train_bar = utils.ProgressBar(train_loader, epoch)
		for meter in train_meters.values():
			meter.reset()

		for batch_id, inputs in enumerate(train_bar):
			model.train()

			global_step += 1
			inputs = inputs.to(device)
			noise = utils.get_noise(inputs, mode=args.noise_mode,
									min_noise=args.min_noise / 255., max_noise=args.max_noise / 255.,
									noise_std=args.noise_std / 255.)

			noisy_inputs = noise + inputs
			outputs = model(noisy_inputs)
			loss = F.mse_loss(outputs, inputs, reduction="sum") / (inputs.size(0) * 2)

			model.zero_grad()
			loss.backward()
			optimizer.step()

			train_psnr = utils.psnr(outputs, inputs)
			train_ssim = utils.ssim(outputs, inputs)
			train_meters["train_loss"].update(loss.item())
			train_meters["train_psnr"].update(train_psnr.item())
			train_meters["train_ssim"].update(train_ssim.item())
			train_bar.log(dict(**train_meters, lr=optimizer.param_groups[0]["lr"]), verbose=True)

			if writer is not None and global_step % args.log_interval == 0:
				writer.add_scalar("lr", optimizer.param_groups[0]["lr"], global_step)
				writer.add_scalar("loss/train", loss.item(), global_step)
				writer.add_scalar("psnr/train", train_psnr.item(), global_step)
				writer.add_scalar("ssim/train", train_ssim.item(), global_step)
				gradients = torch.cat([p.grad.view(-1) for p in model.parameters() if p.grad is not None], dim=0)
				writer.add_histogram("gradients", gradients, global_step)
				sys.stdout.flush()

		if epoch % args.valid_interval == 0:
			model.eval()
			for meter in valid_meters.values():
				meter.reset()

			valid_bar = utils.ProgressBar(valid_loader)
			for sample_id, sample in enumerate(valid_bar):
				with torch.no_grad():
					sample = sample.to(device)
					noise = utils.get_noise(sample, mode='S',
											noise_std=(args.min_noise + args.max_noise) / (2 * 255.))

					noisy_inputs = noise + sample
					output = model(noisy_inputs)
					valid_psnr = utils.psnr(output, sample)
					valid_meters["valid_psnr"].update(valid_psnr.item())
					valid_ssim = utils.ssim(output, sample)
					valid_meters["valid_ssim"].update(valid_ssim.item())

					if writer is not None and sample_id < 10:
						image = torch.cat([sample, noisy_inputs, output], dim=0)
						image = torchvision.utils.make_grid(image.clamp(0, 1), nrow=3, normalize=False)
						writer.add_image(f"valid_samples/{sample_id}", image, global_step)

			if writer is not None:
				writer.add_scalar("psnr/valid", valid_meters['valid_psnr'].avg, global_step)
				writer.add_scalar("ssim/valid", valid_meters['valid_ssim'].avg, global_step)
				sys.stdout.flush()

			logging.info(train_bar.print(dict(**train_meters, **valid_meters, lr=optimizer.param_groups[0]["lr"])))
			utils.save_checkpoint(args, global_step, model, optimizer, score=valid_meters["valid_psnr"].avg, mode="max")
		scheduler.step()

	logging.info(f"Done training! Best PSNR {utils.save_checkpoint.best_score:.3f} obtained after step {utils.save_checkpoint.best_step}.")
Example 10
def ddpg_multiagent(env, agent1, agent2, cfg, db):
    # Get configuration
    n_episodes = cfg["Training"]["Number_episodes"]
    max_t = cfg["Training"]["Max_timesteps"]
    print_every = cfg["Training"]["Score_window"]
    starting_random = cfg["Training"]["Starting_random"]
    brain_index = cfg["Agent"]["Brain_index"]
    persist_mongodb = cfg["Training"]["Persist_mongodb"]
    success = cfg["Environment"]["Success"]

    #Initialize score lists
    scores_deque = deque(maxlen=print_every)
    scores = []
    # Create a directory to save the findings.
    # experiment_dir = setup_experiment(cfg)
    if persist_mongodb:
        experiment_id = setup_experiment(db, cfg)
    brain_name = env.brain_names[brain_index]
    # Train for n_episodes
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        n_agents = len(env_info.agents)
        states = env_info.vector_observations
        agent1.reset()
        agent2.reset()
        actions = np.zeros((n_agents, agent1.action_size))
        score = np.zeros(n_agents)
        for t in range(max_t):
            if i_episode < starting_random:
                actions = 2 * np.random.rand(n_agents, agent1.action_size) - 1.0
            else:
                actions[0, :] = agent1.act(states[0, :])
                actions[1, :] = agent2.act(states[1, :])
                # for i_agent in range(n_agents):
                #     actions[i_agent, :] = agent.act(states[i_agent, :])
            next_states, rewards, dones, _ = step_unity(
                env, actions, brain_name)
            for i_agent in range(n_agents):
                # Add experience to both of the agent's replay buffers
                agent1.step(states[i_agent, :], actions[i_agent, :],
                            rewards[i_agent], next_states[i_agent, :],
                            dones[i_agent])
                agent2.step(states[i_agent, :], actions[i_agent, :],
                            rewards[i_agent], next_states[i_agent, :],
                            dones[i_agent])
            states = next_states
            score += rewards
            if np.any(dones):
                break
        scores_deque.append(score)
        scores.append(score)
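        # Average each agent's score over the window, then report the better agent's average.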
        mean_score = np.vstack(scores_deque).mean(axis=0).max()
        print("\rEpisode {}\tAverage Score: {:.4f}\tNoise Modulation: {:.3f}".
              format(i_episode, mean_score, agent1.noise_modulation),
              end="")
        # print("\rEpisode {}\tAverage Score: {}".format(i_episode, scores_deque), end="")

        visualize = False
        if i_episode % print_every == 0:
            # persist_experiment(experiment_dir, i_episode, agent, scores)
            if persist_mongodb:
                persist_experiment(db, experiment_id, i_episode, agent1,
                                   scores, print_every)
                persist_experiment(db, experiment_id, i_episode, agent2,
                                   scores, print_every)
            else:
                torch.save(agent1.actor_local.state_dict(),
                           f"checkpoint_actor_1_{i_episode}.pth")
                torch.save(agent1.critic_local.state_dict(),
                           f"checkpoint_critic_1_{i_episode}.pth")
                torch.save(agent2.actor_local.state_dict(),
                           f"checkpoint_actor_2_{i_episode}.pth")
                torch.save(agent2.critic_local.state_dict(),
                           f"checkpoint_critic_2_{i_episode}.pth")
            print("\rEpisode {}\tAverage Score: {:.4f}".format(
                i_episode, mean_score))

        if mean_score >= success:
            # This is going to be the first thing I'd be digging in the database for,
            # so here you go.
            fpath_actor_1 = "checkpoint_actor_1_winner_{}.pth".format(
                i_episode)
            fpath_critic_1 = "checkpoint_critic_1_winner_{}.pth".format(
                i_episode)
            torch.save(agent1.actor_local.state_dict(), fpath_actor_1)
            torch.save(agent1.critic_local.state_dict(), fpath_critic_1)
            fpath_actor_2 = "checkpoint_actor_2_winner_{}.pth".format(
                i_episode)
            fpath_critic_2 = "checkpoint_critic_2_winner_{}.pth".format(
                i_episode)
            torch.save(agent2.actor_local.state_dict(), fpath_actor_2)
            torch.save(agent2.critic_local.state_dict(), fpath_critic_2)
            pkl_dump(scores, "scores_winner.pkl")
            break

    return scores
Example 11
def ddpg_selfplay(env, agent, cfg, db):
    # Get configuration
    n_episodes = cfg["Training"]["Number_episodes"]
    max_t = cfg["Training"]["Max_timesteps"]
    print_every = cfg["Training"]["Score_window"]
    starting_random = cfg["Training"]["Starting_random"]
    brain_index = cfg["Agent"]["Brain_index"]
    dump_agent = cfg["Training"]["Dump_agent"]
    success = cfg["Environment"]["Success"]
    persist_mongodb = cfg["Training"]["Persist_mongodb"]
    pretrained = cfg["Training"]["Pretrained"]

    agent.update_every = max_t * starting_random * 2
    #Initialize score lists
    scores_deque = deque(maxlen=print_every)
    scores = []
    # Create a directory to save the findings.
    # experiment_dir = setup_experiment(cfg)
    experiment_id = setup_experiment(db, cfg)
    print("Experiment ID: {}".format(experiment_id))
    brain_name = env.brain_names[brain_index]
    # Train for n_episodes
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        n_agents = len(env_info.agents)
        states = env_info.vector_observations
        agent.reset()
        actions = np.zeros((n_agents, agent.action_size))
        score = np.zeros(n_agents)
        if i_episode == starting_random and pretrained:
            print("Loading pre-trained weights")
            agent = load_agent_weights(agent, cfg)
            print("Pre-trained weights loaded!")
            agent.update_every = cfg["Agent"]["Update_every"]
        for t in range(max_t):
            if i_episode < starting_random:
                actions = 2 * np.random.rand(n_agents, agent.action_size) - 1.0
            else:
                for i_agent in range(n_agents):
                    actions[i_agent, :] = agent.act(states[i_agent, :])
            next_states, rewards, dones, _ = step_unity(
                env, actions, brain_name)
            for i_agent in range(n_agents):
                # Add experience to both of the agent's replay buffers
                agent.step(states[i_agent, :], actions[i_agent, :],
                           rewards[i_agent], next_states[i_agent, :],
                           dones[i_agent])
            states = next_states
            score += rewards
            if np.any(dones):
                break
        scores_deque.append(score)
        scores.append(score)
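        # Take the per-episode max over the two agents, then average over the score window.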
        mean_score = np.vstack(scores_deque).max(axis=1).mean()
        print(
            "\rEpisode {}\tAverage Score: {:.4f}\t Score: {:.3f} {:.3f} noise {:.2f}"
            .format(i_episode, mean_score, score[0], score[1],
                    agent.noise_modulation),
            end="")
        # print("\rEpisode {}\tAverage Score: {}".format(i_episode, scores_deque), end="")

        visualize = False
        if i_episode % print_every == 0:
            print("\rEpisode {}\tAverage Score: {:.4f}".format(
                i_episode, mean_score))
            if persist_mongodb:
                persist_experiment(db, experiment_id, i_episode, agent, scores,
                                   print_every)
            else:
                torch.save(agent.actor_local.state_dict(),
                           f"checkpoint_actor_{i_episode}.pth")
                torch.save(agent.critic_local.state_dict(),
                           f"checkpoint_critic_{i_episode}.pth")
        if i_episode % dump_agent == 0:
            # To heck with it let's save some to disk even though I have utilities to load.
            # It's important.
            fpath_actor = "checkpoint_actor_{}.pth".format(i_episode)
            fpath_critic = "checkpoint_critic_{}.pth".format(i_episode)
            torch.save(agent.actor_local.state_dict(), fpath_actor)
            torch.save(agent.critic_local.state_dict(), fpath_critic)
        if mean_score >= success:
            # This is going to be the first thing I'd be digging in the database for,
            # so here you go.
            fpath_actor = "checkpoint_actor_winner_{}.pth".format(i_episode)
            fpath_critic = "checkpoint_critic_winner_{}.pth".format(i_episode)
            torch.save(agent.actor_local.state_dict(), fpath_actor)
            torch.save(agent.critic_local.state_dict(), fpath_critic)
            pkl_dump(scores, "scores_winner.pkl")
            break

    return scores