Esempio n. 1
0
def main(args):
    """Evaluate every configured model and print robust/clean error statistics.

    For each model returned by ``config_modelloader`` the global
    ``eval_params`` are deep-copied and overridden by the model's own
    ``eval_params`` (if present).  The model is converted with
    ``BoundSequential.convert``, moved to the GPU and evaluated on the test
    loader through ``Train`` (called with its training flag set to ``False``
    and no optimizer) at the fixed perturbation ``eval_config["epsilon"]``.

    Args:
        args: Command-line arguments forwarded to ``load_config``.
    """
    config = load_config(args)
    global_eval_config = config["eval_params"]
    models, model_names = config_modelloader(config, load_pretrain = True)

    robust_errs = []
    errs = []

    for model, model_id, model_config in zip(models, model_names, config["models"]):
        # make a copy of global evaluation config, and update per-model config
        eval_config = copy.deepcopy(global_eval_config)
        if "eval_params" in model_config:
            eval_config.update(model_config["eval_params"])

        model = BoundSequential.convert(model, eval_config["method_params"]["bound_opts"])
        model = model.cuda()
        # read evaluation parameters from config file
        method = eval_config["method"]
        verbose = eval_config["verbose"]
        eps = eval_config["epsilon"]
        # parameters specific to the evaluated method
        method_param = eval_config["method_params"]
        norm = float(eval_config["norm"])
        # Only the test split is evaluated; the training loader is discarded.
        _, test_data = config_dataloader(config, **eval_config["loader_params"])

        model_name = get_path(config, model_id, "model", load = False)
        print(model_name)
        model_log = get_path(config, model_id, "eval_log")
        # The context manager guarantees the per-model log is flushed and
        # closed, even if evaluation raises (previously the handle leaked).
        with open(model_log, "w") as log_file:
            logger = Logger(log_file)
            logger.log("evaluation configurations:", eval_config)

            logger.log("Evaluating...")
            with torch.no_grad():
                # A constant schedule (start == end == eps) keeps epsilon fixed.
                robust_err, err = Train(model, 0, test_data, EpsilonScheduler("linear", 0, 0, eps, eps, 1), eps, norm, logger, verbose, False, None, method, **method_param)
        robust_errs.append(robust_err)
        errs.append(err)

    if not robust_errs:
        # np.min / np.argmin raise ValueError on empty input; report instead.
        print('no models were evaluated, nothing to summarize')
        return

    print('model robust errors (for robustly trained models, not valid for naturally trained models):')
    print(robust_errs)
    robust_errs = np.array(robust_errs)
    print('min: {:.4f}, max: {:.4f}, median: {:.4f}, mean: {:.4f}'.format(np.min(robust_errs), np.max(robust_errs), np.median(robust_errs), np.mean(robust_errs)))
    print('clean errors for models with min, max and median robust errors')
    i_min = np.argmin(robust_errs)
    i_max = np.argmax(robust_errs)
    # Index of the (upper) middle element after sorting by robust error.
    i_median = np.argsort(robust_errs)[len(robust_errs) // 2]
    print('for min: {:.4f}, for max: {:.4f}, for median: {:.4f}'.format(errs[i_min], errs[i_max], errs[i_median]))
    print('model clean errors:')
    print(errs)
    print('min: {:.4f}, max: {:.4f}, median: {:.4f}, mean: {:.4f}'.format(np.min(errs), np.max(errs), np.median(errs), np.mean(errs)))
Esempio n. 2
0
def main(args):
    """Prepare the output directory and training log, then run training.

    The output directory is ``config['env_id']`` plus the optional
    ``name_suffix``, placed under the optional ``path_prefix``.  The log file
    ``train.log`` inside it is kept open for the duration of training and is
    closed when training finishes or raises.

    Args:
        args: Command-line arguments; forwarded to ``load_config`` and written
            to the training log.
    """
    config = load_config(args)
    prefix = config['env_id']
    training_config = config['training_config']
    if config['name_suffix']:
        prefix += config['name_suffix']
    if config['path_prefix']:
        prefix = os.path.join(config['path_prefix'], prefix)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(prefix, exist_ok=True)

    train_log = os.path.join(prefix, 'train.log')
    # The context manager closes the log even if training raises.
    with open(train_log, "w") as log_file:
        logger = Logger(log_file)
        logger.log('Command line:', " ".join(sys.argv[:]))
        logger.log(args)
        logger.log(config)
        _run_training(config, training_config, prefix, logger)


def _run_training(config, training_config, prefix, logger):
    """Build environment, models and replay buffer, then run the frame loop.

    Each frame the loop picks an action with ``current_model`` (optionally on
    an attacked observation), steps the environment, stores the transition in
    the replay buffer and, once the buffer holds more than ``replay_initial``
    transitions, performs one ``compute_td_loss`` update.  Statistics are
    logged, checkpoints saved, the target model synced and ``mini_test`` run
    at the intervals given in ``training_config``.

    Args:
        config: Full configuration dictionary returned by ``load_config``.
        training_config: ``config['training_config']``.
        prefix: Existing output directory for checkpoints and plots.
        logger: Object whose ``log`` method records messages.
    """
    env_params = training_config['env_params']
    env_id = config['env_id']
    if "NoFrameskip" not in env_id:
        env = make_atari_cart(env_id)
    else:
        env = make_atari(env_id)
        env = wrap_deepmind(env, **env_params)
        env = wrap_pytorch(env)

    # Seed every random source used below with the same configured seed.
    seed = training_config['seed']
    env.seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

    state = env.reset()
    dtype = state.dtype
    logger.log("env_shape: {}, num of actions: {}".format(
        env.observation_space.shape, env.action_space.n))
    if "NoFrameskip" in env_id:
        logger.log('action meaning:',
                   env.unwrapped.get_action_meanings()[:env.action_space.n])

    robust = training_config.get('robust', False)
    adv_train = training_config.get('adv_train', False)
    bound_solver = training_config.get('bound_solver', 'cov')
    attack_config = {}
    if adv_train or bound_solver == 'pgd':
        # NOTE(review): test_config is not used further in this function; the
        # lookup is kept so a missing 'test_config' key still fails here.
        test_config = config['test_config']
        attack_config = training_config["attack_config"]
        adv_ratio = training_config.get('adv_ratio', 1)
        if adv_train:
            logger.log('using adversarial examples for training, adv ratio:',
                       adv_ratio)
        else:
            logger.log('using pgd regularization training')
    if robust or adv_train:
        # Input-perturbation schedule from start_epsilon to epsilon.
        schedule_start = training_config['schedule_start']
        schedule_length = training_config['schedule_length']
        starting_epsilon = training_config['start_epsilon']
        end_epsilon = training_config['epsilon']
        epsilon_scheduler = EpsilonScheduler(
            training_config.get("schedule_type", "linear"), schedule_start,
            schedule_start + schedule_length - 1, starting_epsilon,
            end_epsilon, 1)
        max_eps = end_epsilon

    model_width = training_config['model_width']
    robust_model = robust and bound_solver != 'pgd'
    dueling = training_config.get('dueling', True)

    current_model = model_setup(env_id, env, robust_model, logger, USE_CUDA,
                                dueling, model_width)
    target_model = model_setup(env_id, env, robust_model, logger, USE_CUDA,
                               dueling, model_width)

    load_path = training_config["load_model_path"]
    if load_path != "" and os.path.exists(load_path):
        # Resume: the starting frame index is parsed from "...frame_<N>.pth".
        load_frame = int(
            re.findall(r'^.*frame_([0-9]+)\.pth$', load_path)[0])
        logger.log('\ntrain from model {}, current frame index is {}\n'.format(
            load_path, load_frame))
        # Deserialize the checkpoint once and load it into both networks.
        state_dict = torch.load(load_path)
        current_model.features.load_state_dict(state_dict)
        target_model.features.load_state_dict(state_dict)
    else:
        logger.log('\ntrain from scratch')
        load_frame = 1

    lr = training_config['lr']
    grad_clip = training_config['grad_clip']
    natural_loss_fn = training_config['natural_loss_fn']
    optimizer = optim.Adam(current_model.parameters(),
                           lr=lr,
                           eps=training_config['adam_eps'])
    # Do not evaluate gradient for target model.
    for param in target_model.features.parameters():
        param.requires_grad = False

    buffer_config = training_config['buffer_params']
    replay_initial = buffer_config['replay_initial']
    buffer_capacity = buffer_config['buffer_capacity']
    use_cpp_buffer = training_config["cpprb"]
    use_async_rb = training_config['use_async_rb']
    use_async_env = training_config['use_async_env']
    num_frames = training_config['num_frames']
    batch_size = training_config['batch_size']
    gamma = training_config['gamma']

    if use_cpp_buffer:
        logger.log('using cpp replay buffer')
        if use_async_rb:
            replay_buffer_ctor = AsyncReplayBuffer(initial_state=state,
                                                   batch_size=batch_size)
        else:
            replay_buffer_ctor = cpprb.PrioritizedReplayBuffer
    else:
        logger.log('using python replay buffer')
    per = training_config['per']

    if per:
        logger.log('using prioritized experience replay.')
        alpha = buffer_config['alpha']
        buffer_beta_start = buffer_config['buffer_beta_start']
        buffer_beta_frames = buffer_config.get('buffer_beta_frames', -1)
        # A missing or too-small value falls back to the remaining frames.
        if buffer_beta_frames < replay_initial:
            buffer_beta_frames = num_frames - replay_initial
            logger.log('beffer_beta_frames reset to ', buffer_beta_frames)
        buffer_beta_scheduler = BufferBetaScheduler(buffer_beta_start,
                                                    buffer_beta_frames,
                                                    start_frame=replay_initial)
        if use_cpp_buffer:
            replay_buffer = replay_buffer_ctor(
                size=buffer_capacity,
                env_dict={
                    "obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "act": {
                        "shape": 1,
                        "dtype": np.uint8
                    },
                    "rew": {},
                    "next_obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "done": {}
                },
                alpha=alpha,
                eps=0.0)  # We add eps manually in training loop
        else:
            replay_buffer = PrioritizedReplayBuffer(buffer_capacity,
                                                    alpha=alpha)

    else:
        logger.log('using regular replay.')
        if use_cpp_buffer:
            replay_buffer = cpprb.ReplayBuffer(
                buffer_capacity,
                {
                    "obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "act": {
                        "shape": 1,
                        "dtype": np.uint8
                    },
                    "rew": {},
                    "next_obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "done": {}
                })
        else:
            replay_buffer = ReplayBuffer(buffer_capacity)

    update_target(current_model, target_model)

    # Exploration (action epsilon) schedule.
    act_epsilon_start = training_config['act_epsilon_start']
    act_epsilon_final = training_config['act_epsilon_final']
    act_epsilon_decay = training_config['act_epsilon_decay']
    act_epsilon_method = training_config['act_epsilon_method']
    if training_config.get('act_epsilon_decay_zero', True):
        decay_zero = num_frames
    else:
        decay_zero = None
    act_epsilon_scheduler = ActEpsilonScheduler(act_epsilon_start,
                                                act_epsilon_final,
                                                act_epsilon_decay,
                                                method=act_epsilon_method,
                                                start_frame=replay_initial,
                                                decay_zero=decay_zero)

    # Use optimized cuda memory management
    memory_mgr = CudaTensorManager(state.shape,
                                   batch_size,
                                   per,
                                   USE_CUDA,
                                   dtype=dtype)

    losses = []
    td_losses = []
    batch_cur_q = []
    batch_exp_q = []

    sa = None
    kappa = None
    hinge = False
    if robust:
        logger.log(
            'using convex relaxation certified classification loss as a regularization!'
        )
        kappa = training_config['kappa']
        reg_losses = []
        # Row i of sa lists every action index except i, in ascending order.
        sa = np.zeros(
            (current_model.num_actions, current_model.num_actions - 1),
            dtype=np.int32)
        for i in range(sa.shape[0]):
            for j in range(sa.shape[1]):
                if j < i:
                    sa[i][j] = j
                else:
                    sa[i][j] = j + 1
        sa = torch.LongTensor(sa)
        hinge = training_config.get('hinge', False)
        logger.log('using hinge loss (default is cross entropy): ', hinge)

    if use_async_env:
        # Create an environment in a separate process, run asychronously
        async_env = AsyncEnv(env_id,
                             result_path=prefix,
                             draw=training_config['show_game'],
                             record=training_config['record_game'],
                             env_params=env_params,
                             seed=seed)

    # initialize parameters in logging
    all_rewards = []
    episode_reward = 0
    act_epsilon = np.nan
    grad_norm = np.nan
    weights_norm = np.nan
    best_test_reward = -float('inf')
    buffer_stored_size = 0
    if adv_train:
        attack_count = 0
        suc_count = 0
    if robust and bound_solver == 'pgd':
        ori_margin, adv_margin = np.nan, np.nan

    start_time = time.time()
    period_start_time = time.time()

    # Main Loop
    for frame_idx in range(load_frame, num_frames + 1):
        # Step 1: get current action
        frame_start = time.time()
        t = time.time()

        eps = 0
        if adv_train or robust:
            eps = epsilon_scheduler.get_eps(frame_idx, 0)

        act_epsilon = act_epsilon_scheduler.get(frame_idx)
        # `x != np.nan` is always True, so NaN must be tested with isnan().
        if adv_train and not np.isnan(eps) and eps >= np.finfo(
                np.float32).tiny:
            ori_state_tensor = torch.from_numpy(
                np.ascontiguousarray(state)).unsqueeze(0).cuda().to(
                    torch.float32)
            if dtype in UINTS:
                ori_state_tensor /= 255
            attack_config['params']['epsilon'] = eps
            # Attack only a fraction adv_ratio of the frames.
            if random.random() < adv_ratio:
                attack_count += 1
                state_tensor = attack(current_model, ori_state_tensor,
                                      attack_config)
                # The attack counts as successful if it changes the action.
                if current_model.act(state_tensor)[0] != current_model.act(
                        ori_state_tensor)[0]:
                    suc_count += 1
            else:
                state_tensor = ori_state_tensor
            action = current_model.act(state_tensor, act_epsilon)[0]
        else:
            with torch.no_grad():
                state_tensor = torch.from_numpy(
                    np.ascontiguousarray(state)).unsqueeze(0).cuda().to(
                        torch.float32)
                if dtype in UINTS:
                    state_tensor /= 255
                ori_state_tensor = torch.clone(state_tensor)
                action = current_model.act(state_tensor, act_epsilon)[0]

        # torch.cuda.synchronize()
        log_time('act_time', time.time() - t)

        # Step 2: run environment
        t = time.time()
        if use_async_env:
            # Only dispatch the step; the result is collected after the
            # loss update below.
            async_env.async_step(action)
        else:
            next_state, reward, done, _ = env.step(action)
        log_time('env_time', time.time() - t)

        # Step 3: save to buffer
        # For asynchronous env, defer saving
        if not use_async_env:
            t = time.time()
            if use_cpp_buffer:
                replay_buffer.add(obs=state,
                                  act=action,
                                  rew=reward,
                                  next_obs=next_state,
                                  done=done)
            else:
                replay_buffer.push(state, action, reward, next_state, done)
            log_time('save_time', time.time() - t)

        if use_cpp_buffer:
            buffer_stored_size = replay_buffer.get_stored_size()
        else:
            buffer_stored_size = len(replay_buffer)

        beta = np.nan
        buffer_beta = np.nan
        t = time.time()

        # Step 4: one optimization step once enough transitions are stored.
        if buffer_stored_size > replay_initial:
            if per:
                buffer_beta = buffer_beta_scheduler.get(frame_idx)
            if robust:
                convex_final_beta = training_config['convex_final_beta']
                convex_start_beta = training_config['convex_start_beta']
                # Equals convex_start_beta at eps == 0 and
                # convex_start_beta * convex_final_beta at eps == max_eps.
                beta = (
                    max_eps - eps *
                    (1.0 - convex_final_beta)) / max_eps * convex_start_beta

            res = compute_td_loss(current_model,
                                  target_model,
                                  batch_size,
                                  replay_buffer,
                                  per,
                                  use_cpp_buffer,
                                  use_async_rb,
                                  optimizer,
                                  gamma,
                                  memory_mgr,
                                  robust,
                                  buffer_beta=buffer_beta,
                                  grad_clip=grad_clip,
                                  natural_loss_fn=natural_loss_fn,
                                  eps=eps,
                                  beta=beta,
                                  sa=sa,
                                  kappa=kappa,
                                  dtype=dtype,
                                  hinge=hinge,
                                  hinge_c=training_config.get('hinge_c', 1),
                                  env_id=env_id,
                                  bound_solver=bound_solver,
                                  attack_config=attack_config)
            (loss, grad_norm, weights_norm, td_loss, batch_cur_q_value,
             batch_exp_q_value) = res[:6]
            if robust:
                # The regularization loss is the last returned element; with
                # the pgd solver the two margins precede it.
                reg_loss = res[-1]
                reg_losses.append(reg_loss.data.item())
                if bound_solver == 'pgd':
                    ori_margin, adv_margin = res[-3].data.item(
                    ), res[-2].data.item()

            losses.append(loss.data.item())
            td_losses.append(td_loss.data.item())
            batch_cur_q.append(batch_cur_q_value.data.item())
            batch_exp_q.append(batch_exp_q_value.data.item())

        log_time('loss_time', time.time() - t)

        # Step 2: run environment (async)
        t = time.time()
        if use_async_env:
            next_state, reward, done, _ = async_env.wait_step()
        log_time('env_time', time.time() - t)

        # Step 3: save to buffer (async)
        if use_async_env:
            t = time.time()
            if use_cpp_buffer:
                replay_buffer.add(obs=state,
                                  act=action,
                                  rew=reward,
                                  next_obs=next_state,
                                  done=done)
            else:
                replay_buffer.push(state, action, reward, next_state, done)
            log_time('save_time', time.time() - t)

        # Update states and reward
        t = time.time()
        state = next_state
        episode_reward += reward
        if done:
            if use_async_env:
                state = async_env.reset()
            else:
                state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0
        log_time('env_time', time.time() - t)

        # All kinds of result logging.  Besides the regular print interval
        # this also fires on the last frame, near the start of the epsilon
        # schedule (robust only) and when the buffer crosses replay_initial.
        if frame_idx % training_config[
                'print_frame'] == 0 or frame_idx == num_frames or (
                    robust and abs(frame_idx - schedule_start) < 5
                ) or abs(buffer_stored_size - replay_initial) < 5:
            logger.log(
                '\nframe {}/{}, learning rate: {:.6g}, buffer beta: {:.6g}, action epsilon: {:.6g}'
                .format(frame_idx, num_frames, lr, buffer_beta, act_epsilon))
            # Slices of the form xs[:-k - 1:-1] select the last k entries
            # (in reverse order), i.e. the values of the current period.
            logger.log(
                'total time: {:.2f}, epoch time: {:.4f}, speed: {:.2f} frames/sec, last total loss: {:.6g}, avg total loss: {:.6g}, grad norm: {:.6g}, weights_norm: {:.6g}, latest episode reward: {:.6g}, avg 10 episode reward: {:.6g}'
                .format(
                    time.time() - start_time,
                    time.time() - period_start_time,
                    training_config['print_frame'] /
                    (time.time() - period_start_time),
                    losses[-1] if losses else np.nan,
                    np.average(losses[:-training_config['print_frame'] -
                                      1:-1]) if losses else np.nan, grad_norm,
                    weights_norm, all_rewards[-1] if all_rewards else np.nan,
                    np.average(all_rewards[:-11:-1])
                    if all_rewards else np.nan))
            logger.log('last td loss: {:.6g}, avg td loss: {:.6g}'.format(
                td_losses[-1] if td_losses else np.nan,
                np.average(td_losses[:-training_config['print_frame'] -
                                     1:-1]) if td_losses else np.nan))
            logger.log(
                'last batch cur q: {:.6g}, avg batch cur q: {:.6g}'.format(
                    batch_cur_q[-1] if batch_cur_q else np.nan,
                    np.average(batch_cur_q[:-training_config['print_frame'] -
                                           1:-1]) if batch_cur_q else np.nan))
            logger.log(
                'last batch exp q: {:.6g}, avg batch exp q: {:.6g}'.format(
                    batch_exp_q[-1] if batch_exp_q else np.nan,
                    np.average(batch_exp_q[:-training_config['print_frame'] -
                                           1:-1]) if batch_exp_q else np.nan))
            if robust:
                logger.log('current input epsilon: {:.6g}'.format(eps))
                if bound_solver == 'pgd':
                    logger.log(
                        'last logit margin: ori: {:.6g}, adv: {:.6g}'.format(
                            ori_margin, adv_margin))
                else:
                    logger.log('current bound beta: {:.6g}'.format(beta))
                logger.log(
                    'last cert reg loss: {:.6g}, avg cert reg loss: {:.6g}'.
                    format(
                        reg_losses[-1] if reg_losses else np.nan,
                        np.average(
                            reg_losses[:-training_config['print_frame'] -
                                       1:-1]) if reg_losses else np.nan))
                logger.log('current kappa: {:.6g}'.format(kappa))
            if adv_train:
                logger.log(
                    'current attack epsilon (same as input epsilon): {:.6g}'.
                    format(eps))
                # Distortion of the observation used for this frame's action.
                diff = ori_state_tensor - state_tensor
                diff = np.abs(diff.data.cpu().numpy())
                logger.log('current Linf distortion: {:.6g}'.format(
                    np.max(diff)))
                logger.log(
                    'this batch attacked: {}, success: {}, attack success rate: {:.6g}'
                    .format(
                        attack_count, suc_count, suc_count * 1.0 /
                        attack_count if attack_count > 0 else np.nan))
                attack_count = 0
                suc_count = 0
                logger.log('attack stats reseted.')

            period_start_time = time.time()
            log_time.print()
            log_time.clear()

        # Periodic checkpoint (also on the final frame).
        if frame_idx % training_config[
                'save_frame'] == 0 or frame_idx == num_frames:
            plot(frame_idx, all_rewards, losses, prefix)
            torch.save(current_model.features.state_dict(),
                       '{}/frame_{}.pth'.format(prefix, frame_idx))

        if frame_idx % training_config['update_target_frame'] == 0:
            update_target(current_model, target_model)

        # Test only once beta has reached 0 (robust) or in the last 20% of
        # the frames (non-robust); keep a checkpoint for each new best reward.
        if frame_idx % training_config.get('mini_test', 100000) == 0 and (
            (robust and beta == 0) or
            (not robust and frame_idx * 1.0 / num_frames >= 0.8)):
            test_reward = mini_test(current_model, config, logger, dtype)
            logger.log('this test avg reward: {:6g}'.format(test_reward))
            if test_reward >= best_test_reward:
                best_test_reward = test_reward
                logger.log(
                    'new best reward {:6g} achieved, update checkpoint'.format(
                        test_reward))
                torch.save(current_model.features.state_dict(),
                           '{}/best_frame_{}.pth'.format(prefix, frame_idx))

        log_time.log_time('total', time.time() - frame_start)
Esempio n. 3
0
def main(args):
    """Train every configured model and save per-epoch and best checkpoints.

    For each model returned by ``config_modelloader`` the global
    ``training_params`` are deep-copied and merged with the model's own
    ``training_params`` (if present).  The model is converted with
    ``BoundSequential.convert`` and trained for ``epochs`` epochs through
    ``Train``; after every epoch it is evaluated on the test loader at that
    epoch's final epsilon and saved.  Once the epsilon schedule has finished,
    the model with the lowest evaluation error is also saved as the best
    model.

    Args:
        args: Command-line arguments forwarded to ``load_config``.

    Raises:
        ValueError: If the optimizer name is neither ``"adam"`` nor ``"sgd"``,
            or if both ``lr_decay_step`` and ``lr_decay_milestones`` are
            empty.
    """
    config = load_config(args)
    global_train_config = config["training_params"]
    models, model_names = config_modelloader(config)
    for model, model_id, model_config in zip(models, model_names,
                                             config["models"]):
        # make a copy of global training config, and update per-model config
        train_config = copy.deepcopy(global_train_config)
        if "training_params" in model_config:
            train_config = update_dict(train_config,
                                       model_config["training_params"])
        model = BoundSequential.convert(
            model, train_config["method_params"]["bound_opts"])

        # read training parameters from config file
        epochs = train_config["epochs"]
        lr = train_config["lr"]
        weight_decay = train_config["weight_decay"]
        starting_epsilon = train_config["starting_epsilon"]
        end_epsilon = train_config["epsilon"]
        schedule_length = train_config["schedule_length"]
        schedule_start = train_config["schedule_start"]
        optimizer = train_config["optimizer"]
        method = train_config["method"]
        verbose = train_config["verbose"]
        lr_decay_step = train_config["lr_decay_step"]
        lr_decay_milestones = train_config["lr_decay_milestones"]
        lr_decay_factor = train_config["lr_decay_factor"]
        multi_gpu = train_config["multi_gpu"]
        # parameters specific to a training method
        method_param = train_config["method_params"]
        norm = float(train_config["norm"])
        train_data, test_data = config_dataloader(
            config, **train_config["loader_params"])

        if optimizer == "adam":
            opt = optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=weight_decay)
        elif optimizer == "sgd":
            opt = optim.SGD(model.parameters(),
                            lr=lr,
                            momentum=0.9,
                            nesterov=True,
                            weight_decay=weight_decay)
        else:
            raise ValueError("Unknown optimizer")

        # The schedule is expressed in optimizer steps, so epoch-based
        # settings are scaled by the number of steps per epoch.
        batch_multiplier = train_config["method_params"].get(
            "batch_multiplier", 1)
        batch_size = train_data.batch_size * batch_multiplier
        num_steps_per_epoch = int(
            np.ceil(1.0 * len(train_data.dataset) / batch_size))
        epsilon_scheduler = EpsilonScheduler(
            train_config.get("schedule_type",
                             "linear"), schedule_start * num_steps_per_epoch,
            ((schedule_start + schedule_length) - 1) * num_steps_per_epoch,
            starting_epsilon, end_epsilon, num_steps_per_epoch)
        max_eps = end_epsilon

        if lr_decay_step:
            # Use StepLR. Decay by lr_decay_factor every lr_decay_step.
            lr_scheduler = optim.lr_scheduler.StepLR(opt,
                                                     step_size=lr_decay_step,
                                                     gamma=lr_decay_factor)
            # StepLR takes precedence; disable the milestone branch below.
            lr_decay_milestones = None
        elif lr_decay_milestones:
            # Decay learning rate by lr_decay_factor at a few milestones.
            lr_scheduler = optim.lr_scheduler.MultiStepLR(
                opt, milestones=lr_decay_milestones, gamma=lr_decay_factor)
        else:
            raise ValueError(
                "one of lr_decay_step and lr_decay_milestones must be not empty."
            )
        model_name = get_path(config, model_id, "model", load=False)
        best_model_name = get_path(config, model_id, "best_model", load=False)
        model_log = get_path(config, model_id, "train_log")
        # The context manager guarantees the per-model log is flushed and
        # closed, even if training raises (previously the handle leaked).
        with open(model_log, "w") as log_file:
            logger = Logger(log_file)
            logger.log(model_name)
            logger.log("Command line:", " ".join(sys.argv[:]))
            logger.log("training configurations:", train_config)
            logger.log("Model structure:")
            logger.log(str(model))
            logger.log("data std:", train_data.std)
            best_err = np.inf
            recorded_clean_err = np.inf
            timer = 0.0

            if multi_gpu:
                logger.log(
                    "\nUsing multiple GPUs for computing CROWN-IBP bounds\n")
                model = BoundDataParallel(model)
                # NOTE(review): the model is moved to the GPU only in the
                # multi-GPU case; confirm that single-GPU runs do not also
                # need model.cuda() here.
                model = model.cuda()

            for t in range(epochs):
                epoch_start_eps = epsilon_scheduler.get_eps(t, 0)
                epoch_end_eps = epsilon_scheduler.get_eps(t + 1, 0)
                logger.log(
                    "Epoch {}, learning rate {}, epsilon {:.6g} - {:.6g}".
                    format(t, lr_scheduler.get_lr(), epoch_start_eps,
                           epoch_end_eps))
                # with torch.autograd.detect_anomaly():
                start_time = time.time()
                Train(model, t, train_data, epsilon_scheduler, max_eps, norm,
                      logger, verbose, True, opt, method, **method_param)
                if lr_decay_step:
                    # Use stepLR. Note that we manually set up epoch number
                    # here, so the +1 offset.  The epoch passed stays 0 until
                    # the epsilon schedule has finished.
                    lr_scheduler.step(
                        epoch=max(t - (schedule_start + schedule_length - 1) +
                                  1, 0))
                elif lr_decay_milestones:
                    # Use MultiStepLR with milestones.
                    lr_scheduler.step()
                epoch_time = time.time() - start_time
                timer += epoch_time
                logger.log('Epoch time: {:.4f}, Total time: {:.4f}'.format(
                    epoch_time, timer))
                logger.log("Evaluating...")
                with torch.no_grad():
                    # evaluate with epsilon fixed at this epoch's final value
                    err, clean_err = Train(
                        model, t, test_data,
                        EpsilonScheduler("linear", 0, 0, epoch_end_eps,
                                         epoch_end_eps, 1), max_eps, norm,
                        logger, verbose, False, None, method, **method_param)

                logger.log('saving to', model_name)
                # With the data-parallel wrapper the underlying module's
                # weights are saved.
                torch.save(
                    {
                        'state_dict':
                        model.module.state_dict()
                        if multi_gpu else model.state_dict(),
                        'epoch':
                        t,
                    }, model_name)

                # save the best model after we reached the schedule
                if t >= (schedule_start + schedule_length):
                    if err <= best_err:
                        best_err = err
                        recorded_clean_err = clean_err
                        logger.log('Saving best model {} with error {}'.format(
                            best_model_name, best_err))
                        torch.save(
                            {
                                'state_dict':
                                model.module.state_dict()
                                if multi_gpu else model.state_dict(),
                                'robust_err':
                                err,
                                'clean_err':
                                clean_err,
                                'epoch':
                                t,
                            }, best_model_name)

            logger.log('Total Time: {:.4f}'.format(timer))
            logger.log('Model {} best err {}, clean err {}'.format(
                model_id, best_err, recorded_clean_err))