Example #1
0
def main(exp):
    """Benchmark how fast batches can be loaded and copied to the device.

    The training split of the requested dataset is wrapped in a shuffled
    ``DataLoader``; each batch produced by ``dataloop`` is copied item by
    item to the configured device. No model is involved.
    """

    # Number of examples per batch
    batch_size: Argument & int = default(256)

    # Dataset to load
    dataset: Argument

    torch_settings = init_torch()
    target_device = torch_settings.device
    dataset = exp.get_dataset(dataset)

    loader = torch.utils.data.DataLoader(
        dataset.train,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=torch_settings.workers,
    )

    # The wrapper is created without a sync hook; synchronisation is done
    # explicitly at the end of every iteration below.
    wrapper = iteration_wrapper(exp, sync=None)

    # Warm up: pull a single batch and push its items to the device.
    for batch in loader:
        for item in batch:
            item.to(target_device)
        break

    for it, batch in dataloop(loader, wrapper=wrapper):
        it.set_count(batch_size)
        it.log(eta=True)
        batch = [tensor.to(target_device) for tensor in batch]
        if torch_settings.sync:
            torch_settings.sync()
Example #2
0
def main(exp):
    """Benchmark training of the super-resolution ``Net`` model.

    Only the training split is used: every batch goes through a forward
    pass, an MSE loss, a backward pass and an Adam update.
    """

    # Dataset to use
    dataset: Argument

    # super resolution upscale factor
    upscale_factor: Argument & int = default(2)

    # Learning rate (default: 0.1)
    lr: Argument & float = default(0.1)

    # Batch size (default: 64)
    batch_size: Argument & int = default(64)

    torch_settings = init_torch()
    device = torch_settings.device

    print('===> Loading datasets')
    train_set = get_dataset(exp, dataset, upscale_factor).train
    training_data_loader = DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=torch_settings.workers,
    )

    print('===> Building model')
    model = Net(upscale_factor=upscale_factor).to(device)
    model.train()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)
    batches = dataloop(training_data_loader, wrapper=wrapper)
    for it, (inputs, targets) in batches:
        it.set_count(batch_size)

        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        prediction = model(inputs)
        loss = criterion(prediction, targets)
        # The loss is reported before the backward pass is started.
        it.log(loss=loss.item())
        loss.backward()
        optimizer.step()
Example #3
0
def main(exp):
    """Benchmark training of the ``Sequence`` model on generated wave data.

    The data returned by ``generate_wave_data`` is split into a training
    part (rows from index 3 on) and a held-out part (first 3 rows); inputs
    are every column but the last and targets every column but the first.
    Training runs indefinitely over ``count()`` with closure-based SGD
    steps until ``dataloop`` stops it.
    """
    # Model float type
    dtype: Argument & str = default("float32")

    # Number of samples
    samples: Argument & int = default(100)

    torch_settings = init_torch()
    device = torch_settings.device

    data = generate_wave_data(20, 1000, samples)

    _dtype = to_type[dtype]

    def on_device(array):
        # Convert a numpy array to a tensor with the benchmark dtype/device.
        return torch.from_numpy(array).to(device=device, dtype=_dtype)

    train_input = on_device(data[3:, :-1])
    train_target = on_device(data[3:, 1:])

    # Held-out tensors are created but not used by the loop below.
    test_input = on_device(data[:3, :-1])
    test_target = on_device(data[:3, 1:])

    # build the model
    seq = Sequence().to(device=device, dtype=_dtype)
    criterion = nn.MSELoss().to(device=device, dtype=_dtype)
    optimizer = optim.SGD(seq.parameters(), lr=0.01)

    seq.train()

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    for it, _ in dataloop(count(), wrapper=wrapper):
        it.set_count(samples)

        def closure():
            # One full forward/backward pass; the optimizer calls this.
            optimizer.zero_grad()
            prediction = seq(train_input.to(device=device, dtype=_dtype))
            loss = criterion(prediction, train_target)
            loss.backward()
            it.log(loss=loss.item())
            return loss

        optimizer.step(closure)
Example #4
0
def main(exp):
    """Collect the command-line options and launch ``train300_mlperf_coco``.

    The options, the torch settings and the iteration wrapper are bundled
    in a single ``NS`` namespace that is handed to the training routine.
    """

    # dataset to use
    dataset: Argument

    # batch size
    batch_size: Argument & int = default(32)

    # path to model checkpoint file
    checkpoint: Argument = default(None)

    torch_settings = init_torch()

    options = {
        "dataset": dataset,
        "checkpoint": checkpoint,
        "batch_size": batch_size,
        "torch_settings": torch_settings,
        "wrapper": iteration_wrapper(exp, sync=torch_settings.sync),
    }
    train300_mlperf_coco(exp, NS(**options))
Example #5
0
def main(exp):
    """Benchmark training of ``TransformerNet`` for neural style transfer.

    A ``Vgg16`` network (built with ``requires_grad=False``) extracts
    features from the transformed and the original images. The loss is a
    weighted sum of a content term (MSE between ``relu2_2`` features) and
    a style term (MSE between gram matrices of the output features and of
    the style image features).
    """

    # dataset to use
    dataset: Argument & str

    # Number of examples per batch
    batch_size: Argument & int = default(64)

    # path to style-image
    style_image: Argument & str = default(
        os.path.join(repo_base, "neural-style-images/style-images/candy.jpg"))

    # size of training images, default is 256 X 256
    image_size: Argument & int = default(256)

    # size of style-image, default is the original size of style image
    style_size: Argument & int = default(None)

    # weight for content-loss, default is 1e5
    content_weight: Argument & float = default(1e5)

    # weight for style-loss, default is 1e10
    style_weight: Argument & float = default(1e10)

    # learning rate, default is 1e-3
    lr: Argument & float = default(1e-3)

    torch_settings = init_torch()
    device = torch_settings.device

    # Both pipelines end by multiplying the tensor values by 255.
    times_255 = transforms.Lambda(lambda x: x.mul(255))

    # Training images: resize, center-crop to a square, convert, scale.
    train_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        times_255,
    ])
    train_dataset = exp.get_dataset(dataset, train_transform).train
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        num_workers=torch_settings.workers,
    )

    transformer = TransformerNet().to(device)
    optimizer = Adam(transformer.parameters(), lr=lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(requires_grad=False).to(device)
    vgg_memory = memory_size(
        vgg,
        batch_size=batch_size,
        input_size=(3, image_size, image_size),
    )
    print(vgg_memory * 4)

    # Style image: load, convert, scale, and repeat once per batch element.
    style_transform = transforms.Compose([transforms.ToTensor(), times_255])
    style = style_transform(utils.load_image(style_image, size=style_size))
    style = style.repeat(batch_size, 1, 1, 1).to(device)

    # Gram matrices of the style features are computed once, up front.
    features_style = vgg(utils.normalize_batch(style))
    gram_style = [utils.gram_matrix(feature) for feature in features_style]

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    transformer.train()

    for it, (images, _) in dataloop(train_loader, wrapper=wrapper):
        n_batch = len(images)
        it.set_count(n_batch)

        images = images.to(device)
        stylized = utils.normalize_batch(transformer(images))
        images = utils.normalize_batch(images)

        optimizer.zero_grad()

        features_y = vgg(stylized)
        features_x = vgg(images)

        content_loss = content_weight * mse_loss(
            features_y.relu2_2, features_x.relu2_2)

        # The style grams were built for a full batch; slice them down to
        # the size of the current batch before comparing.
        gram_losses = [
            mse_loss(utils.gram_matrix(ft_y), gm_s[:n_batch, :, :])
            for ft_y, gm_s in zip(features_y, gram_style)
        ]
        style_loss = sum(gram_losses, 0.) * style_weight

        total_loss = content_loss + style_loss
        total_loss.backward()

        it.log(loss=total_loss.item())
        optimizer.step()
Example #6
0
def main(exp, argv):
    """Benchmark PPO training of a BabyAI actor-critic model.

    Parses ``argv``, builds the environments, the observation preprocessor,
    the actor-critic model and the PPO algorithm, logs the state of the
    babyai checkout, then repeatedly calls ``algo.update_parameters()``
    until the iteration wrapper reports that it is done.

    Args:
        exp: Experiment handle; provides ``results_directory()`` and is
            passed to ``iteration_wrapper`` for reporting.
        argv: Command-line arguments to parse.

    Raises:
        ValueError: If ``--algo`` is anything other than ``ppo``.
    """
    os.environ["BABYAI_STORAGE"] = exp.results_directory()

    # Parse arguments
    # NOTE(review): many options read below (seed, env, procs, arch, lr,
    # ...) are not declared in this function; presumably ``ArgumentParser``
    # is a project parser that pre-registers them -- confirm.
    parser = ArgumentParser()
    parser.add_argument("--algo",
                        default='ppo',
                        help="algorithm to use (default: ppo)")
    parser.add_argument("--discount",
                        type=float,
                        default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--reward-scale",
                        type=float,
                        default=20.,
                        help="Reward scale multiplier")
    parser.add_argument(
        "--gae-lambda",
        type=float,
        default=0.99,
        help="lambda coefficient in GAE formula (default: 0.99, 1 means no gae)"
    )
    parser.add_argument("--value-loss-coef",
                        type=float,
                        default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm",
                        type=float,
                        default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--clip-eps",
                        type=float,
                        default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--ppo-epochs",
                        type=int,
                        default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument(
        "--save-interval",
        type=int,
        default=50,
        help=
        "number of updates between two saves (default: 50, 0 means no saving)")
    parser.add_argument("--workers",
                        type=int,
                        default=8,
                        help="number of workers for PyTorch (default: 8)")
    parser.add_argument("--max-count",
                        type=int,
                        default=1000,
                        help="maximum number of frames to run for")
    parser.add_argument("--sample_duration",
                        type=float,
                        default=0.5,
                        help="sampling duration")
    parser.add_argument("--cuda",
                        action="store_true",
                        default=False,
                        help="whether to use cuda")
    args = parser.parse_args(argv)

    utils.seed(args.seed)

    torch_settings = init_torch(
        seed=args.seed,
        cuda=args.cuda,
        workers=args.workers,
    )

    # Generate environments, each with its own seed derived from args.seed
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix
    }
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(
        **model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(
            args.model, envs[0].observation_space, args.pretrained_model)
    else:
        obss_preprocessor = utils.ObssPreprocessor(args.model,
                                                   envs[0].observation_space,
                                                   args.pretrained_model)

    # Define actor-critic model: either a pretrained one or a fresh one
    if args.pretrained_model:
        acmodel = utils.load_model(args.pretrained_model,
                                   raise_not_found=True)
    else:
        acmodel = ACModel(obss_preprocessor.obs_space,
                          envs[0].action_space, args.image_dim,
                          args.memory_dim, args.instr_dim,
                          not args.no_instr, args.instr_arch,
                          not args.no_mem, args.arch)

    obss_preprocessor.vocab.save()

    if torch_settings.cuda:
        acmodel.cuda()

    # Define actor-critic algo

    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(
            envs, acmodel, args.frames_per_proc, args.discount, args.lr,
            args.beta1, args.beta2, args.gae_lambda, args.entropy_coef,
            args.value_loss_coef, args.max_grad_norm, args.recurrence,
            args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size,
            obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are initialized compared to when we don't use that.
    # Thus, there starts to be a difference in the random state. If we want to avoid it, in order to make sure that
    # the results of supervised-loss-coef=0. and extra-binary-info=0 match, we need to reseed here.

    utils.seed(args.seed)

    # Restore training status
    # The status is loaded (or initialised) but not consulted by the
    # training loop below, which is bounded by the iteration wrapper.

    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # Log code state, command, availability of CUDA and model
    # git is run with an argument list and ``cwd`` rather than through a
    # shell string, so install paths containing spaces or shell
    # metacharacters are handled correctly. OSError covers a missing git
    # executable or directory, keeping this step best-effort.

    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            ['git', 'log', '-n1'], cwd=babyai_code).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except (subprocess.CalledProcessError, OSError):
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output(
            ['git', 'diff'], cwd=babyai_code).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except (subprocess.CalledProcessError, OSError):
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model

    wrapper = iteration_wrapper(
        exp,
        sync=torch_settings.sync,
        max_count=args.max_count,
        sample_duration=args.sample_duration,
    )

    while True:
        with wrapper() as it:
            # Stop as soon as the wrapper reports completion
            if wrapper.done():
                break

            # Update parameters
            logs = algo.update_parameters()

            it.set_count(logs["num_frames"])
            it.log(loss=logs["loss"])
Example #7
0
def main(exp):
    """Benchmark training of the ``NeuMF`` recommendation model.

    Loads the training split of the requested dataset (with
    ``negative_samples`` negatives per interaction), builds the model with
    the requested factor and MLP layer sizes, writes its text description
    to ``model.txt`` in the results directory, and then trains it with
    Adam and ``BCEWithLogitsLoss`` over the batches yielded by
    ``dataloop``.
    """
    # dataset to use
    dataset: Argument & str

    # batch size
    batch_size: Argument & int = default(128)

    # number of predictive factors
    # [alias: -f]
    factors: Argument & int = default(8)

    # size of hidden layers for MLP
    layers: Argument = default("64,32,16,8")

    # number of negative examples per interaction
    # [alias: -n]
    negative_samples: Argument & int = default(4)

    # learning rate for optimizer
    # [alias: -l]
    learning_rate: Argument & float = default(0.001)

    # rank for test examples to be considered a hit
    # [alias: -k]
    topk: Argument & int = default(10)

    # ``layers`` is a comma-separated list of integers
    layer_sizes = [int(x) for x in layers.split(",")]

    torch_settings = init_torch()
    device = torch_settings.device

    # Load Data
    # ------------------------------------------------------------------------------------------------------------------
    print('Loading data')
    with exp.time('loading_data'):
        t1 = time.time()

        train_dataset = exp.get_dataset(dataset, nb_neg=negative_samples).train

        train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=torch_settings.workers,
            pin_memory=True)

        nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items

        print('Load data done [%.1f s]. #user=%d, #item=%d, #train=%d' %
              (time.time() - t1, nb_users, nb_items, train_dataset.mat.nnz))
    # ------------------------------------------------------------------------------------------------------------------

    # Create model (no regularisation on either the MF or the MLP part)
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=factors,
                  mf_reg=0.,
                  mlp_layer_sizes=layer_sizes,
                  mlp_layer_regs=[0.] * len(layer_sizes)).to(device)
    print(model)
    print("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    run_dir = exp.results_directory()
    with open(os.path.join(run_dir, 'model.txt'), 'w') as description:
        description.write(str(model))

    # Add optimizer and loss to graph
    beta1, beta2, epsilon = 0.9, 0.999, 1e-8

    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(beta1, beta2),
                                 lr=learning_rate,
                                 eps=epsilon)

    criterion = nn.BCEWithLogitsLoss().to(device)

    model.train()

    wrapper = iteration_wrapper(exp, sync=None)

    for it, (user, item, label) in dataloop(train_dataloader, wrapper=wrapper):
        it.set_count(batch_size)

        # The deprecated ``torch.autograd.Variable`` wrapper is a no-op on
        # tensors, so the batch is moved to the device directly.
        user = user.to(device)
        item = item.to(device)
        label = label.to(device)

        outputs = model(user, item)
        loss = criterion(outputs, label)
        it.log(loss=loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #8
0
def main(exp):
    """Benchmark REINFORCE policy-gradient training on ``CartPole-v0``.

    Every iteration plays ``episode_length`` environment steps (resetting
    the environment whenever it reports ``done``), logs an exponentially
    smoothed reward, and performs one policy update from the collected
    rewards and log-probabilities.
    """
    # discount factor (default: 0.99)
    gamma: Argument & float = default(0.99)

    # render the environment
    render: Argument & bool = default(False)

    # seed for the environment
    seed: Argument & int = default(1234)

    # length of one episode
    episode_length: Argument & int = default(500)

    torch_settings = init_torch()

    env = gym.make('CartPole-v0')
    env.seed(seed)

    # NOTE(review): neither the policy nor the tensors built below are
    # moved to ``torch_settings.device`` -- confirm that running on the
    # default device is intended.
    policy = Policy()
    optimizer = optim.Adam(policy.parameters(), lr=1e-2)
    # Smallest float32 increment; keeps the normalisation below from
    # dividing by zero.
    eps = np.finfo(np.float32).eps.item()

    print(torch_settings)

    def select_action(state):
        """Sample an action for ``state`` and record its log-probability."""
        state = torch.from_numpy(state).float().unsqueeze(0)
        probs = policy(state)
        m = Categorical(probs)
        action = m.sample()
        policy.saved_log_probs.append(m.log_prob(action))
        return action.item()

    def finish_episode():
        """Run one policy-gradient update and clear the episode buffers."""
        R = 0
        returns = []

        # Accumulate discounted returns from the last step backwards, then
        # restore chronological order. Appending and reversing once is
        # linear, unlike inserting at the front of the list on every step.
        for r in reversed(policy.rewards):
            R = r + gamma * R
            returns.append(R)
        returns.reverse()

        returns = torch.tensor(returns)
        returns = (returns - returns.mean()) / (returns.std() + eps)

        policy_loss = [
            -log_prob * R
            for log_prob, R in zip(policy.saved_log_probs, returns)
        ]

        optimizer.zero_grad()
        policy_loss = torch.cat(policy_loss).sum()
        policy_loss.backward()
        optimizer.step()

        del policy.rewards[:]
        del policy.saved_log_probs[:]

    running_reward = 10

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    for it, _ in dataloop(count(), wrapper=wrapper):
        it.set_count(episode_length)

        state, ep_reward = env.reset(), 0

        for t in range(episode_length):

            action = select_action(state)

            state, reward, done, _ = env.step(action)
            policy.rewards.append(reward)
            ep_reward += reward

            # we actually do not care about solving the thing
            if done:
                state, ep_reward = env.reset(), 0

        running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward
        it.log(reward=running_reward)
        finish_episode()
def main(exp):
    """Benchmark training of an RNN word-level language model (fp16 capable).

    The corpus is arranged into columns with ``batchify``; the model is
    trained in chunks of ``bptt`` rows under a rate chrono until the chrono
    reports completion. After every training pass the validation loss is
    computed and stored in ``exp.metrics``; when it does not improve, the
    learning rate is divided by 4. The test loss is printed at the end.
    """

    # dataset to use
    dataset: Argument & str

    # type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)
    model_name: Argument & str = default('LSTM')

    # size of word embeddings
    emsize: Argument & int = default(200)

    # number of hidden units per layer
    nhid: Argument & int = default(200)

    # number of layers
    nlayers: Argument & int = default(2)

    # initial learning rate
    lr: Argument & float = default(20)

    # gradient clipping
    clip: Argument & float = default(0.25)

    # upper epoch limit
    epochs: Argument & int = default(40)

    # sequence length
    bptt: Argument & int = default(35)

    # dropout applied to layers (0 = no dropout)
    dropout: Argument & float = default(0.2)

    # tie the word embedding and softmax weights
    tied: Argument & bool = default(False)

    # report interval
    log_interval: Argument & int = default(200)

    # Run model in pseudo-fp16 mode (fp16 storage fp32 math).
    fp16: Argument & bool = default(True)

    # Static loss scale, positive power of 2 values can improve fp16 convergence.
    static_loss_scale: Argument & float = default(128.0)

    # Use dynamic loss scaling.
    # If supplied, this argument supersedes --static-loss-scale.
    dynamic_loss_scale: Argument & bool = default(False)

    # path to save the final model
    save: Argument & str = default(None)

    # Number of sequences per training batch
    batch_size: Argument & int = default(64)

    # Maximum count before stopping
    max_count: Argument & int = default(1000)

    # Number of seconds for sampling items/second
    sample_duration: Argument & float = default(0.5)

    torch_settings = init_torch()
    device = torch_settings.device

    ###############################################################################
    # Load data
    ###############################################################################

    # Ensure that the dictionary length is a multiple of 8,
    # so that the decoder's GEMMs will use Tensor Cores.
    corpus = exp.get_dataset(dataset, pad_to_multiple_of=8).corpus

    # Starting from sequential data, batchify arranges the dataset into columns.
    # For instance, with the alphabet as the sequence and batch size 4, we'd get
    # ┌ a g m s ┐
    # │ b h n t │
    # │ c i o u │
    # │ d j p v │
    # │ e k q w │
    # └ f l r x ┘.
    # These columns are treated as independent by the model, which means that the
    # dependence of e. g. 'g' on 'f' can not be learned, but allows more efficient
    # batch processing.

    def batchify(data, bsz):
        # Work out how cleanly we can divide the dataset into bsz parts.
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        data = data.view(bsz, -1).t().contiguous()
        if torch_settings.cuda:
            data = data.cuda()
        return data

    eval_batch_size = 10
    train_data = batchify(corpus.train, batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################

    ntokens = len(corpus.dictionary)

    if fp16 and torch_settings.cuda:
        if ntokens % 8 != 0:
            print(
                "Warning: the dictionary size (ntokens = {}) should be a multiple of 8 to ensure "
                "Tensor Core use for the decoder's GEMMs.".format(ntokens))
        if emsize % 8 != 0 or nhid % 8 != 0 or batch_size % 8 != 0:
            print(
                "Warning: emsize = {}, nhid = {}, batch_size = {} should all be multiples of 8 "
                "to ensure Tensor Core use for the RNN's GEMMs.".format(
                    emsize, nhid, batch_size))

    model = model_module.RNNModel(model_name, ntokens, emsize, nhid, nlayers,
                                  dropout, tied).to(device)

    if torch_settings.cuda and fp16:
        model.type(torch.cuda.HalfTensor)

    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    ###############################################################################
    # Create the FP16_Optimizer instance
    ###############################################################################

    if fp16 and torch_settings.cuda:
        # If dynamic_loss_scale is False, static_loss_scale will be used.
        # If dynamic_loss_scale is True, it will take precedence over static_loss_scale.
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=static_loss_scale,
                                   dynamic_loss_scale=dynamic_loss_scale)

    ###############################################################################
    # Training code
    ###############################################################################

    def repackage_hidden(h):
        """Detaches hidden states from their history."""
        if torch.is_tensor(h):
            return h.detach()
        else:
            return tuple(repackage_hidden(v) for v in h)

    # get_batch subdivides the source data into chunks of length bptt.
    # If source is equal to the example output of the batchify function, with
    # a bptt-limit of 2, we'd get the following two Variables for i = 0:
    # ┌ a g m s ┐ ┌ b h n t ┐
    # └ b h n t ┘ └ c i o u ┘
    # Note that despite the name of the function, the subdivison of data is not
    # done along the batch dimension (i.e. dimension 1), since that was handled
    # by the batchify function. The chunks are along dimension 0, corresponding
    # to the seq_len dimension in the LSTM.

    def get_batch(source, i):
        seq_len = min(bptt, len(source) - 1 - i)
        data = source[i:i + seq_len]
        target = source[i + 1:i + 1 + seq_len].view(-1)
        return data, target

    def evaluate(data_source):
        """Return the average loss of the model over ``data_source``."""
        # Turn on evaluation mode which disables dropout.
        model.eval()
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(eval_batch_size)
        with torch.no_grad():
            for i in range(0, data_source.size(0) - 1, bptt):
                data, targets = get_batch(data_source, i)
                output, hidden = model(data, hidden)
                output_flat = output.view(-1, ntokens)
                #total loss can overflow if accumulated in fp16.
                total_loss += len(data) * criterion(output_flat,
                                                    targets).data.float()
                hidden = repackage_hidden(hidden)
        return to_python_float(total_loss) / len(data_source)

    def train(chrono):
        """Run one pass over ``train_data``, stopping early when ``chrono`` is done."""
        # Turn on training mode which enables dropout.
        model.train()
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        # The last row has no following row to serve as a target, so the
        # start offsets stop at size(0) - 1 (same bound as in evaluate);
        # otherwise get_batch could return a zero-length chunk.
        for i in range(0, train_data.size(0) - 1, bptt):
            if chrono.done():
                break
            with chrono(count=batch_size) as it:
                data, targets = get_batch(train_data, i)
                # Starting each batch, we detach the hidden state from how it was previously produced.
                # If we didn't, the model would try backpropagating all the way to start of the dataset.
                hidden = repackage_hidden(hidden)
                model.zero_grad()
                output, hidden = model(data, hidden)
                loss = criterion(output.view(-1, ntokens), targets)

                # Clipping gradients helps prevent the exploding gradient problem in RNNs / LSTMs.
                if fp16 and torch_settings.cuda:
                    optimizer.backward(loss)
                    optimizer.clip_master_grads(clip)
                else:
                    loss.backward()
                    # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                    # apex.fp16_utils.clip_grad_norm selects between "torch.nn.utils.clip_grad_norm"
                    # and "torch.nn.utils.clip_grad_norm_" based on Pytorch version.
                    # It's not FP16-specific, just a small fix to avoid deprecation warnings.
                    clip_grad_norm(model.parameters(), clip)

                optimizer.step()

                it.log(loss=loss.item())

    # Loop over training passes until the chrono is done.
    best_val_loss = None

    chrono = exp.chronos.create(
        "train",
        type="rate",
        sync=torch_settings.sync,
        sample_duration=sample_duration,
        max_count=max_count,
    )

    while not chrono.done():
        train(chrono)
        val_loss = evaluate(val_data)

        exp.metrics["val_loss"] = val_loss

        # Track the best validation loss seen so far. The comparison with
        # None is explicit so that a loss of exactly 0.0 is not mistaken
        # for "no value yet".
        if best_val_loss is None or val_loss < best_val_loss:
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the validation dataset.
            # The new rate must be written into the optimizer's parameter
            # groups; changing the local variable alone has no effect.
            # NOTE(review): assumes the fp16 optimizer wrapper exposes
            # ``param_groups`` like a regular optimizer -- confirm.
            lr /= 4.0
            for group in optimizer.param_groups:
                group['lr'] = lr

    # Run on test data.
    test_loss = evaluate(test_data)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
Example #10
0
def main(exp):
    """Benchmark training of a word-level recurrent language model.

    The corpus named by ``dataset`` is obtained through ``exp.get_dataset``,
    arranged into column batches, and an ``RNNModel`` is trained on it with a
    manual SGD update and gradient clipping.  Passes over the training data
    are repeated until the "train" chrono reports that it is done.  After each
    pass the validation loss is stored in ``exp.metrics["val_loss"]`` and the
    learning rate is divided by 4 whenever that loss did not improve.  The
    test loss and perplexity are printed once training stops.
    """

    # dataset to use
    dataset: Argument & str

    # type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU)
    model_name: Argument & str = default('LSTM')

    # size of word embeddings
    emsize: Argument & int = default(200)

    # number of hidden units per layer
    nhid: Argument & int = default(200)

    # number of layers
    nlayers: Argument & int = default(2)

    # initial learning rate
    lr: Argument & float = default(20)

    # gradient clipping
    clip: Argument & float = default(0.25)

    # upper epoch limit
    epochs: Argument & int = default(40)

    # sequence length
    bptt: Argument & int = default(35)

    # dropout applied to layers (0 = no dropout)
    dropout: Argument & float = default(0.2)

    # tie the word embedding and softmax weights
    tied: Argument & bool = default(False)

    # report interval
    log_interval: Argument & int = default(200)

    # path to save the final model
    save: Argument & str = default(None)

    # path to export the final model in onnx format
    onnx_export: Argument & str = default('')

    # training batch size
    batch_size: Argument & int = default(64)

    # Maximum count before stopping
    max_count: Argument & int = default(1000)

    # Number of seconds for sampling items/second
    sample_duration: Argument & float = default(0.5)

    torch_settings = init_torch()
    device = torch_settings.device

    ###############################################################################
    # Load data
    ###############################################################################

    corpus = exp.get_dataset(dataset).corpus

    # Starting from sequential data, batchify arranges the dataset into columns.
    # For instance, with the alphabet as the sequence and batch size 4, we'd get
    # ┌ a g m s ┐
    # │ b h n t │
    # │ c i o u │
    # │ d j p v │
    # │ e k q w │
    # └ f l r x ┘.
    # These columns are treated as independent by the model, which means that the
    # dependence of e. g. 'g' on 'f' can not be learned, but allows more efficient
    # batch processing.

    def batchify(data, bsz):
        """Reshape the 1-D token tensor ``data`` into ``bsz`` columns on ``device``."""
        # Work out how cleanly we can divide the dataset into bsz parts.
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        data = data.view(bsz, -1).t().contiguous()
        return data.to(device)

    eval_batch_size = 10
    train_data = batchify(corpus.train, batch_size)
    val_data = batchify(corpus.valid, eval_batch_size)
    test_data = batchify(corpus.test, eval_batch_size)

    ###############################################################################
    # Build the model
    ###############################################################################

    ntokens = len(corpus.dictionary)
    model = model_module.RNNModel(model_name, ntokens, emsize, nhid, nlayers,
                                  dropout, tied).to(device)

    criterion = nn.CrossEntropyLoss()

    ###############################################################################
    # Training code
    ###############################################################################

    def repackage_hidden(h):
        """Wraps hidden states in new Tensors, to detach them from their history."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(repackage_hidden(v) for v in h)

    # get_batch subdivides the source data into chunks of length bptt.
    # If source is equal to the example output of the batchify function, with
    # a bptt-limit of 2, we'd get the following two Variables for i = 0:
    # ┌ a g m s ┐ ┌ b h n t ┐
    # └ b h n t ┘ └ c i o u ┘
    # Note that despite the name of the function, the subdivision of data is not
    # done along the batch dimension (i.e. dimension 1), since that was handled
    # by the batchify function. The chunks are along dimension 0, corresponding
    # to the seq_len dimension in the LSTM.

    def get_batch(source, i):
        """Return the input chunk starting at row ``i`` and its flattened targets."""
        seq_len = min(bptt, len(source) - 1 - i)
        data = source[i:i + seq_len]
        target = source[i + 1:i + 1 + seq_len].view(-1)
        return data, target

    def evaluate(data_source):
        """Return the average per-row loss of the model over ``data_source``."""
        # Turn on evaluation mode which disables dropout.
        model.eval()
        total_loss = 0.
        hidden = model.init_hidden(eval_batch_size)
        with torch.no_grad():
            for i in range(0, data_source.size(0) - 1, bptt):
                data, targets = get_batch(data_source, i)
                output, hidden = model(data, hidden)
                output_flat = output.view(-1, ntokens)
                # Weight each chunk's mean loss by its length so that a
                # shorter final chunk does not count as much as a full one.
                total_loss += len(data) * criterion(output_flat,
                                                    targets).item()
                hidden = repackage_hidden(hidden)
        return total_loss / (len(data_source) - 1)

    def train(chrono):
        """Run one pass over ``train_data``, stopping early once ``chrono`` is done."""
        # Turn on training mode which enables dropout.
        model.train()
        # total_loss and start_time are only consumed by the disabled progress
        # report at the bottom of the loop.
        total_loss = 0.
        start_time = time.time()
        hidden = model.init_hidden(batch_size)

        # Stop at size(0) - 1, like evaluate(): the last row can only serve as
        # a target, so a chunk starting there would be an empty batch.
        for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
            if chrono.done():
                break
            with chrono(count=batch_size) as it:
                data, targets = get_batch(train_data, i)
                # Starting each batch, we detach the hidden state from how it was previously produced.
                # If we didn't, the model would try backpropagating all the way to start of the dataset.
                hidden = repackage_hidden(hidden)
                model.zero_grad()
                output, hidden = model(data, hidden)
                loss = criterion(output.view(-1, ntokens), targets)
                loss.backward()

                # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
                # Plain SGD step; `lr` is read from the enclosing scope so the
                # annealing done in the epoch loop takes effect here.
                for p in model.parameters():
                    if p.grad is None:
                        # Parameter did not take part in this step.
                        continue
                    p.data.add_(p.grad.data, alpha=-lr)

                loss_value = loss.item()
                it.log(loss=loss_value)
                total_loss += loss_value

                # if batch % log_interval == 0 and batch > 0:
                #     cur_loss = total_loss / log_interval
                #     elapsed = time.time() - start_time
                #     print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                #             'loss {:5.2f} | ppl {:8.2f}'.format(
                #         epoch, batch, len(train_data) // bptt, lr,
                #         elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))
                #     total_loss = 0
                #     start_time = time.time()

    def export_onnx(path, batch_size, seq_len):
        """Export the model to ``path`` in ONNX format using an all-zero dummy input."""
        print('The model is also exported in ONNX format at {}'.format(
            os.path.realpath(path)))
        model.eval()
        dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(
            -1, batch_size).to(device)
        hidden = model.init_hidden(batch_size)
        torch.onnx.export(model, (dummy_input, hidden), path)

    # Loop over epochs.
    best_val_loss = None

    chrono = exp.chronos.create(
        "train",
        type="rate",
        sync=torch_settings.sync,
        sample_duration=sample_duration,
        max_count=max_count,
    )

    while not chrono.done():
        train(chrono)
        val_loss = evaluate(val_data)

        exp.metrics["val_loss"] = val_loss

        # Track the best validation loss seen so far.  Compare against None
        # explicitly so that a loss of exactly 0.0 is not mistaken for "unset".
        if best_val_loss is None or val_loss < best_val_loss:
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement has been seen in the validation dataset.
            lr /= 4.0

    # Run on test data.
    test_loss = evaluate(test_data)
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
Example #11
0
def main(exp):
    """Benchmark fitting a linear layer to a random polynomial.

    A target polynomial of degree ``poly_degree`` is drawn at random, batches
    of ``(features, target)`` pairs are generated endlessly, and a single
    ``torch.nn.Linear`` layer is trained on them with a smooth-L1 loss and a
    fixed-step gradient update.  The loop is driven by ``dataloop``; when it
    ends, the learned and the actual polynomial are printed.
    """

    # Degree of the polynomial
    poly_degree: Argument & int = default(4)

    # Number of examples per batch
    batch_size: Argument & int = default(64)

    # A single init_torch() call is enough; the result of an earlier second
    # call was discarded.
    torch_settings = init_torch()
    device = torch_settings.device

    W_target = torch.randn(poly_degree, 1) * 5
    b_target = torch.randn(1) * 5

    def make_features(x):
        """Builds features i.e. a matrix with columns [x, x^2, ..., x^poly_degree]."""
        x = x.unsqueeze(1)
        return torch.cat([x**i for i in range(1, poly_degree + 1)], 1)

    def f(x):
        """Approximated function."""
        return x.mm(W_target) + b_target.item()

    def poly_desc(W, b):
        """Creates a string description of a polynomial.

        ``W[i]`` is the coefficient of ``x^(i + 1)``, matching the column
        order produced by ``make_features``; ``b[0]`` is the constant term.
        """
        terms = ''.join(
            '{:+.2f} x^{} '.format(w, i + 1) for i, w in enumerate(W))
        return 'y = ' + terms + '{:+.2f}'.format(b[0])

    def get_batch():
        """Builds a batch i.e. (x, f(x)) pair."""
        random = torch.randn(batch_size)
        x = make_features(random)
        y = f(x)
        return x, y

    def dataset():
        """Yield freshly generated batches forever."""
        while True:
            yield get_batch()

    # Define model
    fc = torch.nn.Linear(W_target.size(0), 1)
    fc.to(device)

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    for it, (batch_x, batch_y) in dataloop(dataset(), wrapper=wrapper):
        it.set_count(batch_size)

        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Reset gradients
        fc.zero_grad()

        # Forward pass
        output = F.smooth_l1_loss(fc(batch_x), batch_y)
        loss = output.item()

        it.log(loss=loss)

        # Backward pass
        output.backward()

        # Apply gradients
        for param in fc.parameters():
            param.data.add_(-0.01 * param.grad.data)

    print('==> Learned function:\t', poly_desc(fc.weight.view(-1), fc.bias))
    print('==> Actual function:\t', poly_desc(W_target.view(-1), b_target))
Example #12
0
def main(exp):
    """Collect the benchmark options and hand them to ``run_benchmarking``.

    The device id list is parsed from its comma-separated form, the
    distributed settings are gathered into a dict, and — when
    ``distributed_dataparallel`` is requested — the settings it needs are
    checked before the benchmark is started.
    """
    # Network names listed for reference; the list is not consulted below.
    models = [
        'alexnet',
        'vgg11', 'vgg13', 'vgg16', 'vgg19',
        'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn',
        'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
        'shufflenet',
        'shufflenet_v2_x05', 'shufflenet_v2_x10', 'shufflenet_v2_x15',
        'SqueezeNet', 'SqueezeNet1.1',
        'densenet121', 'densenet169', 'densenet201', 'densenet161',
        'inception', 'inception_v3',
        'resnext50', 'resnext101',
        'mobilenet_v2', 'googlenet',
        'deeplabv3_resnet50', 'deeplabv3_resnet101',
        'fcn_resnet50', 'fcn_resnet101',
    ]

    # Network to run.
    network: Argument & str

    # Batch size (will be split among devices used by this invocation)
    batch_size: Argument & int = default(64)

    # FP16 mixed precision benchmarking
    fp16: Argument & int = default(0)

    # Use torch.nn.DataParallel api to run single process on multiple devices. Use only one of --dataparallel or --distributed_dataparallel
    dataparallel: Argument & bool = default(False)

    # Use torch.nn.parallel.DistributedDataParallel api to run on multiple processes/nodes. The multiple processes need to be launched manually, this script will only launch ONE process per invocation. Use only one of --dataparallel or --distributed_dataparallel
    distributed_dataparallel: Argument & bool = default(False)

    # Comma-separated list (no spaces) to specify which HIP devices (0-indexed) to run dataparallel or distributedDataParallel api on. Might need to use HIP_VISIBLE_DEVICES to limit visiblity of devices to different processes.
    device_ids: Argument & str = default(None)

    # Rank of this process. Required for --distributed_dataparallel
    rank: Argument & int = default(None)

    # Total number of ranks/processes. Required for --distributed_dataparallel
    world_size: Argument & int = default(None)

    # Backend used for distributed training. Can be one of 'nccl' or 'gloo'. Required for --distributed_dataparallel
    dist_backend: Argument & str = default(None)

    # url used for rendezvous of processes in distributed training. Needs to contain IP and open port of master rank0 eg. 'tcp://172.23.2.1:54321'. Required for --distributed_dataparallel
    dist_url: Argument & str = default(None)

    torch_settings = init_torch()

    # "0,2,3" -> [0, 2, 3]; an empty or missing option means no explicit ids.
    device_ids_values = (
        [int(token) for token in device_ids.split(",")] if device_ids else None
    )

    distributed_parameters = {
        'rank': rank,
        'world_size': world_size,
        'dist_backend': dist_backend,
        'dist_url': dist_url,
    }

    # Some arguments are required for distributed_dataparallel
    if distributed_dataparallel:
        required = (rank, world_size, dist_backend, dist_url)
        assert all(value is not None for value in required), \
            "rank, world-size, dist-backend and dist-url are required arguments for distributed_dataparallel"

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    run_benchmarking(
        exp,
        wrapper,
        network,
        batch_size,
        fp16,
        dataparallel,
        distributed_dataparallel,
        device_ids_values,
        distributed_parameters,
    )
Example #13
0
def main(exp):
    """Benchmark training of a variational auto-encoder.

    The dataset named by ``dataset`` is obtained through ``exp.get_dataset``
    and its ``train`` split is iterated with ``dataloop``.  Each step runs a
    forward/backward pass of ``VAE`` with the reconstruction + KL loss, logs
    the loss and applies an Adam update.  The nested ``test`` helper is
    defined but never called from here.
    """

    # Batch size
    batch_size: Argument & int = default(256)

    # Dataset to use
    dataset: Argument

    torch_settings = init_torch()
    device = torch_settings.device
    dataset = exp.get_dataset(dataset)

    # Extra DataLoader options are only passed when CUDA is in use.
    kwargs = {
        'num_workers': 1,
        'pin_memory': True
    } if torch_settings.cuda else {}
    train_loader = torch.utils.data.DataLoader(
        dataset.train,
        batch_size=batch_size,
        shuffle=True,
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        dataset.test,
        batch_size=batch_size,
        shuffle=True,
        **kwargs,
    )

    model = VAE().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Reconstruction + KL divergence losses summed over all elements and batch
    def loss_function(recon_x, x, mu, logvar):
        """Return the summed binary cross-entropy plus the KL divergence term."""
        BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), reduction='sum')

        # see Appendix B from VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

        return BCE + KLD

    def test(epoch):
        """Print the mean test loss and save a reconstruction image for ``epoch``."""
        # Not tested
        model.eval()
        test_loss = 0

        with torch.no_grad():

            for i, (data, _) in enumerate(test_loader):
                data = data.to(device)
                recon_batch, mu, logvar = model(data)
                test_loss += loss_function(recon_batch, data, mu,
                                           logvar).item()

                if i == 0:
                    n = min(data.size(0), 8)
                    # Reshape by the actual number of samples in this batch:
                    # it can be smaller than batch_size when the test split
                    # holds fewer samples than one full batch.
                    comparison = torch.cat([
                        data[:n],
                        recon_batch.view(data.size(0), 1, 28, 28)[:n]
                    ])
                    save_image(comparison.cpu(),
                               'results/reconstruction_' + str(epoch) + '.png',
                               nrow=n)

        test_loss /= len(test_loader.dataset)
        print('====> Test set loss: {:.4f}'.format(test_loss))

    model.train()

    wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

    # The second element of each batch is not needed for the loss.
    for it, (data, _) in dataloop(train_loader, wrapper=wrapper):
        # Count the real batch length; the last batch may be shorter.
        it.set_count(len(data))

        data = data.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model(data)
        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        it.log(loss=loss.item())
        optimizer.step()
Example #14
0
def main(exp):
    """Benchmark reinforcement-learning updates with A2C, PPO or ACKTR.

    Vectorised environments are created with ``make_vec_envs`` and a
    ``Policy`` is trained by the agent selected through ``algorithm``.  Every
    iteration of the ``dataloop`` collects ``num_steps`` transitions from each
    of the ``num_processes`` environments into a ``RolloutStorage``, computes
    returns, performs one agent update and logs the value and action losses.
    The environments are closed when the function exits, whether it finishes
    normally or raises.
    """

    # Algorithm to use: a2c | ppo | acktr
    algorithm: Argument = default("a2c")

    # Gail epochs (default: 5)
    gail_epoch: Argument & int = default(5)

    # Learning rate (default: 7e-4)
    lr: Argument & float = default(7e-4)

    # Directory that contains expert demonstrations for gail
    gail_experts_dir: Argument = default("./gail_experts")

    # Gail batch size (default: 128)
    gail_batch_size: Argument & int = default(128)

    # Do imitation learning with gail
    gail: Argument & bool = default(False)

    # RMSprop optimizer epsilon (default: 1e-5)
    eps: Argument & float = default(1e-5)

    # RMSprop optimizer alpha (default: 0.99)
    alpha: Argument & float = default(0.99)

    # discount factor for rewards (default: 0.99)
    gamma: Argument & float = default(0.99)

    # use generalized advantage estimation
    use_gae: Argument & bool = default(False)

    # gae lambda parameter (default: 0.95)
    gae_lambda: Argument & float = default(0.95)

    # entropy term coefficient (default: 0.01)
    entropy_coef: Argument & float = default(0.01)

    # value loss coefficient (default: 0.5)
    value_loss_coef: Argument & float = default(0.5)

    # max norm of gradients (default: 0.5)
    max_grad_norm: Argument & float = default(0.5)

    # sets flags for determinism when using CUDA (potentially slow!)
    cuda_deterministic: Argument & bool = default(False)

    # how many training CPU processes to use (default: 16)
    num_processes: Argument & int = default(16)

    # number of forward steps in A2C (default: 5)
    num_steps: Argument & int = default(5)

    # number of ppo epochs (default: 4)
    ppo_epoch: Argument & int = default(4)

    # number of batches for ppo (default: 32)
    num_mini_batch: Argument & int = default(32)

    # ppo clip parameter (default: 0.2)
    clip_param: Argument & float = default(0.2)

    # # log interval, one log per n updates (default: 10)
    # log_interval: Argument & int = default(10)

    # # save interval, one save per n updates (default: 100)
    # save_interval: Argument & int = default(100)

    # # eval interval, one eval per n updates (default: None)
    # eval_interval: Argument & int = default(None)

    # number of environment steps to train (default: 10e6)
    num_env_steps: Argument & int = default(10e6)

    # environment to train on (default: PongNoFrameskip-v4)
    env_name: Argument = default('PongNoFrameskip-v4')

    # directory to save agent logs (default: None)
    log_dir: Argument = default(None)

    # directory to save agent logs (default: ./trained_models/)
    save_dir: Argument = default('./trained_models/')

    # compute returns taking into account time limits
    use_proper_time_limits: Argument & bool = default(False)

    # use a recurrent policy
    recurrent_policy: Argument & bool = default(False)

    # use a linear schedule on the learning rate
    use_linear_lr_decay: Argument & bool = default(False)

    # Seed to use
    seed: Argument & int = default(1234)

    # Number of iterations
    iterations: Argument & int = default(10)

    # we compute steps/sec
    batch_size = num_processes

    torch_settings = init_torch()
    device = torch_settings.device

    assert algorithm in ['a2c', 'ppo', 'acktr']

    if recurrent_policy:
        assert algorithm in ['a2c', 'ppo'], \
            'Recurrent policy is not implemented for ACKTR'

    # Total number of updates implied by the step budget; only consulted by
    # the linear learning-rate schedule.  int() because the default for
    # num_env_steps is written as a float literal.
    num_updates = int(num_env_steps) // num_steps // num_processes

    envs = make_vec_envs(env_name, seed, num_processes, gamma, log_dir, device,
                         False)

    # Everything that uses the environments lives inside try/finally so that
    # envs.close() also runs when setup or training raises.
    try:
        actor_critic = Policy(envs.observation_space.shape,
                              envs.action_space,
                              base_kwargs={'recurrent': recurrent_policy})
        actor_critic.to(device)

        if algorithm == 'a2c':
            agent = algo.A2C_ACKTR(actor_critic,
                                   value_loss_coef,
                                   entropy_coef,
                                   lr=lr,
                                   eps=eps,
                                   alpha=alpha,
                                   max_grad_norm=max_grad_norm)
        elif algorithm == 'ppo':
            agent = algo.PPO(actor_critic,
                             clip_param,
                             ppo_epoch,
                             num_mini_batch,
                             value_loss_coef,
                             entropy_coef,
                             lr=lr,
                             eps=eps,
                             max_grad_norm=max_grad_norm)
        elif algorithm == 'acktr':
            agent = algo.A2C_ACKTR(actor_critic,
                                   value_loss_coef,
                                   entropy_coef,
                                   acktr=True)

        rollouts = RolloutStorage(num_steps, num_processes,
                                  envs.observation_space.shape,
                                  envs.action_space,
                                  actor_critic.recurrent_hidden_state_size)
        obs = envs.reset()
        rollouts.obs[0].copy_(obs)
        rollouts.to(device)

        episode_rewards = deque(maxlen=10)

        # `start` is only consumed by the disabled progress report below.
        start = time.time()

        wrapper = iteration_wrapper(exp, sync=torch_settings.sync)

        for it, j in dataloop(count(), wrapper=wrapper):
            it.set_count(batch_size)

            if use_linear_lr_decay:
                utils.update_linear_schedule(
                    agent.optimizer, j, num_updates,
                    agent.optimizer.lr if algorithm == "acktr" else lr)

            for step in range(num_steps):
                # Sample actions
                with torch.no_grad():
                    (value, action, action_log_prob,
                     recurrent_hidden_states) = actor_critic.act(
                        rollouts.obs[step],
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])

                # Observe reward and next obs
                obs, reward, done, infos = envs.step(action)

                for info in infos:
                    if 'episode' in info.keys():
                        episode_rewards.append(info['episode']['r'])

                # If done then clean the history of observations.
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in done])
                bad_masks = torch.FloatTensor(
                    [[0.0] if 'bad_transition' in info.keys() else [1.0]
                     for info in infos])

                rollouts.insert(obs, recurrent_hidden_states, action,
                                action_log_prob, value, reward, masks,
                                bad_masks)

            with torch.no_grad():
                next_value = actor_critic.get_value(
                    rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                    rollouts.masks[-1]).detach()
            # ---
            rollouts.compute_returns(next_value, use_gae, gamma, gae_lambda,
                                     use_proper_time_limits)

            value_loss, action_loss, dist_entropy = agent.update(rollouts)

            it.log(
                value_loss=value_loss,
                action_loss=action_loss,
            )

            rollouts.after_update()

            # Only consumed by the disabled progress report below.
            total_num_steps = (j + 1) * num_processes * num_steps

            # if j % log_interval == 0 and len(episode_rewards) > 1:
            #     end = time.time()
            #     print(
            #         "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}\n".
            #         format(j, total_num_steps,
            #             int(total_num_steps / (end - start)),
            #             len(episode_rewards),
            #             np.mean(episode_rewards),
            #             np.median(episode_rewards),
            #             np.min(episode_rewards),
            #             np.max(episode_rewards), dist_entropy,
            #             value_loss, action_loss))
    finally:
        envs.close()