def _get_random_data(n, **tkwargs):
    """Draw two noisy 1-D toy datasets of sizes ``n + 1`` and ``n``.

    Inputs are an evenly spaced grid on [0, 0.95] plus uniform jitter scaled
    by 0.05.  The first target is a sine and the second a cosine of
    ``2 * pi * x``, each with Gaussian noise scaled by 0.2.

    Args:
        n: number of points in the second dataset (the first has ``n + 1``).
        **tkwargs: forwarded to ``torch.linspace`` and ``torch.rand``.

    Returns:
        ``(x1, x2, y1, y2)`` where the inputs carry a trailing singleton
        dimension and the targets are 1-D.
    """

    def jittered_grid(count):
        # grid first, then the uniform jitter -- same draw order as before
        grid = torch.linspace(0, 0.95, count, **tkwargs)
        return grid + 0.05 * torch.rand(count, **tkwargs)

    two_pi = 2 * math.pi
    x_first = jittered_grid(n + 1)
    x_second = jittered_grid(n)
    y_first = torch.sin(x_first * two_pi) + 0.2 * torch.randn_like(x_first)
    y_second = torch.cos(x_second * two_pi) + 0.2 * torch.randn_like(x_second)
    return x_first.unsqueeze(-1), x_second.unsqueeze(-1), y_first, y_second
Ejemplo n.º 2
0
def _get_random_mt_data(**tkwargs):
    """Build a two-task toy dataset on 10 shared, jittered inputs.

    Task 0 targets are a noisy sine and task 1 targets a noisy cosine of
    ``2 * pi * x``.  The two tasks are concatenated, task 0 first.

    Args:
        **tkwargs: forwarded to ``torch.linspace`` and ``torch.rand``.

    Returns:
        ``(train_X, train_Y)``: ``train_X`` stacks the input value and the
        task index (cast to the input dtype) along the last dimension;
        ``train_Y`` is the 1-D concatenation of both tasks' targets.
    """
    n_points = 10
    x = torch.linspace(0, 0.95, n_points, **tkwargs) + 0.05 * torch.rand(
        n_points, **tkwargs
    )
    angle = x * (2 * math.pi)
    y_task0 = torch.sin(angle) + torch.randn_like(x) * 0.2
    y_task1 = torch.cos(angle) + torch.randn_like(x) * 0.2

    # integer task labels, one run of 0s followed by one run of 1s
    task_ids = torch.cat(
        [torch.full_like(x, dtype=torch.long, fill_value=task) for task in (0, 1)]
    )
    inputs = torch.cat([x, x])

    train_X = torch.stack([inputs, task_ids.type_as(inputs)], dim=-1)
    train_Y = torch.cat([y_task0, y_task1])
    return train_X, train_Y
Ejemplo n.º 3
0
def bisect_demo():
    """Bisect random lower/upper bounds along one randomly chosen column per row.

    Two random 5 x 4 tensors give element-wise lower (LB) and upper (UB)
    bounds.  For every row one column is picked at random; the "left" half
    keeps LB and lowers UB to the midpoint in that column, while the "right"
    half keeps UB and raises LB to the midpoint in that column.

    The key is to use scatter_() to convert the column indices into a one-hot
    mask that can drive torch.where().

    Returns:
        ``(newlb, newub)``: the left and right halves concatenated along
        dim 0, i.e. two tensors with twice as many rows as the inputs.
    """
    t1t2 = torch.stack((torch.randn(5, 4), torch.randn(5, 4)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)
    print('LB:', lb)
    print('UB:', ub)

    # random idxs for testing
    idxs = torch.randn_like(lb)
    _, idxs = idxs.max(dim=-1)  # <Batch>
    print('Split idxs:', idxs)

    idxs = idxs.unsqueeze(dim=-1)  # Batch x 1
    # One-hot encode the chosen column.  The mask must be boolean:
    # torch.where() no longer accepts a uint8 (.byte()) condition tensor.
    idxs = torch.zeros_like(lb).scatter_(-1, idxs, 1).bool()
    print('Reorg idxs:', idxs)

    mid = (lb + ub) / 2.0
    lefts_lb = lb
    lefts_ub = torch.where(idxs, mid, ub)  # use the one-hot encoding to call torch.where()
    rights_lb = torch.where(idxs, mid, lb)  # definitely faster than element-wise reassignment
    rights_ub = ub

    print('LEFT LB:', lefts_lb)
    print('LEFT UB:', lefts_ub)
    print('RIGHT LB:', rights_lb)
    print('RIGHT UB:', rights_ub)

    newlb = torch.cat((lefts_lb, rights_lb), dim=0)
    newub = torch.cat((lefts_ub, rights_ub), dim=0)
    return newlb, newub
Ejemplo n.º 4
0
 def reparameterize(self, mu, logvar):
     """Sample around ``mu`` while training; return ``mu`` itself otherwise.

     In training mode the result is ``noise * exp(0.5 * logvar) + mu`` with
     ``noise`` drawn from a standard normal.
     """
     if not self.training:
         return mu
     sigma = (0.5 * logvar).exp()
     sample = torch.randn_like(sigma) * sigma
     sample += mu  # in-place on the fresh tensor, as in the original
     return sample
Ejemplo n.º 5
0
def varlen_lstm_backward_setup(forward_output, seed=None):
    """Pad variable-length outputs and draw a matching random gradient.

    Args:
        forward_output: indexable whose first element is the sequence of
            tensors handed to ``torch.nn.utils.rnn.pad_sequence``.
        seed: optional RNG seed.  Any value other than ``None`` (including
            ``0``) is passed to ``torch.manual_seed`` before sampling.

    Returns:
        ``(padded, grad)``: the padded tensor and a standard-normal tensor
        of the same shape.
    """
    # ``if seed:`` would silently skip seeding for the valid seed 0.
    if seed is not None:
        torch.manual_seed(seed)
    rnn_utils = torch.nn.utils.rnn
    sequences = forward_output[0]
    padded = rnn_utils.pad_sequence(sequences)
    grad = torch.randn_like(padded)
    return padded, grad
Ejemplo n.º 6
0
    def forward(self, x): # pylint: disable=arguments-differ
        """Encode ``x``, sample a latent with the reparameterization trick, decode it.

        The latent is ``noise * exp(logsigma) + mu`` with standard-normal
        ``noise``.

        :returns: (recon_x, mu, logsigma)
        """
        mu, logsigma = self.encoder(x)
        std = torch.exp(logsigma)
        latent = torch.randn_like(std) * std
        latent += mu  # in-place on the fresh tensor, as in the original
        return self.decoder(latent), mu, logsigma
Ejemplo n.º 7
0
def to_latent(obs, next_obs):
    """ Transform observations to latent space.

    Each input is reshaped to a batch of 3-channel SIZE x SIZE frames,
    resized to RED_SIZE, encoded with the module-level ``vae``, and a latent
    is sampled as ``mu + exp(logsigma) * noise``.  Everything runs under
    ``torch.no_grad()``.

    :args obs: 5D torch tensor (BSIZE, SEQ_LEN, ASIZE, SIZE, SIZE)
    :args next_obs: 5D torch tensor (BSIZE, SEQ_LEN, ASIZE, SIZE, SIZE)

    :returns: (latent_obs, latent_next_obs)
        - latent_obs: 3D torch tensor (BSIZE, SEQ_LEN, LSIZE)
        - latent_next_obs: 3D torch tensor (BSIZE, SEQ_LEN, LSIZE)
    """
    with torch.no_grad():
        # ``upsample`` is deprecated in favour of ``interpolate`` (same
        # arguments); assumes ``f`` is torch.nn.functional -- TODO confirm.
        obs, next_obs = [
            f.interpolate(x.view(-1, 3, SIZE, SIZE), size=RED_SIZE,
                          mode='bilinear', align_corners=True)
            for x in (obs, next_obs)]

        # vae(x) returns a 3-tuple here; only the last two entries are used
        (obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma) = [
            vae(x)[1:] for x in (obs, next_obs)]

        latent_obs, latent_next_obs = [
            (x_mu + x_logsigma.exp() * torch.randn_like(x_mu)).view(BSIZE, SEQ_LEN, LSIZE)
            for x_mu, x_logsigma in
            [(obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma)]]
    return latent_obs, latent_next_obs
def guide(batch, tag, hidden, label):
    """Build Normal distributions over the parameters of ``net`` and sample a lifted module.

    For each listed parameter of the module-level ``net`` two pyro params
    are registered, ``<prefix>_mu`` and ``<prefix>_sigma``, both initialised
    from standard-normal draws shaped like that parameter.  The sigma param
    is passed through a softplus before being used as the scale.  The
    resulting ``Normal`` objects are handed to ``pyro.random_module`` and the
    lifted module is called once.

    None of the four arguments is read in the body.

    Returns:
        Whatever calling the lifted module returns.
    """
    softplus = torch.nn.Softplus()

    # (pyro param-name prefix, dotted parameter name on ``net``), listed in
    # the order the variational parameters are created.
    specs = [
        ("embedding", 'embedding.weight'),
        ("gruihw", 'gru.weight_ih_l0'),
        ("gruihb", 'gru.bias_ih_l0'),
        ("gruhhw", 'gru.weight_hh_l0'),
        ("gruhhb", 'gru.bias_hh_l0'),
        ("fc1w", 'fc1.weight'),
        ("fc1b", 'fc1.bias'),
        ("fc2w", 'fc2.weight'),
        ("fc2b", 'fc2.bias'),
    ]

    priors = {}
    for prefix, dotted_name in specs:
        submodule_name, attr_name = dotted_name.split('.')
        reference = getattr(getattr(net, submodule_name), attr_name)

        # draw both initial values before registering either param
        loc_init = torch.randn_like(reference)
        scale_init = torch.randn_like(reference)
        loc = pyro.param(prefix + "_mu", loc_init)
        scale = softplus(pyro.param(prefix + "_sigma", scale_init))
        priors[dotted_name] = Normal(loc=loc, scale=scale)

    lifted_module = pyro.random_module("module", net, priors)

    return lifted_module()
Ejemplo n.º 9
0
 def reparameterize(self, mu, log_var):
     """Return ``mu + sigma * noise`` with ``sigma = exp(log_var / 2)`` and standard-normal ``noise``."""
     sigma = (log_var / 2).exp()
     noise = torch.randn_like(sigma)
     return mu + sigma * noise
Ejemplo n.º 10
0
 def sample_prediction(self, x):
     """Call the object on ``x`` to get ``(mu, sigma)`` and return ``mu + sigma * noise``.

     ``noise`` is standard normal with the shape of ``sigma``.
     """
     mean, scale = self(x)
     noise = torch.randn_like(scale)
     return mean + scale * noise
Ejemplo n.º 11
0
 def reparameterize(mu, logvar):
     """Return ``mu + noise * exp(0.5 * logvar)``; ``noise`` is standard normal shaped like ``mu``."""
     scale = (0.5 * logvar).exp()
     noise = torch.randn_like(mu)
     return mu + scale * noise
Ejemplo n.º 12
0
 def reparameterize(self, mu, log_var):
     """Draw ``z = mu + noise * exp(log_var / 2)`` with standard-normal ``noise``."""
     half_log_var = log_var / 2
     scale = half_log_var.exp()
     return mu + torch.randn_like(scale) * scale
Ejemplo n.º 13
0
def reparameterize(mean, logvar):
    """Return ``mean + noise * exp(0.5 * logvar)`` with standard-normal ``noise``."""
    sigma = (0.5 * logvar).exp()
    noise = torch.randn_like(sigma)
    return mean + sigma * noise
Ejemplo n.º 14
0
 def reparameterize(self, mu, logvar):
     """Return ``noise * exp(0.5 * logvar) + mu`` with standard-normal ``noise``."""
     scale = (0.5 * logvar).exp()
     sample = torch.randn_like(scale) * scale
     sample += mu  # in-place on the fresh tensor, as in the original
     return sample
Ejemplo n.º 15
0
def calc_msssim(n_images, model, checkpoint):
    '''Calculate image diversity with ms-ssim (multi-scale structural similarity)
    and reconstruction quality with rmse, then write both to a YAML file.

    For every batch the images are encoded, reparameterized and decoded into
    reconstructions (x_r); samples (x_p) are decoded from standard-normal
    latents of the same shape.  Batches are consumed until at least
    ``n_images`` images have been processed; the loader is re-iterated if one
    pass is not enough.

    The result is written to ``assets/<checkpoint>.yaml``.

    Args:
        n_images (int) : number of images to generate to compare against real
        model (model) : vae model to encode and decode image tensors
        checkpoint (str) : stem for checkpoint file to load

    Returns:
        avg_msssim (float) : average ms-ssim score between reconstructions and samples (smaller is more diverse)
        avg_rmse (float) : average over batches of sqrt(summed squared error) / batch size (smaller is more accurate)

    Raises:
        ValueError: if ``n_images`` is not positive or the dataloader yields no batches.
    '''
    if n_images <= 0:
        raise ValueError('n_images must be a positive integer')

    # dataloader
    # normalizes to [0-1]
    trans = transforms.ToTensor()
    dataset = datasets.CelebA(transforms=trans)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # load model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    utils.load_checkpoint(f'checkpoints/{checkpoint}.pt', model)
    model = model.to(device)

    # loop over loader and calc ms-ssim and rmse for each batch until hits n_images
    image_count = 0
    m_scores = []
    rmse_scores = []

    # evaluation only: no gradients are needed for any of the metrics
    with torch.no_grad():
        while image_count < n_images:
            seen_this_pass = 0
            for images in dataloader:

                images = images.to(device)

                # real image recon
                z_mu, z_log_var = model.encoder(images)
                z = model.reparameterize(z_mu, z_log_var)
                x_r = model.decoder(z)

                # calc rmse
                # sum of all pixel loss per image
                rmse = torch.sqrt(F.mse_loss(x_r, images,
                                             reduction='sum')) / len(images)
                rmse_scores.append(rmse.item())

                # sampled
                z_p = torch.randn_like(z)
                x_p = model.decoder(z_p)

                # calc ms-ssim from recon to sampled
                # good model will have low similarity between recon and sampled, indicates not memorizing
                m = ms_ssim(x_r,
                            x_p,
                            data_range=1.0,
                            size_average=True,
                            win_size=7)
                m_scores.append(m.item())

                # up counter
                image_count += len(images)
                seen_this_pass += len(images)

                # stop as soon as enough images were scored instead of
                # always running through the whole dataset
                if image_count >= n_images:
                    break

            # an empty loader would otherwise spin in the while-loop forever
            if seen_this_pass == 0:
                raise ValueError('dataloader yielded no images')

    # averaged results
    avg_msssim = sum(m_scores) / len(m_scores)
    avg_rmse = sum(rmse_scores) / len(rmse_scores)

    # write
    output = {
        'checkpoint': checkpoint,
        'ms_ssim': avg_msssim,
        'avg_rmse': avg_rmse
    }

    Path(f'assets/{checkpoint}.yaml').write_text(yaml.dump(output))

    return avg_msssim, avg_rmse
def optimize_network(args, model, y, mask, mode, **kwargs):
    """Optimise per-point latent variables (and, in train mode, the network) against ``y``.

    One latent vector is kept for every row of ``y``.  Each epoch the points
    are shuffled and processed in full mini-batches; the masked MSE from
    ``torch_mse_mask`` (optionally plus a weighted KL term) is minimised.

    Args:
        args: mapping of hyper-parameters.  Keys read here:
            ``n_train_epochs``, ``n_test_epochs``, ``batch_size``,
            ``test_batch_size`` (optional), ``latent_param_init``,
            ``test_latent_param_init``, ``device``, ``model``, ``net_lr``,
            ``latent_param_lr``, ``test_latent_param_lr``, ``use_adam``,
            ``sfm_transform``, ``kl``, ``log_var``, ``ratio_kl`` and
            ``reduce`` (optional).
        model: the network; called on a batch of latents (plus the latent
            log-variances when ``args['model'] == 'vae_free'``).
        y: target tensor; its first dimension is the number of points.
        mask: tensor indexed like ``y`` and passed to ``torch_mse_mask``.
        mode: ``'train'`` or ``'test'``.  In test mode the network is frozen
            via ``model.freeze_hiddens()`` and only the latents are updated.
        **kwargs: ``train_latents`` (required when the latent initialisation
            is ``'train'``) and ``clean_y`` (optional, test mode only).

    Returns:
        train mode: ``(train_latents, losses)``.
        test mode: ``(losses, (final_test_loss, final_clean_loss), all_pred,
        test_latents)``; both final losses are ``None`` when no ``clean_y``
        was supplied.
        ``losses`` holds ``[avg_loss, avg_kl_loss, avg_total_loss]`` per
        epoch; the latents are a ``(latents, latent_log_var)`` tuple for the
        ``'vae_free'`` model.

    Raises:
        ValueError: if there are fewer points than one batch.
        NotImplementedError: if ``args['kl']`` is set for a model other than
            ``'vae'`` or ``'vae_free'``.
    """
    assert mode in ['train', 'test']
    print(y.shape)

    # load appropriate hyper-parameters
    if mode == 'train':
        n_epochs = args['n_train_epochs']
        # NOTE(review): the configured epoch count is immediately replaced by
        # a hard-coded 4000.  Kept to preserve behaviour -- confirm whether
        # this is a leftover debugging override.
        n_epochs = 4000
        batch_size = args['batch_size']
        param_init = args['latent_param_init']

    elif mode == 'test':
        n_epochs = args['n_test_epochs']
        if args.get('test_batch_size') is not None:
            batch_size = args['test_batch_size']
        else:
            batch_size = args['batch_size']
        param_init = args['test_latent_param_init']

    print(f"Mode: {mode}")
    print(f"Batch size: {batch_size}")

    n_points = y.size()[0]

    # Only full batches are used during optimisation; with none available the
    # per-epoch averages below would divide by zero.
    n_batches = n_points // batch_size
    if n_batches == 0:
        raise ValueError(
            f"need at least one full batch: got {n_points} points "
            f"for batch size {batch_size}")

    # initialize latent variables
    if param_init == 'pca':
        pca = PCA(model.latent_size)
        pca.fit(y.cpu())

        # every point starts from the PCA explained-variance ratios
        latents = torch.tensor(pca.explained_variance_ratio_,
                               dtype=torch.float,
                               device=args['device'])
        latents = latents.repeat(n_points, 1)
        print(latents.size())

    elif param_init == 'train':
        assert mode != 'train'

        print("Initializing latents using training latents as mean",
              file=sys.stderr)
        train_latents = kwargs['train_latents']
        train_means = torch.mean(train_latents, 0)
        train_std = torch.std(train_latents, 0)
        latents = torch.tensor(np.random.normal(train_means,
                                                train_std,
                                                size=(n_points,
                                                      model.latent_size)),
                               device=args['device'])

    else:
        latents = model.init_latents(n_points, args['device'], param_init)

    # latent parameters to update
    latents.requires_grad = True
    latent_params = [latents]
    if args['model'] == 'vae_free':
        # randomly init log_var
        latent_log_var = torch.randn_like(latents, device=args['device'])
        latent_log_var.requires_grad = True
        latent_params.append(latent_log_var)

    if mode == 'test':
        # freeze the network weights
        model.freeze_hiddens()

    if mode == 'train':
        lr = args['net_lr']
        latent_lr = args['latent_param_lr']
        if args['use_adam']:
            net_optimizer = optim.Adam(model.parameters(), lr=lr)
            latent_optimizer = optim.Adam(latent_params, lr=latent_lr)
        else:
            net_optimizer = optim.SGD(model.parameters(), lr=lr)
            latent_optimizer = optim.SGD(latent_params, lr=latent_lr)
        # for reduce lr on plateau
        net_scheduler = optim.lr_scheduler.ReduceLROnPlateau(net_optimizer,
                                                             mode='min',
                                                             factor=0.5,
                                                             patience=10,
                                                             verbose=True)
        latent_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            latent_optimizer,
            mode='min',
            factor=0.5,
            patience=10,
            verbose=True)

        optimizers = [net_optimizer, latent_optimizer]
        schedulers = [net_scheduler, latent_scheduler]
        print(f"Optimizer: {net_optimizer}, {latent_optimizer}",
              file=sys.stderr)

    elif mode == 'test':
        latent_lr = args['test_latent_param_lr']

        if args['use_adam']:
            optimizer = optim.Adam(latent_params, lr=latent_lr)
        else:
            optimizer = optim.SGD(latent_params, lr=latent_lr)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         mode='min',
                                                         factor=0.5,
                                                         patience=10,
                                                         verbose=True)

        optimizers = [optimizer]
        schedulers = [scheduler]
        print(f"Test optimizer: {optimizer}", file=sys.stderr)

    # start optimization loop
    start_time = time.time()
    losses = []
    epoch = 0

    while True:
        epoch += 1

        order = np.random.permutation(n_points)
        cumu_loss = 0
        cumu_total_loss = 0
        cumu_kl_loss = 0

        for i in range(n_batches):
            for op in optimizers:
                op.zero_grad()

            idxes = order[i * batch_size:(i + 1) * batch_size]

            if args['model'] == 'vae_free':
                pred_y = model(latents[idxes], latent_log_var[idxes])
            elif args['sfm_transform']:
                # the transform matrix returned alongside is not used here
                pred_y, _ = model(latents[idxes])
            else:
                pred_y = model(latents[idxes])

            # loss with masking
            loss = torch_mse_mask(y[idxes], pred_y, mask[idxes])

            if args['kl']:
                if args['model'] == 'vae':
                    # fixed log-variance taken from the configuration
                    z_var = torch.full_like(latents[idxes], args['log_var'])
                    kl_loss = 0.5 * torch.sum(
                        torch.exp(z_var) + latents[idxes]**2 - 1. -
                        z_var) / batch_size
                    total_loss = loss + args['ratio_kl'] * kl_loss

                elif args['model'] == 'vae_free':
                    # learned per-point log-variance
                    kl_loss = 0.5 * torch.sum(
                        torch.exp(latent_log_var[idxes]) + latents[idxes]**2 -
                        1. - latent_log_var[idxes])
                    kl_loss /= batch_size
                    total_loss = loss + args['ratio_kl'] * kl_loss

                else:
                    raise NotImplementedError
            else:
                kl_loss = 0.
                total_loss = loss

            cumu_total_loss += float(total_loss)
            cumu_loss += float(loss)
            cumu_kl_loss += float(kl_loss)

            total_loss.backward()
            for op in optimizers:
                op.step()

        curr_time = time.time() - start_time
        avg_loss = cumu_loss / n_batches

        avg_kl_loss = cumu_kl_loss / n_batches
        avg_total_loss = cumu_total_loss / n_batches

        print(
            "Epoch {} - Average loss: {:.6f}, Cumulative loss: {:.6f}, KL loss: {:.6f}, Average total loss: {:.6f} ({:.2f} s)"
            .format(epoch, avg_loss, cumu_loss, avg_kl_loss, avg_total_loss,
                    curr_time),
            file=sys.stderr)
        losses.append(
            [float(avg_loss),
             float(avg_kl_loss),
             float(avg_total_loss)])

        # early stopping etc.
        if epoch >= n_epochs:
            print("Max number of epochs reached!", file=sys.stderr)
            break

        if args.get('reduce', False):
            for sch in schedulers:
                sch.step(cumu_loss)

        sys.stderr.flush()
        sys.stdout.flush()

    if mode == 'train':
        # return final latent variables, to possibly initialize during testing
        if args['model'] == 'vae_free':
            train_latents = latents, latent_log_var
        else:
            train_latents = latents
        return train_latents, losses

    elif mode == 'test':
        print("Final test loss: {}".format(losses[-1]), file=sys.stderr)

        # get final predictions to get loss wrt unmasked test data
        all_pred = []
        with torch.no_grad():
            idxes = np.arange(n_points)
            # unlike the optimisation loop, the trailing partial batch is included
            n_batches = math.ceil(n_points / batch_size)

            for i in range(n_batches):
                idx = idxes[i * batch_size:(i + 1) * batch_size]
                if args['model'] == 'vae_free':
                    pred_y = model(latents[idx], latent_log_var[idx])
                elif args['sfm_transform']:
                    pred_y, _ = model(latents[idx])
                else:
                    pred_y = model(latents[idx])
                all_pred.append(pred_y)

        all_pred = torch.cat(all_pred, dim=0)

        # Stay defined when no clean reference is given; previously the
        # return statement raised UnboundLocalError in that case.
        final_test_loss = None
        final_clean_loss = None

        clean_y = kwargs.get('clean_y')
        if clean_y is not None:
            final_test_loss = float(torch_mse_mask(clean_y, all_pred, mask))
            final_clean_loss = float(
                torch_mse_mask(clean_y, all_pred, torch.ones_like(all_pred)))
            print("Masked test loss: {}".format(final_test_loss),
                  file=sys.stderr)
            print("Clean test loss: {}".format(final_clean_loss),
                  file=sys.stderr)

            mse = torch.mean(torch.mean((all_pred - clean_y)**2, -1), -1)
            print("Manual calculation: {}".format(mse), file=sys.stderr)

        if args['model'] == 'vae_free':
            test_latents = latents, latent_log_var
        else:
            test_latents = latents

        return losses, (final_test_loss,
                        final_clean_loss), all_pred, test_latents
Ejemplo n.º 17
0
def mix_match(X, U, eval_net, K, T, alpha, mixup_mode, aug_factor):
    """Run the three MixMatch steps: augmentation, label guessing and MixUp.

    Args:
        X: labeled batch, a list of ``(data, label)`` tuples.
        U: unlabeled batch, a list of data arrays.  Per the original notes,
            data and labels are numpy arrays of shape (C, D, H, W) with C = 1
            for data and C = 2 (one-hot) for labels.
        eval_net: network used to guess labels for the unlabeled data.
        K: number of noisy copies made of every unlabeled sample.
        T: sharpening temperature; guessed labels are raised to ``1 / T``.
        alpha: forwarded to ``mix_up``.
        mixup_mode: two-element sequence; element 0 selects the mixing
            partner for the labeled data and element 1 for the unlabeled
            data.  Each is one of ``'w'`` (shuffled union of both sets),
            ``'x'`` (labeled), ``'u'`` (unlabeled) or ``'_'`` (no mixing).
        aug_factor: forwarded to ``augmentation``.

    Returns:
        ``(X_prime, U_prime)``: lists of ``mix_up`` results, or the augmented
        lists themselves for mode ``'_'``.

    Raises:
        ValueError: for an unknown mixup mode.
    """
    n_labeled = len(X)

    # Step 1: augmentation (shapes unchanged).
    X_cap = [(augmentation(pair[0], aug_factor), pair[1]) for pair in X]

    U = torch.from_numpy(np.array(U))  # [b, 1, D, H, W]
    if GPU:
        U = U.cuda()
    U_cap = U.repeat(K, 1, 1, 1, 1)  # [K*b, 1, D, H, W]
    # unlabeled data is augmented with clipped Gaussian noise
    jitter = (torch.randn_like(U_cap) * 0.1).clamp(-0.2, 0.2)
    U_cap += jitter

    # Step 2: label guessing -- average the K predictions of every sample.
    with torch.no_grad():
        probs = F.softmax(eval_net(U_cap), dim=1)

    guessed = torch.zeros(U.size()).repeat(1, 2, 1, 1, 1)  # [b, 2, D, H, W]
    if GPU:
        guessed = guessed.cuda()
    for k in range(K):
        guessed += probs[k * n_labeled:(k + 1) * n_labeled]
    guessed /= K

    # Sharpening, then renormalise over the class dimension.
    guessed = guessed**(1 / T)
    guessed = guessed / guessed.sum(dim=1, keepdim=True)
    guessed = guessed.repeat(K, 1, 1, 1, 1)  # one guess per noisy copy
    guessed = guessed.detach().cpu().numpy()

    U_cap = list(zip(U_cap.detach().cpu().numpy(), guessed))

    # X_cap: b (data, label) pairs; U_cap: K*b (data, guessed_label) pairs.

    # Step 3: MixUp.
    x_mixup_mode, u_mixup_mode = mixup_mode[0], mixup_mode[1]
    n_unlabeled = n_labeled * K

    W = X_cap + U_cap  # length b + b*K
    random.shuffle(W)

    if x_mixup_mode == '_':
        X_prime = X_cap
    else:
        # choose the partner pool and which of its entries pairs with sample i
        if x_mixup_mode == 'w':
            pool, picks = W, range(n_labeled)
        elif x_mixup_mode == 'x':
            pool, picks = X_cap, np.random.permutation(range(n_labeled))
        elif x_mixup_mode == 'u':
            pool = U_cap
            picks = np.random.permutation(range(n_unlabeled))[:n_labeled]
        else:
            raise ValueError('wrong mixup_mode')
        X_prime = [
            mix_up(X_cap[i], pool[picks[i]], alpha) for i in range(n_labeled)
        ]

    if u_mixup_mode == '_':
        U_prime = U_cap
    else:
        if u_mixup_mode == 'w':
            # the part of W after the entries paired with the labeled data
            pool, picks = W, range(n_labeled, n_labeled + n_unlabeled)
        elif u_mixup_mode == 'x':
            pool = X_cap
            picks = np.random.permutation(range(n_unlabeled)) % n_labeled
        elif u_mixup_mode == 'u':
            pool, picks = U_cap, np.random.permutation(range(n_unlabeled))
        else:
            raise ValueError('wrong mixup_mode')
        U_prime = [
            mix_up(U_cap[i], pool[picks[i]], alpha) for i in range(n_unlabeled)
        ]

    return X_prime, U_prime
Ejemplo n.º 18
0
""" This is getting started with PyTorch

    REF
    ---
    https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py
"""
from __future__ import print_function
import torch

# Uninitialised 5 x 3 matrix: the printed values are whatever the memory held.
print("x----Construct an empty matrix----x")
x = torch.empty((5, 3))
print(x)

# All-zero 5 x 3 matrix of 64-bit integers.
print("x----Construct a zero matrix----x")
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)

# Tensor built directly from a Python list.
print("x----Construct a tensor from data----x")
x = torch.tensor([5.5, 3])
print(x)

# new_* methods take a size and build on an existing tensor.
print("x----Create a tensor from a tensor----x")
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)

# Same size as the source tensor, but with the dtype overridden.
x = torch.randn_like(x, dtype=torch.float32)
print(x)

print("x----Get size----x")
print(x.size())
Ejemplo n.º 19
0
 def __call__(self, x):
     """Return ``x`` plus standard-normal noise scaled by ``self.std``."""
     noise = torch.randn_like(x)
     return x + noise * self.std
Ejemplo n.º 20
0
 def reparameterize(self, mu, logvar):  #done
     std = torch.exp(0.5*logvar)
     u = torch.randn_like(std)
     return mu + u*std
Ejemplo n.º 21
0
Archivo: agent.py Proyecto: Kavka1/RL
 def get_target_action(self, next_obs_batch: torch.Tensor) -> torch.Tensor:
     """Return the target policy's action with clipped Gaussian noise added.

     The noise is standard normal scaled by ``self.noise_std`` and clamped
     to ``[-self.noise_clip, self.noise_clip]``.

     Args:
         next_obs_batch: observations passed to ``self.policy_target``.

     Returns:
         The target action plus the clipped noise.
     """
     # Annotations use the ``torch.Tensor`` class; ``torch.tensor`` is a
     # factory function, not a type.
     target_action = self.policy_target(next_obs_batch)
     noise = (torch.randn_like(target_action) * self.noise_std).clamp(
         -self.noise_clip, self.noise_clip)
     return target_action + noise
Ejemplo n.º 22
0
    def attack(self, model: nn.Module, inputs: torch.Tensor,
               labels_true: torch.Tensor) -> torch.Tensor:
        """Iteratively optimise an additive perturbation that raises the loss on the true labels.

        A perturbation ``delta`` (initially zero) is updated for
        ``self.steps`` steps with SGD (lr 1, momentum ``self.momentum``) on a
        per-sample L2-normalised gradient.  The minimised objective is
        ``self.loss_amp - cross_entropy(model(...), labels_true)``.  For
        every sample the perturbation with the lowest objective seen so far
        is remembered.

        Args:
            model: classifier evaluated on the diversified adversarial inputs.
            inputs: batch to attack; the gradient reshape assumes 4-D input.
            labels_true: true class indices for ``inputs``.

        Returns:
            The best perturbation if ``self.return_delta`` is truthy,
            otherwise ``inputs`` plus that perturbation.
        """
        n_samples = inputs.shape[0]
        delta = torch.zeros_like(inputs, requires_grad=True)

        # setup optimizer
        optimizer = optim.SGD([delta], lr=1, momentum=self.momentum)

        # per-sample record of the best objective and its perturbation
        best_loss = 1e4 * torch.ones(
            inputs.size(0), dtype=torch.float, device=self.device)
        best_delta = torch.zeros_like(inputs)

        for _ in range(self.steps):
            if self.max_norm:
                # keep the perturbation inside the max-norm box ...
                delta.data.clamp_(-self.max_norm, self.max_norm)
                if self.quantize:
                    # ... and optionally snap it to the discrete levels
                    delta.data.mul_(self.levels -
                                    1).round_().div_(self.levels - 1)

            diversified = self.input_diversity(inputs + delta, low=self.low)
            logits = model(diversified)

            true_class_ce = F.cross_entropy(logits,
                                            labels_true,
                                            reduction='none')
            per_sample_loss = self.loss_amp - true_class_ce

            improved = per_sample_loss < best_loss
            best_loss[improved] = per_sample_loss[improved]
            best_delta[improved] = delta.data[improved]

            batch_loss = per_sample_loss.mean()
            optimizer.zero_grad()
            batch_loss.backward()

            # renorm gradient
            grad_norms = delta.grad.view(n_samples, -1).norm(p=2, dim=1)
            delta.grad.div_(grad_norms.view(-1, 1, 1, 1))

            # avoid nan or inf if gradient is 0: use a random direction instead
            zero_norm = grad_norms == 0
            if zero_norm.any():
                delta.grad[zero_norm] = torch.randn_like(
                    delta.grad[zero_norm])
            optimizer.step()

            # avoid out of bound: inputs + delta must stay inside [0, 1]
            delta.data.add_(inputs)
            delta.data.clamp_(0, 1).sub_(inputs)

        if self.return_delta:
            return best_delta
        return inputs + best_delta
Ejemplo n.º 23
0
 def sample(self):
     """Store ``mean + stddev * noise`` in ``self.sampled``; ``noise`` is standard normal shaped like ``self.mean``."""
     noise = torch.randn_like(self.mean)
     self.sampled = self.mean + self.stddev * noise
    def calculate_loss(self, t1, t2, t3):
        """Estimate the loss for the time steps ``t1``, ``t2`` and ``t3``.

        Because the loss is based on variational inference, states are first
        sampled from the variational distributions (reparameterization
        trick) and the loss terms are then evaluated on those samples.

        The loss is the sum of six terms, each printed as it is computed:
        two Gaussian KL divergences, the difference of two Gaussian
        log-probabilities of the sampled state at ``t3``, and the squared
        reconstruction errors at ``t3`` and ``t2``.  The mean over the
        remaining dimension is returned.

        Side effect: ``self.x`` is replaced by a view of shape
        ``(self.batch_size, 20, -1)``.

        Args:
            t1, t2, t3: indices into dimension 1 of ``self.b`` / ``self.x``.

        Returns:
            Scalar tensor holding the mean loss.
        """
        ## sample a state at time t3
        t3_z_mu, t3_z_logsigma = self.b_to_z(self.b[:, t3, :])
        t3_z_epsilon = torch.randn_like(t3_z_mu)
        t3_z = t3_z_mu + torch.exp(t3_z_logsigma) * t3_z_epsilon

        ## sample a state at time t2 (note that the reparameterization trick is used)
        ## the inference model gets zeros in place of the first belief and the first state
        t2_qs_z_mu, t2_qs_z_logsigma = self.infer_z(
            torch.cat(
                (t3_z.new_zeros(self.b[:, t2, :].size()), self.b[:, t2, :],
                 self.b[:, t3, :], t3_z.new_zeros(t3_z.size()), t3_z),
                dim=-1))
        t2_qs_z_epsilon = torch.randn_like(t2_qs_z_mu)
        t2_qs_z = t2_qs_z_mu + torch.exp(t2_qs_z_logsigma) * t2_qs_z_epsilon

        t2_z_mu, t2_z_logsigma = self.b_to_z(self.b[:, t2, :])
        t2_z_epsilon = torch.randn_like(t2_z_mu)
        t2_z = t2_z_mu + torch.exp(t2_z_logsigma) * t2_z_epsilon

        ## sample a state at time t1
        ## infer state at time t1 based on states at time t2 and t3
        t1_qs_z_mu, t1_qs_z_logsigma = self.infer_z(
            torch.cat((self.b[:, t1, :], self.b[:, t2, :], self.b[:, t3, :],
                       t2_z, t3_z),
                      dim=-1))
        t1_qs_z_epsilon = torch.randn_like(t1_qs_z_mu)
        t1_qs_z = t1_qs_z_mu + torch.exp(t1_qs_z_logsigma) * t1_qs_z_epsilon

        #### After sampling states z from the variational distribution, we can calculate
        #### the loss.

        ## state distribution at time t1 based on belief at time t1
        t1_pb_z_mu, t1_pb_z_logsigma = self.b_to_z(self.b[:, t1, :])

        ## state distribution at time t2 based on the state sampled at time t1 and the transition
        t2_t_z_mu, t2_t_z_logsigma = self.transition_z(t1_qs_z)

        ## state distribution at time t3 based on the state sampled at time t2 and the transition
        t3_t_z_mu, t3_t_z_logsigma = self.transition_z(t2_qs_z)

        ## observation predictions at times t3 and t2 based on the sampled states
        t3_x_prob = self.z_to_x(t3_z).view(self.batch_size, -1)
        t2_x_prob = self.z_to_x(t2_z).view(self.batch_size, -1)

        #### start calculating the loss
        #### Every term is computed exactly once and reused for both the
        #### debug print and the total (previously each was evaluated twice).

        ## KL divergence between the inferred and the belief-based state
        ## distribution at time t1
        kl_t1 = kl_div_gaussian(t1_qs_z_mu, t1_qs_z_logsigma, t1_pb_z_mu,
                                t1_pb_z_logsigma)
        print("kl loss 1: ", kl_t1)

        ## KL divergence between the inferred and the transition-based state
        ## distribution at time t2
        kl_t2 = kl_div_gaussian(t2_qs_z_mu, t2_qs_z_logsigma, t2_t_z_mu,
                                t2_t_z_logsigma)
        print("kl loss 2: ", kl_t2)

        ## log probability of the sampled t3 state under the belief-based distribution
        logp_t3_belief = gaussian_log_prob(t3_z_mu, t3_z_logsigma, t3_z)
        print("gaussian loss 1: ", logp_t3_belief)

        ## log probability of the sampled t3 state under the transition-based
        ## distribution; this one enters the loss with a negative sign
        logp_t3_transition = gaussian_log_prob(t3_t_z_mu, t3_t_z_logsigma,
                                               t3_z)
        print("gaussian loss 2: ", logp_t3_transition)

        ## squared reconstruction errors at times t3 and t2
        ## NOTE(review): the sequence length 20 is hard-coded -- confirm it
        ## matches the data.
        self.x = self.x.view(self.batch_size, 20, -1)

        recon_t3 = torch.sum((self.x[:, t3, :] - t3_x_prob)**2, dim=-1)
        print("reconstruct loss 1", recon_t3)

        recon_t2 = torch.sum((self.x[:, t2, :] - t2_x_prob)**2, dim=-1)
        print("reconstruct loss 2", recon_t2)

        loss = (kl_t1 + kl_t2 + logp_t3_belief - logp_t3_transition +
                recon_t3 + recon_t2)
        loss = torch.mean(loss)

        return loss
def default_target_policy_smoothing_func(batch_action):
    """Perturb actions with clipped Gaussian noise for target policy smoothing.

    Noise is drawn as ``0.2 * N(0, 1)`` with the same shape as
    ``batch_action``, clipped to ``[-0.5, 0.5]``, added to the actions, and
    the result is clipped to ``[-1, 1]``.
    """
    gaussian = torch.randn_like(batch_action)
    clipped_noise = (0.2 * gaussian).clamp(-0.5, 0.5)
    smoothed = batch_action + clipped_noise
    return smoothed.clamp(-1, 1)
Ejemplo n.º 26
0
def reparameterize(log_mix, mean, log_std):
    """Draw a reparameterized sample from a Gaussian mixture.

    Each component is sampled as ``mean + exp(log_std) * eps`` with
    ``eps ~ N(0, 1)``; the components are weighted by ``exp(log_mix)`` and
    summed over dimension 1. The result is reshaped to ``(-1, 32)``.
    """
    std = log_std.exp()
    noise = torch.randn_like(std)
    component_samples = mean + std * noise
    weights = log_mix.exp()
    mixed = (weights * component_samples).sum(dim=1)
    return mixed.view(-1, 32)
Ejemplo n.º 27
0
    def __init__(self,
                 root: str,
                 normal_class: int = 0,
                 data_augmentation: bool = False,
                 normalize: bool = False,
                 outlier_exposure: bool = False,
                 oe_n_classes: int = 100,
                 seed: int = 0):
        """Build CIFAR-100 train/test sets for one-vs-rest anomaly detection.

        Args:
            root: Dataset root directory, forwarded to the base class and to
                ``MyCIFAR100``.
            normal_class: Class index treated as "normal" (only used when
                ``outlier_exposure`` is False); all other classes in
                ``range(100)`` are outliers.
            data_augmentation: If True, the training pipeline additionally
                applies colour jitter, random horizontal flip, random crop
                and small additive Gaussian noise.
            normalize: If True, apply per-channel ``Normalize`` to both the
                train and the test pipeline.
            outlier_exposure: If True, build an outlier-exposure training set
                from ``oe_n_classes`` randomly sampled classes and do not
                build a test set.
            oe_n_classes: Number of classes sampled for outlier exposure.
            seed: Seed for Python's global ``random`` module, which drives
                the class sampling above.
        """
        super().__init__(root)

        self.image_size = (3, 32, 32)

        self.n_classes = 2  # 0: normal, 1: outlier
        # NOTE(review): `shuffle` is only stored here; presumably consumed by
        # the base class / loaders — confirm.
        self.shuffle = True
        random.seed(seed)  # set seed

        if outlier_exposure:
            # Outlier-exposure mode: there is no normal class; every class is
            # an outlier and a random subset of them is "known".
            self.normal_classes = None
            self.outlier_classes = list(range(0, 100))
            self.known_outlier_classes = tuple(
                random.sample(self.outlier_classes, oe_n_classes))
        else:
            # Define normal and outlier classes
            self.normal_classes = tuple([normal_class])
            self.outlier_classes = list(range(0, 100))
            self.outlier_classes.remove(normal_class)
            self.outlier_classes = tuple(self.outlier_classes)

        # CIFAR-100 preprocessing: feature scaling to [0, 1], data normalization, and data augmentation
        train_transform = []
        test_transform = []
        if data_augmentation:
            # only augment training data
            train_transform += [
                transforms.ColorJitter(brightness=0.01,
                                       contrast=0.01,
                                       saturation=0.01,
                                       hue=0.01),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomCrop(32, padding=4)
            ]
        train_transform += [transforms.ToTensor()]
        test_transform += [transforms.ToTensor()]
        if data_augmentation:
            # Additive Gaussian noise is applied after ToTensor because
            # torch.randn_like needs a tensor input.
            train_transform += [
                transforms.Lambda(lambda x: x + 0.001 * torch.randn_like(x))
            ]
        if normalize:
            # NOTE(review): constants are presumably per-channel mean/std of
            # the training images — confirm their origin.
            train_transform += [
                transforms.Normalize((0.491373, 0.482353, 0.446667),
                                     (0.247059, 0.243529, 0.261569))
            ]
            test_transform += [
                transforms.Normalize((0.491373, 0.482353, 0.446667),
                                     (0.247059, 0.243529, 0.261569))
            ]
        train_transform = transforms.Compose(train_transform)
        test_transform = transforms.Compose(test_transform)

        # Map the original class label to a binary label: 1 if the class is
        # in `self.outlier_classes`, else 0. The lambda reads the attribute
        # at call time.
        target_transform = transforms.Lambda(
            lambda x: int(x in self.outlier_classes))

        # Get train set
        train_set = MyCIFAR100(root=self.root,
                               train=True,
                               transform=train_transform,
                               target_transform=target_transform,
                               download=True)

        if outlier_exposure:
            # Indices of all training samples whose class was sampled as a
            # known outlier class.
            idx = np.argwhere(
                np.isin(np.array(train_set.targets),
                        self.known_outlier_classes))
            idx = idx.flatten().tolist()
            train_set.semi_targets[idx] = -1 * torch.ones(
                len(idx)).long()  # set outlier exposure labels

            # Subset train_set to selected classes
            self.train_set = Subset(train_set, idx)
            # NOTE(review): `shuffle_idxs` is set as an attribute on the
            # subset; its consumer is not visible here — confirm.
            self.train_set.shuffle_idxs = False
            self.test_set = None
        else:
            # Subset train_set to normal_classes
            idx = np.argwhere(
                np.isin(np.array(train_set.targets), self.normal_classes))
            idx = idx.flatten().tolist()
            # Normal training samples get semi-supervised label 0.
            train_set.semi_targets[idx] = torch.zeros(len(idx)).long()
            self.train_set = Subset(train_set, idx)

            # Get test set
            self.test_set = MyCIFAR100(root=self.root,
                                       train=False,
                                       transform=test_transform,
                                       target_transform=target_transform,
                                       download=True)
def norm_one_gaussian(in_tensor):
    """Return Gaussian noise shaped like ``in_tensor``, scaled by 1/sqrt(numel).

    With i.i.d. standard-normal entries divided by ``sqrt(numel)``, the
    expected squared L2 norm of the result is 1.
    """
    n_elements = in_tensor.numel()
    scale = np.sqrt(n_elements)
    noise = torch.randn_like(in_tensor)
    return noise / scale
Ejemplo n.º 29
0
def simple_backward_setup(output, seed=None):
    """Create a random upstream gradient for backpropagating through ``output``.

    Args:
        output: Tensor whose shape, dtype and device the gradient should match.
        seed: Optional seed for ``torch.manual_seed``. Any integer, including
            ``0``, seeds the global RNG; ``None`` leaves the RNG untouched.

    Returns:
        The tuple ``(output, grad_output)`` where ``grad_output`` is standard
        normal noise shaped like ``output``.
    """
    assert isinstance(output, torch.Tensor)
    # Compare against None rather than truthiness so that seed=0 is honoured.
    if seed is not None:
        torch.manual_seed(seed)
    grad_output = torch.randn_like(output)
    return output, grad_output
Ejemplo n.º 30
0
 def reparameterize(self, mu, logvar):
     """Sample ``mu + sigma * eps`` with ``sigma = exp(0.5 * logvar)``, ``eps ~ N(0, 1)``."""
     sigma = (0.5 * logvar).exp()
     sample = torch.randn_like(sigma) * sigma
     # In-place add on the freshly created tensor; `mu` itself is not modified.
     sample += mu
     return sample
Ejemplo n.º 31
0
 def reparameterize(self, mu, logvar):
     """Reparameterization trick: return ``mu + exp(0.5 * logvar) * eps``, ``eps ~ N(0, 1)``."""
     sigma = (0.5 * logvar).exp()
     scaled_noise = sigma * torch.randn_like(sigma)
     return mu + scaled_noise
Ejemplo n.º 32
0
    def do_train(self,
                 paths,
                 dataset,
                 optimiser,
                 epochs,
                 batch_size,
                 step,
                 lr=1e-4,
                 valid_index=[],
                 use_half=False,
                 do_clip=False):
        """Run the training loop and checkpoint the model after every epoch.

        Args:
            paths: Object providing ``checkpoint_dir``, ``model_path()``,
                ``step_path()`` and ``model_hist_path(step)``.
            dataset: Dataset handed to ``DataLoader``; its ``path`` attribute
                is forwarded to ``do_generate``.
            optimiser: Optimiser to step. Wrapped in apex's ``FP16_Optimizer``
                when ``use_half`` is True.
            epochs: Number of passes over ``dataset``.
            batch_size: Currently unused; see the NOTE at the loader.
            step: Global step counter to start from; incremented per batch.
            lr: Learning rate written into every parameter group.
            valid_index: Forwarded unchanged to ``do_generate``.
            use_half: Train in half precision via apex.
            do_clip: Rescale gradients by their global max-abs value (FP32
                only).

        Raises:
            RuntimeError: If ``do_clip`` is requested together with
                ``use_half``, or if a gradient exceeds 100000 in magnitude
                (the model is saved to ``bad_model.pyt`` first).
        """
        if use_half:
            import apex
            optimiser = apex.fp16_utils.FP16_Optimizer(optimiser,
                                                       dynamic_loss_scale=True)
        for p in optimiser.param_groups:
            p['lr'] = lr
        criterion = nn.NLLLoss().cuda()
        # Sigmoid + binary cross-entropy fused in one numerically stable op;
        # `prediction` is treated as raw logits, as the original sigmoid implied.
        label_criterion = nn.BCEWithLogitsLoss()
        k = 0
        saved_k = 0
        pad_left = self.pad_left()
        pad_left_encoder = self.pad_left_encoder()
        pad_left_decoder = self.pad_left_decoder()
        if self.noise_x:
            # Extra right context so the encoder input can be shifted by up
            # to +127 samples below.
            extra_pad_right = 127
        else:
            extra_pad_right = 0
        pad_right = self.pad_right() + extra_pad_right
        window = 16 * self.total_scale()
        logger.log(
            f'pad_left={pad_left_encoder}|{pad_left_decoder}, pad_right={pad_right}, total_scale={self.total_scale()}'
        )

        for e in range(epochs):
            # NOTE(review): the loader uses a fixed batch size of 16 and
            # ignores the `batch_size` argument — confirm this is intended.
            trn_loader = DataLoader(
                dataset,
                collate_fn=lambda batch: env.collate_samples(
                    pad_left, window, pad_right, batch),
                batch_size=16,
                num_workers=0,
                shuffle=True,
                pin_memory=True)

            start = time.time()
            running_loss_c = 0.
            running_loss_f = 0.
            running_loss_vq = 0.
            running_loss_vqc = 0.
            running_entropy = 0.
            running_max_grad = 0.
            running_loss_ce_label = 0.
            running_max_grad_name = ""

            iters = len(trn_loader)

            # enumerate mfcc, mel, quant for search, mfcc for query, and label
            # search_wave16 : quant
            for i, (search_wave16, search_mel16, query_mfcc16,
                    label) in enumerate(trn_loader):
                search_wave16 = search_wave16.cuda()
                search_mel16 = search_mel16.cuda()
                query_mfcc16 = query_mfcc16.cuda()
                label = label.cuda()
                # Split the 16-bit samples into a coarse (high) and a fine
                # (low) byte, each rescaled to [-1, 1] for the network input.
                coarse = (search_wave16 + 2**15) // 256
                fine = (search_wave16 + 2**15) % 256
                coarse_f = coarse.float() / 127.5 - 1.
                fine_f = fine.float() / 127.5 - 1.
                total_f = (search_wave16.float() + 0.5) / 32767.5

                if self.noise_y:
                    # Random per-sample gain plus small additive noise.
                    noisy_f = total_f * (
                        0.02 * torch.randn(total_f.size(0), 1).cuda()
                    ).exp() + 0.003 * torch.randn_like(total_f)
                else:
                    noisy_f = total_f

                if use_half:
                    coarse_f = coarse_f.half()
                    fine_f = fine_f.half()
                    noisy_f = noisy_f.half()

                x = torch.cat([
                    coarse_f[:, pad_left -
                             pad_left_decoder:-pad_right].unsqueeze(-1),
                    fine_f[:, pad_left -
                           pad_left_decoder:-pad_right].unsqueeze(-1),
                    coarse_f[:, pad_left - pad_left_decoder + 1:1 -
                             pad_right].unsqueeze(-1),
                ],
                              dim=2)
                y_coarse = coarse[:, pad_left + 1:1 - pad_right]
                y_fine = fine[:, pad_left + 1:1 - pad_right]

                if self.noise_x:
                    # Randomly translate the input to the encoder to encourage
                    # translational invariance
                    total_len = coarse_f.size(1)
                    translated = []
                    for j in range(coarse_f.size(0)):
                        shift = random.randrange(256) - 128
                        translated.append(
                            noisy_f[j, pad_left - pad_left_encoder +
                                    shift:total_len - extra_pad_right + shift])
                    translated = torch.stack(translated, dim=0)
                else:
                    translated = noisy_f[:, pad_left - pad_left_encoder:]

                p_cf, vq_pen, encoder_pen, entropy, prediction = self.forward(
                    x, translated, search_mel16, query_mfcc16, label)
                p_c, p_f = p_cf
                loss_c = criterion(p_c.transpose(1, 2).float(), y_coarse)
                loss_f = criterion(p_f.transpose(1, 2).float(), y_fine)
                # BCE needs floating-point targets; the cast is a no-op when
                # `label` is already float32.
                ce_loss = label_criterion(prediction.float(), label.float())

                encoder_weight = 0.01 * min(1, max(0.1, step / 1000 - 1))
                loss = loss_c + loss_f + vq_pen + encoder_weight * encoder_pen + ce_loss

                optimiser.zero_grad()
                if use_half:
                    optimiser.backward(loss)
                    if do_clip:
                        raise RuntimeError(
                            "clipping in half precision is not implemented yet"
                        )
                else:
                    loss.backward()
                    if do_clip:
                        # Find the largest absolute gradient entry over all
                        # parameters and remember which parameter owns it.
                        max_grad = 0
                        max_grad_name = ""
                        for name, param in self.named_parameters():
                            if param.grad is not None:
                                param_max_grad = param.grad.data.abs().max()
                                if param_max_grad > max_grad:
                                    max_grad = param_max_grad
                                    max_grad_name = name
                                if 1000000 < param_max_grad:
                                    logger.log(
                                        f'Very large gradient at {name}: {param_max_grad}'
                                    )
                        if 100 < max_grad:
                            # Rescale so the largest entry becomes 100, or
                            # drop the gradients entirely when they exploded.
                            for param in self.parameters():
                                if param.grad is not None:
                                    if 1000000 < max_grad:
                                        param.grad.data.zero_()
                                    else:
                                        param.grad.data.mul_(100 / max_grad)
                        if running_max_grad < max_grad:
                            running_max_grad = max_grad
                            running_max_grad_name = max_grad_name

                        if 100000 < max_grad:
                            torch.save(self.state_dict(), "bad_model.pyt")
                            raise RuntimeError(
                                "Aborting due to crazy gradient (model saved to bad_model.pyt)"
                            )

                # The parameter update and bookkeeping run for every batch,
                # in both precision modes and with or without clipping.
                optimiser.step()
                running_loss_c += loss_c.item()
                running_loss_f += loss_f.item()
                running_loss_vq += vq_pen.item()
                running_loss_vqc += encoder_pen.item()
                running_entropy += entropy
                running_loss_ce_label += ce_loss.item()
                self.after_update()

                speed = (i + 1) / (time.time() - start)
                avg_loss_c = running_loss_c / (i + 1)
                avg_loss_f = running_loss_f / (i + 1)
                avg_loss_vq = running_loss_vq / (i + 1)
                avg_loss_vqc = running_loss_vqc / (i + 1)
                avg_entropy = running_entropy / (i + 1)
                avg_loss_ce = running_loss_ce_label / (i + 1)

                step += 1
                k = step // 1000

                # // track cross entropy loss as well
                logger.status(
                    f'Epoch: {e + 1}/{epochs} -- Batch: {i + 1}/{iters} -- Loss: c={avg_loss_c:#.4} '
                    f'ce_label_loss={avg_loss_ce:#.4} f={avg_loss_f:#.4} vq={avg_loss_vq:#.4} '
                    f'vqc={avg_loss_vqc:#.4} -- Entropy: {avg_entropy:#.4} -- Grad: '
                    f'{running_max_grad:#.1} {running_max_grad_name} Speed: {speed:#.4} steps/sec -- Step: {k}k '
                )

            # End of epoch: persist the latest weights and the step counter.
            os.makedirs(paths.checkpoint_dir, exist_ok=True)
            torch.save(self.state_dict(), paths.model_path())
            np.save(paths.step_path(), step)
            logger.log_current_status()
            logger.log(
                f' <saved>; w[0][0] = {self.overtone.wavernn.gru.weight_ih_l0[0][0]}'
            )
            # Keep a historical snapshot and generate samples once more than
            # 50k steps have passed since the previous snapshot.
            if k > saved_k + 50:
                torch.save(self.state_dict(),
                           paths.model_hist_path(step))
                saved_k = k
                self.do_generate(paths, step, optimiser, dataset.path,
                                 valid_index)
Ejemplo n.º 33
0
 def safe_jac(cx):
     """Return the Jacobian of ``func`` at ``cx`` plus Gaussian jitter of scale 1e-7."""
     exact = torch.autograd.functional.jacobian(func, cx)
     jitter = torch.randn_like(exact) * 1e-7
     return jitter + exact
Ejemplo n.º 34
0
###############################################################
# Construct a tensor directly from data:

x = torch.tensor([5.5, 3])
print(x)

###############################################################
# Or create a tensor based on an existing tensor. These methods
# reuse properties of the input tensor, e.g. dtype, unless
# new values are provided by the user.

x = x.new_ones(5, 3, dtype=torch.double)      # new_* methods take in sizes
print(x)

x = torch.randn_like(x, dtype=torch.float)    # override dtype!
print(x)                                      # result has the same size

###############################################################
# Get its size:

print(x.size())

###############################################################
# .. note::
#     ``torch.Size`` is in fact a tuple, so it supports all tuple operations.
#
# Operations
# ^^^^^^^^^^
# There are multiple syntaxes for operations. In the following
# example, we will take a look at the addition operation.
Ejemplo n.º 35
0
        exp.append_episode(*ret, policy_params=params_)
        exp.save(results_filename)

        if it < n_rnd - 1:
            continue
        ps_it = it - n_rnd + 1

        def on_iteration(i, loss, states, actions, rewards, discount):
            """Log the training loss; every 100th iteration also plot the trajectories.

            ``discount`` is accepted but not used.
            """
            tag = 'mc_pilco/episode_%d/training loss' % ps_it
            writer.add_scalar(tag, loss, i)
            if i % 100 != 0:
                return

            def as_numpy(batch):
                # Swap the first two axes and move the data to host memory.
                return batch.transpose(0, 1).cpu().detach().numpy()

            states = as_numpy(states)
            actions = as_numpy(actions)
            rewards = as_numpy(rewards)
            utils.plot_trajectories(states, actions, rewards, plot_samples=False)

        # train agent
        agent.fit(exp, H, 120, batch_size=N_particles)

        # plot rollout
        x0 = torch.tensor(exp.sample_states(N_particles, timestep=0)).to(
            agent.dyn.X.device).float()
        x0 = x0 + 1e-1 * x0.std(0) * torch.randn_like(x0)
        x0 = x0.detach()
        utils.plot_rollout(x0, agent.dyn, agent.actor_target, H)
        writer.add_scalar('robot/evaluation_loss',
                          torch.tensor(ret[2]).sum(), ps_it + 1)
Ejemplo n.º 36
0
# fig, ax = plt.subplots(1, 2, figsize=(10, 5))
# ax[0].imshow(np.angle(psi_model[0].cpu()))
# ax[1].imshow(np.abs(psi_model[0].cpu()))
# plt.show()

# %%

# Move the inputs defined earlier in the script to the GPU.
Ap = Ap0.cuda()
q = q.cuda()

# %%
# Add a leading dimension to T1 and convert r1 from a NumPy array to a CUDA tensor.
T = T1.unsqueeze(0).cuda()
r = th.from_numpy(r1).cuda()

# Perturb every entry of r except the first with Gaussian noise (std 3),
# then clip the values to the range [0, 30].
r[1:] += th.randn_like(r[1:]) * 3
r[r < 0] = 0
r[r > 30] = 30
psi_model = psi_model.unsqueeze(0)

# Apply A to the model and square the result.
# NOTE(review): presumably A is the forward operator producing amplitudes and
# I_target the corresponding intensities — confirm.
a_target = A(T, psi_model, r)
I_target = a_target**2
#%%
print(f'psi_model norm: {th.norm(psi_model)**2}')
print(f'I_target norm: {th.sum(I_target[0])}')
#%%
# Show the entry at index 2 of a_target.
f, ax = plt.subplots()
ax.imshow(a_target[2].cpu())
plt.show()
#%%
# Mosaic of all entries, fftshift-ed over axes 1 and 2.
plotmosaic(fftshift(a_target.cpu().numpy(), (1, 2)), cmap='viridis')
Ejemplo n.º 37
0
 def reparameterize(self, mu, std):
     """Sample ``mu + std * eps`` with ``eps ~ N(0, 1)`` shaped like ``std``."""
     sample = torch.randn_like(std) * std
     # In-place add on the freshly created tensor; `mu` itself is not modified.
     sample += mu
     return sample
Ejemplo n.º 38
0
    def test_symeig(self):
        """Compare ``LazyTensor.symeig`` against ``torch.symeig`` on the dense tensor.

        Checks eigenvalues, the reconstruction from the eigendecomposition,
        eigenvectors (by absolute value, and only where eigenvalues are
        distinct), gradients of the eigenvalues, and that
        ``eigenvectors=False`` returns ``None`` for the eigenvectors.
        """
        # Two independent leaf copies so gradients from the lazy and the
        # dense computation accumulate separately.
        lazy_tensor = self.create_lazy_tensor().detach().requires_grad_(True)
        lazy_tensor_copy = lazy_tensor.clone().detach().requires_grad_(True)
        evaluated = self.evaluate_lazy_tensor(lazy_tensor_copy)

        # Perform forward pass
        evals_unsorted, evecs_unsorted = lazy_tensor.symeig(eigenvectors=True)
        evecs_unsorted = evecs_unsorted.evaluate()

        # since LazyTensor.symeig does not sort evals, we do this here for the check
        evals, idxr = torch.sort(evals_unsorted, dim=-1, descending=False)
        # Reorder the eigenvector columns with the same permutation.
        evecs = torch.gather(evecs_unsorted,
                             dim=-1,
                             index=idxr.unsqueeze(-2).expand(
                                 evecs_unsorted.shape))

        # Reference decomposition, computed in double precision and cast back.
        evals_actual, evecs_actual = torch.symeig(evaluated.double(),
                                                  eigenvectors=True)
        evals_actual = evals_actual.to(dtype=evaluated.dtype)
        evecs_actual = evecs_actual.to(dtype=evaluated.dtype)

        # Check forward pass
        self.assertAllClose(evals, evals_actual, rtol=1e-4, atol=1e-3)
        lt_from_eigendecomp = evecs @ torch.diag_embed(
            evals) @ evecs.transpose(-1, -2)
        self.assertAllClose(lt_from_eigendecomp,
                            evaluated,
                            rtol=1e-4,
                            atol=1e-3)

        # if there are repeated evals, we'll skip checking the eigenvectors for those
        any_evals_repeated = False
        # Compare absolute values because eigenvectors are only defined up to sign.
        evecs_abs, evecs_actual_abs = evecs.abs(), evecs_actual.abs()
        # Iterate over every batch index (all dimensions but the last).
        for idx in itertools.product(
                *[range(b) for b in evals_actual.shape[:-1]]):
            eval_i = evals_actual[idx]
            if torch.unique(eval_i.detach()).shape[-1] == eval_i.shape[
                    -1]:  # detach to avoid pytorch/pytorch#41389
                self.assertAllClose(evecs_abs[idx],
                                    evecs_actual_abs[idx],
                                    rtol=1e-4,
                                    atol=1e-3)
            else:
                any_evals_repeated = True

        # Perform backward pass
        # The same random upstream gradient is used for both computations.
        symeig_grad = torch.randn_like(evals)
        ((evals * symeig_grad).sum()).backward()
        ((evals_actual * symeig_grad).sum()).backward()

        # Check grads if there were no repeated evals
        if not any_evals_repeated:
            for arg, arg_copy in zip(lazy_tensor.representation(),
                                     lazy_tensor_copy.representation()):
                if arg_copy.requires_grad and arg_copy.is_leaf and arg_copy.grad is not None:
                    self.assertAllClose(arg.grad,
                                        arg_copy.grad,
                                        rtol=1e-4,
                                        atol=1e-3)

        # Test with eigenvectors=False
        _, evecs = lazy_tensor.symeig(eigenvectors=False)
        self.assertIsNone(evecs)
Ejemplo n.º 39
0
def reparameterization(mu, logvar):
    """Draw ``z = mu + std * eps`` with ``std = exp(logvar / 2)``, ``eps ~ N(0, 1)``.

    Args:
        mu: Mean of the Gaussian.
        logvar: Log-variance of the Gaussian.

    Returns:
        The sample ``z``, differentiable with respect to ``mu`` and ``logvar``.
    """
    std = torch.exp(logvar / 2)
    eps = torch.randn_like(std)
    return mu + std * eps
Ejemplo n.º 40
0
    def test_cuda(self, test_case):
        """Check that the module gives matching results on CPU and on CUDA.

        Builds a CPU module and a float CUDA module with identical
        parameters, then compares forward outputs, first-order gradients and
        (optionally) second-order gradients to within ``self.precision``.

        Raises:
            unittest.SkipTest: If CUDA testing is unavailable or disabled for
                this test.
        """
        if not TEST_CUDA or not self.should_test_cuda:
            raise unittest.SkipTest('Excluded from CUDA tests')
        try:
            cpu_input = self._get_input()
            # Double CPU tensors are mapped to float CUDA tensors.
            type_map = {'torch.DoubleTensor': torch.cuda.FloatTensor}
            gpu_input = to_gpu(cpu_input, type_map=type_map)

            cpu_module = self.constructor(*self.constructor_args)
            gpu_module = self.constructor(*self.constructor_args).float().cuda()
            cpu_param = test_case._get_parameters(cpu_module)
            gpu_param = test_case._get_parameters(gpu_module)
            # Copy the CPU parameters into the GPU module so both start from
            # identical weights.
            for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
                gpu_p.data.copy_(cpu_p)

            test_case._zero_grad_input(cpu_input)
            test_case._zero_grad_input(gpu_input)
            test_case._zero_grad_parameters(cpu_module)
            test_case._zero_grad_parameters(gpu_module)
            cpu_output = test_case._forward(cpu_module, cpu_input)
            gpu_output = test_case._forward(gpu_module, gpu_input)
            test_case.assertEqual(cpu_output, gpu_output, self.precision)

            # Run backwards on CPU and GPU and compare results
            for i in range(5):
                # Same random upstream gradient on both devices.
                cpu_gradOutput = cpu_output.clone().normal_()
                gpu_gradOutput = cpu_gradOutput.type('torch.cuda.FloatTensor')
                cpu_gradInput = test_case._backward(cpu_module, cpu_input, cpu_output, cpu_gradOutput)
                gpu_gradInput = test_case._backward(gpu_module, gpu_input, gpu_output, gpu_gradOutput)
                test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision)
                for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
                    test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision)

            # Run double-backwards on CPU and GPU and compare results
            if self.check_gradgrad and not self.FIXME_no_cuda_gradgrad_comparison:
                cpu_output = cpu_module(cpu_input)
                gpu_output = gpu_module(gpu_input)

                cpu_gradOutput = torch.randn_like(cpu_output, requires_grad=True)
                # Detach so the GPU copy is its own leaf that can require grad.
                gpu_gradOutput = cpu_gradOutput.type_as(gpu_output).detach()
                gpu_gradOutput.requires_grad = True

                cpu_gradInputs = torch.autograd.grad(
                    cpu_output,
                    (cpu_input,) + tuple(cpu_module.parameters()),
                    cpu_gradOutput,
                    create_graph=True)
                gpu_gradInputs = torch.autograd.grad(
                    gpu_output,
                    (gpu_input,) + tuple(gpu_module.parameters()),
                    gpu_gradOutput,
                    create_graph=True)

                for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
                    test_case.assertEqual(cpu_d_i, gpu_d_i, self.precision)

                # We mix output into the second backwards computation so that
                # torch.autograd.grad doesn't complain that some inputs
                # are unreachable (which can happen if you differentiate
                # only on the gradient).
                cpu_gg = torch.autograd.grad(
                    cpu_output.sum() + sum(map(lambda x: x.sum(), cpu_gradInputs)),
                    (cpu_input, cpu_gradOutput) + tuple(cpu_module.parameters()),
                    retain_graph=True)
                gpu_gg = torch.autograd.grad(
                    gpu_output.sum() + sum(map(lambda x: x.sum(), gpu_gradInputs)),
                    (gpu_input, gpu_gradOutput) + tuple(gpu_module.parameters()),
                    retain_graph=True)

                # NOTE(review): this re-compares the first-order gradients
                # left over from the loop above; the second-order results are
                # compared in the loop that follows.
                test_case.assertEqual(cpu_gradInput, gpu_gradInput, self.precision)
                for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg):
                    test_case.assertEqual(cpu_d_p, gpu_d_p, self.precision)

            self.test_noncontig(test_case, gpu_module, gpu_input)
        except NotImplementedError:
            # Deliberately ignored: an unimplemented operation is not
            # treated as a test failure.
            pass
        # TODO: remove this after CUDA scatter_ is implemented
        except AttributeError as e:
            if len(e.args) == 1 and "'FloatTensor' object has no attribute 'scatter_'" in e.args[0]:
                pass
            else:
                raise
Ejemplo n.º 41
0
 def __init__(self, mu, sigma):
     """Store a Normal(mu, sigma) distribution and a random parameter on the Stiefel manifold.

     The parameter ``x`` is initialised with standard normal noise shaped
     like ``mu``.
     """
     super().__init__()
     self.d = torch.distributions.Normal(mu, sigma)
     initial_point = torch.randn_like(mu)
     stiefel = geoopt.Stiefel()
     self.x = geoopt.ManifoldParameter(initial_point, manifold=stiefel)
Ejemplo n.º 42
0
 def get_probe(self, real_data, fake_data):
     """Return ``real_data`` perturbed by Gaussian noise scaled by ``self.scale``.

     ``fake_data`` is accepted but not used.
     """
     noise_scale = self.scale
     perturbation = noise_scale * torch.randn_like(real_data)
     return real_data + perturbation
Ejemplo n.º 43
0
 def __init__(self, mu, sigma):
     """Store a Normal(mu, sigma) distribution and a learnable parameter ``x``.

     ``x`` is initialised with standard normal noise shaped like ``mu``.
     """
     super().__init__()
     self.d = torch.distributions.Normal(mu, sigma)
     initial_value = torch.randn_like(mu)
     self.x = torch.nn.Parameter(initial_value)
Ejemplo n.º 44
0
"""
Basic tensor creation
"""
# Each assignment below replaces the previous value of x.
x = torch.empty(5, 3)
x = torch.rand(5, 3)
x = torch.zeros(5, 3, dtype=torch.long)

## Construct a tensor directly from data
x = torch.tensor([5.5, 3])


# Or create a tensor based on an existing tensor.
# These methods reuse properties of the input tensor, e.g. dtype,
# unless new values are provided by the user.

x = torch.randn_like(x, dtype=torch.float)

print(x)

## size() returns a torch.Size; the second print shows its type
x = torch.rand(5, 3)
print(x.size()); print (type(x.size()))



"""
#################### OPERATIONS ########################
Operations
There are multiple syntaxes for operations.
 In the following example, we will take a look at the addition operation.
"""
Ejemplo n.º 45
0
    content_image_list.append(file)


vgg_style_losses = np.load('vgg_style_losses.npy')
vgg_style_losses = vgg_style_losses[()]
each_style_loss = np.zeros((len(style_image_list),len(content_image_list)))
for i_style in range(len(style_image_list)):
    for i_content in range(len(content_image_list)):

        print('processing content', i_content, ' style ', i_style)

        style = load_image(style_image_list[i_style]).to(device)
        content = load_image(content_image_list[i_content]).to(device)

        if TARGET_SOURCE == 'random':
            target = torch.randn_like(content).requires_grad_(True).to(device)
        elif TARGET_SOURCE == 'content':
            target = content.clone().requires_grad_(True).to(device)

        if ARCHITECTURE == 'vgg19_adding_conv1x1_h':
            style_weights = {'conv1_1': 1.0,
                             'conv2_2': 1.0,
                             'conv3_4': 1.0,
                             'conv4_4': 1.0,
                             'conv5_4': 1.0}
        elif ARCHITECTURE == 'vgg19_adding_conv1x1_h_removing_1conv3x3':
            style_weights = {'conv1_1': 1.0,
                             'conv2_2': 1.0,
                             'conv3_4': 1.0,
                             'conv4_4': 1.0,
                             'conv5_4': 1.0}