Example #1
    def _gaussian_kl_divergence(self, p, q):
        p_mean = p[0][:Z_DIM]
        p_logstd = p[0][Z_DIM:]
        p_var = T.sqrt(T.exp(p_logstd))
        q_mean = q[0][:Z_DIM]
        q_logstd = q[0][Z_DIM:]
        q_var = T.sqrt(T.exp(q_logstd))

        kl = (T.log(q_var/p_var) + (p_var + (p_mean-q_mean)*(p_mean-q_mean))/q_var - 1) * 0.5
        return T.sum(kl)
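
For reference, the quantity summed above is the closed-form KL divergence between two univariate Gaussians p = N(mu_p, sigma_p^2) and q = N(mu_q, sigma_q^2), with p_var and q_var playing the role of the variances:

$$\mathrm{KL}(p\,\|\,q) \;=\; \log\frac{\sigma_q}{\sigma_p} \;+\; \frac{\sigma_p^2 + (\mu_p - \mu_q)^2}{2\sigma_q^2} \;-\; \frac{1}{2}.$$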
Example #2
def rmsprop(opfunc, x, config, state=None):
    """ An implementation of RMSprop

    ARGS:

    - 'opfunc' : a function that takes a single input (X), the point
                of evaluation, and returns f(X) and df/dX
    - 'x'      : the initial point
    - 'config' : a table with configuration parameters for the optimizer
    - 'config['learningRate']'      : learning rate
    - 'config['alpha']'             : smoothing constant
    - 'config['epsilon']'           : small value added to the root mean square for numerical stability
    - 'config['weightDecay']'       : weight decay
    - 'state'                    : a table describing the state of the optimizer;
                                after each call the state is modified
    - 'state['m']'                  : leaky sum of squares of parameter gradients,
    - 'state['tmp']'                : and the square root (with epsilon smoothing)

    RETURN:
    - `x`     : the new x vector
    - `f(x)`  : the function, evaluated before the update

    """
    # (0) get/update state
    if config is None and state is None:
        raise ValueError("rmsprop requires a dictionary to retain state between iterations")
    state = state if state is not None else config
    lr = config.get('learningRate', 1e-2)
    alpha = config.get('alpha', 0.99)
    epsilon = config.get('epsilon', 1e-8)
    wd = config.get('weightDecay', 0)

    # (1) evaluate f(x) and df/dx
    fx, dfdx = opfunc(x)

    # (2) weight decay
    if wd != 0:
        dfdx.add_(wd, x)

    # (3) initialize mean square values and square gradient storage
    if 'm' not in state:
        state['m'] = x.new().resize_as_(dfdx).zero_()
        state['tmp'] = x.new().resize_as_(dfdx)

    # (4) calculate new (leaky) mean squared values
    state['m'].mul_(alpha)
    state['m'].addcmul_(1.0 - alpha, dfdx, dfdx)

    # (5) perform update
    torch.sqrt(state['m'], out=state['tmp']).add_(epsilon)
    x.addcdiv_(-lr, dfdx, state['tmp'])

    # return x*, f(x) before optimization
    return x, fx
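
A minimal usage sketch (hypothetical, not from the original repository), assuming a PyTorch version that still accepts the legacy add_/addcmul_/addcdiv_ signatures used above; the objective is a toy quadratic:

import torch

def quadratic(x):
    # f(x) = 0.5 * ||x||^2, so df/dx = x
    return 0.5 * (x * x).sum().item(), x.clone()

x = torch.randn(5)
config = {'learningRate': 1e-2, 'alpha': 0.99, 'epsilon': 1e-8}
for _ in range(200):
    x, fx = rmsprop(quadratic, x, config)  # state is kept inside config between calls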
Example #3
 def triplet_loss(self, z_p, z_n, z_d, margin=0.1, l2=0):
     l_n = torch.sqrt(((z_p - z_n) ** 2).sum(dim=1))
     l_d = - torch.sqrt(((z_p - z_d) ** 2).sum(dim=1))
     l_nd = l_n + l_d
     loss = F.relu(l_n + l_d + margin)
     l_n = torch.mean(l_n)
     l_d = torch.mean(l_d)
     l_nd = torch.mean(l_n + l_d)
     loss = torch.mean(loss)
     if l2 != 0:
         loss += l2 * (torch.norm(z_p) + torch.norm(z_n) + torch.norm(z_d))
     return loss, l_n, l_d, l_nd
Example #4
def ldmk_loss(input, target, weight=None, size_average=True):
    n, c = input.size()

    loss_ = (input - target) ** 2
    iod = torch.sqrt(torch.sum(
        (target[:, 36*2:37*2] - target[:, 45*2:46*2])**2, 1))
    loss = torch.autograd.Variable(torch.zeros((n, c//2))).float().cuda()
    for i in range(c//2):
        loss[:, i] = torch.sqrt((loss_[:, i*2] + loss_[:, i*2+1])) / (iod+1e-6)

    if size_average:
        loss = torch.mean(loss)
    return loss
Example #5
def save_conv_shrink_bn(fp, conv_model, bn_model, eps=1e-5):
    if bn_model.bias.is_cuda:
        bias = bn_model.bias.data - bn_model.running_mean * bn_model.weight.data / torch.sqrt(bn_model.running_var + eps)
        convert2cpu(bias).numpy().tofile(fp)
        s = conv_model.weight.data.size()
        weight = conv_model.weight.data * (bn_model.weight.data / torch.sqrt(bn_model.running_var + eps)).view(-1,1,1,1).repeat(1, s[1], s[2], s[3])
        convert2cpu(weight).numpy().tofile(fp)
    else:
        bias = bn_model.bias.data - bn_model.running_mean * bn_model.weight.data / torch.sqrt(bn_model.running_var + eps)
        bias.numpy().tofile(fp)
        s = conv_model.weight.data.size()
        weight = conv_model.weight.data * (bn_model.weight.data / torch.sqrt(bn_model.running_var + eps)).view(-1,1,1,1).repeat(1, s[1], s[2], s[3])
        weight.numpy().tofile(fp)
Example #6
def get_negative_expectation(q_samples, measure, average=True):
    log_2 = math.log(2.)

    if measure == 'GAN':
        Eq = F.softplus(-q_samples) + q_samples
    elif measure == 'JSD':
        Eq = F.softplus(-q_samples) + q_samples - log_2
    elif measure == 'X2':
        Eq = -0.5 * ((torch.sqrt(q_samples ** 2) + 1.) ** 2)
    elif measure == 'KL':
        Eq = torch.exp(q_samples)
    elif measure == 'RKL':
        Eq = q_samples - 1.
    elif measure == 'DV':
        Eq = log_sum_exp(q_samples, 0) - math.log(q_samples.size(0))
    elif measure == 'H2':
        Eq = torch.exp(q_samples) - 1.
    elif measure == 'W1':
        Eq = q_samples
    else:
        raise_measure_error(measure)

    if average:
        return Eq.mean()
    else:
        return Eq
Example #7
    def forward(self, input1):
        self.batchgrid3d = torch.zeros(torch.Size([input1.size(0)]) + self.grid3d.size())

        for i in range(input1.size(0)):
            self.batchgrid3d[i] = self.grid3d

        self.batchgrid3d = Variable(self.batchgrid3d)
        #print(self.batchgrid3d)

        x = torch.sum(torch.mul(self.batchgrid3d, input1[:,:,:,0:4]), 3)
        y = torch.sum(torch.mul(self.batchgrid3d, input1[:,:,:,4:8]), 3)
        z = torch.sum(torch.mul(self.batchgrid3d, input1[:,:,:,8:]), 3)
        #print(x)
        r = torch.sqrt(x**2 + y**2 + z**2) + 1e-5

        #print(r)
        theta = torch.acos(z/r)/(np.pi/2)  - 1
        #phi = torch.atan(y/x)
        phi = torch.atan(y/(x + 1e-5))  + np.pi * x.lt(0).type(torch.FloatTensor) * (y.ge(0).type(torch.FloatTensor) - y.lt(0).type(torch.FloatTensor))
        phi = phi/np.pi


        output = torch.cat([theta,phi], 3)

        return output
Example #8
def test_hmc_conjugate_gaussian(fixture,
                                num_samples,
                                warmup_steps,
                                hmc_params,
                                expected_means,
                                expected_precs,
                                mean_tol,
                                std_tol):
    pyro.get_param_store().clear()
    hmc_kernel = HMC(fixture.model, **hmc_params)
    mcmc_run = MCMC(hmc_kernel, num_samples, warmup_steps).run(fixture.data)
    for i in range(1, fixture.chain_len + 1):
        param_name = 'loc_' + str(i)
        marginal = EmpiricalMarginal(mcmc_run, sites=param_name)
        latent_loc = marginal.mean
        latent_std = marginal.variance.sqrt()
        expected_mean = torch.ones(fixture.dim) * expected_means[i - 1]
        expected_std = 1 / torch.sqrt(torch.ones(fixture.dim) * expected_precs[i - 1])

        # Actual vs expected posterior means for the latents
        logger.info('Posterior mean (actual) - {}'.format(param_name))
        logger.info(latent_loc)
        logger.info('Posterior mean (expected) - {}'.format(param_name))
        logger.info(expected_mean)
        assert_equal(rmse(latent_loc, expected_mean).item(), 0.0, prec=mean_tol)

        # Actual vs expected posterior precisions for the latents
        logger.info('Posterior std (actual) - {}'.format(param_name))
        logger.info(latent_std)
        logger.info('Posterior std (expected) - {}'.format(param_name))
        logger.info(expected_std)
        assert_equal(rmse(latent_std, expected_std).item(), 0.0, prec=std_tol)
Example #9
def pullaway_loss(embeddings):
    norm = torch.sqrt(torch.sum(embeddings ** 2.0, 1, keepdim=True))
    normalized_embeddings = embeddings / norm
    similarity = torch.matmul(normalized_embeddings, normalized_embeddings.transpose(1, 0))
    batch_size = embeddings.size()[0]
    pt_loss = (torch.sum(similarity) - batch_size) / (batch_size * (batch_size - 1))
    return pt_loss
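
A minimal sanity-check sketch (hypothetical): the pull-away term is the average off-diagonal cosine similarity, so it is 0 for mutually orthogonal embeddings and 1 when all embeddings are identical.

import torch

print(pullaway_loss(torch.eye(4)))      # 4 orthogonal unit vectors -> ~0
print(pullaway_loss(torch.ones(4, 8)))  # 4 identical vectors       -> ~1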
Example #10
def skewness_score(x, dim=0):
    '''Test whether the skew is different from the normal distribution.

    This function tests the null hypothesis that the skewness of
    the population that the sample was drawn from is the same
    as that of a corresponding normal distribution.
    Adapted from `scipy.stats.skewtest`.

    Args:
        x: Tensor of the sample data
        dim: Dimension along which to compute the test. Default is 0. If None,
           compute over the whole tensor `x`.
    Returns:
        statistic: The computed z-score for this test.
        p-value: A 2-sided chi squared probability for the hypothesis test.
    '''
    x, n, dim = _x_n_dim(x, dim)
    b2 = (x**3).mean(dim) / (x**2).mean(dim)**1.5
    y = b2 * math.sqrt(((n + 1) * (n + 3)) / (6.0 * (n - 2)))
    beta2 = 3.0 * (n**2 + 27 * n - 70) * (n + 1) * (n + 3) /\
        ((n - 2.0) * (n + 5) * (n + 7) * (n + 9))
    W2 = -1.0 + math.sqrt(2 * (beta2 - 1))
    delta = 1.0 / math.sqrt(0.5 * math.log(W2))
    alpha = math.sqrt(2.0 / (W2 - 1))
    y[y == 0] = 1
    yalpha = y / alpha
    Z = delta * torch.log(yalpha + torch.sqrt(yalpha**2 + 1))
    return Z, 1 + torch.erf(-math.sqrt(0.5) * torch.abs(Z))
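
The statistic b2 above is the usual sample skewness (third standardized moment), assuming `_x_n_dim` returns centered data:

$$b_2 = \frac{m_3}{m_2^{3/2}}, \qquad m_k = \frac{1}{n}\sum_i (x_i - \bar{x})^k,$$

which is then mapped to an approximately standard-normal Z using the same transformation as `scipy.stats.skewtest`.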
Example #11
    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        if self.device_id == None:
            cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        else:
            x = input
            sub_weights = torch.chunk(self.weight, len(self.device_id), dim=0)
            temp_x = x.cuda(self.device_id[0])
            weight = sub_weights[0].cuda(self.device_id[0])
            cosine = F.linear(F.normalize(temp_x), F.normalize(weight))
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                weight = sub_weights[i].cuda(self.device_id[i])
                cosine = torch.cat((cosine, F.linear(F.normalize(temp_x), F.normalize(weight)).cuda(self.device_id[0])), dim=1) 
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        one_hot = torch.zeros(cosine.size())
        if self.device_id != None:
            one_hot = one_hot.cuda(self.device_id[0])
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # ------------- torch.where(out_i = x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s

        return output
Example #12
def test():
    network.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        target_indices = target
        target_one_hot = to_one_hot(target_indices, length=network.digits.num_units)

        data, target = Variable(data, volatile=True).cuda(), Variable(target_one_hot).cuda()

        output = network(data)

        test_loss += network.loss(data, output, target, size_average=False).data[0] # sum up batch loss

        v_mag = torch.sqrt((output**2).sum(dim=2, keepdim=True))

        pred = v_mag.data.max(1, keepdim=True)[1].cpu()

        correct += pred.eq(target_indices.view_as(pred)).sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss,
        correct,
        len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Example #13
def create_input(points, sigma2):
    bs, N, _ = points.size() #points has size bs,N,2
    OP = torch.zeros(bs,N,N,4).type(dtype)
    E = torch.eye(N).type(dtype).unsqueeze(0).expand(bs,N,N)
    OP[:,:,:,0] = E
    W = points.unsqueeze(1).expand(bs,N,N,dim) - points.unsqueeze(2).expand(bs,N,N,dim)
    dists2 = (W * W).sum(3)
    dists = torch.sqrt(dists2)
    W = torch.exp(-dists2 / sigma2)
    OP[:,:,:,1] = W
    D = E * W.sum(2,True).expand(bs,N,N)
    OP[:,:,:,2] = D
    U = (torch.ones(N,N).type(dtype)/N).unsqueeze(0).expand(bs,N,N)
    OP[:,:,:,3] = U
    OP = Variable(OP)
    x = Variable(points)
    Y = Variable(W.clone())

    # Normalize inputs
    if normalize:
        mu = x.sum(1)/N
        mu_ext = mu.unsqueeze(1).expand_as(x)
        var = ((x - mu_ext)*(x - mu_ext)).sum(1)/N
        var_ext = var.unsqueeze(1).expand_as(x)
        x = x - mu_ext
        x = x/(10 * var_ext)

    return (OP, x, Y), dists
Example #14
    def _get_norm(self, gaus):
        norm_tensor = torch.ones([1, 1, self.npixels[0], self.npixels[1]])
        normalization_feats = torch.autograd.Variable(norm_tensor)
        if self.use_gpu:
            normalization_feats = normalization_feats.cuda()

        norm_out = self._compute_gaussian(normalization_feats, gaussian=gaus)
        return 1 / torch.sqrt(norm_out + 1e-20)
Example #15
 def forward(self, x, y, xidx=None, yidx=None):
     K = torch.sqrt(l2_distance(x, y))
     u, v = self._get_uv(x, y, xidx, yidx)
     if self.regularization == 'entropy':
         return torch.exp((u[:, None] + v[None, :] - K) / self.alpha)
     else:
         return torch.clamp((u[:, None] + v[None, :] - K),
                            min=0) / (2 * self.alpha)
Example #16
    def forward(self, input):
        # Hack: Force noise vectors to be function of input so they are put into
        # predict_net and not init_net when tracing with ONNX
        epsilon_input = torch.randn(1, input.size()[1], device=input.device)
        epsilon_output = torch.randn(
            self.out_dimension - input.size()[1] + input.size()[1],
            1,
            device=input.device,
        )
        epsilon_in = torch.sign(epsilon_input) * torch.sqrt(torch.abs(epsilon_input))
        epsilon_out = torch.sign(epsilon_output) * torch.sqrt(torch.abs(epsilon_output))

        # Add noise to bias and weights
        noise = torch.mul(epsilon_in, epsilon_out)
        bias = self.bias + self.sigma_bias * epsilon_out.t()
        weight = self.weight + self.sigma_weight * noise
        return input.matmul(weight.t()) + bias
Example #17
    def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
        ''' roi pool on pyramid feature maps'''
        # do roi pooling based on predicted rois
        img_area = im_info[0][0] * im_info[0][1]
        h = rois.data[:, 4] - rois.data[:, 2] + 1
        w = rois.data[:, 3] - rois.data[:, 1] + 1
        roi_level = torch.log(torch.sqrt(h * w) / 224.0) / np.log(2)
        roi_level = torch.floor(roi_level + 4)
        # --------
        # roi_level = torch.log(torch.sqrt(h * w) / 224.0)
        # roi_level = torch.round(roi_level + 4)
        # ------
        roi_level[roi_level < 2] = 2
        roi_level[roi_level > 5] = 5
        # roi_level.fill_(5)
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            # NOTE: need to add pyramid
            grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size)  ##
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach()) ##
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]

        elif cfg.POOLING_MODE == 'pool':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]
            
        return roi_pool_feat
Example #18
    def testModulus(self):
        for jit in [True, False]:
            modulus = sl.Modulus(jit=jit)
            x = torch.cuda.FloatTensor(100,10,4,2).copy_(torch.rand(100,10,4,2))
            y = modulus(x)
            u = torch.squeeze(torch.sqrt(torch.sum(x * x, 3)))
            v = y.narrow(3, 0, 1)

            self.assertAlmostEqual(linfnorm(u, v), 0, places=6)
Example #19
    def __call__(self, states, agent_states):
        states_v = ptan.agent.float32_preprocessor(states).to(self.device)

        mu_v, var_v, _ = self.net(states_v)
        mu = mu_v.data.cpu().numpy()
        sigma = torch.sqrt(var_v).data.cpu().numpy()
        actions = np.random.normal(mu, sigma)
        actions = np.clip(actions, -1, 1)
        return actions, agent_states
Example #20
 def forward(self, tensor: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
     # pylint: disable=arguments-differ
     broadcast_mask = mask.unsqueeze(-1).float()
     num_elements = broadcast_mask.sum() * self.size
     mean = (tensor * broadcast_mask).sum() / num_elements
     masked_centered = (tensor - mean) * broadcast_mask
     std = torch.sqrt(
             (masked_centered * masked_centered).sum() / num_elements + self.eps
     )
     return self.gamma * (tensor - mean) / (std + self.eps) + self.beta
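
In other words, the snippet computes one mean and standard deviation over all masked positions and all self.size features (N = size * mask.sum()) and then applies an affine layer norm; schematically:

$$\mu = \frac{1}{N}\sum_i m_i x_i, \qquad \sigma = \sqrt{\frac{1}{N}\sum_i m_i (x_i - \mu)^2 + \epsilon}, \qquad y = \gamma\,\frac{x - \mu}{\sigma + \epsilon} + \beta.$$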
Example #21
    def loss(self, x, y, xidx=None, yidx=None):
        K = torch.sqrt(l2_distance(x, y))
        u, v = self._get_uv(x, y, xidx, yidx)

        if regularization == 'entropy':
            reg = - alpha * torch.exp((u[:, None] + v[None, :] - K) / alpha)
        else:
            reg = - torch.clamp((u[:, None] + v[None, :] - K),
                                min=0) ** 2 / 4 / alpha
        return - torch.mean(u[:, None] + v[None, :] + reg)
Example #22
def MVNError(output, gt):
    outMean = torch.mean(output)
    outStd = torch.std(output)
    output = (output - outMean)/outStd
    gtMean = torch.mean(gt)
    gtStd = torch.std(gt)
    gt = (gt - gtMean)/gtStd
    d = output - gt
    diff = torch.sqrt(torch.mean(d * d))
    return diff
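
Equivalently, this is the RMSE between the mean/variance-normalized (MVN) tensors:

$$\mathrm{MVNError}(o, g) = \sqrt{\operatorname{mean}\!\Big(\big(\tfrac{o - \mu_o}{\sigma_o} - \tfrac{g - \mu_g}{\sigma_g}\big)^2\Big)}.$$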
Example #23
def zero_mean_covariance(covariance, stability=0.0):
    '''Output covariance of ReLU for zero-mean Gaussian input.

    f(x) = max(x, 0).

    Args:
        covariance: Input covariance matrix (Size, Size).
        stability: For accurate results this should be zero
            if used in training, use a value like 1e-4 for stability.

    Returns:
        Output covariance of ReLU for zero-mean Gaussian input (Size, Size).
    '''
    S = outer(torch.sqrt(torch.diagonal(covariance, 0, -2, -1)))
    V = (covariance / S).clamp_(stability - 1.0, 1.0 - stability)
    Q = torch.acos(-V) * V + torch.sqrt(1.0 - (V**2.0)) - 1.0
    cov = S * Q * (1.0 / (2.0 * math.pi))
    # handle degenerate case when we have zero variance
    cov[cov != cov] = 0  # replace nans with zeros
    return cov
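
Element-wise, with sigma_x, sigma_y the input standard deviations and rho the (clamped) correlation V, this is the standard closed form for the covariance of ReLU outputs of a zero-mean Gaussian:

$$\operatorname{Cov}\big(\max(X,0),\,\max(Y,0)\big) = \frac{\sigma_x \sigma_y}{2\pi}\Big(\rho\,\arccos(-\rho) + \sqrt{1 - \rho^2} - 1\Big).$$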
Example #24
    def reconstruction_loss(self, images, input, size_average=True):
        # Get the lengths of capsule outputs.
        v_mag = torch.sqrt((input**2).sum(dim=2))

        # Get index of longest capsule output.
        _, v_max_index = v_mag.max(dim=1)
        v_max_index = v_max_index.data

        # Use just the winning capsule's representation (and zeros for other capsules) to reconstruct input image.
        batch_size = input.size(0)
        all_masked = [None] * batch_size
        for batch_idx in range(batch_size):
            # Get one sample from the batch.
            input_batch = input[batch_idx]

            # Copy only the maximum capsule index from this batch sample.
            # This masks out (leaves as zero) the other capsules in this sample.
            batch_masked = Variable(torch.zeros(input_batch.size())).cuda()
            batch_masked[v_max_index[batch_idx]] = input_batch[v_max_index[batch_idx]]
            all_masked[batch_idx] = batch_masked

        # Stack masked capsules over the batch dimension.
        masked = torch.stack(all_masked, dim=0)

        # Reconstruct input image.
        masked = masked.view(input.size(0), -1)
        output = self.relu(self.reconstruct0(masked))
        output = self.relu(self.reconstruct1(output))
        output = self.sigmoid(self.reconstruct2(output))
        output = output.view(-1, self.image_channels, self.image_height, self.image_width)

        # Save reconstructed images occasionally.
        if self.reconstructed_image_count % 10 == 0:
            if output.size(1) == 2:
                # handle two-channel images
                zeros = torch.zeros(output.size(0), 1, output.size(2), output.size(3))
                output_image = torch.cat([zeros, output.data.cpu()], dim=1)
            else:
                # assume RGB or grayscale
                output_image = output.data.cpu()
            vutils.save_image(output_image, "reconstruction.png")
        self.reconstructed_image_count += 1

        # The reconstruction loss is the sum squared difference between the input image and reconstructed image.
        # Multiplied by a small number so it doesn't dominate the margin (class) loss.
        error = (output - images).view(output.size(0), -1)
        error = error**2
        error = torch.sum(error, dim=1) * 0.0005

        # Average over batch
        if size_average:
            error = error.mean()

        return error
Example #25
 def __init__(self, concentration):
     if concentration.data.min() < 1:
         raise NotImplementedError('concentration < 1 is not supported')
     self.concentration = concentration
     self._standard_gamma = Gamma(concentration, concentration.new_tensor([1.]).squeeze().expand_as(concentration))
     # The following are Marsaglia & Tsang's variable names.
     self._d = self.concentration - 1.0 / 3.0
     self._c = 1.0 / torch.sqrt(9.0 * self._d)
     # Compute log scale using Gamma.log_prob().
     x = self._d.detach()  # just an arbitrary x.
     log_scale = self.propose_log_prob(x) + self.log_prob_accept(x) - self.log_prob(x)
     super(RejectionStandardGamma, self).__init__(self.propose, self.log_prob_accept, log_scale)
Example #26
    def __call__(self, boxlists):
        """
        Arguments:
            boxlists (list[BoxList])
        """
        # Compute level ids
        s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists]))

        # Eqn.(1) in FPN paper
        target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps))
        target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max)
        return target_lvls.to(torch.int64) - self.k_min
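
This is Eqn. (1) of the FPN paper (Lin et al., 2017): an RoI of area wh is assigned to pyramid level

$$k = \Big\lfloor k_0 + \log_2\!\big(\sqrt{wh}/224\big) \Big\rfloor,$$

clamped to [k_min, k_max]; here lvl0 plays the role of k_0 and s0 the canonical 224-pixel scale.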
Example #27
 def forward(self, x):
     n = x.size(2) * x.size(3)
     t = x.view(x.size(0), x.size(1), n)
     mean = torch.mean(t, 2).unsqueeze(2).expand_as(x)
     # Calculate the biased var. torch.var returns unbiased var
     var = torch.var(t, 2).unsqueeze(2).expand_as(x) * ((n - 1) / float(n))
     scale_broadcast = self.weight.unsqueeze(1).unsqueeze(1).unsqueeze(0)
     scale_broadcast = scale_broadcast.expand_as(x)
     shift_broadcast = self.bias.unsqueeze(1).unsqueeze(1).unsqueeze(0)
     shift_broadcast = shift_broadcast.expand_as(x)
     out = (x - mean) / torch.sqrt(var + self.eps)
     out = out * scale_broadcast + shift_broadcast
     return out
Example #28
    def forward(self, input):
        self.epsilon_input.normal_()
        self.epsilon_output.normal_()

        func = lambda x: torch.sign(x) * torch.sqrt(torch.abs(x))
        eps_in = func(self.epsilon_input.data)
        eps_out = func(self.epsilon_output.data)

        bias = self.bias
        if bias is not None:
            bias = bias + self.sigma_bias * eps_out.t()
        noise_v = torch.mul(eps_in, eps_out)
        return F.linear(input, self.weight + self.sigma_weight * noise_v, bias)
Example #29
def clip_gradient(model, clip_norm):
    """Computes a gradient clipping coefficient based on gradient norm."""
    totalnorm = 0
    for p in model.parameters():
        if p.requires_grad and p.grad is not None:
            modulenorm = p.grad.data.norm()
            totalnorm += modulenorm ** 2
    totalnorm = torch.sqrt(totalnorm).item()
    norm = (clip_norm / max(totalnorm, clip_norm))
    # print totalnorm
    for p in model.parameters():
        if p.requires_grad and p.grad is not None:
            p.grad.mul_(norm)
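
A minimal usage sketch (hypothetical model and loss), rescaling the gradients in place so the total norm does not exceed 10:

import torch

model = torch.nn.Linear(4, 2)
loss = model(torch.randn(8, 4)).pow(2).sum()
loss.backward()
clip_gradient(model, 10.0)  # p.grad is scaled down only if the total norm exceeds clip_norm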
Example #30
    def gradient_penalty(self, y, x):
        """Compute gradient penalty: (L2_norm(dy/dx) - 1)**2."""
        weight = torch.ones(y.size()).to(self.device)
        dydx = torch.autograd.grad(outputs=y,
                                   inputs=x,
                                   grad_outputs=weight,
                                   retain_graph=True,
                                   create_graph=True,
                                   only_inputs=True)[0]

        dydx = dydx.view(dydx.size(0), -1)
        dydx_l2norm = torch.sqrt(torch.sum(dydx**2, dim=1))
        return torch.mean((dydx_l2norm-1)**2)
Example #31
    def train(self,data,inform = None,use_cuda=True,TASK = 2,num_epochs = 200,batch_size = 50,k_d=1, k_g = 1,lr = 0.0001):
        g_optimizer = optim.Adam(self.generator.parameters(),lr=lr)
        d_optimizer = optim.Adam(self.discriminator.parameters(), lr=lr)
        try:
            for epoch in range(num_epochs):
                ls_g=[]
                ls_d=[]
                for input_data,info in iterate_minibatches(data, batch_size,inform):
                    
                    # Optimize D
                    
                    for _ in range(k_d):
                        # Sample noise
                        if not (info is None):
                            noise = Variable(torch.cat((torch.Tensor(sample_noise(len(input_data))),torch.Tensor(info)),1).cuda())
                        else:
                            noise = Variable(torch.Tensor(sample_noise(len(input_data))).cuda())
                        
                        # Do an update
                    
                        inp_data = Variable(torch.Tensor(input_data).cuda())
                        data_gen = self.generator(noise)
                        if(TASK==4):
                            # Code from here: https://github.com/EmilienDupont/wgan-gp
                            alpha = torch.rand(inp_data.size()[0], 1)
                            alpha = alpha.expand_as(inp_data)
                            if use_cuda:
                                alpha = alpha.cuda()
                            interpolated = alpha * inp_data.data + (1 - alpha) * data_gen.data
                            interpolated = Variable(interpolated, requires_grad=True)
                            if use_cuda:
                                interpolated = interpolated.cuda()
                            prob_interpolated = self.discriminator(interpolated,TASK=TASK)
                            gradients = torch.autograd.grad(outputs=prob_interpolated, inputs=interpolated,
                                                grad_outputs=torch.ones(prob_interpolated.size()).cuda() if use_cuda else torch.ones(
                                                prob_interpolated.size()),
                                                create_graph=True, retain_graph=True)[0]
                            gradients = gradients.view(inp_data.size()[0], -1)
                            gradients_norm = torch.sqrt(torch.sum(gradients ** 2, dim=1) + 1e-12)
                            penalty = 10 * ((gradients_norm - 1) ** 2).mean()
                            if not (info is None):
                                loss = d_loss(self.discriminator(data_gen + Variable(torch.Tensor(info).cuda(), requires_grad=False),TASK = TASK), self.discriminator(inp_data,TASK = TASK),TASK,penalty)
                            else:
                                loss = d_loss(self.discriminator(data_gen,TASK = TASK), self.discriminator(inp_data,TASK = TASK),TASK,penalty)
                        else:
                            if not (info is None):
                                loss = d_loss(self.discriminator(data_gen + Variable(torch.Tensor(info).cuda(), requires_grad=False),TASK = TASK),self.discriminator(inp_data,TASK = TASK),TASK)
                            else:
                                loss = d_loss(self.discriminator(data_gen,TASK = TASK), self.discriminator(inp_data,TASK = TASK),TASK)
                        ls_d.append(loss.data.cpu().numpy())
                        d_optimizer.zero_grad()
                        loss.backward()
                        d_optimizer.step()
                        if TASK == 3:
                            self.discriminator.apply(self.clipper,TASK = TASK)

            
                    # Optimize G
                    for _ in range(k_g):
                        # Sample noise
                        if not (info is None):
                            noise = Variable(torch.cat((torch.Tensor(sample_noise(len(input_data))),torch.Tensor(info)),1).cuda())
                        else:
                            noise = Variable(torch.Tensor(sample_noise(len(input_data))).cuda())
                        
                        # Do an update
                        data_gen = self.generator(noise)
                        if not (info is None):
                             loss = g_loss(self.discriminator(data_gen + Variable(torch.Tensor(info).cuda(), requires_grad=False),TASK = TASK),TASK)
                        else:
                            loss = g_loss(self.discriminator(data_gen,TASK = TASK),TASK)
                        ls_g.append(loss.data.cpu().numpy())
                        g_optimizer.zero_grad()
                        loss.backward()
                        g_optimizer.step()
                if(epoch%10==0):
                    print('generator_loss:',np.mean(ls_g),'discriminator_loss',np.mean(ls_d))
        except KeyboardInterrupt:
            pass
Example #32
        elbo_list.append(
            elbo_evaluate(images, labels, para, dim, scale, revise,
                          num_St).item())
        # start of the algorithm
        z_samples = sampleZ(para, dim, num_S)
        log_qs = ng_log_Qs(para, z_samples, dim)
        log_priors = ng_log_Priors(z_samples, dim)
        log_likelihoods = ng_log_Likelihoods(images, labels, z_samples, dim)
        for s in range(len(z_samples)):
            gradients[s] = grad_log_Q(para, z_samples[s], dim)[0]
        elbo_temp = log_likelihoods * revise + log_priors / scale - log_qs / scale
        grad_temp = torch.matmul(torch.diag(elbo_temp), gradients)
        grad_avg = torch.mean(grad_temp, 0)
        G += torch.matmul(grad_avg.view(dim * 2, -1),
                          grad_avg.view(-1, dim * 2))
        rho = eta / torch.sqrt(torch.diag(G))
        para.data += rho * grad_avg
        #print information
        if 1:
            print('Epoch[{}/{}], step[{}/{}]'.format(\
                epoch+1,
                num_epochs,
                i+1,len(train_loader)))
            print('ELBO: {:.3f}\n'.format(\
                elbo_list[len(elbo_list)-1]))

if not os.path.exists('./result_elbo'):
    os.makedirs('./result_elbo')
result = np.array(elbo_list)
np.save('./result_elbo/bbvi_basic.npy', result)
Example #33
def arccosh(x):
    c0 = torch.log(x)
    c1 = torch.log1p(torch.sqrt(x * x - 1) / x)
    return c0 + c1
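
The two-term form is algebraically the usual inverse hyperbolic cosine, just written with log1p for better numerical behaviour:

$$\log x + \log\!\Big(1 + \frac{\sqrt{x^2 - 1}}{x}\Big) = \log\!\big(x + \sqrt{x^2 - 1}\big) = \operatorname{arcosh}(x), \qquad x \ge 1.$$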
Example #34
def arcsinh(x):
    return torch.log(x + torch.sqrt(x * x + 1))
Example #35
def arcosh(x):
    return torch.log(x + torch.sqrt(x * x - 1))
Example #36
def sqrt(val):
    return torch.sqrt(torch.tensor(val).float())
Example #37
 def squash(self, x, axis=-1):
     s_squared_norm = (x**2).sum(axis, keepdim=True)
     scale = t.sqrt(s_squared_norm + T_epsilon)
     return x / scale
Example #38
def sqrt(input):
    return th.sqrt(input)
Example #39
    def step(self,
             # futures=(train_episodes_futures, valid_episodes_futures)
             train_futures,
             valid_futures,
             max_kl=1e-3,
             cg_iters=10,
             cg_damping=1e-2,
             ls_max_steps=10,
             ls_backtrack_ratio=0.5):
        num_tasks = len(train_futures[0])
        logs = {}

        # Compute the surrogate loss
        #
        old_losses, old_kls, old_pis = self._async_gather([
            self.surrogate_loss(train, valid, old_pi=None)
            for (train, valid) in zip(zip(*train_futures), valid_futures)])

        logs['loss_before'] = to_numpy(old_losses)
        logs['kl_before'] = to_numpy(old_kls)

        # Compute the average loss over tasks; the output is a scalar
        old_loss = sum(old_losses) / num_tasks
        grads = torch.autograd.grad(old_loss,
                                    self.policy.parameters(),
                                    retain_graph=True)
        grads = parameters_to_vector(grads)

        # Compute the step direction with Conjugate Gradient
        # Compute the average KL over tasks; the output is a scalar
        old_kl = sum(old_kls) / num_tasks
        hessian_vector_product = self.hessian_vector_product(old_kl,
                                                             damping=cg_damping)
        stepdir = conjugate_gradient(hessian_vector_product,
                                     grads,
                                     cg_iters=cg_iters)

        # Compute the Lagrange multiplier
        shs = 0.5 * torch.dot(stepdir,
                              hessian_vector_product(stepdir, retain_graph=False))
        lagrange_multiplier = torch.sqrt(shs / max_kl)

        step = stepdir / lagrange_multiplier

        # Save the old parameters
        old_params = parameters_to_vector(self.policy.parameters())

        """
        vector_to_parameters( * , self.policy.parameters()) performs the update of the network parameters
        """
        # Line search
        step_size = 1.0
        for _ in range(ls_max_steps):
            vector_to_parameters(old_params - step_size * step,
                                 self.policy.parameters())

            losses, kls, _ = self._async_gather([
                self.surrogate_loss(train, valid, old_pi=old_pi)
                for (train, valid, old_pi)
                in zip(zip(*train_futures), valid_futures, old_pis)])

            improve = (sum(losses) / num_tasks) - old_loss
            kl = sum(kls) / num_tasks
            if (improve.item() < 0.0) and (kl.item() < max_kl):
                logs['loss_after'] = to_numpy(losses)
                logs['kl_after'] = to_numpy(kls)
                break
            step_size *= ls_backtrack_ratio
        else:
            vector_to_parameters(old_params, self.policy.parameters())

        # Inspect the final neural-network parameters
        params_final = self.policy.parameters()

        # logs['loss_before', 'kl_before', 'loss_after', 'kl_after']
        return logs
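
For reference, with s the conjugate-gradient direction, H the KL Hessian and delta = max_kl, the scaling above is the usual TRPO step:

$$\mathrm{shs} = \tfrac{1}{2}\,s^\top H s, \qquad \lambda = \sqrt{\mathrm{shs}/\delta}, \qquad \mathrm{step} = \frac{s}{\lambda} = \sqrt{\frac{2\delta}{s^\top H s}}\; s,$$

so that the quadratic KL estimate of the full step, (1/2) step^T H step, equals delta before the backtracking line search.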