def encode(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some
        reference boxes

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """

        TO_REMOVE = 1  # TODO remove
        ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE
        ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE
        ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths
        ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights

        gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE
        gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE
        gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths
        gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights

        wx, wy, ww, wh = self.weights
        targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
        targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
        targets_dw = ww * torch.log(gt_widths / ex_widths)
        targets_dh = wh * torch.log(gt_heights / ex_heights)

        targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
        return targets
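A quick sanity check of this parameterization (assuming [x1, y1, x2, y2] boxes and unit weights): shifting a proposal by exactly its own width should give dx = 1 and leave the size deltas at zero. A standalone sketch of the same math, with TO_REMOVE omitted for clarity:

import torch

# Recompute the (dx, dy, dw, dh) targets for one box pair, mirroring encode() above.
proposal = torch.tensor([0., 0., 10., 20.])    # x1, y1, x2, y2
reference = torch.tensor([10., 0., 20., 20.])  # same size, shifted by one width

ex_w, ex_h = proposal[2] - proposal[0], proposal[3] - proposal[1]
ex_cx, ex_cy = proposal[0] + 0.5 * ex_w, proposal[1] + 0.5 * ex_h
gt_w, gt_h = reference[2] - reference[0], reference[3] - reference[1]
gt_cx, gt_cy = reference[0] + 0.5 * gt_w, reference[1] + 0.5 * gt_h

dx = (gt_cx - ex_cx) / ex_w  # -> 1.0
dy = (gt_cy - ex_cy) / ex_h  # -> 0.0
dw = torch.log(gt_w / ex_w)  # -> 0.0
dh = torch.log(gt_h / ex_h)  # -> 0.0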
Example #2
    def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):

        num_wrong = wrong.size(1)
        batch_size = feat.size(0)

        feat = feat.view(-1, self.ninp, 1)
        right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
        wrong_dis = torch.bmm(wrong, feat)
        batch_wrong_dis = torch.bmm(batch_wrong, feat)

        wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)),1) \
                + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)),1)

        loss_dis = torch.sum(torch.log(wrong_score + 1))
        loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

        if fake is not None:
            fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
            fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

            margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
            loss_fake = torch.sum(margin_score)
            loss_dis += loss_fake
            loss_norm += fake.norm()

        loss = (loss_dis + 0.1 * loss_norm) / batch_size
        if fake is not None:
            return loss, loss_fake.data[0] / batch_size
        else:
            return loss
    def norm_flow(self, params, z, v, logposterior):

        h = F.tanh(params[0][0](z))
        mew_ = params[0][1](h)
        sig_ = F.sigmoid(params[0][2](h)+5.) #[PB,Z]


        z_reshaped = z.view(self.P, self.B, self.z_size)

        gradients = torch.autograd.grad(outputs=logposterior(z_reshaped), inputs=z_reshaped,
                          grad_outputs=self.grad_outputs,
                          create_graph=True, retain_graph=True, only_inputs=True)[0]
        gradients = gradients.detach()

        gradients = gradients.view(-1,self.z_size)


        v = v*sig_ + mew_*gradients

        logdet = torch.sum(torch.log(sig_), 1)


        h = F.tanh(params[1][0](v))
        mew_ = params[1][1](h)
        sig_ = F.sigmoid(params[1][2](h)+5.) #[PB,Z]

        z = z*sig_ + mew_*v

        logdet2 = torch.sum(torch.log(sig_), 1)

        #[PB]
        logdet = logdet + logdet2
        
        #[PB,Z], [PB]
        return z, v, logdet
    def pixelcnn_generate(self, z1, z2):
        # Sampling from PixelCNN
        x_zeros = torch.zeros(
            (z1.size(0), self.args.input_size[0], self.args.input_size[1], self.args.input_size[2]))
        if self.args.cuda:
            x_zeros = x_zeros.cuda()

        for i in range(self.args.input_size[1]):
            for j in range(self.args.input_size[2]):
                samples_mean, samples_logvar = self.p_x(Variable(x_zeros, volatile=True), z1, z2)
                samples_mean = samples_mean.view(samples_mean.size(0), self.args.input_size[0], self.args.input_size[1],
                                                 self.args.input_size[2])

                if self.args.input_type == 'binary':
                    probs = samples_mean[:, :, i, j].data
                    x_zeros[:, :, i, j] = torch.bernoulli(probs).float()
                    samples_gen = samples_mean

                elif self.args.input_type == 'gray' or self.args.input_type == 'continuous':
                    binsize = 1. / 256.
                    samples_logvar = samples_logvar.view(samples_mean.size(0), self.args.input_size[0],
                                                         self.args.input_size[1], self.args.input_size[2])
                    means = samples_mean[:, :, i, j].data
                    logvar = samples_logvar[:, :, i, j].data
                    # sample from logistic distribution
                    u = torch.rand(means.size()).cuda()
                    y = torch.log(u) - torch.log(1. - u)
                    sample = means + torch.exp(logvar) * y
                    x_zeros[:, :, i, j] = torch.floor(sample / binsize) * binsize
                    samples_gen = samples_mean

        return samples_gen
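The per-pixel sampling above is inverse-CDF sampling from a logistic distribution: if u ~ Uniform(0, 1), then mean + scale * (log u - log(1 - u)) is logistic with that mean and scale. A standalone sketch (values here are illustrative):

import torch

mean, scale = 0.0, 1.0
u = torch.rand(10000).clamp(1e-5, 1. - 1e-5)
samples = mean + scale * (torch.log(u) - torch.log(1. - u))
# empirical std should be close to pi / sqrt(3) ~= 1.81 for a standard logistic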
    def log_uniform_candidate_sampler(self, targets, choice_func=_choice):
        # returns sampled, true_expected_count, sampled_expected_count
        # targets = (batch_size, )
        #
        #  samples = (n_samples, )
        #  true_expected_count = (batch_size, )
        #  sampled_expected_count = (n_samples, )

        # see: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.h
        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.cc

        # algorithm: keep track of number of tries when doing sampling,
        #   then expected count is
        #   -expm1(num_tries * log1p(-p))
        # = (1 - (1-p)^num_tries) where p is self._probs[id]

        np_sampled_ids, num_tries = choice_func(self._num_words, self._num_samples)

        sampled_ids = torch.from_numpy(np_sampled_ids).to(targets.device)

        # Compute expected count = (1 - (1-p)^num_tries) = -expm1(num_tries * log1p(-p))
        # P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
        target_probs = torch.log((targets.float() + 2.0) / (targets.float() + 1.0)) / self._log_num_words_p1
        target_expected_count = -1.0 * (torch.exp(num_tries * torch.log1p(-target_probs)) - 1.0)
        sampled_probs = torch.log((sampled_ids.float() + 2.0) /
                                  (sampled_ids.float() + 1.0)) / self._log_num_words_p1
        sampled_expected_count = -1.0 * (torch.exp(num_tries * torch.log1p(-sampled_probs)) - 1.0)

        sampled_ids.requires_grad_(False)
        target_expected_count.requires_grad_(False)
        sampled_expected_count.requires_grad_(False)

        return sampled_ids, target_expected_count, sampled_expected_count
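A small standalone check of the log-uniform probabilities and the expected-count formula referenced above, with a hypothetical vocabulary of 10 words and an arbitrary num_tries:

import torch

num_words = 10
log_num_words_p1 = torch.log(torch.tensor(num_words + 1.0))

ids = torch.arange(num_words)
# P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
probs = torch.log((ids.float() + 2.0) / (ids.float() + 1.0)) / log_num_words_p1
assert torch.allclose(probs.sum(), torch.tensor(1.0))  # valid distribution

num_tries = 25
# expected count = 1 - (1 - p)^num_tries, computed stably as -expm1(num_tries * log1p(-p))
expected_count = -torch.expm1(num_tries * torch.log1p(-probs))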
Example #6
    def compute_loss(self, outputs, masks, labels):
        """
        Our implementation of weighted BCE loss.
        """
        labels = labels.view(-1)
        masks = masks.view(-1)
        outputs = outputs.view(-1)

        # Generate the weights
        ones = torch.sum(labels)
        total = labels.nelement()
        weights = torch.FloatTensor(outputs.size()).type_as(outputs.data)
        weights[labels.long() == 1] = 1.0 - ones / total
        weights[labels.long() == 0] = ones / total
        weights = weights.view(weights.size(0), 1).expand(weights.size(0), 2)

        # Generate the log outputs
        outputs = outputs.clamp(min=1e-8)
        log_outputs = torch.log(outputs)
        neg_outputs = 1.0 - outputs
        neg_outputs = neg_outputs.clamp(min=1e-8)
        neg_log_outputs = torch.log(neg_outputs)
        all_outputs = torch.cat((log_outputs.view(-1, 1), neg_log_outputs.view(-1, 1)), 1)

        all_values = all_outputs.mul(torch.autograd.Variable(weights))
        all_labels = torch.autograd.Variable(torch.cat((labels.view(-1, 1), (1.0 - labels).view(-1, 1)), 1))
        all_masks = torch.autograd.Variable(torch.cat((masks.view(-1, 1), masks.view(-1, 1)), 1))
        loss = -torch.sum(all_values.mul(all_labels).mul(all_masks)) / outputs.size(0)
        return loss
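A standalone illustration of the class-balancing weights built above (the assumed intent being that rare positives are up-weighted): positives get weight 1 - ones/total and negatives get ones/total.

import torch

labels = torch.tensor([1., 0., 0., 0.])    # 25% positives
ones, total = labels.sum(), labels.nelement()
w_pos = 1.0 - ones / total                 # 0.75
w_neg = ones / total                       # 0.25
weights = torch.where(labels == 1, w_pos, w_neg)  # [0.75, 0.25, 0.25, 0.25]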
    def norm_flow(self, params, z, v):

        # print (z.size())
        h = F.tanh(params[0][0](z))
        mew_ = params[0][1](h)
        sig_ = F.sigmoid(params[0][2](h)+5.) #[PB,Z]

        # print (v.size())
        # print (mew_.size())
        # print (self.B)
        # print (self.P)

        v = v*sig_ + mew_

        logdet = torch.sum(torch.log(sig_), 1)


        h = F.tanh(params[1][0](v))
        mew_ = params[1][1](h)
        sig_ = F.sigmoid(params[1][2](h)+5.) #[PB,Z]

        z = z*sig_ + mew_

        logdet2 = torch.sum(torch.log(sig_), 1)

        #[PB]
        logdet = logdet + logdet2
        
        #[PB,Z], [PB]
        return z, v, logdet
Example #8
    def reverse_flow(self, z):

        B = z.shape[0]
        C = z.shape[1]
        f = self.flows

        logdet = 0.
        reverse_ = list(range(self.n_flows))[::-1]
        for i in reverse_:
            z1 = z[:,:C//2]
            z2 = z[:,C//2:]
            sig1 = torch.sigmoid(f[str(i)]['f2_sig'](z1))
            mu1 = f[str(i)]['f2_mu'](z1)

            z2 = (z2 - mu1) / sig1

            sig2 = torch.sigmoid(f[str(i)]['f1_sig'](z2))
            mu2 = f[str(i)]['f1_mu'](z2)

            z1 = (z1 - mu2) / sig2
            
            z = torch.cat([z1,z2],1)
            z = z[:,f[str(i)]['inv_perm']]

            sig1 = sig1.view(B, -1)
            sig2 = sig2.view(B, -1)
            logdet += torch.sum(torch.log(sig1), 1)
            logdet += torch.sum(torch.log(sig2), 1)

        return z, logdet
Example #9
    def sample_relax(logits, surrogate):
        cat = Categorical(logits=logits)
        u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
        gumbels = -torch.log(-torch.log(u))
        z = logits + gumbels
        b = torch.argmax(z, dim=1) #.view(B,1)
        logprob = cat.log_prob(b).view(B,1)


        # czs = []
        # for j in range(1):
        #     z = sample_relax_z(logits)
        #     surr_input = torch.cat([z, x, logits.detach()], dim=1)
        #     cz = surrogate.net(surr_input)
        #     czs.append(cz)
        # czs = torch.stack(czs)
        # cz = torch.mean(czs, dim=0)#.view(1,1)
        surr_input = torch.cat([z, x, logits.detach()], dim=1)
        cz = surrogate.net(surr_input)


        cz_tildes = []
        for j in range(1):
            z_tilde = sample_relax_given_b(logits, b)
            surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1)
            cz_tilde = surrogate.net(surr_input)
            cz_tildes.append(cz_tilde)
        cz_tildes = torch.stack(cz_tildes)
        cz_tilde = torch.mean(cz_tildes, dim=0) #.view(B,1)

        return b, logprob, cz, cz_tilde
Example #10
    def forward_flow(self, z, xenc):

        B = z.shape[0]
        C = z.shape[1]
        f = self.flows
        logdet = 0.
        for i in range(self.n_flows):
            z = z[:,f[str(i)]['perm']]
            z1 = z[:,:C//2]
            z2 = z[:,C//2:]

            sig2 = torch.sigmoid(f[str(i)]['f1_sig'](torch.cat([z2,xenc],1)))
            mu2 = f[str(i)]['f1_mu'](torch.cat([z2,xenc],1))

            z1 = z1*sig2 + mu2

            mu1 = f[str(i)]['f2_mu'](torch.cat([z1,xenc],1))
            sig1 = torch.sigmoid(f[str(i)]['f2_sig'](torch.cat([z1,xenc],1)))

            z2 = z2*sig1 + mu1
            z = torch.cat([z1,z2],1)

            sig1 = sig1.view(B, -1)
            sig2 = sig2.view(B, -1)
            logdet += torch.sum(torch.log(sig1), 1)
            logdet += torch.sum(torch.log(sig2), 1)

        return z, logdet
    def predictive_elbo(self, x, k, s):
        # No pW or qW

        self.B = x.size()[0] #batch size
        # self.k = k  #number of z samples aka particles P
        # self.s = s  #number of W samples

        elbo1s = []
        for i in range(s):

            Ws, logpW, logqW = self.sample_W()  #_ , [1], [1]

            mu, logvar = self.encode(x)  #[B,Z]
            z, logpz, logqz = self.sample_z(mu, logvar, k=k) #[P,B,Z], [P,B]

            x_hat = self.decode(Ws, z) #[P,B,X]
            logpx = log_bernoulli(x_hat, x)  #[P,B]

            elbo = logpx + logpz - logqz #[P,B]
            if k>1:
                max_ = torch.max(elbo, 0)[0] #[B]
                elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B]
            # elbo1 = elbo1 #+ (logpW - logqW)*.00000001 #[B], logp(x|W)p(w)/q(w)
            elbo1s.append(elbo)

        elbo1s = torch.stack(elbo1s) #[S,B]
        if s>1:
            max_ = torch.max(elbo1s, 0)[0] #[B]
            elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_ #[B]            

        elbo = torch.mean(elbo1s) #[1]
        return elbo#, logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
Example #12
File: srnn.py Project: gsig/srnn
    def forward(self, input):
        n = len(input)
        embeds = self.input_fun(input)

        # pick first node
        scores = self.scores(embeds, 0, 0)
        choice = self.choice(n, scores)
        picks = [choice]
        loss = -torch.log(scores[choice]) / n
        outputs = []

        for i, e in enumerate(embeds):
            outputs.append(self.output)
            if picks[-1] > i:
                # skip elements until next node
                continue
            lstm_out, self.hidden = self.lstm(e.view(1, 1, -1), self.hidden)
            self.output = self.output_fun(lstm_out.view(1, -1))

            if len(picks) < self.subset:
                # pick next node
                scores = self.scores(embeds, len(picks), i + 1)
                choice = self.choice(n, scores)
                picks.append(choice)
                loss -= torch.log(scores[choice]) / (n - i)
        return loss, outputs, picks
Example #13
def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)

        loss.backward()
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
Example #14
def sample_from_discretized_mix_logistic_1d(l, nr_mix):
    # Pytorch ordering
    l = l.permute(0, 2, 3, 1)
    ls = [int(y) for y in l.size()]
    xs = ls[:-1] + [1] #[3]

    # unpack parameters
    logit_probs = l[:, :, :, :nr_mix]
    l = l[:, :, :, nr_mix:].contiguous().view(xs + [nr_mix * 2]) # for mean, scale

    # sample mixture indicator from softmax
    temp = torch.FloatTensor(logit_probs.size())
    if l.is_cuda : temp = temp.cuda()
    temp.uniform_(1e-5, 1. - 1e-5)
    temp = logit_probs.data - torch.log(- torch.log(temp))
    _, argmax = temp.max(dim=3)
   
    one_hot = to_one_hot(argmax, nr_mix)
    sel = one_hot.view(xs[:-1] + [1, nr_mix])
    # select logistic parameters
    means = torch.sum(l[:, :, :, :, :nr_mix] * sel, dim=4) 
    log_scales = torch.clamp(torch.sum(
        l[:, :, :, :, nr_mix:2 * nr_mix] * sel, dim=4), min=-7.)
    u = torch.FloatTensor(means.size())
    if l.is_cuda : u = u.cuda()
    u.uniform_(1e-5, 1. - 1e-5)
    u = Variable(u)
    x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1. - u))
    x0 = torch.clamp(torch.clamp(x[:, :, :, 0], min=-1.), max=1.)
    out = x0.unsqueeze(1)
    return out
Example #15
def relax_grad2(x, logits, b, surrogate, mixtureweights):
    B = logits.shape[0]
    C = logits.shape[1]

    cat = Categorical(logits=logits)
    # u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
    u = myclamp(torch.rand(B,C).cuda())
    gumbels = -torch.log(-torch.log(u))
    z = logits + gumbels
    # b = torch.argmax(z, dim=1) #.view(B,1)
    logq = cat.log_prob(b).view(B,1)

    surr_input = torch.cat([z, x, logits.detach()], dim=1)
    cz = surrogate.net(surr_input)

    z_tilde = sample_relax_given_b(logits, b)
    surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1)
    cz_tilde = surrogate.net(surr_input)

    logpx_given_z = logprob_undercomponent(x, component=b)
    logpz = torch.log(mixtureweights[b]).view(B,1)
    logpxz = logpx_given_z + logpz #[B,1]

    f = logpxz - logq 
    net_loss = - torch.mean( (f.detach() - cz_tilde.detach()) * logq - logq +  cz - cz_tilde )

    grad = torch.autograd.grad([net_loss], [logits], create_graph=True, retain_graph=True)[0] #[B,C]
    pb = torch.exp(logq)

    return grad, pb
Example #16
def get_probs_and_logits(ps=None, logits=None, is_multidimensional=True):
    """
    Convert probability values to logits, or vice-versa. Either ``ps`` or
    ``logits`` should be specified, but not both.

    :param ps: tensor of probabilities. Should be in the interval *[0, 1]*.
        If, ``is_multidimensional = True``, then must be normalized along
        axis -1.
    :param logits: tensor of logit values.  For the multidimensional case,
        the values, when exponentiated along the last dimension, must sum
        to 1.
    :param is_multidimensional: determines the computation of ps from logits,
        and vice-versa. For the multi-dimensional case, logit values are
        assumed to be log probabilities, whereas for the uni-dimensional case,
        it specifically refers to log odds.
    :return: tuple containing raw probabilities and logits as tensors.
    """
    assert (ps is None) != (logits is None)
    if ps is not None:
        eps = _get_clamping_buffer(ps)
        ps_clamped = ps.clamp(min=eps, max=1 - eps)
    if is_multidimensional:
        if ps is None:
            ps = softmax(logits, -1)
        else:
            logits = torch.log(ps_clamped)
    else:
        if ps is None:
            ps = F.sigmoid(logits)
        else:
            logits = torch.log(ps_clamped) - torch.log1p(-ps_clamped)
    return ps, logits
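A quick round-trip check of the binary branch above: logits = log(p) - log1p(-p) and p = sigmoid(logits) invert each other.

import torch

p = torch.tensor([0.1, 0.5, 0.9])
logits = torch.log(p) - torch.log1p(-p)
p_back = torch.sigmoid(logits)
assert torch.allclose(p, p_back)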
Example #17
def poisson_nll_loss(input, target, log_input=True, full=False, size_average=True):
    r"""Poisson negative log likelihood loss.

    See :class:`~torch.nn.PoissonNLLLoss` for details.

    Args:
        input: expectation of underlying Poisson distribution.
        target: random sample :math:`target \sim Pois(input)`.
        log_input: if True the loss is computed as
            `exp(input) - target * input`, if False then loss is
            `input - target * log(input)`. Default: True
        full: whether to compute full loss, i.e. to add the Stirling
            approximation term
            `target * log(target) - target + 0.5 * log(2 * pi * target)`.
            Default: False
        size_average: By default, the losses are averaged over observations for
            each minibatch. However, if the field sizeAverage is set to False,
            the losses are instead summed for each minibatch. Default: True
    """
    if log_input:
        loss = torch.exp(input) - target * input
    else:
        loss = input - target * torch.log(input)
    if full:
        mask = target > 1
        loss[mask] += (target * torch.log(target) - target + 0.5 * torch.log(2 * math.pi * target))[mask]
    if size_average:
        return torch.mean(loss)
    else:
        return torch.sum(loss)
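A minimal call sketch for the log_input=True case (values are illustrative): the input is treated as a log-rate, and the per-element loss is exp(input) - target * input.

import torch

log_rate = torch.tensor([0.0, 1.0, 2.0])   # log of the Poisson rate
target = torch.tensor([1.0, 2.0, 3.0])
loss = torch.exp(log_rate) - target * log_rate  # per-element, full=False
mean_loss = loss.mean()                          # size_average=True behaviour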
Example #18
 def _mu_law(self, x):
     m = self._variable(torch.FloatTensor(1))
     m[:] = self.n_categories + 1
     s = torch.sign(x)
     x = torch.abs(x)
     x = s * (torch.log(1 + (self.n_categories * x)) / torch.log(m))
     return x
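A standalone sketch of the same mu-law companding formula for a signal in [-1, 1], assuming n_categories plays the role of mu (e.g. 255):

import torch

mu = 255.0
x = torch.linspace(-1., 1., 5)
compressed = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / torch.log(torch.tensor(mu + 1.0))
# output stays in [-1, 1], with resolution concentrated near zero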
Example #19
def bbox_transform(anchor_rois, gt_rois):
    """

    :param anchor_rois <torch.Tensor>: anchor boxes, one row per box in (x1, y1, z1, x2, y2, z2) order
    :param gt_rois <torch.Tensor>: ground-truth boxes in the same format
    :return: regression targets (dx, dy, dz, dw, dh, dl), stacked along dim 1
    """
    anchor_widths  = anchor_rois[:, 3] - anchor_rois[:, 0]
    anchor_heights = anchor_rois[:, 4] - anchor_rois[:, 1]
    anchor_lengths = anchor_rois[:, 5] - anchor_rois[:, 2]

    anchor_ctr_x = anchor_rois[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor_rois[:, 1] + 0.5 * anchor_heights
    anchor_ctr_z = anchor_rois[:, 2] + 0.5 * anchor_lengths

    gt_widths  = gt_rois[:, 3] - gt_rois[:, 0]
    gt_heights = gt_rois[:, 4] - gt_rois[:, 1]
    gt_lengths = gt_rois[:, 5] - gt_rois[:, 2]

    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
    gt_ctr_z = gt_rois[:, 2] + 0.5 * gt_lengths

    targets_dx = (gt_ctr_x - anchor_ctr_x) / (anchor_widths + 1e-14)
    targets_dy = (gt_ctr_y - anchor_ctr_y) / (anchor_heights + 1e-14)
    targets_dz = (gt_ctr_z - anchor_ctr_z) / (anchor_lengths + 1e-14)

    targets_dw = torch.log(gt_widths  / (anchor_widths  + 1e-14) + 1e-14)
    targets_dh = torch.log(gt_heights / (anchor_heights + 1e-14) + 1e-14)
    targets_dl = torch.log(gt_lengths / (anchor_lengths + 1e-14) + 1e-14)

    targets = torch.stack([targets_dx, targets_dy, targets_dz, targets_dw, targets_dh, targets_dl], 1)

    return targets
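A toy call, assuming bbox_transform above is in scope and boxes are axis-aligned 3D boxes in (x1, y1, z1, x2, y2, z2) order, as the indexing implies:

import torch

anchor_rois = torch.tensor([[0., 0., 0., 10., 10., 10.]])
gt_rois = torch.tensor([[1., 1., 1., 11., 11., 11.]])
targets = bbox_transform(anchor_rois, gt_rois)
# identical sizes shifted by 1 -> dx = dy = dz ~= 0.1 and dw = dh = dl ~= 0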
Example #20
def log_Bernoulli(x, mean, average=False, dim=None):
    probs = torch.clamp( mean, min=min_epsilon, max=max_epsilon )
    log_bernoulli = x * torch.log( probs ) + (1. - x ) * torch.log( 1. - probs )
    if average:
        return torch.mean( log_bernoulli, dim )
    else:
        return torch.sum( log_bernoulli, dim )
Example #21
def sample_relax_z(logits):
    B = logits.shape[0]
    C = logits.shape[1]

    u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
    gumbels = -torch.log(-torch.log(u))
    z = logits + gumbels
    return z
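The Gumbel-max identity behind this helper: argmax(logits + Gumbel noise) is distributed as Categorical(softmax(logits)). A CPU-only sketch (shapes and values are illustrative):

import torch

logits = torch.tensor([[2.0, 0.0, -1.0]]).expand(10000, 3)
u = torch.rand(10000, 3).clamp(1e-10, 1. - 1e-10)
z = logits - torch.log(-torch.log(u))
counts = torch.bincount(torch.argmax(z, dim=1), minlength=3).float() / 10000
# counts ~= torch.softmax(torch.tensor([2.0, 0.0, -1.0]), dim=0)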
Example #22
def compute_stuff(mask_chosen, scores, weights, volumes):
    bs = weights.size(0)
    mask_chosen = Variable(mask_chosen.float())
    probs = 1e-6 + (1-2e-6) * F.softmax(scores)
    lgp = (torch.log(probs) * mask_chosen + torch.log(1-probs) * (1-mask_chosen)).sum(1)
    w = (weights * mask_chosen).sum(1)
    v = (volumes * mask_chosen).sum(1)
    return lgp, w, v
Example #23
def optimize_cnt(worm_img, skel_prev, skel_width, segment_length,  n_epochs = 1000):
    
    
    #this is the variable that is going to be modified
    skel_r = skel_prev.data #+ torch.zeros(*skel_prev.size()).normal_()
    skel_r = torch.nn.Parameter(skel_r)
    
    optimizer = optim.Adam([skel_r], lr=0.1)
    for ii in range(n_epochs):
        skel_map = get_skel_map(skel_r, skel_width)
        #skel_map += 1e-3
        
        p_w = (skel_map*worm_img)
        
        skel_map_inv = (-skel_map).add_(1)
        worm_img_inv = (-worm_img).add_(1)
        p_bng = (skel_map_inv*worm_img_inv)
        
        #p_bng = torch.sqrt(p_bng)
        
        
        #c_loss = F.binary_cross_entropy(p_w, p_bng)
        c_loss = -(p_bng*torch.log(p_w + 1.e-3) + p_w*torch.log(p_bng + 1.e-3)).mean()
        
        ds = skel_r[1:] - skel_r[:-1]
        dds = ds[1:] - ds[:-1]
        #seg_mean = seg_sizes.mean()
        
        cont_loss = ds.norm(p=2)
        curv_loss = dds.norm(p=2)
        
        seg_sizes = ((ds).pow(2)).sum(1).sqrt()
        d1 = seg_sizes-segment_length*0.9
        d2 = seg_sizes-segment_length*1.5
        seg_loss = (torch.exp(-d1) + torch.exp(d2)).mean()
        
        
        #(seg_sizes-segment_length).cosh().mean()
        #seg_loss = ((seg_sizes - segment_length)).cosh().mean()
        #seg_mean_loss = ((seg_mean-seg_sizes).abs() + 1e-5).mean()
        
        loss = 100*c_loss + 50*seg_loss + cont_loss +  curv_loss
        #loss = 50*c_loss + seg_loss
        optimizer.zero_grad()
        loss.backward()
        
        #torch.nn.utils.clip_grad_norm([skel_r], 0.001)
        optimizer.step()
        
        if ii % 250 == 0:
            print(ii,
                  loss.data[0], 
                  c_loss.data[0],
                  seg_loss.data[0], 
                  cont_loss.data[0],
                  curv_loss.data[0]
                  )
    return skel_r, skel_map
Example #24
def categorical(mean, temp):
    g = -torch.log(1e-10 - torch.log(1e-10+Variable(mean.data.new(mean.size()).uniform_())))
    if mean.ndim != 3:
        return F.softmax((torch.log(mean + 1e-10) + g)/temp)
    else:
        shape = (mean.size()[0] * mean.size()[1], mean.size(2))
        samples = F.softmax(((torch.log(mean + 1e-10) + g)/temp).view(shape))

        return samples.view_as(mean)
Example #25
    def sample_relax_given_b(logits, b):

        u_b = torch.rand(B,1).clamp(1e-10, 1.-1e-10).cuda()
        z_tilde_b = -torch.log(-torch.log(u_b))

        u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
        z_tilde = -torch.log((- torch.log(u) / torch.softmax(logits,dim=1)) - torch.log(u_b))
        z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b)

        return z_tilde
Example #26
 def batch_log_pdf(self, x):
     """
     Ref: :py:meth:`pyro.distributions.distribution.Distribution.batch_log_pdf`
     """
     a = self.a.expand(self.shape(x))
     b = self.b.expand(self.shape(x))
     lb = x.ge(a).type_as(a)
     ub = x.le(b).type_as(b)
     batch_log_pdf_shape = self.batch_shape(x) + (1,)
     return torch.sum(torch.log(lb.mul(ub)) - torch.log(b - a), -1).contiguous().view(batch_log_pdf_shape)
Example #27
    def forward(self, prob, targets, infos, wt=None):
        prob = prob.clamp(min=1e-7, max=1-1e-7)
        if wt is None:
            wt = torch.ones_like(prob)
        wt1 = wt
        if config.TRAIN.CE_LOSS_WEIGHTED and self.pos_wt is not None:
            wt1 = wt * (targets.detach() * self.pos_wt + (1-targets.detach()) * self.neg_wt)

        loss = -torch.mean(wt1 * (torch.log(prob) * targets + torch.log(1-prob) * (1-targets)))

        return loss
Example #28
    def custom_cross_entropy(x, y):
        sigmoid_x = torch.sigmoid(x)
        sigmoid_x2 = torch.sigmoid(x ** 2)
        neg_log_sigmoid_x = -1 * torch.log(sigmoid_x)
        neg_log_1_minus_sigmoid_x2 = -1 * torch.log(1 - sigmoid_x2)

        l1 = torch.mul(y, neg_log_sigmoid_x)
        l2 = torch.mul(1 - y, neg_log_1_minus_sigmoid_x2)

        return torch.sum(l1 + l2)
Example #29
    def forward(self, x=None, warmup=1., inf_net=None): #, k=1): #, marginf_type=0):

        outputs = {}
        B = x.shape[0]

        if inf_net is None:
            # mu, logvar = self.inference_net(x)
            z, logits = self.q.sample(x) 
        else:
            # mu, logvar = inf_net.inference_net(x)   
            z, logqz = inf_net.sample(x) 

        # print (z[0])
        # b = harden(z)
        # print (b[0])
        
        # logpz = torch.sum( self.prior.log_prob(b), dim=1)

        # print (logpz[0])
        # print (logpz.shape)
        # fdasf

        probs_q = torch.sigmoid(logits)
        probs_q = torch.clamp(probs_q, min=.00000001, max=.9999999)
        probs_p = torch.ones(B, self.z_size).cuda() *.5
        KL = probs_q*torch.log(probs_q/probs_p) + (1-probs_q)*torch.log((1-probs_q)/(1-probs_p))
        KL = torch.sum(KL, dim=1)

        # print (z.shape)
        # Decode Image
        x_hat = self.generator.forward(z)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)
        x_noise = torch.clamp(x + torch.FloatTensor(x.shape).uniform_(0., 1./256.).cuda(), min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise) #[120,3,112,112]  # add uniform noise here

        logpx = torch.sum(logpx.view(B, -1),1) # [PB]  * self.w_logpx

        # print (logpx.shape,logpz.shape,logqz.shape)
        # fsdfda

        log_ws = logpx - KL #+ logpz - logqz

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        # outputs['welbo'] = torch.mean(logpx + warmup*( logpz - logqz))
        outputs['welbo'] = torch.mean(logpx + warmup*(KL))
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.zeros(1) #torch.mean(logpz)
        outputs['logqz'] = torch.mean(KL)
        # outputs['logvar'] = logvar

        return outputs
Example #30
def probs_to_logits(probs, is_binary=False):
    r"""
    Converts a tensor of probabilities into logits. For the binary case,
    this denotes the probability of occurrence of the event indexed by `1`.
    For the multi-dimensional case, the values along the last dimension
    denote the probabilities of occurrence of each of the events.
    """
    ps_clamped = clamp_probs(probs)
    if is_binary:
        return torch.log(ps_clamped) - torch.log1p(-ps_clamped)
    return torch.log(ps_clamped)
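In the multi-dimensional branch the returned values are log-probabilities, so a softmax recovers the input (assuming clamp_probs only nudges values away from 0 and 1):

import torch

probs = torch.tensor([0.2, 0.3, 0.5])
logits = torch.log(probs)
probs_back = torch.softmax(logits, dim=-1)
assert torch.allclose(probs, probs_back)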
Example #31
    def forward(self,
                inputs_NTF,
                seq_lens_N=None,
                pad_val=0,
                return_hiddens=False):
        ''' Forward pass of input data through NN module

        Cleanly handles variable-length sequences (though internals a bit messy).

        Args
        ----
        inputs_NTF : 3D array (n_sequences, n_timesteps, n_features)
            Each row is one sequence, padded to length T = n_timesteps
        seq_lens_N : 1D array-like (n_sequences)
            Each entry indicates how many timesteps the n-th sequence has.
            (Remaining entries are all padding and should be ignored).

        Returns
        -------
        yproba_N2 : 2D array (n_sequences, 2)
            Each row gives probability that given sequence is class 0 or 1
            Each row sums to one
        
        hiddens_NTH : 3D array (n_sequences, n_timesteps, n_hiddens)
            Each (n,t) index gives the hidden-state vector at sequence n, timestep t
        '''
        N, T, F = inputs_NTF.shape

        if seq_lens_N is None:
            seq_lens_N = torch.zeros(N, dtype=torch.int64)
            for n in range(N):
                bmask_T = torch.all(inputs_NTF[n] == pad_val, dim=-1)
                seq_lens_N[n] = np.searchsorted(bmask_T, 1)

        ## Create PackedSequence representation to handle variable-length sequences
        # Requires sorting all sequences in current batch in descending order by length
        sorted_seq_lens_N, ids_N = seq_lens_N.sort(0, descending=True)
        _, rev_ids_N = ids_N.sort(0, descending=False)
        sorted_inputs_NTF = inputs_NTF[ids_N]
        packed_inputs_PF = nn.utils.rnn.pack_padded_sequence(sorted_inputs_NTF,
                                                             sorted_seq_lens_N,
                                                             batch_first=True)

        # Apply dropout to the non-recurrent weights between LSTM layers (i.e., the weights applied to h_(l-1)^t)
        # See https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM for choosing the right weights
        if (self.dropout_proba_non_recurrent > 0.0
                and self.rnn.num_layers > 1):
            dropout = nn.Dropout(p=self.dropout_proba_non_recurrent)
            self.rnn.weight_ih_l1 = torch.nn.Parameter(dropout(
                self.rnn.weight_ih_l1),
                                                       requires_grad=True)
            self.rnn.bias_ih_l1 = torch.nn.Parameter(dropout(
                self.rnn.bias_ih_l1),
                                                     requires_grad=True)

        # Apply the RNN
        if (self.convert_to_log_reg == False):
            packed_outputs_PH, _ = self.rnn(packed_inputs_PF)
            # Unpack to N x T x H padded representation
            outputs_NTH, _ = nn.utils.rnn.pad_packed_sequence(
                packed_outputs_PH, batch_first=True)
            # Apply weights + softmax to final timestep of each sequence
            end_hiddens_NH = outputs_NTH[range(N), sorted_seq_lens_N - 1]
            yproba_N2 = nn.functional.softmax(self.output(end_hiddens_NH),
                                              dim=-1)
            #yproba_N2 = nn.functional.logsigmoid(self.output(end_hiddens_NH))
            # Unsort and return
            if return_hiddens:
                return yproba_N2.index_select(
                    0, rev_ids_N), outputs_NTH.index_select(0, rev_ids_N)
            else:
                return yproba_N2.index_select(0, rev_ids_N)

        else:  # convert to logistic regression
            assert (
                self.rnn.hidden_size == F
            ), "Number of hidden units must equal number of input features for conversion to logistic regression!"

            if (
                    self.first_pass == False
            ):  # weird handling of validation set of gridsearchcv and validation set of LSTM object
                if (N != self.ht.shape[1]) & (N != self.htval.shape[1]):
                    init_weights_for_logistic_regression_conversion(self.rnn)
                    self.first_pass = True

            # set end hidden layer output to be same as input for logistic regression conversion
            h0 = torch.zeros(self.rnn.num_layers, N,
                             self.rnn.hidden_size).double()
            c0 = torch.ones(self.rnn.num_layers, N,
                            self.rnn.hidden_size).double()
            if (self.first_pass) & (self.training):
                packed_outputs_PH, (self.ht, self.ct) = self.rnn(
                    packed_inputs_PF, (h0, c0))
            elif (self.first_pass == False) & (self.training):
                packed_outputs_PH, (self.ht, self.ct) = self.rnn(
                    packed_inputs_PF, (self.ht, self.ct))
            elif (self.first_pass) & (self.training == False):  # eval mode
                packed_outputs_PH, (self.htval, self.ctval) = self.rnn(
                    packed_inputs_PF, (h0, c0))
                self.first_pass = False
            elif (self.first_pass == False) & (self.training == False):
                packed_outputs_PH, (self.htval, self.ctval) = self.rnn(
                    packed_inputs_PF, (self.htval, self.ctval))
            outputs_NTH, _ = nn.utils.rnn.pad_packed_sequence(
                packed_outputs_PH, batch_first=True)
            outputs_NTH = torch.log(
                outputs_NTH / (1 - outputs_NTH)
            )  # inverse sigmoid the output of hidden units to get back input features
            outputs_NTH[torch.isinf(
                outputs_NTH)] = 0  # remove inf's from sigmoid inversion
            end_hiddens_NH = outputs_NTH[range(N), sorted_seq_lens_N - 1]
            yproba_N2 = nn.functional.logsigmoid(
                self.output(end_hiddens_NH)).index_select(0, rev_ids_N)
            return yproba_N2
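A minimal sketch of the sort/pack/unsort pattern used above, outside the class and assuming a recent PyTorch (names and sizes here are illustrative):

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=3, hidden_size=5, batch_first=True)
inputs_NTF = torch.randn(4, 7, 3)            # N=4 sequences padded to T=7
seq_lens_N = torch.tensor([7, 5, 3, 2])

lens_sorted, ids = seq_lens_N.sort(0, descending=True)
_, rev_ids = ids.sort(0)
packed = nn.utils.rnn.pack_padded_sequence(inputs_NTF[ids], lens_sorted, batch_first=True)
packed_out, _ = rnn(packed)
out_NTH, _ = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)
out_NTH = out_NTH.index_select(0, rev_ids)   # restore the original ordering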
Example #32
    def forward(self, x):
        # stage 0

        # x is prediction of resnet, which will be ignored
        # xr1 to xr4 are the resnet feature spaces beginning on top.
        x, xr1, xr2, xr3, xr4 = self.pretrained_resnet(x)

        if self.mode is not None:
            if self.mode == 'mode1':
                #reduce resnet filter, then upsample and concat with other reduced resnet features
                x_redu_4 = self.onexone1(xr4)
                x_redu_3 = self.onexone2(xr3)
                x_redu_2 = self.onexone3(xr2)
                x_redu_1 = self.onexone4(xr1)

                x_up_1 = self.upsample(x_redu_4)
                x_cat_1 = torch.cat((x_up_1, x_redu_3), dim=1)

                x_up_2 = self.upsample(x_cat_1)
                x_cat_2 = torch.cat((x_up_2, x_redu_2), dim=1)

                x_up_3 = self.upsample(x_cat_2)
                x_cat_3 = torch.cat((x_up_3, x_redu_1), dim=1)

                reduced = self.onexone(x_cat_3)

            elif self.mode == 'mode2':
                # reduce resnet filter, then upsample and concat with other reduced resnet features
                x_redu_4 = self.onexone1(xr4)
                x_redu_3 = self.onexone2(xr3)
                x_redu_2 = self.onexone3(xr2)
                x_redu_1 = self.onexone4(xr1)

                x_up_1 = self.upsample(x_redu_4)
                x_cat_1 = torch.cat((x_up_1, x_redu_3), dim=1)

                x_up_2 = self.upsample(x_cat_1)
                x_cat_2 = torch.cat((x_up_2, x_redu_2), dim=1)

                x_up_3 = self.upsample(x_cat_2)
                x_cat_3 = torch.cat((x_up_3, x_redu_1), dim=1)

                reduced = self.onexone(x_cat_3)
        else:
            #upsample
            xu1 = self.upsample(xr4)
            cat1 = torch.cat((xu1, xr3), dim=1)


            #upsample 2
            xu2 = self.upsample(cat1)
            cat2 = torch.cat((xu2, xr2), dim=1)


            #upsample 3
            xu3 = self.upsample(cat2)
            cat3 = torch.cat((xu3, xr1), dim=1)


            # final 1x1 convolution to reduce channels to num_classes
            reduced = self.onexone(cat3)

        # final sigmoid layer before the saliency maps
        # todo: change pooling parameters
        saliency_map = self.sigmoid(reduced)
        # saliency_map = self.softmax(reduced)

        class_scores = self.pooling(saliency_map)

        norm_scores = self.norm(class_scores)
        # # insert normalization layer for the class scores such that sum = 1 and in [0,1]
        #
        # # log scores for Nllloss on RSNA
        log_scores = torch.log(norm_scores + 1e-8)

        return(saliency_map, log_scores)
Example #33
def gaussian_entropy(std):
    log_std = torch.log(std)
    norm = autograd.Variable(torch.Tensor([2*np.pi]))
    return 0.5 * len(std) * (1.0 + torch.log(norm)) + torch.sum(log_std)
Example #34
def binary_cross_entropy(recon_x, x):
    return -torch.sum(x * torch.log(recon_x + 1e-8) +
                      (1 - x) * torch.log(1 - recon_x + 1e-8),
                      dim=-1)
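A quick cross-check against the built-in loss (assuming the function above is in scope and a recent PyTorch): for a single sample, the manual per-sample sum matches F.binary_cross_entropy up to the 1e-8 stabilizer.

import torch
import torch.nn.functional as F

recon_x = torch.tensor([[0.2, 0.9]])
x = torch.tensor([[0., 1.]])
manual = binary_cross_entropy(recon_x, x)                      # sum over the last dim
builtin = F.binary_cross_entropy(recon_x, x, reduction='sum')  # ~0.3285 for both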
 def get_entropy(self):
     logps = F.softmax(self.gen_matrix, 2)
     result = torch.mean(torch.sum(logps * torch.log(logps + 1e-10), 1))
     result = result.cpu() if use_cuda else result
     return (-result.data.numpy())
Example #36
 def entropy(output):
     output = torch.clamp(output, 1e-5, 1 - 1e-5)
     entropy = -output * torch.log(output)
     return torch.mean(entropy)
Example #37
            s_lst.append(s)
            a_lst.append(a)
            r_lst.append(r / 100.0)
            mask_lst.append(1 - done)

            s = s_prime
            step_idx += 1

        s_final = torch.from_numpy(s_prime).float()
        v_final = model.v(s_final).detach().clone().numpy()
        td_target = compute_target(v_final, r_lst, mask_lst)

        td_target_vec = td_target.reshape(-1)
        s_vec = torch.tensor(s_lst).float().reshape(
            -1, 3)  # 3 == dimension of state
        a_vec = torch.tensor(a_lst).reshape(-1).unsqueeze(1)
        advantage = td_target_vec - model.v(s_vec).reshape(-1)

        pi = model.pi(s_vec, softmax_dim=1)
        pi_a = pi.gather(1, a_vec).reshape(-1)
        loss = -(torch.log(pi_a) * advantage.detach()).mean() +\
            F.smooth_l1_loss(model.v(s_vec).reshape(-1), td_target_vec)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step_idx % PRINT_INTERVAL == 0:
            test(step_idx, model)

    envs.close()
Example #38
    def train(self, model, data):
        tot_loss_lst = []
        pi_loss_lst = []
        entropy_lst = []
        move_entropy_lst = []
        v_loss_lst = []

        # to calculate fixed advantages before update
        data_with_adv = []
        for mini_batch in data:
            s, a, m, r, s_prime, done_mask, prob, need_move = mini_batch
            with torch.no_grad():
                pi, pi_move, v, _ = model(s)
                pi_prime, pi_m_prime, v_prime, _ = model(s_prime)

            td_target = r + self.gamma * v_prime * done_mask
            delta = td_target - v                           # [horizon * batch_size * 1]
            delta = delta.detach().cpu().numpy()

            advantage_lst = []
            advantage = np.array([0])
            for delta_t in delta[::-1]:
                advantage = self.gamma * self.lmbda * advantage + delta_t           
                advantage_lst.append(advantage)
            advantage_lst.reverse()
            advantage = torch.tensor(advantage_lst, dtype=torch.float, device=model.device)

            data_with_adv.append((s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage))

        for i in range(self.K_epoch):
            for mini_batch in data_with_adv:
                s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage = mini_batch
                pi, pi_move, v, _ = model(s)
                pi_prime, pi_m_prime, v_prime, _ = model(s_prime)

                pi_a = pi.gather(2,a)
                pi_m = pi_move.gather(2,m)
                pi_am = pi_a*(1-need_move + need_move*pi_m)
                ratio = torch.exp(torch.log(pi_am) - torch.log(prob))  # a/b == exp(log(a)-log(b))

                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1-self.eps_clip, 1+self.eps_clip) * advantage
                entropy = -torch.log(pi_am)
                move_entropy = -need_move*torch.log(pi_m)

                surr_loss = -torch.min(surr1, surr2)
                v_loss = F.smooth_l1_loss(v, td_target.detach())
                entropy_loss = -1*self.entropy_coef*entropy
                loss = surr_loss + v_loss + entropy_loss.mean()
                loss = loss.mean()

                model.optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip)
                model.optimizer.step()

                tot_loss_lst.append(loss.item())
                pi_loss_lst.append(surr_loss.mean().item())
                v_loss_lst.append(v_loss.item())
                entropy_lst.append(entropy.mean().item())
                n_need_move = torch.sum(need_move).item()
                if n_need_move == 0:
                    move_entropy_lst.append(0)
                else:
                    move_entropy_lst.append((torch.sum(move_entropy)/n_need_move).item())
        return np.mean(tot_loss_lst), np.mean(pi_loss_lst), np.mean(v_loss_lst), np.mean(entropy_lst), np.mean(move_entropy_lst)
Example #39
    def _model_scores(self, r_src, r_trg, mask_mat, args):
        '''
        Compute the NCE scores for predicting r_src->r_trg.

        Input:
          r_src    : (n_batch_gpu, n_rkhs)
          r_trg    : (n_rkhs, n_batch * n_locs)
          mask_mat : (n_batch_gpu, n_batch)
        Output:
          raw_scores : (n_batch_gpu, n_locs)
          scores : (n_batch_gpu, n_locs)
          lgt_reg    : scalar
        '''
        # n_batch_gpu * n_gpu = n_batch, cuda:0 is not used if gpu >=4
        n_batch_gpu = mask_mat.size(0)
        n_batch = mask_mat.size(1)
        n_locs = r_trg.size(1) // n_batch
        n_rkhs = r_src.size(1)
        # reshape mask_mat for ease-of-use
        mask_pos = mask_mat.unsqueeze(dim=2).expand(-1, -1, n_locs).float()
        mask_neg = 1. - mask_pos

        # compute src->trg raw scores for batch on this gpu
        raw_scores = torch.mm(r_src, r_trg).float()
        raw_scores = raw_scores.reshape(n_batch_gpu, n_batch, n_locs)
        if args.rkhs:
            raw_scores = raw_scores / n_rkhs**0.5
        if args.l2_reg:
            lgt_reg = args.l2_reg * (raw_scores**2.).mean()
        else:
            lgt_reg = torch.tensor(0.0).to(device=raw_scores.device)
        if args.use_tanh_clip:
            assert args.hard_clamping == None
            raw_scores = tanh_clip(raw_scores, clip_val=self.tclip)
        if args.hard_clamping is not None:
            assert args.use_tanh_clip == False
            raw_scores = torch.clamp(raw_scores,
                                     min=args.hard_clamping[0],
                                     max=args.hard_clamping[1])
        # If using our proposed objective:
        if args.loss == "ours":
            '''
            start calculating scores. 
            pos_scores includes scores for all the positive samples
            neg_scores includes scores for all the negative samples
            pos mean is mean over positive samples
            neg mean is mean over negative samples
            '''
            raw_scores_squared = raw_scores**2
            # (n_batch_gpu, 1)
            pos_scores = ((mask_pos * raw_scores).sum(dim=(1, 2))) / n_locs
            # (n_batch_gpu, 1)
            neg_scores = ((mask_neg * raw_scores_squared).sum(dim=(1, 2))) / (
                (n_batch - 1) * n_locs)
            # relative density ratio to help stability
            if not args.relative_ratio == 0.0:
                """COMMAND SHOULD BE ADDED HERE"""
                pos_scores_squared = (
                    (mask_pos * raw_scores_squared).sum(dim=(1, 2))) / n_locs
                pos_scores = pos_scores - 0.5 * args.relative_ratio * pos_scores_squared
                neg_scores = (1. - args.relative_ratio) * neg_scores
            scores = pos_scores - 0.5 * neg_scores

        elif args.loss == "JS":
            """Lower bound on Jensen-Shannon divergence from Nowozin et al. (2016)."""
            # Implementation if f is simply a batch_by_batch score
            # f_diag = f.diag()
            # first_term = -F.softplus(-f_diag).mean()
            # n = f.size(0)
            # second_term = (torch.sum(F.softplus(f)) -
            #                torch.sum(F.softplus(f_diag))) / (n * (n - 1.))
            # return first_term - second_term
            pos_scores = (
                (mask_pos * -F.softplus(-raw_scores)).sum(dim=(1, 2))) / n_locs
            neg_scores = ((mask_neg * F.softplus(raw_scores)).sum(
                dim=(1, 2))) / ((n_batch - 1) * n_locs)
            scores = pos_scores - neg_scores

        elif args.loss == "nce":
            '''
            pos_scores includes scores for all the positive samples
            neg_scores includes scores for all the negative samples, with
            scores for positive samples set to the min score (-self.tclip here)
            '''
            # (n_batch_gpu, n_locs)
            pos_scores = (mask_pos * raw_scores).sum(dim=1)
            # (n_batch_gpu, n_batch, n_locs)
            neg_scores = (mask_neg * raw_scores) - (self.tclip * mask_pos)
            # (n_batch_gpu, n_batch * n_locs)
            neg_scores = neg_scores.reshape(n_batch_gpu, -1)
            # (n_batch_gpu, n_batch * n_locs)
            mask_neg = mask_neg.reshape(n_batch_gpu, -1)
            '''
            for each set of positive examples P_i, compute the max over scores
            for the set of negative samples N_i that are shared across P_i
            '''
            # (n_batch_gpu, 1)
            neg_maxes = torch.max(neg_scores, dim=1, keepdim=True)[0]
            '''
            compute a "partial, safe sum exp" over each negative sample set N_i,
            to broadcast across the positive samples in P_i which share N_i
            -- size will be (n_batch_gpu, 1)
            '''
            neg_sumexp = \
                (mask_neg * torch.exp(neg_scores - neg_maxes)).sum(dim=1, keepdim=True)
            '''
            use broadcasting of neg_sumexp across the scores in P_i, to compute
            the log-sum-exps for the denominators in the NCE log-softmaxes
            -- size will be (n_batch_gpu, n_locs)
            '''
            all_logsumexp = torch.log(
                torch.exp(pos_scores - neg_maxes) + neg_sumexp)
            # compute numerators for the NCE log-softmaxes
            pos_shiftexp = pos_scores - neg_maxes
            # compute the final log-softmax scores for NCE...
            scores = pos_shiftexp - all_logsumexp
        else:
            print("Currently loss could only be 'nce' or 'ours'")
            assert False
        return scores, pos_scores, lgt_reg
Example #40
def word_loss(word_probs, word):
    #outcome is a one-hot vector
    prob_of_word = torch.dot(word_probs, word)
    return -1 * torch.log(prob_of_word)
Example #41
def train(args, snapshot_path):
    base_lr = args.base_lr
    train_data_path = args.root_path
    batch_size = args.batch_size
    max_iterations = args.max_iterations

    def create_model(ema=False):
        # Network definition
        net = unet_3D(n_classes=2, in_channels=1)
        model = net.cuda()
        if ema:
            for param in model.parameters():
                param.detach_()
        return model

    model = create_model()
    ema_model = create_model(ema=True)

    db_train = BraTS2019(base_dir=train_data_path,
                         split='train',
                         num=None,
                         transform=transforms.Compose([
                             RandomRotFlip(),
                             RandomCrop(args.patch_size),
                             ToTensor(),
                         ]))

    def worker_init_fn(worker_id):
        random.seed(args.seed + worker_id)

    labeled_idxs = list(range(0, args.labeled_num))
    unlabeled_idxs = list(range(args.labeled_num, 250))
    batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs,
                                          batch_size,
                                          batch_size - args.labeled_bs)

    trainloader = DataLoader(db_train,
                             batch_sampler=batch_sampler,
                             num_workers=4,
                             pin_memory=True,
                             worker_init_fn=worker_init_fn)

    model.train()
    ema_model.train()

    optimizer = optim.SGD(model.parameters(),
                          lr=base_lr,
                          momentum=0.9,
                          weight_decay=0.0001)
    ce_loss = CrossEntropyLoss()
    dice_loss = losses.DiceLoss(2)

    writer = SummaryWriter(snapshot_path + '/log')
    logging.info("{} iterations per epoch".format(len(trainloader)))

    iter_num = 0
    max_epoch = max_iterations // len(trainloader) + 1
    best_performance = 0.0
    iterator = tqdm(range(max_epoch), ncols=70)
    for epoch_num in iterator:
        for i_batch, sampled_batch in enumerate(trainloader):

            volume_batch, label_batch = sampled_batch['image'], sampled_batch[
                'label']
            volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda()
            unlabeled_volume_batch = volume_batch[args.labeled_bs:]

            noise = torch.clamp(
                torch.randn_like(unlabeled_volume_batch) * 0.1, -0.2, 0.2)
            ema_inputs = unlabeled_volume_batch + noise

            outputs = model(volume_batch)
            outputs_soft = torch.softmax(outputs, dim=1)
            with torch.no_grad():
                ema_output = ema_model(ema_inputs)
            T = 8
            _, _, d, w, h = unlabeled_volume_batch.shape
            volume_batch_r = unlabeled_volume_batch.repeat(2, 1, 1, 1, 1)
            stride = volume_batch_r.shape[0] // 2
            preds = torch.zeros([stride * T, 2, d, w, h]).cuda()
            for i in range(T // 2):
                ema_inputs = volume_batch_r + \
                    torch.clamp(torch.randn_like(
                        volume_batch_r) * 0.1, -0.2, 0.2)
                with torch.no_grad():
                    preds[2 * stride * i:2 * stride *
                          (i + 1)] = ema_model(ema_inputs)
            preds = torch.softmax(preds, dim=1)
            preds = preds.reshape(T, stride, 2, d, w, h)
            preds = torch.mean(preds, dim=0)
            uncertainty = -1.0 * \
                torch.sum(preds*torch.log(preds + 1e-6), dim=1, keepdim=True)

            loss_ce = ce_loss(outputs[:args.labeled_bs],
                              label_batch[:args.labeled_bs][:])
            loss_dice = dice_loss(outputs_soft[:args.labeled_bs],
                                  label_batch[:args.labeled_bs].unsqueeze(1))
            supervised_loss = 0.5 * (loss_dice + loss_ce)
            consistency_weight = get_current_consistency_weight(iter_num //
                                                                150)
            consistency_dist = losses.softmax_mse_loss(
                outputs[args.labeled_bs:],
                ema_output)  # (batch, 2, 112,112,80)
            threshold = (0.75 + 0.25 * ramps.sigmoid_rampup(
                iter_num, max_iterations)) * np.log(2)
            mask = (uncertainty < threshold).float()
            consistency_loss = torch.sum(
                mask * consistency_dist) / (2 * torch.sum(mask) + 1e-16)

            loss = supervised_loss + consistency_weight * consistency_loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            update_ema_variables(model, ema_model, args.ema_decay, iter_num)

            lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr_

            iter_num = iter_num + 1
            writer.add_scalar('info/lr', lr_, iter_num)
            writer.add_scalar('info/total_loss', loss, iter_num)
            writer.add_scalar('info/loss_ce', loss_ce, iter_num)
            writer.add_scalar('info/loss_dice', loss_dice, iter_num)
            writer.add_scalar('info/consistency_loss', consistency_loss,
                              iter_num)
            writer.add_scalar('info/consistency_weight', consistency_weight,
                              iter_num)

            logging.info(
                'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' %
                (iter_num, loss.item(), loss_ce.item(), loss_dice.item()))
            writer.add_scalar('loss/loss', loss, iter_num)

            if iter_num % 20 == 0:
                image = volume_batch[0, 0:1, :, :,
                                     20:61:10].permute(3, 0, 1,
                                                       2).repeat(1, 3, 1, 1)
                grid_image = make_grid(image, 5, normalize=True)
                writer.add_image('train/Image', grid_image, iter_num)

                image = outputs_soft[0, 1:2, :, :,
                                     20:61:10].permute(3, 0, 1,
                                                       2).repeat(1, 3, 1, 1)
                grid_image = make_grid(image, 5, normalize=False)
                writer.add_image('train/Predicted_label', grid_image, iter_num)

                image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute(
                    3, 0, 1, 2).repeat(1, 3, 1, 1)
                grid_image = make_grid(image, 5, normalize=False)
                writer.add_image('train/Groundtruth_label', grid_image,
                                 iter_num)

            if iter_num > 0 and iter_num % 200 == 0:
                model.eval()
                avg_metric = test_all_case(model,
                                           args.root_path,
                                           test_list="val.txt",
                                           num_classes=2,
                                           patch_size=args.patch_size,
                                           stride_xy=64,
                                           stride_z=64)
                if avg_metric[:, 0].mean() > best_performance:
                    best_performance = avg_metric[:, 0].mean()
                    save_mode_path = os.path.join(
                        snapshot_path, 'iter_{}_dice_{}.pth'.format(
                            iter_num, round(best_performance, 4)))
                    save_best = os.path.join(
                        snapshot_path, '{}_best_model.pth'.format(args.model))
                    torch.save(model.state_dict(), save_mode_path)
                    torch.save(model.state_dict(), save_best)

                writer.add_scalar('info/val_dice_score', avg_metric[0, 0],
                                  iter_num)
                writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num)
                logging.info('iteration %d : dice_score : %f hd95 : %f' %
                             (iter_num, avg_metric[0, 0].mean(),
                              avg_metric[0, 1].mean()))
                model.train()

            if iter_num % 3000 == 0:
                save_mode_path = os.path.join(snapshot_path,
                                              'iter_' + str(iter_num) + '.pth')
                torch.save(model.state_dict(), save_mode_path)
                logging.info("save model to {}".format(save_mode_path))

            if iter_num >= max_iterations:
                break
        if iter_num >= max_iterations:
            iterator.close()
            break
    writer.close()
    return "Training Finished!"
Example #42
0
    def transition(self, z, temperature, step):
        #print ('z', np.isnan(z.data.cpu().numpy()).any())
        #    print z.requires_grad
        h1 = self.act(self.bn7_list[step](self.fc_trans_1(z)))
        #print h1
        h2 = self.act(self.bn8_list[step](self.fc_trans_1_1(h1)))
        #print h2
        h3 = self.act(self.bn9_list[step](self.fc_trans_1_2(h2)))
        h4 = self.act(self.bn9_1_list[step](self.fc_trans_1_3(h3)))
        h5 = self.act(self.bn9_2_list[step](self.fc_trans_1_4(h4)))

        #print h3
        h5 = torch.clamp(h3, min=0, max=5)  # NOTE: overwrites h5 with a clamp of h3; the h4/h5 results above and this clamped value are never used below (mu/sigma use h3)
        #print h3

        mu = self.bn5_list[step](
            self.fc_z_mu(h3))  #### why not non-linearity applied here
        #print mu
        sigma = self.bn6_list[step](self.fc_z_sigma(h3))
        #print sigma
        #print ('mu', np.isnan(mu.data.cpu().numpy()).any())
        #print ('sigma', np.isnan(sigma.data.cpu().numpy()).any())
        eps = Variable(mu.data.new(mu.size()).normal_())

        #print ('eps', np.isnan(eps.data.cpu().numpy()).any())

        #print eps

        #z_new = mu + T.sqrt(args.sigma * temperature) * T.exp(0.5 * sigma) * eps
        #z_new = (z_new - T.mean(z_new, axis=0, keepdims=True)) / (0.001 + T.std(z_new, axis=0, keepdims=True))

        if args.cuda:
            sigma_ = Variable(
                torch.sqrt(
                    torch.FloatTensor(1).fill_(args.sigma *
                                               temperature)).cuda())
            #print ('sigma_', np.isnan(sigma_.data.cpu().numpy()).any())

        else:
            sigma_ = Variable(
                torch.sqrt(
                    torch.FloatTensor(1).fill_(args.sigma * temperature)))

        z_new = eps.mul(sigma.mul(0.5).exp_()).mul(sigma_).add_(mu)
        #print ('z_new', np.isnan(z_new.data.cpu().numpy()).any())
        z_new = (z_new - z_new.mean(0)) / (0.001 + z_new.std(0))
        #print ('z_new_mean', np.isnan(z_new.mean(0).data.cpu().numpy()).any())
        #print ('z_new_std', np.isnan(z_new.std(0).data.cpu().numpy()).any())
        #print ('z_new', np.isnan(z_new.data.cpu().numpy()).any())

        if args.cuda:
            sigma_ = Variable(
                torch.log(
                    torch.FloatTensor(1).fill_(
                        args.sigma * temperature)).cuda()) + sigma
            #print ('sigma2', np.isnan(sigma_.data.cpu().numpy()).any())

        else:
            sigma_ = Variable(
                torch.log(
                    torch.FloatTensor(1).fill_(
                        args.sigma * temperature))) + sigma

        log_p_reverse = log_normal2(z, mu, sigma_, eps=1e-6).mean()
        #print ('z', np.isnan(z.data.cpu().numpy()).any())
        #print ('log_p_reverse', log_p_reverse)
        z_new = torch.clamp(z_new, min=-4, max=4)
        #print z_new
        return z_new, log_p_reverse, mu, sigma
Example #43
0
 def sample_gumbel(self, shape, eps=1e-20):
         #Sample from Gumbel(0, 1)
         U = torch.rand(shape).float()
         return - torch.log(eps - torch.log(U + eps))
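          # Hedged usage sketch (not part of the original class): the Gumbel noise is
          # typically added to logits and pushed through a temperature softmax to draw
          # a relaxed categorical sample, e.g.
          #   g = self.sample_gumbel(logits.size())
          #   y_soft = torch.nn.functional.softmax((logits + g) / tau, dim=-1)  # tau assumed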
 def acosh(self, x):
     return torch.log(x + (x * x - 1.0).sqrt())
Example #45
0
 def cross_entropy(self, y_hat, y):
     return -torch.log(y_hat[range(len(y_hat)), y])
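A quick consistency check (a sketch, assuming y_hat holds per-class probabilities and y integer class labels): the expression above is the per-sample negative log likelihood.

import torch
import torch.nn.functional as F

y_hat = torch.softmax(torch.randn(4, 3), dim=-1)   # per-class probabilities
y = torch.tensor([0, 2, 1, 2])                     # integer labels
manual = -torch.log(y_hat[range(len(y_hat)), y])
builtin = F.nll_loss(torch.log(y_hat), y, reduction='none')
assert torch.allclose(manual, builtin, atol=1e-6)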
Example #46
0
def log_sum_exp(x):
    m = torch.max(x, -1)[0]
    return m + torch.log(torch.sum(torch.exp(x - m.unsqueeze(-1)), -1))
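A quick check against the built-in torch.logsumexp (available in recent PyTorch versions), treating the last dimension as the reduction axis:

import torch

x = torch.randn(5, 7)
assert torch.allclose(log_sum_exp(x), torch.logsumexp(x, dim=-1), atol=1e-5)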
Example #47
0
def train_gan(population,p_fitness,batch_size = 20,n_epochs = 100):
    p_fitness = torch.from_numpy(p_fitness).type("torch.FloatTensor").to(device)
    gen_explore_all = []
    gen_exploit_all = []
    dis_loss_all = []
    dis_confidence_all = []
    for e in range(n_epochs):
        #shuffle arrays in unison
        ind = np.arange(len(population))
        np.random.shuffle(ind)
        population = np.array(population)[ind]
        p_fitness = p_fitness[ind]
        for i in range(len(population)//batch_size):
            
            #turn population into vectors
            real_batch = []
            for b in range(batch_size):
                real_batch.append(get_params(population[(i*batch_size)+b]).unsqueeze(0))
            real_batch = torch.cat(real_batch, dim=0).to(device)
            
            #train discriminator on population
            dis_optimizer.zero_grad()
            dis_out_r,_ = dis(real_batch)
            dis_out_r = dis_out_r.squeeze(-1)
            rank = p_fitness[i*batch_size:(i*batch_size)+batch_size]
            dis_error_real = (torch.pow((dis_out_r[...,0]-rank),2))
            
            dis_error_real_mean = torch.mean(dis_error_real)
            dis_error_real_mean.backward(retain_graph=True)
            
            #discriminator confidence in its prediction
            dis_confidence = torch.mean(torch.pow(dis_out_r[...,1]-dis_error_real.detach(),2))
            dis_confidence.backward()
            
            dis_optimizer.step() 
            
            

        #for i in range(len(population)//batch_size):
            #generate children from population
            mutation = torch.from_numpy(np.random.uniform(all_a, all_a, batch_size)).type('torch.FloatTensor').to(device)  # note: identical low/high bounds, so every entry equals all_a
            
            child,confidence = gen_children(population,device,gen,batch_size,a = mutation)
            #train discriminator
            dis_out_f,dis_out_last = dis(child)
            dis_out_f = dis_out_f.squeeze(-1)
            
            gen_optimizer.zero_grad()
            
            #define generator loss
            #variety = torch.std(dis_out_f[...,0])#-torch.mean(torch.std(dis_out_last,-1))
            probs = torch.softmax(dis_out_f[...,0],-1)
            variety = -(torch.sum(probs*torch.log(probs)))

            gen_error_exploration = 0 +  (variety)-torch.mean(dis_out_f[...,1])
            gen_error_exploitation = -torch.mean(dis_out_f[...,0]) 
            gen_error =  gen_error_exploitation + (gen_error_exploration)
            
            gen_error.backward()
            gen_optimizer.step() 
            gen.hidden = None
            # keep losses for plotting (note: these are live tensors; calling .item() or .detach() would avoid retaining the graphs)
            gen_explore_all.append(gen_error_exploration)
            gen_exploit_all.append(gen_error_exploitation)
            dis_loss_all.append(dis_error_real_mean)  
            dis_confidence_all.append(dis_confidence)
            
    #print(rank)
    #print(dis_out_r[...,0])
    print(confidence)
    #print(get_params(population[0]))
    #print()
            
    return gen_explore_all,gen_exploit_all, dis_loss_all,dis_confidence_all
Example #48
0
File: math.py Project: aghriss/RL
def entropy_logits(logits):
    a0 = logits - logits.max(dim=-1, keepdim=True)[0]
    ea0 = torch.exp(a0)
    z0 = ea0.sum(dim=-1, keepdim=True)
    p0 = ea0 / z0
    return (p0 * (torch.log(z0) - a0)).sum(dim=-1)
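As a sanity check, this matches the categorical entropy computed by torch.distributions from the same logits (a sketch):

import torch
from torch.distributions import Categorical

logits = torch.randn(6, 4)
assert torch.allclose(entropy_logits(logits),
                      Categorical(logits=logits).entropy(), atol=1e-5)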
Example #49
0
 def loss(output, target, weights):
     output = torch.clamp(output, 1e-5, 1 - 1e-5)
     weights = torch.clamp(weights, 1e-5, 1 - 1e-5)
     ML = weights * (target * torch.log(output) +
                     (1 - target) * torch.log(1 - output))
     return torch.neg(torch.mean(ML))
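     # Note (hedged): up to the clamping, ML is the elementwise-weighted binary
     # cross-entropy, so this matches
     # F.binary_cross_entropy(output, target, weight=weights, reduction='mean')
     # after applying the same clamps to output and weights.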
    def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
        ''' roi pool on pyramid feature maps'''
        # do roi pooling based on predicted rois
        img_area = im_info[0][0] * im_info[0][1]
        h = rois.data[:, 4] - rois.data[:, 2] + 1
        w = rois.data[:, 3] - rois.data[:, 1] + 1

        roi_level = torch.log(torch.sqrt(h * w) / 50.0)
        roi_level = torch.round(roi_level + 4)
        roi_level[roi_level < 2] = 2
        roi_level[roi_level > 5] = 5
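        # e.g. an ROI of roughly 50x50 px gives log(sqrt(h*w) / 50) = 0, hence level
        # round(0 + 4) = 4; smaller ROIs fall to levels 3/2, larger ones are clipped to 5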

        # roi_level.fill_(5)
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            # NOTE: need to add pyramid
            grid_xy = _affine_grid_gen(rois,
                                       feat_maps.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            roi_pool_feat = self.RCNN_roi_crop(feat_maps,
                                               Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                # idx_l = (roi_level == l).nonzero().squeeze()
                idx_l = (roi_level == l).nonzero()
                if idx_l.shape[0] > 1:
                    idx_l = idx_l.squeeze()
                else:
                    idx_l = idx_l.view(-1)
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]

        elif cfg.POOLING_MODE == 'pool':
            roi_pool_feats = []
            box_to_levels = []
            for i, l in enumerate(range(2, 6)):
                if (roi_level == l).sum() == 0:
                    continue
                idx_l = (roi_level == l).nonzero().squeeze()
                box_to_levels.append(idx_l)
                scale = feat_maps[i].size(2) / im_info[0][0]
                feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
                roi_pool_feats.append(feat)
            roi_pool_feat = torch.cat(roi_pool_feats, 0)
            box_to_level = torch.cat(box_to_levels, 0)
            idx_sorted, order = torch.sort(box_to_level)
            roi_pool_feat = roi_pool_feat[order]

        return roi_pool_feat
Example #51
0
def log_eps(x, eps=1e-4):
    return torch.log(x + eps)
Example #52
0
 def masked_logL1_loss(self, predicted, target, mask):
     diff = torch.log(1 + torch.abs(predicted - target)) * mask
     loss = torch.sum(diff, dim=(2, 3)) / torch.sum(mask, dim=(2, 3))
     return torch.mean(loss)
Example #53
0
                    loss = bce(pre, trating) * len(pre)
                    loss.backward()
                    opt.step()

                    flp = torch.FloatTensor(flpre[:, nst])
                    hrnow = torch.FloatTensor(fullhr[:, nst])
                    if (cd == 1):
                        flp = flp.cuda()
                        hrnow = hrnow.cuda()
                        proa = proa.cuda()
                        samitem = samitem.cuda()
                    optgen.zero_grad()
                    [tlfw1, tlfw2, tlfla1, tlfa2, cuid] = gen(flp, hrnow)
                    pregen = model.itempre(samitem)

                    losgen = -torch.sum(cuid * (hrnow * torch.log(
                        (pregen + eps) / episa) + (1 - hrnow) * torch.log(
                            (1 - pregen + eps) /
                            (1 - episa))) + cuid * torch.log(proa /
                                                             (1 - proa)) -
                                        cuid * torch.log(cuid + eps) -
                                        (1 - cuid) * torch.log(1 - cuid + eps))
                    losgen.backward()
                    optgen.step()

                    if (ite % 200 == 0):
                        [teu, tev] = model.getem()
                        eu = teu.cpu().numpy()
                        ev = tev.cpu().numpy()
                        prerating = 1 / (1 + np.exp(-eu.dot(ev.T)))
                        cu = np.zeros((n, n))
                        nowfa = np.identity(n)
Example #54
0
	def forward(self, r, classes, m, d, alpha=1.0):

		self.r = r

		if self.device == 'cuda':
			self.classes = torch.from_numpy(classes).type(GPU_LONG_DTYPE)
			self.clusters, _ = torch.sort(torch.arange(0, float(m)).repeat(d))
			self.clusters = self.clusters.type(GPU_INT_DTYPE)
		else:
			self.classes = torch.from_numpy(classes).type(LONG_DTYPE)
			self.clusters, _ = torch.sort(torch.arange(0, float(m)).repeat(d))
			self.clusters = self.clusters.type(INT_DTYPE)

		self.cluster_classes = self.classes[0:m*d:d]
		self.n_clusters = m
		self.alpha = alpha
		#pdb.set_trace()

		# Take cluster means within the batch
		cluster_examples = dynamic_partition(self.r, self.clusters, self.n_clusters)
		#pdb.set_trace()

		cluster_means = torch.stack([torch.mean(x, dim=0) for x in cluster_examples])
		#pdb.set_trace()

		sample_costs = compute_euclidean_distance(cluster_means, expand_dims(r, 1))
		#pdb.set_trace()

		if self.device == 'cuda':
			clusters_tensor = self.clusters.type(GPU_FLOAT_DTYPE)
			n_clusters_tensor = torch.arange(0, self.n_clusters).type(GPU_FLOAT_DTYPE)
			intra_cluster_mask = Variable(comparison_mask(clusters_tensor, n_clusters_tensor).type(GPU_FLOAT_DTYPE))
		else:
			clusters_tensor = self.clusters.type(FLOAT_DTYPE)
			n_clusters_tensor = torch.arange(0, self.n_clusters).type(FLOAT_DTYPE)
			intra_cluster_mask = Variable(comparison_mask(clusters_tensor, n_clusters_tensor).type(FLOAT_DTYPE))
		#pdb.set_trace()

		#pdb.set_trace()

		intra_cluster_costs = torch.sum(intra_cluster_mask * sample_costs, dim=1)
		#pdb.set_trace()

		N = r.size()[0]
		#pdb.set_trace()

		variance = torch.sum(intra_cluster_costs) / float(N - 1)
		#pdb.set_trace()

		var_normalizer = -1 / (2 * variance**2)
		#pdb.set_trace()

		# Compute numerator
		numerator = torch.exp(var_normalizer * intra_cluster_costs - self.alpha)
		#pdb.set_trace()

		if self.device == 'cuda':
			classes_tensor = self.classes.type(GPU_FLOAT_DTYPE)
			cluster_classes_tensor = self.cluster_classes.type(GPU_FLOAT_DTYPE)
			# Compute denominator
			diff_class_mask = Variable(comparison_mask(classes_tensor, cluster_classes_tensor).type(GPU_FLOAT_DTYPE))
		else:
			classes_tensor = self.classes.type(FLOAT_DTYPE)
			cluster_classes_tensor = self.cluster_classes.type(FLOAT_DTYPE)
			# Compute denominator
			diff_class_mask = Variable(comparison_mask(classes_tensor, cluster_classes_tensor).type(FLOAT_DTYPE))

		diff_class_mask = 1 - diff_class_mask # Logical not on ByteTensor
		#pdb.set_trace()

		denom_sample_costs = torch.exp(var_normalizer * sample_costs)
		#pdb.set_trace()

		denominator = torch.sum(diff_class_mask * denom_sample_costs, dim=1)
		#pdb.set_trace()

		epsilon = 1e-8
		#pdb.set_trace()

		losses = F.relu(-torch.log(numerator / (denominator + epsilon) + epsilon))
		#pdb.set_trace()

		total_loss = torch.mean(losses)
		#pdb.set_trace()

		return total_loss, losses		
plt.ion()  # interactive mode: keep updating the same figure while training

for step in range(10000):
    artist_paintings, labels = artist_works_with_labels(
    )  # real painting, label from artist
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS)  # random ideas
    G_inputs = torch.cat((G_ideas, labels), 1)  # ideas with labels
    G_paintings = G(G_inputs)  # fake painting w.r.t label from G

    D_inputs0 = torch.cat((artist_paintings, labels),
                          1)  # all have their labels
    D_inputs1 = torch.cat((G_paintings, labels), 1)
    prob_artist0 = D(D_inputs0)  # D try to increase this prob
    prob_artist1 = D(D_inputs1)  # D try to reduce this prob

    D_score0 = torch.log(prob_artist0)  # maximise this for D
    D_score1 = torch.log(1. - prob_artist1)  # maximise this for D
    D_loss = -torch.mean(
        D_score0 + D_score1)  # minimise the negative of the two scores above for D
    G_loss = torch.mean(D_score1)  # minimise D score w.r.t G
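    # note: mean(log(1 - D(G(z)))) is the original "saturating" generator objective;
    # many implementations instead minimise -mean(log(D(G(z)))), which gives stronger
    # gradients early in training (not used here)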

    opt_D.zero_grad()
    D_loss.backward(retain_graph=True)  # reusing computational graph
    opt_D.step()

    opt_G.zero_grad()
    G_loss.backward()
    opt_G.step()

    if step % 200 == 0:  # plotting
        plt.cla()
Example #56
0
    def learn(self, writer, i_iter):
        memory, log = self.collector.collect_samples(
            self.config["train"]["generator"]["sample_batch_size"])

        self.policy.train()
        self.value.train()
        self.discriminator.train()

        print(
            f"Iter: {i_iter}, num steps: {log['num_steps']}, total reward: {log['total_reward']: .4f}, "
            f"min reward: {log['min_episode_reward']: .4f}, max reward: {log['max_episode_reward']: .4f}, "
            f"average reward: {log['avg_reward']: .4f}, sample time: {log['sample_time']: .4f}"
        )

        # record reward information
        writer.add_scalar("gail/average reward", log['avg_reward'], i_iter)
        writer.add_scalar("gail/num steps", log['num_steps'], i_iter)

        # collect generated batch
        # gen_batch = self.collect_samples(self.config["ppo"]["sample_batch_size"])
        gen_batch = memory.sample()
        gen_batch_state = FLOAT(gen_batch.state).to(
            device)  # [batch size, state size]
        gen_batch_action = FLOAT(gen_batch.action).to(
            device)  # [batch size, action size]
        gen_batch_old_log_prob = FLOAT(gen_batch.log_prob).to(
            device)  # [batch size, 1]
        gen_batch_mask = FLOAT(gen_batch.mask).to(device)  # [batch, 1]

        ####################################################
        # update discriminator
        ####################################################
        d_optim_i_iters = self.config["train"]["discriminator"]["optim_step"]
        if i_iter % d_optim_i_iters == 0:
            for expert_batch_state, expert_batch_action in self.expert_dataset.train_loader:
                # calculate probs and logits
                gen_prob, gen_logits = self.discriminator(
                    gen_batch_state, gen_batch_action)
                expert_prob, expert_logits = self.discriminator(
                    expert_batch_state.to(device),
                    expert_batch_action.to(device))

                # calculate accuracy
                gen_acc = torch.mean((gen_prob < 0.5).float())
                expert_acc = torch.mean((expert_prob > 0.5).float())

                # calculate regression loss
                expert_labels = torch.ones_like(expert_prob)
                gen_labels = torch.zeros_like(gen_prob)
                e_loss = self.discriminator_func(expert_prob,
                                                 target=expert_labels)
                g_loss = self.discriminator_func(gen_prob, target=gen_labels)
                d_loss = e_loss + g_loss

                # calculate entropy loss
                logits = torch.cat([gen_logits, expert_logits], 0)
                entropy = ((1. - torch.sigmoid(logits)) * logits -
                           torch.nn.functional.logsigmoid(logits)).mean()
                entropy_loss = -self.config["train"]["discriminator"][
                    "ent_coeff"] * entropy

                total_loss = d_loss + entropy_loss

                self.optimizer_discriminator.zero_grad()
                total_loss.backward()
                self.optimizer_discriminator.step()

        writer.add_scalar('discriminator/d_loss', d_loss.item(), i_iter)
        writer.add_scalar("discriminator/e_loss", e_loss.item(), i_iter)
        writer.add_scalar("discriminator/g_loss", g_loss.item(), i_iter)
        writer.add_scalar("discriminator/ent", entropy.item(), i_iter)
        writer.add_scalar('discriminator/expert_acc', expert_acc.item(), i_iter)
        writer.add_scalar('discriminator/gen_acc', gen_acc.item(), i_iter)

        ####################################################
        # update policy by ppo [mini_batch]
        ####################################################

        with torch.no_grad():
            gen_batch_value = self.value(gen_batch_state)
            d_out, _ = self.discriminator(gen_batch_state, gen_batch_action)
            gen_batch_reward = -torch.log(1 - d_out + 1e-6)

        gen_batch_advantage, gen_batch_return = estimate_advantages(
            gen_batch_reward, gen_batch_mask, gen_batch_value,
            self.config["train"]["generator"]["gamma"],
            self.config["train"]["generator"]["tau"])

        ppo_optim_i_iters = self.config["train"]["generator"]["optim_step"]
        ppo_mini_batch_size = self.config["train"]["generator"][
            "mini_batch_size"]

        for _ in range(ppo_optim_i_iters):
            if ppo_mini_batch_size > 0:
                gen_batch_size = gen_batch_state.shape[0]
                optim_iter_num = int(
                    math.ceil(gen_batch_size / ppo_mini_batch_size))
                perm = torch.randperm(gen_batch_size)

                for i in range(optim_iter_num):
                    ind = perm[slice(
                        i * ppo_mini_batch_size,
                        min((i + 1) * ppo_mini_batch_size, gen_batch_size))]
                    mini_batch_state, mini_batch_action, mini_batch_advantage, mini_batch_return, \
                    mini_batch_old_log_prob = gen_batch_state[ind], gen_batch_action[ind], \
                                              gen_batch_advantage[ind], gen_batch_return[ind], gen_batch_old_log_prob[
                                                  ind]

                    v_loss, p_loss, ent_loss = ppo_step(
                        policy_net=self.policy,
                        value_net=self.value,
                        optimizer_policy=self.optimizer_policy,
                        optimizer_value=self.optimizer_value,
                        optim_value_iternum=self.config["value"]
                        ["optim_value_iter"],
                        states=mini_batch_state,
                        actions=mini_batch_action,
                        returns=mini_batch_return,
                        old_log_probs=mini_batch_old_log_prob,
                        advantages=mini_batch_advantage,
                        clip_epsilon=self.config["train"]["generator"]
                        ["clip_ratio"],
                        l2_reg=self.config["value"]["l2_reg"])
            else:
                v_loss, p_loss, ent_loss = ppo_step(
                    policy_net=self.policy,
                    value_net=self.value,
                    optimizer_policy=self.optimizer_policy,
                    optimizer_value=self.optimizer_value,
                    optim_value_iternum=self.config["value"]
                    ["optim_value_iter"],
                    states=gen_batch_state,
                    actions=gen_batch_action,
                    returns=gen_batch_return,
                    old_log_probs=gen_batch_old_log_prob,
                    advantages=gen_batch_advantage,
                    clip_epsilon=self.config["train"]["generator"]
                    ["clip_ratio"],
                    l2_reg=self.config["value"]["l2_reg"])

        writer.add_scalar('generator/p_loss', p_loss, i_iter)
        writer.add_scalar('generator/v_loss', v_loss, i_iter)
        writer.add_scalar('generator/ent_loss', ent_loss, i_iter)

        print(f" Training episode:{i_iter} ".center(80, "#"))
        print('d_gen_prob:', gen_prob.mean().item())
        print('d_expert_prob:', expert_prob.mean().item())
        print('d_loss:', d_loss.item())
        print('e_loss:', e_loss.item())
        print("d/bernoulli_entropy:", entropy.item())
Example #57
0
    def get_mi(self, X, Y, Y_tilde):

        T = self.network(X, Y).mean()
        expT = torch.exp(self.network(X, Y_tilde)).mean()
        mi = (T - torch.log(expT)).item() / math.log(2)
        return mi, T, expT
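This is the Donsker-Varadhan estimate E[T(x, y)] - log E[exp T(x, y~)], converted from nats to bits by the division by math.log(2). A hedged sketch (not shown in the original snippet) of how the marginal samples Y_tilde are commonly produced, by shuffling Y along the batch dimension:

import torch

def shuffle_marginal(Y):
    # permuting the batch breaks the pairing with X, giving approximate
    # samples from the product of marginals p(X)p(Y)
    return Y[torch.randperm(Y.size(0))]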
Example #58
0
    def forward(self, x):
        # stage 0

        x, xr1, xr2, xr3, xr4 = self.pretrained_resnet(x)

        #not needed if layer 1 is adjusted.
        # xr5 = self.layer5(xr4)


        xd1 = self.d1(xr1)

        xd2 = self.d2(xr2)

        xd3 = self.d3(xr3)

        #not needed if layer 1 is adjusted.
        # xd4 = self.d4(xr4)

        ## upward path

        # upsampling last feature map using bilinear interpolation
        # not needed if layer 1 downsampling is active
        # up0 = self.upsample0(xr5)
        # cat0 = torch.cat((up0, xd4), dim=1)
        # updense0 = self.updense0(cat0)

        # changed from updense 0 to xr4, if downsampling in layer1
        up1 = self.upsample(xr4)
        #concat the upsampled feature maps with the dense featuremaps from previous layer
        cat1 = torch.cat((up1, xd3), dim=1)
        # densenet to reduce channels to 4k --> [bs x 56 x 16 x 16]
        updense1 =self.updense1(cat1)

        # upsampling last feature map using bilinear interpolation
        up2 = self.upsample(updense1)
        # concat the upsampled feature maps with the dense featuremaps from previous layer
        cat2 = torch.cat((up2, xd2), dim=1)
        # densenet to reduce channels to 2k --> [bs x 28 x 32 x 32]
        updense2 =self.updense2(cat2)

        # upsampling last feature map using bilinear interpolation
        up3 = self.upsample(updense2)
        # concat the upsampled feature maps with the dense featuremaps from previous layer
        cat3 = torch.cat((up3, xd1), dim=1)
        # densenet to reduce channels to k --> [bs x 14 x 64 x 64]
        updense3 = self.updense3(cat3)

        reduced = self.onexone(updense3)
        # final sigmoid layer before the saliency maps
        # todo: add normalization before sigmoid, change sigmoid to alternative to prevent dying gradients, also add pooling
        saliency_map = self.sigmoid(reduced)

        class_scores = self.pooling(saliency_map)

        norm_scores = self.norm(class_scores)

        # log scores for Nllloss on RSNA
        log_scores = torch.log(norm_scores)

        return(saliency_map, log_scores)
Example #59
0
File: train.py Project: tagas/vcae
def train_model(model, dataset, ds_name,
                epochs=10,
                batch_size=32,
                sample_size=32,
                eval_size=32,
                img_size=32,
                lr=1e-3,
                weight_decay=1e-4,
                loss_log_interval=20,
                image_log_interval=20,
                model_log_interval=20,
                checkpoint_dir='./checkpoints',
                results_dir='./res',
                resume=False,
                cuda=False,
                seed=0,
                device=None,
                cores=1):
    
    if resume:
        epoch_start = utils.load_checkpoint(model, checkpoint_dir)
    else:
        epoch_start = 0

    fixed_noise = torch.rand(sample_size, model.z_size).to(device)

    if model.model_name in ['vae', 'vae2', 'vae3']:
        m = dist.Normal(torch.Tensor([0.0]).to(device), torch.Tensor([1.0]).to(device))
        fixed_noise = m.icdf(fixed_noise)

    output_folder = results_dir + ds_name
    resfile_prefix = ds_name + "_" + \
                     model.model_name + \
                     "_ld_" + \
                     str(model.z_size) + \
                     "_bs_" + str(batch_size)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    data_root = './datasets'

    # DEC-VINE preclustering setup and training
    if model.model_name in ['dec_vine', 'dec_vine2', 'dec_vine3']:

        # load pre-trained AE
        if model.model_name == 'dec_vine':
            pretrain_prefix = resfile_prefix.replace("dec", "ae")
        elif model.model_name == 'dec_vine2':
            pretrain_prefix = resfile_prefix.replace("dec_vine2", "ae_vine2")
        elif model.model_name == 'dec_vine3':
            pretrain_prefix = resfile_prefix.replace("dec_vine3", "ae_vine3")
        
        pretrain_files = [filename for filename in os.listdir(checkpoint_dir) if filename.startswith(pretrain_prefix)]
        pretrain_epochs = [int(filename.replace(pretrain_prefix + "_", "")) for filename in pretrain_files]
        pretrain_path = os.path.join(checkpoint_dir, pretrain_files[pretrain_epochs.index(max(pretrain_epochs))])

        model.pretrain(pretrain_path)

        # form initial cluster centres
        data_loader = utils.get_data_loader(dataset, batch_size, cuda=cuda)
        data_stream = tqdm(enumerate(data_loader, 1))
        features = []

        for batch_index, (x, _, _) in data_stream:
            tmp_x = Variable(x).to(device)
            if model.model_name == 'dec_vine':
                z = model.ae.encoder(tmp_x)
                z = model.ae.q(z)
            elif model.model_name == 'dec_vine2' or  model.model_name == 'dec_vine3':
                z = torch.nn.functional.relu(model.ae.fc1(model.ae.encoder(tmp_x).view(x.size(0), -1)))
                z = model.ae.fc21(z)

            features.append(z)

        kmeans = KMeans(n_clusters=model.cluster_number, n_init=20)
        y_pred = kmeans.fit_predict(torch.cat(features).detach().cpu().numpy())
        model.cluster_layer.data = torch.tensor(kmeans.cluster_centers_).to(device)

    # load a pre-trained state for any model
    pretrain=0
    
    if  pretrain==1 and model.model_name == 'ae_vine3':
        
        pretrain_prefix = resfile_prefix
        pretrain_files = [filename for filename in os.listdir(checkpoint_dir) if filename.startswith(pretrain_prefix)]
        pretrain_epochs = [int(filename.replace(pretrain_prefix + "_", "")) for filename in pretrain_files]
        pretrain_path = os.path.join(checkpoint_dir, pretrain_files[pretrain_epochs.index(max(pretrain_epochs))])
        pretrained_ae = torch.load(pretrain_path, map_location=device)
        
        model.load_state_dict(pretrained_ae['state'])
        print('load pretrained ae3 from', pretrain_path)


    # reconstruction_criterion = torch.nn.BCELoss()
    reconstruction_criterion = torch.nn.BCELoss(size_average=False)  # size_average=False is deprecated; equivalent to reduction='sum' in newer PyTorch
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    if model.model_name == 'gan':
        lr_g = lr_d = 0.0002
        k = 1
        fix_noise = get_noise(sample_size)
        opt_g = torch.optim.Adam(model.net_g.parameters(), lr=lr_g, betas=(0.5, 0.999))  # optimizer for Generator
        opt_d = torch.optim.Adam(model.net_d.parameters(), lr=lr_d, betas=(0.5, 0.999))  # optimizer for Discriminator

    for epoch in range(epoch_start, epochs + 1):
        print("Epoch {}".format(epoch))
        
        if model.model_name == "dec_vine" or model.model_name == "dec_vine2":
            model.eval()
            p = []
            indices = []
            data_loader = utils.get_data_loader(dataset, batch_size, cuda=cuda)
            data_stream = tqdm(enumerate(data_loader, 1))

            for batch_index, (x, _, idx) in data_stream:
                tmp_x = Variable(x).to(device)
                _, tmp_p = model(tmp_x)
                p.append(tmp_p.detach().cpu())
                tmp_idx = idx
                indices.append(tmp_idx)

            p = torch.cat(p)
            indices = torch.cat(indices)
            p = model.target_distribution(p[indices])
            p = Variable(p).to(device)

        model.train()
        data_loader = utils.get_data_loader(dataset, batch_size, cuda=cuda)
        data_stream = tqdm(enumerate(data_loader, 1))

        for batch_index, (x, _, idx) in data_stream:
            
            # learning rate decay
            if model.model_name == 'gan' and epoch == 8:
                opt_g.param_groups[0]['lr'] /= 10
                opt_d.param_groups[0]['lr'] /= 10

            if model.model_name == 'gan' and epoch == 15:
                opt_g.param_groups[0]['lr'] /= 10
                opt_d.param_groups[0]['lr'] /= 10
            
            iteration = (epoch - 1) * (len(dataset) // batch_size) + batch_index
            x = Variable(x).to(device)
            idx = Variable(idx).to(device)


            if model.model_name == 'gan':
                # train Discriminator
                real_data = Variable(x.cuda())
                prob_fake = model.net_d(model.net_g(get_noise(real_data.size(0)).to(device)))
                prob_real = model.net_d(real_data)

                loss_d = - torch.mean(torch.log(prob_real) + torch.log(1 - prob_fake))

                opt_d.zero_grad()
                loss_d.backward()
                opt_d.step()

                # train Generator
                if batch_index % k == 0:
                    prob_fake = model.net_d(model.net_g(get_noise().to(device)))

                    loss_g = - torch.mean(torch.log(prob_fake))

                    opt_g.zero_grad()
                    loss_g.backward()
                    opt_g.step()

            else:

                if model.model_name == 'ae_vine' or model.model_name == 'ae_vine2' or model.model_name == 'ae_vine3':
                    x_reconstructed = model(x)

                elif model.model_name == 'dec_vine' or model.model_name == 'dec_vine2':
                    x_reconstructed, q = model(x)
                    p_batch = p[idx]
                    penalization_loss = 10*F.kl_div(q.log(), p_batch)
                    del p_batch, q

                elif model.model_name == 'vae' or model.model_name == "vae2" or model.model_name=="vae3":
                    (mean, logvar), x_reconstructed = model(x)
                    penalization_loss = model.kl_divergence_loss(mean, logvar)

                if model.model_name == 'vae2' or model.model_name == 'vae3':

                    reconstruction_loss = reconstruction_criterion(x_reconstructed, x)# / x.size(0)
                else:
                    reconstruction_loss = reconstruction_criterion(x_reconstructed, x) / x.size(0)

                if model.model_name == 'ae_vine' or model.model_name == 'ae_vine2' or model.model_name == 'ae_vine3':
                    loss = reconstruction_loss
                else:
                    loss = reconstruction_loss + penalization_loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if iteration % loss_log_interval == 0:

                f = open(output_folder + "/" + resfile_prefix + "_losses" + ".txt", 'a')

                if model.model_name == 'gan':
                    f.write("\n{:<12} | {} | {} | {} | {} ".format(
                        model.model_name,
                        iteration,
                        loss_g,
                        loss_d,
                        seed
                    ))

                else:
                    if model.model_name == 'ae_vine' or model.model_name == 'ae_vine2' or model.model_name == 'ae_vine3':
                        f.write("\n{:<12} | {} | {} | {} ".format(
                            model.model_name,
                            iteration,
                            loss,
                            seed
                        ))

                    else:
                        f.write("\n{:<12} | {} | {} | {} | {} | {}".format(
                            model.model_name,
                            iteration,
                            reconstruction_loss.data.item(),
                            penalization_loss.data.item(),
                            loss.data.item(),
                            seed
                        ))

                f.close()

            # adding this just to have a way of calculating the scores at 0 epochs
            if batch_index > 0 and epoch == 0:
                break

        if epoch % model_log_interval == 0:
            print()
            print('###################')
            print('# model checkpoint!')
            print('###################')
            print()
            utils.save_checkpoint(model, checkpoint_dir, epoch, resfile_prefix + "_" + str(epoch))

        if epoch % image_log_interval == 0:

            print()
            print('###################')
            print('# image checkpoint!')
            print('###################')
            print()

            model.eval()

            ae_vine_models = ['ae_vine', 'ae_vine2', 'dec_vine', 'dec_vine2', 'ae_vine3', 'dec_vine3']

            if model.model_name in ae_vine_models:

                data_loader_vine = utils.get_data_loader(dataset, 5000, cuda=cuda)
                data_stream_vine = tqdm(enumerate(data_loader_vine, 1))
                features = []

                for batch_index, (x, _, _) in data_stream_vine:

                    tmp_x = Variable(x).to(device)
                    if model.model_name == 'ae_vine':
                        encoded = model.encoder(tmp_x)
                        e = model.q(encoded)

                    elif model.model_name == 'dec_vine':
                        encoded = model.ae.encoder(tmp_x)
                        e = model.ae.q(encoded)

                    elif model.model_name == 'ae_vine2':
                        encoded = torch.nn.functional.relu(model.fc1(model.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.fc21(encoded)

                    elif model.model_name == 'dec_vine2':
                        encoded = torch.nn.functional.relu(model.ae.fc1(model.ae.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.ae.fc21(encoded)


                    elif model.model_name == 'ae_vine3':
                        encoded = F.relu(model.fc1(model.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.fc21(encoded)

                    elif model.model_name == 'dec_vine3':
                        encoded = F.relu(model.ae.fc1(model.ae.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.ae.fc21(encoded)
                    features.append(e.detach().cpu())
                    if batch_index > 0:
                        break

                features = torch.cat(features).numpy()
                copula_controls = base.list(family_set="tll", trunc_lvl=5, cores=cores)
                vine_obj = rvinecop.vine(features, copula_controls=copula_controls)

                model.vine = vine_obj

                fake = model.sample(sample_size, vine_obj, fixed_noise)

                del x, e, encoded, vine_obj,data_loader_vine

            elif model.model_name == 'gan':
                fake = model.net_g(fix_noise.to(device)).data.cpu() + 0.5
                print(fake.shape)
            else:

                fake = model.sample(sample_size, fixed_noise)

            fake = fake.reshape(sample_size, model.channel_num,
                                model.image_size, model.image_size)
            name_str = resfile_prefix + '_fake_samples_epoch'
            vutils.save_image(fake.detach(),
                              '%s/%s_%03d.png' % (output_folder, name_str, epoch),
                              normalize=True)
            del fake
        
        if epoch > 0 and epoch % 2 == 0: 
            eval_size = 2000 
            s = metric.compute_score_raw(ds_name, dataset, img_size, data_root,
                                         eval_size, batch_size,
                                         output_folder + '/real/',
                                         output_folder + '/fake/',
                                         model, model.z_size, 'resnet34', device)

            f = open(output_folder + "/" + resfile_prefix + "_scores" + ".txt", 'a')

            scr_arr = [str(a) for a in s]
            f.write("\n{:<12} | {} | {} | {}".format(
                model.model_name,
                epoch,
                ', '.join(scr_arr),
                seed
            ))

            f.close()
Example #60
0
def atanh(x):
    return 0.5 * torch.log((1. + x) / (1. - x))
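A quick consistency check against the built-in (torch.atanh is available in newer PyTorch releases); a small sketch:

import torch

x = torch.empty(5).uniform_(-0.99, 0.99)
assert torch.allclose(atanh(x), torch.atanh(x), atol=1e-6)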