def encode(self, reference_boxes, proposals):
    """
    Encode a set of proposals with respect to some reference boxes

    Arguments:
        reference_boxes (Tensor): reference boxes
        proposals (Tensor): boxes to be encoded
    """

    TO_REMOVE = 1  # TODO remove
    ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE
    ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE
    ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths
    ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights

    gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE
    gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE
    gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths
    gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights

    wx, wy, ww, wh = self.weights
    targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = ww * torch.log(gt_widths / ex_widths)
    targets_dh = wh * torch.log(gt_heights / ex_heights)

    targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
    return targets
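# A hedged usage sketch for encode() above, assuming it is in scope as a plain
# function; SimpleNamespace stands in for the owning box-coder object and the
# weight values are illustrative, not the original configuration.
import torch
from types import SimpleNamespace

coder = SimpleNamespace(weights=(10.0, 10.0, 5.0, 5.0))
proposals = torch.tensor([[0.0, 0.0, 10.0, 10.0]])        # (x1, y1, x2, y2)
reference_boxes = torch.tensor([[1.0, 1.0, 12.0, 13.0]])
targets = encode(coder, reference_boxes, proposals)       # shape (1, 4): dx, dy, dw, dh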
def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):
    num_wrong = wrong.size(1)
    batch_size = feat.size(0)

    feat = feat.view(-1, self.ninp, 1)
    right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
    wrong_dis = torch.bmm(wrong, feat)
    batch_wrong_dis = torch.bmm(batch_wrong, feat)

    wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)), 1) \
        + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)), 1)

    loss_dis = torch.sum(torch.log(wrong_score + 1))
    loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

    # Truth-testing a tensor is ambiguous; check for None explicitly.
    if fake is not None:
        fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
        fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

        margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
        loss_fake = torch.sum(margin_score)
        loss_dis += loss_fake
        loss_norm += fake.norm()

    loss = (loss_dis + 0.1 * loss_norm) / batch_size
    if fake is not None:
        return loss, loss_fake.data[0] / batch_size
    else:
        return loss
def norm_flow(self, params, z, v, logposterior): h = F.tanh(params[0][0](z)) mew_ = params[0][1](h) sig_ = F.sigmoid(params[0][2](h)+5.) #[PB,Z] z_reshaped = z.view(self.P, self.B, self.z_size) gradients = torch.autograd.grad(outputs=logposterior(z_reshaped), inputs=z_reshaped, grad_outputs=self.grad_outputs, create_graph=True, retain_graph=True, only_inputs=True)[0] gradients = gradients.detach() gradients = gradients.view(-1,self.z_size) v = v*sig_ + mew_*gradients logdet = torch.sum(torch.log(sig_), 1) h = F.tanh(params[1][0](v)) mew_ = params[1][1](h) sig_ = F.sigmoid(params[1][2](h)+5.) #[PB,Z] z = z*sig_ + mew_*v logdet2 = torch.sum(torch.log(sig_), 1) #[PB] logdet = logdet + logdet2 #[PB,Z], [PB] return z, v, logdet
def pixelcnn_generate(self, z1, z2): # Sampling from PixelCNN x_zeros = torch.zeros( (z1.size(0), self.args.input_size[0], self.args.input_size[1], self.args.input_size[2])) if self.args.cuda: x_zeros = x_zeros.cuda() for i in range(self.args.input_size[1]): for j in range(self.args.input_size[2]): samples_mean, samples_logvar = self.p_x(Variable(x_zeros, volatile=True), z1, z2) samples_mean = samples_mean.view(samples_mean.size(0), self.args.input_size[0], self.args.input_size[1], self.args.input_size[2]) if self.args.input_type == 'binary': probs = samples_mean[:, :, i, j].data x_zeros[:, :, i, j] = torch.bernoulli(probs).float() samples_gen = samples_mean elif self.args.input_type == 'gray' or self.args.input_type == 'continuous': binsize = 1. / 256. samples_logvar = samples_logvar.view(samples_mean.size(0), self.args.input_size[0], self.args.input_size[1], self.args.input_size[2]) means = samples_mean[:, :, i, j].data logvar = samples_logvar[:, :, i, j].data # sample from logistic distribution u = torch.rand(means.size()).cuda() y = torch.log(u) - torch.log(1. - u) sample = means + torch.exp(logvar) * y x_zeros[:, :, i, j] = torch.floor(sample / binsize) * binsize samples_gen = samples_mean return samples_gen
def log_uniform_candidate_sampler(self, targets, choice_func=_choice):
    # returns sampled, true_expected_count, sampled_expected_count
    # targets = (batch_size, )
    #
    # samples = (n_samples, )
    # true_expected_count = (batch_size, )
    # sampled_expected_count = (n_samples, )

    # see: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.h
    #      https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.cc

    # algorithm: keep track of number of tries when doing sampling,
    #   then expected count is
    #   -expm1(num_tries * log1p(-p))
    #   = (1 - (1-p)^num_tries) where p is self._probs[id]

    np_sampled_ids, num_tries = choice_func(self._num_words, self._num_samples)

    sampled_ids = torch.from_numpy(np_sampled_ids).to(targets.device)

    # Compute expected count = (1 - (1-p)^num_tries) = -expm1(num_tries * log1p(-p))
    # P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
    target_probs = torch.log((targets.float() + 2.0) / (targets.float() + 1.0)) / self._log_num_words_p1
    target_expected_count = -1.0 * (torch.exp(num_tries * torch.log1p(-target_probs)) - 1.0)
    sampled_probs = torch.log((sampled_ids.float() + 2.0) / (sampled_ids.float() + 1.0)) / self._log_num_words_p1
    sampled_expected_count = -1.0 * (torch.exp(num_tries * torch.log1p(-sampled_probs)) - 1.0)

    sampled_ids.requires_grad_(False)
    target_expected_count.requires_grad_(False)
    sampled_expected_count.requires_grad_(False)

    return sampled_ids, target_expected_count, sampled_expected_count
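# The helper `_choice` is not shown in this snippet, so here is a hedged sketch of
# a compatible choice_func following the TensorFlow log-uniform sampling scheme the
# comments reference: draw ids with P(k) = (log(k+2) - log(k+1)) / log(num_words + 1)
# until num_samples distinct ids are collected, and report the total number of draws.
import numpy as np

def _choice_sketch(num_words, num_samples):
    sampled = set()
    num_tries = 0
    while len(sampled) < num_samples:
        num_tries += 1
        # inverse-CDF draw: k = floor((num_words + 1) ** u) - 1 with u ~ Uniform(0, 1)
        k = int(np.exp(np.random.uniform(0.0, np.log(num_words + 1.0)))) - 1
        sampled.add(min(max(k, 0), num_words - 1))
    return np.array(sorted(sampled)), num_tries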
def compute_loss(self, outputs, masks, labels): """ Our implementation of weighted BCE loss. """ labels = labels.view(-1) masks = masks.view(-1) outputs = outputs.view(-1) # Generate the weights ones = torch.sum(labels) total = labels.nelement() weights = torch.FloatTensor(outputs.size()).type_as(outputs.data) weights[labels.long() == 1] = 1.0 - ones / total weights[labels.long() == 0] = ones / total weights = weights.view(weights.size(0), 1).expand(weights.size(0), 2) # Generate the log outputs outputs = outputs.clamp(min=1e-8) log_outputs = torch.log(outputs) neg_outputs = 1.0 - outputs neg_outputs = neg_outputs.clamp(min=1e-8) neg_log_outputs = torch.log(neg_outputs) all_outputs = torch.cat((log_outputs.view(-1, 1), neg_log_outputs.view(-1, 1)), 1) all_values = all_outputs.mul(torch.autograd.Variable(weights)) all_labels = torch.autograd.Variable(torch.cat((labels.view(-1, 1), (1.0 - labels).view(-1, 1)), 1)) all_masks = torch.autograd.Variable(torch.cat((masks.view(-1, 1), masks.view(-1, 1)), 1)) loss = -torch.sum(all_values.mul(all_labels).mul(all_masks)) / outputs.size(0) return loss
def norm_flow(self, params, z, v): # print (z.size()) h = F.tanh(params[0][0](z)) mew_ = params[0][1](h) sig_ = F.sigmoid(params[0][2](h)+5.) #[PB,Z] # print (v.size()) # print (mew_.size()) # print (self.B) # print (self.P) v = v*sig_ + mew_ logdet = torch.sum(torch.log(sig_), 1) h = F.tanh(params[1][0](v)) mew_ = params[1][1](h) sig_ = F.sigmoid(params[1][2](h)+5.) #[PB,Z] z = z*sig_ + mew_ logdet2 = torch.sum(torch.log(sig_), 1) #[PB] logdet = logdet + logdet2 #[PB,Z], [PB] return z, v, logdet
def reverse_flow(self, z): B = z.shape[0] C = z.shape[1] f = self.flows logdet = 0. reverse_ = list(range(self.n_flows))[::-1] for i in reverse_: z1 = z[:,:C//2] z2 = z[:,C//2:] sig1 = torch.sigmoid(f[str(i)]['f2_sig'](z1)) mu1 = f[str(i)]['f2_mu'](z1) z2 = (z2 - mu1) / sig1 sig2 = torch.sigmoid(f[str(i)]['f1_sig'](z2)) mu2 = f[str(i)]['f1_mu'](z2) z1 = (z1 - mu2) / sig2 z = torch.cat([z1,z2],1) z = z[:,f[str(i)]['inv_perm']] sig1 = sig1.view(B, -1) sig2 = sig2.view(B, -1) logdet += torch.sum(torch.log(sig1), 1) logdet += torch.sum(torch.log(sig2), 1) return z, logdet
def sample_relax(logits, surrogate): cat = Categorical(logits=logits) u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda() gumbels = -torch.log(-torch.log(u)) z = logits + gumbels b = torch.argmax(z, dim=1) #.view(B,1) logprob = cat.log_prob(b).view(B,1) # czs = [] # for j in range(1): # z = sample_relax_z(logits) # surr_input = torch.cat([z, x, logits.detach()], dim=1) # cz = surrogate.net(surr_input) # czs.append(cz) # czs = torch.stack(czs) # cz = torch.mean(czs, dim=0)#.view(1,1) surr_input = torch.cat([z, x, logits.detach()], dim=1) cz = surrogate.net(surr_input) cz_tildes = [] for j in range(1): z_tilde = sample_relax_given_b(logits, b) surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1) cz_tilde = surrogate.net(surr_input) cz_tildes.append(cz_tilde) cz_tildes = torch.stack(cz_tildes) cz_tilde = torch.mean(cz_tildes, dim=0) #.view(B,1) return b, logprob, cz, cz_tilde
def forward_flow(self, z, xenc): B = z.shape[0] C = z.shape[1] f = self.flows logdet = 0. for i in range(self.n_flows): z = z[:,f[str(i)]['perm']] z1 = z[:,:C//2] z2 = z[:,C//2:] sig2 = torch.sigmoid(f[str(i)]['f1_sig'](torch.cat([z2,xenc],1))) mu2 = f[str(i)]['f1_mu'](torch.cat([z2,xenc],1)) z1 = z1*sig2 + mu2 mu1 = f[str(i)]['f2_mu'](torch.cat([z1,xenc],1)) sig1 = torch.sigmoid(f[str(i)]['f2_sig'](torch.cat([z1,xenc],1))) z2 = z2*sig1 + mu1 z = torch.cat([z1,z2],1) sig1 = sig1.view(B, -1) sig2 = sig2.view(B, -1) logdet += torch.sum(torch.log(sig1), 1) logdet += torch.sum(torch.log(sig2), 1) return z, logdet
def predictive_elbo(self, x, k, s): # No pW or qW self.B = x.size()[0] #batch size # self.k = k #number of z samples aka particles P # self.s = s #number of W samples elbo1s = [] for i in range(s): Ws, logpW, logqW = self.sample_W() #_ , [1], [1] mu, logvar = self.encode(x) #[B,Z] z, logpz, logqz = self.sample_z(mu, logvar, k=k) #[P,B,Z], [P,B] x_hat = self.decode(Ws, z) #[P,B,X] logpx = log_bernoulli(x_hat, x) #[P,B] elbo = logpx + logpz - logqz #[P,B] if k>1: max_ = torch.max(elbo, 0)[0] #[B] elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B] # elbo1 = elbo1 #+ (logpW - logqW)*.00000001 #[B], logp(x|W)p(w)/q(w) elbo1s.append(elbo) elbo1s = torch.stack(elbo1s) #[S,B] if s>1: max_ = torch.max(elbo1s, 0)[0] #[B] elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_ #[B] elbo = torch.mean(elbo1s) #[1] return elbo#, logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
def forward(self, input): n = len(input) embeds = self.input_fun(input) # pick first node scores = self.scores(embeds, 0, 0) choice = self.choice(n, scores) picks = [choice] loss = -torch.log(scores[choice]) / n outputs = [] for i, e in enumerate(embeds): outputs.append(self.output) if picks[-1] > i: # skip elements until next node continue lstm_out, self.hidden = self.lstm(e.view(1, 1, -1), self.hidden) self.output = self.output_fun(lstm_out.view(1, -1)) if len(picks) < self.subset: # pick next node scores = self.scores(embeds, len(picks), i + 1) choice = self.choice(n, scores) picks.append(choice) loss -= torch.log(scores[choice]) / (n - i) return loss, outputs, picks
def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)
        loss.backward()
        # Clip gradients only after backward() has populated them.
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
def sample_from_discretized_mix_logistic_1d(l, nr_mix): # Pytorch ordering l = l.permute(0, 2, 3, 1) ls = [int(y) for y in l.size()] xs = ls[:-1] + [1] #[3] # unpack parameters logit_probs = l[:, :, :, :nr_mix] l = l[:, :, :, nr_mix:].contiguous().view(xs + [nr_mix * 2]) # for mean, scale # sample mixture indicator from softmax temp = torch.FloatTensor(logit_probs.size()) if l.is_cuda : temp = temp.cuda() temp.uniform_(1e-5, 1. - 1e-5) temp = logit_probs.data - torch.log(- torch.log(temp)) _, argmax = temp.max(dim=3) one_hot = to_one_hot(argmax, nr_mix) sel = one_hot.view(xs[:-1] + [1, nr_mix]) # select logistic parameters means = torch.sum(l[:, :, :, :, :nr_mix] * sel, dim=4) log_scales = torch.clamp(torch.sum( l[:, :, :, :, nr_mix:2 * nr_mix] * sel, dim=4), min=-7.) u = torch.FloatTensor(means.size()) if l.is_cuda : u = u.cuda() u.uniform_(1e-5, 1. - 1e-5) u = Variable(u) x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1. - u)) x0 = torch.clamp(torch.clamp(x[:, :, :, 0], min=-1.), max=1.) out = x0.unsqueeze(1) return out
def relax_grad2(x, logits, b, surrogate, mixtureweights): B = logits.shape[0] C = logits.shape[1] cat = Categorical(logits=logits) # u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda() u = myclamp(torch.rand(B,C).cuda()) gumbels = -torch.log(-torch.log(u)) z = logits + gumbels # b = torch.argmax(z, dim=1) #.view(B,1) logq = cat.log_prob(b).view(B,1) surr_input = torch.cat([z, x, logits.detach()], dim=1) cz = surrogate.net(surr_input) z_tilde = sample_relax_given_b(logits, b) surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1) cz_tilde = surrogate.net(surr_input) logpx_given_z = logprob_undercomponent(x, component=b) logpz = torch.log(mixtureweights[b]).view(B,1) logpxz = logpx_given_z + logpz #[B,1] f = logpxz - logq net_loss = - torch.mean( (f.detach() - cz_tilde.detach()) * logq - logq + cz - cz_tilde ) grad = torch.autograd.grad([net_loss], [logits], create_graph=True, retain_graph=True)[0] #[B,C] pb = torch.exp(logq) return grad, pb
def get_probs_and_logits(ps=None, logits=None, is_multidimensional=True): """ Convert probability values to logits, or vice-versa. Either ``ps`` or ``logits`` should be specified, but not both. :param ps: tensor of probabilities. Should be in the interval *[0, 1]*. If, ``is_multidimensional = True``, then must be normalized along axis -1. :param logits: tensor of logit values. For the multidimensional case, the values, when exponentiated along the last dimension, must sum to 1. :param is_multidimensional: determines the computation of ps from logits, and vice-versa. For the multi-dimensional case, logit values are assumed to be log probabilities, whereas for the uni-dimensional case, it specifically refers to log odds. :return: tuple containing raw probabilities and logits as tensors. """ assert (ps is None) != (logits is None) if ps is not None: eps = _get_clamping_buffer(ps) ps_clamped = ps.clamp(min=eps, max=1 - eps) if is_multidimensional: if ps is None: ps = softmax(logits, -1) else: logits = torch.log(ps_clamped) else: if ps is None: ps = F.sigmoid(logits) else: logits = torch.log(ps_clamped) - torch.log1p(-ps_clamped) return ps, logits
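# A standalone illustration of the two conventions get_probs_and_logits() uses:
# multidimensional logits are log-probabilities (inverted by softmax), while
# unidimensional logits are log-odds (inverted by sigmoid).
import torch

ps = torch.tensor([0.1, 0.2, 0.7])
multi_logits = torch.log(ps)                      # log-probabilities
print(torch.softmax(multi_logits, -1))            # ~= ps

p = torch.tensor(0.25)
binary_logit = torch.log(p) - torch.log1p(-p)     # log-odds
print(torch.sigmoid(binary_logit))                # ~= p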
def poisson_nll_loss(input, target, log_input=True, full=False, size_average=True): r"""Poisson negative log likelihood loss. See :class:`~torch.nn.PoissonNLLLoss` for details. Args: input: expectation of underlying Poisson distribution. target: random sample :math:`target \sim Pois(input)`. log_input: if True the loss is computed as `exp(input) - target * input`, if False then loss is `input - target * log(input)`. Default: True full: whether to compute full loss, i. e. to add the Stirling approximation term. Default: False `target * log(target) - target + 0.5 * log(2 * pi * target)`. size_average: By default, the losses are averaged over observations for each minibatch. However, if the field sizeAverage is set to False, the losses are instead summed for each minibatch. Default: True """ if log_input: loss = torch.exp(input) - target * input else: loss = input - target * torch.log(input) if full: mask = target > 1 loss[mask] += (target * torch.log(target) - target + 0.5 * torch.log(2 * math.pi * target))[mask] if size_average: return torch.mean(loss) else: return torch.sum(loss)
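# A hedged worked example of the log_input=True branch above: `input` is the
# log-rate, so the per-element loss is exp(input) - target * input.
import torch

log_rate = torch.tensor([0.0, 1.0, -0.5])
target = torch.tensor([1.0, 2.0, 0.0])
loss = torch.exp(log_rate) - target * log_rate
print(loss.mean())   # matches the default (size_average=True, full=False) behaviour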
def _mu_law(self, x): m = self._variable(torch.FloatTensor(1)) m[:] = self.n_categories + 1 s = torch.sign(x) x = torch.abs(x) x = s * (torch.log(1 + (self.n_categories * x)) / torch.log(m)) return x
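# A standalone sketch of the mu-law companding implemented above, assuming
# n_categories = 255 (the usual 8-bit audio setting); inputs are expected in [-1, 1].
import torch

x = torch.tensor([-0.5, 0.0, 0.01, 0.5])
mu = 255.0
y = torch.sign(x) * torch.log1p(mu * torch.abs(x)) / torch.log(torch.tensor(mu + 1.0))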
def bbox_transform(anchor_rois, gt_rois): """ :param anchor_rois <torch.Tensor>: :param gt_rois <torch.Tensor>: :return: """ anchor_widths = anchor_rois[:, 3] - anchor_rois[:, 0] anchor_heights = anchor_rois[:, 4] - anchor_rois[:, 1] anchor_lengths = anchor_rois[:, 5] - anchor_rois[:, 2] anchor_ctr_x = anchor_rois[:, 0] + 0.5 * anchor_widths anchor_ctr_y = anchor_rois[:, 1] + 0.5 * anchor_heights anchor_ctr_z = anchor_rois[:, 2] + 0.5 * anchor_lengths gt_widths = gt_rois[:, 3] - gt_rois[:, 0] gt_heights = gt_rois[:, 4] - gt_rois[:, 1] gt_lengths = gt_rois[:, 5] - gt_rois[:, 2] gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights gt_ctr_z = gt_rois[:, 2] + 0.5 * gt_lengths targets_dx = (gt_ctr_x - anchor_ctr_x) / (anchor_widths + 1e-14) targets_dy = (gt_ctr_y - anchor_ctr_y) / (anchor_heights + 1e-14) targets_dz = (gt_ctr_z - anchor_ctr_z) / (anchor_lengths + 1e-14) targets_dw = torch.log(gt_widths / (anchor_widths + 1e-14) + 1e-14) targets_dh = torch.log(gt_heights / (anchor_heights + 1e-14) + 1e-14) targets_dl = torch.log(gt_lengths / (anchor_lengths + 1e-14) + 1e-14) targets = torch.stack([targets_dx, targets_dy, targets_dz, targets_dw, targets_dh, targets_dl], 1) return targets
def log_Bernoulli(x, mean, average=False, dim=None): probs = torch.clamp( mean, min=min_epsilon, max=max_epsilon ) log_bernoulli = x * torch.log( probs ) + (1. - x ) * torch.log( 1. - probs ) if average: return torch.mean( log_bernoulli, dim ) else: return torch.sum( log_bernoulli, dim )
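# A hedged usage sketch, assuming log_Bernoulli() above is in scope together with
# the module-level min_epsilon / max_epsilon clamping constants it relies on.
import torch

x = torch.tensor([[1.0, 0.0, 1.0]])
mean = torch.tensor([[0.9, 0.2, 0.6]])
print(log_Bernoulli(x, mean, dim=1))   # per-example Bernoulli log-likelihood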
def sample_relax_z(logits): B = logits.shape[0] C = logits.shape[1] u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda() gumbels = -torch.log(-torch.log(u)) z = logits + gumbels return z
def compute_stuff(mask_chosen, scores, weights, volumes): bs = weights.size(0) mask_chosen = Variable(mask_chosen.float()) probs = 1e-6 + (1-2e-6) * F.softmax(scores) lgp = (torch.log(probs) * mask_chosen + torch.log(1-probs) * (1-mask_chosen)).sum(1) w = (weights * mask_chosen).sum(1) v = (volumes * mask_chosen).sum(1) return lgp, w, v
def optimize_cnt(worm_img, skel_prev, skel_width, segment_length, n_epochs = 1000): #this is the variable that is going t obe modified skel_r = skel_prev.data #+ torch.zeros(*skel_prev.size()).normal_() skel_r = torch.nn.Parameter(skel_r) optimizer = optim.Adam([skel_r], lr=0.1) for ii in range(n_epochs): skel_map = get_skel_map(skel_r, skel_width) #skel_map += 1e-3 p_w = (skel_map*worm_img) skel_map_inv = (-skel_map).add_(1) worm_img_inv = (-worm_img).add_(1) p_bng = (skel_map_inv*worm_img_inv) #p_bng = torch.sqrt(p_bng) #c_loss = F.binary_cross_entropy(p_w, p_bng) c_loss = -(p_bng*torch.log(p_w + 1.e-3) + p_w*torch.log(p_bng + 1.e-3)).mean() ds = skel_r[1:] - skel_r[:-1] dds = ds[1:] - ds[:-1] #seg_mean = seg_sizes.mean() cont_loss = ds.norm(p=2) curv_loss = dds.norm(p=2) seg_sizes = ((ds).pow(2)).sum(1).sqrt() d1 = seg_sizes-segment_length*0.9 d2 = seg_sizes-segment_length*1.5 seg_loss = (torch.exp(-d1) + torch.exp(d2)).mean() #(seg_sizes-segment_length).cosh().mean() #seg_loss = ((seg_sizes - segment_length)).cosh().mean() #seg_mean_loss = ((seg_mean-seg_sizes).abs() + 1e-5).mean() loss = 100*c_loss + 50*seg_loss + cont_loss + curv_loss #loss = 50*c_loss + seg_loss optimizer.zero_grad() loss.backward() #torch.nn.utils.clip_grad_norm([skel_r], 0.001) optimizer.step() if ii % 250 == 0: print(ii, loss.data[0], c_loss.data[0], seg_loss.data[0], cont_loss.data[0], curv_loss.data[0] ) return skel_r, skel_map
def categorical(mean, temp): g = -torch.log(1e-10 - torch.log(1e-10+Variable(mean.data.new(mean.size()).uniform_()))) if mean.ndim != 3: return F.softmax((torch.log(mean + 1e-10) + g)/temp) else: shape = (mean.size()[0] * mean.size()[1], mean.size(2)) samples = F.softmax(((torch.log(mean + 1e-10) + g)/temp).view(shape)) return samples.view_as(mean)
def sample_relax_given_b(logits, b): u_b = torch.rand(B,1).clamp(1e-10, 1.-1e-10).cuda() z_tilde_b = -torch.log(-torch.log(u_b)) u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda() z_tilde = -torch.log((- torch.log(u) / torch.softmax(logits,dim=1)) - torch.log(u_b)) z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b) return z_tilde
def batch_log_pdf(self, x): """ Ref: :py:meth:`pyro.distributions.distribution.Distribution.batch_log_pdf` """ a = self.a.expand(self.shape(x)) b = self.b.expand(self.shape(x)) lb = x.ge(a).type_as(a) ub = x.le(b).type_as(b) batch_log_pdf_shape = self.batch_shape(x) + (1,) return torch.sum(torch.log(lb.mul(ub)) - torch.log(b - a), -1).contiguous().view(batch_log_pdf_shape)
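# A standalone illustration of the uniform log-density the method above computes:
# inside [a, b] it is -log(b - a); outside, log(0) gives -inf as intended.
import torch

a, b = torch.tensor(0.0), torch.tensor(2.0)
x = torch.tensor([0.5, 3.0])
inside = (x.ge(a) & x.le(b)).float()
print(torch.log(inside) - torch.log(b - a))   # tensor([-0.6931, -inf])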
def forward(self, prob, targets, infos, wt=None):
    prob = prob.clamp(min=1e-7, max=1-1e-7)
    # Fall back to uniform weights when none are given, then optionally rescale
    # by the positive/negative class weights.
    if wt is None:
        wt = torch.ones_like(prob)
    wt1 = wt
    if config.TRAIN.CE_LOSS_WEIGHTED and self.pos_wt is not None:
        wt1 = wt * (targets.detach() * self.pos_wt + (1 - targets.detach()) * self.neg_wt)
    loss = -torch.mean(wt1 * (torch.log(prob) * targets + torch.log(1 - prob) * (1 - targets)))
    return loss
def custom_cross_entropy(x, y): sigmoid_x = torch.sigmoid(x) sigmoid_x2 = torch.sigmoid(x ** 2) neg_log_sigmoid_x = -1 * torch.log(sigmoid_x) neg_log_1_minus_sigmoid_x2 = -1 * torch.log(1 - sigmoid_x2) l1 = torch.mul(y, neg_log_sigmoid_x) l2 = torch.mul(1 - y, neg_log_1_minus_sigmoid_x2) return torch.sum(l1 + l2)
def forward(self, x=None, warmup=1., inf_net=None): #, k=1): #, marginf_type=0): outputs = {} B = x.shape[0] if inf_net is None: # mu, logvar = self.inference_net(x) z, logits = self.q.sample(x) else: # mu, logvar = inf_net.inference_net(x) z, logqz = inf_net.sample(x) # print (z[0]) # b = harden(z) # print (b[0]) # logpz = torch.sum( self.prior.log_prob(b), dim=1) # print (logpz[0]) # print (logpz.shape) # fdasf probs_q = torch.sigmoid(logits) probs_q = torch.clamp(probs_q, min=.00000001, max=.9999999) probs_p = torch.ones(B, self.z_size).cuda() *.5 KL = probs_q*torch.log(probs_q/probs_p) + (1-probs_q)*torch.log((1-probs_q)/(1-probs_p)) KL = torch.sum(KL, dim=1) # print (z.shape) # Decode Image x_hat = self.generator.forward(z) alpha = torch.sigmoid(x_hat) beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale) x_noise = torch.clamp(x + torch.FloatTensor(x.shape).uniform_(0., 1./256.).cuda(), min=1e-5, max=1-1e-5) logpx = beta.log_prob(x_noise) #[120,3,112,112] # add uniform noise here logpx = torch.sum(logpx.view(B, -1),1) # [PB] * self.w_logpx # print (logpx.shape,logpz.shape,logqz.shape) # fsdfda log_ws = logpx - KL #+ logpz - logqz outputs['logpx'] = torch.mean(logpx) outputs['x_recon'] = alpha # outputs['welbo'] = torch.mean(logpx + warmup*( logpz - logqz)) outputs['welbo'] = torch.mean(logpx + warmup*(KL)) outputs['elbo'] = torch.mean(log_ws) outputs['logws'] = log_ws outputs['z'] = z outputs['logpz'] = torch.zeros(1) #torch.mean(logpz) outputs['logqz'] = torch.mean(KL) # outputs['logvar'] = logvar return outputs
def probs_to_logits(probs, is_binary=False): r""" Converts a tensor of probabilities into logits. For the binary case, this denotes the probability of occurrence of the event indexed by `1`. For the multi-dimensional case, the values along the last dimension denote the probabilities of occurrence of each of the events. """ ps_clamped = clamp_probs(probs) if is_binary: return torch.log(ps_clamped) - torch.log1p(-ps_clamped) return torch.log(ps_clamped)
def forward(self, inputs_NTF, seq_lens_N=None, pad_val=0, return_hiddens=False): ''' Forward pass of input data through NN module Cleanly handles variable-length sequences (though internals a bit messy). Args ---- inputs_NTF : 3D array (n_sequences, n_timesteps, n_features) Each row is one sequence, padded to length T = n_timesteps seq_lens_N : 1D array-like (n_sequences) Each entry indicates how many timesteps the n-th sequence has. (Remaining entries are all padding and should be ignored). Returns ------- yproba_N2 : 2D array (n_sequences, 2) Each row gives probability that given sequence is class 0 or 1 Each row sums to one hiddens_NTH : 3D array (n_sequences, n_timesteps, n_hiddens) Each (n,t) index gives the hidden-state vector at sequence n, timestep t ''' N, T, F = inputs_NTF.shape if seq_lens_N is None: seq_lens_N = torch.zeros(N, dtype=torch.int64) for n in range(N): bmask_T = torch.all(inputs_NTF[n] == pad_val, dim=-1) seq_lens_N[n] = np.searchsorted(bmask_T, 1) ## Create PackedSequence representation to handle variable-length sequences # Requires sorting all sequences in current batch in descending order by length sorted_seq_lens_N, ids_N = seq_lens_N.sort(0, descending=True) _, rev_ids_N = ids_N.sort(0, descending=False) sorted_inputs_NTF = inputs_NTF[ids_N] packed_inputs_PF = nn.utils.rnn.pack_padded_sequence(sorted_inputs_NTF, sorted_seq_lens_N, batch_first=True) # Apply dropout to the non-recurrent layer weights between LSTM layers before output ie is weights for h_(l-1)^t # See https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM for choosing the right weights if (self.dropout_proba_non_recurrent > 0.0 and self.rnn.num_layers > 1): dropout = nn.Dropout(p=self.dropout_proba_non_recurrent) self.rnn.weight_ih_l1 = torch.nn.Parameter(dropout( self.rnn.weight_ih_l1), requires_grad=True) self.rnn.bias_ih_l1 = torch.nn.Parameter(dropout( self.rnn.bias_ih_l1), requires_grad=True) # Apply the RNN if (self.convert_to_log_reg == False): packed_outputs_PH, _ = self.rnn(packed_inputs_PF) # Unpack to N x T x H padded representation outputs_NTH, _ = nn.utils.rnn.pad_packed_sequence( packed_outputs_PH, batch_first=True) # Apply weights + softmax to final timestep of each sequence end_hiddens_NH = outputs_NTH[range(N), sorted_seq_lens_N - 1] yproba_N2 = nn.functional.softmax(self.output(end_hiddens_NH), dim=-1) #yproba_N2 = nn.functional.logsigmoid(self.output(end_hiddens_NH)) # Unsort and return if return_hiddens: return yproba_N2.index_select( 0, rev_ids_N), outputs_NTH.index_select(0, rev_ids_N) else: return yproba_N2.index_select(0, rev_ids_N) else: # convert to logistic regression assert ( self.rnn.hidden_size == F ), "Number of hidden units must equal number of input features for conversion to logistic regression!" 
if ( self.first_pass == False ): # weird handling of validation set of gridsearchcv and validation set of LSTM object if (N != self.ht.shape[1]) & (N != self.htval.shape[1]): init_weights_for_logistic_regression_conversion(self.rnn) self.first_pass = True # set end hidden layer output to be same as input for logistic regression conversion h0 = torch.zeros(self.rnn.num_layers, N, self.rnn.hidden_size).double() c0 = torch.ones(self.rnn.num_layers, N, self.rnn.hidden_size).double() if (self.first_pass) & (self.training): packed_outputs_PH, (self.ht, self.ct) = self.rnn( packed_inputs_PF, (h0, c0)) elif (self.first_pass == False) & (self.training): packed_outputs_PH, (self.ht, self.ct) = self.rnn( packed_inputs_PF, (self.ht, self.ct)) elif (self.first_pass) & (self.training == False): # eval mode packed_outputs_PH, (self.htval, self.ctval) = self.rnn( packed_inputs_PF, (h0, c0)) self.first_pass = False elif (self.first_pass == False) & (self.training == False): packed_outputs_PH, (self.htval, self.ctval) = self.rnn( packed_inputs_PF, (self.htval, self.ctval)) outputs_NTH, _ = nn.utils.rnn.pad_packed_sequence( packed_outputs_PH, batch_first=True) outputs_NTH = torch.log( outputs_NTH / (1 - outputs_NTH) ) # inverse sigmoid the output of hidden units to get back input features outputs_NTH[torch.isinf( outputs_NTH)] = 0 # remove inf's from sigmoid inversion end_hiddens_NH = outputs_NTH[range(N), sorted_seq_lens_N - 1] yproba_N2 = nn.functional.logsigmoid( self.output(end_hiddens_NH)).index_select(0, rev_ids_N) return yproba_N2
def forward(self, x): #syage 0 # x is prediction of resnet, which will be ignored # xr1 to xr4 are the resnet feature spaces beginning on top. x, xr1, xr2, xr3, xr4 = self.pretrained_resnet(x) if self.mode is not None: if self.mode == 'mode1': #reduce resnet filter, than upsample and concat with other reduced resnet features x_redu_4 = self.onexone1(xr4) x_redu_3 = self.onexone2(xr3) x_redu_2 = self.onexone3(xr2) x_redu_1 = self.onexone4(xr1) x_up_1 = self.upsample(x_redu_4) x_cat_1 = torch.cat((x_up_1, x_redu_3), dim=1) x_up_2 = self.upsample(x_cat_1) x_cat_2 = torch.cat((x_up_2, x_redu_2), dim=1) x_up_3 = self.upsample(x_cat_2) x_cat_3 = torch.cat((x_up_3, x_redu_1), dim=1) reduced = self.onexone(x_cat_3) elif self.mode == 'mode2': # reduce resnet filter, than upsample and concat with other reduced resnet features x_redu_4 = self.onexone1(xr4) x_redu_3 = self.onexone2(xr3) x_redu_2 = self.onexone3(xr2) x_redu_1 = self.onexone4(xr1) x_up_1 = self.upsample(x_redu_4) x_cat_1 = torch.cat((x_up_1, x_redu_3), dim=1) x_up_2 = self.upsample(x_cat_1) x_cat_2 = torch.cat((x_up_2, x_redu_2), dim=1) x_up_3 = self.upsample(x_cat_2) x_cat_3 = torch.cat((x_up_3, x_redu_1), dim=1) reduced = self.onexone(x_cat_3) else: #upsample xu1 = self.upsample(xr4) cat1 = torch.cat((xu1, xr3), dim=1) #upsample 2 xu2 = self.upsample(cat1) cat2 = torch.cat((xu2, xr2), dim=1) #upsample 3 xu3 = self.upsample(cat2) cat3 = torch.cat((xu3, xr1), dim=1) # final 1x1 convolution to reduce channels to num_classes reduced = self.onexone(cat3) # final sigmoid layer before the saliency maps # todo: change pooling parameters saliency_map = self.sigmoid(reduced) # saliency_map = self.softmax(reduced) class_scores = self.pooling(saliency_map) norm_scores = self.norm(class_scores) # # insert normalization layer for the class scores such that sum = 1 and in [0,1] # # # log scores for Nllloss on RSNA log_scores = torch.log(norm_scores + 1e-8) return(saliency_map, log_scores)
def gaussian_entropy(std): log_std = torch.log(std) norm = autograd.Variable(torch.Tensor([2*np.pi])) return 0.5 * len(std) * (1.0 + torch.log(norm)) + torch.sum(log_std)
def binary_cross_entropy(recon_x, x): return -torch.sum(x * torch.log(recon_x + 1e-8) + (1 - x) * torch.log(1 - recon_x + 1e-8), dim=-1)
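# A hedged sanity check: up to the 1e-8 stabilizer, the hand-rolled BCE above
# agrees with torch.nn.functional.binary_cross_entropy summed over the last dim.
import torch
import torch.nn.functional as F

x = torch.tensor([[1.0, 0.0, 1.0]])
recon_x = torch.tensor([[0.9, 0.1, 0.8]])
manual = -torch.sum(x * torch.log(recon_x + 1e-8) + (1 - x) * torch.log(1 - recon_x + 1e-8), dim=-1)
builtin = F.binary_cross_entropy(recon_x, x, reduction='none').sum(dim=-1)
print(manual, builtin)   # nearly identical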
def get_entropy(self): logps = F.softmax(self.gen_matrix, 2) result = torch.mean(torch.sum(logps * torch.log(logps + 1e-10), 1)) result = result.cpu() if use_cuda else result return (-result.data.numpy())
def entropy(output): output = torch.clamp(output, 1e-5, 1 - 1e-5) entropy = -output * torch.log(output) return torch.mean(entropy)
s_lst.append(s) a_lst.append(a) r_lst.append(r / 100.0) mask_lst.append(1 - done) s = s_prime step_idx += 1 s_final = torch.from_numpy(s_prime).float() v_final = model.v(s_final).detach().clone().numpy() td_target = compute_target(v_final, r_lst, mask_lst) td_target_vec = td_target.reshape(-1) s_vec = torch.tensor(s_lst).float().reshape( -1, 3) # 4 == Dimension of state a_vec = torch.tensor(a_lst).reshape(-1).unsqueeze(1) advantage = td_target_vec - model.v(s_vec).reshape(-1) pi = model.pi(s_vec, softmax_dim=1) pi_a = pi.gather(1, a_vec).reshape(-1) loss = -(torch.log(pi_a) * advantage.detach()).mean() +\ F.smooth_l1_loss(model.v(s_vec).reshape(-1), td_target_vec) optimizer.zero_grad() loss.backward() optimizer.step() if step_idx % PRINT_INTERVAL == 0: test(step_idx, model) envs.close()
def train(self, model, data): tot_loss_lst = [] pi_loss_lst = [] entropy_lst = [] move_entropy_lst = [] v_loss_lst = [] # to calculate fixed advantages before update data_with_adv = [] for mini_batch in data: s, a, m, r, s_prime, done_mask, prob, need_move = mini_batch with torch.no_grad(): pi, pi_move, v, _ = model(s) pi_prime, pi_m_prime, v_prime, _ = model(s_prime) td_target = r + self.gamma * v_prime * done_mask delta = td_target - v # [horizon * batch_size * 1] delta = delta.detach().cpu().numpy() advantage_lst = [] advantage = np.array([0]) for delta_t in delta[::-1]: advantage = self.gamma * self.lmbda * advantage + delta_t advantage_lst.append(advantage) advantage_lst.reverse() advantage = torch.tensor(advantage_lst, dtype=torch.float, device=model.device) data_with_adv.append((s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage)) for i in range(self.K_epoch): for mini_batch in data_with_adv: s, a, m, r, s_prime, done_mask, prob, need_move, td_target, advantage = mini_batch pi, pi_move, v, _ = model(s) pi_prime, pi_m_prime, v_prime, _ = model(s_prime) pi_a = pi.gather(2,a) pi_m = pi_move.gather(2,m) pi_am = pi_a*(1-need_move + need_move*pi_m) ratio = torch.exp(torch.log(pi_am) - torch.log(prob)) # a/b == exp(log(a)-log(b)) surr1 = ratio * advantage surr2 = torch.clamp(ratio, 1-self.eps_clip, 1+self.eps_clip) * advantage entropy = -torch.log(pi_am) move_entropy = -need_move*torch.log(pi_m) surr_loss = -torch.min(surr1, surr2) v_loss = F.smooth_l1_loss(v, td_target.detach()) entropy_loss = -1*self.entropy_coef*entropy loss = surr_loss + v_loss + entropy_loss.mean() loss = loss.mean() model.optimizer.zero_grad() loss.backward() nn.utils.clip_grad_norm_(model.parameters(), self.grad_clip) model.optimizer.step() tot_loss_lst.append(loss.item()) pi_loss_lst.append(surr_loss.mean().item()) v_loss_lst.append(v_loss.item()) entropy_lst.append(entropy.mean().item()) n_need_move = torch.sum(need_move).item() if n_need_move == 0: move_entropy_lst.append(0) else: move_entropy_lst.append((torch.sum(move_entropy)/n_need_move).item()) return np.mean(tot_loss_lst), np.mean(pi_loss_lst), np.mean(v_loss_lst), np.mean(entropy_lst), np.mean(move_entropy_lst)
def _model_scores(self, r_src, r_trg, mask_mat, args): ''' Compute the NCE scores for predicting r_src->r_trg. Input: r_src : (n_batch_gpu, n_rkhs) r_trg : (n_rkhs, n_batch * n_locs) mask_mat : (n_batch_gpu, n_batch) Output: raw_scores : (n_batch_gpu, n_locs) scores : (n_batch_gpu, n_locs) lgt_reg : scalar ''' # n_batch_gpu * n_gpu = n_batch, cuda:0 is not used if gpu >=4 n_batch_gpu = mask_mat.size(0) n_batch = mask_mat.size(1) n_locs = r_trg.size(1) // n_batch n_rkhs = r_src.size(1) # reshape mask_mat for ease-of-use mask_pos = mask_mat.unsqueeze(dim=2).expand(-1, -1, n_locs).float() mask_neg = 1. - mask_pos # compute src->trg raw scores for batch on this gpu raw_scores = torch.mm(r_src, r_trg).float() raw_scores = raw_scores.reshape(n_batch_gpu, n_batch, n_locs) if args.rkhs: raw_scores = raw_scores / n_rkhs**0.5 if args.l2_reg: lgt_reg = args.l2_reg * (raw_scores**2.).mean() else: lgt_reg = torch.tensor(0.0).to(device=raw_scores.device) if args.use_tanh_clip: assert args.hard_clamping == None raw_scores = tanh_clip(raw_scores, clip_val=self.tclip) if args.hard_clamping is not None: assert args.use_tanh_clip == False raw_scores = torch.clamp(raw_scores, min=args.hard_clamping[0], max=args.hard_clamping[1]) # If using our proposed objective: if args.loss == "ours": ''' start calculating scores. pos_scores includes scores for all the positive samples neg_scores includes scores for all the negative samples pos mean is mean over positive samples neg mean is mean over negative samples ''' raw_scores_squared = raw_scores**2 # (n_batch_gpu, 1) pos_scores = ((mask_pos * raw_scores).sum(dim=(1, 2))) / n_locs # (n_batch_gpu, 1) neg_scores = ((mask_neg * raw_scores_squared).sum(dim=(1, 2))) / ( (n_batch - 1) * n_locs) # relative density ratio to help stability if not args.relative_ratio == 0.0: """COMMAND SHOULD BE ADDED HERE""" pos_scores_squared = ( (mask_pos * raw_scores_squared).sum(dim=(1, 2))) / n_locs pos_scores = pos_scores - 0.5 * args.relative_ratio * pos_scores_squared neg_scores = (1. - args.relative_ratio) * neg_scores scores = pos_scores - 0.5 * neg_scores elif args.loss == "JS": """Lower bound on Jensen-Shannon divergence from Nowozin et al. 
(2016).""" # Implementation if f is simply a batch_by_batch score # f_diag = f.diag() # first_term = -F.softplus(-f_diag).mean() # n = f.size(0) # second_term = (torch.sum(F.softplus(f)) - # torch.sum(F.softplus(f_diag))) / (n * (n - 1.)) # return first_term - second_term pos_scores = ( (mask_pos * -F.softplus(-raw_scores)).sum(dim=(1, 2))) / n_locs neg_scores = ((mask_neg * F.softplus(raw_scores)).sum( dim=(1, 2))) / ((n_batch - 1) * n_locs) scores = pos_scores - neg_scores elif args.loss == "nce": ''' pos_scores includes scores for all the positive samples neg_scores includes scores for all the negative samples, with scores for positive samples set to the min score (-self.tclip here) ''' # (n_batch_gpu, n_locs) pos_scores = (mask_pos * raw_scores).sum(dim=1) # (n_batch_gpu, n_batch, n_locs) neg_scores = (mask_neg * raw_scores) - (self.tclip * mask_pos) # (n_batch_gpu, n_batch * n_locs) neg_scores = neg_scores.reshape(n_batch_gpu, -1) # (n_batch_gpu, n_batch * n_locs) mask_neg = mask_neg.reshape(n_batch_gpu, -1) ''' for each set of positive examples P_i, compute the max over scores for the set of negative samples N_i that are shared across P_i ''' # (n_batch_gpu, 1) neg_maxes = torch.max(neg_scores, dim=1, keepdim=True)[0] ''' compute a "partial, safe sum exp" over each negative sample set N_i, to broadcast across the positive samples in P_i which share N_i -- size will be (n_batch_gpu, 1) ''' neg_sumexp = \ (mask_neg * torch.exp(neg_scores - neg_maxes)).sum(dim=1, keepdim=True) ''' use broadcasting of neg_sumexp across the scores in P_i, to compute the log-sum-exps for the denominators in the NCE log-softmaxes -- size will be (n_batch_gpu, n_locs) ''' all_logsumexp = torch.log( torch.exp(pos_scores - neg_maxes) + neg_sumexp) # compute numerators for the NCE log-softmaxes pos_shiftexp = pos_scores - neg_maxes # compute the final log-softmax scores for NCE... scores = pos_shiftexp - all_logsumexp else: print("Currently loss could only be 'nce' or 'ours'") assert False return scores, pos_scores, lgt_reg
def word_loss(word_probs, word):
    # outcome is a one-hot vector
    prob_of_word = torch.dot(word_probs, word)
    return -1 * torch.log(prob_of_word)
def train(args, snapshot_path): base_lr = args.base_lr train_data_path = args.root_path batch_size = args.batch_size max_iterations = args.max_iterations def create_model(ema=False): # Network definition net = unet_3D(n_classes=2, in_channels=1) model = net.cuda() if ema: for param in model.parameters(): param.detach_() return model model = create_model() ema_model = create_model(ema=True) db_train = BraTS2019(base_dir=train_data_path, split='train', num=None, transform=transforms.Compose([ RandomRotFlip(), RandomCrop(args.patch_size), ToTensor(), ])) def worker_init_fn(worker_id): random.seed(args.seed + worker_id) labeled_idxs = list(range(0, args.labeled_num)) unlabeled_idxs = list(range(args.labeled_num, 250)) batch_sampler = TwoStreamBatchSampler(labeled_idxs, unlabeled_idxs, batch_size, batch_size - args.labeled_bs) trainloader = DataLoader(db_train, batch_sampler=batch_sampler, num_workers=4, pin_memory=True, worker_init_fn=worker_init_fn) model.train() ema_model.train() optimizer = optim.SGD(model.parameters(), lr=base_lr, momentum=0.9, weight_decay=0.0001) ce_loss = CrossEntropyLoss() dice_loss = losses.DiceLoss(2) writer = SummaryWriter(snapshot_path + '/log') logging.info("{} iterations per epoch".format(len(trainloader))) iter_num = 0 max_epoch = max_iterations // len(trainloader) + 1 best_performance = 0.0 iterator = tqdm(range(max_epoch), ncols=70) for epoch_num in iterator: for i_batch, sampled_batch in enumerate(trainloader): volume_batch, label_batch = sampled_batch['image'], sampled_batch[ 'label'] volume_batch, label_batch = volume_batch.cuda(), label_batch.cuda() unlabeled_volume_batch = volume_batch[args.labeled_bs:] noise = torch.clamp( torch.randn_like(unlabeled_volume_batch) * 0.1, -0.2, 0.2) ema_inputs = unlabeled_volume_batch + noise outputs = model(volume_batch) outputs_soft = torch.softmax(outputs, dim=1) with torch.no_grad(): ema_output = ema_model(ema_inputs) T = 8 _, _, d, w, h = unlabeled_volume_batch.shape volume_batch_r = unlabeled_volume_batch.repeat(2, 1, 1, 1, 1) stride = volume_batch_r.shape[0] // 2 preds = torch.zeros([stride * T, 2, d, w, h]).cuda() for i in range(T // 2): ema_inputs = volume_batch_r + \ torch.clamp(torch.randn_like( volume_batch_r) * 0.1, -0.2, 0.2) with torch.no_grad(): preds[2 * stride * i:2 * stride * (i + 1)] = ema_model(ema_inputs) preds = torch.softmax(preds, dim=1) preds = preds.reshape(T, stride, 2, d, w, h) preds = torch.mean(preds, dim=0) uncertainty = -1.0 * \ torch.sum(preds*torch.log(preds + 1e-6), dim=1, keepdim=True) loss_ce = ce_loss(outputs[:args.labeled_bs], label_batch[:args.labeled_bs][:]) loss_dice = dice_loss(outputs_soft[:args.labeled_bs], label_batch[:args.labeled_bs].unsqueeze(1)) supervised_loss = 0.5 * (loss_dice + loss_ce) consistency_weight = get_current_consistency_weight(iter_num // 150) consistency_dist = losses.softmax_mse_loss( outputs[args.labeled_bs:], ema_output) # (batch, 2, 112,112,80) threshold = (0.75 + 0.25 * ramps.sigmoid_rampup( iter_num, max_iterations)) * np.log(2) mask = (uncertainty < threshold).float() consistency_loss = torch.sum( mask * consistency_dist) / (2 * torch.sum(mask) + 1e-16) loss = supervised_loss + consistency_weight * consistency_loss optimizer.zero_grad() loss.backward() optimizer.step() update_ema_variables(model, ema_model, args.ema_decay, iter_num) lr_ = base_lr * (1.0 - iter_num / max_iterations)**0.9 for param_group in optimizer.param_groups: param_group['lr'] = lr_ iter_num = iter_num + 1 writer.add_scalar('info/lr', lr_, iter_num) 
writer.add_scalar('info/total_loss', loss, iter_num) writer.add_scalar('info/loss_ce', loss_ce, iter_num) writer.add_scalar('info/loss_dice', loss_dice, iter_num) writer.add_scalar('info/consistency_loss', consistency_loss, iter_num) writer.add_scalar('info/consistency_weight', consistency_weight, iter_num) logging.info( 'iteration %d : loss : %f, loss_ce: %f, loss_dice: %f' % (iter_num, loss.item(), loss_ce.item(), loss_dice.item())) writer.add_scalar('loss/loss', loss, iter_num) if iter_num % 20 == 0: image = volume_batch[0, 0:1, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=True) writer.add_image('train/Image', grid_image, iter_num) image = outputs_soft[0, 1:2, :, :, 20:61:10].permute(3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Predicted_label', grid_image, iter_num) image = label_batch[0, :, :, 20:61:10].unsqueeze(0).permute( 3, 0, 1, 2).repeat(1, 3, 1, 1) grid_image = make_grid(image, 5, normalize=False) writer.add_image('train/Groundtruth_label', grid_image, iter_num) if iter_num > 0 and iter_num % 200 == 0: model.eval() avg_metric = test_all_case(model, args.root_path, test_list="val.txt", num_classes=2, patch_size=args.patch_size, stride_xy=64, stride_z=64) if avg_metric[:, 0].mean() > best_performance: best_performance = avg_metric[:, 0].mean() save_mode_path = os.path.join( snapshot_path, 'iter_{}_dice_{}.pth'.format( iter_num, round(best_performance, 4))) save_best = os.path.join( snapshot_path, '{}_best_model.pth'.format(args.model)) torch.save(model.state_dict(), save_mode_path) torch.save(model.state_dict(), save_best) writer.add_scalar('info/val_dice_score', avg_metric[0, 0], iter_num) writer.add_scalar('info/val_hd95', avg_metric[0, 1], iter_num) logging.info('iteration %d : dice_score : %f hd95 : %f' % (iter_num, avg_metric[0, 0].mean(), avg_metric[0, 1].mean())) model.train() if iter_num % 3000 == 0: save_mode_path = os.path.join(snapshot_path, 'iter_' + str(iter_num) + '.pth') torch.save(model.state_dict(), save_mode_path) logging.info("save model to {}".format(save_mode_path)) if iter_num >= max_iterations: break if iter_num >= max_iterations: iterator.close() break writer.close() return "Training Finished!"
def transition(self, z, temperature, step): #print ('z', np.isnan(z.data.cpu().numpy()).any()) # print z.requires_grad h1 = self.act(self.bn7_list[step](self.fc_trans_1(z))) #print h1 h2 = self.act(self.bn8_list[step](self.fc_trans_1_1(h1))) #print h2 h3 = self.act(self.bn9_list[step](self.fc_trans_1_2(h2))) h4 = self.act(self.bn9_1_list[step](self.fc_trans_1_3(h3))) h5 = self.act(self.bn9_2_list[step](self.fc_trans_1_4(h4))) #print h3 h5 = torch.clamp(h3, min=0, max=5) #print h3 mu = self.bn5_list[step]( self.fc_z_mu(h3)) #### why not non-linearity applied here #print mu sigma = self.bn6_list[step](self.fc_z_sigma(h3)) #print sigma #print ('mu', np.isnan(mu.data.cpu().numpy()).any()) #print ('sigma', np.isnan(sigma.data.cpu().numpy()).any()) eps = Variable(mu.data.new(mu.size()).normal_()) #print ('eps', np.isnan(eps.data.cpu().numpy()).any()) #print eps #z_new = mu + T.sqrt(args.sigma * temperature) * T.exp(0.5 * sigma) * eps #z_new = (z_new - T.mean(z_new, axis=0, keepdims=True)) / (0.001 + T.std(z_new, axis=0, keepdims=True)) if args.cuda: sigma_ = Variable( torch.sqrt( torch.FloatTensor(1).fill_(args.sigma * temperature)).cuda()) #print ('sigma_', np.isnan(sigma_.data.cpu().numpy()).any()) else: sigma_ = Variable( torch.sqrt( torch.FloatTensor(1).fill_(args.sigma * temperature))) z_new = eps.mul(sigma.mul(0.5).exp_()).mul(sigma_).add_(mu) #print ('z_new', np.isnan(z_new.data.cpu().numpy()).any()) z_new = (z_new - z_new.mean(0)) / (0.001 + z_new.std(0)) #print ('z_new_mean', np.isnan(z_new.mean(0).data.cpu().numpy()).any()) #print ('z_new_std', np.isnan(z_new.std(0).data.cpu().numpy()).any()) #print ('z_new', np.isnan(z_new.data.cpu().numpy()).any()) if args.cuda: sigma_ = Variable( torch.log( torch.FloatTensor(1).fill_( args.sigma * temperature)).cuda()) + sigma #print ('sigma2', np.isnan(sigma_.data.cpu().numpy()).any()) else: sigma_ = Variable( torch.log( torch.FloatTensor(1).fill_( args.sigma * temperature))) + sigma log_p_reverse = log_normal2(z, mu, sigma_, eps=1e-6).mean() #print ('z', np.isnan(z.data.cpu().numpy()).any()) #print ('log_p_reverse', log_p_reverse) z_new = torch.clamp(z_new, min=-4, max=4) #print z_new return z_new, log_p_reverse, mu, sigma
def sample_gumbel(self, shape, eps=1e-20):
    # Sample from Gumbel(0, 1)
    U = torch.rand(shape).float()
    return -torch.log(eps - torch.log(U + eps))
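# A hedged usage sketch of the Gumbel-max trick built on sample_gumbel() above;
# `self` is unused by that method, so None is passed purely for illustration.
import torch

logits = torch.tensor([1.0, 2.0, 0.5])
g = sample_gumbel(None, logits.shape)
sample = torch.argmax(logits + g)   # distributed as Categorical(logits=logits)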
def acosh(self, x): return torch.log(x + (x * x - 1.0).sqrt())
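# A hedged check: for x >= 1 the manual formula above matches torch.acosh
# (available in recent PyTorch releases); the log form is a fallback for older ones.
import torch

x = torch.tensor([1.0, 2.0, 10.0])
manual = torch.log(x + (x * x - 1.0).sqrt())
print(torch.allclose(manual, torch.acosh(x), atol=1e-6))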
def cross_entropy(self, y_hat, y): return -torch.log(y_hat[range(len(y_hat)), y])
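# A hedged sanity check: the gather-style negative log-likelihood above should match
# torch.nn.functional.nll_loss applied to log-probabilities with reduction='none'.
import torch
import torch.nn.functional as F

y_hat = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
y = torch.tensor([0, 1])
manual = -torch.log(y_hat[range(len(y_hat)), y])
builtin = F.nll_loss(torch.log(y_hat), y, reduction='none')
print(manual, builtin)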
def log_sum_exp(x): m = torch.max(x, -1)[0] return m + torch.log(torch.sum(torch.exp(x - m.unsqueeze(-1)), -1))
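# A hedged check: the max-shifted log-sum-exp above should agree with
# torch.logsumexp along the last dimension.
import torch

x = torch.randn(4, 7)
m = torch.max(x, -1)[0]
manual = m + torch.log(torch.sum(torch.exp(x - m.unsqueeze(-1)), -1))
print(torch.allclose(manual, torch.logsumexp(x, dim=-1), atol=1e-6))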
def train_gan(population,p_fitness,batch_size = 20,n_epochs = 100): p_fitness = torch.from_numpy(p_fitness).type("torch.FloatTensor").to(device) gen_explore_all = [] gen_exploit_all = [] dis_loss_all = [] dis_confidence_all = [] for e in range(n_epochs): #shuffle arrays in unison ind = np.arange(len(population)) np.random.shuffle(ind) population = np.array(population)[ind] p_fitness = p_fitness[ind] for i in range(len(population)//batch_size): #turn population into vectors real_batch = [] for b in range(batch_size): real_batch.append(get_params(population[(i*batch_size)+b]).unsqueeze(0)) real_batch = torch.cat(real_batch, dim=0).to(device) #train discriminator on population dis_optimizer.zero_grad() dis_out_r,_ = dis(real_batch) dis_out_r = dis_out_r.squeeze(-1) rank = p_fitness[i*batch_size:(i*batch_size)+batch_size] dis_error_real = (torch.pow((dis_out_r[...,0]-rank),2)) dis_error_real_mean = torch.mean(dis_error_real) dis_error_real_mean.backward(retain_graph=True) #discriminator confidence in its prediction dis_confidence = torch.mean(torch.pow(dis_out_r[...,1]-dis_error_real.detach(),2)) dis_confidence.backward() dis_optimizer.step() #for i in range(len(population)//batch_size): #generate children from population mutation = torch.from_numpy(np.random.uniform(all_a,all_a,batch_size)).type('torch.FloatTensor').to(device) child,confidence = gen_children(population,device,gen,batch_size,a = mutation) #train discriminator dis_out_f,dis_out_last = dis(child) dis_out_f = dis_out_f.squeeze(-1) gen_optimizer.zero_grad() #define generator loss #variety = torch.std(dis_out_f[...,0])#-torch.mean(torch.std(dis_out_last,-1)) probs = torch.softmax(dis_out_f[...,0],-1) variety = -(torch.sum(probs*torch.log(probs))) gen_error_exploration = 0 + (variety)-torch.mean(dis_out_f[...,1]) gen_error_exploitation = -torch.mean(dis_out_f[...,0]) gen_error = gen_error_exploitation + (gen_error_exploration) gen_error.backward() gen_optimizer.step() gen.hidden = None #keep losses to draw graph gen_explore_all.append(gen_error_exploration) gen_exploit_all.append(gen_error_exploitation) dis_loss_all.append(dis_error_real_mean) dis_confidence_all.append(dis_confidence) #print(rank) #print(dis_out_r[...,0]) print(confidence) #print(get_params(population[0])) #print() return gen_explore_all,gen_exploit_all, dis_loss_all,dis_confidence_all
def entropy_logits(logits): a0 = logits - logits.max(dim=-1, keepdim=True)[0] ea0 = torch.exp(a0) z0 = ea0.sum(dim=-1, keepdim=True) p0 = ea0 / z0 return (p0 * (torch.log(z0) - a0)).sum(dim=-1)
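# A hedged check, assuming entropy_logits() above is in scope: it should reproduce
# the entropy of torch.distributions.Categorical for the same logits.
import torch
from torch.distributions import Categorical

logits = torch.randn(5, 3)
print(torch.allclose(entropy_logits(logits), Categorical(logits=logits).entropy(), atol=1e-5))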
def loss(output, target, weights): output = torch.clamp(output, 1e-5, 1 - 1e-5) weights = torch.clamp(weights, 1e-5, 1 - 1e-5) ML = weights * (target * torch.log(output) + (1 - target) * torch.log(1 - output)) return torch.neg(torch.mean(ML))
def _PyramidRoI_Feat(self, feat_maps, rois, im_info): ''' roi pool on pyramid feature maps''' # do roi pooling based on predicted rois img_area = im_info[0][0] * im_info[0][1] h = rois.data[:, 4] - rois.data[:, 2] + 1 w = rois.data[:, 3] - rois.data[:, 1] + 1 roi_level = torch.log(torch.sqrt(h * w) / 50.0) roi_level = torch.round(roi_level + 4) roi_level[roi_level < 2] = 2 roi_level[roi_level > 5] = 5 # roi_level.fill_(5) if cfg.POOLING_MODE == 'crop': # pdb.set_trace() # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5)) # NOTE: need to add pyrmaid grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size) grid_yx = torch.stack( [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous() roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2) elif cfg.POOLING_MODE == 'align': roi_pool_feats = [] box_to_levels = [] for i, l in enumerate(range(2, 6)): if (roi_level == l).sum() == 0: continue # idx_l = (roi_level == l).nonzero().squeeze() idx_l = (roi_level == l).nonzero() if idx_l.shape[0] > 1: idx_l = idx_l.squeeze() else: idx_l = idx_l.view(-1) box_to_levels.append(idx_l) scale = feat_maps[i].size(2) / im_info[0][0] feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale) roi_pool_feats.append(feat) roi_pool_feat = torch.cat(roi_pool_feats, 0) box_to_level = torch.cat(box_to_levels, 0) idx_sorted, order = torch.sort(box_to_level) roi_pool_feat = roi_pool_feat[order] elif cfg.POOLING_MODE == 'pool': roi_pool_feats = [] box_to_levels = [] for i, l in enumerate(range(2, 6)): if (roi_level == l).sum() == 0: continue idx_l = (roi_level == l).nonzero().squeeze() box_to_levels.append(idx_l) scale = feat_maps[i].size(2) / im_info[0][0] feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale) roi_pool_feats.append(feat) roi_pool_feat = torch.cat(roi_pool_feats, 0) box_to_level = torch.cat(box_to_levels, 0) idx_sorted, order = torch.sort(box_to_level) roi_pool_feat = roi_pool_feat[order] return roi_pool_feat
def log_eps(x, eps=1e-4): return torch.log(x + eps)
def masked_logL1_loss(self, predicted, target, mask): diff = torch.log(1 + torch.abs(predicted - target)) * mask loss = torch.sum(diff, dim=(2, 3)) / torch.sum(mask, dim=(2, 3)) return torch.mean(loss)
loss = bce(pre, trating) * len(pre) loss.backward() opt.step() flp = torch.FloatTensor(flpre[:, nst]) hrnow = torch.FloatTensor(fullhr[:, nst]) if (cd == 1): flp = flp.cuda() hrnow = hrnow.cuda() proa = proa.cuda() samitem = samitem.cuda() optgen.zero_grad() [tlfw1, tlfw2, tlfla1, tlfa2, cuid] = gen(flp, hrnow) pregen = model.itempre(samitem) losgen = -torch.sum(cuid * (hrnow * torch.log( (pregen + eps) / episa) + (1 - hrnow) * torch.log( (1 - pregen + eps) / (1 - episa))) + cuid * torch.log(proa / (1 - proa)) - cuid * torch.log(cuid + eps) - (1 - cuid) * torch.log(1 - cuid + eps)) losgen.backward() optgen.step() if (ite % 200 == 0): [teu, tev] = model.getem() eu = teu.cpu().numpy() ev = tev.cpu().numpy() prerating = 1 / (1 + np.exp(-eu.dot(ev.T))) cu = np.zeros((n, n)) nowfa = np.identity(n)
def forward(self, r, classes, m, d, alpha=1.0): self.r = r if self.device == 'cuda': self.classes = torch.from_numpy(classes).type(GPU_LONG_DTYPE) self.clusters, _ = torch.sort(torch.arange(0, float(m)).repeat(d)) self.clusters = self.clusters.type(GPU_INT_DTYPE) else: self.classes = torch.from_numpy(classes).type(LONG_DTYPE) self.clusters, _ = torch.sort(torch.arange(0, float(m)).repeat(d)) self.clusters = self.clusters.type(INT_DTYPE) self.cluster_classes = self.classes[0:m*d:d] self.n_clusters = m self.alpha = alpha #pdb.set_trace() # Take cluster means within the batch cluster_examples = dynamic_partition(self.r, self.clusters, self.n_clusters) #pdb.set_trace() cluster_means = torch.stack([torch.mean(x, dim=0) for x in cluster_examples]) #pdb.set_trace() sample_costs = compute_euclidean_distance(cluster_means, expand_dims(r, 1)) #pdb.set_trace() if self.device == 'cuda': clusters_tensor = self.clusters.type(GPU_FLOAT_DTYPE) n_clusters_tensor = torch.arange(0, self.n_clusters).type(GPU_FLOAT_DTYPE) intra_cluster_mask = Variable(comparison_mask(clusters_tensor, n_clusters_tensor).type(GPU_FLOAT_DTYPE)) else: clusters_tensor = self.clusters.type(FLOAT_DTYPE) n_clusters_tensor = torch.arange(0, self.n_clusters).type(FLOAT_DTYPE) intra_cluster_mask = Variable(comparison_mask(clusters_tensor, n_clusters_tensor).type(FLOAT_DTYPE)) #pdb.set_trace() #pdb.set_trace() intra_cluster_costs = torch.sum(intra_cluster_mask * sample_costs, dim=1) #pdb.set_trace() N = r.size()[0] #pdb.set_trace() variance = torch.sum(intra_cluster_costs) / float(N - 1) #pdb.set_trace() var_normalizer = -1 / (2 * variance**2) #pdb.set_trace() # Compute numerator numerator = torch.exp(var_normalizer * intra_cluster_costs - self.alpha) #pdb.set_trace() if self.device == 'cuda': classes_tensor = self.classes.type(GPU_FLOAT_DTYPE) cluster_classes_tensor = self.cluster_classes.type(GPU_FLOAT_DTYPE) # Compute denominator diff_class_mask = Variable(comparison_mask(classes_tensor, cluster_classes_tensor).type(GPU_FLOAT_DTYPE)) else: classes_tensor = self.classes.type(FLOAT_DTYPE) cluster_classes_tensor = self.cluster_classes.type(FLOAT_DTYPE) # Compute denominator diff_class_mask = Variable(comparison_mask(classes_tensor, cluster_classes_tensor).type(FLOAT_DTYPE)) diff_class_mask = 1 - diff_class_mask # Logical not on ByteTensor #pdb.set_trace() denom_sample_costs = torch.exp(var_normalizer * sample_costs) #pdb.set_trace() denominator = torch.sum(diff_class_mask * denom_sample_costs, dim=1) #pdb.set_trace() epsilon = 1e-8 #pdb.set_trace() losses = F.relu(-torch.log(numerator / (denominator + epsilon) + epsilon)) #pdb.set_trace() total_loss = torch.mean(losses) #pdb.set_trace() return total_loss, losses
plt.ion() # something about continuous plotting for step in range(10000): artist_paintings, labels = artist_works_with_labels( ) # real painting, label from artist G_ideas = torch.randn(BATCH_SIZE, N_IDEAS) # random ideas G_inputs = torch.cat((G_ideas, labels), 1) # ideas with labels G_paintings = G(G_inputs) # fake painting w.r.t label from G D_inputs0 = torch.cat((artist_paintings, labels), 1) # all have their labels D_inputs1 = torch.cat((G_paintings, labels), 1) prob_artist0 = D(D_inputs0) # D try to increase this prob prob_artist1 = D(D_inputs1) # D try to reduce this prob D_score0 = torch.log(prob_artist0) # maximise this for D D_score1 = torch.log(1. - prob_artist1) # maximise this for D D_loss = -torch.mean( D_score0 + D_score1) # minimise the negative of both two above for D G_loss = torch.mean(D_score1) # minimise D score w.r.t G opt_D.zero_grad() D_loss.backward(retain_graph=True) # reusing computational graph opt_D.step() opt_G.zero_grad() G_loss.backward() opt_G.step() if step % 200 == 0: # plotting plt.cla()
def learn(self, writer, i_iter):
    memory, log = self.collector.collect_samples(
        self.config["train"]["generator"]["sample_batch_size"])
    self.policy.train()
    self.value.train()
    self.discriminator.train()

    print(
        f"Iter: {i_iter}, num steps: {log['num_steps']}, total reward: {log['total_reward']: .4f}, "
        f"min reward: {log['min_episode_reward']: .4f}, max reward: {log['max_episode_reward']: .4f}, "
        f"average reward: {log['avg_reward']: .4f}, sample time: {log['sample_time']: .4f}")

    # record reward information
    writer.add_scalar("gail/average reward", log['avg_reward'], i_iter)
    writer.add_scalar("gail/num steps", log['num_steps'], i_iter)

    # collect generated batch
    # gen_batch = self.collect_samples(self.config["ppo"]["sample_batch_size"])
    gen_batch = memory.sample()
    gen_batch_state = FLOAT(gen_batch.state).to(device)            # [batch size, state size]
    gen_batch_action = FLOAT(gen_batch.action).to(device)          # [batch size, action size]
    gen_batch_old_log_prob = FLOAT(gen_batch.log_prob).to(device)  # [batch size, 1]
    gen_batch_mask = FLOAT(gen_batch.mask).to(device)              # [batch size, 1]

    ####################################################
    # update discriminator
    ####################################################
    d_optim_i_iters = self.config["train"]["discriminator"]["optim_step"]
    if i_iter % d_optim_i_iters == 0:
        for expert_batch_state, expert_batch_action in self.expert_dataset.train_loader:
            # calculate probs and logits
            gen_prob, gen_logits = self.discriminator(gen_batch_state, gen_batch_action)
            expert_prob, expert_logits = self.discriminator(
                expert_batch_state.to(device), expert_batch_action.to(device))

            # calculate accuracy
            gen_acc = torch.mean((gen_prob < 0.5).float())
            expert_acc = torch.mean((expert_prob > 0.5).float())

            # calculate regression loss
            expert_labels = torch.ones_like(expert_prob)
            gen_labels = torch.zeros_like(gen_prob)
            e_loss = self.discriminator_func(expert_prob, target=expert_labels)
            g_loss = self.discriminator_func(gen_prob, target=gen_labels)
            d_loss = e_loss + g_loss

            # calculate entropy loss (Bernoulli entropy expressed in terms of logits)
            logits = torch.cat([gen_logits, expert_logits], 0)
            entropy = ((1. - torch.sigmoid(logits)) * logits
                       - torch.nn.functional.logsigmoid(logits)).mean()
            entropy_loss = -self.config["train"]["discriminator"]["ent_coeff"] * entropy

            total_loss = d_loss + entropy_loss

            self.optimizer_discriminator.zero_grad()
            total_loss.backward()
            self.optimizer_discriminator.step()

        writer.add_scalar('discriminator/d_loss', d_loss.item(), i_iter)
        writer.add_scalar("discriminator/e_loss", e_loss.item(), i_iter)
        writer.add_scalar("discriminator/g_loss", g_loss.item(), i_iter)
        writer.add_scalar("discriminator/ent", entropy.item(), i_iter)
        writer.add_scalar('discriminator/expert_acc', expert_acc.item(), i_iter)
        writer.add_scalar('discriminator/gen_acc', gen_acc.item(), i_iter)

    ####################################################
    # update policy by ppo [mini_batch]
    ####################################################
    with torch.no_grad():
        gen_batch_value = self.value(gen_batch_state)
        d_out, _ = self.discriminator(gen_batch_state, gen_batch_action)
        gen_batch_reward = -torch.log(1 - d_out + 1e-6)

    gen_batch_advantage, gen_batch_return = estimate_advantages(
        gen_batch_reward, gen_batch_mask, gen_batch_value,
        self.config["train"]["generator"]["gamma"],
        self.config["train"]["generator"]["tau"])

    ppo_optim_i_iters = self.config["train"]["generator"]["optim_step"]
    ppo_mini_batch_size = self.config["train"]["generator"]["mini_batch_size"]

    for _ in range(ppo_optim_i_iters):
        if ppo_mini_batch_size > 0:
            gen_batch_size = gen_batch_state.shape[0]
            optim_iter_num = int(math.ceil(gen_batch_size / ppo_mini_batch_size))
            perm = torch.randperm(gen_batch_size)

            for i in range(optim_iter_num):
                ind = perm[slice(i * ppo_mini_batch_size,
                                 min((i + 1) * ppo_mini_batch_size, gen_batch_size))]
                mini_batch_state, mini_batch_action, mini_batch_advantage, mini_batch_return, \
                    mini_batch_old_log_prob = gen_batch_state[ind], gen_batch_action[ind], \
                    gen_batch_advantage[ind], gen_batch_return[ind], gen_batch_old_log_prob[ind]

                v_loss, p_loss, ent_loss = ppo_step(
                    policy_net=self.policy,
                    value_net=self.value,
                    optimizer_policy=self.optimizer_policy,
                    optimizer_value=self.optimizer_value,
                    optim_value_iternum=self.config["value"]["optim_value_iter"],
                    states=mini_batch_state,
                    actions=mini_batch_action,
                    returns=mini_batch_return,
                    old_log_probs=mini_batch_old_log_prob,
                    advantages=mini_batch_advantage,
                    clip_epsilon=self.config["train"]["generator"]["clip_ratio"],
                    l2_reg=self.config["value"]["l2_reg"])
        else:
            v_loss, p_loss, ent_loss = ppo_step(
                policy_net=self.policy,
                value_net=self.value,
                optimizer_policy=self.optimizer_policy,
                optimizer_value=self.optimizer_value,
                optim_value_iternum=self.config["value"]["optim_value_iter"],
                states=gen_batch_state,
                actions=gen_batch_action,
                returns=gen_batch_return,
                old_log_probs=gen_batch_old_log_prob,
                advantages=gen_batch_advantage,
                clip_epsilon=self.config["train"]["generator"]["clip_ratio"],
                l2_reg=self.config["value"]["l2_reg"])

    writer.add_scalar('generator/p_loss', p_loss, i_iter)
    writer.add_scalar('generator/v_loss', v_loss, i_iter)
    writer.add_scalar('generator/ent_loss', ent_loss, i_iter)

    print(f" Training episode:{i_iter} ".center(80, "#"))
    print('d_gen_prob:', gen_prob.mean().item())
    print('d_expert_prob:', expert_prob.mean().item())
    print('d_loss:', d_loss.item())
    print('e_loss:', e_loss.item())
    print("d/bernoulli_entropy:", entropy.item())
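The discriminator's entropy bonus above uses the identity H(sigma(x)) = (1 - sigma(x)) * x - log(sigma(x)) for a Bernoulli distribution parameterised by a logit x, which avoids computing log(1 - p) directly. A short standalone check of that identity, independent of the surrounding class:

import torch
import torch.nn.functional as F

logits = torch.randn(1000)
p = torch.sigmoid(logits)

# direct Bernoulli entropy: -p*log(p) - (1-p)*log(1-p)
direct = -(p * torch.log(p) + (1 - p) * torch.log(1 - p))

# form used in the discriminator update above, written in terms of logits
from_logits = (1. - p) * logits - F.logsigmoid(logits)

assert torch.allclose(direct, from_logits, atol=1e-5)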
def get_mi(self, X, Y, Y_tilde):
    # Donsker-Varadhan lower bound: E_joint[T] - log E_marginal[exp(T)]
    T = self.network(X, Y).mean()
    expT = torch.exp(self.network(X, Y_tilde)).mean()
    mi = (T - torch.log(expT)).item() / math.log(2)  # convert nats to bits
    return mi, T, expT
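get_mi expects a statistics network T(x, y) and marginal samples Y_tilde. A minimal sketch of what these might look like in a MINE-style setup; the class name, architecture, and hidden size are assumptions made only for illustration.

import torch
import torch.nn as nn

class StatisticsNetwork(nn.Module):
    # small critic T(x, y); layer sizes are illustrative assumptions
    def __init__(self, x_dim, y_dim, hidden=100):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(x_dim + y_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, x, y):
        return self.net(torch.cat([x, y], dim=1))

# Y_tilde is typically obtained by shuffling Y across the batch, which yields
# samples from the product of marginals p(x)p(y):
#     Y_tilde = Y[torch.randperm(Y.size(0))]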
def forward(self, x):
    # stage 0
    x, xr1, xr2, xr3, xr4 = self.pretrained_resnet(x)
    # not needed if layer 1 is adjusted
    # xr5 = self.layer5(xr4)

    xd1 = self.d1(xr1)
    xd2 = self.d2(xr2)
    xd3 = self.d3(xr3)
    # not needed if layer 1 is adjusted
    # xd4 = self.d4(xr4)

    ## upward path
    # upsampling the last feature map using bilinear interpolation
    # not needed if layer 1 downsampling is active
    # up0 = self.upsample0(xr5)
    # cat0 = torch.cat((up0, xd4), dim=1)
    # updense0 = self.updense0(cat0)

    # changed from updense0 to xr4, if downsampling in layer 1
    up1 = self.upsample(xr4)
    # concat the upsampled feature maps with the dense feature maps from the previous layer
    cat1 = torch.cat((up1, xd3), dim=1)
    # densenet block to reduce channels to 4k --> [bs x 56 x 16 x 16]
    updense1 = self.updense1(cat1)

    # upsampling the last feature map using bilinear interpolation
    up2 = self.upsample(updense1)
    # concat the upsampled feature maps with the dense feature maps from the previous layer
    cat2 = torch.cat((up2, xd2), dim=1)
    # densenet block to reduce channels to 2k --> [bs x 28 x 32 x 32]
    updense2 = self.updense2(cat2)

    # upsampling the last feature map using bilinear interpolation
    up3 = self.upsample(updense2)
    # concat the upsampled feature maps with the dense feature maps from the previous layer
    cat3 = torch.cat((up3, xd1), dim=1)
    # densenet block to reduce channels to k --> [bs x 14 x 64 x 64]
    updense3 = self.updense3(cat3)

    reduced = self.onexone(updense3)

    # final sigmoid layer before the saliency maps
    # todo: add normalization before sigmoid, change sigmoid to an alternative to prevent dying gradients, also add pooling
    saliency_map = self.sigmoid(reduced)

    class_scores = self.pooling(saliency_map)
    norm_scores = self.norm(class_scores)
    # log scores for NLLLoss on RSNA
    log_scores = torch.log(norm_scores)

    return saliency_map, log_scores
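The log of the normalised class scores is returned so it can be fed directly to a negative-log-likelihood criterion, which expects log-probabilities. A minimal standalone sketch of that pairing, using dummy tensors in place of the network outputs; the batch size and number of classes are assumptions.

import torch
import torch.nn as nn

criterion = nn.NLLLoss()  # expects log-probabilities, hence the torch.log above

# dummy stand-ins for norm_scores and the labels (shapes are assumptions)
norm_scores = torch.softmax(torch.randn(8, 2), dim=1)  # plays the role of self.norm(class_scores)
log_scores = torch.log(norm_scores)
targets = torch.randint(0, 2, (8,))

loss = criterion(log_scores, targets)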
def train_model(model, dataset, ds_name,
                epochs=10,
                batch_size=32,
                sample_size=32,
                eval_size=32,
                img_size=32,
                lr=1e-3,
                weight_decay=1e-4,
                loss_log_interval=20,
                image_log_interval=20,
                model_log_interval=20,
                checkpoint_dir='./checkpoints',
                results_dir='./res',
                resume=False,
                cuda=False,
                seed=0,
                device=None,
                cores=1):
    if resume:
        epoch_start = utils.load_checkpoint(model, checkpoint_dir)
    else:
        epoch_start = 0

    fixed_noise = torch.rand(sample_size, model.z_size).to(device)
    if model.model_name in ['vae', 'vae2', 'vae3']:
        m = dist.Normal(torch.Tensor([0.0]).to(device), torch.Tensor([1.0]).to(device))
        fixed_noise = m.icdf(fixed_noise)

    output_folder = results_dir + ds_name
    resfile_prefix = ds_name + "_" + \
        model.model_name + \
        "_ld_" + \
        str(model.z_size) + \
        "_bs_" + str(batch_size)
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    data_root = './datasets'

    # DEC-VINE preclustering setup and training
    if model.model_name in ['dec_vine', 'dec_vine2', 'dec_vine3']:
        # load pre-trained AE
        if model.model_name == 'dec_vine':
            pretrain_prefix = resfile_prefix.replace("dec", "ae")
        elif model.model_name == 'dec_vine2':
            pretrain_prefix = resfile_prefix.replace("dec_vine2", "ae_vine2")
        elif model.model_name == 'dec_vine3':
            pretrain_prefix = resfile_prefix.replace("dec_vine3", "ae_vine3")
        pretrain_files = [filename for filename in os.listdir(checkpoint_dir)
                          if filename.startswith(pretrain_prefix)]
        pretrain_epochs = [int(filename.replace(pretrain_prefix + "_", ""))
                           for filename in pretrain_files]
        pretrain_path = os.path.join(checkpoint_dir,
                                     pretrain_files[pretrain_epochs.index(max(pretrain_epochs))])
        model.pretrain(pretrain_path)

        # form initial cluster centres
        data_loader = utils.get_data_loader(dataset, batch_size, cuda=cuda)
        data_stream = tqdm(enumerate(data_loader, 1))
        features = []
        for batch_index, (x, _, _) in data_stream:
            tmp_x = Variable(x).to(device)
            if model.model_name == 'dec_vine':
                z = model.ae.encoder(tmp_x)
                z = model.ae.q(z)
            elif model.model_name == 'dec_vine2' or model.model_name == 'dec_vine3':
                z = torch.nn.functional.relu(model.ae.fc1(model.ae.encoder(tmp_x).view(x.size(0), -1)))
                z = model.ae.fc21(z)
            features.append(z)
        kmeans = KMeans(n_clusters=model.cluster_number, n_init=20)
        y_pred = kmeans.fit_predict(torch.cat(features).detach().cpu().numpy())
        model.cluster_layer.data = torch.tensor(kmeans.cluster_centers_).to(device)

    # load a pre-trained state for any model
    pretrain = 0
    if pretrain == 1 and model.model_name == 'ae_vine3':
        pretrain_prefix = resfile_prefix
        pretrain_files = [filename for filename in os.listdir(checkpoint_dir)
                          if filename.startswith(pretrain_prefix)]
        pretrain_epochs = [int(filename.replace(pretrain_prefix + "_", ""))
                           for filename in pretrain_files]
        pretrain_path = os.path.join(checkpoint_dir,
                                     pretrain_files[pretrain_epochs.index(max(pretrain_epochs))])
        pretrained_ae = torch.load(pretrain_path, map_location=device)
        model.load_state_dict(pretrained_ae['state'])
        print('load pretrained ae3 from', pretrain_path)

    # reconstruction_criterion = torch.nn.BCELoss()
    # note: size_average is deprecated in newer PyTorch; reduction='sum' is the equivalent
    reconstruction_criterion = torch.nn.BCELoss(size_average=False)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    if model.model_name == 'gan':
        lr_g = lr_d = 0.0002
        k = 1
        fix_noise = get_noise(sample_size)
        opt_g = torch.optim.Adam(model.net_g.parameters(), lr=lr_g, betas=(0.5, 0.999))  # optimizer for Generator
        opt_d = torch.optim.Adam(model.net_d.parameters(), lr=lr_d, betas=(0.5, 0.999))  # optimizer for Discriminator

    for epoch in range(epoch_start, epochs + 1):
        print("Epoch {}".format(epoch))

        if model.model_name == "dec_vine" or model.model_name == "dec_vine2":
            model.eval()
            p = []
            indices = []
            data_loader = utils.get_data_loader(dataset, batch_size, cuda=cuda)
            data_stream = tqdm(enumerate(data_loader, 1))
            for batch_index, (x, _, idx) in data_stream:
                tmp_x = Variable(x).to(device)
                _, tmp_p = model(tmp_x)
                p.append(tmp_p.detach().cpu())
                tmp_idx = idx
                indices.append(tmp_idx)
            p = torch.cat(p)
            indices = torch.cat(indices)
            p = model.target_distribution(p[indices])
            p = Variable(p).to(device)

        model.train()
        data_loader = utils.get_data_loader(dataset, batch_size, cuda=cuda)
        data_stream = tqdm(enumerate(data_loader, 1))

        for batch_index, (x, _, idx) in data_stream:
            # learning rate decay
            if model.model_name == 'gan' and epoch == 8:
                opt_g.param_groups[0]['lr'] /= 10
                opt_d.param_groups[0]['lr'] /= 10
            if model.model_name == 'gan' and epoch == 15:
                opt_g.param_groups[0]['lr'] /= 10
                opt_d.param_groups[0]['lr'] /= 10

            iteration = (epoch - 1) * (len(dataset) // batch_size) + batch_index
            x = Variable(x).to(device)
            idx = Variable(idx).to(device)

            if model.model_name == 'gan':
                # train Discriminator
                real_data = Variable(x.cuda())
                prob_fake = model.net_d(model.net_g(get_noise(real_data.size(0)).to(device)))
                prob_real = model.net_d(real_data)
                loss_d = -torch.mean(torch.log(prob_real) + torch.log(1 - prob_fake))
                opt_d.zero_grad()
                loss_d.backward()
                opt_d.step()

                # train Generator every k-th batch
                if batch_index % k == 0:
                    prob_fake = model.net_d(model.net_g(get_noise().to(device)))
                    loss_g = -torch.mean(torch.log(prob_fake))
                    opt_g.zero_grad()
                    loss_g.backward()
                    opt_g.step()
            else:
                if model.model_name == 'ae_vine' or model.model_name == 'ae_vine2' or model.model_name == 'ae_vine3':
                    x_reconstructed = model(x)
                elif model.model_name == 'dec_vine' or model.model_name == 'dec_vine2':
                    x_reconstructed, q = model(x)
                    p_batch = p[idx]
                    penalization_loss = 10 * F.kl_div(q.log(), p_batch)
                    del p_batch, q
                elif model.model_name == 'vae' or model.model_name == "vae2" or model.model_name == "vae3":
                    (mean, logvar), x_reconstructed = model(x)
                    penalization_loss = model.kl_divergence_loss(mean, logvar)

                if model.model_name == 'vae2' or model.model_name == 'vae3':
                    reconstruction_loss = reconstruction_criterion(x_reconstructed, x)  # / x.size(0)
                else:
                    reconstruction_loss = reconstruction_criterion(x_reconstructed, x) / x.size(0)

                if model.model_name == 'ae_vine' or model.model_name == 'ae_vine2' or model.model_name == 'ae_vine3':
                    loss = reconstruction_loss
                else:
                    loss = reconstruction_loss + penalization_loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            if iteration % loss_log_interval == 0:
                f = open(output_folder + "/" + resfile_prefix + "_losses" + ".txt", 'a')
                if model.model_name == 'gan':
                    f.write("\n{:<12} | {} | {} | {} | {} ".format(
                        model.model_name, iteration, loss_g, loss_d, seed))
                else:
                    if model.model_name == 'ae_vine' or model.model_name == 'ae_vine2' or model.model_name == 'ae_vine3':
                        f.write("\n{:<12} | {} | {} | {} ".format(
                            model.model_name, iteration, loss, seed))
                    else:
                        f.write("\n{:<12} | {} | {} | {} | {} | {}".format(
                            model.model_name, iteration,
                            reconstruction_loss.data.item(),
                            penalization_loss.data.item(),
                            loss.data.item(), seed))
                f.close()

            # adding this just to have a way of calculating the scores at 0 epochs
            if batch_index > 0 and epoch == 0:
                break

        if epoch % model_log_interval == 0:
            print()
            print('###################')
            print('# model checkpoint!')
            print('###################')
            print()
            utils.save_checkpoint(model, checkpoint_dir, epoch, resfile_prefix + "_" + str(epoch))

        if epoch % image_log_interval == 0:
            print()
            print('###################')
            print('# image checkpoint!')
            print('###################')
            print()
            model.eval()

            ae_vine_models = ['ae_vine', 'ae_vine2', 'dec_vine', 'dec_vine2', 'ae_vine3', 'dec_vine3']
            if model.model_name in ae_vine_models:
                data_loader_vine = utils.get_data_loader(dataset, 5000, cuda=cuda)
                data_stream_vine = tqdm(enumerate(data_loader_vine, 1))
                features = []
                for batch_index, (x, _, _) in data_stream_vine:
                    tmp_x = Variable(x).to(device)
                    if model.model_name == 'ae_vine':
                        encoded = model.encoder(tmp_x)
                        e = model.q(encoded)
                    elif model.model_name == 'dec_vine':
                        encoded = model.ae.encoder(tmp_x)
                        e = model.ae.q(encoded)
                    elif model.model_name == 'ae_vine2':
                        encoded = torch.nn.functional.relu(model.fc1(model.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.fc21(encoded)
                    elif model.model_name == 'dec_vine2':
                        encoded = torch.nn.functional.relu(model.ae.fc1(model.ae.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.ae.fc21(encoded)
                    elif model.model_name == 'ae_vine3':
                        encoded = F.relu(model.fc1(model.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.fc21(encoded)
                    elif model.model_name == 'dec_vine3':
                        encoded = F.relu(model.ae.fc1(model.ae.encoder(tmp_x).view(x.size(0), -1)))
                        e = model.ae.fc21(encoded)
                    features.append(e.detach().cpu())
                    if batch_index > 0:
                        break
                features = torch.cat(features).numpy()
                copula_controls = base.list(family_set="tll", trunc_lvl=5, cores=cores)
                vine_obj = rvinecop.vine(features, copula_controls=copula_controls)
                model.vine = vine_obj
                fake = model.sample(sample_size, vine_obj, fixed_noise)
                del x, e, encoded, vine_obj, data_loader_vine
            elif model.model_name == 'gan':
                fake = model.net_g(fix_noise.to(device)).data.cpu() + 0.5
                print(fake.shape)
            else:
                fake = model.sample(sample_size, fixed_noise)

            fake = fake.reshape(sample_size, model.channel_num, model.image_size, model.image_size)
            name_str = resfile_prefix + '_fake_samples_epoch'
            vutils.save_image(fake.detach(),
                              '%s/%s_%03d.png' % (output_folder, name_str, epoch),
                              normalize=True)
            del fake

        if epoch > 0 and epoch % 2 == 0:
            eval_size = 2000
            s = metric.compute_score_raw(ds_name, dataset, img_size, data_root, eval_size, batch_size,
                                         output_folder + '/real/', output_folder + '/fake/',
                                         model, model.z_size, 'resnet34', device)
            f = open(output_folder + "/" + resfile_prefix + "_scores" + ".txt", 'a')
            scr_arr = [str(a) for a in s]
            f.write("\n{:<12} | {} | {} | {}".format(
                model.model_name, epoch, ', '.join(scr_arr), seed))
            f.close()
def atanh(x): return 0.5 * torch.log((1. + x) / (1. - x))
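The atanh above is written via torch.log; on PyTorch 1.7 and later it agrees with the built-in torch.atanh for inputs strictly inside (-1, 1). A quick sanity check (the tolerance and input range are chosen only for this illustration):

import torch

x = torch.empty(1000).uniform_(-0.99, 0.99)
manual = 0.5 * torch.log((1. + x) / (1. - x))

# torch.atanh exists in PyTorch >= 1.7; on older versions only the manual form is available
assert torch.allclose(manual, torch.atanh(x), atol=1e-4)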