def predict(model, batch, flipped_batch, use_gpu):
    image_ids, inputs = batch['image_id'], batch['input']
    if use_gpu:
        inputs = inputs.cuda()
    outputs, _, _ = model(inputs)
    probs = torch.sigmoid(outputs)
    if flipped_batch is not None:
        flipped_image_ids, flipped_inputs = flipped_batch['image_id'], flipped_batch['input']
        # assert image_ids == flipped_image_ids
        if use_gpu:
            flipped_inputs = flipped_inputs.cuda()
        flipped_outputs, _, _ = model(flipped_inputs)
        flipped_probs = torch.sigmoid(flipped_outputs)
        probs += torch.flip(flipped_probs, (3,))  # flip back and add
        probs *= 0.5
    probs = probs.squeeze(1).cpu().numpy()
    if args.resize:
        probs = np.swapaxes(probs, 0, 2)
        probs = cv2.resize(probs, (orig_img_size, orig_img_size),
                           interpolation=cv2.INTER_LINEAR)
        probs = np.swapaxes(probs, 0, 2)
    else:
        probs = probs[:, y0:y1, x0:x1]
    return probs
def _make_images_board(self, model):
    model.eval()
    num_imgs = 64
    fuseTrans = self.cfg.fuseTrans
    batch = next(iter(self.data_loaders[1]))
    input_images, renderTrans, depthGT, maskGT = utils.unpack_batch_novel(batch, self.cfg.device)

    with torch.set_grad_enabled(False):
        XYZ, maskLogit = model(input_images)
        # ------ build transformer ------
        XYZid, ML = transform.fuse3D(
            self.cfg, XYZ, maskLogit, fuseTrans)  # [B,3,VHW], [B,1,VHW]
        newDepth, newMaskLogit, collision = transform.render2D(
            self.cfg, XYZid, ML, renderTrans)  # [B,N,1,H,W]

    return {
        'RGB': utils.make_grid(input_images[:num_imgs]),
        'depth': utils.make_grid(
            ((1 - newDepth) * (collision == 1).float())[:num_imgs, 0, 0:1, :, :]),
        'depthGT': utils.make_grid(1 - depthGT[:num_imgs, 0, 0:1, :, :]),
        'mask': utils.make_grid(torch.sigmoid(maskLogit[:num_imgs, 0:1, :, :])),
        'mask_rendered': utils.make_grid(
            torch.sigmoid(newMaskLogit[:num_imgs, 0, 0:1, :, :])),
        'maskGT': utils.make_grid(maskGT[:num_imgs, 0, 0:1, :, :]),
    }
def forward(self, input_, hx):
    """
    Args:
        input_: A (batch, input_size) tensor containing input features.
        hx: A tuple (h_0, c_0), which contains the initial hidden and cell
            state, where the size of both states is (batch, hidden_size).

    Returns:
        h_1, c_1: Tensors containing the next hidden and cell state.
    """
    h_0, c_0 = hx
    batch_size = h_0.size(0)
    bias_batch = (self.bias.unsqueeze(0)
                  .expand(batch_size, *self.bias.size()))
    wh = torch.mm(h_0, self.weight_hh)
    wi = torch.mm(input_, self.weight_ih)
    bn_wh = self.bn_hh(wh)
    bn_wi = self.bn_ih(wi)
    f, i, o, g = torch.split(bn_wh + bn_wi + bias_batch,
                             split_size_or_sections=self.hidden_size, dim=1)
    c_1 = torch.sigmoid(f) * c_0 + torch.sigmoid(i) * torch.tanh(g)
    h_1 = torch.sigmoid(o) * torch.tanh(self.bn_c(c_1))
    return h_1, c_1
def forward(self, x, r):
    """
    Computes an output and data structure instructions using a single
    linear layer.

    :type x: Variable
    :param x: The input to this Controller

    :type r: Variable
    :param r: The previous item read from the neural data structure

    :rtype: tuple
    :return: A tuple of the form (y, (v, u, d)), interpreted as follows:
        - output y
        - pop a strength u from the data structure
        - push v with strength d to the data structure
    """
    self._hidden = self._rnn(torch.cat([x, r], 1), self._hidden)
    nn_output = self._linear(self._hidden)
    output = nn_output[:, self._n_args + self._read_size:].contiguous()
    read_params = torch.sigmoid(nn_output[:, :self._n_args + self._read_size])
    v = read_params[:, self._n_args:].contiguous()
    instructions = tuple(read_params[:, j].contiguous()
                         for j in range(self._n_args))
    self._log(x, torch.sigmoid(output), v, *instructions)
    return output, ((v,) + instructions)
def forward_flow(self, z, xenc):
    B = z.shape[0]
    C = z.shape[1]
    f = self.flows
    logdet = 0.
    for i in range(self.n_flows):
        z = z[:, f[str(i)]['perm']]
        z1 = z[:, :C // 2]
        z2 = z[:, C // 2:]
        sig2 = torch.sigmoid(f[str(i)]['f1_sig'](torch.cat([z2, xenc], 1)))
        mu2 = f[str(i)]['f1_mu'](torch.cat([z2, xenc], 1))
        z1 = z1 * sig2 + mu2
        mu1 = f[str(i)]['f2_mu'](torch.cat([z1, xenc], 1))
        sig1 = torch.sigmoid(f[str(i)]['f2_sig'](torch.cat([z1, xenc], 1)))
        z2 = z2 * sig1 + mu1
        z = torch.cat([z1, z2], 1)
        sig1 = sig1.view(B, -1)
        sig2 = sig2.view(B, -1)
        logdet += torch.sum(torch.log(sig1), 1)
        logdet += torch.sum(torch.log(sig2), 1)
    return z, logdet
def reverse_flow(self, z):
    B = z.shape[0]
    C = z.shape[1]
    f = self.flows
    logdet = 0.
    reverse_ = list(range(self.n_flows))[::-1]
    for i in reverse_:
        z1 = z[:, :C // 2]
        z2 = z[:, C // 2:]
        sig1 = torch.sigmoid(f[str(i)]['f2_sig'](z1))
        mu1 = f[str(i)]['f2_mu'](z1)
        z2 = (z2 - mu1) / sig1
        sig2 = torch.sigmoid(f[str(i)]['f1_sig'](z2))
        mu2 = f[str(i)]['f1_mu'](z2)
        z1 = (z1 - mu2) / sig2
        z = torch.cat([z1, z2], 1)
        z = z[:, f[str(i)]['inv_perm']]
        sig1 = sig1.view(B, -1)
        sig2 = sig2.view(B, -1)
        logdet += torch.sum(torch.log(sig1), 1)
        logdet += torch.sum(torch.log(sig2), 1)
    return z, logdet
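For reference, a minimal self-contained sketch of one affine coupling step like those above (note that reverse_flow here takes no xenc, so it inverts an unconditional variant). The nets s_net and t_net are hypothetical stand-ins; the check confirms that reversing recovers the input and that the log-determinant is the sum of log sigma over the scaled half:

import torch
import torch.nn as nn

C = 8
s_net = nn.Linear(C // 2, C // 2)  # hypothetical: produces pre-sigmoid scales
t_net = nn.Linear(C // 2, C // 2)  # hypothetical: produces shifts

def couple(z):
    z1, z2 = z[:, :C // 2], z[:, C // 2:]
    sig = torch.sigmoid(s_net(z2))
    z1 = z1 * sig + t_net(z2)  # z2 is left unchanged, so this inverts exactly
    return torch.cat([z1, z2], 1), torch.sum(torch.log(sig), 1)

def uncouple(z):
    z1, z2 = z[:, :C // 2], z[:, C // 2:]
    sig = torch.sigmoid(s_net(z2))
    z1 = (z1 - t_net(z2)) / sig
    return torch.cat([z1, z2], 1)

z = torch.randn(4, C)
z_fwd, logdet = couple(z)
assert torch.allclose(uncouple(z_fwd), z, atol=1e-5)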
def forward(self, inputs, mask=None, layer_cache=None, step=None):
    """
    Args:
        inputs (FloatTensor): ``(batch_size, input_len, model_dim)``

    Returns:
        (FloatTensor, FloatTensor):

        * gating_outputs ``(batch_size, input_len, model_dim)``
        * average_outputs average attention ``(batch_size, input_len, model_dim)``
    """
    batch_size = inputs.size(0)
    inputs_len = inputs.size(1)
    device = inputs.device
    average_outputs = self.cumulative_average(
        inputs,
        self.cumulative_average_mask(batch_size, inputs_len).to(device).float()
        if layer_cache is None else step,
        layer_cache=layer_cache)
    average_outputs = self.average_layer(average_outputs)
    gating_outputs = self.gating_layer(torch.cat((inputs, average_outputs), -1))
    input_gate, forget_gate = torch.chunk(gating_outputs, 2, dim=2)
    gating_outputs = torch.sigmoid(input_gate) * inputs + \
        torch.sigmoid(forget_gate) * average_outputs
    return gating_outputs, average_outputs
def custom_cross_entropy(x, y):
    sigmoid_x = torch.sigmoid(x)
    sigmoid_x2 = torch.sigmoid(x ** 2)
    neg_log_sigmoid_x = -1 * torch.log(sigmoid_x)
    neg_log_1_minus_sigmoid_x2 = -1 * torch.log(1 - sigmoid_x2)
    l1 = torch.mul(y, neg_log_sigmoid_x)
    l2 = torch.mul(1 - y, neg_log_1_minus_sigmoid_x2)
    return torch.sum(l1 + l2)
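A hedged usage sketch for the loss above. Note the y=0 branch penalizes sigmoid(x**2) rather than sigmoid(x), so it only matches standard BCE-with-logits on the positive term; the comparison below makes that visible on made-up values:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, -1.0], [0.5, 0.0]])
targets = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
loss = custom_cross_entropy(logits, targets)
# standard BCE-with-logits for comparison (sum reduction to match torch.sum above)
bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='sum')
print(loss.item(), bce.item())  # differ because of the sigmoid(x**2) negative term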
def forward(self, x=None, warmup=1., inf_net=None):
    outputs = {}
    B = x.shape[0]

    if inf_net is None:
        z, logits = self.q.sample(x)
    else:
        # NOTE: the KL term below uses `logits` from self.q; this branch
        # belongs with the commented-out logpz/logqz objective.
        z, logqz = inf_net.sample(x)

    probs_q = torch.sigmoid(logits)
    probs_q = torch.clamp(probs_q, min=.00000001, max=.9999999)
    probs_p = torch.ones(B, self.z_size).cuda() * .5
    KL = probs_q * torch.log(probs_q / probs_p) + \
        (1 - probs_q) * torch.log((1 - probs_q) / (1 - probs_p))
    KL = torch.sum(KL, dim=1)

    # Decode image
    x_hat = self.generator.forward(z)
    alpha = torch.sigmoid(x_hat)
    beta = Beta(alpha * self.beta_scale, (1. - alpha) * self.beta_scale)
    # add uniform noise
    x_noise = torch.clamp(
        x + torch.FloatTensor(x.shape).uniform_(0., 1. / 256.).cuda(),
        min=1e-5, max=1 - 1e-5)
    logpx = beta.log_prob(x_noise)  # [120,3,112,112]
    logpx = torch.sum(logpx.view(B, -1), 1)  # [PB] * self.w_logpx

    log_ws = logpx - KL  # + logpz - logqz

    outputs['logpx'] = torch.mean(logpx)
    outputs['x_recon'] = alpha
    # outputs['welbo'] = torch.mean(logpx + warmup*(logpz - logqz))
    outputs['welbo'] = torch.mean(logpx + warmup * KL)
    outputs['elbo'] = torch.mean(log_ws)
    outputs['logws'] = log_ws
    outputs['z'] = z
    outputs['logpz'] = torch.zeros(1)  # torch.mean(logpz)
    outputs['logqz'] = torch.mean(KL)
    return outputs
def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    prediction = prediction.view(
        batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(
        batch_size, grid_size * grid_size * num_anchors, bbox_attrs)
    anchors = [(a[0] / stride, a[1] / stride) for a in anchors]

    # Sigmoid the center_X, center_Y and object confidence
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Add the centre offsets
    grid = np.arange(grid_size)
    a, b = np.meshgrid(grid, grid)
    x_offset = torch.FloatTensor(a).view(-1, 1)
    y_offset = torch.FloatTensor(b).view(-1, 1)
    if CUDA:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()
    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(
        1, num_anchors).view(-1, 2).unsqueeze(0)
    prediction[:, :, :2] += x_y_offset

    # log space transform of height and width
    anchors = torch.FloatTensor(anchors)
    if CUDA:
        anchors = anchors.cuda()
    anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

    prediction[:, :, 5:5 + num_classes] = (
        torch.sigmoid(prediction[:, :, 5:5 + num_classes]))
    # rescale box coordinates from grid units to input-image pixels
    prediction[:, :, :4] *= stride
    return prediction
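A hedged usage sketch for the transform above: a dummy 13x13 head output for 3 anchors and 80 classes at input resolution 416 (assumed values matching the usual YOLOv3 scales; the function's module is assumed to import torch and numpy as np):

import torch

num_classes = 80
anchors = [(116, 90), (156, 198), (373, 326)]
head_out = torch.randn(1, len(anchors) * (5 + num_classes), 13, 13)
boxes = predict_transform(head_out, 416, anchors, num_classes, CUDA=False)
print(boxes.shape)  # torch.Size([1, 507, 85]): 13*13*3 boxes, 5 + 80 attrs each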
def forward(self, data, last_hidden):
    hx, cx = last_hidden
    m = self.wmx(data) * self.wmh(hx)
    gates = self.wx(data) + self.wh(m)
    i, f, o, u = gates.chunk(4, 1)
    i = torch.sigmoid(i)
    f = torch.sigmoid(f)
    u = torch.tanh(u)
    o = torch.sigmoid(o)
    cy = f * cx + i * u
    hy = o * torch.tanh(cy)
    return hy, cy
def forward(self, x):
    out = F.leaky_relu(self.conv1(x), 0.05)    # (?, 32, 14, 14)
    out = F.leaky_relu(self.conv2(out), 0.05)  # (?, 64, 7, 7)
    out = F.leaky_relu(self.conv3(out), 0.05)  # (?, 128, 3, 3)
    out = F.leaky_relu(self.conv4(out), 0.05)  # (?, 256, 1, 1)
    out = out.squeeze()
    return torch.sigmoid(self.linear(out))
def forward(self, x):
    x = self.fc1(x)
    x = F.relu(x, inplace=True)
    x = self.fc2(x)
    x = torch.sigmoid(x)
    return x
def make_score_map(self, img, mode='sigmoid'):
    """Computes the score map of the reference over a search image."""
    img = img / 255
    # The offset is inserted so that the final size of the score map matches
    # the search image. To know more see "How to overlay the search img with
    # the score map" in Trello/Report. It is half of the dimension of the
    # Smallest Class Equivalent of the Ref image.
    offset = (((self.ref.shape[0] + 1) // 4) * 4 - 1) // 2
    img_mean = img.mean()
    img_padded = np.pad(img, ((offset, offset), (offset, offset), (0, 0)),
                        mode='constant', constant_values=img_mean)
    img_padded = numpy_to_torch_var(img_padded, device)
    srch_emb = self.net.get_embedding(img_padded)
    score_map = self.net.match_corr(self.ref_emb, srch_emb)
    dimx = score_map.shape[-1]
    dimy = score_map.shape[-2]
    score_map = score_map.view(-1, dimy, dimx)
    if mode == 'sigmoid':
        score_map = sigmoid(score_map)
    elif mode == 'norm':
        score_map = score_map - score_map.min()
        score_map = score_map / score_map.max()
    score_map = score_map.unsqueeze(0)
    # We upscale 4 times, because the total stride of the network is 4
    score_map = F.interpolate(score_map, scale_factor=4, mode='bilinear',
                              align_corners=False)
    score_map = score_map.cpu()
    score_map = torch_var_to_numpy(score_map)
    return score_map
def test_autograd_closure(self):
    x = Variable(torch.Tensor([0.4]), requires_grad=True)
    y = Variable(torch.Tensor([0.7]), requires_grad=True)

    trace = torch._C._tracer_enter((x, y), 1)
    z = torch.sigmoid(x * (x + y))
    w = torch.abs(x * x * x + y) + Variable(torch.ones(1))
    torch._C._tracer_exit((z, w))
    torch._C._jit_pass_lint(trace)

    (z * w).backward()
    torch._C._jit_pass_dce(trace)
    torch._C._jit_pass_lint(trace)

    x_grad = x.grad.data.clone()
    x.grad.data.zero_()

    function = torch._C._jit_createAutogradClosure(trace)
    torch._C._jit_pass_lint(trace)
    z2, w2 = function()(x, y)
    (z2 * w2).backward()
    self.assertEqual(z, z2)
    self.assertEqual(w, w2)
    self.assertEqual(x.grad.data, x_grad)
def _step(self, tok, states, attention):
    prev_states, prev_out = states
    lstm_in = torch.cat(
        [self._embedding(tok).squeeze(1), prev_out],
        dim=1
    )
    states = self._lstm(lstm_in, prev_states)
    lstm_out = states[0][-1]
    query = torch.mm(lstm_out, self._attn_w)
    attention, attn_mask, extend_src, extend_vsize = attention
    context, score = step_attention(
        query, attention, attention, attn_mask)
    dec_out = self._projection(torch.cat([lstm_out, context], dim=1))

    # extend generation prob to extended vocabulary
    gen_prob = self._compute_gen_prob(dec_out, extend_vsize)
    # compute the probability of copying
    copy_prob = torch.sigmoid(self._copy(context, states[0][-1], lstm_in))
    # add the copy prob to existing vocab distribution
    lp = torch.log(
        ((-copy_prob + 1) * gen_prob
        ).scatter_add(
            dim=1,
            index=extend_src.expand_as(score),
            source=score * copy_prob
        ) + 1e-8)  # numerical stability for log
    return lp, (states, dec_out), score
def forward(self, words):
    projected = [self.projectors[name](self.embedders[name](words))
                 for name in self.emb_names]
    if self.args.attnnet == 'none':
        out = sum(projected)
    else:
        projected_cat = torch.cat([p.unsqueeze(2) for p in projected], 2)
        s_len, b_size, _, emb_dim = projected_cat.size()
        attn_input = projected_cat

        if self.args.attnnet.startswith('dep_'):
            attn_input = attn_input.view(s_len, b_size * self.n_emb, -1)
            self.m_attn = self.attn_1(self.attn_0(attn_input)[0])
            self.m_attn = self.m_attn.view(s_len, b_size, self.n_emb)
        elif self.args.attnnet.startswith('no_dep_'):
            self.m_attn = self.attn_1(self.attn_0(attn_input)).squeeze(3)

        if self.args.attnnet.endswith('_gating'):
            self.m_attn = torch.sigmoid(self.m_attn)
        elif self.args.attnnet.endswith('_softmax'):
            self.m_attn = F.softmax(self.m_attn, dim=2)

        attended = projected_cat * \
            self.m_attn.view(s_len, b_size, self.n_emb, 1).expand_as(projected_cat)
        out = attended.sum(2)

    if self.args.nonlin == 'relu':
        out = F.relu(out)
    if self.args.emb_dropout > 0.0:
        out = self.dropout(out)
    return out
def forward(self, input, hidden_state):
    # hidden and c are images with several channels
    hidden, c = hidden_state
    combined = torch.cat((input, hidden), 1)  # concatenate in the channels
    A = self.conv(combined)
    # split returns 4 tensors, one per gate
    (ai, af, ao, ag) = torch.split(A, self.num_features, dim=1)
    i = torch.sigmoid(ai)
    f = torch.sigmoid(af)
    o = torch.sigmoid(ao)
    g = torch.tanh(ag)

    next_c = f * c + i * g
    next_h = o * torch.tanh(next_c)
    return next_h, next_c
def test(img_dir, split_test, split_name, model, batch_size, img_size, crop_size):
    since = time.time()

    # -------------------- SETTINGS: DATA TRANSFORMS
    normalizer = [[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]]
    data_transforms = {split_name: transforms.Compose([
        transforms.Resize(img_size),
        transforms.CenterCrop(crop_size),
        transforms.ToTensor(),
        transforms.Normalize(normalizer[0], normalizer[1])])}

    # -------------------- SETTINGS: DATASET BUILDERS
    datasetTest = DataGenerator(img_dir=img_dir, split_file=split_test,
                                transform=data_transforms[split_name])
    dataLoaderTest = DataLoader(dataset=datasetTest, batch_size=batch_size,
                                shuffle=False, num_workers=32)
    dataloaders = {}
    dataloaders[split_name] = dataLoaderTest
    print('Number of testing CXR images: {}'.format(len(datasetTest)))
    dataset_sizes = {split_name: len(datasetTest)}

    # -------------------- TESTING
    model.train(False)
    running_corrects = 0
    output_list = []
    # label_list = []
    preds_list = []

    # Iterate over data.
    for data in dataloaders[split_name]:
        inputs, img_names = data
        # inference only; no autograd needed
        with torch.no_grad():
            inputs = inputs.cuda()
            # forward
            outputs = model(inputs)
        score = torch.sigmoid(outputs)
        score_np = score.data.cpu().numpy()
        preds = score > 0.5
        preds_np = preds.data.cpu().numpy()
        preds = preds.type(torch.cuda.LongTensor)
        outputs = outputs.data.cpu().numpy()

        for j in range(len(img_names)):
            print(str(img_names[j]) + ': ' + str(score_np[j]))
            img_name = str(img_names[j]).rsplit('/', 1)[1]
            if score_np[j] > 0.5:
                copyfile(str(img_names[j]), './images-own-90/' + img_name)
            if score_np[j] < 0.5:
                copyfile(str(img_names[j]), './images-own-0/' + img_name)

        for i in range(outputs.shape[0]):
            output_list.append(outputs[i].tolist())
            preds_list.append(preds_np[i].tolist())
def forward(self, x):
    x = self.pre_layer(x)
    x = x.view(x.size(0), -1)
    x = self.conv4(x)
    x = self.prelu4(x)
    det = torch.sigmoid(self.conv5_1(x))
    box = self.conv5_2(x)
    return det, box
def forward(self, input_, c_input, hx):
    """
    Args:
        batch = 1
        input_: A (batch, input_size) tensor containing input features.
        c_input: A list of size c_num; each element is an input ct from a
            skip word, of shape (batch, hidden_size).
        hx: A tuple (h_0, c_0), which contains the initial hidden and cell
            state, where the size of both states is (batch, hidden_size).

    Returns:
        h_1, c_1: Tensors containing the next hidden and cell state.
    """
    h_0, c_0 = hx
    batch_size = h_0.size(0)
    # assert(batch_size == 1)
    bias_batch = (self.bias.unsqueeze(0).expand(batch_size, *self.bias.size()))
    wh_b = torch.addmm(bias_batch, h_0, self.weight_hh)
    wi = torch.mm(input_, self.weight_ih)
    i, o, g = torch.split(wh_b + wi,
                          split_size_or_sections=self.hidden_size, dim=1)
    i = torch.sigmoid(i)
    g = torch.tanh(g)
    o = torch.sigmoid(o)
    c_num = len(c_input)
    if c_num == 0:
        f = 1 - i
        c_1 = f * c_0 + i * g
        h_1 = o * torch.tanh(c_1)
    else:
        c_input_var = torch.cat(c_input, 0)
        c_input_var = c_input_var.squeeze(1)  # (c_num, hidden_dim)
        alpha_wi = torch.addmm(self.alpha_bias, input_, self.alpha_weight_ih) \
            .expand(c_num, self.hidden_size)
        alpha_wh = torch.mm(c_input_var, self.alpha_weight_hh)
        alpha = torch.sigmoid(alpha_wi + alpha_wh)
        # alpha = i concat alpha
        alpha = torch.exp(torch.cat([i, alpha], 0))
        alpha_sum = alpha.sum(0)
        # alpha = softmax for each hidden element
        alpha = torch.div(alpha, alpha_sum)
        merge_i_c = torch.cat([g, c_input_var], 0)
        c_1 = merge_i_c * alpha
        c_1 = c_1.sum(0).unsqueeze(0)
        h_1 = o * torch.tanh(c_1)
    return h_1, c_1
def forward(self, x):
    out = F.leaky_relu(self.conv1(x), 0.05)       # (?, 32, 27, 27)
    out = self.maxpool(out)                       # (?, 32, 13, 13)
    out = F.leaky_relu(self.conv2(out), 0.05)     # (?, 64, 10, 10)
    out = self.maxpool(out)                       # (?, 64, 6, 6)
    out = out.view(-1, self.conv_dim * 2 * 6 * 6) # (?, 64*6*6)
    out = F.leaky_relu(self.linear(out), 0.05)    # (?, 1024)
    return torch.sigmoid(self.output(out).squeeze())
def forward(self, z):
    z = z.view(z.size(0), z.size(1), 1, 1)
    out = self.fc(z)                             # (?, 256, 2, 2)
    out = F.leaky_relu(self.deconv1(out), 0.05)  # (?, 128, 4, 4)
    out = F.leaky_relu(self.deconv2(out), 0.05)  # (?, 64, 7, 7)
    out = F.leaky_relu(self.deconv3(out), 0.05)  # (?, 32, 14, 14)
    out = torch.sigmoid(self.deconv4(out))       # (?, 1, 28, 28)
    return out
def forward(self, features):
    N, T, _ = features.size()
    rnn_output, _ = self.rnn(features)
    rnn_output = rnn_output.contiguous()
    rnn_output = rnn_output.view(
        rnn_output.size(0) * rnn_output.size(1), rnn_output.size(2))
    outputs = torch.sigmoid(self.scores(rnn_output))
    return outputs.view(N, T, self.K)
def f(x, y):
    out = x + y
    with torch.jit.scope('Foo', out):
        out = x * out
        with torch.jit.scope('Bar', out):
            out = torch.tanh(out)
        out = torch.sigmoid(out)
    return out
def forward(self, input_tensor, cur_state):
    h_cur, c_cur = cur_state

    # concatenate along channel axis
    combined = torch.cat([input_tensor, h_cur], dim=1)

    combined_conv = self.conv(combined)
    cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
    i = torch.sigmoid(cc_i)
    f = torch.sigmoid(cc_f)
    o = torch.sigmoid(cc_o)
    g = torch.tanh(cc_g)

    c_next = f * c_cur + i * g
    h_next = o * torch.tanh(c_next)
    return h_next, c_next
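A minimal self-contained sketch of a cell that this forward could belong to, assuming only that a single convolution emits all four gates at once (class and attribute names are illustrative, not the original module's):

import torch
import torch.nn as nn

class ConvLSTMCellSketch(nn.Module):
    def __init__(self, input_dim, hidden_dim, kernel_size=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        # one conv emits all four gates: 4 * hidden_dim output channels
        self.conv = nn.Conv2d(input_dim + hidden_dim, 4 * hidden_dim,
                              kernel_size, padding=kernel_size // 2)

    def forward(self, input_tensor, cur_state):
        h_cur, c_cur = cur_state
        combined_conv = self.conv(torch.cat([input_tensor, h_cur], dim=1))
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i, f, o = torch.sigmoid(cc_i), torch.sigmoid(cc_f), torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)
        c_next = f * c_cur + i * g
        return o * torch.tanh(c_next), c_next

cell = ConvLSTMCellSketch(input_dim=3, hidden_dim=8)
x = torch.randn(2, 3, 16, 16)
h = c = torch.zeros(2, 8, 16, 16)
h, c = cell(x, (h, c))
print(h.shape)  # torch.Size([2, 8, 16, 16])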
def forward(self, x):
    x = self.pre_layer(x)
    x = x.view(x.size(0), -1)
    x = self.conv5(x)
    x = self.prelu5(x)
    det = torch.sigmoid(self.conv6_1(x))
    box = self.conv6_2(x)
    landmark = self.conv6_3(x)
    return det, box, landmark
def norm_flow_reverse(self, params, z1, z2):
    h = torch.tanh(params[1][0](z2))
    mew_ = params[1][1](h)
    sig_ = torch.sigmoid(params[1][2](h))  # [PB,Z]
    z1 = (z1 - mew_) / sig_
    logdet2 = torch.sum(torch.log(sig_), 1)

    h = torch.tanh(params[0][0](z1))
    mew_ = params[0][1](h)
    sig_ = torch.sigmoid(params[0][2](h))  # [PB,Z]
    z2 = (z2 - mew_) / sig_
    logdet = torch.sum(torch.log(sig_), 1)  # [PB]

    logdet = logdet + logdet2  # [PB,Z], [PB]
    return z1, z2, logdet
def forward(self, input, hidden, encoder_outputs, enc_padding_mask,
            context, extra_zeros, enc_batch_extend_vocab, coverage):
    """
    :param input: (B)
    :param hidden: (1, B, H), (1, B, H)
    :param encoder_outputs: (B, L, 2*H)
    :param enc_padding_mask: (B, L)
    :param context: (B, 2*H); since beam search will use context, we need to send context out.
    :param extra_zeros: (B, n)
    :param enc_batch_extend_vocab: (B, L)
    :param coverage: (B, L)
    :return: (B, V), ((1, B, H), (1, B, H)), (B, 2*H), (B, L), (B, 1), (B, L)
    """
    input = self.embed(input)  # B -> (B, D)
    x = self.x_context(torch.cat((context, input), 1))  # (B, 2*H), (B, D) -> (B, 2*H + D) -> (B, D)
    output, hidden = self.lstm(x.unsqueeze(1), hidden)  # (B, 1, D), ((1, B, H), (1, B, H)) -> (B, 1, H), hidden

    h_decoder, c_decoder = hidden  # (1, B, H), (1, B, H)
    hidden_hat = torch.cat((h_decoder.view(-1, self.args.hidden_dim),
                            c_decoder.view(-1, self.args.hidden_dim)), 1)  # (B, H), (B, H) -> (B, 2*H)
    context, attn_dist, coverage = self.attention(
        hidden_hat, encoder_outputs, enc_padding_mask, coverage)
    # (B, 2*H), (B, L), (B, L) <- (B, 2*H), (B, L, 2*H), (B, L), (B, L)

    p_gen = None
    if self.args.pointer_gen:
        p_gen_input = torch.cat((context, hidden_hat, x), 1)  # -> (B, 2*2*H + D)
        p_gen = self.p_gen_linear(p_gen_input)  # (B, 2*2*H + D) -> (B, 1)
        p_gen = torch.sigmoid(p_gen)  # (B, 1)

    output = torch.cat((output.view(-1, self.args.hidden_dim), context), 1)  # (B, H), (B, 2*H) -> (B, 3*H)
    output = self.out_linear(output)  # (B, 3*H) -> (B, H)
    # output = F.relu(output)

    # map (B, H) -> (B, V)
    # output = self.out2(output)  # (B, H) -> (B, V); changed to the matrix multiplies below
    output_pos = self.hidden2dim_pos(output)  # (B, H) -> (B, D)
    output_neg = self.hidden2dim_neg(output)  # (B, H) -> (B, D)
    output_pos = F.relu(torch.mm(output_pos, self.embed.weight.t()))  # (B, D) x (D, V) -> (B, V)
    output_neg = F.relu(torch.mm(output_neg, self.embed.weight.t()))  # (B, D) x (D, V) -> (B, V)
    output = output_pos - output_neg  # (B, V)

    # turn output into vocab_dist
    vocab_dist = F.softmax(output, dim=1)  # (B, V)

    if self.args.pointer_gen:
        vocab_dist_ = p_gen * vocab_dist      # (B, 1) * (B, V) -> (B, V)
        attn_dist_ = (1 - p_gen) * attn_dist  # (B, 1) * (B, L) -> (B, L)
        if extra_zeros is not None:
            vocab_dist_ = torch.cat([vocab_dist_, extra_zeros], 1)  # (B, V), (B, n) -> (B, V + n)
        final_dist = vocab_dist_.scatter_add(1, enc_batch_extend_vocab, attn_dist_)  # (B, V + n)
    else:
        final_dist = vocab_dist  # (B, V)

    return final_dist, hidden, context, attn_dist, p_gen, coverage
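As a worked illustration of the pointer-generator mixing at the end of this forward, a toy-sized numeric sketch (all values made up): p_gen weights the vocabulary distribution, 1 - p_gen weights the copy (attention) distribution, and scatter_add routes copy mass to each source token's vocabulary id, so the result is still a valid distribution:

import torch

p_gen = torch.tensor([[0.7]])                    # (B, 1)
vocab_dist = torch.tensor([[0.5, 0.3, 0.2]])     # (B, V)
attn_dist = torch.tensor([[0.9, 0.1]])           # (B, L)
enc_batch_extend_vocab = torch.tensor([[2, 0]])  # vocab ids of source tokens
final = (p_gen * vocab_dist).scatter_add(
    1, enc_batch_extend_vocab, (1 - p_gen) * attn_dist)
print(final, final.sum())  # tensor([[0.38, 0.21, 0.41]]), sums to 1.0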
def forward(self, x):
    hidden = self.embed(x)
    output = self.linear(torch.cat([hidden, self.read], 1))
    read_params = torch.sigmoid(output[:, :2 + self.get_read_size()])
    self.u = read_params[:, 0].contiguous()
    self.d = read_params[:, 1].contiguous()
    self.v = read_params[:, 2:].contiguous()
    self.read_stack(self.v.data, self.u.data, self.d.data)
    return output[:, 2 + self.get_read_size():]  # should not apply softmax
def forward(self, x):
    x = torch.relu(self.hidden(x))
    x = torch.sigmoid(self.out(x))
    return x
def eval_net(net, val_loader, device, save_imgs=False):
    n_val = len(val_loader)
    n_val_1, n_val_2, n_val_3, n_val_4, n_val_5 = n_val, n_val, n_val, n_val, n_val
    net.eval()
    dice_1, dice_2, dice_3, dice_4, dice_5 = 0, 0, 0, 0, 0

    with tqdm(total=n_val, desc='Validation round', unit='batch', leave=False) as pbar:
        for i, batch in enumerate(val_loader):
            img = batch['img'][0]
            mask_1, mask_2, mask_3, mask_4, mask_5 = (
                batch['mask_1'][0], batch['mask_2'][0], batch['mask_3'][0],
                batch['mask_4'][0], batch['mask_5'][0])
            img = img.to(device=device, dtype=torch.float32)
            mask_1 = mask_1.to(device=device, dtype=torch.float32)
            mask_2 = mask_2.to(device=device, dtype=torch.float32)
            mask_3 = mask_3.to(device=device, dtype=torch.float32)
            mask_4 = mask_4.to(device=device, dtype=torch.float32)
            mask_5 = mask_5.to(device=device, dtype=torch.float32)

            with torch.no_grad():
                mask_pred_1, mask_pred_2, mask_pred_3, mask_pred_4, mask_pred_5 = net(img.cuda())

            pred_1 = torch.sigmoid(mask_pred_1)
            pred_2 = torch.sigmoid(mask_pred_2)
            pred_3 = torch.sigmoid(mask_pred_3)
            pred_4 = torch.sigmoid(mask_pred_4)
            pred_5 = torch.sigmoid(mask_pred_5)
            pred_1 = (pred_1 > 0.5).float()
            pred_2 = (pred_2 > 0.5).float()
            pred_3 = (pred_3 > 0.5).float()
            pred_4 = (pred_4 > 0.5).float()
            pred_5 = (pred_5 > 0.5).float()

            # organs with an empty ground-truth mask are excluded from the average
            if torch.sum(mask_1) != 0:
                dice_1 += dice_coeff(pred_1, mask_1.cuda()).item()
            else:
                n_val_1 -= 1
            if torch.sum(mask_2) != 0:
                dice_2 += dice_coeff(pred_2, mask_2.cuda()).item()
            else:
                n_val_2 -= 1
            if torch.sum(mask_3) != 0:
                dice_3 += dice_coeff(pred_3, mask_3.cuda()).item()
            else:
                n_val_3 -= 1
            if torch.sum(mask_4) != 0:
                dice_4 += dice_coeff(pred_4, mask_4.cuda()).item()
            else:
                n_val_4 -= 1
            if torch.sum(mask_5) != 0:
                dice_5 += dice_coeff(pred_5, mask_5.cuda()).item()
            else:
                n_val_5 -= 1

            pbar.update()

            if save_imgs:
                # save predictions
                save_img(mask_pred_1, 'liver' + str(i))
                save_img(mask_pred_2, 'kidneys' + str(i))
                save_img(mask_pred_3, 'panc' + str(i))
                save_img(mask_pred_4, 'spleen' + str(i))
                save_img(mask_pred_5, 'bladder' + str(i))

    if save_imgs:
        print('Predicted masks saved in ./predictions/ !')
    print(n_val_1)
    print(f'dice_liver: {dice_1/n_val_1}')
    print(f'dice_kidneys: {dice_2/n_val_2}')
    print(f'dice_panc: {dice_3/n_val_3}')
    print(f'dice_spleen: {dice_4/n_val_4}')
    print(f'dice_bladder: {dice_5/n_val_5}')
def forward(self, x):
    x = x * torch.sigmoid(x)
    return x
def multilabel_soft_margin_loss(input, target, weight=None, size_average=True):
    input = torch.sigmoid(input)
    return binary_cross_entropy(input, target, weight, size_average)
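A hedged sanity check against modern PyTorch: with mean reduction, sigmoid followed by binary cross-entropy computes the same quantity as the built-in torch.nn.functional.multilabel_soft_margin_loss (which uses logsigmoid internally, so the two agree up to numerical precision):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 5)
targets = torch.randint(0, 2, (4, 5)).float()
a = F.binary_cross_entropy(torch.sigmoid(logits), targets)
b = F.multilabel_soft_margin_loss(logits, targets)
print(a.item(), b.item())  # should match up to floating-point error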
def forward(self, x):
    return x * torch.sigmoid(self.beta * x)
def forward(self, x):
    x = self.bn(self.fc(x))
    return torch.mul(x[:, :self.od], torch.sigmoid(x[:, self.od:]))
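This is a gated linear unit: the first self.od features are gated by a sigmoid of the remaining ones. When the split is into equal halves it coincides with torch.nn.functional.glu, as this small check (with assumed sizes) shows:

import torch
import torch.nn.functional as F

x = torch.randn(4, 10)
od = 5  # assumed: output dim is half the feature dim
manual = x[:, :od] * torch.sigmoid(x[:, od:])
print(torch.allclose(manual, F.glu(x, dim=1)))  # True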
def train_epoch(model, train_iterator, val_iterator, optim, criterion,
                scheduler, device="cuda"):
    model.train()

    train_loss = []
    num_corrects = 0
    num_total = 0
    labels = []
    outs = []

    tbar = tqdm(train_iterator)
    for item in tbar:
        x = item["x"].to(device).long()
        target_id = item["target_id"].to(device).long()
        part = item["part"].to(device).long()
        label = item["label"].to(device).float()
        elapsed_time = item["elapsed_time"].to(device).long()
        duration_previous_content = item["duration_previous_content"].to(device).long()

        optim.zero_grad()
        output = model(x, target_id, part, elapsed_time, duration_previous_content)
        target_idx = (label.view(-1) >= 0).nonzero()
        loss = criterion(output.view(-1)[target_idx], label.view(-1)[target_idx])
        loss.backward()
        optim.step()
        scheduler.step()
        train_loss.append(loss.item())

        output = output[:, -1]
        label = label[:, -1]
        target_idx = (label.view(-1) >= 0).nonzero()

        pred = (torch.sigmoid(output) >= 0.5).long()
        num_corrects += (pred.view(-1)[target_idx] == label.view(-1)[target_idx]).sum().item()
        num_total += len(label)

        labels.extend(label.view(-1)[target_idx].data.cpu().numpy())
        outs.extend(output.view(-1)[target_idx].data.cpu().numpy())

        tbar.set_description('loss - {:.4f}'.format(loss))

    acc = num_corrects / num_total
    auc = roc_auc_score(labels, outs)
    loss = np.mean(train_loss)

    preds = []
    labels = []
    model.eval()
    i = 0
    for item in tqdm(val_iterator):
        x = item["x"].to(device).long()
        target_id = item["target_id"].to(device).long()
        part = item["part"].to(device).long()
        label = item["label"].to(device).float()
        elapsed_time = item["elapsed_time"].to(device).long()
        duration_previous_content = item["duration_previous_content"].to(device).long()

        output = model(x, target_id, part, elapsed_time, duration_previous_content)
        preds.extend(torch.nn.Sigmoid()(output[:, -1]).view(-1).data.cpu().numpy().tolist())
        labels.extend(label[:, -1].view(-1).data.cpu().numpy())
        i += 1
        if i > 100:
            break

    auc_val = roc_auc_score(labels, preds)

    return loss, acc, auc, auc_val
def forward(self, x, targets=None, image_dim=832):
    # tensors for cuda support
    _float_tensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.image_dim = image_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # get outputs
    x = torch.sigmoid(prediction[..., 0])          # center x
    y = torch.sigmoid(prediction[..., 1])          # center y
    w = prediction[..., 2]                         # width
    h = prediction[..., 3]                         # height
    pred_conf = torch.sigmoid(prediction[..., 4])  # conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # cls pred.

    # if grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # add offset and scale with anchors
    pred_boxes = _float_tensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0.0
    else:
        iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
            pred_boxes=pred_boxes,
            pred_cls=pred_cls,
            target=targets,
            anchors=self.scaled_anchors,
            ignore_threshold=self.ignore_threshold,
        )

        obj_mask = obj_mask.bool()      # convert int8 to bool
        noobj_mask = noobj_mask.bool()  # convert int8 to bool

        # loss: mask outputs to ignore non-existing objects (except with conf. loss)
        loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.no_obj_scale * loss_conf_noobj
        loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
        total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # metrics
        cls_acc = 100 * class_mask[obj_mask].mean()
        conf_obj = pred_conf[obj_mask].mean()
        conf_noobj = pred_conf[noobj_mask].mean()
        conf50 = (pred_conf > 0.5).float()
        iou50 = (iou_scores > 0.5).float()
        iou75 = (iou_scores > 0.75).float()
        detected_mask = conf50 * class_mask * tconf
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
        recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

        self.metrics = {
            "loss": to_cpu(total_loss).item(),
            "x": to_cpu(loss_x).item(),
            "y": to_cpu(loss_y).item(),
            "w": to_cpu(loss_w).item(),
            "h": to_cpu(loss_h).item(),
            "conf": to_cpu(loss_conf).item(),
            "cls": to_cpu(loss_cls).item(),
            "cls_acc": to_cpu(cls_acc).item(),
            "recall50": to_cpu(recall50).item(),
            "recall75": to_cpu(recall75).item(),
            "precision": to_cpu(precision).item(),
            "conf_obj": to_cpu(conf_obj).item(),
            "conf_noobj": to_cpu(conf_noobj).item(),
            "grid_size": grid_size,
        }

        return output, total_loss
def forward(self, x):
    return torch.sigmoid(self.network(x))
def forward(self, input, targets=None):
    bs = input.size(0)
    in_h = input.size(2)
    in_w = input.size(3)
    stride_h = self.img_size[1] / in_h
    stride_w = self.img_size[0] / in_w
    scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]

    prediction = input.view(bs, self.num_anchors, self.bbox_attrs, in_h, in_w) \
        .permute(0, 1, 3, 4, 2).contiguous()

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])          # Center x
    y = torch.sigmoid(prediction[..., 1])          # Center y
    w = prediction[..., 2]                         # Width
    h = prediction[..., 3]                         # Height
    conf = torch.sigmoid(prediction[..., 4])       # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    if targets is not None:
        # build target
        mask, noobj_mask, tx, ty, tw, th, tconf, tcls = self.get_target(
            targets, scaled_anchors, in_w, in_h, self.ignore_threshold)
        mask, noobj_mask = mask.cuda(), noobj_mask.cuda()
        tx, ty, tw, th = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda()
        tconf, tcls = tconf.cuda(), tcls.cuda()
        # losses
        loss_x = self.bce_loss(x * mask, tx * mask)
        loss_y = self.bce_loss(y * mask, ty * mask)
        loss_w = self.mse_loss(w * mask, tw * mask)
        loss_h = self.mse_loss(h * mask, th * mask)
        loss_conf = self.bce_loss(conf * mask, mask) + \
            0.5 * self.bce_loss(conf * noobj_mask, noobj_mask * 0.0)
        loss_cls = self.bce_loss(pred_cls[mask == 1], tcls[mask == 1])
        # total loss = losses * weight
        loss = loss_x * self.lambda_xy + loss_y * self.lambda_xy + \
            loss_w * self.lambda_wh + loss_h * self.lambda_wh + \
            loss_conf * self.lambda_conf + loss_cls * self.lambda_cls
        return loss, loss_x.item(), loss_y.item(), loss_w.item(), \
            loss_h.item(), loss_conf.item(), loss_cls.item()
    else:
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
        # Calculate offsets for each grid
        grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_w, 1).repeat(
            bs * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_h, 1).t().repeat(
            bs * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
        # Calculate anchor w, h
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)
        # Add offset and scale with anchors
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h
        # Results
        _scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor)
        output = torch.cat((pred_boxes.view(bs, -1, 4) * _scale,
                            conf.view(bs, -1, 1),
                            pred_cls.view(bs, -1, self.num_classes)), -1)
        return output.data
def forward(self, x):
    x = F.relu(self.linear(x))
    origin = torch.sigmoid(self.origin(x))
    return origin
def ensembleResnetModelsPredictions(test_loader, model, threshold, X_test, run_name, models):
    main_predictions = None
    flag = True
    print("====== Starting Prediction Model Ensembling ======")
    true_labels = []
    for mt in models:
        mname = "./ensembler/{}.pth".format(mt)
        print("load model {}".format(mname))
        model = get_from_models(mt, 17, mname is not None, mname)
        model.cuda()
        model.eval()
        predictions = []
        dir_path = './out'
        mlb = X_test.getLabelEncoder()
        with torch.no_grad():
            for id_batch, (data, target) in enumerate(test_loader):
                data = data.cuda(non_blocking=True)
                data = Variable(data)
                pred = torch.sigmoid(model(data))
                predictions.append(pred.data.cpu().numpy())
                if flag:
                    true_labels.append(target.data.cpu().numpy())
                if id_batch % 10 == 0:
                    print("Done {}%".format(
                        100 * id_batch * len(data) / float(len(test_loader.dataset))))
        predictions = np.vstack(predictions)
        if not flag:
            print("Main Prediction {}, prediction {}".format(
                main_predictions[0], predictions[0]))
            main_predictions = np.add(main_predictions, predictions)
        else:
            flag = False
            main_predictions = predictions

    predictions = main_predictions / len(models)
    true_labels = np.vstack(true_labels)
    print("====== Raw predictions done ========")

    predictions = predictions > threshold
    pred_path = os.path.join(dir_path, run_name + '-raw-pred-1' + '.csv')
    np.savetxt(pred_path, predictions, delimiter=";")

    result = pd.DataFrame({
        'image_name': X_test.name(),
        'tags': mlb.inverse_transform(predictions)
    })
    result['tags'] = result['tags'].apply(lambda tags: " ".join(tags))
    print("======= Final predictions done =======")

    result_path = os.path.join(dir_path, run_name + '-final-pred-' + '.csv')
    result.to_csv(result_path, index=False)
    print("Final predictions saved to {}".format(result_path))

    ff2 = fbeta(np.array(true_labels)[:len(predictions)], np.array(predictions) > threshold)
    print("Final fbeta score is " + str(ff2 * 100))
def swish(x):
    return x * torch.sigmoid(x)
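swish with unit slope is the SiLU activation; on recent PyTorch (1.7+) it matches the built-in torch.nn.functional.silu, as this quick check shows:

import torch
import torch.nn.functional as F

x = torch.randn(5)
print(torch.allclose(swish(x), F.silu(x)))  # True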
import torch
from matplotlib import pyplot as plt

plt.style.use(['science', 'muted'])

def reset_v(h, s):
    return h * (1 - s)

x = torch.arange(-1, 1.01, 0.01)
figure = plt.figure(dpi=200)

fig0 = plt.subplot(1, 2, 1)
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('$\\Theta(x)$ and $\\sigma(\\alpha x)$')
plt.plot(x, (x >= 0).float(), label='$\\Theta(x)$')
plt.plot(x, torch.sigmoid(5 * x), linestyle=':', label='$\\sigma(\\alpha x), \\alpha=5.0$')
plt.plot(x, torch.sigmoid(10 * x), linestyle=':', label='$\\sigma(\\alpha x), \\alpha=10.0$')
plt.plot(x, torch.sigmoid(50 * x), linestyle=':', label='$\\sigma(\\alpha x), \\alpha=50.0$')
plt.legend()

fig1 = plt.subplot(1, 2, 2)
h = torch.arange(0, 2.5, 0.01)
plt.xlabel('$H(t)$')
plt.ylabel('$V(t)$')
plt.title('Voltage Reset')
plt.plot(h, reset_v(h, (h >= 1).float()), label='$\\Theta(x)$')
plt.plot(h, reset_v(h, torch.sigmoid(5 * (h - 1))), linestyle=':', label='$\\sigma(\\alpha x), \\alpha=5.0$')
plt.plot(h, reset_v(h, torch.sigmoid(10 * (h - 1))), linestyle=':', label='$\\sigma(\\alpha x), \\alpha=10.0$')
plt.plot(h, reset_v(h, torch.sigmoid(50 * (h - 1))), linestyle=':', label='$\\sigma(\\alpha x), \\alpha=50.0$')
plt.axhline(0, linestyle='--', label='$V_\\text{reset}$', c='g')
plt.axhline(1, linestyle='--', label='$V_\\text{threshold}$', c='r')
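The plots illustrate why sigmoid(alpha * x) is used as a smooth surrogate for the Heaviside step Theta(x) in spiking networks: as alpha grows it approaches the step, while its derivative alpha * sigma * (1 - sigma) stays usable for backpropagation where Theta'(x) is zero almost everywhere. A short check of that derivative:

import torch

alpha = 10.0
x = torch.linspace(-1, 1, 5, requires_grad=True)
s = torch.sigmoid(alpha * x)
s.sum().backward()
print(torch.allclose(x.grad, alpha * s * (1 - s)))  # True: peaked at x = 0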
def decode(self, z):
    h3 = F.relu(self.fc3(z))
    return torch.sigmoid(self.fc4(h3))
def create_inferenced_velodyne_data(dataset_cfg_path, dataset_section, model_cfg_path,
                                    model_path, pred_key, confidence, velodyne_path,
                                    output_path, batch_size=6, eval_flag=True):
    # get configurations
    dataset_cfg = ConfigParser()
    model_cfg = ConfigParser()
    dataset_cfg.read(dataset_cfg_path)
    model_cfg.read(model_cfg_path)

    # prepare dataset
    dataset = get_class(dataset_cfg[dataset_section]['class'])(dataset_cfg[dataset_section])
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=batch_size,
        pin_memory=False,
        collate_fn=get_class(dataset_cfg[dataset_section]['collate_fn']),
    )

    # prepare network model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = LPCC_Net(model_cfg['MODEL']).to(device)
    state_dict = torch.load(model_path)
    net.load_state_dict(state_dict)
    if eval_flag:
        net.eval()

    # generate predictions
    print('generating predictions ...', flush=True)
    pred_dict = defaultdict(list)
    for example in tqdm(dataloader):
        # network feed-forward
        with torch.no_grad():
            res = net(example)
        ret_dict = res[-1]
        # get output confidence and indices
        out_features = torch.sigmoid(get_input(ret_dict, pred_key).features)
        out_indices = get_input(ret_dict, pred_key).indices
        filtered_indices = out_indices[out_features[:, 0] > confidence]
        # transform to points
        coors = indices_to_coors(filtered_indices, [0.1, 0.05, 0.05],
                                 [-3.0, -40.0, 0.0])[:, [3, 2, 1]]
        coors_w_r = torch.cat((coors, torch.zeros(coors.shape[0], 1, dtype=coors.dtype,
                                                  device=coors.device)), dim=1)
        # deal with batches
        for batch in range(len(example['scene_idx'])):
            scene_idx = example['scene_idx'][batch]
            b_idx = filtered_indices[:, 0] == batch
            pred_dict[scene_idx].append(coors_w_r[b_idx].cpu().numpy())
    print('done.', flush=True)

    # merge with original velodyne data
    print('writing to file ...', flush=True)
    for s_idx in tqdm(pred_dict.keys()):
        scene_pts = read_bin(os.path.join(velodyne_path, str(s_idx).zfill(6) + '.bin'))
        merged_pts = np.concatenate([*pred_dict[s_idx], scene_pts], axis=0)
        with open(os.path.join(output_path, str(s_idx).zfill(6) + '.bin'), 'wb') as f:
            merged_pts.tofile(f)
    print('done.', flush=True)
def forward(self, x):
    y_spatial = self.spatial_attn(x)
    y_channel = self.channel_attn(x)
    y = y_spatial * y_channel
    y = torch.sigmoid(self.conv(y))
    return y
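The final sigmoid makes y a (0, 1)-valued attention mask. The module's internals (spatial_attn, channel_attn, conv) are not shown here, so the stand-in below only sketches how such a mask is typically consumed, with a plain 1x1 convolution in place of the real attention branches:

import torch
import torch.nn as nn

feat = torch.randn(2, 16, 8, 8)
attn = torch.sigmoid(nn.Conv2d(16, 16, 1)(feat))  # stand-in for the module above
gated = feat * attn                               # attended features
print(gated.shape)  # torch.Size([2, 16, 8, 8])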
def forward(self, input, mask=None, weight=None, train=True, n_sources=None,
            V=None, A=None):
    """
    input: shape (B, F, T)
    """
    if V is None:
        V = self.embedding_net(input)  # (B, F*K, T)
        if self.V_activate:
            V = torch.tanh(V)
    V = V.contiguous().view(V.shape[0], self.embed_dim, -1, V.shape[2])  # (B, K, F, T)
    V = V.contiguous().view(V.shape[0], self.embed_dim, -1)  # (B, K, F*T)
    VT = torch.transpose(V, 1, 2)  # (B, F*T, K)

    if self.A_mask:
        mask[mask < self.A_mask] = 0.0
        mask[mask >= self.A_mask] = 1.0
    if self.V_norm:
        # V = F.normalize(V, p=2, dim=1)
        VT = F.normalize(VT, p=2, dim=2)

    if train:
        # calculate the ideal attractors:
        # first calculate the source assignment matrix Y
        if A is None:
            if self.kmeans_type:
                if not n_sources:
                    n_sources = self.n_sources
                A = torch.zeros(V.shape[0], n_sources, self.embed_dim).cuda()  # (B, nspk, K)
                temp_A = torch.zeros(V.shape[0], n_sources, self.embed_dim).cuda()  # (B, nspk, K)
                err = torch.zeros(V.shape[0])
                for j in range(self.n_init):
                    for i in range(V.shape[0]):
                        ind = torch.randperm(V.shape[2])[:n_sources].cuda()
                        temp_A[i] = VT[i, ind, :]  # randomly initiate A
                    for i in range(len(self.kmeans_layers)):
                        Y, temp_A = self.kmeans_layers[i](VT, temp_A, weight)
                    if self.n_init == 1:
                        A = temp_A
                    else:
                        dist = -kMeansIter.distance(
                            VT, temp_A, alpha=self.alpha,
                            dist_type=self.kmeans_dist)  # B, nspk, F*T
                        temp_err = torch.sum(dist * Y, dim=[1, 2])
                        for i in range(A.shape[0]):
                            if j == 0 or temp_err[i] < err[i]:
                                err[i] = temp_err[i]
                                if A[i].shape[0] != temp_A[i].shape[0]:
                                    print(ind)
                                A[i] = temp_A[i]
            else:
                Y = mask * weight  # B, nspk, F*T
                # attractors are the weighted average of the embeddings,
                # calculated from V and Y
                V_Y = torch.bmm(Y, VT)  # B, nspk, K
                sum_Y = torch.sum(Y, 2, keepdim=True).expand_as(V_Y)  # B, nspk, K
                A = V_Y / (sum_Y + self.eps)  # B, nspk, K
    else:
        if not n_sources:
            n_sources = self.n_sources
        A = torch.zeros(V.shape[0], n_sources, V.shape[1])
        if self.km is None:
            if self.da_sim == 'cos':
                self.km = SphericalKMeans(n_clusters=n_sources)
            if self.da_sim == 'dotProduct' or self.da_sim == 'negL2norm':
                self.km = KMeans(n_clusters=n_sources)
        for i in range(VT.shape[0]):
            self.km.fit(VT[i].cpu(), sample_weight=weight[i][0].cpu())
            A[i] = torch.from_numpy(self.km.cluster_centers_)
        A = A.cuda()

    # calculate the similarity between embeddings and attractors
    if self.A_norm:
        A = F.normalize(A, p=2, dim=2)
    if self.da_sim == 'dotProduct':
        dist = torch.bmm(A, V)  # B, nspk, F*T
    elif self.da_sim == 'negL2norm':
        dist = kMeansIter.distance(VT, A, alpha=self.alpha, dist_type='negL2norm')
    elif self.da_sim == 'cos':
        dist = kMeansIter.distance(VT, A, alpha=self.alpha, dist_type='cos')  # B, nspk, F*T
    elif self.da_sim == 'sin':
        sin_va = F.relu(1 - (A.bmm(VT.transpose(1, 2))).pow(2)).sqrt()  # B, nspk, TF
        if A.shape[1] == 2:
            mask = sin_va[:, [1, 0], :] / (torch.sum(sin_va, dim=1, keepdim=True) + self.eps)
        if A.shape[1] > 2:
            mask = torch.zeros(mask.shape).cuda()
            ind = torch.arange(mask.shape[1])
            for i in range(mask.shape[1]):
                mask[:, i, :] = sin_va[:, ind[ind != i], :].min(dim=1).values / \
                    (sin_va[:, i, :] + sin_va[:, ind[ind != i], :].min(dim=1).values + self.eps)
        return (mask, V, A)
    else:
        raise ValueError(
            'Attractor similarity must be dotProduct or negL2norm or cos')

    # re-scale the similarity distance
    if self.dist_scaler:
        dist = dist * self.scaler

    # generate the masks
    if self.act_fn == 'softmax':
        mask = F.softmax(dist, dim=1)  # B, nspk, F*T
        return (mask, V, A)
    elif self.act_fn == 'sigmoid':
        mask = torch.sigmoid(dist)  # B, nspk, F*T
        return (mask, V, A)
    else:
        raise ValueError('Activation function must be softmax or sigmoid')
def backward(self, z):
    y = None
    for i, layer in enumerate(reversed(self.model)):
        y, z = layer.backward(y, z)
    x = torch.sigmoid(y)
    return x
def forward(self, input):
    z_hidden = self.hidden(input)
    a_hidden = torch.sigmoid(z_hidden)
    out = self.output(a_hidden)
    out = self.softmax(out)
    return out
def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = self.output(x)
    return x
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2, stride=1))
    x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=1))
    x = F.relu(self.fc1(x.view(-1, (2**4) * 6 * 6)))
    x = torch.sigmoid(self.fc2(x))
    return x
def forward(self, x):
    batch_size, chans, height, width = x.size()

    # need to first determine the hidden state size, which is tied to the cnn feature size
    dummy_glimpse = torch.Tensor(batch_size, chans, self.attn_grid_size, self.attn_grid_size)
    if x.is_cuda:
        dummy_glimpse = dummy_glimpse.cuda()
    dummy_feature_map = self.encoder.forward(dummy_glimpse)
    self.att_rnn.forward(dummy_feature_map.view(
        batch_size, old_div(dummy_feature_map.nelement(), batch_size)))
    self.att_rnn.reset_hidden_state(batch_size, x.data.is_cuda)

    outputs = []
    init_tensor = torch.zeros(batch_size, self.num_classes, height, width)
    if x.data.is_cuda:
        init_tensor = init_tensor.cuda()
    outputs.append(init_tensor)
    self.init_weights(self.att_rnn.get_hidden_state())

    for t in range(self.timesteps):
        # 1) decode hidden state to generate gaussian attention parameters
        state = self.att_rnn.get_hidden_state()
        gauss_attn_params = torch.tanh(F.linear(state, self.att_decoder_weights))

        # 2) extract glimpse
        glimpse = self.attn_reader.forward(x, gauss_attn_params, self.attn_grid_size)

        # visualize first glimpse in batch for all t
        torch_glimpses = torch.chunk(glimpse, batch_size, dim=0)
        ImageVisualizer().set_image(
            PTImage.from_cwh_torch(torch_glimpses[0].squeeze().data),
            'zGlimpse {}'.format(t))

        # 3) use conv stack or resnet to extract features
        feature_map = self.encoder.forward(glimpse)
        conv_output_dims = self.encoder.get_output_dims()[:-1][::-1]
        conv_output_dims.append(glimpse.size())

        # 4) update hidden state (think about this connection a bit more)
        self.att_rnn.forward(feature_map.view(
            batch_size, old_div(feature_map.nelement(), batch_size)))

        # 5) use deconv network to get partial masks
        partial_mask = self.decoder.forward(feature_map, conv_output_dims)

        # 6) write masks additively to mask canvas
        partial_canvas = self.attn_writer.forward(partial_mask, gauss_attn_params,
                                                  (height, width))
        outputs.append(torch.add(outputs[-1], partial_canvas))

    # return the sigmoided versions
    for i in range(len(outputs)):
        outputs[i] = torch.sigmoid(outputs[i])
    return outputs
def __init__(self, numObsTraits: int, numCatList: Iterable[int], nLatentDim: int,
             decoderType: str, **kwargs):
    """
    Variational autoencoder used for latent phenotype inference.

    Parameters
    ----------
    numObsTraits : int
        Number of traits or symptoms used in the model.
    numCatList : Iterable[int]
        List containing the number of categories for each categorical covariate.
    nLatentDim : int
        Number of latent dimensions in the model.
    decoderType : str
        Type of decoder. Must be one of the following:
        'Linear', 'Linear_Monotonic', 'Nonlinear', 'Nonlinear_Monotonic'
    **kwargs : type
        Multiple kwargs available. Please see source code for details.

    Returns
    -------
    None
    """
    super(VAE, self).__init__()
    self.numObsTraits = numObsTraits
    self.nLatentDim = nLatentDim
    self.numCatList = numCatList
    self.decoderType = decoderType

    assert self.decoderType in ['Linear', 'Linear_Monotonic', 'Nonlinear', 'Nonlinear_Monotonic'], \
        "Currently supported decoders for VAE include: 'Linear','Linear_Monotonic','Nonlinear','Nonlinear_Monotonic'"

    allKeywordArgs = list(kwargs.keys())

    if 'linkFunction' not in allKeywordArgs:
        self.linkFunction = lambda x: torch.sigmoid(x)
    else:
        linkFunction = kwargs['linkFunction']
        assert linkFunction in ['Logit', 'Probit'], "Only Logit and Probit link functions currently supported."
        if linkFunction == 'Logit':
            self.linkFunction = lambda x: torch.sigmoid(x)
        else:
            self.linkFunction = lambda x: dist.Normal(
                torch.tensor(0.0, dtype=torch.float32, device=x.device),
                torch.tensor(1.0, dtype=torch.float32, device=x.device)).cdf(x)

    if 'computeDevice' not in allKeywordArgs:
        # Specifies compute device for model fitting; can also be specified
        # later by calling SwitchDevice.
        self.compute_device = None
    else:
        self.compute_device = kwargs['computeDevice']

    if 'dropLinearCovariateColumn' not in allKeywordArgs:
        # Specifies model to drop one category from each covariate. Defaults to True.
        self.dropLinearCovariateColumn = True
    else:
        self.dropLinearCovariateColumn = kwargs['dropLinearCovariateColumn']
        assert isinstance(self.dropLinearCovariateColumn, bool), "dropLinearCovariateColumn expects boolean value"

    if 'coupleCovariates' not in allKeywordArgs:
        # Specifies whether to couple covariates to the non-linear MLP network
        # (True), or to model them using an independent linear network (False).
        # Defaults to True.
        self.coupleCovariates = True
    else:
        self.coupleCovariates = kwargs['coupleCovariates']
        assert isinstance(self.coupleCovariates, bool), "coupleCovariates expects boolean value"
        if self.decoderType not in ['Nonlinear']:
            print("Warning: Not fitting Nonlinear model. Coupling covariates has no effect on inference.")

    if 'encoderNetworkHyperparameters' not in allKeywordArgs:
        self.encoderHyperparameters = {'n_layers': 2, 'n_hidden': 64,
                                       'dropout_rate': 0.0, 'use_batch_norm': True}
    else:
        self.encoderHyperparameters = kwargs['encoderNetworkHyperparameters']
        assert isinstance(self.encoderHyperparameters, dict), "Expects dictionary of encoder hyperparameters"
        assert set(self.encoderHyperparameters.keys()) == set(['n_layers', 'n_hidden', 'dropout_rate', 'use_batch_norm']), \
            "Encoder hyperparameters must include: 'n_layers','n_hidden','dropout_rate','use_batch_norm'"

    if 'decoderNetworkHyperparameters' not in allKeywordArgs:
        self.decoderHyperparameters = {'n_layers': 2, 'n_hidden': 64,
                                       'dropout_rate': 0.0, 'use_batch_norm': True}
    else:
        self.decoderHyperparameters = kwargs['decoderNetworkHyperparameters']
        assert isinstance(self.decoderHyperparameters, dict), "Expects dictionary of decoder hyperparameters"
        assert set(self.decoderHyperparameters.keys()) == set(['n_layers', 'n_hidden', 'dropout_rate', 'use_batch_norm']), \
            "Decoder hyperparameters must include: 'n_layers','n_hidden','dropout_rate','use_batch_norm'"
        if self.decoderType not in ['Nonlinear', 'Nonlinear_Monotonic']:
            print("Warning: Decoder neural network hyperparameters specified for a linear model. Parameters will not be used.")

    if self.dropLinearCovariateColumn:
        self.numCovParam = sum(self.numCatList) - len(self.numCatList)
    else:
        self.numCovParam = sum(self.numCatList)

    self.encoder = MeanScaleEncoder(self.numObsTraits, self.nLatentDim,
                                    n_cat_list=self.numCatList,
                                    **self.encoderHyperparameters)

    if self.decoderType == 'Linear':
        self.decoder = LinearDecoder(self.nLatentDim, self.numCatList, self.numObsTraits,
                                     self.dropLinearCovariateColumn, True)
    elif self.decoderType == 'Linear_Monotonic':
        self.decoder = LinearDecoder_Monotonic(self.nLatentDim, self.numCatList,
                                               self.numObsTraits,
                                               self.dropLinearCovariateColumn, True)
    elif self.decoderType == 'Nonlinear':
        self.decoder = NonlinearMLPDecoder(self.nLatentDim, self.numCatList,
                                           self.numObsTraits, self.dropLinearCovariateColumn,
                                           self.coupleCovariates, **self.decoderHyperparameters)
    else:
        self.decoder = NonlinearMLPDecoder_Monotonic(self.nLatentDim, self.numCatList,
                                                     self.numObsTraits,
                                                     self.dropLinearCovariateColumn,
                                                     **self.decoderHyperparameters)

    if self.compute_device is not None:
        self.SwitchDevice(self.compute_device)
    self.eval()
def getActivationVec(self, input):
    z_hidden = self.hidden(input)
    a_hidden = torch.sigmoid(z_hidden)
    a_hidden -= 0.5
    return a_hidden
def generate_cropped_img(config, input, output, target, meta, for_training=False):
    output = torch.sigmoid(output).data.cpu().numpy()
    output[output > 0.5] = 1
    output[output <= 0.5] = 0
    output = np.asarray(output, dtype="float32")
    input = input.data.cpu().numpy()
    input = np.asarray(input, dtype="float32")
    target = target.data.cpu().numpy()
    target[target > 0] = 255
    target = np.asarray(target, dtype="float32")

    mask_index_dict = {}
    for i in range(len(output)):
        for c in range(config['num_classes']):
            img = input[i, c]
            mask = output[i, c]
            # get coordinates
            mask_index = np.where(mask == 1)
            # skip no-lung images
            if len(mask_index[0]) > 0 and len(mask_index[1]) > 0:
                min_height = np.min(mask_index[0])
                max_height = np.max(mask_index[0])
                min_width = np.min(mask_index[1])
                max_width = np.max(mask_index[1])
                mask_index_dict[meta['img_id'][i]] = [
                    min_height, max_height, min_width, max_width
                ]
                cropped_img = img[min_height:max_height + 1, min_width:max_width + 1]
                cropped_img = cv2.normalize(cropped_img, None, alpha=0, beta=255,
                                            norm_type=cv2.NORM_MINMAX,
                                            dtype=cv2.CV_32F).astype(np.uint8)
                if not for_training:
                    save_path = os.path.join('outputs', config['name'] + '_crop_testing', str(c))
                    os.makedirs(save_path, exist_ok=True)
                    cv2.imwrite(os.path.join(save_path, meta['img_id'][i] + '.jpg'),
                                cropped_img)
                else:
                    # save cropped img
                    img_path = os.path.join('data', config['dataset'] + '_cropped',
                                            config['sub_dataset'], 'images')
                    os.makedirs(img_path, exist_ok=True)
                    cv2.imwrite(os.path.join(img_path, meta['img_id'][i] + '.jpg'),
                                cropped_img)
                    # save cropped mask
                    mask_gt = target[i, c]
                    cropped_mask_gt = mask_gt[min_height:max_height + 1,
                                              min_width:max_width + 1]
                    target_path = os.path.join('data', config['dataset'] + '_cropped',
                                               config['sub_dataset'], 'masks', str(c))
                    os.makedirs(target_path, exist_ok=True)
                    cv2.imwrite(os.path.join(target_path, meta['img_id'][i] + '.png'),
                                cropped_mask_gt)
    return mask_index_dict
def decode(where_mu, where_logvar):
    sample = reparameterize_normal(where_mu, where_logvar)
    scl = torch.sigmoid(sample[..., :2])
    trs = torch.tanh(sample[..., 2:])
    return scl, trs
def main():
    val_args = parse_args()

    args = joblib.load('models/%s/args.pkl' % val_args.name)

    if not os.path.exists('output/%s' % args.name):
        os.makedirs('output/%s' % args.name)

    print('Config -----')
    for arg in vars(args):
        print('%s: %s' % (arg, getattr(args, arg)))
    print('------------')

    joblib.dump(args, 'models/%s/args.pkl' % args.name)

    # create model
    print("=> creating model %s" % args.arch)
    model = deepresunet.__dict__[args.arch](args)
    model = model.cuda()

    # Data loading code
    img_paths = glob(r'D:\Project\CollegeDesign\dataset\Brats2018FoulModel2D\testImage\*')
    mask_paths = glob(r'D:\Project\CollegeDesign\dataset\Brats2018FoulModel2D\testMask\*')
    val_img_paths = img_paths
    val_mask_paths = mask_paths
    #train_img_paths, val_img_paths, train_mask_paths, val_mask_paths = \
    #    train_test_split(img_paths, mask_paths, test_size=0.2, random_state=41)

    model.load_state_dict(torch.load('models/%s/model.pth' % args.name))
    model.eval()

    val_dataset = Dataset(args, val_img_paths, val_mask_paths)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        pin_memory=True,
        drop_last=False)

    if val_args.mode == "GetPicture":
        """
        Fetch and save the label maps produced by the model.
        """
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')

            with torch.no_grad():
                for i, (input, target) in tqdm(enumerate(val_loader), total=len(val_loader)):
                    input = input.cuda()
                    #target = target.cuda()

                    # compute output
                    if args.deepsupervision:
                        output = model(input)[-1]
                    else:
                        output = model(input)
                    output = torch.sigmoid(output).data.cpu().numpy()
                    # image paths for the current batch (a fresh name, so the
                    # global img_paths list is not clobbered)
                    batch_img_paths = val_img_paths[args.batch_size * i:args.batch_size * (i + 1)]
                    #print("output_shape:%s" % str(output.shape))

                    for b in range(output.shape[0]):
                        """
                        Generate grayscale images, one per tumor sub-region:
                        wtName = os.path.basename(batch_img_paths[b])
                        overNum = wtName.find(".npy")
                        wtName = wtName[0:overNum] + "_WT" + ".png"
                        imsave('output/%s/' % args.name + wtName, (output[b, 0, :, :] * 255).astype('uint8'))
                        tcName = os.path.basename(batch_img_paths[b])
                        overNum = tcName.find(".npy")
                        tcName = tcName[0:overNum] + "_TC" + ".png"
                        imsave('output/%s/' % args.name + tcName, (output[b, 1, :, :] * 255).astype('uint8'))
                        etName = os.path.basename(batch_img_paths[b])
                        overNum = etName.find(".npy")
                        etName = etName[0:overNum] + "_ET" + ".png"
                        imsave('output/%s/' % args.name + etName, (output[b, 2, :, :] * 255).astype('uint8'))
                        """
                        npName = os.path.basename(batch_img_paths[b])
                        overNum = npName.find(".npy")
                        rgbName = npName[0:overNum] + ".png"
                        # hard-coded 160x160 slice size; output channels 0/1/2 are WT/TC/ET
                        rgbPic = np.zeros([160, 160, 3], dtype=np.uint8)
                        for idx in range(output.shape[2]):
                            for idy in range(output.shape[3]):
                                if output[b, 0, idx, idy] > 0.5:   # WT -> green
                                    rgbPic[idx, idy] = [0, 128, 0]
                                if output[b, 1, idx, idy] > 0.5:   # TC -> red
                                    rgbPic[idx, idy] = [255, 0, 0]
                                if output[b, 2, idx, idy] > 0.5:   # ET -> yellow
                                    rgbPic[idx, idy] = [255, 255, 0]
                        imsave('output/%s/' % args.name + rgbName, rgbPic)

            torch.cuda.empty_cache()

        """
        Convert the validation-set GT from numpy format to images and save them.
        """
        print("Saving GT: numpy to picture")
        val_gt_path = 'output/%s/' % args.name + "GT/"
        if not os.path.exists(val_gt_path):
            os.mkdir(val_gt_path)

        for idx in tqdm(range(len(val_mask_paths))):
            mask_path = val_mask_paths[idx]
            name = os.path.basename(mask_path)
            overNum = name.find(".npy")
            name = name[0:overNum]
            rgbName = name + ".png"

            npmask = np.load(mask_path)

            GtColor = np.zeros([npmask.shape[0], npmask.shape[1], 3], dtype=np.uint8)
            for ix in range(npmask.shape[0]):
                for iy in range(npmask.shape[1]):
                    # necrosis / non-enhancing tumor (NET, label 1): red
                    if npmask[ix, iy] == 1:
                        GtColor[ix, iy] = [255, 0, 0]
                    # peritumoral edema (ED, label 2): green
                    elif npmask[ix, iy] == 2:
                        GtColor[ix, iy] = [0, 128, 0]
                    # enhancing tumor (ET, label 4): yellow
                    elif npmask[ix, iy] == 4:
                        GtColor[ix, iy] = [255, 255, 0]

            #imsave(val_gt_path + rgbName, GtColor)
            imageio.imwrite(val_gt_path + rgbName, GtColor)

            """
            Grayscale GT, one image per sub-region:
            mask_path = val_mask_paths[idx]
            name = os.path.basename(mask_path)
            overNum = name.find(".npy")
            name = name[0:overNum]
            wtName = name + "_WT" + ".png"
            tcName = name + "_TC" + ".png"
            etName = name + "_ET" + ".png"

            npmask = np.load(mask_path)

            WT_Label = npmask.copy()
            WT_Label[npmask == 1] = 1.
            WT_Label[npmask == 2] = 1.
            WT_Label[npmask == 4] = 1.
            TC_Label = npmask.copy()
            TC_Label[npmask == 1] = 1.
            TC_Label[npmask == 2] = 0.
            TC_Label[npmask == 4] = 1.
            ET_Label = npmask.copy()
            ET_Label[npmask == 1] = 0.
            ET_Label[npmask == 2] = 0.
            ET_Label[npmask == 4] = 1.

            imsave(val_gt_path + wtName, (WT_Label * 255).astype('uint8'))
            imsave(val_gt_path + tcName, (TC_Label * 255).astype('uint8'))
            imsave(val_gt_path + etName, (ET_Label * 255).astype('uint8'))
            """
        print("Done!")

    if val_args.mode == "Calculate":
        """
        Compute the evaluation metrics: Dice, Sensitivity, PPV and Hausdorff distance.
        """
        wt_dices, tc_dices, et_dices = [], [], []
        wt_sensitivities, tc_sensitivities, et_sensitivities = [], [], []
        wt_ppvs, tc_ppvs, et_ppvs = [], [], []
        wt_Hausdorf, tc_Hausdorf, et_Hausdorf = [], [], []
        # unused
        wtMaskList, tcMaskList, etMaskList = [], [], []
        wtPbList, tcPbList, etPbList = [], [], []

        # sort both lists so GT and prediction files pair up by name
        maskPath = sorted(glob("output/%s/GT/*.png" % args.name))
        pbPath = sorted(glob("output/%s/*.png" % args.name))
        if len(maskPath) == 0:
            print("Please generate the pictures first (run with mode GetPicture)!")
            return

        for myi in tqdm(range(len(maskPath))):
            mask = imread(maskPath[myi])
            pb = imread(pbPath[myi])

            wtmaskregion = np.zeros([mask.shape[0], mask.shape[1]], dtype=np.float32)
            wtpbregion = np.zeros([mask.shape[0], mask.shape[1]], dtype=np.float32)
            tcmaskregion = np.zeros([mask.shape[0], mask.shape[1]], dtype=np.float32)
            tcpbregion = np.zeros([mask.shape[0], mask.shape[1]], dtype=np.float32)
            etmaskregion = np.zeros([mask.shape[0], mask.shape[1]], dtype=np.float32)
            etpbregion = np.zeros([mask.shape[0], mask.shape[1]], dtype=np.float32)

            for idx in range(mask.shape[0]):
                for idy in range(mask.shape[1]):
                    # any non-zero channel means the pixel is foreground,
                    # i.e. it belongs to the WT (whole tumor) region
                    if mask[idx, idy, :].any() != 0:
                        wtmaskregion[idx, idy] = 1
                    if pb[idx, idy, :].any() != 0:
                        wtpbregion[idx, idy] = 1
                    # R == 255 identifies TC (tumor core): red (NET) and
                    # yellow (ET) both have R == 255, unlike green (ED)
                    if mask[idx, idy, 0] == 255:
                        tcmaskregion[idx, idy] = 1
                    if pb[idx, idy, 0] == 255:
                        tcpbregion[idx, idy] = 1
                    # ET pixels are yellow, the only color with G == 255
                    if mask[idx, idy, 1] == 255:
                        etmaskregion[idx, idy] = 1
                    if pb[idx, idy, 1] == 255:
                        etpbregion[idx, idy] = 1

            # WT metrics
            wt_dices.append(dice_coef(wtpbregion, wtmaskregion))
            wt_ppvs.append(ppv(wtpbregion, wtmaskregion))
            wt_Hausdorf.append(hausdorff_distance(wtmaskregion, wtpbregion))
            wt_sensitivities.append(sensitivity(wtpbregion, wtmaskregion))

            # TC metrics
            tc_dices.append(dice_coef(tcpbregion, tcmaskregion))
            tc_ppvs.append(ppv(tcpbregion, tcmaskregion))
            tc_Hausdorf.append(hausdorff_distance(tcmaskregion, tcpbregion))
            tc_sensitivities.append(sensitivity(tcpbregion, tcmaskregion))

            # ET metrics
            et_dices.append(dice_coef(etpbregion, etmaskregion))
            et_ppvs.append(ppv(etpbregion, etmaskregion))
            et_Hausdorf.append(hausdorff_distance(etmaskregion, etpbregion))
            et_sensitivities.append(sensitivity(etpbregion, etmaskregion))

        print('WT Dice: %.4f' % np.mean(wt_dices))
        print('TC Dice: %.4f' % np.mean(tc_dices))
        print('ET Dice: %.4f' % np.mean(et_dices))
        print("=============")
        print('WT PPV: %.4f' % np.mean(wt_ppvs))
        print('TC PPV: %.4f' % np.mean(tc_ppvs))
        print('ET PPV: %.4f' % np.mean(et_ppvs))
        print("=============")
        print('WT sensitivity: %.4f' % np.mean(wt_sensitivities))
        print('TC sensitivity: %.4f' % np.mean(tc_sensitivities))
        print('ET sensitivity: %.4f' % np.mean(et_sensitivities))
        print("=============")
        print('WT Hausdorff: %.4f' % np.mean(wt_Hausdorf))
        print('TC Hausdorff: %.4f' % np.mean(tc_Hausdorf))
        print('ET Hausdorff: %.4f' % np.mean(et_Hausdorf))
        print("=============")
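# dice_coef, ppv, sensitivity and hausdorff_distance above come from the
# project's utilities / an external package and are not defined in this file.
# A minimal sketch of plausible definitions for binary numpy masks follows --
# these are assumptions for illustration, not the project's exact code
# (hausdorff_distance is typically off-the-shelf, e.g. built on
# scipy.spatial.distance.directed_hausdorff, and is omitted here):
import numpy as np

def dice_coef(pred, gt, smooth=1e-5):
    # Dice = 2|P ∩ G| / (|P| + |G|)
    intersection = (pred * gt).sum()
    return (2.0 * intersection + smooth) / (pred.sum() + gt.sum() + smooth)

def ppv(pred, gt, smooth=1e-5):
    # positive predictive value (precision) = TP / (TP + FP)
    tp = (pred * gt).sum()
    return (tp + smooth) / (pred.sum() + smooth)

def sensitivity(pred, gt, smooth=1e-5):
    # sensitivity (recall) = TP / (TP + FN)
    tp = (pred * gt).sum()
    return (tp + smooth) / (gt.sum() + smooth)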
def meta_train(self, task, ptracker):
    """
    Meta-trains by feeding both the support set and the query (target) set into the model.
    """
    self.mode = 'train'
    self.train()
    self.net_reset()
    total_losses = []

    for support_set, target_set in task:
        self.backbone.train()
        self.gpmodel.train()
        self.likelihood.train()

        support_set = self.strategy.update_support_set(support_set)
        support_x, support_y = support_set
        target_x, target_y = target_set
        support_n = len(support_y)

        # Combine target and support set
        if len(target_x) > 0:
            all_x = torch.cat((support_x, target_x), dim=0)
            all_y = torch.cat((support_y, target_y), dim=0)
        else:
            all_x = support_x
            all_y = support_y

        all_h = self.forward(all_x)
        all_h, all_y = self.strategy.update_support_features((all_h, all_y))
        all_y_onehots = uu.onehot(all_y, fill_with=-1, dim=self.output_dim[self.mode])

        # one GP per class: regress each one-vs-rest target column
        total_losses = []
        for idx in range(self.output_dim[self.mode]):
            self.gpmodel.set_train_data(inputs=all_h, targets=all_y_onehots[:, idx], strict=False)
            output = self.gpmodel(*self.gpmodel.train_inputs)
            loss = -self.loss_fn(output, self.gpmodel.train_targets)
            total_losses.append(loss)

        self.optimizer.zero_grad()
        loss = torch.stack(total_losses).sum()
        loss.backward()
        self.optimizer.step()

        if len(target_x) > 0:
            with torch.no_grad():
                self.gpmodel.eval()
                self.likelihood.eval()
                self.backbone.eval()

                target_h = self.forward(target_x)  # note: unused; the query features below come from all_h
                predictions_list = list()
                total_losses = list()

                for idx in range(self.output_dim[self.mode]):
                    # condition the GP on the support set only, then score the queries
                    self.gpmodel.set_train_data(
                        inputs=all_h[:support_n],
                        targets=all_y_onehots[:support_n, idx],
                        strict=False)
                    output = self.gpmodel(all_h[support_n:])
                    total_losses.append(self.loss_fn(output, all_y_onehots[support_n:, idx]))
                    prediction = self.likelihood(output)
                    predictions_list.append(torch.sigmoid(prediction.mean))

                predictions_list = torch.stack(predictions_list).T
                loss = -torch.stack(total_losses).sum()
                pred_y = predictions_list.argmax(1)

                ptracker.add_task_performance(
                    pred_y.detach().cpu().numpy(),
                    target_y.detach().cpu().numpy(),
                    loss.detach().cpu().numpy())
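# uu.onehot above is a project utility. A plausible minimal sketch, assuming
# integer labels in [0, dim) and a -1/+1 one-vs-rest encoding suitable as GP
# regression targets (fill_with marks the negative classes); this is an
# assumption, not the confirmed implementation:
import torch

def onehot(y, fill_with=-1, dim=None):
    # start every entry at fill_with, then set the true class to +1
    out = torch.full((y.size(0), dim), float(fill_with), device=y.device)
    out.scatter_(1, y.long().unsqueeze(1), 1.0)
    return out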
def train(train_loader, model, criterion, optimizer, epoch):
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        # Variable is a no-op wrapper since PyTorch 0.4; kept for compatibility
        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # OHEM-selected pixels for the text loss
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())
        loss_text = criterion(texts, gt_texts, selected_masks)

        # kernel losses are only computed where the text prediction is confident
        loss_kernels = []
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())
        for i in range(6):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
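# ohem_batch above is assumed to do PSENet-style Online Hard Example Mining:
# keep every positive text pixel and only the highest-scoring (hardest)
# negatives, at a 3:1 negative:positive ratio. A minimal numpy sketch under
# that assumption -- not necessarily the repository's exact implementation:
import numpy as np
import torch

def ohem_single(score, gt_text, training_mask):
    pos = (gt_text > 0.5) & (training_mask > 0.5)
    pos_num = int(pos.sum())
    neg = gt_text <= 0.5
    neg_num = min(int(neg.sum()), pos_num * 3)
    if pos_num == 0 or neg_num == 0:
        # nothing to mine; fall back to the plain training mask
        return training_mask.astype('float32')
    # threshold that keeps exactly the neg_num highest-scoring negatives
    neg_scores = np.sort(score[neg])[::-1]
    threshold = neg_scores[neg_num - 1]
    selected = ((score >= threshold) | (gt_text > 0.5)) & (training_mask > 0.5)
    return selected.astype('float32')

def ohem_batch(scores, gt_texts, training_masks):
    scores = scores.data.cpu().numpy()
    gt_texts = gt_texts.data.cpu().numpy()
    training_masks = training_masks.data.cpu().numpy()
    selected = [ohem_single(scores[i], gt_texts[i], training_masks[i])
                for i in range(scores.shape[0])]
    return torch.from_numpy(np.stack(selected)).float()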