def forward(self, x):
    conv1 = self.conv1(x)      #1/4
    conv2 = self.conv2(conv1)  #1/4
    conv3 = self.conv3(conv2)  #1/8
    conv4 = self.conv4(conv3)  #1/16
    conv5 = self.conv5(conv4)  #1/32

    center_512 = self.center_global_pool(conv5)
    center_64 = self.center_conv1x1(center_512)
    center_64_flatten = center_64.view(center_64.size(0), -1)
    center_fc = self.center_fc(center_64_flatten)

    f = self.center(conv5)
    d5 = self.decoder5(f, conv5)
    d4 = self.decoder4(d5, conv4)
    d3 = self.decoder3(d4, conv3)
    d2 = self.decoder2(d3, conv2)
    d1 = self.decoder1(d2)

    hypercol = torch.cat((
        d1,
        F.upsample(d2, scale_factor=2, mode='bilinear'),
        F.upsample(d3, scale_factor=4, mode='bilinear'),
        F.upsample(d4, scale_factor=8, mode='bilinear'),
        F.upsample(d5, scale_factor=16, mode='bilinear')), 1)
    hypercol = F.dropout2d(hypercol, p=0.50)

    x_no_empty = self.logits_no_empty(hypercol)

    hypercol_add_center = torch.cat((
        hypercol,
        F.upsample(center_64, scale_factor=128, mode='bilinear')), 1)
    x_final = self.logits_final(hypercol_add_center)
    return center_fc, x_no_empty, x_final
def forward(self, input):
    x, low_level_features = self.resnet_features(input)
    x1 = self.aspp1(x)
    x2 = self.aspp2(x)
    x3 = self.aspp3(x)
    x4 = self.aspp4(x)
    x5 = self.global_avg_pool(x)
    x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
    x = torch.cat((x1, x2, x3, x4, x5), dim=1)

    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = F.upsample(x, size=(int(math.ceil(input.size()[-2] / 4)),
                            int(math.ceil(input.size()[-1] / 4))),
                   mode='bilinear', align_corners=True)

    low_level_features = self.conv2(low_level_features)
    low_level_features = self.bn2(low_level_features)
    low_level_features = self.relu(low_level_features)

    x = torch.cat((x, low_level_features), dim=1)
    x = self.last_conv(x)
    x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
    return x
def forward(self, x):
    x = self.encoder1(x)    #; print('x:', x.size())
    e2 = self.encoder2(x)   #; print('e2:', e2.size())
    e3 = self.encoder3(e2)  #; print('e3:', e3.size())
    e4 = self.encoder4(e3)  #; print('e4:', e4.size())
    e5 = self.encoder5(e4)  #; print('e5:', e5.size())

    center = self.center(e5)        #; print('center:', center.size())

    d5 = self.decoder5(center, e5)  #; print('d5:', d5.size())
    d4 = self.decoder4(d5, e4)      #; print('d4:', d4.size())
    d3 = self.decoder3(d4, e3)      #; print('d3:', d3.size())
    d2 = self.decoder2(d3, e2)      #; print('d2:', d2.size())
    d1 = self.decoder1(d2)          #; print('d1:', d1.size())

    f = torch.cat([
        d1,
        F.upsample(d2, scale_factor=2, mode='bilinear'),
        F.upsample(d3, scale_factor=4, mode='bilinear'),
        F.upsample(d4, scale_factor=8, mode='bilinear'),
        F.upsample(d5, scale_factor=16, mode='bilinear'),
    ], dim=1)
    #f = F.dropout2d(f, p=self.dropout_2d)
    #print(self.logit(d1).shape)
    return self.logit(f)
def forward(self, preds, target):
    h, w = target.size(2), target.size(3)

    scale_pred = F.upsample(input=preds[0], size=(h, w), mode='bilinear', align_corners=True)
    loss1 = self.criterion(scale_pred, target)

    scale_pred = F.upsample(input=preds[1], size=(h, w), mode='bilinear', align_corners=True)
    loss2 = self.criterion(scale_pred, target)

    return loss1 + loss2 * 0.4
def forward(self, x):
    x = F.relu(F.max_pool2d(self.convD1(x), 2))
    x = F.relu(F.max_pool2d(self.convD2(x), 2))
    x = F.relu(F.max_pool2d(self.convD3(x), 2))
    x = F.upsample(x, scale_factor=2)
    x = F.relu(self.convE1(x))
    x = F.upsample(x, scale_factor=2)
    x = F.relu(self.convE2(x))
    x = F.upsample(x, scale_factor=2)
    x = F.relu(self.convE3(x))
    return x
def forward(self, x):
    # n
    x_2 = self.stage1(x)
    x_4 = self.stage2(x_2)
    score_2 = self.norm1(x_2)
    score_4 = self.norm2(x_4)
    # n / 4
    up_2 = F.upsample(x_4, x_2.size()[2:], mode='bilinear')
    up_2 += score_2
    up = F.upsample(up_2, x.size()[2:], mode='bilinear')
    up = self.final(up)
    return up
def forward(self, x):
    enc1 = self.enc1(x)
    enc2 = self.enc2(enc1)
    enc3 = self.enc3(enc2)
    enc4 = self.enc4(enc3)

    center = self.center(enc4)

    dec4 = self.dec4(torch.cat([center, F.upsample(enc4, center.size()[2:], mode='bilinear')], 1))
    dec3 = self.dec3(torch.cat([dec4, F.upsample(enc3, dec4.size()[2:], mode='bilinear')], 1))
    dec2 = self.dec2(torch.cat([dec3, F.upsample(enc2, dec3.size()[2:], mode='bilinear')], 1))
    dec1 = self.dec1(torch.cat([dec2, F.upsample(enc1, dec2.size()[2:], mode='bilinear')], 1))

    final = self.final(dec1)
    return F.upsample(final, x.size()[2:], mode='bilinear')
def forward(self, x_high, x_low):
    high_upsampled = F.upsample(self.conv_high(x_high),
                                scale_factor=self.up_scale_high,
                                mode='bilinear')
    if x_low is None:
        return high_upsampled

    low_upsampled = F.upsample(self.conv_low(x_low),
                               scale_factor=self.up_scale_low,
                               mode='bilinear')
    return low_upsampled + high_upsampled
def forward(self, x):
    conv1 = self.conv1(x)      #1/2
    conv2 = self.conv2(conv1)  #1/2
    conv3 = self.conv3(conv2)  #1/4
    conv4 = self.conv4(conv3)  #1/8
    conv5 = self.conv5(conv4)  #1/16

    center_2048 = self.center_global_pool(conv5)
    center_64 = self.center_conv1x1(center_2048)
    center_64_flatten = center_64.view(center_64.size(0), -1)
    center_fc = self.center_fc(center_64_flatten)

    center = self.center(self.center_se(self.pool(conv5)))         #1/16
    dec5 = self.dec5(self.dec5_se(torch.cat([center, conv5], 1)))  #1/8
    dec4 = self.dec4(self.dec4_se(torch.cat([dec5, conv4], 1)))    #1/4
    dec3 = self.dec3(self.dec3_se(torch.cat([dec4, conv3], 1)))    #1/2
    dec2 = self.dec2(self.dec2_se(torch.cat([dec3, conv2], 1)))    #1

    x_no_empty = self.logits_no_empty(dec2)

    dec0_add_center = torch.cat((
        dec2,
        F.upsample(center_64, scale_factor=128, mode='bilinear')), 1)
    x_final = self.logits_final(dec0_add_center)
    return center_fc, x_no_empty, x_final
def forward(self, inp):
    """Get Inception feature maps

    Parameters
    ----------
    inp : torch.autograd.Variable
        Input tensor of shape Bx3xHxW. Values are expected to be in
        range (0, 1)

    Returns
    -------
    List of torch.autograd.Variable, corresponding to the selected output
    block, sorted ascending by index
    """
    outp = []
    x = inp

    if self.resize_input:
        x = F.upsample(x, size=(299, 299), mode='bilinear')

    if self.normalize_input:
        x = x.clone()
        x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5

    for idx, block in enumerate(self.blocks):
        x = block(x)
        if idx in self.output_blocks:
            outp.append(x)
        if idx == self.last_needed_block:
            break

    return outp
def forward(self, top_blob, lateral_blob):
    # Lateral 1x1 conv
    lat = self.conv_lateral(lateral_blob)
    # Top-down 2x upsampling
    # td = F.upsample(top_blob, size=lat.size()[2:], mode='bilinear')
    td = F.upsample(top_blob, scale_factor=2, mode='nearest')
    # Sum lateral and top-down
    return lat + td
def forward(self, x1, x2):
    if not self.learn:
        x1 = F.upsample(x1,
                        size=(x1.size()[2] * self.p_kernel[0],
                              x1.size()[3] * self.p_kernel[1],
                              x1.size()[4] * self.p_kernel[2]),
                        mode='trilinear')
    x1 = self.conv(x1)
    x1 = self.Relu(x1)
    x = torch.cat([x2, x1], dim=1)
    x = self.fuse(x)
    return x
def forward(self, x, e=None):
    x = F.upsample(x, scale_factor=2, mode='bilinear')
    if e is not None:
        x = torch.cat([x, e], 1)
        x = F.dropout2d(x, p=0.50)
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.SCSE(x)
    return x
def forward(self, x):
    output_slices = [x]
    h, w = x.shape[2:]

    for module, pool_size in zip(self.path_module_list, self.pool_sizes):
        out = F.avg_pool2d(x, int(h / pool_size), int(h / pool_size), 0)
        out = module(out)
        out = F.upsample(out, size=(h, w), mode='bilinear')
        output_slices.append(out)

    return torch.cat(output_slices, dim=1)
def forward(self, y, z):
    x = torch.cat([y, nn.MaxPool2d(self.scale, self.scale)(z)], dim=1)
    y_prime = self.conv1(x)
    y_prime = self.conv2(y_prime)
    x = self.conv_res(y_prime)

    upsample_size = torch.Size([_s * self.scale for _s in y_prime.shape[-2:]])
    x = F.upsample(x, size=upsample_size, mode='nearest')
    z_prime = z + x
    return y_prime, z_prime
def forward(self, x):
    input_adjust = self.input_adjust(x)
    conv1 = self.conv1(input_adjust)
    conv2 = self.conv2(conv1)
    conv3 = self.conv3(conv2)
    center = self.conv4(conv3)

    dec4 = self.dec4(center)
    dec3 = self.dec3(torch.cat([dec4, conv3], 1))
    dec2 = self.dec2(torch.cat([dec3, conv2], 1))
    dec1 = self.dec1(torch.cat([dec2, conv1], 1))

    hcol = torch.cat([dec1,
                      F.upsample(dec2, scale_factor=2, mode='bilinear'),   #,align_corners=False
                      F.upsample(dec3, scale_factor=4, mode='bilinear'),   #,align_corners=False
                      F.upsample(dec4, scale_factor=8, mode='bilinear')],  #,align_corners=False
                     dim=1)
    #hcol = F.dropout2d(hcol, p = 0.5)
    #print('input_adjust ', input_adjust.shape, '\ncenter ', center.shape, '\ndec1: ', dec1.shape)
    #print('hcol ', hcol.shape, '\nout ', self._mask_out(hcol).shape)
    return self._mask_out(hcol)
def forward(self, x, e=None):
    x = F.upsample(x, scale_factor=2, mode='bilinear')
    if e is not None:
        x = torch.cat([x, e], 1)
    x = F.relu(self.conv1(x), inplace=True)
    x = F.relu(self.conv2(x), inplace=True)
    g1 = self.spatial_gate(x)
    g2 = self.channel_gate(x)
    x = x * g1 + x * g2
    return x
def forward(self, x):
    #256
    down1 = self.down1(x)
    out = F.max_pool2d(down1, kernel_size=2, stride=2)  #64
    down2 = self.down2(out)
    out = F.max_pool2d(down2, kernel_size=2, stride=2)  #32
    down3 = self.down3(out)
    out = F.max_pool2d(down3, kernel_size=2, stride=2)  #16
    down4 = self.down4(out)
    out = F.max_pool2d(down4, kernel_size=2, stride=2)  #8

    out = self.same(out)

    out = F.upsample(out, scale_factor=2, mode='bilinear')  #16
    out = torch.cat([down4, out], 1)
    out = self.up4(out)

    out = F.upsample(out, scale_factor=2, mode='bilinear')  #32
    out = torch.cat([down3, out], 1)
    out = self.up3(out)

    out = F.upsample(out, scale_factor=2, mode='bilinear')  #64
    out = torch.cat([down2, out], 1)
    out = self.up2(out)

    out = F.upsample(out, scale_factor=2, mode='bilinear')  #128
    out = torch.cat([down1, out], 1)
    out = self.up1(out)

    out = F.upsample(out, scale_factor=2, mode='bilinear')  #256
    out = self.up0(out)

    out = self.classify(out)
    return out
def forward(self, x):
    h = x.unsqueeze(2).unsqueeze(3)
    if self.normalize_latents:
        mean = torch.mean(h * h, 1, keepdim=True)
        dom = torch.rsqrt(mean + self.eps)
        h = h * dom

    h = self.block0(h, self.depth == 0)
    if self.depth > 0:
        for i in range(self.depth - 1):
            h = F.upsample(h, scale_factor=2)
            h = self.blocks[i](h)

        h = F.upsample(h, scale_factor=2)
        ult = self.blocks[self.depth - 1](h, True)

        if self.alpha < 1.0:
            if self.depth > 1:
                preult_rgb = self.blocks[self.depth - 2].toRGB(h)
            else:
                preult_rgb = self.block0.toRGB(h)
        else:
            preult_rgb = 0

        h = preult_rgb * (1 - self.alpha) + ult * self.alpha
    return h
def forward(self, x):
    x, low_level_features = self.xception_features(x)
    x1 = self.aspp1(x)
    x2 = self.aspp2(x)
    x3 = self.aspp3(x)
    x4 = self.aspp4(x)
    x5 = self.global_avg_pool(x)
    x5 = F.upsample(x5, size=x4.size()[2:], mode='bilinear', align_corners=True)
    x = torch.cat((x1, x2, x3, x4, x5), dim=1)

    x = self.conv1(x)
    x = self.bn1(x)
    x = F.upsample(x, scale_factor=4, mode='bilinear', align_corners=True)

    low_level_features = self.conv2(low_level_features)
    low_level_features = self.bn2(low_level_features)

    x = torch.cat((x, low_level_features), dim=1)
    x = self.last_conv(x)
    x = F.upsample(x, scale_factor=4, mode='bilinear', align_corners=True)
    return x
def _upsample_add(self, x, y):
    '''Upsample and add two feature maps.

    Args:
      x: (Variable) top feature map to be upsampled.
      y: (Variable) lateral feature map.

    Returns:
      (Variable) added feature map.

    Note in PyTorch, when input size is odd, the upsampled feature map
    with `F.upsample(..., scale_factor=2, mode='nearest')` maybe not equal
    to the lateral feature map size.

    e.g.
    original input size: [N,_,15,15] ->
    conv2d feature map size: [N,_,8,8] ->
    upsampled feature map size: [N,_,16,16]

    So we choose bilinear upsample which supports arbitrary output sizes.
    '''
    _, _, H, W = y.size()
    return F.upsample(x, size=(H, W), mode='bilinear') + y
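# A minimal, self-contained sketch of the size mismatch described in the docstring above
# (assumes only PyTorch; the pooling layer is a stand-in for the stride-2 conv in the example):
# scale_factor=2 nearest upsampling of the 8x8 top map gives 16x16, which cannot be added to
# the 15x15 lateral map, while size-based bilinear upsampling matches it exactly.
import torch
import torch.nn.functional as F

lateral = torch.randn(1, 8, 15, 15)                       # odd-sized lateral feature map
top = F.max_pool2d(lateral, 2, stride=2, ceil_mode=True)  # downsampled top map: [1, 8, 8, 8]

nearest_2x = F.interpolate(top, scale_factor=2, mode='nearest')  # [1, 8, 16, 16] -> shape mismatch
bilinear_fit = F.interpolate(top, size=lateral.shape[2:], mode='bilinear', align_corners=False)

print(nearest_2x.shape, bilinear_fit.shape)  # [1, 8, 16, 16] vs [1, 8, 15, 15]
print((bilinear_fit + lateral).shape)        # the addition now works: [1, 8, 15, 15]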
def forward(self, x):
    # conv & downsampling
    down_sampled_fmaps = []
    for i in range(self.n_stages - 1):
        x = self.down_convs[i](x)
        x = self.max_pooling(x)
        down_sampled_fmaps.insert(0, x)

    # center convs
    x = self.down_convs[self.n_stages - 1](x)
    x = self.up_convs[0](x)

    # conv & upsampling
    for i, down_sampled_fmap in enumerate(down_sampled_fmaps):
        x = torch.cat([x, down_sampled_fmap], 1)
        x = self.up_convs[i + 1](x)
        x = F.upsample(x, scale_factor=2, mode='bilinear')

    return self.out_conv(x)
def _forward(self, level, inp):
    # Upper branch
    up1 = inp
    up1 = self._modules['b1_' + str(level)](up1)

    # Lower branch
    low1 = F.max_pool2d(inp, 2, stride=2)
    low1 = self._modules['b2_' + str(level)](low1)

    if level > 1:
        low2 = self._forward(level - 1, low1)
    else:
        low2 = low1
        low2 = self._modules['b2_plus_' + str(level)](low2)

    low3 = low2
    low3 = self._modules['b3_' + str(level)](low3)

    up2 = F.upsample(low3, scale_factor=2, mode='nearest')

    return up1 + up2
def _forward(self, level, inp):
    # Upper branch
    up1 = inp
    up1 = self._modules['b1_' + str(level)](up1)
    up1 = self.dropout(up1)

    # Lower branch
    low1 = F.max_pool2d(inp, 2, stride=2)
    low1 = self._modules['b2_' + str(level)](low1)

    if level > 1:
        low2 = self._forward(level - 1, low1)
    else:
        low2 = low1
        low2 = self._modules['b2_plus_' + str(level)](low2)

    low3 = low2
    low3 = self._modules['b3_' + str(level)](low3)

    up1size = up1.size()
    rescale_size = (up1size[2], up1size[3])
    up2 = F.upsample(low3, size=rescale_size, mode='bilinear')

    return up1 + up2
def forward(self, x):
    batch_size, h, w = x.size(0), x.size(2), x.size(3)
    if self.scale > 1:
        x = self.pool(x)

    value = self.f_value(x).view(batch_size, self.value_channels, -1)
    value = value.permute(0, 2, 1)
    query = self.f_query(x).view(batch_size, self.key_channels, -1)
    query = query.permute(0, 2, 1)
    key = self.f_key(x).view(batch_size, self.key_channels, -1)

    sim_map = torch.matmul(query, key)
    sim_map = (self.key_channels ** -.5) * sim_map
    sim_map = F.softmax(sim_map, dim=-1)

    context = torch.matmul(sim_map, value)
    context = context.permute(0, 2, 1).contiguous()
    context = context.view(batch_size, self.value_channels, *x.size()[2:])
    context = self.W(context)
    if self.scale > 1:
        context = F.upsample(input=context, size=(h, w), mode='bilinear', align_corners=True)
    return context
def to_latent(obs, next_obs):
    """ Transform observations to latent space.

    :args obs: 5D torch tensor (BSIZE, SEQ_LEN, ASIZE, SIZE, SIZE)
    :args next_obs: 5D torch tensor (BSIZE, SEQ_LEN, ASIZE, SIZE, SIZE)

    :returns: (latent_obs, latent_next_obs)
        - latent_obs: 4D torch tensor (BSIZE, SEQ_LEN, LSIZE)
        - next_latent_obs: 4D torch tensor (BSIZE, SEQ_LEN, LSIZE)
    """
    with torch.no_grad():
        obs, next_obs = [
            f.upsample(x.view(-1, 3, SIZE, SIZE), size=RED_SIZE,
                       mode='bilinear', align_corners=True)
            for x in (obs, next_obs)]

        (obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma) = [
            vae(x)[1:] for x in (obs, next_obs)]

        latent_obs, latent_next_obs = [
            (x_mu + x_logsigma.exp() * torch.randn_like(x_mu)).view(BSIZE, SEQ_LEN, LSIZE)
            for x_mu, x_logsigma in
            [(obs_mu, obs_logsigma), (next_obs_mu, next_obs_logsigma)]]
    return latent_obs, latent_next_obs
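# The sampling line above is the standard VAE reparameterisation step; a minimal, self-contained
# illustration follows (the shapes are placeholders, not the script's BSIZE/SEQ_LEN/LSIZE):
import torch

mu = torch.zeros(4, 32)        # latent means for a small batch
logsigma = torch.zeros(4, 32)  # latent log standard deviations

# z = mu + sigma * eps with eps ~ N(0, I); sampling stays differentiable w.r.t. mu and logsigma
z = mu + logsigma.exp() * torch.randn_like(mu)
print(z.shape)  # torch.Size([4, 32])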
def forward(self, x):
    x = self.frontend(x)
    x = self.own_reslayer_3(x)
    x = self.de_pred(x)
    x = F.upsample(x, scale_factor=8)
    return x
def upsample_add(self, x, y):
    _, _, H, W = y.size()
    z = F.upsample(x, size=(H, W), mode='bilinear')
    return z + y
def main(opts): adj2_ = torch.from_numpy(graph.cihp2pascal_nlp_adj).float() adj2_test = adj2_.unsqueeze(0).unsqueeze(0).expand(1, 1, 7, 20).cuda() adj1_ = Variable( torch.from_numpy(graph.preprocess_adj(graph.pascal_graph)).float()) adj1_test = adj1_.unsqueeze(0).unsqueeze(0).expand(1, 1, 7, 7).cuda() cihp_adj = graph.preprocess_adj(graph.cihp_graph) adj3_ = Variable(torch.from_numpy(cihp_adj).float()) adj3_test = adj3_.unsqueeze(0).unsqueeze(0).expand(1, 1, 20, 20).cuda() p = OrderedDict() # Parameters to include in report p["trainBatch"] = opts.batch # Training batch size p["nAveGrad"] = 1 # Average the gradient of several iterations p["lr"] = opts.lr # Learning rate p["lrFtr"] = 1e-5 p["lraspp"] = 1e-5 p["lrpro"] = 1e-5 p["lrdecoder"] = 1e-5 p["lrother"] = 1e-5 p["wd"] = 5e-4 # Weight decay p["momentum"] = 0.9 # Momentum p["epoch_size"] = 10 # How many epochs to change learning rate p["num_workers"] = opts.numworker backbone = "xception" # Use xception or resnet as feature extractor, with open(opts.txt_file, "r") as f: img_list = f.readlines() max_id = 0 save_dir_root = os.path.join(os.path.dirname(os.path.abspath(__file__))) exp_name = os.path.dirname(os.path.abspath(__file__)).split("/")[-1] runs = glob.glob(os.path.join(save_dir_root, "run", "run_*")) for r in runs: run_id = int(r.split("_")[-1]) if run_id >= max_id: max_id = run_id + 1 # run_id = int(runs[-1].split('_')[-1]) + 1 if runs else 0 # Network definition if backbone == "xception": net = deeplab_xception_transfer.deeplab_xception_transfer_projection( n_classes=opts.classes, os=16, hidden_layers=opts.hidden_layers, source_classes=20, ) elif backbone == "resnet": # net = deeplab_resnet.DeepLabv3_plus(nInputChannels=3, n_classes=7, os=16, pretrained=True) raise NotImplementedError else: raise NotImplementedError if gpu_id >= 0: net.cuda() # net load weights if not opts.loadmodel == "": x = torch.load(opts.loadmodel) net.load_source_model(x) print("load model:", opts.loadmodel) else: print("no model load !!!!!!!!") ## multi scale scale_list = [1, 0.5, 0.75, 1.25, 1.5, 1.75] testloader_list = [] testloader_flip_list = [] for pv in scale_list: composed_transforms_ts = transforms.Compose( [tr.Scale_(pv), tr.Normalize_xception_tf(), tr.ToTensor_()]) composed_transforms_ts_flip = transforms.Compose([ tr.Scale_(pv), tr.HorizontalFlip(), tr.Normalize_xception_tf(), tr.ToTensor_(), ]) voc_val = pascal.VOCSegmentation(split="val", transform=composed_transforms_ts) voc_val_f = pascal.VOCSegmentation( split="val", transform=composed_transforms_ts_flip) testloader = DataLoader(voc_val, batch_size=1, shuffle=False, num_workers=p["num_workers"]) testloader_flip = DataLoader(voc_val_f, batch_size=1, shuffle=False, num_workers=p["num_workers"]) testloader_list.append(copy.deepcopy(testloader)) testloader_flip_list.append(copy.deepcopy(testloader_flip)) print("Eval Network") if not os.path.exists(opts.output_path + "pascal_output_vis/"): os.makedirs(opts.output_path + "pascal_output_vis/") if not os.path.exists(opts.output_path + "pascal_output/"): os.makedirs(opts.output_path + "pascal_output/") start_time = timeit.default_timer() # One testing epoch total_iou = 0.0 net.eval() for ii, large_sample_batched in enumerate( zip(*testloader_list, *testloader_flip_list)): print(ii) # 1 0.5 0.75 1.25 1.5 1.75 ; flip: sample1 = large_sample_batched[:6] sample2 = large_sample_batched[6:] for iii, sample_batched in enumerate(zip(sample1, sample2)): inputs, labels = sample_batched[0]["image"], sample_batched[0][ "label"] inputs_f, _ = 
sample_batched[1]["image"], sample_batched[1][ "label"] inputs = torch.cat((inputs, inputs_f), dim=0) if iii == 0: _, _, h, w = inputs.size() # assert inputs.size() == inputs_f.size() # Forward pass of the mini-batch inputs, labels = Variable(inputs, requires_grad=False), Variable(labels) with torch.no_grad(): if gpu_id >= 0: inputs, labels = inputs.cuda(), labels.cuda() # outputs = net.forward(inputs) # pdb.set_trace() outputs = net.forward(inputs, adj1_test.cuda(), adj3_test.cuda(), adj2_test.cuda()) outputs = (outputs[0] + flip(outputs[1], dim=-1)) / 2 outputs = outputs.unsqueeze(0) if iii > 0: outputs = F.upsample(outputs, size=(h, w), mode="bilinear", align_corners=True) outputs_final = outputs_final + outputs else: outputs_final = outputs.clone() ################ plot pic predictions = torch.max(outputs_final, 1)[1] prob_predictions = torch.max(outputs_final, 1)[0] results = predictions.cpu().numpy() prob_results = prob_predictions.cpu().numpy() vis_res = decode_labels(results) parsing_im = Image.fromarray(vis_res[0]) parsing_im.save(opts.output_path + "pascal_output_vis/{}.png".format(img_list[ii][:-1])) cv2.imwrite( opts.output_path + "pascal_output/{}.png".format(img_list[ii][:-1]), results[0, :, :], ) # np.save('../../cihp_prob_output/{}.npy'.format(img_list[ii][:-1]), prob_results[0, :, :]) # pred_list.append(predictions.cpu()) # label_list.append(labels.squeeze(1).cpu()) # loss = criterion(outputs, labels, batch_average=True) # running_loss_ts += loss.item() # total_iou += utils.get_iou(predictions, labels) end_time = timeit.default_timer() print("time use for " + str(ii) + " is :" + str(end_time - start_time)) # Eval pred_path = opts.output_path + "pascal_output/" eval_( pred_path=pred_path, gt_path=opts.gt_path, classes=opts.classes, txt_file=opts.txt_file, )
def forward(self, left, right):
    refimg_fea = self.feature_extraction(left)
    targetimg_fea = self.feature_extraction(right)

    # matching
    cost = Variable(
        torch.FloatTensor(refimg_fea.size()[0],
                          refimg_fea.size()[1] * 2,
                          self.maxdisp // 4,
                          refimg_fea.size()[2],
                          refimg_fea.size()[3]).zero_()).cuda()

    for i in range(self.maxdisp // 4):
        if i > 0:
            cost[:, :refimg_fea.size()[1], i, :, i:] = refimg_fea[:, :, :, i:]
            cost[:, refimg_fea.size()[1]:, i, :, i:] = targetimg_fea[:, :, :, :-i]
        else:
            cost[:, :refimg_fea.size()[1], i, :, :] = refimg_fea
            cost[:, refimg_fea.size()[1]:, i, :, :] = targetimg_fea
    cost = cost.contiguous()

    cost0 = self.dres0(cost)
    cost0 = self.dres1(cost0) + cost0

    out1, pre1, post1 = self.dres2(cost0, None, None)
    out1 = out1 + cost0

    out2, pre2, post2 = self.dres3(out1, pre1, post1)
    out2 = out2 + cost0

    out3, pre3, post3 = self.dres4(out2, pre1, post2)
    out3 = out3 + cost0

    cost1 = self.classif1(out1)
    cost2 = self.classif2(out2) + cost1
    cost3 = self.classif3(out3) + cost2

    if self.training:
        cost1 = F.upsample(cost1, [self.maxdisp, left.size()[2], left.size()[3]], mode='trilinear')
        cost2 = F.upsample(cost2, [self.maxdisp, left.size()[2], left.size()[3]], mode='trilinear')

        cost1 = torch.squeeze(cost1, 1)
        pred1 = F.softmax(cost1, dim=1)
        pred1 = disparityregression(self.maxdisp)(pred1)

        cost2 = torch.squeeze(cost2, 1)
        pred2 = F.softmax(cost2, dim=1)
        pred2 = disparityregression(self.maxdisp)(pred2)

    cost3 = F.upsample(cost3, [self.maxdisp, left.size()[2], left.size()[3]], mode='trilinear')
    cost3 = torch.squeeze(cost3, 1)
    pred3 = F.softmax(cost3, dim=1)
    pred3 = disparityregression(self.maxdisp)(pred3)

    if self.training:
        return pred1, pred2, pred3
    else:
        return pred3
def forward(self, frame, DQNs):
    # x: [B,2,84,84]
    self.B = frame.size()[0]

    # Predict lowres
    lowres = self.predict_lowres(frame)  # [B,2,210,160]
    # print (mask.size())

    # upsample
    # highdim = F.upsample(input=frame[:,:,:400,:200], size=(480,640), mode='bilinear')
    highdim = F.upsample(input=lowres, size=(480, 640), mode='bilinear')
    # highdim = F.upsample(input=lowres, size=(480,640), mode='nearest')

    # #plot
    # highdim = highdim.data.cpu().numpy()[0]
    # highdim = np.rollaxis(highdim, 1, 0)
    # highdim = np.rollaxis(highdim, 2, 1)
    # print (highdim.shape)
    # plt.imshow(highdim)
    # # save_dir = home+'/Documents/tmp/Doom/'
    # plt_path = exp_path_2+'training_plot.png'
    # plt.savefig(plt_path)
    # print ('saved viz',plt_path)
    # plt.close()
    # fds

    # TODO
    # fsdaf
    # mask = mask.repeat(1,3,1,1)
    # masked_frame = frame * mask
    # bias_frame = Variable(torch.ones(1,3,480,640).cuda()) * F.sigmoid(self.bias_frame.bias_frame)
    # masked_frame = frame * mask + (1.-mask)*bias_frame

    masked_frame = highdim

    difs = []
    for i in range(len(DQNs)):
        q_mask = DQNs[i](masked_frame)
        # val, index = torch.max(q_mask, 1)
        # q_mask = q_mask[:,index]
        q_real = DQNs[i](frame)
        # val, index = torch.max(q_real, 1)
        # q_real = q_real[:,index]
        dif = torch.mean((q_mask - q_real)**2)  # [B,A]
        difs.append(dif)

    difs = torch.stack(difs)
    dif = torch.mean(difs)

    # mask = mask.view(self.B, -1)
    # mask_sum = torch.mean(torch.sum(mask, dim=1)) * .0000001
    # loss = dif + mask_sum
    loss = dif

    return loss, dif  # , mask_sum
def forward(self, x):
    args = self.args

    if args.input_norm:
        rgb_mean = x.contiguous().view(x.size()[:2] + (-1, )).mean(
            dim=-1).view(x.size()[:2] + (1, 1, 1, ))
        x = (x - rgb_mean) / args.rgb_max

    x1_raw = x[:, :, 0, :, :].contiguous()
    x2_raw = x[:, :, 1, :, :].contiguous()

    # on the bottom level are original images
    x1_pyramid = self.feature_pyramid_extractor(x1_raw) + [x1_raw]
    x2_pyramid = self.feature_pyramid_extractor(x2_raw) + [x2_raw]

    # outputs
    flows = []

    # tensors for summary
    summaries = {
        'x2_warps': [],
    }

    for l, (x1, x2) in enumerate(zip(x1_pyramid, x2_pyramid)):
        # upsample flow and scale the displacement
        if l == 0:
            shape = list(x1.size())
            shape[1] = 2
            flow = torch.zeros(shape).to(args.device)
        else:
            flow = F.upsample(flow, scale_factor=2, mode='bilinear') * 2

        x2_warp = self.warping_layer(x2, flow)

        # correlation
        corr = self.corr(x1, x2_warp)
        if args.corr_activation:
            F.leaky_relu_(corr)

        # concat and estimate flow
        # ATTENTION: `+ flow` makes flow estimator learn to estimate residual flow
        if args.residual:
            flow_coarse = self.flow_estimators[l](torch.cat([x1, corr, flow], dim=1)) + flow
        else:
            flow_coarse = self.flow_estimators[l](torch.cat([x1, corr, flow], dim=1))

        flow_fine = self.context_networks[l](torch.cat([x1, flow], dim=1))
        flow = flow_coarse + flow_fine

        if l == args.output_level:
            flow = F.upsample(flow,
                              scale_factor=2**(args.num_levels - args.output_level - 1),
                              mode='bilinear') * 2**(args.num_levels - args.output_level - 1)
            flows.append(flow)
            summaries['x2_warps'].append(x2_warp.data)
            break
        else:
            flows.append(flow)
            summaries['x2_warps'].append(x2_warp.data)

    return flows, summaries
loss_record2 = AvgMeter()
loss_record3 = AvgMeter()
for i, pack in enumerate(train_loader, start=1):
    for rate in size_rates:
        optimizer.zero_grad()
        images, gts, depths = pack
        images = Variable(images).cuda()
        gts = Variable(gts).cuda()
        depths = Variable(depths).cuda()
        gt_edges = label_edge_prediction(gts)

        # multi-scale training samples
        trainsize = int(round(opt.trainsize * rate / 32) * 32)
        if rate != 1:
            images = F.upsample(images, size=(trainsize, trainsize),
                                mode='bilinear', align_corners=True)
            gts = F.upsample(gts, size=(trainsize, trainsize),
                             mode='bilinear', align_corners=True)
            depths = F.upsample(depths, size=(trainsize, trainsize),
                                mode='bilinear', align_corners=True)
            gt_edges = F.upsample(gt_edges, size=(trainsize, trainsize),
                                  mode='bilinear', align_corners=True)
def inference(net, img_path='', use_gpu=True):
    '''
    :param net:
    :param img_path:
    :param output_path:
    :return:
    '''
    # adj
    adj2_ = torch.from_numpy(graph.cihp2pascal_nlp_adj).float()
    adj2_test = adj2_.unsqueeze(0).unsqueeze(0).expand(1, 1, 7, 20).cuda().transpose(2, 3)

    adj1_ = Variable(torch.from_numpy(graph.preprocess_adj(graph.pascal_graph)).float())
    adj3_test = adj1_.unsqueeze(0).unsqueeze(0).expand(1, 1, 7, 7).cuda()

    cihp_adj = graph.preprocess_adj(graph.cihp_graph)
    adj3_ = Variable(torch.from_numpy(cihp_adj).float())
    adj1_test = adj3_.unsqueeze(0).unsqueeze(0).expand(1, 1, 20, 20).cuda()

    # multi-scale
    scale_list = [1, 0.5, 0.75, 1.25, 1.5, 1.75]
    img = read_img(img_path)
    testloader_list = []
    testloader_flip_list = []
    for pv in scale_list:
        composed_transforms_ts = transforms.Compose([
            tr.Scale_only_img(pv),
            tr.Normalize_xception_tf_only_img(),
            tr.ToTensor_only_img()
        ])

        composed_transforms_ts_flip = transforms.Compose([
            tr.Scale_only_img(pv),
            tr.HorizontalFlip_only_img(),
            tr.Normalize_xception_tf_only_img(),
            tr.ToTensor_only_img()
        ])

        testloader_list.append(img_transform(img, composed_transforms_ts))
        # print(img_transform(img, composed_transforms_ts))
        testloader_flip_list.append(img_transform(img, composed_transforms_ts_flip))
    # print(testloader_list)

    start_time = timeit.default_timer()
    # One testing epoch
    net.eval()

    # 1 0.5 0.75 1.25 1.5 1.75 ; flip:
    for iii, sample_batched in enumerate(zip(testloader_list, testloader_flip_list)):
        inputs, labels = sample_batched[0]['image'], sample_batched[0]['label']
        inputs_f, _ = sample_batched[1]['image'], sample_batched[1]['label']
        inputs = inputs.unsqueeze(0)
        inputs_f = inputs_f.unsqueeze(0)
        inputs = torch.cat((inputs, inputs_f), dim=0)
        if iii == 0:
            _, _, h, w = inputs.size()
        # assert inputs.size() == inputs_f.size()

        # Forward pass of the mini-batch
        inputs = Variable(inputs, requires_grad=False)

        with torch.no_grad():
            if use_gpu >= 0:
                inputs = inputs.cuda()
            # outputs = net.forward(inputs)
            outputs = net.forward(inputs, adj1_test.cuda(), adj3_test.cuda(), adj2_test.cuda())
            outputs = (outputs[0] + flip(flip_cihp(outputs[1]), dim=-1)) / 2
            outputs = outputs.unsqueeze(0)

            if iii > 0:
                outputs = F.upsample(outputs, size=(h, w), mode='bilinear', align_corners=True)
                outputs_final = outputs_final + outputs
            else:
                outputs_final = outputs.clone()

    ################ plot pic
    predictions = torch.max(outputs_final, 1)[1]
    results = predictions.cpu().numpy()
    vis_res = decode_labels(results)
    parsing_im = Image.fromarray(vis_res[0])
    return parsing_im
def forward(self, left, right): img_size = left.size() feats_l = self.feature_extraction(left) feats_r = self.feature_extraction(right) pred = [] for scale in range(len(feats_l)): if scale > 0: wflow = F.upsample( pred[scale - 1], (feats_l[scale].size(2), feats_l[scale].size(3)), mode='bilinear') * feats_l[scale].size(2) / img_size[2] cost = self._build_volume_2d3(feats_l[scale], feats_r[scale], self.maxdisplist[scale], wflow, stride=1) else: cost = self._build_volume_2d(feats_l[scale], feats_r[scale], self.maxdisplist[scale], stride=1) cost = torch.unsqueeze(cost, 1) cost = self.volume_postprocess[scale](cost) cost = cost.squeeze(1) if scale == 0: pred_low_res = disparityregression2(0, self.maxdisplist[0])( F.softmax(-cost, dim=1)) pred_low_res = pred_low_res * img_size[2] / pred_low_res.size( 2) disp_up = F.upsample(pred_low_res, (img_size[2], img_size[3]), mode='bilinear') pred.append(disp_up) else: pred_low_res = disparityregression2(-self.maxdisplist[scale] + 1, self.maxdisplist[scale], stride=1)(F.softmax(-cost, dim=1)) pred_low_res = pred_low_res * img_size[2] / pred_low_res.size( 2) disp_up = F.upsample(pred_low_res, (img_size[2], img_size[3]), mode='bilinear') pred.append(disp_up + pred[scale - 1]) if self.refine_spn: spn_out = self.refine_spn[0](nn.functional.upsample( left, (img_size[2] // 4, img_size[3] // 4), mode='bilinear')) G1, G2, G3 = spn_out[:, :self. spn_init_channels, :, :], spn_out[:, self. spn_init_channels: self. spn_init_channels * 2, :, :], spn_out[:, self . spn_init_channels * 2:, :, :] sum_abs = G1.abs() + G2.abs() + G3.abs() G1 = torch.div(G1, sum_abs + 1e-8) G2 = torch.div(G2, sum_abs + 1e-8) G3 = torch.div(G3, sum_abs + 1e-8) pred_flow = nn.functional.upsample( pred[-1], (img_size[2] // 4, img_size[3] // 4), mode='bilinear') refine_flow = self.spn_layer(self.refine_spn[1](pred_flow), G1, G2, G3) refine_flow = self.refine_spn[2](refine_flow) pred.append( nn.functional.upsample(refine_flow, (img_size[2], img_size[3]), mode='bilinear')) return pred
def forward(self, x): """Applies network layers and ops on input image(s) x. Args: x: input image or batch of images. Shape: [batch,3*batch,300,300]. Return: Depending on phase: test: Variable(tensor) of output class label predictions, confidence score, and corresponding location predictions for each object detected. Shape: [batch,topk,7] train: list of concat outputs from: 1: confidence layers, Shape: [batch*num_priors,num_classes] 2: localization layers, Shape: [batch,num_priors*4] 3: priorbox layers, Shape: [2,num_priors*4] """ sources = list() loc = list() conf = list() # apply vgg up to conv4_3 relu for k in range(23): x = self.base[k](x) s1 = self.reduce(x) # apply vgg up to fc7 for k in range(23, len(self.base)): x = self.base[k](x) s2 = self.up_reduce(x) s2 = F.upsample(s2, scale_factor=2, mode='bilinear', align_corners=True) s = torch.cat((s1,s2),1) ss = self.Norm(s) sources.append(ss) # apply extra layers and cache source layer outputs for k, v in enumerate(self.extras): x = v(x) if k < self.indicator or k%2 ==0: sources.append(x) # apply multibox head to source layers for (x, l, c) in zip(sources, self.loc, self.conf): loc.append(l(x).permute(0, 2, 3, 1).contiguous()) conf.append(c(x).permute(0, 2, 3, 1).contiguous()) #print([o.size() for o in loc]) loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) if self.phase == "test": output = ( loc.view(loc.size(0), -1, 4), # loc preds self.softmax(conf.view(-1, self.num_classes)), # conf preds ) else: output = ( loc.view(loc.size(0), -1, 4), conf.view(conf.size(0), -1, self.num_classes), ) return output
def forward(self, x):
    return F.upsample(x, size=(512, 1024), mode='bilinear')  #train resolution: 512x512
def forward(self, x):
    x = F.upsample(x, scale_factor=2, mode='bilinear', align_corners=True)  #False
    x = F.relu(self.conv1(x), inplace=True)
    x = F.relu(self.conv2(x), inplace=True)
    return x
def forward(self, x):
    h, w = 2 * x.size(2), 2 * x.size(3)
    p = F.upsample(input=x, size=(h, w), mode='bilinear')
    return self.conv(p)
def resize_image(img, h, w, **up_kwargs):
    return F.upsample(img, (h, w), **up_kwargs)
def forward(self, x):
    size = x.shape[2:]

    x = self.conv1_1(x)
    s1_1 = self.dconv1_1(x)
    x = self.conv1_2(x)
    s1_2 = self.dconv1_2(x)
    x = self.maxpool(x)

    x = self.conv2_1(x)
    s = self.dconv2_1(x)
    s2_1 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv2_2(x)
    s = self.dconv2_2(x)
    s2_2 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.maxpool(x)

    x = self.conv3_1(x)
    s = self.dconv3_1(x)
    s3_1 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv3_2(x)
    s = self.dconv3_2(x)
    s3_2 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv3_3(x)
    s = self.dconv3_3(x)
    s3_3 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.maxpool(x)

    x = self.conv4_1(x)
    s = self.dconv4_1(x)
    s4_1 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv4_2(x)
    s = self.dconv4_2(x)
    s4_2 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv4_3(x)
    s = self.dconv4_3(x)
    s4_3 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.maxpool(x)

    x = self.conv5_1(x)
    s = self.dconv5_1(x)
    s5_1 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv5_2(x)
    s = self.dconv5_2(x)
    s5_2 = F.upsample(s, size=size, mode='bilinear', align_corners=True)
    x = self.conv5_3(x)
    s = self.dconv5_3(x)
    s5_3 = F.upsample(s, size=size, mode='bilinear', align_corners=True)

    score = self.score(torch.cat([
        s1_1, s1_2, s2_1, s2_2, s3_1, s3_2, s3_3,
        s4_1, s4_2, s4_3, s5_1, s5_2, s5_3
    ], dim=1))
    # no relu
    return score
def forward(self, x):
    layer0 = self.layer0(x)
    layer1 = self.layer1(layer0)
    layer2 = self.layer2(layer1)
    layer3 = self.layer3(layer2)
    layer4 = self.layer4(layer3)

    ccl_4 = self.ccl_4(layer4)
    ccl_3 = self.ccl_3(layer3)
    ccl_2 = self.ccl_2(layer2)
    ccl_1 = self.ccl_1(layer1)

    sc_4 = self.sc_4(layer4)
    sc_3 = self.sc_3(layer3)
    sc_2 = self.sc_2(layer2)
    sc_1 = self.sc_1(layer1)

    layer4_fusion = self.layer4_fusion(torch.cat((sc_4, ccl_4), 1))
    layer3_fusion = self.layer3_fusion(torch.cat((sc_3, ccl_3), 1))
    layer2_fusion = self.layer2_fusion(torch.cat((sc_2, ccl_2), 1))
    layer1_fusion = self.layer1_fusion(torch.cat((sc_1, ccl_1), 1))

    layer4_feature = self.up_4(layer4_fusion)
    layer4_feature = self.cbam_4(layer4_feature)
    layer3_feature = self.up_3(layer3_fusion)
    layer3_feature = self.cbam_3(layer3_feature)
    layer2_feature = self.up_2(layer2_fusion)
    layer2_feature = self.cbam_2(layer2_feature)
    layer1_feature = self.up_1(layer1_fusion)
    layer1_feature = self.cbam_1(layer1_feature)

    fusion_feature = torch.cat(
        (layer1_feature, layer2_feature, layer3_feature, layer4_feature), 1)
    fusion_feature = self.cbam_fusion(fusion_feature)

    layer4_predict = self.layer4_predict(layer4_feature)
    layer3_predict = self.layer3_predict(layer3_feature)
    layer2_predict = self.layer2_predict(layer2_feature)
    layer1_predict = self.layer1_predict(layer1_feature)
    fusion_predict = self.fusion_predict(fusion_feature)

    layer4_predict = F.upsample(layer4_predict, size=x.size()[2:], mode='bilinear', align_corners=True)
    layer3_predict = F.upsample(layer3_predict, size=x.size()[2:], mode='bilinear', align_corners=True)
    layer2_predict = F.upsample(layer2_predict, size=x.size()[2:], mode='bilinear', align_corners=True)
    layer1_predict = F.upsample(layer1_predict, size=x.size()[2:], mode='bilinear', align_corners=True)
    fusion_predict = F.upsample(fusion_predict, size=x.size()[2:], mode='bilinear', align_corners=True)

    if self.training:
        return layer4_predict, layer3_predict, layer2_predict, layer1_predict, fusion_predict

    return F.sigmoid(layer4_predict), F.sigmoid(layer3_predict), F.sigmoid(layer2_predict), \
           F.sigmoid(layer1_predict), F.sigmoid(fusion_predict)
def _concatenation(self, x, g):
    input_size = x.size()
    batch_size = input_size[0]
    assert batch_size == g.size(0)

    #############################
    # compute compatibility score

    # theta => (b, c, t, h, w) -> (b, i_c, t, h, w)
    # phi   => (b, c, t, h, w) -> (b, i_c, t, h, w)
    theta_x = self.theta(x)
    theta_x_size = theta_x.size()

    # nl(theta.x + phi.g + bias) -> f = (b, i_c, t/s1, h/s2, w/s3)
    phi_g = F.upsample(self.phi(g), size=theta_x_size[2:], mode=self.upsample_mode)

    f = theta_x + phi_g
    f = self.nl1(f)

    psi_f = self.psi(f)

    ############################################
    # normalisation -- scale compatibility score
    #   psi^T . f -> (b, 1, t/s1, h/s2, w/s3)
    if self.mode == 'concatenation_softmax':
        sigm_psi_f = F.softmax(psi_f.view(batch_size, 1, -1), dim=2)
        sigm_psi_f = sigm_psi_f.view(batch_size, 1, *theta_x_size[2:])
    elif self.mode == 'concatenation_mean':
        psi_f_flat = psi_f.view(batch_size, 1, -1)
        psi_f_sum = torch.sum(psi_f_flat, dim=2)  #clamp(1e-6)
        psi_f_sum = psi_f_sum[:, :, None].expand_as(psi_f_flat)

        sigm_psi_f = psi_f_flat / psi_f_sum
        sigm_psi_f = sigm_psi_f.view(batch_size, 1, *theta_x_size[2:])
    elif self.mode == 'concatenation_mean_flow':
        psi_f_flat = psi_f.view(batch_size, 1, -1)
        ss = psi_f_flat.shape
        psi_f_min = psi_f_flat.min(dim=2)[0].view(ss[0], ss[1], 1)
        psi_f_flat = psi_f_flat - psi_f_min
        psi_f_sum = torch.sum(psi_f_flat, dim=2).view(ss[0], ss[1], 1).expand_as(psi_f_flat)

        sigm_psi_f = psi_f_flat / psi_f_sum
        sigm_psi_f = sigm_psi_f.view(batch_size, 1, *theta_x_size[2:])
    elif self.mode == 'concatenation_range_normalise':
        psi_f_flat = psi_f.view(batch_size, 1, -1)
        ss = psi_f_flat.shape
        psi_f_max = torch.max(psi_f_flat, dim=2)[0].view(ss[0], ss[1], 1)
        psi_f_min = torch.min(psi_f_flat, dim=2)[0].view(ss[0], ss[1], 1)

        sigm_psi_f = (psi_f_flat - psi_f_min) / (psi_f_max - psi_f_min).expand_as(psi_f_flat)
        sigm_psi_f = sigm_psi_f.view(batch_size, 1, *theta_x_size[2:])
    elif self.mode == 'concatenation_sigmoid':
        sigm_psi_f = F.sigmoid(psi_f)
    else:
        raise NotImplementedError

    # sigm_psi_f is attention map! upsample the attentions and multiply
    sigm_psi_f = F.upsample(sigm_psi_f, size=input_size[2:], mode=self.upsample_mode)
    y = sigm_psi_f.expand_as(x) * x
    W_y = self.W(y)

    return W_y, sigm_psi_f
import torch
import torch.onnx
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch import Tensor
import torchvision
from torchvision import datasets, transforms


class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5, padding=2)

    def forward(self, x):
        x = F.max_pool2d(self.conv1(x), 2)
        x = F.upsample(x, scale_factor=2, mode='nearest')
        return x


model = ConvNet()
model = model.to('cpu')

dummy_input = Tensor(1, 1, 28, 28)
torch.onnx.export(
    model, dummy_input, "torch_conv_simple_upsample.proto", verbose=True)

inp = Tensor([1, 2, 3, 4]).reshape(1, 1, 2, 2)
print(inp)
print(F.upsample(inp, scale_factor=2))
def _upsample(self, x, y, scale=1):
    _, _, H, W = y.size()
    return F.upsample(x, size=(H // scale, W // scale), mode='bilinear')
def forward(self, input_images, N, R, L, S, targets, smooth_L_with_N, compute_loss_R, compute_loss_S): # GPU device_id = N.get_device() total_loss = Variable(torch.zeros(1).type( torch.FloatTensor)).cuda(device_id) #### GroundTruth #### # gt_R = Variable(targets['gt_R'].float().cuda(device_id), requires_grad=False) # gt_S = Variable(targets['gt_S'].float().cuda(device_id), requires_grad=False) # rgb_img = Variable(targets['rgb_img'].float().cuda(device_id), requires_grad=False) # mask = Variable(torch.min(targets['mask'].float().cuda(device_id), dim=1, keepdim=True)[0], requires_grad=False) gt_R = targets['gt_R'] gt_S = targets['gt_S'] rgb_img = targets['rgb_img'] mask = torch.min(targets['mask'], dim=1, keepdim=True)[0] # Same size size = [N.size(2), N.size(3)] gt_R = F.upsample(gt_R, size, mode='bilinear') gt_S = F.upsample(gt_S, size, mode='bilinear') gt_S_intensity = torch.mean(gt_S, dim=1, keepdim=True) # shading intensity rgb_img = F.upsample(rgb_img, size, mode='bilinear') mask = F.upsample(mask, size, mode='bilinear') mask = (mask >= 0.999).float() #### Loss function #### if compute_loss_R: mask_R = mask.repeat(1, gt_R.size(1), 1, 1) mask_img = mask.repeat(1, input_images.size(1), 1, 1) R_loss = self.w_R * self.value_R_criterion(R, gt_R, mask_R) grad_R_loss = self.w_grad_R * self.grad_R_criterion( R, gt_R, mask_R) global_R_loss = self.w_global_R * self.global_R_criterion( R, gt_R, rgb_img, mask_img) total_loss += R_loss + grad_R_loss + global_R_loss if compute_loss_S: S_intensity = S L_intensity = L mask_S_intensity = mask.repeat(1, gt_S_intensity.size(1), 1, 1) mask_L_intensity = mask.repeat(1, L_intensity.size(1), 1, 1) # L Loss smooth_mode = 1 if smooth_L_with_N else 0 smooth_L_loss = self.w_smooth_L * self.smooth_L_criterion( L_intensity, N, mask_L_intensity, smooth_mode) # S Loss S_loss = self.w_S * self.value_S_criterion( S_intensity, gt_S_intensity, mask_S_intensity) grad_S_loss = self.w_grad_S * self.grad_S_criterion( S_intensity, gt_S_intensity, mask_S_intensity) total_loss += S_loss + grad_S_loss + smooth_L_loss # visualize # if self.cnt % 30 == 0: # L_len = torch.sum(L_intensity ** 2, dim=1, keepdim=True) ** 0.5 # L_direct = L_intensity / L_len.clamp(1e-6) # V.vis.img_many({ # 'rgb_img': rgb_img.cpu().data[0, :, :, :], # 'gt_R': gt_R.cpu().data[0, :, :, :], # 'gt_S_intensity': torch.clamp(gt_S_intensity.cpu().data[0, 0, :, :], 0, 1), # # 'gt_mask': mask_img.cpu().data[0, :, :, :], # 'pred_N': ((-N[0, :, :, :] + 1.0) / 2.0).data.cpu().clamp(0, 1), # # 'pred_R': torch.clamp(R.data.cpu()[0, :, :, :], 0, 1), # 'pred_S_intensity': torch.clamp(S_intensity[0, :, :, :].data.cpu(), 0, 1), # 'pred_L_direction': ((-L_direct[0, :, :, :] + 1.0) / 2.0).data.cpu(), # 'pred_L_length': (L_len / (torch.max(L_len) + 1e-6)).data.cpu()[0, :, :, :], # }) # self.cnt = 0 # self.cnt += 1 return total_loss
def forward(self, x):
    x = F.max_pool2d(self.conv1(x), 2)
    x = F.upsample(x, scale_factor=2, mode='nearest')
    return x
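# Note: F.upsample is deprecated in favour of F.interpolate in recent PyTorch releases.
# A minimal sketch of the equivalent call for the block above (the conv layer is an
# illustrative stand-in for self.conv1, not the original module):
import torch
import torch.nn as nn
import torch.nn.functional as F

conv1 = nn.Conv2d(1, 10, kernel_size=5, padding=2)

x = torch.randn(1, 1, 28, 28)
x = F.max_pool2d(conv1(x), 2)                         # [1, 10, 14, 14]
x = F.interpolate(x, scale_factor=2, mode='nearest')  # same result as F.upsample(..., mode='nearest')
print(x.shape)  # torch.Size([1, 10, 28, 28])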
def forward(self, x): assert x.size(2) == 160 and x.size(3) == 64, \ "Input size does not match, expected (160, 64) but got ({}, {})".format(x.size(2), x.size(3)) x = self.conv(x) # ============== Block 1 ============== # global branch x1 = self.inception1(x) x1_attn, x1_theta = self.ha1(x1) x1_out = x1 * x1_attn # local branch if self.learn_region: x1_local_list = [] for region_idx in range(4): x1_theta_i = x1_theta[:, region_idx, :] x1_theta_i = self.transform_theta(x1_theta_i, region_idx) x1_trans_i = self.stn(x, x1_theta_i) x1_trans_i = F.upsample(x1_trans_i, (24, 28), mode='bilinear', align_corners=True) x1_local_i = self.local_conv1(x1_trans_i) x1_local_list.append(x1_local_i) # ============== Block 2 ============== # Block 2 # global branch x2 = self.inception2(x1_out) x2_attn, x2_theta = self.ha2(x2) x2_out = x2 * x2_attn # local branch if self.learn_region: x2_local_list = [] for region_idx in range(4): x2_theta_i = x2_theta[:, region_idx, :] x2_theta_i = self.transform_theta(x2_theta_i, region_idx) x2_trans_i = self.stn(x1_out, x2_theta_i) x2_trans_i = F.upsample(x2_trans_i, (12, 14), mode='bilinear', align_corners=True) x2_local_i = x2_trans_i + x1_local_list[region_idx] x2_local_i = self.local_conv2(x2_local_i) x2_local_list.append(x2_local_i) # ============== Block 3 ============== # Block 3 # global branch x3 = self.inception3(x2_out) x3_attn, x3_theta = self.ha3(x3) x3_out = x3 * x3_attn # local branch if self.learn_region: x3_local_list = [] for region_idx in range(4): x3_theta_i = x3_theta[:, region_idx, :] x3_theta_i = self.transform_theta(x3_theta_i, region_idx) x3_trans_i = self.stn(x2_out, x3_theta_i) x3_trans_i = F.upsample(x3_trans_i, (6, 7), mode='bilinear', align_corners=True) x3_local_i = x3_trans_i + x2_local_list[region_idx] x3_local_i = self.local_conv3(x3_local_i) x3_local_list.append(x3_local_i) # ============== Feature generation ============== # global branch x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view( x3_out.size(0), x3_out.size(1)) x_global = self.fc_global(x_global) # local branch if self.learn_region: x_local_list = [] for region_idx in range(4): x_local_i = x3_local_list[region_idx] x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view( x_local_i.size(0), -1) x_local_list.append(x_local_i) x_local = torch.cat(x_local_list, 1) x_local = self.fc_local(x_local) if not self.training: # l2 normalization before concatenation if self.learn_region: x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True) x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True) return torch.cat([x_global, x_local], 1) else: return x_global prelogits_global = self.classifier_global(x_global) if self.learn_region: prelogits_local = self.classifier_local(x_local) if self.loss == {'xent'}: if self.learn_region: return (prelogits_global, prelogits_local) else: return prelogits_global elif self.loss == {'xent', 'htri'}: if self.learn_region: return (prelogits_global, prelogits_local), (x_global, x_local) else: return prelogits_global, x_global else: raise KeyError("Unsupported loss: {}".format(self.loss))
def forward(self, input, adj1_target=None, adj2_source=None, adj3_transfer=None): x, low_level_features = self.xception_features(input) # print(x.size()) x1 = self.aspp1(x) x2 = self.aspp2(x) x3 = self.aspp3(x) x4 = self.aspp4(x) x5 = self.global_avg_pool(x) x5 = F.upsample(x5, size=x4.size()[2:], mode="bilinear", align_corners=True) x = torch.cat((x1, x2, x3, x4, x5), dim=1) x = self.concat_projection_conv1(x) x = self.concat_projection_bn1(x) x = self.relu(x) # print(x.size()) x = F.upsample( x, size=low_level_features.size()[2:], mode="bilinear", align_corners=True ) low_level_features = self.feature_projection_conv1(low_level_features) low_level_features = self.feature_projection_bn1(low_level_features) low_level_features = self.relu(low_level_features) # print(low_level_features.size()) # print(x.size()) x = torch.cat((x, low_level_features), dim=1) x = self.decoder(x) ### add graph # source graph source_graph = self.source_featuremap_2_graph(x) source_graph1 = self.source_graph_conv1.forward( source_graph, adj=adj2_source, relu=True ) source_graph2 = self.source_graph_conv2.forward( source_graph1, adj=adj2_source, relu=True ) source_graph3 = self.source_graph_conv2.forward( source_graph2, adj=adj2_source, relu=True ) source_2_target_graph1_v5 = self.transpose_graph.forward( source_graph1, adj=adj3_transfer, relu=True ) source_2_target_graph2_v5 = self.transpose_graph.forward( source_graph2, adj=adj3_transfer, relu=True ) source_2_target_graph3_v5 = self.transpose_graph.forward( source_graph3, adj=adj3_transfer, relu=True ) # target graph # print('x size',x.size(),adj1.size()) graph = self.target_featuremap_2_graph(x) source_2_target_graph1 = self.similarity_trans(source_graph1, graph) # graph combine 1 graph = torch.cat( ( graph, source_2_target_graph1.squeeze(0), source_2_target_graph1_v5.squeeze(0), ), dim=-1, ) graph = self.fc_graph.forward(graph, relu=True) graph = self.target_graph_conv1.forward(graph, adj=adj1_target, relu=True) source_2_target_graph2 = self.similarity_trans(source_graph2, graph) # graph combine 2 graph = torch.cat( (graph, source_2_target_graph2, source_2_target_graph2_v5), dim=-1 ) graph = self.fc_graph.forward(graph, relu=True) graph = self.target_graph_conv2.forward(graph, adj=adj1_target, relu=True) source_2_target_graph3 = self.similarity_trans(source_graph3, graph) # graph combine 3 graph = torch.cat( (graph, source_2_target_graph3, source_2_target_graph3_v5), dim=-1 ) graph = self.fc_graph.forward(graph, relu=True) graph = self.target_graph_conv3.forward(graph, adj=adj1_target, relu=True) # print(graph.size(),x.size()) graph = self.target_graph_2_fea.forward(graph, x) x = self.target_skip_conv(x) x = x + graph ### x = self.semantic(x) x = F.upsample(x, size=input.size()[2:], mode="bilinear", align_corners=True) return x
def forward(self, x, adj, flag=0): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) x = self.maxpool(x) x = self.layer1(x) x = self.layer2(x) x = self.layer3(x) x = self.layer4(x) if flag == 0: x = self.deconv_layers(x) x = self.final_layer(x) return x elif flag == 1: self.indices = self.indices.type(torch.cuda.LongTensor) hp = self.deconv_layers(x) hp_o = self.final_layer(hp) hp = F.upsample(hp_o, size=[12, 9], mode='bilinear') hp = torch.index_select(hp, 1, self.indices) x = self.embedding_layer(x) feature = torch.mul(hp.unsqueeze(dim=2), x.unsqueeze(dim=1)) # Generate the mask and align the feature align_feature = torch.zeros(hp.size(0), hp.size(1), 9 * 256) for b in range(len(hp)): for i in range(len(hp[0])): mask = torch.zeros(hp.size(2), hp.size(3)) temp = hp[b][i] y_values, ys = temp.max(dim=0) x_values, xc = y_values.max(dim=0) # xc = x cordinate y = ys[xc] mask[y][xc] = 1 for n in range(0, 2): y_l = y + n y_s = y - n xc_l = xc + n xc_s = xc - n if y_l >= hp.size(2): y_l = hp.size(2) - 1 if y_s < 0: y_s = 0 if xc_l >= hp.size(3): xc_l = hp.size(3) - 1 if xc_s < 0: xc_s = 0 mask[y_l][xc_l] = 1 mask[y_s][xc_s] = 1 mask[y][xc_l] = 1 mask[y][xc_s] = 1 mask[y_l][xc] = 1 mask[y_s][xc] = 1 mask[y_s][xc_l] = 1 mask[y_l][xc_s] = 1 mask = mask.type(torch.cuda.ByteTensor) temp_feature = torch.masked_select(feature[b][i], mask) cnt = mask.sum() if cnt < 9: mean = torch.mean(temp_feature).repeat(256) while cnt < 9: temp_feature = torch.cat((temp_feature, mean), dim=0) cnt = cnt + 1 align_feature[b][i] = temp_feature align_feature = align_feature.type(torch.cuda.FloatTensor) x_part = F.dropout(F.relu(self.graph_layer1(align_feature, adj))) x_part = self.graph_layer2(x_part, adj) x_part = self.fc_feature_align(x_part.view(x_part.size(0), -1)) x = x.view(x.size(0), -1) x = self.fc_feature(x) x = 0.1 * x + 0.9 * x_part if not self.feature_flag: x = self.classification(x) return x else: x1 = self.deconv_layers(x) x1 = self.final_layer(x1) x_pose = self.pose_conv(x1) x2 = self.embedding_layer(x) x2 = x2 * x_pose x2 = x2.view(x2.size(0), -1) x2 = self.fc_feature(x2) if not self.feature_flag: x2 = self.classification(x2) return x1, x2
val = q[:, index]
grad = torch.autograd.grad(val, x)[0]
grad = grad.data.cpu().numpy()[0]  # for the first one in the batch -> [2,84,84]
grad = np.abs(grad)  # [3,480,640]
# print (grad.shape)
grad = np.rollaxis(grad, 1, 0)
grad = np.rollaxis(grad, 2, 1)
grad = np.mean(grad, 2)  # [480,640]

# Get mask
x = Variable(torch.from_numpy(np.array([preprocess(frame)])).float()).cuda()
# print (x.size())  # [1,3,480,640]
lowres = MP.predict_lowres(x)  # [1,1,480,640]
highdim = F.upsample(input=lowres, size=(480, 640), mode='bilinear')
# masked_frame = x * mask
# masked_frame = x * mask + (1.-mask)* F.sigmoid(MP.bias_frame.bias_frame)
masked_frame = highdim

# Get grad of masked
# x = Variable(torch.from_numpy(np.array([masked_frame)])).float(), requires_grad=True).cuda()
# print (x.size())  # [1,3,480,640]
q_m = DQNs[0](masked_frame)  # [1,A]
m, index = torch.max(q_m, 1)
val = q_m[:, index]
grad_m = torch.autograd.grad(val, masked_frame)[0]
grad_m = grad_m.data.cpu().numpy()[0]  # for the first one in the batch -> [2,84,84]
def forward(self,Images,ROI,EvalMode=False): #------------------------------- Convert from numpy to pytorch------------------------------------------------------- InpImages = torch.autograd.Variable(torch.from_numpy(Images), requires_grad=False).transpose(2,3).transpose(1, 2).type(torch.FloatTensor) ROImap = torch.autograd.Variable(torch.from_numpy(ROI.astype(np.float)), requires_grad=False,volatile=EvalMode).unsqueeze(dim=1).type(torch.FloatTensor) if self.UseGPU == True: # Convert to GPU InpImages = InpImages.cuda() ROImap = ROImap.cuda() # -------------------------Normalize image------------------------------------------------------------------------------------------------------- RGBMean = [123.68, 116.779, 103.939] RGBStd = [65, 65, 65] for i in range(len(RGBMean)): InpImages[:, i, :, :]=(InpImages[:, i, :, :]-RGBMean[i])/RGBStd[i] # Normalize image by std and mean #================================================================================================================================================== #============================Run net layers=================================================================================================== nValve = 0 # counter of attention layers used x=InpImages x = self.Net.conv1(x) # First resnet convulotion layer #----------------Apply Attention layers-------------------------------------------------------------------------------------------------- #F.upsample(ROImap.repeat(1,x.shape[1],1,1), mode='bilinear') # AttentionMap = self.Valve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # BiasMap = self.BiasValve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # nValve += 1 # ---------------------First resnet block----------------------------------------------------------------------------------------------- x = self.Net.bn1(x) x = self.Net.relu(x) x = self.Net.maxpool(x) x = self.Net.layer1(x) # ----------------Apply Attention layer-------------------------------------------------------------------------------------------------- # AttentionMap = self.Valve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # BiasMap = self.BiasValve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # x = x * AttentionMap + BiasMap # nValve += 1 # --------------------Second Resnet 50 Block------------------------------------------------------------------------------------------------ x = self.Net.layer2(x) # ----------------Attention-------------------------------------------------------------------------------------------------- # AttentionMap = self.Valve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # BiasMap = self.BiasValve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # x = x * AttentionMap + BiasMap # nValve += 1 # -------------------Third resnet 50 block------------------------------------------------------------------------------------------------- x = self.Net.layer3(x) x = x * F.upsample(ROImap.repeat(1,x.shape[1],1,1), size=x.shape[2:4], mode='bilinear') # ---------------Apply Attention layer-------------------------------------------------------------------------------------------------- # AttentionMap = self.Valve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # BiasMap = self.BiasValve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # x = x * AttentionMap + BiasMap # nValve += 1 # -----------------Resnet 50 block 4--------------------------------------------------------------------------------------------------- x = 
self.Net.layer4(x) # ----------------Apply Attention layer-------------------------------------------------------------------------------------------------- # AttentionMap = self.Valve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # BiasMap = self.BiasValve[nValve](F.upsample(ROImap, size=x.shape[2:4], mode='bilinear')) # x = x * AttentionMap + BiasMap # nValve += 1 # ------------Fully connected final vector-------------------------------------------------------------------------------------------------------- x = torch.mean(torch.mean(x, dim=2), dim=2) #x = x.squeeze() x = self.Net.fc(x) #--------------------------------------------------------------------------------------------------------------------------- ProbVec = F.softmax(x,dim=1) # Probability vector for all classes Prob,Pred=ProbVec.max(dim=1) # Top predicted class and probability return ProbVec,Pred
def forward(self, x):
    size = 2 * x.size(2), 2 * x.size(3)
    f = F.upsample(x, size=size, mode='bilinear')
    return self.conv(f)
def main(): # use the gpu or cpu as specificed device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device_ids = None if args.gpu_ids is None: if torch.cuda.is_available(): device_ids = list(range(torch.cuda.device_count())) else: device_ids = args.gpu_ids device = torch.device("cuda:{}".format(device_ids[0])) # set save dir ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) save_dir = os.path.join(ROOT_DIR, 'evaluate_' + args.evaluate_name) os.makedirs(save_dir, exist_ok=True) # check model path model_path = os.path.join('logs', args.model_name, 'checkpoint.pth') if not os.path.exists(model_path): print('model path {} is not exists.'.format(model_path)) sys.exit(1) # build the model model = DFN(in_channels=3, add_fc=False, self_attention=True) model = nn.DataParallel(model, device_ids=device_ids) # loading checkpoint print("=> loading checkpoint '{}'".format(model_path)) checkpoint = torch.load(model_path, map_location='cpu') model.load_state_dict(checkpoint['state_dict']) print("=> loaded checkpoint '{}' (epoch {})".format( model_path, checkpoint['epoch'])) test_ds = get_helen_test_data( ['hair'], aug_setting_name='aug_512_0.6_multi_person') # ------ begin evaluate batch_time = AverageMeter() acc_hist_all = Acc_score(['hair']) acc_hist_single = Acc_score(['hair']) mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] # switch to evaluate mode model.eval() with torch.no_grad(): end = time.time() batch_index = 0 batch = None labels = None image_names = [] data_len = len(test_ds.image_ids) for idx, image_id in enumerate(test_ds.image_ids): if batch_index == 0: batch = np.zeros((args.batch_size, 512, 512, 3)) labels = np.zeros((args.batch_size, 512, 512)) image = test_ds.load_image(image_id) batch[batch_index] = (image / 255 - mean) / std labels[batch_index] = test_ds.load_labels(image_id) image_names.append( os.path.basename(test_ds[image_id]['image_path'])[:-4]) batch_index = batch_index + 1 if batch_index < args.batch_size and idx != data_len - 1: continue batch_index = 0 input = batch.transpose((0, 3, 1, 2)) input, target = torch.from_numpy(input).to( torch.float).to(device), torch.from_numpy(labels).to( torch.long).to(device) # get and deal with output output = model(input) if type(output) == list: output = output[0] if output.size()[-1] < target.size()[-1]: output = F.upsample( output, size=target.size()[-2:], mode='bilinear') target = target.cpu().detach().numpy() pred = torch.argmax(output, dim=1).cpu().detach().numpy() acc_hist_all.collect(target, pred) acc_hist_single.collect(target, pred) f1_result = acc_hist_single.get_f1_results()['hair'] input_images = unmold_input(batch) for b in range(input_images.shape[0]): print(input_images[b].shape, target[b].shape) gt_blended = blend_labels(input_images[b], target[b]) predict_blended = blend_labels(input_images[b], pred[b]) fig, axes = plt.subplots(ncols=2) axes[0].imshow(predict_blended) axes[0].set(title=f'predict:%04f' % (f1_result)) axes[1].imshow(gt_blended) axes[1].set(title='ground-truth') if args.save: save_path = os.path.join(save_dir, f'%04f_%s.png' % (f1_result, image_names[b])) plt.savefig(save_path) else: plt.show() plt.close(fig) acc_hist_single.reset() batch_time.update(time.time() - end) end = time.time() image_names = [] f1_result = acc_hist_all.get_f1_results()['hair'] print('Valiation: [{0}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Acc of f-score [{1}]'.format( len(test_ds), f1_result, batch_time=batch_time))
def IOG_getmask(bgpoint, cppoint, image, net, newpoint, switch, distancemap_old):
    with torch.no_grad():  # main testing loop
        if newpoint == -1:
            print('first loop')
            new_x = -1
            new_y = -1
        else:
            if switch == 0:
                print('add new point on the cp')
                new_x = newpoint[0]  # please add the new point on the ori_image
                new_y = newpoint[1]  # please add the new point on the ori_image
            elif switch == 1:
                print('add new point on the bg')
                new_x = newpoint[0]  # please add the new point on the ori_image
                new_y = newpoint[1]  # please add the new point on the ori_image
            else:
                print('forgot to choose switch')
        # switch: if the new point is on the object: true; if the new point is on the bg: false
        device = 'cuda'
        cx = cppoint[0]
        cy = cppoint[1]
        bgx = bgpoint[0]
        bgy = bgpoint[1]
        bgyw = bgpoint[2] - bgx
        bgyh = bgpoint[3] - bgy
        w, h, channel = image.shape
        bg = getbg(bgy, bgx, bgyh, bgyw, w, h)
        cp = getcp(cx, cy, w, h)
        crop_image = crop_from_mask(image, bg, relax=30, zero_pad=True)
        crop_bg = crop_from_mask(bg, bg, relax=30, zero_pad=True)
        crop_cp = crop_from_mask(cp, bg, relax=30, zero_pad=True)
        crop_image = fixed_resize(crop_image, (512, 512))
        crop_bg = fixed_resize(crop_bg, (512, 512))
        crop_cp = fixed_resize(crop_cp, (512, 512))
        distancemap = get_distancemap(sigma=10, elem=crop_bg, elem_cp=crop_cp, pad_pixel=10)
        distancemap = totensor(distancemap)
        crop_image = totensor(crop_image)
        distancemap = distancemap.float()
        crop_image = crop_image.float()
        inputs = torch.cat([crop_image, distancemap], 1)
        inputs = inputs.to(device)
        if new_x > -1:
            print('add newpoint:', new_x, new_y)
            newpoint = getcp(new_x, new_y, w, h)
            crop_newpoint = crop_from_mask(newpoint, bg, relax=30, zero_pad=True)
            newpoint = fixed_resize(crop_newpoint, (512, 512))
            distancemap_mid = new_distancemap(distancemap_old, newpoint, 10, switch)
            distancemap_mid = distancemap_mid.float()
        else:
            distancemap_mid = distancemap
        distancemap_mid = distancemap_mid.to(device)
        refine = net.forward(inputs, distancemap_mid)
        output_refine = upsample(refine, size=(512, 512), mode='bilinear', align_corners=True)

        # generate the result
        jj = 0
        outputs = output_refine.to(torch.device('cpu'))
        pred = np.transpose(outputs.data.numpy()[jj, :, :, :], (1, 2, 0))
        pred = 1 / (1 + np.exp(-pred))  # sigmoid
        pred = np.squeeze(pred)
        gt = bg
        bbox = get_bbox(gt, pad=30, zero_pad=True)
        result = crop2fullmask(pred, bbox, gt, zero_pad=True, relax=0, mask_relax=False)
        resultmax, resultmin = result.max(), result.min()
        result = (result - resultmin) / (resultmax - resultmin)
        result = (result > 0.3) * 255
        sm.imsave('resultloop.png', result)
        return result, distancemap_mid
def _upsample_add(x, y):
    # Upsample x to y's spatial size and merge the two feature maps by element-wise addition.
    _, _, H, W = y.size()
    return F.upsample(x, size=(H, W), mode='bilinear') + y
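# A minimal sketch (hypothetical shapes, not taken from the original code) of how a
# helper like _upsample_add is typically used in an FPN-style top-down pathway: the
# coarser map is upsampled to the next finer level's resolution and fused by addition.
import torch
import torch.nn.functional as F

def upsample_add_demo(x, y):
    _, _, H, W = y.size()
    return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

p5 = torch.randn(1, 256, 8, 8)     # coarse pyramid level (illustrative)
c4 = torch.randn(1, 256, 16, 16)   # finer lateral feature (illustrative)
p4 = upsample_add_demo(p5, c4)     # upsample p5 to 16x16, then add c4
print(p4.shape)                    # torch.Size([1, 256, 16, 16])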
    else:  # concat
        out = torch.cat((x5, bx5), 1)
        out = self.combine(out)
    out = self.up1(before_pool4_resize, bxbefore_pool4, out)  # 16, 16, 128
    out = self.up2(before_pool3_resize, bxbefore_pool3, out)  # 32, 32, 64
    out = self.up3(before_pool2_resize, bxbefore_pool2, out)  # 64, 64, 32
    out = self.up4(before_pool1_resize, bxbefore_pool1, out)  # 128, 128, 1
    return out


if __name__ == "__main__":
    """ testing """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # device = torch.device('cuda:1')
    x = Variable(torch.FloatTensor(np.random.random((1, 1, 2560, 128))), requires_grad=True).to(device)
    img = Variable(torch.FloatTensor(np.random.random((1, 1, 128, 128))), requires_grad=True).to(device)
    model = YNet(in_channels=1, merge_mode='concat').to(device)
    out = model(x, img)
    out = F.upsample(out, (128, 128), mode='bilinear')
    loss = torch.mean(out)
    loss.backward()
    print(loss)
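# Side note (a sketch under the same hypothetical shapes as the smoke test above):
# torch.autograd.Variable is deprecated; since PyTorch 0.4 a plain tensor can carry
# requires_grad directly, so the test inputs can be built without Variable.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.rand(1, 1, 2560, 128, device=device, requires_grad=True)
img = torch.rand(1, 1, 128, 128, device=device, requires_grad=True)
# out = model(x, img)  # with model = YNet(in_channels=1, merge_mode='concat').to(device), as above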
def main():
    logger.add("test_HardMseg_Clinic.log", rotation="10 MB")
    dev = "cpu"
    # dev = "cuda"
    img_paths = glob.glob("clinic_test/images/*")
    mask_paths = glob.glob("clinic_test/masks/*")
    img_paths.sort()
    mask_paths.sort()
    import network.models as models
    img_size = (352, 352)
    arch_path = [
        ("UNet", "weights/unet_99.pth"),
        ("PraNet", "weights/pranet-19.pth"),
        ("SCWSRCCANet", "weights/scws_rcca_178.pth"),
    ]
    numpy_vertical = []
    import matplotlib.pyplot as plt
    c = -1
    for (arch, model_path) in arch_path:
        c += 1
        model = models.__dict__[arch]()
        if dev == "cpu":
            model.cpu()
        else:
            model.cuda()
        model.eval()
        logger.info(f"Loading from {model_path}")
        device = torch.device(dev)
        try:
            # model.load_state_dict(torch.load(model_path)["model_state_dict"])
            model.load_state_dict(
                torch.load(model_path, map_location=device)["model_state_dict"])
        except Exception:
            # model.load_state_dict(torch.load(model_path))
            model.load_state_dict(torch.load(model_path, map_location=device))
        mask_img_gt = []
        soft_ress = []
        ress = []
        mask_img_gt_pr = []
        imgs = []
        mean_dices = []
        mean_precisions = []
        mean_recalls = []
        mean_ious = []
        cc = -1
        for img_path, mask_path in zip(img_paths, mask_paths):
            cc += 1
            image_ = imread(img_path)  # h, w, 3 (0-255), numpy
            if os.path.exists(mask_path):
                mask = np.array(Image.open(mask_path).convert("L"))
            else:
                print("mask does not exist")
                mask = np.zeros(image_.shape[:2], dtype=np.float64)
            image = cv2.resize(image_, img_size)
            image = image.astype("float32") / 255
            image = image.transpose((2, 0, 1))
            image = image[:, :, :, np.newaxis]
            image = image.transpose((3, 0, 1, 2))
            mask = mask.astype("float32")
            image, gt, filename, img = (
                np.asarray(image),
                np.asarray(mask),
                os.path.basename(img_path),
                np.asarray(image_),
            )
            name = os.path.splitext(filename)[0]
            ext = os.path.splitext(filename)[1]
            gt = np.asarray(gt, np.float32)
            gt /= gt.max() + 1e-8
            res2 = 0
            # image = torch.tensor(image).float().cuda()
            if dev == "cpu":
                image = torch.tensor(image).float()
            else:
                image = torch.tensor(image).float().cuda()
            # image = image.cpu()
            if arch == "UNet":
                res2 = model(image)
            else:
                res5, res4, res3, res2 = model(image)
            res = res2
            res = F.upsample(res, size=gt.shape, mode="bilinear", align_corners=False)
            res = res.sigmoid().data.cpu().numpy().squeeze()
            res = (res - res.min()) / (res.max() - res.min() + 1e-8)
            pr = res.round()
            tp = np.sum(gt * pr)
            fp = np.sum(pr) - tp
            fn = np.sum(gt) - tp
            mean_precision = precision_m(gt, pr)
            mean_recall = recall_m(gt, pr)
            mean_iou = jaccard_m(gt, pr)
            mean_dice = dice_m(gt, pr)
            mean_F2 = (5 * precision_m(gt, pr) * recall_m(gt, pr)) / (
                4 * precision_m(gt, pr) + recall_m(gt, pr))
            # mean_acc += (tp+tn)/(tp+tn+fp+fn)
            logger.info("scores ver1: {:.3f} {:.3f} {:.3f} {:.3f}".format(
                mean_iou, mean_precision, mean_recall, mean_dice))  # , mean_F2
            mean_ious.append(mean_iou)
            mean_precisions.append(mean_precision)
            mean_recalls.append(mean_recall)
            mean_dices.append(mean_dice)
            precision_all = tp / (tp + fp + 1e-07)
            recall_all = tp / (tp + fn + 1e-07)
            dice_all = 2 * precision_all * recall_all / (precision_all + recall_all)
            iou_all = (recall_all * precision_all /
                       (recall_all + precision_all - recall_all * precision_all))
            logger.info("scores ver2: {:.3f} {:.3f} {:.3f} {:.3f}".format(
                iou_all, precision_all, recall_all, dice_all))
            overwrite = True
            vis_x = 200
            visualize_dir = "outputs"

            ##### HARD PR
            ress.append(res.round() * 255)
            save_img(
                os.path.join(visualize_dir, "PR_" + str(arch),
                             name + "_hard_pr" + str(arch) + ext),
                res.round() * 255, "cv2", overwrite,
            )
            mask_img = np.asarray(img) + vis_x * np.array(
                (gt, np.zeros_like(gt), np.zeros_like(gt))).transpose((1, 2, 0))
            mask_img = mask_img[:, :, ::-1]

            ##### HARD GT
            mask_img_gt.append(mask_img)
            save_img(
                os.path.join(visualize_dir, "GT_" + str(arch),
                             name + "_hard_gt" + str(arch) + ext),
                mask_img.round(), "cv2", overwrite,
            )

            ##### SOFT PR
            soft_ress.append(res * 255)
            save_img(
                os.path.join(visualize_dir, "PR_" + str(arch),
                             name + "_soft_pr" + str(arch) + ext),
                res * 255, "cv2", overwrite,
            )
            mask_img = (
                np.asarray(img)
                + vis_x * np.array((np.zeros_like(res.round()), res.round(),
                                    np.zeros_like(res.round()))).transpose((1, 2, 0))
                + vis_x * np.array(
                    (gt, np.zeros_like(gt), np.zeros_like(gt))
                    # (gt, gt, np.zeros_like(gt))
                ).transpose((1, 2, 0)))
            mask_img = mask_img[:, :, ::-1]

            ##### MASK GT_PR
            mask_img_gt_pr.append(mask_img)
            save_img(
                os.path.join(visualize_dir, "GT_PR_" + str(arch),
                             name + str(arch) + ext),
                mask_img, "cv2", overwrite,
            )
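# A minimal sketch (not from the original repo) of the binary Dice score that the
# "ver2" numbers above reduce to, written directly in terms of TP/FP/FN counts.
import numpy as np

def dice_from_counts(gt, pr, eps=1e-7):
    gt = gt.astype(bool)
    pr = pr.astype(bool)
    tp = np.logical_and(gt, pr).sum()
    fp = pr.sum() - tp
    fn = gt.sum() - tp
    return 2 * tp / (2 * tp + fp + fn + eps)

gt = np.random.rand(352, 352) > 0.5   # stand-in binary ground truth
pr = np.random.rand(352, 352) > 0.5   # stand-in binary prediction
print(dice_from_counts(gt, pr))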
def train(epoch, file_obj, acc):
    print("\n Epoch : %d" % epoch)
    id_net.train()
    for batch_idx, (inputs, img_path, ids, att) in enumerate(trainloader):
        inputs = Variable(inputs.cuda(), volatile=True)  # volatile is deprecated; no_grad is used below
        with torch.no_grad():
            loc_preds, cls_preds = net(inputs)
        boxes = []
        for box_counter in range(inputs.size(0)):
            box, label, score = coder.decode(loc_preds[box_counter].data.cpu(),
                                             cls_preds[box_counter].data.cpu(),
                                             (224, 224))
            if box.size(0) == 1:
                boxes.append([float(x) for x in box[0]])
                continue
            tmp_box = [0, 0, 0, 0]
            for box_loop in box:  # shape should be 224!
                select_box = [float(x) for x in box_loop]
                cond1 = abs(select_box[0] + select_box[2] / 2 - 112) < abs(tmp_box[0] + tmp_box[2] / 2 - 112)
                cond2 = abs(select_box[1] + select_box[3] / 2 - 112) < abs(tmp_box[1] + tmp_box[3] / 2 - 112)
                if cond1 and cond2:
                    tmp_box = select_box
            boxes.append(tmp_box)
        img_input = torch.zeros(inputs.size(0), 3, 150, 150)
        for img_counter in range(inputs.size(0)):
            face_box = boxes[img_counter]
            face_box = [int(x) for x in face_box]
            face_box[0] = max(face_box[0], 0)
            face_box[1] = max(face_box[1], 0)
            face_box[2] = min(face_box[2], inputs.size(2))
            face_box[3] = min(face_box[3], inputs.size(2))
            height = face_box[3] - face_box[1]
            width = face_box[2] - face_box[0]
            sampled = F.upsample(
                inputs[img_counter, :, face_box[0]:face_box[2],
                       face_box[1]:face_box[3]].view(1, 3, width, height),
                size=(112, 112), mode='bilinear')
            att_sampled = F.upsample(
                att[img_counter, face_box[0]:face_box[2],
                    face_box[1]:face_box[3]].view(1, 1, width, height),
                size=(112, 112), mode='bilinear')
            grid = torchvision.utils.make_grid(sampled * att_sampled.cuda())
            torchvision.utils.save_image(
                grid, './train/' + img_path[img_counter].split('/')[-1])
            print(img_counter)
            # img_input[img_counter, :, :, :] = sampled * att_sampled.cuda()
        return 0
        while 0:
            inputs = Variable(img_input.cuda())
            optimizer.zero_grad()
            id_net_out = id_net(inputs)
            id_out_shape = id_net_out.size()
            target = torch.tensor(ids).view(id_out_shape[0]).cuda()
            # output = MCP(id_net_out, target)
            loss = criterion(id_net_out, target)
            loss.backward()
            optimizer.step()
            file_obj.write('epoch|' + str(epoch) + '|loss|' + str(loss) + '\n')
            print('epoch: %d | train_loss: %.3f |' % (epoch, loss))

        ################################################
        if batch_idx % 20 == 0:
            estimate = 0
            for batch_idx, (inputs, img_path, ids, att) in enumerate(trainloader_valid):
                inputs = Variable(inputs.cuda(), volatile=True)
                with torch.no_grad():
                    loc_preds, cls_preds = net(inputs)
                boxes = []
                for box_counter in range(inputs.size(0)):
                    # try:
                    box, label, score = coder.decode(
                        loc_preds[box_counter].data.cpu(),
                        cls_preds[box_counter].data.cpu(), (224, 224))
                    if box.size(0) == 1:
                        boxes.append([float(x) for x in box[0]])
                        continue
                    tmp_box = [0, 0, 0, 0]
                    for box_loop in box:  # shape should be 224!
                        select_box = [float(x) for x in box_loop]
                        cond1 = abs(select_box[0] + select_box[2] / 2 - 112) < abs(tmp_box[0] + tmp_box[2] / 2 - 112)
                        cond2 = abs(select_box[1] + select_box[3] / 2 - 112) < abs(tmp_box[1] + tmp_box[3] / 2 - 112)
                        if cond1 and cond2:
                            tmp_box = select_box
                    boxes.append(tmp_box)
                img_input = torch.zeros(inputs.size(0), 3, 150, 150)
                for img_counter in range(inputs.size(0)):
                    face_box = boxes[img_counter]
                    face_box = [int(x) for x in face_box]
                    face_box[0] = max(face_box[0], 0)
                    face_box[1] = max(face_box[1], 0)
                    face_box[2] = min(face_box[2], inputs.size(2))
                    face_box[3] = min(face_box[3], inputs.size(2))
                    height = face_box[3] - face_box[1]
                    width = face_box[2] - face_box[0]
                    sampled = F.upsample(
                        inputs[img_counter, :, face_box[0]:face_box[2],
                               face_box[1]:face_box[3]].view(1, 3, width, height),
                        size=(112, 112), mode='bilinear')
                    att_sampled = F.upsample(
                        att[img_counter, face_box[0]:face_box[2],
                            face_box[1]:face_box[3]].view(1, 1, width, height),
                        size=(112, 112), mode='bilinear')
                    # img_input[img_counter, :, :, :] = sampled * att_sampled.cuda()
                inputs = Variable(img_input.cuda())
                with torch.no_grad():
                    id_net_out = id_net(inputs)
                _, estimate_id = torch.max(id_net_out, dim=1)
                estimate += sum(torch.eq(estimate_id, torch.tensor(ids).cuda()))
            acc_tmp = float(estimate) / 100
            print('----------acc:', acc_tmp)
            if acc_tmp > acc and acc > 0:
                acc = acc_tmp
                save_model(id_net, 'arcface_id_net_occ-softmax3000-acc' + str(acc) + '.pth')
    return acc
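# A minimal, self-contained sketch (hypothetical box and shapes, not the training loop
# above) of the crop-and-resize step: slice a face box out of one image in the batch
# and bilinearly resize the crop to the 112x112 recognition input size. It uses the
# usual rows=y, cols=x slicing convention.
import torch
import torch.nn.functional as F

inputs = torch.randn(4, 3, 224, 224)           # N, C, H, W (illustrative batch)
face_box = [40, 60, 180, 200]                  # x1, y1, x2, y2 (hypothetical detection)
crop = inputs[0:1, :, face_box[1]:face_box[3], face_box[0]:face_box[2]]
face = F.interpolate(crop, size=(112, 112), mode='bilinear', align_corners=False)
print(face.shape)                              # torch.Size([1, 3, 112, 112])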
def forward(self, up, down):
    refimg_fea = self.feature_extraction(up)        # reference image features
    targetimg_fea = self.feature_extraction(down)   # target image features

    # build the matching cost volume (note: integer division keeps the size and
    # range arguments ints under Python 3)
    cost = Variable(
        torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1] * 2,
                          self.maxdisp // 4 * 3, refimg_fea.size()[2],
                          refimg_fea.size()[3]).zero_()).cuda()
    for i in range(self.maxdisp // 4 * 3):
        if i > 0:
            cost[:, :refimg_fea.size()[1], i, :, :] = refimg_fea[:, :, :, :]
            cost[:, refimg_fea.size()[1]:, i, :, :] = shift_down[:, :, :, :]
            shift_down = self.forF(shift_down)
        else:
            cost[:, :refimg_fea.size()[1], i, :, :] = refimg_fea
            cost[:, refimg_fea.size()[1]:, i, :, :] = targetimg_fea
            shift_down = self.forF(targetimg_fea)
    cost = cost.contiguous()

    # 3D cost aggregation with stacked hourglass modules
    cost0 = self.dres0(cost)
    cost0 = self.dres1(cost0) + cost0
    out1, pre1, post1 = self.dres2(cost0, None, None)
    out1 = out1 + cost0
    out2, pre2, post2 = self.dres3(out1, pre1, post1)
    out2 = out2 + cost0
    out3, pre3, post3 = self.dres4(out2, pre1, post2)
    out3 = out3 + cost0

    cost1 = self.classif1(out1)
    cost2 = self.classif2(out2) + cost1
    cost3 = self.classif3(out3) + cost2

    if self.training:
        cost1 = F.upsample(cost1, [self.maxdisp * 3, up.size()[2], up.size()[3]],
                           mode='trilinear')
        cost2 = F.upsample(cost2, [self.maxdisp * 3, up.size()[2], up.size()[3]],
                           mode='trilinear')
        cost1 = torch.squeeze(cost1, 1)
        pred1 = F.softmax(cost1, dim=1)
        pred1 = disparityregression_sub3(self.maxdisp)(pred1)
        cost2 = torch.squeeze(cost2, 1)
        pred2 = F.softmax(cost2, dim=1)
        pred2 = disparityregression_sub3(self.maxdisp)(pred2)

    cost3 = F.upsample(cost3, [self.maxdisp * 3, up.size()[2], up.size()[3]],
                       mode='trilinear')
    cost3 = torch.squeeze(cost3, 1)
    pred3 = F.softmax(cost3, dim=1)
    pred3 = disparityregression_sub3(self.maxdisp)(pred3)

    if self.training:
        return pred1, pred2, pred3
    else:
        return pred3
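# A minimal sketch (an assumption about what disparityregression_sub3 computes, in the
# spirit of PSMNet-style soft-argmax regression, not the repo's exact module): the
# softmax over the disparity dimension is collapsed to a disparity map by taking the
# expectation over the candidate disparity values.
import torch

def soft_argmax_disparity(prob, max_disp):
    # prob: (N, D, H, W) with softmax already applied over dim=1
    disp_values = torch.arange(max_disp, dtype=prob.dtype, device=prob.device)
    disp_values = disp_values.view(1, max_disp, 1, 1)
    return torch.sum(prob * disp_values, dim=1)     # (N, H, W)

prob = torch.softmax(torch.randn(2, 192, 64, 64), dim=1)   # illustrative probability volume
disp = soft_argmax_disparity(prob, 192)
print(disp.shape)                                           # torch.Size([2, 64, 64])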