def evaluate(self, sample, model_out):
    # Calculate downstream FC + MLP loss/accuracies
    results = super().evaluate(sample, model_out, multi_out=True)

    inp_img = sample[0]
    out_img, mu, logvar = [tmp_out.float() for tmp_out in model_out[1]]
    out_res = out_img.shape[2]

    # Reconstruction loss
    target_img = F.interpolate(inp_img.float(), size=out_res)
    MSE = F.mse_loss(target_img, out_img)

    # KL divergence
    if self.lmd > 0:
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    else:
        KLD = torch.Tensor([0])[0]

    vae_loss = MSE + self.lmd * KLD
    loss = vae_loss + results['loss_downstream']

    results['loss_vae'] = vae_loss
    results['loss_mse'] = MSE
    results['loss_kld'] = KLD

    return loss, results
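# A minimal, self-contained sketch (not part of the original class) of the VAE
# objective computed above: MSE reconstruction loss plus a KL term weighted by
# `lmd`. The function name, tensor shapes, and the `lmd` value are illustrative
# assumptions, not taken from the repository.
import torch
import torch.nn.functional as F

def vae_loss_sketch(inp_img, out_img, mu, logvar, lmd=1e-3):
    # Resize the input to the decoder's output resolution, as in evaluate().
    target_img = F.interpolate(inp_img.float(), size=out_img.shape[2])
    mse = F.mse_loss(target_img, out_img)
    # KL divergence between N(mu, sigma^2) and N(0, 1), summed over all dims.
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return mse + lmd * kld

# Example with dummy tensors: batch of 2 RGB images, 8-dim latent.
loss = vae_loss_sketch(torch.rand(2, 3, 64, 64), torch.rand(2, 3, 32, 32),
                       torch.zeros(2, 8), torch.zeros(2, 8))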
def forward(self, x, alpha, steps):
    """Upsample 2x per stage and pass to the next progressive layer.

    See Table 2 and the Figure 2 diagram for how the generator model is built.

    Args:
        x: Input latent / feature tensor.
        alpha: Fade-in factor blending the upscaled RGB output of the previous
            stage with the RGB output of the newly added stage.
        steps: Number of progressive (2x upsampling) stages to apply; 0 means
            output at the initial resolution.

    Returns:
        Generated image.
    """
    x = self.initial(x)

    if steps == 0:
        return self.initial_rgb(x)

    upscaled = None
    for step in range(steps):
        upscaled = F.interpolate(x, scale_factor=2, mode='nearest')
        x = self.progressive_blocks[step](upscaled)

    final_upscaled = self.rgb_layers[steps - 1](upscaled)
    final_output = self.rgb_layers[steps](x)
    return self.fade_in(alpha, final_upscaled, final_output)
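# Hedged sketch of the fade-in blend assumed by `self.fade_in` above: during
# progressive growing, the new stage's RGB output is linearly mixed with the
# upscaled RGB of the previous stage. The function name and signature are
# illustrative, not taken from the original module.
import torch

def fade_in_sketch(alpha, upscaled_rgb, generated_rgb):
    # alpha ramps from 0 -> 1 as the new stage is blended in.
    return alpha * generated_rgb + (1 - alpha) * upscaled_rgb

blended = fade_in_sketch(0.3, torch.rand(1, 3, 64, 64), torch.rand(1, 3, 64, 64))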
def forward(self, x):
    x = x[::-1]  # to lowest resolution first
    top_down_feature = None
    for i, feature in enumerate(x):
        # Pass the incoming feature map through the 1x1 conv at the
        # corresponding backbone index.
        feature = self.backbone_feature_reduction[i](feature)
        if i == 0:
            top_down_feature = feature
        else:
            # Upsample the previous (coarser) top-down feature map with
            # bilinear interpolation to get a new feature map at this level.
            upsampled_feature = F.interpolate(
                top_down_feature,
                size=feature.size()[-2:],  # target output size; feature is C x H x W, so this is its (H, W).
                mode='bilinear',
                align_corners=True)
            # Update top_down_feature as we iterate.
            if i < len(x) - 1:
                top_down_feature = self.top_down_feature_reduction[i - 1](
                    feature + upsampled_feature)
            else:
                top_down_feature = feature + upsampled_feature
    return top_down_feature
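# A small stand-alone sketch (illustrative shapes, not from the original
# module) of the top-down step above: upsample the coarser feature map to the
# finer level's spatial size with bilinear interpolation, then add the two.
import torch
import torch.nn.functional as F

coarse = torch.rand(1, 256, 7, 7)    # lower-resolution, top-down feature
fine = torch.rand(1, 256, 13, 13)    # finer backbone feature (after 1x1 conv)
upsampled = F.interpolate(coarse, size=fine.size()[-2:],
                          mode='bilinear', align_corners=True)
merged = fine + upsampled            # shapes now match: (1, 256, 13, 13)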
def class_loss_and_accuracy(pred, label, is_dense=False):
    if is_dense:
        out_res = pred.shape[2]
        label = F.interpolate(label.float(), size=out_res)[:, 1].long()
    loss = ce_loss(pred, label)
    acc = (pred.argmax(1) == label).float().mean()
    return loss, acc
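# Minimal sketch of the loss/accuracy computation above, with plain
# F.cross_entropy standing in for `ce_loss` (an assumption about what that
# module-level callable does). Shapes are illustrative.
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)          # (batch, num_classes)
labels = torch.randint(0, 10, (4,))  # integer class ids
loss = F.cross_entropy(logits, labels)
acc = (logits.argmax(1) == labels).float().mean()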
def forward(self, input):
    output1_ = self.output1(input[0])
    output2_ = self.output2(input[1])
    output3_ = self.output3(input[2])

    up3 = F.interpolate(output3_,
                        size=[output2_.size(2), output2_.size(3)],
                        mode="nearest")
    output2 = output2_ + up3
    output2 = self.merge2(output2)

    up2 = F.interpolate(output2,
                        size=[output1_.size(2), output1_.size(3)],
                        mode="nearest")
    output1 = output1_ + up2
    output1 = self.merge1(output1)

    # out = [output1, output2]
    out = [output1, output2, output3_]
    return out
def forward(self, x):
    x = self.input_conv(x)
    h = self.down_sample(x)
    h = self.inner(h)
    h = self.output_conv(h)
    h = F.interpolate(h, size=x.shape[2:])
    return h + x
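# Stand-alone sketch (dummy shapes) of the pattern above: after downsampling
# and processing, the feature map is resized back to the input's spatial size
# so the residual addition works even when the input has odd dimensions.
import torch
import torch.nn.functional as F

x = torch.rand(1, 16, 25, 25)                 # odd spatial size
h = F.avg_pool2d(x, 2)                        # (1, 16, 12, 12) after downsampling
h = F.interpolate(h, size=x.shape[2:])        # back to (1, 16, 25, 25)
out = h + x                                   # residual add now lines up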
def forward(self, x):
    x = self.cbr(x)
    x = self.dropout(x)
    x = self.classification(x)

    # upsampling
    output = F.interpolate(x,
                           size=(self.height, self.width),
                           mode='bilinear',
                           align_corners=True)
    return output
def multi_loss_and_accuracy(pred, label, is_dense=False):
    if pred is None:
        # Return empty results
        return {
            k: torch.zeros(1)
            for k in [
                'loss_fc_cls', 'accuracy', 'loss_mlp_cls', 'acc_mlp_cls',
                'loss_fc_3d', 'acc_fc_3d', 'loss_mlp_3d', 'acc_mlp_3d',
                'loss_downstream'
            ]
        }

    fc_pred = pred[0]
    mlp_pred = pred[1]
    r = {}

    predict_3d = label.ndim > 1
    if is_dense:
        out_res = pred[0][0].shape[2]
        label = F.interpolate(label.float(), size=out_res)
        class_label = label[:, 1].long()
        depth_label = label[:, 2]
        depth_label[depth_label == 0] = depth_label.max()
    elif predict_3d:
        class_label = label[:, 0]
        pose_label = label[:, 1]
    else:
        class_label = label

    class_fc, pose_fc = fc_pred
    class_mlp, pose_mlp = mlp_pred

    r['loss_fc_cls'], r['accuracy'] = class_loss_and_accuracy(
        class_fc, class_label)
    r['loss_mlp_cls'], r['acc_mlp_cls'] = class_loss_and_accuracy(
        class_mlp, class_label)

    if is_dense:
        r['loss_fc_3d'], r['acc_fc_3d'], _, _ = depth_loss_and_accuracy(
            fc_pred[1][:, 0].float(), depth_label / 25)
        r['loss_mlp_3d'], r['acc_mlp_3d'], _, _ = depth_loss_and_accuracy(
            mlp_pred[1][:, 0].float(), depth_label / 25)
    elif predict_3d:
        r['loss_fc_3d'], r['acc_fc_3d'] = class_loss_and_accuracy(
            pose_fc, pose_label)
        r['loss_mlp_3d'], r['acc_mlp_3d'] = class_loss_and_accuracy(
            pose_mlp, pose_label)

    loss_keys = [k for k in r if 'loss' in k]
    r['loss_downstream'] = sum(r[k] for k in loss_keys) / len(loss_keys)

    return r
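# Sketch (dummy values) of how the combined downstream loss above is formed:
# every entry whose key contains 'loss' is averaged into 'loss_downstream'.
import torch

r = {'loss_fc_cls': torch.tensor(0.9), 'accuracy': torch.tensor(0.5),
     'loss_mlp_cls': torch.tensor(0.7)}
loss_keys = [k for k in r if 'loss' in k]
r['loss_downstream'] = sum(r[k] for k in loss_keys) / len(loss_keys)  # 0.8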
def forward(self, x):
    x = self.conv_intro(x)
    x = self.pool(x)
    x = self.block1(x)
    out3 = self.block2(x)
    out4 = self.block3(out3)
    out5 = self.block4(out4)

    p5 = self.conv5(out5)
    p4 = self.conv4(out4) + self.conv4_up(F.interpolate(p5, scale_factor=2))
    p3 = self.conv3(out3) + self.conv3_up(F.interpolate(p4, scale_factor=2))
    p6 = self.conv6(out5)
    p7 = self.conv7(F.relu(p6))

    return p3, p4, p5, p6, p7
def forward(self, x):
    # Pad because DenseNet loses 1 pixel somewhere in downscaling.
    x = F.pad(x, (0, 1, 0, 1), mode='replicate')
    x = self.densenet(x)
    x = self.conv(x)
    context = self.oc_block(x)
    x = self.classifier(torch.cat([x, context], dim=1))
    x = F.interpolate(x, scale_factor=8, mode='bilinear')
    return x
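# Stand-alone sketch of the replicate padding above: add one pixel on the
# right and bottom edges so a backbone that drops a pixel during downscaling
# still returns feature maps of the expected size. Shapes are illustrative.
import torch
import torch.nn.functional as F

x = torch.rand(1, 3, 255, 255)
x = F.pad(x, (0, 1, 0, 1), mode='replicate')  # (left, right, top, bottom)
print(x.shape)  # torch.Size([1, 3, 256, 256])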
def forward(self, x):
    pyramid_poolings = [x]
    for avg_pool, cbr in zip(self.avg_pool_list, self.cbr_list):
        out = cbr(avg_pool(x))  # (512, h, w)
        # Deconvolution (upsampling)
        out = F.interpolate(out,
                            size=(self.height, self.width),
                            mode="bilinear",
                            align_corners=True)
        pyramid_poolings.append(out)

    # Each of the four pyramid pooling outputs has 512 channels, h: 60, w: 60.
    # Concatenate the input to PyramidPooling and its four outputs along the
    # channel dimension C of (N, C, H, W).
    output = torch.cat(pyramid_poolings, dim=1)
    return output
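# Illustrative sketch (dummy sizes, not the original module) of one pyramid
# pooling branch used above: average-pool to a coarse grid, then bilinearly
# upsample back to the input resolution before concatenation.
import torch
import torch.nn.functional as F

x = torch.rand(1, 512, 60, 60)
pooled = F.adaptive_avg_pool2d(x, output_size=6)          # (1, 512, 6, 6)
upsampled = F.interpolate(pooled, size=(60, 60),
                          mode='bilinear', align_corners=True)
output = torch.cat([x, upsampled], dim=1)                 # (1, 1024, 60, 60)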
def forward(self, x):
    x = x[::-1]  # to lowest resolution first
    top_down_feature = None
    for i, feature in enumerate(x):
        feature = self.backbone_feature_reduction[i](feature)
        if i == 0:
            top_down_feature = feature
        else:
            upsampled_feature = F.interpolate(top_down_feature,
                                              size=feature.size()[-2:],
                                              mode='bilinear',
                                              align_corners=True)
            if i < len(x) - 1:
                top_down_feature = self.top_down_feature_reduction[i - 1](
                    feature + upsampled_feature)
            else:
                top_down_feature = feature + upsampled_feature
    return top_down_feature
def forward(self, x):
    h = self.conv1(x)  # x224 -> x112
    h = self.maxpool(h)  # x112 -> x56

    h = self.conv2(h)  # x56 -> x56
    h = self.conv3(h)  # x56 -> x28
    h = self.conv4(h)  # x28 -> x14

    # local branch
    h2 = self.conv5_2(h)
    h2 = F.interpolate(h2, scale_factor=(1, 2, 2), mode='trilinear')
    h2 = torch.cat([h, h2], dim=1)

    h = self.conv5(h)  # x14 -> x7
    h = self.tail(h)

    coords, heatmaps, probabilities = None, None, None
    if self.num_coords > 0:
        coords, heatmaps, probabilities = self.coord_layers(h)

    # if not self.training and self.ensemble_eval:  # not fully supported yet
    #     h_ens = F.avg_pool3d(h, (1, self.s_dim_in//32, self.s_dim_in//32), (1, 1, 1))
    #     h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
    #     h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]

    h_ch, h_max = self.dfb_classifier_list(h2)
    # h_ch = self.dfb_classifier_list(h)

    h = self.globalpool(h)
    h = h.view(h.shape[0], -1)
    h_out = self.classifier_list(h)

    objects = None
    # if self.num_objects:
    #     objects = [self.__getattr__('object_presence_layer_{}'.format(ii))(h) for ii in range(len(self.num_objects))]
    cat_obj = None
    # if self.num_obj_cat:
    #     cat_obj = [self.__getattr__('objcat_presence_layer_{}'.format(ii))(h) for ii in range(len(self.num_obj_cat))]

    # if not self.training and self.ensemble_eval:
    #     return h_out, h_ens, coords, heatmaps, probabilities, objects, cat_obj

    h_out = [h_out, h_ch, h_max]
    # h_out = h_ch
    # h_out = [out + ch + hmax for out, ch, hmax in zip(h_out, h_ch, h_max)]
    return h_out, coords, heatmaps, probabilities, objects, cat_obj
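# Small sketch (dummy tensor) of the trilinear upsampling used by the local
# branch above: a 5-D (N, C, T, H, W) tensor is upsampled only spatially by
# passing a per-dimension scale_factor of (1, 2, 2).
import torch
import torch.nn.functional as F

h = torch.rand(1, 64, 8, 7, 7)                            # (N, C, T, H, W)
h_up = F.interpolate(h, scale_factor=(1, 2, 2), mode='trilinear')
print(h_up.shape)  # torch.Size([1, 64, 8, 14, 14])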
def forward(self, h):
    probabilities = torch.zeros(
        0, device=h.device
    )  # torch.nn.ReLU(self.probability(torch.squeeze(h)))

    # 1. Use a 1x1 conv to get one unnormalized heatmap per location
    if self.temporal_interpolate > 1:
        h = F.interpolate(h,
                          scale_factor=(self.temporal_interpolate, 1, 1),
                          mode='trilinear')
    unnormalized_heatmaps = self.hm_conv(h)

    # 2. Transpose the heatmap volume to keep the temporal dimension in the volume
    unnormalized_heatmaps.transpose_(2, 1).transpose_(1, 0)

    # 3. Normalize the heatmaps
    heatmaps = [dsntnn.flat_softmax(uhm) for uhm in unnormalized_heatmaps]

    # 4. Calculate the coordinates
    coords = [dsntnn.dsnt(hm) for hm in heatmaps]

    heatmaps = torch.stack(heatmaps, 1)
    coords = torch.stack(coords, 1)

    return coords, heatmaps, probabilities
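# Hedged sketch of what the heatmap normalization step above amounts to: a
# softmax over the flattened spatial dimensions so each heatmap sums to 1
# (this mirrors dsntnn.flat_softmax conceptually; shapes are illustrative).
import torch
import torch.nn.functional as F

uhm = torch.randn(2, 17, 32, 32)                          # (batch, joints, H, W)
flat = uhm.view(uhm.shape[0], uhm.shape[1], -1)
heatmaps = F.softmax(flat, dim=-1).view_as(uhm)
print(heatmaps.sum(dim=(-2, -1)))                         # ~1.0 per heatmap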
def evaluate(self, sample, model_out):
    depth_pred = model_out[1][0][1]
    label = sample[1]
    loss_fn = l2_loss if self.use_l2 else l1_loss

    # Resize ground-truth appropriately
    out_res = depth_pred.shape[2]
    label = F.interpolate(label.float(), size=out_res)[:, 2]
    # Remap 0 to max dist
    label[label == 0] = label.max()

    loss, d_thr, d_strict, rmse = depth_loss_and_accuracy(
        depth_pred[:, 0].float(), label / 25, loss_fn=loss_fn)

    return loss, {
        'accuracy': d_thr,
        'depth_loss': loss,
        'rmse': rmse,
        'd_strict': d_strict
    }
def get_and_save_feats(exp_id=None, dense=False, include_test=False):
    """Load a pretrained model and run through dataset to get output features."""
    with gin.unlock_config():
        gin.bind_parameter('augment.do_augment', False)
        if include_test:
            gin.bind_parameter('session.test_iters', -1)

    # Initialize session
    sess = session.initialize_session()
    model = sess['model']
    loader = sess['loader']
    iters = sess['iters']
    task = sess['task']
    if exp_id is None:
        exp_id = sess['restore_session']

    # Restore model
    if exp_id is not None:
        path = '%s/%s/snapshot' % (paths.EXP_DIR, exp_id)
        print("Restoring from:", path)
        if not torch.cuda.is_available():
            loaded = torch.load(path, map_location='cpu')
        else:
            loaded = torch.load(path)
        model.load_state_dict(loaded['model'], strict=False)

    model.cuda()
    model.eval()

    n_feats = model.backbone.out_feats
    print("# output features", n_feats)

    splits = list(loader.datasets.keys())
    results = {s: {} for s in splits}
    idx_offsets = {s: loader.datasets[s].idxs[0] for s in splits}
    if not dense:
        all_feats = {s: np.zeros((len(loader.datasets[s]), n_feats)) for s in splits}
        all_labels = {s: np.zeros(len(loader.datasets[s])) for s in splits}
    else:
        all_feats, all_labels = None, None

    for split in splits:
        for _ in tqdm(range(iters[split])):
            # Load sample + save reference labels
            sample = loader.get_sample(split)
            idxs = np.array(sample[2]) - idx_offsets[split]
            if not dense:
                labels = np.array(sample[1].cpu())
                all_labels[split][idxs] = labels
            else:
                # NOTE: the dense path expects `r` (the dense label resolution)
                # and pre-allocated dense buffers, neither of which is set up here.
                labels = F.interpolate(sample[1].float(), size=r)
                all_labels[split][idxs] = np.array(labels.cpu())

            if torch.cuda.is_available():
                sample = [s.cuda() for s in sample]

            # Pass through model
            with torch.no_grad():
                model_out = task.forward(model, sample)
                loss, eval_metrics = task.evaluate(sample, model_out)

            # Save features
            feats = model_out[2]
            all_feats[split][idxs] = np.array(feats.cpu())

            # Collect results
            to_report = {'loss': loss.item()}
            for k, v in eval_metrics.items():
                to_report[k] = v.item()
            for k, v in to_report.items():
                if k not in results[split]:
                    results[split][k] = []
                results[split][k] += [v]

        # Print out results (sanity check that loaded model is good)
        for k in results[split]:
            print(k, np.array(results[split][k]).mean())

    if exp_id is not None:
        # Save features + labels
        torch.save({'train_feats': all_feats['train'].astype(np.float16),
                    'valid_feats': all_feats['valid'].astype(np.float16),
                    'train_labels': all_labels['train'].astype(np.uint8),
                    'valid_labels': all_labels['valid'].astype(np.uint8),
                    'idx_offsets': idx_offsets},
                   '%s/%s/network_output.pt' % (paths.EXP_DIR, exp_id))

    return all_feats, all_labels
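# Hedged usage sketch: loading the file written by get_and_save_feats and
# recovering float features / integer labels. The path is illustrative; it
# only follows the '%s/%s/network_output.pt' pattern used above.
import numpy as np
import torch

saved = torch.load('experiments/my_exp/network_output.pt')
train_feats = saved['train_feats'].astype(np.float32)   # stored as float16
train_labels = saved['train_labels'].astype(np.int64)   # stored as uint8
print(train_feats.shape, train_labels.shape)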
def forward(self, x, y):
    if self.lateral:
        y = self.lateral_conv(y)
    if self.interpolate:
        y = F.interpolate(y, scale_factor=self.y_stride // self.x_stride)
    return x + self.fuse_out(y)
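# Stand-alone sketch (illustrative strides/shapes) of the fusion above: the
# lower-resolution stream `y` is upsampled by the ratio of the two strides so
# it can be added to the higher-resolution stream `x`.
import torch
import torch.nn.functional as F

x_stride, y_stride = 8, 16
x = torch.rand(1, 128, 28, 28)
y = torch.rand(1, 128, 14, 14)
y = F.interpolate(y, scale_factor=y_stride // x_stride)   # 2x nearest upsampling
fused = x + y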