def forward(self, x):
    """Run the stem, the three stacked layer groups and the classifier head.

    Args:
        x: input batch accepted by ``self.conv1``.

    Returns:
        Log-probabilities (``log_softmax`` over the last dimension) of
        ``self.out`` applied to the globally max-pooled features.
    """
    features = self.conv1(x)
    # The three module lists are walked in lockstep; each stage chains one
    # module from every list.
    for first, second, third in zip(self.layers, self.layers2, self.layers3):
        features = third(second(first(features)))
    # Global max pooling leaves a single value per channel.
    pooled = F.adaptive_max_pool2d(features, 1)
    flat = pooled.view(pooled.size(0), -1)
    scores = self.out(flat)
    return F.log_softmax(scores, dim=-1)
def resize2dmask(mask, size):
    """Resize a mask with adaptive *min* pooling.

    The minimum over each pooling window is obtained as
    ``0 - max_pool(0 - mask)``.

    Unlike the previous version, the helper tensors are created on the
    device of ``mask`` instead of being forced onto CUDA, so the function
    also works for CPU masks and on hosts without a GPU. For masks that
    already live on the default CUDA device the result is unchanged.

    Args:
        mask: tensor accepted by ``F.adaptive_max_pool2d`` after negation.
        size: target output size (int or tuple), forwarded to
            ``F.adaptive_max_pool2d``.

    Returns:
        The pooled mask, computed under ``torch.no_grad()``.
    """
    with torch.no_grad():
        # Zero tensors follow the mask's device; subtracting from them keeps
        # the original dtype-promotion behaviour.
        o_in = torch.zeros(mask.size(), device=mask.device)
        o_out = torch.zeros(size, device=mask.device)
        t = o_out - F.adaptive_max_pool2d(o_in - mask, size)
    return t
def pa_max_pool(in_dict):
    """Part-wise max pooling done as `masking + global max pooling`.

    Args:
        in_dict: dict with
            'feat': pytorch tensor, with shape [N, C, H, W]
            'pap_mask': pytorch tensor, with shape [N, pC, pH, pW]

    Returns:
        dict with
            'feat_list': a list (length = pC) of pytorch tensors with shape
                [N, C, 1, 1]
            'visible': pytorch tensor with shape [N, pC]; 1.0 where the part
                mask has a non-zero sum, else 0.0

    NOTE: `masking + global max pooling` only equals `local max pooling`
    when feature values are non-negative (e.g. features taken after a ReLU).
    """
    feat = in_dict['feat']
    mask = in_dict['pap_mask']
    # Unpacking doubles as a check that both tensors are 4-D.
    _, _, _, _ = feat.size()
    _, num_parts, _, _ = mask.size()

    feat_list = []
    for part in range(num_parts):
        # One part mask, broadcast over every feature channel.
        part_mask = mask[:, part, :, :].unsqueeze(1).expand_as(feat)
        feat_list.append(F.adaptive_max_pool2d(feat * part_mask, 1))

    # A part counts as visible when its mask is not all zeros.
    visible = mask.sum(-1).sum(-1).ne(0).float()
    return {'feat_list': feat_list, 'visible': visible}
def roi_pooling(input, rois, size=(7, 7), spatial_scale=1.0):
    """RoI max pooling implemented as a Python loop over the RoIs.

    Each RoI row is ``(batch_index, x1, y1, x2, y2)`` with inclusive corners.
    Coordinates are multiplied by ``spatial_scale``, truncated to integers and
    clamped into the feature map. A RoI whose bottom-right corner ends up
    before its top-left corner yields an all-zero output.

    The caller's ``rois`` tensor is never modified. The previous version
    scaled it in place whenever it was already a float32 tensor, because
    ``.float()`` then returns the same storage.

    Args:
        input: feature map indexed as [batch, channel, y, x].
        rois: 2-D tensor with 5 columns.
        size: output size forwarded to ``F.adaptive_max_pool2d``.
        spatial_scale: factor applied to the RoI coordinates.

    Returns:
        Tensor with one pooled map per RoI, concatenated along dim 0.
    """
    assert rois.dim() == 2
    assert rois.size(1) == 5

    # Private copy so the in-place scaling cannot leak to the caller.
    scaled = rois.detach().float().clone()
    scaled[:, 1:].mul_(spatial_scale)
    boxes = scaled.long().tolist()

    max_x = input.size(3) - 1
    max_y = input.size(2) - 1

    output = []
    for im_idx, x1, y1, x2, y2 in boxes:
        # Clamping is a no-op for in-range corners, so it is applied always.
        x1 = min(max(x1, 0), max_x)
        y1 = min(max(y1, 0), max_y)
        x2 = min(max(x2, 0), max_x)
        y2 = min(max(y2, 0), max_y)
        if x2 < x1 or y2 < y1:
            # Invalid (inverted) RoI: pool a single zero cell instead.
            im = input.new_full((1, input.size(1), 1, 1), 0)
        else:
            im = input.narrow(0, im_idx, 1)[..., y1:y2 + 1, x1:x2 + 1]
        output.append(F.adaptive_max_pool2d(im, size))
    return torch.cat(output, 0)
def forward(self, x, ROI, roi_layer):
    """Fuse resized RoI crops of ``layer1`` features with ``layer2`` features.

    For every sample, every one of the first ``roi_layer`` channels of
    ``self.layer1(x)`` and every one of the ``self.count`` RoIs, the crop is
    resized to 125x125 by adaptive max pooling. The crops are concatenated
    with ``self.layer2(x)`` along the channel axis and passed through
    ``layer3``, ``layer4`` and ``fc``.

    Args:
        x: input batch.
        ROI: per-sample, per-RoI boxes; each entry unpacks (after
            ``squeeze(0)``) into ``y1, y2, x1, x2``.
        roi_layer: number of leading ``layer1`` channels to crop from.

    Returns:
        Softmax over dim 1 of the ``fc`` output.

    NOTE(review): ``batch_size`` is not defined in this method; presumably a
    module-level global -- confirm it matches ``x.size(0)``.
    NOTE(review): the ``view`` below hard-codes 15 channels, so it presumably
    requires ``roi_layer * self.count == 15`` -- confirm.
    """
    roiX = self.layer1(x)
    x_batch = []
    for k in range(batch_size):
        x_set = []
        for j in range(roi_layer):
            for i in range(self.count):
                y1, y2, x1, x2 = ROI[k][i].squeeze(0)
                # Box coordinates are scaled by 0.9 and truncated to ints.
                y1 = int(y1.float() * 0.9)
                y2 = int(y2.float() * 0.9)
                x2 = int(x2.float() * 0.9)
                x1 = int(x1.float() * 0.9)
                # Crop one channel and resize the crop to 125x125.
                image = F.adaptive_max_pool2d(roiX[k, j][y1:y2, x1:x2].unsqueeze(0), output_size=(125, 125))
                # The CPU/NumPy round trip detaches the crop from autograd.
                x_set.append(image.cpu().detach().numpy())
        # NOTE(review): this *prepends* the current sample's crops, so the
        # samples end up in reverse batch order in x_batch -- confirm this
        # is intended before pairing with norX below.
        x_batch = x_set + x_batch
    x_batch = torch.FloatTensor(x_batch).cuda().squeeze(1)
    x_batch = x_batch.view(batch_size, 15, 125, 125)
    # This yields roi_layer * count maps per image, all of them feature maps
    # of that one image; they have to be converted back to a tensor.
    norX = self.layer2(x)
    x = torch.cat((norX, x_batch), 1)
    x = self.layer3(x)
    x = self.layer4(x)
    # NOTE(review): debug print left in the forward pass.
    print(x.shape)
    x = x.view(x.size(0), -1)
    x = self.fc(x)
    return F.softmax(x, dim=1)
def forward(self, feature):
    """Pool a feature map to an ``R x R`` grid and flatten it.

    Args:
        feature: feature map accepted by ``adaptive_max_pool2d``.

    Returns:
        Tensor of shape ``(feature.size(0), -1)``; passed through
        ``func.normalize`` when ``self.norm`` is truthy.
    """
    grid = (self.R, self.R)
    pooled = func.adaptive_max_pool2d(feature, grid)
    flat = pooled.view(pooled.size(0), -1)
    return func.normalize(flat) if self.norm else flat
def forward(self, x):
    """Extract base features and, when training, the embedded features.

    A 5-D input has its dim 1 squeezed before the backbone is applied.

    In eval mode with ``self.out_base_feat`` set, only pooled backbone
    features are returned, and ``self.lmp`` selects the pooling:
    ``0`` -> ``self.avgpool``; ``>= 1`` -> adaptive max pooling to
    ``(lmp, 1)``; negative -> adaptive average pooling to ``(-lmp, 1)``.

    Otherwise returns ``(x, feat)``: ``feat`` is the normalized pooled
    feature, ``x`` the (optionally embedded, batch-normed, dropped-out)
    normalized feature.
    """
    if x.dim() == 5:
        x = x.squeeze(1)
    fmap = self.base(x)
    batch = fmap.size(0)

    if not self.training and self.out_base_feat:
        if self.lmp == 0:
            pooled = self.avgpool(fmap)
        elif self.lmp >= 1:
            pooled = F.adaptive_max_pool2d(fmap, output_size=(self.lmp, 1))
        else:
            pooled = F.adaptive_avg_pool2d(fmap, output_size=(-self.lmp, 1))
        return pooled.view(batch, -1)

    emb = self.avgpool(fmap).view(batch, -1)
    feat = F.normalize(emb)
    if self.embed_dim > 0:
        emb = self.embed_fc(emb)
        if self.has_bn:
            emb = self.embed_bn(emb)
    emb = F.normalize(emb)
    if self.dropout > 0:
        emb = self.drop(emb)
    return emb, feat
def forward(self, x, xr):
    """Convolve, pool, optionally pad / concatenate, then classify.

    :param x: the first input, fed to ``self.Conv_classifer``
    :param xr: the second input, concatenated to the pooled features along
        dim 1 when ``self.xr`` is truthy
    :return: the output of ``self.FC_classifier``
    """
    conv_out = self.Conv_classifer(x)

    if self.maxpooling_dir:
        # Adaptive average pooling to (size of dim 1, 1).
        pooled = F.adaptive_avg_pool2d(conv_out, (conv_out.size()[1], 1))
    else:
        # Adaptive max pooling to (size of dim 2, 1).
        pooled = F.adaptive_max_pool2d(conv_out, (conv_out.size()[2], 1))

    flat = pooled.view(pooled.size()[0], -1)

    if self.padding:
        # Zero-pad on the right up to ``self.w_size`` columns.
        missing = self.w_size - flat.size()[1]
        flat = F.pad(flat, (0, missing), 'constant', 0)

    if self.xr:
        flat = torch.cat((flat, xr), 1)

    return self.FC_classifier(flat)
def forward(self, x):
    """Run the backbone, the RPN and the per-proposal detection head.

    For each proposal the box is clamped at zero, converted from input
    coordinates to feature-map cells, cropped out of the feature map and
    max-pooled to 7x7 before classification and box regression.

    Fixes over the previous version:
      * cell indices use floor division, so they stay integers on current
        PyTorch (``/`` on an integer tensor produces floats, which cannot
        be used as slice bounds);
      * negative coordinates are clamped with ``clamp_`` instead of NumPy
        indexing on a torch tensor;
      * the duplicated training / eval branches around ``get_proposals``
        are merged (both called it identically).

    Returns:
        ``(class scores reshaped to (-1, n_classes), stacked regressions of
        the arg-max class, proposals, rpn cls, rpn reg)``.
    """
    feature_map = self.feature_map(x)
    cls, reg = self.rpn(feature_map)
    # Drop the batch axis. NOTE(review): presumably assumes batch size 1.
    feature_map = feature_map.view(
        (-1, self.OUTPUT_SIZE[0], self.OUTPUT_SIZE[1]))

    proposals = self.rpn.get_proposals(reg, cls)

    all_cls = []
    all_reg = []
    for roi in proposals.int():
        # In place, as before: rows of ``proposals.int()`` are clamped.
        roi.clamp_(min=0)
        cells = [int(v) for v in (roi // self.OUTPUT_CELL_SIZE).tolist()]
        roi_feature_map = feature_map[:, cells[0]:cells[2] + 1,
                                      cells[1]:cells[3] + 1]
        # 50176 is the flattened size the head expects after 7x7 pooling.
        pooled_roi = F.adaptive_max_pool2d(roi_feature_map, (7, 7)).view(
            (-1, 50176))
        r = F.relu(self.fc(pooled_roi))
        r_cls = self.cls_layer(r)
        r_reg = self.reg_layer(r).view((self.n_classes, 4))
        all_cls.append(r_cls)
        # Keep only the regression of the highest-scoring class.
        all_reg.append(r_reg[torch.argmax(r_cls)])

    return torch.stack(all_cls).view(
        (-1, self.n_classes)), torch.stack(all_reg), proposals, cls, reg
def forward(self, x):
    """Apply the spatial attention variant selected by ``self.version``.

    * v1, v2, v3, v4, v8: ``x * sigmoid(spatial(compress(x)))``
    * v7: ``spatial(compress(x))`` expanded to the shape of ``x``
    * v5: as v1 with an extra ``self.expand`` before the sigmoid
    * v6: max-pool ``x`` down by ``self.size_reduction_ratio``, compute the
      attention at low resolution and upsample it bilinearly to ``x``'s size

    Changes over the previous version: the deprecated ``F.sigmoid`` and
    ``F.upsample`` are replaced by ``Tensor.sigmoid`` and ``F.interpolate``
    (same results), and an unknown version raises a clear ``ValueError``
    instead of failing on an unbound local.

    Raises:
        ValueError: if ``self.version`` is not one of v1..v8.
    """
    version = self.version
    if version in ('v1', 'v2', 'v3', 'v4', 'v7', 'v8'):
        x_out = self.spatial(self.compress(x))
        if version == 'v7':
            # v7 returns the raw map, not a gated input.
            return x_out.expand_as(x)
        scale = x_out.sigmoid()  # broadcasting
    elif version == 'v5':
        x_out = self.expand(self.spatial(self.compress(x)))
        scale = x_out.sigmoid()  # broadcasting
    elif version == 'v6':
        x_pool = F.adaptive_max_pool2d(
            x, x.size(2) // self.size_reduction_ratio)
        x_low = self.expand(self.spatial(self.compress(x_pool)))
        x_up = F.interpolate(x_low, x.size()[2:], mode='bilinear')
        scale = x_up.sigmoid()  # broadcasting
    else:
        raise ValueError(f"unsupported version: {version!r}")
    return x * scale
def forward(self, x):
    """Run the searched cells and classify the fused multi-layer features.

    For every cell the architecture weights are the softmax (last dim) of
    the first ``sum(k + 2 for k in range(self._steps))`` rows of
    ``self.alphas_normal``. The stem output and every cell output are
    concatenated along dim 1, fused by ``self.fusion_conv``, pooled by both
    global max and global average pooling, and classified.

    Returns:
        ``self.classifier`` output with its two trailing dims squeezed.
    """
    s0 = s1 = self.stem(x)
    pre_layers = [s1]

    for cell in self.cells:
        # Step k contributes k + 2 incoming edges.
        num_edges = sum(step + 2 for step in range(self._steps))
        weights = [
            F.softmax(self.alphas_normal[edge], dim=-1)
            for edge in range(num_edges)
        ]
        selected_idxs = self.normal_selected_idxs
        s0, s1 = s1, cell(s0, s1, weights, selected_idxs, self.att)
        pre_layers.append(s1)

    fusion = self.fusion_conv(torch.cat(pre_layers, dim=1))
    max_desc = F.adaptive_max_pool2d(fusion, 1)
    avg_desc = F.adaptive_avg_pool2d(fusion, 1)
    logits = self.classifier(torch.cat((max_desc, avg_desc), dim=1))
    return logits.squeeze(-1).squeeze(-1)
def reversal_att_map(self, attention):
    """Return the reversed attention map ``max(attention) - attention``.

    The maximum is the global spatial maximum given by
    ``F.adaptive_max_pool2d(attention, 1)`` and is broadcast back to the
    shape of ``attention``.
    """
    peak = F.adaptive_max_pool2d(attention, 1)
    return peak.expand_as(attention) - attention
def apply(features: Tensor, proposal_bboxes: Tensor, proposal_batch_indices: Tensor, mode: Mode) -> Tensor:
    """Pool one fixed-size feature patch per proposal box.

    Boxes are scaled by 1/16 onto the feature map. In ``POOLING`` mode each
    box is cropped (start inclusive, end exclusive, both clamped so the crop
    is never empty) and adaptively max-pooled to 14x14. In ``ALIGN`` mode
    ``ROIAlign`` is used with the batch indices prepended to the boxes.
    Either way the result is finally max-pooled 2x2 with stride 2.

    Raises:
        ValueError: for any other ``mode``.
    """
    _, _, feature_map_height, feature_map_width = features.shape
    scale = 1 / 16
    output_size = (7 * 2, 7 * 2)

    def to_cell(coord):
        # Box coordinate -> rounded feature-map cell index.
        return round(coord.item() * scale)

    if mode == Pooler.Mode.POOLING:
        crops = []
        for bbox, batch_index in zip(proposal_bboxes, proposal_batch_indices):
            # Start indices are clamped into [0, dim - 1].
            start_x = max(min(to_cell(bbox[0]), feature_map_width - 1), 0)
            start_y = max(min(to_cell(bbox[1]), feature_map_height - 1), 0)
            # Exclusive end indices are clamped into [1, dim].
            end_x = max(min(to_cell(bbox[2]) + 1, feature_map_width), 1)
            end_y = max(min(to_cell(bbox[3]) + 1, feature_map_height), 1)
            crop = features[batch_index, :, start_y:end_y, start_x:end_x]
            crops.append(F.adaptive_max_pool2d(input=crop, output_size=output_size))
        pool = torch.stack(crops, dim=0)
    elif mode == Pooler.Mode.ALIGN:
        aligner = ROIAlign(output_size, spatial_scale=scale, sampling_ratio=0)
        rois = torch.cat([proposal_batch_indices.view(-1, 1).float(), proposal_bboxes], dim=1)
        pool = aligner(features, rois)
    else:
        raise ValueError

    return F.max_pool2d(input=pool, kernel_size=2, stride=2)
def disp_loss(self, pred, targe, sparse=True):
    """Smooth-L1 loss between a prediction and a (resized) target map.

    Args:
        pred: 4-D tensor; its size is unpacked as ``b, channel, h, w``.
        targe: 3-D target tensor (asserted), expected as ``b, h, w``.
        sparse: when truthy, the target is adaptively max-pooled to the
            ``(h, w)`` of ``pred`` before the loss is computed.

    Returns:
        The mean of ``self.SmoothL1Loss(pred, targe)`` over the flattened
        ``(b, -1)`` views.

    NOTE(review): everything after the first ``return loss`` appears to be
    unreachable leftover code (it also calls ``mean_()`` and indexes with a
    ``long`` 0/1 tensor) -- confirm the intended nesting and delete it if
    it really is dead.
    """
    assert len(targe.size()) == 3, 'expect targe size is b,h,w'
    b, channel, h, w = pred.size()
    loss = 0.
    # Add a channel axis so the target can be pooled as a 4-D map.
    targe = targe.unsqueeze(1)
    if sparse:
        # Max pooling resizes the target to the prediction's resolution.
        targe = F.adaptive_max_pool2d(targe, (h, w))
    pred=pred.view(b,-1).contiguous()
    targe=targe.view(b,-1).contiguous()
    EPE_map = self.SmoothL1Loss(pred, targe)
    #print("EPE_map mean:",EPE_map.mean())
    # NOTE(review): `positive` is computed but not used by the live path.
    positive = (targe > 0).long()
    #ignore = (EPE_map*positive)
    #print("EPE_map mean:",EPE_map.mean())
    #EPE_map = EPE_map[positive]
    loss = EPE_map.mean()
    return loss
    # --- presumably unreachable from here on (see NOTE in the docstring) ---
    EPE_map = self.SmoothL1Loss(pred, targe)
    positive = (targe > 0).long()
    EPE_map = EPE_map[positive]
    loss = torch.mean(EPE_map)
    for ib in range(b):
        EPE_map = self.SmoothL1Loss(pred, targe.unsqueeze(1))
        #EPE_map = torch.abs(targe[ib] - pred[ib, 0])
        positive = (targe[ib] > 0).long()
        loss += EPE_map.mean_()
        #smooth = self.hardtanh(EPE_map)
        #EPE_map = 0.5 * smooth * EPE_map
    return loss
def train(model, data_loader, optimizer):
    """Train ``model`` on ``data_loader`` for up to ``args.num_iter`` batches.

    For every prediction returned by the model, the annotation is max-pooled
    to the prediction's spatial size and an MSE loss is accumulated; only the
    MSE terms are back-propagated. A positive-pixel loss and a hard-negative
    loss (top ``3 * npos`` negative errors) are computed for logging only.
    Running averages are printed every ``args.log_interval`` batches.

    Fixes over the previous version:
      * ``cuda(async=True)`` -> ``cuda(non_blocking=True)``; ``async`` is a
        reserved word since Python 3.7, so the old call is a syntax error;
      * ``tensor.data[0]`` -> ``tensor.item()``; indexing a 0-dim tensor
        fails on PyTorch >= 0.5;
      * batches without positive pixels no longer divide by zero: the
        logged positive / negative losses are 0 in that case.

    Relies on the module-level ``args``, ``tqdm``, ``time`` and ``V``.
    """
    model.train()
    batch_time = 0
    total_loss = {'mse': 0, 'pos': 0, 'neg': 0}
    for batch_no, (img, ann) in tqdm(enumerate(data_loader)):
        start_time = time.time()
        if args.cuda:
            img = img.cuda(non_blocking=True)
            ann = ann.cuda(non_blocking=True)
        preds = model(V(img))

        loss = 0
        for pred in preds:
            _, _, h, w = pred.size()
            # Resize the annotation to this prediction's resolution.
            _ann = F.adaptive_max_pool2d(ann, (h, w)).float()
            diff = (_ann - pred) ** 2
            mask = (_ann > 0).float()
            npos = int(mask.sum().item())

            # Logging only: mean squared error over positive pixels.
            pos_loss = (mask * diff).sum() / max(npos, 1)

            # Logging only: mean of the hardest negatives (3:1 ratio).
            num_neg = min(npos * 3, diff.numel())
            if num_neg > 0:
                hardest = ((1 - mask) * diff).view(-1).topk(
                    k=num_neg, sorted=False)[0]
                neg_loss = hardest.sum() / hardest.numel()
            else:
                neg_loss = diff.new_zeros(())

            mse = F.mse_loss(pred, _ann)
            # Only the plain MSE is optimized.
            loss += mse

            total_loss['mse'] += mse.item()
            total_loss['pos'] += pos_loss.item()
            total_loss['neg'] += neg_loss.item()

        model.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time += time.time() - start_time
        if (batch_no + 1) % args.log_interval == 0:
            avg_time = batch_time * 1e3 / args.log_interval
            for k in total_loss:
                total_loss[k] /= args.log_interval
            print(
                'train | {:4d} /{:4d} batch | {:7.2f} ms/batch | mse loss {:.2e} | pos loss {:.2e} | neg loss {:.2e}'
                .format(batch_no + 1, args.num_iter, avg_time,
                        total_loss['mse'], total_loss['pos'],
                        total_loss['neg']))
            # Reset the running statistics for the next logging window.
            for k in total_loss:
                total_loss[k] = 0
            batch_time = 0

        if (batch_no + 1) >= args.num_iter:
            return
def forward(self, x, get_features=False):
    """Iterate the shared convolution with a quantised history of states.

    The input is zero-padded (last padded dim of ``F.pad``'s 6-tuple) up to
    ``self.n_filters`` and quantised. At iteration ``t`` the new state is
    ``alpha[t, 0] * Lactiv(Lconv(last))`` plus ``alpha[t, i + 1] * hist[i]``
    for every non-empty history slot, then normalised and quantised. When
    ``self.pool_strategy[t]`` is truthy every stored state is max-pooled 2x2.

    Args:
        x: input batch.
        get_features: when truthy, also return the pooled features.

    Returns:
        ``self.LOutput(out)`` or ``(out, self.LOutput(out))``.

    NOTE(review): the history shift starts at slot 1, so slot 0 is never
    overwritten -- confirm this is intended.
    """
    x0 = F.pad(x, (0, 0, 0, 0, 0, self.n_filters - self.input_shape[0]))
    x0 = quantifier(x0, self.n_bits_activ)
    hist = [None] * (self.n_history - 1) + [x0]
    newest = self.n_history - 1

    for t in range(self.n_iter):
        a = self.alpha[t, 0] * self.Lactiv(self.Lconv(hist[-1]))
        for slot, past in enumerate(hist):
            if past is not None:
                a = a + self.alpha[t, slot + 1] * past
        a = quantifier(self.Lnormalization[t](a), self.n_bits_activ)

        # Shift the history (from slot 1 on) and store the new state last.
        for slot in range(1, newest):
            hist[slot] = hist[slot + 1]
        hist[newest] = a

        if self.pool_strategy[t]:
            hist = [h if h is None else F.max_pool2d(h, 2) for h in hist]

    out = F.adaptive_max_pool2d(hist[-1], (1, 1))[:, :, 0, 0]
    scores = self.LOutput(out)
    if get_features:
        return out, scores
    return scores
def max_pool(in_dict):
    """Part-wise max pooling done as `masking + global max pooling`.

    Args:
        in_dict: dict with
            'feat': sequence of pytorch tensors, one per part, each with
                shape [N, C, pH, pW]
            'ps_pred': sequence of pytorch tensors, one per part, each with
                shape [N, 1, pH, pW]; values > 0.5 form the part mask

    Returns:
        dict with
            'feat_list': a list (one entry per part) of pytorch tensors with
                shape [N, C]
            'visible': pytorch tensor with shape [N, pC]; 1.0 where the part
                mask is non-empty

    NOTE: `masking + global max pooling` only equals `local max pooling`
    when feature values are non-negative (e.g. features taken after a ReLU).
    """
    assert len(in_dict['feat']) == len(in_dict['ps_pred'])
    N = in_dict['feat'][0].shape[0]

    feat_list, visible_list = [], []
    for idx, pred in enumerate(in_dict['ps_pred']):
        feat = in_dict['feat'][idx]
        # Binarise the part prediction: [N, 1, pH, pW].
        part_mask = (pred > 0.5).float()
        visible_list.append((part_mask.sum(-1).sum(-1) > 0).float())
        # Broadcast the mask over the channels, then pool globally: [N, C].
        masked = feat * part_mask.expand_as(feat)
        feat_list.append(F.adaptive_max_pool2d(masked, 1).view(N, -1))

    return {'feat_list': feat_list, 'visible': torch.cat(visible_list, 1)}
def forward(self, data):
    """
    Convolve the input and apply the enabled attention stages.

    :param data: input passed to ``self.conv1``
    :return: tensor, the (optionally channel- and spatially-gated) feature map
    """
    fm = self.conv1(data)

    if self.channel_att:
        # Channel descriptor: global average and global max, stacked on dim 1.
        avg_desc = F.adaptive_avg_pool2d(fm, (1, 1))
        max_desc = F.adaptive_max_pool2d(fm, (1, 1))
        channel_gate = self.att_c(torch.cat([avg_desc, max_desc], dim=1))
        fm = fm * channel_gate

    if self.spatial_att:
        # Spatial descriptor: per-position mean and max over the channels.
        mean_map = torch.mean(fm, dim=1, keepdim=True)
        max_map = torch.max(fm, dim=1, keepdim=True)[0]
        spatial_gate = self.att_s(torch.cat([mean_map, max_map], dim=1))
        fm = fm * spatial_gate

    return fm
def forward(self, data):
    """
    Convolve the input and apply the enabled attention stages.

    :param data: input passed to ``self.conv1``
    :return: tensor with the same shape as the ``conv1`` output
    """
    fm = self.conv1(data)  # (bs, out_ch, w, h)

    if self.channel_att:
        # adaptive pooling resizes (w, h) to any size; here to (1, 1), so the
        # concatenation below has shape (bs, 2*out_ch, 1, 1)
        pooled = [
            F.adaptive_avg_pool2d(fm, (1, 1)),
            F.adaptive_max_pool2d(fm, (1, 1)),
        ]
        att = self.att_c(torch.cat(pooled, dim=1))  # (bs, out_ch, 1, 1)
        # (bs, out_ch, w, h) * (bs, out_ch, 1, 1) -> (bs, out_ch, w, h)
        fm = fm * att

    if self.spatial_att:
        # (bs, 1, w, h) + (bs, 1, w, h) -> (bs, 2, w, h): along the channel
        # axis one map is the mean, the other the max
        mean_map = fm.mean(dim=1, keepdim=True)
        max_map = fm.max(dim=1, keepdim=True)[0]
        att = self.att_s(torch.cat([mean_map, max_map], dim=1))  # (bs, 1, w, h)
        # (bs, out_ch, w, h) * (bs, 1, w, h) -> (bs, out_ch, w, h)
        fm = fm * att

    return fm
def forward(self, x, xr):
    """Classify from two stacked convolution stages plus the extra input.

    The output of each convolution stage is adaptively max-pooled to
    ``(size of its dim 1, 1)`` and flattened; both flattened vectors and
    ``xr`` are concatenated along dim 1 and fed to ``self.FC_classifier``.
    """
    def pool_flat(fmap):
        # Max-pool to (dim-1 size, 1), then flatten per sample.
        pooled = F.adaptive_max_pool2d(fmap, (fmap.size()[1], 1))
        return pooled.view(pooled.size()[0], -1)

    stage1 = self.Conv_classifer(x)
    flat1 = pool_flat(stage1)
    stage2 = self.Conv_classifer2(stage1)
    flat2 = pool_flat(stage2)

    merged = torch.cat((flat1, flat2, xr), 1)
    return self.FC_classifier(merged)
def forward(self, features, proposal_bboxes):
    """Pool a 7x7 patch per proposal box and run the detection head.

    Box coordinates are divided by 16 and rounded to feature-map cells. The
    crop uses an inclusive start clamped to ``[0, dim - 1]`` and an exclusive
    end clamped to ``[1, dim]``.

    Returns:
        ``(classes, transformers)`` from ``self._class`` and
        ``self._transformer`` applied to ``self.fcs`` of the flattened patches.
    """
    _, _, feature_map_height, feature_map_width = features.shape

    def bounded(value, low, high):
        # Clamp ``value`` into [low, high].
        return max(min(value, high), low)

    patches = []
    for bbox in proposal_bboxes:
        left, top, right, bottom = (
            round(bbox[k].item() / 16) for k in range(4))
        start_x = bounded(left, 0, feature_map_width - 1)
        start_y = bounded(top, 0, feature_map_height - 1)
        end_x = bounded(right + 1, 1, feature_map_width)
        end_y = bounded(bottom + 1, 1, feature_map_height)
        crop = features[..., start_y:end_y, start_x:end_x]
        patches.append(F.adaptive_max_pool2d(crop, 7))

    pool = torch.cat(patches, dim=0)
    pool = pool.view(pool.shape[0], -1)

    h = self.fcs(pool)
    return self._class(h), self._transformer(h)
def forward(self, x):
    """Generator pass with a CAM-style attention bottleneck.

    After down-sampling, two branches (global average pooling and global max
    pooling) each produce logits through their own linear layer and
    re-weight the feature map by that layer's first parameter. The two
    weighted maps are concatenated, fused by ``self.conv``, and decoded by
    the ``UpBlock1..UpBlockN`` modules using ``gamma`` / ``beta`` computed
    from the flattened fused features.

    Returns:
        ``(out, cam_logit, heatmap)``: the up-sampled output, the
        concatenated branch logits, and the channel-summed fused features.
    """
    feats = self.down_sampling(x)

    def cam_branch(pooled, fc_layer):
        # Logits from the pooled vector; features re-weighted per channel by
        # the layer's first parameter.
        branch_logits = fc_layer(pooled.view(feats.shape[0], -1))
        weight = list(fc_layer.parameters())[0]
        return branch_logits, feats * weight.unsqueeze(dim=2).unsqueeze(dim=3)

    gap_logits, gap = cam_branch(F.adaptive_avg_pool2d(feats, 1), self.gap_fc)
    gmp_logits, gmp = cam_branch(F.adaptive_max_pool2d(feats, 1), self.gmp_fc)

    cam_logit = torch.cat([gap_logits, gmp_logits], dim=1)
    fused = self.conv(torch.cat([gap, gmp], dim=1))
    heatmap = torch.sum(fused, dim=1, keepdim=True)

    style = self.fc(fused.view(fused.shape[0], -1))
    gamma, beta = self.gamma(style), self.beta(style)

    decoded = fused
    for block_no in range(1, self.num_blocks + 1):
        decoded = getattr(self, "UpBlock" + str(block_no))(decoded, gamma, beta)

    return self.up_sampling(decoded), cam_logit, heatmap
def forward(self, x):
    """Encoder / top-down decoder pass producing mask logits and a label.

    The five encoder stages are followed by a top-down pathway built with
    ``upsize_add`` over the lateral projections; the three refined pathway
    maps are fused with ``fuse(..., "cat")`` and turned into ``logit``.
    The per-sample label probability is the global max of ``sigmoid(logit)``.

    Returns:
        ``logit`` when ``self.infer`` is set; ``(probability_label, logit)``
        when ``self.fp16`` is set; otherwise
        ``(probability_label, probability_mask)``.
    """
    # Unpacking also enforces a 4-D input.
    batch_size, _, _, _ = x.shape

    # Encoder
    e1 = self.conv1(x)
    e2 = self.encode2(e1)
    e3 = self.encode3(e2)
    e4 = self.encode4(e3)
    e5 = self.encode5(e4)

    # Top-down pathway with lateral connections
    p0 = self.lateral0(e5)
    p1 = upsize_add(p0, self.lateral1(e4))
    p2 = upsize_add(p1, self.lateral2(e3))
    p3 = upsize_add(p2, self.lateral3(e2))

    # Refine each level only after the whole pathway has been built.
    refined = [self.top1(p1), self.top2(p2), self.top3(p3)]

    logit = self.logit(fuse(refined, "cat"))

    probability_mask = torch.sigmoid(logit)
    probability_label = F.adaptive_max_pool2d(probability_mask, 1).view(batch_size, -1)

    if self.infer:
        return logit
    if self.fp16:
        # amp doesn't support regular BCELoss; only BCEWithLogitsLoss
        return (probability_label, logit)
    return (probability_label, probability_mask)
def forward(self, pred, pred_semseg, label_info, label_i=0, sparse=True, shape=(30,30)):
    """Disparity loss from a per-bin prediction biased by segmentation.

    The channel-wise maximum of ``pred_semseg`` is added to every channel of
    ``pred``; after a softmax over dim 1 the expected disparity is the sum of
    the probabilities weighted by ``label_info['disp_scans']``. Loss and
    end-point error are averaged over targets ``>= 0`` only.

    Args:
        pred: 4-D prediction tensor.
        pred_semseg: segmentation scores; reduced with max over dim 1.
        label_info: dict providing 'disp_label_0' and 'disp_scans'.
        label_i: unused.
        sparse: when truthy the target is adaptively max-pooled to ``shape``.
        shape: target size used when ``sparse`` is truthy.

    Returns:
        ``(loss, epe_pred)``: mean Smooth-L1 loss and mean absolute error.
    """
    target = label_info['disp_label_0']
    # Unpacking also enforces a 4-D prediction.
    bs, _, _, _ = pred.shape
    if sparse:
        target = F.adaptive_max_pool2d(target, shape)

    # Disparity value of every bin, tiled to the effective batch size.
    repeats = bs // cfg.TRAIN.IMS_PER_BATCH
    disp_scans = label_info['disp_scans'].repeat(repeats, 1, 1, 1)

    semseg_max, _ = torch.max(pred_semseg, dim=1)
    biased = pred + semseg_max.unsqueeze(1)
    probs = F.softmax(biased, dim=1)
    disp_pred = torch.sum(probs * disp_scans, dim=1)

    loss_map = self.SmoothL1Loss(disp_pred, target)
    abs_err = torch.abs(disp_pred - target)

    # Ignore false disparity values (negative targets).
    valid = target.ge(0)
    loss = torch.masked_select(loss_map, valid).mean()
    epe_pred = torch.masked_select(abs_err, valid).mean()
    return loss, epe_pred
def forward(self, x):
    """Iterate the shared convolution with a history of previous states.

    The input is zero-padded (last padded dim of ``F.pad``'s 6-tuple) up to
    ``self.n_filters``. At iteration ``t`` the new state is
    ``alpha[t, 0] * Lactiv(Lconv(last))`` plus ``alpha[t, i + 1] * hist[i]``
    for every non-empty history slot, then normalised. When
    ``self.pool_strategy[t]`` is truthy every stored state is max-pooled 2x2.

    The output features depend on ``self.out_mode``: global max pooling for
    "pool" (when the last dim is larger than 1), flattening for "flatten",
    otherwise the value at spatial position (0, 0).

    NOTE(review): the history shift starts at slot 1, so slot 0 is never
    overwritten -- confirm this is intended.
    """
    x = F.pad(x, (0, 0, 0, 0, 0, self.n_filters - self.input_shape[0]))
    hist = [None] * (self.n_history - 1) + [x]
    newest = self.n_history - 1

    for t in range(self.n_iter):
        a = self.alpha[t, 0] * self.Lactiv(self.Lconv(hist[-1]))
        for slot, past in enumerate(hist):
            if past is not None:
                a = a + self.alpha[t, slot + 1] * past
        a = self.Lnormalization[t](a)

        # Shift the history (from slot 1 on) and store the new state last.
        for slot in range(1, newest):
            hist[slot] = hist[slot + 1]
        hist[newest] = a

        if self.pool_strategy[t]:
            hist = [h if h is None else F.max_pool2d(h, 2) for h in hist]

    final = hist[-1]
    if self.out_mode == "pool" and final.size()[-1] > 1:
        out = F.adaptive_max_pool2d(final, (1, 1))[:, :, 0, 0]
    elif self.out_mode == "flatten":
        out = final.view(final.size()[0], -1)
    else:
        out = final[:, :, 0, 0]
    return self.LOutput(out)
def roi_pooling_2d_pytorch(input, rois, output_size=(7, 7), spatial_scale=1.0):
    """Spatial Region of Interest (ROI) pooling in pure pytorch/python.

    Performs a Python loop over the ROIs: each ROI row is
    ``(batch_index, x1, y1, x2, y2)`` with inclusive corners. Coordinates
    are multiplied by ``spatial_scale`` and truncated to integers, the
    region is cropped from ``input`` and adaptively max-pooled.

    The caller's ``rois`` tensor is never modified. The previous version
    scaled it in place whenever it was already a float32 tensor, because
    ``.float()`` then returns the same storage.

    Args:
        input: feature map indexed as [batch, channel, y, x].
        rois: 2-D tensor with 5 columns.
        output_size (int or tuple): the target output size, either a tuple
            (H, W) or a single number H for a square H x H output.
        spatial_scale (float): scale of the rois if resized.

    Returns:
        Tensor with one pooled map per ROI, concatenated along dim 0.
    """
    assert rois.dim() == 2
    assert rois.size(1) == 5

    # Private copy so the in-place scaling cannot leak to the caller.
    scaled = rois.detach().float().clone()
    scaled[:, 1:].mul_(spatial_scale)

    output = []
    for im_idx, x1, y1, x2, y2 in scaled.long().tolist():
        im = input.narrow(0, im_idx, 1)[..., y1:y2 + 1, x1:x2 + 1]
        output.append(F.adaptive_max_pool2d(im, output_size))
    return torch.cat(output, 0)
def forward(self, x, blockID=None, ratio=None):
    """ResNet-style forward pass with optional stochastic downsampling.

    ``self.stochastic_downsampling(blockID, ratio)`` is called first. When
    ``self.downsampling_ratio < 1`` the stem max-pool is replaced by an
    adaptive max pool to ``round(size_after_maxpool * downsampling_ratio)``;
    ``self.size_after_maxpool`` is measured once (dim 2 of the regular
    max-pool output) and cached on the instance.

    Returns:
        Output of ``self.fc`` on the flattened, average-pooled features.
    """
    self.stochastic_downsampling(blockID, ratio)

    x = self.relu(self.bn1(self.conv1(x)))

    if self.downsampling_ratio < 1:
        if self.size_after_maxpool is None:
            # Cache the size the regular max-pool would have produced.
            self.size_after_maxpool = self.maxpool(x).size(2)
        target = int(round(self.size_after_maxpool * self.downsampling_ratio))
        x = F.adaptive_max_pool2d(x, target)
    else:
        x = self.maxpool(x)

    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
        x = stage(x)

    x = self.avgpool(x)
    return self.fc(x.view(x.size(0), -1))
def forward(self, features: Tensor, proposal_bboxes: Tensor) -> Tuple[Tensor, Tensor]:
    """Pool a 7x7 patch per proposal box and run the detection head.

    Box coordinates are divided by 16 and rounded to feature-map cells. The
    crop uses an inclusive start clamped to ``[0, dim - 1]`` and an exclusive
    end clamped to ``[1, dim]``, so it is never empty.

    The leftover debug ``print(pool)`` was removed: it dumped every pooled
    tensor to stdout on each forward pass.

    Args:
        features: 4-D feature map; the crop keeps all leading dims.
        proposal_bboxes: iterable of boxes indexed as ``(x1, y1, x2, y2)``.

    Returns:
        ``(classes, transformers)`` from ``self._class`` and
        ``self._transformer`` applied to ``self.fcs`` of the flattened patches.
    """
    _, _, feature_map_height, feature_map_width = features.shape

    pool = []
    for proposal_bbox in proposal_bboxes:
        # start_x in [0, feature_map_width - 1]
        start_x = max(
            min(round(proposal_bbox[0].item() / 16), feature_map_width - 1), 0)
        # start_y in [0, feature_map_height - 1]
        start_y = max(
            min(round(proposal_bbox[1].item() / 16), feature_map_height - 1), 0)
        # end_x (exclusive) in [1, feature_map_width]
        end_x = max(
            min(round(proposal_bbox[2].item() / 16) + 1, feature_map_width), 1)
        # end_y (exclusive) in [1, feature_map_height]
        end_y = max(
            min(round(proposal_bbox[3].item() / 16) + 1, feature_map_height), 1)
        roi_feature_map = features[..., start_y:end_y, start_x:end_x]
        pool.append(F.adaptive_max_pool2d(roi_feature_map, 7))

    # One 7x7 patch per proposal, stacked along dim 0 and flattened.
    pool = torch.cat(pool, dim=0)
    pool = pool.view(pool.shape[0], -1)

    h = self.fcs(pool)
    classes = self._class(h)
    transformers = self._transformer(h)
    return classes, transformers
def simple_test_bboxes(self, x, img_meta, mask_feats, proposals, rcnn_test_cfg, rescale=False):
    """Test-time box detection with mask features appended to RoI features.

    RoI features are extracted from the first
    ``self.bbox_roi_extractor.num_inputs`` levels of ``x`` (and passed
    through the shared head if present). ``mask_feats`` is adaptively
    max-pooled to the RoI features' spatial size and concatenated along
    dim 1 before the box head is applied.

    Returns:
        ``(det_bboxes, det_labels)`` from ``self.bbox_head.get_det_bboxes``,
        using the image shape and scale factor of ``img_meta[0]``.
    """
    rois = bbox2roi(proposals)

    extractor = self.bbox_roi_extractor
    roi_feats = extractor(x[:extractor.num_inputs], rois)
    if self.with_shared_head:
        roi_feats = self.shared_head(roi_feats)

    # Bring the mask features to the RoI resolution before fusing.
    _, _, roi_h, roi_w = roi_feats.size()
    pooled_mask = F.adaptive_max_pool2d(mask_feats, (roi_h, roi_w))
    fused = torch.cat([roi_feats, pooled_mask], dim=1)

    cls_score, bbox_pred = self.bbox_head(fused)

    meta = img_meta[0]
    det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
        rois,
        cls_score,
        bbox_pred,
        meta['img_shape'],
        meta['scale_factor'],
        rescale=rescale,
        cfg=rcnn_test_cfg)
    return det_bboxes, det_labels
def forward(self, x, visual=False):
    """Fuse a global and a local classification branch.

    Global branch (G): embedding -> spatial mean -> ``self.fc`` -> ``out1``.
    Local branch (L): ``special`` -> ``cls1`` -> softmax over dim 1 ->
    global max pooling -> ``cls2`` -> ``out2``.
    The two outputs are mixed with a dynamic weight ``w`` from
    ``self.fusion`` as ``w * out1 + (1 - w) * out2``.

    Args:
        x: input batch.
        visual: when truthy, return reshaped intermediate maps for
            visualization instead of the fused prediction.

    Returns:
        * ``visual``: ``(local softmax map, pooled local scores,
          softmax of out2)``, each reshaped with ``view``;
        * training: ``(ally, out1, out2, out)`` where ``ally`` is the local
          softmax map reshaped to ``(b * w, -1)``;
        * otherwise: ``out``.
    """
    feats = self.cnn(x)

    # G: global branch
    x_g = self.embedding(feats)
    b, c, h, w = x_g.size()
    x_g = torch.mean(x_g.contiguous().view(b, c, -1), dim=2)
    out1 = self.fc(x_g)

    # L: local branch
    y_l0 = F.softmax(self.cls1(self.special(feats)), dim=1)
    y_l = F.adaptive_max_pool2d(y_l0, (1, 1)).contiguous().view(b, -1)
    out2 = self.cls2(y_l)

    # Visualization
    if visual:
        nc1 = y_l0.size(1)
        out2 = F.softmax(out2, dim=1)
        return (y_l0.contiguous().view(nc1, -1),
                y_l.contiguous().view(nc1, -1),
                out2.contiguous().view(nc1 - 1, -1))

    # Dynamic weighting: (w, 1 - w) applied to (out1, out2)
    weight = self.fusion(F.relu(out1))
    weight = torch.cat((weight, 1 - weight), dim=1)
    y_all = torch.stack((out1, out2), dim=2)
    out = y_all.matmul(weight.unsqueeze(2)).squeeze(2)

    if not self.training:
        return out
    ally = y_l0.permute(0, 2, 3, 1).contiguous().view(b * w, -1)
    return ally, out1, out2, out
def forward(self, x):
    """Run the backbone, global max pooling and the linear classifier.

    ``log_softmax`` is now called with an explicit ``dim=1``. Relying on the
    implicit dimension is deprecated and emits a warning; for the 2-D input
    produced here the implicit choice was dim 1, so results are unchanged.

    Returns:
        Log-probabilities over dim 1 of ``self.linear``'s output.
    """
    out = self.conv1(x)
    for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
        out = stage(out)
    # Global max pooling leaves a single value per channel.
    out = F.adaptive_max_pool2d(out, 1)
    out = out.view(out.size(0), -1)
    return F.log_softmax(self.linear(out), dim=1)
def forward(self, x):
    """Classify from concatenated global average- and max-pooled features.

    The pooled maps are flattened with ``view(N, -1)`` instead of
    ``squeeze()``. ``squeeze()`` also removed the batch axis for a batch of
    one (and the channel axis for a single channel), which made the
    ``cat(dim=1)`` below fail. For larger batches the result is unchanged.

    Returns:
        Output of ``self.fc`` on the ``[mean, max]`` feature vector.
    """
    x = self.conv_layers(x)
    batch = x.size(0)
    mean_pool = F.adaptive_avg_pool2d(x, 1).view(batch, -1)
    max_pool = F.adaptive_max_pool2d(x, 1).view(batch, -1)
    x = torch.cat([mean_pool, max_pool], dim=1)
    x = self.fc(x)
    return x
def _pyramid_pooling(self, input_x, output_sizes): pyramid_level_tensors = [] for tsize in output_sizes: if self.pool_type == 'max_pool': pyramid_level_tensor = F.adaptive_max_pool2d(input_x, tsize) if self.pool_type == 'avg_pool': pyramid_level_tensor = F.adaptive_avg_pool2d(input_x, tsize) pyramid_level_tensor = pyramid_level_tensor.view(input_x.size(0), -1) pyramid_level_tensors.append(pyramid_level_tensor) return torch.cat(pyramid_level_tensors, dim=1)
def forward(self, x):
    """Globally max-pool the input and apply the linear layer.

    The pooled tensor is reshaped to ``(-1, number of channels)`` before it
    is passed to ``self.linear``.
    """
    pooled = F.adaptive_max_pool2d(x, output_size=(1, 1))
    channels = pooled.size(1)
    return self.linear(pooled.view(-1, channels))
def logits(self, features):
    """Globally max-pool ``features`` and return the classifier output.

    The pooled tensor is flattened per sample before ``self.classifier``
    is applied.
    """
    pooled = F.adaptive_max_pool2d(features, 1)
    flat = pooled.view(pooled.size(0), -1)
    return self.classifier(flat)
def classifier(self, x):
    """Globally max-pool, apply dropout and the linear layer.

    ``log_softmax`` is now called with an explicit ``dim=1``. Relying on the
    implicit dimension is deprecated and emits a warning; for the 2-D input
    produced here the implicit choice was dim 1, so results are unchanged.

    Returns:
        Log-probabilities over dim 1 of ``self.linear``'s output.
    """
    x = F.adaptive_max_pool2d(x, 1)
    x = x.view(x.size(0), -1)
    x = self.dropout(x)
    return F.log_softmax(self.linear(x), dim=1)