def forward(self, x):
    res = self.feature_extractor(x)
    res = F.adaptive_avg_pool2d(res, (1, 1))
    res = res.view(-1, 32)
    res = self.fc_final(res)
    return res
def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.adaptive_avg_pool2d(out, 1)
    out = out.view(out.size(0), -1)
    # Pass dim explicitly; calling log_softmax without it is deprecated.
    return F.log_softmax(self.linear(out), dim=1)
def forward(self, x):
    out = self.conv1(x)
    out = self.block1(out)
    out = self.block2(out)
    out = self.block3(out)
    out = self.relu(self.bn1(out))
    out = F.adaptive_avg_pool2d(out, 1)
    out = out.view(-1, self.nChannels)
    return self.fc(out)
def get_activations(images, model, batch_size=64, dims=2048,
                    cuda=False, verbose=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- images      : Numpy array of dimension (n_images, 3, hi, wi). The
                     values must lie between 0 and 1.
    -- model       : Instance of inception model
    -- batch_size  : The images numpy array is split into batches with
                     batch size batch_size. A reasonable batch size depends
                     on the hardware.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the
                     number of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    d0 = images.shape[0]
    if batch_size > d0:
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = d0

    n_batches = d0 // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))
    for i in range(n_batches):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        batch = torch.from_numpy(images[start:end]).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        # `Variable(batch, volatile=True)` is deprecated; disable autograd
        # with torch.no_grad() instead.
        with torch.no_grad():
            pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
def _pyramid_pooling(self, input_x, output_sizes):
    pyramid_level_tensors = []
    for tsize in output_sizes:
        # Use elif/else so exactly one pooling branch runs; the original pair
        # of independent ifs left pyramid_level_tensor undefined for any
        # unrecognized pool_type.
        if self.pool_type == 'max_pool':
            pyramid_level_tensor = F.adaptive_max_pool2d(input_x, tsize)
        elif self.pool_type == 'avg_pool':
            pyramid_level_tensor = F.adaptive_avg_pool2d(input_x, tsize)
        else:
            raise ValueError('Unknown pool_type: %s' % self.pool_type)
        pyramid_level_tensor = pyramid_level_tensor.view(input_x.size(0), -1)
        pyramid_level_tensors.append(pyramid_level_tensor)
    return torch.cat(pyramid_level_tensors, dim=1)
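# A minimal usage sketch for the pyramid pooling above (the sizes and channel
# count are illustrative assumptions, not from the original). With output
# sizes (1, 2, 4) and C input channels, the concatenated feature has
# C * (1*1 + 2*2 + 4*4) = 21 * C dimensions.
import torch
import torch.nn.functional as F

x = torch.randn(8, 256, 13, 13)  # (batch, C, H, W)
levels = [F.adaptive_avg_pool2d(x, s).view(x.size(0), -1) for s in (1, 2, 4)]
feat = torch.cat(levels, dim=1)
assert feat.shape == (8, 256 * (1 + 4 + 16))  # (8, 5376)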
def forward(self, x):
    x = self.conv_layers(x)
    # Squeeze only the spatial dims so a batch of size 1 keeps its batch axis;
    # a bare .squeeze() would also drop the batch dimension in that case.
    mean_pool = F.adaptive_avg_pool2d(x, 1).squeeze(-1).squeeze(-1)
    max_pool = F.adaptive_max_pool2d(x, 1).squeeze(-1).squeeze(-1)
    x = torch.cat([mean_pool, max_pool], dim=1)
    x = self.fc(x)
    return x
def forward(self, img, att_size=14):
    x = img.unsqueeze(0)

    x = self.resnet.conv1(x)
    x = self.resnet.bn1(x)
    x = self.resnet.relu(x)
    x = self.resnet.maxpool(x)

    x = self.resnet.layer1(x)
    x = self.resnet.layer2(x)
    x = self.resnet.layer3(x)
    x = self.resnet.layer4(x)

    fc = x.mean(3).mean(2).squeeze()
    att = F.adaptive_avg_pool2d(x, [att_size, att_size]).squeeze().permute(1, 2, 0)

    return fc, att
def forward(self, img, att_size=14):
    x = img  # (26, 3, w, d)

    x = self.resnet.conv1(x)
    x = self.resnet.bn1(x)
    x = self.resnet.relu(x)
    x = self.resnet.maxpool(x)

    x = self.resnet.layer1(x)
    x = self.resnet.layer2(x)
    x = self.resnet.layer3(x)
    x = self.resnet.layer4(x)

    fc = x.mean(3).mean(2).squeeze()
    if att_size > 0:
        att = F.adaptive_avg_pool2d(x, [att_size, att_size]).squeeze().permute(1, 2, 0)
        return fc, att
    else:
        return fc.unsqueeze(0), fc.mean(0)  # (1, 26, 2048), (2048,)
def forward(self, x):
    features = self.features(x)
    out = F.leaky_relu(features, 0.02, inplace=True)
    out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1)
    out = self.classifier(out)
    return out
def forward(self, x):
    bs, _, _, _ = x.shape
    x = self.model.features(x)
    x = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
    l0 = self.l0(x)
    return l0
def forward(self, x: Tensor, y: Tensor) -> Tensor:
    # !!! MODIFIED BY USER !!!
    # features = self.features(x)

    # Initial layers
    x = self.features.conv0(x)
    x = self.features.norm0(x)
    x = self.features.relu0(x)
    x = self.features.pool0(x)

    # denseblock1
    x = self.features.denseblock1(x)
    # transition1
    x = self.features.transition1(x)
    l1 = x

    # denseblock2
    x = self.features.denseblock2(x)
    # transition2
    x = self.features.transition2(x)
    l2 = x

    # denseblock3
    x = self.features.denseblock3(x)
    # transition3
    x = self.features.transition3(x)
    l3 = x

    # denseblock4
    x = self.features.denseblock4(x)
    # norm5
    features = self.features.norm5(x)

    out = F.relu(features, inplace=True)
    out = F.adaptive_avg_pool2d(out, (1, 1))
    g = out

    l1 = self.att_proj1(l1)
    l2 = self.att_proj2(l2)
    l3 = self.att_proj3(l3)

    ga1 = self.calculate_ga(l1, g, self.att_est1)
    ga2 = self.calculate_ga(l2, g, self.att_est2)
    ga3 = self.calculate_ga(l3, g, self.att_est3)

    # Classifier layer has been replaced
    # out = torch.flatten(out, 1)
    # out = self.classifier(out)

    # Non-image model
    non_image_out = F.relu(self.non_image_fc(y))

    # print(ga1.shape, ga2.shape, ga3.shape, g.shape)
    g_all = torch.cat((ga1, ga2, ga3, non_image_out), dim=1)
    out = self.classifier(g_all)
    return out
def _scale(self, input: Tensor, inplace: bool) -> Tensor:
    scale = F.adaptive_avg_pool2d(input, 1)
    scale = self.fc1(scale)
    scale = self.relu(scale)
    scale = self.fc2(scale)
    return F.hardsigmoid(scale, inplace=inplace)
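# A minimal sketch of how a squeeze-and-excitation gate like _scale above is
# typically consumed: the (N, C, 1, 1) output multiplies the input channel-wise.
# The channel sizes below are illustrative assumptions, not from the original.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SqueezeExcitation(nn.Module):
    def __init__(self, channels: int, squeeze_channels: int):
        super().__init__()
        self.fc1 = nn.Conv2d(channels, squeeze_channels, 1)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeeze_channels, channels, 1)

    def forward(self, x):
        scale = F.adaptive_avg_pool2d(x, 1)       # global context per channel
        scale = self.fc2(self.relu(self.fc1(scale)))
        return x * F.hardsigmoid(scale)           # channel-wise re-weighting

se = SqueezeExcitation(64, 16)
out = se(torch.randn(2, 64, 32, 32))              # same shape as the input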
def forward(self, x):
    if self.transform_input:
        x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
        x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
    # 299 x 299 x 3
    x = self.Conv2d_1a_3x3(x)
    # 149 x 149 x 32
    x = self.Conv2d_2a_3x3(x)
    # 147 x 147 x 32
    x = self.Conv2d_2b_3x3(x)
    # 147 x 147 x 64
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 73 x 73 x 64
    x = self.Conv2d_3b_1x1(x)
    # 73 x 73 x 80
    x = self.Conv2d_4a_3x3(x)
    # 71 x 71 x 192
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 35 x 35 x 192
    x = self.Mixed_5b(x)
    # 35 x 35 x 256
    x = self.Mixed_5c(x)
    # 35 x 35 x 288
    x = self.Mixed_5d(x)
    # 35 x 35 x 288
    x = self.Mixed_6a(x)
    # 17 x 17 x 768
    x = self.Mixed_6b(x)
    # 17 x 17 x 768
    x = self.Mixed_6c(x)
    # 17 x 17 x 768
    x = self.Mixed_6d(x)
    # 17 x 17 x 768
    x = self.Mixed_6e(x)
    # 17 x 17 x 768
    if self.training and self.aux_logits:
        aux = self.AuxLogits(x)
    # 17 x 17 x 768
    x = self.Mixed_7a(x)
    # 8 x 8 x 1280
    x = self.Mixed_7b(x)
    # 8 x 8 x 2048
    x = self.Mixed_7c(x)
    # 8 x 8 x 2048
    # x = F.avg_pool2d(x, kernel_size=8)
    x = F.adaptive_avg_pool2d(x, output_size=(1, 1))
    # 1 x 1 x 2048
    x = F.dropout(x, training=self.training)
    # 1 x 1 x 2048
    x = x.view(x.size(0), -1)
    # 2048
    x = self.fc(x)
    # 1000 (num_classes)
    if self.training and self.aux_logits:
        return x, aux
    return x
def build_module(self):
    """Builds network whilst automatically inferring shapes of layers."""
    print("Building basic block of ConvolutionalNetwork using input shape",
          self.input_shape)
    # Create dummy inputs to be used to infer shapes of layers.
    x = torch.zeros(self.input_shape)
    out = x

    # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1,
    #                 padding=0, dilation=1, groups=1, bias=True)
    for i in range(self.num_layers):
        # Add a conv layer to the module dict and run it on the current output.
        self.layer_dict['conv_{}'.format(i)] = nn.Conv2d(
            in_channels=out.shape[1],
            kernel_size=3,
            out_channels=self.num_filters,
            padding=1,
            bias=self.use_bias)
        out = self.layer_dict['conv_{}'.format(i)](out)
        out = F.relu(out)
        print(out.shape)

        if self.dim_reduction_type == 'strided_convolution':
            # Reduce dimensionality with a stride-2 convolution.
            self.layer_dict['dim_reduction_strided_conv_{}'.format(i)] = nn.Conv2d(
                in_channels=out.shape[1],
                kernel_size=3,
                out_channels=out.shape[1],
                padding=1,
                bias=self.use_bias,
                stride=2,
                dilation=1)
            out = self.layer_dict['dim_reduction_strided_conv_{}'.format(i)](out)
            out = F.relu(out)
        elif self.dim_reduction_type == 'dilated_convolution':
            # Reduce dimensionality with a dilated convolution, using an
            # arbitrary dilation rate of i + 2 (so it gets smaller as we go;
            # other dilation rates can be chosen).
            self.layer_dict['dim_reduction_dilated_conv_{}'.format(i)] = nn.Conv2d(
                in_channels=out.shape[1],
                kernel_size=3,
                out_channels=out.shape[1],
                padding=1,
                bias=self.use_bias,
                stride=1,
                dilation=i + 2)
            out = self.layer_dict['dim_reduction_dilated_conv_{}'.format(i)](out)
            out = F.relu(out)
        elif self.dim_reduction_type == 'max_pooling':
            self.layer_dict['dim_reduction_max_pool_{}'.format(i)] = nn.MaxPool2d(2, padding=1)
            out = self.layer_dict['dim_reduction_max_pool_{}'.format(i)](out)
        elif self.dim_reduction_type == 'avg_pooling':
            self.layer_dict['dim_reduction_avg_pool_{}'.format(i)] = nn.AvgPool2d(2, padding=1)
            out = self.layer_dict['dim_reduction_avg_pool_{}'.format(i)](out)
        print(out.shape)

    if out.shape[-1] != 2:
        # Apply adaptive pooling to make sure the output of the conv layers is
        # always (2, 2) spatially (helps with comparisons).
        out = F.adaptive_avg_pool2d(out, 2)
    print('shape before final linear layer', out.shape)

    out = out.view(out.shape[0], -1)
    self.logit_linear_layer = nn.Linear(
        in_features=out.shape[1],
        out_features=self.num_output_classes,
        bias=self.use_bias)
    out = self.logit_linear_layer(out)  # apply linear layer on flattened inputs
    print("Block is built, output volume is", out.shape)
    return out
def forward(self, x):
    features = self.backbone.features(x)
    features = F.adaptive_avg_pool2d(features, output_size=(1, 1)).view((features.size()[0], -1))
    logits = self.head_linear(features)
    return logits
def forward(self, inputs):
    assert len(inputs) == len(self.in_channels)

    laterals = [
        lateral_conv(inputs[i + self.start_level])
        for i, lateral_conv in enumerate(self.lateral_convs)
    ]

    h, w = inputs[-1].size(2), inputs[-1].size(3)
    # Pool the top backbone level at several adaptive output ratios.
    AdapPool_Features = [
        self.high_lateral_conv[j](F.adaptive_avg_pool2d(
            inputs[-1],
            output_size=(max(1, int(h * self.adaptive_pool_output_ratio[j])),
                         max(1, int(w * self.adaptive_pool_output_ratio[j])))))
        for j in range(len(self.adaptive_pool_output_ratio))
    ]
    # F.upsample is deprecated; F.interpolate is the current equivalent.
    AdapPool_Features = [
        F.interpolate(feat, size=(h, w), mode='bilinear', align_corners=True)
        for feat in AdapPool_Features
    ]

    Concat_AdapPool_Features = torch.cat(AdapPool_Features, dim=1)
    fusion_weights = self.high_lateral_conv_attention(Concat_AdapPool_Features)
    fusion_weights = torch.sigmoid(fusion_weights)
    high_pool_fusion = 0
    for i in range(3):
        high_pool_fusion += torch.unsqueeze(fusion_weights[:, i, :, :], dim=1) * AdapPool_Features[i]

    raw_laterals = [laterals[i].clone() for i in range(len(laterals))]

    # build top-down path
    # high_pool_fusion += global_pool
    laterals[-1] += high_pool_fusion
    used_backbone_levels = len(laterals)
    for i in range(used_backbone_levels - 1, 0, -1):
        laterals[i - 1] += F.interpolate(laterals[i], scale_factor=2, mode='nearest')

    # build outputs
    # part 1: from original levels
    outs = [
        self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
    ]
    # part 2: add extra levels
    if self.num_outs > len(outs):
        # use max pool to get more levels on top of outputs
        # (e.g., Faster R-CNN, Mask R-CNN)
        if not self.add_extra_convs:
            for i in range(self.num_outs - used_backbone_levels):
                outs.append(F.max_pool2d(outs[-1], 1, stride=2))
        # add conv layers on top of original feature maps (RetinaNet)
        else:
            if self.extra_convs_on_inputs:
                orig = inputs[self.backbone_end_level - 1]
                outs.append(self.fpn_convs[used_backbone_levels](orig))
            else:
                outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))

            pool_noupsample_fusion = F.adaptive_avg_pool2d(high_pool_fusion, (1, 1))
            outs[-1] += pool_noupsample_fusion

            for i in range(used_backbone_levels + 1, self.num_outs):
                if self.relu_before_extra_convs:
                    outs.append(self.fpn_convs[i](F.relu(outs[-1])))
                else:
                    outs.append(self.fpn_convs[i](outs[-1]))
    if self.train_with_auxiliary:
        return tuple(outs), tuple(raw_laterals)
    else:
        return tuple(outs)
def downsample2d_as(inputs, target_as):
    # `tf` here presumably aliases torch.nn.functional (not TensorFlow).
    _, _, h, w = target_as.size()
    return tf.adaptive_avg_pool2d(inputs, [h, w])
def forward(self, ft, score, x=None):
    h = self.reduce(ft)
    hpool = F.adaptive_avg_pool2d(h, (1, 1)) if x is None else x
    h = adaptive_cat((h, score), dim=1, ref_tensor=0)
    h = self.transform(h)
    return h, hpool
def forward(self, x):
    # Blend of global max pooling and global average pooling, weighted by self.a.
    return self.a * F.adaptive_max_pool2d(x, 1) + \
           (1 - self.a) * F.adaptive_avg_pool2d(x, 1)
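# A self-contained sketch of the blended pooling above, with `a` made a
# learnable parameter (an assumption; in the original it may be a fixed
# scalar). Initialized at 0.5, it interpolates between max and average pooling.
import torch
import torch.nn as nn
import torch.nn.functional as F

class BlendPool2d(nn.Module):
    def __init__(self, a: float = 0.5):
        super().__init__()
        self.a = nn.Parameter(torch.tensor(a))

    def forward(self, x):
        return self.a * F.adaptive_max_pool2d(x, 1) + \
               (1 - self.a) * F.adaptive_avg_pool2d(x, 1)

pool = BlendPool2d()
y = pool(torch.randn(4, 512, 7, 7))  # -> (4, 512, 1, 1)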
def global_pool(x1, x2):
    # Gate x2 with the sigmoid of x1's global average before the upsample-add.
    ctx = F.adaptive_avg_pool2d(x1, 1).sigmoid()
    x = upsample_add(x1, x2 * ctx)
    return x
def forward(self, inputs):
    return F.adaptive_avg_pool2d(inputs, 1).view(inputs.size(0), -1)
def forward(self, x):
    return F.adaptive_avg_pool2d(x, (1, 1))
def forward(self, x):
    # Generalized-mean (GeM) pooling: average of x^p, then the p-th root.
    return F.adaptive_avg_pool2d(
        x.clamp(min=self.eps).pow(self.p), self.size).pow(1. / self.p)
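# A self-contained sketch of GeM pooling matching the forward above, with p as
# a learnable parameter (an assumption; self.p may be fixed in the original).
# p = 1 recovers average pooling; large p approaches max pooling.
import torch
import torch.nn as nn
import torch.nn.functional as F

class GeM(nn.Module):
    def __init__(self, p: float = 3.0, eps: float = 1e-6, size=(1, 1)):
        super().__init__()
        self.p = nn.Parameter(torch.tensor(p))
        self.eps = eps
        self.size = size

    def forward(self, x):
        return F.adaptive_avg_pool2d(
            x.clamp(min=self.eps).pow(self.p), self.size).pow(1.0 / self.p)

gem = GeM()
y = gem(torch.randn(2, 2048, 7, 7))  # -> (2, 2048, 1, 1)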
def forward(self, feature):
    # class_feature = self.class_c2(self.class_c1(feature))  # (512, 10)
    class_feature = self.class_c1(feature)  # (512, 10)
    class_1x1 = F.adaptive_avg_pool2d(class_feature, output_size=(1, 1)).view((class_feature.size()[0], -1))
    class_logits = self.class_l1(class_1x1)
    return class_feature, class_logits
def forward(self, x):
    out = F.relu(x)
    out = F.adaptive_avg_pool2d(out, (1, 1))
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out
def _compute_grad_weights(self, grads):
    grads = self._normalize(grads)
    return F.adaptive_avg_pool2d(grads, 1)
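# A minimal sketch of how pooled gradient weights like the ones above are used
# in Grad-CAM-style visualizations (an assumption about this snippet's context;
# the shapes are illustrative). The pooled gradients weight the forward feature
# maps, which are summed over channels and passed through ReLU.
import torch
import torch.nn.functional as F

fmaps = torch.randn(1, 512, 14, 14)        # forward activations
grads = torch.randn(1, 512, 14, 14)        # gradients w.r.t. the activations
weights = F.adaptive_avg_pool2d(grads, 1)  # (1, 512, 1, 1) channel weights
cam = F.relu((weights * fmaps).sum(dim=1, keepdim=True))  # (1, 1, 14, 14)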
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].
    """
    if self.with_semantic:
        semantic_feats = [
            self.semantic_head(feat)[1] for feat in img_feats
        ]
    else:
        semantic_feats = [None] * len(img_metas)

    rcnn_test_cfg = self.test_cfg
    aug_bboxes = []
    aug_scores = []
    for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        flip_direction = img_meta[0]['flip_direction']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip, flip_direction)
        # "ms" in variable names means multi-stage
        ms_scores = []

        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_head = self.bbox_head[i]
            bbox_results = self._bbox_forward(
                i, x, rois, semantic_feat=semantic)
            ms_scores.append(bbox_results['cls_score'])

            if i < self.num_stages - 1:
                bbox_label = bbox_results['cls_score'].argmax(dim=1)
                rois = bbox_head.regress_by_class(
                    rois, bbox_label, bbox_results['bbox_pred'], img_meta[0])

        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            cls_score,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if self.with_mask:
        if det_bboxes.shape[0] == 0:
            segm_result = [[]
                           for _ in range(self.mask_head[-1].num_classes - 1)]
        else:
            aug_masks = []
            aug_img_metas = []
            for x, img_meta, semantic in zip(img_feats, img_metas,
                                             semantic_feats):
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']
                flip_direction = img_meta[0]['flip_direction']
                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                       scale_factor, flip, flip_direction)
                mask_rois = bbox2roi([_bboxes])
                mask_feats = self.mask_roi_extractor[-1](
                    x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                    mask_rois)
                if self.with_semantic:
                    semantic_feat = semantic
                    mask_semantic_feat = self.semantic_roi_extractor(
                        [semantic_feat], mask_rois)
                    if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
                        mask_semantic_feat = F.adaptive_avg_pool2d(
                            mask_semantic_feat, mask_feats.shape[-2:])
                    mask_feats += mask_semantic_feat
                last_feat = None
                for i in range(self.num_stages):
                    mask_head = self.mask_head[i]
                    if self.mask_info_flow:
                        mask_pred, last_feat = mask_head(
                            mask_feats, last_feat)
                    else:
                        mask_pred = mask_head(mask_feats)
                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                    aug_img_metas.append(img_meta)
            merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                           self.test_cfg)

            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                rcnn_test_cfg,
                ori_shape,
                scale_factor=1.0,
                rescale=False)
        return bbox_result, segm_result
    else:
        return bbox_result
def get_activations(files, model, batch_size=1, dims=2048,
                    cuda=False, verbose=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files       : List of image files paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the
                     number of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))

    for i in tqdm(range(n_batches)):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        images = np.array(
            [imread(str(f)).astype(np.float32) for f in files[start:end]])
        # images = np.concatenate([imread(str(f)).astype(np.float32)
        #                          for f in files[start:end]], axis=0)

        # Reshape to (n_images, 3, height, width)
        images = images.transpose((0, 3, 1, 2))
        images /= 255

        batch = torch.from_numpy(images).type(torch.FloatTensor)
        if cuda:
            batch = batch.cuda()

        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
def make_avg_flat(out):
    flat = F.adaptive_avg_pool2d(out, output_size=1)
    flat = flat.view(flat.size(0), -1)
    return flat
def resize2d(img, size):
    return F.adaptive_avg_pool2d(img, size[2:])
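# A quick usage sketch for resize2d above: adaptive average pooling acts as a
# box-filter downsampler to another tensor's spatial size (shapes illustrative).
import torch
import torch.nn.functional as F

src = torch.randn(1, 3, 64, 64)
ref = torch.randn(1, 3, 16, 16)
out = F.adaptive_avg_pool2d(src, ref.size()[2:])  # resize2d(src, ref.size())
assert out.shape[2:] == ref.shape[2:]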
def forward(self, x):
    bts, _, _, _ = x.size()
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    x = torch.cat([
        (x - mean[2]) / std[2],
        (x - mean[1]) / std[1],
        (x - mean[0]) / std[0],
    ], 1)

    e1 = self.conv1(x)      # ; print('x', x.size())   # 64, 64, 64
    e1 = self.scse1(e1)
    e2 = self.encoder2(e1)  # ; print('e2', e2.size()) # 128, 32, 32
    e2 = self.scse2(e2)
    e3 = self.encoder3(e2)  # ; print('e3', e3.size()) # 256, 16, 16
    e3 = self.scse3(e3)
    e4 = self.encoder4(e3)  # ; print('e4', e4.size()) # 512, 8, 8
    e4 = self.scse4(e4)
    # e5 = self.encoder5(e4)  # ; print('e5', e5.size()) # 1024, 8, 8
    # e5 = self.scse5(e5)

    f = self.center(e4)  # ; print('f', f.size())  # 256, 8, 8
    # f = self.scse_center(f)

    f_gap = F.adaptive_avg_pool2d(f, output_size=1)  # 256, 1, 1
    f_gap = F.dropout(f_gap, p=0.5)
    f_gap_fuse = self.class_fuse_conv(f_gap)  # 64, 1, 1
    class_logit = self.class_out(f_gap_fuse.view(bts, 64)).view(bts)  # 1

    d5 = self.decoder5(f, e3)   # ; print('d5', d5.size())  # 64, 16, 16
    # d5 = self.drop_1(d5)
    d4 = self.decoder4(d5, e2)  # ; print('d4', d4.size())  # 32, 32
    # d4 = self.drop_1(d4)
    d3 = self.decoder3(d4, e1)  # ; print('d3', d3.size())  # 64, 64
    # d3 = self.drop_1(d3)
    d2 = self.decoder2(d3)      # ; print('d2', d2.size())  # 128, 128

    # F.upsample is deprecated; F.interpolate is the current equivalent.
    hyper = torch.cat((
        F.interpolate(d2, scale_factor=2, mode='bilinear', align_corners=False),
        F.interpolate(d3, scale_factor=4, mode='bilinear', align_corners=False),
        F.interpolate(d4, scale_factor=8, mode='bilinear', align_corners=False),
        F.interpolate(d5, scale_factor=16, mode='bilinear', align_corners=False),
    ), 1)  # 256, 128, 128
    hyper = F.dropout2d(hyper, p=0.5)  # 256, 128, 128
    seg_base_fuse = self.seg_basefuse_conv(hyper)  # 64, 128, 128
    seg_logit = self.seg_single_logit(seg_base_fuse)  # 1, 128, 128

    fuse_feature = torch.cat((
        seg_base_fuse,
        F.interpolate(f_gap_fuse, scale_factor=seg_logit.size()[-1], mode='nearest'),
    ), 1)  # 128, 128, 128
    fuse_logit = self.fuse_logit(fuse_feature)  # 1, 128, 128

    return fuse_logit, seg_logit, class_logit
def get_yolo_feature_vec(self, coords):
    fmap_cropped = self.crop_feature_map(coords)
    fmap_cropped = F.adaptive_avg_pool2d(fmap_cropped, (1, 1))
    return np.squeeze(fmap_cropped.cpu().numpy())
def adaptive_avg_pool2d(a):
    return F.adaptive_avg_pool2d(a, 1)
def forward(self, input):
    x = self.features(input)
    x = F.relu(x, inplace=True)
    x = F.adaptive_avg_pool2d(x, (4, 4)).view(x.size(0), -1)
    x = self.fc(x)
    return x
def forward(self, x):
    out = self.extractor(x)
    out = F.adaptive_avg_pool2d(out, 1)
    out = out.view(out.size(0), -1)
    out = self.fc(out)
    return out
def forward(self, x):
    if self.transform_input:
        x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
        x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
    # N x 3 x 299 x 299
    x = self.Conv2d_1a_3x3(x)
    # N x 32 x 149 x 149
    x = self.Conv2d_2a_3x3(x)
    # N x 32 x 147 x 147
    x = self.Conv2d_2b_3x3(x)
    # N x 64 x 147 x 147
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 64 x 73 x 73
    x = self.Conv2d_3b_1x1(x)
    # N x 80 x 73 x 73
    x = self.Conv2d_4a_3x3(x)
    # N x 192 x 71 x 71
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 192 x 35 x 35
    x = self.Mixed_5b(x)
    # N x 256 x 35 x 35
    x = self.Mixed_5c(x)
    # N x 288 x 35 x 35
    x = self.Mixed_5d(x)
    # N x 288 x 35 x 35
    x = self.Mixed_6a(x)
    # N x 768 x 17 x 17
    x = self.Mixed_6b(x)
    # N x 768 x 17 x 17
    x = self.Mixed_6c(x)
    # N x 768 x 17 x 17
    x = self.Mixed_6d(x)
    # N x 768 x 17 x 17
    x = self.Mixed_6e(x)
    # N x 768 x 17 x 17
    if self.training and self.aux_logits:
        aux = self.AuxLogits(x)
    # N x 768 x 17 x 17
    x = self.Mixed_7a(x)
    # N x 1280 x 8 x 8
    x = self.Mixed_7b(x)
    # N x 2048 x 8 x 8
    x = self.Mixed_7c(x)
    # N x 2048 x 8 x 8
    # Adaptive average pooling
    x = F.adaptive_avg_pool2d(x, (1, 1))
    # N x 2048 x 1 x 1
    x = F.dropout(x, training=self.training)
    # N x 2048 x 1 x 1
    x = torch.flatten(x, 1)
    # N x 2048
    x = self.fc(x)
    # N x 1000 (num_classes)
    if self.training and self.aux_logits:
        return _InceptionOutputs(x, aux)
    return x
def forward(self, x):
    batchsize = x.shape[0]

    x = self.mobilenet0_conv0(x)
    x = self.mobilenet0_conv1(x)
    x = self.mobilenet0_conv2(x)
    x = self.mobilenet0_conv3(x)
    x = self.mobilenet0_conv4(x)
    x = self.mobilenet0_conv5(x)
    x = self.mobilenet0_conv6(x)
    x = self.mobilenet0_conv7(x)
    x = self.mobilenet0_conv8(x)
    x = self.mobilenet0_conv9(x)
    x10 = self.mobilenet0_conv10(x)
    x = self.mobilenet0_conv11(x10)
    x = self.mobilenet0_conv12(x)
    x = self.mobilenet0_conv13(x)
    x = self.mobilenet0_conv14(x)
    x = self.mobilenet0_conv15(x)
    x = self.mobilenet0_conv16(x)
    x = self.mobilenet0_conv17(x)
    x = self.mobilenet0_conv18(x)
    x = self.mobilenet0_conv19(x)
    x = self.mobilenet0_conv20(x)
    x = self.mobilenet0_conv21(x)
    x22 = self.mobilenet0_conv22(x)
    x = self.mobilenet0_conv23(x22)
    x = self.mobilenet0_conv24(x)
    x = self.mobilenet0_conv25(x)
    x = self.mobilenet0_conv26(x)

    # Stride-32 detection head.
    o1 = self.rf_c3_lateral(x)
    o2 = self.rf_c3_det_conv1(o1)
    o3 = self.rf_c3_det_context_conv1(o1)
    o4 = self.rf_c3_det_context_conv2(o3)
    o5 = self.rf_c3_det_context_conv3_1(o3)
    o6 = self.rf_c3_det_context_conv3_2(o5)
    o7 = torch.cat((o2, o4, o6), 1)
    o8 = self.rf_c3_det_concat_relu(o7)

    cls32 = self.face_rpn_cls_score_stride32(o8)
    cls32_shape = cls32.shape
    cls32 = cls32.view(batchsize, 2, -1, cls32_shape[3])
    cls32 = self.face_rpn_cls_score_stride32_softmax(cls32)
    cls32 = cls32.view(batchsize, 4, -1, cls32_shape[3])
    bbox32 = self.face_rpn_bbox_pred_stride32(o8)
    landmark32 = self.face_rpn_landmark_pred_stride32(o8)

    # Stride-16 detection head; adaptive_avg_pool2d aligns the upsampled map
    # to the lateral feature's spatial size before the add.
    p1 = self.rf_c2_lateral(x22)
    p2 = self.rf_c3_upsampling(o1)
    p2 = F.adaptive_avg_pool2d(p2, (p1.shape[2], p1.shape[3]))
    p3 = p1 + p2
    p4 = self.rf_c2_aggr(p3)
    p5 = self.rf_c2_det_conv1(p4)
    p6 = self.rf_c2_det_context_conv1(p4)
    p7 = self.rf_c2_det_context_conv2(p6)
    p8 = self.rf_c2_det_context_conv3_1(p6)
    p9 = self.rf_c2_det_context_conv3_2(p8)
    p10 = torch.cat((p5, p7, p9), 1)
    p10 = self.rf_c2_det_concat_relu(p10)

    cls16 = self.face_rpn_cls_score_stride16(p10)
    cls16_shape = cls16.shape
    cls16 = cls16.view(batchsize, 2, -1, cls16_shape[3])
    cls16 = self.face_rpn_cls_score_stride16_softmax(cls16)
    cls16 = cls16.view(batchsize, 4, -1, cls16_shape[3])
    bbox16 = self.face_rpn_bbox_pred_stride16(p10)
    landmark16 = self.face_rpn_landmark_pred_stride16(p10)

    # Stride-8 detection head.
    q1 = self.rf_c1_red_conv(x10)
    q2 = self.rf_c2_upsampling(p4)
    q2 = F.adaptive_avg_pool2d(q2, (q1.shape[2], q1.shape[3]))
    q3 = q1 + q2
    q4 = self.rf_c1_aggr(q3)
    q5 = self.rf_c1_det_conv1(q4)
    q6 = self.rf_c1_det_context_conv1(q4)
    q7 = self.rf_c1_det_context_conv2(q6)
    q8 = self.rf_c1_det_context_conv3_1(q6)
    q9 = self.rf_c1_det_context_conv3_2(q8)
    q10 = torch.cat((q5, q7, q9), 1)
    q10 = self.rf_c2_det_concat_relu(q10)

    cls8 = self.face_rpn_cls_score_stride8(q10)
    cls8_shape = cls8.shape
    cls8 = cls8.view(batchsize, 2, -1, cls8_shape[3])
    cls8 = self.face_rpn_cls_score_stride8_softmax(cls8)
    cls8 = cls8.view(batchsize, 4, -1, cls8_shape[3])
    bbox8 = self.face_rpn_bbox_pred_stride8(q10)
    landmark8 = self.face_rpn_landmark_pred_stride8(q10)

    detections = [cls32, bbox32, landmark32,
                  cls16, bbox16, landmark16,
                  cls8, bbox8, landmark8]
    return detections
def _compute_grad_weights(self, grads):
    return F.adaptive_avg_pool2d(grads, 1)
def forward(self, x):
    batch_size, _, _, _ = x.shape

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    x[:, 0, :, :] -= mean[0]
    x[:, 0, :, :] /= std[0]
    x[:, 1, :, :] -= mean[1]
    x[:, 1, :, :] /= std[1]
    x[:, 2, :, :] -= mean[2]
    x[:, 2, :, :] /= std[2]

    # depth encoding / CoordConv
    # coord_scale = 1 / std[0]
    # coord_x = (torch.abs(torch.linspace(-1, 1, steps=x.size(3))) - 0.5) * coord_scale
    # x[:, 1] = coord_x.unsqueeze(0).expand_as(x[:, 1])
    # coord_y = (torch.linspace(-1, 1, steps=x.size(2))) * coord_scale
    # x[:, 2] = coord_y.unsqueeze(-1).expand_as(x[:, 2])

    e1 = self.encoder1(x)   # ; print('e1', e1.size())
    e2 = self.encoder2(e1)  # ; print('e2', e2.size())
    e3 = self.encoder3(e2)  # ; print('e3', e3.size())
    e4 = self.encoder4(e3)  # ; print('e4', e4.size())
    e5 = self.encoder5(e4)  # ; print('e5', e5.size())

    c = self.center(self.pool(e5))  # ; print('c', c.size())

    d5 = self.decoder5(c, e5)   # ; print('d5', d5.size())
    d4 = self.decoder4(d5, e4)  # ; print('d4', d4.size())
    d3 = self.decoder3(d4, e3)  # ; print('d3', d3.size())
    d2 = self.decoder2(torch.cat((d3, e2), 1))  # ; print('d2', d2.size())
    d1 = self.decoder1(d2, e1)  # ; print('d1', d1.size())

    d1_size = d1.size()[2:]
    upsampler = upsample(size=d1_size)
    u5 = upsampler(d5)
    u4 = upsampler(d4)
    u3 = upsampler(d3)
    u2 = upsampler(d2)

    d = torch.cat((d1, u2, u3, u4, u5), 1)
    # logit = self.logit(d)  # ; print(logit.size())

    fuse_pixel = self.fuse_pixel(d)
    logit_pixel = (
        self.logit_pixel1(d1),
        self.logit_pixel2(u2),
        self.logit_pixel3(u3),
        self.logit_pixel4(u4),
        self.logit_pixel5(u5),
    )

    e = F.adaptive_avg_pool2d(e5, output_size=1).view(batch_size, -1)  # image pool
    e = F.dropout(e, p=0.50, training=self.training)
    fuse_image = self.fuse_image(e)
    logit_image = self.logit_image(fuse_image).view(-1)

    # print(fuse_pixel.size())
    # print(fuse_image.size())

    # F.upsample is deprecated; F.interpolate is the current equivalent.
    logit = self.logit(torch.cat([  # fuse
        fuse_pixel,
        F.interpolate(fuse_image.view(batch_size, -1, 1, 1),
                      scale_factor=128, mode='nearest')
    ], 1))

    return logit, logit_pixel, logit_image