def predict_frame(self, oriImg):
    """Estimate multi-person body poses for one image.

    Pipeline:
      1. Run the network at every scale in ``self.param_['scale_search']``,
         upsample both outputs to the input resolution and average them.
      2. Per body part, smooth the heatmap and keep local maxima above
         ``self.param_['thre1']``.
      3. Score every candidate limb by sampling the part-affinity field (PAF)
         along the segment joining its two end points.
      4. Greedily merge the accepted limbs into per-person skeletons and drop
         skeletons with too few parts or too low an average score.

    Args:
        oriImg: Image as a NumPy array indexed ``[row, col, channel]``
            (presumably as loaded by OpenCV -- confirm with the caller).

    Returns:
        A list with one entry per retained skeleton.  Each entry has 18
        items, one per body part: ``[col, row]`` (columns 0 and 1 of the peak
        table) for a detected part, or ``None`` when the part is missing.
    """
    img_h, img_w = oriImg.shape[0], oriImg.shape[1]
    stride = self.model_['stride']

    # Multiplier: a pyramid-based scaling method to evaluate the image at
    # several scales.
    multiplier = [x * self.model_['boxsize'] / img_h for x in self.param_['scale_search']]

    # One slot per scale; averaged after the loop.
    heatmap_avg = torch.zeros((len(multiplier), 19, img_h, img_w)).cuda()
    paf_avg = torch.zeros((len(multiplier), 38, img_h, img_w)).cuda()

    for m, scale in enumerate(multiplier):
        # Resize, pad to a multiple of the detection stride, and convert to a
        # (1, C, H, W) float array scaled to roughly [-0.5, 0.5).
        imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, _ = util.padRightDownCorner(imageToTest, stride, self.model_['padValue'])
        imageToTest_padded = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5

        feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
        output1, output2 = self.model(feed)

        # Scale prediction outputs to the original image size.
        heatmap = nn.UpsamplingBilinear2d((img_h, img_w)).cuda()(output2)
        paf = nn.UpsamplingBilinear2d((img_h, img_w)).cuda()(output1)

        heatmap_avg[m] = heatmap[0].data
        paf_avg[m] = paf[0].data

    # Average over scales and move the channel axis last: (H, W, C).
    heatmap_avg = T.transpose(T.transpose(T.squeeze(T.mean(heatmap_avg, 0)), 0, 1), 1, 2).cpu().numpy()
    paf_avg = T.transpose(T.transpose(T.squeeze(T.mean(paf_avg, 0)), 0, 1), 1, 2).cpu().numpy()

    # ---- Heatmap peaks (non-maximum suppression) -------------------------
    all_peaks = []
    peak_counter = 0
    for part in range(18):
        # Smooth the heatmap to remove high-frequency variation.
        map_ori = heatmap_avg[:, :, part]
        smoothed = gaussian_filter(map_ori, sigma=3)

        # Copies of the smoothed map shifted by one pixel in each direction.
        map_left = np.zeros(smoothed.shape)
        map_left[1:, :] = smoothed[:-1, :]
        map_right = np.zeros(smoothed.shape)
        map_right[:-1, :] = smoothed[1:, :]
        map_up = np.zeros(smoothed.shape)
        map_up[:, 1:] = smoothed[:, :-1]
        map_down = np.zeros(smoothed.shape)
        map_down[:, :-1] = smoothed[:, 1:]

        peaks_binary = np.logical_and.reduce((
            smoothed >= map_left,
            smoothed >= map_right,
            smoothed >= map_up,
            smoothed >= map_down,
            smoothed > self.param_['thre1'],
        ))
        peak_rows, peak_cols = np.nonzero(peaks_binary)
        # Materialise the pairs: on Python 3 ``zip`` is a one-shot iterator
        # with no ``len``.  Note the reversed order: (col, row).
        peaks = list(zip(peak_cols, peak_rows))

        # Each peak becomes (col, row, score, global_id); the score is read
        # from the unsmoothed map.
        peaks_with_score = [p + (map_ori[p[1], p[0]],) for p in peaks]
        peak_ids = range(peak_counter, peak_counter + len(peaks))
        peaks_with_score_and_id = [p + (pid,) for p, pid in zip(peaks_with_score, peak_ids)]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # ---- Limb candidates scored along the PAF ----------------------------
    mapIdx = self.md.get_mapIdx()
    limbSeq = self.md.get_limbseq()

    connection_all = []
    special_k = []  # limb types for which one of the two parts has no peak
    mid_num = 10    # number of PAF samples taken along each candidate limb
    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)

        if nA == 0 or nB == 0:
            # One end of this limb type was not detected at all.
            special_k.append(k)
            connection_all.append([])
            continue

        connection_candidate = []
        for i in range(nA):
            for j in range(nB):
                # Unit vector pointing from candidate A to candidate B.
                vec = np.subtract(candB[j][:2], candA[i][:2])
                norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                if norm == 0:
                    # Coincident peaks: direction undefined, skip the pair.
                    continue
                vec = np.divide(vec, norm)

                # Sample points between the two peaks; a list so it can be
                # indexed on Python 3.
                startend = list(zip(
                    np.linspace(candA[i][0], candB[j][0], num=mid_num),
                    np.linspace(candA[i][1], candB[j][1], num=mid_num),
                ))
                vec_x = np.array([score_mid[int(round(pt[1])), int(round(pt[0])), 0] for pt in startend])
                vec_y = np.array([score_mid[int(round(pt[1])), int(round(pt[0])), 1] for pt in startend])

                # Projection of the PAF onto the limb direction, with a
                # penalty for limbs longer than half the image height.
                score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(0.5 * img_h / norm - 1, 0)

                criterion1 = len(np.nonzero(score_midpts > self.param_['thre2'])[0]) > 0.8 * len(score_midpts)
                criterion2 = score_with_dist_prior > 0
                if criterion1 and criterion2:
                    connection_candidate.append(
                        [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

        # Greedy matching: best score first, each peak used at most once.
        connection_candidate = sorted(connection_candidate, key=lambda cand: cand[2], reverse=True)
        connection = np.zeros((0, 5))
        for cand in connection_candidate:
            i, j, s = cand[0:3]
            if i not in connection[:, 3] and j not in connection[:, 4]:
                connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                if len(connection) >= min(nA, nB):
                    break
        connection_all.append(connection)

    # ---- Assemble limbs into skeletons -----------------------------------
    # Row layout: 18 part ids (-1 = missing), total score, number of parts.
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k in special_k:
            continue
        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(limbSeq[k]) - 1

        for i in range(len(connection_all[k])):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            elif found == 2:
                # If the two skeletons are disjoint, merge them.
                j1, j2 = subset_idx
                membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                else:
                    # Overlapping skeletons: treat like ``found == 1``.
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
            elif not found and k < 17:
                # Neither end belongs to an existing skeleton: start a new one.
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

    # Drop skeletons with fewer than 4 parts or a low mean score.
    deleteIdx = [
        i for i in range(len(subset))
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4
    ]
    subset = np.delete(subset, deleteIdx, axis=0)

    # ---- Build the return structure --------------------------------------
    joints_per_skeleton = [[] for _ in range(len(subset))]
    for n in range(len(subset)):
        for part in range(18):
            cidx = subset[n][part]
            if cidx != -1:
                first = candidate[cidx.astype(int), 0]
                second = candidate[cidx.astype(int), 1]
                joints_per_skeleton[n].append([first, second])
            else:
                joints_per_skeleton[n].append(None)

    return joints_per_skeleton
def forward(self, x):
    """Run the detector and return localisation / confidence predictions.

    Steps, in order: the first 23 ``self.vgg`` layers followed by
    ``self.L2Norm``; a side branch (``convs_4`` -> ``bn1`` -> ``relu``); the
    remaining ``self.vgg`` layers; the ``self.spp`` modules (the first one is
    fed the side branch, the others the backbone output); the
    ``self.bottleneck`` modules; a cross-scale fusion ("MSCA") that resizes
    every other level to the current level and concatenates; and finally the
    ``pfp_loc`` / ``pfp_conf`` heads.

    Args:
        x: Input batch tensor.

    Returns:
        When ``self.status == "test"``: the result of ``self.detect``.
        Otherwise a tuple ``(loc, conf, priors)`` where ``loc`` is viewed as
        ``(batch, -1, 4)`` and ``conf`` as ``(batch, -1, num_classes)``.
    """
    # Backbone up to conv4_3 relu, then L2 normalisation.
    for k in range(23):
        x = self.vgg[k](x)
    x = self.L2Norm(x)

    conv4 = self.relu(self.bn1(self.convs_4(x)))

    # Remaining backbone layers (up to fc7).
    for k in range(23, len(self.vgg)):
        x = self.vgg[k](x)

    # Pyramid features: level 0 comes from the conv4 branch, the rest from
    # the backbone output.
    feature_h = [v(conv4) if k == 0 else v(x) for k, v in enumerate(self.spp)]
    feature_l = [self.relu(self.bn2(v(feature_h[k]))) for k, v in enumerate(self.bottleneck)]

    # MSCA: for level i, shallower levels are average-pooled down, deeper
    # levels are bilinearly upsampled, and the level's own high feature sits
    # at position i of the concatenation.
    sources = []
    n_levels = len(feature_h)
    for i in range(n_levels):
        fused = []
        for j in range(i):
            scale = (1 / 2.) ** (i - j)
            height = int(feature_l[j].size()[2] * scale)
            width = int(feature_l[j].size()[3] * scale)
            fused.append(nn.AdaptiveAvgPool2d(output_size=(height, width))(feature_l[j]))
        fused.append(feature_h[i])
        for j in range(i + 1, n_levels):
            fused.append(nn.UpsamplingBilinear2d(scale_factor=2 ** (j - i))(feature_l[j]))
        sources.append(self.relu(self.bn2(self.convs(torch.cat(fused, 1)))))

    # Prediction heads.  The loop variable deliberately stays ``x``: the
    # test branch below reads ``x.data`` after the loop.
    arm_loc = []
    arm_conf = []
    for x, loc_head, conf_head in zip(sources, self.pfp_loc, self.pfp_conf):
        arm_loc.append(loc_head(x).permute(0, 2, 3, 1).contiguous())
        arm_conf.append(conf_head(x).permute(0, 2, 3, 1).contiguous())

    arm_loc = torch.cat([o.view(o.size(0), -1) for o in arm_loc], 1)
    arm_conf = torch.cat([o.view(o.size(0), -1) for o in arm_conf], 1)

    loc_preds = arm_loc.view(arm_loc.size(0), -1, 4)
    conf_preds = arm_conf.view(arm_conf.size(0), -1, self.num_classes)

    if self.status == "test":
        return self.detect(
            loc_preds,                       # loc preds
            self.softmax(conf_preds),        # conf preds
            self.priors.type(type(x.data)),  # default boxes
        )
    return (loc_preds, conf_preds, self.priors)
def __init__(self, in_channels, num_classes, base_channels=64):
    """Build the nested U-Net grid of convolution units and output heads.

    Modules are registered under the names ``op{level}_{depth}``,
    ``maxpool{level}``, ``upsample{level}_{depth}`` and ``out_{1..4}``.

    Args:
        in_channels: Channel count of the network input.
        num_classes: Output channels of each 1x1 prediction head.
        base_channels: Width of level 0; level ``i`` uses
            ``base_channels * 2**i`` channels.
    """
    super(UNetPlusPlus, self).__init__()
    self.block_i = 0

    # Channel width of each of the five resolution levels.
    widths = [base_channels * 2 ** level for level in range(5)]

    # Downsampling column: op{level}_0 followed by maxpool{level}.
    previous = in_channels
    for level, width in enumerate(widths):
        setattr(self, 'op%d_0' % level, ConvUnit(previous, width))
        setattr(self, 'maxpool%d' % level, nn.MaxPool2d(kernel_size=2, stride=2))
        previous = width

    # Upsampling nodes.  Node (level, depth) takes ``depth`` feature maps of
    # its own level's width plus one map of the next level's width.
    for depth in range(1, 5):
        for level in range(5 - depth):
            fan_in = widths[level] * depth + widths[level + 1]
            setattr(self, 'op%d_%d' % (level, depth), ConvUnit(fan_in, widths[level]))
            setattr(self, 'upsample%d_%d' % (level, depth),
                    nn.UpsamplingBilinear2d(scale_factor=2))

    # Multi-task heads: one 1x1 convolution per depth.
    for head in range(1, 5):
        setattr(self, 'out_%d' % head,
                nn.Conv2d(base_channels, num_classes, kernel_size=1, stride=1, padding=0))

    # Same initialiser for convolution and batch-norm layers.
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.BatchNorm2d)):
            base.init_weights(m, init_type='kaiming')
def __init__(
    self,
    cfg,
    in_channels,
    num_layers=3,
    num_filters=256,
    kernel_size=3,
    output_kernel_size=1,
    rot_output_dim=3,
    mask_output_dim=1,
    freeze=False,
    num_classes=1,
    rot_class_aware=False,
    mask_class_aware=False,
    norm="BN",
    num_gn_groups=32,
):
    """Build the decoder head stored as the ``self.features`` ModuleList.

    Layout: one stride-2 transposed convolution (+ norm + ReLU), then
    ``num_layers`` stages of two 3x3 conv/norm/ReLU triples, then one output
    convolution with ``mask_output_dim + rot_output_dim`` channels.

    When ``cfg.MODEL.CDPN.ROT_HEAD.ROT_CONCAT`` is true every stage starts
    with a 2x bilinear upsampling and its first convolution takes
    ``channels[-2 - i]`` extra input channels (``channels`` comes from
    ``resnet_spec``).  Otherwise the upsampling is skipped for the first
    stage and all convolutions are ``num_filters`` wide.

    Args:
        cfg: Configuration object; ``MODEL.CDPN`` entries are read.
        in_channels: Input channels of the transposed convolution.
        num_layers: Number of stages.
        num_filters: Width of every intermediate layer.
        kernel_size: Transposed-convolution kernel size (2, 3 or 4).
        output_kernel_size: Output convolution kernel size (1 or 3).
        rot_output_dim: Output channels for the rotation part; multiplied by
            ``num_classes`` when ``rot_class_aware`` is true.
        mask_output_dim: Output channels for the mask part; multiplied by
            ``num_classes`` when ``mask_class_aware`` is true.
        freeze: Stored on ``self.freeze``.
        num_classes: Multiplier used by the class-aware options.
        rot_class_aware: See ``rot_output_dim``.
        mask_class_aware: See ``mask_output_dim``.
        norm: Normalisation name forwarded to ``get_norm``.
        num_gn_groups: Group count forwarded to ``get_norm``.
    """
    super().__init__()

    self.freeze = freeze
    self.concat = cfg.MODEL.CDPN.ROT_HEAD.ROT_CONCAT

    assert kernel_size in (2, 3, 4), "Only support kenerl 2, 3 and 4"
    padding = 0 if kernel_size == 2 else 1
    output_padding = 1 if kernel_size == 3 else 0

    assert output_kernel_size in (1, 3), "Only support kenerl 1 and 3"
    if output_kernel_size == 1:
        pad = 0
    elif output_kernel_size == 3:
        pad = 1

    if self.concat:
        _, _, channels, _ = resnet_spec[cfg.MODEL.CDPN.BACKBONE.NUM_LAYERS]

    def conv_norm_relu(conv_in):
        # 3x3 convolution to ``num_filters`` channels + normalisation + ReLU.
        return [
            nn.Conv2d(conv_in, num_filters, kernel_size=3, stride=1, padding=1, bias=False),
            get_norm(norm, num_filters, num_gn_groups=num_gn_groups),
            nn.ReLU(inplace=True),
        ]

    layers = [
        nn.ConvTranspose2d(
            in_channels,
            num_filters,
            kernel_size=kernel_size,
            stride=2,
            padding=padding,
            output_padding=output_padding,
            bias=False,
        ),
        get_norm(norm, num_filters, num_gn_groups=num_gn_groups),
        nn.ReLU(inplace=True),
    ]

    for i in range(num_layers):
        # Without concatenation the first stage is not upsampled.
        if self.concat or i >= 1:
            layers.append(nn.UpsamplingBilinear2d(scale_factor=2))
        stage_in = num_filters + channels[-2 - i] if self.concat else num_filters
        layers.extend(conv_norm_relu(stage_in))
        layers.extend(conv_norm_relu(num_filters))

    self.rot_output_dim = rot_output_dim
    if rot_class_aware:
        self.rot_output_dim *= num_classes

    self.mask_output_dim = mask_output_dim
    if mask_class_aware:
        self.mask_output_dim *= num_classes

    layers.append(
        nn.Conv2d(
            num_filters,
            self.mask_output_dim + self.rot_output_dim,
            kernel_size=output_kernel_size,
            padding=pad,
            bias=True,
        )
    )
    self.features = nn.ModuleList(layers)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            normal_init(m, std=0.001)
        elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
            constant_init(m, 1)
def _preprocess(modules: List, up: bool) -> List: if up: modules.append(nn.UpsamplingBilinear2d(scale_factor=2)) return modules
def __init__(self, input_nc, output_nc):
    """Create the encoder (``down1..down8``), decoder (``up8..up0``) and the
    two RGB output heads.

    Args:
        input_nc: Channel count of the input image.
        output_nc: Channel count produced by each output head.
    """
    super(SEAUNet, self).__init__()

    def pooled(k, c_in, c_out):
        # Stride-2 average pooling followed by one SEAB block.
        return nn.Sequential(nn.AvgPool2d(3, 2, 1), SEAB(k, c_in, c_out, 32))

    def upsampled(k, c_in, c_out):
        # One SEAB block followed by 2x bilinear upsampling.
        return nn.Sequential(SEAB(k, c_in, c_out, 32),
                             nn.UpsamplingBilinear2d(scale_factor=2))

    def rgb_head():
        # 1x1 projection of 32 channels to ``output_nc`` with tanh.
        return nn.Sequential(nn.Conv2d(32, output_nc, kernel_size=1, bias=False),
                             nn.Tanh())

    # Encoder.
    self.down1 = nn.Sequential(
        nn.Conv2d(input_nc, 64, kernel_size=7, stride=2, padding=3),
        nn.ReLU(True),
        SEAB(4, 64, 64, 32),
    )
    self.down2 = pooled(4, 64, 64)
    self.down3 = pooled(4, 64, 128)
    self.down4 = pooled(4, 128, 128)
    self.down5 = pooled(8, 128, 256)
    self.down6 = pooled(8, 256, 256)
    self.down7 = pooled(8, 256, 512)
    self.down8 = pooled(8, 512, 512)

    # Decoder.  The bottleneck pools once more before upsampling.
    self.up8 = nn.Sequential(
        nn.AvgPool2d(2, 2),
        SEAB(8, 512, 512, 32),
        nn.UpsamplingBilinear2d(scale_factor=2),
    )
    self.up7 = upsampled(8, 1024, 512)
    self.up6 = upsampled(8, 1024, 256)
    self.up5 = upsampled(8, 512, 256)
    self.up4 = upsampled(4, 512, 128)
    self.up3 = upsampled(4, 256, 128)
    self.up2 = upsampled(4, 256, 64)
    self.up1 = upsampled(4, 128, 64)
    # The last stage upsamples with a transposed convolution instead.
    self.up0 = nn.Sequential(SEAB(4, 128, 64, 32),
                             nn.ConvTranspose2d(64, 64, 4, 2, 1))

    # Output heads; each consumes 32 channels.
    self.to_rgb_Bsub = rgb_head()
    self.to_rgb_Bout = rgb_head()
def __init__(self, cfg):
    """Assemble the ASPP module, decoder and backbone.

    The backbone is built *after* the weight-initialisation loop, so that
    loop only touches the layers created here.

    Args:
        cfg: Configuration object; reads ``MODEL_ASPP_OUTDIM``,
            ``MODEL_OUTPUT_STRIDE``, ``MODEL_SHORTCUT_DIM``,
            ``MODEL_SHORTCUT_KERNEL``, ``MODEL_NUM_CLASSES``,
            ``MODEL_BACKBONE`` and ``TRAIN_BN_MOM``.
    """
    super(deeplabv3plus, self).__init__()
    self.backbone = None
    self.backbone_layers = None

    aspp_dim = cfg.MODEL_ASPP_OUTDIM
    shortcut_dim = cfg.MODEL_SHORTCUT_DIM
    shortcut_kernel = cfg.MODEL_SHORTCUT_KERNEL
    bn_momentum = cfg.TRAIN_BN_MOM
    output_stride = cfg.MODEL_OUTPUT_STRIDE

    input_channel = 2048
    self.aspp = ASPP(dim_in=input_channel,
                     dim_out=aspp_dim,
                     rate=16 // output_stride,
                     bn_mom=bn_momentum)
    self.dropout1 = nn.Dropout(0.5)
    self.upsample4 = nn.UpsamplingBilinear2d(scale_factor=4)
    self.upsample_sub = nn.UpsamplingBilinear2d(scale_factor=output_stride // 4)

    # Projection of the low-level feature map.
    indim = 256
    self.shortcut_conv = nn.Sequential(
        nn.Conv2d(indim, shortcut_dim, shortcut_kernel, 1,
                  padding=shortcut_kernel // 2, bias=True),
        SynchronizedBatchNorm2d(shortcut_dim, momentum=bn_momentum),
        nn.ReLU(inplace=True),
    )

    # Fusion of the ASPP output with the projected low-level features.
    self.cat_conv = nn.Sequential(
        nn.Conv2d(aspp_dim + shortcut_dim, aspp_dim, 3, 1, padding=1, bias=True),
        SynchronizedBatchNorm2d(aspp_dim, momentum=bn_momentum),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Conv2d(aspp_dim, aspp_dim, 3, 1, padding=1, bias=True),
        SynchronizedBatchNorm2d(aspp_dim, momentum=bn_momentum),
        nn.ReLU(inplace=True),
        nn.Dropout(0.1),
    )
    self.cls_conv = nn.Conv2d(aspp_dim, cfg.MODEL_NUM_CLASSES, 1, 1, padding=0)

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, SynchronizedBatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    self.backbone = build_backbone(cfg.MODEL_BACKBONE, os=output_stride)
    self.backbone_layers = self.backbone.get_layers()
def __init__(self, input_channels=1, filter_sizes=None, l3_k_size=5, dilations=None):
    """Build the fully convolutional feature stack and four 1x1 heads.

    Args:
        input_channels: Channel count of the input image.
        filter_sizes: Four widths -- first conv pair, second conv pair,
            dilated convs, upsampling convs.  Defaults to
            ``[16, 16, 32, 16]``.
        l3_k_size: Kernel size of the two dilated convolutions.
        dilations: Dilation of each dilated convolution.  Defaults to
            ``[2, 4]``.
    """
    super().__init__()

    if filter_sizes is None:
        filter_sizes = [16, 16, 32, 16]
    if dilations is None:
        dilations = [2, 4]

    width_a = filter_sizes[0]
    width_b = filter_sizes[1]
    width_dil = filter_sizes[2]
    width_out = filter_sizes[3]

    def conv_relu(c_in, c_out, kernel, pad, dilation=None):
        # Stride-1 biased convolution followed by an in-place ReLU.
        if dilation is None:
            conv = nn.Conv2d(c_in, c_out, kernel_size=kernel, stride=1, padding=pad, bias=True)
        else:
            conv = nn.Conv2d(c_in, c_out, kernel_size=kernel, dilation=dilation,
                             stride=1, padding=pad, bias=True)
        return [conv, nn.ReLU(inplace=True)]

    layers = []
    # Four plain convolutions, pooled after each pair.
    layers += conv_relu(input_channels, width_a, 11, 5)
    layers += conv_relu(width_a, width_a, 5, 2)
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    layers += conv_relu(width_a, width_b, 5, 2)
    layers += conv_relu(width_b, width_b, 5, 2)
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))

    # Dilated convolutions; padding keeps the spatial size unchanged.
    layers += conv_relu(width_b, width_dil, l3_k_size,
                        l3_k_size // 2 * dilations[0], dilation=dilations[0])
    layers += conv_relu(width_dil, width_dil, l3_k_size,
                        l3_k_size // 2 * dilations[1], dilation=dilations[1])

    # Output layers: two 2x upsamplings undo the two poolings.
    layers += [
        nn.UpsamplingBilinear2d(scale_factor=2),
        nn.Conv2d(width_dil, width_out, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.UpsamplingBilinear2d(scale_factor=2),
        nn.Conv2d(width_out, width_out, 3, padding=1),
        nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*layers)

    self.pos_output = nn.Conv2d(width_out, 1, kernel_size=1)
    self.cos_output = nn.Conv2d(width_out, 1, kernel_size=1)
    self.sin_output = nn.Conv2d(width_out, 1, kernel_size=1)
    self.width_output = nn.Conv2d(width_out, 1, kernel_size=1)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            nn.init.xavier_uniform_(m.weight, gain=1)
def __init__(self,
             input_nc,
             output_nc,
             ngf=64,
             use_dropout=False,
             n_blocks=4,
             padding_type='reflect',
             anime=False):
    """Build the encoder, CAM parameters, MLP, AdaLIN blocks and decoder.

    Args:
        input_nc: Channel count of the input image.
        output_nc: Channel count of the generated image.
        ngf: Width of the first encoder convolution.
        use_dropout: Forwarded to every ``ResnetBlock``.
        n_blocks: Number of encoder ``ResnetBlock`` s and of
            ``ResBlockByAdaLIN`` blocks; must be non-negative.
        padding_type: Forwarded to every ``ResnetBlock``.
        anime: Forwarded to every ``ResBlockByAdaLIN``.
    """
    assert (n_blocks >= 0)
    super(Generator, self).__init__()
    self.n_blocks = n_blocks
    instance_norm_layer = functools.partial(nn.InstanceNorm2d,
                                            affine=True,
                                            track_running_stats=False)
    n_downsampling = 2

    # ---- Encoder: 7x7 stem, stride-2 convolutions, residual blocks ----
    encoder_layers = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=True),
        nn.InstanceNorm2d(ngf, affine=True),
        nn.ReLU(True),
    ]
    width = ngf
    for _ in range(n_downsampling):
        # Each downsampling step doubles the channel count.
        encoder_layers.extend([
            nn.Conv2d(width, width * 2, kernel_size=3, stride=2, padding=1, bias=True),
            nn.InstanceNorm2d(width * 2, affine=True),
            nn.ReLU(True),
        ])
        width = width * 2
    encoder_layers.extend(
        ResnetBlock(width,
                    padding_type=padding_type,
                    norm_layer=instance_norm_layer,
                    use_dropout=use_dropout,
                    use_bias=True)
        for _ in range(n_blocks))
    self.encoder = nn.Sequential(*encoder_layers)

    # ---- CAM ----
    self.gap = nn.AdaptiveAvgPool2d(1)
    self.gmp = nn.AdaptiveMaxPool2d(1)
    self.cam_w = nn.Parameter(torch.FloatTensor(width, 1))
    nn.init.xavier_uniform_(self.cam_w)
    self.cam_bias = nn.Parameter(torch.FloatTensor(1))
    self.cam_bias.data.fill_(0)
    self.conv1x1 = nn.Sequential(
        nn.Conv2d(2 * width, width, 1, 1),
        nn.ReLU(),
    )

    # ---- MLP and AdaLIN residual blocks ----
    self.mlp = MLP(width)
    self.adain_resblocks = nn.ModuleList(
        [ResBlockByAdaLIN(width, anime) for _ in range(n_blocks)])

    # ---- Decoder: each step upsamples 2x and halves the channels ----
    decoder_layers = []
    for _ in range(n_downsampling):
        decoder_layers.extend([
            nn.UpsamplingBilinear2d(scale_factor=2),
            nn.ReflectionPad2d(1),
            nn.Conv2d(width, width // 2, 3, 1, 0),
            LayerInstanceNorm(width // 2),
            nn.ReLU(),
        ])
        width = width // 2
    decoder_layers.extend([
        nn.ReflectionPad2d(3),
        nn.Conv2d(ngf, output_nc, 7, 1),
        nn.Tanh(),
    ])
    self.decoder = nn.Sequential(*decoder_layers)
def __init__(self, n_channels=16, n_blocks=1):
    """Create the encoder, fully connected bottleneck and decoder layers.

    The sizes quoted in the comments are the original author's notes for a
    120x120 input; they are not checked by the code.

    Args:
        n_channels: Output channels of the input 1x1 convolution; the
            residual stages are built with multiples of this value.
        n_blocks: Block count passed to ``self.make_block_layers``.
    """
    super(SRResNet, self).__init__()
    self.n_channels = n_channels

    def stage(block_cls, multiplier):
        # One residual stage built at ``n_channels * multiplier``.
        return self.make_block_layers(n_blocks, block_cls,
                                      self.n_channels * multiplier)

    self.inConv = nn.Conv2d(in_channels=3,
                            out_channels=self.n_channels,
                            kernel_size=1,
                            stride=1,
                            padding=0,
                            bias=True)
    self.inRelu = nn.ReLU(inplace=True)  # image is 120*120*16

    # Encoder.
    self.resBlocks1 = stage(DownResidual, 1)   # 60*60*32
    self.resBlocks2 = stage(DownResidual, 2)   # 30*30*64
    self.resBlocks3 = stage(DownResidual, 4)   # 15*15*128
    self.resBlocks4 = stage(DownResidual, 8)   # 8*8*256
    self.resBlocks5 = stage(DownResidual, 16)  # 4*4*512
    self.resBlocks6 = stage(DownResidual, 32)  # 2*2*1024

    # Fully connected bottleneck.
    self.fc1 = nn.Linear(2 * 2 * 1024, 1 * 1 * 1024)
    self.fc_relu1 = nn.ReLU(inplace=True)
    self.fc2 = nn.Linear(1 * 1 * 1024, 4 * 4 * 512)
    self.fc_relu2 = nn.ReLU(inplace=True)

    # Decoder.
    self.resBlocks7 = stage(UpResidual, 32)   # 8*8*256
    self.resBlocks8 = stage(UpResidual, 16)   # 16*16*128
    self.resBlocks9 = stage(UpResidual, 8)    # 32*32*64
    self.resBlocks10 = stage(UpResidual, 4)   # 64*64*32
    self.resBlocks11 = stage(UpResidual, 2)   # 128*128*16

    # Despite its name this layer bilinearly resizes to a fixed 120x120.
    self.downsample = nn.UpsamplingBilinear2d(size=(120, 120))

    self.Conv1 = nn.Conv2d(in_channels=16,
                           out_channels=3,
                           kernel_size=1,
                           padding=0,
                           bias=True)
    self.bn1 = nn.BatchNorm2d(3)
    self.relu1 = nn.ReLU(inplace=True)
    self.outConv = nn.Conv2d(in_channels=3,
                             out_channels=3,
                             kernel_size=1,
                             padding=0,
                             bias=True)
    self.tan = nn.Tanh()
def __init__(self, mode='combined', activate=None):
    """Create the layers used by the network.

    Args:
        mode: Stored on ``self.mode``.
        activate: Stored on ``self.activate``.
    """
    super(SegNet, self).__init__()
    _, self.data_type = get_device()
    self.mode = mode
    self.activate = activate

    # Parameter-free helpers.
    self.dropout = nn.Dropout2d(0.3)
    self.maxpool2d = nn.MaxPool2d(2)
    self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)

    # Every (de)convolution is 3x3, stride 1, padding 1, with bias, so the
    # spatial size is preserved.
    same_3x3 = dict(kernel_size=3, stride=1, padding=1, bias=True)

    # Encoder: 3 -> 64 -> 128 -> 256 -> 512 channels.
    self.conv1 = nn.Conv2d(3, 64, **same_3x3)
    self.conv2 = nn.Conv2d(64, 128, **same_3x3)
    self.conv3 = nn.Conv2d(128, 256, **same_3x3)
    self.conv4 = nn.Conv2d(256, 512, **same_3x3)

    # Decoder: 512 -> 256 -> 128 -> 64 -> 3 channels.
    self.deconv1 = nn.ConvTranspose2d(512, 256, **same_3x3)
    self.deconv2 = nn.ConvTranspose2d(256, 128, **same_3x3)
    self.deconv3 = nn.ConvTranspose2d(128, 64, **same_3x3)
    self.deconv4 = nn.ConvTranspose2d(64, 3, **same_3x3)

    self.batch_norm = nn.BatchNorm2d(3)

    # Thresholds: values not greater than the cut-off are replaced by 0.
    self.threshold1 = nn.Threshold(0.25, 0)
    self.threshold2 = nn.Threshold(0.5, 0)
    self.threshold3 = nn.Threshold(0.75, 0)

    self.maxpool1d = nn.MaxPool1d(3, stride=2, return_indices=True)
def main():
    """Create the model and start the training.

    Trains a segmentation network on the source loader (``GTA5DataSet``)
    together with a feature-level discriminator that separates source from
    target (``cityscapesDataSet``) features.  Every
    ``args.save_pred_every`` iterations both networks are checkpointed, the
    validation loader is evaluated, and the mIoU is appended to
    ``args.results_dir``.  Training stops after ``args.num_steps_stop``
    iterations.

    All settings are read from the module-level ``args``.

    NOTE(review): the source this was reconstructed from had lost its
    indentation.  The nesting below (generator step inside the ``iter_size``
    loop, discriminator step after it) is the most plausible reading and is
    equivalent to the alternatives when ``args.iter_size == 1``; confirm
    against the original file.
    """

    def parse_size(text):
        # "H,W" -> (H, W)
        h, w = map(int, text.split(','))
        return (h, w)

    def domain_target(d_out, label):
        # Tensor shaped like the discriminator output, filled with ``label``.
        return Variable(
            torch.FloatTensor(d_out.data.size()).fill_(label)).cuda(args.gpu)

    def save_snapshot(step):
        torch.save(model.state_dict(),
                   osp.join(args.snapshot_dir, 'GTA5_' + str(step) + '.pth'))
        torch.save(model_D1.state_dict(),
                   osp.join(args.snapshot_dir, 'GTA5_' + str(step) + '_D1.pth'))

    input_size = parse_size(args.input_size)
    input_size_target = parse_size(args.input_size_target)
    com_size = parse_size(args.com_size)

    # ---- Validation data -------------------------------------------------
    testloader = data.DataLoader(cityscapesDataSet(args.data_dir_target,
                                                   args.data_list_target_val,
                                                   crop_size=input_size,
                                                   mean=IMG_MEAN,
                                                   scale=False,
                                                   mirror=False,
                                                   set=args.set_val),
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=True)

    with open('./dataset/cityscapes_list/info.json', 'r') as fp:
        info = json.load(fp)
    # ``np.int`` was only an alias of the builtin and is gone in NumPy 1.24.
    mapping = np.array(info['label2train'], dtype=int)

    label_path_list = './dataset/cityscapes_list/label.txt'
    with open(label_path_list, 'r') as fp:
        gt_imgs = fp.read().splitlines()
    gt_imgs = [join('./data/Cityscapes/data/gtFine/val', x) for x in gt_imgs]
    interp_val = nn.UpsamplingBilinear2d(size=(com_size[1], com_size[0]))

    cudnn.enabled = True

    # ---- Networks --------------------------------------------------------
    if args.model == 'DeepLab':
        model = Res_Deeplab(num_classes=args.num_classes)
        # NOTE: ``torch.load`` unpickles the file; only restore checkpoints
        # from a trusted source.
        saved_state_dict = torch.load(args.restore_from)
        model.load_state_dict(saved_state_dict)

    model.train()
    model.cuda(args.gpu)

    cudnn.benchmark = True

    model_D1 = FCDiscriminator(num_classes=args.num_classes)
    model_D1.train()
    model_D1.cuda(args.gpu)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    # ---- Training data ---------------------------------------------------
    trainloader = data.DataLoader(GTA5DataSet(args.data_dir,
                                              args.data_list,
                                              max_iters=args.num_steps * args.iter_size * args.batch_size,
                                              crop_size=input_size,
                                              scale=args.random_scale,
                                              mirror=args.random_mirror,
                                              mean=IMG_MEAN),
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)
    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(cityscapesDataSet(args.data_dir_target,
                                                     args.data_list_target,
                                                     max_iters=args.num_steps * args.iter_size * args.batch_size,
                                                     crop_size=input_size_target,
                                                     scale=False,
                                                     mirror=args.random_mirror,
                                                     mean=IMG_MEAN,
                                                     set=args.set),
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)
    targetloader_iter = enumerate(targetloader)

    # ---- Optimisers and losses -------------------------------------------
    # model.optim_parameters(args) handles per-model learning-rate settings.
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()

    optimizer_D1 = optim.Adam(model_D1.parameters(),
                              lr=args.learning_rate_D,
                              betas=(0.9, 0.99))
    optimizer_D1.zero_grad()

    bce_loss = torch.nn.BCEWithLogitsLoss()
    interp = nn.UpsamplingBilinear2d(size=(input_size[1], input_size[0]))

    # Labels for adversarial training.
    source_label = 0
    target_label = 1

    for i_iter in range(args.num_steps):
        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0

        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)

        optimizer_D1.zero_grad()
        adjust_learning_rate_D(optimizer_D1, i_iter)

        for sub_i in range(args.iter_size):
            # ---- Train G: the discriminator is frozen ----
            for param in model_D1.parameters():
                param.requires_grad = False

            _, batch = next(trainloader_iter)
            images_source, labels, _, _ = batch
            images_source = Variable(images_source).cuda(args.gpu)

            pred1, feature = model(images_source)
            pred1 = interp(pred1)
            loss_seg1 = loss_calc(pred1, labels, args.gpu)

            D_out1 = model_D1(feature)
            loss_D1_source = bce_loss(D_out1, domain_target(D_out1, source_label))

            _, batch = next(targetloader_iter)
            images_target, _, _ = batch
            images_target = Variable(images_target).cuda(args.gpu)

            _, feature_target = model(images_target)
            D_out1 = model_D1(feature_target)
            loss_D1_target = bce_loss(D_out1, domain_target(D_out1, target_label))

            # The generator maximises the discriminator loss, hence the
            # negated terms.
            loss = loss_seg1 + args.lambda_adv_target1 * (-loss_D1_source - loss_D1_target)
            loss.backward()
            loss_seg_value1 += loss_seg1.data.item()
            loss_adv_target = loss_D1_source + loss_D1_target
            loss_adv_target_value1 = loss_adv_target.data.item()
            optimizer.step()

            # ---- Train D on detached features of the same batches ----
            for param in model_D1.parameters():
                param.requires_grad = True

            pred1, feature = model(images_source)
            feature = feature.detach()
            D_out1 = model_D1(feature)
            loss_D1_source = bce_loss(D_out1, domain_target(D_out1, source_label))

            _, feature_target = model(images_target)
            feature_target = feature_target.detach()
            D_out1 = model_D1(feature_target)
            loss_D1_target = bce_loss(D_out1, domain_target(D_out1, target_label))

            loss_D1 = loss_D1_source + loss_D1_target
            loss_D1.backward()
            loss_D_value1 = loss_D1.data.item()

        optimizer_D1.step()

        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_adv1 = {3:.3f} loss_D1 = {4:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1,
                    loss_adv_target_value1, loss_D_value1))

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            save_snapshot(i_iter)
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            save_snapshot(i_iter)

            # ---- Validation ----
            # NOTE(review): the model is left in train() mode here, as in the
            # original; confirm whether model.eval() is intended.
            hist = np.zeros((19, 19))
            for index, batch in enumerate(testloader):
                print(index)
                image, _, _ = batch
                # ``volatile=True`` no longer disables autograd; no_grad()
                # is its replacement and avoids building a graph.
                with torch.no_grad():
                    output1, _ = model(Variable(image).cuda(args.gpu))
                    pred = interp_val(output1)
                pred = pred[0].permute(1, 2, 0)
                pred = torch.max(pred, 2)[1].byte()
                pred = pred.data.cpu().numpy()

                label = Image.open(gt_imgs[index])
                label = np.array(label.resize(com_size, Image.NEAREST))
                label = label_mapping(label, mapping)
                hist += fast_hist(label.flatten(), pred.flatten(), 19)

            mIoUs = per_class_iu(hist)
            mIoU = round(np.nanmean(mIoUs) * 100, 2)
            print(mIoU)
            # Context manager so the handle is closed even if the write fails.
            with open(args.results_dir, 'a') as f:
                f.write('i_iter:{:d},  miou:{:0.3f} \n'.format(i_iter, mIoU))
# Evaluate every image listed in ``img_list``: run the model on a 513x513
# zero-padded, mean-subtracted copy, crop the prediction back to the image
# size and compute its IoU against the ground-truth label map.
# NOTE(review): this fragment was reconstructed from a source without
# indentation; confirm how it nests in the surrounding script.
pytorch_list = []
for i in img_list:
    img = np.zeros((513, 513, 3))

    # ``i[:-1]`` drops the last character of the entry (presumably the
    # trailing newline of a line read from the list file -- confirm).
    img_temp = cv2.imread(os.path.join(im_path, i[:-1] + '.jpg')).astype(float)
    # Copy before normalising: the subtraction below is in place, and a plain
    # alias would make the "original" image shown in the visualisation the
    # mean-subtracted one.
    img_original = img_temp.copy()
    # Per-channel mean subtraction (channel order as returned by cv2.imread).
    img_temp -= np.array([104.008, 116.669, 122.675])
    img[:img_temp.shape[0], :img_temp.shape[1], :] = img_temp

    gt = cv2.imread(os.path.join(gt_path, i[:-1] + '.png'), 0)
    gt[gt == 255] = 0  # map label 255 to class 0

    with torch.no_grad():
        # (H, W, C) -> (1, C, H, W) float tensor on the selected GPU.
        batch = torch.from_numpy(img[np.newaxis, :].transpose(0, 3, 1, 2)).float()
        output = model(Variable(batch).cuda(gpu0))
        interp = nn.UpsamplingBilinear2d(size=(513, 513))
        output = interp(output[3]).cpu().data[0].numpy()

    # Crop away the padding and take the arg-max class per pixel.
    output = output[:, :img_temp.shape[0], :img_temp.shape[1]]
    output = output.transpose(1, 2, 0)
    output = np.argmax(output, axis=2)

    if args['--visualize']:
        plt.subplot(3, 1, 1)
        plt.imshow(img_original)
        plt.subplot(3, 1, 2)
        plt.imshow(gt)
        plt.subplot(3, 1, 3)
        plt.imshow(output)
        plt.show()

    iou_pytorch = get_iou(output, gt)
def __init__(self, ver_dim, seg_dim, spherical_used=False, fcdim=256,
             s8dim=128, s4dim=64, s2dim=32, raw_dim=32):
    """Build the ResNet-18 (stride 8) encoder and the skip-connection decoder.

    Args:
        ver_dim: number of output channels of the vertex branch.
        seg_dim: number of output channels of the segmentation branch.
        spherical_used: when true, two extra sign heads (``convsignX`` and
            ``convsignY``) with ``ver_dim`` outputs each are created.
        fcdim: channels produced by the layer replacing the backbone's ``fc``.
        s8dim, s4dim, s2dim: channels of the 1/8, 1/4 and 1/2 decoder stages.
        raw_dim: hidden channels of the full-resolution heads.
    """
    super(Resnet18, self).__init__()

    def conv_bn(c_in, c_out):
        # 3x3 conv (stride 1, padding 1, no bias) followed by batch norm.
        return [nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1,
                          bias=False),
                nn.BatchNorm2d(c_out)]

    def fuse_stage(c_in, c_out):
        # conv + BN + in-place LeakyReLU(0.1) used by every decoder stage.
        return nn.Sequential(
            *conv_bn(c_in, c_out),
            nn.LeakyReLU(negative_slope=0.1, inplace=True))

    def raw_head(c_out):
        # Full-resolution head: fuse with the 3-channel input, then a 1x1 conv.
        return nn.Sequential(
            *conv_bn(3 + s2dim, raw_dim),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
            nn.Conv2d(raw_dim, c_out, kernel_size=1, stride=1))

    # Pretrained backbone: fully convolutional, avg-pool removed, output stride 8.
    backbone = resnet18(fully_conv=True,
                        pretrained=True,
                        output_stride=8,
                        remove_avg_pool_layer=True)

    self.ver_dim = ver_dim
    self.seg_dim = seg_dim
    self.spherical_used = spherical_used

    # Replace the classifier with a randomly initialised conv scoring layer.
    backbone.fc = nn.Sequential(
        *conv_bn(backbone.inplanes, fcdim),
        nn.ReLU(inplace=True))
    self.resnet18_8s = backbone

    # 1/8 stage: 128 skip channels concatenated with the fc features.
    self.conv8s = fuse_stage(128 + fcdim, s8dim)
    self.up8sto4s = nn.UpsamplingBilinear2d(scale_factor=2)

    # 1/4 stage: 64 skip channels plus the upsampled 1/8 features.
    self.conv4s = fuse_stage(64 + s8dim, s4dim)

    # 1/2 stage: 64 skip channels plus the upsampled 1/4 features.
    self.conv2s = fuse_stage(64 + s4dim, s2dim)
    self.up4sto2s = nn.UpsamplingBilinear2d(scale_factor=2)

    # Joint segmentation + vertex head at input resolution.
    self.convraw = raw_head(seg_dim + ver_dim)

    if self.spherical_used:
        # N * 4, label have 4 dims
        self.convsignX = raw_head(ver_dim)
        self.convsignY = raw_head(ver_dim)

    self.up2storaw = nn.UpsamplingBilinear2d(scale_factor=2)
def __init__(self, nz=100, ngf=160, nc=3, init_type="normal"):
    """Build a resize-convolution generator (bilinear upsample + 3x3 conv).

    Args:
        nz: channels of the latent input.
        ngf: base feature width; the stages use 16x, 8x, 4x, 2x and 1x of it.
        nc: channels of the generated image.
        init_type: stored on the instance as ``self.init_type``.
    """
    super(_Generator_ResizeConv, self).__init__()
    self.init_type = init_type
    self.normfunc = nn.BatchNorm2d

    def upsample_conv(index, scale, c_in, c_out):
        # The "unsampleN" spelling is part of the registered module names.
        return [
            ("unsample%d" % index,
             nn.UpsamplingBilinear2d(scale_factor=scale)),
            ("conv%d" % index,
             nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3,
                       stride=1, padding=1, bias=False)),
        ]

    # Channel width entering/leaving each of the five normalised stages.
    widths = [nz, ngf * 16, ngf * 8, ngf * 4, ngf * 2, ngf]
    # Upsampling factor per stage (stage 2 upsamples by 4, the rest by 2).
    scales = [2, 4, 2, 2, 2]

    named_layers = []
    for index, scale in enumerate(scales, start=1):
        c_in, c_out = widths[index - 1], widths[index]
        named_layers.extend(upsample_conv(index, scale, c_in, c_out))
        named_layers.append(("bn%d" % index,
                             self.normfunc(num_features=c_out)))
        named_layers.append(("relu%d" % index, nn.ReLU(inplace=True)))

    # Output stage: no normalisation, tanh activation.
    named_layers.extend(upsample_conv(6, 2, ngf, nc))
    named_layers.append(("tanh", nn.Tanh()))

    self.model = nn.Sequential(OrderedDict(named_layers))
def __init__(self, in_channels, out_channels, kernel_size=3,
             activation=None, upsampling=1):
    """Chain a same-padded conv, an optional bilinear upsample and an activation.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels produced by the convolution.
        kernel_size: convolution kernel size; padding is ``kernel_size // 2``.
        activation: forwarded unchanged to ``Activation``.
        upsampling: scale factor; values not greater than 1 disable upsampling.
    """
    conv_layer = nn.Conv2d(in_channels,
                           out_channels,
                           kernel_size=kernel_size,
                           padding=kernel_size // 2)

    if upsampling > 1:
        resize_layer = nn.UpsamplingBilinear2d(scale_factor=upsampling)
    else:
        resize_layer = nn.Identity()

    act_layer = Activation(activation)
    super().__init__(conv_layer, resize_layer, act_layer)
def forward(self, x, skip):
    """Upsample ``x`` to the spatial size of ``skip`` and fuse the two.

    Args:
        x: feature map to be resized; its last two dimensions are resampled.
        skip: skip-connection feature map; its last two dimensions give the
            target size, and it is passed through ``self.conv_channel_adjust``
            before the element-wise sum.

    Returns:
        ``self.conv_fusion`` applied to the sum of the resized ``x`` and the
        adjusted ``skip``.
    """
    # Functional equivalent of nn.UpsamplingBilinear2d (bilinear interpolation
    # with align_corners=True).  It avoids constructing a throw-away module on
    # every forward pass and does not rely on the deprecated module class.
    x = nn.functional.interpolate(x,
                                  size=skip.size()[2:],
                                  mode='bilinear',
                                  align_corners=True)
    skip = self.conv_channel_adjust(skip)
    return self.conv_fusion(x + skip)
def __init__(self, anchor_num=3, class_num=20):
    """Create every layer of the YoloNano backbone, neck and detection heads.

    The shape notes in the comments are carried over from the original
    annotations and presumably assume a 3x416x416 input -- TODO confirm
    against ``forward``.

    Args:
        anchor_num: anchors per grid cell; each head conv outputs
            ``anchor_num * (5 + class_num)`` channels.
        class_num: number of classes; also passed to every ``YOLOLayer``.
    """
    super(YoloNano, self).__init__()
    self.anchor_num = anchor_num
    self.class_num = class_num

    def conv_bn_leaky(c_in, c_out, kernel, **conv_kwargs):
        # Conv2d -> BatchNorm2d -> LeakyReLU with default slope.
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, kernel, **conv_kwargs),
            nn.BatchNorm2d(c_out),
            nn.LeakyReLU())

    # ---- stem -----------------------------------------------------------
    self.c1 = conv_bn_leaky(3, 12, 3, padding=1)  # (12,416,416)
    self.c2 = conv_bn_leaky(12, 24, 3, stride=2, padding=1)  # (24,208,208)

    # ---- backbone -------------------------------------------------------
    self.pep1 = PEP(24, 7)  # (24,208,208)
    self.ep1 = EP(24, 70)  # (70,104,104)
    self.pep2 = PEP(70, 25)  # (70,104,104)
    self.pep3 = PEP(70, 24)  # (70,104,104)
    self.ep2 = EP(70, 150)  # (150,52,52)
    self.pep4 = PEP(150, 56)  # (150,52,52)
    self.c3 = conv_bn_leaky(150, 150, 1, stride=1)  # (150,52,52)
    self.fca = FCA(150, 8)
    self.pep5 = PEP(150, 73)  # (150,52,52)
    self.pep6 = PEP(150, 71)  # (150,52,52)
    self.pep7 = PEP(150, 75)  # (150,52,52)
    self.ep3 = EP(150, 325)  # (325,26,26)

    # pep8 .. pep15 all take 325 channels and differ only in their second
    # argument.  Annotated (325,26,26) in the original.
    pep_325_args = (132, 124, 141, 140, 137, 135, 133, 140)
    for offset, second_arg in enumerate(pep_325_args):
        setattr(self, 'pep%d' % (8 + offset), PEP(325, second_arg))

    self.ep4 = EP(325, 545)  # (545,13,13)
    self.pep16 = PEP(545, 276)  # (545,13,13)
    self.c4 = conv_bn_leaky(545, 230, 1)  # (230,13,13)
    self.ep5 = EP(230, 489, stride=1)  # (489,13,13)
    self.pep17 = PEP(489, 213, output_channel=469)  # (469,13,13)
    self.c5 = conv_bn_leaky(469, 189, 1)  # (189,13,13)
    self.c6 = conv_bn_leaky(189, 105, 1)  # (105,13,13)

    # ---- neck: 13x13 -> 26x26 ------------------------------------------
    self.upsample1 = nn.UpsamplingBilinear2d(size=(26, 26))
    self.pep18 = PEP(430, 113, output_channel=325)  # (325,26,26)
    self.pep19 = PEP(325, 113, output_channel=207)  # (207,26,26)
    self.c7 = conv_bn_leaky(207, 98, 1)  # (98,26,26)
    self.c8 = conv_bn_leaky(98, 47, 1)  # (47,26,26)

    # ---- neck: 26x26 -> 52x52 ------------------------------------------
    self.upsample2 = nn.UpsamplingBilinear2d(size=(52, 52))
    self.pep20 = PEP(197, 58, output_channel=122)  # (122,52,52)
    self.pep21 = PEP(122, 52, output_channel=87)  # (87,52,52)
    self.pep22 = PEP(87, 47, output_channel=93)  # (93,52,52)

    # ---- detection heads -----------------------------------------------
    # (75 output channels with the default arguments.)
    self.c9 = nn.Conv2d(93, self.anchor_num * (5 + self.class_num), 1)  # 52x52
    self.ep6 = EP(98, 183, stride=1)  # (183,26,26)
    self.c10 = nn.Conv2d(183, self.anchor_num * (5 + self.class_num), 1)  # 26x26
    self.ep7 = EP(189, 462, stride=1)  # (462,13,13)
    self.c11 = nn.Conv2d(462, self.anchor_num * (5 + self.class_num), 1)  # 13x13

    # Anchor boxes per detection scale.
    anchors_by_grid = {
        52: [[61, 9], [17, 22], [22, 50]],
        26: [[36, 30], [43, 65], [68, 41]],
        13: [[156, 134], [67, 107], [108, 63]],
    }
    self.yolo52 = YOLOLayer(anchors_by_grid[52], class_num)
    self.yolo26 = YOLOLayer(anchors_by_grid[26], class_num)
    self.yolo13 = YOLOLayer(anchors_by_grid[13], class_num)
def forward(self, x):
    """Run the backbone, fuse low/high-level features and predict the map.

    The shape notes in the comments are the original author's annotations;
    they depend on layers defined outside this method -- TODO confirm.

    Args:
        x: input batch; its last two dimensions are taken as ``(h, w)`` and
            the high-level branch is resized back to that size.

    Returns:
        Output of ``self.conv34`` applied to the concatenated low-level
        (``C12``) and high-level (``C345``) features.
    """
    in_h, in_w = x.shape[2:]

    def conv_stage(feat, convs):
        # Apply each conv followed by the shared ReLU.
        for conv in convs:
            feat = self.relu(conv(feat))
        return feat

    def downsample(feat):
        # Pool, then apply dropout only when it is enabled.
        feat = self.pool(feat)
        if self.dropout:
            feat = self.drop(feat)
        return feat

    def context_features(feat, convs, norm):
        # Concatenate four parallel conv branches, then BN + ReLU.
        branches = [conv(feat) for conv in convs]
        return self.relu(norm(concatenate(branches, 1)))

    # ---- backbone --------------------------------------------------------
    C1 = conv_stage(x, (self.conv1, self.conv2))  # [-1, 64, h, w]
    C2 = conv_stage(downsample(C1),
                    (self.conv3, self.conv4))  # [-1, 128, h/2, w/2]
    C3 = conv_stage(downsample(C2),
                    (self.conv5, self.conv6, self.conv7))  # [-1, 256, h/4, w/4]
    C4 = conv_stage(downsample(C3),
                    (self.conv8, self.conv9, self.conv10))  # [-1, 512, h/8, w/8]
    C5 = conv_stage(downsample(C4),
                    (self.conv11, self.conv12, self.conv13))
    if self.dropout:
        C5 = self.drop(C5)  # [-1, 512, h/16, w/16]

    # ---- low-level branch (C1, C2) ----------------------------------------
    C1 = self.relu(self.bn1(self.conv14(C1)))
    C2 = self.relu(self.bn2(self.conv15(C2)))
    C12 = concatenate([C1, C2], 1)  # [-1, 64+128, h, w]
    C12 = self.relu(self.bn3(self.conv16(C12)))  # [-1, 64, h, w]

    # ---- high-level branch (C3, C4, C5) -----------------------------------
    C3_cfe = context_features(
        C3, (self.conv17, self.conv18, self.conv19, self.conv20), self.bn4)
    C4_cfe = context_features(
        C4, (self.conv21, self.conv22, self.conv23, self.conv24), self.bn5)
    C5_cfe = context_features(
        C5, (self.conv25, self.conv26, self.conv27, self.conv28), self.bn6)
    C345 = concatenate([C3_cfe, C4_cfe, C5_cfe], 1)  # [-1, 32*4*3, h/4, w/4]

    # ---- channel attention -------------------------------------------------
    if self.with_CA:
        feat_h, feat_w = C345.shape[2:]
        # Global average pool to one 384-vector per sample.
        pooled = nn.AvgPool2d((feat_h, feat_w))(C345).view(-1, 384)
        weights = self.relu(self.linear1(pooled))
        weights = self.sigmoid(self.linear2(weights))
        # Broadcast the per-channel weights back over the spatial grid.
        weights = weights.view(-1, 384, 1, 1).repeat(1, 1, feat_h, feat_w)
        C345 = weights * C345

    C345 = self.relu(self.bn7(self.conv29(C345)))
    resize = nn.UpsamplingBilinear2d(size=(in_h, in_w))
    C345 = resize(C345)  # [-1, 64, h, w]

    # ---- spatial attention -------------------------------------------------
    if self.with_SA:
        branch_a = self.relu(self.bn8(self.conv30(C345)))  # [-1, 32, h, w]
        branch_a = self.relu(self.bn9(self.conv31(branch_a)))  # [-1, 1, h, w]
        branch_b = self.relu(self.bn10(self.conv32(C345)))  # [-1, 32, h, w]
        branch_b = self.relu(self.bn11(self.conv33(branch_b)))  # [-1, 1, h, w]
        spatial = self.sigmoid(branch_a + branch_b)  # [-1, 1, h, w]
        spatial = spatial.repeat([1, 64, 1, 1])
        C12 = spatial * C12  # [-1, 64, h, w]

    fused = torch.cat([C12, C345], 1)  # [-1, 128, h, w]
    return self.conv34(fused)
def __init__(self, in_ch, out_ch):
    """Create the x2 bilinear upsampler and the ``double_convbn`` block.

    Args:
        in_ch: first argument forwarded to ``double_convbn``.
        out_ch: second argument forwarded to ``double_convbn``.
    """
    super(upbn, self).__init__()
    upsampler = nn.UpsamplingBilinear2d(scale_factor=2)
    self.up = upsampler
    conv_block = double_convbn(in_ch, out_ch)
    self.conv = conv_block
def __init__(self, num_classes=23, hparams=None):
    """Build a plain encoder/decoder segmentation network.

    Encoder: five ``3x3 conv (padding 1) -> BatchNorm -> ReLU`` stages with
    widths 32, 64, 128, 256, 512; each of the first four is followed by a 2x2
    max-pool of stride 2.
    Decoder: four ``x2 bilinear upsample -> 1x1 conv -> ReLU`` stages that
    undo the channel widening, then a 1x1 conv producing ``num_classes``
    channels.

    Args:
        num_classes: number of output channels of the final 1x1 convolution.
        hparams: stored unchanged as ``self.hparams``.
    """
    super().__init__()
    self.hparams = hparams

    widths = [32, 64, 128, 256, 512]
    last_stage = len(widths) - 1

    layers = []
    channels = 3

    # ---- encoder -----------------------------------------------------------
    for stage, width in enumerate(widths):
        layers.append(nn.Conv2d(channels, width, 3, padding=1))
        layers.append(nn.BatchNorm2d(width))
        layers.append(nn.ReLU())
        if stage != last_stage:
            # The deepest stage is not pooled.
            layers.append(nn.MaxPool2d(2, 2))
        channels = width

    # ---- decoder -----------------------------------------------------------
    for width in reversed(widths[:-1]):
        layers.append(nn.UpsamplingBilinear2d(scale_factor=2))
        layers.append(nn.Conv2d(channels, width, 1))
        layers.append(nn.ReLU())
        channels = width

    # Per-pixel class scores.
    layers.append(nn.Conv2d(channels, num_classes, 1))

    self.model = nn.Sequential(*layers)
def __init__(self, in_ch, out_ch):
    """Create the x2 bilinear upsampler and the ``half`` block.

    Args:
        in_ch: first argument forwarded to ``half``.
        out_ch: second argument forwarded to ``half``.
    """
    super(uphalf, self).__init__()
    upsampler = nn.UpsamplingBilinear2d(scale_factor=2)
    self.up = upsampler
    conv_block = half(in_ch, out_ch)
    self.conv = conv_block
def __init__(self, block=Bottleneck):
    """Create ``layer2`` (three blocks, 128 planes) and an x2 upsampler.

    Args:
        block: block class handed to ``self._make_layer``.
    """
    super(ResNetUpSample, self).__init__()
    # Must be set before ``_make_layer`` is called below.
    # (presumably read/updated there -- TODO confirm)
    self.inplanes = 64
    stage = self._make_layer(block, planes=128, blocks=3)
    self.layer2 = stage
    upsampler = nn.UpsamplingBilinear2d(scale_factor=2)
    self.upsample = upsampler
def runIter(args, encoder, decoder, x, y_mask, sw_mask, crits, optims,
            mode='train', loss=None, prev_hidden_temporal_list=None,
            prev_mask=None, last_frame=False):
    """
    Runs forward a batch

    Encodes ``x`` once, then runs the decoder ``args.maxseqlen`` times,
    accumulates the masked IoU loss into ``loss`` and, on the last frame of a
    clip, zeroes the gradients and (in train mode) back-propagates and steps
    the optimizers.

    Args:
        args: options object; this function reads ``maxseqlen``, ``NLB``,
            ``only_temporal``, ``use_gpu``, ``iou_weight`` and
            ``update_encoder``.
        encoder, decoder: the two networks.
        x: input batch passed to the encoder.
        y_mask: ground-truth masks, flattened to 2-D before the loss.
        sw_mask: weights passed to the loss; only its first ``t`` columns are
            used, where ``t`` is the number of predicted masks.
        crits: the mask loss callable (used directly as ``mask_siou``).
        optims: pair ``(enc_opt, dec_opt)``.
        mode: ``'train'`` enables training mode and the backward/step calls.
        loss: running loss from earlier frames, or ``None`` to start fresh.
        prev_hidden_temporal_list: per-step hidden states from the previous
            time instant, or ``None`` on the first frame.
        prev_mask: masks from the previous frame, indexed as
            ``prev_mask[:, t, :]``.
            NOTE(review): the default ``None`` is indexed unconditionally
            below, so callers must always pass a tensor -- confirm.
        last_frame: when true, gradients are reset, the step is taken (train
            mode only) and the returned ``loss`` is ``None``.

    Returns:
        ``(loss, losses, outs, hidden_temporal_list)`` where ``losses`` holds
        the Python floats ``[total loss, mask IoU loss]`` and ``outs`` is the
        sigmoid of the stacked mask predictions.
    """
    mask_siou = crits
    enc_opt, dec_opt = optims
    T = args.maxseqlen
    hidden_spatial = None
    out_masks = []
    if mode == 'train':
        encoder.train(True)
        decoder.train(True)
    else:
        encoder.train(False)
        decoder.train(False)
    feats = encoder(x, nlb=args.NLB)
    hidden_temporal_list = []
    # loop over sequence length and get predictions
    for t in range(0, T):
        #prev_hidden_temporal_list is a list with the hidden state for all instances from previous time instant
        #If this is the first frame of the sequence, hidden_temporal is initialized to None. Otherwise, it is set with the value from previous time instant.
        if prev_hidden_temporal_list is not None:
            hidden_temporal = prev_hidden_temporal_list[t]
            if args.only_temporal:
                hidden_spatial = None
        else:
            hidden_temporal = None
        mask_lstm = []
        maxpool = nn.MaxPool2d((2, 2), ceil_mode=True)
        prev_mask_instance = prev_mask[:, t, :]
        # Reshape the flat mask to (batch, 1, x.size(2), -1).
        prev_mask_instance = prev_mask_instance.view(
            prev_mask_instance.size(0), 1, x.data.size(2), -1)
        # One pooling step up front, then one more per encoder feature level;
        # every level's pooled mask is kept.
        prev_mask_instance = maxpool(prev_mask_instance)
        for ii in range(len(feats)):
            prev_mask_instance = maxpool(prev_mask_instance)
            mask_lstm.append(prev_mask_instance)
        # Smallest mask first.
        mask_lstm = list(reversed(mask_lstm))
        #The decoder receives two hidden state variables: hidden_spatial (a tuple, with hidden_state and cell_state) which refers to the
        #hidden state from the previous object instance from the same time instant, and hidden_temporal which refers to the hidden state from the same
        #object instance from the previous time instant.
        out_mask, hidden = decoder(feats, mask_lstm, hidden_spatial,
                                   hidden_temporal)
        # Keep only element 0 of every hidden entry for the temporal list.
        hidden_tmp = []
        for ss in range(len(hidden)):
            hidden_tmp.append(hidden[ss][0])
        hidden_spatial = hidden
        hidden_temporal_list.append(hidden_tmp)
        # Resize the predicted mask to the input's spatial size, then flatten.
        upsample_match = nn.UpsamplingBilinear2d(size=(x.size()[-2],
                                                       x.size()[-1]))
        out_mask = upsample_match(out_mask)
        out_mask = out_mask.view(out_mask.size(0), -1)
        # get predictions in list to concat later
        out_masks.append(out_mask)
    # concat all outputs into single tensor to compute the loss
    t = len(out_masks)
    out_masks = torch.cat(out_masks, 1).view(out_mask.size(0), len(out_masks),
                                             -1)
    sw_mask = Variable(torch.from_numpy(
        sw_mask.data.cpu().numpy()[:, 0:t])).contiguous().float()
    if args.use_gpu:
        sw_mask = sw_mask.cuda()
    else:
        out_masks = out_masks.contiguous()
    #loss is masked with sw_mask
    loss_mask_iou = mask_siou(y_mask.view(-1,
                                          y_mask.size()[-1]),
                              out_masks.view(-1,
                                             out_masks.size()[-1]),
                              sw_mask.view(-1, 1))
    loss_mask_iou = torch.mean(loss_mask_iou)
    # total loss is the weighted sum of all terms
    if loss is None:
        loss = args.iou_weight * loss_mask_iou
    else:
        # NOTE(review): in-place add on the caller's tensor -- confirm intended.
        loss += args.iou_weight * loss_mask_iou
    if last_frame:
        enc_opt.zero_grad()
        dec_opt.zero_grad()
        decoder.zero_grad()
        encoder.zero_grad()
        if mode == 'train':
            loss.backward()
            dec_opt.step()
            if args.update_encoder:
                enc_opt.step()
    #pytorch 0.4
    #losses = [loss.data[0], loss_mask_iou.data[0]]
    #pytorch 1.0
    losses = [loss.data.item(), loss_mask_iou.data.item()]
    out_masks = torch.sigmoid(out_masks)
    outs = out_masks.data
    # Drop local references to the large tensors before returning.
    del loss_mask_iou, feats, x, y_mask, sw_mask
    if last_frame:
        del loss
        loss = None
    return loss, losses, outs, hidden_temporal_list
def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3,
             n_blocks=9, norm_layer=nn.BatchNorm2d, padding_type='reflect',
             dilation=1, interpolated_conv=False):
    """Build the generator: 7x7 stem, downsampling, ResNet blocks, upsampling.

    Args:
        input_nc: channels of the input image.
        output_nc: channels of the generated image.
        ngf: feature width after the stem; doubled at every downsampling.
        n_downsampling: number of down- and up-sampling stages.
        n_blocks: number of ``ResnetBlock`` modules at the bottleneck.
        norm_layer: normalisation layer factory, called with a channel count.
        padding_type, dilation: forwarded to every ``ResnetBlock``.
        interpolated_conv: when exactly ``True``, downsample with
            conv + max-pool and upsample with bilinear interpolation + conv
            instead of strided and transposed convolutions.
    """
    assert (n_blocks >= 0)
    super(GlobalGenerator, self).__init__()

    # One ReLU instance is shared by the stem, the strided/transposed stages
    # and the ResNet blocks; the interpolated stages get their own instances.
    activation = nn.ReLU(True)
    use_interpolation = interpolated_conv is True

    # ---- stem --------------------------------------------------------------
    layers = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0),
        norm_layer(ngf),
        activation,
    ]

    # ---- downsample --------------------------------------------------------
    for level in range(n_downsampling):
        c_in = ngf * 2**level
        c_out = c_in * 2
        if use_interpolation:
            layers.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.MaxPool2d(2),
                norm_layer(c_out),
                nn.ReLU(True),
            ])
        else:
            layers.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1),
                norm_layer(c_out),
                activation,
            ])

    # ---- resnet blocks -----------------------------------------------------
    bottleneck_ch = ngf * 2**n_downsampling
    layers.extend(
        ResnetBlock(bottleneck_ch,
                    padding_type=padding_type,
                    activation=activation,
                    norm_layer=norm_layer,
                    dilation=dilation) for _ in range(n_blocks))

    # ---- upsample ----------------------------------------------------------
    # Harry: use interpolated convolution instead of transposed convolution
    for level in range(n_downsampling):
        c_in = ngf * 2**(n_downsampling - level)
        c_out = int(c_in / 2)
        if use_interpolation:
            layers.extend([
                nn.UpsamplingBilinear2d(scale_factor=2),
                nn.Conv2d(c_in, c_out, 3, padding=1),
                norm_layer(c_out),
                nn.ReLU(True),
            ])
        else:
            layers.extend([
                nn.ConvTranspose2d(c_in,
                                   c_out,
                                   kernel_size=3,
                                   stride=2,
                                   padding=1,
                                   output_padding=1),
                norm_layer(c_out),
                activation,
            ])

    # ---- output ------------------------------------------------------------
    layers.extend([
        nn.ReflectionPad2d(3),
        nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
        nn.Tanh(),
    ])
    self.model = nn.Sequential(*layers)
def __init__(self, kernel_cores=[32], encoding=16, input_size=[64, 64],
             num_channels=3):
    """Build the convolutional auto-encoder.

    Layout: ``encoder`` (conv 3x3 + 2x2 max-pool per entry of
    ``kernel_cores``) -> ``fc`` (Linear + ReLU to the code) -> ``dc``
    (Linear + ReLU back to the feature volume) -> ``decoder`` (bilinear
    upsample + conv 3x3 per encoder stage, ending in ``num_channels``).

    Fixes relative to the previous implementation:
      * ``nn.ReLU()`` used to be passed as the ``bias`` argument of the
        ``dc`` linear layer (misplaced parenthesis), so no activation was ever
        applied there.  It is now a separate module after the linear layer,
        mirroring ``fc``.  This changes the output of ``dc`` for existing
        checkpoints; the parameter layout is unchanged.
      * Spatial sizes are computed with integer division.  True division
        produced floats, which are not valid feature counts for ``nn.Linear``
        or target sizes for the upsampling layers on Python 3.

    Args:
        kernel_cores: output channels of each encoder stage (never mutated).
        encoding: size of the latent code.
        input_size: input resolution; only ``input_size[0]`` is used, i.e.
            inputs are treated as square.
        num_channels: channels of the input and of the reconstruction.
    """
    super(DimoCAE, self).__init__()
    self.kernel_cores = kernel_cores
    # NOTE(review): this runs before any layer has been created.  Whether it
    # should be moved to the end of __init__ depends on what
    # _initialize_weights does -- confirm.
    self._initialize_weights()
    self.encoding = encoding

    num_layers = len(kernel_cores)

    # ---- encoder: conv + 2x2 max-pool per stage ------------------------------
    encoder_layers = []
    in_channels = num_channels
    for out_channels in kernel_cores:
        encoder_layers.append(
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3),
                      padding=(1, 1)))
        encoder_layers.append(nn.MaxPool2d(2))
        in_channels = out_channels
    self.encoder = nn.Sequential(*encoder_layers)

    # Side length of the encoder output: each pooling halves it (rounding down).
    dim = input_size[0] // (2 ** num_layers)
    self.dim = dim
    flat_features = kernel_cores[-1] * dim * dim

    # ---- bottleneck ------------------------------------------------------------
    self.fc = nn.Sequential(nn.Linear(flat_features, encoding), nn.ReLU())
    self.dc = nn.Sequential(nn.Linear(encoding, flat_features), nn.ReLU())

    # ---- decoder: upsample to twice the previous size, then conv ---------------
    # possible bug (original author's note): last layer does not return to
    # Feature size, but first kernel_cores size
    decoder_layers = []
    scaling_size = dim * 2
    for j in range(num_layers):
        is_last = j == num_layers - 1
        # Walk kernel_cores backwards; the final conv restores num_channels.
        out_channels = num_channels if is_last else kernel_cores[-j - 2]
        decoder_layers.append(nn.UpsamplingBilinear2d(size=scaling_size))
        decoder_layers.append(
            nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3),
                      padding=(1, 1)))
        in_channels = out_channels
        scaling_size *= 2
    self.decoder = nn.Sequential(*decoder_layers)
train_loss_all = []
valid_loss_all = []

# The resize layer has no state, so build it once instead of on every batch.
upsample_to_input = nn.UpsamplingBilinear2d((inputsize, inputsize))

for epoch in range(1000):  # loop over the dataset multiple times
    train_loss_epoch = []
    for i, data in enumerate(train_dataloader):
        # training
        images, poses = data['image'], data['pose']
        guass_heatmap = data['guass_heatmap']
        # Tensors carry autograd state themselves; the legacy ``Variable``
        # wrapper is a no-op and has been dropped.
        images, poses = images.cuda(), poses.cuda()
        guass_heatmap = guass_heatmap.cuda()

        optimizer.zero_grad()
        outputs = net(images)
        # Resize the predicted heatmaps to (inputsize, inputsize) so they can
        # be compared with the target heatmaps.
        output_heatmap = upsample_to_input(outputs)
        loss = criterion(output_heatmap, guass_heatmap)
        loss.backward()
        optimizer.step()
        # ``loss`` is a 0-dim tensor; ``loss.data[0]`` raises on current
        # PyTorch, ``.item()`` is the supported way to get the Python float.
        train_loss_epoch.append(loss.item())

    if epoch % 2 == 0:
        valid_loss_epoch = []
        for i_batch, sample_batched in enumerate(test_dataloader):
            # calculate the valid loss
            net_forward = net
            images = sample_batched['image'].cuda()
            poses = sample_batched['pose'].cuda()
def __init__(self, phase, base, extras, head, num_classes, batch_norm):
    """Assemble the SSD detector together with the FSSD fusion layers.

    Args:
        phase: ``'test'`` additionally creates the softmax and ``Detect``
            post-processing modules.
        base: layers wrapped into ``self.vgg``.
        extras: layers wrapped into ``self.extras``.
        head: pair ``(loc_layers, conf_layers)``.
        num_classes: number of classes (also passed to ``Detect``).
        batch_norm: stored unchanged as ``self.batch_norm``.
    """
    super(SSD, self).__init__()

    def initialised_conv(c_in, c_out, **conv_kwargs):
        # Create a conv layer and immediately run ``weights_init`` on it.
        conv = nn.Conv2d(c_in, c_out, **conv_kwargs)
        conv.apply(weights_init)
        return conv

    self.phase = phase
    self.num_classes = num_classes
    self.batch_norm = batch_norm

    # TODO: implement __call__ in PriorBox
    self.priorbox = PriorBox(v2)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = 300

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)
    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    # FSSD extra layers before fusion
    self.conv81 = initialised_conv(1024, 256, kernel_size=1)
    self.bn_conv81 = nn.BatchNorm2d(256)
    self.conv82 = initialised_conv(256, 512, kernel_size=3, stride=2,
                                   padding=1)
    self.bn_conv82 = nn.BatchNorm2d(512)
    self.bn_fused = nn.BatchNorm2d(768)

    # FSSD extra layers after fusion: (name, in, out, conv keyword arguments).
    # Each conv is followed by a BatchNorm registered as ``bn_<name>``.
    after_fusion = (
        ('conv91', 768, 512, dict(kernel_size=3, stride=1, padding=1)),
        ('conv101', 512, 512, dict(kernel_size=3, stride=2, padding=1)),
        ('conv111', 512, 256, dict(kernel_size=3, stride=2, padding=1)),
        ('conv121', 256, 256, dict(kernel_size=3, stride=2, padding=1)),
        ('conv131', 256, 256, dict(kernel_size=3, stride=1)),
        ('conv141', 256, 256, dict(kernel_size=3, stride=1)),
    )
    for layer_name, c_in, c_out, conv_kwargs in after_fusion:
        setattr(self, layer_name, initialised_conv(c_in, c_out, **conv_kwargs))
        setattr(self, 'bn_' + layer_name, nn.BatchNorm2d(c_out))

    # FSSD fuse layers: 1x1 convs to 256 channels; the two deeper sources are
    # resized to 38x38.
    self.fuse_conv43 = initialised_conv(512, 256, kernel_size=1)
    self.fuse_fc7 = initialised_conv(1024, 256, kernel_size=1)
    self.fuse_fc7_bilinear = nn.UpsamplingBilinear2d(size=(38, 38))
    self.fuse_conv82 = initialised_conv(512, 256, kernel_size=1)
    # The attribute name keeps its historical spelling ("bilienar").
    self.fuse_conv82_bilienar = nn.UpsamplingBilinear2d(size=(38, 38))
from torch.autograd import Variable
from PIL import Image
import matplotlib.pyplot as plt
import scipy.misc

# Visualise the DeepLab appearance maps for every item the database yields.
database = Database("data/DAVIS/", "data/DAVIS/ImageSets/480p/val.txt")

# Load the network weights from disk.
deep_lab = Res_Deeplab()
deep_lab.load_state_dict(
    torch.load("data/models/MS_DeepLab_resnet_pretrained_COCO_init.pth"))
# NOTE(review): the model is never switched to eval mode in this span --
# confirm whether that is intended.
logsoftmax = nn.LogSoftmax()

while database.has_next():
    images, targets, name = database.get_test()
    # images[1] is fed to the network; images[0] only provides the target size.
    # NOTE(review): ``volatile`` is a legacy autograd flag -- confirm its effect
    # on the PyTorch version in use.
    image = Variable(torch.from_numpy(images[1]).float(), volatile=True)
    # Resize the network output to dimensions 2 and 3 of images[0].
    rescale = nn.UpsamplingBilinear2d(size=(images[0].shape[2],
                                            images[0].shape[3]))
    # Element 3 of the network output is the one being visualised.
    appearance = deep_lab(image)[3]
    appearance = rescale(appearance)
    # First (only) batch element as a NumPy array.
    appearance = appearance.data.numpy()[0]
    print(np.amin(appearance))
    print(np.amax(appearance))
    print(appearance.shape)
    # One slice per entry along axis 0.
    slices = np.split(appearance, appearance.shape[0], axis=0)
    print(slices[0].shape)
    overlay_color = [255, 0, 0]
    transparency = 0.999
    maxval = np.amax(appearance)
    # Order the slices by their maximum value, smallest first.
    slices = sorted(slices, key=lambda x: np.amax(x))
    # NOTE(review): raises if the directory already exists.
    os.makedirs(os.path.join("deeplabvis", name))
    # NOTE(review): the body of this loop is not present in this span.
    for i in range(len(slices)):
def __init__(self, load_weights=True):
    """Build the VGG-16 front end, the auxiliary branch and the trunk branch.

    Args:
        load_weights: when true, the front end is filled with the leading
            entries of torchvision's pretrained VGG-16 ``state_dict`` and its
            parameters are frozen; otherwise only ``_initialize_weights`` runs.
    """
    super(net10_local, self).__init__()

    def conv3x3_relu(c_in, c_out, dilation=1):
        # 3x3 conv whose padding equals its dilation (spatial size is kept),
        # followed by an in-place ReLU.
        conv_kwargs = dict(kernel_size=3, padding=dilation, stride=1)
        if dilation != 1:
            conv_kwargs['dilation'] = dilation
        return nn.Sequential(nn.Conv2d(c_in, c_out, **conv_kwargs),
                             nn.ReLU(inplace=True))

    def resize_conv1x1_relu(size):
        # Bilinear resize to ``size``, then a 256 -> 256 1x1 conv and ReLU.
        return nn.Sequential(
            nn.UpsamplingBilinear2d(size=size),
            nn.Conv2d(256, 256, kernel_size=1, padding=0),
            nn.ReLU(inplace=True))

    # ---- front end -----------------------------------------------------------
    self.frontend_feat = [
        64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512
    ]
    self.frontend = make_layers(self.frontend_feat, dilation=False)
    # Disabled in the original:
    #   self.build_feature_layer = nn.Sequential(nn.Conv2d(512, 512, kernel_size=1, padding=0))
    # Adding that layer would post-process the features extracted by the
    # frozen VGG-16 so that they combine more easily with the auxiliary mask.

    # ---- auxiliary branch ------------------------------------------------------
    self.auxiliary_backend_feat = ['M', 512, 512, 512, 'M', 512, 256]
    self.auxiliary_backend = make_layers(self.auxiliary_backend_feat,
                                         in_channels=512,
                                         dilation=False)
    self.auxiliary_upsample1 = resize_conv1x1_relu((64, 64))
    self.auxiliary_upsample2 = resize_conv1x1_relu((128, 128))
    self.auxiliary_backend_output_layer = nn.Conv2d(256, 1, kernel_size=1)

    # ---- trunk branch ----------------------------------------------------------
    self.trunk_backend1 = conv3x3_relu(768, 512)
    self.trunk_backend2_1 = conv3x3_relu(512, 256)
    self.trunk_backend2_2 = conv3x3_relu(512, 256, dilation=2)
    self.trunk_backend3_1 = conv3x3_relu(512, 256)
    self.trunk_backend3_2 = conv3x3_relu(512, 256, dilation=2)
    self.trunk_backend3_3 = conv3x3_relu(512, 256, dilation=4)
    self.trunk_backend4_1 = conv3x3_relu(768, 256)
    self.trunk_backend4_2 = conv3x3_relu(768, 256, dilation=2)
    self.trunk_backend5 = conv3x3_relu(512, 256)
    self.density_map_layer = nn.Conv2d(256, 1, kernel_size=1)

    if load_weights:
        mod = models.vgg16(pretrained=True)
        self._initialize_weights()
        # Copy the pretrained tensors entry by entry, in state_dict order.
        # Both state dicts are materialised once; the previous loop rebuilt
        # them on every iteration.  state_dict tensors share storage with the
        # module's parameters, so the in-place copy updates the front end.
        frontend_tensors = self.frontend.state_dict().values()
        pretrained_tensors = mod.state_dict().values()
        for target, source in zip(frontend_tensors, pretrained_tensors):
            target.copy_(source)
        # Freeze the front end.
        for child in self.frontend.children():
            for param in child.parameters():
                param.requires_grad = False
    else:
        self._initialize_weights()