def forward(self, x):
    x = self.features(x)
    if hasattr(self, 'pooling_kernel_size') and self.pooling_kernel_size is not None:
        # 2D image output
        if hasattr(self, 'pooling_dropped') and self.pooling_dropped:
            x = x[:, :, self.pooling_dropped:-self.pooling_dropped,
                  self.pooling_dropped:-self.pooling_dropped]
        x = F.avg_pool2d(x, kernel_size=self.pooling_kernel_size, stride=1)
        out = self.classifier(x)
        if self.raster_size is not None:
            out = out.view(out.size(0), self.raster_size, self.raster_size, -1,
                           out.size(2), out.size(3))
            out = out.permute(0, 3, 4, 1, 5, 2).contiguous()
            out = out.view(out.size(0), out.size(1),
                           out.size(2) * out.size(3), out.size(4) * out.size(5))
        return out
    else:
        # Scalar output
        assert not hasattr(self, 'raster_size') or self.raster_size is None
        if not self.training and self.test_time_pool:
            x = F.avg_pool2d(x, kernel_size=7, stride=1)
            out = self.classifier(x)
            # The extra test-time pool should be pooling an img_size//32 - 6 size patch
            out = adaptive_avgmax_pool2d(out, pool_type='avgmax')
        else:
            x = adaptive_avgmax_pool2d(x, pool_type='avg')
            out = self.classifier(x)
        return out.view(out.size(0), -1)

def forward(self, x):
    x = self.conv1(x)
    x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
    x = self.block1(x)
    x = self.group1(x)
    x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
    x = self.block2(x)
    x = self.group2(x)
    x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
    x = self.block3(x)
    x = self.group3(x)
    x = self.block4(x)
    x = self.group4(x)
    x = F.max_pool2d(x, 2) + F.avg_pool2d(x, 2)
    x = x.view(x.size(0), -1)
    fc = self.fc(x)
    x = F.dropout(fc, training=self.training)
    output = []
    for name, fun in self.fc_dict.items():  # Python 3: items() (was the Py2-only iteritems())
        out = fun(x)
        output.append(out)
    return output, fc

def forward(self, x):
    x1 = self.layers1(x)
    x2 = self.layers2(x1)
    x3 = self.layers3(x2)
    x4 = self.layers4(x3)
    x5a = self.layers5a(x4)
    x5b = self.layers5b(x5a)
    x5c = self.layers5c(x5b)

    x5a_feat = F.avg_pool2d(x5a, x5a.size()[2:]).view(x5a.size(0), x5a.size(1))
    x5b_feat = F.avg_pool2d(x5b, x5b.size()[2:]).view(x5b.size(0), x5b.size(1))
    x5c_feat = F.avg_pool2d(x5c, x5c.size()[2:]).view(x5c.size(0), x5c.size(1))

    midfeat = torch.cat((x5a_feat, x5b_feat), dim=1)
    midfeat = self.fc_fuse(midfeat)

    combofeat = torch.cat((x5c_feat, midfeat), dim=1)
    if not self.training:
        return combofeat

    prelogits = self.classifier(combofeat)
    if self.loss == {'xent'}:
        return prelogits
    elif self.loss == {'xent', 'htri'}:
        return prelogits, combofeat
    else:
        raise KeyError("Unsupported loss: {}".format(self.loss))

def forward(self, x, class_em=True):
    for name, module in self.base._modules.items():
        if name == 'avgpool':
            break
        x = module(x)

    if self.cut_at_pooling:
        x = F.avg_pool2d(x, x.size()[2:])
        # x = x.max(1)[0][0]
        return x

    x = F.avg_pool2d(x, x.size()[2:])
    x = x.view(x.size(0), -1)
    # if self.dropout > 0:
    #     x = self.drop(x)
    if self.num_classes > 0 and not self.has_embedding:
        if class_em:
            x_class = self.classifier(x)
        else:
            x_class = x
    if self.has_embedding:
        x = self.feat(x)
        x = self.feat_bn(x)
        x = self.relu(x)
    if self.dropout > 0:
        x = self.drop(x)
    if self.num_diff_features > 0:
        x = self.diff_feat(x)
    if self.num_diff_features > 0 and self.num_classes > 0:
        return x, x_class
    elif self.num_classes > 0 and self.has_embedding:
        x_class = self.classifier(x)
        return x_class
    else:
        return x

def forward(self, x):
    xhighres = x
    h = self.blocks[-(self.depth + 1)](xhighres, True)
    if self.depth > 0:
        h = F.avg_pool2d(h, 2)
        if self.alpha < 1.0:
            xlowres = F.avg_pool2d(xhighres, 2)
            preult_rgb = self.blocks[-self.depth].fromRGB(xlowres)
            h = h * self.alpha + (1 - self.alpha) * preult_rgb
    for i in range(self.depth, 0, -1):
        h = self.blocks[-i](h)
        if i > 1:
            h = F.avg_pool2d(h, 2)
    h = self.linear(h.squeeze(-1).squeeze(-1))
    return h

def forward(self, x):
    residual = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    ## senet: squeeze (global average pool), then excite (two 1x1 convs + sigmoid)
    out2 = F.avg_pool2d(out, kernel_size=out.size(2))
    out2 = self.conv4(out2)
    out2 = self.relu(out2)
    out2 = self.conv5(out2)
    out2 = self.sigmoid(out2)
    # out2 = self.se_block.forward(out)  # not used

    if self.downsample is not None:
        residual = self.downsample(x)

    out = out2 * out + residual
    # out = out2 + residual  # not used
    out = self.relu(out)
    return out

def forward(self, x):
    """
    Returns:
        local_feat_list: each member with shape [N, c]
        logits_list: each member with shape [N, num_classes]
    """
    # shape [N, C, H, W]
    feat = self.base(x)
    assert feat.size(2) % self.num_stripes == 0
    stripe_h = int(feat.size(2) / self.num_stripes)
    local_feat_list = []
    logits_list = []
    for i in range(self.num_stripes):
        # shape [N, C, 1, 1]
        local_feat = F.avg_pool2d(
            feat[:, :, i * stripe_h: (i + 1) * stripe_h, :],
            (stripe_h, feat.size(-1)))
        # shape [N, c, 1, 1]
        local_feat = self.local_relu(self.local_bn(self.local_conv(local_feat)))
        # shape [N, c]
        local_feat = local_feat.view(local_feat.size(0), -1)
        local_feat_list.append(local_feat)
        if hasattr(self, 'fc_list'):
            logits_list.append(self.fc_list[i](local_feat))
    if hasattr(self, 'fc_list'):
        return local_feat_list, logits_list
    return local_feat_list

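# A minimal standalone sketch of the per-stripe pooling used above, outside the
# module. The shapes (N=8, C=2048, H=24, W=8) and num_stripes=6 are illustrative
# assumptions, not values taken from the source.
import torch
import torch.nn.functional as F

feat = torch.randn(8, 2048, 24, 8)  # [N, C, H, W]; H must divide by num_stripes
num_stripes = 6
stripe_h = feat.size(2) // num_stripes
stripes = [
    F.avg_pool2d(feat[:, :, i * stripe_h:(i + 1) * stripe_h, :],
                 (stripe_h, feat.size(-1)))  # each [N, C, 1, 1]
    for i in range(num_stripes)
]
assert all(s.shape == (8, 2048, 1, 1) for s in stripes)
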
def forward(self, x):
    # reshape input first with batch size tracked
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.activation(x)
    x = self.dropout(x)
    x = self.main_avg_pool(x)
    x = self.conv2(x)
    x = self.bn2(x)
    x = self.activation(x)
    x = self.dropout(x)
    x = self.main_avg_pool(x)
    x = self.conv3(x)
    x = self.bn3(x)
    x = self.activation(x)
    x = self.dropout(x)
    x = self.main_avg_pool(x)
    x = self.conv4(x)
    x = self.bn4(x)
    x = self.activation(x)
    x = F.avg_pool2d(x, kernel_size=x.size()[-1])
    x = self.conv_to_class(x)
    x = x.view(x.size(0), -1)
    return x

def preprocess2(img):
    # numpy to pytorch: float, scale to [0, 1], move to CUDA, downsample 2x
    img = torch.from_numpy(img).cuda().float() / 255.
    img = F.avg_pool2d(img, kernel_size=2, stride=None, padding=0,
                       ceil_mode=False, count_include_pad=True)
    # print(img.shape)  # [3, 240, 320]
    return img

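# Hypothetical usage sketch for preprocess2 above. The CHW uint8 numpy frame and
# its 480x640 resolution are assumptions (the source comment suggests the pooled
# result is [3, 240, 320]); a CUDA device is required.
import numpy as np

frame = np.random.randint(0, 256, size=(3, 480, 640), dtype=np.uint8)
img = preprocess2(frame)  # float tensor in [0, 1], shape [3, 240, 320]
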
def features(self, input):
    x_conv0 = self.conv0(input)
    x_stem_0 = self.cell_stem_0(x_conv0)
    x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)
    x_cell_0 = self.cell_0(x_stem_1, x_stem_0)
    x_cell_1 = self.cell_1(x_cell_0, x_stem_1)
    x_cell_2 = self.cell_2(x_cell_1, x_cell_0)
    x_cell_3 = self.cell_3(x_cell_2, x_cell_1)
    x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)
    x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)
    x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)
    x_cell_8 = self.cell_8(x_cell_7, x_cell_6)
    x_cell_9 = self.cell_9(x_cell_8, x_cell_7)
    x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)
    x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)
    x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)
    x_cell_14 = self.cell_14(x_cell_13, x_cell_12)
    x_cell_15 = self.cell_15(x_cell_14, x_cell_13)
    x_cell_15 = self.relu(x_cell_15)
    x_cell_15 = F.avg_pool2d(x_cell_15, x_cell_15.size()[2:])
    x_cell_15 = x_cell_15.view(x_cell_15.size(0), -1)
    x_cell_15 = self.dropout(x_cell_15)
    return x_cell_15

def forward(self, x):
    if self.transform_input:
        x = x.clone()
        x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
    else:
        warn("Input isn't transformed")
    x = self.Conv2d_1a_3x3(x)
    x = self.Conv2d_2a_3x3(x)
    x = self.Conv2d_2b_3x3(x)
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    x = self.Conv2d_3b_1x1(x)
    x = self.Conv2d_4a_3x3(x)
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    x = self.Mixed_5b(x)
    x = self.Mixed_5c(x)
    x = self.Mixed_5d(x)
    x = self.Mixed_6a(x)
    x = self.Mixed_6b(x)
    x = self.Mixed_6c(x)
    x = self.Mixed_6d(x)
    x = self.Mixed_6e(x)
    x = self.Mixed_7a(x)
    x = self.Mixed_7b(x)
    x_for_attn = x = self.Mixed_7c(x)       # 8 x 8 x 2048
    x = F.avg_pool2d(x, kernel_size=8)      # 1 x 1 x 2048
    x_for_capt = x = x.view(x.size(0), -1)  # 2048
    x = self.fc(x)                          # 1000 (num_classes)
    return x_for_attn, x_for_capt, x

def forward(self, x):
    features = self.features(x)
    out = F.relu(features, inplace=True)
    out = F.avg_pool2d(out, kernel_size=self.avgpool_size).view(
        features.size(0), -1)
    out = self.classifier(out)
    return out

def forward(self, x):
    x = F.relu(self.bn1(self.conv1(x)), True)
    x = F.avg_pool2d(self.conv2(x), 2, stride=2)
    x = self.conv3(x)
    x = self.conv4(x)

    previous = x
    outputs = []
    for i in range(self.num_modules):
        hg = self._modules['m' + str(i)](previous)

        ll = hg
        ll = self._modules['top_m_' + str(i)](ll)
        ll = F.relu(self._modules['bn_end' + str(i)](
            self._modules['conv_last' + str(i)](ll)), True)

        # Predict heatmaps
        tmp_out = self._modules['l' + str(i)](ll)
        outputs.append(tmp_out)

        if i < self.num_modules - 1:
            ll = self._modules['bl' + str(i)](ll)
            tmp_out_ = self._modules['al' + str(i)](tmp_out)
            previous = previous + ll + tmp_out_

    return outputs

def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layers(out)
    out = F.avg_pool2d(out, 2)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def forward(self, x):
    x1 = self.conv1(x)
    x1 = F.max_pool2d(x1, 3, stride=2)
    x2 = self.fire2(x1)
    x3 = self.fire3(x2)
    if self.bypass:
        x3 = x3 + x2
    x4 = self.fire4(x3)
    x4 = F.max_pool2d(x4, 3, stride=2)
    x5 = self.fire5(x4)
    if self.bypass:
        x5 = x5 + x4
    x6 = self.fire6(x5)
    x7 = self.fire7(x6)
    if self.bypass:
        x7 = x7 + x6
    x8 = self.fire8(x7)
    x8 = F.max_pool2d(x8, 3, stride=2)
    x9 = self.fire9(x8)
    if self.bypass:
        x9 = x9 + x8
    x9 = F.dropout(x9, training=self.training)
    x10 = F.relu(self.conv10(x9))
    f = F.avg_pool2d(x10, x10.size()[2:]).view(x10.size(0), -1)
    if not self.training:
        return f
    if self.loss == {'xent'}:
        return f
    elif self.loss == {'xent', 'htri'}:
        return f, f
    else:
        raise KeyError("Unsupported loss: {}".format(self.loss))

def on_step_validation(self, state):
    if not self.done:
        x = state[torchbearer.X].data.clone()
        if len(x.size()) == 3:
            x = x.unsqueeze(1)
        x = F.avg_pool2d(x, self.avg_pool_size).data

        data = None
        if state[torchbearer.EPOCH] == 0 and self.write_data:
            if self.avg_data_channels:
                data = torch.mean(x, 1)
            else:
                data = x
            data = data.view(data.size(0), -1)

        feature = None
        if self.write_features:
            feature = state[self.features_key].data.clone()
            feature = feature.view(feature.size(0), -1)

        label = state[torchbearer.Y_TRUE].data.clone()

        if state[torchbearer.BATCH] == 0:
            remaining = self.num_images if self.num_images < label.size(0) else label.size(0)
            self._images = x[:remaining].to('cpu')
            self._labels = label[:remaining].to('cpu')
            if data is not None:
                self._data = data[:remaining].to('cpu')
            if feature is not None:
                self._features = feature[:remaining].to('cpu')
        else:
            remaining = self.num_images - self._labels.size(0)
            if remaining > label.size(0):
                remaining = label.size(0)
            self._images = torch.cat((self._images, x[:remaining].to('cpu')), dim=0)
            self._labels = torch.cat((self._labels, label[:remaining].to('cpu')), dim=0)
            if data is not None:
                self._data = torch.cat((self._data, data[:remaining].to('cpu')), dim=0)
            if feature is not None:
                self._features = torch.cat((self._features, feature[:remaining].to('cpu')), dim=0)

        if self._labels.size(0) >= self.num_images:
            if state[torchbearer.EPOCH] == 0 and self.write_data:
                self._writer.add_embedding(self._data, metadata=self._labels,
                                           label_img=self._images, tag='data',
                                           global_step=-1)
            if self.write_features:
                self._writer.add_embedding(self._features, metadata=self._labels,
                                           label_img=self._images, tag='features',
                                           global_step=state[torchbearer.EPOCH])
            self.done = True

def forward(self, x):
    B = x.data.size(0)
    C = x.data.size(1)
    H = x.data.size(2)
    W = x.data.size(3)
    x = F.avg_pool2d(x, (H, W))
    x = x.view(B, C)
    return x

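# Note: average pooling over the full spatial extent, as above, is just the
# per-channel mean. A quick self-contained check of that equivalence (the
# shapes are arbitrary):
import torch
import torch.nn.functional as F

a = torch.randn(4, 32, 12, 12)
pooled = F.avg_pool2d(a, (12, 12)).view(4, 32)
assert torch.allclose(pooled, a.mean(dim=(2, 3)), atol=1e-6)
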
def forward(self, x):
    module_input = x
    x = F.avg_pool2d(module_input, kernel_size=module_input.size(2))
    x = self.conv1(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = self.sigmoid(x)
    return module_input * x

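# For context, a minimal self-contained squeeze-and-excitation module in the
# shape of the forward above. The reduction ratio of 16 and the 1x1-conv
# squeeze/excite layers are assumptions based on the common SE-Net recipe,
# not taken from the source.
import torch.nn as nn
import torch.nn.functional as F

class SEModule(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels // reduction, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(channels // reduction, channels, kernel_size=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        module_input = x
        x = F.avg_pool2d(x, kernel_size=x.size(2))  # squeeze: [N, C, 1, 1] (square input assumed)
        x = self.relu(self.conv1(x))                # excite: bottleneck MLP
        x = self.sigmoid(self.conv2(x))             # per-channel gates in (0, 1)
        return module_input * x                     # rescale input channels
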
def forward(self, x):
    out = self.conv1(x)
    out = self.trans1(self.dense1(out))
    out = self.trans2(self.dense2(out))
    out = self.dense3(out)
    out = torch.squeeze(F.avg_pool2d(F.relu(self.bn1(out)), 8))
    out = F.log_softmax(self.fc(out), dim=1)  # explicit dim; the implicit default is deprecated
    return out

def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layers(out)
    out = F.relu(self.bn2(self.conv2(out)))
    # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def forward(self, x):
    out = self.conv1(x)
    out = self.block1(out)
    out = self.block2(out)
    out = self.block3(out)
    out = self.relu(self.bn1(out))
    out = F.avg_pool2d(out, 8)
    out = out.view(-1, self.nChannels)
    return self.fc(out)

def forward(self, x):
    x = self.conv_1_3x3.forward(x)
    x = F.relu(self.bn_1.forward(x), inplace=True)
    x = self.stage_1.forward(x)
    x = self.stage_2.forward(x)
    x = self.stage_3.forward(x)
    x = F.avg_pool2d(x, 8, 1)
    x = x.view(-1, 1024)
    return self.classifier(x)

def forward(self, x):
    if self.transform_input:
        x = x.clone()
        x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
    # 299 x 299 x 3
    x = self.Conv2d_1a_3x3(x)
    # 149 x 149 x 32
    x = self.Conv2d_2a_3x3(x)
    # 147 x 147 x 32
    x = self.Conv2d_2b_3x3(x)
    # 147 x 147 x 64
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 73 x 73 x 64
    x = self.Conv2d_3b_1x1(x)
    # 73 x 73 x 80
    x = self.Conv2d_4a_3x3(x)
    # 71 x 71 x 192
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 35 x 35 x 192
    x = self.Mixed_5b(x)
    # 35 x 35 x 256
    x = self.Mixed_5c(x)
    # 35 x 35 x 288
    x = self.Mixed_5d(x)
    # 35 x 35 x 288
    x = self.Mixed_6a(x)
    # 17 x 17 x 768
    x = self.Mixed_6b(x)
    # 17 x 17 x 768
    x = self.Mixed_6c(x)
    # 17 x 17 x 768
    x = self.Mixed_6d(x)
    # 17 x 17 x 768
    x = self.Mixed_6e(x)
    # 17 x 17 x 768
    if self.training and self.aux_logits:
        aux = self.AuxLogits(x)
    # 17 x 17 x 768
    x = self.Mixed_7a(x)
    # 8 x 8 x 1280
    x = self.Mixed_7b(x)
    # 8 x 8 x 2048
    x = self.Mixed_7c(x)
    # 8 x 8 x 2048
    x = F.avg_pool2d(x, kernel_size=8)
    # 1 x 1 x 2048
    x = F.dropout(x, training=self.training)
    # 1 x 1 x 2048
    x = x.view(x.size(0), -1)
    # 2048
    x = self.fc(x)
    # 1000 (num_classes)
    if self.training and self.aux_logits:
        return x, aux
    return x

def f(input, params, mode):
    x = F.conv2d(input, params['conv0'], padding=1)
    g0 = group(x, params, 'group0', mode, 1)
    g1 = group(g0, params, 'group1', mode, 2)
    g2 = group(g1, params, 'group2', mode, 2)
    o = F.relu(utils.batch_norm(g2, params, 'bn', mode))
    o = F.avg_pool2d(o, 8, 1, 0)
    o = o.view(o.size(0), -1)
    o = F.linear(o, params['fc.weight'], params['fc.bias'])
    return o

def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = self.layer5(out)
    out = F.avg_pool2d(out, 8)
    out = self.linear(out.view(out.size(0), -1))
    return out

def logits(self, features):
    x = F.avg_pool2d(features, kernel_size=8)  # 1 x 1 x 2048
    x = F.dropout(x, training=self.training)   # 1 x 1 x 2048
    x = x.view(x.size(0), -1)                  # 2048
    x = self.last_linear(x)                    # 1000 (num_classes)
    if self.training and self.aux_logits:
        aux = self._out_aux
        self._out_aux = None
        return x, aux
    return x

def forward(self, x):
    out = F.relu(self.bn1(self.conv1(x)))
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = F.avg_pool2d(out, out.size()[3])
    out = out.view(out.size(0), -1)
    self.feat = out
    out = self.linear(out)
    return out

def forward(self, x):
    out = self.conv1(x)
    out = self.trans1(self.dense1(out))
    out = self.trans2(self.dense2(out))
    out = self.trans3(self.dense3(out))
    out = self.dense4(out)
    out = F.avg_pool2d(F.relu(self.bn(out)), 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def logits(self, features):
    if not self.training and self.test_time_pool:
        x = F.avg_pool2d(features, kernel_size=7, stride=1)
        out = self.classifier(x)
        # The extra test-time pool should be pooling an img_size//32 - 6 size patch
        out = adaptive_avgmax_pool2d(out, pool_type='avgmax')
    else:
        x = adaptive_avgmax_pool2d(features, pool_type='avg')
        out = self.classifier(x)
    return out.view(out.size(0), -1)

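# adaptive_avgmax_pool2d is used above (and in the first snippet) but not
# defined in this section. A plausible minimal sketch, modeled on the helper of
# the same name in timm / pytorch-dpn-pretrained; this is an assumption, not
# necessarily the source's exact implementation:
import torch.nn.functional as F

def adaptive_avgmax_pool2d(x, pool_type='avg', output_size=1):
    if pool_type == 'avgmax':
        # average of adaptive avg- and max-pooling over the same output size
        x_avg = F.adaptive_avg_pool2d(x, output_size)
        x_max = F.adaptive_max_pool2d(x, output_size)
        return 0.5 * (x_avg + x_max)
    elif pool_type == 'max':
        return F.adaptive_max_pool2d(x, output_size)
    return F.adaptive_avg_pool2d(x, output_size)
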
def forward(self, x):
    out = self.conv1(x)
    out = self.layer1(out)
    out = self.layer2(out)
    out = self.layer3(out)
    out = self.layer4(out)
    out = F.avg_pool2d(out, 4)
    out = out.view(out.size(0), -1)
    out = self.linear(out)
    return out

def forward(self, x):
    return F.avg_pool2d(x, kernel_size=x.size()[2:])

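# The kernel_size=x.size()[2:] pattern above keeps the [N, C, 1, 1] shape and is
# equivalent to adaptive average pooling to 1x1; a quick check under that
# assumption (arbitrary shapes):
import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 7, 5)
assert torch.allclose(F.avg_pool2d(x, kernel_size=x.size()[2:]),
                      F.adaptive_avg_pool2d(x, 1), atol=1e-6)
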
def forward(self, x):
    if self.transform_input:
        x = x.clone()
        x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
    # 299 x 299 x 3
    x = self.Conv2d_1a_3x3(x)
    # 149 x 149 x 32
    x = self.Conv2d_2a_3x3(x)
    # 147 x 147 x 32
    x = self.Conv2d_2b_3x3(x)
    # 147 x 147 x 64
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 73 x 73 x 64
    x = self.Conv2d_3b_1x1(x)
    # 73 x 73 x 80
    x = self.Conv2d_4a_3x3(x)
    # 71 x 71 x 192
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 35 x 35 x 192
    x = self.Mixed_5b(x)
    # 35 x 35 x 256
    x = self.Mixed_5c(x)
    # 35 x 35 x 288
    x = self.Mixed_5d(x)
    # 35 x 35 x 288
    x = self.Mixed_6a(x)
    # 17 x 17 x 768
    x = self.Mixed_6b(x)
    # 17 x 17 x 768
    x = self.Mixed_6c(x)
    # 17 x 17 x 768
    x = self.Mixed_6d(x)
    # 17 x 17 x 768
    x = self.Mixed_6e(x)
    # 17 x 17 x 768
    if self.training and self.aux_logits:
        aux = self.AuxLogits(x)
    # 17 x 17 x 768
    x = self.Mixed_7a(x)
    # 8 x 8 x 1280
    x = self.Mixed_7b(x)
    # 8 x 8 x 2048
    x = self.Mixed_7c(x)
    # 8 x 8 x 2048
    x = F.avg_pool2d(x, kernel_size=8)
    # 1 x 1 x 2048
    x = F.dropout(x, training=self.training)
    # 1 x 1 x 2048
    x = x.view(x.size(0), -1)
    # 2048
    # x = self.fc(x)
    x1 = self.classifier1(x)
    x2 = self.classifier2(x)
    x3 = self.classifier3(x)
    x4 = self.classifier4(x)
    x5 = self.classifier5(x)
    x6 = self.classifier6(x)
    x7 = self.classifier7(x)
    x8 = self.classifier8(x)
    x9 = self.classifier9(x)
    x10 = self.classifier10(x)
    return torch.cat((x1, x2, x3, x4, x5, x6, x7, x8, x9, x10), 1)

def forward(self, x):
    return F.avg_pool2d(x, 2)

def avg_pool2d(a):
    return F.avg_pool2d(a, 2)

def forward(self, xyz: torch.Tensor,
            features: torch.Tensor = None,
            inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
    r"""
    Parameters
    ----------
    xyz : torch.Tensor
        (B, N, 3) tensor of the xyz coordinates of the features
    features : torch.Tensor
        (B, C, N) tensor of the descriptors of the features
    inds : torch.Tensor
        (B, npoint) tensor that stores index to the xyz points (values in 0..N-1)

    Returns
    -------
    new_xyz : torch.Tensor
        (B, npoint, 3) tensor of the new features' xyz
    new_features : torch.Tensor
        (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
    inds : torch.Tensor
        (B, npoint) tensor of the inds
    """
    new_xyz = xyz[:, :self.npoint, :].contiguous()     # (B, 6*N, 3) or (B, 12*N, 3)
    target_xyz = xyz[:, self.npoint:, :].contiguous()  # (B, 2*1024, 3) or (B, 1024, 3)

    if not self.ret_unique_cnt:
        grouped_features, grouped_xyz = self.grouper(
            target_xyz, new_xyz,
            features[:, :, self.npoint:].contiguous())  # (B, C, npoint, nsample)
    else:
        grouped_features, grouped_xyz, unique_cnt = self.grouper(
            target_xyz, new_xyz, features[:, :, self.npoint:].contiguous()
        )  # (B, C, npoint, nsample), (B,3,npoint,nsample), (B,npoint)

    new_features = self.mlp_module(grouped_features)  # (B, mlp[-1], npoint, nsample)

    if self.pooling == 'max':
        new_features = F.max_pool2d(
            new_features,
            kernel_size=[1, new_features.size(3)])  # (B, mlp[-1], npoint, 1)
    elif self.pooling == 'avg':
        new_features = F.avg_pool2d(
            new_features,
            kernel_size=[1, new_features.size(3)])  # (B, mlp[-1], npoint, 1)
    elif self.pooling == 'rbf':
        # Use radial basis function kernel for weighted sum of features
        # (normalized by nsample and sigma)
        # Ref: https://en.wikipedia.org/wiki/Radial_basis_function_kernel
        rbf = torch.exp(-1 * grouped_xyz.pow(2).sum(1, keepdim=False) /
                        (self.sigma ** 2) / 2)  # (B, npoint, nsample)
        new_features = torch.sum(
            new_features * rbf.unsqueeze(1), -1,
            keepdim=True) / float(self.nsample)  # (B, mlp[-1], npoint, 1)
    new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)

    if not self.ret_unique_cnt:
        return new_xyz, new_features, inds
    else:
        return new_xyz, new_features, inds, unique_cnt

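# A standalone sketch of the pooling step above: grouped point features of
# shape (B, C, npoint, nsample) are reduced over the sample axis with a
# [1, nsample] kernel. The shapes here are arbitrary assumptions.
import torch
import torch.nn.functional as F

B, C, npoint, nsample = 2, 64, 128, 16
grouped = torch.randn(B, C, npoint, nsample)
pooled_max = F.max_pool2d(grouped, kernel_size=[1, nsample])  # (B, C, npoint, 1)
pooled_avg = F.avg_pool2d(grouped, kernel_size=[1, nsample])  # (B, C, npoint, 1)
assert pooled_max.shape == pooled_avg.shape == (B, C, npoint, 1)
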
def forward(self, xset):
    # X_h, X_l = x
    yset = []
    ysets = []
    for j in range(self.outbranch):
        ysets.append([])

    if isinstance(xset, torch.Tensor):
        xset = [xset, ]

    for i in range(self.inbranch):
        if xset[i] is None:
            continue
        if self.stride == 2:
            x = F.avg_pool2d(xset[i], (2, 2), stride=2)
        else:
            x = xset[i]
        begin_x = int(round(self.in_channels * self.alpha_in[i] / self.groups))
        end_x = int(round(self.in_channels * self.alpha_in[i + 1] / self.groups))
        if begin_x == end_x:
            continue
        for j in range(self.outbranch):
            begin_y = int(round(self.out_channels * self.alpha_out[j]))
            end_y = int(round(self.out_channels * self.alpha_out[j + 1]))
            if begin_y == end_y:
                continue
            scale_factor = 2 ** (i - j)
            if self.bias is not None:
                this_bias = self.bias[begin_y:end_y]
            else:
                this_bias = None
            this_weight = self.weight[begin_y:end_y, begin_x:end_x, :, :]
            if scale_factor > 1:
                y = F.conv2d(x, this_weight, this_bias, 1,
                             self.padding, self.dilation, self.groups)
                y = F.interpolate(y, scale_factor=scale_factor,
                                  mode=up_kwargs['mode'])
            elif scale_factor < 1:
                x_resize = F.max_pool2d(x, int(round(1.0 / scale_factor)),
                                        stride=int(round(1.0 / scale_factor)))
                y = F.conv2d(x_resize, this_weight, this_bias, 1,
                             self.padding, self.dilation, self.groups)
            else:
                y = F.conv2d(x, this_weight, this_bias, 1,
                             self.padding, self.dilation, self.groups)
            ysets[j].append(y)

    for j in range(self.outbranch):
        if len(ysets[j]) != 0:
            yset.append(sum(ysets[j]))
        else:
            yset.append(None)
    del ysets
    return yset

def forward(self, x, vars=None, bn_training=True):
    """
    This function can be called during fine-tuning; however, in fine-tuning we
    don't wish to update running_mean/running_var. Though the weights/bias of
    bn are updated, they have been separated out as fast_weights. To avoid
    updating running_mean/running_var, set bn_training=False; the weight/bias
    will still be updated via fast_weights without dirtying the initial theta
    parameters.
    :param x: [b, 1, 28, 28]
    :param vars:
    :param bn_training: set False to not update batch-norm statistics
    :return: x, loss, likelihood, kld
    """
    if vars is None:
        vars = self.vars

    idx = 0
    bn_idx = 0

    for name, param in self.config:
        if name == 'conv2d':  # '==', not 'is': identity comparison of str literals is a bug
            w, b = vars[idx], vars[idx + 1]
            # remember to keep synchronized with forward_encoder and forward_decoder!
            x = F.conv2d(x, w, b, stride=param[4], padding=param[5])
            idx += 2
            # print(name, param, '\tout:', x.shape)
        elif name == 'convt2d':
            w, b = vars[idx], vars[idx + 1]
            # remember to keep synchronized with forward_encoder and forward_decoder!
            x = F.conv_transpose2d(x, w, b, stride=param[4], padding=param[5])
            idx += 2
            # print(name, param, '\tout:', x.shape)
        elif name == 'linear':
            w, b = vars[idx], vars[idx + 1]
            x = F.linear(x, w, b)
            idx += 2
            # print('forward:', idx, x.norm().item())
        elif name == 'bn':
            w, b = vars[idx], vars[idx + 1]
            running_mean, running_var = self.vars_bn[bn_idx], self.vars_bn[bn_idx + 1]
            x = F.batch_norm(x, running_mean, running_var,
                             weight=w, bias=b, training=bn_training)
            idx += 2
            bn_idx += 2
        elif name == 'flatten':
            # print(x.shape)
            x = x.view(x.size(0), -1)
        elif name == 'reshape':
            # [b, 8] => [b, 2, 2, 2]
            x = x.view(x.size(0), *param)
        elif name == 'relu':
            x = F.relu(x, inplace=param[0])
        elif name == 'leakyrelu':
            x = F.leaky_relu(x, negative_slope=param[0], inplace=param[1])
        elif name == 'tanh':
            x = F.tanh(x)
        elif name == 'sigmoid':
            x = torch.sigmoid(x)
        elif name == 'upsample':
            x = F.upsample_nearest(x, scale_factor=param[0])
        elif name == 'max_pool2d':
            x = F.max_pool2d(x, param[0], param[1], param[2])
        elif name == 'avg_pool2d':
            x = F.avg_pool2d(x, param[0], param[1], param[2])
        else:
            raise NotImplementedError

    # make sure the variable lists were consumed exactly
    assert idx == len(vars)
    assert bn_idx == len(self.vars_bn)

    return x

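# A minimal, self-contained sketch of the functional, fast-weights forward used
# above (MAML-style): the network's parameters live in an external list so a
# meta-learner can substitute adapted copies. The tiny architecture and all
# shapes are illustrative assumptions.
import torch
import torch.nn.functional as F

w1 = torch.randn(8, 1, 3, 3, requires_grad=True)        # conv2d weight
b1 = torch.zeros(8, requires_grad=True)                 # conv2d bias
w2 = torch.randn(10, 8 * 14 * 14, requires_grad=True)   # linear weight
b2 = torch.zeros(10, requires_grad=True)                # linear bias

def functional_forward(x, fast_weights):
    w1, b1, w2, b2 = fast_weights
    x = F.conv2d(x, w1, b1, stride=1, padding=1)  # [b, 8, 28, 28]
    x = F.relu(x)
    x = F.avg_pool2d(x, 2, 2, 0)                  # [b, 8, 14, 14]
    x = x.view(x.size(0), -1)
    return F.linear(x, w2, b2)                    # [b, 10]

logits = functional_forward(torch.randn(4, 1, 28, 28), [w1, b1, w2, b2])
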
def fn(di):
    x = F.avg_pool2d(di['in'], (height, width))
    x = torch.squeeze(x, 2)
    return {'out': torch.squeeze(x, 2)}

def forward(self, x):
    features = None
    # --> fixed-size input: batch x 3 x 299 x 299
    x = nn.Upsample(size=(299, 299), mode='bilinear')(x)
    # 299 x 299 x 3
    x = self.Conv2d_1a_3x3(x)
    # 149 x 149 x 32
    x = self.Conv2d_2a_3x3(x)
    # 147 x 147 x 32
    x = self.Conv2d_2b_3x3(x)
    # 147 x 147 x 64
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 73 x 73 x 64
    x = self.Conv2d_3b_1x1(x)
    # 73 x 73 x 80
    x = self.Conv2d_4a_3x3(x)
    # 71 x 71 x 192
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 35 x 35 x 192
    x = self.Mixed_5b(x)
    # 35 x 35 x 256
    x = self.Mixed_5c(x)
    # 35 x 35 x 288
    x = self.Mixed_5d(x)
    # 35 x 35 x 288
    x = self.Mixed_6a(x)
    # 17 x 17 x 768
    x = self.Mixed_6b(x)
    # 17 x 17 x 768
    x = self.Mixed_6c(x)
    # 17 x 17 x 768
    x = self.Mixed_6d(x)
    # 17 x 17 x 768
    x = self.Mixed_6e(x)
    # 17 x 17 x 768

    # image region features
    features = x
    # 17 x 17 x 768

    x = self.Mixed_7a(x)
    # 8 x 8 x 1280
    x = self.Mixed_7b(x)
    # 8 x 8 x 2048
    x = self.Mixed_7c(x)
    # 8 x 8 x 2048
    x = F.avg_pool2d(x, kernel_size=8)
    # 1 x 1 x 2048
    # x = F.dropout(x, training=self.training)
    # 1 x 1 x 2048
    x = x.view(x.size(0), -1)
    # 2048

    # global image features
    cnn_code = self.emb_cnn_code(x)
    # 512
    if features is not None:
        features = self.emb_features(features)
    return features, cnn_code

def forward(self, x):
    # y = self.inception(x)
    ## weird result in training mode, probably a bug in inception module?
    # if self.training:
    #     if self.normalize:
    #         return y[0]/torch.norm(y[0],2,1).repeat(1, self.feature_size)
    #     else:
    #         return y[0]
    # else:
    #     if self.normalize:
    #         return y/torch.norm(y,2,1).repeat(1, self.feature_size)
    #     else:
    #         return y
    if self.inception.transform_input:
        x = x.clone()
        x[:, 0] = x[:, 0] * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x[:, 1] = x[:, 1] * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x[:, 2] = x[:, 2] * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
    # 299 x 299 x 3
    x = self.inception.Conv2d_1a_3x3(x)
    # 149 x 149 x 32
    x = self.inception.Conv2d_2a_3x3(x)
    # 147 x 147 x 32
    x = self.inception.Conv2d_2b_3x3(x)
    # 147 x 147 x 64
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 73 x 73 x 64
    x = self.inception.Conv2d_3b_1x1(x)
    # 73 x 73 x 80
    x = self.inception.Conv2d_4a_3x3(x)
    # 71 x 71 x 192
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # 35 x 35 x 192
    x = self.inception.Mixed_5b(x)
    # 35 x 35 x 256
    x = self.inception.Mixed_5c(x)
    # 35 x 35 x 288
    x = self.inception.Mixed_5d(x)
    # 35 x 35 x 288
    x = self.inception.Mixed_6a(x)
    # 17 x 17 x 768
    x = self.inception.Mixed_6b(x)
    # 17 x 17 x 768
    x = self.inception.Mixed_6c(x)
    # 17 x 17 x 768
    x = self.inception.Mixed_6d(x)
    # 17 x 17 x 768
    x = self.inception.Mixed_6e(x)
    # 17 x 17 x 768
    if self.inception.training and self.inception.aux_logits:
        aux = self.inception.AuxLogits(x)
    # 17 x 17 x 768
    x = self.inception.Mixed_7a(x)
    # 8 x 8 x 1280
    x = self.inception.Mixed_7b(x)
    # 8 x 8 x 2048
    x = self.inception.Mixed_7c(x)
    # 8 x 8 x 2048
    x = F.avg_pool2d(x, kernel_size=8)
    # x = x.view(-1, self.feature_size)
    # 1 x 1 x 2048
    x = F.dropout(x, training=self.training)
    # 1 x 1 x 2048
    x = x.view(x.size(0), -1)
    # 2048
    x = self.inception.fc(x)
    if self.normalize:
        return x / torch.norm(x, 2, 1).repeat(1, self.feature_size)
    else:
        return x

def forward(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    x = self.layer1(x)

    l2_out = self.layer2(x)
    l2_reshape = l2_out.view(l2_out.size(0), l2_out.size(1), -1).transpose(2, 1)
    l2_ca, l2_attw = self.layer2_ca(l2_reshape, l2_reshape, l2_reshape)
    l2_ca = l2_ca.transpose(2, 1).view(l2_out.size()).contiguous()
    # l2_ca = F.relu(l2_ca, inplace=True)

    l3_out = self.layer3(l2_ca)
    l3_reshape = l3_out.view(l3_out.size(0), l3_out.size(1), -1).transpose(2, 1)
    l3_ca, l3_attw = self.layer3_ca(l3_reshape, l3_reshape, l3_reshape)
    l3_ca = l3_ca.transpose(2, 1).view(l3_out.size()).contiguous()
    # l3_ca = F.relu(l3_ca, inplace=True)

    l3_column = l3_ca.pow(2).mean(1).view(l3_ca.size(0), -1)
    l3_column_min = l3_column.min(1)[0].unsqueeze(1)
    l3_column = l3_column - l3_column_min
    l3_side = F.softmax(l3_column, 1).view(l3_ca.size(0), 1, l3_ca.size(2), l3_ca.size(3))

    l3_multipath = torch.cat([l3_ca] * self.num_bypath, 1)
    l4_out = self.layer4(l3_multipath)
    l4_outs = l4_out.split(2048, 1)

    # feature for spatial attention
    l4_fea = []
    l4_fsa = l4_outs[0]
    l4_column = l4_fsa.pow(2).mean(1).view(l4_fsa.size(0), -1)
    l4_column_min = l4_column.min(1)[0].unsqueeze(1)
    l4_column = l4_column - l4_column_min
    l4_side = F.softmax(l4_column, 1).view(l4_fsa.size(0), 1, l4_fsa.size(2), l4_fsa.size(3))
    SideAdd = torch.add(l3_side, l4_side)
    l4_fsa = torch.mul(l4_fsa, SideAdd)
    l4_fea.append(l4_fsa)

    l4_fca = l4_outs[1]
    l4_fsa_reshape = l4_fsa.view(l4_fca.size(0), l4_fca.size(1), -1).transpose(2, 1)
    l4_reshape = l4_fca.view(l4_fca.size(0), l4_fca.size(1), -1).transpose(2, 1)
    ##### query, key, value
    l4_ca, l4_attw = self.layer4_ca(l4_fsa_reshape, l4_reshape, l4_reshape)
    l4_ca = l4_ca.transpose(2, 1).view(l4_fca.size()).contiguous()
    # l4_ca = F.relu(l4_ca, inplace=True)
    l4_fea.append(l4_ca)

    # SideAdd = torch.add(l3_side, l4_side)
    # l4_fea = torch.mul(l4_fea, SideAdd)

    l2_avg = F.avg_pool2d(l2_ca, l2_ca.size()[2:])
    l2_rd = self.layer2_reduce(l2_avg)
    l2_rd = l2_rd.squeeze()
    l2_sm = self.layer2_cls(l2_rd)

    # l3_ca = torch.mul(l3_ca, SideAdd)
    l3_avg = F.avg_pool2d(l3_ca, l3_ca.size()[2:])
    l3_rd = self.layer3_reduce(l3_avg)
    l3_rd = l3_rd.squeeze()
    l3_sm = self.layer3_cls(l3_rd)
    # l3_sm = self.cls(l3_rd)

    l4_rd = []
    l4_sm = []
    # current implementation: layer4 outputs use different classifiers;
    # maybe using the same one is better
    for cur_red, cur_cls, cur_fea in zip(self.layer4_reduce, self.layer4_cls, l4_fea):
        cur_avg = F.avg_pool2d(cur_fea, cur_fea.size()[2:])
        cur_rd = cur_red(cur_avg)
        cur_rd = cur_rd.squeeze()
        cur_sm = cur_cls(cur_rd)
        l4_rd.append(cur_rd)
        # l4_rd.append(cur_sm)
        l4_sm.append(cur_sm)

    g_rd = [l2_rd, l3_rd, ] + l4_rd
    g_sm = [l2_sm, l3_sm] + l4_sm
    # g_rd = [l3_rd, l4_rd]
    # g_sm = [l3_sm, l4_sm]

    net_rd = torch.cat(g_rd, 1)
    if self.norm:
        net_rd = F.normalize(net_rd)
    if self.dropout > 0:
        net_rd = self.drop(net_rd)

    # !!!! in resnet_channel3 l2_side is meaningless !!!!
    if self.test_stage:
        return [l2_attw, l3_attw, l4_attw], g_sm, g_rd, l4_side, l3_side, l4_side, net_rd
    return g_sm, g_rd, l4_side, l3_side, l4_side, net_rd

def _shortcut(self, x):
    if self.learned_sc:
        x = self.conv1x1(x)
    if self.downsample:
        x = F.avg_pool2d(x, 2)
    return x

def forward(self, xyz: torch.Tensor,
            features: torch.Tensor = None,
            inds: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
    r"""
    Parameters
    ----------
    xyz : torch.Tensor
        (B, N, 3) tensor of the xyz coordinates of the features
    features : torch.Tensor
        (B, C, N) tensor of the descriptors of the features
    inds : torch.Tensor
        (B, npoint) tensor that stores index to the xyz points (values in 0..N-1)

    Returns
    -------
    new_xyz : torch.Tensor
        (B, npoint, 3) tensor of the new features' xyz
    new_features : torch.Tensor
        (B, \sum_k(mlps[k][-1]), npoint) tensor of the new_features descriptors
    inds : torch.Tensor
        (B, npoint) tensor of the inds
    """
    xyz_flipped = xyz.transpose(1, 2).contiguous()
    if inds is None:
        inds = pointnet2_utils.furthest_point_sample(xyz, self.npoint)
    else:
        assert (inds.shape[1] == self.npoint)
    new_xyz = pointnet2_utils.gather_operation(
        xyz_flipped, inds).transpose(1, 2).contiguous() if self.npoint is not None else None

    # if not self.ret_unique_cnt:
    grouped_features_local, grouped_xyz_local = self.grouper1(
        xyz, new_xyz, features)
    grouped_features_global, grouped_xyz_global = self.grouper2(
        xyz, new_xyz, features)  # (B, C, npoint, nsample)
    # else:
    #     grouped_features, grouped_xyz, unique_cnt = self.grouper(
    #         xyz, new_xyz, features
    #     )  # (B, C, npoint, nsample), (B,3,npoint,nsample), (B,npoint)

    new_features_local = self.mlp_module1(
        grouped_features_local)   # (B, mlp[-1], npoint, nsample)
    new_features_global = self.mlp_module2(
        grouped_features_global)  # (B, mlp[-1], npoint, nsample)

    if self.pooling == 'max':
        new_features_local = F.max_pool2d(
            new_features_local,
            kernel_size=[1, new_features_local.size(3)])   # (B, mlp[-1], npoint, 1)
        new_features_global = F.max_pool2d(
            new_features_global,
            kernel_size=[1, new_features_global.size(3)])  # (B, mlp[-1], npoint, 1)
        new_features = torch.cat([
            new_features_local,
            new_features_global.repeat(1, 1, self.npoint, 1)
        ], 1)
    elif self.pooling == 'avg':
        # NOTE: the original referenced an undefined `new_features` here; this
        # mirrors the 'max' branch above, which appears to be the intent.
        new_features_local = F.avg_pool2d(
            new_features_local,
            kernel_size=[1, new_features_local.size(3)])   # (B, mlp[-1], npoint, 1)
        new_features_global = F.avg_pool2d(
            new_features_global,
            kernel_size=[1, new_features_global.size(3)])  # (B, mlp[-1], npoint, 1)
        new_features = torch.cat([
            new_features_local,
            new_features_global.repeat(1, 1, self.npoint, 1)
        ], 1)
    elif self.pooling == 'rbf':
        # Use radial basis function kernel for weighted sum of features
        # (normalized by nsample and sigma)
        # Ref: https://en.wikipedia.org/wiki/Radial_basis_function_kernel
        # NOTE: `grouped_xyz` and `new_features` were undefined in this stale
        # branch; the local variants are the closest available equivalents.
        rbf = torch.exp(-1 * grouped_xyz_local.pow(2).sum(1, keepdim=False) /
                        (self.sigma ** 2) / 2)  # (B, npoint, nsample)
        new_features = torch.sum(
            new_features_local * rbf.unsqueeze(1), -1,
            keepdim=True) / float(self.nsample)  # (B, mlp[-1], npoint, 1)

    new_features = new_features.squeeze(-1)  # (B, mlp[-1], npoint)

    if not self.ret_unique_cnt:
        return new_xyz, torch.squeeze(new_features), inds
    else:
        return new_xyz, torch.squeeze(new_features), inds, unique_cnt

def forward(self, x, path, last):
    M = self.args.M
    div = 1
    p = 0.5

    y = self.conv1[0](x)
    for j in range(1, self.args.M):
        if (path[0][j] == 1):
            y += self.conv1[j](x)
    x = F.relu(y)

    y = self.conv2[0](x)
    for j in range(1, self.args.M):
        if (path[1][j] == 1):
            y += self.conv2[j](x)
    x = y + x
    x = F.relu(x)

    y = self.conv3[0](x)
    for j in range(1, self.args.M):
        if (path[2][j] == 1):
            y += self.conv3[j](x)
    x = y + x
    x = F.relu(x)

    y = self.conv4[-1](x)
    for j in range(self.args.M):
        if (path[3][j] == 1):
            y += self.conv4[j](x)
    x = y
    x = F.relu(x)

    y = self.conv5[0](x)
    for j in range(1, self.args.M):
        if (path[4][j] == 1):
            y += self.conv5[j](x)
    x = y + x
    x = F.relu(x)
    x = self.pool1(x)

    y = self.conv6[-1](x)
    for j in range(self.args.M):
        if (path[5][j] == 1):
            y += self.conv6[j](x)
    x = y
    x = F.relu(x)

    y = self.conv7[0](x)
    for j in range(1, self.args.M):
        if (path[6][j] == 1):
            y += self.conv7[j](x)
    x = y
    x = F.relu(x)
    x = self.pool2(x)

    y = self.conv8[-1](x)
    for j in range(self.args.M):
        if (path[7][j] == 1):
            y += self.conv8[j](x)
    x = y
    x = F.relu(x)

    y = self.conv9[0](x)
    for j in range(1, self.args.M):
        if (path[8][j] == 1):
            y += self.conv9[j](x)
    x = y + x
    x = F.relu(x)

    # x = self.pool3(x)
    x = F.avg_pool2d(x, (8, 8), stride=(1, 1))
    x = x.view(-1, self.a5)
    x = self.final_layers[last](x)
    return x

def forward(self, x):
    features = self.shared(x)
    out = F.relu(features, inplace=True)
    out = F.avg_pool2d(out, kernel_size=7).view(features.size(0), -1)
    out = self.classifier(out)
    return out

def forward(self, x):
    x_conv_global = self.base(x)  # [32, 2048, 8, 4]
    f_global = F.avg_pool2d(
        x_conv_global, x_conv_global.size()[2:]).squeeze()  # [32, 2048]
    return f_global

def compute_generator_loss(self, input_label, input_semantics, real_image,
                           ref_label=None, ref_semantics=None, ref_image=None,
                           self_ref=None):
    G_losses = {}
    generate_out = self.generate_fake(
        input_semantics, real_image, ref_semantics=ref_semantics,
        ref_image=ref_image, self_ref=self_ref)

    if 'loss_novgg_featpair' in generate_out and generate_out['loss_novgg_featpair'] is not None:
        G_losses['no_vgg_feat'] = generate_out['loss_novgg_featpair']

    if self.opt.warp_cycle_w > 0:
        if not self.opt.warp_patch:
            ref = F.avg_pool2d(ref_image, self.opt.warp_stride)
        else:
            ref = ref_image
        G_losses['G_warp_cycle'] = F.l1_loss(
            generate_out['warp_cycle'], ref) * self.opt.warp_cycle_w
        if self.opt.two_cycle:
            real = F.avg_pool2d(real_image, self.opt.warp_stride)
            G_losses['G_warp_cycle'] += F.l1_loss(
                generate_out['warp_i2r2i'], real) * self.opt.warp_cycle_w

    if self.opt.warp_self_w > 0:
        # real = F.avg_pool2d(real_image, self.opt.warp_stride)
        # warp = F.avg_pool2d(generate_out['warp_out'], self.opt.warp_stride)
        sample_weights = (self_ref[:, 0, 0, 0] / (sum(self_ref[:, 0, 0, 0]) + 1e-5)
                          ).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
        G_losses['G_warp_self'] = torch.mean(
            F.l1_loss(generate_out['warp_out'], real_image, reduce=False)
            * sample_weights) * self.opt.warp_self_w

    pred_fake, pred_real, seg, fake_cam_logit, real_cam_logit = self.discriminate(
        input_semantics, generate_out['fake_image'], real_image)

    G_losses['GAN'] = self.criterionGAN(
        pred_fake, True, for_discriminator=False) * self.opt.weight_gan

    if not self.opt.no_ganFeat_loss:
        num_D = len(pred_fake)
        GAN_Feat_loss = self.FloatTensor(1).fill_(0)
        for i in range(num_D):  # for each discriminator
            # last output is the final prediction, so we exclude it
            num_intermediate_outputs = len(pred_fake[i]) - 1
            for j in range(num_intermediate_outputs):  # for each layer output
                unweighted_loss = self.criterionFeat(
                    pred_fake[i][j], pred_real[i][j].detach())
                GAN_Feat_loss += unweighted_loss * self.opt.lambda_feat / num_D
        G_losses['GAN_Feat'] = GAN_Feat_loss

    fake_features = self.vggnet_fix(
        generate_out['fake_image'], ['r12', 'r22', 'r32', 'r42', 'r52'],
        preprocess=True)
    sample_weights = (self_ref[:, 0, 0, 0] / (sum(self_ref[:, 0, 0, 0]) + 1e-5)
                      ).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)

    weights = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
    loss = 0
    for i in range(len(generate_out['real_features'])):
        loss += weights[i] * util.weighted_l1_loss(
            fake_features[i], generate_out['real_features'][i].detach(),
            sample_weights)
    G_losses['fm'] = loss * self.opt.lambda_vgg * self.opt.fm_ratio

    feat_loss = util.mse_loss(
        fake_features[self.perceptual_layer],
        generate_out['real_features'][self.perceptual_layer].detach())
    G_losses['perc'] = feat_loss * self.opt.weight_perceptual

    G_losses['contextual'] = self.get_ctx_loss(
        fake_features, generate_out['ref_features']
    ) * self.opt.lambda_vgg * self.opt.ctx_w

    if self.opt.warp_mask_losstype != 'none':
        ref_label = F.interpolate(ref_label.float(), scale_factor=0.25,
                                  mode='nearest').long().squeeze(1)
        gt_label = F.interpolate(input_label.float(), scale_factor=0.25,
                                 mode='nearest').long().squeeze(1)
        weights = []
        for i in range(ref_label.shape[0]):
            ref_label_uniq = torch.unique(ref_label[i])
            gt_label_uniq = torch.unique(gt_label[i])
            zero_label = [it for it in gt_label_uniq if it not in ref_label_uniq]
            weight = torch.ones_like(gt_label[i]).float()
            for j in zero_label:
                weight[gt_label[i] == j] = 0
            weight[gt_label[i] == 0] = 0  # no loss from unknown class
            weights.append(weight.unsqueeze(0))
        weights = torch.cat(weights, dim=0)
        # G_losses['mask'] = (F.cross_entropy(warp_mask, gt_label, reduce=False)
        #                     * weights.float()).sum() / (weights.sum() + 1e-5) * self.opt.weight_mask
        G_losses['mask'] = (
            F.nll_loss(torch.log(generate_out['warp_mask'] + 1e-10),
                       gt_label, reduce=False)
            * weights).sum() / (weights.sum() + 1e-5) * self.opt.weight_mask

    # self.fake_image = fake_image
    return G_losses, generate_out

def forward(self, x):
    x = self.conv(F.relu(self.bn(x)))
    x = F.avg_pool2d(x, 2)
    return x

def forward(self, x, vars=None, bn_training=False, feature=False):
    """
    This function can be called during fine-tuning; however, in fine-tuning we
    don't wish to update running_mean/running_var. Though the weights/bias of
    bn are updated, they have been separated out as fast_weights. To avoid
    updating running_mean/running_var, set bn_training=False; the weight/bias
    will still be updated via fast_weights without dirtying the initial theta
    parameters.
    :param x: [b, 1, 28, 28]
    :param vars:
    :param bn_training: set False to not update batch-norm statistics
    :return: x, loss, likelihood, kld
    """
    cat_var = False
    cat_list = []

    if vars is None:
        vars = self.vars

    idx = 0
    bn_idx = 0

    try:
        for (name, param, extra_name) in self.config:
            # assert(name == "conv2d")
            if name == 'conv2d':
                w, b = vars[idx], vars[idx + 1]
                x = F.conv2d(x, w, b, stride=param[4], padding=param[5])
                idx += 2
                # print(name, param, '\tout:', x.shape)
            elif name == 'convt2d':
                w, b = vars[idx], vars[idx + 1]
                x = F.conv_transpose2d(x, w, b, stride=param[4], padding=param[5])
                idx += 2
            elif name == 'linear':
                # ipdb.set_trace()
                if extra_name == 'cosine':
                    w = F.normalize(vars[idx])
                    x = F.normalize(x)
                    x = F.linear(x, w)
                    idx += 1
                else:
                    w, b = vars[idx], vars[idx + 1]
                    x = F.linear(x, w, b)
                    idx += 2
                if cat_var:
                    cat_list.append(x)
            elif name == 'rep':
                # print('rep')
                # print(x.shape)
                if feature:
                    return x
            elif name == "cat_start":
                cat_var = True
                cat_list = []
            elif name == "cat":
                cat_var = False
                x = torch.cat(cat_list, dim=1)
            elif name == 'bn':
                w, b = vars[idx], vars[idx + 1]
                running_mean, running_var = self.vars_bn[bn_idx], self.vars_bn[bn_idx + 1]
                x = F.batch_norm(x, running_mean, running_var,
                                 weight=w, bias=b, training=bn_training)
                idx += 2
                bn_idx += 2
            elif name == 'flatten':
                # print('flatten')
                # print(x.shape)
                x = x.view(x.size(0), -1)
            elif name == 'reshape':
                # [b, 8] => [b, 2, 2, 2]
                x = x.view(x.size(0), *param)
            elif name == 'relu':
                x = F.relu(x, inplace=param[0])
            elif name == 'leakyrelu':
                x = F.leaky_relu(x, negative_slope=param[0], inplace=param[1])
            elif name == 'tanh':
                x = F.tanh(x)
            elif name == 'sigmoid':
                x = torch.sigmoid(x)
            elif name == 'upsample':
                x = F.upsample_nearest(x, scale_factor=param[0])
            elif name == 'max_pool2d':
                x = F.max_pool2d(x, param[0], param[1], param[2])
            elif name == 'avg_pool2d':
                x = F.avg_pool2d(x, param[0], param[1], param[2])
            else:
                print(name)
                raise NotImplementedError
    except:
        traceback.print_exc(file=sys.stdout)
        ipdb.set_trace()

    # make sure the variable lists were consumed exactly
    assert idx == len(vars)
    assert bn_idx == len(self.vars_bn)

    return x

def avg_pool2d_strides(a):
    return F.avg_pool2d(a, 2, stride=stride)  # `stride` is captured from the enclosing scope

def interpolation(
        G,
        identity: torch.Tensor,  # [C,H,W], dynamic range [0,255]; W & H must match G output resolution
        hair: torch.Tensor,      # [C,H,W], dynamic range [0,255]; W & H must match G output resolution
        *,
        alpha1=1.0,
        alpha2=1.0,
        num_steps=500,
        w_avg_samples=10000,
        initial_learning_rate=0.1,
        initial_noise_factor=5e-1,
        lr_rampdown_length=0.25,
        lr_rampup_length=0.05,
        noise_ramp_length=0.75,
        regularize_noise_weight=1e5,
        verbose=False,
        stop,
        device: torch.device,
        hair_img_filename,
        identity_img_filename):
    assert identity.shape == (G.img_channels, G.img_resolution, G.img_resolution)

    def logprint(*args):
        if verbose:
            print(*args)

    G = copy.deepcopy(G).eval().requires_grad_(False).to(device)  # type: ignore

    # Compute q stats.
    logprint(f'Computing W midpoint and stddev using {w_avg_samples} samples...')
    w_samples = np.random.RandomState(123).randn(w_avg_samples, G.num_ws)
    w_avg = np.mean(w_samples, axis=0, keepdims=True)  # [G.w_dim]
    w_std = (np.sum((w_samples - w_avg) ** 2) / w_avg_samples) ** 0.5

    # Setup noise inputs.
    noise_bufs = {
        name: buf
        for (name, buf) in G.synthesis.named_buffers() if 'noise_const' in name
    }

    seg_channel_dict = {'h': 0, 'i': 1, 'b': 2}

    vgg16 = torch.jit.load('vgg16.pt').eval().to(device)
    segNet = torch.load('segNet.pt').eval().to('cuda')
    segNet_mean = torch.from_numpy(np.load('train-mean.npy')).float().to(device)
    segNet_std = torch.from_numpy(np.load('train-std.npy')).float().to(device)

    def apply_seg_mask(x: torch.Tensor, channel: int):
        x_norm = (x - segNet_mean) / segNet_std
        segmentation = segNet(x_norm)['out']
        mask = torch.argmax(segmentation, dim=1)
        mask = mask.squeeze().to('cuda')
        x = x.squeeze()
        mask[mask != channel] = 1000
        x = torch.where(mask != 1000, x, torch.tensor(255.0).to('cuda'))
        return x.unsqueeze(0)

    # Features for identity image.
    if identity.shape[2] > 256:
        masked_identity_img = F.interpolate(
            identity.unsqueeze(0).to(torch.float32), size=(256, 256), mode='area')
        masked_identity_img = apply_seg_mask(
            masked_identity_img, seg_channel_dict['i']).to('cuda').to(torch.float32)
        masked_identity_img = masked_identity_img.to(torch.uint8)
        identity_features = vgg16(masked_identity_img, resize_images=False,
                                  return_lpips=True)

    # Features for hair image.
    if hair.shape[2] > 256:
        masked_hair_img = F.interpolate(
            hair.unsqueeze(0).to(torch.float32), size=(256, 256), mode='area')
        masked_hair_img = apply_seg_mask(
            masked_hair_img, seg_channel_dict['h']).to('cuda').to(torch.float32)
        masked_hair_img = masked_hair_img.to(torch.uint8)
        hair_features = vgg16(masked_hair_img, resize_images=False,
                              return_lpips=True)

    # Loading the projection of generated images to save time when debugging
    w_h_path = Path("generated-male/{}.npy".format(hair_img_filename))
    w_p_path = Path("generated-male/{}.npy".format(identity_img_filename))
    # Loading the projection of real images
    # w_h_path = Path("male/{}/18x512/{}-projected_w.npz".format(hair_img_filename, hair_img_filename))
    # w_p_path = Path("male/{}/18x512/{}-projected_w.npz".format(identity_img_filename, identity_img_filename))
    if w_h_path.exists():
        # w_h = torch.from_numpy(np.load(w_h_path)['w'][0]).to('cuda')
        # w_h = torch.from_numpy(np.load(w_h_path)['w']).to('cuda')
        w_h = torch.from_numpy(np.load(w_h_path)[0]).to('cuda')
    else:
        w_h = project_18(G, hair, device=torch.device('cuda'))[-1]
        np.savez(w_h_path, w=w_h.cpu().numpy())
    if w_p_path.exists():
        # w_p = torch.from_numpy(np.load(w_p_path)['w'][0]).to('cuda')
        # w_p = torch.from_numpy(np.load(w_p_path)['w']).to('cuda')
        w_p = torch.from_numpy(np.load(w_p_path)[0]).to('cuda')
    else:
        w_p = project_18(G, identity, device=torch.device('cuda'))[-1]
        np.savez(w_p_path, w=w_p.cpu().numpy())

    q_opt = torch.nn.Parameter(
        torch.randn(size=w_p.shape, dtype=torch.float32,
                    requires_grad=True, device=device))

    hair_distances = []
    identity_distances = []

    # list of all target ws through optimization
    w_out = torch.zeros([stop] + list(w_h.shape), dtype=torch.float32, device=device)

    optimizer = torch.optim.Adam([q_opt] + list(noise_bufs.values()),
                                 betas=(0.9, 0.999), lr=initial_learning_rate)

    # Init noise.
    for buf in noise_bufs.values():
        buf[:] = torch.randn_like(buf)
        buf.requires_grad = True

    print("starting the iterations..")
    for step in range(num_steps):
        print("iteration ", step)
        if step == stop:
            break

        # Learning rate schedule.
        t = step / num_steps
        w_noise_scale = w_std * initial_noise_factor * max(
            0.0, 1.0 - t / noise_ramp_length) ** 2
        lr_ramp = min(1.0, (1.0 - t) / lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / lr_rampup_length)
        lr = initial_learning_rate * lr_ramp
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # Synth images from opt_w.
        w_t = w_p + q_opt.sigmoid() * (w_h - w_p)
        ws = w_t.unsqueeze(0)

        # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
        target_image = G.synthesis(ws, noise_mode='const')
        target_image = (target_image + 1) * (255 / 2)
        if target_image.shape[2] > 256:
            target_image = F.interpolate(target_image, size=(256, 256), mode='area')

        hair_target_image = apply_seg_mask(target_image, seg_channel_dict['h'])
        identity_target_image = apply_seg_mask(target_image, seg_channel_dict['i'])

        # Features for synth images.
        target_hair_features = vgg16(hair_target_image, resize_images=False,
                                     return_lpips=True)
        target_identity_features = vgg16(identity_target_image,
                                         resize_images=False, return_lpips=True)

        # Compute loss
        hair_dist = (target_hair_features - hair_features).square().sum()
        identity_dist = (target_identity_features - identity_features).square().sum()
        hair_distances.append(hair_dist.item())
        identity_distances.append(identity_dist.item())

        # loss function
        dist = alpha1 * hair_dist + alpha2 * identity_dist

        # Noise regularization.
        reg_loss = 0.0
        for v in noise_bufs.values():
            noise = v[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=3)).mean() ** 2
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=2)).mean() ** 2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)
        loss = dist + reg_loss * regularize_noise_weight

        # Step
        print("optimizer steps")
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
        logprint(f'step {step+1:>4d}/{num_steps}: dist {dist:<4.2f} loss {float(loss):<5.2f}')

        # Save projected W for each optimization step.
        w_out[step] = w_t.detach()

        # Normalize noise.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()

    return w_out, hair_distances, identity_distances

def forward(self, x):
    x = F.relu(self.bn1(self.conv1(x)))
    out = F.relu(self.layers_0_bn1(self.layers_0_conv1(x)))
    out = F.relu(self.layers_0_bn2(self.layers_0_conv2(out)))
    out = self.layers_0_bn3(self.layers_0_conv3(out))
    x = out + self.layers_0_shortcut_1(self.layers_0_shortcut_0(x))
    out = F.relu(self.layers_1_bn1(self.layers_1_conv1(x)))
    out = F.relu(self.layers_1_bn2(self.layers_1_conv2(out)))
    out = self.layers_1_bn3(self.layers_1_conv3(out))
    x = out + self.layers_1_shortcut_1(self.layers_1_shortcut_0(x))
    out = F.relu(self.layers_2_bn1(self.layers_2_conv1(x)))
    out = F.relu(self.layers_2_bn2(self.layers_2_conv2(out)))
    out = self.layers_2_bn3(self.layers_2_conv3(out))
    x = out + self.layers_2_shortcut(x)
    out = F.relu(self.layers_3_bn1(self.layers_3_conv1(x)))
    out = F.relu(self.layers_3_bn2(self.layers_3_conv2(out)))
    out = self.layers_3_bn3(self.layers_3_conv3(out))
    x = out
    out = F.relu(self.layers_4_bn1(self.layers_4_conv1(x)))
    out = F.relu(self.layers_4_bn2(self.layers_4_conv2(out)))
    out = self.layers_4_bn3(self.layers_4_conv3(out))
    x = out + self.layers_4_shortcut(x)
    out = F.relu(self.layers_5_bn1(self.layers_5_conv1(x)))
    out = F.relu(self.layers_5_bn2(self.layers_5_conv2(out)))
    out = self.layers_5_bn3(self.layers_5_conv3(out))
    x = out + self.layers_5_shortcut(x)
    out = F.relu(self.layers_6_bn1(self.layers_6_conv1(x)))
    out = F.relu(self.layers_6_bn2(self.layers_6_conv2(out)))
    out = self.layers_6_bn3(self.layers_6_conv3(out))
    x = out
    out = F.relu(self.layers_7_bn1(self.layers_7_conv1(x)))
    out = F.relu(self.layers_7_bn2(self.layers_7_conv2(out)))
    out = self.layers_7_bn3(self.layers_7_conv3(out))
    x = out + self.layers_7_shortcut(x)
    out = F.relu(self.layers_8_bn1(self.layers_8_conv1(x)))
    out = F.relu(self.layers_8_bn2(self.layers_8_conv2(out)))
    out = self.layers_8_bn3(self.layers_8_conv3(out))
    x = out + self.layers_8_shortcut(x)
    out = F.relu(self.layers_9_bn1(self.layers_9_conv1(x)))
    out = F.relu(self.layers_9_bn2(self.layers_9_conv2(out)))
    out = self.layers_9_bn3(self.layers_9_conv3(out))
    x = out + self.layers_9_shortcut(x)
    out = F.relu(self.layers_10_bn1(self.layers_10_conv1(x)))
    out = F.relu(self.layers_10_bn2(self.layers_10_conv2(out)))
    out = self.layers_10_bn3(self.layers_10_conv3(out))
    x = out + self.layers_10_shortcut_1(self.layers_10_shortcut_0(x))
    out = F.relu(self.layers_11_bn1(self.layers_11_conv1(x)))
    out = F.relu(self.layers_11_bn2(self.layers_11_conv2(out)))
    out = self.layers_11_bn3(self.layers_11_conv3(out))
    x = out + self.layers_11_shortcut(x)
    out = F.relu(self.layers_12_bn1(self.layers_12_conv1(x)))
    out = F.relu(self.layers_12_bn2(self.layers_12_conv2(out)))
    out = self.layers_12_bn3(self.layers_12_conv3(out))
    x = out + self.layers_12_shortcut(x)
    out = F.relu(self.layers_13_bn1(self.layers_13_conv1(x)))
    out = F.relu(self.layers_13_bn2(self.layers_13_conv2(out)))
    out = self.layers_13_bn3(self.layers_13_conv3(out))
    x = out
    out = F.relu(self.layers_14_bn1(self.layers_14_conv1(x)))
    out = F.relu(self.layers_14_bn2(self.layers_14_conv2(out)))
    out = self.layers_14_bn3(self.layers_14_conv3(out))
    x = out + self.layers_14_shortcut(x)
    out = F.relu(self.layers_15_bn1(self.layers_15_conv1(x)))
    out = F.relu(self.layers_15_bn2(self.layers_15_conv2(out)))
    out = self.layers_15_bn3(self.layers_15_conv3(out))
    x = out + self.layers_15_shortcut(x)
    out = F.relu(self.layers_16_bn1(self.layers_16_conv1(x)))
    out = F.relu(self.layers_16_bn2(self.layers_16_conv2(out)))
    out = self.layers_16_bn3(self.layers_16_conv3(out))
    x = out + self.layers_16_shortcut_1(self.layers_16_shortcut_0(x))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.avg_pool2d(x, 4)
    x = x.view(x.size(0), -1)
    x = self.linear(x)
    return x

def forward(self, x):
    out = self.conv(F.relu(self.bn(x)))
    out = F.avg_pool2d(out, 2)
    return out

def downsample(inputs):
    # m = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
    # return m(inputs)
    return F.avg_pool2d(inputs, 2)

def attention_crop_drop(attention_maps, input_image):
    # start = time.time()
    B, N, W, H = input_image.shape
    input_tensor = input_image
    batch_size, num_parts, height, width = attention_maps.shape
    # attention_maps = torch.nn.functional.interpolate(attention_maps.detach(), size=(W, H), mode='bilinear')
    attention_maps = attention_maps.detach()
    part_weights = F.avg_pool2d(attention_maps.detach(), (W, H)).reshape(batch_size, -1)
    part_weights = torch.add(torch.sqrt(part_weights), 1e-12)
    part_weights = torch.div(part_weights,
                             torch.sum(part_weights, dim=1).unsqueeze(1)).cpu()
    part_weights = part_weights.numpy()
    # print(part_weights.shape)
    ret_imgs = []
    masks = []
    # print(part_weights[3])
    for i in range(batch_size):
        attention_map = attention_maps[i]
        part_weight = part_weights[i]
        selected_index = np.random.choice(np.arange(0, num_parts), 1, p=part_weight)[0]
        selected_index2 = np.random.choice(np.arange(0, num_parts), 1, p=part_weight)[0]

        ## create crop imgs
        mask = attention_map[selected_index, :, :]
        # mask = (mask - mask.min()) / (mask.max() - mask.min())
        threshold = random.uniform(0.4, 0.6)
        # threshold = 0.5
        itemindex = np.where(mask.cpu() >= mask.cpu().max() * threshold)
        # print(itemindex.shape)
        # itemindex = torch.nonzero(mask >= threshold * mask.max())
        padding_h = max(int(0.1 * H), 1)
        padding_w = max(int(0.1 * W), 1)
        height_min = itemindex[0].min()
        height_min = max(0, height_min - padding_h)
        height_max = itemindex[0].max() + padding_h
        width_min = itemindex[1].min()
        width_min = max(0, width_min - padding_w)
        width_max = itemindex[1].max() + padding_w
        # print('numpy', height_min, height_max, width_min, width_max)
        out_img = input_tensor[i][:, height_min:height_max,
                                  width_min:width_max].unsqueeze(0)
        out_img = torch.nn.functional.interpolate(out_img, size=(W, H),
                                                  mode='bilinear',
                                                  align_corners=True)
        out_img = out_img.squeeze(0)
        ret_imgs.append(out_img)

        ## create drop imgs
        mask2 = attention_map[selected_index2:selected_index2 + 1, :, :]
        threshold = random.uniform(0.2, 0.5)
        mask2 = (mask2 < threshold * mask2.max()).float()
        masks.append(mask2)

    # bboxes = np.asarray(bboxes, np.float32)
    crop_imgs = torch.stack(ret_imgs)
    masks = torch.stack(masks)
    drop_imgs = input_tensor * masks
    return (crop_imgs, drop_imgs)

def gem(x, p=3, eps=1e-6):
    return F.avg_pool2d(x.clamp(min=eps).pow(p),
                        (x.size(-2), x.size(-1))).pow(1. / p)

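# GeM (generalized-mean) pooling above interpolates between average pooling
# (p=1) and max pooling (p -> infinity). A quick sanity check of the p=1 case;
# the shapes are arbitrary and values are kept above eps so clamping is a no-op:
import torch
import torch.nn.functional as F

x = torch.rand(2, 16, 7, 7) + 0.1
assert torch.allclose(gem(x, p=1), F.avg_pool2d(x, (7, 7)), atol=1e-6)
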
def forward(self, x):
    return F.avg_pool2d(x, x.shape[2:])

def forward(self, x, kernel=3, stride=2, padding=1):
    weight = self.weight(x).exp()
    return (F.avg_pool2d(x * weight, kernel, stride, padding)
            / F.avg_pool2d(weight, kernel, stride, padding))

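# The module above computes a locally weighted average: exp-scores are
# softmax-normalized within each pooling window (dividing the two avg_pool2d
# results cancels the window size). A functional sketch of the same idea with
# a hypothetical per-pixel score map standing in for self.weight(x):
import torch
import torch.nn.functional as F

x = torch.randn(1, 4, 8, 8)
scores = torch.randn(1, 4, 8, 8)  # stand-in for self.weight(x)
w = scores.exp()
pooled = F.avg_pool2d(x * w, 3, 2, 1) / F.avg_pool2d(w, 3, 2, 1)
assert pooled.shape == (1, 4, 4, 4)
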
def avg_pool2d(self, in_features, kernel_size, stride, padding):
    return F.avg_pool2d(in_features, kernel_size=kernel_size,
                        stride=stride, padding=padding)

def forward(self, input_tensor):
    return functional.avg_pool2d(
        input_tensor, input_tensor.size()[2:]).view(input_tensor.size()[:2])
