def __init__(
    self,
    rgb_encoder,
    res_encoder,
    num_classes,
    dropout=0,
    mean=[0.3914976, 0.44266784, 0.46043398],
    std=[0.17819773, 0.17319807, 0.18128773],
):
    """Set up two encoders, each with its own "type" and "flag" head.

    Parameters
    ----------
    rgb_encoder, res_encoder
        Encoders exposing a ``num_features`` attribute; it sizes the heads
        attached to the respective branch.
    num_classes : int
        Output size of the two "type" heads (the "flag" heads output 1 value).
    dropout
        Forwarded as the last argument of every ``WeightNormClassifier``.
    mean, std
        Per-channel statistics; multiplied by 255 before being handed to the
        ``Normalize`` layer stored as ``rgb_bn``.
    """
    super().__init__()

    # The statistics are rescaled by 255 -- presumably because inputs arrive
    # in the 0..255 range (TODO confirm against the data pipeline).
    pixel_scale = 255
    scaled_mean = np.array(mean) * pixel_scale
    scaled_std = np.array(std) * pixel_scale
    self.rgb_bn = Normalize(scaled_mean, scaled_std)

    # Fixed statistics for the second input branch.
    self.res_bn = Normalize([0.85, -0.51, 0.69], [0.68, 0.47, 0.6])

    self.encoder = rgb_encoder
    self.res_encoder = res_encoder
    self.pool = GlobalAvgPool2d(flatten=True)

    # Each head uses half of its encoder's feature width as third argument.
    rgb_dim = rgb_encoder.num_features
    self.rgb_type_classifier = WeightNormClassifier(rgb_dim, num_classes, rgb_dim // 2, dropout)
    self.rgb_flag_classifier = WeightNormClassifier(rgb_dim, 1, rgb_dim // 2, dropout)

    res_dim = res_encoder.num_features
    self.res_type_classifier = WeightNormClassifier(res_dim, num_classes, res_dim // 2, dropout)
    self.res_flag_classifier = WeightNormClassifier(res_dim, 1, res_dim // 2, dropout)
def __init__(
    self,
    rgb_encoder,
    res_encoder,
    num_classes,
    dropout=0,
    mean=[0.3914976, 0.44266784, 0.46043398],
    std=[0.17819773, 0.17319807, 0.18128773],
):
    """Set up two encoders whose features feed one shared 512-d decoder.

    Parameters
    ----------
    rgb_encoder, res_encoder
        Encoders exposing a ``num_features`` attribute; the decoder input
        width is the sum of both values.
    num_classes : int
        Output size of ``type_classifier`` (``flag_classifier`` outputs 1).
    dropout
        Probability passed to ``nn.Dropout``.
    mean, std
        Per-channel statistics; multiplied by 255 before being handed to the
        ``Normalize`` layer stored as ``rgb_bn``.
    """
    super().__init__()

    pixel_scale = 255
    scaled_mean = np.array(mean) * pixel_scale
    scaled_std = np.array(std) * pixel_scale
    self.rgb_bn = Normalize(scaled_mean, scaled_std)
    self.res_bn = Normalize([0.85, -0.51, 0.69], [0.68, 0.47, 0.6])

    self.encoder = rgb_encoder
    self.res_encoder = res_encoder
    self.pool = GlobalAvgPool2d(flatten=True)
    self.drop = nn.Dropout(p=dropout)

    # Linear -> BatchNorm -> ReLU projection of the combined feature width.
    fused_dim = rgb_encoder.num_features + res_encoder.num_features
    hidden_dim = 512
    self.decoder = nn.Sequential(
        nn.Linear(fused_dim, hidden_dim),
        nn.BatchNorm1d(hidden_dim),
        nn.ReLU(inplace=True),
    )

    self.type_classifier = nn.Linear(hidden_dim, num_classes)
    self.flag_classifier = nn.Linear(hidden_dim, 1)
def test_fliplr_image2label():
    """Check ``tta.fliplr_image2label`` on a global-average-pooling model.

    The assertion requires the result to equal the plain per-channel spatial
    mean of the input, within 1e-6 absolute and relative tolerance.
    """
    batch = torch.rand((4, 3, 224, 224))
    pooling_model = GlobalAvgPool2d(flatten=True)

    actual = tta.fliplr_image2label(pooling_model, batch)
    expected = batch.mean(dim=(2, 3))

    np.testing.assert_allclose(
        to_numpy(actual),
        to_numpy(expected),
        atol=1e-6,
        rtol=1e-6,
    )
def __init__(self, encoder, num_classes, dropout=0, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]):
    """Set up a single-encoder model with "type" and "flag" linear heads.

    Parameters
    ----------
    encoder
        Feature extractor exposing ``num_features``, which sizes both heads.
    num_classes : int
        Output size of ``type_classifier`` (``flag_classifier`` outputs 1).
    dropout
        Probability passed to ``nn.Dropout``.
    mean, std
        Per-channel statistics; multiplied by 255 before being handed to the
        ``Normalize`` layer stored as ``rgb_bn``.
    """
    super().__init__()

    pixel_scale = 255
    scaled_mean = np.array(mean) * pixel_scale
    scaled_std = np.array(std) * pixel_scale
    self.rgb_bn = Normalize(scaled_mean, scaled_std)

    self.encoder = encoder
    self.drop = nn.Dropout(p=dropout)
    self.pool = GlobalAvgPool2d(flatten=True)

    feature_dim = encoder.num_features
    self.type_classifier = nn.Linear(feature_dim, num_classes)
    self.flag_classifier = nn.Linear(feature_dim, 1)
def disaster_type_classifier(features: int, num_classes: int, embedding=256, abn_block=ABN, dropout=0.0) -> nn.Module:
    """Build a classification head operating on a feature map.

    The head is a 1x1 convolution from ``features`` to ``embedding`` channels,
    an ``abn_block`` over the embedding, global average pooling with
    flattening, in-place dropout and a final linear layer with
    ``num_classes`` outputs.

    Parameters
    ----------
    features : int
        Channel count of the incoming feature map.
    num_classes : int
        Number of outputs of the final linear layer.
    embedding : int
        Channel count produced by the 1x1 convolution.
    abn_block : callable
        Called with ``embedding`` to create the layer that follows the
        convolution.
    dropout : float
        Probability passed to ``nn.Dropout``.

    Returns
    -------
    nn.Module
        An ``nn.Sequential`` holding the five layers in the order above.
    """
    head_layers = [
        nn.Conv2d(features, embedding, kernel_size=1),
        abn_block(embedding),
        GlobalAvgPool2d(flatten=True),
        nn.Dropout(dropout, inplace=True),
        nn.Linear(embedding, num_classes),
    ]
    return nn.Sequential(*head_layers)
def __init__(self, encoder, num_classes, dropout=0, mean=[-0.5, -0.5, -0.5], std=[0.5, 0.5, 0.5]):
    """Set up a single-encoder model with "type" and "flag" linear heads.

    Parameters
    ----------
    encoder
        Feature extractor exposing ``num_features``, which sizes both heads.
    num_classes : int
        Output size of ``type_classifier`` (``flag_classifier`` outputs 1).
    dropout
        Probability passed to ``nn.Dropout``.
    mean, std
        Statistics handed unchanged (no rescaling) to the ``Normalize`` layer
        stored as ``res_bn``.
    """
    super().__init__()

    self.res_bn = Normalize(mean, std)
    self.encoder = encoder
    self.pool = GlobalAvgPool2d(flatten=True)
    self.drop = nn.Dropout(p=dropout)

    feature_dim = encoder.num_features
    self.type_classifier = nn.Linear(feature_dim, num_classes)
    self.flag_classifier = nn.Linear(feature_dim, 1)
def __init__(
    self,
    encoder,
    num_classes,
    dropout=0,
    mean=[0.3914976, 0.44266784, 0.46043398],
    std=[0.17819773, 0.17319807, 0.18128773],
):
    """Set up a single-encoder model with "type" and "flag" linear heads.

    Parameters
    ----------
    encoder
        Feature extractor exposing ``num_features``, which sizes both heads.
    num_classes : int
        Output size of ``type_classifier`` (``flag_classifier`` outputs 1).
    dropout
        Probability passed to ``nn.Dropout``.
    mean, std
        Per-channel statistics; multiplied by 255 before being handed to the
        ``Normalize`` layer stored as ``rgb_bn``.
    """
    super().__init__()

    pixel_scale = 255
    scaled_mean = np.array(mean) * pixel_scale
    scaled_std = np.array(std) * pixel_scale
    self.rgb_bn = Normalize(scaled_mean, scaled_std)

    self.encoder = encoder
    self.pool = GlobalAvgPool2d(flatten=True)
    self.drop = nn.Dropout(p=dropout)

    feature_dim = encoder.num_features
    self.type_classifier = nn.Linear(feature_dim, num_classes)
    self.flag_classifier = nn.Linear(feature_dim, 1)
def __init__(
    self,
    num_classes,
    dropout=0.0,
    mean=[0.3914976, 0.44266784, 0.46043398],
    std=[0.17819773, 0.17319807, 0.18128773],
    max_pixel_value=255,
    input_key=INPUT_IMAGE_KEY,
):
    """Set up an EfficientUnet encoder/decoder with classifier and mask heads.

    Parameters
    ----------
    num_classes : int
        Output size of ``type_classifier`` (``flag_classifier`` outputs 1).
    dropout : float
        Probability passed to ``nn.Dropout``.
    mean, std
        Per-channel statistics; multiplied by ``max_pixel_value`` before
        being handed to the ``Normalize`` layer stored as ``rgb_bn``.
    max_pixel_value
        Scale factor applied to ``mean`` and ``std``.
    input_key
        Stored on the instance as ``self.input_key``.
    """
    super().__init__()

    scaled_mean = np.array(mean) * max_pixel_value
    scaled_std = np.array(std) * max_pixel_value
    self.rgb_bn = Normalize(scaled_mean, scaled_std)

    self.encoder = EfficientUnetEncoder()
    self.decoder = EfficientUNetDecoder(self.encoder.channels, decoder_features=[16, 32, 64])

    self.pool = GlobalAvgPool2d(flatten=True)
    self.drop = nn.Dropout(p=dropout)

    # Both classification heads are sized by the last encoder channel count.
    self.type_classifier = nn.Linear(self.encoder.channels[-1], num_classes)
    self.flag_classifier = nn.Linear(self.encoder.channels[-1], 1)

    self.input_key = input_key

    # Single-channel 1x1 convolution sized by the first decoder channel count.
    self.mask = nn.Conv2d(self.decoder.channels[0], 1, kernel_size=1)
def __init__(
    self,
    encoder,
    num_classes,
    dropout=0,
    mean=[0.3914976, 0.44266784, 0.46043398],
    std=[0.17819773, 0.17319807, 0.18128773],
):
    """Set up an encoder whose embedding is recombined with 3 extra inputs.

    Parameters
    ----------
    encoder
        Feature extractor exposing ``num_features``, which sizes ``fc1`` and
        both heads.
    num_classes : int
        Output size of ``type_classifier`` (``flag_classifier`` outputs 1).
    dropout
        Probability passed to ``nn.Dropout``.
    mean, std
        Per-channel statistics; multiplied by 255 before being handed to the
        ``Normalize`` layer stored as ``rgb_bn``.
    """
    super().__init__()
    self.encoder = encoder

    pixel_scale = 255
    scaled_mean = np.array(mean) * pixel_scale
    scaled_std = np.array(std) * pixel_scale
    self.rgb_bn = Normalize(scaled_mean, scaled_std)

    self.pool = GlobalAvgPool2d(flatten=True)
    self.drop = nn.Dropout(p=dropout)

    # Recombination of embedding and quality factor: the input is 3 values
    # wider than the embedding (presumably the quality-factor features --
    # TODO confirm against forward()).
    embedding_dim = encoder.num_features
    self.fc1 = nn.Sequential(
        nn.Linear(embedding_dim + 3, embedding_dim),
        nn.ReLU(),
    )

    self.type_classifier = nn.Linear(embedding_dim, num_classes)
    self.flag_classifier = nn.Linear(embedding_dim, 1)
def __init__(self, structure, in_chans=3, norm_act=ABN, classes=0):
    """Build a Wider ResNet made of pre-activation (identity mapping) blocks.

    Parameters
    ----------
    structure : list of int
        How many residual blocks each of the six modules contains.
    in_chans : int
        Input channel count of the first stem convolution.
    norm_act : callable
        Factory called with a channel count to create a normalization /
        activation module.
    classes : int
        When not `0`, a global average pooling + fully-connected head with
        `classes` outputs is attached as `self.classifier`.

    Raises
    ------
    ValueError
        If `structure` does not contain exactly six values.
    """
    super(WiderResNet, self).__init__()
    self.structure = structure

    if len(structure) != 6:
        raise ValueError("Expected a structure with six values")

    # Deep stem: three 3x3 convolutions, the first one with stride 2.
    self.mod1 = nn.Sequential(
        nn.Conv2d(in_chans, 64, 3, stride=2, padding=1, bias=False),
        norm_act(64),
        nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False),
        norm_act(64),
        nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False),
    )

    # Channel configuration handed to the residual blocks of each module.
    stage_widths = [(128, 128), (256, 256), (512, 512), (512, 1024), (512, 1024, 2048), (1024, 2048, 4096)]
    width_in = 64
    for mod_id, (depth, widths) in enumerate(zip(structure, stage_widths)):
        named_blocks = OrderedDict()
        for block_no in range(1, depth + 1):
            named_blocks["block%d" % block_no] = IdentityResidualBlock(width_in, widths, norm_act=norm_act)
            # After the first block of a module the input width equals the
            # module's final output width.
            width_in = widths[-1]

        # Modules are registered as mod2..mod7; every module except the last
        # is preceded by a max-pooling layer.
        stage_no = mod_id + 2
        if mod_id < 5:
            self.add_module("pool%d" % stage_no, nn.MaxPool2d(3, stride=2, padding=1))
        self.add_module("mod%d" % stage_no, nn.Sequential(named_blocks))

    # Output normalization and optional predictor.
    self.bn_out = norm_act(width_in)
    if classes != 0:
        head = OrderedDict()
        head["avg_pool"] = GlobalAvgPool2d(flatten=True)
        head["fc"] = nn.Linear(width_in, classes)
        self.classifier = nn.Sequential(head)

    self.num_features = width_in
def __init__(self, structure, in_chans=3, norm_act=ABN, classes=0, dilation=False):
    """Wider ResNet with pre-activation (identity mapping) blocks

    This variant uses down-sampling by max-pooling in the first two blocks and
    by strided convolution in the others.

    Parameters
    ----------
    structure : list of int
        Number of residual blocks in each of the six modules of the network.
    in_chans : int
        Number of input channels consumed by the first convolution.
    norm_act : callable
        Function to create normalization / activation Module.
    classes : int
        If not `0` also include global average pooling and a fully-connected
        layer with `classes` outputs at the end of the network.
    dilation : bool
        If `True` apply dilation to the last three modules and change the
        down-sampling factor from 32 to 8.

    Raises
    ------
    ValueError
        If `structure` does not contain exactly six values.
    """
    super(WiderResNetA2, self).__init__()
    self.structure = structure
    self.dilation = dilation

    if len(structure) != 6:
        raise ValueError("Expected a structure with six values")

    # Initial layers. The stem uses `in_chans`; it was previously hard-coded
    # to 3, which silently ignored the constructor argument.
    self.mod1 = nn.Sequential(
        OrderedDict([("conv1", nn.Conv2d(in_chans, 64, 3, stride=1, padding=1, bias=False))])
    )

    # Groups of residual blocks
    in_channels = 64
    channels = [(128, 128), (256, 256), (512, 512), (512, 1024), (512, 1024, 2048), (1024, 2048, 4096)]
    for mod_id, num in enumerate(structure):
        # Dilation and dropout depend only on the module, not on the block.
        if not dilation:
            dil = 1
        elif mod_id == 3:
            dil = 2
        elif mod_id > 3:
            dil = 4
        else:
            dil = 1

        if mod_id == 4:
            drop = partial(nn.Dropout2d, p=0.3)
        elif mod_id == 5:
            drop = partial(nn.Dropout2d, p=0.5)
        else:
            drop = None

        # Create blocks for module
        blocks = []
        for block_id in range(num):
            # Only the first block of a module may down-sample: modules
            # 2..4 without dilation, module 2 only with dilation.
            if not dilation:
                stride = 2 if block_id == 0 and 2 <= mod_id <= 4 else 1
            else:
                stride = 2 if block_id == 0 and mod_id == 2 else 1

            blocks.append((
                "block%d" % (block_id + 1),
                IdentityResidualBlock(
                    in_channels,
                    channels[mod_id],
                    norm_act=norm_act,
                    stride=stride,
                    dilation=dil,
                    dropout=drop,
                ),
            ))

            # Update channels: later blocks of the module start from its
            # output width.
            in_channels = channels[mod_id][-1]

        # Create module; only the first two modules are preceded by
        # max-pooling.
        if mod_id < 2:
            self.add_module("pool%d" % (mod_id + 2), nn.MaxPool2d(3, stride=2, padding=1))
        self.add_module("mod%d" % (mod_id + 2), nn.Sequential(OrderedDict(blocks)))

    # Pooling and predictor
    self.bn_out = norm_act(in_channels)
    if classes != 0:
        self.classifier = nn.Sequential(
            OrderedDict([("avg_pool", GlobalAvgPool2d(flatten=True)), ("fc", nn.Linear(in_channels, classes))])
        )

    self.num_features = in_channels