Example no. 1
    def __init__(self, in_channels, out_channels, *, norm_layer, stride=1):
        super().__init__()

        # See note in ResidualBlock for the reason behind bias=True
        self.convnormrelu1 = Conv2dNormActivation(in_channels,
                                                  out_channels // 4,
                                                  norm_layer=norm_layer,
                                                  kernel_size=1,
                                                  bias=True)
        self.convnormrelu2 = Conv2dNormActivation(out_channels // 4,
                                                  out_channels // 4,
                                                  norm_layer=norm_layer,
                                                  kernel_size=3,
                                                  stride=stride,
                                                  bias=True)
        self.convnormrelu3 = Conv2dNormActivation(out_channels // 4,
                                                  out_channels,
                                                  norm_layer=norm_layer,
                                                  kernel_size=1,
                                                  bias=True)
        self.relu = nn.ReLU(inplace=True)

        if stride == 1:
            self.downsample = nn.Identity()
        else:
            self.downsample = Conv2dNormActivation(
                in_channels,
                out_channels,
                norm_layer=norm_layer,
                kernel_size=1,
                stride=stride,
                bias=True,
                activation_layer=None,
            )
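
The snippet above only shows the constructor. For context, a minimal sketch of the residual forward pass this bottleneck block would pair with (assuming the standard residual pattern; the forward method is not part of the example) is:

    def forward(self, x):
        y = x
        y = self.convnormrelu1(y)   # 1x1 reduction to out_channels // 4
        y = self.convnormrelu2(y)   # 3x3 conv, optionally strided
        y = self.convnormrelu3(y)   # 1x1 expansion back to out_channels
        x = self.downsample(x)      # identity, or strided 1x1 conv to match shapes
        return self.relu(x + y)     # residual addition followed by ReLU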
Example no. 2
    def __init__(self, in_channels, out_channels, *, norm_layer, stride=1):
        super().__init__()

        # Note regarding bias=True:
        # Usually we can pass bias=False in conv layers followed by a norm layer.
        # But in the RAFT training reference, the BatchNorm2d layers are only activated for the first dataset,
        # and frozen for the rest of the training process (i.e. set as eval()). The bias term is thus still useful
        # for the rest of the datasets. Technically, we could remove the bias for other norm layers like Instance norm
        # because these aren't frozen, but we don't bother (also, we wouldn't be able to load the original weights).
        self.convnormrelu1 = Conv2dNormActivation(in_channels,
                                                  out_channels,
                                                  norm_layer=norm_layer,
                                                  kernel_size=3,
                                                  stride=stride,
                                                  bias=True)
        self.convnormrelu2 = Conv2dNormActivation(out_channels,
                                                  out_channels,
                                                  norm_layer=norm_layer,
                                                  kernel_size=3,
                                                  bias=True)

        if stride == 1:
            self.downsample = nn.Identity()
        else:
            self.downsample = Conv2dNormActivation(
                in_channels,
                out_channels,
                norm_layer=norm_layer,
                kernel_size=1,
                stride=stride,
                bias=True,
                activation_layer=None,
            )

        self.relu = nn.ReLU(inplace=True)
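
The bias=True note above refers to BatchNorm layers being frozen after the first dataset. A hypothetical helper illustrating that kind of freezing (the name and exact form are illustrative, not the actual training-reference code) could look like:

import torch

def freeze_batch_norm(model):
    # Put every BatchNorm2d in eval mode (fixed running stats) and stop its
    # affine parameters from receiving gradients; the conv bias then remains
    # the only learnable shift in those conv+norm blocks.
    for m in model.modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            m.eval()
            for p in m.parameters():
                p.requires_grad_(False)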
Example no. 3
    def __init__(self,
                 in_channels: int,
                 num_anchors: int,
                 conv_depth=1) -> None:
        super().__init__()
        convs = []
        for _ in range(conv_depth):
            convs.append(
                Conv2dNormActivation(in_channels,
                                     in_channels,
                                     kernel_size=3,
                                     norm_layer=None))
        self.conv = nn.Sequential(*convs)
        self.cls_logits = nn.Conv2d(in_channels,
                                    num_anchors,
                                    kernel_size=1,
                                    stride=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors * 4,
                                   kernel_size=1,
                                   stride=1)

        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight,
                                      std=0.01)  # type: ignore[arg-type]
                if layer.bias is not None:
                    torch.nn.init.constant_(layer.bias,
                                            0)  # type: ignore[arg-type]
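
For reference, a head defined this way is typically applied per feature-map level. A sketch of the corresponding forward pass (following the usual RPN head pattern; not part of the snippet) would be:

    def forward(self, x):
        # x is a list of feature maps, one per pyramid level
        logits = []
        bbox_reg = []
        for feature in x:
            t = self.conv(feature)
            logits.append(self.cls_logits(t))   # objectness score per anchor
            bbox_reg.append(self.bbox_pred(t))  # 4 box deltas per anchor
        return logits, bbox_reg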
Example no. 4
    def __init__(self, *, block=ResidualBlock, layers=(64, 64, 96, 128, 256), norm_layer=nn.BatchNorm2d):
        super().__init__()

        if len(layers) != 5:
            raise ValueError(f"The expected number of layers is 5, instead got {len(layers)}")

        # See note in ResidualBlock for the reason behind bias=True
        self.convnormrelu = Conv2dNormActivation(
            3, layers[0], norm_layer=norm_layer, kernel_size=7, stride=2, bias=True
        )

        self.layer1 = self._make_2_blocks(block, layers[0], layers[1], norm_layer=norm_layer, first_stride=1)
        self.layer2 = self._make_2_blocks(block, layers[1], layers[2], norm_layer=norm_layer, first_stride=2)
        self.layer3 = self._make_2_blocks(block, layers[2], layers[3], norm_layer=norm_layer, first_stride=2)

        self.conv = nn.Conv2d(layers[3], layers[4], kernel_size=1)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d)):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
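
_make_2_blocks is referenced but not shown. A plausible sketch, assuming it simply stacks two residual blocks with the stride applied only to the first one, is:

    def _make_2_blocks(self, block, in_channels, out_channels, norm_layer, first_stride):
        block1 = block(in_channels, out_channels, norm_layer=norm_layer, stride=first_stride)
        block2 = block(out_channels, out_channels, norm_layer=norm_layer, stride=1)
        return nn.Sequential(block1, block2)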
Example no. 5
    def __init__(self,
                 *,
                 in_channels_corr,
                 corr_layers=(256, 192),
                 flow_layers=(128, 64),
                 out_channels=128):
        super().__init__()

        if len(flow_layers) != 2:
            raise ValueError(
                f"The expected number of flow_layers is 2, instead got {len(flow_layers)}"
            )
        if len(corr_layers) not in (1, 2):
            raise ValueError(
                f"The number of corr_layers should be 1 or 2, instead got {len(corr_layers)}"
            )

        self.convcorr1 = Conv2dNormActivation(in_channels_corr,
                                              corr_layers[0],
                                              norm_layer=None,
                                              kernel_size=1)
        if len(corr_layers) == 2:
            self.convcorr2 = Conv2dNormActivation(corr_layers[0],
                                                  corr_layers[1],
                                                  norm_layer=None,
                                                  kernel_size=3)
        else:
            self.convcorr2 = nn.Identity()

        self.convflow1 = Conv2dNormActivation(2,
                                              flow_layers[0],
                                              norm_layer=None,
                                              kernel_size=7)
        self.convflow2 = Conv2dNormActivation(flow_layers[0],
                                              flow_layers[1],
                                              norm_layer=None,
                                              kernel_size=3)

        # out_channels - 2 because we cat the flow (2 channels) at the end
        self.conv = Conv2dNormActivation(corr_layers[-1] + flow_layers[-1],
                                         out_channels - 2,
                                         norm_layer=None,
                                         kernel_size=3)

        self.out_channels = out_channels
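
The "out_channels - 2" comment refers to the raw flow being concatenated back at the end. A sketch of the matching forward pass (assuming the usual RAFT motion-encoder layout; not shown in the snippet) is:

    def forward(self, flow, corr_features):
        corr = self.convcorr1(corr_features)
        corr = self.convcorr2(corr)
        flow_orig = flow
        flow = self.convflow1(flow)
        flow = self.convflow2(flow)
        corr_flow = torch.cat([corr, flow], dim=1)
        corr_flow = self.conv(corr_flow)                 # produces out_channels - 2 channels
        return torch.cat([corr_flow, flow_orig], dim=1)  # back to out_channels in total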
Example no. 6
    def __init__(self, *, in_channels, hidden_size, multiplier=0.25):
        super().__init__()
        self.convrelu = Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3)
        # 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder
        # and we interpolate with all 9 surrounding neighbors. See paper and appendix B.
        self.conv = nn.Conv2d(hidden_size, 8 * 8 * 9, 1, padding=0)

        # In the original code, they use a factor of 0.25 to "downweight the gradients" of that branch.
        # See e.g. https://github.com/princeton-vl/RAFT/issues/119#issuecomment-953950419
        # or https://github.com/princeton-vl/RAFT/issues/24.
        # It doesn't seem to affect epe significantly and can likely be set to 1.
        self.multiplier = multiplier
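
A sketch of how the multiplier would be applied in the forward pass (mirroring the gradient downweighting described in the comment; the forward method is not part of the example) is:

    def forward(self, x):
        x = self.convrelu(x)
        x = self.conv(x)            # 8 * 8 * 9 upsampling-mask logits
        return self.multiplier * x  # downweight the gradients of this branch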
Example no. 7
    def __init__(self, *, in_channels_corr, corr_layers=(256, 192), flow_layers=(128, 64), out_channels=128):
        super().__init__()

        assert len(flow_layers) == 2
        assert len(corr_layers) in (1, 2)

        self.convcorr1 = Conv2dNormActivation(in_channels_corr, corr_layers[0], norm_layer=None, kernel_size=1)
        if len(corr_layers) == 2:
            self.convcorr2 = Conv2dNormActivation(corr_layers[0], corr_layers[1], norm_layer=None, kernel_size=3)
        else:
            self.convcorr2 = nn.Identity()

        self.convflow1 = Conv2dNormActivation(2, flow_layers[0], norm_layer=None, kernel_size=7)
        self.convflow2 = Conv2dNormActivation(flow_layers[0], flow_layers[1], norm_layer=None, kernel_size=3)

        # out_channels - 2 because we cat the flow (2 channels) at the end
        self.conv = Conv2dNormActivation(
            corr_layers[-1] + flow_layers[-1], out_channels - 2, norm_layer=None, kernel_size=3
        )

        self.out_channels = out_channels
Example no. 8
    def __init__(self,
                 *,
                 in_channels: int,
                 hidden_size: int,
                 out_channels: int,
                 multiplier: float = 0.25):
        super(raft.MaskPredictor, self).__init__()
        self.convrelu = Conv2dNormActivation(in_channels,
                                             hidden_size,
                                             norm_layer=None,
                                             kernel_size=3)
        self.conv = nn.Conv2d(hidden_size,
                              out_channels,
                              kernel_size=1,
                              padding=0)
        self.multiplier = multiplier