Example 1
def receptive_field_for(self, input_field=None):
    import netharn as nh
    # Propagate the receptive field through the main hidden branch
    field = nh.ReceptiveFieldFor(self.hidden)(input_field)
    if self.skip:
        # Record the field after the skip module in the hidden info
        skip = nh.ReceptiveFieldFor(self.skip)(field)
        field.hidden['skip'] = skip
    return field
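
For orientation, here is a minimal sketch of how ReceptiveFieldFor is invoked on a standalone module, mirroring the call pattern above; the layer choices are arbitrary and only illustrate usage:

import netharn as nh
import torch.nn as nn

# Arbitrary illustrative architecture: a strided conv followed by a
# same-padded conv.
module = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3, stride=2, padding=1),
    nn.Conv2d(8, 8, kernel_size=3, padding=1),
)
field = nh.ReceptiveFieldFor(module)()
# ``field`` acts like a dict with 'shape', 'stride', and 'crop' entries,
# as the assertions in Example 3 suggest.
print(field)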
Example 2
def _debug_hidden(self, input_shape, n=5):
    """
    Print internal shape and receptive field info, truncated to depth ``n``
    """
    import netharn as nh
    import ubelt as ub
    # Intermediate output shapes of the branch
    shape = nh.OutputShapeFor(self.branch)(input_shape=input_shape)
    print(ub.repr2(shape.hidden.shallow(n), nl=-1, dtype=False, si=True))
    # Intermediate receptive fields of the branch
    field = nh.ReceptiveFieldFor(self.branch)(input_shape=input_shape)
    print(ub.repr2(field.hidden.shallow(n), nl=-1, dtype=False, si=True))
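
A hedged usage sketch: Example 5 shows DescriptorNetwork calling self._debug_hidden internally, so assuming this method lives on that class, it can also be invoked directly:

from netharn.models.descriptor_network import DescriptorNetwork

input_shape = (4, 3, 32, 32)
self = DescriptorNetwork(input_shape=input_shape)
# Print the first two levels of shape and receptive field info
self._debug_hidden(input_shape, n=2)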
Example 3
def test_convT_rf():
    """
    CommandLine:
        xdoctest -m ~/code/netharn/tests/test_receptive_feild.py test_convT_rf
    """
    # Test that we always invert whatever weird crazy thing we do
    import netharn as nh
    import numpy as np
    import torch.nn as nn
    import ubelt as ub
    rng = np.random.RandomState(3668028386)

    ntrials = 100
    M = 9

    for _ in ub.ProgIter(range(ntrials), desc='testing rand convT instances'):
        depth = rng.randint(1, M)
        params = []
        for i in range(depth):
            k = rng.randint(0, 1 + M // 2) * 2 + 1
            s = rng.randint(1, 1 + M)
            d = rng.randint(1, 1 + M)
            p = rng.randint(0, 1 + M)
            # Keep the padding with the other params so each layer uses its
            # own p, rather than the last value p took in this loop.
            params.append((i, (k, s, d, p)))

        # Construct a series of forward convolutions and the transpose
        # convolutions that should "invert" them. Assert that the strides and
        # crop of the RF are the same on every layer. Furthermore, the RF
        # size should never decrease.

        layers = ub.odict()
        for i, (k, s, d, p) in params:
            key = 'c{}'.format(i)
            conv = nn.Conv2d(1,
                             1,
                             kernel_size=k,
                             stride=s,
                             padding=p,
                             dilation=d)
            layers[key] = conv

        for i, (k, s, d, p) in reversed(params):
            key = 'c{}T'.format(i)
            convT = nn.ConvTranspose2d(1,
                                       1,
                                       kernel_size=k,
                                       stride=s,
                                       padding=p,
                                       dilation=d)
            layers[key] = convT

        module = nn.Sequential(layers)
        field = nh.ReceptiveFieldFor(module)()

        input_rf = nh.ReceptiveFieldFor.input()
        symmetric = [('input', input_rf)] + list(field.hidden.items())

        for a, b in ub.iter_window(symmetric, 2):
            k1, v1 = a
            k2, v2 = b
            assert np.all(v1['shape'] <= v2['shape']), 'v1={} v2={}'.format(
                v1, v2)

        for a, b in zip(symmetric, symmetric[::-1]):
            k1, v1 = a
            k2, v2 = b
            assert np.all(v1['stride'] == v2['stride']), 'v1={} v2={}'.format(
                v1, v2)
            assert np.all(v1['crop'] == v2['crop']), 'v1={} v2={}'.format(
                v1, v2)
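
To make the symmetry property concrete, here is one hand-picked conv/convT pair; the parameter values are illustrative, not taken from the test:

import netharn as nh
import torch.nn as nn
import ubelt as ub

layers = ub.odict([
    ('c0', nn.Conv2d(1, 1, kernel_size=3, stride=2, padding=1)),
    ('c0T', nn.ConvTranspose2d(1, 1, kernel_size=3, stride=2, padding=1)),
])
field = nh.ReceptiveFieldFor(nn.Sequential(layers))()
# The transpose conv undoes the stride-2 downsampling of the forward
# conv, so the final RF stride should match the input stride of 1.
print(field['stride'])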
Example 4
def receptive_field_for(self, input_field=None):
    import netharn as nh
    # Delegate the receptive field computation to the wrapped module
    return nh.ReceptiveFieldFor(self.module)(input_field)
Example 5
    def __init__(self, branch='resnet50', input_shape=(1, 3, 416, 416),
                 norm_desc=False, desc_size=1024, hidden_channels=3, dropout=0,
                 norm='batch', noli='relu', residual=False, bias=False):
        """
        Note:
            * I have found norm_desc to be generally unhelpful.

        Example:
            >>> from netharn.models.descriptor_network import *
            >>> import netharn as nh
            >>> input_shape = (4, 3, 32, 32)
            >>> self = DescriptorNetwork(input_shape=input_shape)
            >>> nh.OutputShapeFor(self)._check_consistency(input_shape)
            {'dvecs': (4, 1024)}
        """
        import netharn as nh
        super(DescriptorNetwork, self).__init__()

        pretrained = True

        if branch is None or branch == 'resnet50':
            self.branch = torchvision.models.resnet50(pretrained=pretrained)
        else:
            self.branch = branch
        if not isinstance(self.branch, torchvision.models.ResNet):
            raise ValueError('can only accept resnet at the moment')
        self.norm_desc = norm_desc

        self.in_channels = input_shape[1]
        self.out_channels = desc_size

        if self.branch.conv1.in_channels != self.in_channels:
            prev = self.branch.conv1
            cls = prev.__class__
            self.branch.conv1 = cls(
                in_channels=self.in_channels,
                out_channels=prev.out_channels,
                kernel_size=prev.kernel_size,
                stride=prev.stride,
                padding=prev.padding,
                dilation=prev.dilation,
                groups=prev.groups,
                bias=prev.bias is not None,  # Conv2d expects a bool here
            )
            if pretrained:
                nh.initializers.functional.load_partial_state(
                    self.branch.conv1,
                    prev.state_dict(),
                    leftover=nh.initializers.KaimingNormal(),
                    verbose=0,
                )

        # Note the advanced usage of output-shape-for
        if 0 and __debug__:
            # new torchvision broke this
            branch_field = nh.ReceptiveFieldFor(self.branch)(
                input_shape=input_shape)
            prepool_field = branch_field.hidden.shallow(1)['layer4']
            input_dims = np.array(input_shape[-2:])
            rf_stride = prepool_field['stride']
            if np.any(input_dims < rf_stride // 2):
                msg = ('Network is too deep OR input is too small. '
                       'rf_stride={} but input_dims={}'.format(
                           rf_stride, input_dims))

                self._debug_hidden(input_shape, n=2)
                print(msg)
                import warnings
                warnings.warn(msg)
                raise Exception(msg)

        branch_shape = nh.OutputShapeFor(self.branch)(input_shape)
        prepool_shape = branch_shape.hidden.shallow(1)['layer4']

        # Replace the last layer of resnet with a linear embedding to learn
        # the LP distance between pairs of images. Also need to replace the
        # pooling layer in case the input has a different size.
        self.prepool_shape = prepool_shape
        pool_channels = prepool_shape[1]
        pool_dims = prepool_shape[2:]
        if np.all(np.array(pool_dims) == 1):
            self.branch.avgpool = layers.Identity()
        else:
            self.branch.avgpool = torch.nn.AvgPool2d(pool_dims, stride=1)

        # Check that the modification to the layer fixed the size
        postbranch_shape = nh.OutputShapeFor(self.branch)(input_shape)
        postpool_shape = postbranch_shape.hidden.shallow(1)['layer4']

        assert np.all(np.array(prepool_shape[1:]) > 0)
        assert np.all(np.array(postpool_shape[1:]) > 0)

        # Replace the final linear layer with an MLP head
        self.branch.fc = layers.MultiLayerPerceptronNd(
            dim=0, in_channels=pool_channels, hidden_channels=hidden_channels,
            out_channels=desc_size, bias=bias, dropout=dropout, norm=norm,
            noli=noli, residual=residual)
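
The partial-weight transfer in this constructor can be exercised in isolation. A minimal sketch using the same load_partial_state call; the conv shapes mimic resnet50's first layer but are otherwise illustrative:

import netharn as nh
import torch.nn as nn

# A 3-channel conv standing in for pretrained weights, and a replacement
# that accepts 1-channel input (as when input_shape[1] != 3 above).
prev = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
new = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
# Copy the overlapping weights; initialize whatever is left over.
nh.initializers.functional.load_partial_state(
    new, prev.state_dict(),
    leftover=nh.initializers.KaimingNormal(), verbose=0)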