def output_shape_for(self, input_shape):
    """
    Compute the symbolic output shape for ``input_shape``.

    Delegates to the hidden module; when a skip connection is configured,
    its shape is recorded in the returned object's ``hidden`` info.
    """
    import netharn as nh
    out = self.hidden.output_shape_for(input_shape)
    if self.skip:
        # stash the skip-branch shape alongside the main hidden shapes
        out.hidden['skip'] = nh.OutputShapeFor(self.skip)(input_shape)
    return out
def _debug_hidden(self, input_shape, n=5):
    """
    Print internal shape and receptive-field info for the branch.

    Args:
        input_shape: shape fed to the branch for symbolic analysis
        n (int): depth at which the hidden info tree is flattened
    """
    import netharn as nh
    repr_kw = dict(nl=-1, dtype=False, si=True)
    shape = nh.OutputShapeFor(self.branch)(input_shape=input_shape)
    print(ub.repr2(shape.hidden.shallow(n), **repr_kw))
    field = nh.ReceptiveFieldFor(self.branch)(input_shape=input_shape)
    print(ub.repr2(field.hidden.shallow(n), **repr_kw))
def resnet_prepool_output_shape(self, input_shape):
    """
    Compute the shape of the resnet features just before the average pool.

    Used to figure out how big the output will be so the average pool
    layer can be redone to account for it.

    Example:
        self = MatchingNetworkLP(input_shape=input_shape)
        input_shape = (1, 3, 224, 224)
        self.resnet_prepool_output_shape(input_shape)
        self = MatchingNetworkLP(input_shape=input_shape)
        input_shape = (1, 3, 416, 416)
        self.resnet_prepool_output_shape(input_shape)
    """
    branch = self.branch
    # Walk the resnet stem and residual stages in forward order instead of
    # repeating the same OutputShapeFor call eight times.
    stages = [
        branch.conv1, branch.bn1, branch.relu, branch.maxpool,
        branch.layer1, branch.layer2, branch.layer3, branch.layer4,
    ]
    shape = input_shape
    for stage in stages:
        shape = nh.OutputShapeFor(stage)(shape)
    prepool_shape = shape
    return prepool_shape
def output_shape_for(self, input_shape1, input_shape2):
    """
    Shape of the scalar comparison output for a pair of inputs.

    Both inputs must map to identical branch output shapes; the result is
    one value per batch item.
    """
    shape1 = nh.OutputShapeFor(self.branch)(input_shape1)
    shape2 = nh.OutputShapeFor(self.branch)(input_shape2)
    # the two branch outputs must agree before they can be compared
    assert shape1 == shape2
    return (shape1[0], 1)
def output_shape_for(self, input_shape, math=math):
    """
    Propagate ``input_shape`` through the first sub-layers of conv1/conv2.

    NOTE(review): the ``math`` parameter appears unused in the body; it is
    kept only for signature compatibility — confirm before removing.
    """
    import netharn as nh
    out = nh.OutputShapeFor(self.conv1[0])(input_shape)
    out = nh.OutputShapeFor(self.conv2[0])(out)
    return out
def __init__(self, branch='resnet50', input_shape=(1, 3, 416, 416),
             norm_desc=False, desc_size=1024, hidden_channels=3,
             dropout=0, norm='batch', noli='relu', residual=False,
             bias=False):
    """
    Build a descriptor network on top of a (possibly pretrained) resnet.

    Args:
        branch: 'resnet50', None (same as 'resnet50'), or a ResNet module
        input_shape: (B, C, H, W) shape used for symbolic shape analysis
        norm_desc (bool): whether to normalize descriptors
        desc_size (int): output descriptor dimensionality
        hidden_channels, dropout, norm, noli, residual, bias: MLP head config

    Raises:
        ValueError: if ``branch`` is not a torchvision ResNet

    Note:
        * i have found norm_desc to be generally unhelpful.

    Example:
        >>> from netharn.models.descriptor_network import *
        >>> import netharn as nh
        >>> input_shape = (4, 3, 32, 32)
        >>> self = DescriptorNetwork(input_shape=input_shape)
        >>> nh.OutputShapeFor(self)._check_consistency(input_shape)
        {'dvecs': (4, 1024)}
    """
    import netharn as nh
    super(DescriptorNetwork, self).__init__()
    pretrained = True
    if branch is None or branch == 'resnet50':
        self.branch = torchvision.models.resnet50(pretrained=pretrained)
    else:
        self.branch = branch
    if not isinstance(self.branch, torchvision.models.ResNet):
        raise ValueError('can only accept resnet at the moment')
    self.norm_desc = norm_desc
    self.in_channels = input_shape[1]
    self.out_channels = desc_size

    if self.branch.conv1.in_channels != self.in_channels:
        # Rebuild the first conv so it accepts the requested number of
        # input channels, copying every other hyperparameter.
        prev = self.branch.conv1
        cls = prev.__class__
        self.branch.conv1 = cls(
            in_channels=self.in_channels,
            out_channels=prev.out_channels,
            kernel_size=prev.kernel_size,
            stride=prev.stride,
            padding=prev.padding,
            dilation=prev.dilation,
            groups=prev.groups,
            # BUGFIX: the constructor expects a bool flag, but
            # ``prev.bias`` is a Parameter or None. The original only
            # worked because resnet's conv1 has bias=None (falsy).
            bias=prev.bias is not None,
        )
        if pretrained:
            # Transfer the pretrained weights that still fit; initialize
            # the rest with Kaiming normal.
            nh.initializers.functional.load_partial_state(
                self.branch.conv1, prev.state_dict(),
                leftover=nh.initializers.KaimingNormal(),
                verbose=0,
            )

    # Note the advanced usage of output-shape-for
    if 0 and __debug__:
        # new torchvision broke this
        branch_field = nh.ReceptiveFieldFor(self.branch)(
            input_shape=input_shape)
        prepool_field = branch_field.hidden.shallow(1)['layer4']
        input_dims = np.array(input_shape[-2:])
        rf_stride = prepool_field['stride']
        if np.any(input_dims < rf_stride // 2):
            # fixed typo: "to small" -> "too small"
            msg = ('Network is too deep OR input is too small. '
                   'rf_stride={} but input_dims={}'.format(
                       rf_stride, input_dims))
            self._debug_hidden(input_shape, n=2)
            print(msg)
            import warnings
            warnings.warn(msg)
            raise Exception(msg)

    branch_shape = nh.OutputShapeFor(self.branch)(input_shape)
    prepool_shape = branch_shape.hidden.shallow(1)['layer4']

    # replace the last layer of resnet with a linear embedding to learn the
    # LP distance between pairs of images.
    # Also need to replace the pooling layer in case the input has a
    # different size.
    self.prepool_shape = prepool_shape
    pool_channels = prepool_shape[1]
    pool_dims = prepool_shape[2:]
    if np.all(np.array(pool_dims) == 1):
        # already 1x1; pooling would be a no-op
        self.branch.avgpool = layers.Identity()
    else:
        self.branch.avgpool = torch.nn.AvgPool2d(pool_dims, stride=1)

    # Check that the modification to the layer fixed the size
    postbranch_shape = nh.OutputShapeFor(self.branch)(input_shape)
    postpool_shape = postbranch_shape.hidden.shallow(1)['layer4']
    assert np.all(np.array(prepool_shape[1:]) > 0)
    assert np.all(np.array(postpool_shape[1:]) > 0)

    # Replace the final linear layer with an MLP head
    self.branch.fc = layers.MultiLayerPerceptronNd(
        dim=0, in_channels=pool_channels,
        hidden_channels=hidden_channels, out_channels=desc_size,
        bias=bias, dropout=dropout, norm=norm, noli=noli,
        residual=residual)