def __init__(self, in_channels, out_features, hidden_size=64, fc_in_size=None,
             conv_kernel=(3, 3, 3, 3), strides=None):
    """Configurable conv backbone plus a linear classifier head.

    Args:
        in_channels: number of channels of the input images.
        out_features: number of output classes.
        hidden_size: channel width of every conv block.
        fc_in_size: input size of the final linear layer; defaults to
            ``hidden_size`` when not given.
        conv_kernel: kernel size per conv block; its length sets the depth.
        strides: optional per-block strides; must match ``conv_kernel`` in
            length, or ``None`` to let ``conv3x3`` pick its default.
    """
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    # BUG FIX: the default for ``conv_kernel`` was a mutable list
    # ([3, 3, 3, 3]), shared across all calls that rely on the default.
    # A tuple is immutable and behaves identically for len()/indexing.
    if strides is None:
        strides = [None] * len(conv_kernel)
    else:
        assert len(strides) == len(conv_kernel)
    # First block maps in_channels -> hidden_size; the rest keep the width.
    self.features = MetaSequential(
        conv3x3(in_channels, hidden_size, conv_kernel[0], strides[0]),
    )
    for k in range(1, len(conv_kernel)):
        self.features.add_module(
            'block_' + str(k),
            conv3x3(hidden_size, hidden_size, conv_kernel[k], strides[k]))
    if fc_in_size is None:
        fc_in_size = hidden_size
    self.classifier = MetaLinear(fc_in_size, out_features)
def __init__(self, in_features, out_features, hidden_sizes, meta_batch_size=1):
    """MLP of named (BatchLinear + ReLU) blocks plus a linear classifier."""
    super(MetaMLPModel, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.hidden_sizes = hidden_sizes
    # Consecutive pairs of this list give each layer's (input, output) size.
    layer_sizes = [in_features] + hidden_sizes
    blocks = OrderedDict()
    for idx, in_size in enumerate(layer_sizes[:-1]):
        blocks['layer{0}'.format(idx + 1)] = MetaSequential(OrderedDict([
            ('linear', BatchLinear(in_size, layer_sizes[idx + 1],
                                   meta_batch_size=meta_batch_size,
                                   bias=True)),
            ('relu', nn.ReLU()),
        ]))
    self.features = MetaSequential(blocks)
    self.classifier = BatchLinear(hidden_sizes[-1], out_features,
                                  meta_batch_size=meta_batch_size, bias=True)
def _make_layer(self, block, planes, stride=1, drop_rate=0.0, drop_block=False,
                block_size=1):
    """Build a one-block stage; a 1x1-conv + BN shortcut is added whenever
    the stride or the channel count changes."""
    out_planes = planes * block.expansion
    shortcut = None
    if stride != 1 or self.inplanes != out_planes:
        shortcut = MetaSequential(
            MetaConv2d(self.inplanes, out_planes, kernel_size=1, stride=1,
                       bias=False),
            MetaBatchNorm2d(out_planes, track_running_stats=False),
        )
    stage = [block(self.inplanes, planes, stride, shortcut, drop_rate,
                   drop_block, block_size)]
    self.inplanes = out_planes
    return MetaSequential(*stage)
def conv3x3(in_channels, out_channels, ksize=1, stride=None, **kwargs):
    """Conv + BN + ReLU block.

    With ``stride=None`` the conv is padded and followed by a 2x2 max-pool;
    otherwise a strided, un-padded conv is used and no pooling is applied.
    """
    if stride is None:
        return MetaSequential(
            MetaConv2d(in_channels, out_channels, kernel_size=ksize,
                       padding=1, **kwargs),
            MetaBatchNorm2d(out_channels, momentum=1.,
                            track_running_stats=False),
            nn.ReLU(),
            nn.MaxPool2d(2))
    return MetaSequential(
        MetaConv2d(in_channels, out_channels, stride=[stride, stride],
                   kernel_size=ksize, padding=0, **kwargs),
        MetaBatchNorm2d(out_channels, momentum=1.,
                        track_running_stats=False),
        nn.ReLU(),
    )
def conv_block(in_channels, out_channels, bias=True,
               activation=nn.ReLU(inplace=True), use_dropout=False, p=0.1):
    """3x3 conv -> BN -> activation -> 2x2 max-pool, with optional Dropout2d.

    NOTE(review): the default ``activation`` module is created once at
    function-definition time and shared by every default call; harmless for
    a parameter-free ReLU, but worth confirming for other activations.
    """
    stages = OrderedDict()
    stages['conv'] = MetaConv2d(int(in_channels), int(out_channels),
                                kernel_size=3, padding=1, bias=bias)
    stages['norm'] = MetaBatchNorm2d(int(out_channels), momentum=1.,
                                     track_running_stats=False)
    stages['relu'] = activation
    stages['pool'] = nn.MaxPool2d(2)
    block = MetaSequential(stages)
    if use_dropout:
        block.add_module('dropout', nn.Dropout2d(p))
    return block
def __init__(self):
    """Conv + spatial-softmax feature extractor and an MLP policy head."""
    super(MIL, self).__init__()
    # Track spatial dims through the three strided convs (input is 200x200).
    height, width = 200, 200
    h_c1, w_c1 = calcHW(height, width, kernel_size=8, stride=2)
    h_c2, w_c2 = calcHW(h_c1, w_c1, kernel_size=4, stride=2)
    h_c3, w_c3 = calcHW(h_c2, w_c2, kernel_size=4, stride=2)
    self.features = MetaSequential(
        MetaConv2d(in_channels=3, out_channels=32, kernel_size=8, stride=2),
        MetaLayerNorm([32, h_c1, w_c1]),
        nn.ReLU(),
        MetaConv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),
        MetaLayerNorm([64, h_c2, w_c2]),
        nn.ReLU(),
        MetaConv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2),
        MetaLayerNorm([64, h_c3, w_c3]),
        nn.ReLU(),
        SpatialSoftmax(h_c3, w_c3))
    # Policy head input: 2 * 64 spatial-softmax outputs plus 3 extra dims
    # (presumably a low-dim state appended by the caller — confirm).
    self.policy = MetaSequential(
        MetaLinear(2 * 64 + 3, 128), nn.ReLU(),
        MetaLinear(128, 128), nn.ReLU(),
        MetaLinear(128, 128), nn.ReLU(),
        MetaLinear(128, 4),
    )
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
    """Torchvision-style ResNet stage builder (meta-module variant)."""
    norm_layer = self._norm_layer
    prev_dilation = self.dilation
    if dilate:
        # Trade stride for dilation: keep resolution, grow receptive field.
        self.dilation *= stride
        stride = 1
    out_planes = planes * block.expansion
    downsample = None
    if stride != 1 or self.inplanes != out_planes:
        downsample = MetaSequential(
            conv1x1(self.inplanes, out_planes, stride),
            norm_layer(out_planes),
        )
    # Only the first block may downsample; the rest reuse self.inplanes.
    stage = [block(self.inplanes, planes, stride, downsample, self.groups,
                   self.base_width, prev_dilation, norm_layer)]
    self.inplanes = out_planes
    stage.extend(
        block(self.inplanes, planes, groups=self.groups,
              base_width=self.base_width, dilation=self.dilation,
              norm_layer=norm_layer)
        for _ in range(1, blocks))
    return MetaSequential(*stage)
class unetConv2(MetaModule):
    """Double convolution modules: (conv + norm + relu) x2

    Builds one of three conv stacks depending on ``fcn``/``fcn_center`` and
    Kaiming-initializes every MetaConv2d weight.

    NOTE(review): ``is_batchnorm`` and ``final`` are accepted but unused in
    this block — confirm whether they are dead parameters.
    NOTE(review): norm layers are plain ``nn.BatchNorm2d`` rather than meta
    modules, so their parameters are not meta-adapted — confirm intentional.
    """

    def __init__(self, in_channels, out_channels, is_batchnorm, kernel_size=3,
                 stride=1, padding=1, final=False, device='cpu', fcn=False,
                 fcn_center=False):
        super(unetConv2, self).__init__()
        self.device=device

        def init_layers(m):
            # Kaiming init for (meta) conv layers only.
            if type(m) == MetaConv2d:
                torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')

        if fcn==True:
            # FCN variant: three conv + norm + relu stages.
            self.double_conv = MetaSequential(OrderedDict([
                ('conv1', MetaConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
                ('norm1', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
                ('relu1', nn.ReLU(inplace=True)),
                #('dropout1', nn.Dropout(0.3)),
                ('conv2', MetaConv2d(out_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
                ('norm2', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
                ('relu2', nn.ReLU(inplace=True)),
                #('dropout2', nn.Dropout(0.3))
                ('conv3', MetaConv2d(out_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
                ('norm3', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
                ('relu3', nn.ReLU(inplace=True))
            ]))
        elif fcn_center==True:
            # Center variant: a single conv + norm + relu stage.
            self.double_conv = MetaSequential(OrderedDict([
                ('conv1', MetaConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
                ('norm1', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
                ('relu1', nn.ReLU(inplace=True))
            ]))
        else:
            # Default UNet double conv: two conv + norm + relu stages.
            self.double_conv = MetaSequential(OrderedDict([
                ('conv1', MetaConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
                ('norm1', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
                ('relu1', nn.ReLU(inplace=True)),
                #('dropout1', nn.Dropout(0.3)),
                ('conv2', MetaConv2d(out_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
                ('norm2', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
                ('relu2', nn.ReLU(inplace=True))
                #('dropout2', nn.Dropout(0.3))
            ]))
        # Use the module's apply function to recursively apply the initialization
        self.double_conv.apply(init_layers)

    def forward(self, inputs, params=None):
        """Run the double conv; moves inputs to CUDA only when device=='cuda'."""
        if self.device=='cuda':
            inputs = inputs.to(self.device)
        outputs = self.double_conv(inputs, params)
        return outputs
def __init__(self, in_planes, planes, stride=1):
    """Two 3x3 conv + BN layers with an identity or 1x1-projection shortcut."""
    super(BasicBlock, self).__init__()
    self.conv1 = MetaConv2d(in_planes, planes, kernel_size=3, stride=stride,
                            padding=1, bias=False)
    self.bn1 = MetaBatchNorm2d(planes)
    self.conv2 = MetaConv2d(planes, planes, kernel_size=3, stride=1,
                            padding=1, bias=False)
    self.bn2 = MetaBatchNorm2d(planes)
    out_planes = self.expansion * planes
    # Project the shortcut only when the output shape differs from the input.
    if stride != 1 or in_planes != out_planes:
        self.shortcut = MetaSequential(
            MetaConv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                       bias=False),
            MetaBatchNorm2d(out_planes)
        )
    else:
        self.shortcut = MetaSequential()
def _make_layer(self, block, planes, num_blocks, stride=1):
    """Stack ``num_blocks`` residual blocks; only the first may downsample."""
    out_planes = planes * block.expansion
    downsample = None
    if stride != 1 or self.in_planes != out_planes:
        downsample = MetaSequential(
            conv1x1(self.in_planes, out_planes, stride),
            MetaBatchNorm2d(out_planes))
    stage = [block(self.in_planes, planes, stride, downsample)]
    self.in_planes = out_planes
    stage.extend(block(self.in_planes, planes) for _ in range(1, num_blocks))
    return MetaSequential(*stage)
def __init__(self, in_dim, out_dim, num_layers=3, hidden_size=64,
             nonlinearity="relu"):
    """Flatten -> (Linear + activation) stack -> linear classifier head.

    Args:
        in_dim: flattened input dimensionality.
        out_dim: output dimensionality of the classifier.
        num_layers: number of extra hidden (Linear + activation) pairs
            appended after the first one.
        hidden_size: width of every hidden layer.
        nonlinearity: one of "relu", "swish" or "sigmoid".

    Raises:
        ValueError: if ``nonlinearity`` is not a recognized name.
    """
    super(MultiLayerPerceptron, self).__init__()
    self.in_dim = in_dim
    self.hidden_size = hidden_size
    self.out_dim = out_dim
    if nonlinearity == "relu":
        self.activation = nn.ReLU
    elif nonlinearity == "swish":
        self.activation = swish
    elif nonlinearity == "sigmoid":
        # BUG FIX: was ``nn.sigmoid`` (AttributeError at runtime); the
        # module class is ``nn.Sigmoid``.
        self.activation = nn.Sigmoid
    else:
        # BUG FIX: was a bare ``raise ()``, which only produces an opaque
        # TypeError; raise a descriptive ValueError instead.
        raise ValueError("Unknown nonlinearity: {0!r}".format(nonlinearity))
    self.layer_list = [
        nn.Flatten(),
        nn.Linear(in_dim, hidden_size),
        self.activation()
    ]
    for _ in range(num_layers):
        self.layer_list.extend(
            [nn.Linear(hidden_size, hidden_size), self.activation()])
    self.features = MetaSequential(*self.layer_list)
    self.classifier = MetaLinear(hidden_size, out_dim)
def __init__(self, out_features=10, input_size=100, hidden_size1=300,
             hidden_size2=200):
    """Deep MLP alternating two hidden widths, with a ReLU after each layer."""
    super(MLP, self).__init__()
    self.out_features = out_features
    # Widths: input -> h1, then (h2, h1) alternated three times, ending on h1.
    widths = [input_size] + [hidden_size1, hidden_size2] * 3 + [hidden_size1]
    layers = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())
    self.features = MetaSequential(*layers)
    self.classifier = MetaLinear(hidden_size1, out_features)
def model():
    """Tiny 2 -> 3 -> 1 meta model with a ReLU; final layer has no bias."""
    layers = (
        MetaLinear(2, 3, bias=True),
        nn.ReLU(),
        MetaLinear(3, 1, bias=False),
    )
    return MetaSequential(*layers)
def __init__(self, in_channels=3, hid_channels=64):
    """Two-block conv encoder."""
    super(Conv2, self).__init__()
    self.in_channels = in_channels
    self.hid_channels = hid_channels
    blocks = [conv3x3(in_channels, hid_channels),
              conv3x3(hid_channels, hid_channels)]
    self.encoder = MetaSequential(*blocks)
def _make_layer(self, block, planes, num_blocks, stride):
    """Stack blocks: the first uses ``stride``, the rest are stride-1."""
    layers = []
    # Renamed the loop variable so it no longer shadows the ``stride`` param.
    for block_stride in [stride] + [1] * (num_blocks - 1):
        layers.append(block(self.in_planes, planes, block_stride))
        self.in_planes = planes * block.expansion
    return MetaSequential(*layers)
def test_metasequential_params():
    """A partial ``params`` dict: layer 2 reads from ``params`` while layer 0
    falls back to its own weights; output must match plain nn.Sequential."""
    meta_model = MetaSequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        MetaLinear(3, 5, bias=True))
    model = nn.Sequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        nn.Linear(3, 5, bias=True))

    # Sync the first (non-meta) layer between both models.
    weight0 = torch.randn(3, 2)
    bias0 = torch.randn(3)
    for net in (meta_model, model):
        net[0].weight.data.copy_(weight0)
        net[0].bias.data.copy_(bias0)

    # Last layer's weights come via ``params``; mirror them into the plain
    # model so both compute the same function.
    params = OrderedDict()
    params['2.weight'] = torch.randn(5, 3)
    params['2.bias'] = torch.randn(5)
    model[2].weight.data.copy_(params['2.weight'])
    model[2].bias.data.copy_(params['2.bias'])

    inputs = torch.randn(5, 2)
    outputs_torchmeta = meta_model(inputs, params=params)
    outputs_nn = model(inputs)
    np.testing.assert_equal(outputs_torchmeta.detach().numpy(),
                            outputs_nn.detach().numpy())
def __init__(self, out_features, in_channels=1, hidden_size=32, mid_feats=256,
             feature_size=25088):
    """Two conv blocks followed by a two-layer linear classifier head."""
    super(MetaMNISTConvModel, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    self.feature_size = feature_size
    self.mid_feats = mid_feats
    named_blocks = OrderedDict()
    named_blocks['layer1'] = conv_block(in_channels, hidden_size,
                                        kernel_size=3, stride=1, padding=1,
                                        bias=True)
    named_blocks['layer2'] = conv_block(hidden_size, hidden_size,
                                        kernel_size=3, stride=1, padding=1,
                                        bias=True)
    self.features = MetaSequential(named_blocks)
    self.classifier_first = MetaLinear(feature_size, mid_feats, bias=True)
    self.classifier = MetaLinear(mid_feats, out_features, bias=True)
def __init__(self, out_features, in_channels=1, hidden_size=64,
             feature_size=64):
    """Two conv blocks plus a single linear classifier."""
    super(MetaToyConvModel, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    self.feature_size = feature_size
    named_blocks = OrderedDict()
    named_blocks['layer1'] = conv_block(in_channels, hidden_size,
                                        kernel_size=3, stride=1, padding=1,
                                        bias=True)
    named_blocks['layer2'] = conv_block(hidden_size, hidden_size,
                                        kernel_size=3, stride=1, padding=1,
                                        bias=True)
    self.features = MetaSequential(named_blocks)
    self.classifier = MetaLinear(feature_size, out_features, bias=True)
def __init__(self, in_dims, out_dims, hidden_size=100):
    """Tanh MLP policy emitting 2*out_dims values (diag-Gaussian params);
    per-layer outputs are captured into ``self.activation`` via hooks."""
    super(PolicyNetwork, self).__init__()
    self.in_dims = in_dims
    self.out_dims = out_dims * 2  # diag gaussian distribution
    self.hidden_size = hidden_size

    fc1 = MetaLinear(in_dims, hidden_size)
    fc2 = MetaLinear(hidden_size, hidden_size)
    fc3 = MetaLinear(hidden_size, self.out_dims)
    self.features = MetaSequential(
        fc1, nn.Tanh(),
        fc2, nn.Tanh(),
        fc3, nn.Tanh(),
    )

    self.activation = {}

    def get_activation(name):
        # Detach so stored activations don't keep the autograd graph alive.
        def hook(model, input, output):
            self.activation[name] = output.detach()
        return hook

    for tag, layer in (('fc1', fc1), ('fc2', fc2), ('fc3', fc3)):
        layer.register_forward_hook(get_activation(tag))
def conv_block(in_channels, out_channels, **kwargs):
    """Conv -> BatchNorm -> ReLU -> 2x2 MaxPool, with named submodules."""
    stages = [
        ('conv', MetaConv2d(in_channels, out_channels, **kwargs)),
        ('norm', nn.BatchNorm2d(out_channels, momentum=1.,
                                track_running_stats=False)),
        ('relu', nn.ReLU()),
        ('pool', nn.MaxPool2d(2)),
    ]
    return MetaSequential(OrderedDict(stages))
def conv3x3(in_channels, out_channels, **kwargs):
    """3x3 padded conv -> BN -> ReLU -> 2x2 max-pool."""
    conv = MetaConv2d(in_channels, out_channels, kernel_size=3, padding=1,
                      **kwargs)
    norm = MetaBatchNorm2d(out_channels, momentum=1.,
                           track_running_stats=False)
    return MetaSequential(conv, norm, nn.ReLU(), nn.MaxPool2d(2))
def __init__(self, l_obs, n_action, l1=64, l2=64):
    """Policy-gradient actor: two ReLU hidden layers, a sigmoid-squashed
    action layer, then a final linear action-to-action map."""
    super(MetaNet_PG, self).__init__()
    self.l_obs = l_obs
    self.n_action = n_action
    self.l1 = l1
    self.l2 = l2
    stack = [
        MetaLinear(self.l_obs, self.l1), nn.ReLU(),
        MetaLinear(self.l1, self.l2), nn.ReLU(),
        MetaLinear(self.l2, self.n_action), nn.Sigmoid(),
        MetaLinear(self.n_action, self.n_action),
    ]
    self.actor_net = MetaSequential(*stack)
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
             padding=1, final=False, device='cpu', is_first=False):
    """Residual double-conv block: a main conv stack plus a conv+BN
    projection (``addition_connection``) for the residual addition.

    The first block uses conv-first ordering; later blocks use
    pre-activation (norm -> relu -> conv) ordering.

    NOTE(review): ``final`` is accepted but unused here — confirm dead.
    NOTE(review): norm layers are plain ``nn.BatchNorm2d`` (not meta
    modules), so they are not meta-adapted — confirm intentional.
    """
    super(ResUnetConv2, self).__init__()
    self.device=device

    def init_layers(m):
        # Kaiming init for (meta) conv layers only.
        if type(m) == MetaConv2d:
            torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')

    if is_first:
        # First block: conv -> norm -> relu -> conv.
        self.double_conv = MetaSequential(OrderedDict([
            ('conv1', MetaConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
            ('norm1', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
            ('relu1', nn.ReLU(inplace=True)),
            #('dropout1', nn.Dropout(0.3)),
            ('conv2', MetaConv2d(out_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding))
            #('dropout2', nn.Dropout(0.3))
        ]))
    else:
        # Pre-activation ordering: norm -> relu -> conv, twice.
        self.double_conv = MetaSequential(OrderedDict([
            ('norm1', nn.BatchNorm2d(in_channels)),# momentum=1.,track_running_stats=False)),
            ('relu1', nn.ReLU(inplace=True)),
            #('dropout1', nn.Dropout(0.3)),
            ('conv1', MetaConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
            ('norm2', nn.BatchNorm2d(out_channels)),# momentum=1.,track_running_stats=False)),
            ('relu2', nn.ReLU(inplace=True)),
            ('conv2', MetaConv2d(out_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding))
            #('dropout2', nn.Dropout(0.3))
        ]))
    # Projection path used for the residual addition (shape matching).
    self.addition_connection = MetaSequential(OrderedDict([
        ('conv1', MetaConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)),
        ('norm2', nn.BatchNorm2d(out_channels))# momentum=1.,track_running_stats=False)),
        #('dropout2', nn.Dropout(0.3))
    ]))
    # use the module's apply function to recursively apply the initialization
    self.double_conv.apply(init_layers)
def __init__(self, in_channels=3, hid_channels=64):
    """Six-block conv encoder: four conv3x3_1 blocks then two conv3x3_2."""
    super(Conv6, self).__init__()
    self.in_channels = in_channels
    self.hid_channels = hid_channels
    blocks = [conv3x3_1(in_channels, hid_channels)]      # -> 42*42
    blocks.extend(conv3x3_1(hid_channels, hid_channels)
                  for _ in range(3))                     # -> 21*21, 10*10, 5*5
    blocks.extend(conv3x3_2(hid_channels, hid_channels)
                  for _ in range(2))
    self.encoder = MetaSequential(*blocks)
def __init__(self, in_channels, hidden1_size=40, hidden2_size=80):
    """ReLU MLP regressor with widths in -> h1 -> h2 -> h1 -> 1."""
    super(RegressionNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.hidden1_size = hidden1_size
    self.hidden2_size = hidden2_size
    widths = [in_channels, hidden1_size, hidden2_size, hidden1_size]
    layers = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        layers += [MetaLinear(fan_in, fan_out), nn.ReLU()]
    layers.append(MetaLinear(hidden1_size, 1))
    self.regressor = MetaSequential(*layers)
def __init__(self, in_channels, out_features, hidden_size=64):
    """Classic 4-block conv backbone plus a linear classifier head."""
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    # First block maps in_channels -> hidden_size; the rest keep the width.
    in_widths = [in_channels] + [hidden_size] * 3
    self.features = MetaSequential(
        *[conv3x3(width, hidden_size) for width in in_widths])
    self.classifier = MetaLinear(hidden_size, out_features)
def __init__(self, in_features, out_features, num_hidden_layers,
             hidden_features, outermost_linear=False):
    """ReLU MLP of BatchLinear layers; the final layer is plain linear when
    ``outermost_linear`` is set, otherwise it also gets a ReLU."""
    super().__init__()
    stages = [MetaSequential(BatchLinear(in_features, hidden_features),
                             nn.ReLU(inplace=True))]
    stages.extend(
        MetaSequential(BatchLinear(hidden_features, hidden_features),
                       nn.ReLU(inplace=True))
        for _ in range(num_hidden_layers))
    if outermost_linear:
        stages.append(
            MetaSequential(BatchLinear(hidden_features, out_features)))
    else:
        stages.append(
            MetaSequential(BatchLinear(hidden_features, out_features),
                           nn.ReLU(inplace=True)))
    self.net = MetaSequential(*stages)
    # Apply the file's normal-init helper across the whole stack.
    self.net.apply(init_weights_normal)
def test_metasequential():
    """With params=None, MetaSequential must behave exactly like
    nn.Sequential, and only the MetaLinear contributes meta parameters."""
    meta_model = MetaSequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        MetaLinear(3, 5, bias=True))
    model = nn.Sequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        nn.Linear(3, 5, bias=True))

    assert isinstance(meta_model, MetaModule)
    assert isinstance(meta_model, nn.Sequential)

    # Only the meta layer (index 2) should expose meta parameters.
    params = OrderedDict(meta_model.meta_named_parameters())
    assert set(params.keys()) == set(['2.weight', '2.bias'])

    # Copy identical random weights into both models, layer by layer.
    for idx, (rows, cols) in ((0, (3, 2)), (2, (5, 3))):
        weight = torch.randn(rows, cols)
        bias = torch.randn(rows)
        for net in (meta_model, model):
            net[idx].weight.data.copy_(weight)
            net[idx].bias.data.copy_(bias)

    inputs = torch.randn(5, 2)
    outputs_torchmeta = meta_model(inputs, params=None)
    outputs_nn = model(inputs)
    np.testing.assert_equal(outputs_torchmeta.detach().numpy(),
                            outputs_nn.detach().numpy())
class MetaFC(MetaModule):
    '''A fully connected neural network that allows swapping out the weights,
    either via a hypernetwork or via MAML.
    '''

    def __init__(self, in_features, out_features, num_hidden_layers,
                 hidden_features, outermost_linear=False):
        super().__init__()
        blocks = [MetaSequential(BatchLinear(in_features, hidden_features),
                                 nn.ReLU(inplace=True))]
        for _ in range(num_hidden_layers):
            blocks.append(
                MetaSequential(BatchLinear(hidden_features, hidden_features),
                               nn.ReLU(inplace=True)))
        # Output stage: plain linear, or linear + ReLU.
        if outermost_linear:
            blocks.append(
                MetaSequential(BatchLinear(hidden_features, out_features)))
        else:
            blocks.append(
                MetaSequential(BatchLinear(hidden_features, out_features),
                               nn.ReLU(inplace=True)))
        self.net = MetaSequential(*blocks)
        self.net.apply(init_weights_normal)

    def forward(self, coords, params=None, **kwargs):
        '''Simple forward pass without computation of spatial gradients.'''
        sub_params = self.get_subdict(params, 'net')
        return self.net(coords, params=sub_params)
def __init__(self, in_features, out_features, num_hidden_layers,
             hidden_features, outermost_linear=False, nonlinearity='relu',
             weight_init=None):
    """FC net with a selectable nonlinearity and matching weight init.

    Args:
        in_features: input dimensionality.
        out_features: output dimensionality.
        num_hidden_layers: number of hidden (BatchLinear + nl) stages.
        hidden_features: width of every hidden layer.
        outermost_linear: if True, the last layer has no nonlinearity.
        nonlinearity: key into the dispatch table below.
        weight_init: optional override for the per-nonlinearity init.
    """
    super().__init__()
    self.first_layer_init = None

    # Dictionary that maps nonlinearity name to the respective function, initialization, and, if applicable,
    # special first-layer initialization scheme
    nls_and_inits = {
        'sine': (Sine(), sine_init, first_layer_sine_init,
                 last_layer_sine_init),
        'relu': (nn.ReLU(inplace=True), init_weights_normal, None, None),
        'sigmoid': (nn.Sigmoid(), init_weights_xavier, None, None),
        'tanh': (nn.Tanh(), init_weights_xavier, None, None),
        'selu': (nn.SELU(inplace=True), init_weights_selu, None, None),
        'softplus': (nn.Softplus(), init_weights_normal, None, None),
        'elu': (nn.ELU(inplace=True), init_weights_elu, None, None)
    }

    # NOTE(review): a single nl module instance is shared by every stage;
    # harmless for these parameter-free activations.
    nl, nl_weight_init, first_layer_init, last_layer_init = nls_and_inits[
        nonlinearity]

    if weight_init is not None:  # Overwrite weight init if passed
        self.weight_init = weight_init
    else:
        self.weight_init = nl_weight_init

    self.net = []
    self.net.append(
        MetaSequential(BatchLinear(in_features, hidden_features), nl))
    for i in range(num_hidden_layers):
        self.net.append(
            MetaSequential(BatchLinear(hidden_features, hidden_features),
                           nl))
    if outermost_linear:
        self.net.append(
            MetaSequential(BatchLinear(hidden_features, out_features)))
    else:
        self.net.append(
            MetaSequential(BatchLinear(hidden_features, out_features), nl))
    self.net = MetaSequential(*self.net)

    if self.weight_init is not None:
        self.net.apply(self.weight_init)

    if first_layer_init is not None:  # Apply special initialization to first layer, if applicable.
        self.net[0].apply(first_layer_init)
    if last_layer_init is not None:
        self.net[-1].apply(last_layer_init)