def __init__(self):
    super(VGG16_SG2, self).__init__()
    # Modify the original VGG model to have a 10 unit output linear layer
    # (the custom classifier defined below replaces VGG's own classifier).
    self.model = models.vgg16(pretrained=True)
    self.model.classifier[0].in_features = 512
    # Lift single-channel input to the 3 channels VGG expects.
    self.conv1 = nn.Conv2d(1, 3, 1)
    # Split the VGG feature extractor into its five convolutional blocks.
    self.block1 = self.model.features[:5]
    self.block2 = self.model.features[5:10]
    self.block3 = self.model.features[10:17]
    self.block4 = self.model.features[17:24]
    self.block5 = self.model.features[24:]
    self.classifier = nn.Sequential(
        nn.Linear(512, 128),
        nn.ReLU(True),
        nn.Linear(128, 64),
        nn.ReLU(True),
        nn.Linear(64, 10),
    )
    # One synthetic-gradient interface per convolutional block.
    self.backward_interface_1 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=24, n_hidden=20))
    self.backward_interface_2 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=12, n_hidden=20))
    self.backward_interface_3 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=6, n_hidden=20))
    self.backward_interface_4 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=3, n_hidden=20))
    self.backward_interface_5 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=1, n_hidden=20))
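# NOTE: forward() is not shown in this excerpt. The sketch below is one
# plausible way to wire the five blocks through their backward interfaces,
# following the dni-pytorch convention of calling a BackwardInterface on the
# activations it should attach a synthetic gradient to. It is an illustration,
# not necessarily the repo's exact implementation.
def forward(self, x):
    x = self.conv1(x)  # 1-channel input -> 3 channels for VGG
    blocks = [self.block1, self.block2, self.block3, self.block4, self.block5]
    interfaces = [self.backward_interface_1, self.backward_interface_2,
                  self.backward_interface_3, self.backward_interface_4,
                  self.backward_interface_5]
    for block, interface in zip(blocks, interfaces):
        x = block(x)
        if self.training:
            # Synthetic gradient is attached at each block boundary.
            x = interface(x)
    x = x.view(x.size(0), -1)  # assumes a 1x1 spatial map -> (batch, 512)
    return self.classifier(x)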
def __init__(self):
    super(Eight_Layer_SG, self).__init__()
    self.block_1 = nn.Sequential(
        nn.Conv2d(
            in_channels=1,
            out_channels=32,
            kernel_size=(12, 12),
            stride=(4, 4),
            padding=(4, 4),
        ),
        nn.BatchNorm2d(32),
        nn.ReLU(),
    )
    self.block_2 = nn.Sequential(
        nn.Conv2d(
            in_channels=32,
            out_channels=64,
            kernel_size=(12, 12),
            stride=(4, 4),
            padding=(4, 4),
        ),
        nn.BatchNorm2d(64),
        nn.ReLU(),
    )
    self.classifier = nn.Sequential(
        nn.Linear(576, 128),
        nn.ReLU(True),
        nn.Dropout(p=0.5),
        nn.Linear(128, 64),
        nn.ReLU(True),
        nn.Dropout(p=0.5),
        nn.Linear(64, 10),
    )
    self.backward_interface_1 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=12, n_hidden=256))
    self.backward_interface_2 = dni.BackwardInterface(
        dni.BasicSynthesizer(output_dim=3, n_hidden=128))
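# NOTE (illustration, not from this repo): models such as the ones above are
# typically trained with dni-pytorch by wrapping the step in
# dni.defer_backward() and calling dni.backward(loss), so the task loss and
# the synthesizer losses are backpropagated together. The names below
# (train_step, model, optimizer) are placeholders.
import torch.nn.functional as F

def train_step(model, optimizer, data, target):
    model.train()
    optimizer.zero_grad()
    with dni.defer_backward():
        output = model(data)
        loss = F.cross_entropy(output, target)
        dni.backward(loss)  # also propagates through the DNI interfaces
    optimizer.step()
    return loss.item()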
def __init__(self):
    super(Net, self).__init__()
    self.hidden1 = nn.Linear(784, 256, bias=False)
    self.hidden1_bn = nn.BatchNorm1d(256)
    self.hidden2 = nn.Linear(256, 256, bias=False)
    self.hidden2_bn = nn.BatchNorm1d(256)
    if args.dni:
        if args.context:
            context_dim = 10
        else:
            context_dim = None
        self.bidirectional_interface = dni.BidirectionalInterface(
            dni.BasicSynthesizer(output_dim=256, n_hidden=2,
                                 trigger_dim=784, context_dim=context_dim),
            dni.BasicSynthesizer(output_dim=256, n_hidden=2,
                                 context_dim=context_dim))
    self.output = nn.Linear(256, 10, bias=False)
    self.output_bn = nn.BatchNorm1d(10)
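# NOTE (rough sketch, with assumptions): the forward pass for this Net is not
# included here. The fragment below assumes the BidirectionalInterface is
# called with the hidden activation as the message and the flattened
# 784-dimensional input as the trigger (suggested by trigger_dim=784 above),
# and that one_hot(y, 10) is a hypothetical helper encoding labels for the
# optional context. Placement and call convention are assumptions, not taken
# from the repo.
def forward(self, x, y=None):
    x_input = x.view(x.size(0), -1)
    x = F.relu(self.hidden1_bn(self.hidden1(x_input)))
    if args.dni and self.training:
        context = one_hot(y, 10) if args.context else None  # hypothetical helper
        with dni.synthesizer_context(context):
            # message = current activation, trigger = flattened network input
            x = self.bidirectional_interface(x, x_input)
    x = F.relu(self.hidden2_bn(self.hidden2(x)))
    return F.log_softmax(self.output_bn(self.output(x)), dim=1)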
def _init_dni(self, synthesizer_type, freeze_synthesizer, non_zero_init,
              scale_synth_grad, trained_net_file, trained_net_initial_file):
    if self.context:
        context_dim = 10
    else:
        context_dim = None
    if synthesizer_type == 'mlp':
        synthesizer = dni.BasicSynthesizer(
            output_dim=self.num_neurons,
            context_dim=context_dim,
            n_hidden=1,
            non_zero_init=non_zero_init,
            normalize_output=scale_synth_grad)
    if synthesizer_type == 'local_mlp':
        synthesizer = LocalSynthesizer(output_dim=self.num_neurons,
                                       context_dim=context_dim,
                                       non_zero_init=non_zero_init)
    if synthesizer_type == 'conv':
        pass  # TODO
    self.backward_interface = dni.BackwardInterface(synthesizer)

    # Loading the specified initialization
    if trained_net_initial_file is not None:
        trained_net = torch.load(trained_net_initial_file)
        dict_trained_params = dict(trained_net.named_parameters())
        for name, param in self.named_parameters():
            if name in dict_trained_params:
                # Temporarily drop requires_grad so the in-place copy_ is not
                # rejected by autograd for a leaf that requires grad.
                param.requires_grad = False
                print('Copying pretrained ' + name)
                param.copy_(dict_trained_params[name].data)
                param.requires_grad = True

    # Loading the trained synthesizer
    if trained_net_file is not None:
        trained_net = torch.load(trained_net_file)
        dict_trained_params = dict(
            trained_net.backward_interface.named_parameters())
        for name, param in self.backward_interface.named_parameters():
            if name in dict_trained_params:
                param.requires_grad = False
                print('Copying pretrained ' + name)
                param.copy_(dict_trained_params[name].data)
                param.requires_grad = True

    if freeze_synthesizer:
        for name, param in self.backward_interface.named_parameters():
            param.requires_grad = False
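# NOTE (illustration with placeholder names, not from this repo): once
# freeze_synthesizer has set requires_grad=False on the synthesizer's
# parameters, a common follow-up is to build the optimizer only over the
# parameters that still require gradients, so the frozen synthesizer is
# neither updated nor regularized.
import torch.optim as optim

trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-3)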
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             tie_weights=False, use_dni=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError(
                "An invalid option for `--model` was supplied; options are "
                "['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU']")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity,
                          dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError(
                'When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers

    if use_dni:
        # For an LSTM the synthetic gradient covers both h and c, hence 2 * nhid.
        if rnn_type == 'LSTM':
            output_dim = 2 * nhid
        else:
            output_dim = nhid
        self.backward_interface = dni.BackwardInterface(
            dni.BasicSynthesizer(output_dim, n_hidden=2))
    else:
        self.backward_interface = None
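# NOTE (illustration only): as noted above, output_dim is 2 * nhid for an LSTM
# because its recurrent state is a pair (h, c). The helper below sketches how
# the backward interface could be applied to that state at a truncated-BPTT
# segment boundary by packing the pair into one tensor; this wiring is an
# assumption, not necessarily the repo's exact forward logic.
def _dni_hidden(self, hidden):
    if self.backward_interface is None or not self.training:
        return hidden
    if self.rnn_type == 'LSTM':
        h, c = hidden
        packed = torch.cat([h, c], dim=2)          # (nlayers, batch, 2 * nhid)
        packed = self.backward_interface(packed)   # attach synthetic gradient
        h, c = packed.chunk(2, dim=2)
        return (h.contiguous(), c.contiguous())
    return self.backward_interface(hidden)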