def __init__(self, version=1.0, num_classes=2):
    super(SqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        raise ValueError("Unsupported SqueezeNet version {version}: "
                         "1.0 or 1.1 expected".format(version=version))
    self.num_classes = num_classes
    self.features = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=3, stride=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(64, 16, 64, 64),
        Fire(128, 16, 64, 64),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(128, 32, 128, 128),
        Fire(256, 32, 128, 128),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(256, 48, 192, 192),
        Fire(384, 48, 192, 192),
        Fire(384, 64, 256, 256),
        Fire(512, 64, 256, 256),
    )
    self.layer = encoding.nn.Encoding(D=512, K=32)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Linear(32 * 512, num_classes),
        nn.ReLU(inplace=True),
        nn.Softmax(dim=1))  # dim=1 so the softmax runs over the class axis
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
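# Note: the snippets in this file use the pre-0.4 PyTorch initializer names
# (init.kaiming_uniform, init.normal, init.constant), which emit deprecation
# warnings on old releases and were removed later. A minimal sketch of the
# current in-place equivalents (trailing underscore), shown for reference:
import torch.nn as nn
import torch.nn.init as init

conv = nn.Conv2d(1, 64, kernel_size=3, stride=2)
init.kaiming_uniform_(conv.weight)  # current API: in-place, underscore suffix
if conv.bias is not None:
    init.constant_(conv.bias, 0)    # replaces init.constant(conv.bias, 0)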
def __init__(self, version=1.0, num_classes=1000):
    super(RecurrentSqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        raise ValueError("Unsupported SqueezeNet version {version}: "
                         "1.0 or 1.1 expected".format(version=version))
    self.num_classes = num_classes
    global lstm_input_size, lstm_hidden_size
    self.lstm_cell_unit = LSTMCellModule(lstm_input_size, lstm_hidden_size)
    if version == 1.0:
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            RecurrentAttention(96, self.lstm_cell_unit),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            RecurrentFire(96, 16, 64, 64, self.lstm_cell_unit),
            RecurrentFire(128, 16, 64, 64, self.lstm_cell_unit),
            RecurrentFire(128, 32, 128, 128, self.lstm_cell_unit),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            RecurrentFire(256, 32, 128, 128, self.lstm_cell_unit),
            RecurrentFire(256, 48, 192, 192, self.lstm_cell_unit),
            RecurrentFire(384, 48, 192, 192, self.lstm_cell_unit),
            RecurrentFire(384, 64, 256, 256, self.lstm_cell_unit),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            RecurrentFire(512, 64, 256, 256, self.lstm_cell_unit),
        )
    else:
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=2),  # 1 input channel for MNIST.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AvgPool2d(1, stride=1)  # Changed for MNIST.
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal(m.weight.data, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, configer, in_channels):
    super(PoseModel, self).__init__()
    self.configer = configer
    self.in_channels = in_channels
    model_dict = self._get_model_dict()
    self.model1_1 = model_dict['block1_1']
    self.model2_1 = model_dict['block2_1']
    self.model3_1 = model_dict['block3_1']
    self.model4_1 = model_dict['block4_1']
    self.model5_1 = model_dict['block5_1']
    self.model6_1 = model_dict['block6_1']
    self.model1_2 = model_dict['block1_2']
    self.model2_2 = model_dict['block2_2']
    self.model3_2 = model_dict['block3_2']
    self.model4_2 = model_dict['block4_2']
    self.model5_2 = model_dict['block5_2']
    self.model6_2 = model_dict['block6_2']
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, num_classes=1000):
    super(SqueezeNet_CIFAR, self).__init__()
    self.model_name = 'squeezenet_cifar'
    self.num_classes = num_classes
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(64, 16, 64, 64),
        Fire(128, 16, 64, 64),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(128, 32, 128, 128),
        Fire(256, 32, 128, 128),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(256, 48, 192, 192),
        Fire(384, 48, 192, 192),
        Fire(384, 64, 256, 256),
        Fire(512, 64, 256, 256),
    )
    self.avgpool = nn.AvgPool2d(kernel_size=4, stride=1)
    self.fc = nn.Linear(512, num_classes)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
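# The class above only defines modules; no forward pass is shown in the
# source. A minimal sketch consistent with these shapes, assuming 32x32
# CIFAR inputs (so features yields a 4x4 map and avgpool reduces it to 1x1):
def forward(self, x):
    x = self.features(x)       # (N, 512, 4, 4) for 32x32 inputs
    x = self.avgpool(x)        # (N, 512, 1, 1)
    x = x.view(x.size(0), -1)  # flatten to (N, 512)
    return self.fc(x)          # (N, num_classes)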
def __init__(self, version=1.0, num_classes=1000):
    super(SqueezeNet, self).__init__()
    self.num_classes = 31  # note: hard-coded, overriding the num_classes argument
    self.features = nn.Sequential(
        nn.Conv2d(1, 64, kernel_size=3, stride=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(64, 16, 64, 64),
        Fire(128, 16, 64, 64),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(128, 32, 128, 128),
        Fire(256, 32, 128, 128),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(256, 48, 192, 192),
        Fire(384, 48, 192, 192),
        Fire(384, 64, 256, 256),
        Fire(512, 64, 256, 256),
    )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AvgPool2d(6, stride=1))
    self.linear = nn.Linear(155, self.num_classes)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal(m.weight.data, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
def kaiming_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        init.kaiming_normal(m.weight.data)
        # Kaiming init is undefined for 1-D tensors such as biases
        # (no fan-in can be computed), so zero the bias instead.
        if m.bias is not None:
            m.bias.data.zero_()
    if classname.find('Linear') != -1:
        init.kaiming_uniform(m.weight.data)
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        init.kaiming_uniform(m.weight.data)
        if m.bias is not None:  # guard: conv layers may be built with bias=False
            m.bias.data.fill_(0)
    if isinstance(m, nn.Linear):
        init.kaiming_uniform(m.weight.data)
        if m.bias is not None:
            m.bias.data.fill_(0)
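# Initializer functions with this (m) signature are meant to be passed to
# Module.apply, which invokes them on every submodule. A minimal usage
# sketch (the Sequential model below is a stand-in, not from the source):
import torch.nn as nn

net = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
)
net.apply(weights_init)  # recursively re-initializes every Conv2d/Linear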
def __init__(self, num_classes=10):
    super(SqueezeNet, self).__init__()
    self.num_classes = num_classes
    self.firstConv = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)
    self.features = nn.Sequential(
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        Fire(96, 16, 64, 64),
        Fire(128, 16, 64, 64),
        Fire(128, 32, 128, 128),
        nn.MaxPool2d(kernel_size=2, stride=2),
        Fire(256, 32, 128, 128),
        Fire(256, 48, 192, 192),
        Fire(384, 48, 192, 192),
        Fire(384, 64, 256, 256),
        nn.MaxPool2d(kernel_size=2, stride=2),
        Fire(512, 64, 256, 256),
    )
    # Final convolution is initialized differently from the rest
    self.final_conv = nn.Conv2d(512, self.num_classes, kernel_size=4)
    # self.classifier = nn.Sequential(
    #     # nn.Dropout(p=0.5),
    #     final_conv,
    #     # nn.ReLU(inplace=True),
    #     # nn.AvgPool2d(4, stride=1)
    # )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is self.final_conv:
                weight_init.normal(m.weight, mean=0.0, std=0.01)
            else:
                weight_init.kaiming_uniform(m.weight)
            if m.bias is not None:
                weight_init.constant(m.bias, 0)
def __init__(self, embedding_tokens):
    super(Net, self).__init__()
    question_features = 1024
    vision_features = config.output_features
    glimpses = 2
    self.text = TextProcessor(
        embedding_tokens=embedding_tokens,
        embedding_features=300,
        lstm_features=question_features,
        drop=0.5,
    )
    self.attention = Attention(
        v_features=vision_features,
        q_features=question_features,
        mid_features=512,
        glimpses=glimpses,  # use the variable so the two counts cannot diverge
        drop=0.5,
    )
    self.classifier = Classifier(
        in_features=glimpses * vision_features + question_features,
        mid_features=1024,
        out_features=config.max_answers,
        drop=0.5,
    )
    for m in self.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            init.kaiming_uniform(m.weight, nonlinearity='relu')
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, frame_size, dim, q_levels, weight_norm):
    super().__init__()
    self.q_levels = q_levels
    self.embedding = torch.nn.Embedding(self.q_levels, self.q_levels)
    self.input = torch.nn.Conv1d(in_channels=q_levels,
                                 out_channels=dim,
                                 kernel_size=frame_size,
                                 bias=False)
    init.kaiming_uniform(self.input.weight)
    if weight_norm:
        self.input = torch.nn.utils.weight_norm(self.input)
    self.hidden = torch.nn.Conv1d(in_channels=dim,
                                  out_channels=dim,
                                  kernel_size=1)
    init.kaiming_uniform(self.hidden.weight)
    init.constant(self.hidden.bias, 0)
    if weight_norm:
        self.hidden = torch.nn.utils.weight_norm(self.hidden)
    self.output = torch.nn.Conv1d(in_channels=dim,
                                  out_channels=q_levels,
                                  kernel_size=1)
    # nn here is the project's own helpers module, not torch.nn:
    # lecun_uniform is not part of torch.nn.init.
    nn.lecun_uniform(self.output.weight)
    init.constant(self.output.bias, 0)
    if weight_norm:
        self.output = torch.nn.utils.weight_norm(self.output)
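# lecun_uniform above comes from the project's local nn helpers module, not
# from PyTorch. A plausible implementation, assuming the standard LeCun
# uniform definition (uniform on +/- sqrt(3 / fan_in)):
import math
import torch.nn.init as init

def lecun_uniform(tensor):
    fan_in, _ = init._calculate_fan_in_and_fan_out(tensor)
    bound = math.sqrt(3.0 / fan_in)
    tensor.data.uniform_(-bound, bound)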
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        init.kaiming_uniform(m.weight.data)
    elif classname.find('BatchNorm') != -1 or classname.find('LayerNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
def __init__(self, num_classes=1000):
    super(SqueezeNetV11BN, self).__init__()
    self.num_classes = num_classes
    self.features = Sequential(
        Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
        BatchNorm2d(64),
        ReLU(inplace=True),
        MaxPool2d(kernel_size=2, stride=1),
        FireBN(64, 16, 64, 64),
        FireBN(128, 16, 64, 64),
        MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        FireBN(128, 32, 128, 128),
        FireBN(256, 32, 128, 128),
        MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        FireBN(256, 48, 192, 192),
        FireBN(384, 48, 192, 192),
        FireBN(384, 64, 256, 256),
        FireBN(512, 64, 256, 256),
    )
    # Final convolution is initialized differently from the rest
    final_conv = Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = Sequential(
        Dropout(p=0.5),
        final_conv,
        ReLU(inplace=True),
        AdaptiveAvgPool2d(1))
    for m in self.modules():
        if isinstance(m, Conv2d):
            if m is final_conv:
                normal(m.weight.data, mean=0.0, std=0.01)
            else:
                kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
def __init__(self, num_classes=200):
    super(SqueezeNet, self).__init__()
    self.num_classes = num_classes
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(64, 16, 64, 64),
        Fire(128, 16, 64, 64, residual=True),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(128, 32, 128, 128),
        Fire(256, 32, 128, 128, residual=True),
        nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
        Fire(256, 48, 192, 192),
        Fire(384, 48, 192, 192, residual=True),
        Fire(384, 64, 256, 256),
        Fire(512, 64, 256, 256, residual=True))
    self.avg_pool = (nn.AvgPool2d(7) if self.num_classes == 200
                     else nn.AvgPool2d(kernel_size=4, stride=4))
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.6),
        final_conv,
        nn.ReLU(inplace=True),
        self.avg_pool)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal(m.weight, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()
def init_modules(modules):
    '''Initialize Convolutional and Linear modules with He initialization.'''
    for m in modules:
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            init.kaiming_uniform(m.weight)
def weights_init(m):
    if isinstance(m, torch.nn.Conv2d):
        init.kaiming_uniform(m.weight)
        init.constant(m.bias, 0.01)
    if isinstance(m, torch.nn.Linear):
        init.xavier_uniform(m.weight)
        init.constant(m.bias, 0.01)
def init_weights(self):
    """Initialize the weights."""
    init.kaiming_uniform(self.affine_a.weight, mode='fan_in')
    self.affine_a.bias.data.fill_(0)
    init.kaiming_uniform(self.affine_b.weight, mode='fan_in')
    self.affine_b.bias.data.fill_(0)
def __init__(self, characters_vocabulary: Dict[str, int],
             characters_embedding_dimension=100, context_size=128,
             word_embeddings_dimension=100, hidden_state_dimension=128,
             fc_dropout_p=0, lstm_dropout=0, comick_compatibility=False):
    super().__init__()
    self.version = 2.0
    self.characters_vocabulary = characters_vocabulary
    self.context_size = context_size
    self.embeddings = nn.Embedding(
        num_embeddings=len(self.characters_vocabulary),
        embedding_dim=characters_embedding_dimension,
        padding_idx=0)
    kaiming_uniform(self.embeddings.weight)
    self.lstm = nn.LSTM(
        input_size=characters_embedding_dimension + self.context_size,
        hidden_size=hidden_state_dimension,
        num_layers=2,
        batch_first=True,
        bidirectional=True,
    )
def __init__(self, num_embeddings, embedding_dim, hidden_state_dim,
             n_lstms=1, padding_idx=0, freeze_embeddings=False, dropout=0):
    super().__init__()
    self.dropout = nn.Dropout(dropout)
    self.embeddings = nn.Embedding(num_embeddings=num_embeddings,
                                   embedding_dim=embedding_dim,
                                   padding_idx=padding_idx)  # was hard-coded to 0
    kaiming_uniform(self.embeddings.weight)
    if freeze_embeddings:
        print('Freezing embeddings')  # print once, not once per parameter
        for param in self.embeddings.parameters():
            param.requires_grad = False
    self.lstms = []
    for i in range(n_lstms):
        lstm = nn.LSTM(input_size=embedding_dim,
                       hidden_size=hidden_state_dim,
                       num_layers=1,
                       batch_first=True,
                       bidirectional=True,
                       dropout=dropout)
        setattr(self, 'lstm' + str(i), lstm)  # to support 'parameters()'
        self.lstms.append(lstm)
def __init__(
    self,
    characters_vocabulary: Dict[str, int],
    words_vocabulary: Dict[str, int],
    characters_embedding_dimension=20,
    characters_hidden_state_dimension=50,
    word_embeddings_dimension=50,
    words_hidden_state_dimension=50,
    words_embeddings=None,
    freeze_word_embeddings=False,
):
    super().__init__()
    self.words_vocabulary = words_vocabulary
    self.characters_vocabulary = characters_vocabulary
    self.context = Context(hidden_state_dim=words_hidden_state_dimension,
                           output_dim=2 * characters_hidden_state_dimension,
                           num_embeddings=len(self.words_vocabulary),
                           embedding_dim=word_embeddings_dimension,
                           freeze_embeddings=freeze_word_embeddings)
    if words_embeddings is not None:
        self.load_words_embeddings(words_embeddings)
    self.mimick = MultiLSTM(
        num_embeddings=len(self.characters_vocabulary),
        embedding_dim=characters_embedding_dimension,
        hidden_state_dim=characters_hidden_state_dimension)
    self.fc = nn.Linear(in_features=2 * characters_hidden_state_dimension,
                        out_features=word_embeddings_dimension)
    kaiming_uniform(self.fc.weight)
def __init__(self, num_classes=10):
    super(Test, self).__init__()
    self.num_classes = num_classes
    self.features = nn.Sequential(
        nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
    )
    self.fire1_squeeze = nn.Conv2d(96, 16, kernel_size=1, stride=1)
    self.squeeze_activation = nn.ReLU(inplace=True)
    self.fire1_expand1 = nn.Conv2d(16, 64, kernel_size=1, stride=1)
    self.expand1x1_activation = nn.ReLU(inplace=True)
    self.fire1_expand2 = nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1)
    self.expand3x3_activation = nn.ReLU(inplace=True)
    # self.fire1 = Fire(96, 16, 64, 64)
    self.fire2 = Fire(128, 16, 64, 64)
    self.fire3 = Fire(128, 32, 128, 128)
    torch.manual_seed(42)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            weight_init.kaiming_uniform(m.weight)
            if m.bias is not None:
                weight_init.constant(m.bias, 0)
def __init__(self, hyperParams):
    super(RNNLabeler, self).__init__()
    self.hyperParams = hyperParams
    if hyperParams.wordEmbFile == "":
        self.wordEmb = nn.Embedding(hyperParams.wordNum,
                                    hyperParams.wordEmbSize)
        self.wordDim = hyperParams.wordEmbSize
    else:
        self.wordEmb, self.wordDim = self.load_pretrain(
            hyperParams.wordEmbFile, hyperParams.wordAlpha)
    self.wordEmb.weight.requires_grad = hyperParams.wordFineTune
    self.dropOut = nn.Dropout(hyperParams.dropProb)
    self.LSTM = nn.LSTM(input_size=self.wordDim,
                        hidden_size=hyperParams.rnnHiddenSize,
                        dropout=hyperParams.dropProb,
                        batch_first=True,
                        num_layers=2,
                        bidirectional=True)
    self.linearLayer = nn.Linear(hyperParams.rnnHiddenSize * 2,
                                 hyperParams.labelSize, bias=False)
    init.kaiming_uniform(self.linearLayer.weight)
    self.outputLayer = nn.Linear(hyperParams.rnnHiddenSize * 2,
                                 hyperParams.labelSize, bias=False)
    init.kaiming_uniform(self.outputLayer.weight)
def __init__(self, version=1.0, num_classes=2, use_ref=False):
    super(SqueezeNetQC, self).__init__()
    self.use_ref = use_ref
    self.feat = 3
    if version not in [1.0, 1.1]:
        raise ValueError("Unsupported SqueezeNet version {version}: "
                         "1.0 or 1.1 expected".format(version=version))
    self.num_classes = num_classes
    if version == 1.0:
        self.features = nn.Sequential(
            nn.Conv2d(2 if use_ref else 1, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(512, 64, 256, 256),
        )
    else:
        self.features = nn.Sequential(
            nn.Conv2d(2 if use_ref else 1, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512 * self.feat, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AvgPool2d(13, stride=1)
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal(m.weight.data, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
def reset(self):
    kaiming_uniform(self.conv.weight)
    self.conv.bias.data.zero_()
    for i in self.mlp:
        if isinstance(i, nn.Linear):
            kaiming_uniform(i.weight)
            i.bias.data.zero_()
def __init__(self, inputSize):
    super().__init__()
    self.linear = nn.Linear(inputSize, 10)
    self.linear2 = nn.Linear(10, 5)
    self.linear3 = nn.Linear(5, 1)  # in_features must match linear2's output (was 3)
    init.xavier_uniform(self.linear.weight)
    init.kaiming_uniform(self.linear2.weight)
def __init__(self, frame_size, n_frame_samples, n_rnn, dim, learn_h0,
             weight_norm):
    super().__init__()
    self.frame_size = frame_size
    self.n_frame_samples = n_frame_samples
    self.dim = dim

    h0 = torch.zeros(n_rnn, dim)
    if learn_h0:
        self.h0 = torch.nn.Parameter(h0)
    else:
        self.register_buffer('h0', torch.autograd.Variable(h0))

    self.input_expand = torch.nn.Conv1d(in_channels=n_frame_samples,
                                        out_channels=dim,
                                        kernel_size=1)
    init.kaiming_uniform(self.input_expand.weight)
    init.constant(self.input_expand.bias, 0)
    if weight_norm:
        self.input_expand = torch.nn.utils.weight_norm(self.input_expand)

    # Attempt to include the BGF conditioning (2020-06-08).
    self.input_conditioning = torch.nn.Conv1d(in_channels=n_frame_samples,
                                              out_channels=dim,
                                              kernel_size=1)
    init.kaiming_uniform(self.input_conditioning.weight)
    init.constant(self.input_conditioning.bias, 0)
    if weight_norm:
        self.input_conditioning = torch.nn.utils.weight_norm(
            self.input_conditioning)

    # self.rnn must be assigned (not commented out): the loop below reads it.
    self.rnn = torch.nn.GRU(input_size=dim,
                            hidden_size=dim,
                            num_layers=n_rnn,
                            batch_first=True)
    for i in range(n_rnn):
        # nn here is the project's own helpers module, not torch.nn.
        nn.concat_init(
            getattr(self.rnn, 'weight_ih_l{}'.format(i)),
            [nn.lecun_uniform, nn.lecun_uniform, nn.lecun_uniform]
        )
        init.constant(getattr(self.rnn, 'bias_ih_l{}'.format(i)), 0)
        nn.concat_init(
            getattr(self.rnn, 'weight_hh_l{}'.format(i)),
            [nn.lecun_uniform, nn.lecun_uniform, init.orthogonal]
        )
        init.constant(getattr(self.rnn, 'bias_hh_l{}'.format(i)), 0)

    self.upsampling = nn.LearnedUpsampling1d(in_channels=dim,
                                             out_channels=dim,
                                             kernel_size=frame_size)
    init.uniform(self.upsampling.conv_t.weight,
                 -np.sqrt(6 / dim), np.sqrt(6 / dim))
    init.constant(self.upsampling.bias, 0)
    if weight_norm:
        self.upsampling.conv_t = torch.nn.utils.weight_norm(
            self.upsampling.conv_t)
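# concat_init above is another project-local helper. GRU parameters stack
# the three gate matrices along dim 0, so a plausible sketch, under that
# assumption, splits the tensor into equal chunks and applies one
# initializer per gate:
def concat_init(tensor, inits):
    tensor = tensor.data if hasattr(tensor, 'data') else tensor
    length = tensor.size(0) // len(inits)  # rows per gate block
    for i, init_fn in enumerate(inits):
        init_fn(tensor[i * length:(i + 1) * length])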
def _initialize_weights(self):
    for m in self.modules():
        if isinstance(m, Conv3d):
            init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, BatchNorm3d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def init_weights(self):
    self.fc.weight.data.uniform_(0., 0.01)
    self.fc2.weight.data.uniform_(-0.01, 0.01)
    self.fc3.weight.data.uniform_(0.0, 0.01)
    for name, weight in self.lstm.named_parameters():
        if len(weight.size()) == 1:
            # 1-D parameters are biases; Kaiming init needs >= 2 dims.
            weight_init.uniform(weight, 0, 0.01)
        else:
            weight_init.kaiming_uniform(weight)
def __init__(self, block, alpha, depth, mos, num_class, k, rd):
    super(PyramidNet, self).__init__()
    self.input_channel = 16  # number of channels in the first layer
    self.mos = mos  # whether to include the mixture-of-softmaxes layer
    num_blocks = (depth - 2) // 6  # integer block count per stage
    # channel increment per conv layer; * 1.0 forces float division
    self.add = alpha / (3 * num_blocks * 1.0)
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
                           bias=False)
    self.bn1 = nn.BatchNorm2d(16)
    if block == 'basic':
        block = Basic
    else:
        block = BotteleNeck
    self.l1 = self.make_layer(block, num_blocks)
    # stride=2 halves the height and width dimensions
    self.l2 = self.make_layer(block, num_blocks, stride=2)
    self.l3 = self.make_layer(block, num_blocks, stride=2)
    # int(round(...)) because the accumulated channel count must be an integer
    self.bn_last = nn.BatchNorm2d(int(round(self.input_channel)))
    self.avgpool = nn.AvgPool2d(8)  # average pooling in the last layer
    if self.mos:
        # k is the number of experts
        self.MOS = MixtureofSoftmaxes(int(round(self.input_channel)),
                                      k, num_class, rd)
    else:
        self.fc = nn.Linear(int(round(self.input_channel)), num_class)
    # Initialisation as in the original ResNet paper:
    # all conv layers use He initialisation.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_uniform(m.weight, a=math.sqrt(2))
        elif isinstance(m, nn.BatchNorm2d):
            init.constant(m.weight, 1)
            init.constant(m.bias, 0)
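# For concreteness, with the common PyramidNet setting depth=110, alpha=84
# (illustrative values, not from the source), the widening schedule is:
depth, alpha = 110, 84
num_blocks = (depth - 2) // 6         # = 18 blocks per stage
add = alpha / (3 * num_blocks * 1.0)  # ~1.56 channels added per block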
def __call__(self, m):
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        # 'is' compares identity, not string equality; use '==' here.
        if self.init == "xavier_normal":
            init.xavier_normal(m.weight)
        elif self.init == "he_uniform":
            init.kaiming_uniform(m.weight)
        elif self.init == "he_normal":
            init.kaiming_normal(m.weight)
        if m.bias is not None:
            m.bias.data.zero_()
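# Because the initializer is a callable object, it can be handed straight to
# Module.apply. A minimal usage sketch; WeightInit is a hypothetical name
# for the enclosing class, which the source does not show:
import torch.nn as nn

initializer = WeightInit(init="he_uniform")  # hypothetical constructor
model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ConvTranspose2d(8, 3, 3))
model.apply(initializer)  # __call__ runs on every submodule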
def __init__(self, in_features, out_features, s=30.0, m=0.35, **kwargs):
    super(AddMarginProduct, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.s = s
    self.m = m
    self.weight = Parameter(torch.FloatTensor(out_features, in_features))
    init.kaiming_uniform(self.weight)
    print('{}, m={}, s={}'.format('cosface', m, s))
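# The source shows only the constructor. For context, the standard CosFace
# (additive-margin) forward computes cosines against the L2-normalized
# weight, subtracts m at the target class, and scales by s; a sketch under
# that assumption, not necessarily the author's exact code:
import torch
import torch.nn.functional as F

def forward(self, input, label):
    # cos(theta) between normalized features and normalized class weights
    cosine = F.linear(F.normalize(input), F.normalize(self.weight))
    phi = cosine - self.m  # additive cosine margin
    one_hot = torch.zeros_like(cosine)
    one_hot.scatter_(1, label.view(-1, 1), 1.0)
    # margin only at the ground-truth class, then scale by s
    return self.s * (one_hot * phi + (1.0 - one_hot) * cosine)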
def __init__(self, version=1.0, num_classes=1000):
    super(SqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        raise ValueError("Unsupported SqueezeNet version {version}: "
                         "1.0 or 1.1 expected".format(version=version))
    self.num_classes = num_classes
    if version == 1.0:
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(512, 64, 256, 256),
        )
    else:
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AvgPool2d(13, stride=1)
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal(m.weight.data, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
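# A minimal smoke test for this reference SqueezeNet, assuming torch and the
# Fire module from the surrounding file are in scope. A 224x224 input is
# what makes the final AvgPool2d(13) collapse the feature map to 1x1:
model = SqueezeNet(version=1.0, num_classes=1000)
x = torch.randn(1, 3, 224, 224)  # v1.0 expects 3-channel 224x224 input
out = model.classifier(model.features(x))
print(out.view(1, -1).shape)     # torch.Size([1, 1000])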