def _sanity_check_model_conversion(self, svm_model, nn_model, x_val):
    """Assert that the converted NN reproduces the SVM decision values.

    The SVM is evaluated on the flattened validation data, the NN on the
    raw validation data, and the two outputs are compared with a relative
    tolerance of 1e-7.

    Args:
        svm_model: fitted model exposing ``decision_function``.
        nn_model: converted network exposing ``forward``.
        x_val: validation inputs fed to both models.

    Raises:
        AssertionError: if the predictions differ beyond the tolerance.
    """
    # use flatten layer to vectorize multi-dim array
    vectorizer = Flatten()
    vectorizer.to_numpy()

    svm_scores = svm_model.decision_function(vectorizer.forward(x_val))
    nn_scores = nn_model.forward(x_val)
    rtol = 1e-7

    if nn_scores.shape[1] == 2:
        # Two-output network: column 0 must hold the negated SVM score and
        # column 1 the SVM score itself.
        binary_msg = 'Predictions of Trained SVM model and converted NN model are NOT equal! (2-Class-Case)'
        for expected in (-nn_scores[:, 0], nn_scores[:, 1]):
            numpy.testing.assert_allclose(
                svm_scores, expected, rtol, err_msg=binary_msg)
        return

    numpy.testing.assert_allclose(
        svm_scores,
        nn_scores,
        rtol,
        err_msg='Predictions of Trained SVM model and converted NN model are NOT equal!')
def __init__(self, device, image_channels=1, h_dim=1024, z_dim=32, num_labels=0):
    """Create the encoder, latent heads, decoder and classifier sub-networks.

    Args:
        device: stored as ``self.device``; not used for placement here.
        image_channels: channels of the encoder input / decoder output.
        h_dim: width of the flattened encoder features. The encoder's
            ``Linear(h_dim, h_dim)`` directly follows ``Flatten()``, so the
            last convolution's flattened output must have this size.
        z_dim: size of the latent code.
        num_labels: number of classes; widens the inputs of the latent heads
            and of ``upscale_z`` and sets the classifier output width.
    """
    super(M2, self).__init__()
    tnn = torch.nn

    self.device = device
    self.h_dim = h_dim
    self.z_dim = z_dim
    self.num_labels = num_labels

    encoder_layers = [
        tnn.Conv2d(image_channels, 32, kernel_size=3, stride=2),
        tnn.LeakyReLU(),
        tnn.Conv2d(32, 64, kernel_size=3, stride=2),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.Conv2d(64, 128, kernel_size=3, stride=1),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.Conv2d(128, 256, kernel_size=3, stride=1),
        tnn.LeakyReLU(),
        Flatten(),
        tnn.Linear(h_dim, h_dim),
        tnn.LeakyReLU(),
    ]
    self.encoder = tnn.Sequential(*encoder_layers)

    # Latent heads take the encoder features concatenated with the labels
    # (input width h_dim + num_labels).
    latent_in = h_dim + num_labels
    self.mu = tnn.Linear(latent_in, z_dim)  # mean
    self.logsigma = tnn.Linear(latent_in, z_dim)  # standard deviation
    self.upscale_z = tnn.Linear(z_dim + num_labels, h_dim)

    decoder_layers = [
        tnn.Linear(h_dim, h_dim),
        tnn.LeakyReLU(),
        UnFlatten(),
        tnn.ConvTranspose2d(h_dim, 128, kernel_size=4, stride=2),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.ConvTranspose2d(64, 32, kernel_size=4, padding=0, stride=2),
        tnn.LeakyReLU(),
        tnn.ConvTranspose2d(32, image_channels, kernel_size=5, stride=1),
        tnn.Sigmoid(),
    ]
    self.decoder = tnn.Sequential(*decoder_layers)

    # simple FFNN classifier
    # input: flattened vector (784 features), output: probability of each class
    # TODO: implement CNN classifier
    classifier_layers = [
        Flatten(),
        tnn.Linear(784, 512),
        tnn.ReLU(),
        tnn.Linear(512, 256),
        tnn.ReLU(),
        tnn.Linear(256, self.num_labels),  # output layer, one unit per label
        tnn.Softmax(dim=1),
    ]
    self.classifier = tnn.Sequential(*classifier_layers)
def train_model(self, x_train, y_train, x_val, y_val, *args, **kwargs):
    """Fit ``self.model`` on the flattened training data, then replace it
    with the network returned by ``self._convert_to_nn``.

    Args:
        x_train: training inputs; flattened before fitting.
        y_train: training targets.
        x_val: validation inputs, forwarded to the conversion step.
        y_val: accepted but not used by this method.
        *args, **kwargs: accepted but not used by this method.
    """
    # train model using sklearn
    print('training {} model'.format(self.__class__.__name__))

    # use flatten layer to vectorize multi-dim array
    vectorizer = Flatten()
    vectorizer.to_numpy()
    flat_train = vectorizer.forward(x_train)

    self.model.fit(flat_train, y_train)
    converted = self._convert_to_nn(self.model, y_train, x_val)
    self.model = converted
def __init__(self, conditional=False, fc=False):
    """Build the discriminator network, fully connected or convolutional.

    Args:
        conditional: when true, the input is expected to carry 10 extra
            one-hot label features (fc) or channels (conv).
        fc: when true, build the fully connected variant instead of the
            convolutional one.
    """
    super(Discriminator, self).__init__()

    self.conditional = conditional

    # one hot encoded input for conditional model
    ncond = 10 if self.conditional else 0

    # Replace with an identity function to disable spectral normalisation.
    sn = th.nn.utils.spectral_norm

    self.fc = fc

    mult = 2
    width = mult*256

    if self.fc:
        self.net = th.nn.Sequential(
            Flatten(),
            th.nn.Linear(28*28 + ncond, width),
            th.nn.LeakyReLU(0.2, inplace=True),
            # The original passed a stray third positional argument (4),
            # which nn.Linear interprets as `bias`; being truthy it simply
            # enabled the bias, which is the default.
            th.nn.Linear(width, width),
            th.nn.LeakyReLU(0.2, inplace=True),
            th.nn.Linear(width*1*1, 1),
        )
    else:
        # With kernel 4, stride 2 and no padding a 28x28 input shrinks to
        # 13x13, 5x5 and finally 1x1, which is what the 1*1 factor in the
        # last Linear layer assumes.
        self.net = th.nn.Sequential(
            th.nn.Conv2d(1 + ncond, mult*64, 4, padding=0, stride=2),
            th.nn.LeakyReLU(0.2, inplace=True),

            sn(th.nn.Conv2d(mult*64, mult*128, 4, padding=0, stride=2)),
            th.nn.LeakyReLU(0.2, inplace=True),

            sn(th.nn.Conv2d(mult*128, width, 4, padding=0, stride=2)),
            th.nn.LeakyReLU(0.2, inplace=True),

            Flatten(),
            th.nn.Linear(width*1*1, 1),
        )

    self._reset_weights()
def cifar_model_wide():
    """Return the wide CIFAR classifier: two stride-2 convolutions
    (16 and 32 filters) followed by a 100-unit hidden layer and 10 outputs.

    The first Linear layer expects 32 * 8 * 8 flattened features.
    """
    # cifar wide
    layers = [
        nn.Conv2d(3, 16, 4, stride=2, padding=1),
        nn.ReLU(),
        nn.Conv2d(16, 32, 4, stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(32 * 8 * 8, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    ]
    return nn.Sequential(*layers)
def cifar_model():
    """Return the base CIFAR classifier: two stride-2 convolutions
    (8 and 16 filters) followed by a 100-unit hidden layer and 10 outputs.

    The first Linear layer expects 1024 flattened features.
    """
    # cifar base
    layers = [
        nn.Conv2d(3, 8, 4, stride=2, padding=1),
        nn.ReLU(),
        nn.Conv2d(8, 16, 4, stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(1024, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    ]
    return nn.Sequential(*layers)
def __init__(self, f_in, h1, h2, f_out):
    """Register three chained Linear layers and a Flatten layer.

    Args:
        f_in: number of input features of the first layer.
        h1: output width of ``self.h1`` / input width of ``self.h2``.
        h2: output width of ``self.h2`` / input width of ``self.output``.
        f_out: number of output features of ``self.output``.
    """
    super(FC, self).__init__()
    widths = (f_in, h1, h2, f_out)
    # Layers are created in the order h1, h2, output so that registration
    # order and weight initialisation order stay as before.
    for attr, fan_in, fan_out in zip(("h1", "h2", "output"), widths[:-1], widths[1:]):
        setattr(self, attr, nn.Linear(fan_in, fan_out))
    self.flat = Flatten()
def __init__(self, device, image_channels=1, h_dim=1024, z_dim=32, num_labels=0):
    """Create the convolutional encoder, the latent layers and the decoder.

    ``NNprint()`` modules are interleaved with the real layers; their
    behaviour is defined elsewhere (presumably shape/debug printing --
    TODO confirm).

    Args:
        device: stored as ``self.device``; not used for placement here.
        image_channels: channels of the encoder input / decoder output.
        h_dim: input width of ``fc1``/``fc2``, output width of ``fc3`` and
            input channel count of the first transposed convolution.
        z_dim: size of the latent code.
        num_labels: number of extra label features appended to the latent
            code before ``fc3``.
    """
    super(VAE, self).__init__()
    tnn = torch.nn

    # Plain attributes. The original assigned self.h_dim twice with the
    # same value; the redundant second assignment is dropped.
    self.device = device
    self.h_dim = h_dim
    self.z_dim = z_dim

    self.encoder = tnn.Sequential(
        NNprint(),
        tnn.Conv2d(image_channels, 32, kernel_size=3, stride=2),
        NNprint(),
        tnn.LeakyReLU(),
        tnn.Conv2d(32, 64, kernel_size=3, stride=2),
        NNprint(),
        tnn.LeakyReLU(),
        tnn.Conv2d(64, 128, kernel_size=3, stride=1),
        NNprint(),
        tnn.LeakyReLU(),
        tnn.Conv2d(128, 256, kernel_size=3, stride=1),
        NNprint(),
        tnn.LeakyReLU(),
        Flatten(),
        NNprint(),
    )

    self.num_labels = num_labels

    # Two heads mapping encoder features to the latent space, and one layer
    # mapping the (label-augmented) latent code back to h_dim features.
    self.fc1 = tnn.Linear(h_dim, z_dim)
    self.fc2 = tnn.Linear(h_dim, z_dim)
    self.fc3 = tnn.Linear(z_dim + num_labels, h_dim)

    self.decoder = tnn.Sequential(
        NNprint(),
        UnFlatten(),
        NNprint(),
        tnn.ConvTranspose2d(h_dim, 128, kernel_size=4, stride=2),
        tnn.LeakyReLU(),
        NNprint(),
        tnn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
        tnn.LeakyReLU(),
        NNprint(),
        tnn.ConvTranspose2d(64, 32, kernel_size=4, padding=0, stride=2),
        tnn.LeakyReLU(),
        NNprint(),
        tnn.ConvTranspose2d(32, image_channels, kernel_size=5, stride=1),
        tnn.Sigmoid(),
        NNprint(),
    )
def cifar_model_deep():
    """Return the deep CIFAR classifier: four 8-filter convolutions (the
    first and last with stride 2) followed by a 100-unit hidden layer and
    10 outputs.

    The first Linear layer expects 8 * 8 * 8 flattened features.
    """
    # cifar deep
    layers = [
        nn.Conv2d(3, 8, 4, stride=2, padding=1),
        nn.ReLU(),
        nn.Conv2d(8, 8, 3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(8, 8, 3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(8, 8, 4, stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(8 * 8 * 8, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    ]
    return nn.Sequential(*layers)
def __init__(self, f_in, h1, h2, f_out):
    """Register two Linear+ReLU hidden stages, an output layer and a
    Flatten layer.

    Args:
        f_in: number of input features of the first layer.
        h1: width of the first hidden layer.
        h2: width of the second hidden layer.
        f_out: number of output features.
    """
    super(FC, self).__init__()
    fan_in = f_in
    # Registers h1, relu1, h2, relu2 in that order.
    for stage, width in enumerate((h1, h2), start=1):
        setattr(self, "h%d" % stage, nn.Linear(fan_in, width))
        setattr(self, "relu%d" % stage, nn.ReLU(inplace=True))
        fan_in = width
    self.output = nn.Linear(fan_in, f_out)
    self.flat = Flatten()
def __init__(self, f_in, h1, h2, f_out, d):
    """Register a zero-initialised ``(d, 1)`` parameter, three chained
    ``LinearRP`` layers and a Flatten layer.

    Args:
        f_in: number of input features of the first layer.
        h1: output width of ``self.h1`` / input width of ``self.h2``.
        h2: output width of ``self.h2`` / input width of ``self.output``.
        f_out: number of output features of ``self.output``.
        d: first dimension of ``self.basis_weights``; also passed as the
            third argument to every ``LinearRP`` layer.
    """
    super(FC_RP, self).__init__()
    self.basis_weights = nn.Parameter(torch.zeros(d, 1))
    widths = (f_in, h1, h2, f_out)
    # Layers are created in the order h1, h2, output.
    for attr, fan_in, fan_out in zip(("h1", "h2", "output"), widths[:-1], widths[1:]):
        setattr(self, attr, LinearRP(fan_in, fan_out, d))
    self.flat = Flatten()
def mnist_cnn_4layer():
    """Return the 4-layer MNIST CNN: two stride-2 4x4 convolutions
    (16 and 32 filters) followed by a 100-unit hidden layer and 10 outputs.

    The first Linear layer expects 1568 flattened features.
    """
    # mnist_cnn_a
    layers = [
        nn.Conv2d(1, 16, (4, 4), stride=2, padding=1),
        nn.ReLU(),
        nn.Conv2d(16, 32, (4, 4), stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(1568, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    ]
    network = nn.Sequential(*layers)
    return network
def cnn_4layer_b():
    """Return the 4-layer CIFAR CNN "b": asymmetric zero padding, a 5x5 and
    a 4x4 stride-2 convolution (32 and 128 filters), a 250-unit hidden
    layer and 10 outputs.

    The first Linear layer expects 8192 flattened features.
    """
    # cifar_cnn_b
    layers = [
        nn.ZeroPad2d((1, 2, 1, 2)),
        nn.Conv2d(3, 32, (5, 5), stride=2, padding=0),
        nn.ReLU(),
        nn.Conv2d(32, 128, (4, 4), stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(8192, 250),
        nn.ReLU(),
        nn.Linear(250, 10),
    ]
    network = nn.Sequential(*layers)
    return network
def __init__(self, device, image_channels=1, h_dim=1024, z_dim=32):
    """Create the encoder, latent heads and decoder sub-networks.

    Args:
        device: stored as ``self.device``; not used for placement here.
        image_channels: channels of the encoder input / decoder output.
        h_dim: width of the flattened encoder features. The encoder's
            ``Linear(h_dim, h_dim)`` directly follows ``Flatten()``, so the
            last convolution's flattened output must have this size.
        z_dim: size of the latent code.
    """
    super(M1, self).__init__()
    tnn = torch.nn

    self.device = device
    self.h_dim = h_dim
    self.z_dim = z_dim

    encoder_layers = [
        tnn.Conv2d(image_channels, 32, kernel_size=3, stride=2),
        tnn.LeakyReLU(),
        tnn.Conv2d(32, 64, kernel_size=3, stride=2),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.Conv2d(64, 128, kernel_size=3, stride=1),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.Conv2d(128, 256, kernel_size=3, stride=1),
        tnn.LeakyReLU(),
        Flatten(),
        tnn.Linear(h_dim, h_dim),
        tnn.LeakyReLU(),
    ]
    self.encoder = tnn.Sequential(*encoder_layers)

    self.mu = tnn.Linear(h_dim, z_dim)  # mean
    self.logsigma = tnn.Linear(h_dim, z_dim)  # standard deviation
    self.upscale_z = tnn.Linear(z_dim, h_dim)

    decoder_layers = [
        tnn.Linear(h_dim, h_dim),
        tnn.LeakyReLU(),
        UnFlatten(),
        tnn.ConvTranspose2d(h_dim, 128, kernel_size=4, stride=2),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
        tnn.LeakyReLU(),
        tnn.Dropout2d(p=.2),
        tnn.ConvTranspose2d(64, 32, kernel_size=4, padding=0, stride=2),
        tnn.LeakyReLU(),
        tnn.ConvTranspose2d(32, image_channels, kernel_size=5, stride=1),
        tnn.Sigmoid(),
    ]
    self.decoder = tnn.Sequential(*decoder_layers)
def _convert_to_nn(self, svm_model, y_train, x_val):
    """Convert a fitted linear SVM into an equivalent ``Flatten -> Linear``
    network and verify the conversion on ``x_val``.

    With exactly two distinct labels in ``y_train`` the network gets two
    outputs built from the negated and the original SVM weights/bias;
    otherwise the SVM weights and bias are copied as they are.

    Args:
        svm_model: fitted model exposing ``coef_``, ``intercept_`` and
            (for the sanity check) ``decision_function``.
        y_train: training labels, used only to count distinct classes.
        x_val: validation inputs used for the sanity check.

    Returns:
        The converted ``Sequential`` network.

    Raises:
        AssertionError: propagated from the sanity check when SVM and
            network predictions disagree.
    """
    # convert to linear NN
    print('converting {} model to linear NN'.format(
        self.__class__.__name__))

    W = svm_model.coef_.T
    B = svm_model.intercept_

    if numpy.unique(y_train).size == 2:
        # Binary case: output 0 carries the negated score, output 1 the score.
        linear_layer = Linear(W.shape[0], 2)
        linear_layer.W = numpy.concatenate([-W, W], axis=1)
        linear_layer.B = numpy.concatenate([-B, B], axis=0)
    else:
        linear_layer = Linear(*W.shape)
        linear_layer.W = W
        linear_layer.B = B

    nn_model = Sequential([Flatten(), linear_layer])
    if not self.use_gpu:
        nn_model.to_numpy()

    # sanity check model conversion
    # The check runs against the `svm_model` argument, i.e. the model the
    # weights were taken from. The original rebound `svm_model` to
    # `self.model` here, which silently ignored the argument.
    self._sanity_check_model_conversion(svm_model, nn_model, x_val)
    print('model conversion sanity check passed')

    return nn_model
def _read_txt_helper(path):
    """Read a network description from a plain text file.

    The file is a sequence of layer descriptions, each starting with a
    header line whose first token is the layer class name:

    * ``Linear <rows_of_W> <columns_of_W>`` followed by one line with the
      flattened weight matrix and one line with the flattened bias vector.
    * ``Convolution <rows_of_W> <columns_of_W> <depth_of_W>
      <number_of_filters_W> <stride_axis_0> <stride_axis_1>`` followed by
      one line with the flattened filter block and one line with the
      flattened bias vector.
    * ``SumPool`` / ``MaxPool`` ``<mask_height> <mask_width>
      <stride_axis_0> <stride_axis_1>`` on a single line.
    * ``Flatten``, ``Rect``, ``Tanh``, ``SoftMax`` on a single line.

    Parsing stops at the first empty line or at the end of the file.

    Args:
        path: path of the text file to read.

    Returns:
        A ``Sequential`` holding the parsed layers in file order.

    Raises:
        ValueError: if a header line names an unsupported layer type.
    """

    def _floats(text):
        # One whitespace-separated line of numbers -> 1-D float array.
        return np.array([float(token) for token in text.split()])

    # Text mode: the headers are matched against str class names below, so
    # reading bytes ('rb') fails on Python 3.
    with open(path, 'r') as f:
        content = f.read().split('\n')

    modules = []
    c = 0

    # Bounds check so a file without a trailing blank line ends the loop
    # instead of raising IndexError.
    while c < len(content) and len(content[c]) > 0:
        line = content[c]

        if line.startswith(Linear.__name__):
            _, m, n = line.split()
            m, n = int(m), int(n)
            layer = Linear(m, n)
            layer.W = _floats(content[c + 1]).reshape((m, n))
            layer.B = _floats(content[c + 2])
            modules.append(layer)
            c += 3  # the description of a linear layer spans three lines

        elif line.startswith(Convolution.__name__):
            _, h, w, d, n, s0, s1 = line.split()
            h, w, d, n = int(h), int(w), int(d), int(n)
            s0, s1 = int(s0), int(s1)
            layer = Convolution(filtersize=(h, w, d, n), stride=(s0, s1))
            layer.W = _floats(content[c + 1]).reshape((h, w, d, n))
            layer.B = _floats(content[c + 2])
            modules.append(layer)
            c += 3  # the description of a convolution layer spans three lines

        elif line.startswith(SumPool.__name__):
            _, h, w, s0, s1 = line.split()
            modules.append(
                SumPool(pool=(int(h), int(w)), stride=(int(s0), int(s1))))
            c += 1  # one line of parameterized layer description

        elif line.startswith(MaxPool.__name__):
            _, h, w, s0, s1 = line.split()
            modules.append(
                MaxPool(pool=(int(h), int(w)), stride=(int(s0), int(s1))))
            c += 1  # one line of parameterized layer description

        elif line.startswith(Flatten.__name__):
            modules.append(Flatten())
            c += 1  # one line of parameterless layer description

        elif line.startswith(Rect.__name__):
            modules.append(Rect())
            c += 1  # one line of parameterless layer description

        elif line.startswith(Tanh.__name__):
            modules.append(Tanh())
            c += 1  # one line of parameterless layer description

        elif line.startswith(SoftMax.__name__):
            modules.append(SoftMax())
            c += 1  # one line of parameterless layer description

        else:
            tokens = line.split()
            # A whitespace-only line has no tokens; report the raw line then.
            identifier = tokens[0] if tokens else line
            raise ValueError(
                'Layer type identifier ' + identifier +
                ' not supported for reading from plain text file')

    return Sequential(modules)
def __init__(self, imsize=28, paths=4, segments=5, samples=2, zdim=128,
             conditional=False, variational=True, raster=False, fc=False):
    """Build the encoder, latent predictors and decoder heads.

    Args:
        imsize: image side length; sets the raster decoder output size
            (``imsize*imsize``). The fully connected encoder is hard-wired
            to 28*28 inputs.
        paths: number of paths predicted by the vector decoder heads.
        segments: number of segments per path; each path gets
            ``3*segments + 1`` 2-D points.
        samples: stored as ``self.samples``; not used in this constructor.
        zdim: size of the latent code.
        conditional: when true, 10 one-hot label features/channels are
            added to the encoder and decoder inputs.
        variational: when true, a log-variance predictor is created next to
            the mean predictor.
        raster: when true, build a raster decoder instead of the point,
            width and alpha predictors.
        fc: when true, use the fully connected encoder instead of the
            convolutional one.
    """
    super(VectorMNISTVAE, self).__init__()

    self.samples = samples
    self.imsize = imsize
    self.paths = paths
    self.segments = segments
    self.zdim = zdim
    self.conditional = conditional
    self.variational = variational

    # one hot encoded input for conditional model
    ncond = 10 if self.conditional else 0

    self.fc = fc
    mult = 1
    nc = 1024
    # Feature width produced by either encoder variant; the predictors
    # below previously hard-coded 256, which equals mult*256 for mult == 1.
    feat = mult*256

    if not self.fc:  # conv model
        # With kernel 4, stride 2 and no padding a 28x28 input shrinks to
        # 13x13, 5x5 and finally 1x1, so Flatten yields `feat` features.
        self.encoder = th.nn.Sequential(
            th.nn.Conv2d(1 + ncond, mult*64, 4, padding=0, stride=2),
            th.nn.LeakyReLU(0.2, inplace=True),

            th.nn.Conv2d(mult*64, mult*128, 4, padding=0, stride=2),
            th.nn.LeakyReLU(0.2, inplace=True),

            th.nn.Conv2d(mult*128, feat, 4, padding=0, stride=2),
            th.nn.LeakyReLU(0.2, inplace=True),
            Flatten(),
        )
    else:
        self.encoder = th.nn.Sequential(
            Flatten(),
            th.nn.Linear(28*28 + ncond, feat),
            th.nn.LeakyReLU(0.2, inplace=True),

            # The original passed a stray third positional argument (4),
            # which nn.Linear interprets as `bias`; being truthy it simply
            # enabled the bias, which is the default.
            th.nn.Linear(feat, feat),
            th.nn.LeakyReLU(0.2, inplace=True),
        )

    self.mu_predictor = th.nn.Linear(feat*1*1, zdim)
    if self.variational:
        self.logvar_predictor = th.nn.Linear(feat*1*1, zdim)

    self.decoder = th.nn.Sequential(
        th.nn.Linear(zdim + ncond, nc),
        th.nn.SELU(inplace=True),

        th.nn.Linear(nc, nc),
        th.nn.SELU(inplace=True),
    )

    self.raster = raster

    if self.raster:
        self.raster_decoder = th.nn.Sequential(
            th.nn.Linear(nc, imsize*imsize),
        )
    else:
        # 4 points bezier with n_segments -> 3*n_segments + 1 points
        self.point_predictor = th.nn.Sequential(
            th.nn.Linear(nc, 2*self.paths*(self.segments*3+1)),
            th.nn.Tanh()  # bound spatial extent
        )

        self.width_predictor = th.nn.Sequential(
            th.nn.Linear(nc, self.paths),
            th.nn.Tanh()
        )

        self.alpha_predictor = th.nn.Sequential(
            th.nn.Linear(nc, self.paths),
            th.nn.Tanh()
        )

    self._reset_weights()
def __init__(self):
    """Set up the style-transfer networks, loss bookkeeping and optimizers.

    Creates the normalization and transform networks, tries to restore
    both from checkpoints under ``models/`` (a missing file is ignored),
    loads a pretrained VGG-19 as the loss network, records which of its
    convolution layers contribute content and/or style losses, and creates
    one Adam optimizer per trainable network.
    """
    super(StyleCNN, self).__init__()

    # Initial configurations
    self.content_layers = ['conv_4']
    self.style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
    self.content_weight = 5
    self.style_weight = 1000

    self.gram = GramMatrix()
    self.use_cuda = torch.cuda.is_available()

    # Last layer of the normalization network: small random weights, and a
    # bias whose first 16 entries are shifted by +1.
    head = nn.Linear(256, 32)
    torch.randn(head.weight.size(), out=head.weight.data).mul_(0.01)
    head.bias.data.mul_(0.01)
    head.bias.data[:16].add_(1)

    normalization_layers = [
        nn.Conv2d(3, 32, 9, stride=2, padding=0),
        nn.Conv2d(32, 64, 9, stride=2, padding=0),
        nn.Conv2d(64, 128, 9, stride=2, padding=0),
        Flatten(),
        nn.Linear(625, 256),
        head,
    ]
    self.normalization_network = nn.Sequential(*normalization_layers)

    transform_layers = [
        nn.ReflectionPad2d(40),
        nn.Conv2d(3, 32, 9, stride=1, padding=4),
        nn.Conv2d(32, 64, 3, stride=2, padding=1),
        nn.Conv2d(64, 128, 3, stride=2, padding=1),
    ]
    # Ten unpadded 128 -> 128 convolutions in a row.
    transform_layers += [
        nn.Conv2d(128, 128, 3, stride=1, padding=0) for _ in range(10)
    ]
    transform_layers += [
        nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),
        nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
        nn.Conv2d(32, 3, 9, stride=1, padding=4),
    ]
    self.transform_network = nn.Sequential(*transform_layers)

    # Best effort: keep the freshly initialised weights when a checkpoint
    # cannot be read.
    try:
        self.transform_network.load_state_dict(
            torch.load("models/transform_net_ckpt"))
        self.normalization_network.load_state_dict(
            torch.load("models/normalization_net_ckpt"))
    except IOError:
        pass

    # Output channel count of each of the 16 convolutional layers of the
    # transform network, in order.
    self.out_dims = [32, 64] + [128] * 11 + [64, 32, 3]

    self.loss_network = models.vgg19(pretrained=True)

    # Record (feature index, "c"/"s"/"cs") for every convolution whose name
    # is listed as a content and/or style layer. The conv counter advances
    # on every ReLU, so convolutions are named by the number of ReLUs
    # preceding them plus one.
    self.loss_layers = []
    conv_number = 1
    for position, module in enumerate(self.loss_network.features):
        if isinstance(module, nn.Conv2d):
            conv_name = "conv_" + str(conv_number)
            kinds = ""
            if conv_name in self.content_layers:
                kinds += "c"
            if conv_name in self.style_layers:
                kinds += "s"
            if kinds:
                self.loss_layers.append((position, kinds))
        if isinstance(module, nn.ReLU):
            conv_number += 1

    self.loss = nn.MSELoss()

    # Learnable (128, 32) parameter block: first 16 columns near 1, last 16
    # columns near 0.
    norm_params = torch.FloatTensor(128, 32)
    torch.randn(128, 16, out=norm_params[:, :16]).mul_(0.01).add_(1)
    torch.randn(128, 16, out=norm_params[:, 16:]).mul_(0.01)
    self.norm_params = Parameter(norm_params)

    self.normalization_optimizer = optim.Adam(
        self.normalization_network.parameters(), lr=1e-3)
    self.transform_optimizer = optim.Adam(
        self.transform_network.parameters(), lr=1e-3)

    if self.use_cuda:
        self.normalization_network.cuda()
        self.loss.cuda()
        self.gram.cuda()
def __init__(self, **kwargs):
    """Build the PoseNet stem, side heads, Inception blocks and regressor.

    Args:
        **kwargs: forwarded to the parent constructor together with the
            model name ``"PoseNet"``. Must contain ``only_front_camera``;
            a truthy value selects 3 input channels, otherwise 18.
    """
    super(PoseNet, self).__init__("PoseNet", **kwargs)

    in_channels = 3 if kwargs["only_front_camera"] else 18

    def side_head(channels):
        # Auxiliary regression head attached to an intermediate feature map.
        return nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            nn.Conv2d(in_channels=channels, out_channels=128,
                      kernel_size=1, stride=1),
            nn.ReLU(True),
            Flatten(),
            nn.Linear(3 * 3 * 128, 1024),  # paper says 4 x 4 ?
            nn.ReLU(True),
            nn.Dropout(p=0.7),
            nn.Linear(1024, 4))

    # Stem network
    self.stem_network = nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=64,
                  kernel_size=7, stride=2),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.ReLU(True),
        #
        nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.ReLU(True),
    )

    # Side networks
    self.side_network_4a = side_head(512)
    self.side_network_4d = side_head(528)

    # Inception blocks 3a-5b, registered in this order.
    inception_args = (
        "in_channels",
        "conv1x1_out_channels",
        "conv3x3_in_channels",
        "conv3x3_out_channels",
        "conv5x5_in_channels",
        "conv5x5_out_channels",
        "maxpool3x3_out_channels",
    )
    inception_table = (
        ("incep_3a", (192, 64, 96, 128, 16, 32, 32)),
        ("incep_3b", (256, 128, 128, 192, 32, 96, 64)),
        ("incep_4a", (480, 192, 96, 208, 16, 48, 64)),
        ("incep_4b", (512, 160, 112, 224, 24, 64, 64)),
        ("incep_4c", (512, 128, 128, 256, 24, 64, 64)),
        ("incep_4d", (512, 112, 144, 288, 32, 64, 64)),
        ("incep_4e", (528, 256, 160, 320, 32, 128, 128)),
        ("incep_5a", (832, 256, 160, 320, 32, 128, 128)),
        ("incep_5b", (832, 384, 192, 384, 48, 128, 128)),
    )
    for attr, values in inception_table:
        setattr(self, attr, Inception(**dict(zip(inception_args, values))))

    self.flatten = Flatten()
    self.dropout = nn.Dropout(p=0.4)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)
    self.final_regressor = nn.Sequential(
        nn.Linear(1024, 2048),
        nn.ReLU(True),
        nn.Linear(2048, 4))

    self.apply(xavier_initialization)
def __init__(self, **kwargs):
    """Build the reduced PoseNet: stem, three Inception blocks and one
    side regression head.

    Args:
        **kwargs: forwarded to the parent constructor together with the
            model name ``"PoseNetSimple"``. Must contain
            ``only_front_camera``; a truthy value selects 3 input channels,
            otherwise 18.
    """
    super(PoseNetSimple, self).__init__("PoseNetSimple", **kwargs)

    in_channels = 3 if kwargs["only_front_camera"] else 18

    # Stem network
    stem_layers = [
        nn.Conv2d(in_channels=in_channels, out_channels=64,
                  kernel_size=7, stride=2),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.LeakyReLU(inplace=True),
        #
        nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.LeakyReLU(inplace=True),
    ]
    self.stem_network = nn.Sequential(*stem_layers)

    # Inception blocks 3a, 3b and 4a, registered in this order.
    inception_args = (
        "in_channels",
        "conv1x1_out_channels",
        "conv3x3_in_channels",
        "conv3x3_out_channels",
        "conv5x5_in_channels",
        "conv5x5_out_channels",
        "maxpool3x3_out_channels",
    )
    inception_table = (
        ("incep_3a", (192, 64, 96, 128, 16, 32, 32)),
        ("incep_3b", (256, 128, 128, 192, 32, 96, 64)),
        ("incep_4a", (480, 192, 96, 208, 16, 48, 64)),
    )
    for attr, values in inception_table:
        setattr(self, attr, Inception(**dict(zip(inception_args, values))))

    # Side networks
    side_layers = [
        nn.AvgPool2d(kernel_size=5, stride=3),
        nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1, stride=1),
        nn.LeakyReLU(inplace=True),
        Flatten(),
        nn.Linear(3 * 3 * 128, 1024),  # paper says 4 x 4 ?
        nn.LeakyReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(1024, 4),
    ]
    self.side_network_4a = nn.Sequential(*side_layers)

    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.apply(xavier_initialization)