def _sanity_check_model_conversion(self, svm_model, nn_model, x_val):

        # use a Flatten layer to vectorize the multi-dimensional input array
        flatten = Flatten()
        flatten.to_numpy()

        y_pred_svm = svm_model.decision_function(flatten.forward(x_val))
        y_pred_nn = nn_model.forward(x_val)

        rtol = 1e-7
        if y_pred_nn.shape[1] == 2:
            # the converted 2-class NN outputs [-score, +score], so compare both columns
            numpy.testing.assert_allclose(
                y_pred_svm, -y_pred_nn[:, 0], rtol=rtol,
                err_msg='Predictions of trained SVM model and converted NN model are NOT equal! (2-class case)')
            numpy.testing.assert_allclose(
                y_pred_svm, y_pred_nn[:, 1], rtol=rtol,
                err_msg='Predictions of trained SVM model and converted NN model are NOT equal! (2-class case)')
        else:
            numpy.testing.assert_allclose(
                y_pred_svm, y_pred_nn, rtol=rtol,
                err_msg='Predictions of trained SVM model and converted NN model are NOT equal!')
Example #2
    def __init__(self,
                 device,
                 image_channels=1,
                 h_dim=1024,
                 z_dim=32,
                 num_labels=0):
        super(M2, self).__init__()
        self.device = device
        self.h_dim = h_dim
        self.z_dim = z_dim
        self.num_labels = num_labels

        self.encoder = torch.nn.Sequential(
            torch.nn.Conv2d(image_channels, 32, kernel_size=3, stride=2),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=2),
            torch.nn.LeakyReLU(), torch.nn.Dropout2d(p=.2),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1),
            torch.nn.LeakyReLU(), torch.nn.Dropout2d(p=.2),
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1),
            torch.nn.LeakyReLU(), Flatten(), torch.nn.Linear(h_dim, h_dim),
            torch.nn.LeakyReLU())

        self.mu = torch.nn.Linear(h_dim + num_labels, z_dim)  # mean
        self.logsigma = torch.nn.Linear(h_dim + num_labels, z_dim)  # log standard deviation
        self.upscale_z = torch.nn.Linear(z_dim + num_labels, h_dim)

        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(h_dim, h_dim), torch.nn.LeakyReLU(), UnFlatten(),
            torch.nn.ConvTranspose2d(h_dim, 128, kernel_size=4, stride=2),
            torch.nn.LeakyReLU(), torch.nn.Dropout2d(p=.2),
            torch.nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            torch.nn.LeakyReLU(), torch.nn.Dropout2d(p=.2),
            torch.nn.ConvTranspose2d(64,
                                     32,
                                     kernel_size=4,
                                     padding=0,
                                     stride=2), torch.nn.LeakyReLU(),
            torch.nn.ConvTranspose2d(32,
                                     image_channels,
                                     kernel_size=5,
                                     stride=1), torch.nn.Sigmoid())

        # simple FFNN classifier
        # input: flattened vector, output: probability of each class
        # TODO: implement CNN classifier
        self.classifier = torch.nn.Sequential(
            Flatten(),
            torch.nn.Linear(784, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, self.num_labels),  # one output unit per class
            torch.nn.Softmax(dim=1))
    def train_model(self, x_train, y_train, x_val, y_val, *args, **kwargs):
        # train model using sklearn
        print('training {} model'.format(self.__class__.__name__))

        # use a Flatten layer to vectorize the multi-dimensional input array
        flatten = Flatten()
        flatten.to_numpy()

        x_train = flatten.forward(x_train)
        self.model.fit(x_train, y_train)
        self.model = self._convert_to_nn(self.model, y_train, x_val)
Example #4
    def __init__(self, conditional=False, fc=False):
        super(Discriminator, self).__init__()

        self.conditional = conditional

        ncond = 0
        if self.conditional:  # one hot encoded input for conditional model
            ncond = 10

        sn = th.nn.utils.spectral_norm
        # sn = lambda x: x

        self.fc = fc

        mult = 2
        if self.fc:
            self.net = th.nn.Sequential(
                Flatten(),
                th.nn.Linear(28*28 + ncond, mult*256),
                th.nn.LeakyReLU(0.2, inplace=True),

                # th.nn.Linear(mult*256, mult*256, 4),
                # th.nn.LeakyReLU(0.2, inplace=True),
                # th.nn.Dropout(0.5),

                th.nn.Linear(mult*256, mult*256),
                th.nn.LeakyReLU(0.2, inplace=True),

                th.nn.Linear(mult*256*1*1, 1),
            )
        else:
            self.net = th.nn.Sequential(
                th.nn.Conv2d(1 + ncond, mult*64, 4, padding=0, stride=2),

                th.nn.LeakyReLU(0.2, inplace=True),
                # 16x16

                sn(th.nn.Conv2d(mult*64, mult*128, 4, padding=0, stride=2)),
                th.nn.LeakyReLU(0.2, inplace=True),
                # 8x8

                sn(th.nn.Conv2d(mult*128, mult*256, 4, padding=0, stride=2)),
                th.nn.LeakyReLU(0.2, inplace=True),
                # 4x4

                Flatten(),

                th.nn.Linear(mult*256*1*1, 1),
            )

        self._reset_weights()
Example #5
def cifar_model_wide():
    # cifar wide
    model = nn.Sequential(nn.Conv2d(3, 16, 4, stride=2, padding=1), nn.ReLU(),
                          nn.Conv2d(16, 32, 4, stride=2, padding=1), nn.ReLU(),
                          Flatten(), nn.Linear(32 * 8 * 8, 100), nn.ReLU(),
                          nn.Linear(100, 10))
    return model
Example #6
def cifar_model():
    # cifar base
    model = nn.Sequential(nn.Conv2d(3, 8, 4, stride=2, padding=1), nn.ReLU(),
                          nn.Conv2d(8, 16, 4, stride=2, padding=1), nn.ReLU(),
                          Flatten(), nn.Linear(1024, 100), nn.ReLU(),
                          nn.Linear(100, 10))
    return model
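In both CIFAR constructors the Flatten() output size follows from the convolution arithmetic: a kernel-4, stride-2, padding-1 convolution halves the 32x32 input twice (32 -> 16 -> 8), so the first Linear layer sees channels * 8 * 8 features, i.e. 32*8*8 = 2048 for cifar_model_wide and 16*8*8 = 1024 for cifar_model. A quick dummy forward pass, using torch.nn.Flatten as a stand-in for the custom Flatten, confirms the sizes:

import torch
import torch.nn as nn

wide_features = nn.Sequential(nn.Conv2d(3, 16, 4, stride=2, padding=1), nn.ReLU(),
                              nn.Conv2d(16, 32, 4, stride=2, padding=1), nn.ReLU(),
                              nn.Flatten())
base_features = nn.Sequential(nn.Conv2d(3, 8, 4, stride=2, padding=1), nn.ReLU(),
                              nn.Conv2d(8, 16, 4, stride=2, padding=1), nn.ReLU(),
                              nn.Flatten())
x = torch.randn(1, 3, 32, 32)  # one CIFAR-sized input
print(wide_features(x).shape, base_features(x).shape)  # torch.Size([1, 2048]) torch.Size([1, 1024])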
    def __init__(self, f_in, h1, h2, f_out):
        super(FC, self).__init__()

        self.h1 = nn.Linear(f_in, h1)
        self.h2 = nn.Linear(h1, h2)
        self.output = nn.Linear(h2, f_out)

        self.flat = Flatten()
Example #8
    def __init__(self,
                 device,
                 image_channels=1,
                 h_dim=1024,
                 z_dim=32,
                 num_labels=0):
        super(VAE, self).__init__()
        self.device = device
        self.h_dim = h_dim
        self.z_dim = z_dim

        self.encoder = torch.nn.Sequential(
            NNprint(),
            torch.nn.Conv2d(image_channels, 32, kernel_size=3, stride=2),
            NNprint(),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=2),
            NNprint(),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1),
            NNprint(),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1),
            NNprint(),
            torch.nn.LeakyReLU(),
            Flatten(),
            NNprint(),
        )
        self.h_dim = h_dim
        self.num_labels = num_labels
        self.fc1 = torch.nn.Linear(h_dim, z_dim)
        self.fc2 = torch.nn.Linear(h_dim, z_dim)
        self.fc3 = torch.nn.Linear(z_dim + num_labels, h_dim)

        self.decoder = torch.nn.Sequential(
            NNprint(),
            UnFlatten(),
            NNprint(),
            torch.nn.ConvTranspose2d(h_dim, 128, kernel_size=4, stride=2),
            torch.nn.LeakyReLU(),
            NNprint(),
            torch.nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            torch.nn.LeakyReLU(),
            NNprint(),
            torch.nn.ConvTranspose2d(64,
                                     32,
                                     kernel_size=4,
                                     padding=0,
                                     stride=2),
            torch.nn.LeakyReLU(),
            NNprint(),
            torch.nn.ConvTranspose2d(32,
                                     image_channels,
                                     kernel_size=5,
                                     stride=1),
            torch.nn.Sigmoid(),
            NNprint(),
        )
Example #9
def cifar_model_deep():
    # cifar deep
    model = nn.Sequential(nn.Conv2d(3, 8, 4, stride=2, padding=1), nn.ReLU(),
                          nn.Conv2d(8, 8, 3, stride=1, padding=1), nn.ReLU(),
                          nn.Conv2d(8, 8, 3, stride=1, padding=1), nn.ReLU(),
                          nn.Conv2d(8, 8, 4, stride=2, padding=1), nn.ReLU(),
                          Flatten(), nn.Linear(8 * 8 * 8, 100), nn.ReLU(),
                          nn.Linear(100, 10))
    return model
    def __init__(self, f_in, h1, h2, f_out):
        super(FC, self).__init__()

        self.h1 = nn.Linear(f_in, h1)
        self.relu1 = nn.ReLU(inplace=True)
        self.h2 = nn.Linear(h1, h2)
        self.relu2 = nn.ReLU(inplace=True)
        self.output = nn.Linear(h2, f_out)

        self.flat = Flatten()
    def __init__(self, f_in, h1, h2, f_out, d):
        super(FC_RP, self).__init__()

        self.basis_weights = nn.Parameter(torch.zeros(d, 1))

        self.h1 = LinearRP(f_in, h1, d)
        self.h2 = LinearRP(h1, h2, d)
        self.output = LinearRP(h2, f_out, d)

        self.flat = Flatten()
Example #12
def mnist_cnn_4layer():
    # mnist_cnn_a
    return nn.Sequential(
        nn.Conv2d(1, 16, (4, 4), stride=2, padding=1),
        nn.ReLU(),
        nn.Conv2d(16, 32, (4, 4), stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(1568, 100),
        nn.ReLU(),
        nn.Linear(100, 10),
    )
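Here the 1568 input features of the first Linear layer come from the 28x28 MNIST input being halved twice by the stride-2 convolutions (28 -> 14 -> 7), leaving 32 channels of 7x7 maps: 32*7*7 = 1568. The same dummy-input check as above verifies this:

import torch
import torch.nn as nn

features = nn.Sequential(
    nn.Conv2d(1, 16, (4, 4), stride=2, padding=1), nn.ReLU(),
    nn.Conv2d(16, 32, (4, 4), stride=2, padding=1), nn.ReLU(),
    nn.Flatten())  # stand-in for the custom Flatten
print(features(torch.randn(1, 1, 28, 28)).shape)  # torch.Size([1, 1568])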
Example #13
def cnn_4layer_b():
    # cifar_cnn_b
    return nn.Sequential(
        nn.ZeroPad2d((1, 2, 1, 2)),
        nn.Conv2d(3, 32, (5, 5), stride=2, padding=0),
        nn.ReLU(),
        nn.Conv2d(32, 128, (4, 4), stride=2, padding=1),
        nn.ReLU(),
        Flatten(),
        nn.Linear(8192, 250),
        nn.ReLU(),
        nn.Linear(250, 10),
    )
Example #14
    def __init__(self, device, image_channels=1, h_dim=1024, z_dim=32):
        super(M1, self).__init__()
        self.device = device
        self.h_dim = h_dim
        self.z_dim = z_dim

        self.encoder = torch.nn.Sequential(
            torch.nn.Conv2d(image_channels, 32, kernel_size=3, stride=2),
            torch.nn.LeakyReLU(),
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=2),
            torch.nn.LeakyReLU(),
            torch.nn.Dropout2d(p=.2),
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1),
            torch.nn.LeakyReLU(),
            torch.nn.Dropout2d(p=.2),
            torch.nn.Conv2d(128, 256, kernel_size=3, stride=1),
            torch.nn.LeakyReLU(),
            Flatten(),
            torch.nn.Linear(h_dim, h_dim),
            torch.nn.LeakyReLU(),
        )

        self.mu = torch.nn.Linear(h_dim, z_dim)  # mean
        self.logsigma = torch.nn.Linear(h_dim, z_dim)  # log standard deviation
        self.upscale_z = torch.nn.Linear(z_dim, h_dim)

        self.decoder = torch.nn.Sequential(
            torch.nn.Linear(h_dim, h_dim), torch.nn.LeakyReLU(), UnFlatten(),
            torch.nn.ConvTranspose2d(h_dim, 128, kernel_size=4, stride=2),
            torch.nn.LeakyReLU(), torch.nn.Dropout2d(p=.2),
            torch.nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2),
            torch.nn.LeakyReLU(), torch.nn.Dropout2d(p=.2),
            torch.nn.ConvTranspose2d(64,
                                     32,
                                     kernel_size=4,
                                     padding=0,
                                     stride=2), torch.nn.LeakyReLU(),
            torch.nn.ConvTranspose2d(32,
                                     image_channels,
                                     kernel_size=5,
                                     stride=1), torch.nn.Sigmoid())
    def _convert_to_nn(self, svm_model, y_train, x_val):
        #convert to linear NN
        print('converting {} model to linear NN'.format(
            self.__class__.__name__))
        W = svm_model.coef_.T
        B = svm_model.intercept_

        if numpy.unique(y_train).size == 2:
            linear_layer = Linear(W.shape[0], 2)
            linear_layer.W = numpy.concatenate([-W, W], axis=1)
            linear_layer.B = numpy.concatenate([-B, B], axis=0)
        else:
            linear_layer = Linear(*(W.shape))
            linear_layer.W = W
            linear_layer.B = B

        svm_model = self.model
        nn_model = Sequential([Flatten(), linear_layer])
        if not self.use_gpu: nn_model.to_numpy()

        #sanity check model conversion
        self._sanity_check_model_conversion(svm_model, nn_model, x_val)
        print('model conversion sanity check passed')
        return nn_model
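In the two-class branch, the single SVM weight vector w and intercept b are duplicated with flipped signs, so the converted layer outputs [-(x·w + b), x·w + b]: column 1 reproduces decision_function and column 0 is its negation, which is exactly what _sanity_check_model_conversion asserts. A small self-contained numpy check of that identity (shapes chosen arbitrarily, independent of the Linear/Sequential classes used here):

import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(5, 1))           # svm_model.coef_.T for 5 input features
B = rng.normal(size=(1,))             # svm_model.intercept_
x = rng.normal(size=(3, 5))           # 3 flattened validation samples

svm_scores = (x @ W + B)[:, 0]        # what decision_function returns for a linear SVM
W2 = np.concatenate([-W, W], axis=1)  # weights of the converted 2-output layer
B2 = np.concatenate([-B, B], axis=0)
nn_scores = x @ W2 + B2               # shape (3, 2)

assert np.allclose(svm_scores, nn_scores[:, 1])
assert np.allclose(svm_scores, -nn_scores[:, 0])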
Example #16
    def _read_txt_helper(path):
        with open(path, 'r') as f:
            content = f.read().split('\n')

            modules = []
            c = 0
            line = content[c]

            while len(line) > 0:
                if line.startswith(
                        Linear.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of linear layer
                    Linear <rows_of_W> <columns_of_W>
                    <flattened weight matrix W>
                    <flattened bias vector>
                    (a hand-written sample file is sketched after this function)
                    '''
                    _, m, n = line.split()
                    m = int(m)
                    n = int(n)
                    layer = Linear(m, n)
                    layer.W = np.array([
                        float(weightstring)
                        for weightstring in content[c + 1].split()
                        if len(weightstring) > 0
                    ]).reshape((m, n))
                    layer.B = np.array([
                        float(weightstring)
                        for weightstring in content[c + 2].split()
                        if len(weightstring) > 0
                    ])
                    modules.append(layer)
                    c += 3  # the description of a linear layer spans three lines

                elif line.startswith(
                        Convolution.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of convolution layer
                    Convolution <rows_of_W> <columns_of_W> <depth_of_W> <number_of_filters_W> <stride_axis_0> <stride_axis_1>
                    <flattened filter block W>
                    <flattened bias vector>
                    '''

                    _, h, w, d, n, s0, s1 = line.split()
                    h = int(h)
                    w = int(w)
                    d = int(d)
                    n = int(n)
                    s0 = int(s0)
                    s1 = int(s1)
                    layer = Convolution(filtersize=(h, w, d, n),
                                        stride=(s0, s1))
                    layer.W = np.array([
                        float(weightstring)
                        for weightstring in content[c + 1].split()
                        if len(weightstring) > 0
                    ]).reshape((h, w, d, n))
                    layer.B = np.array([
                        float(weightstring)
                        for weightstring in content[c + 2].split()
                        if len(weightstring) > 0
                    ])
                    modules.append(layer)
                    c += 3  #the description of a convolution layer spans three lines

                elif line.startswith(
                        SumPool.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of sum pooling layer
                    SumPool <mask_height> <mask_width> <stride_axis_0> <stride_axis_1>
                    '''

                    _, h, w, s0, s1 = line.split()
                    h = int(h)
                    w = int(w)
                    s0 = int(s0)
                    s1 = int(s1)
                    layer = SumPool(pool=(h, w), stride=(s0, s1))
                    modules.append(layer)
                    c += 1  # one line of parameterized layer description

                elif line.startswith(
                        MaxPool.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    '''
                    Format of max pooling layer
                    MaxPool <mask_height> <mask_width> <stride_axis_0> <stride_axis_1>
                    '''

                    _, h, w, s0, s1 = line.split()
                    h = int(h)
                    w = int(w)
                    s0 = int(s0)
                    s1 = int(s1)
                    layer = MaxPool(pool=(h, w), stride=(s0, s1))
                    modules.append(layer)
                    c += 1  # one line of parameterized layer description

                elif line.startswith(
                        Flatten.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(Flatten())
                    c += 1  #one line of parameterless layer description
                elif line.startswith(
                        Rect.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(Rect())
                    c += 1  #one line of parameterless layer description
                elif line.startswith(
                        Tanh.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(Tanh())
                    c += 1  #one line of parameterless layer description
                elif line.startswith(
                        SoftMax.__name__
                ):  # @UndefinedVariable import error suppression for PyDev users
                    modules.append(SoftMax())
                    c += 1  #one line of parameterless layer description
                else:
                    raise ValueError(
                        'Layer type identifier ' +
                        [s for s in line.split() if len(s) > 0][0] +
                        ' not supported for reading from plain text file')

                #skip info of previous layers, read in next layer header
                line = content[c]

        return Sequential(modules)
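The docstrings in _read_txt_helper describe the plain-text network format this reader expects: parameterized layers (Linear, Convolution) occupy three lines (header, flattened weights, flattened bias), pooling layers one header line, and parameterless layers (Flatten, Rect, Tanh, SoftMax) a single line containing just the class name; a trailing blank line ends the parse. A hand-written sample file for a tiny 2-3-1 network (weight values are arbitrary) would look like this:

Linear 2 3
0.1 -0.2 0.3 0.4 0.5 -0.6
0.0 0.0 0.0
Rect
Linear 3 1
0.7 -0.8 0.9
0.0
SoftMax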
Example #17
    def __init__(self, imsize=28, paths=4, segments=5, samples=2, zdim=128,
                 conditional=False, variational=True, raster=False, fc=False):
        super(VectorMNISTVAE, self).__init__()

        # if imsize != 28:
        #     raise NotImplementedError()

        self.samples = samples
        self.imsize = imsize
        self.paths = paths
        self.segments = segments
        self.zdim = zdim
        self.conditional = conditional
        self.variational = variational

        ncond = 0
        if self.conditional:  # one hot encoded input for conditional model
            ncond = 10

        self.fc = fc
        mult = 1
        nc = 1024

        if not self.fc:  # conv model
            self.encoder = th.nn.Sequential(
                # 32x32
                th.nn.Conv2d(1 + ncond, mult*64, 4, padding=0, stride=2),
                th.nn.LeakyReLU(0.2, inplace=True),

                # 16x16
                th.nn.Conv2d(mult*64, mult*128, 4, padding=0, stride=2),
                th.nn.LeakyReLU(0.2, inplace=True),

                # 8x8
                th.nn.Conv2d(mult*128, mult*256, 4, padding=0, stride=2),
                th.nn.LeakyReLU(0.2, inplace=True),
                Flatten(),
            )
        else:
            self.encoder = th.nn.Sequential(
                Flatten(),
                th.nn.Linear(28*28 + ncond, mult*256),
                th.nn.LeakyReLU(0.2, inplace=True),

                th.nn.Linear(mult*256, mult*256),
                th.nn.LeakyReLU(0.2, inplace=True),
            )

        self.mu_predictor = th.nn.Linear(256*1*1, zdim)
        if self.variational:
            self.logvar_predictor = th.nn.Linear(256*1*1, zdim)

        self.decoder = th.nn.Sequential(
            th.nn.Linear(zdim + ncond, nc),
            th.nn.SELU(inplace=True),

            th.nn.Linear(nc, nc),
            th.nn.SELU(inplace=True),
        )


        self.raster = raster
        if self.raster:
            self.raster_decoder = th.nn.Sequential(
                th.nn.Linear(nc, imsize*imsize),
            )
        else:
            # chained cubic Bezier curves: n_segments segments need 3*n_segments + 1 control points
            self.point_predictor = th.nn.Sequential(
                th.nn.Linear(nc, 2*self.paths*(self.segments*3+1)),
                th.nn.Tanh()  # bound spatial extent
            )

            self.width_predictor = th.nn.Sequential(
                th.nn.Linear(nc, self.paths),
                th.nn.Tanh()
            )

            self.alpha_predictor = th.nn.Sequential(
                th.nn.Linear(nc, self.paths),
                th.nn.Tanh()
            )

            self._reset_weights()
    def __init__(self):
        super(StyleCNN, self).__init__()

        # Initial configurations
        self.content_layers = ['conv_4']
        self.style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
        self.content_weight = 5
        self.style_weight = 1000
        self.gram = GramMatrix()

        self.use_cuda = torch.cuda.is_available()

        final_linear = nn.Linear(256, 32)
        torch.randn(final_linear.weight.size(),
                    out=final_linear.weight.data).mul_(0.01)
        final_linear.bias.data.mul_(0.01)
        final_linear.bias.data[:16].add_(1)
        self.normalization_network = nn.Sequential(
            nn.Conv2d(3, 32, 9, stride=2, padding=0),
            nn.Conv2d(32, 64, 9, stride=2, padding=0),
            nn.Conv2d(64, 128, 9, stride=2, padding=0), Flatten(),
            nn.Linear(625, 256), final_linear)

        self.transform_network = nn.Sequential(
            nn.ReflectionPad2d(40),
            nn.Conv2d(3, 32, 9, stride=1, padding=4),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.Conv2d(128, 128, 3, stride=1, padding=0),
            nn.ConvTranspose2d(128,
                               64,
                               3,
                               stride=2,
                               padding=1,
                               output_padding=1),
            nn.ConvTranspose2d(64,
                               32,
                               3,
                               stride=2,
                               padding=1,
                               output_padding=1),
            nn.Conv2d(32, 3, 9, stride=1, padding=4),
        )
        try:
            self.transform_network.load_state_dict(
                torch.load("models/transform_net_ckpt"))
            self.normalization_network.load_state_dict(
                torch.load("models/normalization_net_ckpt"))
        except IOError:
            pass

        self.out_dims = [
            32, 64, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 64,
            32, 3
        ]

        self.loss_network = models.vgg19(pretrained=True)
        self.loss_layers = []
        idx, layer_i = 0, 1
        for layer in list(self.loss_network.features):
            losses = ""
            if isinstance(layer, nn.Conv2d):
                name = "conv_" + str(layer_i)

                if name in self.content_layers:
                    losses += "c"
                if name in self.style_layers:
                    losses += "s"

                if losses != "":
                    self.loss_layers.append((idx, losses))

            if isinstance(layer, nn.ReLU):
                layer_i += 1

            idx += 1

        self.loss = nn.MSELoss()
        norm_params = torch.FloatTensor(128, 32)
        torch.randn(128, 16, out=norm_params[:, :16]).mul_(0.01).add_(1)
        torch.randn(128, 16, out=norm_params[:, 16:]).mul_(0.01)
        self.norm_params = Parameter(norm_params)
        self.normalization_optimizer = optim.Adam(
            self.normalization_network.parameters(), lr=1e-3)
        self.transform_optimizer = optim.Adam(
            self.transform_network.parameters(), lr=1e-3)

        if self.use_cuda:
            self.normalization_network.cuda()
            self.loss.cuda()
            self.gram.cuda()
Example #19
    def __init__(self, **kwargs):
        super(PoseNet, self).__init__("PoseNet", **kwargs)

        if kwargs["only_front_camera"]:
            in_channels = 3
        else:
            in_channels = 18

        # Stem network
        self.stem_network = nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=64,
                      kernel_size=7,
                      stride=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(True),
            #
            nn.Conv2d(in_channels=64,
                      out_channels=192,
                      kernel_size=3,
                      stride=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.ReLU(True),
        )

        # Side networks
        self.side_network_4a = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            nn.Conv2d(in_channels=512,
                      out_channels=128,
                      kernel_size=1,
                      stride=1),
            nn.ReLU(True),
            Flatten(),
            nn.Linear(3 * 3 * 128, 1024),  # paper says 4 x 4 ?
            nn.ReLU(True),
            nn.Dropout(p=0.7),
            nn.Linear(1024, 4))

        self.side_network_4d = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            nn.Conv2d(in_channels=528,
                      out_channels=128,
                      kernel_size=1,
                      stride=1),
            nn.ReLU(True),
            Flatten(),
            nn.Linear(3 * 3 * 128, 1024),  # paper says 4 x 4 ?
            nn.ReLU(True),
            nn.Dropout(p=0.7),
            nn.Linear(1024, 4))

        # Inceptions 3
        self.incep_3a = Inception(
            in_channels=192,
            conv1x1_out_channels=64,
            conv3x3_in_channels=96,
            conv3x3_out_channels=128,
            conv5x5_in_channels=16,
            conv5x5_out_channels=32,
            maxpool3x3_out_channels=32,
        )

        self.incep_3b = Inception(
            in_channels=256,
            conv1x1_out_channels=128,
            conv3x3_in_channels=128,
            conv3x3_out_channels=192,
            conv5x5_in_channels=32,
            conv5x5_out_channels=96,
            maxpool3x3_out_channels=64,
        )

        # Inceptions 4
        self.incep_4a = Inception(in_channels=480,
                                  conv1x1_out_channels=192,
                                  conv3x3_in_channels=96,
                                  conv3x3_out_channels=208,
                                  conv5x5_in_channels=16,
                                  conv5x5_out_channels=48,
                                  maxpool3x3_out_channels=64)
        self.incep_4b = Inception(in_channels=512,
                                  conv1x1_out_channels=160,
                                  conv3x3_in_channels=112,
                                  conv3x3_out_channels=224,
                                  conv5x5_in_channels=24,
                                  conv5x5_out_channels=64,
                                  maxpool3x3_out_channels=64)

        self.incep_4c = Inception(in_channels=512,
                                  conv1x1_out_channels=128,
                                  conv3x3_in_channels=128,
                                  conv3x3_out_channels=256,
                                  conv5x5_in_channels=24,
                                  conv5x5_out_channels=64,
                                  maxpool3x3_out_channels=64)
        self.incep_4d = Inception(in_channels=512,
                                  conv1x1_out_channels=112,
                                  conv3x3_in_channels=144,
                                  conv3x3_out_channels=288,
                                  conv5x5_in_channels=32,
                                  conv5x5_out_channels=64,
                                  maxpool3x3_out_channels=64)
        self.incep_4e = Inception(in_channels=528,
                                  conv1x1_out_channels=256,
                                  conv3x3_in_channels=160,
                                  conv3x3_out_channels=320,
                                  conv5x5_in_channels=32,
                                  conv5x5_out_channels=128,
                                  maxpool3x3_out_channels=128)

        # Inceptions 5
        self.incep_5a = Inception(in_channels=832,
                                  conv1x1_out_channels=256,
                                  conv3x3_in_channels=160,
                                  conv3x3_out_channels=320,
                                  conv5x5_in_channels=32,
                                  conv5x5_out_channels=128,
                                  maxpool3x3_out_channels=128)
        self.incep_5b = Inception(in_channels=832,
                                  conv1x1_out_channels=384,
                                  conv3x3_in_channels=192,
                                  conv3x3_out_channels=384,
                                  conv5x5_in_channels=48,
                                  conv5x5_out_channels=128,
                                  maxpool3x3_out_channels=128)

        self.flatten = Flatten()
        self.dropout = nn.Dropout(p=0.4)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.avgpool = nn.AvgPool2d(kernel_size=7, stride=1)

        self.final_regressor = nn.Sequential(nn.Linear(1024, 2048),
                                             nn.ReLU(True), nn.Linear(2048, 4))

        self.apply(xavier_initialization)
Example #20
    def __init__(self, **kwargs):
        super(PoseNetSimple, self).__init__("PoseNetSimple", **kwargs)

        if kwargs["only_front_camera"]:
            in_channels = 3
        else:
            in_channels = 18

        # Stem network
        self.stem_network = nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=64,
                      kernel_size=7,
                      stride=2),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LeakyReLU(inplace=True),
            #
            nn.Conv2d(in_channels=64,
                      out_channels=192,
                      kernel_size=3,
                      stride=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.LeakyReLU(inplace=True),
        )

        # Inceptions 3
        self.incep_3a = Inception(
            in_channels=192,
            conv1x1_out_channels=64,
            conv3x3_in_channels=96,
            conv3x3_out_channels=128,
            conv5x5_in_channels=16,
            conv5x5_out_channels=32,
            maxpool3x3_out_channels=32,
        )

        self.incep_3b = Inception(
            in_channels=256,
            conv1x1_out_channels=128,
            conv3x3_in_channels=128,
            conv3x3_out_channels=192,
            conv5x5_in_channels=32,
            conv5x5_out_channels=96,
            maxpool3x3_out_channels=64,
        )

        # Inceptions 4
        self.incep_4a = Inception(in_channels=480,
                                  conv1x1_out_channels=192,
                                  conv3x3_in_channels=96,
                                  conv3x3_out_channels=208,
                                  conv5x5_in_channels=16,
                                  conv5x5_out_channels=48,
                                  maxpool3x3_out_channels=64)

        # Side networks
        self.side_network_4a = nn.Sequential(
            nn.AvgPool2d(kernel_size=5, stride=3),
            nn.Conv2d(in_channels=512,
                      out_channels=128,
                      kernel_size=1,
                      stride=1),
            nn.LeakyReLU(inplace=True),
            Flatten(),
            nn.Linear(3 * 3 * 128, 1024),  # paper says 4 x 4 ?
            nn.LeakyReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 4))

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.apply(xavier_initialization)