Beispiel #1
0
    def __init__(self,
                 list_blocks_types,
                 img_height,
                 img_width,
                 device,
                 self_supervised,
                 normalization=None,
                 share_fc_weights=False,
                 share_conv_weights=False,
                 fc_dropout_rate=0.0,
                 show_img=False,
                 trace_model=False):
        """Build the image warper and the pose-prediction blocks.

        Each block consists of a convolutional stack (from
        ``self.create_convs``) followed by one ``nn.Linear`` head whose
        output size is the number of degrees of freedom of that block.

        Args:
            list_blocks_types: Sequence of motion-type strings, one per
                block. Supported values are ``'trans'``, ``'rot'``,
                ``'tilt'`` and ``'rot&trans'``. Only sequences of length
                1 or 3 have a network configuration here.
            img_height: Forwarded to ``warp.WarpImg``.
            img_width: Forwarded to ``warp.WarpImg``.
            device: Stored on the instance and forwarded to
                ``warp.WarpImg``.
            self_supervised: Stored as ``self.self_sup``.
            normalization: Accepted but not used by this constructor.
            share_fc_weights: If true, blocks 2 and 3 reuse the linear
                head of block 1. Automatically disabled (with a printed
                notice) when the blocks do not all predict the same
                number of degrees of freedom.
            share_conv_weights: If true, blocks 2 and 3 reuse the
                convolutional stack of block 1.
            fc_dropout_rate: Accepted but not used by this constructor.
            show_img: If true, the OpenCV windows ``'before'`` and
                ``'after'`` are created.
            trace_model: Stored as ``self.trace_model``.

        Raises:
            ValueError: If a motion type is unknown, or if the number of
                blocks has no network configuration.
        """
        super(ICSTNStandard, self).__init__()

        self.device = device
        self.num_blocks = len(list_blocks_types)
        self.show_img = show_img
        self.share_fc = share_fc_weights
        self.share_conv = share_conv_weights
        self.trace_model = trace_model
        self.self_sup = self_supervised
        # self.self_sup_SSIM = False # TODO

        self.img_warper = warp.WarpImg(img_height=img_height,
                                       img_width=img_width,
                                       device=self.device)

        # NOTE degree-of-freedom to predict for each motion type.
        dof_by_motion = {'trans': 3, 'rot': 3, 'tilt': 2, 'rot&trans': 6}
        self.dof = []
        for motion in list_blocks_types:
            # An unknown type used to be skipped silently, leaving self.dof
            # shorter than the block list and failing later on indexing.
            try:
                self.dof.append(dof_by_motion[motion])
            except (KeyError, TypeError):
                raise ValueError(
                    "Unknown motion type {!r}; expected one of {}".format(
                        motion, sorted(dof_by_motion))) from None

        # A single linear head can only be shared when every block has the
        # same output size.
        if self.share_fc and len(set(self.dof)) != 1:
            self.share_fc = False
            print(
                "Cannot share fully-connected layer with this architecture! Will use different weights."
            )

        if self.num_blocks == 1:
            conv_layers_block = 18  # TABLE I 2nd
            linear_inputs = 768
        elif self.num_blocks == 3:
            conv_layers_block = 5.7542  # TABLE I 6th
            linear_inputs = 5120
        else:
            # Fail here instead of printing and then hitting a NameError on
            # the undefined configuration below.
            raise ValueError(
                "Error! No ICSTN network block is implemented for {}".format(
                    list_blocks_types))

        # create network blocks

        # block 1
        self.convs_block_1 = self.create_convs(conv_layers_block)
        self.fc_block_1 = nn.Linear(linear_inputs, self.dof[0], bias=True)

        # block 2
        if self.num_blocks > 1:
            if self.share_conv:
                self.convs_block_2 = self.convs_block_1
            else:
                self.convs_block_2 = self.create_convs(conv_layers_block)

            # Equal DoF is guaranteed by the share check above.
            if self.share_fc:
                self.fc_block_2 = self.fc_block_1
            else:
                self.fc_block_2 = nn.Linear(linear_inputs,
                                            self.dof[1],
                                            bias=True)

        # block 3
        if self.num_blocks > 2:
            if self.share_conv:
                self.convs_block_3 = self.convs_block_1
            else:
                self.convs_block_3 = self.create_convs(conv_layers_block)

            if self.share_fc:
                self.fc_block_3 = self.fc_block_1
            else:
                self.fc_block_3 = nn.Linear(linear_inputs,
                                            self.dof[2],
                                            bias=True)

        print("PoseNet blocks are:", list_blocks_types, conv_layers_block)

        self.init_weights()
        print("ICSTN Standard is initialized!")

        if self.show_img:
            cv2.namedWindow('before', cv2.WINDOW_NORMAL)
            cv2.namedWindow('after', cv2.WINDOW_NORMAL)
Beispiel #2
0
    def __init__(self,
                 list_blocks_types,
                 img_height,
                 img_width,
                 device,
                 self_supervised,
                 normalization=None,
                 share_fc_weights=True,
                 fc_dropout_rate=0.0,
                 show_img=False,
                 trace_model=False):
        """Build the image warper and one pose-prediction block per level.

        Every entry of ``list_blocks_types`` corresponds to one image
        pyramid level and gets its own convolutional stack (from
        ``self.create_convs``) plus an ``nn.Linear`` head.

        Args:
            list_blocks_types: Sequence of motion-type strings, one per
                block. Supported values are ``'trans'``, ``'rot'``,
                ``'tilt'`` and ``'rot&trans'``. Only sequences of length
                3 or 4 have a network configuration here.
            img_height: Forwarded to ``warp.WarpImg``.
            img_width: Forwarded to ``warp.WarpImg``.
            device: Stored on the instance and forwarded to
                ``warp.WarpImg``.
            self_supervised: Stored as ``self.self_sup``.
            normalization: Accepted but not used by this constructor.
            share_fc_weights: If true, all blocks reuse the linear head
                of block 1. Automatically disabled (with a printed
                notice) when the blocks differ in linear input size or
                in number of degrees of freedom.
            fc_dropout_rate: Accepted but not used by this constructor.
            show_img: If true, the OpenCV windows ``'before'`` and
                ``'after'`` are created.
            trace_model: Stored as ``self.trace_model``.

        Raises:
            ValueError: If a motion type is unknown, or if the number of
                blocks has no network configuration.
        """
        super(ICSTNPyramid, self).__init__()

        self.device = device
        self.num_blocks = len(list_blocks_types)
        # each block corresponds to one pyramid level
        self.max_pyramid_level = self.num_blocks - 1
        self.show_img = show_img
        self.share_fc = share_fc_weights
        self.trace_model = trace_model
        self.self_sup = self_supervised
        # self.self_sup_SSIM = False # TODO

        self.img_warper = warp.WarpImg(img_height=img_height,
                                       img_width=img_width,
                                       device=self.device)

        # NOTE degree-of-freedom to predict for each motion type.
        dof_by_motion = {'trans': 3, 'rot': 3, 'tilt': 2, 'rot&trans': 6}
        self.dof = []
        for motion in list_blocks_types:
            # An unknown type used to be skipped silently, leaving self.dof
            # shorter than the block list.
            try:
                self.dof.append(dof_by_motion[motion])
            except (KeyError, TypeError):
                raise ValueError(
                    "Unknown motion type {!r}; expected one of {}".format(
                        motion, sorted(dof_by_motion))) from None

        self.avgPool = False  # True False

        if self.num_blocks == 4:
            conv_layers_block_list = [2.7522, 2.7542, 3.7542,
                                      3.7544]  # TABLE II 4th
            linear_inputs = [
                8960, 8960, 17920, 17920
            ]  # NOTE cannot share weights among fully-connected layers
        elif self.num_blocks == 3:
            conv_layers_block_list = [4.7522, 4.7542, 4.7544]  # TABLE II 3rd
            linear_inputs = [5120, 5120, 5120]
        else:
            # Fail here instead of printing and then hitting a NameError on
            # the undefined configuration below.
            raise ValueError(
                "Error! No pyramidal network block is implemented for {}".
                format(list_blocks_types))

        # One head can only be shared when every block has the same input
        # and output size.
        if self.share_fc and (len(set(linear_inputs)) != 1
                              or len(set(self.dof)) != 1):
            self.share_fc = False
            print(
                "Cannot share fully-connected layer with this architecture! Will use different weights."
            )

        # create network blocks
        # The linear_input_N attributes are set for every block regardless of
        # weight sharing (previously blocks 2-4 only had them when the heads
        # were not shared).

        self.convs_block_1 = self.create_convs(conv_layers_block_list[0])
        self.linear_input_1 = linear_inputs[0]
        self.fc_block_1 = nn.Linear(self.linear_input_1,
                                    self.dof[0],
                                    bias=True)

        if self.num_blocks > 1:
            self.convs_block_2 = self.create_convs(conv_layers_block_list[1])
            self.linear_input_2 = linear_inputs[1]
            if self.share_fc:
                self.fc_block_2 = self.fc_block_1
            else:
                self.fc_block_2 = nn.Linear(self.linear_input_2,
                                            self.dof[1],
                                            bias=True)

        if self.num_blocks > 2:
            self.convs_block_3 = self.create_convs(conv_layers_block_list[2])
            self.linear_input_3 = linear_inputs[2]
            if self.share_fc:
                self.fc_block_3 = self.fc_block_1
            else:
                self.fc_block_3 = nn.Linear(self.linear_input_3,
                                            self.dof[2],
                                            bias=True)

        if self.num_blocks > 3:
            self.convs_block_4 = self.create_convs(conv_layers_block_list[3])
            self.linear_input_4 = linear_inputs[3]
            if self.share_fc:
                self.fc_block_4 = self.fc_block_1
            else:
                self.fc_block_4 = nn.Linear(self.linear_input_4,
                                            self.dof[3],
                                            bias=True)

        print("Pyramidal PoseNet has {} image pyramid levels.".format(
            self.max_pyramid_level + 1))
        print("PoseNet blocks are:", list_blocks_types, conv_layers_block_list)
        # print("Input fully connected:", linear_inputs)

        if self.avgPool:
            print("Average Pooling for downsampling.")
        else:
            print("Bilinear Interpolation for downsampling.")

        self.init_weights()
        print("ICSTN Pyramid is initialized!")

        if self.show_img:
            cv2.namedWindow('before', cv2.WINDOW_NORMAL)
            cv2.namedWindow('after', cv2.WINDOW_NORMAL)
Beispiel #3
0
    def __init__(self,
                 list_blocks_types,
                 img_height,
                 img_width,
                 device,
                 self_supervised,
                 normalization=None,
                 share_fc_weights=True,
                 fc_dropout_rate=0.0,
                 show_img=False,
                 trace_model=False):
        """Build the feature-pyramid extractor and three pose blocks.

        Creates one ``warp.WarpImg`` per pyramid level, three shared
        feature-extraction convolutions (``conv1``..``conv3``) and three
        pose-prediction blocks, each a convolutional stack followed by an
        ``nn.Linear`` head.

        Args:
            list_blocks_types: Sequence of motion-type strings, one per
                pose block. Supported values are ``'trans'``, ``'rot'``,
                ``'tilt'`` and ``'rot&trans'``. The layers built here are
                hard-coded for three pose blocks.
            img_height: Full-resolution height; level ``k`` uses
                ``int(img_height / 2**k)``.
            img_width: Full-resolution width; level ``k`` uses
                ``int(img_width / 2**k)``.
            device: Stored on the instance and forwarded to every
                ``warp.WarpImg``.
            self_supervised: Stored as ``self.self_sup``.
            normalization: Accepted but not used by this constructor.
            share_fc_weights: If true, blocks 2 and 3 reuse the linear
                head of block 1. Automatically disabled (with a printed
                notice) when the blocks do not all predict the same
                number of degrees of freedom.
            fc_dropout_rate: Accepted but not used by this constructor.
            show_img: If true, the OpenCV windows ``'before'`` and
                ``'after'`` are created.
            trace_model: Stored as ``self.trace_model``.

        Raises:
            ValueError: If a motion type is unknown, or if fewer motion
                types are given than linear heads have to be created.
        """
        super().__init__()

        self.device = device
        # each block corresponds to one pyramid level, '+1' to correspond to
        # Table II - 6th
        self.max_pyramid_level = len(list_blocks_types) + 1
        self.num_blocks = len(list_blocks_types)
        self.show_img = show_img
        self.share_fc = share_fc_weights
        self.trace_model = trace_model
        self.self_sup = self_supervised

        # One image warper per pyramid level, keyed by level index.
        # NOTE for now, each pyramid level has same posenet blocks
        self.img_warper_dict = {}
        for pyramid_level in range(self.max_pyramid_level):
            print(
                "Initializing PoseNet and ImgWarper for pyramid level {} ...".
                format(pyramid_level))
            scale = 2**pyramid_level
            self.img_warper_dict[pyramid_level] = warp.WarpImg(
                img_height=int(img_height / scale),
                img_width=int(img_width / scale),
                device=self.device)

        # NOTE degree-of-freedom to predict for each motion type.
        dof_by_motion = {'trans': 3, 'rot': 3, 'tilt': 2, 'rot&trans': 6}
        self.dof = []
        for motion in list_blocks_types:
            # An unknown type used to be skipped silently, leaving self.dof
            # shorter than the block list.
            try:
                self.dof.append(dof_by_motion[motion])
            except (KeyError, TypeError):
                raise ValueError(
                    "Unknown motion type {!r}; expected one of {}".format(
                        motion, sorted(dof_by_motion))) from None

        # A single linear head can only be shared when every block predicts
        # the same number of outputs; otherwise blocks 2/3 would silently get
        # a head sized for block 1.
        if self.share_fc and len(set(self.dof)) != 1:
            self.share_fc = False
            print(
                "Cannot share fully-connected layer with this architecture! Will use different weights."
            )

        # Shared heads only need dof[0]; separate heads need one entry for
        # each of the three hard-coded blocks.
        required_dof = 1 if self.share_fc else 3
        if len(self.dof) < required_dof:
            raise ValueError(
                "Expected at least {} motion types, got {}: {}".format(
                    required_dof, len(self.dof), list_blocks_types))

        if self.show_img:
            cv2.namedWindow('before', cv2.WINDOW_NORMAL)
            cv2.namedWindow('after', cv2.WINDOW_NORMAL)

        conv_planes = [16, 32, 64, 128, 256]
        linear_inputs = [5120, 5120, 5120]

        # feature pyramid extractor (acts on each image respectively)
        # (Table II - 6th). The trailing number pairs are kept from the
        # original author; presumably the feature-map height/width after each
        # layer for the reference input size -- TODO confirm.
        self.conv1 = conv(1, conv_planes[0], kernel_size=7, stride=4)  # 56 80
        self.conv2 = conv(conv_planes[0],
                          conv_planes[1],
                          kernel_size=5,
                          stride=2)  # 28 40
        self.conv3 = conv(conv_planes[1], conv_planes[2], stride=2)  # 14 20

        # Pose prediction blocks. Each stack takes twice the channel count of
        # one extractor level as input.
        self.convs_block_1 = nn.Sequential(  # 14 20
            conv(conv_planes[2] * 2, conv_planes[3], stride=2),  # 7 10
            conv(conv_planes[3], conv_planes[4], stride=2)  # 4 5
        )
        self.linear_input_1 = linear_inputs[0]
        self.fc_block_1 = nn.Linear(self.linear_input_1,
                                    self.dof[0],
                                    bias=True)

        self.convs_block_2 = nn.Sequential(  # 28 40
            conv(conv_planes[1] * 2, conv_planes[2], stride=2),  # 14 20
            conv(conv_planes[2], conv_planes[3], stride=2),  # 7 10
            conv(conv_planes[3], conv_planes[4], stride=2)  # 4 5
        )
        # linear_input_2/3 are set regardless of weight sharing (previously
        # only when the heads were not shared).
        self.linear_input_2 = linear_inputs[1]
        if self.share_fc:
            self.fc_block_2 = self.fc_block_1
        else:
            self.fc_block_2 = nn.Linear(self.linear_input_2,
                                        self.dof[1],
                                        bias=True)

        self.convs_block_3 = nn.Sequential(  # 56 80
            conv(conv_planes[0] * 2, conv_planes[1], kernel_size=5,
                 stride=2),  # 28 40
            conv(conv_planes[1], conv_planes[2], stride=2),  # 14 20
            conv(conv_planes[2], conv_planes[3], stride=2),  # 7 10
            conv(conv_planes[3], conv_planes[4], stride=2)  # 4 5
        )
        self.linear_input_3 = linear_inputs[2]
        if self.share_fc:
            self.fc_block_3 = self.fc_block_1
        else:
            self.fc_block_3 = nn.Linear(self.linear_input_3,
                                        self.dof[2],
                                        bias=True)

        self.init_weights()

        print("FPE PoseNet has {} image pyramid levels.".format(
            self.num_blocks))
        print("PoseNet blocks are:", list_blocks_types)
        # print("Input fully connected:", linear_inputs)

        print("Feature Pyramids Extractor Network is initialized!")