def __init__(self, list_blocks_types, img_height, img_width, device, self_supervised, normalization=None, share_fc_weights=False, share_conv_weights=False, fc_dropout_rate=0.0, show_img=False, trace_model=False):
    """Build the standard (non-pyramidal) ICSTN pose network.

    Args:
        list_blocks_types: one motion type per network block, each of
            'trans', 'rot', 'tilt' or 'rot&trans'; its length fixes the
            number of blocks (1 or 3 are supported).
        img_height, img_width: input image size, forwarded to the warper.
        device: torch device handed to warp.WarpImg.
        self_supervised: stored on self.self_sup (used elsewhere).
        normalization: accepted for interface compatibility; not used here.
        share_fc_weights: share one fully-connected head across blocks
            (only possible when every block predicts the same DoF count).
        share_conv_weights: share the convolutional trunk across blocks.
        fc_dropout_rate: accepted for interface compatibility; not used here.
        show_img: open OpenCV debug windows 'before'/'after'.
        trace_model: stored flag (presumably for torch.jit tracing —
            confirm against callers).

    Raises:
        ValueError: for an unknown motion type or an unsupported number
            of blocks.
    """
    super(ICSTNStandard, self).__init__()
    self.device = device
    self.num_blocks = len(list_blocks_types)
    self.show_img = show_img
    self.share_fc = share_fc_weights
    self.share_conv = share_conv_weights
    self.trace_model = trace_model
    self.self_sup = self_supervised
    # self.self_sup_SSIM = False # TODO
    self.img_warper = warp.WarpImg(img_height=img_height, img_width=img_width, device=self.device)

    # Degrees of freedom each block must predict, derived from its motion type.
    self.dof = []
    for motion in list_blocks_types:
        if motion == 'trans' or motion == 'rot':
            self.dof.append(3)  # NOTE degree-of-freedom to predict (for now translation only)
        elif motion == 'tilt':
            self.dof.append(2)
        elif motion == 'rot&trans':
            self.dof.append(6)
        else:
            # FIX: an unknown motion type used to be silently skipped, which
            # misaligned self.dof with the blocks and caused a later IndexError.
            raise ValueError("Unknown motion type: {}".format(motion))

    if self.share_fc:
        if len(set(self.dof)) != 1:
            # Shared FC head requires identical output size in every block.
            self.share_fc = False
            print("Cannot share fully-connected layer with this architecture! Will use different weights.")

    if self.num_blocks == 1:
        conv_layers_block = 18  # TABLE I 2nd
        linear_inputs = 768
    elif self.num_blocks == 3:
        conv_layers_block = 5.7542  # TABLE I 6th
        linear_inputs = 5120
    else:
        # FIX: this branch only printed an error and fell through, producing
        # a NameError on conv_layers_block below; raise a clear exception.
        raise ValueError("No ICSTN network block is implemented for {}".format(list_blocks_types))

    # create network blocks
    # block 1
    self.convs_block_1 = self.create_convs(conv_layers_block)
    self.fc_block_1 = nn.Linear(linear_inputs, self.dof[0], bias=True)
    # block 2 (optionally reusing block-1 weights)
    if self.num_blocks > 1:
        if self.share_conv:
            self.convs_block_2 = self.convs_block_1
        else:
            self.convs_block_2 = self.create_convs(conv_layers_block)
        if self.share_fc:
            assert (self.dof[0] == self.dof[1])
            self.fc_block_2 = self.fc_block_1
        else:
            self.fc_block_2 = nn.Linear(linear_inputs, self.dof[1], bias=True)
    # block 3 (optionally reusing block-1 weights)
    if self.num_blocks > 2:
        if self.share_conv:
            self.convs_block_3 = self.convs_block_1
        else:
            self.convs_block_3 = self.create_convs(conv_layers_block)
        if self.share_fc:
            assert (self.dof[0] == self.dof[2])
            self.fc_block_3 = self.fc_block_1
        else:
            self.fc_block_3 = nn.Linear(linear_inputs, self.dof[2], bias=True)

    print("PoseNet blocks are:", list_blocks_types, conv_layers_block)
    self.init_weights()
    print("ICSTN Standard is initialized!")

    if self.show_img:
        cv2.namedWindow('before', cv2.WINDOW_NORMAL)
        cv2.namedWindow('after', cv2.WINDOW_NORMAL)
def __init__(self, list_blocks_types, img_height, img_width, device, self_supervised, normalization=None, share_fc_weights=True, fc_dropout_rate=0.0, show_img=False, trace_model=False):
    """Build the pyramidal ICSTN pose network (one block per pyramid level).

    Args:
        list_blocks_types: one motion type per network block / pyramid
            level, each of 'trans', 'rot', 'tilt' or 'rot&trans'; its
            length fixes the number of blocks (3 or 4 are supported).
        img_height, img_width: input image size, forwarded to the warper.
        device: torch device handed to warp.WarpImg.
        self_supervised: stored on self.self_sup (used elsewhere).
        normalization: accepted for interface compatibility; not used here.
        share_fc_weights: share one fully-connected head across blocks
            (only possible when every block has the same FC input size
            and DoF count).
        fc_dropout_rate: accepted for interface compatibility; not used here.
        show_img: open OpenCV debug windows 'before'/'after'.
        trace_model: stored flag (presumably for torch.jit tracing —
            confirm against callers).

    Raises:
        ValueError: for an unknown motion type or an unsupported number
            of blocks.
    """
    super(ICSTNPyramid, self).__init__()
    self.device = device
    self.max_pyramid_level = len(list_blocks_types) - 1  # each block corresponds to one pyramid level
    self.num_blocks = len(list_blocks_types)
    self.show_img = show_img
    self.share_fc = share_fc_weights
    self.trace_model = trace_model
    self.self_sup = self_supervised
    # self.self_sup_SSIM = False # TODO
    self.img_warper = warp.WarpImg(img_height=img_height, img_width=img_width, device=self.device)

    # Degrees of freedom each block must predict, derived from its motion type.
    self.dof = []
    for motion in list_blocks_types:
        if motion == 'trans' or motion == 'rot':
            self.dof.append(3)  # NOTE degree-of-freedom to predict (for now translation only)
        elif motion == 'tilt':
            self.dof.append(2)
        elif motion == 'rot&trans':
            self.dof.append(6)
        else:
            # FIX: an unknown motion type used to be silently skipped, which
            # misaligned self.dof with the blocks and caused a later IndexError.
            raise ValueError("Unknown motion type: {}".format(motion))

    self.avgPool = False  # True False — downsampling mode flag (see prints below)

    if self.num_blocks == 4:
        conv_layers_block_list = [2.7522, 2.7542, 3.7542, 3.7544]  # TABLE II 4th
        linear_inputs = [8960, 8960, 17920, 17920]  # NOTE cannot share weights among fully-connected layers
    elif self.num_blocks == 3:
        conv_layers_block_list = [4.7522, 4.7542, 4.7544]  # TABLE II 3rd
        linear_inputs = [5120, 5120, 5120]
    else:
        # FIX: this branch only printed an error and fell through, producing
        # a NameError on conv_layers_block_list below; raise a clear exception.
        raise ValueError("No pyramidal network block is implemented for {}".format(list_blocks_types))
    assert (self.num_blocks == len(conv_layers_block_list))

    if self.share_fc:
        if len(set(linear_inputs)) != 1 or len(set(self.dof)) != 1:
            # Shared FC head requires identical input and output sizes everywhere.
            self.share_fc = False
            print("Cannot share fully-connected layer with this architecture! Will use different weights.")

    # create network blocks
    self.convs_block_1 = self.create_convs(conv_layers_block_list[0])
    self.linear_input_1 = linear_inputs[0]
    self.fc_block_1 = nn.Linear(self.linear_input_1, self.dof[0], bias=True)
    if self.num_blocks > 1:
        self.convs_block_2 = self.create_convs(conv_layers_block_list[1])
        if self.share_fc:
            assert (linear_inputs[1] == linear_inputs[0])
            self.fc_block_2 = self.fc_block_1
        else:
            self.linear_input_2 = linear_inputs[1]
            self.fc_block_2 = nn.Linear(self.linear_input_2, self.dof[1], bias=True)
    if self.num_blocks > 2:
        self.convs_block_3 = self.create_convs(conv_layers_block_list[2])
        if self.share_fc:
            assert (linear_inputs[2] == linear_inputs[0])
            self.fc_block_3 = self.fc_block_1
        else:
            self.linear_input_3 = linear_inputs[2]
            self.fc_block_3 = nn.Linear(self.linear_input_3, self.dof[2], bias=True)
    if self.num_blocks > 3:
        self.convs_block_4 = self.create_convs(conv_layers_block_list[3])
        if self.share_fc:
            assert (linear_inputs[3] == linear_inputs[0])
            self.fc_block_4 = self.fc_block_1
        else:
            self.linear_input_4 = linear_inputs[3]
            self.fc_block_4 = nn.Linear(self.linear_input_4, self.dof[3], bias=True)

    print("Pyramidal PoseNet has {} image pyramid levels.".format(self.max_pyramid_level + 1))
    print("PoseNet blocks are:", list_blocks_types, conv_layers_block_list)
    # print("Input fully connected:", linear_inputs)
    if self.avgPool:
        print("Average Pooling for downsampling.")
    else:
        print("Bilinear Interpolation for downsampling.")
    self.init_weights()
    print("ICSTN Pyramid is initialized!")

    if self.show_img:
        cv2.namedWindow('before', cv2.WINDOW_NORMAL)
        cv2.namedWindow('after', cv2.WINDOW_NORMAL)
def __init__(self, list_blocks_types, img_height, img_width, device, self_supervised, normalization=None, share_fc_weights=True, fc_dropout_rate=0.0, show_img=False, trace_model=False):
    """Build the feature-pyramid-extractor (FPE) pose network (Table II - 6th).

    A shared convolutional extractor (conv1..conv3) acts on each image;
    one prediction block per pyramid level consumes the concatenated
    feature maps of the two images.

    Args:
        list_blocks_types: one motion type per network block, each of
            'trans', 'rot', 'tilt' or 'rot&trans'.
        img_height, img_width: full-resolution input image size; each
            pyramid level gets its own warper at half the previous size.
        device: torch device handed to warp.WarpImg.
        self_supervised: stored on self.self_sup (used elsewhere).
        normalization: accepted for interface compatibility; not used here.
        share_fc_weights: share one fully-connected head across blocks
            (only possible when FC input sizes and DoF counts all match).
        fc_dropout_rate: accepted for interface compatibility; not used here.
        show_img: open OpenCV debug windows 'before'/'after'.
        trace_model: stored flag (presumably for torch.jit tracing —
            confirm against callers).

    Raises:
        ValueError: for an unknown motion type.
    """
    super().__init__()
    self.device = device
    self.max_pyramid_level = len(list_blocks_types) + 1  # each block corresponds to one pyramid level, '+1' to correspond to Table II - 6th
    self.num_blocks = len(list_blocks_types)
    self.show_img = show_img
    self.share_fc = share_fc_weights
    self.trace_model = trace_model
    self.self_sup = self_supervised

    # One warper per pyramid level: 3 warpers for the (multi-channel)
    # feature maps and 1 warper for the image itself (used by img_show).
    self.img_warper_dict = {}
    for pyramid_level in range(self.max_pyramid_level):  # NOTE for now, each pyramid level has same posenet blocks
        print("Initializing PoseNet and ImgWarper for pyramid level {} ...".format(pyramid_level))
        input_tensor_height = int(img_height / (2**pyramid_level))
        input_tensor_width = int(img_width / (2**pyramid_level))
        self.img_warper_dict[pyramid_level] = warp.WarpImg(img_height=input_tensor_height, img_width=input_tensor_width, device=self.device)

    # Degrees of freedom each block must predict, derived from its motion type.
    self.dof = []
    for motion in list_blocks_types:
        if motion == 'trans' or motion == 'rot':
            self.dof.append(3)  # NOTE degree-of-freedom to predict (for now translation only)
        elif motion == 'tilt':
            self.dof.append(2)
        elif motion == 'rot&trans':
            self.dof.append(6)
        else:
            # FIX: an unknown motion type used to be silently skipped, which
            # misaligned self.dof with the blocks and caused a later IndexError.
            raise ValueError("Unknown motion type: {}".format(motion))

    if self.show_img:
        cv2.namedWindow('before', cv2.WINDOW_NORMAL)
        cv2.namedWindow('after', cv2.WINDOW_NORMAL)

    conv_planes = [16, 32, 64, 128, 256]
    linear_inputs = [5120, 5120, 5120]

    # feature pyramid extractor (acts on each image respectively) (Table II - 6th)
    # Trailing comments give the assumed spatial size after each conv — TODO confirm.
    self.conv1 = conv(1, conv_planes[0], kernel_size=7, stride=4)  # 56 80
    self.conv2 = conv(conv_planes[0], conv_planes[1], kernel_size=5, stride=2)  # 28 40
    self.conv3 = conv(conv_planes[1], conv_planes[2], stride=2)  # 14 20

    # Pose prediction blocks: each takes the concatenated features of the
    # two images (hence the '* 2' on the input channel count).
    self.convs_block_1 = nn.Sequential(  # 14 20
        conv(conv_planes[2] * 2, conv_planes[3], stride=2),  # 7 10
        conv(conv_planes[3], conv_planes[4], stride=2)  # 4 5
    )
    self.linear_input_1 = linear_inputs[0]
    self.fc_block_1 = nn.Linear(self.linear_input_1, self.dof[0], bias=True)

    self.convs_block_2 = nn.Sequential(  # 28 40
        conv(conv_planes[1] * 2, conv_planes[2], stride=2),  # 14 20
        conv(conv_planes[2], conv_planes[3], stride=2),  # 7 10
        conv(conv_planes[3], conv_planes[4], stride=2)  # 4 5
    )
    if self.share_fc:
        assert (linear_inputs[1] == linear_inputs[0])
        self.fc_block_2 = self.fc_block_1
    else:
        self.linear_input_2 = linear_inputs[1]
        self.fc_block_2 = nn.Linear(self.linear_input_2, self.dof[1], bias=True)

    self.convs_block_3 = nn.Sequential(  # 56 80
        conv(conv_planes[0] * 2, conv_planes[1], kernel_size=5, stride=2),  # 28 40
        conv(conv_planes[1], conv_planes[2], stride=2),  # 14 20
        conv(conv_planes[2], conv_planes[3], stride=2),  # 7 10
        conv(conv_planes[3], conv_planes[4], stride=2)  # 4 5
    )
    if self.share_fc:
        assert (linear_inputs[2] == linear_inputs[0])
        self.fc_block_3 = self.fc_block_1
    else:
        self.linear_input_3 = linear_inputs[2]
        self.fc_block_3 = nn.Linear(self.linear_input_3, self.dof[2], bias=True)

    self.init_weights()
    print("FPE PoseNet has {} image pyramid levels.".format(self.num_blocks))
    print("PoseNet blocks are:", list_blocks_types)
    # print("Input fully connected:", linear_inputs)
    print("Feature Pyramids Extractor Network is initialized!")