def forward(self, x):
    h = self.conv1(x)    # x224 -> x112
    h = self.maxpool(h)  # x112 -> x56
    h = self.conv2(h)    # x56 -> x56
    h = self.conv3(h)    # x56 -> x28
    h = self.conv4(h)    # x28 -> x14
    h = self.conv5(h)    # x14 -> x7
    h = self.tail(h)

    coords, heatmaps, probabilities = None, None, None
    if self.num_coords > 0:
        coords, heatmaps, probabilities = self.coord_layers(h)

    h_ens = F.avg_pool3d(h, (1, self.s_dim_in // 32, self.s_dim_in // 32), (1, 1, 1))
    if hasattr(self, 'dropout'):
        h_ens = self.dropout(h_ens)
    h_ens = h_ens.view(h_ens.shape[0], -1)
    attention = self.attn(h_ens)
    h = attention.unsqueeze(-1).unsqueeze(-1).unsqueeze(1) * h

    if not self.training and self.ensemble_eval:  # not fully supported yet
        h_ens = F.avg_pool3d(h, (1, self.s_dim_in // 32, self.s_dim_in // 32), (1, 1, 1))
        h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
        h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]

    h = self.globalpool(h)
    h = h.view(h.shape[0], -1)
    h_out = self.classifier_list(h)

    objects = None
    # if self.num_objects:
    #     objects = [getattr(self, 'object_presence_layer_{}'.format(ii))(h)
    #                for ii in range(len(self.num_objects))]
    cat_obj = None
    # if self.num_obj_cat:
    #     cat_obj = [getattr(self, 'objcat_presence_layer_{}'.format(ii))(h)
    #                for ii in range(len(self.num_obj_cat))]

    if not self.training and self.ensemble_eval:
        h_out = [h_out, h_ens]

    return h_out, coords, heatmaps, probabilities, objects, cat_obj
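
# Hedged usage sketch (assumptions only: `Model` is a hypothetical stand-in for the
# class this forward() belongs to, built with s_dim_in=224 and 16-frame clips;
# nothing but the forward() signature and return tuple is taken from this file):
#
#   model = Model(...).eval()
#   clip = torch.randn(2, 3, 16, 224, 224)            # B x C x T x H x W
#   h_out, coords, heatmaps, probs, objects, cat_obj = model(clip)
#   # h_out: per-task logits, or [logits, per-frame ensemble logits] when
#   # ensemble_eval is enabled outside training.
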
def forward(self, h):
    # h is a volume of B x 768 x 8 x 7 x 7 -> 8 x B x 768
    batch_size = h.size(0)
    feat_size = h.size(1)
    h = F.avg_pool3d(h, (1, h.size(3), h.size(4)), (1, 1, 1))  # B x 768 x 8 x 1 x 1
    h = h.view(h.shape[0], h.shape[1], -1)  # B x 768 x 8
    h = h.transpose(1, 2).transpose(0, 1)   # 8 x B x 768

    h_temp = []
    for i, cls_task_size in enumerate(self.num_classes):
        encoder = getattr(self, 'encoder_{}'.format(i))
        h0 = torch.zeros(self.num_lstm_layers, batch_size,
                         self.hidden_multiplier * feat_size, device=h.device)
        c0 = torch.zeros(self.num_lstm_layers, batch_size,
                         self.hidden_multiplier * feat_size, device=h.device)
        encoder_out, (ht, ct) = encoder(h, (h0, c0))
        attn_decoder = getattr(self, 'attn_decoder_{}'.format(i))
        decoder_out = attn_decoder(h, encoder_out)
        h_temp.append(decoder_out[-1])
    h_temp = torch.cat(h_temp, dim=1)

    if hasattr(self, 'dropout'):
        h_temp = self.dropout(h_temp)

    h_out = []
    for i in range(len(self.num_classes)):
        fc = getattr(self, 'classifier_{}'.format(i))
        h_out.append(fc(h_temp))
    return h_out
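
# Hedged usage sketch (assumption: `head` is a hypothetical instance of this
# LSTM/attention temporal head; only the B x 768 x 8 x 7 x 7 input layout noted in
# the comment above is taken from the code):
#
#   feats = torch.randn(4, 768, 8, 7, 7)    # backbone tail features, B x C x T x H x W
#   logits = head(feats)                    # list with one [B x num_classes[i]] tensor per task
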
def forward(self, x, upto=None):
    if upto is None:
        assert x.shape[2] == 16
        h = self.conv1(x)    # x224 -> x112
        h = self.maxpool(h)  # x112 -> x56
        h = self.conv2(h)    # x56 -> x56
        h = self.conv3(h)    # x56 -> x28
        h = self.conv4(h)    # x28 -> x14
        h = self.conv5(h)    # x14 -> x7
        h = self.tail(h)

        coords, heatmaps, probabilities = None, None, None
        if self.num_coords > 0:
            coords, heatmaps, probabilities = self.coord_layers(h)

        if not self.training and self.ensemble_eval:  # not fully supported yet
            h_ens = F.avg_pool3d(h, (1, self.s_dim_in // 32, self.s_dim_in // 32), (1, 1, 1))
            h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
            h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]

        h = self.globalpool(h)
        h = h.view(h.shape[0], -1)
        h_out = self.classifier_list(h)

        objects = None
        if self.num_objects:
            objects = [getattr(self, 'object_presence_layer_{}'.format(ii))(h)
                       for ii in range(len(self.num_objects))]
        cat_obj = None
        if self.num_obj_cat:
            cat_obj = [getattr(self, 'objcat_presence_layer_{}'.format(ii))(h)
                       for ii in range(len(self.num_obj_cat))]

        if not self.training and self.ensemble_eval:
            h_out = [h_out, h_ens]

        return h_out, coords, heatmaps, probabilities, objects, cat_obj
    elif upto == 'shared':
        return self.forward_shared_block(x)
    elif upto == 'cls':
        return self.forward_cls_layers(x)
    elif upto == 'coord':
        return self.forward_coord_layers(x)

def forward(self, x):
    h_diff = self.conv1_tdn(roll(x, shift=-1, dim=2) - x)
    h_diff = self.maxpool(h_diff)  # x112 -> x56

    h = self.conv1(x)    # x224 -> x112
    h = self.maxpool(h)  # x112 -> x56

    h_diff = self.conv2_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
    h = self.conv2(h)    # x56 -> x56
    h_diff = self.conv3_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
    h = self.conv3(h)    # x56 -> x28
    h_diff = self.conv4_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
    h = self.conv4(h)    # x28 -> x14
    h_diff = self.conv5_tdn(h_diff + (roll(h, shift=-1, dim=2) - h))
    h = self.conv5(h)    # x14 -> x7

    h = self.tail(h)
    h_diff = self.tail_tdn(h_diff)

    coords, heatmaps, probabilities = None, None, None
    if self.num_coords > 0:
        coords, heatmaps, probabilities = self.coord_layers(h)

    if not self.training and self.ensemble_eval:  # not fully supported yet
        h_ens = F.avg_pool3d(h, (1, self.s_dim_in // 32, self.s_dim_in // 32), (1, 1, 1))
        h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
        h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]

    h = self.globalpool(h)
    h = h.view(h.shape[0], -1)
    h_out = self.classifier_list(h)

    h_diff = self.globalpool(h_diff)
    h_diff = h_diff.view(h_diff.shape[0], -1)
    h_diff_out = self.classifier_list_tdn(h_diff)

    objects = None
    # if self.num_objects:
    #     objects = [getattr(self, 'object_presence_layer_{}'.format(ii))(h)
    #                for ii in range(len(self.num_objects))]
    cat_obj = None
    # if self.num_obj_cat:
    #     cat_obj = [getattr(self, 'objcat_presence_layer_{}'.format(ii))(h)
    #                for ii in range(len(self.num_obj_cat))]

    if not self.training and self.ensemble_eval:
        h_out = [h_out, h_ens]
    else:
        h_out = [h_out, h_diff_out]

    return h_out, coords, heatmaps, probabilities, objects, cat_obj

def backprop_kernel_indices(indices_l, k_l, a_l1, thres, topn=None):
    # Initialisation
    indices_l1 = {}

    # Special architectures that include cross-channel convolutions, separate branches or fibres.
    if len(k_l) > 1:
        # Get the shapes of all kernel tensors of the layer
        kernels_shapes_0 = [k.shape[0] for k in k_l]
        kernels_shapes_1 = [k.shape[1] for k in k_l]
        # Cumulative output-channel offsets per kernel tensor
        kernels_out_list = [0] + [sum(kernels_shapes_0[:idx + 1]) for idx in range(len(kernels_shapes_0))]
        # Cumulative input-channel offsets per kernel tensor
        kernels_in_list = [0] + [sum(kernels_shapes_1[:idx + 1]) for idx in range(len(kernels_shapes_1))]
    else:
        # Single kernel tensor: output/input channel offsets
        kernels_out_list = [0, k_l[0].shape[0]]
        kernels_in_list = [0, k_l[0].shape[1]]

    # Compute the kernel/activation overlap for each kernel k_l(i), where i is in indices_l
    for i in indices_l:
        tmp = i
        # Normal convolutions over the entire activation maps
        if len(k_l) == 1:
            kernel_indx = 0
        # Convolutions with varying channel sizes: find the kernel tensor that index i falls into
        else:
            kernel_indx = [idx for idx, _ in enumerate(kernels_shapes_0)
                           if (kernels_out_list[idx + 1] - i) > 0][0]
            tmp = i - kernels_out_list[kernel_indx]

        # Downsample the spatio-temporal dimensions to create a global (per-channel)
        # representation of the i-th kernel and of the layer l-1 activation map.
        _, dk, hk, wk = list(k_l[kernel_indx][tmp].size())
        _, da, ha, wa = list(a_l1[0].size())
        kernel = F.avg_pool3d(k_l[kernel_indx][tmp], (dk, hk, wk)).squeeze(-1).squeeze(-1).squeeze(-1)
        act_map = F.avg_pool3d(a_l1[0], (da, ha, wa)).squeeze(-1).squeeze(-1).squeeze(-1)

        # If group convolutions are used, inflate the kernel so it lives in the same
        # feature space as the input: [in_channels/groups] -> [in_channels]
        groups = act_map.shape[0] // kernels_in_list[-1]
        if groups > 1:
            kernel = torch.cat([k.repeat(groups) for k in kernel], 0)

        # Select the activations corresponding to this kernel's channels - special architectures
        if len(k_l) > 1:
            lower_lim = kernels_in_list[kernel_indx]
            upper_lim = kernels_in_list[kernel_indx + 1]
            act_map = act_map[lower_lim:upper_lim]

        # Pointwise multiplication of the pooled kernel and activation, normalised to [0, 1]
        pooled = torch.mul(kernel, act_map)
        pooled = pooled / pooled.sum(0).expand_as(pooled)
        base = torch.min(pooled)
        pooled_range = torch.max(pooled) - base
        pooled = (pooled - base) / pooled_range

        # Find the indices whose value exceeds the threshold
        if topn is None:
            indices_l1_i = [j for j, feat in enumerate(pooled) if feat >= thres]
        else:
            # Keep the values above the threshold, sort in ascending order and select the top n
            values = sorted(float(feat) for feat in pooled if feat >= thres)[-topn:]
            # Find the indices of the top-n values in the pooled tensor
            indices_l1_i = [j for j, feat in enumerate(pooled) if float(feat) in values]

        # Append indices to the dictionary
        indices_l1[i] = indices_l1_i
    return indices_l1
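
# Hedged usage sketch for backprop_kernel_indices (assumption: `conv_l` is a
# hypothetical Conv3d layer whose kernels are traced back, and `act_l1` is the
# 4-D [C x D x H x W] activation of the preceding layer for one clip; both names
# are placeholders, not taken from this file):
#
#   k_l = [conv_l.weight.data]      # list of weight tensors, each [out, in, d, h, w]
#   a_l1 = [act_l1]                 # list holding the preceding layer's activation
#   linked = backprop_kernel_indices(indices_l=[3, 17], k_l=k_l, a_l1=a_l1,
#                                    thres=0.6, topn=5)
#   # linked == {3: [...], 17: [...]}: for each starting kernel index, the channel
#   # indices of layer l-1 whose pooled kernel-activation product passes the threshold.
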
def forward(self, x):
    h = self.conv1(x)    # x224 -> x112
    h = self.maxpool(h)  # x112 -> x56
    h = self.conv2(h)    # x56 -> x56
    h = self.conv3(h)    # x56 -> x28
    h = self.conv4(h)    # x28 -> x14
    h = self.conv5(h)    # x14 -> x7
    h = self.tail(h)

    coords, heatmaps, probabilities = None, None, None
    if self.num_coords > 0:
        coords, heatmaps, probabilities = self.coord_layers(h)

    h_ens = F.avg_pool3d(h, (1, self.s_dim_in // 32, self.s_dim_in // 32), (1, 1, 1))
    if hasattr(self, 'dropout'):
        h_ens = self.dropout(h_ens)

    # first temporal attention
    # h_ens = h_ens.view(h_ens.shape[0], h_ens.shape[1], -1)
    # h_probs = self.temporal_attention(h_ens)  # B x t_dim x num_cls_tasks

    # temporal attention connected
    h_ens = h_ens.view(h_ens.shape[0], -1)
    h_probs = self.temporal_attention(h_ens)  # B x t_dim x num_cls_tasks
    h_ens = h_ens.view(h_ens.shape[0], self.c5_out, -1)

    with torch.no_grad():  # TemporalAttentionCon and TemporalAttention
        h_ens_out = []
        for ii in range(self.t_dim_in // 2):
            h_ens_task = []
            for cls_task, cl in enumerate(self.classifier_list):
                h_ens_task.append(cl(h_ens[:, :, ii]))
            h_ens_out.append(h_ens_task)
        h_ens = h_ens_out

    # old temporal attention
    # h_ens = [self.classifier_list(h_ens[:, :, ii]) for ii in range(h_ens.shape[2])]
    # t_dim (list) x num_cls_tasks (list) x [B x cls_tasks] (Tensor)

    h_out = []
    for cls_task, cl in enumerate(self.classifier_list):
        if self.training:
            h_temp = self.globalpool(h)
        else:
            h_temp = self.globalpool(
                h * h_probs[:, :, cls_task].unsqueeze(-1).unsqueeze(-1).unsqueeze(1))
        h_temp = h_temp.view(h_temp.shape[0], -1)
        h_out.append(cl(h_temp))

    # h = self.globalpool(h)
    # h = h.view(h.shape[0], -1)
    # h_out = self.classifier_list(h)

    objects = None
    # if self.num_objects:
    #     objects = [getattr(self, 'object_presence_layer_{}'.format(ii))(h)
    #                for ii in range(len(self.num_objects))]
    cat_obj = None
    # if self.num_obj_cat:
    #     cat_obj = [getattr(self, 'objcat_presence_layer_{}'.format(ii))(h)
    #                for ii in range(len(self.num_obj_cat))]

    h_out = [h_out, h_ens, h_probs]
    return h_out, coords, heatmaps, probabilities, objects, cat_obj
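
# Hedged usage sketch (assumption: `model` is a hypothetical instance of this
# temporal-attention variant with s_dim_in=224 and t_dim_in=16):
#
#   clip = torch.randn(2, 3, 16, 224, 224)
#   h_out, coords, heatmaps, probs, objects, cat_obj = model(clip)
#   per_task_logits, per_frame_logits, attn_probs = h_out
#   # per_frame_logits: t_dim_in//2 entries, each a list of per-task logits computed
#   # without gradients; attn_probs: B x t_dim x num_cls_tasks attention weights.
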