def det_process(self, feature):
    with torch.no_grad():
        if self.rgb_model_backbone is not None:
            rgb_output = self.rgb_model_branch(
                feature['rgb_features'], feature['rgb_features_flip'])
            rgb_hm = rgb_output[0]['hm'].sigmoid_()
            rgb_wh = rgb_output[0]['wh']
            rgb_mov = rgb_output[0]['mov']
            if self.opt.flip_test:
                rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                rgb_wh_f = rgb_output[1]['wh']
                rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

        if self.flow_model_backbone is not None:
            flow_output = self.flow_model_branch(
                feature['flow_features'], feature['flow_features_flip'])
            flow_hm = flow_output[0]['hm'].sigmoid_()
            flow_wh = flow_output[0]['wh']
            flow_mov = flow_output[0]['mov']
            if self.opt.flip_test:
                flow_hm_f = flow_output[1]['hm'].sigmoid_()
                flow_wh_f = flow_output[1]['wh']
                flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

        if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
            # Two-stream late fusion: convex combination of the flow and RGB
            # predictions, weighted by the per-head *_fusion_rgb options.
            hm = (1 - self.opt.hm_fusion_rgb) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
            wh = (1 - self.opt.wh_fusion_rgb) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
            mov = (1 - self.opt.mov_fusion_rgb) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
        elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
            hm = flow_hm
            wh = flow_wh
            mov = flow_mov
        elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
            hm = rgb_hm
            wh = rgb_wh
            mov = rgb_mov
        else:
            raise RuntimeError('No model exists.')

        detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
        return detections
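
# All the fusion branches in this file apply the same convex combination to
# 'hm', 'wh' and 'mov'. A minimal standalone sketch of that pattern;
# `fuse_streams` and its argument names are illustrative, not part of this
# codebase:
#
# import torch
#
# def fuse_streams(flow_t, rgb_t, rgb_weight):
#     # Late fusion: weight the RGB stream by rgb_weight and the flow
#     # stream by (1 - rgb_weight); rgb_weight is expected in [0, 1].
#     return (1 - rgb_weight) * flow_t + rgb_weight * rgb_t
#
# # Usage mirrors the expressions above, e.g.:
# #   hm = fuse_streams(flow_hm, rgb_hm, self.opt.hm_fusion_rgb)
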
def process(self, images, flows):
    with torch.no_grad():
        if self.rgb_model is not None:
            rgb_output = self.rgb_model(images)
            # rgb_hm = rgb_output[0]['hm'].sigmoid_()
            rgb_hm = rgb_output[0]['hm']  # sigmoid deliberately skipped (see commented-out line above)
            rgb_wh = rgb_output[0]['wh']
            rgb_mov = rgb_output[0]['mov']
            # ADDED: one additional loss
            # rgb_hmc = rgb_output[0]['hmc']
            if self.opt.flip_test:
                rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                rgb_wh_f = rgb_output[1]['wh']
                rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

        if self.flow_model is not None:
            flow_output = self.flow_model(flows)
            flow_hm = flow_output[0]['hm'].sigmoid_()
            flow_wh = flow_output[0]['wh']
            flow_mov = flow_output[0]['mov']
            if self.opt.flip_test:
                flow_hm_f = flow_output[1]['hm'].sigmoid_()
                flow_wh_f = flow_output[1]['wh']
                flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

        if self.pa_model is not None:
            pa_output = self.pa_model(flows)
            pa_hm = pa_output[0]['hm'].sigmoid_()
            pa_wh = pa_output[0]['wh']
            pa_mov = pa_output[0]['mov']

        if self.flow_model is not None and self.rgb_model is not None:
            hm = (1 - self.opt.hm_fusion_rgb) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
            wh = (1 - self.opt.wh_fusion_rgb) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
            mov = (1 - self.opt.mov_fusion_rgb) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
        elif self.flow_model is not None and self.rgb_model is None and self.pa_model is None:
            hm = flow_hm
            wh = flow_wh
            mov = flow_mov
        elif self.rgb_model is not None and self.flow_model is None and self.pa_model is None:
            hm = rgb_hm
            wh = rgb_wh
            mov = rgb_mov
        # TODO: two stream for rgb + pa
        elif self.pa_model is not None and self.rgb_model is not None and self.flow_model is None:
            hm = (1 - self.opt.hm_fusion_rgb) * pa_hm + self.opt.hm_fusion_rgb * rgb_hm
            wh = (1 - self.opt.wh_fusion_rgb) * pa_wh + self.opt.wh_fusion_rgb * rgb_wh
            mov = (1 - self.opt.mov_fusion_rgb) * pa_mov + self.opt.mov_fusion_rgb * rgb_mov
        elif self.pa_model is not None and self.rgb_model is None and self.flow_model is None:
            hm = pa_hm
            wh = pa_wh
            mov = pa_mov
        else:
            raise RuntimeError('No model exists.')

        # ADDED: minus mem (only detect on current clip)
        # mov = None
        detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
        # hm = hm[:, 42:63, :, :]
        # detections = moc_decode_multihm(hm, wh, mov, N=self.opt.N, K=self.opt.K)
        return detections
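
# The flip_test branches above average the prediction on the original input
# with the prediction on a horizontally mirrored copy. A minimal sketch of
# that idea; `flip_tensor_sketch` is an assumption standing in for the
# repo's real flip_tensor helper, which is assumed to mirror the width axis:
#
# import torch
#
# def flip_tensor_sketch(x):
#     # Assumed behaviour: mirror an NCHW tensor along the width axis so the
#     # flipped-input prediction lines up spatially with the unflipped one.
#     return torch.flip(x, dims=[3])
#
# def flip_test_average(hm, hm_f):
#     # Average normal and mirrored predictions, as in the branches above.
#     return (hm + flip_tensor_sketch(hm_f)) / 2
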
def process(self, images, flows, video_tag):
    with torch.no_grad():
        if self.rgb_model_backbone is not None:
            if video_tag == 0:
                # First clip of a video: build the feature buffer from scratch.
                rgb_features = [
                    self.rgb_model_backbone(images[i]) for i in range(self.opt.K)
                ]
                rgb_features = [
                    self.rgb_model_deconv(rgb_features[i]) for i in range(self.opt.K)
                ]
                self.rgb_buffer = rgb_features
                if self.opt.flip_test:
                    rgb_features_flip = [
                        self.rgb_model_backbone(images[i + self.opt.K])
                        for i in range(self.opt.K)
                    ]
                    # Fixed: index with i, not i + self.opt.K, which would run
                    # past the end of this K-element list.
                    rgb_features_flip = [
                        self.rgb_model_deconv(rgb_features_flip[i])
                        for i in range(self.opt.K)
                    ]
                    self.rgb_buffer_flip = rgb_features_flip
            else:
                # Later clips: slide the window by dropping the oldest frame
                # feature and appending the newest one.
                del self.rgb_buffer[0]
                self.rgb_buffer.append(
                    self.rgb_model_deconv(
                        self.rgb_model_backbone(images[self.opt.K - 1])))
                if self.opt.flip_test:
                    del self.rgb_buffer_flip[0]
                    self.rgb_buffer_flip.append(
                        self.rgb_model_deconv(
                            self.rgb_model_backbone(images[-1])))
            rgb_output = self.rgb_model_branch(self.rgb_buffer, self.rgb_buffer_flip)
            # rgb_hm = rgb_output[0]['hm'].sigmoid_()
            rgb_hm = rgb_output[0]['hm']
            rgb_wh = rgb_output[0]['wh']
            rgb_mov = rgb_output[0]['mov']
            if self.opt.flip_test:
                rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                rgb_wh_f = rgb_output[1]['wh']
                rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

        if self.flow_model_backbone is not None:
            if video_tag == 0:
                flow_features = [
                    self.flow_model_backbone(flows[i]) for i in range(self.opt.K)
                ]
                self.flow_buffer = flow_features
                if self.opt.flip_test:
                    flow_features_flip = [
                        self.flow_model_backbone(flows[i + self.opt.K])
                        for i in range(self.opt.K)
                    ]
                    self.flow_buffer_flip = flow_features_flip
            else:
                del self.flow_buffer[0]
                self.flow_buffer.append(
                    self.flow_model_backbone(flows[self.opt.K - 1]))
                if self.opt.flip_test:
                    del self.flow_buffer_flip[0]
                    self.flow_buffer_flip.append(
                        self.flow_model_backbone(flows[-1]))
            flow_output = self.flow_model_branch(self.flow_buffer, self.flow_buffer_flip)
            flow_hm = flow_output[0]['hm'].sigmoid_()
            flow_wh = flow_output[0]['wh']
            flow_mov = flow_output[0]['mov']
            if self.opt.flip_test:
                flow_hm_f = flow_output[1]['hm'].sigmoid_()
                flow_wh_f = flow_output[1]['wh']
                flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

        if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
            hm = (1 - self.opt.hm_fusion_rgb) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
            wh = (1 - self.opt.wh_fusion_rgb) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
            mov = (1 - self.opt.mov_fusion_rgb) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
        elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
            hm = flow_hm
            wh = flow_wh
            mov = flow_mov
        elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
            hm = rgb_hm
            wh = rgb_wh
            mov = rgb_mov
        else:
            raise RuntimeError('No model exists.')

        detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
        return detections
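
# The video_tag == 0 / else branches above implement a sliding window over
# per-frame features: fill the buffer on the first clip, then drop the oldest
# entry and append the newest one. A minimal sketch of the same pattern with
# collections.deque (an alternative illustration, not what this code uses):
#
# from collections import deque
#
# def make_buffer(features):
#     # First clip of a video: buffer holds K per-frame feature tensors.
#     return deque(features, maxlen=len(features))
#
# def slide_buffer(buffer, new_feature):
#     # Later clips: maxlen makes append drop the oldest entry automatically,
#     # matching `del buffer[0]; buffer.append(...)` above.
#     buffer.append(new_feature)
#     return buffer
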
def process(self, images, flows, video_tag):
    with torch.no_grad():
        if self.rgb_model_backbone is not None:
            if video_tag == 0:
                rgb_features = [
                    self.rgb_model_backbone(images[i]) for i in range(self.opt.K)
                ]
                rgb_features = [
                    self.rgb_model_deconv(rgb_features[i]) for i in range(self.opt.K)
                ]
                self.rgb_buffer = rgb_features
                if self.opt.flip_test:
                    rgb_features_flip = [
                        self.rgb_model_backbone(images[i + self.opt.K])
                        for i in range(self.opt.K)
                    ]
                    # Fixed: index with i, not i + self.opt.K, which would run
                    # past the end of this K-element list.
                    rgb_features_flip = [
                        self.rgb_model_deconv(rgb_features_flip[i])
                        for i in range(self.opt.K)
                    ]
                    self.rgb_buffer_flip = rgb_features_flip
            else:
                del self.rgb_buffer[0]
                self.rgb_buffer.append(
                    self.rgb_model_deconv(
                        self.rgb_model_backbone(images[self.opt.K - 1])))
                if self.opt.flip_test:
                    del self.rgb_buffer_flip[0]
                    self.rgb_buffer_flip.append(
                        self.rgb_model_deconv(
                            self.rgb_model_backbone(images[-1])))
            rgb_output = self.rgb_model_branch(self.rgb_buffer, self.rgb_buffer_flip)
            # rgb_hm = rgb_output[0]['hm'].sigmoid_()
            rgb_hm = rgb_output[0]['hm']
            rgb_wh = rgb_output[0]['wh']
            rgb_mov = rgb_output[0]['mov']
            if self.opt.flip_test:
                rgb_hm_f = rgb_output[1]['hm'].sigmoid_()
                rgb_wh_f = rgb_output[1]['wh']
                rgb_hm = (rgb_hm + flip_tensor(rgb_hm_f)) / 2
                rgb_wh = (rgb_wh + flip_tensor(rgb_wh_f)) / 2

        if self.flow_model_backbone is not None:
            if video_tag == 0:
                flow_features = [
                    self.flow_model_backbone(flows[i]) for i in range(self.opt.K)
                ]
                self.flow_buffer = flow_features
                if self.opt.flip_test:
                    flow_features_flip = [
                        self.flow_model_backbone(flows[i + self.opt.K])
                        for i in range(self.opt.K)
                    ]
                    self.flow_buffer_flip = flow_features_flip
            else:
                del self.flow_buffer[0]
                self.flow_buffer.append(
                    self.flow_model_backbone(flows[self.opt.K - 1]))
                if self.opt.flip_test:
                    del self.flow_buffer_flip[0]
                    self.flow_buffer_flip.append(
                        self.flow_model_backbone(flows[-1]))
            flow_output = self.flow_model_branch(self.flow_buffer, self.flow_buffer_flip)
            flow_hm = flow_output[0]['hm'].sigmoid_()
            flow_wh = flow_output[0]['wh']
            flow_mov = flow_output[0]['mov']
            if self.opt.flip_test:
                flow_hm_f = flow_output[1]['hm'].sigmoid_()
                flow_wh_f = flow_output[1]['wh']
                flow_hm = (flow_hm + flip_tensor(flow_hm_f)) / 2
                flow_wh = (flow_wh + flip_tensor(flow_wh_f)) / 2

        if self.pa_model_backbone is not None:
            # added to allow reshape
            bb, cc, hh, ww = flows[0].size()
            if video_tag == 0:
                flows = torch.cat(flows, dim=0)
                flows_pa, flows_rgb = self.pa_model_PA(flows)
                pa_features = self.pa_model_backbone(flows_rgb, flows_pa)
                pa_features = self.pa_model_deconv(pa_features)
                self.pa_buffer = pa_features
                '''
                not used hence NOT IMPLEMENTED for now!
                if self.opt.flip_test:
                    rgb_features_flip = [self.rgb_model_backbone(images[i + self.opt.K]) for i in range(self.opt.K)]
                    rgb_features_flip = [self.rgb_model_deconv(rgb_features_flip[i + self.opt.K]) for i in range(self.opt.K)]
                    self.rgb_buffer_flip = rgb_features_flip
                '''
            else:
                # self.pa_buffer is a tensor here, not a list, so the list-style
                # update below no longer works:
                # del self.pa_buffer[0]
                # self.pa_buffer.append(self.pa_model_deconv(self.pa_model_PA(flows.view(-1, cc, hh, ww))))
                flow_cur_clip = flows[-1]
                flows_pa, flows_rgb = self.pa_model_PA(flow_cur_clip)
                pa_buffer_clip = self.pa_model_backbone(flows_rgb, flows_pa)
                pa_buffer_clip = self.pa_model_deconv(pa_buffer_clip)
                # Slide the window along the batch dimension instead.
                self.pa_buffer = torch.cat((self.pa_buffer[1:], pa_buffer_clip), dim=0)
                '''
                not used hence NOT IMPLEMENTED for now!
                if self.opt.flip_test:
                    del self.rgb_buffer_flip[0]
                    self.rgb_buffer_flip.append(self.rgb_model_deconv(self.rgb_model_backbone(images[-1])))
                '''
            pa_output = self.pa_model_branch(self.pa_buffer)
            # rgb_hm = rgb_output[0]['hm'].sigmoid_()
            pa_hm = pa_output[0]['hm']
            pa_wh = pa_output[0]['wh']
            pa_mov = pa_output[0]['mov']
            if self.opt.flip_test:
                # Fixed: the original block here reused rgb_output, which is
                # undefined when only the PA model is loaded; flip testing is
                # not implemented for the PA stream (see the commented-out
                # blocks above), so fail loudly instead.
                raise NotImplementedError('flip_test is not supported for the PA stream')

        if self.flow_model_backbone is not None and self.rgb_model_backbone is not None:
            hm = (1 - self.opt.hm_fusion_rgb) * flow_hm + self.opt.hm_fusion_rgb * rgb_hm
            wh = (1 - self.opt.wh_fusion_rgb) * flow_wh + self.opt.wh_fusion_rgb * rgb_wh
            mov = (1 - self.opt.mov_fusion_rgb) * flow_mov + self.opt.mov_fusion_rgb * rgb_mov
        elif self.flow_model_backbone is not None and self.rgb_model_backbone is None:
            hm = flow_hm
            wh = flow_wh
            mov = flow_mov
        elif self.rgb_model_backbone is not None and self.flow_model_backbone is None:
            hm = rgb_hm
            wh = rgb_wh
            mov = rgb_mov
        elif self.pa_model_backbone is not None and self.rgb_model_backbone is None and self.flow_model_backbone is None:
            hm = pa_hm
            wh = pa_wh
            mov = pa_mov
        else:
            raise RuntimeError('No model exists.')

        detections = moc_decode(hm, wh, mov, N=self.opt.N, K=self.opt.K)
        return detections  # size: (1, 100, 18); last dim == 4K + 1 + 1 (box, score, cls)
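
# Per the trailing comment above, each decoded detection row packs 4 * K box
# coordinates followed by a score and a class id (K = 4 gives the quoted last
# dim of 18). A minimal sketch of unpacking one tubelet under that assumed
# layout; `unpack_tubelet` is illustrative, not part of this codebase:
#
# def unpack_tubelet(det_row, K):
#     # det_row: 1-D tensor of length 4 * K + 2 -> (boxes, score, class id).
#     boxes = det_row[:4 * K].view(K, 4)  # assumed (x1, y1, x2, y2) per frame
#     score = det_row[4 * K].item()
#     cls_id = int(det_row[4 * K + 1].item())
#     return boxes, score, cls_id
#
# # e.g. boxes, score, cls_id = unpack_tubelet(detections[0, 0], self.opt.K)
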