def classify(self, video_name, model_mask=None, verbose=False):
    """Classify a video registered in the data manager.

    Args:
        video_name: name of the video known to the data manager.
        model_mask: optional sequence of booleans, one per network;
            a False entry disables the corresponding network.
        verbose: when True, print per-sample and total timing.

    Returns:
        Tuple (final_scores, model_scores, all_scores, total_time), or
        (None, None, None, None) if no frame sample could be scored.
    """
    video_idx = self.__data_manager.get_video_idx_by_name(video_name)

    # NOTE(review): "vidoe" is presumably a typo for "video" in the data
    # manager's API — kept as-is to match the existing method name.
    rgb_frm_it = self.__data_manager.vidoe_frame_iterator(
        video_idx, frame_type=0, batch_size=1, step=5)
    flow_frm_it = None
    if self.__need_flow:
        flow_frm_it = self.__data_manager.vidoe_frame_iterator(
            video_idx, frame_type=3, batch_size=10, step=1)

    all_scores = []
    all_start = time.clock()
    cnt = 0

    # Build the per-network run mask; n_model counts enabled networks.
    mask = [True] * self.__num_net
    n_model = self.__num_net
    if model_mask is not None:
        for i in range(len(model_mask)):
            mask[i] = model_mask[i]
            if not mask[i]:
                n_model -= 1

    for rgb_stack in rgb_frm_it:
        start = time.clock()
        cnt += 1
        frm_scores = []
        flow_stack = None
        if self.__need_flow:
            assert flow_frm_it is not None
            # Builtin next() instead of the Python-2-only .next() method.
            flow_stack = next(flow_frm_it)
            if len(flow_stack) < 10:
                # The temporal net's input has 10 channels; discard the
                # incomplete stack at the end of the video.
                continue

        for net, run, in_type, data_aug in zip(
                self.__net_vec, mask, self.__input_type, self.__data_aug):
            if not run:
                continue
            if in_type == 0:
                # RGB input: score a single frame.
                frm_scores.append(
                    net.predict_single_frame(rgb_stack[:1],
                                             self.__score_name,
                                             over_sample=data_aug))
            elif in_type == 1:
                # Optical-flow input: score the 10-channel flow stack.
                assert flow_stack is not None
                frm_scores.append(
                    net.predict_single_flow_stack(flow_stack,
                                                  self.__score_name,
                                                  over_sample=data_aug))

        all_scores.append(frm_scores)
        elapsed = time.clock() - start
        if verbose:
            print("frame sample {}: {} second".format(cnt, elapsed))

    # all_scores is a list over frame samples, each entry a list over the
    # enabled models' score arrays.
    if len(all_scores) == 0:
        if verbose:
            print('warning: no frames found for ' + video_name)
        return None, None, None, None

    # Aggregate frame-wise scores per model, then fuse across models using
    # the weights of the enabled networks only.
    model_scores = []
    for i in range(n_model):
        model_scores.append(
            sliding_window_aggregation_func(
                np.array([x[i] for x in all_scores]), norm=False))

    final_scores = default_fusion_func(
        np.zeros_like(model_scores[0]), model_scores,
        [w for w, m in zip(self.__net_weights, mask) if m])

    total_time = time.clock() - all_start
    if verbose:
        print("total time: {} second".format(total_time))
    return final_scores, model_scores, all_scores, total_time
def _classify_from_file(self, filename, model_mask, cache_manager=None):
    """Classify a video file on disk, optionally reusing cached intermediates.

    Args:
        filename: path of the video file.
        model_mask: optional sequence of booleans, one per network;
            a False entry disables the corresponding network.
        cache_manager: optional cache manager; previously extracted frame
            and flow stacks are reused when present, and newly computed
            ones are stored for later runs.

    Returns:
        Tuple (final_scores, all_scores, total_time).
    """
    vid_info = _dummy_vid_info()
    vid_info.path = filename
    video_proc = VideoProc(vid_info)
    video_proc.open_video(True)

    frm_it = None
    cached_flow = None
    if cache_manager is not None:
        frm_it = cache_manager.load(videoname=filename, type="framestack")
        cached_flow = cache_manager.load(videoname=filename, type="flowstack")
    # Remember whether frames came from the cache so we don't dump the
    # identical data back at the end.
    frames_from_cache = frm_it is not None

    if frm_it is None:
        # Interval of 30 is roughly 1 FPS.
        frm_it = video_proc.frame_iter(timely=False, ignore_err=True,
                                       interval=30,
                                       length=6 if self.__need_flow else 1,
                                       new_size=(340, 256))

    all_scores = []
    all_start = time.clock()

    # Build the per-network run mask; n_model counts enabled networks.
    mask = [True] * self.__num_net
    n_model = self.__num_net
    if model_mask is not None:
        for i in range(len(model_mask)):
            mask[i] = model_mask[i]
            if not mask[i]:
                n_model -= 1

    frame_cache = []
    flow_cache = []
    cnt = 0
    for frm_stack in frm_it:
        if cache_manager is not None and not frames_from_cache:
            frame_cache.append(frm_stack)
        cnt += 1
        frm_scores = []
        flow_stack = None
        for net, run, in_type, conv_support, net_input_size in \
                zip(self.__net_vec, mask, self.__input_type,
                    self.__conv_support, self.__input_size):
            if not run:
                continue
            # Floor division keeps the size integral on Python 3 as well
            # (Python 2's int '/' already floored).
            frame_size = (340 * net_input_size // 224,
                          256 * net_input_size // 224)
            if in_type == 0:
                # RGB input: fully-convolutional nets skip over-sampling
                # and take a resized frame instead.
                frm_scores.append(
                    net.predict_single_frame(
                        frm_stack[:1], self.__score_name,
                        over_sample=not conv_support,
                        frame_size=None if net_input_size == 224
                        else frame_size))
            elif in_type == 1:
                # Flow input: extract (or fetch cached) flow lazily, at
                # most once per frame stack.
                if flow_stack is None:
                    if cached_flow is not None:
                        flow_stack = cached_flow[cnt - 1]
                    else:
                        flow_stack = self.__flow_extractor.extract_flow(
                            frm_stack, frame_size)
                        if cache_manager is not None:
                            flow_cache.append(flow_stack)
                frm_scores.append(
                    net.predict_single_flow_stack(
                        flow_stack, self.__score_name,
                        over_sample=not conv_support))
        all_scores.append(frm_scores)

    # Persist any newly produced intermediates for future calls.
    if cache_manager is not None:
        if len(frame_cache) != 0:
            cache_manager.dump(frame_cache, filename, "framestack")
        if len(flow_cache) != 0:
            cache_manager.dump(flow_cache, filename, "flowstack")

    # Aggregate frame-wise scores per model, then fuse across models using
    # the weights of the enabled networks only.
    agg_scores = []
    for i in range(n_model):
        agg_scores.append(
            sliding_window_aggregation_func(
                np.array([x[i] for x in all_scores]), norm=False))

    final_scores = default_fusion_func(
        np.zeros_like(agg_scores[0]), agg_scores,
        [w for w, m in zip(self.__net_weights, mask) if m])

    total_time = time.clock() - all_start
    print('{0} processed.'.format(filename))
    return final_scores, all_scores, total_time
def _classify_from_file(self, filename, model_mask):
    """Classify a video file on disk (no caching of intermediates).

    NOTE(review): this method has the same name as the cache-aware
    variant defined earlier in this file; if both live in the same class
    the later definition shadows the earlier one — confirm which is
    intended to survive.

    Args:
        filename: path of the video file.
        model_mask: optional sequence of booleans, one per network;
            a False entry disables the corresponding network.

    Returns:
        Tuple (final_scores, all_scores, total_time).
    """
    vid_info = _dummy_vid_info()
    vid_info.path = filename
    video_proc = VideoProc(vid_info)
    video_proc.open_video(True)

    # Interval of 30 is roughly 1 FPS.
    frm_it = video_proc.frame_iter(timely=False, ignore_err=True,
                                   interval=30,
                                   length=6 if self.__need_flow else 1,
                                   new_size=(340, 256))

    all_scores = []
    all_start = time.clock()
    cnt = 0

    # Build the per-network run mask; n_model counts enabled networks.
    mask = [True] * self.__num_net
    n_model = self.__num_net
    if model_mask is not None:
        for i in range(len(model_mask)):
            mask[i] = model_mask[i]
            if not mask[i]:
                n_model -= 1

    for frm_stack in frm_it:
        start = time.clock()
        cnt += 1
        frm_scores = []
        flow_stack = None
        for net, run, in_type, conv_support, net_input_size in \
                zip(self.__net_vec, mask, self.__input_type,
                    self.__conv_support, self.__input_size):
            if not run:
                continue
            # Floor division keeps the size integral on Python 3 as well
            # (Python 2's int '/' already floored).
            frame_size = (340 * net_input_size // 224,
                          256 * net_input_size // 224)
            if in_type == 0:
                # RGB input: fully-convolutional nets skip over-sampling
                # and take a resized frame instead.
                frm_scores.append(
                    net.predict_single_frame(
                        frm_stack[:1], self.__score_name,
                        over_sample=not conv_support,
                        frame_size=None if net_input_size == 224
                        else frame_size))
            elif in_type == 1:
                # Flow input: extract flow lazily, at most once per stack.
                if flow_stack is None:
                    flow_stack = self.__flow_extractor.extract_flow(
                        frm_stack, frame_size)
                frm_scores.append(
                    net.predict_single_flow_stack(
                        flow_stack, self.__score_name,
                        over_sample=not conv_support))
        all_scores.append(frm_scores)
        elapsed = time.clock() - start
        # print() function for consistency with the rest of the file and
        # Python 3 compatibility; output is identical on Python 2.
        print("frame sample {}: {} second".format(cnt, elapsed))

    # Aggregate frame-wise scores per model, then fuse across models using
    # the weights of the enabled networks only.
    agg_scores = []
    for i in range(n_model):
        agg_scores.append(
            sliding_window_aggregation_func(
                np.array([x[i] for x in all_scores]), norm=False))

    final_scores = default_fusion_func(
        np.zeros_like(agg_scores[0]), agg_scores,
        [w for w, m in zip(self.__net_weights, mask) if m])

    total_time = time.clock() - all_start
    print("total time: {} second".format(total_time))
    return final_scores, all_scores, total_time