def get_person_boxes(cfg, object_predictor, mid_frame, frame_provider):
    outputs = object_predictor(mid_frame)
    fields = outputs["instances"]._fields
    pred_classes = fields["pred_classes"]
    selection_mask = pred_classes == 0  # acquire person boxes
    pred_classes = pred_classes[selection_mask]
    pred_boxes = fields["pred_boxes"].tensor[selection_mask]
    scores = fields["scores"][selection_mask]
    boxes = cv2_transform.scale_boxes(
        cfg.DATA.TEST_CROP_SIZE,
        pred_boxes,
        frame_provider.display_height,
        frame_provider.display_width,
    )
    boxes = torch.cat(
        [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes], axis=1
    )
    return boxes, scores

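# Hedged usage sketch (illustrative, not part of the original source): how
# `get_person_boxes` might feed the detection head. Assumes `cfg`,
# `object_predictor` (a detectron2 DefaultPredictor), `mid_frame` (a BGR
# ndarray), `frame_provider`, `model`, and the preprocessed `inputs` are set
# up as in `demo(cfg)` below; `_example_person_boxes` is a hypothetical name.
def _example_person_boxes(cfg, object_predictor, mid_frame, frame_provider,
                          model, inputs):
    boxes, scores = get_person_boxes(
        cfg, object_predictor, mid_frame, frame_provider
    )
    if not len(boxes):
        # Empty scene: skip the action model, mirroring the demo loop below.
        return torch.tensor([])
    return model(inputs, boxes)
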
def obj_detect(self, mid_frame):
    outputs = self.object_predictor(mid_frame)
    fields = outputs["instances"]._fields
    pred_classes = fields["pred_classes"]
    selection_mask = pred_classes == 0  # acquire person boxes
    # pred_classes = pred_classes[selection_mask]
    pred_boxes = fields["pred_boxes"].tensor[selection_mask]
    # scores = fields["scores"][selection_mask]
    boxes = cv2_transform.scale_boxes(
        self.cfg.DATA.TEST_CROP_SIZE,
        pred_boxes,
        self.frame_provider.display_height,
        self.frame_provider.display_width,
    )
    boxes = torch.cat(
        [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes], axis=1
    )
    # return boxes
    self.queue_demo.put(boxes)

def detector(object_predictor, image, backbone, cfg, display_height,
             display_width):
    if backbone == 'yolo':
        boxes = object_predictor.detect_image(image)
        boxes = torch.as_tensor(boxes).float().cuda()
        return boxes
    else:
        outputs = object_predictor(image)
        fields = outputs["instances"]._fields
        pred_classes = fields["pred_classes"]
        selection_mask = pred_classes == 0  # acquire person boxes
        pred_classes = pred_classes[selection_mask]
        pred_boxes = fields["pred_boxes"].tensor[selection_mask]
        scores = fields["scores"][selection_mask]
        boxes = cv2_transform.scale_boxes(
            cfg.DATA.TEST_CROP_SIZE, pred_boxes, display_height, display_width
        )
        return boxes

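# Hedged note (illustrative): unlike `get_person_boxes`, `detector` returns
# raw [x1, y1, x2, y2] boxes without the leading batch-index column that the
# SlowFast detection head expects. The column can be prepended as in the
# other helpers in this file:
#
#     index_pad = torch.full((boxes.shape[0], 1), 0.0, device=boxes.device)
#     boxes = torch.cat([index_pad, boxes], dim=1)  # [idx, x1, y1, x2, y2]
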
def _images_and_boxes_preprocessing_cv2(self, imgs, boxes):
    """
    This function performs preprocessing for the input images and
    corresponding boxes for one clip with opencv as backend.

    Args:
        imgs (list of ndarrays): the images.
        boxes (ndarray): the boxes for the current clip.

    Returns:
        imgs (tensor): preprocessed images.
        boxes (ndarray): preprocessed boxes.
    """
    height, width, _ = imgs[0].shape

    boxes[:, [0, 2]] *= width
    boxes[:, [1, 3]] *= height
    boxes = cv2_transform.clip_boxes_to_image(boxes, height, width)

    # `transform.py` expects a list of np.array. However, for AVA, we only
    # have one np.array.
    boxes = [boxes]

    # The image now is in HWC, BGR format.
    if self._split == "train":  # "train"
        imgs, boxes = cv2_transform.random_short_side_scale_jitter_list(
            imgs,
            min_size=self._jitter_min_scale,
            max_size=self._jitter_max_scale,
            boxes=boxes,
        )
        imgs, boxes = cv2_transform.random_crop_list(
            imgs, self._crop_size, order="HWC", boxes=boxes
        )
        # random flip
        imgs, boxes = cv2_transform.horizontal_flip_list(
            0.5, imgs, order="HWC", boxes=boxes
        )
    elif self._split == "val":
        # Short side to test_scale. Non-local and STRG uses 256.
        imgs = [cv2_transform.scale(self._crop_size, img) for img in imgs]
        boxes = [
            cv2_transform.scale_boxes(self._crop_size, boxes[0], height, width)
        ]
        imgs, boxes = cv2_transform.spatial_shift_crop_list(
            self._crop_size, imgs, 1, boxes=boxes
        )
        if self._test_force_flip:
            imgs, boxes = cv2_transform.horizontal_flip_list(
                1, imgs, order="HWC", boxes=boxes
            )
    elif self._split == "test":
        # Short side to test_scale. Non-local and STRG uses 256.
        imgs = [cv2_transform.scale(self._crop_size, img) for img in imgs]
        boxes = [
            cv2_transform.scale_boxes(self._crop_size, boxes[0], height, width)
        ]
        if self._test_force_flip:
            imgs, boxes = cv2_transform.horizontal_flip_list(
                1, imgs, order="HWC", boxes=boxes
            )
    else:
        raise NotImplementedError(
            "Unsupported split mode {}".format(self._split)
        )

    # Convert image to CHW keeping BGR order.
    imgs = [cv2_transform.HWC2CHW(img) for img in imgs]

    # Image [0, 255] -> [0, 1].
    imgs = [img / 255.0 for img in imgs]

    imgs = [
        np.ascontiguousarray(
            # img.reshape((3, self._crop_size, self._crop_size))
            img.reshape((3, imgs[0].shape[1], imgs[0].shape[2]))
        ).astype(np.float32)
        for img in imgs
    ]

    # Do color augmentation (after divided by 255.0).
    if self._split == "train" and self._use_color_augmentation:
        if not self._pca_jitter_only:
            imgs = cv2_transform.color_jitter_list(
                imgs,
                img_brightness=0.4,
                img_contrast=0.4,
                img_saturation=0.4,
            )
        imgs = cv2_transform.lighting_list(
            imgs,
            alphastd=0.1,
            eigval=np.array(self._pca_eigval).astype(np.float32),
            eigvec=np.array(self._pca_eigvec).astype(np.float32),
        )

    # Normalize images by mean and std.
    imgs = [
        cv2_transform.color_normalization(
            img,
            np.array(self._data_mean, dtype=np.float32),
            np.array(self._data_std, dtype=np.float32),
        )
        for img in imgs
    ]

    # Concat list of images to single ndarray.
    imgs = np.concatenate(
        [np.expand_dims(img, axis=1) for img in imgs], axis=1
    )

    if not self._use_bgr:
        # Convert image format from BGR to RGB.
        imgs = imgs[::-1, ...]

    imgs = np.ascontiguousarray(imgs)
    imgs = torch.from_numpy(imgs)
    boxes = cv2_transform.clip_boxes_to_image(
        # boxes[0], self._crop_size, self._crop_size
        boxes[0],
        imgs[0].shape[1],
        imgs[0].shape[2],
    )
    return imgs, boxes

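# Hedged usage sketch (not from the original source): pushing dummy data
# through `_images_and_boxes_preprocessing_cv2`. The `dataset` instance and
# shapes are assumptions; boxes enter normalized to [0, 1], as the
# width/height multiplication at the top of the function implies.
#
#     imgs = [np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)
#             for _ in range(64)]                                  # HWC, BGR
#     boxes = np.array([[0.1, 0.2, 0.5, 0.9]], dtype=np.float32)   # x1 y1 x2 y2
#     imgs, boxes = dataset._images_and_boxes_preprocessing_cv2(imgs, boxes)
#     # imgs: float tensor of shape (3, T, crop_h, crop_w);
#     # boxes: pixel coordinates clipped to the crop.
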
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load the checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load the object detector from detectron2.
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of the AVA dataset.
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of the Kinetics-400 dataset.
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # When the end frame is reached, clear the buffer and continue
            # with the next video.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0  # acquire person boxes
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE,
                    pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width,
                )
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1,
                )

            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)
            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(
                0, inputs.shape[2] - 1, cfg.DATA.NUM_FRAMES
            ).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))

            # Sample frames for the slow pathway.
            index = torch.linspace(
                0,
                fast_pathway.shape[2] - 1,
                fast_pathway.shape[2] // cfg.SLOWFAST.ALPHA,
            ).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]

            """
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            """

            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene, use a dummy variable
                # to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform
            # ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally done on the CPU
                # because my laptop GPU (an RTX 2080) runs out of memory.
                # If your GPU is more powerful, consider moving this section
                # to CUDA.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [
                    np.nonzero(pred_mask)[0] for pred_mask in pred_masks
                ]
                pred_labels = [
                    [labels[label_id] for label_id in perbox_label_ids]
                    for perbox_label_ids in label_ids
                ]
                # I'm unsure how detectron2 rescales boxes to the original
                # image size, so I rescale the SlowFast input boxes back
                # instead. This is safer, and it still works even if the
                # boxes were not rescaled by cv2_transform.rescale_boxes.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min([
                    frame_provider.display_height,
                    frame_provider.display_width,
                ]) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                # Option 1: single-label inference selected from the highest
                # probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]

                # Option 2: multi-label inference selected from probability
                # entries > threshold.
                label_ids = torch.nonzero(
                    preds.squeeze() > .1
                ).reshape(-1).cpu().detach().numpy()
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # # Option 1: remove the oldest frame in the buffer to make
            # # room for the new one.
            # frames.pop(0)
            # Option 2: empty the buffer.
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(
                    frame, tuple(box[:2]), tuple(box[2:]), (0, 255, 0),
                    thickness=2,
                )
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2
                    )
                    cv2.rectangle(
                        frame,
                        (label_origin[0], label_origin[1] + 5),
                        (label_origin[0] + label_width,
                         label_origin[1] - label_height - 5),
                        palette[labels.index(label)],
                        -1,
                    )
                    cv2.putText(
                        frame, label, tuple(label_origin),
                        cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255), 1,
                    )
                    label_origin[-1] -= label_height + 5

        if not cfg.DETECTION.ENABLE:
            # Display predicted labels on the frame.
            y_offset = 50
            cv2.putText(
                frame, 'Action:', (10, y_offset),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=.65, color=(0, 235, 0), thickness=2,
            )
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(
                    frame, '{}'.format(pred_label), (20, y_offset),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65, color=(0, 235, 0), thickness=2,
                )

        # Display the prediction speed.
        cv2.putText(
            frame, 'Speed: {:.2f}s'.format(s), (10, 25),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=.65, color=(0, 235, 0), thickness=2,
        )
        # Display the frame.
        cv2.imshow('SlowFast', frame)
        # Hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()

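# Hedged usage sketch (illustrative): a minimal entry point for `demo(cfg)`.
# The helper names `parse_args` and `load_config` mirror the SlowFast tools
# scripts but are assumptions here.
#
#     if __name__ == "__main__":
#         args = parse_args()
#         cfg = load_config(args)
#         demo(cfg)
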
def get_predictions(self):
    """
    Predict and append prediction results to each box in each keyframe in
    `self.pred_boxes` dictionary.
    """
    # Set random seed from configs.
    np.random.seed(self.cfg.RNG_SEED)
    torch.manual_seed(self.cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(self.cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Run demo with config:")
    logger.info(self.cfg)
    assert (
        self.cfg.NUM_GPUS <= 1
    ), "Cannot run demo visualization on multiple GPUs."

    # Build the video model and print model statistics.
    model = build_model(self.cfg)
    model.eval()
    logger.info("Start loading model info")
    misc.log_model_info(model, self.cfg, use_train_input=False)
    logger.info("Start loading model weights")
    cu.load_test_checkpoint(self.cfg, model)
    logger.info("Finish loading model weights")
    logger.info("Start making predictions for precomputed boxes.")

    for keyframe_idx, boxes_and_labels in tqdm.tqdm(self.pred_boxes.items()):
        inputs = self.get_input_clip(keyframe_idx)
        boxes = boxes_and_labels[0]
        boxes = torch.from_numpy(np.array(boxes)).float()

        box_transformed = scale_boxes(
            self.cfg.DATA.TEST_CROP_SIZE,
            boxes,
            self.display_height,
            self.display_width,
        )

        # Pad frame index for each box.
        box_inputs = torch.cat(
            [
                torch.full((box_transformed.shape[0], 1), float(0)),
                box_transformed,
            ],
            axis=1,
        )

        if self.cfg.NUM_GPUS:
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            box_inputs = box_inputs.cuda()

        preds = model(inputs, box_inputs)

        preds = preds.detach()
        if self.cfg.NUM_GPUS:
            preds = preds.cpu()

        boxes_and_labels[1] = preds

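# Hedged note (illustrative): the schema of `self.pred_boxes`, inferred from
# the loop above, appears to be a dict mapping a keyframe index to
# [boxes, labels]; after `get_predictions` runs, index 1 holds the prediction
# tensor. The concrete values below are made up.
#
#     self.pred_boxes = {
#         90: [[[10.0, 20.0, 150.0, 300.0]], None],  # one box, no preds yet
#     }
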
def __call__(self, task):
    """
    Returns the prediction results for the current task.
    Args:
        task (TaskInfo object): task object that contains
            the necessary information for action prediction (e.g. frames,
            boxes).
    Returns:
        task (TaskInfo object): the same task info object but filled with
            prediction values (a tensor) and the corresponding boxes for
            the action detection task.
    """
    if self.cfg.DETECTION.ENABLE:
        task = self.object_detector(task)

    frames, bboxes = task.frames, task.bboxes
    if bboxes is not None:
        bboxes = cv2_transform.scale_boxes(
            self.cfg.DATA.TEST_CROP_SIZE,
            bboxes,
            task.img_height,
            task.img_width,
        )
    if self.cfg.DEMO.INPUT_FORMAT == "BGR":
        frames = [
            cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames
        ]
    frames = [
        cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
        for frame in frames
    ]
    inputs = process_cv2_inputs(frames, self.cfg)
    if bboxes is not None:
        index_pad = torch.full(
            size=(bboxes.shape[0], 1),
            fill_value=float(0),
            device=bboxes.device,
        )
        # Pad frame index for each box.
        bboxes = torch.cat([index_pad, bboxes], axis=1)
    if self.cfg.NUM_GPUS > 0:
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(
                    device=torch.device(self.gpu_id), non_blocking=True
                )
        else:
            inputs = inputs.cuda(
                device=torch.device(self.gpu_id), non_blocking=True
            )
    if self.cfg.DETECTION.ENABLE and not bboxes.shape[0]:
        preds = torch.tensor([])
    else:
        preds = self.model(inputs, bboxes)

    if self.cfg.NUM_GPUS:
        preds = preds.cpu()
        if bboxes is not None:
            bboxes = bboxes.detach().cpu()

    preds = preds.detach()
    task.add_action_preds(preds)
    if bboxes is not None:
        task.add_bboxes(bboxes[:, 1:])

    return task

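# Hedged usage sketch (not from the original source): running the predictor
# on a single task. The `TaskInfo` construction details and the result
# attribute names `action_preds`/`bboxes` are assumptions based on the
# `add_action_preds`/`add_bboxes` calls above; `predictor` is an instance of
# the class defining this `__call__`.
#
#     task = TaskInfo()
#     task.frames = frames                      # list of HWC ndarrays
#     task.img_height, task.img_width = height, width
#     task = predictor(task)
#     preds, boxes = task.action_preds, task.bboxes
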
def images_and_boxes_preprocessing_cv2(self, imgs, boxes):
    """
    This function performs preprocessing for the input images and
    corresponding boxes for one clip with opencv as backend.

    Args:
        imgs (list of ndarrays with len num_frames): the images.
            Each image is a ndarray with shape (H, W, C).
        boxes (ndarray): the boxes for the current clip - not normalized.
            shape (num_boxes, 4 = x1, y1, x2, y2)

    Returns:
        imgs (tensor): preprocessed images. shape: (C, num_frames, H, W)
        boxes (ndarray): preprocessed boxes.
            shape (num_boxes, 4 = x1, y1, x2, y2)
    """
    # Assure that boxes have the right size.
    boxes = cv2_transform.clip_boxes_to_image(
        boxes, self.img_height, self.img_width
    )

    # `transform.py` expects a list of np.array. However, for an AVA-like
    # structure, we only have one np.array.
    boxes = [boxes]

    # The image now is in HWC, BGR format.
    # Short side to test_scale. Non-local and STRG uses 256.
    imgs = [cv2_transform.scale(self.crop_size, img) for img in imgs]
    # Boxes have to be adjusted to the new image scale.
    boxes = [
        cv2_transform.scale_boxes(
            self.crop_size, boxes[0], self.img_height, self.img_width
        )
    ]

    # Convert image to CHW keeping BGR order.
    imgs = [cv2_transform.HWC2CHW(img) for img in imgs]

    # Image [0, 255] -> [0, 1].
    imgs = [img / 255.0 for img in imgs]

    imgs = [
        np.ascontiguousarray(
            img.reshape((3, imgs[0].shape[1], imgs[0].shape[2]))
        ).astype(np.float32)
        for img in imgs
    ]

    # Normalize images by mean and std.
    imgs = [
        cv2_transform.color_normalization(
            img,
            np.array(self.data_mean, dtype=np.float32),
            np.array(self.data_std, dtype=np.float32),
        )
        for img in imgs
    ]

    # Concat list of images to single ndarray.
    imgs = np.concatenate(
        [np.expand_dims(img, axis=1) for img in imgs], axis=1
    )

    if not self.use_bgr:
        # Convert image format from BGR to RGB.
        # Note that Kinetics pre-training uses RGB!
        imgs = imgs[::-1, ...]

    imgs = np.ascontiguousarray(imgs)
    imgs = torch.from_numpy(imgs)

    boxes = cv2_transform.clip_boxes_to_image(
        boxes[0], imgs[0].shape[1], imgs[0].shape[2]
    )

    # If you are interested in how the images look, you can activate this:
    # export_image(cfg, imgs.permute(1, 0, 2, 3).data.numpy(), [boxes],
    #              "demo", "CHW", True, use_bgr)
    return imgs, boxes

def __call__(self, task):
    """
    Returns the prediction results for the current task.
    Args:
        task (TaskInfo object): task object that contains
            the necessary information for action prediction (e.g. frames,
            boxes).
    Returns:
        task (TaskInfo object): the same task info object but filled with
            prediction values (a tensor) and the corresponding boxes for
            the action detection task.
    """
    # * ------ 1. First stage: run detection. ---------------------------- */
    if self.cfg.DETECTION.ENABLE:
        task = self.object_detector(task)

    # * ------ 2. Second stage: run recognition. ------------------------- */
    frames, bboxes = task.frames, task.bboxes

    ###########################################################################
    from slowfast.datasets.utils import pack_pathway_output, tensor_normalize
    from torchvision import transforms
    from PIL import Image

    if self.cfg.DEMO.INPUT_FORMAT == "BGR":
        frames = [
            cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames
        ]
    # Subsample the 64 buffered frames to 32 evenly spaced frames (fast
    # pathway input) and those to 8 (slow pathway input).
    inputs1 = []
    inputs0 = []
    cv2_transform.lineSpace(0, 63, 32, frames, inputs1)
    cv2_transform.lineSpace(0, 31, 8, inputs1, inputs0)
    inputs0 = [
        cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
        for frame in inputs0
    ]
    inputs1 = [
        cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
        for frame in inputs1
    ]
    inputs0 = torch.from_numpy(np.array(inputs0)).float() / 255
    inputs1 = torch.from_numpy(np.array(inputs1)).float() / 255
    inputs0 = tensor_normalize(inputs0, self.cfg.DATA.MEAN, self.cfg.DATA.STD)
    inputs1 = tensor_normalize(inputs1, self.cfg.DATA.MEAN, self.cfg.DATA.STD)
    # T H W C -> C T H W.
    inputs0 = inputs0.permute(3, 0, 1, 2)
    inputs1 = inputs1.permute(3, 0, 1, 2)
    inputs0 = inputs0.unsqueeze(0)
    inputs1 = inputs1.unsqueeze(0)
    inputs = [inputs0, inputs1]
    ###########################################################################

    if bboxes is not None:
        bboxes = cv2_transform.scale_boxes(
            self.cfg.DATA.TEST_CROP_SIZE,
            bboxes,
            task.img_height,
            task.img_width,
        )
    # if self.cfg.DEMO.INPUT_FORMAT == "BGR":
    #     frames = [
    #         cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in frames
    #     ]
    # frames = [
    #     cv2_transform.scale(self.cfg.DATA.TEST_CROP_SIZE, frame)
    #     for frame in frames
    # ]
    # Change frames to SlowFast inputs.
    # inputs = process_cv2_inputs(frames, self.cfg)

    # Add person class to bbox.
    if bboxes is not None:
        index_pad = torch.full(
            size=(bboxes.shape[0], 1),
            fill_value=float(0),
            device=bboxes.device,
        )
        # Pad frame index for each box.
        bboxes = torch.cat([index_pad, bboxes], axis=1)
    if self.cfg.NUM_GPUS > 0:
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(
                    device=torch.device(self.gpu_id), non_blocking=True
                )
        else:
            inputs = inputs.cuda(
                device=torch.device(self.gpu_id), non_blocking=True
            )
    if self.cfg.DETECTION.ENABLE and not bboxes.shape[0]:
        preds = torch.tensor([])
    else:
        # Change [1, 3, 8, 224, 224] -> [8, 3, 224, 224].
        bboxes = bboxes.unsqueeze(0).unsqueeze(0)
        inputs[0] = inputs[0].squeeze(0).permute(1, 0, 2, 3)
        inputs[1] = inputs[1].squeeze(0).permute(1, 0, 2, 3)

        ######################################################################
        # Optional debug dumps (disabled). Uncomment to export the model
        # inputs for offline comparison, either as flat .txt files or as
        # .npy files that can be reloaded:
        #
        #     import numpy
        #     numpy.set_printoptions(suppress=True)
        #     numpy.array(inputs[0].cpu().reshape(-1, 1)).astype(
        #         np.float32).tofile("input0.txt")
        #     numpy.array(inputs[1].cpu().reshape(-1, 1)).astype(
        #         np.float32).tofile("input1.txt")
        #     numpy.array(bboxes.cpu().reshape(-1, 1)).astype(
        #         np.float32).tofile("input2.txt")
        #
        #     numpy.save("input0.npy", inputs[0].cpu().numpy())
        #     numpy.save("input1.npy", inputs[1].cpu().numpy())
        #     numpy.save("input2.npy", bboxes.cpu().numpy())
        #     input0 = torch.from_numpy(np.load("input0.npy")).cuda()
        #     input1 = torch.from_numpy(np.load("input1.npy")).cuda()
        #     input2 = torch.from_numpy(np.load("input2.npy")).cuda()
        ######################################################################

        preds = self.model(inputs, bboxes)
        # preds = self.model([input0, input1], input2)
        # Debug leftovers: printing and exiting here would make the rest of
        # this function unreachable, so they are disabled.
        # print(preds)
        # exit(0)

        # ************************ open with video test **********************
        bboxes = bboxes.squeeze(0).squeeze(0)  # Change [1, 1, 3, 5] -> [3, 5].
        # ********************** open with video test end ********************

    if self.cfg.NUM_GPUS:
        preds = preds.cpu()
        if bboxes is not None:
            bboxes = bboxes.detach().cpu()

    preds = preds.detach()
    task.add_action_preds(preds)
    if bboxes is not None:
        task.add_bboxes(bboxes[:, 1:])

    return task

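# Hedged note (illustrative): `cv2_transform.lineSpace` is a custom helper;
# judging from its use above and the torch.linspace/index_select pattern in
# `demo(cfg)`, it appends `num` evenly spaced frames from `src` to `dst`.
# A minimal equivalent, assuming that behavior:
def _line_space_sketch(start, stop, num, src, dst):
    # Pick `num` evenly spaced indices in [start, stop] and copy those
    # frames from `src` into `dst`.
    for i in np.linspace(start, stop, num).astype(int):
        dst.append(src[i])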