def __read_image_preprocessed(image_path):
    """Load an image from disk and return it normalised and channel-first.

    Steps, in order: read with OpenCV, reverse the channel axis (OpenCV
    loads BGR, so this yields RGB), resize/crop through
    ``image_utils.resize_crop`` with its default target size, rescale by
    1/255, standardise with the module-level ``RGB_MEAN`` / ``RGB_STD``
    and finally move the channel axis to the front.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 array with the channel axis first (presumably
        ``(3, H, W)`` -- depends on what ``resize_crop`` returns).
    """
    bgr = cv2.imread(image_path)
    rgb = bgr[:, :, (2, 1, 0)]
    pixels = image_utils.resize_crop(rgb).astype(np.float32) / 255.0

    # In-place arithmetic keeps the float32 dtype whatever the dtype of the
    # mean/std constants is.
    pixels -= RGB_MEAN
    pixels /= RGB_STD

    return pixels.transpose(2, 0, 1)
def __pre_process_for_charades(img):
    """Resize/crop an already-loaded image and standardise each channel.

    The image goes through ``image_utils.resize_crop`` (default target
    size), is converted to float32, divided by 255, has its channel axis
    reversed, and is then standardised channel by channel with the fixed
    mean / std values below.

    Args:
        img: Image array with the channel axis last (presumably BGR as
            produced by OpenCV -- confirm against the caller).

    Returns:
        A float32 array with the channel axis still last.
    """
    channel_means = (0.485, 0.456, 0.406)
    channel_stds = (0.229, 0.224, 0.225)

    img = image_utils.resize_crop(img).astype(np.float32)
    img /= float(255)

    # Fancy indexing returns a fresh array, so the per-channel writes below
    # never touch the caller's data.
    img = img[:, :, (2, 1, 0)]

    for channel, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        img[:, :, channel] = (img[:, :, channel] - mean) / std

    return img
def video_save_frames_specific_duration(action_num, video_num, video_path, frames_root_pathes, start_stop_sec, image_name_format, verbose=False):
    """Extract the frames of several time intervals of a video to image files.

    For every ``(start_sec, stop_sec)`` pair, each frame index in the
    interval (shifted by a fixed 0.25 s offset) is decoded, converted from
    RGB to BGR, resized/cropped to 224x224 through
    ``image_utils.resize_crop`` and written with ``cv2.imwrite`` into the
    directory paired with that interval.

    Args:
        action_num: Action identifier, only used in verbose output.
        video_num: Video identifier, only used in verbose output.
        video_path: Path of the video file to read.
        frames_root_pathes: One output directory per interval; must have the
            same length as ``start_stop_sec``.
        start_stop_sec: Sequence of ``(start_sec, stop_sec)`` pairs.
        image_name_format: ``%``-style format string that receives the frame
            index and yields the image file name.
        verbose: When true, print progress information.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    assert len(frames_root_pathes) == len(start_stop_sec)

    img_dim = 224
    # offset of starting/stopping the action
    sec_offset = 0.25

    cap = FFMPEG_VideoReader(video_path, False)
    try:
        cap.initialize()
        fps = float(cap.fps)

        intervals = np.array(start_stop_sec)
        for frame_root_path, (start_sec, stop_sec) in zip(frames_root_pathes, intervals):
            start_idx = int((start_sec + sec_offset) * fps)
            stop_idx = int((stop_sec + sec_offset) * fps) + 1

            if verbose:
                print('action, video: %d, %d' % (action_num, video_num))
                print('%d/%d' % (start_sec, stop_sec))
                print('%d/%d' % (start_idx, stop_idx))

            for idx_frame in range(start_idx, stop_idx):
                time_sec = idx_frame / fps
                if verbose and idx_frame % 100 == 0:
                    print('... time_sec, frame: %d/%d' % (time_sec, idx_frame))

                frame = cap.get_frame(time_sec)
                # The reader yields RGB frames while cv2.imwrite expects BGR.
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                frame = image_utils.resize_crop(frame, target_width=img_dim, target_height=img_dim)

                image_name = image_name_format % (idx_frame, )
                frame_path = os.path.join(frame_root_path, image_name)
                cv2.imwrite(frame_path, frame)
    finally:
        # Very important, or we'd have memory leakage. Doing it in `finally`
        # releases the reader even when decoding or writing a frame raises.
        # The explicit __del__() call is kept because that is the cleanup
        # entry point this code has always relied on.
        cap.__del__()
def __preprocess_img(self, params):
    """Load and normalise one image, storing the result in ``self.__images``.

    The image is read with OpenCV, resized/cropped through
    ``image_utils.resize_crop`` (default target size), converted to float32,
    divided by 255, has its channel axis reversed, and is standardised per
    channel with ``self.__img_mean`` / ``self.__img_std``.

    Args:
        params: Indexable whose item 0 is the slot index in
            ``self.__images`` and whose item 1 is the image path. Packed in
            a single argument, presumably so the method can be mapped over
            a worker pool -- confirm against the caller.
    """
    idx, path = params[0], params[1]

    img = image_utils.resize_crop(cv2.imread(path))
    img = img.astype(np.float32)
    img /= float(255)

    # Fancy indexing copies, so the in-place channel writes below operate on
    # a private array.
    img = img[:, :, (2, 1, 0)]

    for channel in range(3):
        mean = self.__img_mean[channel]
        std = self.__img_std[channel]
        img[:, :, channel] = (img[:, :, channel] - mean) / std

    self.__images[idx] = img
def __preprocess_img(img_path):
    """Load an image and return it resized/cropped, scaled and channel-reversed.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 array with the channel axis last and its order reversed
        relative to what ``cv2.imread`` returned (BGR -> RGB).
    """
    rgb_order = (2, 1, 0)

    # load image, then resize/crop with the helper's default target size
    cropped = image_utils.resize_crop(cv2.imread(img_path))

    # as float, divided by 255 so that 8-bit pixel values land in range 0-1
    scaled = cropped.astype(np.float32) / float(255)

    # swap the channel order to get RGB, as the consumer expects RGB images
    return scaled[:, :, rgb_order]
def test_resnet():
    """Run a pretrained ResNet-50 on a few local images and print the top-5 classes.

    Loads the weights from disk, moves the model to the GPU in evaluation
    mode, prints a model summary, then for every test image prints value
    ranges, tensor shapes and the five most probable class names together
    with their probabilities (in percent).

    Requires a CUDA device plus the weight file, class-name file and test
    images referenced below; nothing is returned.
    """
    # per-channel normalisation statistics, applied in RGB order
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    model_path = Pth('Torch_Models/ResNet/resnet50-19c8e357.pth')
    class_names_path = Pth('ImageNet/class_names.txt')

    test_imgs = [
        '/local/mnt/workspace/Pictures/test_img_car.jpg',
        '/local/mnt/workspace/Pictures/test_img_cat.jpg',
        '/local/mnt/workspace/Pictures/test_img_stove.jpg',
        '/local/mnt/workspace/Pictures/test_img_dog.jpg',
    ]

    class_names = utils.txt_load(class_names_path)

    # load model and weights
    model = resnet50()
    model.load_state_dict(torch.load(model_path), strict=True)

    # flag the model as testing only; eval() already clears `training` on
    # the model and all of its sub-modules
    model = model.cuda()
    model.eval()

    # print summary
    input_size = (3, 224, 224)  # (C, H, W); the batch dimension is not included
    torchsummary.summary(model, input_size)

    for test_img in test_imgs:
        # load test image and pre-process it: BGR -> RGB, resize/crop, float
        img = cv2.imread(test_img)
        img = img[:, :, (2, 1, 0)]
        img = image_utils.resize_crop(img)
        img = img.astype(np.float32)

        # normalize image
        img /= 255.0
        img -= mean
        img /= std

        print(np.min(img))
        print(np.max(img))
        print(img.shape)

        # channel-first layout plus a leading batch axis of size 1
        img = np.transpose(img, (2, 0, 1))
        batch = np.expand_dims(img, axis=0)
        print(batch.shape)

        batch = torch.from_numpy(batch).cuda()

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = model(batch)
            # Explicit class axis; the implicit-dim form is deprecated and
            # resolves to dim=1 for this 2-D (batch, classes) output anyway.
            predictions = F.softmax(logits, dim=1)

        predictions = np.array(predictions.tolist())
        predictions *= 100
        print(np.min(predictions))
        print(np.max(predictions))

        predictions = predictions[0]
        idx = np.argsort(predictions)[::-1][:5]
        class_name = ' # '.join([class_names[i] for i in idx])
        prob = ' # '.join(['%.02f' % predictions[i] for i in idx])

        print('#########################')
        print('')
        print(test_img)
        print(class_name)
        print(prob)
        print('')