def __read_image_preprocessed(image_path):
    """Load an image file and return it normalized in channel-first layout.

    The image is read with OpenCV, its channel axis is reordered with
    indices (2, 1, 0), it is passed through ``image_utils.resize_crop``,
    converted to float32, divided by 255, and standardized with the
    module-level ``RGB_MEAN`` / ``RGB_STD``.

    :param image_path: path of the image file handed to ``cv2.imread``.
    :return: float32 array with the channel axis moved to the front.
    """
    bgr = cv2.imread(image_path)

    # cv2.imread delivers BGR channel order; pick channels as (2, 1, 0) -> RGB
    rgb = bgr[:, :, (2, 1, 0)]
    cropped = image_utils.resize_crop(rgb)

    # keep the normalization in place so the array stays float32
    pixels = cropped.astype(np.float32) / 255.0
    pixels -= RGB_MEAN
    pixels /= RGB_STD

    # (H, W, C) -> (C, H, W)
    return pixels.transpose(2, 0, 1)
예제 #2
0
def __pre_process_for_charades(img):
    """Resize/crop an image and standardize each channel.

    The image goes through ``image_utils.resize_crop``, is converted to
    float32, divided by 255, has its channel axis reordered with indices
    (2, 1, 0), and finally each channel ``c`` is mapped to
    ``(x - mean[c]) / std[c]``.

    :param img: image array with the channel as the last axis.
    :return: float32 array, channel-last, standardized per channel.
    """
    channel_means = (0.485, 0.456, 0.406)
    channel_stds = (0.229, 0.224, 0.225)

    img = image_utils.resize_crop(img).astype(np.float32)
    img /= float(255)

    # reverse the channel order (fancy indexing returns a fresh array)
    img = img[:, :, (2, 1, 0)]

    # standardize channel by channel, writing back into the same array
    for ch, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        img[:, :, ch] = (img[:, :, ch] - mean) / std

    return img
예제 #3
0
def video_save_frames_specific_duration(action_num,
                                        video_num,
                                        video_path,
                                        frames_root_pathes,
                                        start_stop_sec,
                                        image_name_format,
                                        verbose=False):
    """Extract the frames of several time segments of a video and save them.

    For every ``(start_sec, stop_sec)`` pair, all frames whose index lies in
    ``[int((start + 0.25) * fps), int((stop + 0.25) * fps)]`` are read,
    converted from RGB to BGR, resized/cropped to 224x224 with
    ``image_utils.resize_crop`` and written with ``cv2.imwrite``.

    :param action_num: action number, only used in verbose output.
    :param video_num: video number, only used in verbose output.
    :param video_path: path of the video opened with ``FFMPEG_VideoReader``.
    :param frames_root_pathes: one output directory per segment; must have
        the same length as ``start_stop_sec``.
    :param start_stop_sec: sequence of ``(start_sec, stop_sec)`` pairs.
    :param image_name_format: %-format string taking the frame index and
        producing the image file name.
    :param verbose: if True, print progress information.
    :raises AssertionError: if the two sequences differ in length.
    """
    assert len(frames_root_pathes) == len(start_stop_sec)

    # side length of the square frames that get saved
    img_dim = 224
    # offset (seconds) added to both the start and the stop of each segment
    sec_offset = 0.25

    cap = FFMPEG_VideoReader(video_path, False)
    try:
        cap.initialize()
        fps = float(cap.fps)

        start_stop_sec = np.array(start_stop_sec)

        for i, s_s_sec in enumerate(start_stop_sec):
            start_sec, stop_sec = s_s_sec
            frame_root_path = frames_root_pathes[i]

            start_idx = int((start_sec + sec_offset) * fps)
            stop_idx = int((stop_sec + sec_offset) * fps) + 1

            if verbose:
                print('action, video: %d, %d' % (action_num, video_num))
                print('%d/%d' % (start_sec, stop_sec))
                print('%d/%d' % (start_idx, stop_idx))

            for idx_frame in range(start_idx, stop_idx):
                time_sec = idx_frame / fps
                if verbose and idx_frame % 100 == 0:
                    print('... time_sec, frame: %d/%d' % (time_sec, idx_frame))

                frame = cap.get_frame(time_sec)
                # the reader's frames are converted RGB -> BGR before cv2.imwrite
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                frame = image_utils.resize_crop(frame,
                                                target_width=img_dim,
                                                target_height=img_dim)

                image_name = image_name_format % (idx_frame, )
                frame_path = os.path.join(frame_root_path, image_name)
                cv2.imwrite(frame_path, frame)
    finally:
        # very important, or we'd have memory leakage; done in `finally` so
        # the reader is released even when reading/writing a frame fails
        cap.__del__()
예제 #4
0
    def __preprocess_img(self, params):
        """Load, resize/crop and standardize one image, then store it.

        :param params: indexable whose item 0 is the slot index in
            ``self.__images`` and item 1 is the image path.
        """
        slot = params[0]
        img_path = params[1]

        pixels = image_utils.resize_crop(cv2.imread(img_path))
        pixels = pixels.astype(np.float32)
        pixels /= float(255)

        # reverse the channel order (fancy indexing returns a fresh array)
        pixels = pixels[:, :, (2, 1, 0)]

        # per-channel standardization with the instance's mean / std
        for ch in range(3):
            pixels[:, :, ch] = (pixels[:, :, ch] - self.__img_mean[ch]) / self.__img_std[ch]

        self.__images[slot] = pixels
예제 #5
0
def __preprocess_img(img_path):
    """Read an image, resize/crop it, scale it by 1/255 and reverse its channels.

    :param img_path: path of the image file handed to ``cv2.imread``.
    :return: float32 array, channel-last, with the channel axis reordered
        by indices (2, 1, 0).
    """
    # load from disk and bring to the target size
    loaded = cv2.imread(img_path)
    cropped = image_utils.resize_crop(loaded)

    # to float32, then divide by 255
    scaled = cropped.astype(np.float32)
    scaled /= float(255)

    # cv2.imread delivers BGR; picking channels (2, 1, 0) yields RGB
    return scaled[:, :, (2, 1, 0)]
예제 #6
0
def test_resnet():
    """Run a pretrained ResNet-50 on a few test images and print the top-5 classes.

    Loads the weights from disk, moves the model to the GPU, prints a model
    summary and, for every test image, prints value ranges, tensor shapes,
    and the five most probable class names with their probabilities (in %).
    """
    # per-channel statistics used to standardize the (RGB-ordered) input
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # load model and weights
    model_path = Pth('Torch_Models/ResNet/resnet50-19c8e357.pth')
    model = resnet50()

    class_names_path = Pth('ImageNet/class_names.txt')
    test_img1 = '/local/mnt/workspace/Pictures/test_img_car.jpg'
    test_img2 = '/local/mnt/workspace/Pictures/test_img_cat.jpg'
    test_img3 = '/local/mnt/workspace/Pictures/test_img_stove.jpg'
    test_img4 = '/local/mnt/workspace/Pictures/test_img_dog.jpg'

    test_imgs = [test_img1, test_img2, test_img3, test_img4]

    class_names = utils.txt_load(class_names_path)
    model_dict = torch.load(model_path)
    model.load_state_dict(model_dict, strict=True)

    # flag the model as testing only (eval() also clears `training`)
    model = model.cuda()
    model.eval()

    # print summary
    input_size = (3, 224, 224)  # (C, H, W), batch dimension excluded
    torchsummary.summary(model, input_size)

    for test_img in test_imgs:
        # load test image, and pre-process it
        img = cv2.imread(test_img)
        img = img[:, :, (2, 1, 0)]
        img = image_utils.resize_crop(img)
        img = img.astype(np.float32)

        # normalize image
        img /= 255.0
        img[:, :] -= mean
        img[:, :] /= std

        print(np.min(img))
        print(np.max(img))

        print(img.shape)
        # (H, W, C) -> (C, H, W), then add the batch axis
        img = np.transpose(img, (2, 0, 1))
        batch = np.expand_dims(img, axis=0)
        print(batch.shape)

        batch = torch.from_numpy(batch).cuda()
        # inference only: no autograd graph is needed
        with torch.no_grad():
            predictions = model(batch)
            # softmax over the class axis; explicit `dim` avoids the
            # deprecated implicit-dimension behaviour
            predictions = F.softmax(predictions, dim=1)
        predictions = predictions.tolist()
        predictions = np.array(predictions)
        predictions *= 100

        print(np.min(predictions))
        print(np.max(predictions))
        predictions = predictions[0]

        # indices of the five highest probabilities, best first
        idx = np.argsort(predictions)[::-1][:5]
        class_name = ' # '.join([class_names[i] for i in idx])
        prob = ' # '.join(['%.02f' % predictions[i] for i in idx])
        print('#########################')
        print('')
        print(test_img)
        print(class_name)
        print(prob)
        print('')