Code Example #1
    def score(self, ref_path, hyp_path):
        from sklearn.metrics import f1_score
        import numpy as np
        import skvideo.io as skv

        # Binarize each video (any nonzero channel counts as positive)
        # and compare the flattened masks pixel by pixel.
        ref = skv.vread(ref_path)
        hyp = skv.vread(hyp_path)
        score = f1_score(
            np.any(ref, axis=-1).flatten(),
            np.any(hyp, axis=-1).flatten())
        print(score)
        return score
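A minimal usage sketch for the scorer above; the file names and the `scorer` instance are hypothetical, and the masks are written with skvideo.io.vwrite. Note that lossy mp4 encoding perturbs pixel values, so even identical masks may not score exactly 1.0.

# Hypothetical usage: write two small mask videos, then score them.
import numpy as np
import skvideo.io

mask = (np.random.rand(8, 32, 32, 3) > 0.5).astype(np.uint8) * 255
skvideo.io.vwrite('ref.mp4', mask)
skvideo.io.vwrite('hyp.mp4', mask)
# scorer.score('ref.mp4', 'hyp.mp4')  # prints a value near 1.0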
Code Example #2
File: ffmpeg.py Project: sangramch/deepvideo
    def _calc_psnr(r_vid, c_vid):
        # calculate PSNR between the reference (r_vid) and compressed (c_vid) videos

        np_r_vid = sk.vread(r_vid)
        np_c_vid = sk.vread(c_vid)

        psnr = PSNR(data_range=255).calc_video(np_r_vid, np_c_vid)

        return psnr
Code Example #3
File: ffmpeg.py Project: sangramch/deepvideo
    def _calc_ssim(r_vid, c_vid):
        # calculate SSIM with Gaussian weights

        np_r_vid = sk.vread(r_vid)
        np_c_vid = sk.vread(c_vid)

        ssim = SSIM(data_range=255, multichannel=True,
                    gaussian_weights=True).calc_video(np_r_vid, np_c_vid)

        return ssim
Code Example #4
    def _get_video_batch(self, x_paths, as_grey=False, reduce_frames=True, verbose=False):
        """
        Returns ndarray of shape (batch_size, num_frames, width, height, channels).
        If as_grey, then channels dimension is squeezed out.
        """

        videos = []
        failed = []
        
        for row in x_paths.itertuples():
            filepath = row.filepath
            obf_id = row.Index
            
            try:
                # load
                video = skv.vread(filepath, as_grey=as_grey)
            except Exception:
                try:
                    # try again
                    print("trying again to load id:\t", obf_id)
                    video = skv.vread(filepath, as_grey=as_grey)
                except Exception:
                    if verbose:
                        print("FAILED TO LOAD:", filepath)
                    print("failed to load id:\t", obf_id)
                    failed.append(obf_id)

                    # Drop the unreadable row, pad the batch with a random
                    # duplicate row, and retry the whole batch.
                    x_paths_removed_fault = x_paths.drop(row.Index)
                    x_paths_removed_fault = pd.concat([
                        x_paths_removed_fault,
                        x_paths_removed_fault.iloc[np.random.randint(0, self.batch_size - 1, 1)]])
                    return self._get_video_batch(x_paths_removed_fault, as_grey, reduce_frames, verbose)
                
            
            # fill video if necessary
            if video.shape[0] < self.num_frames:
                video = self._fill_video(video) 
            
            # reduce: keep ~90 evenly spaced frames, starting at a random offset
            if reduce_frames:
                step_size = max(1, int(video.shape[0] / 90))
                frames = np.arange(random.randint(0, step_size - 1), video.shape[0], step_size)
                try:
                    video = video[frames, :, :]
                    videos.append(video)
                
                except IndexError:
                    if verbose:
                        print("FAILED TO REDUCE:", filepath)
                    print("failed to reduce id:\t", obf_id)
                    failed.append(obf_id)
                       
        return np.array(videos), failed
Code Example #5
File: capVideoFrames.py Project: Str1k3-r/DTRCNNLSTM
def readImages(ims, thresholdParam=0.95):
    # `skv` is skvideo.io, `cv` is cv2, `m` is skimage.measure
    images = skv.vread(ims)
    # vread returns RGB frames
    temp = cv.cvtColor(images[0], cv.COLOR_RGB2GRAY)

    numberOfFrames, a, b, c = images.shape

    finalImages = np.zeros((numberOfFrames, a, b))
    finalImages[0] = temp
    count = 1  # frame 0 is always kept

    for image in images[1:]:
        image = cv.cvtColor(image, cv.COLOR_RGB2GRAY)
        score = m.compare_ssim(temp, image)

        # keep only frames that differ enough from the last kept frame
        if score < thresholdParam:
            finalImages[count] = image
            temp = image
            count += 1

    finalImages = finalImages[:count]
    del images, temp
    return finalImages, count
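Note that `m.compare_ssim` is skimage.measure.compare_ssim, which was deprecated in scikit-image 0.16 and removed in 0.18. On newer versions the same call is available as:

# Drop-in equivalent on scikit-image >= 0.16:
from skimage.metrics import structural_similarity as compare_ssim
# score = compare_ssim(temp, image)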
Code Example #6
def data_aug(video_path, num_of_aug, aug_dict=data_aug_dict):
    """
    Augments every frame of the video and creates num_of_aug new videos.

    :param video_path: Path to video
    :param num_of_aug: Number of different augmentations
    :param aug_dict: Augmentation dictionary
    :return: New videos
    """
    new_videos = []
    video = vread(video_path)
    for aug_i in range(num_of_aug):  # run over augmentations
        aug_fn = aug_dict['data_aug_' + str(aug_i)]
        new_video = []
        # run over frames
        for frame_idx in range(video.shape[0]):
            frame = video[frame_idx, :, :, :]
            aug_frame = aug_fn(frame)
            new_video.append(aug_frame)

        new_video = np.stack(new_video)
        new_videos.append(new_video)
    return new_videos
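`data_aug_dict` is defined elsewhere in the project; a minimal sketch of the shape it must have, with simple numpy operations standing in for the real augmentations:

import numpy as np

# Hypothetical stand-in: keys must follow the 'data_aug_<i>' pattern,
# and each value must map one (H, W, C) frame to an augmented frame.
data_aug_dict = {
    'data_aug_0': np.fliplr,                 # horizontal flip
    'data_aug_1': np.flipud,                 # vertical flip
    'data_aug_2': lambda f: np.rot90(f, 2),  # 180-degree rotation
}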
Code Example #7
def compressvideo(filepath: str, output: str, ylabelval: float, xkey: str, ykey: str):
    # Reading
    vid = vio.vread(filepath)
    # Writing
    with h5py.File(output, 'w') as wfile:
        wfile.create_dataset(xkey, data=vid)
        wfile.create_dataset(ykey, data=ylabelval)
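Reading such a clip back is symmetric; a short sketch, assuming the file was written with xkey='x', ykey='y', and a hypothetical output path 'clip.h5':

import h5py

with h5py.File('clip.h5', 'r') as rfile:
    vid = rfile['x'][...]    # (T, H, W, C) uint8 frames
    label = rfile['y'][()]   # scalar float label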
Code Example #8
def GetFrames(fileName, skipLength=1, debug=False):
    '''
    Get video frames after skipping
    Args:
        fileName: full fileName to read
        skipLength: Number of skips to perform
        debug: If True, print progress information
    Returns:
        Numpy array of frames, or None if the video cannot be read
    '''

    if debug:
        print("Started creating Frame List for file", fileName)

    try:
        frameList = vread(fileName)
    except Exception:
        return None

    if debug:
        print('The video shape is', frameList.shape)
        print('The array type is', frameList.dtype)

    # Skip frames according to skipLength
    frameList = frameList[range(0, frameList.shape[0], skipLength), :, :, :]

    if debug:
        print('The new shape after skipping', skipLength, 'is', frameList.shape)
        print("Finished creating Frame List")

    return frameList
Code Example #9
    def __getitem__(self, index):
        """ Get a sample from the dataset
        """
        if self.videos is not None:
            # If dataset is preloaded
            video = self.videos[index]
            label = self.labels[index]
        else:
            # If on-demand data loading
            video_fn, label = self.filenames[index]
            video = vp.vread(video_fn)
            # Move channels first: (T, H, W, C) -> (C, T, H, W).
            # np.transpose reorders axes; np.reshape would scramble the pixels.
            video = np.transpose(video, (3, 0, 1, 2))

        if self.spatial_transform is not None:
            self.spatial_transform.randomize_parameters()
            # Back to (T, H, W, C) so each frame can become a PIL Image
            video = np.transpose(video, (1, 2, 3, 0))
            clip = [
                self.spatial_transform(Image.fromarray(img)) for img in video
            ]
            video = torch.stack(clip, 0).permute(1, 0, 2, 3)

        # return video and label
        return video, label
Code Example #10
def load_frames(idx, video_files, data_mode, data_path, zips_list,
                in_which_zip):
    """Load the video frames"""
    if data_mode == 'json':
        vid_path = video_files[idx]
        if not os.path.isabs(vid_path):
            vid_path = os.path.join(data_path, vid_path)
        # Deal with multiple possible extensions
        path_base, path_ext = os.path.splitext(vid_path)
        if path_ext == '.csv':
            vid_path = path_base + '.mp4'
        elif path_ext == '':
            vid_path = vid_path + '.mp4'
        frames = vread(vid_path)
    elif data_mode == 'npz':
        vid_path = os.path.dirname(video_files[idx])
        right_zip = zips_list[in_which_zip[idx]]
        zip_path = os.path.join(data_path, right_zip)  # type: str
        zip_path = zip_path.split('_meta.zip')[0] + '_video.zip'
        with zipfile.ZipFile(zip_path) as my_zip:
            frames = np.zeros((100, 100, 100, 3), dtype=np.uint8)
            for i in range(100):
                img_path = os.path.join(vid_path, '%04d.jpg' % i)
                img_path = os.path.normpath(img_path).replace('\\', '/')
                file = my_zip.read(img_path)
                image = np.array(Image.open(BytesIO(file)))
                image = np.dstack((image, image, image))  # grayscale -> 3 channels
                frames[i, ...] = image
    else:
        raise ValueError("data_mode must be either 'json' or 'npz'")

    return frames
Code Example #11
    def __getitem__(self, index):
        """
        Args:
            index (int): Index
        Returns:
            tuple: (image, target) where target is class_index of the target class.
        """
        path = self.data[index]['video']
        clip_name = path.rstrip().split('/')[-1].split('.')[0]
        try:
            clip_numpy = vp.vread(path)
        except Exception:
            print(path)
            raise
        n_frames = clip_numpy.shape[0]
        frame_indices = list(range(n_frames))
        if self.temporal_transform is not None:
            frame_indices = self.temporal_transform(frame_indices)
        clip = [clip_numpy[i] for i in frame_indices]
        if self.spatial_transforms is not None:
            self.spatial_transforms[self.annotationDict[clip_name]].randomize_parameters()
            clip = [self.spatial_transforms[self.annotationDict[clip_name]](img) for img in clip]
        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)

        target = self.data[index]
        if self.target_transform is not None:
            target = self.target_transform(target)
        return clip, target, path
Code Example #12
def extract(video_path, positions):
    if not isinstance(positions, (list, tuple)):
        raise RuntimeError('expected list or tuple, got {}.'.format(
            type(positions)))

    capture = cv2.VideoCapture(video_path)
    # duration in seconds; loop-invariant, so compute it once
    total_len = capture.get(cv2.CAP_PROP_FRAME_COUNT) / capture.get(
        cv2.CAP_PROP_FPS)
    frames = []
    for pos in positions:
        if pos >= total_len:
            continue

        capture.set(cv2.CAP_PROP_POS_MSEC, pos * 1000)
        ok, img = capture.read()
        if not ok:
            img = vread(video_path)[math.floor(
                capture.get(cv2.CAP_PROP_FPS) * pos)]
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            # raise RuntimeError(video_path, pos)
        if img.shape[0] > img.shape[1] > 224:
            img = cv2.resize(img,
                             (224, round(img.shape[0] / img.shape[1] * 224)),
                             interpolation=cv2.INTER_CUBIC)
        elif img.shape[1] > img.shape[0] > 224:
            img = cv2.resize(img,
                             (round(img.shape[1] / img.shape[0] * 224), 224),
                             interpolation=cv2.INTER_CUBIC)
        frames.append(img)
    capture.release()
    return frames
Code Example #13
File: check_dataset.py Project: duxiaodan/BigVidGAN
def file_check(label_path):
	for video in os.listdir(label_path):
		try:
			vid = io.vread(os.path.join(label_path, video))
		except Exception:
			print("video: {} in class: {} failed to open".format(video, label_path))
	print('Finished {}'.format(label_path))
Code Example #14
def predict(data_dir, output_dir, checkpoint, batch_size):
    in_place, output_video_t = build_forward(INPUT_SIZE, NUM_FRAMES,
                                             tf.estimator.ModeKeys.PREDICT)

    saver = tf.train.Saver()
    with tf.Session() as sess:
      try:
        saver.restore(sess, 'weights/{}_best'.format(checkpoint))
      except Exception as e:
        print('{}: {} is not a valid checkpoint to restore from.'.format(e, checkpoint))

      prediction_folder = os.path.join(output_dir, 'output')
      # create the prediction folder if it does not exist
      if not os.path.exists(prediction_folder):
        os.mkdir(prediction_folder) 
        
      input_videos = glob.glob(os.path.join(data_dir, '*.mp4'))

      for ivf in input_videos:
        in_video = vidio.vread(ivf)
        # running a single tensor (not a list) returns the ndarray directly
        out_video = sess.run(output_video_t,
            { in_place : in_video[:NUM_FRAMES, :INPUT_SIZE, :INPUT_SIZE] })
        base_filename = os.path.basename(ivf)
        out_filename = os.path.join(prediction_folder, base_filename)
        writer = vidio.FFmpegWriter(out_filename)
        for i in range(out_video.shape[0]):
          writer.writeFrame(out_video[i, ...])
        writer.close()
Code Example #15
File: video_dataset.py Project: sangramch/deepvideo
    def _get_frames(self, index):
        # return frames as an ndarray of shape (T, H, W, C)

        # numpy video data
        video_frames = sk.vread(self.video_file_names[index])

        return video_frames
Code Example #16
    def __getitem__(self, index):
        """ Get a sample from the dataset
        """
        if self.videos is not None:
            # If dataset is preloaded
            video = self.videos[index]
            label = self.labels[index]
        else:
            # If on-demand data loading
            video_fn, label = self.filenames[index]
            video = vp.vread(video_fn)

        if self.spatial_transform is not None:
            self.spatial_transform.randomize_parameters()
            if self.resize is not None:
                video = [
                    cv2.resize(img, literal_eval(self.resize)) for img in video
                ]
            clip = [
                self.spatial_transform(Image.fromarray(img)) for img in video
            ]
            video = torch.stack(clip, 0).permute(1, 0, 2, 3)

        self.logger.debug((video.shape, label))
        # return video and label
        return video, label
Code Example #17
def extract_frames(fname):
    meta = skvio.ffprobe(fname)
    # @r_frame_rate is a rational such as "30000/1001", so divide, don't truncate
    num, den = meta['video']['@r_frame_rate'].split('/')
    fps = int(round(int(num) / int(den)))
    size = (int(meta['video']['@width']), int(meta['video']['@height']))
    assert fps > 0, 'Broken video %s' % fname
    frames = list(skvio.vread(fname))
    return frames, fps, size
Code Example #18
def process_file(filepath):
    filename = os.path.basename(filepath)
    video = vreader.vread(filepath)
    metadata = {
        'total_frames': video.shape[0],
        'rows': video.shape[1],
        'cols': video.shape[2],
        'channels': video.shape[3]
    }
    extras = vreader.ffprobe(filepath).get('video', {})
    metadata['frame_rate'] = extras.get('@avg_frame_rate', '')
    metadata['duration'] = extras.get('@duration', '')
    payload = {'video_id': filename}
    # now send the video frame by frame
    for idx, frame in enumerate(video):
        metadata['id'] = idx
        img_str = base64.b64encode(frame.flatten()).decode(
            'utf8')  # decode bytes to a plain string
        payload['data'] = img_str
        payload['metadata'] = metadata
        print('%s, sending frame: %s' % (filename, idx))
        producer.send(TOPIC, json.dumps(payload).encode('utf8'))
        print('%s, sent frame: %s' % (filename, idx))
    # once done, move the file to the completed folder
    print('%s, moving to completed folder' % (filename, ))
    shutil.move(filepath, COMPLETED_DIR)
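On the consuming side, each message can be rebuilt into a frame from the metadata sent with it. A sketch of a hypothetical decoder, assuming the uint8 dtype that vread produces:

import base64
import json
import numpy as np

def decode_frame(message_bytes):
    # hypothetical consumer-side counterpart of the payload built above
    payload = json.loads(message_bytes.decode('utf8'))
    meta = payload['metadata']
    flat = np.frombuffer(base64.b64decode(payload['data']), dtype=np.uint8)
    return flat.reshape(meta['rows'], meta['cols'], meta['channels'])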
Code Example #19
File: SVE.py Project: kmanjari/RL-Activity-Detection
    def __init__(self):
        filename = 'serve.mp4'
        height = 24
        width = 40
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(0, 1, [height * width])

        self._seed()
        #self._reset()

        self.__videodata = None
        try:
            self.__videodata = skio.vread(filename)
        except FileNotFoundError:
            print(filename + ' could not be opened')
            raise  # __init__ cannot return False; re-raise instead

        with open(filename.split(".")[0] + ".labels",
                  encoding='utf-8') as file:
            l = file.readlines()
        self.__labels = [x.strip() for x in l]

        self.__nb_frames, _, _, _ = self.__videodata.shape
        self.__w = width
        self.__h = height
        self.__index = 0
        self.__correct = 0
        self.__action = 1
Code Example #20
def visualize(video: str, subtitle: str):
    video = vread(video)

    ax = plt.subplot(111)
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    plt.axis('off')

    text = plt.text(0.5,
                    0.1,
                    "",
                    ha='center',
                    va='center',
                    transform=ax.transAxes,
                    fontdict={
                        'fontsize': 14,
                        'color': 'yellow',
                        'fontweight': 500
                    })
    text.set_path_effects([
        path_effects.Stroke(linewidth=3, foreground='black'),
        path_effects.Normal()
    ])

    subs = subtitle.split()
    inc = max(len(video) / (len(subs) + 1), 0.01)

    img = None

    for i, frame in enumerate(video):
        # vread returns RGB frames, so convert from RGB (not BGR)
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        detected = detector(gray, 1)
        if len(detected) == 0:
            continue  # no face found in this frame
        shape = predictor(
            gray, detected[0]
        )  # detected[0] ensures only the first detected face is processed
        shape = face_utils.shape_to_np(
            shape
        )  # shape predictor outputs 68 coordinate pairs, each responsible for a particular landmark
        mouth_points = shape[48:68]
        hull = cv2.convexHull(mouth_points)  # draw convex hull around mouth
        frame = cv2.drawContours(frame, [hull], -1, (100, 255, 0), 3)
        # cv2.imshow('test', frame)
        # cv2.waitKey(10)
        video[i] = frame

    for i, frame in enumerate(video):
        sub = " ".join(subs[:int(i / inc)])
        text.set_text(sub)

        if img is None:
            img = plt.imshow(frame)
        else:
            img.set_data(frame)

        plt.pause(1 / 250)

    plt.show()
Code Example #21
def load_reduce_pipeline(path):
    vid = skv.vread(path)
    # 5x5 spatial max-pooling per frame, then crop columns 100:324
    vid = block_reduce(vid, (1, 5, 5, 1), func=np.max)
    vid = vid[:, :, 100:324, :]

    # pad 4 rows of zeros at the top and bottom
    return np.pad(vid, ((0, 0), (4, 4), (0, 0), (0, 0)),
                  "constant",
                  constant_values=0)
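The shapes work out to a square 224x224 clip when the input is 1080x1920 (an assumption; other resolutions give other sizes):

# Worked shape example, assuming a (T, 1080, 1920, 3) input:
#   block_reduce (1, 5, 5, 1)   -> (T, 216, 384, 3)
#   column crop [:, :, 100:324] -> (T, 216, 224, 3)
#   pad 4 rows top and bottom   -> (T, 224, 224, 3)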
Code Example #22
File: engine.py Project: zjz5250/deepfake
def extract_video(video_path):
    from skvideo.io import vread
    try:
        ret = vread(video_path)
    except Exception as e:
        print("reading failed on ", video_path, e)
        ret = np.array([])
    return ret
Code Example #23
    def _get_video_batch(self,
                         x_paths,
                         is_training,
                         as_grey=False,
                         reduce_frames=True,
                         verbose=False,
                         y=None):
        """
        Returns ndarray of shape (batch_size, num_frames, width, height, channels).
        If as_grey, then channels dimension is squeezed out.
        """

        videos = []
        i = 0
        for f in x_paths:
            filepath = 'micro/' + f
            assert x_paths[i] == f

            # load
            video = skv.vread(filepath, as_grey=as_grey)

            # fill video if necessary
            if video.shape[0] < self.num_frames:
                video = self._fill_video(video)

            # reduce: keep every other frame
            if reduce_frames:
                frames = np.arange(0, video.shape[0], 2)
                try:
                    video = video[frames, :, :]
                    videos.append(self.augment(video))

                except IndexError:
                    if verbose:
                        print(f"FAILED TO REDUCE: {filepath}")
                    # skip clips that fail to reduce
            else:

                if is_training:
                    if (y[i, self.classmap['blank']] == 1
                            or y[i, self.classmap['duiker']] == 1
                            or y[i, self.classmap['other (primate)']] == 1
                            or y[i, self.classmap['human']] == 1):
                        videos.append(video)
                    else:
                        videos.append(self.augment(video.astype(np.float32)))
                else:
                    videos.append(video)
            i += 1

        return np.array(videos)
Code Example #24
File: visualization.py Project: redsphinx/3tconv
def try_load_avi(the_path):
    # smoke test: vread raises if the avi cannot be decoded
    vid = skvid.vread(the_path)
    print(vid.shape)


# the_path = '/fast/gabras/dots/dataset_avi/train/scale/14023.avi'
# try_load_avi(the_path)
Code Example #25
def load_video(vid_path):
    try:
        video = vread(vid_path)
        return video
    except Exception:
        # log unreadable files and return an empty list
        with open('fail_crop1.txt', 'a') as log_fail:
            log_fail.write(vid_path + '\n')
        return []
Code Example #26
def main():
    # parse the arguments
    parser = argparse.ArgumentParser(description='Retouch video partially.')
    parser.add_argument('--src_path',
                        type=str,
                        default='../originals',
                        help='source path')
    parser.add_argument('--dst_path',
                        type=str,
                        default='../retouch_temporal_videos',
                        help='destination path')
    parser.add_argument('--intensity',
                        type=str,
                        default='strong',
                        help='strong or weak')
    args = parser.parse_args()

    SRC_PATH = args.src_path
    DST_PATH = args.dst_path
    INTENSITY = args.intensity

    methods = ["blur", "median", "noise"]

    fnames = glob(join(SRC_PATH, "*.mp4"))

    for fname in fnames:
        vOriginal = vio.vread(fname, inputdict={})  # vread already returns an ndarray
        vRetouched = np.zeros(vOriginal.shape)

        fn, h, w, c = vOriginal.shape

        start_fn = int(fn / 3)
        end_fn = int(fn * 2 / 3)

        for method in methods:

            print("processing {}".format(
                join(DST_PATH, INTENSITY, method,
                     fname.split("\\")[-1])))

            for idx in range(fn):
                if idx >= start_fn and idx <= end_fn:
                    vRetouched[idx] = manipulate(vOriginal[idx],
                                                 method,
                                                 intensity=INTENSITY)
                else:
                    vRetouched[idx] = vOriginal[idx]

            write_option['-b:v'] = "800k"  # write_option and fps are defined at module level (not shown)
            writer = vio.FFmpegWriter(filename=join(DST_PATH, INTENSITY,
                                                    method,
                                                    fname.split("\\")[-1]),
                                      inputdict={'-r': fps},
                                      outputdict=write_option)
            for frame in vRetouched:
                writer.writeFrame(frame)
            writer.close()
Code Example #28
File: loader.py Project: qqzz0xx/PyLabelDicom
    def loadDicom(self, path):
        if path is None:
            return
        start_time = time.time()
        self.image_path = path
        self.image_dir = osp.dirname(osp.abspath(path))
        self.image_suffix = osp.splitext(path)[1]

        if self.image_suffix.lower() in ['.mp4', '.avi', '.flv', '.wmv']:
            img_nda = skio.vread(path)
            img_nda = img_nda.astype('float32')
            self.image_type = 'video'
            self._channel = img_nda.shape[3]
            self._spacing = (1, 1, 1)
            self._dims = (img_nda.shape[1], img_nda.shape[0], img_nda.shape[2])
            print(img_nda.shape)
            output = self.numpy_array_as_vtk_image_data(img_nda)
        else:
            img_itk = sitk.ReadImage(path)
            spacing = img_itk.GetSpacing()
            dims = img_itk.GetSize()
            channel = img_itk.GetNumberOfComponentsPerPixel()
            frame_count = 1 if len(dims) == 2 else dims[2]
            spacing_z = 1 if len(dims) == 2 else spacing[2]

            self._dims = (dims[0], dims[1], frame_count)
            self._spacing = (spacing[0], spacing[1], spacing_z)
            self._channel = channel
            self.image_type = 'image' if frame_count == 1 else 'volume'

            img_nda = sitk.GetArrayFromImage(img_itk)
            img_nda = img_nda.astype('float32')

            importer = vtk.vtkImageImport()
            importer.SetDataScalarTypeToFloat()
            importer.SetNumberOfScalarComponents(self._channel)
            importer.SetDataExtent(0, self._dims[0] - 1, 0, self._dims[1] - 1,
                                   0, self._dims[2] - 1)
            importer.SetWholeExtent(0, self._dims[0] - 1, 0, self._dims[1] - 1,
                                    0, self._dims[2] - 1)
            importer.SetDataSpacing(self._spacing[0], self._spacing[1],
                                    self._spacing[2])
            importer.CopyImportVoidPointer(img_nda, img_nda.nbytes)
            importer.Update()
            output = importer.GetOutput()

        # print(output)
        self.image_data.DeepCopy(output)

        print("---laod dicom:  %s seconds ---" % (time.time() - start_time))
        print("image dims ", self._dims)
        print("image spacing ", self._spacing)
        print("image channel ", self._channel)
        # print('load image info:')
        # print(output)

        return self.image_data
Code Example #29
def read_video(path, face_predictor_path, verbose=False):
    if verbose:
        print("loading: " + path)
    video = vread(path)  # already an ndarray of shape (T, H, W, C)
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(face_predictor_path)
    mouth_video = get_frames_mouth(detector, predictor, video)

    return np.array(mouth_video)
Code Example #30
def load(fname: str) -> np.ndarray:
    vid = skvio.vread(fname)
    n, h, w, depth = vid.shape
    if depth != 3:
        print('You need to provide a color video!')
        sys.exit(2)

    log.info('loaded video with %d frames and resolution %d, %d', n, h, w)

    return vid
Code Example #31
def write_video_tf_record(video_path,
                          video_name,
                          video_class,
                          tfwriter,
                          error_files,
                          success_file,
                          format=".mp4",
                          num_channels=3,
                          num_frames_keep=None):
    """Write video to _tf_record file

    :param video_path: Path of the video, string
    :param video_name: name of the video file
    :param video_class: string, class of video 
    :param tfwriter: tf.python_io.TFRecordWriter() object 
    :param num_channels: integer, specifies number of video channels, 3 for RGB by default
     

    """
    try:
        vid = sk.vread(video_path)
    except Exception as e:
        print('Failed to read {}: {}'.format(video_path, e))
        # record the unreadable file and skip it
        with open(error_files, 'a') as f:
            f.write(video_path)
            f.write("\n")
        return
    num_frames_total = vid.shape[0]
    height = vid.shape[1]
    width = vid.shape[2]

    # build the list of frame indexes to keep
    if num_frames_keep is not None:
        num_frames = num_frames_keep
        frame_list = np.linspace(0,
                                 num_frames_total - 1,
                                 num=num_frames,
                                 dtype='int')
        video_np = vid[frame_list, :, :, :]

    else:
        num_frames = num_frames_total
        video_np = vid

    video_string = video_np.tobytes()  #convert from np array to bytes array
    example = build_tf_example_basic(video_string, str.encode(video_name),
                                     height, width, video_class, num_frames)
    tfwriter.write(example.SerializeToString())
    with open(success_file, 'a') as f:
        f.write(video_path + "\n")
    return
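Reading such records back requires the same feature names that build_tf_example_basic writes, which is not shown here. A hypothetical reader sketch with made-up keys:

import tensorflow as tf

# 'video', 'height', 'width', 'num_frames' are assumed keys; they must
# match whatever build_tf_example_basic actually serializes.
feature_spec = {
    'video': tf.io.FixedLenFeature([], tf.string),
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'num_frames': tf.io.FixedLenFeature([], tf.int64),
}

def parse_record(serialized):
    parsed = tf.io.parse_single_example(serialized, feature_spec)
    video = tf.io.decode_raw(parsed['video'], tf.uint8)
    return tf.reshape(video, (parsed['num_frames'], parsed['height'],
                              parsed['width'], 3))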
Code Example #32
    def __getitem__(self, index):
        annotation = self.annotation[index]
        video_path = annotation['path']
        clazz = annotation['class']

        metadata = ffprobe(video_path)
        duration = float(metadata["video"]["@duration"])

        output_parameter = dict(self.base_parameter)  # copy, so update() below doesn't mutate the shared dict

        if self.clip_duration > 0:
            sta = int(random() * max((duration - self.clip_duration), 0.))
            output_parameter.update({
                "-ss": "{}".format(sta),
                "-t": "{}".format(min(self.clip_duration, duration - sta))
            })
        video_data = vread(video_path, outputdict=output_parameter)

        if self.transform:
            video_data = self.transform(video_data)

        return video_data, clazz
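`self.base_parameter` holds ffmpeg output options passed through vread's outputdict, to which the "-ss"/"-t" clip window is added above. A hypothetical example that fixes the frame rate and size of every clip:

# Hypothetical base_parameter: plain ffmpeg output flags.
base_parameter = {
    "-r": "25",        # resample to 25 fps
    "-s": "224x224",   # rescale every frame to 224x224
}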