Example #1
def annotate_video(file_path, coordinates):
    """
    Annotates supplied video from predicted coordinates.
    
    Args:
        file_path: path
            System path of video to annotate
        coordinates: list
            Predicted body part coordinates for each frame in the video
    """

    # Load raw video
    import numpy as np
    from os.path import normpath
    from skvideo.io import vreader, ffprobe, FFmpegWriter
    videogen = vreader(file_path)
    video_metadata = ffprobe(file_path)['video']
    fps = video_metadata['@r_frame_rate']
    frame_height, frame_width = next(vreader(file_path)).shape[:2]
    frame_side = frame_width if frame_width >= frame_height else frame_height

    # Initialize annotated video
    vcodec = 'libvpx-vp9'  #'libx264'
    writer = FFmpegWriter(normpath(file_path.split('.')[0] + '_tracked.mp4'),
                          inputdict={'-r': fps},
                          outputdict={
                              '-r': fps,
                              '-bitrate': '-1',
                              '-vcodec': vcodec,
                              '-pix_fmt': 'yuv420p',
                              '-lossless': '1'
                          })  #'-lossless': '1'

    # Annotate video
    from PIL import Image, ImageDraw
    i = 0
    while True:
        try:
            frame = next(videogen)
            image = Image.fromarray(frame)
            image_draw = ImageDraw.Draw(image)
            image_coordinates = coordinates[i]
            image = helpers.display_body_parts(image,
                                               image_draw,
                                               image_coordinates,
                                               image_height=frame_height,
                                               image_width=frame_width,
                                               marker_radius=int(frame_side /
                                                                 150))
            image = helpers.display_segments(image,
                                             image_draw,
                                             image_coordinates,
                                             image_height=frame_height,
                                             image_width=frame_width,
                                             segment_width=int(frame_side /
                                                               100))
            writer.writeFrame(np.array(image))
            i += 1
        except:
            break

    writer.close()
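A minimal usage sketch, not part of the original example: analyze_video (Example #22 below) produces the coordinates that annotate_video draws; the wrapper below and its arguments are hypothetical.

def track_video(file_path, model, framework, resolution, lite):
    # Hypothetical convenience wrapper combining the two example functions.
    coordinates = analyze_video(file_path, model, framework, resolution, lite)
    if coordinates:  # analyze_video returns False when the video cannot be read
        annotate_video(file_path, coordinates)  # writes '<name>_tracked.mp4'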
Example #2
def generate_dataset(video_dir):
    if not os.path.exists(video_dir):
        raise ValueError("video dir does not exist")
    video_files = os.listdir(video_dir)
    image_index = 1
    # for i in video_files:
    #     print i.decode('utf8')
    # exit()
    for video in video_files:
        if 'rmvb' in video:
            continue
        print(video)
        video_path = os.path.join(video_dir, video)
        metadata = ffprobe(video_path)
        frame_info = metadata["video"]["@avg_frame_rate"].split('/')
        rate = int(frame_info[0]) // int(frame_info[1])

        try:
            frame_array = vreader(video_path)
            frame_index = 0
            for frame in frame_array:
                if frame_index % (rate * interval) == 0:
                    image = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    image = cv2.resize(image, (image_height, image_width))
                    cv2.imwrite(test_raw_file + str(image_index) + '.jpg', image)
                    cv2.imwrite(test_copy_file + str(image_index) + '.jpg', image)
                    print("write image %d" % image_index)
                    image_index += 1
                elif frame_index % (rate * interval) == 1:
                    image = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                    image = cv2.resize(image, (image_height, image_width))
                    cv2.imwrite(test_copy_file + str(image_index -1) + '.jpg', image)
                frame_index += 1
        except RuntimeError:
            continue
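Several of these examples compute the frame rate by splitting '@avg_frame_rate' or '@r_frame_rate' on '/' and dividing by hand; the helper below is only an illustrative sketch of the same logic using fractions.Fraction, not part of any example.

from fractions import Fraction

from skvideo.io import ffprobe


def probe_frame_rate(video_path, key='@avg_frame_rate'):
    """Return the frame rate reported by ffprobe, e.g. 29.97 for '30000/1001'."""
    return float(Fraction(ffprobe(video_path)['video'][key]))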
Example #3
def detect_video(conf, video_file, out_path, yolo, level=0):
    """Use yolo v3 to detect video.
    # Argument:
        video: video file.
        yolo: YOLO, yolo model.
        level : on which resolution to run detection, 
        range[1 - 7], default is 416
        the resolution list is in conf.resolutions
    """
    videogen = io.vreader(video_file)
    metadata = io.ffprobe(video_file)
    frame_rate = int(
        int(metadata['video']['@avg_frame_rate'].split('/')[0]) /
        int(metadata['video']['@avg_frame_rate'].split('/')[1]))
    frame = next(videogen)
    shape = (frame.shape[1], frame.shape[0])
    video_writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'XVID'),
                                   frame_rate, shape)
    for frame in tqdm(videogen, total=int(metadata['video']['@nb_frames'])):
        detected_frame = np.array(
            yolo.detect_on_img(conf, Image.fromarray(frame),
                               level=level))[..., ::-1]
        #         pdb.set_trace()
        video_writer.write(detected_frame)
    video_writer.release()
    videogen.close()
Example #4
def convert_vid(path):
    out_path = path.split('.')[0] + '.hdf5'
    cap = vreader(path)
    with h5py.File(out_path, 'w') as f:
        needs_resize = False
        first_frame = next(cap)

        # NOTE this assumes original aspect ratio is 16:9
        if first_frame.shape[0] != 320:
            needs_resize = True
            first_frame = np.swapaxes(resize(first_frame, (320, 180)), 0, 1)

        f.create_dataset('vid_frames',
                         data=np.expand_dims(first_frame, 0),
                         maxshape=(None, first_frame.shape[0],
                                   first_frame.shape[1], first_frame.shape[2]),
                         compression='gzip')
        for ind, frame in enumerate(cap):
            if needs_resize:
                frame = np.swapaxes(resize(frame, (320, 180)), 0, 1)

            f['vid_frames'].resize((f['vid_frames'].shape[0] + 1), axis=0)
            f['vid_frames'][-1] = frame

    print('Wrote', out_path)
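A short sketch (hypothetical file name, not part of the example) of reading the HDF5 written by convert_vid back with h5py:

import h5py

with h5py.File('clip.hdf5', 'r') as f:
    frames = f['vid_frames'][:]  # array of shape (num_frames, H, W, 3)
    print(frames.shape)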
Example #5
def write_frames(flags, rgb_out_dir):
    rgb_video = os.path.join(flags.dataset, 'rgb.mp4')
    video = io.vreader(rgb_video)
    for i, frame in enumerate(video):
        print(f"Writing rgb frame {i:06}" + " " * 10, end='\r')
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        frame = cv2.resize(frame, (OUT_WIDTH, OUT_HEIGHT))
        frame_path = os.path.join(rgb_out_dir, f"{i:06}.jpg")
        params = [int(cv2.IMWRITE_JPEG_QUALITY), 90]
        cv2.imwrite(frame_path, frame, params)
Example #6
def extract_frames_from_vid(vid_path):
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    data = io.ffprobe(vid_path)['video']
    num, den = data['@r_frame_rate'].split('/')
    rate = int(num) / int(den)
    out = None
    for frame in tqdm(io.vreader(vid_path), unit=' frame'):
        if out is None:
            # cv2.VideoWriter expects frameSize as (width, height)
            out = cv2.VideoWriter('out_' + vid_path, fourcc, rate,
                                  (frame.shape[1], frame.shape[0]))
        frame = frame_face_blur(frame)
        out.write(frame)
    out.release()
Example #7
 def process_video(self,
                   video_p: Path,
                   output_p: Path,
                   reduce_rate: int = 1):
     meta = ffprobe(video_p)
     nb_frames = int(meta["video"]["@nb_frames"])
     frames = vreader(str(video_p))
     writer = FFmpegWriter(str(output_p),
                           outputdict={"-r": str(int(30 / reduce_rate))})
     for i, frame in enumerate(tqdm(frames, total=nb_frames)):
         if i % reduce_rate == 0:
             frame = self.process_frame(frame)
             writer.writeFrame(frame)
     writer.close()
Example #8
 def __init__(self, file_paths, transform=False, resize=(256, 256)):
     """
     file_paths: a list of length batch_size containing paths to video files
     transform: whether to crop/resize the frame_tensor
         Note: this must currently be set to True
     resize: resize dimensions for the frames (h, w)
     """
     self.frame_generators = []
     for file_path in file_paths:
         assert os.path.exists(
             file_path), 'Video file path ' + file_path + ' does not exist.'
         self.frame_generators.append(vid.vreader(str(file_path)))
     assert transform == True, 'Non-transformed batch not implemented.'
     self.transform = transform
     self.resize = resize
Example #9
def generate_batches(video_path, batch_size=64, video_options=None):
    vid = vreader(str(video_path), outputdict={'-s': '224x224'})

    batch = []

    for frame in vid:
        batch.append(frame)

        if len(batch) == batch_size:
            yield np.array(batch)
            batch = []

    # Yield any remaining frames as a final, possibly shorter, batch
    if batch:
        yield np.array(batch)
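A usage sketch for the batch generator above; 'clip.mp4' is a placeholder path.

for batch in generate_batches('clip.mp4', batch_size=32):
    # Each batch is (n, 224, 224, 3); n == 32 except possibly for the last one.
    print(batch.shape)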
Example #10
def get_video_info(video_path):

    cap = sk.vreader(video_path)
    seg_l = 4

    metadata = sk.ffprobe(video_path)
    # print (json.dumps(metadata, indent=4))
    # print (json.dumps(metadata["video"], indent=4))
    """
    fps : @r_frame_rate
    length : @duration
    frames : @nb_frames
    """
    length = float(metadata["video"]["@duration"])
    # fnum = float(metadata["video"]["@nb_frames"])
    num, den = metadata["video"]["@r_frame_rate"].split('/')
    fps = float(num) / float(den)
    fnum = int(np.ceil(length * fps))

    print('length : %.5f / frames : %d / fps : %.2f' % (length, fnum, fps))

    img_id = []
    frame_list = []
    id = 0
    for frame in cap:
        frame = cv2.resize(frame, dsize=(224, 224))
        frame_list.append(frame)
        img_id.append(id)
        id += 1

    segs = [img_id[i:i + seg_l] for i in range(len(img_id) - seg_l + 1)]
    segs = reduce(lambda x, y: x + y, segs)

    feat = []

    for seg in segs:
        feat.append(frame_list[seg])

    idx = np.arange(fps, fnum, fps)
    idx = np.floor(idx)
    idx = idx.tolist()
    idx = map(int, idx)

    return feat, fnum, fps, length, img_id, idx
Example #11
def write_images():

    root = '../UCF-14/'

    classes = open('../dataset/classInd_14.txt', 'r')
    var1 = {}
    for line in classes:
        words = line.split(" ")
        var1[words[1].split("\n")[0]] = words[0]

    for path, subdirs, files in os.walk(root):
        for filename in files:
            print(filename)
            if ".DS_Store" not in filename:
                folder = 'images' + '/' + filename.split('.')[0] + '/'
                if not os.path.isdir(folder):
                    os.mkdir(folder)
                else:
                    shutil.rmtree(folder)
                    os.mkdir(folder)
                try:
                    cnt = 0
                    full_path = path + '/' + filename
                    cap = vreader(full_path)
                    fcnt = 1

                    for frame in cap:
                        vid_name = filename.split('.')[0]
                        img_path = folder + vid_name + '_{}.jpg'.format(cnt +
                                                                        1)
                        img_name = vid_name + '_{}'.format(cnt + 1)
                        if fcnt % 5 == 0:
                            vwrite(img_path, frame)
                            cnt = cnt + 1
                        fcnt += 1

                    if cnt:
                        with open("count.txt", "w") as txt:
                            text = str(cnt) + " " + img_name.split(
                                '.')[0] + "\n"
                            txt.write(text)
                except (RuntimeError, TypeError, NameError):
                    print "Some Error happened"
Example #12
def make_video_frames(datadir, outdir):
    # train video data
    for i in range(10000):
        if not os.path.exists(os.path.join(outdir, 'video%s' % i)):
            os.makedirs(os.path.join(outdir, 'video%s' % i))
        filename = ('video%s.mp4' % i)
        videopath = os.path.join(datadir, filename)
        cap = sk.vreader(videopath)

        metadata = sk.ffprobe(videopath)
        # print json.dumps(metadata["video"], indent=4)
        """
        fps : @r_frame_rate
        length : @duration
        frames : @nb_frames
        """
        length = float(metadata["video"]["@duration"])
        frames = float(metadata["video"]["@nb_frames"])
        fps = int(frames / length)

        print('%sth video' % i)
        print('length : %d / frames : %d / fps : %d' % (length, frames, fps))

        cent = np.linspace(0, length, 7)[1:-1]
        for x in range(len(cent)):
            cent[x] = int(cent[x])
        frames = cent * fps

        idx = 0
        filenum = 0
        for frame in cap:
            if idx in frames:
                frame = cv2.resize(frame, dsize=(224, 224))
                sk.vwrite(outdir + '/video%s/frame%s.png' % (i, filenum),
                          frame)
                filenum += 1
            idx += 1

        if i % 1000 == 0:
            print('%sth video processed...' % i)
Example #13
def extract_keyframes_ffmpeg(video_path: str, output_path: str) -> None:
    """
    Extract all keyframes from a video file and save them to disk
    :param video_path: Absolute path to the input video
    :param output_path: Absolute path where all keyframes will be saved
    """
    assert os.path.exists(video_path)
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    video_data = io.vreader(video_path,
                            outputdict={
                                '-vf': 'select=eq(pict_type\,PICT_TYPE_I)',
                                '-vsync': 'vfr'
                            })
    cnt = 0
    for kframe in video_data:
        cv2.imwrite(join(output_path, f'{video_name}_{cnt:03d}.png'),
                    cv2.cvtColor(kframe, code=cv2.COLOR_RGB2BGR))
        cnt += 1
    print(
        f'EXTK> Extracted {cnt} keyframes from {os.path.basename(video_path)} to {output_path}'
    )
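The select=eq(pict_type\,PICT_TYPE_I) filter keeps only intra-coded (key) frames, and -vsync vfr stops ffmpeg from padding the output back to a constant frame rate. A usage sketch with hypothetical paths:

import os

os.makedirs('/tmp/keyframes', exist_ok=True)
extract_keyframes_ffmpeg('/data/clip.mp4', '/tmp/keyframes')
# -> /tmp/keyframes/clip_000.png, /tmp/keyframes/clip_001.png, ...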
Example #14
def extract_frames(d_path, fps=25):
    """
    extract frames from video files, 1 frame per second by default
    :param d_path: data path
    :param fps: number of frames being extracted per second
    :return: data and labels
    """
    for n in range(1, 81):
        video_id = str(n).zfill(2)
        video_name = '{}videos/video{}.mp4'.format(d_path, video_id)
        video_reader = vreader(video_name)
        for i, frame in enumerate(video_reader):
            if i % fps == 0:
                # remove black borders
                # frame = frame[:, 40:-55, :]
                # downscale frame to (224, 224, 3)
                frame = resize(frame, (224, 224, 3))
                # extract frame to different folder
                frame_path = '{}frames/video{}-frame{}.png'.format(
                    d_path, video_id, i)
                plt.imsave(arr=frame, fname=frame_path)
Example #15
def main(argv):
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input file.
    parser.add_argument("input_file", help="Path to the input image file")

    # Optional arguments.
    parser.add_argument("--model_def", help="Model definition file.")
    parser.add_argument("--pretrained_model",
                        help="Trained model weights file.")

    args = parser.parse_args()

    metadata = ffprobe(args.input_file)
    avg_frame_rate = metadata["video"]["@avg_frame_rate"].split('/')
    rate = int(avg_frame_rate[0]) / int(avg_frame_rate[1])
    video = vreader(args.input_file)

    if not os.path.exists('./temp/image'):
        os.mkdir('temp/image')
    image_files = os.listdir('./temp/image')
    if len(image_files) > 0:
        for filename in image_files:
            os.remove('./temp/image/' + filename)

    index = 0
    for frame in video:
        if index % (rate * interval) == 0:
            img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imwrite('temp/image/' + str(index / rate) + '.jpg', img)
        index += 1

    # Pre-load caffe model.
    nsfw_net = caffe.Net(
        args.model_def,  # pylint: disable=invalid-name
        args.pretrained_model,
        caffe.TEST)

    # Load transformer
    # Note that the parameters are hard-coded for best results
    caffe_transformer = caffe.io.Transformer(
        {'data': nsfw_net.blobs['data'].data.shape})
    caffe_transformer.set_transpose(
        'data', (2, 0, 1))  # move image channels to outermost
    caffe_transformer.set_mean('data', np.array(
        [104, 117, 123]))  # subtract the dataset-mean value in each channel
    caffe_transformer.set_raw_scale('data',
                                    255)  # rescale from [0, 1] to [0, 255]
    caffe_transformer.set_channel_swap(
        'data', (2, 1, 0))  # swap channels from RGB to BGR

    # fetch all image score.
    scores_list = np.array([])
    safe_count = 0
    unsafe_count = 0
    middle_count = 0
    danger_list = []
    image_files = os.listdir('./temp/image')
    for filename in image_files:
        image_data = open('./temp/image/' + filename).read()
        scores = caffe_preprocess_and_compute(
            image_data,
            caffe_transformer=caffe_transformer,
            caffe_net=nsfw_net,
            output_layers=['prob'])
        if scores[1] > 0.8:
            unsafe_count += 1
            danger_list.append(filename)
        elif scores[1] < 0.2:
            safe_count += 1
        else:
            middle_count += 1
        scores_list = np.append(scores_list, scores[1])
    # Scores is the array containing SFW / NSFW image probabilities
    # scores[1] indicates the NSFW probability
    print("total: %d, safe: %d, unsafe: %d, middle: %d" %
          (scores_list.shape[0], safe_count, unsafe_count, middle_count))
    print(danger_list)
Example #16
def main(video_dict):
    model_def = 'nsfw_model/deploy.prototxt'
    pretrained_model = 'nsfw_model/resnet_50_1by2_nsfw.caffemodel'

    pycaffe_dir = os.path.dirname(__file__)

    # Pre-load caffe model.
    nsfw_net = caffe.Net(
        model_def,  # pylint: disable=invalid-name
        pretrained_model,
        caffe.TEST)

    # Load transformer
    # Note that the parameters are hard-coded for best results
    caffe_transformer = caffe.io.Transformer(
        {'data': nsfw_net.blobs['data'].data.shape})
    caffe_transformer.set_transpose(
        'data', (2, 0, 1))  # move image channels to outermost
    caffe_transformer.set_mean('data', np.array(
        [104, 117, 123]))  # subtract the dataset-mean value in each channel
    caffe_transformer.set_raw_scale('data',
                                    255)  # rescale from [0, 1] to [0, 255]
    caffe_transformer.set_channel_swap(
        'data', (2, 1, 0))  # swap channels from RGB to BGR

    if not os.path.exists(image_temp):
        os.mkdir(image_temp)

    if not os.path.exists(video_temp):
        os.mkdir(video_temp)

    conclusion_dict = {}

    for video_name in video_dict:
        video_path = os.path.join(video_temp, video_name)
        if not os.path.exists(video_path):
            continue

        image_files = os.listdir(image_temp)
        if len(image_files) > 0:
            for filename in image_files:
                filepath = os.path.join(image_temp, filename)
                os.remove(filepath)

        metadata = ffprobe(video_path)
        avg_frame_rate = metadata["video"]["@avg_frame_rate"].split('/')
        rate = int(avg_frame_rate[0]) / int(avg_frame_rate[1])
        video = vreader(video_path)

        index = 0
        for frame in video:
            if index % (rate * interval) == 0:
                img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                cv2.imwrite(image_temp + '/' + str(index / rate) + '.jpg', img)
            index += 1

        # fetch all image score.
        safe_count = 0
        danger_count = 0
        warning_count = 0
        middle_count = 0
        danger_list = []
        warning_list = []
        scores_list = np.array([])

        image_files = os.listdir(image_temp)
        for image_name in image_files:
            second = image_name.split('.')[0]
            image_path = os.path.join(image_temp, image_name)
            image_data = open(image_path).read()
            scores = caffe_preprocess_and_compute(
                image_data,
                caffe_transformer=caffe_transformer,
                caffe_net=nsfw_net,
                output_layers=['prob'])
            if scores[1] > 0.8:
                danger_count += 1
                danger_list.append(second)
            elif scores[1] > 0.5:
                warning_count += 1
                middle_count += 1
                warning_list.append(second)
            elif scores[1] > 0.2:
                middle_count += 1
            else:
                safe_count += 1
            scores_list = np.append(scores_list, scores[1])
        # Scores is the array containing SFW / NSFW image probabilities
        # scores[1] indicates the NSFW probability

        conclusion_dict[video_name] = {
            'url': video_dict[video_name],
            'name': video_name.split('.')[0],
            'extension': video_name.split('.')[-1],
            'total_count': scores_list.shape[0],
            'danger_count': danger_count,
            'warning_count': warning_count,
            'danger_second': danger_list,
            'warning_second': warning_list,
        }
        print(
            "video name: %s, total: %d, danger_count: %d, warning_count: %d" %
            (video_name, scores_list.shape[0], danger_count, warning_count))
        os.remove(video_path)
    return conclusion_dict
Example #17
def tagVideo(modelpath, videopath, outputPath=None):
    """ detect if persons in video are wearing masks or not
    """
    result = -1
    model = MaskDetector()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.load_state_dict(torch.load(modelpath,
                                     map_location=device)['state_dict'],
                          strict=False)

    model = model.to(device)
    model.eval()

    faceDetector = FaceDetector(
        prototype=
        '/var/www/covosk-cv/covid-mask-detector/models/deploy.prototxt.txt',
        model=
        '/var/www/covosk-cv/covid-mask-detector/models/res10_300x300_ssd_iter_140000.caffemodel',
    )

    transformations = Compose([
        ToPILImage(),
        Resize((100, 100)),
        ToTensor(),
    ])

    if outputPath:
        writer = FFmpegWriter(str(outputPath))

    font = cv2.FONT_HERSHEY_SIMPLEX
    #cv2.namedWindow('main', cv2.WINDOW_NORMAL)
    labels = ['No mask', 'Mask']
    labelColor = [(10, 0, 255), (10, 255, 0)]
    for frame in vreader(str(videopath)):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = faceDetector.detect(frame)
        #print ("FRAME")
        for face in faces:
            xStart, yStart, width, height = face
            #print ("FACE",face)

            # clamp coordinates that are outside of the image
            xStart, yStart = max(xStart, 0), max(yStart, 0)

            # Image is 640x640
            #print ("DIMS",xStart, yStart ,width,height)
            right = min(xStart + width, 639)
            bottom = min(yStart + height, 639)
            #print ("Right",xStart+width)
            #print ("Bottom",yStart+height)

            area = width * height

            inarea = (right - xStart) * (bottom - yStart)
            #print ("Area",area)
            #print ("inArea",inarea)
            areaperc = inarea / area
            #print ("areaperc",areaperc)

            # predict mask label on extracted face
            faceImg = frame[yStart:yStart + height, xStart:xStart + width]
            output = model(transformations(faceImg).unsqueeze(0).to(device))
            #print ("OUTPUT",output)
            _, predicted = torch.max(output.data, 1)
            #print ("result",_)

            # center text according to the face frame
            textSize = cv2.getTextSize(labels[predicted], font, 1, 2)[0]
            textX = xStart + width // 2 - textSize[0] // 2

            # draw prediction label
            cc = (126, 65, 64)
            if (areaperc > 0.75):
                #print (labels[predicted])
                if predicted:
                    result = 1
                elif result == -1:
                    result = 0
            else:
                cc = (200, 200, 200)

            # draw face frame
            cv2.rectangle(frame, (xStart, yStart),
                          (xStart + width, yStart + height),
                          cc,
                          thickness=2)
            cv2.putText(frame, labels[predicted], (textX, yStart - 20), font,
                        1, labelColor[predicted], 2)
        if outputPath:
            writer.writeFrame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        #cv2.imshow('main', frame)
        #if cv2.waitKey(1) & 0xFF == ord('q'):
        #    break
    if outputPath:
        writer.close()
    #cv2.destroyAllWindows()
    #if result==0: print ("No Mask Found")
    #if result==1: print ("Mask Found")
    #if result==-1: print ("No face in photo")
    return result
Example #18
def tagVideo(modelpath=None,
             videopath=None,
             outputPath=None,
             outputPathMask=None):
    modelpath = "./mask-detection/models/face_mask.ckpt"
    """ detect if persons in video are wearing masks or not
    """
    model = MaskDetector()
    model.load_state_dict(torch.load(modelpath,
                                     map_location='cpu')['state_dict'],
                          strict=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    faceDetector = FaceDetector(
        prototype='mask-detection/models/deploy.prototxt.txt',
        model='mask-detection/models/res10_300x300_ssd_iter_140000.caffemodel',
    )

    transformations = Compose([
        ToPILImage(),
        Resize((100, 100)),
        ToTensor(),
    ])
    if outputPath:
        writer = FFmpegWriter(str(outputPath))
    font = cv2.FONT_HERSHEY_SIMPLEX
    #cv2.namedWindow('main', cv2.WINDOW_NORMAL)
    labels = ['No mask', 'Mask']
    labelColor = [(10, 0, 255), (10, 255, 0)]
    try:
        for frame in vreader(str(videopath)):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces = faceDetector.detect(frame)
            for face in faces:
                xStart, yStart, width, height = face

                # clamp coordinates that are outside of the image
                xStart, yStart = max(xStart, 0), max(yStart, 0)

                # predict mask label on extracted face
                faceImg = frame[yStart:yStart + height, xStart:xStart + width]

                output = model(
                    transformations(faceImg).unsqueeze(0).to(device))
                _, predicted = torch.max(output.data, 1)
                cv2.rectangle(frame, (xStart, yStart),
                              (xStart + width, yStart + height), (126, 65, 64),
                              thickness=2)

                # center text according to the face frame
                textSize = cv2.getTextSize(labels[predicted], font, 1, 2)[0]
                textX = xStart + width // 2 - textSize[0] // 2

                # draw prediction label
                cv2.putText(frame, labels[predicted], (textX, yStart - 20),
                            font, 1, labelColor[predicted], 2)
            if outputPath:
                try:
                    if labels[predicted] == "No mask":
                        writer.writeFrame(
                            cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
                        writer.close()
                        os.remove(videopath)
                        print("Person without mask detected!")
                        return ("No mask")
                    elif labels[predicted] == "Mask":
                        os.remove(videopath)
                        print("Person with mask detected!")
                        return ("Face detected with Mask")
                except:
                    os.remove(videopath)
                    print("No face detected!")
                    return ("No face detected!")
            #cv2.imshow('main', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    #cv2.destroyAllWindows()
    except:
        os.remove(videopath)
        print("Image could not be opened!")
Example #19
def video_to_frames(video_path: str, frame_template: str):
    for i, frame in enumerate(vreader(video_path)):
        frame = resize(frame, (240, 320))
        frame = frame[10:170]
        frame = round_(frame * 255).astype("uint8")
        imsave(frame_template.format(i), frame, check_contrast=False)
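A usage sketch for video_to_frames; frame_template is any format string with one integer slot, and the paths here are placeholders.

import os

os.makedirs('frames', exist_ok=True)
video_to_frames('clip.mp4', 'frames/frame_{:05d}.png')
# -> frames/frame_00000.png, frames/frame_00001.png, ...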
Example #20
path = os.path.join('checkpoints', model_name + '.meta')
meta_graph = tf.train.import_meta_graph(path)

session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
meta_graph.restore(sess=session,
                   save_path=os.path.join('checkpoints', model_name))

# load placeholders
c_input = tf.get_collection('c_input')[0]
c_state_input = tf.get_collection('c_state_input')[0]
c_fb_input = tf.get_collection('c_fb_input')[0]
c_output = tf.get_collection('c_output')[0]
c_state_output = tf.get_collection('c_state_output')[0]

# load video frames
reader = io.vreader(video_path)

# fill buffer
buffer = []
buffer_hr = []
for i in range(buffer_len):

    add_image = next(reader)
    buffer_hr.append(add_image)

    # downscale by factor 4 with gaussian smoothing
    if downscale:
        s = 1.5
        add_image = gf(add_image, sigma=[s, s, 0])[0::4, 0::4, :]
        add_image = np.rint(np.clip(add_image, 0, 255)).astype(np.uint8)
Example #21
src_det_file_path = '/home/jinchoi/src/rehab/dataset/action/kinetics/detectron_results/{}/full_kinetics_detection_{}_rearranged_org_spatial_dim.npy'.format(split,split)
# src_det_file_path = '/home/jinchoi/src/rehab/dataset/action/kinetics/detectron_results/{}/full_kinetics_detection_{}_rearranged.npy'.format(split,split)

videos_root = '/home/jinchoi/src/rehab/dataset/action/kinetics/videos/{}'.format(split)

height_in_tgt_dets = 256 

# read the original detection file
dets = np.load(src_det_file_path, allow_pickle=True).item()

pdb.set_trace()

for i,(cur_cls,vid_datas) in enumerate(dets['dets'].items()):
    cur_cls = cur_cls.replace(' ', '_')

    for k,v in vid_datas.items():
        # get the width and height of the video
        filelist = gb.glob(os.path.join(videos_root, cur_cls, k)+'*')
        if len(filelist) > 0:
            vidfile_name = filelist[0].split('/')[-1]
        input_video_path = os.path.join(videos_root, cur_cls, vidfile_name)
        videogen = vreader(input_video_path)

        vis_vid = []
        # The resize target was left blank in the original snippet; assume frames
        # are scaled so their height matches height_in_tgt_dets (aspect preserved).
        resize_dim = None

        for idx, frame in enumerate(videogen):
            if resize_dim is None:
                scale = height_in_tgt_dets / float(frame.shape[0])
                resize_dim = (int(round(frame.shape[1] * scale)), height_in_tgt_dets)
            frame = cv2.resize(frame, resize_dim)
            pdb.set_trace()
            print(' ')
Example #22
def analyze_video(file_path, model, framework, resolution, lite):
    """
    Predict pose coordinates on supplied video.
    
    Args:
        file_path: path
            System path of video to analyze
        model: deep learning model
            Initialized EfficientPose model to utilize (RT, I, II, III, IV, RT_Lite, I_Lite or II_Lite)
        framework: string
            Deep learning framework to use (Keras, TensorFlow, TensorFlow Lite or PyTorch)
        resolution: int
            Input height and width of model to utilize
        lite: boolean
            Defines if EfficientPose Lite model is used
            
    Returns:
        Predicted pose coordinates in all frames of the supplied video.
    """

    # Define batch size and number of batches in each part
    batch_size = 1 if framework in ['tensorflowlite', 'tflite'] else 49
    part_size = 490 if framework in ['tensorflowlite', 'tflite'] else 10

    # Load video
    from skvideo.io import vreader, ffprobe
    start_time = time.time()
    try:
        videogen = vreader(file_path)
        video_metadata = ffprobe(file_path)['video']
        num_video_frames = int(video_metadata['@nb_frames'])
        num_batches = int(np.ceil(num_video_frames / batch_size))
        frame_height, frame_width = next(vreader(file_path)).shape[:2]
    except:
        print(
            '\n##########################################################################################################'
        )
        print(
            'Video "{0}" could not be loaded. Please verify that the file is working.'
            .format(file_path))
        print(
            '##########################################################################################################\n'
        )
        return False

    # Operate on batches
    coordinates = []
    batch_num = 1
    part_start_time = time.time()
    print(
        '\n##########################################################################################################'
    )
    while True:

        # Fetch batch of frames
        batch = [next(videogen, None) for _ in range(batch_size)]
        if not type(batch[0]) == np.ndarray:
            break
        elif not type(batch[-1]) == np.ndarray:
            batch = [
                frame if type(frame) == np.ndarray else np.zeros(
                    (frame_height, frame_width, 3)) for frame in batch
            ]

        # Preprocess batch
        batch = helpers.preprocess(batch, resolution, lite)

        # Perform inference
        batch_outputs = infer(batch, model, lite, framework)

        # Extract coordinates for batch
        batch_coordinates = [
            helpers.extract_coordinates(batch_outputs[n, ...], frame_height,
                                        frame_width) for n in range(batch_size)
        ]
        coordinates += batch_coordinates

        # Print partial processing time
        if batch_num % part_size == 0:
            print(
                '{0} of {1}: Part processed in {2} seconds | Video processed for {3} seconds'
                .format(int(batch_num / part_size),
                        int(np.ceil(num_batches / part_size)),
                        '%.3f' % (time.time() - part_start_time),
                        '%.3f' % (time.time() - start_time)))
            part_start_time = time.time()
        batch_num += 1

    # Print total processing time
    print('{0} of {0}: Video processed in {1} seconds'.format(
        int(np.ceil(num_batches / part_size)),
        '%.3f' % (time.time() - start_time)))
    print(
        '##########################################################################################################\n'
    )

    return coordinates[:num_video_frames]
Example #23
def tagVideo(modelpath, videopath, outputPath=None):
    """ detect if persons in video are wearing masks or not
    """
    model = MaskDetector()
    model.load_state_dict(torch.load(modelpath)['state_dict'], strict=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    faceDetector = FaceDetector(
        prototype='./models/deploy.prototxt.txt',
        model='./models/res10_300x300_ssd_iter_140000.caffemodel',
    )

    transformations = Compose([
        ToPILImage(),
        Resize((100, 100)),
        ToTensor(),
    ])

    if outputPath:
        writer = FFmpegWriter(str(outputPath))

    # fontC = 'simsun.ttc'
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.namedWindow('main', cv2.WINDOW_NORMAL)
    labels = ['No mask', 'Mask']
    labelColor = [
        (255, 255, 255), (10, 255, 0)
    ]  # Can have a different color for predicted with mask or without
    for frame in vreader(str(videopath)):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = faceDetector.detect(frame)
        for face in faces:
            xStart, yStart, width, height = face

            # clamp coordinates that are outside of the image
            xStart, yStart = max(xStart, 0), max(yStart, 0)

            # predict mask label on extracted face
            faceImg = frame[yStart:yStart + height, xStart:xStart + width]
            output = model(transformations(faceImg).unsqueeze(0).to(device))
            _, predicted = torch.max(output.data, 1)

            # draw face frame
            cv2.rectangle(frame, (xStart, yStart),
                          (xStart + width, yStart + height), (255, 255, 255),
                          thickness=2)

            # draw the prediction label in CHINESE

            imgNoMask = np.zeros([20, 40, 3], dtype=np.uint8)
            imgMask = np.zeros([20, 20, 3], dtype=np.uint8)
            imgNoMask.fill(255)
            imgMask.fill(255)

            b, g, r, a = 0, 0, 0, 0
            if predicted == 0:
                img = cv2ImgAddText(imgNoMask, "没有", 3, 3, (b, g, r), 15)
            else:
                img = cv2ImgAddText(imgMask, "有", 3, 3, (b, g, r), 15)
            img_height, img_width, _ = img.shape
            frame[
                yStart:yStart + img_height, xStart:xStart +
                img_width] = img  # Replace the top corner left with the image of Chinese words

            # Add the Prediction Label in ENGLISH according to the face frame

            # center text according to the face frame
            textSize = cv2.getTextSize(labels[predicted], font, 1, 2)[0]
            textX = xStart + width // 2 - textSize[0] // 2

            # draw prediction label
            cv2.putText(frame, labels[predicted], (textX + 40, yStart + 20),
                        font, 0.5, labelColor[predicted], 1)

        if outputPath:
            writer.writeFrame(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        cv2.imshow('main', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    if outputPath:
        writer.close()
    cv2.destroyAllWindows()
Example #24
def extract_db(config, dir_meta, cameras):
    """

    :param config: config from config.yaml
    :param dir_meta: determine extraction of train or test
    :param cameras:
    :return:
    """
    dataset = []  # all images of train or test
    all_joints = dt.vicon_joints
    for dir, meta in dir_meta:  # one action contains 8 cam views
        meta_sub = meta['subject']
        meta_act = config['action_reverse_map'][
            meta['action']]  # action string name
        meta_subact = meta['subaction']

        gt_pos_path = os.path.join(dir, 'gt_skel_gbl_pos.txt')
        gt_ori_path = os.path.join(dir, 'gt_skel_gbl_ori.txt')
        calib_imu_bone_path = os.path.join(
            dir, 's{}_{}{}_calib_imu_bone.txt'.format(meta_sub, meta_act,
                                                      meta_subact))
        calib_imu_ref_path = os.path.join(
            dir, 's{}_{}{}_calib_imu_ref.txt'.format(meta_sub, meta_act,
                                                     meta_subact))
        imu_data_path = os.path.join(
            dir, 's{}_{}{}_Xsens.sensors'.format(meta_sub, meta_act,
                                                 meta_subact))
        bvh_path = os.path.join(
            dir, '{}{}_BlenderZXY_YmZ.bvh'.format(meta_act, meta_subact))
        gt_pos = dt.parse_vicon_gt_pos(gt_pos_path)
        gt_ori = dt.parse_vicon_gt_ori(gt_ori_path)
        imu_data = dt.parse_sensor_6axis(imu_data_path)
        calib_imu_bone = dt.parse_calib_imu_bone(calib_imu_bone_path)
        calib_imu_ref = dt.parse_calib_imu_ref(calib_imu_ref_path)
        bone_info = dt.parse_imu_bone_info(bvh_path)
        canvas_size = (1079., 1919.)  # height width

        filtered_joints = config['joints_filter']

        # bone vector / orientation, not camera related
        bones = [
            'Head', 'Sternum', 'Pelvis', 'L_UpArm', 'R_UpArm', 'L_LowArm',
            'R_LowArm', 'L_UpLeg', 'R_UpLeg', 'L_LowLeg', 'R_LowLeg'
        ]
        # obtain ref for all bones
        bone_refs = dict()
        for bone in bones:
            joint_p = bone_info[bone][0]
            joint_c = bone_info[bone][1]
            bone_vec = np.array(bone_info[bone][2]) * 25.4
            q_TI = calib_imu_ref[bone]
            q_bi = calib_imu_bone[bone]
            q_TI = Quaternion(q_TI)
            q_bi = Quaternion(q_bi)
            q_ib = q_bi.conjugate
            bone_refs[bone] = {
                'joint_p': joint_p,
                'joint_c': joint_c,
                'bone_vec': bone_vec,
                'q_TI': q_TI,
                'q_ib': q_ib
            }

        bone_vectors = dict()  # of all frames

        for c in range(8):
            mp4_file_name = 'TC_S{}_{}{}_cam{}.mp4'.format(
                meta_sub, meta_act, meta_subact, c + 1)
            mp4_file_path = os.path.join(dir, mp4_file_name)
            cam = cameras[c]
            vid_info = ffprobe(mp4_file_path)
            vid_frame_num = int(vid_info['video']['@nb_frames'])

            # print(mp4_file_name, vid_info['video']['@nb_frames'], len(gt_pos)- int(vid_info['video']['@nb_frames']),
            #       vid_info['video']['@bit_rate'])

            out_path = os.path.join(config['db_out_dir'], 'marked')
            out_path = os.path.join(
                out_path, 'sub{}_{}_{}_cam{}'.format(meta_sub, meta_act,
                                                     meta_subact, c + 1))
            if config['save_visualization']:
                if not os.path.exists(out_path):
                    os.makedirs(out_path)

            # where to save extract frames
            seq_dir_name = 's_{:0>2}_act_{:0>2}_subact_{:0>2}_ca_{:0>2}'.format(
                meta_sub, meta['action'], meta_subact, c + 1)
            seq_dir_path = os.path.join(config['db_out_dir'], seq_dir_name)
            if config['save_frame']:
                if not os.path.exists(seq_dir_path):
                    os.makedirs(seq_dir_path)

            vid_ff = vreader(mp4_file_path)
            min_frame_to_iter = min(vid_frame_num, len(gt_pos), len(gt_ori),
                                    len(imu_data))
            for idx in tqdm(range(min_frame_to_iter)):
                pose3d = np.zeros([3, len(all_joints)])
                for idx_j, j in enumerate(all_joints):
                    pose3d[:, idx_j] = gt_pos[idx][j]
                pose3d = pose3d * 0.0254  # inch to meter
                pose2d = project_pose3d_to_2d(pose3d, cam, do_distor_corr=True)

                if config['save_visualization'] or config['save_frame']:
                    aframe = next(vid_ff)
                if config[
                        'save_visualization']:  # skeleton visualization save to disk
                    out_file_path = os.path.join(out_path,
                                                 '{:0>6d}.jpg'.format(idx))
                    marked_img = _visualize_one_frame(
                        aframe, pose2d)  # todo vis box on image
                    img_4save = cv2.cvtColor(marked_img, cv2.COLOR_RGB2BGR)
                    cv2.imwrite(out_file_path, img_4save)

                # cropping box information
                p2d, p3d_cam, p3d, vis = filter_and_project_2d_pose(
                    gt_pos[idx],
                    filtered_joints,
                    cam,
                    canvas_size,
                    do_distor_corr=True)
                mvpose_vis = np.reshape([vis / 2., vis / 2., vis / 2.],
                                        (3, -1))
                # vis follow coco protocol, divide 2 and copy 3 times to follow mvpose
                root_joint = project_pose3d_to_cam(
                    np.reshape(gt_pos[idx]['Hips'], (3, -1)) * 0.0254, cam)
                tl_joint = np.copy(root_joint)  # shape (3,1)
                br_joint = np.copy(root_joint)
                tl_joint[0, 0] -= 1.0000
                tl_joint[1, 0] -= 0.9000
                br_joint[0, 0] += 1.0000
                br_joint[1, 0] += 1.1000
                bbox_25d = np.concatenate((root_joint, tl_joint, br_joint),
                                          axis=1)
                bbox = project_cam_to_uv(
                    bbox_25d, cam,
                    do_distor_corr=True)  # contain 3 point: center, tl, br

                box_center = tuple(bbox[:, 0])  # (x, y)
                box_scale = tuple((bbox[:, 2] - bbox[:, 1]) / 200.)
                box = tuple(np.concatenate([bbox[:, 2], bbox[:, 1]
                                            ]))  # (x_tl, y_tl, x_br, y_br)

                frame_file_name = '{:0>6d}.jpg'.format(idx)
                frame_file_path = os.path.join(seq_dir_path, frame_file_name)
                if config['save_frame']:  # save video frame to disk
                    frame_to_cv = cv2.cvtColor(aframe, cv2.COLOR_RGB2BGR)
                    cv2.imwrite(frame_file_path, frame_to_cv)

                # notice: Difference between totalcapture and h36m project,
                # (1) joints_3d in mm
                # (2) camera['T'] in mm
                # (3) in totalcapture: point_Camera = R.dot(point_Tracking) + T (point and T in m);
                #     in h36m: point_Camera = R.dot(point_Tracking - T)  (point and T in mm)
                #     aka in h36m: point_Tracking = R^{-1}.dot(point_Camera) + T
                # (4) coordinates shape is (num_cords, 3), aka row vector, but I like col vector more
                cam_in_h36m_format = copy.deepcopy(cam)
                cam_in_h36m_format['R'] = cam_in_h36m_format['R']
                cam_in_h36m_format['T'] = cam_in_h36m_format['T'] * 1000. * (
                    -1.)
                cam_in_h36m_format['T'] = cam_in_h36m_format['R'].T.dot(
                    cam_in_h36m_format['T'])
                del cam_in_h36m_format['intri_mat']
                del cam_in_h36m_format['extri_mat']

                # bone vector
                # avoid parsing in each view, only in first view
                if idx not in bone_vectors:
                    bone_vector_of_one_frame = dict()
                    for bone in bones:
                        q_TI = bone_refs[bone]['q_TI']
                        q_ib = bone_refs[bone]['q_ib']
                        bone_vec = bone_refs[bone]['bone_vec']

                        ori = imu_data[idx][bone][0]
                        q_Ii = Quaternion(ori)
                        q_Tb = q_TI * q_Ii * q_ib
                        rotated_bone_vec = q_Tb.rotate(bone_vec)
                        bone_vector_of_one_frame[bone] = rotated_bone_vec
                    bone_vectors[idx] = bone_vector_of_one_frame

                dataitem = {
                    'image': os.path.join(seq_dir_name,
                                          '{:0>6d}.jpg'.format(idx)),
                    'joints_2d': p2d.T,
                    'joints_3d':
                    (p3d_cam *
                     1000.).T,  # 3d pose in camera frame, for psm evaluation
                    'joints_vis': mvpose_vis.T,  # 0: in-visible, 1: visible.
                    'center': box_center,
                    'scale': box_scale,
                    'box': box,
                    'video_id': mp4_file_name,  # mp4 file name  # todo
                    'image_id': idx,
                    'subject': meta['subject'],
                    'action': meta['action'],
                    'subaction': meta['subaction'],
                    'camera_id': c,  # start from 0
                    'camera': cam_in_h36m_format,
                    'source': 'totalcapture',
                    'bone_vec': bone_vectors[idx],
                    'joints_gt': p3d.T * 1000.  # groundtruth in tracking frame
                }

                dataset.append(dataitem)

    return dataset
Example #25
                            len(argv) - 2, \
                            desc='Files', \
                            ), \
                         argv[2:]):

        # Loading source video
        VIDEO_PATH = abspath(source)

        if not os.access(VIDEO_PATH, os.R_OK):
            print('Internal error, source file is not available')
            exit(1)
        METADATA = get_metdata(source)
        VIDEO_FRAMES_COUNT = int(METADATA["@nb_frames"])
        VIDEO_TIME_BASE = eval(METADATA["@codec_time_base"])
        # VIDEO_FRAMES_COUNT = 100000
        cap = vreader(VIDEO_PATH)

        L = []
        diffs = [0]
        frames = []

        # Hashing
        for _, frame in zip(trange(VIDEO_FRAMES_COUNT, leave=False), cap):
            hash_img = int(phash64(frame))
            if len(L):
                diffs.append(hamming(hash_img, L[-1]))
            L.append(hash_img)
            frames.append(frame)

        # Get scene segmentation and their hash
        scenes = get_joly_scenes_sementation(frames, nb_std_above_mean_th=2.)  # get_scenes_segmentation(diffs, nb_std_above_mean_th=2.5)
Example #26
import skvideo.io as skv
import numpy as np
import os

assert os.path.isfile('computation_recorded.mp4')

video = skv.vreader('computation_recorded.mp4')
writer = skv.FFmpegWriter('video_corrected.mp4')
for frame in video:
    writer.writeFrame(np.flip(frame, axis=-1))
writer.close()
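For reference, np.flip(frame, axis=-1) reverses the channel axis (RGB <-> BGR); the slice frame[..., ::-1] is equivalent, as this small self-contained check (not part of the snippet) shows.

import numpy as np

frame = np.arange(48, dtype=np.uint8).reshape(4, 4, 3)  # stand-in RGB frame
assert np.array_equal(np.flip(frame, axis=-1), frame[..., ::-1])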
Example #27
def run_demo(cuda, record, vfile):
    model = 'models/21styles.params'
    ngf = 128
    style_size = 512
    style_folder = 'images/styles/'
    mirror = False
    vDir = './video/'
    vPath = vDir + vfile
    oFile = 'output21-' + vfile
    wM, hM = 640, 480
    if cuda:
        ctx = mx.gpu(0)
        os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    else:
        ctx = mx.cpu(0)
    style_loader = StyleLoader(style_folder, style_size, ctx)
    style_model = Net(ngf=ngf)
    style_model.load_parameters(model, ctx=ctx)
    metadata = ffprobe(vPath)
    fps = metadata["video"]["@avg_frame_rate"]
    #	print(json.dumps(metadata["video"], indent=4))
    w, h = int(metadata["video"]["@width"]), int(metadata["video"]["@height"])
    downsize = h > hM
    if downsize:
        w = 2 * int(w * hM / h / 2)
        h = hM


#	downsize = w > wM
#	if downsize :
#		h = 2 * int(h * wM / w / 2); w = wM
    swidth = int(w / 4)
    sheight = int(h / 4)
    wName = vfile + '  STYLIZED VIDEO   fps:' + fps + '  W:' + str(
        w) + '  H:' + str(h)
    if record:
        out = FFmpegWriter(vDir + oFile,
                           inputdict={
                               '-r': str(fps),
                               '-s': '{}x{}'.format(2 * w, h)
                           },
                           outputdict={
                               '-r': str(fps),
                               '-c:v': 'h264'
                           })
    key, idx = 0, 0
    cv2.namedWindow(wName, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(wName, 2 * w, h)
    for img in vreader(vPath):
        idx += 1
        if downsize:
            img = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)
        if mirror:
            img = cv2.flip(img, 1)
        cimg = img.copy()
        img = np.array(img).transpose(2, 0, 1).astype(float)
        img = F.expand_dims(mx.nd.array(img, ctx=ctx), 0)
        # changing styles
        if idx % 50 == 1:
            style_v = style_loader.get(int(idx / 20))
            style_model.set_target(style_v)
        img = style_model(img)

        simg = np.squeeze(style_v.asnumpy())
        simg = simg.transpose(1, 2, 0).astype('uint8')
        img = F.clip(img[0], 0, 255).asnumpy()
        img = img.transpose(1, 2, 0).astype('uint8')

        # display
        simg = cv2.resize(simg, (swidth, sheight),
                          interpolation=cv2.INTER_CUBIC)
        cimg[0:sheight, 0:swidth, :] = simg
        img = np.concatenate((cimg, cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),
                             axis=1)
        if record:
            out.writeFrame(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cv2.imshow(wName, img)
        key = cv2.waitKey(1)
        if key == 27:  # Esc
            break
    if record:
        out.close()
        transferAudio(vPath, vDir, oFile)
        print("Done OK. Created Stylised Video file", vDir + oFile)
        print("fps :", fps, "    W:", w, " H:", h)
    cv2.destroyAllWindows()
Example #28
def video_classify(video_dict):
    if not os.path.exists(image_temp):
        os.mkdir(image_temp)

    if not os.path.exists(video_temp):
        os.mkdir(video_temp)

    conclusion_dict = {}

    for video_name in video_dict:
        video_id = video_name.split('.')[0]

        video_path = os.path.join(video_temp, video_name)
        if not os.path.exists(video_path):
            continue

        image_video_temp = os.path.join(image_temp, video_id)
        if not os.path.exists(image_video_temp):
            os.mkdir(image_video_temp)
        else:
            shutil.rmtree(image_video_temp)
            os.mkdir(image_video_temp)

        metadata = ffprobe(video_path)
        avg_frame_rate = metadata["video"]["@avg_frame_rate"].split('/')
        rate = int(avg_frame_rate[0]) / int(avg_frame_rate[1])
        video = vreader(video_path)

        index = 0
        for frame in video:
            if index % (rate * interval) == 0:
                img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                img_name = str(index / rate) + '.jpg'
                img_path = os.path.join(image_video_temp, img_name)
                cv2.imwrite(img_path, img)
            index += 1

        # fetch all image score.
        safe_count = 0
        danger_count = 0
        warning_count = 0
        middle_count = 0
        danger_list = []
        warning_list = []
        scores_list = np.array([])

        image_files = os.listdir(image_video_temp)
        for image_name in image_files:
            second = image_name.split('.')[0]
            image_path = os.path.join(image_video_temp, image_name)
            image_data = open(image_path).read()
            scores = caffe_preprocess_and_compute(
                image_data,
                caffe_transformer=caffe_transformer,
                caffe_net=nsfw_net,
                output_layers=['prob'])
            if scores[1] > 0.8:
                danger_count += 1
                danger_list.append(second)
            elif scores[1] > 0.5:
                warning_count += 1
                middle_count += 1
                warning_list.append(second)
            elif scores[1] > 0.2:
                middle_count += 1
            else:
                safe_count += 1
            scores_list = np.append(scores_list, scores[1])
        # Scores is the array containing SFW / NSFW image probabilities
        # scores[1] indicates the NSFW probability

        conclusion_dict[video_name] = {
            'url': video_dict[video_name],
            'name': video_id,
            'extension': video_name.split('.')[-1],
            'total_count': scores_list.shape[0],
            'danger_count': danger_count,
            'warning_count': warning_count,
            'danger_second': danger_list,
            'warning_second': warning_list,
        }
        print(
            "video name: %s, total: %d, danger_count: %d, warning_count: %d" %
            (video_name, scores_list.shape[0], danger_count, warning_count))
        os.remove(video_path)
        shutil.rmtree(image_video_temp)
    return conclusion_dict