Example #1
import cv2
import numpy as np
from ffpyplayer.player import MediaPlayer

cap = cv2.VideoCapture("Dog.mp4")
sourcePath = "Dog.mp4"
player = MediaPlayer(sourcePath)

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter("dog_out_1.mp4", fourcc, 30, (width, height))

while True:
    ret, frame = cap.read()
    if not ret:  # stop when the clip ends or a frame cannot be decoded
        break
    audio_frame, val = player.get_frame()
    # frame2_resized = cv2.resize(frame2, (500, 500))
    frame_resized = cv2.resize(frame, (width // 2, height // 2))
    rows, columns, channels = frame_resized.shape
    R = cv2.getRotationMatrix2D((columns / 2, rows / 2), 270, 0.5)
    frame2_rot = cv2.warpAffine(frame_resized, R, (columns, rows))
    # img_resize = cv2.resize(img, (width, height))
    # together = cv2.addWeighted(img_resize, 0.25, frame, 1, 0, frame)
    # cv2.imshow('test', frame)
    cv2.imshow("bvid", frame2_rot)
    if (cv2.waitKey(1) & 0xFF == ord('q')):
        break
    # out.write(frame)
    out.write(frame2_rot)
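The loop above never releases its handles, so the written mp4 may not be finalized. A minimal cleanup sketch for after the loop; whether MediaPlayer needs an explicit close_player() call depends on the ffpyplayer version, so treat that line as an assumption:

# After the loop (sketch): finalize the output file and free the capture/player
cap.release()
out.release()
player.close_player()  # assumed ffpyplayer cleanup call; skip if the installed version lacks it
cv2.destroyAllWindows()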
Example #2
    def row_keyword_mapper(self, psm=11, oem=3, sorting='rows'):

        ### Maps the text inside each bounding box for every file name
        if sorting == 'rows':
            self.key_mapping_l2r = dict()

            for key in self.mapping_l2r.keys():
                self.img = cv2.imread(key)
                print(key)
                height, width = self.img.shape[:2]
                video = None
                if CONF.write_video:
                    fourcc = cv2.VideoWriter_fourcc(*'XVID')
                    fname = extract_basename(key)
                    video = cv2.VideoWriter(
                        'tess_{}.avi'.format(fname.split('.')[0]), fourcc,
                        float(1), (width, height))
                # print(height, width)
                # self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
                box = self.mapping_l2r[key]
                key_word = []
                ###### Parallelization can be used
                box.sort_row_wise()
                img = self.img.copy()
                key_word = self.func(box, video=video)

                ####################################
                #                 for row in box.rows:
                #                     words = []
                #                     for box in row:
                #                         x, y, w, h = box
                #                         # img2 = self.img[y-3:y+h+3, x-3:x+w+3, :]
                #                         img2 = self.img[y:y+h, x:x+w, :]
                #                         text = pytesseract.image_to_string(img2, output_type=pytesseract.Output.DICT, lang='eng', config='-c  tessedit_char_whitelist="$%@.,&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789/\\ "  --psm {} --oem {}'.format(psm, oem))
                #                         # print(text)
                #                         text = text['text']
                #                         # if text == '':
                #                         #     plt.imshow(img2, cmap='gray')
                #                         #     plt.show()
                #                         #     li = [img2]
                #                         #     text = self.recognizer.recognize(li)
                #                         #     # text = self.recognizer.recognize_from_boxes([self.img], [[[x, y, w, h]]])
                #                         #     print(text)
                #                         img = cv2.rectangle(img,(x,y),(x+w,y+h),(0, 255, 0),2)

                #                         if CONF.write_video:
                #                             text = text.split('\n')
                #                             text = [t.strip('\n') for t in text]
                #                             text = [t for t in text if t != '']
                #                             _text = copy.deepcopy(text)
                #                             text = ' '.join(text)
                #                             for t in _text:
                #                                 temp_img = img.copy()
                #                                 image = cv2.putText(temp_img, t, (x, y), font, fontscale, color, thickness, cv2.LINE_AA)
                #                                 video.write(image)
                #                         # plt.imshow(image)
                #                         # plt.show()
                #                         words.append(text)
                #                     key_word.append(words)
                #                     ################################
                self.key_mapping_l2r[key] = key_word
                cv2.imwrite('out.jpg', img)
        else:
            self.key_mapping_t2b = dict()
            for key in self.mapping_t2b.keys():
                self.img = cv2.imread(key)
                box = self.mapping_t2b[key]
                key_word = []
                ###### Parallelization can be used
                box.sort_col_wise()
                for row in box.rows:
                    words = []
                    for box in row:
                        x, y, w, h = box
                        img2 = self.img[y - 3:y + h + 3, x - 3:x + w + 3, :]
                        text = pytesseract.image_to_string(
                            img2,
                            output_type=pytesseract.Output.DICT,
                            lang='eng',
                            config=CONF.TESS_CONFIG)
                        #                         text = pytesseract.image_to_string(img2, output_type=pytesseract.Output.DICT, lang='eng', config='-c  tessedit_char_whitelist="$%@&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789/\\\n "  --psm {} --oem {}'.format(psm, oem))

                        text = text['text'].split('\n')
                        text = [t.strip('\n') for t in text]
                        text = [t for t in text if t != '']
                        words.append(text)
                    key_word.append(words)
                self.key_mapping_t2b[key] = key_word
Example #3
params['model_pose'] = "BODY_25"
# params['model_pose'] = "COCO" #18 keypoints
# params['model_pose'] = "MPI" # 15 keypoints
# params['model_pose'] = "MPI_4_layers" # 15 keypoints, less accurate but faster
# params['net_resolution']="-1x368" # default, best balance, 16*23

assert(os.path.exists(args[0].video)),'Video Path given does not exist!'
cap = cv2.VideoCapture(args[0].video)
frame_w = cap.get(3)   # cv2.CAP_PROP_FRAME_WIDTH
frame_h = cap.get(4)   # cv2.CAP_PROP_FRAME_HEIGHT
vid_fps = cap.get(5)   # cv2.CAP_PROP_FPS

basename = os.path.basename(args[0].video).split('.')[0]

fourcc = cv2.VideoWriter_fourcc('H','2','6','4')
out_vid = cv2.VideoWriter(basename+'_cc.avi',fourcc, vid_fps, (int(frame_w), int(frame_h)))


ratio = frame_w / float(frame_h)
# net_height_mult = 69 #set this
# net_height_mult = 60 #set this
net_height_mult = 45 #set this
# net_height_mult = 23 #set this
net_height = net_height_mult * 16
net_width = net_height*ratio
net_width = int((net_width // 16 + 1) * 16)

params['net_resolution'] = '{}x{}'.format(net_width, net_height)
print("Net Resolution Set: ", params['net_resolution'])
# params['net_resolution']="-1x1072" # 16*51
params['scale_number'] = 1
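The excerpt stops after configuring the writer, so the frame loop is not shown. A minimal sketch of how cap and out_vid could be driven; the OpenPose processing itself is left as a placeholder comment because it is not part of this excerpt:

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # ... run OpenPose on `frame` with the params above and draw the keypoints ...
    out_vid.write(frame)  # written frames must keep the (frame_w, frame_h) size

cap.release()
out_vid.release()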
Example #4
import cv2
import RPi.GPIO as GPIO  # GPIO is used below; assuming the standard RPi.GPIO module on a Pi
from picamera.array import PiRGBArray
from picamera import PiCamera
import time

# initialize the camera and grab a reference to the raw camera capture
resX = 240
resY = 180
camera = PiCamera()
camera.resolution = (resX, resY)
camera.framerate = 10
rawCapture = PiRGBArray(camera, size=(resX, resY))

print(time.strftime("%H_%M_%S"))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(
    time.strftime("%H_%M_%S") + '.avi', fourcc, 20.0, (resX, resY))

# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
detectFlag = 0
detectCounter = [0]
# allow the camera to warmup
time.sleep(0.1)

GPIO.setmode(GPIO.BOARD)

GPIO.setup(16, GPIO.OUT)


def classfier(testImage, threadNum, capTime, detectCounter):
import numpy as np
import cv2

from utils import CFEVideoConf, image_resize

cap = cv2.VideoCapture(0)

save_path = 'saved-media/glasses_and_stash.mp4'
frames_per_seconds = 24
config = CFEVideoConf(cap, filepath=save_path, res='720p')
out = cv2.VideoWriter(save_path, config.video_type, frames_per_seconds,
                      config.dims)
face_cascade = cv2.CascadeClassifier(
    'cascades/data/haarcascade_frontalface_default.xml')
eyes_cascade = cv2.CascadeClassifier(
    'cascades/third-party/frontalEyes35x16.xml')
nose_cascade = cv2.CascadeClassifier('cascades/third-party/Nose18x15.xml')
glasses = cv2.imread("images/fun/glasses.png", -1)
mustache = cv2.imread('images/fun/mustache.png', -1)

while (True):
    # Capture frame-by-frame
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray,
                                          scaleFactor=1.5,
                                          minNeighbors=5)

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)

    for (x, y, w, h) in faces:
Example #6
def detect_realtime(Yolo,
                    output_path,
                    input_size=416,
                    show=False,
                    CLASSES=YOLO_COCO_CLASSES,
                    score_threshold=0.3,
                    iou_threshold=0.45,
                    rectangle_colors=''):
    times = []
    vid = cv2.VideoCapture(0)

    # by default VideoCapture returns float instead of int
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, codec, fps,
                          (width, height))  # output_path must be .mp4

    while True:
        _, frame = vid.read()

        try:
            original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # applying COLOR_BGR2RGB a second time swaps the channels back,
            # so original_frame is effectively left in BGR order
            original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        except:
            break
        image_data = image_preprocess(np.copy(original_frame),
                                      [input_size, input_size])
        #image_data = tf.expand_dims(image_data, 0)
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if YOLO_FRAMEWORK == "tf":
            pred_bbox = Yolo.predict(image_data)
        elif YOLO_FRAMEWORK == "trt":
            batched_input = tf.constant(image_data)
            result = Yolo(batched_input)
            pred_bbox = []
            for key, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)

        t2 = time.time()

        pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
        pred_bbox = tf.concat(pred_bbox, axis=0)

        bboxes = postprocess_boxes(pred_bbox, original_frame, input_size,
                                   score_threshold)
        bboxes = nms(bboxes, iou_threshold, method='nms')

        times.append(t2 - t1)
        times = times[-20:]

        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))

        frame = draw_bbox(original_frame,
                          bboxes,
                          CLASSES=CLASSES,
                          rectangle_colors=rectangle_colors)
        image = cv2.putText(frame, "Time: {:.1f}FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        if output_path != '': out.write(frame)
        if show:
            cv2.imshow('output', frame)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    vid.release()
    out.release()
    cv2.destroyAllWindows()
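A hedged usage sketch: `yolo` stands for a detection model constructed elsewhere in the same project (this excerpt does not show how it is built), and the output filename is illustrative:

# Hypothetical call: run webcam detection and record the annotated stream to an .mp4 file
detect_realtime(yolo, "webcam_detection.mp4", input_size=416, show=True,
                score_threshold=0.3, iou_threshold=0.45)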
Example #7
        cv2.putText(img,
                    text=text,
                    org=(x, y),
                    fontScale=font_scale,
                    fontFace=font_face,
                    thickness=thickness,
                    color=color,
                    lineType=line_type)
        y += 60

    # if you don't want to save the output as a video, set this to False
    save_video = True

    if save_video:
        if vw is None:
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            vid_width_height = img.shape[1], img.shape[0]
            vw = cv2.VideoWriter(mnist_prediction_path, codec, 30,
                                 vid_width_height)
        # writing at 15 fps doesn't play back robustly, so we write each frame twice at 30 fps
        vw.write(img)
        vw.write(img)

    # scale down image for display
    img_disp = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)
    cv2_imshow(img_disp)
    IPython.display.clear_output(wait=True)

cap.release()
if vw is not None:
    vw.release()
    NMS_THRESH = 0.15

    print(args.video_path)
    if not os.path.exists(args.video_path):
        print('Video does not exist.')

    video = cv2.VideoCapture(args.video_path)

    # Get width, height
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))  # float
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # float

    # Define the codec and create VideoWriter object
    # TODO: The videos I am using are 30fps, but you should programmatically get this.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string,
                          fourcc, 30.0, (width, height))

    n_frame = 1
    # TODO: add time function per frame.
    while (True):
        ret, frame = video.read()

        if ret == True:
            # frame is BGR cv2 image.
            # # Detect all object classes and regress object bounds
            scores, boxes = im_detect(net, frame)

            cls_ind = 1
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
Example #9
@author: Royzon
@contact: [email protected]
@time: 19-1-6
'''
import cv2
import face_recognition
import numpy as np
import linecache
import time
from face_register import get_face_encoding
width, height = 768, 432
# fourcc = cv2.VideoWriter_fourcc(*'XVID')
# fourcc = cv2.VideoWriter_fourcc(*'X264')
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
# fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', '2')
out = cv2.VideoWriter('./data/output.mp4', fourcc, 20.0, (width, height))

choos = input("whether to register a face (y/n): ")
if choos == 'y':
    get_face_encoding()
else:
    print('go next steps...')

recog = input("whether to recognize a face (y/n): ")
if recog == 'y':
    face_file = "./data/face.txt"
    rec_name = ""
    rec_id_num = ""
    total_name = []
    total_id_num = []
    total_face_encoding = []
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 27 23:13:59 2019

@author: Ahmed khaled
"""

import numpy as np
import cv2
import pyscreenshot as pys

forcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', forcc, 8, (1920, 1080))

while True:
    img = pys.grab()
    img_np = np.array(img)

    # pyscreenshot grabs in RGB order; convert to BGR so OpenCV shows and writes correct colours
    frame = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)

    cv2.imshow('Screen', frame)
    out.write(frame)

    if cv2.waitKey(20) & 0xFF == ord('q'):
        break

out.release()
cv2.destroyAllWindows()
Example #11
          (total_files, filetype))

# Iterate over files of given type in input directory
for c, filename in enumerate(
    [f for f in os.listdir(directory) if f.endswith(filetype)]):
    print("Processing file '%s' (%s of %s)." % (filename, c + 1, total_files))
    video = cv2.VideoCapture(os.path.join(directory, filename))

    # Gather info about input video
    fps = int(video.get(cv2.CAP_PROP_FPS))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create VideoWriter object for output
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    fn, ext = os.path.splitext(os.path.basename(filename))
    out = cv2.VideoWriter("%s/%s_%s%s" % (output_directory, fn, suffix, ext),
                          fourcc, fps, (width, height))

    # Flip video frame by frame and write to output file
    while (video.isOpened()):
        ret, frame = video.read()
        if ret:
            frame = cv2.flip(frame, 1)
            out.write(frame)
        else:
            break

    video.release()
    out.release()
Example #12
def _main_(args):
    config_path = args.conf
    input_path = args.input
    output_path = args.output

    with open(config_path) as config_buffer:
        config = json.load(config_buffer)

    makedirs(output_path)

    ###############################
    #   Set some parameter
    ###############################
    net_h, net_w = 1088, 1920  # 416, 416   # a multiple of 32, the smaller the faster
    obj_thresh, nms_thresh = 0.5, 0.45  # 0.5, 0.45

    ###############################
    #   Load the model
    ###############################
    os.environ['CUDA_VISIBLE_DEVICES'] = config['valid']['gpus']
    #infer_model = load_model(config['train']['saved_weights_name'])
    infer_model = load_model(
        str(
            os.path.join(config['train']['tensorboard_dir'],
                         config['train']['saved_weights_name'] +
                         '-backup.h5')))

    ###############################
    #   Predict bounding boxes
    ###############################
    if 'webcam' in input_path:  # do detection on the first webcam
        video_reader = cv2.VideoCapture(0)

        # the main loop
        batch_size = 1
        images = []
        while True:
            ret_val, image = video_reader.read()
            if ret_val == True: images += [image]

            if (len(images) == batch_size) or (ret_val == False
                                               and len(images) > 0):
                batch_boxes = get_yolo_boxes(infer_model, images, net_h, net_w,
                                             config['model']['anchors'],
                                             obj_thresh, nms_thresh)

                for i in range(len(images)):
                    draw_boxes(images[i], batch_boxes[i],
                               config['model']['labels'], obj_thresh)
                    cv2.imshow('video with bboxes', images[i])
                images = []
            if cv2.waitKey(1) == 27:
                break  # esc to quit
        cv2.destroyAllWindows()
    elif input_path[-4:] == '.mp4':  # do detection on a video
        video_out = output_path + input_path.split('/')[-1]
        video_reader = cv2.VideoCapture(input_path)

        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        video_writer = cv2.VideoWriter(video_out,
                                       cv2.VideoWriter_fourcc(*'MPEG'), 50.0,
                                       (frame_w, frame_h))
        # the main loop
        batch_size = 1
        images = []
        start_point = 0  #%
        show_window = False
        #for i in tqdm(range(nb_frames)):
        for i in range(nb_frames):
            _, image = video_reader.read()

            if (float(i + 1) / nb_frames) > start_point / 100.:
                images += [image]

                if (i % batch_size == 0) or (i == (nb_frames - 1)
                                             and len(images) > 0):
                    # predict the bounding boxes
                    batch_boxes = get_yolo_boxes(infer_model, images, net_h,
                                                 net_w,
                                                 config['model']['anchors'],
                                                 obj_thresh, nms_thresh)

                    for i in range(len(images)):
                        # draw bounding boxes on the image using labels
                        draw_boxes(images[i], batch_boxes[i],
                                   config['model']['labels'], obj_thresh)

                        # show the video with detection bounding boxes
                        if show_window:
                            cv2.imshow('video with bboxes', images[i])

                        # write result to the output video
                        video_writer.write(images[i])
                    images = []
                if show_window and cv2.waitKey(1) == 27: break  # esc to quit

        if show_window: cv2.destroyAllWindows()
        video_reader.release()
        video_writer.release()
    else:  # do detection on an image or a set of images
        image_paths = []

        if os.path.isdir(input_path):
            for inp_file in os.listdir(input_path):
                image_paths += [os.path.join(input_path, inp_file)]
        else:
            image_paths += [input_path]

        image_paths = [
            inp_file for inp_file in image_paths
            if (inp_file[-4:] in ['.jpg', '.png', 'JPEG', '.JPG', '.PNG'])
        ]

        # the main loop
        for image_path in image_paths:
            image = cv2.imread(image_path)
            iid = os.path.basename(image_path)

            # predict the bounding boxes
            boxes = get_yolo_boxes(infer_model, [image], net_h, net_w,
                                   config['model']['anchors'], obj_thresh,
                                   nms_thresh)[0]

            # print out boxes info
            for _box in boxes:
                label_name = config['model']['labels'][_box.get_label()]
                score = float(_box.get_score())
                if score < obj_thresh:
                    continue
                box = [
                    int(float(_box.xmin)),
                    int(float(_box.ymin)),
                    int(float(_box.xmax)),
                    int(float(_box.ymax))
                ]
                print('{}\t{}\t{}\t{}\t{}'.format(iid, 'label:', label_name,
                                                  score, box))
import os
import time
import copy

import cv2
import numpy as np
from PIL import Image

class_colors = [[0, 0, 0], [0, 255, 0]]
NCLASSES = 2
HEIGHT = 416
WIDTH = 416

model = SegNet_Mobile(n_classes=NCLASSES,
                      input_height=HEIGHT,
                      input_width=WIDTH)
model.load_weights("models/last1.h5")

cap = cv2.VideoCapture('/home/fmc/WX/Segmentation/SegNet-Mobile-tf2/1.mp4')
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (960, 544))
fps = 0.0

while (cap.isOpened()):

    t1 = time.time()
    ret, img = cap.read()

    #############################
    # Convert BGR (OpenCV) to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Convert to a PIL Image
    img = Image.fromarray(np.uint8(img))

    old_img = copy.deepcopy(img)
    orininal_h = np.array(img).shape[0]
Example #14
def show_analysis(
    session=r'Y:\Data 2018-2019\Complement 4 - schizophrenia Project\2019 Adult Behavior C4_for revisions\EZM\ALC_060519_2_mC4\ALC_060519_2_57G_EZM',
    output=r"C:\Users\User\Desktop\Code\annotated_SI_part.avi",
    speedX=10,
):
    import cv2
    import tqdm
    import collections
    import sys

    vids = get_all_videos_in_session(session)
    df = get_analysis_dfs(session, vids)
    location, body_loc, zm_center, open_coord, closed_coord = get_quadrant(df)

    try:
        curr_filename = os.path.join(session, vids[0] + '.avi')
        cap = cv2.VideoCapture(curr_filename)
        fourcc = cv2.VideoWriter_fourcc(*'DIVX')
        writer = cv2.VideoWriter(output, fourcc, 1 / cap.get(2),
                                 (int(cap.get(3)), int(cap.get(4))))
        for idx, row in tqdm.tqdm(df.iterrows(), total=df.shape[0]):
            if not idx % int(speedX) == 0: continue  # only annotate every speedX-th frame
            if row['source'][0] in curr_filename:
                # cv is open to the right place
                pass
            else:
                cap.release()
                curr_filename = os.path.join(session,
                                             row['source'][0] + '.avi')
                cap = cv2.VideoCapture(curr_filename)
            # get the frame
            cap.set(1, row['frame_number'][0])  # 1==set frame number
            succ, img = cap.read()

            # draw the object circles
            img = cv2.circle(img, (int(zm_center[0]), int(zm_center[1])), 5,
                             (0, 0, 255))
            img = cv2.circle(img, (int(open_coord[0]), int(open_coord[1])), 5,
                             (0, 0, 255))
            img = cv2.circle(img, (int(closed_coord[0]), int(closed_coord[1])),
                             5, (0, 0, 255))

            if not np.isnan(location[idx]):
                if location[idx] == 1:
                    img = cv2.circle(
                        img, (int(body_loc[0][idx]), int(body_loc[1][idx])), 5,
                        (255, 0, 0))
                else:
                    img = cv2.circle(
                        img, (int(body_loc[0][idx]), int(body_loc[1][idx])), 5,
                        (0, 255, 0))

            # add to writer
            writer.write(img)
    except Exception as er:

        raise er.with_traceback(sys.exc_info()[2])
    finally:
        # for index, row in df.iterrows():
        cap.release()
        writer.release()
        cv2.destroyAllWindows()
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))

while (cap.isOpened()):
    ret, frame = cap.read()
    if ret == True:
        # frame = cv2.flip(frame,0)
        # write the flipped frame
        out.write(frame)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

# Release everything if job is finished
cap.release()
out.release()
cv2.destroyAllWindows()
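VideoWriter fails silently and produces an empty file when the codec is unavailable or when frames passed to write() do not match the size given to the constructor. A small sanity-check sketch (the resize call is an added assumption, not part of the example above):

import cv2

cap = cv2.VideoCapture(0)
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('output_checked.avi', fourcc, 20.0, (640, 480))
if not out.isOpened():
    raise RuntimeError('VideoWriter failed to open: check the codec and the output path')

ret, frame = cap.read()
if ret:
    # frames must match the (640, 480) size declared above, otherwise nothing is written
    out.write(cv2.resize(frame, (640, 480)))

cap.release()
out.release()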
widths = [306, 142]
heights = [416, 268]

sizes = [(widths[0],heights[0]),(widths[1],heights[1])]

ofsets_h = [2957, 3365]
ofsets_v = [117, 79]

# calling the screen recorder function
screenRec(fps, sim_time, fourcc_avi, completeName1,completeName2,widths,heights,sizes,ofsets_h,ofsets_v)

# setting the minimal area for detecting the moving object, higher means less noise
min_areas = [10000, 4000]

# making the video files for the trimmed and filtered videos
prod1_out_m1 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick45.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m2 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick46.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m3 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick47.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m4 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick48.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m5 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick49.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m6 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick50.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m7 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick51.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m8 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick52.avi"), fourcc_avi,fps,sizes[0])
prod1_out_m9 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick53.avi"), fourcc_avi,fps,sizes[0])
# prod1_out_m10 = cv2.VideoWriter(os.path.join(save_path, "10.avi"), fourcc_avi,fps,size)

prod1_out_m12 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop1.avi"), fourcc_avi,fps,sizes[1])
prod1_out_m22 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop2.avi"), fourcc_avi,fps,sizes[1])
prod1_out_m32 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop3.avi"), fourcc_avi,fps,sizes[1])
prod1_out_m42 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop4.avi"), fourcc_avi,fps,sizes[1])
prod1_out_m52 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop5.avi"), fourcc_avi,fps,sizes[1])
def main():
    SEND_COMMAND = STOP
    LAST_COMMAND = SEND_COMMAND

    s = make_connections()

    cap = cv2.VideoCapture(numCam)

    draw_source_dest(cap)
    source, dest = get_source_dest()

    occupied_grids, planned_path = process_image.main(source, dest, cap, grid_size, frame_width, frame_height, decision)

    qt, path, pts = get_qt_path_pts(planned_path)

    min_v = 500

    x_act = list()
    y_act = list()


    # destination of the path to reach 
    final_x, final_y = qt[-1]

    (winW, winH) = (grid_size, grid_size)

    if runAlgorithm == 'tracker':
        bbox = intialize_tracker(cap)

    size = (frame_width, frame_height) 

    # folderNum = get_folder_num()

    filePath = str(folderNum) + '/' + 'video.mp4'

    result = cv2.VideoWriter(filePath,  
                             cv2.VideoWriter_fourcc(*'MP4V'), 
                             20, size)

    while True:

        # accel, gyro, mag, temp = get_IMU_data(conn)

        timer = cv2.getTickCount()
        _, img = cap.read()
        img = cv2.resize(img,(frame_width, frame_height))

        draw_circle_on_source(img, path)
        result.write(img)
        make_grids(img, grid_size, winW, winH)

        img = cv2.polylines(img, [pts] , False, (255,120,255), 3)

        if runAlgorithm == 'tracker':
            SEND_COMMAND = run_tracker_algo(tracker, img, final_x, final_y, qt, grid_size, min_v)

        else:
            SEND_COMMAND, xt, yt = run_heading_algo(img, stkr1minHSV,stkr1maxHSV, stkr2minHSV, stkr2maxHSV, qt)
            x_act.append(xt)
            y_act.append(yt)
        
        if SEND_COMMAND == 'done':
            break

        LAST_COMMAND = send_command(s, LAST_COMMAND, SEND_COMMAND)
        # to print the direction of the car
        print('Action ' + direction[SEND_COMMAND])
        
        cv2.imshow('window', img)

        if cv2.waitKey(2) & 0xFF == 27:
            break 



    # can be used to make the graph of the actual and experimental path  
    
    make_graph(x_act, y_act, pts)
    
    SEND_COMMAND = STOP
    finish(s, cap, SEND_COMMAND)
def frameFilter(vs,completeName_filtered,min_area,fps,height,width,prod1_out_m):
    # setting parameters
    prod1 = [0]
    movement = [1]
    no_movement = [0]

    size = (width,height)

    prod1_out = cv2.VideoWriter(completeName_filtered, fourcc_avi, fps, size)

    # initializing variables and arrays
    prod1_frames = []
    prod1_matrix = [[],
                    [],
                    [],
                    [],
                    [],
                    [],
                    [],
                    [],
                    []]
    firstFrame = None
    a = 0
    vid1 = 0
    row = 1
    firstIndex = 0
    secondIndex = 0
    thirdIndex = 0
    fourthIndex = 0
    fifthIndex = 0
    sixthIndex = 0
    seventhIndex = 0
    eightIndex = 0
    ninthIndex = 0
    tenthIndex = 0

    # loop over the frames of the video
    while True:
        # grab the current frame and initialize the occupied/unoccupied
        # text
        succes, frame = vs.read()
        text = "Unoccupied"
        # if the frame could not be grabbed, then we have reached the end
        # of the video
        if frame is None:
            break

        # resize the frame, convert it to grayscale, and blur it
        # frame = cv2.resize(frame, size)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)
        # if the first frame is None, initialize it
        if firstFrame is None:
            firstFrame = gray
            continue

        # compute the absolute difference between the current frame and
        # first frame
        frameDelta = cv2.absdiff(firstFrame, gray)
        thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]
        # dilate the thresholded image to fill in holes, then find contours
        # on thresholded image
        thresh = cv2.dilate(thresh, None, iterations=2)
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        # loop over the contours

        for c in cnts:
            # if the contour is too small, ignore it
            if cv2.contourArea(c) < min_area:
                continue
            # compute the bounding box for the contour, draw it on the frame,
            # and update the text
            (x, y, w, h) = cv2.boundingRect(c)
            # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            text = "Occupied"

        # appending the frames which have movement to the matrix
        if text == "Occupied":
            prod1_frames.append(frame)
            prod1_matrix[row-1].append(frame)
            prod1 = np.concatenate((prod1, movement))
        elif text == "Unoccupied":
            prod1 = np.concatenate((prod1, no_movement))

        # change this for the different simulations
        thresh_hold_frames = 5

        # setting the conditions on when to go to the next row in the frame matrix
        if prod1[len(prod1)-1] == 0:
            a += 1
            if a == thresh_hold_frames and len(prod1)-1 > thresh_hold_frames:
                vid1 += 1
                if firstIndex == 0:
                    firstIndex = len(prod1)-1
                    row = 2
                elif firstIndex > 0 and secondIndex == 0:
                    secondIndex = len(prod1)-1
                    row = 3
                elif secondIndex > 0 and thirdIndex == 0:
                    thirdIndex = len(prod1)-1
                    row = 4
                elif thirdIndex > 0 and fourthIndex == 0:
                    fourthIndex = len(prod1)-1
                    row = 5
                elif fourthIndex > 0 and fifthIndex == 0:
                    fifthIndex =len(prod1)-1
                    row = 6
                elif fifthIndex > 0 and sixthIndex == 0:
                    sixthIndex = len(prod1)-1
                    row = 7
                elif sixthIndex > 0 and seventhIndex == 0:
                    seventhIndex = len(prod1)-1
                    row = 8
                elif seventhIndex > 0 and eightIndex == 0:
                    eightIndex = len(prod1)-1
                    row = 9
                elif eightIndex > 0 and ninthIndex == 0:
                    ninthIndex = len(prod1)-1
                    # row = 10
                elif ninthIndex > 0 and tenthIndex == 0:
                    tenthIndex = len(prod1)-1
        elif prod1[len(prod1)-1] == 1:
            a = 0

        # draw the text and timestamp on the frame
        # cv2.putText(frame, "Simulation Status: {}".format(text), (10, 20),
        #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        # cv2.putText(frame, datetime.datetime.now().strftime("%A %d %B %Y %I:%M:%S%p"),
        #             (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1)
        # show the frame and record if the user presses a key
        cv2.imshow("Security Feed", frame)
        cv2.imshow("Thresh", thresh)
        cv2.imshow("Frame Delta", frameDelta)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key is pressed, break from the loop
        if key == ord("q"):
            break

    # testing if no moving frames are missed
    moving_frames = np.count_nonzero(prod1==1)
    print("Counted moving frames: "+str(moving_frames))
    print("Moving frames in array: "+str(len(prod1_frames)))

    # making one video of all the moving frames
    for j in range(moving_frames):
        prod1_frame = cv2.cvtColor(prod1_frames[j], cv2.COLOR_BGR2RGB)
        prod1_out.write(prod1_frame)

    # making multiple videos for every separate movement
    for rows in range(len(prod1_matrix)):
        for column in range(len(prod1_matrix[rows])):
            prod1_frame1 = cv2.cvtColor(prod1_matrix[rows][column], cv2.COLOR_BGR2RGB)
            prod1_out_m[rows].write(prod1_frame1)
        prod1_out_m[rows].release()

    print(prod1)

    # cleanup the camera and close any open windows
    prod1_out.release()
    vs.release()
    cv2.destroyAllWindows()
Example #19
import cv2
import math
import time

import numpy as np

labelsPath = "./coco.names"
LABELS = open(labelsPath).read().strip().split("\n")

np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

weightsPath = "./yolov3.weights"
configPath = "./yolov3.cfg"
cap = cv2.VideoCapture('./queda.mp4')
hasFrame, frame = cap.read()
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
vid_writer = cv2.VideoWriter('output.avi',
                             cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                             (frame.shape[1], frame.shape[0]))

while cv2.waitKey(1) < 0:

    ret, image = cap.read()
    image = cv2.resize(image, (640, 360))
    (H, W) = image.shape[:2]
    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
    blob = cv2.dnn.blobFromImage(image,
                                 1 / 300.0, (416, 416),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    start = time.time()
Example #20
def trackMultipleObjects():
    rectangleColor = (0, 255, 0)
    frameCounter = 0
    currentCarID = 0
    fps = 0

    carTracker = {}
    carNumbers = {}
    carLocation1 = {}
    carLocation2 = {}
    speed = [None] * 1000

    # Write output to video file
    out = cv2.VideoWriter('outpy.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (WIDTH, HEIGHT))

    while True:
        start_time = time.time()
        rc, image = video.read()
        if type(image) == type(None):
            break

        image = cv2.resize(image, (WIDTH, HEIGHT))
        resultImage = image.copy()

        frameCounter = frameCounter + 1

        for carID in carTracker.keys():
            trackingQuality = carTracker[carID].update(image)

        if not (frameCounter % 10):
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            cars = carCascade.detectMultiScale(gray, 1.1, 13, 18, (24, 24))

            for (_x, _y, _w, _h) in cars:
                x = int(_x)
                y = int(_y)
                w = int(_w)
                h = int(_h)

                x_bar = x + 0.5 * w
                y_bar = y + 0.5 * h

                for carID in carTracker.keys():
                    trackedPosition = carTracker[carID].get_position()

                    t_x = int(trackedPosition.left())
                    t_y = int(trackedPosition.top())
                    t_w = int(trackedPosition.width())
                    t_h = int(trackedPosition.height())

                    t_x_bar = t_x + 0.5 * t_w
                    t_y_bar = t_y + 0.5 * t_h

        for carID in carTracker.keys():
            trackedPosition = carTracker[carID].get_position()

            t_x = int(trackedPosition.left())
            t_y = int(trackedPosition.top())
            t_w = int(trackedPosition.width())
            t_h = int(trackedPosition.height())

            cv2.rectangle(resultImage, (t_x, t_y), (t_x + t_w, t_y + t_h), rectangleColor, 4)

            # speed estimation
            carLocation2[carID] = [t_x, t_y, t_w, t_h]

        end_time = time.time()

        if not (end_time == start_time):
            fps = 1.0 / (end_time - start_time)
Example #21
def translate_video():
    # TensorFlow
    if var.get() == "Rococó":
        graph = load_graph('frozen_models/frozen_rococo.pb')
    elif var.get() == "Ukiyo-e":
        graph = load_graph('frozen_models/frozen_ukiyo.pb')
    elif var.get() == "Van Gogh":
        graph = load_graph('frozen_models/frozen_vg.pb')
    else:
        graph = load_graph('frozen_models/frozen_fauvism.pb')
    image_tensor = graph.get_tensor_by_name('image_tensor:0')
    output_tensor = graph.get_tensor_by_name('generate_output/output:0')
    sess = tf.Session(graph=graph)

    # Path to the input video file.
    video_path = askopenfilename()

    # Codec definition
    cap = cv2.VideoCapture(video_path)
    save_path = askdirectory(title='Directorio de guardado')
    save_path = save_path + "/video_transformed.avi"
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
    out = cv2.VideoWriter(save_path, fourcc, 25, (768, 256))

    # OpenCV

    if cap.isOpened() == False:
        print('Imposible obtener los datos del video.')
    while True:
        # Grab the next frame.
        ret, frame = cap.read()
        if ret == True:
            # Resize the frame to a size pix2pix can process
            frame_resize = resize_out(frame)
            # Pre-process the frame.
            gray_image = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY)
            gaussian_image = cv2.GaussianBlur(gray_image, (3, 3), 0)
            # Extract the edges.
            edge = 255 - auto_canny(gaussian_image)
            edge_color = cv2.cvtColor(edge, cv2.COLOR_GRAY2BGR)
            black_image = np.zeros(edge.shape, np.uint8)
            # Generate the prediction.
            combined_image = np.concatenate([edge, black_image], axis=1)
            image_rgb = cv2.cvtColor(
                combined_image,
                cv2.COLOR_BGR2RGB)  # OpenCV uses BGR instead of RGB
            generated_image = sess.run(output_tensor,
                                       feed_dict={image_tensor: image_rgb})
            image_bgr = cv2.cvtColor(np.squeeze(generated_image),
                                     cv2.COLOR_RGB2BGR)
            image_normal = np.concatenate(
                [frame_resize, edge_color, image_bgr], axis=1)

            # Write the frame to the output.
            out.write(image_normal)

            cv2.imshow('Procesando...', image_normal)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break

    sess.close()
    cap.release()
    out.release()
    print('Ha terminado la traducción.')
    cv2.destroyAllWindows()
Example #22
def main():
    args = build_argparser().parse_args()
    logger = logging.getLogger('main')

    is_benchmarking = False
    # initialize variables with the input arguments for easy access
    model_path_dict = {
        'FaceDetectionModel': args.faceDetectionModel,
        'LandmarkRegressionModel': args.landmarkRegressionModel,
        'HeadPoseEstimationModel': args.headPoseEstimationModel,
        'GazeEstimationModel': args.gazeEstimationModel
    }
    preview_flags = args.previewFlags
    input_filename = args.input
    device_name = args.device
    prob_threshold = args.prob_threshold
    output_path = args.output_path

    if input_filename.lower() == 'cam':
        feeder = InputFeeder(input_type='cam')
    else:
        if not os.path.isfile(input_filename):
            logger.error("Unable to find specified video file")
            exit(1)
        feeder = InputFeeder(input_type='video', input_file=input_filename)

    for model_path in list(model_path_dict.values()):
        if not os.path.isfile(model_path):
            logger.error("Unable to find specified model file" + str(model_path))
            exit(1)

    # instantiate model
    face_detection_model = FaceDetectionModel(model_path_dict['FaceDetectionModel'], device_name, threshold=prob_threshold)
    landmark_detection_model = LandmarkDetectionModel(model_path_dict['LandmarkRegressionModel'], device_name, threshold=prob_threshold)
    head_pose_estimation_model = HeadPoseEstimationModel(model_path_dict['HeadPoseEstimationModel'], device_name, threshold=prob_threshold)
    gaze_estimation_model = GazeEstimationModel(model_path_dict['GazeEstimationModel'], device_name, threshold=prob_threshold)

    if not is_benchmarking:
        mouse_controller = MouseController('medium', 'fast')

    # load Models
    start_model_load_time = time.time()
    face_detection_model.load_model()
    landmark_detection_model.load_model()
    head_pose_estimation_model.load_model()
    gaze_estimation_model.load_model()
    total_model_load_time = time.time() - start_model_load_time

    feeder.load_data()

    out_video = cv2.VideoWriter(os.path.join('output_video.mp4'), cv2.VideoWriter_fourcc(*'avc1'), int(feeder.get_fps()/10),
                                (1920, 1080), True)

    frame_count = 0
    start_inference_time = time.time()
    for ret, frame in feeder.next_batch():

        if not ret:
            break

        frame_count += 1

        key = cv2.waitKey(60)

        try:
            face_cords, cropped_image = face_detection_model.predict(frame)

            if type(cropped_image) == int:
                logger.warning("Unable to detect the face")
                if key == 27:
                    break
                continue

            left_eye_image, right_eye_image, eye_cords = landmark_detection_model.predict(cropped_image)
            pose_output = head_pose_estimation_model.predict(cropped_image)
            mouse_cord, gaze_vector = gaze_estimation_model.predict(left_eye_image, right_eye_image, pose_output)

        except Exception as e:
            logger.warning("Could not predict using model " + str(e) + " for frame " + str(frame_count))
            continue

        image = cv2.resize(frame, (500, 500))

        if not len(preview_flags) == 0:
            preview_frame = draw_preview(
                frame, preview_flags, cropped_image, left_eye_image, right_eye_image,
                face_cords, eye_cords, pose_output, gaze_vector)
            image = np.hstack((cv2.resize(frame, (500, 500)), cv2.resize(preview_frame, (500, 500))))

        cv2.imshow('preview', image)
        out_video.write(frame)

        if frame_count % 5 == 0 and not is_benchmarking:
            mouse_controller.move(mouse_cord[0], mouse_cord[1])

        if key == 27:
            break

    total_time = time.time() - start_inference_time
    total_inference_time = round(total_time, 1)
    fps = frame_count / total_inference_time

    try:
        os.mkdir(output_path)
    except OSError as error:
        logger.error(error)

    with open(output_path+'stats.txt', 'w') as f:
        f.write(str(total_inference_time) + '\n')
        f.write(str(fps) + '\n')
        f.write(str(total_model_load_time) + '\n')

    logger.info('Model load time: ' + str(total_model_load_time))
    logger.info('Inference time: ' + str(total_inference_time))
    logger.info('FPS: ' + str(fps))

    logger.info('Video stream ended')
    cv2.destroyAllWindows()
    feeder.close()
import cv2
import numpy as np
import glob
import random

nIMAGES = 351 * 3
files = glob.glob('/home/jps/Downloads/merge_from_ofoct (2).jpg')
ik = cv2.imread('/home/jps/Downloads/merge_from_ofoct (2).jpg')
sk = ik.shape
print(sk)
mov = '/home/jps/Downloads/' + 'circle_2.avi'
MOV = cv2.VideoWriter(filename=mov,
                      fourcc=cv2.VideoWriter_fourcc('F', 'M', 'P', '4'),
                      fps=25,
                      frameSize=(640, sk[0]))
for i in np.arange(1, nIMAGES):
    print('Working on: ' + files[0][-14:-4])
    image = cv2.imread(files[0], -1)
    # cv2.imshow("image", image)
    # cv2.waitKey(0)
    print((0 + 4 * i), (640 + 4 * i))
    imageout = image[0:sk[0], (0 + 4 * i):(640 + 4 * i)]

    # cv2.imshow("imageout", imageout)
    # cv2.waitKey(0)

    MOV.write(imageout)
    # MOV.write(dst)

    #/home/jps/Desktop/videocorp/CORP/circular_pipes_1.jpg.jpgcrop y:h, x:w
        right = int(right * r)
        bottom = int(bottom * r)
        left = int(left * r)

        # draw the predicted face name on the image
        cv2.rectangle(frame, (left, top), (right, bottom),
                      (0, 255, 0), 2)
        y = top - 15 if top - 15 > 15 else top + 15
        cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0), 2)

    # if the video writer is None *AND* we are supposed to write
    # the output video to disk, initialize the writer
    if writer is None and args["output"] is not None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, 20,
                                 (frame.shape[1], frame.shape[0]), True)

    # if the writer is not None, write the frame with recognized
    # faces to disk
    if writer is not None:
        writer.write(frame)

    # check to see if we are supposed to display the output frame to
    # the screen
    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
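The excerpt ends inside the display loop; a hedged sketch of the cleanup that would follow it, assuming `writer` stays None when no output path was given:

# After the loop (sketch): close windows and finalize the writer if one was created
cv2.destroyAllWindows()
if writer is not None:
    writer.release()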
Example #25
    def predict_pcl_files(self,
                          pcl_path: str = None,
                          image_root_path: str = None,
                          net_config_path: str = None,
                          class_config_path: str = None,
                          ckpt_path: str = None,
                          save: bool = False,
                          viewer_port: Optional[int] = None):
        """predict a single point cloud data from numpy array format
        expected shape=[n_points, xyzr=4]

        :param pcl_path: .bin file or folder with .bin files, saved numpy array data
        :param image_root_path:
        :param net_config_path: .yaml file, model setup
        :param class_config_path: dataset class configure
        :param ckpt_path: .tckpt file, trained weights
        :param show_gt: if display GT label
        :param save: save to video
        :param viewer_port:
        """

        if pcl_path is None:
            pcl_path = self.example_pcl_folderpath
        if net_config_path is None:
            net_config_path = self.default_net_config_path
        if class_config_path is None:
            class_config_path = self.default_class_config_path
        if ckpt_path is None:
            ckpt_path = self.default_ckpt_path

        if os.path.isdir(pcl_path):
            filenames = [Path(os.path.join(pcl_path, f))
                         for f in os.listdir(pcl_path)
                         if f.endswith(".bin")]
        else:
            filenames = [Path(pcl_path), ]
        filenames.sort()

        root_path = filenames[0].parent
        if image_root_path is None:
            image_root_path = Path(root_path) / "image_2"
        else:
            image_root_path = Path(image_root_path)

        if save:
            fourcc = cv2.VideoWriter_fourcc(*'MP4V')
            out_stream = cv2.VideoWriter("./output.avi", fourcc, 10.0, (1000, 900))
        else:
            out_stream = None
        vis_img = np.zeros((900, 1000, 3), dtype=np.uint8)

        if viewer_port is not None:
            from utils.visualization import LidarViewer
            lidar_viewer = LidarViewer(viewer_port)
        else:
            lidar_viewer = None

        detector = SecondDetector(net_config_path, class_config_path, ckpt_path, detect_range=(-50, -50, 50, 50))

        for file_path in filenames:
            points = np.fromfile(file_path, dtype=np.float32, count=-1).reshape([-1, 4])
            print(file_path, points.shape)
            start = time()
            res = detector.predict_on_points(points)
            end = time()
            boxes_lidar = res[0]["box3d_lidar"].detach().cpu().numpy()
            scores = res[0]["scores"].detach().cpu().numpy()
            labels = res[0]["label_preds"].detach().cpu().numpy()

            if lidar_viewer is not None:
                lidar_viewer.load_points(points[:, :3], points[:, 3])
            # print("--------")
            # print(f">> filename: {file_path}")
            # print(f">> scores:   {scores}")
            # print(f">> labels:   {labels}")
            # print(f">> time:     {end-start} [s]")
            # print(">> boxes_lidar: ")
            # for each in boxes_lidar:
            #     print(each)  # (x, y, z, w, l, h, yaw) in velodyne coordinates
            bev = detector.visualize_bev(points, boxes_lidar, labels=labels)

            file_token = file_path.stem
            img = cv2.imread(str(image_root_path/(file_token + ".png")))
            img = cv2.resize(img, (1000, 300))
            vis_img[:300, :, :] = img
            vis_img[300:, :, :] = bev[::-1, :, :]

            # if show_gt:
            #     label_filepath = root_path / "label_2" / (file_token + ".txt")
            #     calib_filepath = root_path / "calib" / (file_token + ".txt")
            #     labels = read_kitti_label(label_filepath)
            #     calib = KittiCalibrationParser(calib_filepath)
            #     for each in labels:
            #         print(each)
            #         if each["name"] != "DontCare":
            #             # labels are in ref camera coord. thus here transform to lidar coord
            #             each["box"][:3] = calib.project_ref_to_velo(each["box"][:3])
            #             bev = detector.draw_box_in_bev(bev, each["box"].reshape([-1, 7]), labels)
            if save:
                out_stream.write(vis_img)

            cv2.imshow("bev",bev[::-1, :, :])
            cv2.imshow("img", vis_img)
            if cv2.waitKey(1) == 27:
                break

        cv2.destroyAllWindows()
        if save:
            out_stream.release()
Example #26
def main():
    parser = argparse.ArgumentParser(description='Whiteboard inpainting demo')
    parser.add_argument(
        '-i',
        '--input',
        required=True,
        help='Required. Path to a video file or a device node of a web-camera.'
    )
    parser.add_argument('--loop',
                        default=False,
                        action='store_true',
                        help='Optional. Enable reading the input in a loop.')
    parser.add_argument('-o',
                        '--output',
                        required=False,
                        help='Optional. Name of the output file(s) to save.')
    parser.add_argument('-limit',
                        '--output_limit',
                        required=False,
                        default=1000,
                        type=int,
                        help='Optional. Number of frames to store in output. '
                        'If 0 is set, all frames are stored.')
    parser.add_argument(
        '-m_i',
        '--m_instance_segmentation',
        type=str,
        required=False,
        help='Required. Path to the instance segmentation model.')
    parser.add_argument(
        '-m_s',
        '--m_semantic_segmentation',
        type=str,
        required=False,
        help='Required. Path to the semantic segmentation model.')
    parser.add_argument(
        '-t',
        '--threshold',
        type=float,
        default=0.6,
        help='Optional. Threshold for person instance segmentation model.')
    parser.add_argument('--no_show',
                        help="Optional. Don't show output.",
                        action='store_true')
    parser.add_argument(
        '-d',
        '--device',
        type=str,
        default='CPU',
        help=
        'Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is '
        'acceptable. The demo will look for a suitable plugin for the device specified.'
    )
    parser.add_argument('-u',
                        '--utilization_monitors',
                        default='',
                        type=str,
                        help='Optional. List of monitors to show initially.')
    args = parser.parse_args()

    cap = open_images_capture(args.input, args.loop)
    if cap.get_type() not in ('VIDEO', 'CAMERA'):
        raise RuntimeError(
            "The input should be a video file or a numeric camera ID")

    if bool(args.m_instance_segmentation) == bool(
            args.m_semantic_segmentation):
        raise ValueError(
            'Set up exactly one of segmentation models: '
            '--m_instance_segmentation or --m_semantic_segmentation')

    labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes'
    mouse = MouseClick()
    if not args.no_show:
        cv2.namedWindow(WINNAME)
        cv2.setMouseCallback(WINNAME, mouse.get_points)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation
    log.info('Reading model {}'.format(model_path))
    if args.m_instance_segmentation:
        labels_file = str(labels_dir / 'coco_80cl_bkgr.txt')
        segmentation = MaskRCNN(core, args.m_instance_segmentation,
                                labels_file, args.threshold, args.device)
    elif args.m_semantic_segmentation:
        labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt')
        segmentation = SemanticSegmentation(core, args.m_semantic_segmentation,
                                            labels_file, args.threshold,
                                            args.device)
    log.info('The model {} is loaded to {}'.format(model_path, args.device))

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()
    black_board = False
    frame_number = 0
    key = -1

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    out_frame_size = (frame.shape[1], frame.shape[0] * 2)
    output_frame = np.full((frame.shape[0], frame.shape[1], 3),
                           255,
                           dtype='uint8')
    presenter = monitors.Presenter(
        args.utilization_monitors, 20,
        (out_frame_size[0] // 4, out_frame_size[1] // 16))
    if args.output and not video_writer.open(args.output,
                                             cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        mask = None
        detections = segmentation.get_detections([frame])
        expand_mask(detections, frame.shape[1] // 27)
        if len(detections[0]) > 0:
            mask = detections[0][0][2]
            for i in range(1, len(detections[0])):
                mask = cv2.bitwise_or(mask, detections[0][i][2])

        if mask is not None:
            mask = np.stack([mask, mask, mask], axis=-1)
        else:
            mask = np.zeros(frame.shape, dtype='uint8')

        clear_frame = remove_background(frame, invert_colors=not black_board)

        output_frame = np.where(mask, output_frame, clear_frame)
        merged_frame = np.vstack([frame, output_frame])
        merged_frame = cv2.resize(merged_frame, out_frame_size)

        metrics.update(start_time, merged_frame)

        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        frame_number <= args.output_limit - 1):
            video_writer.write(merged_frame)

        presenter.drawGraphs(merged_frame)
        if not args.no_show:
            cv2.imshow(WINNAME, merged_frame)
            key = check_pressed_keys(key)
            if key == 27:  # 'Esc'
                break
            if key == ord('i'):  # 'i' inverts the board colors (white board <-> black board)
                black_board = not black_board
                output_frame = 255 - output_frame
            else:
                presenter.handleKey(key)

        if mouse.crop_available:
            x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \
                     max(mouse.points[0][0], mouse.points[1][0])
            y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \
                     max(mouse.points[0][1], mouse.points[1][1])
            x1, y1 = min(x1, output_frame.shape[1] - 1), min(
                y1, output_frame.shape[0] - 1)
            board = output_frame[y0:y1, x0:x1, :]
            if board.shape[0] > 0 and board.shape[1] > 0:
                cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO)
                cv2.imshow('Board', board)

        frame_number += 1
        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
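The heart of the loop above is the compositing rule: wherever the expanded person mask is set, the previously accumulated board content is kept, and everywhere else the background-removed frame is taken. A minimal NumPy sketch of just that rule, with hypothetical board, person_mask and clear_frame arrays, might look like this:

import numpy as np

def update_board(board, person_mask, clear_frame):
    # board: board content accumulated so far (HxWx3)
    # person_mask: HxWx3 mask, nonzero where a person occludes the board
    # clear_frame: current frame with the background removed
    # Keep the old board pixels under the person, take the clean frame elsewhere.
    return np.where(person_mask, board, clear_frame)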
Ejemplo n.º 27
0
# Snippet from a Wav2Lip-style inference script; `args`, `audio`, `datagen`,
# `load_model`, `mel_step_size` and `device` are defined elsewhere in that script.
import os
import subprocess
from glob import glob

import cv2
import numpy as np
import torch
from tqdm import tqdm


def main():
	if not os.path.isfile(args.face):
		fnames = list(glob(os.path.join(args.face, '*.jpg')))
		sorted_fnames = sorted(fnames, key=lambda f: int(os.path.basename(f).split('.')[0]))
		full_frames = [cv2.imread(f) for f in sorted_fnames]
		fps = args.fps  # a frame folder carries no fps metadata; fall back to the --fps argument

	elif args.face.split('.')[1] in ['jpg', 'png', 'jpeg']:
		full_frames = [cv2.imread(args.face)]
		fps = args.fps

	else:
		video_stream = cv2.VideoCapture(args.face)
		fps = video_stream.get(cv2.CAP_PROP_FPS)

		print('Reading video frames...')

		full_frames = []
		while 1:
			still_reading, frame = video_stream.read()
			if not still_reading:
				video_stream.release()
				break
			if args.resize_factor > 1:
				frame = cv2.resize(frame, (frame.shape[1]//args.resize_factor, frame.shape[0]//args.resize_factor))

			if args.rotate:
				frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

			y1, y2, x1, x2 = args.crop
			if x2 == -1: x2 = frame.shape[1]
			if y2 == -1: y2 = frame.shape[0]

			frame = frame[y1:y2, x1:x2]

			full_frames.append(frame)

	print ("Number of frames available for inference: "+str(len(full_frames)))

	if not args.audio.endswith('.wav'):
		print('Extracting raw audio...')
		command = 'ffmpeg -y -i {} -strict -2 {}'.format(args.audio, 'temp/temp.wav')

		subprocess.call(command, shell=True)
		args.audio = 'temp/temp.wav'

	wav = audio.load_wav(args.audio, 16000)
	mel = audio.melspectrogram(wav)
	print(mel.shape)

	if np.isnan(mel.reshape(-1)).sum() > 0:
		raise ValueError('Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')

	mel_chunks = []
	mel_idx_multiplier = 80./fps 
	i = 0
	while 1:
		start_idx = int(i * mel_idx_multiplier)
		if start_idx + mel_step_size > len(mel[0]):
			mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
			break
		mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size])
		i += 1

	print("Length of mel chunks: {}".format(len(mel_chunks)))

	full_frames = full_frames[:len(mel_chunks)]

	batch_size = args.wav2lip_batch_size
	gen = datagen(full_frames.copy(), mel_chunks)

	for i, (img_batch, mel_batch, frames, coords) in enumerate(tqdm(gen, 
											total=int(np.ceil(float(len(mel_chunks))/batch_size)))):
		if i == 0:
			model = load_model(args.checkpoint_path)
			print ("Model loaded")

			frame_h, frame_w = full_frames[0].shape[:-1]
			out = cv2.VideoWriter('temp/result.avi', 
									cv2.VideoWriter_fourcc(*'DIVX'), fps, (frame_w, frame_h))

		img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
		mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)

		with torch.no_grad():
			pred = model(mel_batch, img_batch)

		pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255.
		
		for p, f, c in zip(pred, frames, coords):
			y1, y2, x1, x2 = c
			p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))

			f[y1:y2, x1:x2] = p
			out.write(f)

	out.release()

	command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format(args.audio, 'temp/result.avi', args.outfile)
	subprocess.call(command, shell=True)
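The mel/video alignment above relies on the spectrogram running at 80 frames per second: video frame i starts at mel column int(i * 80 / fps), every chunk is mel_step_size columns wide, and the last chunk is right-aligned to the end of the spectrogram. A standalone sketch of that chunking (mel_step_size is an assumption here; Wav2Lip uses 16):

def chunk_mel(mel, fps, mel_step_size=16):
    # mel: (n_mels, T) spectrogram computed at 80 frames per second
    chunks = []
    i = 0
    while True:
        start = int(i * 80.0 / fps)
        if start + mel_step_size > mel.shape[1]:
            # Not enough columns left: right-align the final chunk.
            chunks.append(mel[:, mel.shape[1] - mel_step_size:])
            break
        chunks.append(mel[:, start:start + mel_step_size])
        i += 1
    return chunks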
Ejemplo n.º 28
0
import cv2
import glob
import random
import math
import numpy as np
import dlib
import itertools
import face_recognition
from sklearn.svm import SVC
from PIL import Image, ImageDraw

v1 = cv2.VideoCapture("Young_blonde_woman_6.mp4")
frame_width = int(v1.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(v1.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                      10, (frame_width, frame_height))

face_locations = []
face_encodings = []
face_emotions = []

process_this_frame = True

emotions = [
    "anger", "disgust", "fear", "happiness", "neutral", "sadness", "surprise"
]  # Emotion list
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(
    "shape_predictor_68_face_landmarks.dat"
)  # Use this to draw landmarks on detected face
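The per-frame loop of this example is not included; a minimal sketch of one, using only the objects created above (v1, detector, predictor, clahe, out) and drawing the 68 dlib landmarks rather than the original emotion logic, could be:

while True:
    ret, frame = v1.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = clahe.apply(gray)            # contrast normalization before detection
    for rect in detector(gray, 0):      # dlib frontal face detector
        shape = predictor(gray, rect)   # 68-point landmark model
        for k in range(shape.num_parts):
            p = shape.part(k)
            cv2.circle(frame, (p.x, p.y), 1, (0, 255, 0), -1)
    out.write(frame)

v1.release()
out.release()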
    # (Fragment from a separate detection script: `cap` is a cv2.VideoCapture
    #  and `opt` holds parsed command-line options.)
    # Video feed dimensions
    _, frame = cap.read()
    v_height, v_width = frame.shape[:2]

    # print(v_height,v_width)

    # Output saving
    if opt.save_video:
        fourcc = cv2.VideoWriter_fourcc(*'MP4V')

        filename = opt.input_file_path.split("/")[-1]
        filepath = os.path.join(opt.output_path, filename)

        fps = cap.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(filepath, fourcc, fps, (v_width, v_height))

    print("\nPerforming object detection:")

    # Side length of the square (letterboxed) black canvas
    x = y = v_height if v_height > v_width else v_width

    # Offsets for centering the original frame on the black canvas
    start_new_i_height = int((y - v_height) / 2)
    start_new_i_width = int((x - v_width) / 2)

    # Scale factor to map detections back onto the original frame
    mul_constant = x / opt.frame_size
    # print(mul_constant)

    # for text in output
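The offsets and mul_constant above imply a letterboxing step: the frame is pasted onto a square black canvas, the canvas is resized to opt.frame_size for the detector, and the resulting boxes are scaled by mul_constant and shifted by the start offsets. A small sketch of the canvas step (the function name and signature are illustrative, not from the original script):

import numpy as np

def letterbox_square(frame):
    # Paste `frame` centered on a square black canvas whose side is
    # max(height, width); return the canvas plus the paste offsets so
    # detections can be mapped back to frame coordinates later.
    h, w = frame.shape[:2]
    side = max(h, w)
    canvas = np.zeros((side, side, frame.shape[2]), dtype=frame.dtype)
    top, left = (side - h) // 2, (side - w) // 2
    canvas[top:top + h, left:left + w] = frame
    return canvas, top, left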
import cv2 as cv

video = cv.VideoCapture(0)

fourcc = cv.VideoWriter_fourcc(*'XVID')
out = cv.VideoWriter("video/kayit.avi", fourcc, 20.0, (640, 480))

resim = cv.imread("resim/ad_soyad.jpg")  # still image (name/surname card) spliced into the recording
resim = cv.resize(resim, (640, 480))     # must match the VideoWriter frame size or writes are silently dropped

i = 0
while video.isOpened():
    i += 1
    ret, frame = video.read()
    if i % 10 == 0:
        # Every 10th frame, substitute the still image for the camera frame.
        frame = resim
        i = 0
    #print(nisangah.shape)
    #print(nisangah[0, 0])
    if ret:

        out.write(frame)
        cv.imshow("kamera", frame)
        if cv.waitKey(33) == ord('q'):
            break

out.release()
video.release()
cv.destroyAllWindows()