import cv2
import numpy as np
from ffpyplayer.player import MediaPlayer

sourcePath = "Dog.mp4"
cap = cv2.VideoCapture(sourcePath)
player = MediaPlayer(sourcePath)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
# The writer's frame size must match the frames passed to write(); the frames
# below are resized to half resolution, so the writer is opened at that size.
out = cv2.VideoWriter("dog_out_1.mp4", fourcc, 30, (width // 2, height // 2))

while True:
    ret, frame = cap.read()
    audio_frame, val = player.get_frame()
    if not ret:
        break
    # frame2_resized = cv2.resize(frame2, (500, 500))
    frame_resized = cv2.resize(frame, (width // 2, height // 2))
    rows, columns, channels = frame_resized.shape
    R = cv2.getRotationMatrix2D((columns / 2, rows / 2), 270, 0.5)
    frame2_rot = cv2.warpAffine(frame_resized, R, (columns, rows))
    # img_resize = cv2.resize(img, (width, height))
    # together = cv2.addWeighted(img_resize, 0.25, frame, 1, 0, frame)
    # cv2.imshow('test', frame)
    cv2.imshow("bvid", frame2_rot)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    # out.write(frame)
    out.write(frame2_rot)
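# The loop above writes a silent file: ffpyplayer only plays the audio during
# preview, it does not embed it in "dog_out_1.mp4". A minimal sketch for muxing
# the original audio track back in afterwards (this assumes the ffmpeg CLI is
# installed and on PATH; the output name "dog_out_with_audio.mp4" is only
# illustrative):
import subprocess

subprocess.call([
    "ffmpeg", "-y",
    "-i", "dog_out_1.mp4",   # silent video written by cv2.VideoWriter
    "-i", "Dog.mp4",         # original clip, used here only for its audio stream
    "-c:v", "copy", "-map", "0:v:0", "-map", "1:a:0",
    "dog_out_with_audio.mp4",
])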
def row_keyword_mapper(self, psm=11, oem=3, sorting='rows'): ### Maps text in each bounding boxes for file names if sorting == 'rows': self.key_mapping_l2r = dict() for key in self.mapping_l2r.keys(): self.img = cv2.imread(key) print(key) height, width = self.img.shape[:2] video = None if CONF.write_video: fourcc = cv2.VideoWriter_fourcc(*'XVID') fname = extract_basename(key) video = cv2.VideoWriter( 'tess_{}.avi'.format(fname.split('.')[0]), fourcc, float(1), (width, height)) # print(height, width) # self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB) box = self.mapping_l2r[key] key_word = [] ###### Parallelization can be used box.sort_row_wise() img = self.img.copy() key_word = self.func(box, video=video) #################################### # for row in box.rows: # words = [] # for box in row: # x, y, w, h = box # # img2 = self.img[y-3:y+h+3, x-3:x+w+3, :] # img2 = self.img[y:y+h, x:x+w, :] # text = pytesseract.image_to_string(img2, output_type=pytesseract.Output.DICT, lang='eng', config='-c tessedit_char_whitelist="$%@.,&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789/\\ " --psm {} --oem {}'.format(psm, oem)) # # print(text) # text = text['text'] # # if text == '': # # plt.imshow(img2, cmap='gray') # # plt.show() # # li = [img2] # # text = self.recognizer.recognize(li) # # # text = self.recognizer.recognize_from_boxes([self.img], [[[x, y, w, h]]]) # # print(text) # img = cv2.rectangle(img,(x,y),(x+w,y+h),(0, 255, 0),2) # if CONF.write_video: # text = text.split('\n') # text = [t.strip('\n') for t in text] # text = [t for t in text if t != ''] # _text = copy.deepcopy(text) # text = ' '.join(text) # for t in _text: # temp_img = img.copy() # image = cv2.putText(temp_img, t, (x, y), font, fontscale, color, thickness, cv2.LINE_AA) # video.write(image) # # plt.imshow(image) # # plt.show() # words.append(text) # key_word.append(words) # ################################ self.key_mapping_l2r[key] = key_word cv2.imwrite('out.jpg', img) else: self.key_mapping_t2b = dict() for key in self.mapping_t2b.keys(): self.img = cv2.imread(key) box = self.mapping_t2b[key] key_word = [] ###### Parallelization can be used box.sort_col_wise() for row in box.rows: words = [] for box in row: x, y, w, h = box img2 = self.img[y - 3:y + h + 3, x - 3:x + w + 3, :] text = pytesseract.image_to_string( img2, output_type=pytesseract.Output.DICT, lang='eng', config=CONF.TESS_CONFIG) # text = pytesseract.image_to_string(img2, output_type=pytesseract.Output.DICT, lang='eng', config='-c tessedit_char_whitelist="$%@&():ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789/\\\n " --psm {} --oem {}'.format(psm, oem)) text = text['text'].split('\n') text = [t.strip('\n') for t in text] text = [t for t in text if t != ''] words.append(text) key_word.append(words) self.key_mapping_t2b[key] = key_word
params['model_pose'] = "BODY_25" # params['model_pose'] = "COCO" #18 keypoints # params['model_pose'] = "MPI" # 15 keypoints # params['model_pose'] = "MPI_4_layers" # 15 keypoints, less accurate but faster # params['net_resolution']="-1x368" # default, best balance, 16*23 assert(os.path.exists(args[0].video)),'Video Path given does not exist!' cap = cv2.VideoCapture(args[0].video) frame_w = cap.get(3) frame_h = cap.get(4) vid_fps = cap.get(5) basename = os.path.basename(args[0].video).split('.')[0] fourcc = cv2.VideoWriter_fourcc('H','2','6','4') out_vid = cv2.VideoWriter(basename+'_cc.avi',fourcc, vid_fps, (int(frame_w), int(frame_h))) ratio = frame_w / float(frame_h) # net_height_mult = 69 #set this # net_height_mult = 60 #set this net_height_mult = 45 #set this # net_height_mult = 23 #set this net_height = net_height_mult * 16 net_width = net_height*ratio net_width = int((net_width // 16 + 1) * 16) params['net_resolution']='{:0}x{:0}'.format(net_width, net_height) print("Net Resolution Set: ", params['net_resolution']) # params['net_resolution']="-1x1072" # 16*51 params['scale_number'] = 1
import time

import cv2
import RPi.GPIO as GPIO
from picamera.array import PiRGBArray
from picamera import PiCamera

# initialize the camera and grab a reference to the raw camera capture
resX = 240
resY = 180
camera = PiCamera()
camera.resolution = (resX, resY)
camera.framerate = 10
rawCapture = PiRGBArray(camera, size=(resX, resY))

print(time.strftime("%H_%M_%S"))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(
    time.strftime("%H_%M_%S") + '.avi', fourcc, 20.0, (resX, resY))

# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
detectFlag = 0
detectCounter = [0]

# allow the camera to warm up
time.sleep(0.1)

GPIO.setmode(GPIO.BOARD)
GPIO.setup(16, GPIO.OUT)


def classfier(testImage, threadNum, capTime, detectCounter):
import numpy as np
import cv2
from utils import CFEVideoConf, image_resize

cap = cv2.VideoCapture(0)

save_path = 'saved-media/glasses_and_stash.mp4'
frames_per_seconds = 24
config = CFEVideoConf(cap, filepath=save_path, res='720p')
out = cv2.VideoWriter(save_path, config.video_type, frames_per_seconds, config.dims)

face_cascade = cv2.CascadeClassifier(
    'cascades/data/haarcascade_frontalface_default.xml')
eyes_cascade = cv2.CascadeClassifier(
    'cascades/third-party/frontalEyes35x16.xml')
nose_cascade = cv2.CascadeClassifier('cascades/third-party/Nose18x15.xml')

glasses = cv2.imread("images/fun/glasses.png", -1)
mustache = cv2.imread('images/fun/mustache.png', -1)

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.5, minNeighbors=5)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
    for (x, y, w, h) in faces:
def detect_realtime(Yolo, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''): times = [] vid = cv2.VideoCapture(0) # by default VideoCapture returns float instead of int width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = int(vid.get(cv2.CAP_PROP_FPS)) codec = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 while True: _, frame = vid.read() try: original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB) except: break image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) #image_data = tf.expand_dims(image_data, 0) image_data = image_data[np.newaxis, ...].astype(np.float32) t1 = time.time() if YOLO_FRAMEWORK == "tf": pred_bbox = Yolo.predict(image_data) elif YOLO_FRAMEWORK == "trt": batched_input = tf.constant(image_data) result = Yolo(batched_input) pred_bbox = [] for key, value in result.items(): value = value.numpy() pred_bbox.append(value) t2 = time.time() pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] pred_bbox = tf.concat(pred_bbox, axis=0) bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) bboxes = nms(bboxes, iou_threshold, method='nms') times.append(t2 - t1) times = times[-20:] ms = sum(times) / len(times) * 1000 fps = 1000 / ms print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps)) frame = draw_bbox(original_frame, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors) image = cv2.putText(frame, "Time: {:.1f}FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) if output_path != '': out.write(frame) if show: cv2.imshow('output', frame) if cv2.waitKey(25) & 0xFF == ord("q"): cv2.destroyAllWindows() break cv2.destroyAllWindows()
    cv2.putText(img, text=text, org=(x, y), fontScale=font_scale,
                fontFace=font_face, thickness=thickness, color=color,
                lineType=line_type)
    y += 60

    # if you don't want to save the output as a video, set this to False
    save_video = True

    if save_video:
        if vw is None:
            codec = cv2.VideoWriter_fourcc(*'DIVX')
            vid_width_height = img.shape[1], img.shape[0]
            vw = cv2.VideoWriter(mnist_prediction_path, codec, 30, vid_width_height)
        # 15 fps above doesn't work robustly, so we write the frame twice at 30 fps
        vw.write(img)
        vw.write(img)

    # scale down image for display
    img_disp = cv2.resize(img, (0, 0), fx=0.5, fy=0.5)
    cv2_imshow(img_disp)
    IPython.display.clear_output(wait=True)

cap.release()
if vw is not None:
    vw.release()
NMS_THRESH = 0.15

print(args.video_path)
if not os.path.exists(args.video_path):
    print('Video does not exist.')

video = cv2.VideoCapture(args.video_path)

# Get width, height
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))    # float
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # float

# Define the codec and create VideoWriter object
# TODO: The videos I am using are 30 fps, but you should get this programmatically.
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc,
                      30.0, (width, height))

n_frame = 1
# TODO: add time function per frame.
while True:
    ret, frame = video.read()
    if ret == True:
        # frame is a BGR cv2 image.
        # Detect all object classes and regress object bounds
        scores, boxes = im_detect(net, frame)
        cls_ind = 1
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
@author: Royzon
@contact: [email protected]
@time: 19-1-6
'''
import cv2
import face_recognition
import numpy as np
import linecache
import time

from face_register import get_face_encoding

width, height = 768, 432
# fourcc = cv2.VideoWriter_fourcc(*'XVID')
# fourcc = cv2.VideoWriter_fourcc(*'X264')
fourcc = cv2.VideoWriter_fourcc(*"MJPG")
# fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', '2')
out = cv2.VideoWriter('./data/output.mp4', fourcc, 20.0, (width, height))

choos = input("whether to register face (y/n): ")
if choos == 'y':
    get_face_encoding()
else:
    print('go to next steps...')

recog = input("whether to recognize face (y/n): ")
if recog == 'y':
    face_file = "./data/face.txt"
    rec_name = ""
    rec_id_num = ""
    total_name = []
    total_id_num = []
    total_face_encoding = []
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 27 23:13:59 2019

@author: Ahmed khaled
"""
import numpy as np
import cv2
import pyscreenshot as pys

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 8, (1920, 1080))

while True:
    img = pys.grab()
    img_np = np.array(img)
    # frame = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    cv2.imshow('Screen', img_np)
    out.write(img_np)
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break

out.release()
cv2.destroyAllWindows()
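# Note: pyscreenshot returns a PIL image in RGB channel order, while OpenCV's
# VideoWriter expects BGR, so the colors in 'output.avi' come out swapped. A
# minimal fix is to convert before writing (sketch, reusing img_np and out from
# the loop above):
#     img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
#     out.write(img_bgr)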
      (total_files, filetype))

# Iterate over files of given type in input directory
for c, filename in enumerate(
        [f for f in os.listdir(directory) if f.endswith(filetype)]):
    print("Processing file '%s' (%s of %s)." % (filename, c + 1, total_files))
    video = cv2.VideoCapture(filename)

    # Gather info about input video
    fps = int(video.get(cv2.CAP_PROP_FPS))
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create VideoWriter object for output
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    fn, ext = os.path.splitext(os.path.basename(filename))
    out = cv2.VideoWriter("%s/%s_%s%s" % (output_directory, fn, suffix, ext),
                          fourcc, fps, (width, height))

    # Flip video frame by frame and write to output file
    while video.isOpened():
        ret, frame = video.read()
        if ret:
            frame = cv2.flip(frame, 1)
            out.write(frame)
        else:
            break

    video.release()
    out.release()
def _main_(args): config_path = args.conf input_path = args.input output_path = args.output with open(config_path) as config_buffer: config = json.load(config_buffer) makedirs(output_path) ############################### # Set some parameter ############################### net_h, net_w = 1088, 1920 # 416, 416 # a multiple of 32, the smaller the faster obj_thresh, nms_thresh = 0.5, 0.45 # 0.5, 0.45 ############################### # Load the model ############################### os.environ['CUDA_VISIBLE_DEVICES'] = config['valid']['gpus'] #infer_model = load_model(config['train']['saved_weights_name']) infer_model = load_model( str( os.path.join(config['train']['tensorboard_dir'], config['train']['saved_weights_name'] + '-backup.h5'))) ############################### # Predict bounding boxes ############################### if 'webcam' in input_path: # do detection on the first webcam video_reader = cv2.VideoCapture(0) # the main loop batch_size = 1 images = [] while True: ret_val, image = video_reader.read() if ret_val == True: images += [image] if (len(images) == batch_size) or (ret_val == False and len(images) > 0): batch_boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh) for i in range(len(images)): draw_boxes(images[i], batch_boxes[i], config['model']['labels'], obj_thresh) cv2.imshow('video with bboxes', images[i]) images = [] if cv2.waitKey(1) == 27: break # esc to quit cv2.destroyAllWindows() elif input_path[-4:] == '.mp4': # do detection on a video video_out = output_path + input_path.split('/')[-1] video_reader = cv2.VideoCapture(input_path) nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT)) frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT)) frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH)) video_writer = cv2.VideoWriter(video_out, cv2.VideoWriter_fourcc(*'MPEG'), 50.0, (frame_w, frame_h)) # the main loop batch_size = 1 images = [] start_point = 0 #% show_window = False #for i in tqdm(range(nb_frames)): for i in range(nb_frames): _, image = video_reader.read() if (float(i + 1) / nb_frames) > start_point / 100.: images += [image] if (i % batch_size == 0) or (i == (nb_frames - 1) and len(images) > 0): # predict the bounding boxes batch_boxes = get_yolo_boxes(infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh) for i in range(len(images)): # draw bounding boxes on the image using labels draw_boxes(images[i], batch_boxes[i], config['model']['labels'], obj_thresh) # show the video with detection bounding boxes if show_window: cv2.imshow('video with bboxes', images[i]) # write result to the output video video_writer.write(images[i]) images = [] if show_window and cv2.waitKey(1) == 27: break # esc to quit if show_window: cv2.destroyAllWindows() video_reader.release() video_writer.release() else: # do detection on an image or a set of images image_paths = [] if os.path.isdir(input_path): for inp_file in os.listdir(input_path): image_paths += [os.path.join(input_path, inp_file)] else: image_paths += [input_path] image_paths = [ inp_file for inp_file in image_paths if (inp_file[-4:] in ['.jpg', '.png', 'JPEG', '.JPG', '.PNG']) ] # the main loop for image_path in image_paths: image = cv2.imread(image_path) iid = os.path.basename(image_path) # predict the bounding boxes boxes = get_yolo_boxes(infer_model, [image], net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh)[0] # print out boxes info for _box in boxes: label_name = config['model']['labels'][_box.get_label()] 
                score = float(_box.get_score())
                if score < obj_thresh:
                    continue
                box = [
                    int(float(_box.xmin)), int(float(_box.ymin)),
                    int(float(_box.xmax)), int(float(_box.ymax))
                ]
                print('{}\t{}\t{}\t{}\t{}'.format(iid, 'label:', label_name,
                                                  score, box))
import os
import time
import copy

import cv2
import numpy as np
from PIL import Image

class_colors = [[0, 0, 0], [0, 255, 0]]
NCLASSES = 2
HEIGHT = 416
WIDTH = 416

model = SegNet_Mobile(n_classes=NCLASSES, input_height=HEIGHT, input_width=WIDTH)
model.load_weights("models/last1.h5")

cap = cv2.VideoCapture('/home/fmc/WX/Segmentation/SegNet-Mobile-tf2/1.mp4')
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.mp4', fourcc, 30.0, (960, 544))

fps = 0.0
while cap.isOpened():
    t1 = time.time()
    ret, img = cap.read()
    #############################
    # convert BGR to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # convert to a PIL Image
    img = Image.fromarray(np.uint8(img))
    old_img = copy.deepcopy(img)
    orininal_h = np.array(img).shape[0]
def show_analysis( session=r'Y:\Data 2018-2019\Complement 4 - schizophrenia Project\2019 Adult Behavior C4_for revisions\EZM\ALC_060519_2_mC4\ALC_060519_2_57G_EZM', output=r"C:\Users\User\Desktop\Code\annotated_SI_part.avi", speedX=10, ): import cv2 import tqdm import collections import sys vids = get_all_videos_in_session(session) df = get_analysis_dfs(session, vids) location, body_loc, zm_center, open_coord, closed_coord = get_quadrant(df) try: curr_filename = os.path.join(session, vids[0] + '.avi') cap = cv2.VideoCapture(curr_filename) fourcc = cv2.VideoWriter_fourcc(*'DIVX') writer = cv2.VideoWriter(output, fourcc, 1 / cap.get(2), (int(cap.get(3)), int(cap.get(4)))) for idx, row in tqdm.tqdm(df.iterrows(), total=df.shape[0]): if not idx % int(speed) == 0: continue if row['source'][0] in curr_filename: # cv is open to the right place pass else: cap.release() curr_filename = os.path.join(session, row['source'][0] + '.avi') cap = cv2.VideoCapture(curr_filename) # get the frame cap.set(1, row['frame_number'][0]) # 1==set frame number succ, img = cap.read() # draw the object circles img = cv2.circle(img, (int(zm_center[0]), int(zm_center[1])), 5, (0, 0, 255)) img = cv2.circle(img, (int(open_coord[0]), int(open_coord[1])), 5, (0, 0, 255)) img = cv2.circle(img, (int(closed_coord[0]), int(closed_coord[1])), 5, (0, 0, 255)) if not np.isnan(location[idx]): if location[idx] == 1: img = cv2.circle( img, (int(body_loc[0][idx]), int(body_loc[1][idx])), 5, (255, 0, 0)) else: img = cv2.circle( img, (int(body_loc[0][idx]), int(body_loc[1][idx])), 5, (0, 255, 0)) # add to writer writer.write(img) except Exception as er: raise er.with_traceback(sys.exc_info()[2]) finally: # for index, row in df.iterrows(): cap.release() writer.release() cv2.destroyAllWindows()
import numpy as np
import cv2

cap = cv2.VideoCapture(0)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640, 480))

while cap.isOpened():
    ret, frame = cap.read()
    if ret == True:
        # frame = cv2.flip(frame, 0)

        # write the frame
        out.write(frame)

        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

# Release everything when the job is finished
cap.release()
out.release()
cv2.destroyAllWindows()
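# The writer above hard-codes 20 fps and 640x480; if the camera delivers a
# different frame size, those frames are silently dropped. A minimal sketch
# (assuming a working default camera) that derives both values from the capture
# device instead:
import cv2

cap = cv2.VideoCapture(0)
fps = cap.get(cv2.CAP_PROP_FPS) or 20.0   # some drivers report 0; fall back
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output.avi', cv2.VideoWriter_fourcc(*'DIVX'), fps, (w, h))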
widths = [306, 142] heights = [416, 268] sizes = [(widths[0],heights[0]),(widths[1],heights[1])] ofsets_h = [2957, 3365] ofsets_v = [117, 79] # calling the screen recorder function screenRec(fps, sim_time, fourcc_avi, completeName1,completeName2,widths,heights,sizes,ofsets_h,ofsets_v) # setting the minimal area for detecting the moving object, higher means less noise min_areas = [10000, 4000] # making the video files for the trimmed an filtered videos prod1_out_m1 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick45.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m2 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick46.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m3 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick47.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m4 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick48.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m5 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick49.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m6 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick50.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m7 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick51.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m8 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick52.avi"), fourcc_avi,fps,sizes[0]) prod1_out_m9 = cv2.VideoWriter(os.path.join(save_path_filtered_pick, "pick53.avi"), fourcc_avi,fps,sizes[0]) # prod1_out_m10 = cv2.VideoWriter(os.path.join(save_path, "10.avi"), fourcc_avi,fps,size) prod1_out_m12 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop1.avi"), fourcc_avi,fps,sizes[1]) prod1_out_m22 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop2.avi"), fourcc_avi,fps,sizes[1]) prod1_out_m32 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop3.avi"), fourcc_avi,fps,sizes[1]) prod1_out_m42 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop4.avi"), fourcc_avi,fps,sizes[1]) prod1_out_m52 = cv2.VideoWriter(os.path.join(save_path_filtered_drop, "drop5.avi"), fourcc_avi,fps,sizes[1])
def main(): SEND_COMMAND = STOP LAST_COMMAND = SEND_COMMAND s = make_connections() cap = cv2.VideoCapture(numCam) draw_source_dest(cap) source, dest = get_source_dest() occupied_grids, planned_path = process_image.main(source, dest, cap, grid_size, frame_width, frame_height, decision) qt, path, pts = get_qt_path_pts(planned_path) min_v = 500 x_act = list() y_act = list() # destination of the path to reach final_x, final_y = qt[-1] (winW, winH) = (grid_size, grid_size) if runAlgorithm == 'tracker': bbox = intialize_tracker(cap) size = (frame_width, frame_height) # folderNum = get_folder_num() filePath = str(folderNum) + '/' + 'video.mp4' result = cv2.VideoWriter(filePath, cv2.VideoWriter_fourcc(*'MP4V'), 20, size) while True: # accel, gyro, mag, temp = get_IMU_data(conn) timer = cv2.getTickCount() _, img = cap.read() img = cv2.resize(img,(frame_width, frame_height)) draw_circle_on_source(img, path) result.write(img) make_grids(img, grid_size, winW, winH) img = cv2.polylines(img, [pts] , False, (255,120,255), 3) if runAlgorithm == 'tracker': SEND_COMMAND = run_tracker_algo(tracker, img, final_x, final_y, qt, grid_size, min_v) else: SEND_COMMAND, xt, yt = run_heading_algo(img, stkr1minHSV,stkr1maxHSV, stkr2minHSV, stkr2maxHSV, qt) x_act.append(xt) y_act.append(yt) if SEND_COMMAND == 'done': break LAST_COMMAND = send_command(s, LAST_COMMAND, SEND_COMMAND) # to print the direcction of the car print('Action ' + direction[SEND_COMMAND]) cv2.imshow('window', img) if cv2.waitKey(2) & 0xFF == 27: break # can be used to make the graph of the actual and experimental path make_graph(x_act, y_act, pts) SEND_COMMAND = STOP finish(s, cap, SEND_COMMAND)
def frameFilter(vs,completeName_filtered,min_area,fps,height,width,prod1_out_m): # setting parameters prod1 = [0] movement = [1] no_movement = [0] size = (width,height) prod1_out = cv2.VideoWriter(completeName_filtered, fourcc_avi, fps, size) # initializing variables and arrays prod1_frames = [] prod1_matrix = [[], [], [], [], [], [], [], [], []] firstFrame = None a = 0 vid1 = 0 row = 1 firstIndex = 0 secondIndex = 0 thirdIndex = 0 fourthIndex = 0 fifthIndex = 0 sixthIndex = 0 seventhIndex = 0 eightIndex = 0 ninthIndex = 0 tenthIndex = 0 # loop over the frames of the video while True: # grab the current frame and initialize the occupied/unoccupied # text succes, frame = vs.read() text = "Unoccupied" # if the frame could not be grabbed, then we have reached the end # of the video if frame is None: break # resize the frame, convert it to grayscale, and blur it # frame = cv2.resize(frame, size) gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) gray = cv2.GaussianBlur(gray, (21, 21), 0) # if the first frame is None, initialize it if firstFrame is None: firstFrame = gray continue # compute the absolute difference between the current frame and # first frame frameDelta = cv2.absdiff(firstFrame, gray) thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1] # dilate the thresholded image to fill in holes, then find contours # on thresholded image thresh = cv2.dilate(thresh, None, iterations=2) cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) cnts = imutils.grab_contours(cnts) # loop over the contours for c in cnts: # if the contour is too small, ignore it if cv2.contourArea(c) < min_area: continue # compute the bounding box for the contour, draw it on the frame, # and update the text (x, y, w, h) = cv2.boundingRect(c) # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) text = "Occupied" # appending the frames which have movement to the matrix if text == "Occupied": prod1_frames.append(frame) prod1_matrix[row-1].append(frame) prod1 = np.concatenate((prod1, movement)) elif text == "Unoccupied": prod1 = np.concatenate((prod1, no_movement)) # change this for the different simulations thresh_hold_frames = 5 # setting the conditions on when to go to the next row in the frame matrix if prod1[len(prod1)-1] == 0: a += 1 if a == thresh_hold_frames and len(prod1)-1 > thresh_hold_frames: vid1 += 1 if firstIndex == 0: firstIndex = len(prod1)-1 row = 2 elif firstIndex > 0 and secondIndex == 0: secondIndex = len(prod1)-1 row = 3 elif secondIndex > 0 and thirdIndex == 0: thirdIndex = len(prod1)-1 row = 4 elif thirdIndex > 0 and fourthIndex == 0: fourthIndex = len(prod1)-1 row = 5 elif fourthIndex > 0 and fifthIndex == 0: fifthIndex =len(prod1)-1 row = 6 elif fifthIndex > 0 and sixthIndex == 0: sixthIndex = len(prod1)-1 row = 7 elif sixthIndex > 0 and seventhIndex == 0: seventhIndex = len(prod1)-1 row = 8 elif seventhIndex > 0 and eightIndex == 0: eightIndex = len(prod1)-1 row = 9 elif eightIndex > 0 and ninthIndex == 0: ninthIndex = len(prod1)-1 # row = 10 elif ninthIndex > 0 and tenthIndex == 0: tenthIndex = len(prod1)-1 elif prod1[len(prod1)-1] == 1: a = 0 # draw the text and timestamp on the frame # cv2.putText(frame, "Simulation Status: {}".format(text), (10, 20), # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) # cv2.putText(frame, datetime.datetime.now().strftime("%A %d %B %Y %I:%M:%S%p"), # (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 255), 1) # show the frame and record if the user presses a key cv2.imshow("Security Feed", frame) 
cv2.imshow("Thresh", thresh) cv2.imshow("Frame Delta", frameDelta) key = cv2.waitKey(1) & 0xFF # if the `q` key is pressed, break from the lop if key == ord("q"): break # testing if no moving frames are missed moving_frames = np.count_nonzero(prod1==1) print("Counted moving frames: "+str(moving_frames)) print("Moving frames in array: "+str(len(prod1_frames))) # making one video of all the moving frames for j in range(moving_frames): prod1_frame = cv2.cvtColor(prod1_frames[j], cv2.COLOR_BGR2RGB) prod1_out.write(prod1_frame) # making multiple videos for every seperate movement for rows in range(len(prod1_matrix)): for column in range(len(prod1_matrix[rows])): prod1_frame1 = cv2.cvtColor(prod1_matrix[rows][column], cv2.COLOR_BGR2RGB) prod1_out_m[rows].write(prod1_frame1) prod1_out_m[rows].release() print(prod1) # cleanup the camera and close any open windows prod1_out.release() vs.release() cv2.destroyAllWindows()
import cv2
import math
import time

import numpy as np

labelsPath = "./coco.names"
LABELS = open(labelsPath).read().strip().split("\n")

np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

weightsPath = "./yolov3.weights"
configPath = "./yolov3.cfg"

cap = cv2.VideoCapture('./queda.mp4')
hasFrame, frame = cap.read()

net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
vid_writer = cv2.VideoWriter('output.avi',
                             cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                             (frame.shape[1], frame.shape[0]))

while cv2.waitKey(1) < 0:
    ret, image = cap.read()
    image = cv2.resize(image, (640, 360))
    (H, W) = image.shape[:2]

    ln = net.getLayerNames()
    ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    blob = cv2.dnn.blobFromImage(image, 1 / 300.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
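# Compatibility note: in recent OpenCV 4.x releases net.getUnconnectedOutLayers()
# returns a flat array of ints, so the 'ln[i[0] - 1]' indexing above raises an
# error. A version-agnostic sketch (assuming 'net' was loaded as above):
layer_names = net.getLayerNames()
ln = [layer_names[int(i) - 1]
      for i in np.array(net.getUnconnectedOutLayers()).flatten()]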
def trackMultipleObjects(): rectangleColor = (0, 255, 0) frameCounter = 0 currentCarID = 0 fps = 0 carTracker = {} carNumbers = {} carLocation1 = {} carLocation2 = {} speed = [None] * 1000 # Write output to video file out = cv2.VideoWriter('outpy.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (WIDTH,HEIGHT)) while True: start_time = time.time() rc, image = video.read() if type(image) == type(None): break image = cv2.resize(image, (WIDTH, HEIGHT)) resultImage = image.copy() frameCounter = frameCounter + 1 for carID in carTracker.keys(): trackingQuality = carTracker[carID].update(image) if not (frameCounter % 10): gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) cars = carCascade.detectMultiScale(gray, 1.1, 13, 18, (24, 24)) for (_x, _y, _w, _h) in cars: x = int(_x) y = int(_y) w = int(_w) h = int(_h) x_bar = x + 0.5 * w y_bar = y + 0.5 * h for carID in carTracker.keys(): trackedPosition = carTracker[carID].get_position() t_x = int(trackedPosition.left()) t_y = int(trackedPosition.top()) t_w = int(trackedPosition.width()) t_h = int(trackedPosition.height()) t_x_bar = t_x + 0.5 * t_w t_y_bar = t_y + 0.5 * t_h for carID in carTracker.keys(): trackedPosition = carTracker[carID].get_position() t_x = int(trackedPosition.left()) t_y = int(trackedPosition.top()) t_w = int(trackedPosition.width()) t_h = int(trackedPosition.height()) cv2.rectangle(resultImage, (t_x, t_y), (t_x + t_w, t_y + t_h), rectangleColor, 4) # speed estimation carLocation2[carID] = [t_x, t_y, t_w, t_h] end_time = time.time() if not (end_time == start_time): fps = 1.0/(end_time - start_time)
def translate_video(): # TensorFlow if var.get() == "Rococó": graph = load_graph('frozen_models/frozen_rococo.pb') elif var.get() == "Ukiyo-e": graph = load_graph('frozen_models/frozen_ukiyo.pb') elif var.get() == "Van Gogh": graph = load_graph('frozen_models/frozen_vg.pb') else: graph = load_graph('frozen_models/frozen_fauvism.pb') image_tensor = graph.get_tensor_by_name('image_tensor:0') output_tensor = graph.get_tensor_by_name('generate_output/output:0') sess = tf.Session(graph=graph) # Directorio del archivo. video_path = askopenfilename() #Definición del CODEC cap = cv2.VideoCapture(video_path) save_path = askdirectory(title='Directorio de guardado') save_path = save_path + "/video_transformed.avi" fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V') out = cv2.VideoWriter(save_path, fourcc, 25, (768, 256)) # OpenCV if cap.isOpened() == False: print('Imposible obtener los datos del video.') while True: # Obtenemos el frame. ret, frame = cap.read() if ret == True: # Se reduce el tamaño del frame a uno procesable por pix2pix frame_resize = resize_out(frame) # Se aplica pre procesamiento del frame. gray_image = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY) gaussian_image = cv2.GaussianBlur(gray_image, (3, 3), 0) # Se extraen los bordes. edge = 255 - auto_canny(gaussian_image) edge_color = edge_color = cv2.cvtColor(edge, cv2.COLOR_GRAY2BGR) black_image = np.zeros(edge.shape, np.uint8) # Se genera la predicción. combined_image = np.concatenate([edge, black_image], axis=1) image_rgb = cv2.cvtColor( combined_image, cv2.COLOR_BGR2RGB) # OpenCV uses BGR instead of RGB generated_image = sess.run(output_tensor, feed_dict={image_tensor: image_rgb}) image_bgr = cv2.cvtColor(np.squeeze(generated_image), cv2.COLOR_RGB2BGR) image_normal = np.concatenate( [frame_resize, edge_color, image_bgr], axis=1) #Se escribe el cuadro en al salida. out.write(image_normal) cv2.imshow('Procesando...', image_normal) if cv2.waitKey(1) & 0xFF == ord('q'): break else: break sess.close() cap.release() out.release() print('Ha terminado la traducción.') cv2.destroyAllWindows()
def main(): args = build_argparser().parse_args() logger = logging.getLogger('main') is_benchmarking = False # initialize variables with the input arguments for easy access model_path_dict = { 'FaceDetectionModel': args.faceDetectionModel, 'LandmarkRegressionModel': args.landmarkRegressionModel, 'HeadPoseEstimationModel': args.headPoseEstimationModel, 'GazeEstimationModel': args.gazeEstimationModel } preview_flags = args.previewFlags input_filename = args.input device_name = args.device prob_threshold = args.prob_threshold output_path = args.output_path if input_filename.lower() == 'cam': feeder = InputFeeder(input_type='cam') else: if not os.path.isfile(input_filename): logger.error("Unable to find specified video file") exit(1) feeder = InputFeeder(input_type='video', input_file=input_filename) for model_path in list(model_path_dict.values()): if not os.path.isfile(model_path): logger.error("Unable to find specified model file" + str(model_path)) exit(1) # instantiate model face_detection_model = FaceDetectionModel(model_path_dict['FaceDetectionModel'], device_name, threshold=prob_threshold) landmark_detection_model = LandmarkDetectionModel(model_path_dict['LandmarkRegressionModel'], device_name, threshold=prob_threshold) head_pose_estimation_model = HeadPoseEstimationModel(model_path_dict['HeadPoseEstimationModel'], device_name, threshold=prob_threshold) gaze_estimation_model = GazeEstimationModel(model_path_dict['GazeEstimationModel'], device_name, threshold=prob_threshold) if not is_benchmarking: mouse_controller = MouseController('medium', 'fast') # load Models start_model_load_time = time.time() face_detection_model.load_model() landmark_detection_model.load_model() head_pose_estimation_model.load_model() gaze_estimation_model.load_model() total_model_load_time = time.time() - start_model_load_time feeder.load_data() out_video = cv2.VideoWriter(os.path.join('output_video.mp4'), cv2.VideoWriter_fourcc(*'avc1'), int(feeder.get_fps()/10), (1920, 1080), True) frame_count = 0 start_inference_time = time.time() for ret, frame in feeder.next_batch(): if not ret: break frame_count += 1 key = cv2.waitKey(60) try: face_cords, cropped_image = face_detection_model.predict(frame) if type(cropped_image) == int: logger.warning("Unable to detect the face") if key == 27: break continue left_eye_image, right_eye_image, eye_cords = landmark_detection_model.predict(cropped_image) pose_output = head_pose_estimation_model.predict(cropped_image) mouse_cord, gaze_vector = gaze_estimation_model.predict(left_eye_image, right_eye_image, pose_output) except Exception as e: logger.warning("Could predict using model" + str(e) + " for frame " + str(frame_count)) continue image = cv2.resize(frame, (500, 500)) if not len(preview_flags) == 0: preview_frame = draw_preview( frame, preview_flags, cropped_image, left_eye_image, right_eye_image, face_cords, eye_cords, pose_output, gaze_vector) image = np.hstack((cv2.resize(frame, (500, 500)), cv2.resize(preview_frame, (500, 500)))) cv2.imshow('preview', image) out_video.write(frame) if frame_count % 5 == 0 and not is_benchmarking: mouse_controller.move(mouse_cord[0], mouse_cord[1]) if key == 27: break total_time = time.time() - start_inference_time total_inference_time = round(total_time, 1) fps = frame_count / total_inference_time try: os.mkdir(output_path) except OSError as error: logger.error(error) with open(output_path+'stats.txt', 'w') as f: f.write(str(total_inference_time) + '\n') f.write(str(fps) + '\n') f.write(str(total_model_load_time) + '\n') 
    logger.info('Model load time: ' + str(total_model_load_time))
    logger.info('Inference time: ' + str(total_inference_time))
    logger.info('FPS: ' + str(fps))

    logger.info('Video stream ended')
    cv2.destroyAllWindows()
    feeder.close()
import cv2
import numpy as np
import glob
import random

nIMAGES = 351 * 3

files = glob.glob('/home/jps/Downloads/merge_from_ofoct (2).jpg')
ik = cv2.imread('/home/jps/Downloads/merge_from_ofoct (2).jpg')
sk = ik.shape
print(sk)

mov = '/home/jps/Downloads/' + 'circle_2.avi'
MOV = cv2.VideoWriter(filename=mov,
                      fourcc=cv2.VideoWriter_fourcc('F', 'M', 'P', '4'),
                      fps=25, frameSize=(640, sk[0]))

for i in np.arange(1, nIMAGES):
    print('Working on: ' + files[0][-14:-4])
    image = cv2.imread(files[0], -1)
    # cv2.imshow("image", image)
    # cv2.waitKey(0)
    print(0 + 4 * i, 640 + 4 * i)
    imageout = image[0:sk[0], (0 + 4 * i):(640 + 4 * i)]
    # cv2.imshow("imageout", imageout)
    # cv2.waitKey(0)
    MOV.write(imageout)
    # MOV.write(dst)
# /home/jps/Desktop/videocorp/CORP/circular_pipes_1.jpg.jpg  crop y:h, x:w
        right = int(right * r)
        bottom = int(bottom * r)
        left = int(left * r)

        # draw the predicted face name on the image
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        y = top - 15 if top - 15 > 15 else top + 15
        cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX,
                    0.75, (0, 255, 0), 2)

    # if the video writer is None *and* we are supposed to write
    # the output video to disk, initialize the writer
    if writer is None and args["output"] is not None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, 20,
                                 (frame.shape[1], frame.shape[0]), True)

    # if the writer is not None, write the frame with recognized
    # faces to disk
    if writer is not None:
        writer.write(frame)

    # check to see if we are supposed to display the output frame to
    # the screen
    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
def predict_pcl_files(self, pcl_path: str = None, image_root_path: str = None, net_config_path: str = None, class_config_path: str = None, ckpt_path: str = None, save: bool = False, viewer_port: Optional[int] = None): """predict a single point cloud data from numpy array format expected shape=[n_points, xyzr=4] :param pcl_path: .bin file or folder with .bin files, saved numpy array data :param image_root_path: :param net_config_path: .yaml file, model setup :param class_config_path: dataset class configure :param ckpt_path: .tckpt file, trained weights :param show_gt: if display GT label :param save: save to video :param viewer_port: """ if pcl_path is None: pcl_path = self.example_pcl_folderpath if net_config_path is None: net_config_path = self.default_net_config_path if class_config_path is None: class_config_path = self.default_class_config_path if ckpt_path is None: ckpt_path = self.default_ckpt_path if os.path.isdir(pcl_path): filenames = [Path(os.path.join(pcl_path, f)) for f in os.listdir(pcl_path) if f.endswith(".bin")] else: filenames = [Path(pcl_path), ] filenames.sort() root_path = filenames[0].parent if image_root_path is None: image_root_path = Path(root_path) / "image_2" else: image_root_path = Path(image_root_path) if save: fourcc = cv2.VideoWriter_fourcc(*'MP4V') out_stream = cv2.VideoWriter("./output.avi", fourcc, 10.0, (1000, 900)) else: out_stream = None vis_img = np.zeros((900, 1000, 3), dtype=np.uint8) if viewer_port is not None: from utils.visualization import LidarViewer lidar_viewer = LidarViewer(viewer_port) else: lidar_viewer = None detector = SecondDetector(net_config_path, class_config_path, ckpt_path, detect_range=(-50, -50, 50, 50)) for file_path in filenames: points = np.fromfile(file_path, dtype=np.float32, count=-1).reshape([-1, 4]) print(file_path, points.shape) start = time() res = detector.predict_on_points(points) end = time() boxes_lidar = res[0]["box3d_lidar"].detach().cpu().numpy() scores = res[0]["scores"].detach().cpu().numpy() labels = res[0]["label_preds"].detach().cpu().numpy() if lidar_viewer is not None: lidar_viewer.load_points(points[:, :3], points[:, 3]) # print("--------") # print(f">> filename: {file_path}") # print(f">> scores: {scores}") # print(f">> labels: {labels}") # print(f">> time: {end-start} [s]") # print(">> boxes_lidar: ") # for each in boxes_lidar: # print(each) # (x, y, z, w, l, h, yaw) in velodyne coordinates bev = detector.visualize_bev(points, boxes_lidar, labels=labels) file_token = file_path.stem img = cv2.imread(str(image_root_path/(file_token + ".png"))) img = cv2.resize(img, (1000, 300)) vis_img[:300, :, :] = img vis_img[300:, :, :] = bev[::-1, :, :] # if show_gt: # label_filepath = root_path / "label_2" / (file_token + ".txt") # calib_filepath = root_path / "calib" / (file_token + ".txt") # labels = read_kitti_label(label_filepath) # calib = KittiCalibrationParser(calib_filepath) # for each in labels: # print(each) # if each["name"] != "DontCare": # # labels are in ref camera coord. thus here transform to lidar coord # each["box"][:3] = calib.project_ref_to_velo(each["box"][:3]) # bev = detector.draw_box_in_bev(bev, each["box"].reshape([-1, 7]), labels) if save: out_stream.write(vis_img) cv2.imshow("bev",bev[::-1, :, :]) cv2.imshow("img", vis_img) if cv2.waitKey(1) == 27: break cv2.destroyAllWindows() if save: out_stream.release()
def main(): parser = argparse.ArgumentParser(description='Whiteboard inpainting demo') parser.add_argument( '-i', '--input', required=True, help='Required. Path to a video file or a device node of a web-camera.' ) parser.add_argument('--loop', default=False, action='store_true', help='Optional. Enable reading the input in a loop.') parser.add_argument('-o', '--output', required=False, help='Optional. Name of the output file(s) to save.') parser.add_argument('-limit', '--output_limit', required=False, default=1000, type=int, help='Optional. Number of frames to store in output. ' 'If 0 is set, all frames are stored.') parser.add_argument( '-m_i', '--m_instance_segmentation', type=str, required=False, help='Required. Path to the instance segmentation model.') parser.add_argument( '-m_s', '--m_semantic_segmentation', type=str, required=False, help='Required. Path to the semantic segmentation model.') parser.add_argument( '-t', '--threshold', type=float, default=0.6, help='Optional. Threshold for person instance segmentation model.') parser.add_argument('--no_show', help="Optional. Don't show output.", action='store_true') parser.add_argument( '-d', '--device', type=str, default='CPU', help= 'Optional. Specify a target device to infer on. CPU, GPU, HDDL or MYRIAD is ' 'acceptable. The demo will look for a suitable plugin for the device specified.' ) parser.add_argument('-u', '--utilization_monitors', default='', type=str, help='Optional. List of monitors to show initially.') args = parser.parse_args() cap = open_images_capture(args.input, args.loop) if cap.get_type() not in ('VIDEO', 'CAMERA'): raise RuntimeError( "The input should be a video file or a numeric camera ID") if bool(args.m_instance_segmentation) == bool( args.m_semantic_segmentation): raise ValueError( 'Set up exactly one of segmentation models: ' '--m_instance_segmentation or --m_semantic_segmentation') labels_dir = Path(__file__).resolve().parents[3] / 'data/dataset_classes' mouse = MouseClick() if not args.no_show: cv2.namedWindow(WINNAME) cv2.setMouseCallback(WINNAME, mouse.get_points) log.info('OpenVINO Runtime') log.info('\tbuild: {}'.format(get_version())) core = Core() model_path = args.m_instance_segmentation if args.m_instance_segmentation else args.m_semantic_segmentation log.info('Reading model {}'.format(model_path)) if args.m_instance_segmentation: labels_file = str(labels_dir / 'coco_80cl_bkgr.txt') segmentation = MaskRCNN(core, args.m_instance_segmentation, labels_file, args.threshold, args.device) elif args.m_semantic_segmentation: labels_file = str(labels_dir / 'cityscapes_19cl_bkgr.txt') segmentation = SemanticSegmentation(core, args.m_semantic_segmentation, labels_file, args.threshold, args.device) log.info('The model {} is loaded to {}'.format(model_path, args.device)) metrics = PerformanceMetrics() video_writer = cv2.VideoWriter() black_board = False frame_number = 0 key = -1 start_time = perf_counter() frame = cap.read() if frame is None: raise RuntimeError("Can't read an image from the input") out_frame_size = (frame.shape[1], frame.shape[0] * 2) output_frame = np.full((frame.shape[0], frame.shape[1], 3), 255, dtype='uint8') presenter = monitors.Presenter( args.utilization_monitors, 20, (out_frame_size[0] // 4, out_frame_size[1] // 16)) if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'), cap.fps(), out_frame_size): raise RuntimeError("Can't open video writer") while frame is not None: mask = None detections = segmentation.get_detections([frame]) expand_mask(detections, 
frame.shape[1] // 27) if len(detections[0]) > 0: mask = detections[0][0][2] for i in range(1, len(detections[0])): mask = cv2.bitwise_or(mask, detections[0][i][2]) if mask is not None: mask = np.stack([mask, mask, mask], axis=-1) else: mask = np.zeros(frame.shape, dtype='uint8') clear_frame = remove_background(frame, invert_colors=not black_board) output_frame = np.where(mask, output_frame, clear_frame) merged_frame = np.vstack([frame, output_frame]) merged_frame = cv2.resize(merged_frame, out_frame_size) metrics.update(start_time, merged_frame) if video_writer.isOpened() and (args.output_limit <= 0 or frame_number <= args.output_limit - 1): video_writer.write(merged_frame) presenter.drawGraphs(merged_frame) if not args.no_show: cv2.imshow(WINNAME, merged_frame) key = check_pressed_keys(key) if key == 27: # 'Esc' break if key == ord('i'): # catch pressing of key 'i' black_board = not black_board output_frame = 255 - output_frame else: presenter.handleKey(key) if mouse.crop_available: x0, x1 = min(mouse.points[0][0], mouse.points[1][0]), \ max(mouse.points[0][0], mouse.points[1][0]) y0, y1 = min(mouse.points[0][1], mouse.points[1][1]), \ max(mouse.points[0][1], mouse.points[1][1]) x1, y1 = min(x1, output_frame.shape[1] - 1), min( y1, output_frame.shape[0] - 1) board = output_frame[y0:y1, x0:x1, :] if board.shape[0] > 0 and board.shape[1] > 0: cv2.namedWindow('Board', cv2.WINDOW_KEEPRATIO) cv2.imshow('Board', board) frame_number += 1 start_time = perf_counter() frame = cap.read() metrics.log_total() for rep in presenter.reportMeans(): log.info(rep)
def main(): if not os.path.isfile(args.face): fnames = list(glob(os.path.join(args.face, '*.jpg'))) sorted_fnames = sorted(fnames, key=lambda f: int(os.path.basename(f).split('.')[0])) full_frames = [cv2.imread(f) for f in sorted_fnames] elif args.face.split('.')[1] in ['jpg', 'png', 'jpeg']: full_frames = [cv2.imread(args.face)] fps = args.fps else: video_stream = cv2.VideoCapture(args.face) fps = video_stream.get(cv2.CAP_PROP_FPS) print('Reading video frames...') full_frames = [] while 1: still_reading, frame = video_stream.read() if not still_reading: video_stream.release() break if args.resize_factor > 1: frame = cv2.resize(frame, (frame.shape[1]//args.resize_factor, frame.shape[0]//args.resize_factor)) if args.rotate: frame = cv2.rotate(frame, cv2.cv2.ROTATE_90_CLOCKWISE) y1, y2, x1, x2 = args.crop if x2 == -1: x2 = frame.shape[1] if y2 == -1: y2 = frame.shape[0] frame = frame[y1:y2, x1:x2] full_frames.append(frame) print ("Number of frames available for inference: "+str(len(full_frames))) if not args.audio.endswith('.wav'): print('Extracting raw audio...') command = 'ffmpeg -y -i {} -strict -2 {}'.format(args.audio, 'temp/temp.wav') subprocess.call(command, shell=True) args.audio = 'temp/temp.wav' wav = audio.load_wav(args.audio, 16000) mel = audio.melspectrogram(wav) print(mel.shape) if np.isnan(mel.reshape(-1)).sum() > 0: raise ValueError('Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again') mel_chunks = [] mel_idx_multiplier = 80./fps i = 0 while 1: start_idx = int(i * mel_idx_multiplier) if start_idx + mel_step_size > len(mel[0]): mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:]) break mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size]) i += 1 print("Length of mel chunks: {}".format(len(mel_chunks))) full_frames = full_frames[:len(mel_chunks)] batch_size = args.wav2lip_batch_size gen = datagen(full_frames.copy(), mel_chunks) for i, (img_batch, mel_batch, frames, coords) in enumerate(tqdm(gen, total=int(np.ceil(float(len(mel_chunks))/batch_size)))): if i == 0: model = load_model(args.checkpoint_path) print ("Model loaded") frame_h, frame_w = full_frames[0].shape[:-1] out = cv2.VideoWriter('temp/result.avi', cv2.VideoWriter_fourcc(*'DIVX'), fps, (frame_w, frame_h)) img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device) mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device) with torch.no_grad(): pred = model(mel_batch, img_batch) pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255. for p, f, c in zip(pred, frames, coords): y1, y2, x1, x2 = c p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1)) f[y1:y2, x1:x2] = p out.write(f) out.release() command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format(args.audio, 'temp/result.avi', args.outfile) subprocess.call(command, shell=True)
import cv2
import glob
import random
import math
import numpy as np
import dlib
import itertools
import face_recognition
from sklearn.svm import SVC
from PIL import Image, ImageDraw

v1 = cv2.VideoCapture("Young_blonde_woman_6.mp4")
frame_width = int(v1.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(v1.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('output.avi',
                      cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                      (frame_width, frame_height))

face_locations = []
face_encodings = []
face_emotions = []
process_this_frame = True

# Emotion list
emotions = [
    "anger", "disgust", "fear", "happiness", "neutral", "sadness", "surprise"
]

clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
detector = dlib.get_frontal_face_detector()
# Use this to draw landmarks on the detected face
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
# Video feed dimensions
_, frame = cap.read()
v_height, v_width = frame.shape[:2]
# print(v_height, v_width)

# Output saving
if opt.save_video:
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    filename = opt.input_file_path.split("/")[-1]
    filepath = os.path.join(opt.output_path, filename)
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(filepath, fourcc, fps, (v_width, v_height))

print("\nPerforming object detection:")

# Side length of the square black canvas
x = y = v_height if v_height > v_width else v_width

# Offsets for pasting the original frame into the black canvas
start_new_i_height = int((y - v_height) / 2)
start_new_i_width = int((x - v_width) / 2)

# Scale factor for mapping results back onto the original frame
mul_constant = x / opt.frame_size
# print(mul_constant)

# for text in output
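# Worked example for the letterboxing above (assuming a 1280x720 feed):
# x = y = 1280, the frame is pasted starting at row (1280 - 720) // 2 = 280 and
# column 0, and with opt.frame_size = 416 the scale factor back to the original
# frame is mul_constant = 1280 / 416 ≈ 3.08.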
import cv2 as cv

video = cv.VideoCapture(0)
fourcc = cv.VideoWriter_fourcc(*'XVID')
out = cv.VideoWriter("video/kayit.avi", fourcc, 20.0, (640, 480))
resim = cv.imread("resim/ad_soyad.jpg")

i = 0
while video.isOpened():
    i += 1
    ret, frame = video.read()
    if i % 10 == 0:
        frame = resim
        i = 0
    # print(nisangah.shape)
    # print(nisangah[0, 0])
    if ret:
        out.write(frame)
        cv.imshow("kamera", frame)
        if cv.waitKey(33) == ord('q'):
            break

out.release()
video.release()
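# cv2.VideoWriter silently skips frames whose size differs from the (640, 480)
# the writer was opened with, so if "resim/ad_soyad.jpg" has another resolution
# the substituted frames never reach the file. A minimal sketch that resizes the
# still image to match the writer:
resim = cv.resize(cv.imread("resim/ad_soyad.jpg"), (640, 480))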