import os

import utils


def cvt_dataset(src_dir, dest_dir):
    global num_processed
    if not os.path.exists(src_dir):
        raise Exception('cvt_dataset: src_dir does not exist!')
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    if os.listdir(dest_dir):
        raise Exception('cvt_dataset: dest_dir must be an empty dir!')
    for node in os.listdir(src_dir):
        if os.path.isdir(os.path.join(src_dir, node)):
            # Recurse into subdirectories, mirroring the tree under dest_dir
            cvt_dataset(os.path.join(src_dir, node),
                        os.path.join(dest_dir, node))
        else:
            name, ext = os.path.splitext(node)
            if ext != '.avi':
                continue
            frames = utils.extract_frames(os.path.join(src_dir, node))
            utils.save_frames(os.path.join(dest_dir, name), frames)
            num_processed += 1
            if num_processed % 30 == 0:
                print('%d videos have been processed' % num_processed)
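
# A minimal usage sketch (not part of the original source): cvt_dataset reads
# the module-level counter num_processed, so it must be initialised before the
# first call. The directory names below are placeholders.
num_processed = 0

if __name__ == '__main__':
    cvt_dataset('data/raw_avi', 'data/frames')
    print('done: %d videos processed' % num_processed)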

import argparse
from os.path import join


def __main():
    """Main function"""
    # Parameter declaration and parsing
    ap = argparse.ArgumentParser()
    ap.add_argument('-of', '--out_dir', required=False, default='.',
                    help='Output directory')
    ap.add_argument('-i', '--input_video', required=True,
                    help='Input video')
    ap.add_argument('-t', '--threshold', required=False, default=50,
                    help='The threshold used to decide whether two frames are the same')
    args = ap.parse_args()

    frames = extract_frames(args.input_video)
    print('Total frames extracted: %d' % len(frames))
    save_images(frames, join(args.out_dir, 'all'))

    frames = get_unique_frames(frames, int(args.threshold))
    print('Unique frames: %d' % len(frames))
    save_images(frames, join(args.out_dir, 'uniques'))
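
# Hedged entry-point sketch; the script name and argument values below are
# placeholders. The run writes all frames to <out_dir>/all and the
# de-duplicated set to <out_dir>/uniques:
#   python dedup_frames.py -i input.mp4 -of ./out -t 40
if __name__ == '__main__':
    __main()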

import os
import uuid
from glob import glob
from shutil import rmtree


def main(args, callback=None):
    # Work inside a unique temporary directory
    uid = uuid.uuid1()
    dir_name = './.TEMP-' + str(uid)
    os.makedirs(dir_name)

    audio_file = dir_name + '/audio.mp3'
    extract_audio(args.video, audio_file)

    frames_dir = dir_name + '/frames'
    os.makedirs(frames_dir)
    extract_frames(args.video, frames_dir)
    frames_count = len(glob(frames_dir + '/*'))

    audio_analyze = analyze(audio_file, frames_count)
    process(frames_dir, audio_analyze, args.size,
            neural=args.neural, colorize=args.colorize,
            brightify=args.brightify, callback=callback)
    construct_video(frames_dir, audio_file, get_fps(args.video), args.output)

    if not args.no_clean:
        rmtree(dir_name)
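
# A hedged driver sketch (not from the original source): main() only reads
# attributes off `args`, so a SimpleNamespace with the expected fields works
# for programmatic use. All values below are placeholders.
import types

main(types.SimpleNamespace(
    video='input.mp4', output='output.mp4', size=8,
    neural=False, colorize=False, brightify=False, no_clean=False))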

import torch
import torch.nn.functional as F
from pytube import YouTube


def classify(model, input):
    yt = YouTube('https://youtube.com/embed/%s?start=%d&end=%d'
                 % (input['video'], start, end))
    video = yt.streams.all()[0]
    video_file = video.download(videoPath)
    num_segments = 16

    print('Extracting frames using ffmpeg...')
    frames = extract_frames(video_file, num_segments)

    # Prepare input tensor: [1, num_frames, 3, H, W]
    input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)

    # Make video prediction
    with torch.no_grad():
        logits = model(input)
        h_x = F.softmax(logits, 1).mean(dim=0)
        probs, idx = h_x.sort(0, True)

    # Output the prediction
    print('RESULT ON ' + video_file)
    return categories[idx[0]]
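
# Hedged usage sketch: classify() expects a dict carrying the YouTube video ID
# and reads `start`, `end`, `videoPath`, `transform` and `categories` from the
# enclosing module. The ID below is a placeholder.
prediction = classify(model, {'video': '<YOUTUBE_ID>'})
print(prediction)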

import argparse
import os

import pandas as pd


def main():
    parser = argparse.ArgumentParser(
        description='Full pipeline to download Youtube video and infer deep trackers.')
    parser.add_argument('--overwrite', action='store_true',
                        help='remove existing folder')
    # parser.add_argument('--YT_ID', type=str, default=None,
    #                     help='ID from YT')
    # parser.add_argument('--start', type=int, default=0,
    #                     help='starting time of the frames')
    # parser.add_argument('--duration', type=int, default=None,
    #                     help='duration time of the frames')
    # parser.add_argument('--ID', type=int, default=0,
    #                     help='ID of the sequence')
    parser.add_argument('--path', type=str,
                        default="/home/$USER/Documents/Videos",
                        help='where to save the sequence/video/results')
    parser.add_argument('--CSV', type=str,
                        default="TrackingNet 2.0 Test Set Extension - Final TrackingNet2.0.csv",
                        help='CSV file listing the sequences to process')
    args = parser.parse_args()

    List_Sequences = os.path.join(args.path, args.CSV)
    df = pd.read_csv(List_Sequences)

    for i, data in df.iterrows():
        # only process rows after index 6
        if i > 6:
            print(data)
            args.YT_ID = data["Youtube_ID"]
            args.start = int(data["Start Time"])
            args.duration = int(data["Duration"])
            args.ID = int(data["Object_ID"])
            print(args)

            # define all paths
            sequence_ID = args.YT_ID + "_" + str(args.ID)
            full_video_path = os.path.join(args.path, "Videos",
                                           args.YT_ID + '.mp4')
            sequence_path = os.path.join(args.path, "Sequences", sequence_ID)
            cut_video_path = os.path.join(sequence_path, 'video.mp4')
            frame_path = os.path.join(sequence_path, 'frames')
            frame_BB_path = os.path.join(sequence_path, 'frames_BB')
            first_BB_path = os.path.join(sequence_path, "initial_BB.txt")
            tracking_results_path = os.path.join(sequence_path, 'results')
            video_BB_path = os.path.join(sequence_path, 'video_BB.mkv')

            # Download the video
            download_video(args.YT_ID, full_video_path)
            # Cut the video
            cut_video(full_video_path, cut_video_path, args.start, args.duration)
            # Extract frames
            extract_frames(cut_video_path, frame_path)
            # Draw the first bounding box
            if not os.path.exists(first_BB_path):
                draw_first_BB(sequence_path, frame_path, first_BB_path)
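
# Hedged entry-point sketch:
if __name__ == '__main__':
    main()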

import os
import shutil
from time import sleep


def run_single_sequence(args):
    # define all paths
    sequence_ID = args.YT_ID + "_" + str(args.ID)
    full_video_path = os.path.join(args.path, "Videos", args.YT_ID + '.mp4')
    sequence_path = os.path.join(args.path, "Sequences", sequence_ID)
    cut_video_path = os.path.join(sequence_path, 'video.mp4')
    frame_path = os.path.join(sequence_path, 'frames')
    frame_BB_path = os.path.join(sequence_path, 'frames_BB')
    first_BB_path = os.path.join(sequence_path, "initial_BB.txt")
    tracking_results_path = os.path.join(sequence_path, 'results')
    video_BB_path = os.path.join(sequence_path, 'video_BB.mkv')

    # remove previous results
    if args.overwrite:
        if os.path.exists(cut_video_path):
            os.remove(cut_video_path)
        if os.path.exists(video_BB_path):
            os.remove(video_BB_path)
        if os.path.exists(frame_path) and os.path.isdir(frame_path):
            shutil.rmtree(frame_path)
        if os.path.exists(frame_BB_path) and os.path.isdir(frame_BB_path):
            shutil.rmtree(frame_BB_path)
        if os.path.exists(tracking_results_path) and os.path.isdir(tracking_results_path):
            shutil.rmtree(tracking_results_path)

    print(sequence_ID)

    # Download the video
    download_video(args.YT_ID, full_video_path)
    # Cut the video
    cut_video(full_video_path, cut_video_path, args.start, args.duration)
    # Extract frames
    extract_frames(cut_video_path, frame_path)

    # Reuse an initial bounding box from the shared drive when one exists
    first_BB_path_shared = os.path.join(
        "/run/user/1001/gvfs/smb-share:server=10.68.74.21,share=tn2",
        "Sequences", sequence_ID, "initial_BB.txt")
    if not os.path.exists(first_BB_path) and os.path.exists(first_BB_path_shared):
        shutil.copyfile(first_BB_path_shared, first_BB_path)

    # Draw the first bounding box if asked to
    if args.draw_BB:
        if not os.path.exists(first_BB_path):
            os.system(f"xdg-open {cut_video_path}")
            draw_first_BB(sequence_path, frame_path, first_BB_path, sequence_ID)

    if args.run_trackers:
        # if there is still no bounding box at this stage, one has to be drawn
        if not os.path.exists(first_BB_path):
            draw_first_BB(sequence_path, frame_path, first_BB_path, sequence_ID)

        # Run trackers based on pysot
        run_tracker_pysot(args.YT_ID, args.ID, args.path,
                          tracking_results_path, args.overwrite)
        # Run trackers based on pytracking
        run_tracker_pytracking(frame_path, sequence_path, tracking_results_path,
                               sequence_ID, args.overwrite)
        # show the resulting bounding boxes
        result_BB(tracking_results_path, frame_path, frame_BB_path,
                  sequence_ID, args.YT_ID, args.overwrite)

    if args.play_video:
        # create the results video
        result_video(frame_BB_path, video_BB_path, args.overwrite)
        # play the video
        os.system(f"xdg-open {video_BB_path}")
        sleep(args.duration + args.sleep_between_videos)
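
# A hedged driver sketch (not from the original source): run_single_sequence()
# expects the fields below on its argparse namespace; all values are placeholders.
from argparse import Namespace

run_single_sequence(Namespace(
    YT_ID='<YOUTUBE_ID>', ID=0, start=0, duration=30,
    path='/home/user/Documents/Videos', overwrite=False,
    draw_BB=False, run_trackers=True, play_video=False,
    sleep_between_videos=1))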

import os
import sys

import torch
from PIL import Image
from torchvision import transforms

import utils
from artnet import ARTNet

labels = ['nonporn', 'p**n']

assert len(sys.argv) == 3, 'Insufficient number of arguments'

v = utils.extract_frames(sys.argv[2], 'samples')
transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.RandomCrop((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
frames = [Image.open(os.path.join(v, f)) for f in os.listdir(v)]
frames = [transform(f) for f in frames]

# Group the frames into clips of 16
tensors = []
for i in range(0, len(frames), 16):
    tensors.append(torch.stack(frames[i:i + 16]))

model = ARTNet()
model.load_state_dict(torch.load(sys.argv[1]))
model = model.to('cuda')

for tensor in tensors:
    tensor = tensor.to('cuda')
    tensor = tensor.unsqueeze(0)
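    # The original snippet ends above, mid-loop. A minimal hedged continuation,
    # assuming ARTNet returns one row of per-class logits for the clip:
    with torch.no_grad():
        output = model(tensor)
    pred = output.argmax(dim=1).item()
    print('Prediction: %s' % labels[pred])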

else:
    categories = models.load_categories('category_momentsv2.txt')

# Load the video frame transform
transform = models.load_transform()

# Obtain video frames
if args.frame_folder is not None:
    print('Loading frames in {}'.format(args.frame_folder))
    import glob
    # here make sure after sorting the frame paths have the correct temporal order
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, '*.jpg')))
    frames = load_frames(frame_paths)
else:
    print('Extracting frames using ffmpeg...')
    frames = extract_frames(args.video_file, args.num_segments)

# Prepare input tensor
if 'resnet3d50' in args.arch:
    # [1, num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
else:
    # [num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames])

# Make video prediction
with torch.no_grad():
    logits = model(input)
    h_x = F.softmax(logits, 1).mean(dim=0)
    probs, idx = h_x.sort(0, True)
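
# Hedged continuation sketch (not in the original fragment): with probs/idx
# sorted in descending order, the usual next step is to print the top-5
# category predictions.
for i in range(5):
    print('{:.3f} -> {}'.format(probs[i], categories[idx[i]]))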

# Get dataset categories
categories = models.load_categories()

# Load the video frame transform
transform = models.load_transform()

# Obtain video frames
if args.frame_folder is not None:
    print('Loading frames in {}'.format(args.frame_folder))
    import glob
    # here make sure after sorting the frame paths have the correct temporal order
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, '*.jpg')))
    frames = load_frames(frame_paths)
else:
    print('Extracting frames using ffmpeg...')
    frames = extract_frames(args.video_file, args.num_segments,
                            args.start_frame, args.subsample_rate)

# Prepare input tensor
if args.arch == 'resnet3d50':
    # [1, num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
else:
    # [num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames])

# Make video prediction
with torch.no_grad():
    logits = model(input)
    h_x = F.softmax(logits, 1).mean(dim=0)
    probs, idx = h_x.sort(0, True)

# categories = models.load_categories()

# Load the video frame transform
transform = models.load_transform()

# Obtain video frames
if args.frame_folder is not None:
    print('Loading frames in {}'.format(args.frame_folder))
    import glob
    # here make sure after sorting the frame paths have the correct temporal order
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, '*.jpg')))
    print(frame_paths)
    frames = load_frames(frame_paths)
else:
    print('Extracting frames using ffmpeg...')
    frames = extract_frames(name, args.num_segments)

# Prepare input tensor
if args.arch == 'resnet3d50':
    # [1, num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
else:
    # [num_frames, 3, 224, 224]
    input = torch.stack([transform(frame) for frame in frames])

# Make video prediction
with torch.no_grad():
    logits = model(input)
    h_x = F.softmax(logits, 1).mean(dim=0)
    probs, idx = h_x.sort(0, True)

import os
from random import randint

import numpy as np
import pandas as pd
import requests
import torch
import torch.nn.functional as F
from moviepy.editor import (AudioFileClip, CompositeVideoClip, TextClip,
                            VideoFileClip)
from pytube import YouTube


def load_video(video_hash):
    yt = YouTube('https://youtube.com/embed/%s?start=%d&end=%d'
                 % (video_hash, start, end))
    video = yt.streams.all()[0]
    name = video.download('/tmp')

    # Load model
    model = models.load_model(arch)
    av_categories = pd.read_csv('CVS_Actions(NEW).csv',
                                delimiter=';').values.tolist()
    trax = pd.read_csv('audioTracks_urls.csv')

    # Get dataset categories
    # categories = models.load_categories()

    # Load the video frame transform
    transform = models.load_transform()

    # Obtain video frames
    if frame_folder is not None:
        print('Loading frames in {}'.format(frame_folder))
        import glob
        # here make sure after sorting the frame paths have the correct temporal order
        frame_paths = sorted(glob.glob(os.path.join(frame_folder, '*.jpg')))
        print(frame_paths)
        frames = load_frames(frame_paths)
    else:
        print('Extracting frames using ffmpeg...')
        frames = extract_frames(name, num_segments)

    # Prepare input tensor
    if arch == 'resnet3d50':
        # [1, num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames], 1).unsqueeze(0)
    else:
        # [num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames])

    # Make video prediction
    with torch.no_grad():
        logits = model(input)
        h_x = F.softmax(logits, 1).mean(dim=0)
        probs, idx = h_x.sort(0, True)

    # Output the prediction
    print('RESULT ON ' + name)
    y = float(av_categories[idx[0]][1]) * 125
    x = float(av_categories[idx[0]][2]) * 125

    # Find the audio tracks closest to the predicted valence/energy point
    trax = trax.assign(
        dist=lambda df: np.sqrt((x - df.valence) ** 2 + (y - df.energy) ** 2))
    print('min', trax['dist'].min())
    best = trax.nsmallest(100, 'dist')
    print(best)
    rand = randint(0, 9)
    print(rand)
    choice = best.iloc[rand, [1, 2, 5]]
    print('choice', choice)
    song = ('valence: ' + str(x) + ' arousal: ' + str(y)
            + ' ' + choice[0] + ' ' + choice[1])
    print(song)
    print(x, y)
    for i in range(0, 5):
        print('{:.3f} -> {} -> {}'.format(probs[i], idx[i], av_categories[idx[i]]))
        print('result categories', av_categories[idx[i]][0], av_categories[idx[i]][1])

    # r = requests.get(match.iloc[0, 2], allow_redirects=True)
    r = requests.get(choice[2], allow_redirects=True)
    open('./tmp/preview.mp3', 'wb').write(r.content)

    # Render output frames with prediction text
    rendered_output = './tmp/' + video_hash + '_' + str(x) + '_' + str(y) + '.mp4'
    if rendered_output is not None:
        clip = VideoFileClip(name).subclip(30, 60)
        audioclip = AudioFileClip('./tmp/preview.mp3')
        txt_clip = TextClip(song, fontsize=16, color='white')
        clip_final = clip.set_audio(audioclip)
        video = CompositeVideoClip([clip_final, txt_clip])
        video.set_duration(30).write_videofile(rendered_output)
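
# Hedged setup sketch: load_video() reads `arch`, `frame_folder`, `start`,
# `end` and `num_segments` from module scope; the values below are placeholders.
arch = 'resnet3d50'
frame_folder = None
start, end = 30, 60
num_segments = 16
load_video('<YOUTUBE_ID>')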

import os
from datetime import datetime

import segmentation_models_pytorch as smp

from inference import Infer
from utils import make_video_from_frames, extract_frames

input_file = "ufc234_gastelum_bisping_1080p_nosound_cut.mp4"
output_file = "test.mp4"
intermediate_dir = "video_frames"
intermediate_dir2 = "video_frames_processed"

print("Frame extraction")
now = datetime.now()
extract_frames(input_file, intermediate_dir)
print(datetime.now() - now)

print("Inference")
num_batches = 70
model = smp.Unet("se_resnext50_32x4d")
model.cuda()
for i in range(0, num_batches):
    inferer1 = Infer(
        rez_dir=intermediate_dir2,
        image_folder=intermediate_dir,
        batch_size=2,
        num_batches=num_batches,
        batch_id=i,
        threshold=0.5,
    )
    inferer1.inference(model)
print(datetime.now() - now)

print("Making video")
# The original snippet is cut off mid-call; the closing argument below is an
# assumption (the otherwise-unused output_file is the most plausible remainder).
make_video_from_frames(frame_dir=os.path.join(intermediate_dir2, "mask"),
                       output_file=output_file)