Example 1
# Parse command-line options; `parser` is defined earlier in the file (not shown).
args = parser.parse_args()

# Load model
# --multi selects the multi-label variant, which needs its own architecture name.
if args.multi:
    args.arch = 'multi_resnet3d50'

model = models.load_model(args.arch)

# Get dataset categories
# Multi-label and single-label models use different category list files.
if args.multi:
    categories = models.load_categories('category_multi_momentsv2.txt')
else:
    categories = models.load_categories('category_momentsv2.txt')

# Load the video frame transform
transform = models.load_transform()

# Obtain video frames
if args.frame_folder is not None:
    # Pre-extracted frames were supplied; read the JPEGs straight from disk.
    print('Loading frames in {}'.format(args.frame_folder))
    import glob
    # here make sure after sorting the frame paths have the correct temporal order
    # NOTE(review): lexicographic sort preserves temporal order only when frame
    # filenames are zero-padded (e.g. 000001.jpg) — confirm the upstream naming.
    frame_paths = sorted(glob.glob(os.path.join(args.frame_folder, '*.jpg')))
    frames = load_frames(frame_paths)
else:
    # No frame folder: sample args.num_segments frames from the video file.
    print('Extracting frames using ffmpeg...')
    frames = extract_frames(args.video_file, args.num_segments)

# Prepare input tensor
if 'resnet3d50' in args.arch:
    # [1, num_frames, 3, 224, 224]
Example 2
def load_video(video_hash):
    """Download a YouTube video, classify it, match an audio track, and render
    a captioned preview clip with that track as the soundtrack.

    Pipeline: download video -> run the Moments model on sampled frames ->
    map the top predicted category to (valence, energy) coordinates -> pick a
    nearby track from audioTracks_urls.csv -> download its preview MP3 ->
    composite video + audio + text into an .mp4 in ./tmp.

    NOTE(review): this function reads several names it does not define
    (`start`, `end`, `arch`, `frame_folder`, `num_segments`, plus helpers
    `load_frames`/`extract_frames` and modules `models`, `pd`, `np`, `torch`,
    `F`, `requests`, `os`) — presumably module-level globals/imports; verify
    they are set before this is called.

    Args:
        video_hash: YouTube video id, interpolated into an embed URL.
    """
    yt = YouTube('https://youtube.com/embed/%s?start=%d&end=%d' %
                 (video_hash, start, end))
    # Grab the first available stream and download it to /tmp.
    # NOTE(review): `streams.all()` is deprecated in recent pytube; also,
    # stream 0 is whatever pytube lists first, not necessarily best quality.
    video = yt.streams.all()[0]
    name = video.download('/tmp')
    #   Load model
    model = models.load_model(arch)

    # Per-category (valence, energy) coordinates, semicolon-delimited CSV.
    av_categories = pd.read_csv('CVS_Actions(NEW).csv',
                                delimiter=';').values.tolist()
    # Candidate audio tracks; must have `valence` and `energy` columns.
    trax = pd.read_csv('audioTracks_urls.csv')

    # Get dataset categories
    #categories = models.load_categories()

    # Load the video frame transform
    transform = models.load_transform()

    # Obtain video frames
    if frame_folder is not None:
        # Pre-extracted frames supplied: read JPEGs straight from disk.
        print('Loading frames in {}'.format(frame_folder))
        import glob
        # here make sure after sorting the frame paths have the correct temporal order
        # NOTE(review): lexicographic sort keeps temporal order only for
        # zero-padded filenames — confirm upstream naming.
        frame_paths = sorted(glob.glob(os.path.join(frame_folder, '*.jpg')))
        print(frame_paths)
        frames = load_frames(frame_paths)
    else:
        print('Extracting frames using ffmpeg...')
        frames = extract_frames(name, num_segments)

    # Prepare input tensor
    if arch == 'resnet3d50':
        # [1, num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames],
                            1).unsqueeze(0)
    else:
        # [num_frames, 3, 224, 224]
        input = torch.stack([transform(frame) for frame in frames])

    # Make video prediction
    with torch.no_grad():
        logits = model(input)
        # Softmax over classes, then average probabilities over dim 0
        # (batch for the 3D model, frames for the 2D model).
        h_x = F.softmax(logits, 1).mean(dim=0)
        # Class indices sorted by descending probability.
        probs, idx = h_x.sort(0, True)

    # Output the prediction.

    print('RESULT ON ' + name)
    # Columns 1 and 2 of the category table feed the energy/valence plane;
    # the *125 scaling presumably matches the track coordinate range — confirm.
    y = float(av_categories[idx[0]][1]) * 125
    x = float(av_categories[idx[0]][2]) * 125

    # Euclidean distance from each track to the predicted (valence, energy).
    trax = trax.assign(
        dist=lambda row: np.sqrt((x - row.valence)**2 + (y - row.energy)**2))
    print('min', trax['dist'].min())

    best = trax.nsmallest(100, 'dist')
    print(best)

    # NOTE(review): only indices 0-9 can ever be drawn, so only the 10
    # closest of the 100 selected tracks are actually candidates.
    rand = randint(0, 9)
    print(rand)
    # Columns 1, 2 and 5 of the track row — presumably artist, title and
    # preview URL (choice[2] is fetched below); verify against the CSV schema.
    choice = best.iloc[rand, [1, 2, 5]]

    print('choice', choice)

    song = 'valence: ' + str(x) + ' arousal: ' + str(
        y) + " " + choice[0] + ' ' + choice[1]
    print(song)
    print(x, y)
    # Show the top-5 predicted categories with their coordinates.
    for i in range(0, 5):
        print('{:.3f} -> {} ->{}'.format(probs[i], idx[i],
                                         av_categories[idx[i]]))
        print('result   cutegories', av_categories[idx[i]][0],
              av_categories[idx[i]][1])

    #r = requests.get(match.iloc[0,2], allow_redirects=True)
    # Download the chosen track's preview audio.
    # NOTE(review): file handle is never closed — should be a `with open(...)`.
    r = requests.get(choice[2], allow_redirects=True)
    open('./tmp/preview.mp3', 'wb').write(r.content)
    # Render output frames with prediction text.
    rendered_output = './tmp/' + video_hash + '_' + str(x) + '_' + str(
        y) + '.mp4'
    # NOTE(review): `rendered_output` is assigned just above, so this check is
    # always true — dead guard, likely left over from an optional-output flag.
    if rendered_output is not None:
        clip = VideoFileClip(name).subclip(30, 60)
        audioclip = AudioFileClip('./tmp/preview.mp3')
        txt_clip = TextClip(song, fontsize=16, color='white')
        clip_final = clip.set_audio(audioclip)
        video = CompositeVideoClip([clip_final, txt_clip])
        video.set_duration(30).write_videofile(rendered_output)