def fetch_duration(
    event,
    save=False,
    save_locally=False,
    verbose=False,
    use_https=True,
):
    """Return the length of the event's video in whole seconds, or None.

    The video location comes from `_get_video_url()`.  ffmpeg is run
    against it and the module-level `REGEX` is applied to what ffmpeg
    wrote on stderr.  Exactly one match is expected; any other number
    of matches raises ValueError from the unpacking below.

    :param event: the event whose video is measured
    :param save: when true, assign the result to `event.duration` and
        call `event.save()`
    :param save_locally: forwarded to `_get_video_url()`; when true the
        directory holding the returned file is removed on the way out
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :returns: number of seconds, or None when `REGEX` matched nothing
    """
    # `local_copy` is None unless `save_locally` is true.
    source, local_copy = _get_video_url(
        event, use_https, save_locally, verbose=verbose
    )
    try:
        ffmpeg = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        argv = [ffmpeg, '-i', source]
        if verbose:  # pragma: no cover
            print(' '.join(argv))

        started = time.time()
        process = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        _, stderr_output = process.communicate()
        finished = time.time()
        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (finished - started,)
            )

        matches = REGEX.findall(stderr_output)
        if not matches:
            if verbose:  # pragma: no cover
                print("No Duration output. Error:")
                print(stderr_output)
            return None

        [found] = matches
        # Groups 0, 1 and 2 are read as hours, minutes and seconds.
        total = (int(found[0]) * 60 + int(found[1])) * 60 + int(found[2])
        if save:
            event.duration = total
            event.save()
        if verbose:  # pragma: no cover
            print(show_duration(total, include_seconds=True))
        return total
    finally:
        if save_locally and os.path.isfile(local_copy):
            shutil.rmtree(os.path.dirname(local_copy))
def test_show_duration_long_format(self):
    """show_duration() spells hours, minutes and seconds out in words."""
    hour = 60 * 60
    minute = 60
    # (positional args, keyword args, expected text), checked in order.
    expectations = (
        ((hour,), {}, "1 hour"),
        ((minute,), {}, "1 minute"),
        ((2 * hour + 10 * minute,), {}, "2 hours 10 minutes"),
        ((1 * hour + 1 * minute,), {}, "1 hour 1 minute"),
        # without include_seconds the trailing second is not shown
        ((1 * hour + 1 * minute + 1,), {}, "1 hour 1 minute"),
        ((2 * hour + 2 * minute,), {}, "2 hours 2 minutes"),
        (
            (1 * hour + 1 * minute + 1,),
            {'include_seconds': True},
            "1 hour 1 minute 1 second",
        ),
        (
            (1 * hour + 1 * minute + 2,),
            {'include_seconds': True},
            "1 hour 1 minute 2 seconds",
        ),
        ((49,), {}, "49 seconds"),
    )
    for args, kwargs, expected in expectations:
        eq_(show_duration(*args, **kwargs), expected)
def event_video(request, event):
    """Return a dict describing the state of the event's video.

    The dict gets 'duration' and 'duration_human' when the event has a
    duration.  When the event's template environment has a 'tag' and a
    VidlySubmission exists for it, the newest submission's Vid.ly
    status is looked up and 'tag', 'status' and 'finished' are added.

    Side effects: a 'Finished' status stamps `finished` on the
    submission and `archive_time` on the event when they are not set;
    an 'Error' status stamps `errored` on the submission.

    `request` is not read by this function.
    """
    context = {}
    if event.duration:
        context['duration'] = event.duration
        context['duration_human'] = show_duration(event.duration)

    # From here on this is basically a thin wrapper on the vidly info.
    environment = event.template_environment
    tag = environment and environment.get('tag')
    if not tag:
        return context

    newest = (
        VidlySubmission.objects
        .filter(event=event, tag=tag)
        .order_by('-submission_time')[:1]
    )
    for submission in newest:
        context['tag'] = tag
        status = vidly.query(tag).get(tag, {}).get('Status')
        context['status'] = status
        context['finished'] = status == 'Finished'

        if context['finished']:
            if not submission.finished:
                submission.finished = timezone.now()
                submission.save()
            if not event.archive_time:
                event.archive_time = timezone.now()
                event.save()
        elif status == 'Error' and not submission.errored:
            submission.errored = timezone.now()
            submission.save()

    return context
def _videos_by_tags(tags):
    """Return one dict per (tag, event) pair in `tags`.

    With a VidlySubmission the dict looks something like this:

        {'id': 123,
         'tag': 'abc123',
         'status': 'Processing',
         'finished': False}

    Without one it is just:

        {'id': 124}

    'duration' and 'duration_human' are added when the event has a
    duration, and 'estimated_time_left' when the status is neither
    'Finished' nor 'Error'.

    The advantage of this function is that Vid.ly is queried only once
    for the whole collection of tags.

    Side effects match the single-event case: a 'Finished' status
    stamps the submission and the event's `archive_time`, an 'Error'
    status stamps `errored` on the submission.
    """
    all_results = vidly.query(tags.keys())

    contexts = []
    for tag, event in tags.items():
        entry = {'id': event.id}
        if event.duration:
            entry['duration'] = event.duration
            entry['duration_human'] = show_duration(event.duration)

        # The slice means the loop body runs at most once, for the
        # most recent submission.
        newest = (
            VidlySubmission.objects
            .filter(event=event, tag=tag)
            .order_by('-submission_time')[:1]
        )
        for submission in newest:
            entry['tag'] = tag
            status = all_results.get(tag, {}).get('Status')
            entry['status'] = status
            entry['finished'] = status == 'Finished'

            if entry['finished']:
                if not submission.finished:
                    submission.finished = timezone.now()
                    submission.save()
                if not event.archive_time:
                    event.archive_time = timezone.now()
                    event.save()
            elif status == 'Error':
                if not submission.errored:
                    submission.errored = timezone.now()
                    submission.save()
            else:
                entry['estimated_time_left'] = (
                    submission.get_estimated_time_left()
                )

        contexts.append(entry)

    return contexts
def fetch_duration(
    event, save=False, save_locally=False, verbose=False, use_https=True,
):
    """Return the length of a Vid.ly event's video in seconds, or None.

    The Vid.ly download URL is built from the tag in the event's
    template environment (with a token when the event is not public),
    one 302 redirect is followed, and ffmpeg is run against the result.
    The module-level `REGEX` is applied to ffmpeg's stderr.

    :param event: an event using a Vid.ly template with a 'tag'
    :param save: when true, assign the result to `event.duration` and
        call `event.save()`
    :param save_locally: download the video to a temporary directory
        first and point ffmpeg at the local file
    :param verbose: print progress information to stdout
    :param use_https: when false, 'https://' in the URL is replaced
        with 'http://'
    :returns: number of seconds, or None when `REGEX` matched nothing
    :raises AssertionError: wrong template, missing tag, or a final
        HEAD status other than 200
    """
    assert 'Vid.ly' in event.template.name, "Not a Vid.ly template"
    assert event.template_environment.get('tag'), "No Vid.ly tag in template"

    hd = False
    # This is commented out for the time being because we don't need the
    # HD version to just capture the duration.
    # qs = VidlySubmission.objects.filter(event=event)
    # for submission in qs.order_by('-submission_time')[:1]:
    #     hd = submission.hd

    tag = event.template_environment['tag']
    vidly_url = 'https://vid.ly/%s?content=video&format=' % tag
    if hd:
        vidly_url += 'hd_mp4'
    else:
        vidly_url += 'mp4'

    if event.privacy != Event.PRIVACY_PUBLIC:
        vidly_url += '&token=%s' % vidly.tokenize(tag, 60)

    response = requests.head(vidly_url)
    if response.status_code == 302:
        vidly_url = response.headers['Location']
        response = requests.head(vidly_url)
    assert response.status_code == 200, response.status_code

    if verbose:  # pragma: no cover
        # Use .get(): a missing Content-Length header must not turn
        # verbose mode into a KeyError.
        content_length = response.headers.get('Content-Length')
        if content_length:
            print(
                "Content-Length: %s" % filesizeformat(int(content_length))
            )

    if not use_https:
        vidly_url = vidly_url.replace('https://', 'http://')

    # Set as soon as the temporary directory exists so that the
    # `finally` below removes it even when the download itself fails.
    download_dir = None
    try:
        if save_locally:
            # store it in a temporary location
            download_dir = tempfile.mkdtemp('videoinfo')
            filepath = os.path.join(download_dir, '%s.mp4' % tag)
            t0 = time.time()
            _download_file(vidly_url, filepath)
            t1 = time.time()
            if verbose:  # pragma: no cover
                print(
                    "Took %s to download"
                    % show_duration(int(t1 - t0), include_seconds=True)
                )
            vidly_url = filepath

        ffmpeg_location = getattr(
            settings,
            'FFMPEG_LOCATION',
            'ffmpeg'
        )
        command = [
            ffmpeg_location,
            '-i', vidly_url,
        ]
        if verbose:  # pragma: no cover
            print(' '.join(command))
        _out, err = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        ).communicate()

        matches = REGEX.findall(err)
        if matches:
            # Exactly one match is expected; anything else raises
            # ValueError here.
            found, = matches
            hours = int(found[0])
            minutes = int(found[1])
            minutes += hours * 60
            seconds = int(found[2])
            seconds += minutes * 60
            if save:
                event.duration = seconds
                event.save()
            return seconds
        elif verbose:  # pragma: no cover
            print("No Duration output. Error:")
            print(err)
    finally:
        if download_dir is not None and os.path.isdir(download_dir):
            shutil.rmtree(download_dir)
def _get_video_url(event, use_https, save_locally, verbose=False):
    """Work out where the event's video can be read from.

    Vid.ly templates get a download URL built from
    `settings.VIDLY_BASE_URL` and the tag; the newest VidlySubmission
    for that tag, if any, decides HD and token protection.  Ogg Video
    templates use the 'url' from the template environment.  Up to six
    301/302 redirects are followed and the final URL must answer a
    HEAD request with 200.

    :param event: the event whose video is wanted
    :param use_https: when false, 'https://' is replaced by 'http://'
    :param save_locally: download the video into a fresh temporary
        directory and return that path instead of the URL
    :param verbose: print progress information to stdout
    :returns: `(video_url, filepath)`; `filepath` is None unless
        `save_locally` is true, in which case both are the local path
    :raises AssertionError: unsupported template, missing tag/url, or
        a final status other than 200
    """
    template_name = event.template.name if event.template else ''
    is_vidly = bool(event.template) and 'Vid.ly' in template_name
    is_ogg = bool(event.template) and 'Ogg Video' in template_name

    if is_vidly:
        assert event.template_environment.get('tag'), "No Vid.ly tag value"

        token_protected = event.privacy != Event.PRIVACY_PUBLIC
        hd = False
        submissions = (
            VidlySubmission.objects
            .filter(event=event)
            .filter(tag=event.template_environment['tag'])
        )
        # The newest submission, if there is one, overrides the defaults.
        for submission in submissions.order_by('-submission_time')[:1]:
            hd = submission.hd
            token_protected = submission.token_protection

        tag = event.template_environment['tag']
        video_url = '%s/%s?content=video&format=' % (
            settings.VIDLY_BASE_URL,
            tag,
        )
        video_url += 'hd_mp4' if hd else 'mp4'
        if token_protected:
            video_url += '&token=%s' % vidly.tokenize(tag, 60)
    elif is_ogg:
        assert event.template_environment.get('url'), "No Ogg Video url value"
        video_url = event.template_environment['url']
    else:
        raise AssertionError("Not valid template")

    response = requests.head(video_url)
    hops = 0
    # Give up following redirects after the sixth hop.
    while response.status_code in (302, 301) and hops <= 5:
        video_url = response.headers['Location']
        response = requests.head(video_url)
        hops += 1

    response = requests.head(video_url)
    assert response.status_code == 200, response.status_code
    if verbose:  # pragma: no cover
        if response.headers.get('Content-Length'):
            print(
                "Content-Length: %s" % (
                    filesizeformat(int(response.headers['Content-Length'])),
                )
            )

    if not use_https:
        video_url = video_url.replace('https://', 'http://')

    if not save_locally:
        return video_url, None

    # store it in a temporary location
    download_dir = tempfile.mkdtemp('videoinfo')
    if is_vidly:
        filename = '%s.mp4' % tag
    else:
        filename = os.path.basename(urlparse.urlparse(video_url).path)
    filepath = os.path.join(download_dir, filename)

    started = time.time()
    _download_file(video_url, filepath)
    finished = time.time()
    if verbose:  # pragma: no cover
        print(
            "Took %s to download"
            % show_duration(int(finished - started), include_seconds=True)
        )

    return filepath, filepath
def fetch_duration(
    event,
    save=False,
    save_locally=False,
    verbose=False,
    use_https=True,
    video_url=None
):
    """Return the length of the event's video in seconds, or None.

    When `video_url` is not given it is resolved with
    `_get_video_url()`.  Remote URLs are checked with a HEAD request
    first, then ffmpeg is run and the module-level `REGEX` is applied
    to its stderr output.

    :param event: the event whose video is measured
    :param save: when true, write the result with a queryset
        `update()` on the event's row
    :param save_locally: forwarded to `_get_video_url()`; must be
        false when `video_url` is given
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :param video_url: optional ready-made URL to probe
    :returns: number of seconds, or None when `REGEX` matched nothing
    :raises AssertionError: the URL reports a 0 byte Content-Length or
        a text/html Content-Type
    """
    if video_url:
        assert not save_locally
    else:
        # `filepath` is None unless `save_locally` is true.
        video_url, filepath = _get_video_url(
            event, use_https, save_locally, verbose=verbose
        )

    if not save_locally:
        # Some videos answer 200 OK on a HEAD request but are corrupt
        # and contain nothing.
        assert '://' in video_url
        headers = requests.head(video_url).headers
        if headers.get('Content-Length') == '0':
            # corrupt file!
            raise AssertionError(
                '%s has a 0 byte Content-Length' % video_url
            )
        if headers.get('Content-Type', '').startswith('text/html'):
            # Not a URL to an actual file!
            raise AssertionError(
                '%s is a text/html document' % video_url
            )

    try:
        ffmpeg = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        if verbose and not which(ffmpeg):
            print("%s is not an executable path" % (ffmpeg,))
        argv = [ffmpeg, '-i', video_url]
        if verbose:  # pragma: no cover
            print(' '.join(argv))

        started = time.time()
        _, stderr_output = wrap_subprocess(argv)
        finished = time.time()
        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (finished - started,)
            )

        matches = REGEX.findall(stderr_output)
        if not matches:
            if verbose:  # pragma: no cover
                print("No Duration output. Error:")
                print(stderr_output)
            return None

        # Exactly one match is expected; anything else raises ValueError.
        [found] = matches
        total = (int(found[0]) * 60 + int(found[1])) * 60 + int(found[2])
        if save:
            # Because it's not safe to keep the event object open too
            # long, as it might have been edited in another thread,
            # just do an update here.
            Event.objects.filter(id=event.id).update(duration=total)
        if verbose:  # pragma: no cover
            print(show_duration(total, include_seconds=True))
        return total
    finally:
        if save_locally and os.path.isfile(filepath):
            shutil.rmtree(os.path.dirname(filepath))
def fetch_screencapture(
    event, save=False, save_locally=False, verbose=False, use_https=True,
):
    """Extract screencaptures from the event's video with ffmpeg.

    ffmpeg writes JPEGs into a temporary directory at a frame rate
    chosen so that roughly `settings.SCREENCAPTURES_NO_PICTURES`
    pictures come out regardless of the video's length.

    :param event: an event with a truthy `duration`
    :param save: when true, create one Picture per extracted file
    :param save_locally: forwarded to `_get_video_url()`; the
        downloaded copy's directory is removed on the way out
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :returns: number of Picture objects created (0 when `save` is
        false)
    :raises AssertionError: the event has no duration
    """
    assert event.duration, "no duration"

    video_url, filepath = _get_video_url(
        event,
        use_https,
        save_locally,
        verbose=verbose,
    )

    save_dir = tempfile.mkdtemp('screencaptures-%s' % event.id)
    try:
        # r is "no. of frames to be extracted into images per second"
        # which means that if it's 1 it's one picture per second.
        # Instead, we want to extract a certain number of pictures
        # independent of length.
        r = '%.4f' % (
            1.0 * settings.SCREENCAPTURES_NO_PICTURES / event.duration,
        )
        if verbose:  # pragma: no cover
            print(
                "Video duration: %s"
                % show_duration(event.duration, include_seconds=True)
            )

        ffmpeg_location = getattr(
            settings,
            'FFMPEG_LOCATION',
            'ffmpeg'
        )
        command = [
            ffmpeg_location,
            '-i', video_url,
            '-r', r,
            os.path.join(save_dir, 'screencap-%02d.jpg')
        ]
        if verbose:  # pragma: no cover
            print(' '.join(command))
        t0 = time.time()
        _out, err = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        ).communicate()
        t1 = time.time()

        files = glob.glob(os.path.join(save_dir, 'screencap*.jpg'))
        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract %s pictures"
                % (t1 - t0, len(files))
            )

        created = 0
        # We sort and reverse by name so that the first instance
        # that is created is the oldest one.
        # That way, when you look at them in the picture gallery
        # (which is sorted by ('event', '-created')) they appear in
        # correct chronological order.
        #
        # The loop variable must NOT be called `filepath`: that name
        # holds the downloaded video's path, which the `finally` below
        # needs in order to clean up the right directory.
        for i, picture_path in enumerate(reversed(sorted(files))):
            if save:
                # JPEGs are binary data, so open them in binary mode.
                with open(picture_path, 'rb') as fp:
                    Picture.objects.create(
                        file=File(fp),
                        notes="Screencap %d" % (len(files) - i,),
                        event=event,
                    )
                    created += 1

        if not files:  # pragma: no cover
            print("No output. Error:")
            print(err)
        if verbose:  # pragma: no cover
            print("Created %s pictures" % (created,))
            # end of this section, so add some margin
            print("\n")
        return created
    finally:
        if save_locally:
            if os.path.isfile(filepath):
                shutil.rmtree(os.path.dirname(filepath))
        if os.path.isdir(save_dir):
            shutil.rmtree(save_dir)
def fetch_screencapture(
    event, save=False, save_locally=False, verbose=False, use_https=True,
    import_=True, import_if_possible=False, video_url=None,
    set_first_available=False,
):
    """return number of files that were successfully created or None

    ffmpeg is run once per picture, seeking to evenly spaced offsets,
    for at most `settings.SCREENCAPTURES_NO_PICTURES` pictures.

    :param event: an event with a truthy `duration`
    :param save: not read by this function
    :param save_locally: forwarded to `_get_video_url()`; must be
        false when `video_url` is given
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :param import_: when true, pictures go to a throw-away temporary
        directory and are imported with `_import_files()`
    :param import_if_possible: pictures go to the predictable
        directory and an import is attempted; if the import raises,
        the directory is kept
    :param video_url: optional ready-made URL to read from
    :param set_first_available: forwarded to `_import_files()`
    :returns: what `_import_files()` returned when importing,
        otherwise the number of files found in the save directory
    :raises AssertionError: the event has no duration
    """
    assert event.duration, "no duration"
    # When you set `import_` to False, it creates the JPEGs and leaves
    # them there in a predictable location (so they can be swept up
    # by import_screencaptures later).
    # However, if you want to continue doing that plus at least
    # try to import the created pictures, then set
    # `import_if_possible=True`.
    # Then, if the import fails, the pictures are still there to be
    # picked up by the import_screencaptures() later.
    if import_if_possible:
        import_ = False

    if video_url:
        assert not save_locally
    else:
        video_url, filepath = _get_video_url(
            event,
            use_https,
            save_locally,
            verbose=verbose,
        )

    if import_:
        save_dir = tempfile.mkdtemp('screencaptures-%s' % event.id)
    else:
        # Instead of importing we're going to put them in a directory
        # that does NOT get deleted when it has created the screencaps.
        save_dir = os.path.join(
            tempfile.gettempdir(),
            settings.SCREENCAPTURES_TEMP_DIRECTORY_NAME
        )
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        directory_name = '%s_%s' % (event.id, event.slug)
        save_dir = os.path.join(save_dir, directory_name)
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)

    def format_time(seconds):
        # Whole-second HH:MM:SS; any fractional part is dropped.
        m, s = divmod(int(seconds), 60)
        h, m = divmod(m, 60)
        return '%02d:%02d:%02d' % (h, m, s)

    # First, assume we will delete the temporary save_dir.
    # This is toggled if an exception happens in importing
    # the pictures.
    delete_save_dir = True

    try:
        if verbose:  # pragma: no cover
            print(
                "Video duration: %s"
                % show_duration(event.duration, include_seconds=True)
            )

        ffmpeg_location = getattr(
            settings,
            'FFMPEG_LOCATION',
            'ffmpeg'
        )
        if verbose and not which(ffmpeg_location):
            print("%s is not an executable path" % (ffmpeg_location,))

        wanted = settings.SCREENCAPTURES_NO_PICTURES
        incr = float(event.duration) / wanted
        seconds = 0
        t0 = time.time()
        number = 0
        output_template = os.path.join(save_dir, 'screencap-%02d.jpg')
        all_err = []
        # `seconds` is a running float sum, so after `wanted` steps it
        # can land a hair below `event.duration` (e.g. ten steps of 0.3
        # add up to 2.9999999999999996).  Capping on `number` stops
        # that rounding error from triggering an extra ffmpeg run.
        while number < wanted and seconds < event.duration:
            number += 1
            output = output_template % number
            command = [
                ffmpeg_location,
                '-ss',
                format_time(seconds),
                '-i',
                video_url,
                '-vframes',
                '1',
                output,
            ]
            if verbose:  # pragma: no cover
                print(' '.join(command))
            _out, err = wrap_subprocess(command)
            all_err.append(err)
            seconds += incr
        t1 = time.time()

        files = _get_files(save_dir)
        if verbose:  # pragma: no cover
            print(
                "Took %.2f seconds to extract %d pictures" % (
                    t1 - t0,
                    len(files)
                )
            )

        if import_ or import_if_possible:
            if verbose and not files:  # pragma: no cover
                print("No output. Error:")
                print('\n'.join(all_err))
            try:
                created = _import_files(
                    event,
                    files,
                    set_first_available=set_first_available
                )
            except Exception:
                # Keep the pictures so a later sweep can import them.
                delete_save_dir = False
                raise
            if verbose:  # pragma: no cover
                print("Created %s pictures" % (created,))
                # end of this section, so add some margin
                print("\n")
            return created
        else:
            if verbose:  # pragma: no cover
                print("Created Temporary Directory %s" % (save_dir,))
                print('\t' + '\n\t'.join(os.listdir(save_dir)))
            return len(files)
    finally:
        if save_locally:
            if os.path.isfile(filepath):
                shutil.rmtree(os.path.dirname(filepath))
        if (
            delete_save_dir and
            os.path.isdir(save_dir) and
            (import_ or import_if_possible)
        ):
            if verbose:  # pragma: no cover
                print("Deleting temporary directory")
                print(save_dir)
            shutil.rmtree(save_dir)
def _get_video_url(event, use_https, save_locally, verbose=False):
    """Work out where the event's video can be read from.

    Vid.ly templates get a https://vid.ly download URL built from the
    tag; the newest VidlySubmission for that tag, if any, decides HD
    and token protection.  Ogg Video templates use the 'url' from the
    template environment.  Up to six 301/302 redirects are followed
    and the final URL must answer a HEAD request with 200.

    :param event: the event whose video is wanted
    :param use_https: when false, 'https://' is replaced by 'http://'
    :param save_locally: download the video into a fresh temporary
        directory and return that path instead of the URL
    :param verbose: print progress information to stdout
    :returns: `(video_url, filepath)`; `filepath` is None unless
        `save_locally` is true, in which case both are the local path
    :raises AssertionError: missing or unsupported template, missing
        tag/url, or a final status other than 200
    """
    # An event without a template is reported the same way as one with
    # an unsupported template (AssertionError) instead of failing with
    # an AttributeError on `.name`.
    template_name = event.template.name if event.template else ''

    if 'Vid.ly' in template_name:
        assert event.template_environment.get('tag'), "No Vid.ly tag value"

        token_protected = event.privacy != Event.PRIVACY_PUBLIC
        hd = False
        qs = (
            VidlySubmission.objects
            .filter(event=event)
            .filter(tag=event.template_environment['tag'])
        )
        # The newest submission, if there is one, overrides the defaults.
        for submission in qs.order_by('-submission_time')[:1]:
            hd = submission.hd
            token_protected = submission.token_protection

        tag = event.template_environment['tag']
        video_url = 'https://vid.ly/%s?content=video&format=' % tag
        if hd:
            video_url += 'hd_mp4'
        else:
            video_url += 'mp4'

        if token_protected:
            video_url += '&token=%s' % vidly.tokenize(tag, 60)
    elif 'Ogg Video' in template_name:
        assert event.template_environment.get('url'), "No Ogg Video url value"
        video_url = event.template_environment['url']
    else:
        raise AssertionError("Not valid template")

    response = requests.head(video_url)
    _count = 0
    while response.status_code in (302, 301):
        video_url = response.headers['Location']
        response = requests.head(video_url)
        _count += 1
        if _count > 5:
            # just too many times
            break

    response = requests.head(video_url)
    assert response.status_code == 200, response.status_code
    if verbose:  # pragma: no cover
        if response.headers.get('Content-Length'):
            print(
                "Content-Length: %s" % (
                    filesizeformat(int(response.headers['Content-Length'])),
                )
            )

    if not use_https:
        video_url = video_url.replace('https://', 'http://')

    if save_locally:
        # store it in a temporary location
        dir_ = tempfile.mkdtemp('videoinfo')
        if 'Vid.ly' in template_name:
            filepath = os.path.join(dir_, '%s.mp4' % tag)
        else:
            filepath = os.path.join(
                dir_,
                os.path.basename(urlparse.urlparse(video_url).path)
            )
        t0 = time.time()
        _download_file(video_url, filepath)
        t1 = time.time()
        if verbose:  # pragma: no cover
            print(
                "Took %s to download"
                % show_duration(int(t1 - t0), include_seconds=True)
            )
        video_url = filepath
    else:
        filepath = None

    return video_url, filepath
def fetch_duration(
    event,
    save=False,
    save_locally=False,
    verbose=False,
    use_https=True,
):
    """Return the length of the event's video in whole seconds, or None.

    The video location comes from `_get_video_url()`.  Remote URLs are
    checked with a HEAD request first, then ffmpeg is run and the
    module-level `REGEX` is applied to its stderr output.

    :param event: the event whose video is measured
    :param save: when true, assign the result to `event.duration` and
        call `event.save()`
    :param save_locally: forwarded to `_get_video_url()`; when true the
        directory holding the returned file is removed on the way out
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :returns: number of seconds, or None when `REGEX` matched nothing
    :raises AssertionError: the URL reports a 0 byte Content-Length or
        a text/html Content-Type
    """
    # `local_copy` is None unless `save_locally` is true.
    source, local_copy = _get_video_url(
        event, use_https, save_locally, verbose=verbose
    )

    if not save_locally:
        # Some videos answer 200 OK on a HEAD request but are corrupt
        # and contain nothing.
        assert '://' in source
        headers = requests.head(source).headers
        if headers.get('Content-Length') == '0':
            # corrupt file!
            raise AssertionError('%s has a 0 byte Content-Length' % source)
        if headers.get('Content-Type', '').startswith('text/html'):
            # Not a URL to an actual file!
            raise AssertionError('%s is a text/html document' % source)

    try:
        ffmpeg = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        argv = [ffmpeg, '-i', source]
        if verbose:  # pragma: no cover
            print(' '.join(argv))

        started = time.time()
        process = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        _, stderr_output = process.communicate()
        finished = time.time()
        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (finished - started,)
            )

        matches = REGEX.findall(stderr_output)
        if not matches:
            if verbose:  # pragma: no cover
                print("No Duration output. Error:")
                print(stderr_output)
            return None

        # Exactly one match is expected; anything else raises ValueError.
        [found] = matches
        total = (int(found[0]) * 60 + int(found[1])) * 60 + int(found[2])
        if save:
            event.duration = total
            event.save()
        if verbose:  # pragma: no cover
            print(show_duration(total, include_seconds=True))
        return total
    finally:
        if save_locally and os.path.isfile(local_copy):
            shutil.rmtree(os.path.dirname(local_copy))
def fetch_screencapture(
    event, save=False, save_locally=False, verbose=False, use_https=True,
    import_=True, import_if_possible=False,
):
    """Extract screencaptures from the event's video with ffmpeg.

    ffmpeg is run once per picture, seeking to evenly spaced offsets,
    for at most `settings.SCREENCAPTURES_NO_PICTURES` pictures.

    :param event: an event with a truthy `duration`
    :param save: not read by this function
    :param save_locally: forwarded to `_get_video_url()`; the
        downloaded copy's directory is removed on the way out
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :param import_: when true, pictures go to a throw-away temporary
        directory and are imported with `_import_files()`
    :param import_if_possible: pictures go to the predictable
        directory and an import is attempted; if the import raises,
        the directory is kept
    :returns: what `_import_files()` returned when importing,
        otherwise the number of files found in the save directory
    :raises AssertionError: the event has no duration
    """
    assert event.duration, "no duration"
    # When you set `import_` to False, it creates the JPEGs and leaves
    # them there in a predictable location (so they can be swept up
    # by import_screencaptures later).
    # However, if you want to continue doing that plus at least
    # try to import the created pictures, then set
    # `import_if_possible=True`.
    # Then, if the import fails, the pictures are still there to be
    # picked up by the import_screencaptures() later.
    if import_if_possible:
        import_ = False

    video_url, filepath = _get_video_url(
        event,
        use_https,
        save_locally,
        verbose=verbose,
    )

    if import_:
        save_dir = tempfile.mkdtemp('screencaptures-%s' % event.id)
    else:
        # Instead of importing we're going to put them in a directory
        # that does NOT get deleted when it has created the screencaps.
        save_dir = os.path.join(
            tempfile.gettempdir(),
            settings.SCREENCAPTURES_TEMP_DIRECTORY_NAME
        )
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        directory_name = '%s_%s' % (event.id, event.slug)
        save_dir = os.path.join(save_dir, directory_name)
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)

    def format_time(seconds):
        # Whole-second HH:MM:SS; any fractional part is dropped.
        m, s = divmod(int(seconds), 60)
        h, m = divmod(m, 60)
        return '%02d:%02d:%02d' % (h, m, s)

    # First, assume we will delete the temporary save_dir.
    # This is toggled if an exception happens in importing
    # the pictures.
    delete_save_dir = True

    try:
        if verbose:  # pragma: no cover
            print(
                "Video duration: %s"
                % show_duration(event.duration, include_seconds=True)
            )

        ffmpeg_location = getattr(
            settings,
            'FFMPEG_LOCATION',
            'ffmpeg'
        )

        wanted = settings.SCREENCAPTURES_NO_PICTURES
        incr = float(event.duration) / wanted
        seconds = 0
        t0 = time.time()
        number = 0
        output_template = os.path.join(save_dir, 'screencap-%02d.jpg')
        all_err = []
        # `seconds` is a running float sum, so after `wanted` steps it
        # can land a hair below `event.duration` (e.g. ten steps of 0.3
        # add up to 2.9999999999999996).  Capping on `number` stops
        # that rounding error from triggering an extra ffmpeg run.
        while number < wanted and seconds < event.duration:
            number += 1
            output = output_template % number
            command = [
                ffmpeg_location,
                '-ss',
                format_time(seconds),
                '-i',
                video_url,
                '-vframes',
                '1',
                output,
            ]
            if verbose:  # pragma: no cover
                print(' '.join(command))
            _out, err = subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            ).communicate()
            all_err.append(err)
            seconds += incr
        t1 = time.time()

        files = _get_files(save_dir)
        if verbose:  # pragma: no cover
            print(
                "Took %.2f seconds to extract %d pictures"
                % (t1 - t0, len(files))
            )

        if import_ or import_if_possible:
            if verbose and not files:  # pragma: no cover
                print("No output. Error:")
                print('\n'.join(all_err))
            try:
                created = _import_files(event, files)
            except Exception:
                # Keep the pictures so a later sweep can import them.
                delete_save_dir = False
                raise
            if verbose:  # pragma: no cover
                print("Created %s pictures" % (created,))
                # end of this section, so add some margin
                print("\n")
            return created
        else:
            if verbose:  # pragma: no cover
                print("Created Temporary Directory %s" % (save_dir,))
                print('\t' + '\n\t'.join(os.listdir(save_dir)))
            return len(files)
    finally:
        if save_locally:
            if os.path.isfile(filepath):
                shutil.rmtree(os.path.dirname(filepath))
        if (
            delete_save_dir and
            os.path.isdir(save_dir) and
            (import_ or import_if_possible)
        ):
            if verbose:  # pragma: no cover
                print("Deleting temporary directory")
                print(save_dir)
            shutil.rmtree(save_dir)
def fetch_duration(event, save=False, save_locally=False, verbose=False,
                   use_https=True, video_url=None):
    """Return the length of the event's video in seconds, or None.

    When `video_url` is not given it is resolved with
    `_get_video_url()`.  Remote URLs are checked with a HEAD request
    first, then ffmpeg is run and the module-level `REGEX` is applied
    to its stderr output.

    :param event: the event whose video is measured
    :param save: when true, write the result with a queryset
        `update()` on the event's row
    :param save_locally: forwarded to `_get_video_url()`; must be
        false when `video_url` is given
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :param video_url: optional ready-made URL to probe
    :returns: number of seconds, or None when `REGEX` matched nothing
    :raises AssertionError: the URL reports a 0 byte Content-Length or
        a text/html Content-Type
    """
    if video_url:
        assert not save_locally
    else:
        # `filepath` is None unless `save_locally` is true.
        video_url, filepath = _get_video_url(
            event, use_https, save_locally, verbose=verbose
        )

    if not save_locally:
        # Some videos answer 200 OK on a HEAD request but are corrupt
        # and contain nothing.
        assert '://' in video_url
        headers = requests.head(video_url).headers
        if headers.get('Content-Length') == '0':
            # corrupt file!
            raise AssertionError(
                '%s has a 0 byte Content-Length' % video_url
            )
        if headers.get('Content-Type', '').startswith('text/html'):
            # Not a URL to an actual file!
            raise AssertionError(
                '%s is a text/html document' % video_url
            )

    try:
        ffmpeg = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        if verbose and not which(ffmpeg):
            print("%s is not an executable path" % (ffmpeg,))
        argv = [ffmpeg, '-i', video_url]
        if verbose:  # pragma: no cover
            print(' '.join(argv))

        started = time.time()
        _, stderr_output = wrap_subprocess(argv)
        finished = time.time()
        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (finished - started,)
            )

        matches = REGEX.findall(stderr_output)
        if not matches:
            if verbose:  # pragma: no cover
                print("No Duration output. Error:")
                print(stderr_output)
            return None

        # Exactly one match is expected; anything else raises ValueError.
        [found] = matches
        total = (int(found[0]) * 60 + int(found[1])) * 60 + int(found[2])
        if save:
            # Because it's not safe to keep the event object open too
            # long, as it might have been edited in another thread,
            # just do an update here.
            Event.objects.filter(id=event.id).update(duration=total)
        if verbose:  # pragma: no cover
            print(show_duration(total, include_seconds=True))
        return total
    finally:
        if save_locally and os.path.isfile(filepath):
            shutil.rmtree(os.path.dirname(filepath))
def fetch_durations(max_=10, order_by='?', verbose=False, dry_run=False,
                    save_locally=False, save_locally_some=False):
    """this can be called by a cron job that will try to fetch
    duration for as many events as it can.

    Candidates are events without a duration, on a Vid.ly template,
    that are not removed.

    :param max_: maximum number of events taken from the queryset
    :param order_by: ordering applied to the candidates; the default
        '?' picks them at random
    :param verbose: print progress information to stdout
    :param dry_run: when true, durations are fetched but not saved
    :param save_locally: forwarded to `fetch_duration()`
    :param save_locally_some: decide per event: download locally only
        when the event is not public
    """
    qs = (
        Event.objects
        .filter(duration__isnull=True)
        .filter(template__name__icontains='Vid.ly')
        .exclude(status=Event.STATUS_REMOVED)
    )
    total_count = qs.count()
    if verbose:  # pragma: no cover
        print("%s events to process" % (total_count,))
        print("")
    count = success = skipped = 0

    # Honour the caller's `order_by` instead of always ordering at
    # random; the default value keeps the previous behaviour.
    for event in qs.order_by(order_by)[:max_]:
        if verbose:  # pragma: no cover
            print(
                "Event: %r, (privacy:%s slug:%s)" % (
                    event.title,
                    event.get_privacy_display(),
                    event.slug,
                )
            )

        if not event.template_environment.get('tag'):
            if verbose:  # pragma: no cover
                print("No Vid.ly Tag!")
            skipped += 1
            continue

        count += 1
        try:
            use_https = True
            if save_locally_some:
                # override save_locally based on the type of event
                save_locally = event.privacy != Event.PRIVACY_PUBLIC
                # then this is not necessary
                use_https = save_locally

            duration = fetch_duration(
                event,
                save=not dry_run,
                save_locally=save_locally,
                use_https=use_https,
                verbose=verbose,
            )
            # NOTE: counts every call that did not raise, including
            # those that returned no duration.
            success += 1
            if verbose:  # pragma: no cover
                if duration:
                    print(
                        "Duration: %s\n" %
                        show_duration(duration, include_seconds=True)
                    )
                else:
                    print("Unabled to extract Duration")
        except AssertionError:
            # Report the failure and carry on with the next event.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print(''.join(traceback.format_tb(exc_traceback)))
            print("%s %s" % (exc_type, exc_value))

    if verbose:  # pragma: no cover
        print(
            "Processed %s (%d successfully) (%d skipped)"
            % (count, success, skipped)
        )
        print("%s left to go" % (total_count - count,))
def fetch_durations(max_=10, order_by='?', verbose=False, dry_run=False,
                    save_locally=False, save_locally_some=False):
    """this can be called by a cron job that will try to fetch
    duration for as many events as it can.

    Candidates are events without a duration, on a Vid.ly or Ogg Video
    template, that are not removed.  Events whose fetch raised an
    AssertionError are remembered in the cache for an hour and left
    out of later runs during that time.

    :param max_: maximum number of events to attempt; twice as many
        are taken from the queryset to leave room for skipped ones
    :param order_by: ordering applied to the candidates; the default
        '?' picks them at random
    :param verbose: print progress information to stdout
    :param dry_run: when true, durations are fetched but not saved
    :param save_locally: forwarded to `fetch_duration()`
    :param save_locally_some: decide per event: download locally only
        when the event is not public
    """
    template_name_q = (
        Q(template__name__icontains='Vid.ly') |
        Q(template__name__icontains='Ogg Video')
    )
    qs = (
        Event.objects
        .filter(duration__isnull=True)
        .filter(template_name_q)
        .exclude(status=Event.STATUS_REMOVED)
    )
    total_count = qs.count()
    if verbose:  # pragma: no cover
        print("%s events to process" % (total_count,))
        print("")
    count = success = skipped = 0

    cache_key = 'videoinfo_quarantined'
    quarantined = cache.get(cache_key, {})
    if quarantined:
        skipped += len(quarantined)
        if verbose:  # pragma: no cover
            print("Deliberately skipping")
            for e in Event.objects.filter(id__in=quarantined.keys()):
                print("\t%r (%s)" % (e.title, quarantined[e.id]))

        qs = qs.exclude(id__in=quarantined.keys())

    # Honour the caller's `order_by` instead of always ordering at
    # random; the default value keeps the previous behaviour.
    for event in qs.order_by(order_by)[:max_ * 2]:
        tag = event.template_environment.get('tag')
        url = event.template_environment.get('url')
        if verbose:  # pragma: no cover
            print(
                "Event: %r, (privacy:%s slug:%s)" % (
                    event.title,
                    event.get_privacy_display(),
                    event.slug,
                )
            )
            if tag:
                print("Vid.ly tag: %s" % (tag,))
            elif url:
                print("Ogg URL: %s" % (url,))

        if not (tag or url):
            if verbose:  # pragma: no cover
                print("No Vid.ly Tag or Ogg URL!")
            skipped += 1
            continue

        count += 1
        try:
            use_https = True
            if save_locally_some:
                # override save_locally based on the type of event
                save_locally = event.privacy != Event.PRIVACY_PUBLIC
                # then this is not necessary
                use_https = save_locally

            duration = fetch_duration(
                event,
                save=not dry_run,
                save_locally=save_locally,
                use_https=use_https,
                verbose=verbose,
            )
            # NOTE: counts every call that did not raise, including
            # those that returned no duration.
            success += 1
            if verbose:  # pragma: no cover
                if duration:
                    print(
                        "Duration: %s\n" %
                        show_duration(duration, include_seconds=True)
                    )
                else:
                    print("Unabled to extract Duration")
        except AssertionError:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print(''.join(traceback.format_tb(exc_traceback)))
            print("%s %s" % (exc_type, exc_value))
            # put it away for a while
            quarantined[event.id] = exc_value
            cache.set(cache_key, quarantined, 60 * 60)

        if count >= max_:
            break

    if verbose:  # pragma: no cover
        print(
            "Processed %s (%d successfully) (%d skipped)"
            % (count, success, skipped)
        )
        print("%s left to go" % (total_count - count,))
def fetch_duration(
    event,
    save=False,
    save_locally=False,
    verbose=False,
    use_https=True,
):
    """Return the length of the event's video in whole seconds, or None.

    The video location comes from `_get_video_url()`.  Remote URLs are
    checked with a HEAD request first, then ffmpeg is run and the
    module-level `REGEX` is applied to its stderr output.

    :param event: the event whose video is measured
    :param save: when true, assign the result to `event.duration` and
        call `event.save()`
    :param save_locally: forwarded to `_get_video_url()`; when true the
        directory holding the returned file is removed on the way out
    :param verbose: print progress information to stdout
    :param use_https: forwarded to `_get_video_url()`
    :returns: number of seconds, or None when `REGEX` matched nothing
    :raises AssertionError: the URL reports a 0 byte Content-Length or
        a text/html Content-Type
    """
    # `local_copy` is None unless `save_locally` is true.
    source, local_copy = _get_video_url(
        event, use_https, save_locally, verbose=verbose
    )

    if not save_locally:
        # Some videos answer 200 OK on a HEAD request but are corrupt
        # and contain nothing.
        assert '://' in source
        headers = requests.head(source).headers
        if headers.get('Content-Length') == '0':
            # corrupt file!
            raise AssertionError(
                '%s has a 0 byte Content-Length' % source
            )
        if headers.get('Content-Type', '').startswith('text/html'):
            # Not a URL to an actual file!
            raise AssertionError(
                '%s is a text/html document' % source
            )

    try:
        ffmpeg = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        argv = [ffmpeg, '-i', source]
        if verbose:  # pragma: no cover
            print(' '.join(argv))

        started = time.time()
        process = subprocess.Popen(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        _, stderr_output = process.communicate()
        finished = time.time()
        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (finished - started,)
            )

        matches = REGEX.findall(stderr_output)
        if not matches:
            if verbose:  # pragma: no cover
                print("No Duration output. Error:")
                print(stderr_output)
            return None

        # Exactly one match is expected; anything else raises ValueError.
        [found] = matches
        total = (int(found[0]) * 60 + int(found[1])) * 60 + int(found[2])
        if save:
            event.duration = total
            event.save()
        if verbose:  # pragma: no cover
            print(show_duration(total, include_seconds=True))
        return total
    finally:
        if save_locally and os.path.isfile(local_copy):
            shutil.rmtree(os.path.dirname(local_copy))