def handle(self, *args, **options):
    """Download every video flagged for download, one at a time.

    Failed youtube ids are remembered within this run so they are not
    re-queried; videos whose upstream URL is missing (URLNotFound) are
    unflagged so later runs do not retry them either.
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    # Don't starve web users of CPU while downloads run.
    set_process_priority.lowest(logging=settings.LOG)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write("Nothing to download; exiting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                download_video(video.youtube_id, callback=partial(self.download_progress_callback, video))
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                video.download_in_progress = False
                # URLNotFound means the video doesn't exist upstream; we won't try again.
                video.flagged_for_download = not isinstance(e, URLNotFound)
                video.save()
                # Rather than getting stuck on one video, continue to the next video.
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            # BUGFIX: self.video is set to None above and never reassigned in
            # this method, so self.video.youtube_id raised AttributeError here
            # (unless download_progress_callback mutates self.video — it is
            # only handed the local `video` via partial).  Use the last
            # successfully handled id, which is what was intended.
            self.update_stage(stage_name=handled_youtube_ids[-1], stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        sys.stderr.write("Error: %s\n" % e)
        self.cancel(notes=_("Error: %s") % e)
def handle(self, *args, **options):
    """Download every video flagged for download, falling back to
    youtube-dl (scrape_video) when the primary content host 404s.

    Connection errors leave the video flagged so a later run retries it;
    all other errors unflag the video.  Failed youtube ids are remembered
    within this run so they are not re-queried.
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    # Don't starve web users of CPU while downloads run.
    set_process_priority.lowest(logging=settings.LOG)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write(_("Nothing to download; exiting.") + "\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                ensure_dir(settings.CONTENT_ROOT)

                progress_callback = partial(self.download_progress_callback, video)
                try:
                    # Download via urllib
                    download_video(video.youtube_id, callback=progress_callback)
                except URLNotFound:
                    # Video was not found on amazon cloud service,
                    # either due to a KA mistake, or due to the fact
                    # that it's a dubbed video.
                    #
                    # We can use youtube-dl to get that video!!
                    logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                    def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                        # Translate youtube-dl progress-hook stats into a percentage.
                        if stats['status'] == "finished":
                            percent = 100.
                        elif stats['status'] == "downloading":
                            # BUGFIX: youtube-dl may omit 'total_bytes' (or report
                            # 0) mid-download; avoid KeyError/ZeroDivisionError.
                            total_bytes = stats.get('total_bytes') or 0
                            percent = (100. * stats['downloaded_bytes'] / total_bytes) if total_bytes else 0.
                        else:
                            percent = 0.
                        progress_callback(percent=percent)

                    scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                # If we got here, we downloaded ... somehow :)
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write(_("Download is complete!") + "\n")

            except DownloadCancelled:
                # Cancellation event: reset download state and unflag the
                # video so it is not immediately retried.
                video.percent_complete = 0
                video.flagged_for_download = False
                video.download_in_progress = False
                video.save()
                failed_youtube_ids.append(video.youtube_id)

            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                self.stderr.write("%s\n" % msg)

                # If a connection error, we should retry.
                if isinstance(e, DownloadError):
                    connection_error = "[Errno 8]" in e.message
                elif isinstance(e, IOError) and hasattr(e, "strerror"):
                    connection_error = e.strerror[0] == 8
                else:
                    connection_error = False

                video.download_in_progress = False
                video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                video.save()

                # Rather than getting stuck on one video, continue to the next video.
                self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            # BUGFIX: self.video is set to None above and never reassigned in
            # this method, so self.video.youtube_id raised AttributeError here
            # (unless download_progress_callback mutates self.video — it is
            # only handed the local `video` via partial).  Use the last
            # successfully handled id, which is what was intended.
            self.update_stage(stage_name=handled_youtube_ids[-1], stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
        raise
def handle(self, *args, **options):
    """Sync models with a central KA Lite host until nothing transfers.

    Optional positional args: args[0] is the host to sync against,
    args[1] the maximum number of fruitless retry rounds (default 5).
    """
    # Parse input parameters
    kwargs = {"host": args[0]} if len(args) >= 1 else {}
    # BUGFIX: positional command args arrive as strings; coerce so the
    # `failure_tries >= max_retries` comparison below is numeric (an
    # int-vs-str comparison would never cap the retries).
    max_retries = int(args[1]) if len(args) >= 2 else 5

    set_process_priority.lowest(logging=settings.LOG)  # don't block users from web access due to syncing

    # Retry purgatory
    self.stdout_writeln(("Checking purgatory for unsaved models") + "...")
    call_command("retrypurgatory")

    try:
        client = SyncClient(**kwargs)
    except Exception as e:
        raise CommandError(e)

    connection_status = client.test_connection()
    if connection_status != "success":
        self.stderr_writeln(("KA Lite host is currently unreachable") + " (%s): %s" % (connection_status, client.url))
        return

    self.stdout_writeln(("Initiating SyncSession") + "...")
    try:
        result = client.start_session()
        if result != "success":
            self.stderr_writeln(("Unable to initiate session") + ": %s" % result.content)
            return
    except Exception as e:
        raise CommandError(e)

    self.stdout_writeln(("Syncing models") + "...")

    failure_tries = 0
    while True:
        results = client.sync_models()
        upload_results = results["upload_results"]
        download_results = results["download_results"]

        # display counts for this block of models being transferred
        # (`"error" in d` replaces dict.has_key, removed in Python 3;
        # the bool formats as 0/1 under %d, as before)
        self.stdout_writeln("\t%-15s: %d (%d failed, %d error(s))" % (
            ("Uploaded"),
            upload_results["saved_model_count"],
            upload_results["unsaved_model_count"],
            "error" in upload_results))
        self.stdout_writeln("\t%-15s: %d (%d failed, %d error(s))" % (
            ("Downloaded"),
            download_results["saved_model_count"],
            download_results["unsaved_model_count"],
            "error" in download_results))

        # count the number of successes and failures
        success_count = upload_results["saved_model_count"] + download_results["saved_model_count"]
        fail_count = upload_results["unsaved_model_count"] + download_results["unsaved_model_count"]
        error_count = ("error" in upload_results) + ("error" in download_results) + ("exceptions" in upload_results)

        # Report any errors
        if error_count > 0:
            if "error" in upload_results:
                self.stderr_writeln("%s: %s" % (("Upload error"), upload_results["error"]))
            if "error" in download_results:
                self.stderr_writeln("%s: %s" % (("Download error"), download_results["error"]))
            if "exceptions" in upload_results:
                self.stderr_writeln("%s: %s" % (("Upload exceptions"), upload_results["exceptions"][:200]))

        # stop when nothing is being transferred anymore
        if success_count == 0 and (fail_count == 0 or failure_tries >= max_retries):
            break
        failure_tries += (fail_count > 0 and success_count == 0)

        # Allow the user to throttle the syncing by inserting a wait, so that users
        # aren't overwhelmed by the computational need for signing during sync
        if settings.SYNCING_THROTTLE_WAIT_TIME is not None:
            time.sleep(settings.SYNCING_THROTTLE_WAIT_TIME)

    # Report summaries
    self.stdout_writeln("%s... (%s: %d, %s: %d, %s: %d)" % (
        ("Closing session"),
        ("Total uploaded"), client.session.models_uploaded,
        ("Total downloaded"), client.session.models_downloaded,
        ("Total errors"), client.session.errors))

    # Report any exceptions
    if client.session.errors:
        self.stderr_writeln("Completed with %d errors." % client.session.errors)
    if failure_tries >= max_retries:
        self.stderr_writeln("%s (%d)." % ("Failed to upload all models (stopped after failed attempts)", failure_tries))

    self.stdout_writeln(("Checking purgatory once more, to try saving any unsaved models") + "...")
    call_command("retrypurgatory")

    client.close_session()
def handle(self, *args, **options):
    """Sync models with a central KA Lite host until nothing transfers.

    Optional positional args: args[0] is the host to sync against,
    args[1] the maximum number of fruitless retry rounds (default 5).
    """
    # Parse input parameters
    kwargs = {"host": args[0]} if len(args) >= 1 else {}
    # BUGFIX: positional command args arrive as strings; coerce so the
    # `failure_tries >= max_retries` comparison below is numeric (an
    # int-vs-str comparison would never cap the retries).
    max_retries = int(args[1]) if len(args) >= 2 else 5

    set_process_priority.lowest(logging=settings.LOG)  # don't block users from web access due to syncing

    # Retry purgatory
    self.stdout_writeln(("Checking purgatory for unsaved models") + "...")
    call_command("retrypurgatory")

    try:
        client = SyncClient(**kwargs)
    except Exception as e:
        raise CommandError(e)

    connection_status = client.test_connection()
    if connection_status != "success":
        self.stderr_writeln(("KA Lite host is currently unreachable") + " (%s): %s" % (connection_status, client.url))
        return

    self.stdout_writeln(("Initiating SyncSession") + "...")
    try:
        result = client.start_session()
        if result != "success":
            self.stderr_writeln(("Unable to initiate session") + ": %s" % result.content)
            return
    except Exception as e:
        raise CommandError(e)

    self.stdout_writeln(("Syncing models") + "...")

    failure_tries = 0
    while True:
        results = client.sync_models()
        upload_results = results["upload_results"]
        download_results = results["download_results"]

        # display counts for this block of models being transferred
        # (`"error" in d` replaces dict.has_key, removed in Python 3;
        # the bool formats as 0/1 under %d, as before)
        self.stdout_writeln("\t%-15s: %d (%d failed, %d error(s))" % (
            ("Uploaded"),
            upload_results["saved_model_count"],
            upload_results["unsaved_model_count"],
            "error" in upload_results))
        self.stdout_writeln("\t%-15s: %d (%d failed, %d error(s))" % (
            ("Downloaded"),
            download_results["saved_model_count"],
            download_results["unsaved_model_count"],
            "error" in download_results))

        # count the number of successes and failures
        success_count = upload_results["saved_model_count"] + download_results["saved_model_count"]
        fail_count = upload_results["unsaved_model_count"] + download_results["unsaved_model_count"]
        error_count = ("error" in upload_results) + ("error" in download_results) + ("exceptions" in upload_results)

        # Report any errors
        if error_count > 0:
            if "error" in upload_results:
                self.stderr_writeln("%s: %s" % (("Upload error"), upload_results["error"]))
            if "error" in download_results:
                self.stderr_writeln("%s: %s" % (("Download error"), download_results["error"]))
            if "exceptions" in upload_results:
                self.stderr_writeln("%s: %s" % (("Upload exceptions"), upload_results["exceptions"][:200]))

        # stop when nothing is being transferred anymore
        if success_count == 0 and (fail_count == 0 or failure_tries >= max_retries):
            break
        failure_tries += (fail_count > 0 and success_count == 0)

        # Allow the user to throttle the syncing by inserting a wait, so that users
        # aren't overwhelmed by the computational need for signing during sync
        if settings.SYNCING_THROTTLE_WAIT_TIME is not None:
            time.sleep(settings.SYNCING_THROTTLE_WAIT_TIME)

    # Report summaries
    self.stdout_writeln("%s... (%s: %d, %s: %d, %s: %d)" % (
        ("Closing session"),
        ("Total uploaded"), client.session.models_uploaded,
        ("Total downloaded"), client.session.models_downloaded,
        ("Total errors"), client.session.errors))

    # Report any exceptions
    if client.session.errors:
        self.stderr_writeln("Completed with %d errors." % client.session.errors)
    if failure_tries >= max_retries:
        self.stderr_writeln("%s (%d)." % ("Failed to upload all models (stopped after failed attempts)", failure_tries))

    self.stdout_writeln(("Checking purgatory once more, to try saving any unsaved models") + "...")
    call_command("retrypurgatory")

    client.close_session()
def handle(self, *args, **options):
    """Download every video flagged for download, one at a time.

    Failed youtube ids are remembered within this run so they are not
    re-queried; videos whose upstream URL is missing (URLNotFound) are
    unflagged so later runs do not retry them either.
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    # Don't starve web users of CPU while downloads run.
    set_process_priority.lowest(logging=settings.LOG)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write("Nothing to download; exiting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                download_video(video.youtube_id, callback=partial(self.download_progress_callback, video))
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                video.download_in_progress = False
                # URLNotFound means the video doesn't exist upstream; we won't try again.
                video.flagged_for_download = not isinstance(e, URLNotFound)
                video.save()
                # Rather than getting stuck on one video, continue to the next video.
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            # BUGFIX: self.video is set to None above and never reassigned in
            # this method, so self.video.youtube_id raised AttributeError here
            # (unless download_progress_callback mutates self.video — it is
            # only handed the local `video` via partial).  Use the last
            # successfully handled id, which is what was intended.
            self.update_stage(stage_name=handled_youtube_ids[-1], stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        sys.stderr.write("Error: %s\n" % e)
        self.cancel(notes=_("Error: %s") % e)
def handle(self, *args, **options):
    """Download every video flagged for download, falling back to
    youtube-dl (scrape_video) when the primary content host 404s.

    Connection errors leave the video flagged so a later run retries it;
    all other errors unflag the video.  Failed youtube ids are remembered
    within this run so they are not re-queried.
    """
    self.video = None
    handled_youtube_ids = []  # stored to deal with caching
    failed_youtube_ids = []  # stored to avoid requerying failures.

    # Don't starve web users of CPU while downloads run.
    set_process_priority.lowest(logging=settings.LOG)

    try:
        while True:  # loop until the method is aborted
            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects \
                .filter(flagged_for_download=True, download_in_progress=False) \
                .exclude(youtube_id__in=failed_youtube_ids)
            video_count = videos.count()
            if video_count == 0:
                self.stdout.write(_("Nothing to download; exiting.") + "\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            # Update the video logging
            video = videos[0]
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()
            self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

            # Update the progress logging
            self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
            if not self.started():
                self.start(stage_name=video.youtube_id)

            # Initiate the download process
            try:
                ensure_dir(settings.CONTENT_ROOT)

                progress_callback = partial(self.download_progress_callback, video)
                try:
                    # Download via urllib
                    download_video(video.youtube_id, callback=progress_callback)
                except URLNotFound:
                    # Video was not found on amazon cloud service,
                    # either due to a KA mistake, or due to the fact
                    # that it's a dubbed video.
                    #
                    # We can use youtube-dl to get that video!!
                    logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                    def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                        # Translate youtube-dl progress-hook stats into a percentage.
                        if stats['status'] == "finished":
                            percent = 100.
                        elif stats['status'] == "downloading":
                            # BUGFIX: youtube-dl may omit 'total_bytes' (or report
                            # 0) mid-download; avoid KeyError/ZeroDivisionError.
                            total_bytes = stats.get('total_bytes') or 0
                            percent = (100. * stats['downloaded_bytes'] / total_bytes) if total_bytes else 0.
                        else:
                            percent = 0.
                        progress_callback(percent=percent)

                    scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                # If we got here, we downloaded ... somehow :)
                handled_youtube_ids.append(video.youtube_id)
                self.stdout.write(_("Download is complete!") + "\n")

            except DownloadCancelled:
                # Cancellation event: reset download state and unflag the
                # video so it is not immediately retried.
                video.percent_complete = 0
                video.flagged_for_download = False
                video.download_in_progress = False
                video.save()
                failed_youtube_ids.append(video.youtube_id)

            except Exception as e:
                # On error, report the error, mark the video as not downloaded,
                # and allow the loop to try other videos.
                msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                self.stderr.write("%s\n" % msg)

                # If a connection error, we should retry.
                if isinstance(e, DownloadError):
                    connection_error = "[Errno 8]" in e.message
                elif isinstance(e, IOError) and hasattr(e, "strerror"):
                    connection_error = e.strerror[0] == 8
                else:
                    connection_error = False

                video.download_in_progress = False
                video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                video.save()

                # Rather than getting stuck on one video, continue to the next video.
                self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                failed_youtube_ids.append(video.youtube_id)
                continue

        # This can take a long time, without any further update, so ... best to avoid.
        if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
            # BUGFIX: self.video is set to None above and never reassigned in
            # this method, so self.video.youtube_id raised AttributeError here
            # (unless download_progress_callback mutates self.video — it is
            # only handed the local `video` via partial).  Use the last
            # successfully handled id, which is what was intended.
            self.update_stage(stage_name=handled_youtube_ids[-1], stage_percent=0, notes=_("Generating all pages related to videos."))
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

        # Update
        self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
            "num_handled_videos": len(handled_youtube_ids),
            "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
        })

    except Exception as e:
        self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
        raise