Example #1
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write("Nothing to download; exiting.\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write("Downloading video '%s'...\n" %
                                  video.youtube_id)

                # Update the progress logging
                self.set_stages(
                    num_stages=video_count + len(handled_youtube_ids) +
                    len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    download_video(video.youtube_id,
                                   callback=partial(
                                       self.download_progress_callback, video))
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write("Download is complete!\n")
                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    self.stderr.write("Error in downloading %s: %s\n" %
                                      (video.youtube_id, e))
                    video.download_in_progress = False
                    video.flagged_for_download = not isinstance(e, URLNotFound)  # URLNotFound means we won't try again
                    video.save()
                    # Rather than getting stuck on one video, continue to the next video.
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled(
            ) and handled_youtube_ids:
                self.update_stage(
                    stage_name=self.video.youtube_id,
                    stage_percent=0,
                    notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            sys.stderr.write("Error: %s\n" % e)
            self.cancel(notes=_("Error: %s") % e)
Example #2
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True: # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(_("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

                # Update the progress logging
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    ensure_dir(settings.CONTENT_ROOT)

                    progress_callback = partial(self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id, callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                        def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                            if stats['status'] == "finished":
                                percent = 100.
                            elif stats['status'] == "downloading":
                                percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']
                            else:
                                percent = 0.
                            progress_callback(percent=percent)
                        scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.message
                    elif isinstance(e, IOError) and hasattr(e, "strerror"):
                        connection_error = e.strerror[0] == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
                self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise
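
The stats dict consumed by youtube_dl_cb above ('status', 'downloaded_bytes', 'total_bytes') has the shape youtube-dl passes to its progress hooks, which suggests that scrape_video registers the supplied callback as such a hook. scrape_video's real implementation is not shown on this page; the following is only a minimal sketch of that wiring under that assumption, with the URL format chosen for illustration.

# Hypothetical sketch of how scrape_video might hand the callback to youtube-dl.
import youtube_dl

def scrape_video(youtube_id, quiet=True, callback=None):
    opts = {
        "quiet": quiet,
        # youtube-dl calls each registered hook with a stats dict such as
        # {'status': 'downloading', 'downloaded_bytes': ..., 'total_bytes': ...}
        "progress_hooks": [callback] if callback else [],
    }
    with youtube_dl.YoutubeDL(opts) as ydl:
        ydl.download(["https://www.youtube.com/watch?v=%s" % youtube_id])
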
Example #3
    def handle(self, *args, **options):

        # Parse input parameters
        kwargs = {"host": args[0]} if len(args) >= 1 else {}
        max_retries = args[1] if len(args) >= 2 else 5
        
        set_process_priority.lowest(logging=settings.LOG)  # don't block users from web access due to syncing

        # Retry purgatory
        self.stdout_writeln(("Checking purgatory for unsaved models")+"...")
        call_command("retrypurgatory")

        try:
            client = SyncClient(**kwargs)
        except Exception as e:
            raise CommandError(e)

        connection_status = client.test_connection()
        if connection_status != "success":
            self.stderr_writeln(("KA Lite host is currently unreachable") + " (%s): %s" % (connection_status, client.url))
            return

        self.stdout_writeln(("Initiating SyncSession")+"...")
        try:
            result = client.start_session()
            if result != "success":
                self.stderr_writeln(("Unable to initiate session")+": %s" % result.content)
                return
        except Exception as e:
            raise CommandError(e)
                
        self.stdout_writeln(("Syncing models")+"...")

        failure_tries = 0
        while True:
            results = client.sync_models()

            upload_results = results["upload_results"]
            download_results = results["download_results"]

            
            # display counts for this block of models being transferred
            self.stdout_writeln("\t%-15s: %d (%d failed, %d error(s))" % (
                ("Uploaded"),
                upload_results["saved_model_count"],
                upload_results["unsaved_model_count"],
                upload_results.has_key("error")))
            self.stdout_writeln("\t%-15s: %d (%d failed, %d error(s))" % (
                ("Downloaded"),
                download_results["saved_model_count"],
                download_results["unsaved_model_count"],
                download_results.has_key("error")))

            # count the number of successes and failures
            success_count = upload_results["saved_model_count"]  + download_results["saved_model_count"]
            fail_count    = upload_results["unsaved_model_count"] + download_results["unsaved_model_count"]
            error_count   = upload_results.has_key("error")       + download_results.has_key("error") + upload_results.has_key("exceptions")

            # Report any errors
            if error_count > 0:
                if upload_results.has_key("error"):
                    self.stderr_writeln("%s: %s" % (("Upload error"),upload_results["error"]))
                if download_results.has_key("error"):
                    self.stderr_writeln("%s: %s" % (("Download error"),download_results["error"]))
                if upload_results.has_key("exceptions"):
                    self.stderr_writeln("%s: %s" % (("Upload exceptions"),upload_results["exceptions"][:200]))

            # stop when nothing is being transferred anymore
            if success_count == 0 and (fail_count == 0 or failure_tries >= max_retries):
                break
            failure_tries += (fail_count > 0 and success_count == 0)

            # Allow the user to throttle the syncing by inserting a wait, so that users
            #   aren't overwhelmed by the computational need for signing during sync
            if settings.SYNCING_THROTTLE_WAIT_TIME is not None:
                time.sleep(settings.SYNCING_THROTTLE_WAIT_TIME)

        # Report summaries
        self.stdout_writeln("%s... (%s: %d, %s: %d, %s: %d)" % 
            (("Closing session"), ("Total uploaded"), client.session.models_uploaded, ("Total downloaded"), client.session.models_downloaded, ("Total errors"), client.session.errors))

        # Report any exceptions
        if client.session.errors:
            self.stderr_writeln("Completed with %d errors."%client.session.errors)
        if failure_tries >= max_retries:
            self.stderr_writeln("%s (%d)." % ("Failed to upload all models (stopped after failed attempts)",failure_tries))

        self.stdout_writeln(("Checking purgatory once more, to try saving any unsaved models")+"...")
        call_command("retrypurgatory")

        client.close_session()
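
The sync loop above only terminates when a pass transfers nothing: failure_tries is incremented solely by passes that save nothing but still report failures, so the loop ends either on a completely quiet pass or once max_retries unproductive passes have accumulated. A small standalone restatement of that stop condition (the helper name is chosen here for illustration):

# Hypothetical helper restating the loop-control rule used above.
def should_stop(success_count, fail_count, failure_tries, max_retries=5):
    return success_count == 0 and (fail_count == 0 or failure_tries >= max_retries)

assert should_stop(0, 0, failure_tries=0)         # nothing left to transfer: stop
assert not should_stop(5, 2, failure_tries=0)     # still making progress: keep syncing
assert not should_stop(0, 3, failure_tries=2)     # failures only, but retries remain
assert should_stop(0, 3, failure_tries=5)         # failures only, retries exhausted: stop
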
Example #4
    def handle(self, *args, **options):

        # Parse input parameters
        kwargs = {"host": args[0]} if len(args) >= 1 else {}
        max_retries = args[1] if len(args) >= 2 else 5

        set_process_priority.lowest(
            logging=settings.LOG
        )  # don't block users from web access due to syncing

        # Retry purgatory
        self.stdout_writeln(("Checking purgatory for unsaved models") + "...")
        call_command("retrypurgatory")

        try:
            client = SyncClient(**kwargs)
        except Exception as e:
            raise CommandError(e)

        connection_status = client.test_connection()
        if connection_status != "success":
            self.stderr_writeln(("KA Lite host is currently unreachable") +
                                " (%s): %s" % (connection_status, client.url))
            return

        self.stdout_writeln(("Initiating SyncSession") + "...")
        try:
            result = client.start_session()
            if result != "success":
                self.stderr_writeln(("Unable to initiate session") +
                                    ": %s" % result.content)
                return
        except Exception as e:
            raise CommandError(e)

        self.stdout_writeln(("Syncing models") + "...")

        failure_tries = 0
        while True:
            results = client.sync_models()

            upload_results = results["upload_results"]
            download_results = results["download_results"]

            # display counts for this block of models being transferred
            self.stdout_writeln(
                "\t%-15s: %d (%d failed, %d error(s))" %
                (("Uploaded"), upload_results["saved_model_count"],
                 upload_results["unsaved_model_count"],
                 upload_results.has_key("error")))
            self.stdout_writeln(
                "\t%-15s: %d (%d failed, %d error(s))" %
                (("Downloaded"), download_results["saved_model_count"],
                 download_results["unsaved_model_count"],
                 download_results.has_key("error")))

            # count the number of successes and failures
            success_count = upload_results["saved_model_count"] + download_results["saved_model_count"]
            fail_count = upload_results["unsaved_model_count"] + download_results["unsaved_model_count"]
            error_count = upload_results.has_key("error") + download_results.has_key("error") + upload_results.has_key("exceptions")

            # Report any errors
            if error_count > 0:
                if upload_results.has_key("error"):
                    self.stderr_writeln(
                        "%s: %s" % (("Upload error"), upload_results["error"]))
                if download_results.has_key("error"):
                    self.stderr_writeln(
                        "%s: %s" %
                        (("Download error"), download_results["error"]))
                if upload_results.has_key("exceptions"):
                    self.stderr_writeln("%s: %s" %
                                        (("Upload exceptions"),
                                         upload_results["exceptions"][:200]))

            # stop when nothing is being transferred anymore
            if success_count == 0 and (fail_count == 0
                                       or failure_tries >= max_retries):
                break
            failure_tries += (fail_count > 0 and success_count == 0)

            # Allow the user to throttle the syncing by inserting a wait, so that users
            #   aren't overwhelmed by the computational need for signing during sync
            if settings.SYNCING_THROTTLE_WAIT_TIME is not None:
                time.sleep(settings.SYNCING_THROTTLE_WAIT_TIME)

        # Report summaries
        self.stdout_writeln(
            "%s... (%s: %d, %s: %d, %s: %d)" %
            (("Closing session"),
             ("Total uploaded"), client.session.models_uploaded,
             ("Total downloaded"), client.session.models_downloaded,
             ("Total errors"), client.session.errors))

        # Report any exceptions
        if client.session.errors:
            self.stderr_writeln("Completed with %d errors." %
                                client.session.errors)
        if failure_tries >= max_retries:
            self.stderr_writeln(
                "%s (%d)." %
                ("Failed to upload all models (stopped after failed attempts)",
                 failure_tries))

        self.stdout_writeln(("Checking purgatory once more, to try saving any unsaved models") + "...")
        call_command("retrypurgatory")

        client.close_session()
Example #5
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)
        
        try:
            while True: # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write("Nothing to download; exiting.\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)

                # Update the progress logging
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    download_video(video.youtube_id, callback=partial(self.download_progress_callback, video))
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write("Download is complete!\n")
                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                    video.download_in_progress = False
                    video.flagged_for_download = not isinstance(e, URLNotFound)  # URLNotFound means we won't try again
                    video.save()
                    # Rather than getting stuck on one video, continue to the next video.
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
                self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            sys.stderr.write("Error: %s\n" % e)
            self.cancel(notes=_("Error: %s") % e)
Example #6
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(
                        _("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write(
                    (_("Downloading video '%(youtube_id)s'...") + "\n") %
                    {"youtube_id": video.youtube_id})

                # Update the progress logging
                self.set_stages(
                    num_stages=video_count + len(handled_youtube_ids) +
                    len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    ensure_dir(settings.CONTENT_ROOT)

                    progress_callback = partial(
                        self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id,
                                       callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                        def youtube_dl_cb(stats, progress_callback, *args,
                                          **kwargs):
                            if stats['status'] == "finished":
                                percent = 100.
                            elif stats['status'] == "downloading":
                                percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']
                            else:
                                percent = 0.
                            progress_callback(percent=percent)

                        scrape_video(video.youtube_id,
                                     quiet=not settings.DEBUG,
                                     callback=partial(
                                         youtube_dl_cb,
                                         progress_callback=progress_callback))

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _(
                        "Error in downloading %(youtube_id)s: %(error_msg)s"
                    ) % {
                        "youtube_id": video.youtube_id,
                        "error_msg": unicode(e)
                    }
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.message
                    elif isinstance(e, IOError) and hasattr(e, "strerror"):
                        connection_error = e.strerror[0] == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(
                        stage_status="error",
                        notes=_("%(error_msg)s; continuing to next video.") %
                        {"error_msg": msg})
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled(
            ) and handled_youtube_ids:
                self.update_stage(
                    stage_name=self.video.youtube_id,
                    stage_percent=0,
                    notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            self.cancel(stage_status="error",
                        notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise