Ejemplo n.º 1
0
        def add_missing_objects_to_db(youtube_ids_in_filesystem,
                                      videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(
                youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(
                        youtube_ids_needing_model_creation).iteritems():
                    # OK to do bulk_create; cache invalidation triggered via save download
                    lang_video_files = [
                        VideoFile(youtube_id=id,
                                  percent_complete=100,
                                  download_in_progress=False,
                                  language=lang_code) for id in youtube_ids
                    ]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                    caching.invalidate_all_caches(
                    )  # Do this within the loop, to update users ASAP
                self.stdout.write(
                    "Created %d VideoFile models (and marked them as complete, since the files exist)\n"
                    % len(new_video_files))

            return [
                i18n.get_video_id(video_file.youtube_id)
                for video_file in new_video_files
            ]
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError(
                "This must only be run on distributed servers server.")

        lang_code = lcode_to_ietf(options["lang_code"])
        lang_name = get_language_name(lang_code)
        software_version = options["software_version"]
        logging.info(
            "Downloading language pack for lang_name=%s, software_version=%s" %
            (lang_name, software_version))

        # Download the language pack
        try:
            if options['file']:
                self.start(
                    _("Using local language pack '%(filepath)s'") %
                    {"filepath": options['file']})
                zip_filepath = options['file']
            else:
                self.start(
                    _("Downloading language pack '%(lang_code)s'") %
                    {"lang_code": lang_code})
                zip_filepath = get_language_pack(lang_code,
                                                 software_version,
                                                 callback=self.cb)

            # Unpack into locale directory
            self.next_stage(
                _("Unpacking language pack '%(lang_code)s'") %
                {"lang_code": lang_code})
            unpack_language(lang_code, zip_filepath=zip_filepath)

            #
            self.next_stage(
                _("Creating static files for language pack '%(lang_code)s'") %
                {"lang_code": lang_code})
            update_jsi18n_file(lang_code)

            self.next_stage(
                _("Moving files to their appropriate local disk locations."))
            move_dubbed_video_map(lang_code)
            move_exercises(lang_code)
            move_srts(lang_code)
            move_video_sizes_file(lang_code)

            self.next_stage()
            call_command("collectstatic", interactive=False)

            self.next_stage(_("Invalidate caches"))
            caching.invalidate_all_caches()

            self.complete(
                _("Finished processing language pack %(lang_name)s.") %
                {"lang_name": get_language_name(lang_code)})
        except Exception as e:
            self.cancel(stage_status="error",
                        notes=_("Error: %(error_msg)s") %
                        {"error_msg": unicode(e)})
            raise
Ejemplo n.º 3
0
 def delete_objects_for_missing_videos(youtube_ids_in_filesystem,
                                       videos_marked_at_all):
     # VideoFile objects say they're available, but that don't actually exist.
     deleted_video_ids = []
     videos_flagged_for_download = set([
         video.youtube_id for video in VideoFile.objects.filter(
             flagged_for_download=True)
     ])
     videos_needing_model_deletion_chunked = break_into_chunks(
         videos_marked_at_all - youtube_ids_in_filesystem -
         videos_flagged_for_download)
     # Disconnect cache-invalidation listener to prevent it from being called multiple times
     for chunk in videos_needing_model_deletion_chunked:
         video_files_needing_model_deletion = VideoFile.objects.filter(
             youtube_id__in=chunk)
         deleted_video_ids += [
             video_file.youtube_id
             for video_file in video_files_needing_model_deletion
         ]
         video_files_needing_model_deletion.delete()
     if deleted_video_ids:
         caching.invalidate_all_caches()
         self.stdout.write(
             "Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n"
             % len(deleted_video_ids))
     return deleted_video_ids
Ejemplo n.º 4
0
def invalidate_on_video_delete(sender, **kwargs):
    """
    Listen in to see when available videos become unavailable.
    """
    was_available = kwargs["instance"] and kwargs["instance"].percent_complete == 100
    if was_available:
        logging.debug("Invalidating cache on VideoFile delete for %s" % kwargs["instance"])
        from kalite import caching  # caching also imports updates; import here to prevent circular dependencies
        caching.invalidate_all_caches()
Ejemplo n.º 5
0
def invalidate_on_video_delete(sender, **kwargs):
    """
    Listen in to see when available videos become unavailable.
    """
    was_available = kwargs["instance"] and kwargs[
        "instance"].percent_complete == 100
    if was_available:
        logging.debug("Invalidating cache on VideoFile delete for %s" %
                      kwargs["instance"])
        from kalite import caching  # caching also imports updates; import here to prevent circular dependencies
        caching.invalidate_all_caches()
Ejemplo n.º 6
0
def invalidate_on_video_update(sender, **kwargs):
    """
    Listen in to see when videos become available.
    """
    # Can only do full check in Django 1.5+, but shouldn't matter--we should only save with
    # percent_complete == 100 once.
    just_now_available = kwargs["instance"] and kwargs["instance"].percent_complete == 100  # and "percent_complete" in kwargs["updated_fields"]
    if just_now_available:
        # This event should only happen once, so don't bother checking if
        #   this is the field that changed.
        logging.debug("Invalidating cache on VideoFile save for %s" % kwargs["instance"])
        from kalite import caching  # caching also imports updates; import here to prevent circular dependencies
        caching.invalidate_all_caches()
Ejemplo n.º 7
0
        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
                video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)

                updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]

            if updated_video_ids:
                caching.invalidate_all_caches()
                self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
            return updated_video_ids
Ejemplo n.º 8
0
 def delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all):
     # VideoFile objects say they're available, but that don't actually exist.
     deleted_video_ids = []
     videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
     videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - youtube_ids_in_filesystem - videos_flagged_for_download)
     # Disconnect cache-invalidation listener to prevent it from being called multiple times
     for chunk in videos_needing_model_deletion_chunked:
         video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
         deleted_video_ids += [video_file.youtube_id for video_file in video_files_needing_model_deletion]
         video_files_needing_model_deletion.delete()
     if deleted_video_ids:
         caching.invalidate_all_caches()
         self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % len(deleted_video_ids))
     return deleted_video_ids
Ejemplo n.º 9
0
def invalidate_on_video_update(sender, **kwargs):
    """
    Listen in to see when videos become available.
    """
    # Can only do full check in Django 1.5+, but shouldn't matter--we should only save with
    # percent_complete == 100 once.
    just_now_available = kwargs["instance"] and kwargs[
        "instance"].percent_complete == 100  # and "percent_complete" in kwargs["updated_fields"]
    if just_now_available:
        # This event should only happen once, so don't bother checking if
        #   this is the field that changed.
        logging.debug("Invalidating cache on VideoFile save for %s" %
                      kwargs["instance"])
        from kalite import caching  # caching also imports updates; import here to prevent circular dependencies
        caching.invalidate_all_caches()
Ejemplo n.º 10
0
        def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(youtube_ids_needing_model_creation).iteritems():
                    # OK to do bulk_create; cache invalidation triggered via save download
                    lang_video_files = [VideoFile(youtube_id=id, percent_complete=100, download_in_progress=False, language=lang_code) for id in youtube_ids]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                    caching.invalidate_all_caches()  # Do this within the loop, to update users ASAP
                self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(new_video_files))

            return [i18n.get_video_id(video_file.youtube_id) for video_file in new_video_files]
Ejemplo n.º 11
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on distributed servers server.")

        lang_code = lcode_to_ietf(options["lang_code"])
        lang_name = get_language_name(lang_code)
        software_version = options["software_version"]
        logging.info("Downloading language pack for lang_name=%s, software_version=%s" % (lang_name, software_version))

        # Download the language pack
        try:
            if options['file']:
                self.start(_("Using local language pack '%(filepath)s'") % {"filepath": options['file']})
                zip_filepath = options['file']
            else:
                self.start(_("Downloading language pack '%(lang_code)s'") % {"lang_code": lang_code})
                zip_filepath = get_language_pack(lang_code, software_version, callback=self.cb)

            # Unpack into locale directory
            self.next_stage(_("Unpacking language pack '%(lang_code)s'") % {"lang_code": lang_code})
            unpack_language(lang_code, zip_filepath=zip_filepath)

            #
            self.next_stage(_("Creating static files for language pack '%(lang_code)s'") % {"lang_code": lang_code})
            update_jsi18n_file(lang_code)


            self.next_stage(_("Moving files to their appropriate local disk locations."))
            move_dubbed_video_map(lang_code)
            move_exercises(lang_code)
            move_srts(lang_code)
            move_video_sizes_file(lang_code)

            self.next_stage()
            call_command("collectstatic", interactive=False)

            self.next_stage(_("Invalidate caches"))
            caching.invalidate_all_caches()

            self.complete(_("Finished processing language pack %(lang_name)s.") % {"lang_name": get_language_name(lang_code)})
        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": unicode(e)})
            raise
Ejemplo n.º 12
0
        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                video_files_needing_model_update = VideoFile.objects.filter(
                    percent_complete=0,
                    download_in_progress=False,
                    youtube_id__in=chunk)
                video_files_needing_model_update.update(
                    percent_complete=100, flagged_for_download=False)

                updated_video_ids += [
                    i18n.get_video_id(video_file.youtube_id)
                    for video_file in video_files_needing_model_update
                ]

            if updated_video_ids:
                caching.invalidate_all_caches()
                self.stdout.write(
                    "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
                    % len(updated_video_ids))
            return updated_video_ids
Ejemplo n.º 13
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("videoscan should be run on the distributed server only.")

        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []

        # Filesystem
        files = glob.glob(os.path.join(settings.CONTENT_ROOT, "*.mp4"))
        youtube_ids_in_filesystem = set([os.path.splitext(os.path.basename(f))[0] for f in files])

        # Database
        videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])

        def delete_objects_for_incomplete_videos():
            # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
            video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
            deleted_video_ids = [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_to_delete]
            video_files_to_delete.delete()
            if deleted_video_ids:
                self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(deleted_video_ids))
            return deleted_video_ids
        touched_video_ids += delete_objects_for_incomplete_videos()


        def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(youtube_ids_needing_model_creation).iteritems():
                    # OK to do bulk_create; cache invalidation triggered via save download
                    lang_video_files = [VideoFile(youtube_id=id, percent_complete=100, download_in_progress=False, language=lang_code) for id in youtube_ids]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(new_video_files))

            return [i18n.get_video_id(video_file.youtube_id) for video_file in new_video_files]

        touched_video_ids += add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all)

        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
                video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)

                updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]

            if updated_video_ids:
                self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
            return updated_video_ids
        touched_video_ids += update_objects_to_be_complete(youtube_ids_in_filesystem)

        def delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all):
            # VideoFile objects say they're available, but that don't actually exist.
            deleted_video_ids = []
            videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
            videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - youtube_ids_in_filesystem - videos_flagged_for_download)
            for chunk in videos_needing_model_deletion_chunked:
                video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
                video_files_needing_model_deletion.delete()
                deleted_video_ids += [video_file.video_id for video_file in video_files_needing_model_deletion]
            if deleted_video_ids:
                self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % len(deleted_video_ids))
            return deleted_video_ids
        touched_video_ids += delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all)

        if options["auto_cache"] and touched_video_ids:
            caching.invalidate_all_caches()
Ejemplo n.º 14
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("videoscan should be run on the distributed server only.")

        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []

        # Filesystem
        files = glob.glob(os.path.join(settings.CONTENT_ROOT, "*.mp4"))
        youtube_ids_in_filesystem = set([os.path.splitext(os.path.basename(f))[0] for f in files])

        # Database
        videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])

        def delete_objects_for_incomplete_videos():
            # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
            video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
            deleted_video_ids = [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_to_delete]
            video_files_to_delete.delete()
            if deleted_video_ids:
                self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(deleted_video_ids))
            return deleted_video_ids
        touched_video_ids += delete_objects_for_incomplete_videos()


        def add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all):
            # Files that exist, but are not in the DB, should be assumed to be good videos,
            #   and just needing to be added to the DB.  Add them to the DB in this way,
            #   so that these files also trigger the update code below (and trigger cache invalidation)
            youtube_ids_needing_model_creation = list(youtube_ids_in_filesystem - videos_marked_at_all)
            new_video_files = []
            if youtube_ids_needing_model_creation:
                for lang_code, youtube_ids in divide_videos_by_language(youtube_ids_needing_model_creation).iteritems():
                    # OK to do bulk_create; cache invalidation triggered via save download
                    lang_video_files = [VideoFile(youtube_id=id, percent_complete=100, download_in_progress=False, language=lang_code) for id in youtube_ids]
                    VideoFile.objects.bulk_create(lang_video_files)
                    new_video_files += lang_video_files
                self.stdout.write("Created %d VideoFile models (and marked them as complete, since the files exist)\n" % len(new_video_files))

            return [i18n.get_video_id(video_file.youtube_id) for video_file in new_video_files]

        touched_video_ids += add_missing_objects_to_db(youtube_ids_in_filesystem, videos_marked_at_all)

        def update_objects_to_be_complete(youtube_ids_in_filesystem):
            # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
            updated_video_ids = []
            for chunk in break_into_chunks(youtube_ids_in_filesystem):
                video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
                video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)

                updated_video_ids += [i18n.get_video_id(video_file.youtube_id) for video_file in video_files_needing_model_update]

            if updated_video_ids:
                self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % len(updated_video_ids))
            return updated_video_ids
        touched_video_ids += update_objects_to_be_complete(youtube_ids_in_filesystem)

        def delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all):
            # VideoFile objects say they're available, but that don't actually exist.
            deleted_video_ids = []
            videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
            videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - youtube_ids_in_filesystem - videos_flagged_for_download)
            # Disconnect cache-invalidation listener to prevent it from being called multiple times
            pre_delete.disconnect(receiver=updates.invalidate_on_video_delete, sender=VideoFile)
            for chunk in videos_needing_model_deletion_chunked:
                video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
                deleted_video_ids += [video_file.youtube_id for video_file in video_files_needing_model_deletion]
                video_files_needing_model_deletion.delete()
            if deleted_video_ids:
                caching.invalidate_all_caches()
                self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % len(deleted_video_ids))
            return deleted_video_ids
            pre_delete.connect(receiver=updates.invalidate_on_video_delete, sender=VideoFile)
        touched_video_ids += delete_objects_for_missing_videos(youtube_ids_in_filesystem, videos_marked_at_all)

        if options["auto_cache"] and touched_video_ids:
            caching.invalidate_all_caches()
Ejemplo n.º 15
0
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(
                        _("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write(
                    (_("Downloading video '%(youtube_id)s'...") + "\n") %
                    {"youtube_id": video.youtube_id})

                # Update the progress logging
                self.set_stages(
                    num_stages=video_count + len(handled_youtube_ids) +
                    len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:

                    progress_callback = partial(
                        self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id,
                                       callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(
                            _("Retrieving youtube video %(youtube_id)s via youtube-dl"
                              ) % {"youtube_id": video.youtube_id})

                        def youtube_dl_cb(stats, progress_callback, *args,
                                          **kwargs):
                            if stats['status'] == "finished":
                                percent = 100.
                            elif stats['status'] == "downloading":
                                percent = 100. * stats[
                                    'downloaded_bytes'] / stats['total_bytes']
                            else:
                                percent = 0.
                            progress_callback(percent=percent)

                        scrape_video(video.youtube_id,
                                     quiet=not settings.DEBUG,
                                     callback=partial(
                                         youtube_dl_cb,
                                         progress_callback=progress_callback))

                    except IOError as e:
                        logging.exception(e)
                        video.download_in_progress = False
                        video.save()
                        failed_youtube_ids.append(video.youtube_id)
                        time.sleep(10)
                        continue

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _(
                        "Error in downloading %(youtube_id)s: %(error_msg)s"
                    ) % {
                        "youtube_id": video.youtube_id,
                        "error_msg": unicode(e)
                    }
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.args[0]
                    elif isinstance(e, IOError) and hasattr(e, "strerror"):
                        connection_error = e.strerror[0] == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(
                        stage_status="error",
                        notes=_("%(error_msg)s; continuing to next video.") %
                        {"error_msg": msg})
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # Update
            self.complete(
                notes=
                _("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully."
                  ) % {
                      "num_handled_videos":
                      len(handled_youtube_ids),
                      "num_total_videos":
                      len(handled_youtube_ids) + len(failed_youtube_ids),
                  })
            caching.invalidate_all_caches()

        except Exception as e:
            self.cancel(stage_status="error",
                        notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise
Ejemplo n.º 16
0
    def handle(self, *args, **options):
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True: # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write(_("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": video.youtube_id})

                # Update the progress logging
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:

                    progress_callback = partial(self.download_progress_callback, video)
                    try:
                        # Download via urllib
                        download_video(video.youtube_id, callback=progress_callback)

                    except URLNotFound:
                        # Video was not found on amazon cloud service,
                        #   either due to a KA mistake, or due to the fact
                        #   that it's a dubbed video.
                        #
                        # We can use youtube-dl to get that video!!
                        logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": video.youtube_id})

                        def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
                            if stats['status'] == "finished":
                                percent = 100.
                            elif stats['status'] == "downloading":
                                percent = 100. * stats['downloaded_bytes'] / stats['total_bytes']
                            else:
                                percent = 0.
                            progress_callback(percent=percent)
                        scrape_video(video.youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                    except IOError as e:
                        logging.exception(e)
                        video.download_in_progress = False
                        video.save()
                        failed_youtube_ids.append(video.youtube_id)
                        time.sleep(10)
                        continue

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                except DownloadCancelled:
                    # Cancellation event
                    video.percent_complete = 0
                    video.flagged_for_download = False
                    video.download_in_progress = False
                    video.save()
                    failed_youtube_ids.append(video.youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": video.youtube_id, "error_msg": unicode(e)}
                    self.stderr.write("%s\n" % msg)

                    # If a connection error, we should retry.
                    if isinstance(e, DownloadError):
                        connection_error = "[Errno 8]" in e.args[0]
                    elif isinstance(e, IOError) and hasattr(e, "strerror"):
                        connection_error = e.strerror[0] == 8
                    else:
                        connection_error = False

                    video.download_in_progress = False
                    video.flagged_for_download = connection_error  # Any error other than a connection error is fatal.
                    video.save()

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })
            caching.invalidate_all_caches()

        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise