Example 1
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Pick a deterministic video slug (index 10) so the test is repeatable.
        # NOTE: dict.keys() is not subscriptable on Python 3 (it returns a
        # view, not a list), so wrap it in list() to work on both 2 and 3.
        video_slug = list(topicdata.NODE_CACHE['Video'].keys())[10]
        sys.stdout.write("Testing on video_slug = %s\n" % video_slug)
        youtube_id = topicdata.NODE_CACHE['Video'][video_slug]['youtube_id']
        video_path = topicdata.NODE_CACHE['Video'][video_slug]['paths'][0]

        # Clean the cache for this item
        caching.expire_page(path=video_path)

        # Create the cache item, and check it
        self.assertTrue(not caching.has_cache_key(path=video_path),
                        "expect: no cache key after expiring the page")
        caching.regenerate_all_pages_related_to_videos(video_ids=[youtube_id])
        self.assertTrue(caching.has_cache_key(path=video_path),
                        "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_pages_related_to_video(
            video_id=youtube_id)  # test the convenience function
        self.assertTrue(not caching.has_cache_key(path=video_path),
                        "expect: no cache key after expiring the page")
    def handle(self, *args, **options):
        """Synchronize VideoFile models with the .mp4 files on disk.

        Deletes rows stuck in a partial-download state, creates rows for files
        on disk that have no DB entry, marks rows complete when their file
        exists, deletes rows whose files are gone, and finally regenerates
        cached pages for every touched video (if auto_cache is enabled).
        """
        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []

        # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
        video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
        youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
        video_files_to_delete.delete()
        # Fall back to the raw youtube_id when no video-id mapping exists
        # ("or yid"), consistent with the other touched_video_ids updates
        # below; without it None values could reach the cache regeneration.
        touched_video_ids += [i18n.get_video_id(yid) or yid for yid in youtube_ids_to_delete]
        # Use the id list captured before deletion: len() on the lazy queryset
        # would re-run the query AFTER .delete() and always report 0,
        # suppressing this message.
        if len(youtube_ids_to_delete):
            self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(youtube_ids_to_delete))

        files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
        videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])
        videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])
        videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])

        # Files that exist, but are not in the DB, should be assumed to be good videos, 
        #   and just needing to be added to the DB.  Add them to the DB in this way,
        #   so that these files also trigger the update code below (and trigger cache invalidation)
        video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
        count = len(video_ids_needing_model_creation)
        if count:
            # OK to do bulk_create; cache invalidation triggered via save download
            # (renamed loop variable: "id" shadowed the builtin)
            VideoFile.objects.bulk_create([VideoFile(youtube_id=yid, percent_complete=0, download_in_progress=False) for yid in video_ids_needing_model_creation])
            self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)
            touched_video_ids += [i18n.get_video_id(yid) or yid for yid in video_ids_needing_model_creation]

        # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
        #   These should be individually saved to be 100% complete, to trigger their availability (and cache invalidation)
        count = 0
        for chunk in break_into_chunks(videos_in_filesystem):
            video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
            count += video_files_needing_model_update.count()
            for videofile in video_files_needing_model_update:
                videofile.percent_complete = 100
                videofile.flagged_for_download = False
                videofile.save()
        if count:
            self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

        # VideoFile objects say they're available, but that don't actually exist.
        count = 0
        videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
        for chunk in videos_needing_model_deletion_chunked:
            video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
            count += video_files_needing_model_deletion.count()
            video_files_needing_model_deletion.delete()
            touched_video_ids += [i18n.get_video_id(yid) or yid for yid in chunk]
        if count:
            self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

        if options["auto_cache"] and caching_enabled and touched_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=list(set(touched_video_ids)))
Example 3
    def handle(self, *args, **options):
        """Download flagged videos one at a time until none remain or we abort.

        Aborts when another download is in progress, when nothing is flagged,
        when the user cancels, or on a download error (in which case the job
        is re-queued via force_job). Cached pages for each downloaded video
        are invalidated as we go and regenerated once at the end (if
        auto_cache is enabled).
        """
        caching_enabled = settings.CACHE_TIME != 0
        handled_video_ids = []

        while True:  # loop until the method is aborted

            # Another process/thread owns a download; don't compete with it.
            if VideoFile.objects.filter(download_in_progress=True).count() > 0:
                self.stderr.write("Another download is still in progress; aborting.\n")
                break

            videos = VideoFile.objects.filter(flagged_for_download=True, download_in_progress=False)
            if videos.count() == 0:
                self.stdout.write("Nothing to download; aborting.\n")
                break

            video = videos[0]

            # User intervention (idiomatic truth test instead of "== True")
            if video.cancel_download:
                video.download_in_progress = False
                video.save()
                self.stdout.write("Download cancelled; aborting.\n")
                break

            # Claim the video so concurrent runs skip it.
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()

            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)
            try:
                download_video(video.youtube_id, callback=download_progress_callback(self, video))
                self.stdout.write("Download is complete!\n")
            except Exception as e:
                self.stderr.write("Error in downloading: %s\n" % e)
                video.download_in_progress = False
                video.save()
                force_job("videodownload", "Download Videos")  # infinite recursive call? :(
                break

            handled_video_ids.append(video.youtube_id)

            # Expire, but don't regenerate until the very end, for efficiency.
            if caching_enabled:
                caching.invalidate_all_pages_related_to_video(video_id=video.youtube_id)

        if options["auto_cache"] and caching_enabled and handled_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=handled_video_ids)
Example 4
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Pick a deterministic video id (index 10) so the test is repeatable.
        # dict.keys() is not subscriptable on Python 3 (it returns a view);
        # wrap it in list() so the test runs on both Python 2 and 3.
        video_id = list(topicdata.NODE_CACHE['Video'].keys())[10]
        sys.stdout.write("Testing on video_id = %s\n" % video_id)
        video_path = topicdata.NODE_CACHE['Video'][video_id][0]['path']

        # Clean the cache for this item
        caching.expire_page(path=video_path, failure_ok=True)

        # Create the cache item, and check it
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: no cache key after expiring the page")
        caching.regenerate_all_pages_related_to_videos(video_ids=[video_id])
        self.assertTrue(caching.has_cache_key(path=video_path), "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_pages_related_to_video(video_id=video_id)  # test the convenience function
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: no cache key after expiring the page")
Example 5
    def handle(self, *args, **options):
        """Download flagged videos one at a time until none remain or we abort.

        Aborts when another download is already in progress, when nothing is
        flagged, when the user cancels, or on a connection error. A video
        whose URL is not found is removed from the queue and the loop
        continues. Cached pages for each downloaded video are invalidated as
        we go and regenerated once at the end (if auto_cache is enabled).
        """

        caching_enabled = settings.CACHE_TIME != 0
        handled_video_ids = []  # stored to deal with caching

        while True:  # loop until the method is aborted

            # Another process/thread owns a download; don't compete with it.
            if VideoFile.objects.filter(download_in_progress=True).count() > 0:
                self.stderr.write(
                    "Another download is still in progress; aborting.\n")
                break

            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects.filter(flagged_for_download=True,
                                              download_in_progress=False)
            if videos.count() == 0:
                self.stdout.write("Nothing to download; aborting.\n")
                break

            video = videos[0]

            # User intervention
            if video.cancel_download == True:
                video.download_in_progress = False
                video.save()
                self.stdout.write("Download cancelled; aborting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()

            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)
            try:
                download_video(video.youtube_id,
                               callback=download_progress_callback(
                                   self, video))
                handled_video_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:

                if isinstance(e, URLNotFound):
                    # This should never happen, but if it does, remove the VideoFile from the queue, and continue
                    # to the next video. Warning: this will leave the update page in a weird state, currently
                    # (and require a refresh of the update page in order to start showing progress again)
                    video.delete()
                    continue

                # On connection error, report the error, mark the video as not downloaded, and give up for now.
                self.stderr.write("Error in downloading %s: %s\n" %
                                  (video.youtube_id, e))
                video.download_in_progress = False
                video.percent_complete = 0
                video.save()
                break

            # Expire, but don't regenerate until the very end, for efficiency.
            if caching_enabled:
                caching.invalidate_all_pages_related_to_video(
                    video_id=video.youtube_id)

        # After all is done, regenerate all pages
        #   since this is computationally intensive, only do it after we're sure
        #   nothing more will change (so that we don't regenerate something that is
        #   later invalidated by another video downloaded in the loop)
        if options["auto_cache"] and caching_enabled and handled_video_ids:
            caching.regenerate_all_pages_related_to_videos(
                video_ids=handled_video_ids)
Example 6
    def handle(self, *args, **options):
        """Reconcile VideoFile models with the .mp4/.srt files on disk.

        - Deletes rows stuck in a partially-downloaded state.
        - Marks rows complete when their video file exists on disk.
        - Creates rows for files present on disk but missing from the DB.
        - Deletes rows whose files are gone (unless flagged for download).
        - Marks subtitles downloaded when an .srt file exists.
        Cached pages for every touched video are invalidated along the way
        and regenerated once at the end (if auto_cache is enabled).
        """
        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []

        # delete VideoFile objects that are not marked as in progress, but are
        # neither 0% nor 100% done; they're broken
        video_files_to_delete = VideoFile.objects.filter(
            download_in_progress=False,
            percent_complete__gt=0,
            percent_complete__lt=100)
        youtube_ids_to_delete = [
            d["youtube_id"] for d in video_files_to_delete.values("youtube_id")
        ]
        video_files_to_delete.delete()

        if caching_enabled:
            for youtube_id in youtube_ids_to_delete:
                caching.invalidate_all_pages_related_to_video(
                    video_id=youtube_id)
                touched_video_ids.append(youtube_id)
        # Use the id list captured before deletion: len() on the lazy queryset
        # re-runs the query AFTER .delete() and always reports 0, which
        # suppressed this message.
        if len(youtube_ids_to_delete):
            self.stdout.write(
                "Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n"
                % len(youtube_ids_to_delete))

        files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
        subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
        videos_marked_at_all = set(
            [video.youtube_id for video in VideoFile.objects.all()])

        # Strip directory and extension to get the youtube id of each file;
        # backslashes are normalized so this also works on Windows paths.
        videos_in_filesystem = set([
            path.replace("\\", "/").split("/")[-1].split(".")[0]
            for path in files
        ])
        videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)

        videos_flagged_for_download = set([
            video.youtube_id
            for video in VideoFile.objects.filter(flagged_for_download=True)
        ])

        subtitles_in_filesystem = set([
            path.replace("\\", "/").split("/")[-1].split(".")[0]
            for path in subtitle_files
        ])
        subtitles_in_filesystem_chunked = break_into_chunks(
            subtitles_in_filesystem)

        # Mark videos whose files exist on disk as 100% complete.
        count = 0
        for chunk in videos_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(
                percent_complete=0,
                download_in_progress=False,
                youtube_id__in=chunk)
            # Capture the matching ids BEFORE calling .update(): querysets are
            # lazy, so iterating afterwards would re-run the filter
            # (percent_complete=0), match nothing, and silently skip the cache
            # invalidation below.
            updated_youtube_ids = list(
                video_files_needing_model_update.values_list("youtube_id",
                                                             flat=True))
            count += len(updated_youtube_ids)
            video_files_needing_model_update.update(percent_complete=100,
                                                    flagged_for_download=False)
            if caching_enabled:
                for youtube_id in updated_youtube_ids:
                    caching.invalidate_all_pages_related_to_video(
                        video_id=youtube_id)
                    touched_video_ids.append(youtube_id)
        if count:
            self.stdout.write(
                "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
                % count)

        # Create rows for files that exist on disk but have no DB entry.
        video_ids_needing_model_creation = list(videos_in_filesystem -
                                                videos_marked_at_all)
        count = len(video_ids_needing_model_creation)
        if count:
            VideoFile.objects.bulk_create([
                VideoFile(youtube_id=youtube_id, percent_complete=100)
                for youtube_id in video_ids_needing_model_creation
            ])
            if caching_enabled:
                for vid in video_ids_needing_model_creation:
                    caching.invalidate_all_pages_related_to_video(video_id=vid)
                    touched_video_ids.append(vid)
            self.stdout.write(
                "Created %d VideoFile models (to mark them as complete, since the files exist)\n"
                % count)

        # Delete rows whose files are gone (and aren't queued for download).
        count = 0
        videos_needing_model_deletion_chunked = break_into_chunks(
            videos_marked_at_all - videos_in_filesystem -
            videos_flagged_for_download)
        for chunk in videos_needing_model_deletion_chunked:
            video_files_needing_model_deletion = VideoFile.objects.filter(
                youtube_id__in=chunk)
            count += video_files_needing_model_deletion.count()
            video_files_needing_model_deletion.delete()
            if caching_enabled:
                for video_id in chunk:
                    caching.invalidate_all_pages_related_to_video(
                        video_id=video_id)
                    touched_video_ids.append(video_id)
        if count:
            self.stdout.write(
                "Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n"
                % count)

        # Mark subtitles as downloaded when the .srt file exists on disk.
        count = 0
        for chunk in subtitles_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(
                subtitle_download_in_progress=False,
                subtitles_downloaded=False,
                youtube_id__in=chunk)
            # Same lazy-queryset pitfall as above: capture ids before .update()
            # flips subtitles_downloaded and empties the filter's result set.
            updated_youtube_ids = list(
                video_files_needing_model_update.values_list("youtube_id",
                                                             flat=True))
            count += len(updated_youtube_ids)
            video_files_needing_model_update.update(subtitles_downloaded=True)
            if caching_enabled:
                for youtube_id in updated_youtube_ids:
                    caching.invalidate_all_pages_related_to_video(
                        video_id=youtube_id)
                    touched_video_ids.append(youtube_id)

        if count:
            self.stdout.write(
                "Updated %d VideoFile models (marked them as having subtitles)\n"
                % count)

        if options["auto_cache"] and caching_enabled and touched_video_ids:
            caching.regenerate_all_pages_related_to_videos(
                video_ids=touched_video_ids)
Example 7
    def handle(self, *args, **options):
        """Download flagged videos one at a time until none remain or we abort.

        Aborts when another download is already in progress, when nothing is
        flagged, when the user cancels, or on a connection error. A video
        whose URL is not found is removed from the queue and the loop
        continues. Cached pages for each downloaded video are invalidated as
        we go and regenerated once at the end (if auto_cache is enabled).
        """

        caching_enabled = settings.CACHE_TIME != 0
        handled_video_ids = []  # stored to deal with caching

        while True: # loop until the method is aborted

            # Another process/thread owns a download; don't compete with it.
            if VideoFile.objects.filter(download_in_progress=True).count() > 0:
                self.stderr.write("Another download is still in progress; aborting.\n")
                break

            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects.filter(flagged_for_download=True, download_in_progress=False)
            if videos.count() == 0:
                self.stdout.write("Nothing to download; aborting.\n")
                break

            video = videos[0]

            # User intervention
            if video.cancel_download == True:
                video.download_in_progress = False
                video.save()
                self.stdout.write("Download cancelled; aborting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()

            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)
            try:
                download_video(video.youtube_id, callback=download_progress_callback(self, video))
                handled_video_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:

                if isinstance(e, URLNotFound):
                    # This should never happen, but if it does, remove the VideoFile from the queue, and continue
                    # to the next video. Warning: this will leave the update page in a weird state, currently
                    # (and require a refresh of the update page in order to start showing progress again)
                    video.delete()
                    continue

                # On connection error, report the error, mark the video as not downloaded, and give up for now.
                self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                video.download_in_progress = False
                video.percent_complete = 0
                video.save()
                break

            # Expire, but don't regenerate until the very end, for efficiency.
            if caching_enabled:
                caching.invalidate_all_pages_related_to_video(video_id=video.youtube_id)

        # After all is done, regenerate all pages
        #   since this is computationally intensive, only do it after we're sure
        #   nothing more will change (so that we don't regenerate something that is
        #   later invalidated by another video downloaded in the loop)
        if options["auto_cache"] and caching_enabled and handled_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=handled_video_ids)
Example 8
    def handle(self, *args, **options):
        """Download all videos flagged for download, one at a time.

        Videos that fail are remembered and skipped for the rest of the run.
        Progress stages are updated as downloads proceed; cached video pages
        are regenerated at the end (if auto_cache is enabled); success and
        failure counts are reported on completion.
        """
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write("Nothing to download; exiting.\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                # Remember the current video on self: the auto_cache stage
                # update below reads self.video.youtube_id and would otherwise
                # raise AttributeError on the initial None.
                self.video = video
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                self.stdout.write("Downloading video '%s'...\n" %
                                  video.youtube_id)

                # Update the progress logging
                self.set_stages(
                    num_stages=video_count + len(handled_youtube_ids) +
                    len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    download_video(video.youtube_id,
                                   callback=partial(
                                       self.download_progress_callback, video))
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write("Download is complete!\n")
                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    self.stderr.write("Error in downloading %s: %s\n" %
                                      (video.youtube_id, e))
                    video.download_in_progress = False
                    video.flagged_for_download = not isinstance(
                        e,
                        URLNotFound)  # URLNotFound means, we won't try again
                    video.save()
                    # Rather than getting stuck on one video, continue to the next video.
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled(
            ) and handled_youtube_ids:
                self.update_stage(
                    stage_name=self.video.youtube_id,
                    stage_percent=0,
                    notes=_("Generating all pages related to videos."))
                caching.regenerate_all_pages_related_to_videos(video_ids=list(
                    set([
                        i18n.get_video_id(yid) or yid
                        for yid in handled_youtube_ids
                    ])))

            # Update
            self.complete(
                notes=
                _("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully."
                  ) % {
                      "num_handled_videos":
                      len(handled_youtube_ids),
                      "num_total_videos":
                      len(handled_youtube_ids) + len(failed_youtube_ids),
                  })

        except Exception as e:
            sys.stderr.write("Error: %s\n" % e)
            self.cancel(notes=_("Error: %s") % e)
Example 9
    def handle(self, *args, **options):
        """Reconcile VideoFile models with the .mp4/.srt files on disk.

        Deletes rows stuck in a partially-downloaded state, marks rows
        complete when their video file exists, creates rows for files with no
        DB entry, deletes rows whose files are gone (unless flagged for
        download), and marks subtitles downloaded when an .srt file exists.
        Cached pages for every touched video are invalidated along the way
        and regenerated once at the end (if auto_cache is enabled).
        """
        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []

        # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
        video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
        youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
        video_files_to_delete.delete()

        if caching_enabled:
            for youtube_id in youtube_ids_to_delete:
                caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                touched_video_ids.append(youtube_id)
        # Use the id list captured before deletion: len() on the lazy queryset
        # re-runs the query AFTER .delete() and always reports 0, which
        # suppressed this message.
        if len(youtube_ids_to_delete):
            self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(youtube_ids_to_delete))

        files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
        subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
        videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])

        # Strip directory and extension to get the youtube id of each file;
        # backslashes are normalized so this also works on Windows paths.
        videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])
        videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)

        videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])

        subtitles_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in subtitle_files])
        subtitles_in_filesystem_chunked = break_into_chunks(subtitles_in_filesystem)

        # Mark videos whose files exist on disk as 100% complete.
        count = 0
        for chunk in videos_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
            # Capture the matching ids BEFORE calling .update(): querysets are
            # lazy, so iterating afterwards re-runs the filter
            # (percent_complete=0), matches nothing, and silently skips the
            # cache invalidation below.
            updated_youtube_ids = list(video_files_needing_model_update.values_list("youtube_id", flat=True))
            count += len(updated_youtube_ids)
            video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
            if caching_enabled:
                for youtube_id in updated_youtube_ids:
                    caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                    touched_video_ids.append(youtube_id)
        if count:
            self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

        # Create rows for files that exist on disk but have no DB entry.
        video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
        count = len(video_ids_needing_model_creation)
        if count:
            VideoFile.objects.bulk_create([VideoFile(youtube_id=youtube_id, percent_complete=100) for youtube_id in video_ids_needing_model_creation])
            if caching_enabled:
                for vid in video_ids_needing_model_creation:
                    caching.invalidate_all_pages_related_to_video(video_id=vid)
                    touched_video_ids.append(vid)
            self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

        # Delete rows whose files are gone (and aren't queued for download).
        count = 0
        videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
        for chunk in videos_needing_model_deletion_chunked:
            video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
            count += video_files_needing_model_deletion.count()
            video_files_needing_model_deletion.delete()
            if caching_enabled:
                for video_id in chunk:
                    caching.invalidate_all_pages_related_to_video(video_id=video_id)
                    touched_video_ids.append(video_id)
        if count:
            self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

        # Mark subtitles as downloaded when the .srt file exists on disk.
        count = 0
        for chunk in subtitles_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(subtitle_download_in_progress=False, subtitles_downloaded=False, youtube_id__in=chunk)
            # Same lazy-queryset pitfall as above: capture ids before .update()
            # flips subtitles_downloaded and empties the filter's result set.
            updated_youtube_ids = list(video_files_needing_model_update.values_list("youtube_id", flat=True))
            count += len(updated_youtube_ids)
            video_files_needing_model_update.update(subtitles_downloaded=True)
            if caching_enabled:
                for youtube_id in updated_youtube_ids:
                    caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                    touched_video_ids.append(youtube_id)

        if count:
            self.stdout.write("Updated %d VideoFile models (marked them as having subtitles)\n" % count)

        if options["auto_cache"] and caching_enabled and touched_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=touched_video_ids)
Example 10
    def handle(self, *args, **options):
        # A cache time of zero means page caching is disabled entirely.
        caching_enabled = (settings.CACHE_TIME != 0)
        # Video ids whose availability changed here; drives cache regeneration at the end.
        touched_video_ids = []

        # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
        video_files_to_delete = VideoFile.objects.filter(
            download_in_progress=False,
            percent_complete__gt=0,
            percent_complete__lt=100)
        # Snapshot the ids now: .values() evaluates immediately, before the rows are deleted.
        youtube_ids_to_delete = [
            d["youtube_id"] for d in video_files_to_delete.values("youtube_id")
        ]
        video_files_to_delete.delete()
        # NOTE(review): unlike the analogous lists below (L655-658, L688-690 pattern),
        #   this one has no "or yid" fallback when i18n.get_video_id() returns a falsy
        #   value -- confirm whether None entries here are intended.
        touched_video_ids += [
            i18n.get_video_id(yid) for yid in youtube_ids_to_delete
        ]
        # NOTE(review): the queryset was never evaluated before .delete(), so len()
        #   here re-queries the (now empty) set and is presumably always 0, meaning
        #   this message never prints; len(youtube_ids_to_delete) looks intended -- verify.
        if len(video_files_to_delete):
            self.stdout.write(
                "Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n"
                % len(video_files_to_delete))

        # Take stock: what exists on disk vs. what the DB believes.
        files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
        videos_marked_at_all = set(
            [video.youtube_id for video in VideoFile.objects.all()])
        # NOTE(review): videos_marked_as_in_progress and videos_marked_as_unstarted
        #   are computed but never referenced in the rest of this method -- confirm
        #   whether they are dead code.
        videos_marked_as_in_progress = set([
            video.youtube_id
            for video in VideoFile.objects.filter(download_in_progress=True)
        ])
        videos_marked_as_unstarted = set([
            video.youtube_id
            for video in VideoFile.objects.filter(percent_complete=0,
                                                  download_in_progress=False)
        ])

        videos_flagged_for_download = set([
            video.youtube_id
            for video in VideoFile.objects.filter(flagged_for_download=True)
        ])
        # Recover the youtube id from each filename: strip directory and extension.
        # The backslash replacement normalizes Windows-style path separators first.
        videos_in_filesystem = set([
            path.replace("\\", "/").split("/")[-1].split(".")[0]
            for path in files
        ])

        # Files that exist, but are not in the DB, should be assumed to be good videos,
        #   and just needing to be added to the DB.  Add them to the DB in this way,
        #   so that these files also trigger the update code below (and trigger cache invalidation)
        video_ids_needing_model_creation = list(videos_in_filesystem -
                                                videos_marked_at_all)
        count = len(video_ids_needing_model_creation)
        if count:
            # OK to do bulk_create; cache invalidation triggered via save download
            # (rows are created at 0% so the "mark complete" pass below saves each
            # one individually, firing per-object save signals).
            VideoFile.objects.bulk_create([
                VideoFile(youtube_id=id,
                          percent_complete=0,
                          download_in_progress=False)
                for id in video_ids_needing_model_creation
            ])
            self.stdout.write(
                "Created %d VideoFile models (to mark them as complete, since the files exist)\n"
                % len(video_ids_needing_model_creation))
            # Fall back to the raw youtube id when no localized video id is known.
            touched_video_ids += [
                i18n.get_video_id(yid) or yid
                for yid in video_ids_needing_model_creation
            ]

        # Files that exist, are in the DB, but have percent_complete=0, download_in_progress=False
        #   These should be individually saved to be 100% complete, to trigger their availability (and cache invalidation)
        count = 0
        # Chunking keeps the youtube_id__in clause below the backend's parameter limit.
        for chunk in break_into_chunks(videos_in_filesystem):
            video_files_needing_model_update = VideoFile.objects.filter(
                percent_complete=0,
                download_in_progress=False,
                youtube_id__in=chunk)
            count += video_files_needing_model_update.count()
            for videofile in video_files_needing_model_update:
                videofile.percent_complete = 100
                videofile.flagged_for_download = False
                # Individual save() (not bulk update) so per-object signals fire.
                videofile.save()
        if count:
            self.stdout.write(
                "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
                % count)

        # VideoFile objects say they're available, but that don't actually exist.
        # (Videos still flagged for download are spared: they are expected to be absent.)
        count = 0
        videos_needing_model_deletion_chunked = break_into_chunks(
            videos_marked_at_all - videos_in_filesystem -
            videos_flagged_for_download)
        for chunk in videos_needing_model_deletion_chunked:
            video_files_needing_model_deletion = VideoFile.objects.filter(
                youtube_id__in=chunk)
            # count() before delete(): the queryset is empty afterwards.
            count += video_files_needing_model_deletion.count()
            video_files_needing_model_deletion.delete()
            touched_video_ids += [
                i18n.get_video_id(yid) or yid for yid in chunk
            ]
        if count:
            self.stdout.write(
                "Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n"
                % count)

        # Regenerate cached pages for every video whose state changed above
        # (deduplicated), but only when caching is on and the caller asked for it.
        if options["auto_cache"] and caching_enabled and touched_video_ids:
            caching.regenerate_all_pages_related_to_videos(
                video_ids=list(set(touched_video_ids)))
Esempio n. 11
0
    def handle(self, *args, **options):
        """Download every VideoFile flagged for download, one video at a time.

        Loops until no flagged, not-yet-in-progress videos remain, claiming each
        video (download_in_progress=True) before downloading so concurrent
        workers skip it.  A failed download is recorded in failed_youtube_ids so
        the loop moves on instead of retrying it; URLNotFound additionally clears
        the download flag permanently.  When options["auto_cache"] is set and
        caching is enabled, regenerates all cached pages related to the
        successfully downloaded videos, then reports completion via the job
        progress API (self.complete / self.cancel).
        """
        self.video = None

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        # Downloading is background work; don't compete with interactive processes.
        set_process_priority.lowest(logging=settings.LOG)

        try:
            while True:  # loop until the method is aborted
                # Grab any video that hasn't been tried yet
                videos = VideoFile.objects \
                    .filter(flagged_for_download=True, download_in_progress=False) \
                    .exclude(youtube_id__in=failed_youtube_ids)
                video_count = videos.count()
                if video_count == 0:
                    self.stdout.write("Nothing to download; exiting.\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = videos[0]
                video.download_in_progress = True
                video.percent_complete = 0
                video.save()
                # BUG FIX: remember the video being processed.  self.video was
                #   previously left as None, so the auto_cache branch below
                #   crashed with AttributeError on self.video.youtube_id.
                self.video = video
                self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)

                # Update the progress logging: one stage per video (done, failed,
                # or pending) plus one extra stage for the optional cache pass.
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=video.youtube_id)

                # Initiate the download process
                try:
                    download_video(video.youtube_id, callback=partial(self.download_progress_callback, video))
                    handled_youtube_ids.append(video.youtube_id)
                    self.stdout.write("Download is complete!\n")
                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                    video.download_in_progress = False
                    video.flagged_for_download = not isinstance(e, URLNotFound)  # URLNotFound means, we won't try again
                    video.save()
                    # Rather than getting stuck on one video, continue to the next video.
                    failed_youtube_ids.append(video.youtube_id)
                    continue

            # This can take a long time, without any further update, so ... best to avoid.
            if options["auto_cache"] and caching.caching_is_enabled() and handled_youtube_ids:
                self.update_stage(stage_name=self.video.youtube_id, stage_percent=0, notes=_("Generating all pages related to videos."))
                # Map youtube ids to video ids where possible, falling back to the
                # raw youtube id; deduplicate before regenerating.
                caching.regenerate_all_pages_related_to_videos(video_ids=list(set([i18n.get_video_id(yid) or yid for yid in handled_youtube_ids])))

            # Update the job status with a final success/failure tally.
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            # Unexpected failure anywhere above: report it and cancel the job
            # so the progress UI doesn't hang on a dead task.
            sys.stderr.write("Error: %s\n" % e)
            self.cancel(notes=_("Error: %s") % e)