Example #1
0
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Pick a deterministic video (index 10) so failures are reproducible.
        # list(...) keeps this working on both Python 2 lists and Python 3 views.
        video_slug = list(topicdata.NODE_CACHE['Video'].keys())[10]
        sys.stdout.write("Testing on video_slug = %s\n" % video_slug)
        youtube_id = topicdata.NODE_CACHE['Video'][video_slug]['youtube_id']
        video_path = topicdata.NODE_CACHE['Video'][video_slug]['paths'][0]

        # Clean the cache for this item, so we start from a known-empty state.
        caching.expire_page(path=video_path)

        # Create the cache item, and check it
        self.assertTrue(not caching.has_cache_key(path=video_path),
                        "expect: no cache key after expiring the page")
        caching.regenerate_all_pages_related_to_videos(video_ids=[youtube_id])
        self.assertTrue(caching.has_cache_key(path=video_path),
                        "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_pages_related_to_video(
            video_id=youtube_id)  # test the convenience function
        self.assertTrue(not caching.has_cache_key(path=video_path),
                        "expect: no cache key after expiring the page")
Example #2
0
    def handle(self, *args, **options):
        """Download flagged videos one at a time until the queue is empty or aborted.

        Pages related to each downloaded video are cache-invalidated as we go;
        regeneration is deferred to a single pass at the end (when the
        ``auto_cache`` option is set) for efficiency.
        """
        caching_enabled = settings.CACHE_TIME != 0
        handled_video_ids = []  # collected so caches can be regenerated at the end

        while True:  # loop until the method is aborted

            # Only one download job may run at a time.
            if VideoFile.objects.filter(download_in_progress=True).count() > 0:
                self.stderr.write("Another download is still in progress; aborting.\n")
                break

            # Grab any video that hasn't been tried yet.
            videos = VideoFile.objects.filter(flagged_for_download=True, download_in_progress=False)
            if videos.count() == 0:
                self.stdout.write("Nothing to download; aborting.\n")
                break

            video = videos[0]

            # User intervention: stop the whole job.
            if video.cancel_download:
                video.download_in_progress = False
                video.save()
                self.stdout.write("Download cancelled; aborting.\n")
                break

            # Claim this video as OURS, so other workers skip it.
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()

            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)
            try:
                download_video(video.youtube_id, callback=download_progress_callback(self, video))
                self.stdout.write("Download is complete!\n")
            except Exception as e:
                # On failure: release the video and re-queue the job.
                self.stderr.write("Error in downloading: %s\n" % e)
                video.download_in_progress = False
                video.save()
                force_job("videodownload", "Download Videos")  # infinite recursive call? :(
                break

            handled_video_ids.append(video.youtube_id)

            # Expire, but don't regenerate until the very end, for efficiency.
            if caching_enabled:
                caching.invalidate_all_pages_related_to_video(video_id=video.youtube_id)

        if options["auto_cache"] and caching_enabled and handled_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=handled_video_ids)
Example #3
0
def delete_videos(request):
    """Delete the given videos from disk and mark them undownloaded in the DB.

    Expects a JSON POST body of the form {"youtube_ids": [...]}; returns an
    empty JSON response.
    """
    youtube_ids = simplejson.loads(request.raw_post_data or "{}").get("youtube_ids", [])
    for youtube_id in youtube_ids:  # renamed from `id`, which shadows the builtin
        # Delete the file on disk
        delete_downloaded_files(youtube_id)

        # Mark the file in the database as cancelled / not queued
        videofile = get_object_or_None(VideoFile, youtube_id=youtube_id)
        if videofile:
            videofile.cancel_download = True
            videofile.flagged_for_download = False
            videofile.flagged_for_subtitle_download = False
            videofile.save()

        # Refresh the cache
        invalidate_all_pages_related_to_video(video_id=youtube_id)

    return JsonResponse({})
Example #4
0
def delete_videos(request):
    """Delete the given videos from disk and mark them undownloaded in the DB.

    Expects a JSON POST body of the form {"youtube_ids": [...]}; returns an
    empty JSON response.
    """
    youtube_ids = simplejson.loads(request.raw_post_data or "{}").get("youtube_ids", [])
    for youtube_id in youtube_ids:  # renamed from `id`, which shadows the builtin
        # Delete the file on disk
        delete_downloaded_files(youtube_id)

        # Mark the file in the database as cancelled / not queued
        videofile = get_object_or_None(VideoFile, youtube_id=youtube_id)
        if videofile:
            videofile.cancel_download = True
            videofile.flagged_for_download = False
            videofile.flagged_for_subtitle_download = False
            videofile.save()

        # Refresh the cache
        invalidate_all_pages_related_to_video(video_id=youtube_id)

    return JsonResponse({})
Example #5
0
    def test_cache_invalidation(self):
        """Create the cache item, then invalidate it and show that it is deleted."""

        # Pick a deterministic video (index 10) so failures are reproducible.
        # list(...) keeps this working on both Python 2 lists and Python 3 views.
        video_id = list(topicdata.NODE_CACHE['Video'].keys())[10]
        sys.stdout.write("Testing on video_id = %s\n" % video_id)
        video_path = topicdata.NODE_CACHE['Video'][video_id][0]['path']

        # Clean the cache for this item, so we start from a known-empty state.
        caching.expire_page(path=video_path, failure_ok=True)

        # Create the cache item, and check it
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: no cache key after expiring the page")
        caching.regenerate_all_pages_related_to_videos(video_ids=[video_id])
        self.assertTrue(caching.has_cache_key(path=video_path), "expect: Cache key exists after Django Client get")

        # Invalidate the cache item, and check it
        caching.invalidate_all_pages_related_to_video(video_id=video_id) # test the convenience function
        self.assertTrue(not caching.has_cache_key(path=video_path), "expect: no cache key after expiring the page")
Example #6
0
    def handle(self, *args, **options):
        """Download flagged videos one at a time until the queue is empty or aborted.

        Pages related to each downloaded video are cache-invalidated as we go;
        regeneration is deferred to a single pass at the end (when the
        ``auto_cache`` option is set) for efficiency.
        """
        caching_enabled = settings.CACHE_TIME != 0
        handled_video_ids = []  # stored to deal with caching

        while True:  # loop until the method is aborted

            # Only one download job may run at a time.
            if VideoFile.objects.filter(download_in_progress=True).count() > 0:
                self.stderr.write(
                    "Another download is still in progress; aborting.\n")
                break

            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects.filter(flagged_for_download=True,
                                              download_in_progress=False)
            if videos.count() == 0:
                self.stdout.write("Nothing to download; aborting.\n")
                break

            video = videos[0]

            # User intervention: stop the whole job.
            if video.cancel_download:
                video.download_in_progress = False
                video.save()
                self.stdout.write("Download cancelled; aborting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()

            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)
            try:
                download_video(video.youtube_id,
                               callback=download_progress_callback(
                                   self, video))
                handled_video_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:

                if isinstance(e, URLNotFound):
                    # This should never happen, but if it does, remove the VideoFile from the queue, and continue
                    # to the next video. Warning: this will leave the update page in a weird state, currently
                    # (and require a refresh of the update page in order to start showing progress again)
                    video.delete()
                    continue

                # On connection error, report the error, mark the video as not downloaded, and give up for now.
                self.stderr.write("Error in downloading %s: %s\n" %
                                  (video.youtube_id, e))
                video.download_in_progress = False
                video.percent_complete = 0
                video.save()
                break

            # Expire, but don't regenerate until the very end, for efficiency.
            if caching_enabled:
                caching.invalidate_all_pages_related_to_video(
                    video_id=video.youtube_id)

        # After all is done, regenerate all pages
        #   since this is computationally intensive, only do it after we're sure
        #   nothing more will change (so that we don't regenerate something that is
        #   later invalidated by another video downloaded in the loop)
        if options["auto_cache"] and caching_enabled and handled_video_ids:
            caching.regenerate_all_pages_related_to_videos(
                video_ids=handled_video_ids)
Example #7
0
    def handle(self, *args, **options):
        """Reconcile VideoFile models with the video/subtitle files on disk.

        Deletes broken models, marks models complete when their files exist,
        creates models for orphan files, deletes models for missing files, and
        flags subtitle availability. Affected pages are cache-invalidated as we
        go and regenerated once at the end (when ``auto_cache`` is set).
        """
        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []  # every video whose cached pages were invalidated

        # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
        video_files_to_delete = VideoFile.objects.filter(
            download_in_progress=False,
            percent_complete__gt=0,
            percent_complete__lt=100)
        youtube_ids_to_delete = [
            d["youtube_id"] for d in video_files_to_delete.values("youtube_id")
        ]
        video_files_to_delete.delete()

        if caching_enabled:
            for youtube_id in youtube_ids_to_delete:
                caching.invalidate_all_pages_related_to_video(
                    video_id=youtube_id)
                touched_video_ids.append(youtube_id)
        # BUGFIX: re-evaluating the queryset after .delete() matches 0 rows,
        # so count the ids captured before the delete instead.
        if youtube_ids_to_delete:
            self.stdout.write(
                "Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n"
                % len(youtube_ids_to_delete))

        files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
        subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
        videos_marked_at_all = set(
            [video.youtube_id for video in VideoFile.objects.all()])

        # Filenames (without extension) present on disk, normalized for Windows paths.
        videos_in_filesystem = set([
            path.replace("\\", "/").split("/")[-1].split(".")[0]
            for path in files
        ])
        videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)

        videos_flagged_for_download = set([
            video.youtube_id
            for video in VideoFile.objects.filter(flagged_for_download=True)
        ])

        subtitles_in_filesystem = set([
            path.replace("\\", "/").split("/")[-1].split(".")[0]
            for path in subtitle_files
        ])
        subtitles_in_filesystem_chunked = break_into_chunks(
            subtitles_in_filesystem)

        # Mark videos whose files exist on disk as 100% complete.
        count = 0
        for chunk in videos_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(
                percent_complete=0,
                download_in_progress=False,
                youtube_id__in=chunk)
            # BUGFIX: capture the ids BEFORE .update(); afterwards, the lazy
            # queryset (filtered on percent_complete=0) re-evaluates to empty,
            # so the invalidation loop below would never run.
            updated_ids = [
                d["youtube_id"]
                for d in video_files_needing_model_update.values("youtube_id")
            ]
            count += len(updated_ids)
            video_files_needing_model_update.update(percent_complete=100,
                                                    flagged_for_download=False)
            if caching_enabled:
                for youtube_id in updated_ids:
                    caching.invalidate_all_pages_related_to_video(
                        video_id=youtube_id)
                    touched_video_ids.append(youtube_id)
        if count:
            self.stdout.write(
                "Updated %d VideoFile models (to mark them as complete, since the files exist)\n"
                % count)

        # Create models for files on disk that have no model at all.
        video_ids_needing_model_creation = list(videos_in_filesystem -
                                                videos_marked_at_all)
        count = len(video_ids_needing_model_creation)
        if count:
            VideoFile.objects.bulk_create([
                VideoFile(youtube_id=youtube_id, percent_complete=100)
                for youtube_id in video_ids_needing_model_creation
            ])
            if caching_enabled:
                for vid in video_ids_needing_model_creation:
                    caching.invalidate_all_pages_related_to_video(video_id=vid)
                    touched_video_ids.append(vid)
            self.stdout.write(
                "Created %d VideoFile models (to mark them as complete, since the files exist)\n"
                % count)

        # Delete models whose files are gone and that aren't queued for download.
        count = 0
        videos_needing_model_deletion_chunked = break_into_chunks(
            videos_marked_at_all - videos_in_filesystem -
            videos_flagged_for_download)
        for chunk in videos_needing_model_deletion_chunked:
            video_files_needing_model_deletion = VideoFile.objects.filter(
                youtube_id__in=chunk)
            count += video_files_needing_model_deletion.count()
            video_files_needing_model_deletion.delete()
            if caching_enabled:
                for video_id in chunk:
                    caching.invalidate_all_pages_related_to_video(
                        video_id=video_id)
                    touched_video_ids.append(video_id)
        if count:
            self.stdout.write(
                "Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n"
                % count)

        # Mark subtitles as downloaded for models whose .srt files exist.
        count = 0
        for chunk in subtitles_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(
                subtitle_download_in_progress=False,
                subtitles_downloaded=False,
                youtube_id__in=chunk)
            # BUGFIX: same pitfall as above -- capture the ids before .update()
            # flips subtitles_downloaded and empties the lazy queryset.
            updated_ids = [
                d["youtube_id"]
                for d in video_files_needing_model_update.values("youtube_id")
            ]
            count += len(updated_ids)
            video_files_needing_model_update.update(subtitles_downloaded=True)
            if caching_enabled:
                for youtube_id in updated_ids:
                    caching.invalidate_all_pages_related_to_video(
                        video_id=youtube_id)
                    touched_video_ids.append(youtube_id)

        if count:
            self.stdout.write(
                "Updated %d VideoFile models (marked them as having subtitles)\n"
                % count)

        # Regenerate once, at the end, only after all invalidations are done.
        if options["auto_cache"] and caching_enabled and touched_video_ids:
            caching.regenerate_all_pages_related_to_videos(
                video_ids=touched_video_ids)
Example #8
0
    def handle(self, *args, **options):
        """Download flagged videos one at a time until the queue is empty or aborted.

        Pages related to each downloaded video are cache-invalidated as we go;
        regeneration is deferred to a single pass at the end (when the
        ``auto_cache`` option is set) for efficiency.
        """
        caching_enabled = settings.CACHE_TIME != 0
        handled_video_ids = []  # stored to deal with caching

        while True: # loop until the method is aborted

            # Only one download job may run at a time.
            if VideoFile.objects.filter(download_in_progress=True).count() > 0:
                self.stderr.write("Another download is still in progress; aborting.\n")
                break

            # Grab any video that hasn't been tried yet
            videos = VideoFile.objects.filter(flagged_for_download=True, download_in_progress=False)
            if videos.count() == 0:
                self.stdout.write("Nothing to download; aborting.\n")
                break

            video = videos[0]

            # User intervention: stop the whole job.
            if video.cancel_download:
                video.download_in_progress = False
                video.save()
                self.stdout.write("Download cancelled; aborting.\n")
                break

            # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
            video.download_in_progress = True
            video.percent_complete = 0
            video.save()

            self.stdout.write("Downloading video '%s'...\n" % video.youtube_id)
            try:
                download_video(video.youtube_id, callback=download_progress_callback(self, video))
                handled_video_ids.append(video.youtube_id)
                self.stdout.write("Download is complete!\n")
            except Exception as e:

                if isinstance(e, URLNotFound):
                    # This should never happen, but if it does, remove the VideoFile from the queue, and continue
                    # to the next video. Warning: this will leave the update page in a weird state, currently
                    # (and require a refresh of the update page in order to start showing progress again)
                    video.delete()
                    continue

                # On connection error, report the error, mark the video as not downloaded, and give up for now.
                self.stderr.write("Error in downloading %s: %s\n" % (video.youtube_id, e))
                video.download_in_progress = False
                video.percent_complete = 0
                video.save()
                break

            # Expire, but don't regenerate until the very end, for efficiency.
            if caching_enabled:
                caching.invalidate_all_pages_related_to_video(video_id=video.youtube_id)

        # After all is done, regenerate all pages
        #   since this is computationally intensive, only do it after we're sure
        #   nothing more will change (so that we don't regenerate something that is
        #   later invalidated by another video downloaded in the loop)
        if options["auto_cache"] and caching_enabled and handled_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=handled_video_ids)
Example #9
0
    def handle(self, *args, **options):
        """Reconcile VideoFile models with the video/subtitle files on disk.

        Deletes broken models, marks models complete when their files exist,
        creates models for orphan files, deletes models for missing files, and
        flags subtitle availability. Affected pages are cache-invalidated as we
        go and regenerated once at the end (when ``auto_cache`` is set).
        """
        caching_enabled = (settings.CACHE_TIME != 0)
        touched_video_ids = []  # every video whose cached pages were invalidated

        # delete VideoFile objects that are not marked as in progress, but are neither 0% nor 100% done; they're broken
        video_files_to_delete = VideoFile.objects.filter(download_in_progress=False, percent_complete__gt=0, percent_complete__lt=100)
        youtube_ids_to_delete = [d["youtube_id"] for d in video_files_to_delete.values("youtube_id")]
        video_files_to_delete.delete()

        if caching_enabled:
            for youtube_id in youtube_ids_to_delete:
                caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                touched_video_ids.append(youtube_id)
        # BUGFIX: re-evaluating the queryset after .delete() matches 0 rows,
        # so count the ids captured before the delete instead.
        if youtube_ids_to_delete:
            self.stdout.write("Deleted %d VideoFile models (to mark them as not downloaded, since they were in a bad state)\n" % len(youtube_ids_to_delete))

        files = glob.glob(settings.CONTENT_ROOT + "*.mp4")
        subtitle_files = glob.glob(settings.CONTENT_ROOT + "*.srt")
        videos_marked_at_all = set([video.youtube_id for video in VideoFile.objects.all()])

        # Filenames (without extension) present on disk, normalized for Windows paths.
        videos_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in files])
        videos_in_filesystem_chunked = break_into_chunks(videos_in_filesystem)

        videos_flagged_for_download = set([video.youtube_id for video in VideoFile.objects.filter(flagged_for_download=True)])

        subtitles_in_filesystem = set([path.replace("\\", "/").split("/")[-1].split(".")[0] for path in subtitle_files])
        subtitles_in_filesystem_chunked = break_into_chunks(subtitles_in_filesystem)

        # Mark videos whose files exist on disk as 100% complete.
        count = 0
        for chunk in videos_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(percent_complete=0, download_in_progress=False, youtube_id__in=chunk)
            # BUGFIX: capture the ids BEFORE .update(); afterwards, the lazy
            # queryset (filtered on percent_complete=0) re-evaluates to empty,
            # so the invalidation loop below would never run.
            updated_ids = [d["youtube_id"] for d in video_files_needing_model_update.values("youtube_id")]
            count += len(updated_ids)
            video_files_needing_model_update.update(percent_complete=100, flagged_for_download=False)
            if caching_enabled:
                for youtube_id in updated_ids:
                    caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                    touched_video_ids.append(youtube_id)
        if count:
            self.stdout.write("Updated %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

        # Create models for files on disk that have no model at all.
        video_ids_needing_model_creation = list(videos_in_filesystem - videos_marked_at_all)
        count = len(video_ids_needing_model_creation)
        if count:
            VideoFile.objects.bulk_create([VideoFile(youtube_id=youtube_id, percent_complete=100) for youtube_id in video_ids_needing_model_creation])
            if caching_enabled:
                for vid in video_ids_needing_model_creation:
                    caching.invalidate_all_pages_related_to_video(video_id=vid)
                    touched_video_ids.append(vid)
            self.stdout.write("Created %d VideoFile models (to mark them as complete, since the files exist)\n" % count)

        # Delete models whose files are gone and that aren't queued for download.
        count = 0
        videos_needing_model_deletion_chunked = break_into_chunks(videos_marked_at_all - videos_in_filesystem - videos_flagged_for_download)
        for chunk in videos_needing_model_deletion_chunked:
            video_files_needing_model_deletion = VideoFile.objects.filter(youtube_id__in=chunk)
            count += video_files_needing_model_deletion.count()
            video_files_needing_model_deletion.delete()
            if caching_enabled:
                for video_id in chunk:
                    caching.invalidate_all_pages_related_to_video(video_id=video_id)
                    touched_video_ids.append(video_id)
        if count:
            self.stdout.write("Deleted %d VideoFile models (because the videos didn't exist in the filesystem)\n" % count)

        # Mark subtitles as downloaded for models whose .srt files exist.
        count = 0
        for chunk in subtitles_in_filesystem_chunked:
            video_files_needing_model_update = VideoFile.objects.filter(subtitle_download_in_progress=False, subtitles_downloaded=False, youtube_id__in=chunk)
            # BUGFIX: same pitfall as above -- capture the ids before .update()
            # flips subtitles_downloaded and empties the lazy queryset.
            updated_ids = [d["youtube_id"] for d in video_files_needing_model_update.values("youtube_id")]
            count += len(updated_ids)
            video_files_needing_model_update.update(subtitles_downloaded=True)
            if caching_enabled:
                for youtube_id in updated_ids:
                    caching.invalidate_all_pages_related_to_video(video_id=youtube_id)
                    touched_video_ids.append(youtube_id)

        if count:
            self.stdout.write("Updated %d VideoFile models (marked them as having subtitles)\n" % count)

        # Regenerate once, at the end, only after all invalidations are done.
        if options["auto_cache"] and caching_enabled and touched_video_ids:
            caching.regenerate_all_pages_related_to_videos(video_ids=touched_video_ids)