Esempio n. 1
0
def delete_videos(request):
    """
    API endpoint for deleting videos.

    The request body is optional JSON; "paths" (default: empty list) selects
    what to delete and "lang" (default: "en") is the language passed on to
    the lookup and annotation helpers.

    Returns a JsonResponseMessageSuccess reporting how many youtube ids had
    delete_downloaded_files() return a truthy value.
    """

    # Parse the body a single time; an empty body is treated as "{}".
    payload = json.loads(request.body or "{}")

    paths = OrderedSet(payload.get("paths", []))
    lang = payload.get("lang", "en")

    youtube_ids = get_download_youtube_ids(paths,
                                           language=lang,
                                           downloaded=True)

    num_deleted = 0

    # `youtube_id` rather than `id`, so the builtin is not shadowed.
    for youtube_id in youtube_ids:
        # Delete the file on disk
        if delete_downloaded_files(youtube_id):
            num_deleted += 1

    # Refresh the content annotations for every id we looked at, whether or
    # not its deletion was counted above.
    annotate_content_models_by_youtube_id(youtube_ids=youtube_ids.keys(),
                                          language=lang)

    return JsonResponseMessageSuccess(
        _("Deleted %(num_videos)s video(s) successfully.") %
        {"num_videos": num_deleted})
Esempio n. 2
0
 def setUp(self):
     """
     Run the base set-up, then make sure the real video starts out
     deleted and flagged as not available.
     """
     UpdatesTestCase.setUp(self)

     youtube_id = self.real_video.youtube_id

     # Remove any downloaded copy and refresh the annotations for that id.
     delete_downloaded_files(youtube_id)
     annotate_content_models_by_youtube_id(youtube_ids=[youtube_id])

     # The tests start from an item that is not marked available.
     content_item = get_content_item(content_id=self.real_video.id)
     self.assertFalse(content_item['available'])
Esempio n. 3
0
    def test_simple_download(self):
        """
        Tests that a real, existing video can be downloaded
        """
        youtube_id = self.real_video.youtube_id

        # Fetch a video that exists for real, then confirm the file is on disk.
        download_video(youtube_id)
        local_path = get_video_local_path(youtube_id)
        self.assertTrue(os.path.exists(local_path))

        # With the file in place, re-annotate the database and check that
        # the content item is now marked available.
        annotate_content_models_by_youtube_id(youtube_ids=[youtube_id])
        content_item = get_content_item(content_id=self.real_video.id)
        logger.error(content_item)
        self.assertTrue(content_item['available'])

        # Unrelated extra check (kept here because it needs no database):
        # the supplied default is expected back for a bogus path.
        bogus_size = get_local_video_size("/bogus/path", default=123)
        self.assertEqual(bogus_size, 123)
Esempio n. 4
0
def delete_videos(request):
    """
    API endpoint for deleting videos.

    Accepts an optional JSON body with "paths" (default: empty list) and
    "lang" (default: "en"). Every youtube id resolved from those paths is
    handed to delete_downloaded_files(); the response message reports how
    many of those calls returned a truthy value.
    """

    # Decode the JSON body once instead of once per field; an empty body is
    # treated as "{}".
    payload = json.loads(request.body or "{}")
    paths = OrderedSet(payload.get("paths", []))
    lang = payload.get("lang", "en")

    youtube_ids = get_download_youtube_ids(paths, language=lang, downloaded=True)

    # Delete the files on disk, counting the successful deletions. The loop
    # variable is `youtube_id` so the builtin `id` is not shadowed.
    num_deleted = sum(1 for youtube_id in youtube_ids if delete_downloaded_files(youtube_id))

    # Refresh the content annotations for all ids that were looked up.
    annotate_content_models_by_youtube_id(youtube_ids=youtube_ids.keys(), language=lang)

    return JsonResponseMessageSuccess(_("Deleted %(num_videos)s video(s) successfully.") % {"num_videos": num_deleted})
Esempio n. 5
0
    def test_simple_download(self):
        """
        Tests that a real, existing video can be downloaded
        """
        video_id = self.real_video.youtube_id

        # Download a video that exists for real!
        download_video(video_id)

        # The downloaded file should now be present at its local path.
        downloaded_to = get_video_local_path(video_id)
        file_present = os.path.exists(downloaded_to)
        self.assertTrue(file_present)

        # After downloading the video, annotate the database ...
        annotate_content_models_by_youtube_id(youtube_ids=[video_id])

        # ... and check that the item has been marked available.
        refreshed = get_content_item(content_id=self.real_video.id)
        logger.error(refreshed)
        self.assertTrue(refreshed['available'])

        # An unrelated check that lives here because it needs no database:
        # a bogus path is expected to give back the supplied default.
        fallback_size = get_local_video_size("/bogus/path", default=123)
        self.assertEqual(fallback_size, 123)
Esempio n. 6
0
 def setUp(self):
     """
     Base set-up followed by a reset of the real video: its downloaded
     files are deleted and the item is verified as not available.
     """
     UpdatesTestCase.setUp(self)

     real_id = self.real_video.youtube_id
     delete_downloaded_files(real_id)

     # Re-annotate so the content item reflects the deletion.
     annotate_content_models_by_youtube_id(youtube_ids=[real_id])

     refreshed = get_content_item(content_id=self.real_video.id)
     self.assertFalse(refreshed['available'])
Esempio n. 7
0
    def handle(self, *args, **options):
        """
        Work through the video download queue until it is empty.

        On every pass a fresh VideoQueue is read, the next video is taken and
        downloaded with download_video(); if that raises URLNotFound the video
        is fetched with scrape_video() (youtube-dl) instead. A video whose
        .mp4 already exists under settings.CONTENT_ROOT is treated as
        downloaded without fetching anything.

        Videos that fail are removed from the queue and counted as failures,
        and the loop moves on to the next one. Progress and the final summary
        are reported through the stage helpers on self (set_stages, start,
        update_stage, complete). An exception escaping the loop cancels the
        job and is re-raised.

        `options` must contain "auto_cache"; it is added to the stage count.
        """
        self.setup(options)
        self.video = {}

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=settings.LOG)

        def youtube_dl_cb(stats, progress_callback, *args, **kwargs):
            """Turn a youtube-dl progress report into a percent callback."""
            if stats['status'] == "finished":
                percent = 100.
            elif stats['status'] == "downloading":
                # The total size can be absent, None or 0 while a download
                # is in flight; report 0% then instead of raising and
                # failing an otherwise healthy download.
                total_bytes = stats.get('total_bytes')
                if total_bytes:
                    percent = 100. * stats['downloaded_bytes'] / total_bytes
                else:
                    percent = 0.
            else:
                percent = 0.
            progress_callback(percent=percent)

        try:
            while True:
                # loop until the method is aborted
                # Grab any video that hasn't been tried yet

                video_queue = VideoQueue()

                video_count = video_queue.count()
                if video_count == 0:
                    self.stdout.write(_("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = video_queue.next()

                video["download_in_progress"] = True
                video["percent_complete"] = 0
                youtube_id = video.get("youtube_id")
                self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": youtube_id})

                # Update the progress logging
                # The total is everything still queued plus everything already
                # processed (successfully or not), plus one if auto_cache is set.
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=youtube_id)

                # Initiate the download process
                try:

                    progress_callback = partial(self.download_progress_callback, video)

                    # Don't try to download a file that already exists in the content dir - just say it was successful
                    # and call it a day!
                    if not os.path.exists(os.path.join(settings.CONTENT_ROOT, "{id}.mp4".format(id=youtube_id))):

                        try:
                            # Download via urllib
                            download_video(youtube_id, callback=progress_callback)

                        except URLNotFound:
                            # Video was not found on amazon cloud service,
                            #   either due to a KA mistake, or due to the fact
                            #   that it's a dubbed video.
                            #
                            # We can use youtube-dl to get that video!!
                            logging.debug(_("Retrieving youtube video %(youtube_id)s via youtube-dl") % {"youtube_id": youtube_id})
                            scrape_video(youtube_id, quiet=not settings.DEBUG, callback=partial(youtube_dl_cb, progress_callback=progress_callback))

                        except IOError as e:
                            # Record the failure, drop the video from the
                            # queue and pause briefly before the next one.
                            logging.exception(e)
                            failed_youtube_ids.append(youtube_id)
                            video_queue.remove_file(youtube_id)
                            time.sleep(10)
                            continue

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(youtube_id)
                    video_queue.remove_file(youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                    annotate_content_models_by_youtube_id(youtube_ids=[youtube_id], language=video.get("language"))

                except DownloadCancelled:
                    # Cancellation event: empty the queue, so the next pass
                    # finds nothing to download and exits the loop.
                    video_queue.clear()
                    failed_youtube_ids.append(youtube_id)

                except Exception as e:
                    # On error, report the error, mark the video as not downloaded,
                    #   and allow the loop to try other videos.
                    msg = _("Error in downloading %(youtube_id)s: %(error_msg)s") % {"youtube_id": youtube_id, "error_msg": unicode(e)}
                    self.stderr.write("%s\n" % msg)

                    # NOTE: no retry is attempted here, not even for
                    # connection errors; the video is simply dropped.

                    # Rather than getting stuck on one video, continue to the next video.
                    self.update_stage(stage_status="error", notes=_("%(error_msg)s; continuing to next video.") % {"error_msg": msg})
                    failed_youtube_ids.append(youtube_id)
                    video_queue.remove_file(youtube_id)
                    continue

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            # Anything that escapes the loop ends the job: mark it as
            # cancelled with the error, then let the exception propagate.
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise
Esempio n. 8
0
    def handle(self, *args, **options):
        """
        Work through the video download queue until it is empty.

        On every pass a fresh VideoQueue is read and the next video is
        downloaded with download_video(), unless its .mp4 already exists
        under settings.CONTENT_ROOT. A socket timeout or ConnectionError
        pauses for 30 seconds and retries the same video indefinitely; the
        retry loop only ends on success, on another exception, or when
        update_stage() raises AssertionError, which is turned into
        DownloadCancelled.

        DownloadCancelled clears the queue and stops the loop. Any other
        exception skips the video: it is removed from the queue, counted as
        failed, and the reason is reported through update_stage().

        An exception escaping the loop is logged, cancels the job and is
        re-raised. `options` must contain "auto_cache"; it is added to the
        stage count.
        """
        self.setup(options)
        self.video = {}

        handled_youtube_ids = []  # stored to deal with caching
        failed_youtube_ids = []  # stored to avoid requerying failures.

        set_process_priority.lowest(logging=logger)

        try:
            while True:
                # loop until the method is aborted
                # Grab any video that hasn't been tried yet

                video_queue = VideoQueue()

                video_count = video_queue.count()
                if video_count == 0:
                    self.stdout.write(_("Nothing to download; exiting.") + "\n")
                    break

                # Grab a video as OURS to handle, set fields to indicate to others that we're on it!
                # Update the video logging
                video = video_queue.next()

                video["download_in_progress"] = True
                video["percent_complete"] = 0
                youtube_id = video.get("youtube_id")
                self.stdout.write((_("Downloading video '%(youtube_id)s'...") + "\n") % {"youtube_id": youtube_id})

                # Update the progress logging
                # The total is everything still queued plus everything already
                # processed (successfully or not), plus one if auto_cache is set.
                self.set_stages(num_stages=video_count + len(handled_youtube_ids) + len(failed_youtube_ids) + int(options["auto_cache"]))
                if not self.started():
                    self.start(stage_name=youtube_id)

                # Initiate the download process
                try:

                    progress_callback = partial(self.download_progress_callback, video)

                    # Don't try to download a file that already exists in the content dir - just say it was successful
                    # and call it a day!
                    if not os.path.exists(os.path.join(settings.CONTENT_ROOT, "{id}.mp4".format(id=youtube_id))):

                        retries = 0
                        while True:
                            try:
                                download_video(youtube_id, callback=progress_callback)
                                break
                            except (socket.timeout, ConnectionError):
                                # Connection trouble: tell the user, wait,
                                # and try the same video again.
                                retries += 1
                                msg = _(
                                    "Pausing download for '{title}', failed {failcnt} times, sleeping for 30s, retry number {retries}"
                                ).format(
                                    title=video.get("title"),
                                    failcnt=DOWNLOAD_MAX_RETRIES,
                                    retries=retries,
                                )
                                try:
                                    self.update_stage(
                                        stage_name=youtube_id,
                                        stage_percent=0.,
                                        notes=msg
                                    )
                                except AssertionError:
                                    # Raised by update_stage when the video
                                    # download job has ended
                                    raise DownloadCancelled()
                                logger.info(msg)
                                time.sleep(30)
                                continue

                    # If we got here, we downloaded ... somehow :)
                    handled_youtube_ids.append(youtube_id)

                    # Remove the item from the queue
                    video_queue.remove_file(youtube_id)
                    self.stdout.write(_("Download is complete!") + "\n")

                    annotate_content_models_by_youtube_id(youtube_ids=[youtube_id], language=video.get("language"))

                except DownloadCancelled:
                    # Cancelled: empty the queue and stop processing.
                    video_queue.clear()
                    failed_youtube_ids.append(youtube_id)
                    break

                except Exception as e:
                    # Rather than getting stuck on one video,
                    # completely remove this item from the queue
                    failed_youtube_ids.append(youtube_id)
                    video_queue.remove_file(youtube_id)
                    logger.exception(e)

                    # Test for None explicitly instead of relying on the
                    # truthiness of the response object: a requests Response
                    # evaluates to False for error statuses, which would skip
                    # the HTTP-status message exactly when it is needed.
                    response = getattr(e, "response", None)
                    if response is not None:
                        reason = _(
                            "Got non-OK HTTP status: {status}"
                        ).format(
                            status=response.status_code
                        )
                    else:
                        reason = _(
                            "Unhandled request exception: "
                            "{exception}"
                        ).format(
                            exception=str(e),
                        )
                    msg = _(
                        "Skipping '{title}', reason: {reason}"
                    ).format(
                        title=video.get('title'),
                        reason=reason,
                    )
                    # Inform the user of this problem
                    self.update_stage(
                        stage_name=youtube_id,
                        stage_percent=0.,
                        notes=msg
                    )
                    logger.info(msg)
                    continue

            # Update
            self.complete(notes=_("Downloaded %(num_handled_videos)s of %(num_total_videos)s videos successfully.") % {
                "num_handled_videos": len(handled_youtube_ids),
                "num_total_videos": len(handled_youtube_ids) + len(failed_youtube_ids),
            })

        except Exception as e:
            # Anything that escapes the loop ends the job: log it, mark the
            # job as cancelled with the error, then let it propagate.
            logger.exception(e)
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": e})
            raise