Example #1
0
    def convert_missing_downloads(max_videos, dryrun=False,
                                  missing_on_s3=False, language_channels=None):
        """Download from YouTube and use Zencoder to start converting any
        missing downloadable content into its appropriate downloadable format.

        Args:
            max_videos: Stop once this many videos have been counted.  A
                video is counted when its conversion was started, or, in a
                dry run, when it would have been started.
            dryrun: If true, only log what would happen: no legacy content
                is copied, no source URL is requested and no Zencoder job is
                submitted.
            missing_on_s3: If true, the work list comes from
                s3.list_missing_converted_formats() and the API's
                download_urls is ignored.  Otherwise it comes from
                api.list_missing_video_content(), minus any formats that
                s3.list_converted_formats() already reports.
            language_channels: Optional sequence.  When its first element is
                'all', lang_utils.video_ids_set() is called without
                arguments, otherwise with this sequence.  Every returned
                video id that is absent from the converted listing is queued
                for all DOWNLOADABLE_FORMATS.

        Raises:
            AssertionError: If no S3 source URL is returned for a video
                (only outside of a dry run).
        """

        videos_converted = 0
        converted_formats = None

        if missing_on_s3:
            # With this option, videos that are missing in the S3 converted
            # bucket are converted. The API's download_urls is ignored.
            logger.info("Searching for videos that are missing from S3")
            formats_to_convert = s3.list_missing_converted_formats()
            legacy_mp4_videos = s3.list_legacy_mp4_videos()
        else:
            # With this option (the default), videos that are missing in the
            # API's download_urls are converted, if they do not already exist
            # on S3.  Videos that are missing from S3, but present in the API's
            # download_urls, are ignored.
            logger.info(
                "Searching for videos that are missing from API download_urls")
            formats_to_convert = api.list_missing_video_content()
            converted_formats = s3.list_converted_formats()

        if language_channels:
            if language_channels[0] == 'all':
                channel_ids_set = lang_utils.video_ids_set()
            else:
                channel_ids_set = lang_utils.video_ids_set(language_channels)
            # Reuse the listing fetched above if there is one.  Compare
            # against None so an empty listing does not trigger a second,
            # redundant S3 listing.
            if converted_formats is None:
                converted_formats = s3.list_converted_formats()
            for vid_id in channel_ids_set:
                if vid_id not in converted_formats:
                    # Store a private copy: the loop below removes "mp4" in
                    # place, which must not leak into DOWNLOADABLE_FORMATS
                    # or into the entries of other videos.
                    formats_to_convert[vid_id] = set(DOWNLOADABLE_FORMATS)

        for youtube_id, missing_formats in formats_to_convert.iteritems():
            if videos_converted >= max_videos:
                logger.info("Stopping: max videos reached")
                break

            if "_DUP_" in youtube_id:
                logger.info(
                    ("Skipping video {0} as it has invalid DUP in youtube ID"
                     .format(youtube_id)))
                continue

            if missing_on_s3:
                # We already know the formats are missing from S3.
                formats_to_create = missing_formats
                if (youtube_id in legacy_mp4_videos and
                        "mp4" in formats_to_create):
                    if dryrun:
                        logger.info(
                            "Skipping copy of legacy content due to dryrun")
                    else:
                        s3.copy_legacy_content_to_new_location(youtube_id)
                    # The legacy copy stands in for the mp4 conversion.
                    formats_to_create.remove("mp4")
            else:
                # Don't recreate any formats that are already waiting on s3
                # but are, for any reason, not known by the API.  Use .get()
                # so a video with nothing converted yet is treated as having
                # no formats, whatever mapping type the listing is.
                already_converted_still_unpublished = (
                    converted_formats.get(youtube_id, set()) & missing_formats)
                if already_converted_still_unpublished:
                    logger.info(
                        "Video %s missing formats %s in API but they are "
                        "already converted; use publish step" %
                        (youtube_id,
                         ",".join(already_converted_still_unpublished)))
                formats_to_create = (
                    missing_formats - already_converted_still_unpublished)

            if not formats_to_create:
                continue

            logger.info("Starting conversion of %s into formats %s" %
                        (youtube_id, ",".join(formats_to_create)))

            if dryrun:
                logger.info(
                    "Skipping downloading and sending job to zencoder due to "
                    "dryrun")
            else:
                s3_source_url = s3.get_or_create_unconverted_source_url(
                    youtube_id)
                # Raise explicitly instead of using an assert statement, so
                # the check still runs when Python is started with -O.
                if not s3_source_url:
                    raise AssertionError(
                        "No S3 source URL created for %s" % youtube_id)

                zencode.start_converting(
                    youtube_id, s3_source_url, formats_to_create)

            videos_converted += 1
Example #2
0
    def convert_missing_downloads(max_videos, dryrun=False):
        """Download from YouTube and use Zencoder to start converting any
        missing downloadable content into its appropriate downloadable format.

        The work list comes from s3.list_missing_converted_formats(); the
        API's download_urls is ignored.

        Args:
            max_videos: Stop once this many videos have been counted.  A
                video is counted when its Zencoder job was started, or, in a
                dry run, when it would have been started.  Videos skipped
                because of an error are not counted.
            dryrun: If true, only log what would happen: no legacy content
                is copied, no source URL is requested and no Zencoder job is
                submitted.
        """

        videos_converted = 0
        # Ids of videos skipped because of an error.
        # NOTE(review): nothing in this block reads the list after the loop;
        # confirm whether it is meant to be reported or returned.
        error_ids = []

        # With this option, videos that are missing in the S3 converted
        # bucket are converted. The API's download_urls is ignored.
        logger.info("Searching for videos that are missing from S3")
        formats_to_convert = s3.list_missing_converted_formats()
        legacy_mp4_videos = s3.list_legacy_mp4_videos()

        for youtube_id, missing_formats in formats_to_convert.iteritems():
            if videos_converted >= max_videos:
                logger.info("Stopping: max videos reached")
                break

            if "_DUP_" in youtube_id:
                logger.info(
                    ("Skipping video {0} as it has invalid DUP in youtube ID"
                     .format(youtube_id)))
                continue

            # We already know the formats are missing from S3.
            formats_to_create = missing_formats
            if (youtube_id in legacy_mp4_videos and
                    "mp4" in formats_to_create):
                if dryrun:
                    logger.info(
                        "Skipping copy of legacy content due to dryrun")
                else:
                    s3.copy_legacy_content_to_new_location(youtube_id)
                # The legacy copy stands in for the mp4 conversion.
                formats_to_create.remove("mp4")

            if not formats_to_create:
                continue

            logger.info("Starting conversion of %s into formats %s" %
                        (youtube_id, ",".join(formats_to_create)))

            if dryrun:
                logger.info(
                    "Skipping downloading and sending job to zencoder due to "
                    "dryrun")
                videos_converted += 1
                continue

            s3_source_url = s3.get_or_create_unconverted_source_url(
                youtube_id)
            if not s3_source_url:
                logger.warning("No S3 source URL created for %s; skipping"
                               % youtube_id)
                error_ids.append(youtube_id)
                continue

            # Best effort: a failure for one video is logged and recorded,
            # and the loop moves on to the next video.
            try:
                zencode.start_converting(youtube_id, s3_source_url,
                                         formats_to_create)
            except Exception as why:
                logger.error('Skipping youtube_id "%s": %s'
                             % (youtube_id, why))
                error_ids.append(youtube_id)
            else:
                videos_converted += 1
Example #3
0
    def convert_missing_downloads(max_videos, dryrun=False):
        """Download from YouTube and use Zencoder to start converting any
        missing downloadable content into its appropriate downloadable format.

        The work list comes from s3.list_missing_converted_formats(); the
        API's download_urls is ignored.

        Args:
            max_videos: Stop once this many videos have been counted.  A
                video is counted when its Zencoder job was started, or, in a
                dry run, when it would have been started.  Videos skipped
                because of an error are not counted.
            dryrun: If true, only log what would happen: no legacy content
                is copied, no source URL is requested and no Zencoder job is
                submitted.
        """

        videos_converted = 0
        # Ids of videos skipped because of an error.
        # NOTE(review): nothing in this block reads the list after the loop;
        # confirm whether it is meant to be reported or returned.
        error_ids = []

        # With this option, videos that are missing in the S3 converted
        # bucket are converted. The API's download_urls is ignored.
        logger.info("Searching for videos that are missing from S3")
        formats_to_convert = s3.list_missing_converted_formats()
        legacy_mp4_videos = s3.list_legacy_mp4_videos()

        for youtube_id, missing_formats in formats_to_convert.iteritems():
            if videos_converted >= max_videos:
                logger.info("Stopping: max videos reached")
                break

            if "_DUP_" in youtube_id:
                logger.info(
                    ("Skipping video {0} as it has invalid DUP in youtube ID".
                     format(youtube_id)))
                continue

            # We already know the formats are missing from S3.
            formats_to_create = missing_formats
            if (youtube_id in legacy_mp4_videos
                    and "mp4" in formats_to_create):
                if dryrun:
                    logger.info(
                        "Skipping copy of legacy content due to dryrun")
                else:
                    s3.copy_legacy_content_to_new_location(youtube_id)
                # The legacy copy stands in for the mp4 conversion.
                formats_to_create.remove("mp4")

            if not formats_to_create:
                continue

            logger.info("Starting conversion of %s into formats %s" %
                        (youtube_id, ",".join(formats_to_create)))

            if dryrun:
                logger.info(
                    "Skipping downloading and sending job to zencoder due to "
                    "dryrun")
                videos_converted += 1
                continue

            s3_source_url = s3.get_or_create_unconverted_source_url(
                youtube_id)
            if not s3_source_url:
                logger.warning(
                    "No S3 source URL created for %s; skipping" %
                    youtube_id)
                error_ids.append(youtube_id)
                continue

            # Best effort: a failure for one video is logged and recorded,
            # and the loop moves on to the next video.
            try:
                zencode.start_converting(youtube_id, s3_source_url,
                                         formats_to_create)
            except Exception as why:
                logger.error('Skipping youtube_id "%s": %s' %
                             (youtube_id, why))
                error_ids.append(youtube_id)
            else:
                videos_converted += 1