def convert_missing_downloads(max_videos, dryrun=False, missing_on_s3=False,
                              language_channels=None):
    """Download from YouTube and use Zencoder to start converting any
    missing downloadable content into its appropriate downloadable format.

    Args:
        max_videos: stop once this many videos have been handled in this
            run. Videos handled in dryrun mode are counted as well.
        dryrun: if True, only log what would be done; no legacy content is
            copied and no job is sent to Zencoder.
        missing_on_s3: if True, convert whatever
            s3.list_missing_converted_formats() reports and ignore the
            API's download_urls. If False (the default), convert whatever
            api.list_missing_video_content() reports, minus the formats
            that s3.list_converted_formats() already lists for the video.
        language_channels: optional sequence of language channels. Every
            video id that lang_utils.video_ids_set() returns for them and
            that has no entry in s3.list_converted_formats() is queued for
            all of DOWNLOADABLE_FORMATS. If the first element is 'all',
            video_ids_set() is called without arguments (presumably
            meaning every channel -- confirm against lang_utils).

    Returns:
        None. Progress is reported through the module logger.
    """
    videos_converted = 0
    converted_formats = None

    if missing_on_s3:
        # With this option, videos that are missing in the S3 converted
        # bucket are converted. The API's download_urls is ignored.
        logger.info("Searching for videos that are missing from S3")
        formats_to_convert = s3.list_missing_converted_formats()
        legacy_mp4_videos = s3.list_legacy_mp4_videos()
    else:
        # With this option (the default), videos that are missing in the
        # API's download_urls are converted, if they do not already exist
        # on S3. Videos that are missing from S3, but present in the API's
        # download_urls, are ignored.
        logger.info(
            "Searching for videos that are missing from API download_urls")
        formats_to_convert = api.list_missing_video_content()
        converted_formats = s3.list_converted_formats()

    if language_channels:
        if language_channels[0] == 'all':
            channel_ids_set = lang_utils.video_ids_set()
        else:
            channel_ids_set = lang_utils.video_ids_set(language_channels)

        # Use converted_formats if already downloaded above, otherwise get
        # it now.
        converted_formats = converted_formats or s3.list_converted_formats()
        for vid_id in channel_ids_set:
            if vid_id not in converted_formats:
                # Give every video its own copy: the legacy-mp4 handling
                # below removes "mp4" in place, which would otherwise
                # mutate the shared DOWNLOADABLE_FORMATS constant for all
                # other videos (and for the rest of the process).
                formats_to_convert[vid_id] = set(DOWNLOADABLE_FORMATS)

    # .items() works on both Python 2 and Python 3 dictionaries.
    for youtube_id, missing_formats in formats_to_convert.items():
        if videos_converted >= max_videos:
            logger.info("Stopping: max videos reached")
            break

        if "_DUP_" in youtube_id:
            logger.info(
                "Skipping video %s as it has invalid DUP in youtube ID",
                youtube_id)
            continue

        if missing_on_s3:
            # We already know the formats are missing from S3.
            formats_to_create = missing_formats
            if (youtube_id in legacy_mp4_videos and
                    "mp4" in formats_to_create):
                if dryrun:
                    logger.info(
                        "Skipping copy of legacy content due to dryrun")
                else:
                    # The legacy mp4 is copied rather than re-encoded, so
                    # it no longer needs to be created.
                    s3.copy_legacy_content_to_new_location(youtube_id)
                    formats_to_create.remove("mp4")
        else:
            # Don't recreate any formats that are already waiting on s3
            # but are, for any reason, not known by the API.
            # .get() keeps this working whether or not the mapping
            # auto-creates entries for unknown ids.
            already_converted_still_unpublished = (
                converted_formats.get(youtube_id, set()) & missing_formats)
            if already_converted_still_unpublished:
                logger.info(
                    "Video %s missing formats %s in API but they are "
                    "already converted; use publish step",
                    youtube_id,
                    ",".join(sorted(already_converted_still_unpublished)))
            formats_to_create = (
                missing_formats - already_converted_still_unpublished)

        if not formats_to_create:
            continue

        logger.info("Starting conversion of %s into formats %s",
                    youtube_id, ",".join(sorted(formats_to_create)))

        if dryrun:
            logger.info(
                "Skipping downloading and sending job to zencoder due to "
                "dryrun")
        else:
            s3_source_url = s3.get_or_create_unconverted_source_url(
                youtube_id)
            # An assert would be stripped under "python -O"; skip the
            # video explicitly instead of sending Zencoder an empty URL.
            if not s3_source_url:
                logger.warning(
                    "No S3 source URL created for %s; skipping", youtube_id)
                continue
            zencode.start_converting(
                youtube_id, s3_source_url, formats_to_create)

        videos_converted += 1
def convert_missing_downloads(max_videos, dryrun=False):
    """Download from YouTube and use Zencoder to start converting any
    missing downloadable content into its appropriate downloadable format.

    The work list is whatever s3.list_missing_converted_formats() reports;
    the API's download_urls is not consulted.

    Args:
        max_videos: stop once this many videos have been handled in this
            run. Videos handled in dryrun mode are counted as well; videos
            skipped because of an error are not.
        dryrun: if True, only log what would be done; no legacy content is
            copied and no job is sent to Zencoder.

    Returns:
        None. Progress and skipped videos are reported through the module
        logger.
    """
    videos_converted = 0
    error_ids = []

    # Videos that are missing in the S3 converted bucket are converted.
    # The API's download_urls is ignored.
    logger.info("Searching for videos that are missing from S3")
    formats_to_convert = s3.list_missing_converted_formats()
    legacy_mp4_videos = s3.list_legacy_mp4_videos()

    # .items() works on both Python 2 and Python 3 dictionaries.
    for youtube_id, missing_formats in formats_to_convert.items():
        if videos_converted >= max_videos:
            logger.info("Stopping: max videos reached")
            break

        if "_DUP_" in youtube_id:
            logger.info(
                "Skipping video %s as it has invalid DUP in youtube ID",
                youtube_id)
            continue

        # We already know the formats are missing from S3.
        formats_to_create = missing_formats
        if youtube_id in legacy_mp4_videos and "mp4" in formats_to_create:
            if dryrun:
                logger.info("Skipping copy of legacy content due to dryrun")
            else:
                # The legacy mp4 is copied rather than re-encoded, so it
                # no longer needs to be created.
                s3.copy_legacy_content_to_new_location(youtube_id)
                formats_to_create.remove("mp4")

        if not formats_to_create:
            continue

        logger.info("Starting conversion of %s into formats %s",
                    youtube_id, ",".join(formats_to_create))

        if dryrun:
            logger.info(
                "Skipping downloading and sending job to zencoder due to "
                "dryrun")
            videos_converted += 1
            continue

        s3_source_url = s3.get_or_create_unconverted_source_url(youtube_id)
        if not s3_source_url:
            logger.warning(
                "No S3 source URL created for %s; skipping", youtube_id)
            error_ids.append(youtube_id)
            continue

        # Best effort: one failing video must not abort the whole batch.
        # "except ... as" is valid on Python 2.6+ and Python 3, unlike the
        # comma form.
        try:
            zencode.start_converting(
                youtube_id, s3_source_url, formats_to_create)
        except Exception as why:
            logger.error('Skipping youtube_id "%s": %s', youtube_id, why)
            error_ids.append(youtube_id)
        else:
            videos_converted += 1

    # Surface the collected failures so they are not silently dropped.
    if error_ids:
        logger.warning("Skipped %d video(s) due to errors: %s",
                       len(error_ids), ", ".join(error_ids))
def convert_missing_downloads(max_videos, dryrun=False):
    """Download from YouTube and use Zencoder to start converting any
    missing downloadable content into its appropriate downloadable format.

    The work list is whatever s3.list_missing_converted_formats() reports;
    the API's download_urls is not consulted.

    Args:
        max_videos: stop once this many videos have been handled in this
            run. Videos handled in dryrun mode are counted as well; videos
            skipped because of an error are not.
        dryrun: if True, only log what would be done; no legacy content is
            copied and no job is sent to Zencoder.

    Returns:
        None. Progress and skipped videos are reported through the module
        logger.
    """
    videos_converted = 0
    error_ids = []

    # Videos that are missing in the S3 converted bucket are converted.
    # The API's download_urls is ignored.
    logger.info("Searching for videos that are missing from S3")
    formats_to_convert = s3.list_missing_converted_formats()
    legacy_mp4_videos = s3.list_legacy_mp4_videos()

    # .items() works on both Python 2 and Python 3 dictionaries.
    for youtube_id, missing_formats in formats_to_convert.items():
        if videos_converted >= max_videos:
            logger.info("Stopping: max videos reached")
            break

        if "_DUP_" in youtube_id:
            logger.info(
                "Skipping video %s as it has invalid DUP in youtube ID",
                youtube_id)
            continue

        # We already know the formats are missing from S3.
        formats_to_create = missing_formats
        if youtube_id in legacy_mp4_videos and "mp4" in formats_to_create:
            if dryrun:
                logger.info("Skipping copy of legacy content due to dryrun")
            else:
                # The legacy mp4 is copied rather than re-encoded, so it
                # no longer needs to be created.
                s3.copy_legacy_content_to_new_location(youtube_id)
                formats_to_create.remove("mp4")

        if not formats_to_create:
            continue

        logger.info("Starting conversion of %s into formats %s",
                    youtube_id, ",".join(formats_to_create))

        if dryrun:
            logger.info(
                "Skipping downloading and sending job to zencoder due to "
                "dryrun")
            videos_converted += 1
            continue

        s3_source_url = s3.get_or_create_unconverted_source_url(youtube_id)
        if not s3_source_url:
            logger.warning(
                "No S3 source URL created for %s; skipping", youtube_id)
            error_ids.append(youtube_id)
            continue

        # Best effort: one failing video must not abort the whole batch.
        # "except ... as" is valid on Python 2.6+ and Python 3, unlike the
        # comma form.
        try:
            zencode.start_converting(
                youtube_id, s3_source_url, formats_to_create)
        except Exception as why:
            logger.error('Skipping youtube_id "%s": %s', youtube_id, why)
            error_ids.append(youtube_id)
        else:
            videos_converted += 1

    # Surface the collected failures so they are not silently dropped.
    if error_ids:
        logger.warning("Skipped %d video(s) due to errors: %s",
                       len(error_ids), ", ".join(error_ids))