def convert_missing_downloads(max_videos, dryrun=False, missing_on_s3=False,
                              language_channels=None):
    """Download from YouTube and use Zencoder to start converting any
    missing downloadable content into its appropriate downloadable format.

    Arguments:
        max_videos: stop after starting conversions for this many videos.
        dryrun: if True, log what would be done but start no copies or
            Zencoder jobs.
        missing_on_s3: if True, convert formats missing from the S3
            converted bucket, ignoring the API's download_urls; otherwise
            (the default) convert formats missing from the API's
            download_urls that are not already converted on S3.
        language_channels: optional list of channel identifiers whose
            un-converted videos should all be queued for every format in
            DOWNLOADABLE_FORMATS; a first element of 'all' selects every
            channel known to lang_utils.
    """
    videos_converted = 0
    converted_formats = None
    if missing_on_s3:
        # With this option, videos that are missing in the S3 converted
        # bucket are converted. The API's download_urls is ignored.
        logger.info("Searching for videos that are missing from S3")
        formats_to_convert = s3.list_missing_converted_formats()
        legacy_mp4_videos = s3.list_legacy_mp4_videos()
    else:
        # With this option (the default), videos that are missing in the
        # API's download_urls are converted, if they do not already exist
        # on S3. Videos that are missing from S3, but present in the API's
        # download_urls, are ignored.
        logger.info(
            "Searching for videos that are missing from API download_urls")
        formats_to_convert = api.list_missing_video_content()
        converted_formats = s3.list_converted_formats()

    if language_channels:
        if language_channels[0] == 'all':
            channel_ids_set = lang_utils.video_ids_set()
        else:
            channel_ids_set = lang_utils.video_ids_set(language_channels)
        # Use converted_formats if already downloaded above, otherwise get
        # it now.
        converted_formats = converted_formats or s3.list_converted_formats()
        for vid_id in channel_ids_set:
            if vid_id not in converted_formats:
                # Store a copy: sharing the DOWNLOADABLE_FORMATS object
                # would let later in-place mutation (e.g. the "mp4"
                # removal below) corrupt the module-level constant.
                formats_to_convert[vid_id] = set(DOWNLOADABLE_FORMATS)

    # .items() (not .iteritems()) works under both Python 2 and 3.
    for youtube_id, missing_formats in formats_to_convert.items():
        if videos_converted >= max_videos:
            logger.info("Stopping: max videos reached")
            break

        if "_DUP_" in youtube_id:
            logger.info(
                ("Skipping video {0} as it has invalid DUP in youtube ID"
                 .format(youtube_id)))
            continue

        if missing_on_s3:
            # We already know the formats are missing from S3. Copy the
            # set so removing "mp4" below cannot mutate the value still
            # held inside formats_to_convert.
            formats_to_create = set(missing_formats)
            if (youtube_id in legacy_mp4_videos
                    and "mp4" in formats_to_create):
                if dryrun:
                    logger.info(
                        "Skipping copy of legacy content due to dryrun")
                else:
                    s3.copy_legacy_content_to_new_location(youtube_id)
                # Legacy mp4 content is (or would be, in a dryrun) copied
                # rather than re-converted, so drop it from the work list.
                formats_to_create.remove("mp4")
        else:
            # Don't recreate any formats that are already waiting on s3
            # but are, for any reason, not known by the API.
            # .get() guards against videos with no converted content on
            # S3 at all, which would otherwise raise KeyError.
            already_converted_still_unpublished = (
                converted_formats.get(youtube_id, set()) & missing_formats)
            if already_converted_still_unpublished:
                logger.info(
                    "Video %s missing formats %s in API but they are "
                    "already converted; use publish step"
                    % (youtube_id,
                       ",".join(already_converted_still_unpublished)))
            formats_to_create = (
                missing_formats - already_converted_still_unpublished)

        if not formats_to_create:
            continue

        logger.info("Starting conversion of %s into formats %s"
                    % (youtube_id, ",".join(formats_to_create)))

        if dryrun:
            logger.info(
                "Skipping downloading and sending job to zencoder due to "
                "dryrun")
        else:
            s3_source_url = s3.get_or_create_unconverted_source_url(
                youtube_id)
            # Raise explicitly (not `assert`, which is stripped under -O):
            # a missing source URL means we cannot hand work to Zencoder.
            if not s3_source_url:
                raise AssertionError(
                    "No unconverted source URL for %s" % youtube_id)
            zencode.start_converting(
                youtube_id, s3_source_url, formats_to_create)

        videos_converted += 1
def publish_converted_videos(max_videos, dryrun=False, use_archive=True):
    """Publish already-converted downloadable content to the API.

    For each video the API reports as missing downloadable formats, if
    all of those formats have been converted on S3, optionally upload
    them to archive.org and then update the API's download_available
    record.

    Arguments:
        max_videos: stop after this many publish attempts.
        dryrun: if True, log what would be published without uploading
            or updating the API.
        use_archive: if True (the default), upload converted content to
            archive.org before updating the API; if False, assume the
            API points directly at S3 and skip the upload.
    """
    logger.info(
        "Searching for downloadable content that needs to be "
        "published")
    publish_attempts = 0

    converted_formats = s3.list_converted_formats()

    # .items() (not .iteritems()) works under both Python 2 and 3.
    for youtube_id, missing_formats in (api.list_missing_video_content()
                                        .items()):
        if publish_attempts >= max_videos:
            logger.info("Stopping: max videos reached")
            break

        # .get() guards against videos with no converted content on S3
        # at all, which would otherwise raise KeyError.
        converted_missing_formats = (
            converted_formats.get(youtube_id, set()) & missing_formats)
        unconverted_formats = missing_formats - converted_missing_formats
        if unconverted_formats:
            logger.info(
                "Video %s missing formats %s which are still "
                "unconverted, can't be published"
                % (youtube_id, ",".join(unconverted_formats)))

        # If no converted content waiting, just continue to next video
        if not converted_missing_formats:
            continue

        if dryrun:
            logger.info(
                "Skipping publish for video {0} formats {1} due to dryrun"
                .format(youtube_id, ", ".join(converted_missing_formats)))
        else:
            if use_archive:
                if s3.upload_converted_to_archive(
                        youtube_id, converted_missing_formats):
                    logger.info("Successfully uploaded to archive.org")
                else:
                    logger.error(
                        "Unable to upload video {0} to archive.org"
                        .format(youtube_id))
                    continue
            else:
                logger.info("Skipping upload to archive.org; assuming API "
                            "points directly to S3 instead.")

            current_format_downloads = (api.video_metadata(youtube_id)[
                "download_urls"] or {})
            current_formats = set(current_format_downloads.keys())
            new_formats = current_formats | converted_missing_formats
            if "mp4" in new_formats:
                # PNG thumbnails are generated as part of the MP4
                # conversion process. If mp4 has been uploaded to
                # archive.org, png is guaranteed to be there as well.
                new_formats.add("png")

            if api.update_download_available(youtube_id, new_formats):
                logger.info(
                    "Updated KA download_available, set to {0} for video "
                    "{1}".format(", ".join(new_formats), youtube_id))
            else:
                logger.error(
                    "Unable to update KA download_available to {0} for "
                    "youtube id {1}".format(", ".join(new_formats),
                                            youtube_id))

        publish_attempts += 1