def convert_and_get_metadata(source_file_path, dest_file_path, ffmpeg_base_args,
                             vernon_id, file_type, title):
    """Transcode one video with ffmpeg, then write and return its metadata.

    The transcode runs into a temporary directory and the finished file is
    moved into place with fixity_move, so a half-written file never appears
    at dest_file_path. A "<dest>.json" metadata sidecar is written, a summary
    entry is logged, and a Slack notification is posted.

    :param source_file_path: path of the video to transcode.
    :param dest_file_path: final destination path; if it already exists the
        conversion is skipped (a warning is posted to Slack) and None is
        returned.
    :param ffmpeg_base_args: list of ffmpeg arguments inserted between the
        input and output paths.
    :param vernon_id: collection record id merged into the returned metadata.
    :param file_type: file-type code merged into the returned metadata.
    :param title: title merged into the returned metadata.
    :return: the metadata dict (with vernon_id/filetype/title merged in),
        or None when the destination already existed.
    :raises subprocess.CalledProcessError: if ffmpeg exits non-zero.
    """
    if os.path.exists(dest_file_path):
        message = "Cancelling video conversion: " + dest_file_path + " already exists."
        logging.warning(message)
        post_slack_message(message)
        return
    with tempfile.TemporaryDirectory() as tmp_folder:
        tmp_path = os.path.join(tmp_folder, os.path.basename(dest_file_path))
        ffmpeg_args = ["ffmpeg", '-i', source_file_path] + ffmpeg_base_args + [tmp_path]
        logging.info("Running " + " ".join(ffmpeg_args))
        # check=True raises CalledProcessError on a non-zero ffmpeg exit
        # (the CompletedProcess return value is not needed).
        subprocess.run(ffmpeg_args, check=True)
        # Checksum-verified move of the finished file into its final location.
        fixity_move(tmp_path, dest_file_path, failsafe_folder=None)
    logging.info("Conversion complete: " + dest_file_path)
    metadata = get_video_metadata(dest_file_path)
    with open(dest_file_path + ".json", 'w') as f:
        json.dump(metadata, f, indent=2, default=str)
    metadata.update({
        'vernon_id': vernon_id,
        'filetype': file_type,
        'title': title
    })
    write_metadata_summary_entry(metadata)
    new_file_slack_message("*New file* :hatching_chick:", dest_file_path,
                           seconds_to_hms(metadata['duration_secs']))
    return metadata
def main():
    """Run one pass of the transcode worker.

    Picks up a single master video from the watch folder, creates the
    destination folder structure, hashes and logs the master, registers a
    stub record with XOS, transcodes access/web derivatives, moves the
    master into place, uploads derivatives to S3, and finalises the XOS
    record. A failure in any stage is reported to Slack and aborts the pass.
    When the watch folder is empty, sleeps for an hour and returns.
    """
    # LOOK FOR VIDEO FILES TO CONVERT
    logging.info("Looking for video files to convert...")
    logging.info("settings.WATCH_FOLDER: %s." % settings.WATCH_FOLDER)
    source_file_path = find_video_file(settings.WATCH_FOLDER)
    if not source_file_path:
        logging.info("No files found. Waiting 1hr.\n")
        time.sleep(3600)
        return
    logging.info("source_file_path: %s" % source_file_path)
    logging.info("Looking for video files to convert... DONE\n")

    # MAKE SURE WE HAVE THE DESTINATION FOLDERS
    try:
        logging.info("Making sure we have the destination folders...")
        master_filename = os.path.basename(source_file_path)
        master_basename = os.path.splitext(master_filename)[0]
        # Expected naming convention: <vernon_id>_<type code>_<title>,
        # where a master's type code starts with "m" (e.g. "mp01").
        master_re_match = re.match(r"([a-zA-Z0-9]+)_([ma][a-z]\d\d)_(.+)",
                                   master_basename)
        try:
            vernon_id, master_file_type, title = master_re_match.groups()
            assert master_file_type[0] == "m"
        # AttributeError: the regex didn't match (master_re_match is None).
        # AssertionError: the type code isn't a master ("m...") code.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        except (AttributeError, AssertionError) as exc:
            if os.getenv('FLEXIBLE_MASTER_NAMING') == 'True':
                vernon_id = ""
                master_file_type = "m"
                title = master_basename
            else:
                raise ValueError(
                    "%s is not named like a collections preservation master file. Consider setting the environment variable FLEXIBLE_MASTER_NAMING=True."
                    % master_filename) from exc
        access_file_type = "a%s" % master_file_type[1:]
        web_file_type = "w%s" % master_file_type[1:]
        vernon_id_str = vernon_id + "_" if vernon_id else ""
        access_filename = vernon_id_str + access_file_type + "_" + title + settings.ACCESS_FFMPEG_DESTINATION_EXT
        web_filename = vernon_id_str + web_file_type + "_" + title + settings.WEB_FFMPEG_DESTINATION_EXT
        destination_master_folder = settings.MASTER_FOLDER + vernon_id_str + title + '/'
        destination_access_folder = settings.ACCESS_FOLDER + vernon_id_str + title + '/'
        destination_web_folder = settings.WEB_FOLDER + vernon_id_str + title + '/'
        # makedirs(exist_ok=True) avoids the exists()/mkdir() race.
        os.makedirs(destination_master_folder, exist_ok=True)
        os.makedirs(destination_access_folder, exist_ok=True)
        if settings.TRANSCODE_WEB_COPY:
            os.makedirs(destination_web_folder, exist_ok=True)
        master_file_path = destination_master_folder + master_filename
        access_file_path = destination_access_folder + access_filename
        web_file_path = destination_web_folder + web_filename
        logging.info("master_file_path: %s" % master_file_path)
        logging.info("access_file_path: %s" % access_file_path)
        if settings.TRANSCODE_WEB_COPY:
            logging.info("web_file_path: %s" % web_file_path)
        logging.info("Making sure we have the destination folders... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Could not make sure we have the destination folders. There may be something funny with the file name: %s"
            % e)

    # HASH MASTER AND LOG METADATA
    try:
        logging.info("Hashing master and logging metadata...")
        generate_file_md5(source_file_path, store=True)
        master_metadata = get_video_metadata(source_file_path)
        master_metadata.update({
            'vernon_id': vernon_id,
            'filetype': master_file_type,
            'title': title
        })
        write_metadata_summary_entry(master_metadata)
        logging.info("Hashing master and logging metadata... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Couldn't hash master and log metadata: %s" % e)

    # UPDATE XOS WITH STUB VIDEO
    try:
        logging.info("Getting or creating XOS stub video...")
        asset_id = get_or_create_xos_stub_video({
            'title': master_filename + " NOT UPLOADED",
            'master_metadata': master_metadata
        })
        logging.info("Stub video django ID: %s" % asset_id)
        logging.info("Getting or creating XOS stub video... DONE\n")
    except Exception as e:
        return post_slack_exception("Couldn't update XOS: %s" % e)

    # CONVERT TO ACCESS AND WEB FORMATS
    if settings.EXHIBITIONS_TRANSCODER:
        # Transcoder settings for in-gallery exhibitions videos
        access_metadata, web_metadata = convert_to_exhibition_formats(
            source_file_path,
            access_file_path,
            access_file_type,
            web_file_path,
            web_file_type,
            vernon_id,
            title,
        )
    else:
        # Transcoder settings for collections videos
        access_metadata, web_metadata = convert_to_collection_formats(
            source_file_path,
            access_file_path,
            access_file_type,
            web_file_path,
            web_file_type,
            vernon_id,
            title,
        )

    # MOVE THE SOURCE FILE INTO THE MASTER FOLDER
    try:
        logging.info("Moving the source file into the master folder...")
        fixity_move(source_file_path, master_file_path,
                    failsafe_folder=settings.OUTPUT_FOLDER)
        with open(master_file_path + ".json", 'w') as f:
            json.dump(master_metadata, f, indent=2, default=str)
        new_file_slack_message(
            "*New master file* :movie_camera:", master_file_path,
            seconds_to_hms(master_metadata['duration_secs']))
        logging.info("Moving the source file into the master folder... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Couldn't move the source file into the master folder: %s" % e)

    # UPLOAD THE ACCESS AND WEB FILES TO S3
    try:
        logging.info("Uploading access file to S3...")
        upload_to_s3(access_file_path)
        logging.info("Uploading access file to S3... DONE\n")
        if settings.TRANSCODE_WEB_COPY:
            logging.info("Uploading web file to S3...")
            upload_to_s3(web_file_path)
            # The web copy only lives in S3; drop the local folder.
            shutil.rmtree(destination_web_folder)
            logging.info("Uploading web file to S3... DONE\n")
    except Exception as e:
        return post_slack_exception("%s Couldn't upload to S3" % e)

    # UPDATE XOS VIDEO URLS AND METADATA
    try:
        logging.info("Updating XOS video urls and metadata...")
        generate_file_md5(master_file_path, store=True)
        xos_asset_data = {
            'title': master_filename,
            'resource': os.path.basename(access_file_path),
            'access_metadata': json.dumps(access_metadata, default=str),
        }
        if settings.TRANSCODE_WEB_COPY:
            xos_asset_data.update({
                'web_resource': os.path.basename(web_file_path),
                'web_metadata': json.dumps(web_metadata, default=str)
            })
        update_xos_with_final_video(asset_id, xos_asset_data)
        logging.info("Updating XOS video urls and metadata... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "%s Couldn't update XOS video urls and metadata" % e)

    unlock(source_file_path)
    logging.info("=" * 80)
def test_frames_rounding_2(self):
    # 65.99 s rounds up to exactly 66 s, i.e. frame 00 of the next second.
    actual = seconds_to_hms(65.99, output_frames=True)
    self.assertEqual(actual, '01:06:00')
def get_video_metadata(video_location):
    """
    Use ffprobe to discern information about the video.

    :param video_location: URL or Path to video file. URLs that 30x redirect
        to a file are OK.
    :return: Dictionary of attributes.
    :raises FFMPEGError: if ffprobe exits non-zero.
    """
    ffprobe_args = [
        "ffprobe", "-v", "quiet", "-print_format", "json", "-show_format",
        "-show_streams", video_location
    ]
    try:
        logging.info("Running %s" % " ".join(ffprobe_args))
        cmd = subprocess.run(ffprobe_args, stdout=subprocess.PIPE, check=True)
        m = json.loads(cmd.stdout.decode('utf-8'))
    except subprocess.CalledProcessError as e:
        raise FFMPEGError(e.returncode, ffprobe_args) from e

    # put the first stream of each type in the top-level, for straightforward
    # property access via e.g. j['video']['width']
    for stream in m['streams']:
        if stream['codec_type'] not in m:
            m[stream['codec_type']] = stream
    m_video = m.get('video', {})
    m_audio = m.get('audio', {})

    # ffprobe reports avg_frame_rate as a fraction string, e.g. "25/1".
    # For streams with no frames it can report "0/0", which previously
    # raised ZeroDivisionError; report 0.0 fps in that case instead.
    numerator, denominator = (
        int(part) for part in m_video.get('avg_frame_rate', "0/1").split("/"))
    video_frame_rate = numerator / denominator if denominator else 0.0

    # TODO: these are naive datetimes at the moment. Need to make them aware.
    # Use various techniques to get the creation date. If it's in the video
    # metadata, use that, else use the file/header.
    file_metadata = get_file_metadata(video_location)
    try:
        creation_datetime = parse_date(
            m['format']['tags'].get('creation_time'))
    except (KeyError, ValueError, TypeError):
        creation_datetime = file_metadata['creation_datetime']

    _, ext = os.path.splitext(video_location)
    # A "<file>.md5" sidecar is expected to already exist next to the video.
    with open('%s.md5' % video_location) as checksum_file:
        checksum = checksum_file.read()

    duration_secs = float(m['format'].get('duration', 0.0))
    duration_hms = seconds_to_hms(duration_secs,
                                  always_include_hours=True,
                                  output_frames=True,
                                  framerate=video_frame_rate)
    return {
        'mime_type': VIDEO_MIME_TYPES.get(ext, None),
        'creation_datetime': str(creation_datetime),
        'file_size_bytes': file_metadata['file_size_bytes'],
        'duration_secs': duration_secs,
        'duration_hms': duration_hms,
        'overall_bit_rate': int(m['format'].get('bit_rate', 0)) or None,
        'video_codec': m_video.get('codec_name', None),
        'video_bit_rate': int(m_video.get('bit_rate', 0)) or None,
        'video_max_bit_rate': int(m_video.get('max_bit_rate', 0)) or None,
        'video_frame_rate': video_frame_rate,
        'width': m_video.get('width', None),  # int already
        'height': m_video.get('height', None),  # int already
        'audio_codec': m_audio.get('codec_name', None),
        'audio_channels': m_audio.get('channels', None),  # int
        'audio_sample_rate': int(m_audio.get('sample_rate', 0)) or None,
        'audio_bit_rate': int(m_audio.get('bit_rate', 0)) or None,
        'audio_max_bit_rate': int(m_audio.get('max_bit_rate', 0)) or None,
        'checksum': checksum,
    }
def test_hours_and_frames(self):
    # 72.5 s at 30 fps: the half second is frame 15; hours are forced on.
    actual = seconds_to_hms(72.5,
                            always_include_hours=True,
                            output_frames=True,
                            framerate=30)
    self.assertEqual(actual, '00:01:12:15')
def test_frames_rounding(self):
    # 0.16 s at 25 fps comes out as frame 04.
    actual = seconds_to_hms(65.16, output_frames=True, framerate=25)
    self.assertEqual(actual, '01:05:04')
def test_frames_2(self):
    # 72.5 s at 30 fps: the half second is frame 15.
    actual = seconds_to_hms(72.5, output_frames=True, framerate=30)
    self.assertEqual(actual, '01:12:15')
def test_ignore_decimals(self):
    # output_frames should take precedence over decimal_places.
    actual = seconds_to_hms(72.5, decimal_places=2, output_frames=True)
    self.assertEqual(actual, '01:12:12')
def test_frames(self):
    # At the default framerate the half second maps to frame 12.
    actual = seconds_to_hms(72.5, output_frames=True)
    self.assertEqual(actual, '01:12:12')
def test_secs_decimals_2(self):
    # Two decimal places on the seconds field, no frames.
    actual = seconds_to_hms(72.5, decimal_places=2)
    self.assertEqual(actual, '01:12.50')
def test_always_include_hours(self):
    # Hours are shown as "00" even when the duration is under an hour.
    actual = seconds_to_hms(75, always_include_hours=True)
    self.assertEqual(actual, '00:01:15')
def test_hours(self):
    # Exactly one hour.
    actual = seconds_to_hms(3600)
    self.assertEqual(actual, '01:00:00')
def test_secs_only(self):
    # Under an hour: hours field is omitted by default.
    actual = seconds_to_hms(75)
    self.assertEqual(actual, '01:15')