Example #1
def convert_and_get_metadata(source_file_path, dest_file_path,
                             ffmpeg_base_args, vernon_id, file_type, title):
    if os.path.exists(dest_file_path):
        message = "Cancelling video conversion: " + dest_file_path + " already exists."
        logging.warning(message)
        post_slack_message(message)
        return

    with tempfile.TemporaryDirectory() as tmp_folder:
        tmp_path = os.path.join(tmp_folder, os.path.basename(dest_file_path))
        ffmpeg_args = ["ffmpeg", '-i', source_file_path
                       ] + ffmpeg_base_args + [tmp_path]
        cmd_str = " ".join(ffmpeg_args)
        logging.info("Running " + cmd_str)
        r = subprocess.run(ffmpeg_args, check=True)
        fixity_move(tmp_path, dest_file_path, failsafe_folder=None)
        logging.info("Conversion complete: " + dest_file_path)

    metadata = get_video_metadata(dest_file_path)
    with open(dest_file_path + ".json", 'w') as f:
        json.dump(metadata, f, indent=2, default=str)
    metadata.update({
        'vernon_id': vernon_id,
        'filetype': file_type,
        'title': title
    })
    write_metadata_summary_entry(metadata)
    new_file_slack_message("*New file* :hatching_chick:", dest_file_path,
                           seconds_to_hms(metadata['duration_secs']))

    return metadata
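
A hypothetical call site for the function above; the paths, identifiers, and ffmpeg arguments below are illustrative assumptions, not values from the original project.

# Hypothetical usage sketch: every literal here is an assumption.
access_metadata = convert_and_get_metadata(
    source_file_path="/watch/ab1234_mp01_interview.mov",
    dest_file_path="/access/ab1234_ap01_interview.mp4",
    ffmpeg_base_args=["-c:v", "libx264", "-crf", "23", "-c:a", "aac"],
    vernon_id="ab1234",
    file_type="ap01",
    title="interview",
)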
Example #2
def main():
    # LOOK FOR VIDEO FILES TO CONVERT
    logging.info("Looking for video files to convert...")
    logging.info("settings.WATCH_FOLDER: %s." % settings.WATCH_FOLDER)
    source_file_path = find_video_file(settings.WATCH_FOLDER)
    if not source_file_path:
        logging.info("No files found. Waiting 1hr.\n")
        time.sleep(3600)
        return
    logging.info("source_file_path: %s" % source_file_path)
    logging.info("Looking for video files to convert... DONE\n")

    # MAKE SURE WE HAVE THE DESTINATION FOLDERS
    try:
        logging.info("Making sure we have the destination folders...")
        master_filename = os.path.basename(source_file_path)
        master_basename = os.path.splitext(master_filename)[0]
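        # Expected master naming pattern: <vernon_id>_<type code, e.g. mp01>_<title>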
        master_re_match = re.match(r"([a-zA-Z0-9]+)_([ma][a-z]\d\d)_(.+)",
                                   master_basename)

        try:
            vernon_id, master_file_type, title = master_re_match.groups()
            assert master_file_type[0] == "m"
        except (AttributeError, AssertionError):
            # Either the filename didn't match the pattern at all (AttributeError
            # on None.groups()) or it matched but isn't a master ("m...") type.
            if os.getenv('FLEXIBLE_MASTER_NAMING', 'False') == 'True':
                vernon_id = ""
                master_file_type = "m"
                title = master_basename
            else:
                raise ValueError(
                    "%s is not named like a collections preservation master file. "
                    "Consider setting the environment variable FLEXIBLE_MASTER_NAMING=True."
                    % master_filename)

        access_file_type = "a%s" % master_file_type[1:]
        web_file_type = "w%s" % master_file_type[1:]
        vernon_id_str = vernon_id + "_" if vernon_id else ""
        access_filename = vernon_id_str + access_file_type + "_" + title + settings.ACCESS_FFMPEG_DESTINATION_EXT
        web_filename = vernon_id_str + web_file_type + "_" + title + settings.WEB_FFMPEG_DESTINATION_EXT
        destination_master_folder = settings.MASTER_FOLDER + vernon_id_str + title + '/'
        destination_access_folder = settings.ACCESS_FOLDER + vernon_id_str + title + '/'
        destination_web_folder = settings.WEB_FOLDER + vernon_id_str + title + '/'

        if not os.path.exists(destination_master_folder):
            os.mkdir(destination_master_folder)
        if not os.path.exists(destination_access_folder):
            os.mkdir(destination_access_folder)
        if settings.TRANSCODE_WEB_COPY:
            if not os.path.exists(destination_web_folder):
                os.mkdir(destination_web_folder)

        master_file_path = destination_master_folder + master_filename
        access_file_path = destination_access_folder + access_filename
        web_file_path = destination_web_folder + web_filename

        logging.info("master_file_path: %s" % master_file_path)
        logging.info("access_file_path: %s" % access_file_path)
        if settings.TRANSCODE_WEB_COPY:
            logging.info("web_file_path: %s" % web_file_path)

        logging.info("Making sure we have the destination folders... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Could not make sure we have the destination folders. There may be something funny with the file name: %s"
            % e)

    # HASH MASTER AND LOG METADATA
    try:
        logging.info("Hashing master and logging metadata...")
        generate_file_md5(source_file_path, store=True)
        master_metadata = get_video_metadata(source_file_path)
        master_metadata.update({
            'vernon_id': vernon_id,
            'filetype': master_file_type,
            'title': title
        })
        write_metadata_summary_entry(master_metadata)
        logging.info("Hashing master and logging metadata... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Couldn't hash master and log metadata: %s" % e)

    # UPDATE XOS WITH STUB VIDEO
    try:
        logging.info("Getting or creating XOS stub video...")
        asset_id = get_or_create_xos_stub_video({
            'title': master_filename + " NOT UPLOADED",
            'master_metadata': master_metadata,
        })
        logging.info("Stub video django ID: %s" % asset_id)
        logging.info("Getting or creating XOS stub video... DONE\n")
    except Exception as e:
        return post_slack_exception("Couldn't update XOS: %s" % e)

    # CONVERT TO ACCESS AND WEB FORMATS
    if settings.EXHIBITIONS_TRANSCODER:
        # Transcoder settings for in-gallery exhibitions videos
        access_metadata, web_metadata = convert_to_exhibition_formats(
            source_file_path,
            access_file_path,
            access_file_type,
            web_file_path,
            web_file_type,
            vernon_id,
            title,
        )
    else:
        # Transcoder settings for collections videos
        access_metadata, web_metadata = convert_to_collection_formats(
            source_file_path,
            access_file_path,
            access_file_type,
            web_file_path,
            web_file_type,
            vernon_id,
            title,
        )

    # MOVE THE SOURCE FILE INTO THE MASTER FOLDER
    try:
        logging.info("Moving the source file into the master folder...")
        fixity_move(source_file_path,
                    master_file_path,
                    failsafe_folder=settings.OUTPUT_FOLDER)
        with open(master_file_path + ".json", 'w') as f:
            json.dump(master_metadata, f, indent=2, default=str)
        new_file_slack_message(
            "*New master file* :movie_camera:", master_file_path,
            seconds_to_hms(master_metadata['duration_secs']))
        logging.info("Moving the source file into the master folder... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Couldn't move the source file into the master folder: %s" % e)

    # UPLOAD THE ACCESS AND WEB FILES TO S3
    try:
        logging.info("Uploading access file to S3...")
        upload_to_s3(access_file_path)
        logging.info("Uploading access file to S3... DONE\n")
        if settings.TRANSCODE_WEB_COPY:
            logging.info("Uploading web file to S3...")
            upload_to_s3(web_file_path)
            shutil.rmtree(destination_web_folder)
            logging.info("Uploading web file to S3... DONE\n")
    except Exception as e:
        return post_slack_exception("%s Couldn't upload to S3" % e)

    # UPDATE XOS VIDEO URLS AND METADATA
    try:
        logging.info("Updating XOS video urls and metadata...")
        generate_file_md5(master_file_path, store=True)
        xos_asset_data = {
            'title': master_filename,
            'resource': os.path.basename(access_file_path),
            'access_metadata': json.dumps(access_metadata, default=str),
        }
        if settings.TRANSCODE_WEB_COPY:
            xos_asset_data.update({
                'web_resource': os.path.basename(web_file_path),
                'web_metadata': json.dumps(web_metadata, default=str),
            })
        update_xos_with_final_video(asset_id, xos_asset_data)
        logging.info("Updating XOS video urls and metadata... DONE\n")
    except Exception as e:
        return post_slack_exception(
            "Couldn't update XOS video urls and metadata: %s" % e)

    unlock(source_file_path)
    logging.info("=" * 80)
Example #3
    def test_frames_rounding_2(self):
        self.assertEqual(seconds_to_hms(65.99, output_frames=True), '01:06:00')
Example #4
def get_video_metadata(video_location):
    """
    Use ffprobe to discern information about the video.

    :param video_location: URL or Path to video file. URLs that 30x redirect to a file are OK.
    :return: Dictionary of attributes.
    """

    ffprobe_args = [
        "ffprobe", "-v", "quiet", "-print_format", "json", "-show_format",
        "-show_streams", video_location
    ]
    try:
        command = " ".join(ffprobe_args)
        logging.info("Running %s" % command)
        cmd = subprocess.run(ffprobe_args, stdout=subprocess.PIPE, check=True)
        out = cmd.stdout
        m = json.loads(out.decode('utf-8'))
    except subprocess.CalledProcessError as e:
        raise FFMPEGError(e.returncode, ffprobe_args) from e

    # Put the first stream of each type at the top level, for straightforward
    # property access, e.g. m['video']['width'].
    for stream in m['streams']:
        if stream['codec_type'] not in m:
            m[stream['codec_type']] = stream

    m_video = m.get('video', {})
    m_audio = m.get('audio', {})

    frame_rate = m_video.get('avg_frame_rate', "0/1").split("/")
    video_frame_rate = int(frame_rate[0]) / int(frame_rate[1])

    # TODO: these are naive datetimes at the moment. Need to make them aware.
    # Use various techniques to get the creation date. If it's in the video metadata, use that, else use the
    # file/header.

    file_metadata = get_file_metadata(video_location)
    try:
        creation_datetime = parse_date(
            m['format']['tags'].get('creation_time'))
    except (KeyError, ValueError, TypeError):
        creation_datetime = file_metadata['creation_datetime']

    _, ext = os.path.splitext(video_location)

    with open('%s.md5' % video_location) as checksum_file:
        checksum = checksum_file.read()

    duration_hms = seconds_to_hms(float(m['format'].get('duration', 0.0)),
                                  always_include_hours=True,
                                  output_frames=True,
                                  framerate=video_frame_rate)

    return {
        'mime_type': VIDEO_MIME_TYPES.get(ext, None),
        'creation_datetime': str(creation_datetime),
        'file_size_bytes': file_metadata['file_size_bytes'],
        'duration_secs': float(m['format'].get('duration', 0.0)),
        'duration_hms': duration_hms,
        'overall_bit_rate': int(m['format'].get('bit_rate', 0)) or None,
        'video_codec': m_video.get('codec_name', None),
        'video_bit_rate': int(m_video.get('bit_rate', 0)) or None,
        'video_max_bit_rate': int(m_video.get('max_bit_rate', 0)) or None,
        'video_frame_rate': video_frame_rate,
        'width': m_video.get('width', None),  # int already
        'height': m_video.get('height', None),  # int already
        'audio_codec': m_audio.get('codec_name', None),
        'audio_channels': m_audio.get('channels', None),  # int
        'audio_sample_rate': int(m_audio.get('sample_rate', 0)) or None,
        'audio_bit_rate': int(m_audio.get('bit_rate', 0)) or None,
        'audio_max_bit_rate': int(m_audio.get('max_bit_rate', 0)) or None,
        'checksum': checksum,
    }
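
A hypothetical call to the function above; the path is illustrative, and a sidecar "<file>.md5" checksum must already exist because the function reads it for the 'checksum' field.

# Hypothetical usage sketch: requires ffprobe on PATH and an existing .md5 sidecar.
info = get_video_metadata("/masters/ab1234_mp01_interview.mov")
print(info['duration_hms'], info['width'], info['height'], info['video_codec'])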
Example #5
    def test_hours_and_frames(self):
        self.assertEqual(
            seconds_to_hms(72.5, always_include_hours=True, output_frames=True, framerate=30),
            '00:01:12:15',
        )
Example #6
    def test_frames_rounding(self):
        self.assertEqual(seconds_to_hms(65.16, output_frames=True, framerate=25), '01:05:04')
Example #7
    def test_frames_2(self):
        self.assertEqual(seconds_to_hms(72.5, output_frames=True, framerate=30), '01:12:15')
Example #8
    def test_ignore_decimals(self):
        self.assertEqual(seconds_to_hms(72.5, decimal_places=2, output_frames=True), '01:12:12')
Example #9
    def test_frames(self):
        self.assertEqual(seconds_to_hms(72.5, output_frames=True), '01:12:12')
Example #10
    def test_secs_decimals_2(self):
        self.assertEqual(seconds_to_hms(72.5, decimal_places=2), '01:12.50')
Example #11
    def test_always_include_hours(self):
        self.assertEqual(seconds_to_hms(75, always_include_hours=True), '00:01:15')
Example #12
    def test_hours(self):
        self.assertEqual(seconds_to_hms(3600), '01:00:00')
Example #13
    def test_secs_only(self):
        self.assertEqual(seconds_to_hms(75), '01:15')
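
The tests in Examples #3 and #5-#13 pin down the behaviour of seconds_to_hms without showing its body. Below is a minimal sketch that is consistent with those tests, not the project's actual implementation; the default framerate of 24 is inferred from test_frames (0.5 s -> 12 frames).

# Minimal sketch consistent with the tests above; not the project's implementation.
def seconds_to_hms(seconds, always_include_hours=False, decimal_places=None,
                   output_frames=False, framerate=24):
    if output_frames:
        # Round the fractional second to the nearest frame, carrying over on round-up.
        total_frames = int(round(seconds * framerate))
        whole_seconds, frames = divmod(total_frames, int(framerate))
        suffix = ":%02d" % frames
    elif decimal_places:
        whole_seconds = int(seconds)
        # Keep only the fractional digits, e.g. "0.50" -> ".50".
        suffix = ("%.*f" % (decimal_places, seconds - whole_seconds))[1:]
    else:
        whole_seconds = int(round(seconds))
        suffix = ""
    hours, remainder = divmod(int(whole_seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    if hours or always_include_hours:
        return "%02d:%02d:%02d%s" % (hours, minutes, secs, suffix)
    return "%02d:%02d%s" % (minutes, secs, suffix)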