Ejemplo n.º 1
0
def get_metadata(path, new_log_textfile):
    '''
    Recursively create technical metadata sidecars for the files under path.

    AV files (matched by extension, case-sensitively) get a mediainfo XML and
    a mediatrace XML. Document and image files get an exiftool XML and a
    Siegfried JSON. Hidden files (names starting with '.') are ignored, and so
    are document-type files whose name contains the name of one of the
    metadata tools, so that sidecars produced here are not analysed again.

    Every sidecar is written to <path>/metadata and named after the source
    file's basename; one event per sidecar is appended to new_log_textfile
    through ififuncs.generate_log.
    NOTE(review): the metadata directory is not created here - presumably the
    caller has already made it; confirm.

    This should probably go in ififuncs as it could be used by other scripts.
    '''
    av_extensions = (
        '.mov', '.MP4', '.mp4', '.mkv', '.MXF', '.mxf', '.dv', '.DV'
    )
    document_extensions = (
        '.tif', '.tiff', '.doc', '.txt', '.docx', '.pdf', '.jpg', '.jpeg',
        '.png', '.rtf', '.xml', '.odt'
    )
    # A filename containing any of these words is treated as the output of a
    # metadata tool rather than as source material.
    blacklist = ('siegfried', 'exiftool', 'mediainfo', 'mediatrace')
    metadata_dir = os.path.join(path, 'metadata')
    mediainfo_version = 'mediainfo'
    try:
        mediainfo_version = subprocess.check_output([
            'mediainfo', '--Version'
        ]).rstrip()
    except subprocess.CalledProcessError as grepexc:
        # On a non-zero exit the version is taken from the second line of
        # whatever the command printed.
        mediainfo_version = grepexc.output.rstrip().splitlines()[1]
    # Resolved on first use: the tools are only needed when a document file
    # is actually found, and one lookup per call is enough.
    exiftool_version = None
    siegfried_version = None
    for root, _, filenames in os.walk(path):
        for av_file in filenames:
            if av_file[0] == '.':
                continue
            source_file = os.path.join(root, av_file)
            sidecar_stem = "%s/%s" % (metadata_dir, os.path.basename(av_file))
            if av_file.endswith(av_extensions):
                inputxml = sidecar_stem + '_mediainfo.xml'
                inputtracexml = sidecar_stem + '_mediatrace.xml'
                print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
                ififuncs.make_mediainfo(inputxml, 'mediaxmlinput', source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputxml, mediainfo_version)
                )
                print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
                ififuncs.make_mediatrace(
                    inputtracexml, 'mediatracexmlinput', source_file
                )
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Mediatrace technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s' % (inputtracexml, mediainfo_version)
                )
            elif av_file.endswith(document_extensions):
                # Check each blacklisted word against the filename (not each
                # character of the filename against the blacklist).
                if any(word in av_file for word in blacklist):
                    continue
                if exiftool_version is None:
                    # NOTE(review): unlike mediainfo_version this output is
                    # not stripped, so it may carry a trailing newline into
                    # the log - confirm whether that matters to log readers.
                    try:
                        exiftool_version = subprocess.check_output([
                            'exiftool', '-ver'
                        ])
                    except subprocess.CalledProcessError as grepexc:
                        exiftool_version = grepexc.output.rstrip().splitlines()[1]
                if siegfried_version is None:
                    try:
                        siegfried_version = subprocess.check_output([
                            'sf', '-version'
                        ])
                    except subprocess.CalledProcessError as grepexc:
                        siegfried_version = grepexc.output.rstrip().splitlines()[1]
                inputxml = sidecar_stem + '_exiftool.xml'
                inputtracexml = sidecar_stem + '_siegfried.json'
                # Same print -> run -> log order as the AV branch, so an
                # event is only logged after its tool has been invoked.
                print('Generating exiftool xml of input file and saving it in %s' % inputxml)
                ififuncs.make_exiftool(inputxml, source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via exiftool, eventOutcome=%s, agentName=%s' % (inputxml, exiftool_version)
                )
                print('Generating Siegfried json of input file and saving it in %s' % inputtracexml)
                ififuncs.make_siegfried(inputtracexml, source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Format identification - eventType=format identification, eventDetail=Format identification via PRONOM signatures using Siegfried, eventOutcome=%s, agentName=%s' % (inputtracexml, siegfried_version)
                )
Ejemplo n.º 2
0
def get_metadata(path, new_log_textfile):
    '''
    Recursively create technical metadata sidecars for the files under path.

    AV files (matched by extension, case-insensitively) get a mediainfo XML
    and a mediatrace XML. Document, image and spreadsheet files get a
    Siegfried JSON and, unless they are .txt or .csv, an exiftool JSON.
    Hidden files (names starting with '.') are ignored and directories named
    'metadata' are not descended into, so sidecars are not analysed again.

    Every sidecar is written to <path>/metadata and named after the source
    file's basename; one event per sidecar is appended to new_log_textfile
    through ififuncs.generate_log.
    NOTE(review): the metadata directory is not created here - presumably the
    caller has already made it; confirm.

    This should probably go in ififuncs as it could be used by other scripts.
    '''
    # Names are lower-cased before matching, so only lower-case suffixes
    # are listed.
    av_extensions = (
        '.mov', '.mp4', '.mkv', '.mxf', '.dv', '.3gp', '.webm', '.swf', '.avi'
    )
    document_extensions = (
        '.tif', '.tiff', '.doc', '.txt', '.docx', '.pdf', '.jpg', '.jpeg',
        '.png', '.rtf', '.xml', '.odt', '.cr2', '.epub', '.ppt', '.pptx',
        '.xls', '.xlsx', '.gif', '.bmp', '.csv'
    )
    # Document types that get format identification only, no exiftool run.
    no_exiftool_extensions = ('.txt', '.csv')
    metadata_dir = os.path.join(path, 'metadata')
    mediainfo_version = 'mediainfo'
    try:
        mediainfo_version = subprocess.check_output(['mediainfo',
                                                     '--Version']).rstrip()
    except subprocess.CalledProcessError as grepexc:
        # On a non-zero exit the version is taken from the second line of
        # whatever the command printed.
        mediainfo_version = grepexc.output.rstrip().splitlines()[1]
    # Resolved on first use: the tools are only needed when a matching file
    # is actually found, and one lookup per call is enough.
    exiftool_version = None
    siegfried_version = None
    for root, directories, filenames in os.walk(path):
        # Pruning in place stops os.walk from entering 'metadata' folders.
        directories[:] = [d for d in directories if d != 'metadata']
        for av_file in filenames:
            if av_file[0] == '.':
                continue
            lowered_name = av_file.lower()
            source_file = os.path.join(root, av_file)
            sidecar_stem = "%s/%s" % (metadata_dir, os.path.basename(av_file))
            if lowered_name.endswith(av_extensions):
                inputxml = sidecar_stem + '_mediainfo.xml'
                inputtracexml = sidecar_stem + '_mediatrace.xml'
                print('Generating mediainfo xml of input file and saving it in %s' % inputxml)
                ififuncs.make_mediainfo(inputxml, 'mediaxmlinput', source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s'
                    % (inputxml, mediainfo_version))
                print('Generating mediatrace xml of input file and saving it in %s' % inputtracexml)
                ififuncs.make_mediatrace(inputtracexml, 'mediatracexmlinput',
                                         source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Metadata extraction - eventDetail=Mediatrace technical metadata extraction via mediainfo, eventOutcome=%s, agentName=%s'
                    % (inputtracexml, mediainfo_version))
            elif lowered_name.endswith(document_extensions):
                if not lowered_name.endswith(no_exiftool_extensions):
                    if exiftool_version is None:
                        # NOTE(review): unlike mediainfo_version this output
                        # is not stripped, so it may carry a trailing newline
                        # into the log - confirm whether that matters.
                        try:
                            exiftool_version = subprocess.check_output(
                                ['exiftool', '-ver'])
                        except subprocess.CalledProcessError as grepexc:
                            exiftool_version = grepexc.output.rstrip(
                            ).splitlines()[1]
                    inputxml = sidecar_stem + '_exiftool.json'
                    # Run the tool before logging its event, as the other
                    # branches do, so the log never records an outcome for
                    # a tool that has not yet been invoked.
                    print('Generating exiftool json of input file and saving it in %s' % inputxml)
                    ififuncs.make_exiftool(inputxml, source_file)
                    ififuncs.generate_log(
                        new_log_textfile,
                        'EVENT = Metadata extraction - eventDetail=Technical metadata extraction via exiftool, eventOutcome=%s, agentName=%s'
                        % (inputxml, exiftool_version))
                if siegfried_version is None:
                    try:
                        siegfried_version = subprocess.check_output(
                            ['sf', '-version'])
                    except subprocess.CalledProcessError as grepexc:
                        siegfried_version = grepexc.output.rstrip().splitlines(
                        )[1]
                inputtracexml = sidecar_stem + '_siegfried.json'
                print('Generating Siegfried json of input file and saving it in %s' % inputtracexml)
                ififuncs.make_siegfried(inputtracexml, source_file)
                ififuncs.generate_log(
                    new_log_textfile,
                    'EVENT = Format identification - eventType=format identification, eventDetail=Format identification via PRONOM signatures using Siegfried, eventOutcome=%s, agentName=%s'
                    % (inputtracexml, siegfried_version))