def set_output_options(derivType, inputPath, outputDir):
    '''
    Build the output-side options for a derivative and make sure the
    per-derivative delivery directory exists.

    derivType -- name of the derivative; 'resourcespace' and 'proresHQ'
                 are the only types handled here
    inputPath -- path of the source file; its extension-less basename
                 (from pymmFunctions.get_base) names the output file
    outputDir -- parent directory in which a subdirectory named after
                 derivType is created

    Returns a list whose last item is the output file path:
      'resourcespace' -> ['-strict', '-2', <derivDeliv>/<base>_lrp.mp4]
      'proresHQ'      -> [<derivDeliv>/<base>_proresHQ.mov]
    For any other derivType the list is returned empty.
    '''
    outputOptions = []
    strict = ['-strict', '-2']
    baseMinusExtension = pymmFunctions.get_base(
        inputPath,
        'baseMinusExtension'
        )

    # make a delivery directory for a package that is based on the deriv type
    derivDeliv = os.path.join(outputDir, derivType)
    if not os.path.isdir(derivDeliv):
        print("Making a directory at " + derivDeliv)
        try:
            os.mkdir(derivDeliv)
        except OSError:
            # best effort: report the problem and still hand back the
            # intended output path, as before
            print("couldn't make a dir at " + derivDeliv)

    if derivType == 'resourcespace':
        ext = 'mp4'
        outputOptions.extend(strict)
        outputFilePath = os.path.join(
            derivDeliv,
            baseMinusExtension + '_lrp.' + ext
            )
        outputOptions.append(outputFilePath)
    elif derivType == 'proresHQ':
        ext = 'mov'
        outputFilePath = os.path.join(
            derivDeliv,
            baseMinusExtension + '_proresHQ.' + ext
            )
        outputOptions.append(outputFilePath)
    else:
        print('~ ~ ~ ~ ~')
        # DO STUFF TO OTHER DERIV TYPES

    return outputOptions
def make_hashdeep_manifest(inputPath, _type):
    '''
    given a directory, make a hashdeep manifest.
    run hashdeep with the target dir as its working directory so the
    manifest is written with relative paths.

    For the SIP manifest, this currently relies on a bagit-style tree to
    contain both the manifest and the package.

    proposal: also store the manifest as a blob (or as text?) in a db
    entry... yeah.

    inputPath -- directory that contains a child directory with the
                 same name (the basename from pymmFunctions.get_base)
    _type     -- 'hashdeep': hash <inputPath>/<name>
                 'objects':  hash <inputPath>/<name>/objects and write
                             the manifest under <inputPath>/<name>/metadata

    Returns the manifest path built by manifest_path(), or False when
    the expected directories are missing or _type is not recognized.
    The exit status of hashdeep is not inspected.
    '''
    _uuid = pymmFunctions.get_base(inputPath)

    if _type == 'hashdeep':
        # there should be a child dir with the same name as inputPath
        target = os.path.join(inputPath, _uuid)
        if not os.path.isdir(target):
            print("the expected directory structure is not present.")
            # @logme
            return False
    elif _type == 'objects':
        # we're in the 'real' SIP dir so look for a subdir called 'objects'
        target = os.path.join(inputPath, _uuid, 'objects')
        # we want to write the manifest to the metadata dir
        inputPath = os.path.join(inputPath, _uuid, 'metadata')
        if not os.path.isdir(target) or not os.path.isdir(inputPath):
            print("the expected directory structure is not present.")
            # @logme
            return False
    else:
        # without this guard `target` would be unbound further down
        print("unrecognized manifest type: {}".format(_type))
        return False

    manifestPath = manifest_path(inputPath, _uuid, _type)

    # run hashdeep on the package
    command = ['hashdeep', '-rvvl', '-c', 'md5', '-W', manifestPath, '.']
    # cwd= gives the child the same working directory a chdir would,
    # without touching (and possibly failing to restore) our own.
    # run() drains the stdout pipe, so a chatty hashdeep cannot block.
    subprocess.run(command, cwd=target, stdout=subprocess.PIPE)

    return manifestPath
def main():
    '''
    Command-line entry point: copy the input file or directory to the
    destination with copy_file() / copy_dir().

    Reads the config via pymmFunctions.read_config() and the arguments
    via set_args(). `inputPath` and `destination` are required; if
    either is unset (or empty) a message is printed and the process
    exits with status 1. When loglevel is 'all' an rsync log path is
    built under logDir; otherwise no rsync log is requested ('').

    Exits with status 1 when the input is neither a file nor a directory.
    '''
    config = pymmFunctions.read_config()
    args = set_args()
    requiredArgs = ['inputPath', 'destination']
    inputPath = args.inputPath
    destination = args.destination
    loglevel = args.loglevel
    logDir = args.logDir

    # Quit if there are required variables missing.
    # An empty string is as useless as None here, and would otherwise
    # blow up on the trailing-slash check below.
    missingArgs = [
        _arg for _arg in requiredArgs
        if getattr(args, _arg) in (None, '')
        ]
    for _arg in missingArgs:
        print("CONFIGURATION PROBLEM:\n"
              "You forgot to set " + _arg + ". It is required.\n"
              "Try again, but set " + _arg + " with the flag --" + _arg + "\n")
    if missingArgs:
        sys.exit(1)

    # set up rsync log
    rsyncLogpath = ''
    if loglevel == 'all':
        # not used yet:
        # AT WHAT POINT WILL WE ACTUALLY WANT TO PYMMLOG A COPY? FINAL AIP XFER?
        pymmLogpath = os.path.join(
            config['logging']['pymm_log_dir'],
            'pymm_log.txt'
            )
        try:
            rsyncLogpath = os.path.join(
                logDir,
                'rsync_log_' + pymmFunctions.get_base(inputPath)
                + '_' + pymmFunctions.timestamp('now') + '.txt'
                )
        except Exception:
            # e.g. logDir was never set; carry on without an rsync log
            print("there was a problem getting the rsync log path ....")
            rsyncLogpath = ''

    # sniff what the input is
    dir_or_file = pymmFunctions.dir_or_file(inputPath)
    if dir_or_file == False:
        print("oy you've got big problems. " + inputPath
              + " is not a directory or a file. what is it? is it a ghost?")
        sys.exit(1)
    # copy the input according to its type
    elif dir_or_file == 'dir':
        # add trailing slash for rsync destination directory
        if not destination.endswith('/'):
            destination = destination + '/'
        copy_dir(inputPath, rsyncLogpath, destination)
    elif dir_or_file == 'file':
        copy_file(inputPath, rsyncLogpath, destination)
    else:
        print("o_O what is going on here? you up to something?")
        sys.exit(1)
def set_output_options(derivType, inputType, inputPath, outputDir):
    '''
    Set the output filepath and its extension

    derivType -- 'resourcespace' or 'proresHQ'; anything else gets no
                 output path
    inputType -- for 'resourcespace': 'VIDEO' or 'sequence' -> mp4,
                 'AUDIO' -> mp3, anything else falls back to mp4 with a
                 printed complaint
    inputPath -- source file; its extension-less basename (from
                 pymmFunctions.get_base) names the output file
    outputDir -- parent directory in which a subdirectory named after
                 derivType is created

    Returns the result of options_to_list() (defined elsewhere in this
    file) for the collected options, with the output file path appended
    as the last item whenever derivType is one of the handled types.
    '''
    outputOptions = {}
    # stays None for deriv types we don't know how to name yet
    outputFilePath = None
    baseMinusExtension = pymmFunctions.get_base(
        inputPath,
        'baseMinusExtension'
        )

    # make a delivery directory for a package that is based on the deriv type
    derivDeliv = os.path.join(outputDir, derivType)
    if not os.path.isdir(derivDeliv):
        print("Making a directory at " + derivDeliv)
        try:
            os.mkdir(derivDeliv)
        except OSError:
            # best effort: report the problem and keep going
            print("couldn't make a dir at " + derivDeliv)

    if derivType == 'resourcespace':
        if inputType in ('VIDEO', 'sequence'):
            ext = 'mp4'
            # the ffmpeg docs say the strict flag is no longer required
            # for aac encoding in mp4 but I ran into issues without it,
            # so I'll keep it for now (7/2018)
            outputOptions['-strict'] = '-2'
        elif inputType == 'AUDIO':
            ext = 'mp3'
        else:
            ext = 'mp4'
            print("F**K EVERYTHING: ERROR GETTING THE FILE TYPE.")
        outputFilePath = os.path.join(
            derivDeliv,
            baseMinusExtension + '_access.' + ext
            )
    elif derivType == 'proresHQ':
        ext = 'mov'
        outputFilePath = os.path.join(
            derivDeliv,
            baseMinusExtension + '_proresHQ.' + ext
            )
    else:
        print('~ ~ ~ ~ ~')
        # DO STUFF TO OTHER DERIV TYPES

    outputOptions = options_to_list(outputOptions)
    # an unknown derivType used to crash here on the unbound path
    if outputFilePath is not None:
        outputOptions.append(outputFilePath)

    return outputOptions
def make_frame_md5(inputPath,metadataDir):
    '''
    Write an ffmpeg framemd5 report for inputPath into metadataDir.

    inputPath   -- file to hash; must pass pymmFunctions.is_av()
    metadataDir -- directory that receives <basename>_frame-md5.txt

    Returns the report path when ffmpeg exits with status 0. Returns
    False when the input is not AV, when ffmpeg cannot be launched, or
    when ffmpeg exits with a non-zero status.
    '''
    print('making frame md5')
    if not pymmFunctions.is_av(inputPath):
        # FUN FACT: YOU CAN RUN FFMPEG FRAMEMD5 ON A TEXT FILE!!
        print(inputPath+" IS NOT AN AV FILE SO WHY ARE YOU TRYING TO MAKE A FRAME MD5 REPORT?")
        return False

    md5File = pymmFunctions.get_base(inputPath)+"_frame-md5.txt"
    frameMd5Filepath = os.path.join(metadataDir,md5File)
    frameMd5Command = [
        'ffmpeg',
        '-i',inputPath,
        '-f','framemd5',
        frameMd5Filepath
        ]
    try:
        # launching is the step that can raise (e.g. ffmpeg not installed),
        # so it has to live inside the try
        output = subprocess.run(
            frameMd5Command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
            )
    except OSError as error:
        print(error)
        return False

    if output.stderr:
        print(output.stderr.decode('utf-8',errors='replace'))
    # the exit status, not the presence of stderr text, says whether
    # the report was actually written
    if output.returncode != 0:
        return False

    return frameMd5Filepath
def get_mediainfo_report(inputPath, destination, _JSON=None, altFileName=None):
    '''
    Run mediainfo on inputPath and either log the report to a file or
    hand back the raw output.

    inputPath   -- path passed to mediainfo
    destination -- directory for the report file; pass something like ''
                   to skip the file and only get the raw output
    _JSON       -- truthy: request JSON output and return it as a string
    altFileName -- overrides the basename used for the report file
                   (exception for the way DPX folders are named in
                   processingVars)

    Returns:
      destination is a directory, _JSON truthy  -> JSON text (the report
                                                   file is written too)
      destination is a directory, _JSON falsy   -> path of the report file
      destination is not a directory, _JSON truthy -> JSON text
      otherwise                                 -> False
    '''
    # handle an exception for the way
    # DPX folders are named in processingVars
    if altFileName:
        basename = altFileName
    else:
        basename = pymmFunctions.get_base(inputPath)

    # write mediainfo output to a logfile if the destination is a directory ..
    # (the truthiness test keeps None from reaching os.path.isdir)
    if destination and os.path.isdir(destination):
        if _JSON:
            outputType = "JSON"
        else:
            outputType = "XML"
        # NOTE(review): the file keeps the .xml name even when the
        # content is JSON -- confirm that downstream readers expect this
        outputFilepath = '{}_mediainfo.xml'.format(
            os.path.join(destination, basename)
            )
        mediainfoOutput = '--LogFile={}'.format(outputFilepath)
        out = subprocess.run(
            [
                'mediainfo',
                inputPath,
                '--Output={}'.format(outputType),
                mediainfoOutput
            ],
            stdout=subprocess.PIPE
            )
        if _JSON:
            return out.stdout.decode('utf-8')
        return outputFilepath

    # ... otherwise pass something like '' as a destination
    # and just get the raw mediainfo output
    if _JSON:
        out = subprocess.run(
            ['mediainfo', '--Output=JSON', inputPath],
            stdout=subprocess.PIPE
            )
        return out.stdout.decode('utf-8')

    # nothing to write and nothing to return: don't bother running mediainfo
    print("{} doesn't exist and you didn't say you "
          "want the raw mediainfo output.\n"
          "What do you want??".format(destination))
    return False
def get_mediainfo_report(inputPath,destination,_JSON=False):
    '''
    Run mediainfo (XML output) on inputPath and either log the report to
    a file or hand back the parsed output.

    inputPath   -- path passed to mediainfo
    destination -- directory that receives <basename>_mediainfo.xml;
                   pass something like '' to skip the file
    _JSON       -- truthy: return the output parsed by xmltodict.parse

    Returns the parsed report when _JSON is truthy; True when only the
    report file was requested; False when destination is not a
    directory and _JSON is falsy.
    '''
    basename = pymmFunctions.get_base(inputPath)

    # write mediainfo output to a logfile if the destination is a directory ...
    # (the truthiness test keeps None from reaching os.path.isdir)
    if destination and os.path.isdir(destination):
        mediainfoOutput = '--LogFile='+os.path.join(destination,basename+'_mediainfo.xml')
        mediainfoCommand = ['mediainfo',inputPath,'--Output=XML',mediainfoOutput]
        writingLogfile = True
    # ... otherwise pass something like '' as a destination and just get the raw mediainfo output
    elif _JSON:
        mediainfoCommand = ['mediainfo','--Output=XML',inputPath]
        writingLogfile = False
    else:
        # nothing to write and nothing to return: don't bother running mediainfo
        print(destination+" doesn't exist and you didn't say you want the raw mediainfo output.\n"
                "What do you want??")
        return False

    mediainfoXML = subprocess.Popen(mediainfoCommand,stdout=subprocess.PIPE)
    # yeah it's an OrderedDict, not JSON, but I will grab the Video track and Audio track as JSON later...
    mediainfoJSON = xmltodict.parse(mediainfoXML.communicate()[0])

    if _JSON:
        return mediainfoJSON
    # only reachable when the logfile was the whole point
    return writingLogfile
def copy_file(inputPath, rsyncLogOptions, destination):
    '''
    rsync a single file into the destination directory.

    hashing is redundant when using rsync, so no separate source/
    destination hash comparison is made here.

    inputPath       -- file to copy
    rsyncLogOptions -- path for rsync's --log-file, or '' (or None) for
                       no log file
    destination     -- directory to copy into; the file keeps its
                       basename (from pymmFunctions.get_base)

    Returns True when rsync exits cleanly, the CalledProcessError when
    rsync exits non-zero, and False when pymmFunctions.get_system() is
    neither 'mac' nor 'linux'.
    NOTE(review): the returned exception object is truthy, so callers
    have to compare the result against True explicitly.
    '''
    destFilepath = os.path.join(
        destination,
        pymmFunctions.get_base(inputPath)
        )

    rsyncCommand = ['rsync', '-rtvPih']
    if rsyncLogOptions:
        rsyncCommand.append('--log-file=' + rsyncLogOptions)
    rsyncCommand.extend([inputPath, destFilepath])
    # print(' '.join(rsyncCommand))

    if pymmFunctions.get_system() not in ('mac', 'linux'):
        print('go get a mac, my man.')
        return False

    try:
        # run() drains the stderr pipe; check_call() with a PIPE never
        # reads it and can block once the pipe buffer fills up
        subprocess.run(rsyncCommand, stderr=subprocess.PIPE, check=True)
    except subprocess.CalledProcessError as error:
        print("rsync failed?")
        print(error)
        if error.stderr:
            print(error.stderr.decode('utf-8', errors='replace'))
        return error

    return True
def main():
    '''
    Command-line entry point: move or copy the input to the destination.

    Reads the config via pymmFunctions.read_config() and the arguments
    via set_args(). `inputPath` and `destination` are required; if
    either is unset (or empty) a message is printed and the process
    exits with status 1.

    When movingSIP is set, the work is handed to move_n_verify_sip().
    Otherwise the input is moved with mv_object() when input and
    destination report the same filesystem id, and copied with
    rsync_object() when they do not (or when the filesystem check
    fails).

    Returns:
      (stagedSIPpath, safe) from move_n_verify_sip() when movingSIP
      (False, False) when the input is neither a file nor a directory
      None after a plain copy/move
    '''
    config = pymmFunctions.read_config()
    args = set_args()
    requiredArgs = ['inputPath', 'destination']
    inputPath = args.inputPath
    movingSIP = args.movingSIP
    destination = args.destination
    loglevel = args.loglevel
    logDir = args.logDir
    useMV = args.useMV

    # Quit if there are required variables missing -- before anything
    # tries to use them. An empty string is as useless as None here.
    missingArgs = [
        _arg for _arg in requiredArgs
        if getattr(args, _arg) in (None, '')
        ]
    for _arg in missingArgs:
        print("CONFIGURATION PROBLEM:\n"
              "You forgot to set {0}. It is required.\n"
              "Try again, but set {0} with the flag --{0}\n".format(_arg))
    if missingArgs:
        sys.exit(1)

    try:
        # see if the input/destination are on the same filesystem
        # if so, we will use mv rather than rsync for efficiency
        inputFS = pymmFunctions.get_filesystem_id(inputPath)
        destFS = pymmFunctions.get_filesystem_id(destination)
        print(inputFS, destFS)
        if inputFS == destFS:
            print("HEYYYY")
            sameFilesystem = True
        else:
            sameFilesystem = False
    except Exception:
        # can't tell, so take the safe route and rsync
        sameFilesystem = False

    if movingSIP:
        stagedSIPpath, safe = move_n_verify_sip(inputPath, destination)
        print(stagedSIPpath)
        return stagedSIPpath, safe

    # set up rsync log
    if loglevel == 'all':
        # not used yet
        pymmLogpath = os.path.join(
            config['logging']['pymm_log_dir'],
            'pymm_log.txt'
            )
        try:
            rsyncLogPath = os.path.join(
                logDir,
                'rsync_log_{}_{}.txt'.format(
                    pymmFunctions.get_base(inputPath),
                    pymmFunctions.timestamp('now')
                    )
                )
        except Exception:
            # e.g. logDir was never set; carry on without an rsync log
            print("there was a problem getting the rsync log path ....")
            rsyncLogPath = ''
    else:
        rsyncLogPath = '.'

    # sniff what the input is
    dir_or_file = pymmFunctions.dir_or_file(inputPath)
    if dir_or_file == False:
        print(
            "oy you've got big problems. {} is not a directory or a file."
            " what is it? is it a ghost?".format(inputPath)
            )
        return False, False
    # copy the input according to its type
    elif dir_or_file == 'dir':
        # add trailing slash for rsync destination directory
        if not destination.endswith('/'):
            destination = destination + '/'
        # NOTE(review): unlike the file branch, useMV is not consulted
        # for directories -- confirm this is intended
        if not sameFilesystem == True:
            rsync_object(inputPath, rsyncLogPath, destination)
        else:
            mv_object(inputPath, destination)
    elif dir_or_file == 'file':
        if not sameFilesystem == True or useMV == False:
            rsync_object(inputPath, rsyncLogPath, destination)
        else:
            mv_object(inputPath, destination)
    else:
        print("o_O what is going on here? you up to something?")
def make_frame_md5(inputPath, metadataDir):
    '''
    Write an ffmpeg framemd5 report for inputPath into metadataDir.

    inputPath   -- file to hash; pymmFunctions.is_av() decides how (or
                   whether) it is processed
    metadataDir -- directory that receives <basename>_frame-md5.txt

    'VIDEO' inputs are hashed as-is. 'AUDIO' inputs are hashed with
    video disabled (-vn) and an asetnsamples filter whose n is the value
    returned by pymmFunctions.get_audio_sample_rate(). 'DPX' inputs are
    skipped for now (see the note in the body).

    Returns the report path when ffmpeg exits with status 0; False for
    non-AV input, skipped types, a failed launch, or a non-zero exit
    status.
    '''
    print('making frame md5')
    print(inputPath)
    md5File = pymmFunctions.get_base(inputPath) + "_frame-md5.txt"
    frameMd5Filepath = os.path.join(metadataDir, md5File)
    av = pymmFunctions.is_av(inputPath)

    if not av:
        # FUN FACT: YOU CAN RUN FFMPEG FRAMEMD5 ON A TEXT FILE!!
        print("{} IS NOT AN AV FILE SO "
              "WHY ARE YOU TRYING TO MAKE "
              "A FRAME MD5 REPORT?".format(inputPath))
        return False

    if av == 'VIDEO':
        frameMd5Command = [
            'ffmpeg',
            '-i', inputPath,
            '-f', 'framemd5',
            frameMd5Filepath
            ]
    elif av == 'AUDIO':
        sampleRate = pymmFunctions.get_audio_sample_rate(inputPath)
        frameMd5Command = [
            'ffmpeg',
            '-i', inputPath,
            '-af', 'asetnsamples=n={}'.format(sampleRate),
            '-f', 'framemd5',
            '-vn',
            frameMd5Filepath
            ]
    else:
        # 'DPX' lands here, along with anything else we don't handle.
        #
        # OK: FOOD FOR THOUGHT:
        # IT TAKES EFFING FOREVER TO CALCULATE FRAMEMD5 VALUES FOR A DPX
        # SEQUENCE. SLIGHTLY LONGER THAN THE HASHDEEP MANIFEST THAT WILL
        # BE CREATED LATER. SO... SKIP FRAMEMD5 FOR DPX? SINCE WE ARE
        # ALREADY CALCULATING A HASH MANIFEST LATER ON? MAYBE LATER GET A
        # FUNCTION TO PARSE A HASH MANIFEST FOR THE FOLDER AND TURN IT
        # INTO A
        #
        # ACTUALLY ON THE SOUPED UP LINUX SERVER THIS IS REALLY FAST.
        # SO MAYBE RUN A BENCH MARK AND IF THE SYSTEM CAN HANDLE IT RUN
        # THIS FUNCTION
        #
        # sketch of the DPX command, kept for when that happens:
        # filePattern,startNumber,file0 = pymmFunctions.parse_sequence_folder(inputPath)
        # frameMd5Command = [
        #     'ffmpeg',
        #     '-start_number',startNumber,
        #     '-i',filePattern,
        #     '-f','framemd5',
        #     frameMd5Filepath
        #     ]
        # print(' '.join(frameMd5Command))
        return False

    try:
        output = subprocess.run(
            frameMd5Command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
            )
    except OSError as error:
        # ffmpeg could not be launched at all
        print(error)
        return False

    # ffmpeg writes its normal chatter to stderr, so the exit status is
    # the only reliable success signal
    if output.returncode != 0:
        print(output.stderr.decode('utf-8', errors='replace'))
        return False

    print("FRAME MD5 CHA CHA CHA")
    return frameMd5Filepath