def create_thumbnail(obj, dsid, tnid):
    """Create a JPEG thumbnail of a datastream and store it on the object.

    The datastream ``dsid`` is downloaded into a temporary directory and
    rendered with the external ``convert`` program at the module-level
    ``tn_size``. On success the result is stored as datastream ``tnid``
    and, under the conditions below, also as the object's ``TN`` datastream.

    Args:
        obj: Fedora object that owns the datastream.
        dsid: ID of the source datastream.
        tnid: ID of the thumbnail datastream; also used as the temporary
            output file name.

    Returns:
        The exit status of ``convert`` (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "tmp")
    try:
        thumbnail_path = directory + '/' + tnid

        # The '[0]' suffix is ImageMagick syntax for "first frame/page only".
        r = subprocess.call([
            'convert', directory + '/' + filename + '[0]',
            '-thumbnail', '%sx%s' % (tn_size[0], tn_size[1]),
            thumbnail_path,
        ])

        if r == 0:
            update_datastream(obj, tnid, thumbnail_path,
                              label='thumbnail', mimeType='image/jpeg')

            # This is necessary because we are using curl, and the library
            # caches.  The TN label records the MIME type of the datastream
            # it was derived from; only replace a TN that did not come from
            # an image, and only when this source is an image.
            try:
                if (obj['TN'].label.split('/')[0] != 'image'
                        and obj[dsid].mimeType.split('/')[0] == 'image'):
                    update_datastream(obj, 'TN', thumbnail_path,
                                      label=obj[dsid].mimeType,
                                      mimeType='image/jpeg')
            except FedoraConnectionException:
                # NOTE(review): presumably raised when TN does not exist yet
                # -- confirm.  In that case the thumbnail is always stored.
                update_datastream(obj, 'TN', thumbnail_path,
                                  label=obj[dsid].mimeType,
                                  mimeType='image/jpeg')
        else:
            logging.warning(
                'PID:%s DSID:%s Thumbnail creation failed (return code:%d).',
                obj.pid, dsid, r)

        logging.debug(directory)
        logging.debug(filename)
        logging.debug(tnid)
        logging.debug(os.listdir(directory))
    finally:
        # Always drop the temporary download, even when a step above raised.
        rmtree(directory, ignore_errors=True)
    return r
def do_abbyy_ocr(obj, dsid):
    """Run the ABBYY FineReader CLI on a datastream and store its outputs.

    The datastream ``dsid`` is downloaded into a temporary directory and
    passed to ``./CLI`` (run from ``abbyy_cli_home``), which is asked for a
    PDF, an XML and a plain-text rendition.  When the CLI exits with 0, each
    output file that exists is stored as the ``PDF``, ``ABBYY`` and ``OCR``
    datastream respectively.

    Args:
        obj: Object that owns the datastream and receives the results.
        dsid: ID of the source datastream.
    """
    # Download a datastream as a temp file and get its location and filename.
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    try:
        txtfile = "%(dir)s/tmpfile.txt" % {'dir': directory}
        pdffile = "%(dir)s/tmpfile.pdf" % {'dir': directory}
        xmlfile = "%(dir)s/tmpfile.xml" % {'dir': directory}

        # ABBYY must be run while you are in the CLI directory.
        oldpwd = os.getcwd()
        os.chdir(abbyy_cli_home)
        try:
            r = subprocess.call([
                "./CLI", "-ics",
                "-if", "%(dir)s/%(file)s" % {'dir': directory,
                                             'file': filename},
                "-f", "PDF", "-pem", "ImageOnText", "-pfpf", "Automatic",
                "-pfq", "90", "-pfpr", "150", "-of", pdffile,
                "-f", "XML", "-xaca", "-of", xmlfile,
                "-f", "Text", "-tel", "-tpb", "-tet", "UTF8", "-of", txtfile,
            ])
            logging.debug(os.listdir(directory))

            if r == 0:
                # (datastream ID, produced file, label, MIME type), stored in
                # this order.
                outputs = (
                    ('OCR', txtfile, 'OCR Text', 'text/plain'),
                    ('ABBYY', xmlfile, 'FineReader XML data',
                     'application/xml'),
                    ('PDF', pdffile, 'Page PDF', 'application/pdf'),
                )
                for out_dsid, path, label, mime_type in outputs:
                    if os.path.exists(path):
                        update_datastream(obj, out_dsid, path,
                                          label=label, mimeType=mime_type)
            else:
                logging.error(
                    "Error calling ABBYY FineReader CLI. Error code %d.", r)
        finally:
            # Go back to the previous working directory, even on failure.
            os.chdir(oldpwd)
    finally:
        rmtree(directory, ignore_errors=True)
def runRules(self, obj, dsid, body):
    """Run the TIFF conversions for an object when its 'tiff' datastream changes.

    A message with an empty ``dsid`` whose ``body`` contains the text
    'reschedule import' is treated as a request for the 'tiff' datastream.
    Any other datastream ID is ignored.  Errors are logged, never raised.

    Args:
        obj: Object the message refers to; ``obj.pid`` is logged.
        dsid: ID of the datastream named in the message (may be '').
        body: Message body, searched for the reschedule marker.
    """
    logging.debug('runRules start')
    try:
        logging.info("pid:" + obj.pid + ", dsid:" + dsid)

        # is this a reschedule request?
        if dsid == '' and body.find('reschedule import') >= 0:
            dsid = 'tiff'

        if dsid == 'tiff':
            cwd = os.getcwd()
            tmpdir, tiff_file = get_datastream_as_file(obj, dsid, 'tiff')
            try:
                os.chdir(tmpdir)
                # Only push results to Fedora when the conversions report
                # success (a truthy return value).
                if run_conversions(obj, tmpdir, tiff_file):
                    update_fedora(obj, tmpdir)
            finally:
                # Leave the temp dir before deleting it, and do both even
                # when a conversion raised.
                os.chdir(cwd)
                rmtree(tmpdir, ignore_errors=True)
        else:
            logging.debug("ignoring dsid: " + dsid)
    except Exception:
        # Keep the listener alive on any ordinary failure, but let
        # KeyboardInterrupt / SystemExit propagate.
        logging.error("an exception occurred: " + str(sys.exc_info()[0]))
    logging.debug('runRules end')
def transform_abbyy_xml(obj, dsid):
    """Transform a datastream with the ABBYY-to-TEI stylesheet and store it.

    The datastream ``dsid`` is downloaded into a temporary directory and run
    through Saxon (``saxon_home``/saxon9.jar) with the
    ``emic-simple-ABBYY2TEI.xsl`` stylesheet located next to this module.
    When Saxon exits with 0 and the output file exists, it is stored as the
    ``TEI`` datastream.

    Args:
        obj: Object that owns the datastream and receives the result.
        dsid: ID of the source datastream.
    """
    # Local import: this block did not previously reference rmtree, so do not
    # assume the module already has it in scope.
    import shutil

    directory, filename = get_datastream_as_file(obj, dsid, 'abbyy')
    try:
        plugins_dir = os.path.dirname(__file__)
        source_path = directory + '/' + filename
        tei_path = source_path + '_tei'

        r = subprocess.call([
            "java", "-jar", saxon_home + '/saxon9.jar', '-t',
            '-s:' + source_path,
            '-xsl:' + plugins_dir + '/emic-simple-ABBYY2TEI.xsl',
            '-o:' + tei_path,
        ])
        logging.debug(os.listdir(directory))

        if r == 0:
            if os.path.exists(tei_path):
                update_datastream(obj, 'TEI', tei_path,
                                  label='PAGE LEVEL TEI', mimeType='text/xml')
        else:
            # The previous message had no placeholder, so the return code
            # was silently dropped from the log.
            logging.error("Error calling saxon. Error code %d.", r)
    finally:
        # The temporary download was never removed before.
        shutil.rmtree(directory, ignore_errors=True)
def create_ogg(obj, dsid, oggid):
    """Encode a datastream to Ogg Vorbis with ffmpeg and store the result.

    The datastream ``dsid`` is downloaded into a temporary directory and
    encoded with ``ffmpeg -acodec libvorbis -ab 48k``.  On success the output
    is stored as datastream ``oggid``.

    Args:
        obj: Object that owns the datastream and receives the result.
        dsid: ID of the source (WAV) datastream.
        oggid: ID of the output datastream; also used as the temporary
            output file name.

    Returns:
        The exit status of ``ffmpeg`` (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "wav")
    try:
        ogg_path = directory + '/' + oggid

        # Make OGG with ffmpeg
        r = subprocess.call([
            'ffmpeg', '-i', directory + '/' + filename,
            '-acodec', 'libvorbis', '-ab', '48k', ogg_path,
        ])
        if r == 0:
            update_datastream(obj, oggid, ogg_path,
                              label='compressed to ogg', mimeType='audio/ogg')
        else:
            logging.warning(
                'PID:%s DSID:%s OGG creation failed (ffmpeg return code:%d).',
                obj.pid, dsid, r)
    finally:
        # Remove the temporary download even if encoding or upload raised.
        rmtree(directory, ignore_errors=True)
    return r
def create_mp3(obj, dsid, mp3id):
    """Encode a datastream to MP3 with lame and store the result.

    The datastream ``dsid`` is downloaded into a temporary directory and
    encoded with ``lame -mm --cbr -b48``.  On success the output is stored
    as datastream ``mp3id``.

    Args:
        obj: Object that owns the datastream and receives the result.
        dsid: ID of the source (WAV) datastream.
        mp3id: ID of the output datastream; also used as the temporary
            output file name.

    Returns:
        The exit status of ``lame`` (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "wav")
    try:
        mp3_path = directory + '/' + mp3id

        # Make MP3 with lame
        r = subprocess.call([
            'lame', '-mm', '--cbr', '-b48',
            directory + '/' + filename, mp3_path,
        ])
        if r == 0:
            update_datastream(obj, mp3id, mp3_path,
                              label='compressed to mp3', mimeType='audio/mpeg')
        else:
            logging.warning(
                'PID:%s DSID:%s MP3 creation failed (lame return code:%d).',
                obj.pid, dsid, r)
    finally:
        # Remove the temporary download even if encoding or upload raised.
        rmtree(directory, ignore_errors=True)
    return r
def create_jp2(obj, dsid):
    """Derive lossless JP2, lossy JP2 and thumbnail datastreams from a TIFF.

    The datastream ``dsid`` is downloaded into a temporary directory and
    three independent conversions are attempted; each one that exits with 0
    is stored on the object:

    * ``kdu_compress`` with ``Creversible=yes`` -> ``LosslessJP2``
    * ``kdu_compress`` with ``-rate 0.5``       -> ``JP2``
    * ``convert -thumbnail 200x200``            -> ``TN``

    A failed conversion is logged and does not stop the remaining ones.

    Args:
        obj: Object that owns the datastream and receives the derivatives.
        dsid: ID of the source (TIFF) datastream.
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    try:
        source_path = directory + '/' + filename
        lossless_path = directory + '/tmpfile_lossless.jp2'
        lossy_path = directory + '/tmpfile_lossy.jp2'
        thumbnail_path = directory + '/tmpfile_TN.jpg'

        # Make a lossless JP2
        r = subprocess.call([
            "kdu_compress", "-i", source_path, "-o", lossless_path,
            "-rate", "-,0.5", "Clayers=2", "Creversible=yes", "Clevels=8",
            "Cprecincts={256,256},{256,256},{128,128}", "Corder=RPCL",
            "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
        ])
        if r == 0:
            update_datastream(obj, 'LosslessJP2', lossless_path,
                              label='Lossless JPEG2000', mimeType='image/jp2')
        else:
            logging.warning(
                'DSID:%s lossless JP2 creation failed '
                '(kdu_compress return code:%d).', dsid, r)

        # Make a lossy JP2
        r2 = subprocess.call([
            "kdu_compress", "-i", source_path, "-o", lossy_path,
            "-rate", "0.5", "Clayers=1", "Clevels=7",
            "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},"
            "{64,64},{64,64},{32,32},{16,16}",
            "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
            "Cuse_sop=yes",
        ])
        if r2 == 0:
            update_datastream(obj, 'JP2', lossy_path,
                              label='Compressed JPEG2000', mimeType='image/jp2')
        else:
            logging.warning(
                'DSID:%s lossy JP2 creation failed '
                '(kdu_compress return code:%d).', dsid, r2)

        # Make a thumbnail
        r3 = subprocess.call([
            "convert", "-thumbnail", "200x200", source_path, thumbnail_path,
        ])
        if r3 == 0:
            # NOTE(review): 'image/jpg' is not the registered JPEG MIME type
            # ('image/jpeg'); left unchanged because consumers may match on
            # the stored value -- confirm before normalising.
            update_datastream(obj, 'TN', thumbnail_path,
                              label='Thumbnail', mimeType='image/jpg')
        else:
            logging.warning(
                'DSID:%s thumbnail creation failed (convert return code:%d).',
                dsid, r3)

        logging.debug(os.listdir(directory))
    finally:
        # Remove the temporary download even if a conversion or upload raised.
        rmtree(directory, ignore_errors=True)
def create_jp2(obj, dsid):
    """Derive lossless JP2, lossy JP2 and thumbnail datastreams from a TIFF.

    The datastream ``dsid`` is downloaded into a temporary directory.  Three
    independent conversions are run in order and every one that exits with 0
    is stored on the object as ``LosslessJP2``, ``JP2`` and ``TN``
    respectively.  A failed conversion is logged and the remaining ones are
    still attempted.

    Args:
        obj: Object that owns the datastream and receives the derivatives.
        dsid: ID of the source (TIFF) datastream.
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    try:
        source_path = directory + '/' + filename
        lossless_path = directory + '/tmpfile_lossless.jp2'
        lossy_path = directory + '/tmpfile_lossy.jp2'
        thumbnail_path = directory + '/tmpfile_TN.jpg'

        # One entry per derivative:
        # (command line, produced file, datastream ID, label, MIME type)
        derivatives = (
            (
                ["kdu_compress", "-i", source_path, "-o", lossless_path,
                 "-rate", "-,0.5", "Clayers=2", "Creversible=yes",
                 "Clevels=8", "Cprecincts={256,256},{256,256},{128,128}",
                 "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R",
                 "Cblk={32,32}"],
                lossless_path, 'LosslessJP2', 'Lossless JPEG2000',
                'image/jp2',
            ),
            (
                ["kdu_compress", "-i", source_path, "-o", lossy_path,
                 "-rate", "0.5", "Clayers=1", "Clevels=7",
                 "Cprecincts={256,256},{256,256},{256,256},{128,128},"
                 "{128,128},{64,64},{64,64},{32,32},{16,16}",
                 "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R",
                 "Cblk={32,32}", "Cuse_sop=yes"],
                lossy_path, 'JP2', 'Compressed JPEG2000', 'image/jp2',
            ),
            (
                # NOTE(review): 'image/jpg' is not the registered JPEG MIME
                # type ('image/jpeg'); kept as-is pending confirmation that
                # no consumer matches on the stored value.
                ["convert", "-thumbnail", "200x200", source_path,
                 thumbnail_path],
                thumbnail_path, 'TN', 'Thumbnail', 'image/jpg',
            ),
        )

        for command, output_path, out_dsid, label, mime_type in derivatives:
            status = subprocess.call(command)
            if status == 0:
                update_datastream(obj, out_dsid, output_path,
                                  label=label, mimeType=mime_type)
            else:
                logging.warning(
                    'DSID:%s %s creation failed (%s return code:%d).',
                    dsid, out_dsid, command[0], status)

        logging.debug(os.listdir(directory))
    finally:
        # Remove the temporary download even if a conversion or upload raised.
        rmtree(directory, ignore_errors=True)
def create_swf(obj, dsid, swfid):
    """Convert a PDF datastream to SWF with pdf2swf and store the result.

    The datastream ``dsid`` is downloaded into a temporary directory and
    converted with ``pdf2swf``.  If that fails, the conversion is retried
    once with the extra ``-s poly2bitmap`` option.  On success the output is
    stored as datastream ``swfid``.

    Args:
        obj: Object that owns the datastream and receives the result.
        dsid: ID of the source (PDF) datastream.
        swfid: ID of the output datastream; also used as the temporary
            output file name.

    Returns:
        The exit status of the last ``pdf2swf`` run (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "pdf")
    try:
        swf_path = directory + '/' + swfid

        # NOTE(review): '-T 9' is passed as ONE argument (flag and value
        # joined by a space).  Kept exactly as before; confirm pdf2swf
        # parses it as intended.
        command = ['pdf2swf', directory + '/' + filename, '-o', swf_path,
                   '-T 9', '-f', '-t', '-s', 'storeallcharacters', '-G']

        r = subprocess.call(command)
        if r != 0:
            logging.warning(
                'PID:%s DSID:%s SWF creation failed. Trying alternative.',
                obj.pid, dsid)
            r = subprocess.call(command + ['-s', 'poly2bitmap'])
            if r != 0:
                logging.warning(
                    'PID:%s DSID:%s SWF creation failed '
                    '(pdf2swf return code:%d).', obj.pid, dsid, r)

        if r == 0:
            update_datastream(obj, swfid, swf_path, label='pdf to swf',
                              mimeType='application/x-shockwave-flash')
    finally:
        # Remove the temporary download even if conversion or upload raised.
        rmtree(directory, ignore_errors=True)
    return r
def runRules(self, obj, dsid, body):
    """Run the TIFF conversions for an object when its 'tiff' datastream changes.

    A message with an empty ``dsid`` whose ``body`` contains the text
    'reschedule import' is treated as a request for the 'tiff' datastream.
    Any other datastream ID is ignored.  Failures while downloading or
    converting are logged, not raised.

    Args:
        obj: Object the message refers to; ``obj.pid`` is logged.
        dsid: ID of the datastream named in the message (may be '').
        body: Message body, searched for the reschedule marker.
    """
    logging.info("pid:" + obj.pid + ", dsid:" + dsid)

    # is this a reschedule request?
    if dsid == '' and body.find('reschedule import') >= 0:
        dsid = 'tiff'

    if dsid != 'tiff':
        logging.debug("ignoring dsid: " + dsid)
        return

    # Capture the working directory BEFORE anything that can fail; it used
    # to be assigned inside the try block after the download, so a failed
    # download ended in an UnboundLocalError at os.chdir(cwd).
    cwd = os.getcwd()
    tmpdir = None
    try:
        tmpdir, tiff_file = get_datastream_as_file(obj, dsid, 'tiff')
        os.chdir(tmpdir)
        # Only push results to Fedora when the conversions report success
        # (a truthy return value).
        if run_conversions(obj, tmpdir, tiff_file):
            update_fedora(obj, tmpdir)
    except Exception as e:
        logging.error("an exception occurred: " + str(e))
    finally:
        # Leave the temp dir before deleting it; clean up on failure as well.
        os.chdir(cwd)
        if tmpdir is not None:
            rmtree(tmpdir, ignore_errors=True)
def create_jp2(obj, dsid, jp2id):
    """Create a compressed JPEG 2000 derivative of a TIFF datastream.

    The datastream ``dsid`` is downloaded into a temporary directory and
    processed in up to three steps:

    1. ``convert +compress`` writes an uncompressed copy of the TIFF; if
       this fails the function gives up.
    2. ``kdu_compress -rate 0.5`` turns that copy into a JP2.
    3. If step 2 fails, ``convert -compress JPEG2000 -quality 50%`` is tried
       on the original download instead.

    When a JP2 was produced it is stored as datastream ``jp2id``.

    Args:
        obj: Object that owns the datastream and receives the result.
        dsid: ID of the source (TIFF) datastream.
        jp2id: ID of the output datastream.

    Returns:
        The exit status of the last external command run (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    try:
        source_path = directory + '/' + filename
        uncompressed_path = directory + '/uncompressed.tiff'
        jp2_path = directory + '/tmpfile_lossy.jp2'

        r = subprocess.call(
            ["convert", source_path, '+compress', uncompressed_path])
        if r != 0:
            logging.warning(
                'PID:%s DSID:%s JP2 creation failed '
                '(convert return code:%d).', obj.pid, dsid, r)
            return r

        r = subprocess.call([
            "kdu_compress", "-i", uncompressed_path, "-o", jp2_path,
            "-rate", "0.5", "Clayers=1", "Clevels=7",
            "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},"
            "{64,64},{64,64},{32,32},{16,16}",
            "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
            "Cuse_sop=yes",
        ])
        if r != 0:
            logging.warning(
                'PID:%s DSID:%s JP2 creation failed. Trying alternative.',
                obj.pid, dsid)
            r = subprocess.call([
                "convert", source_path, '-compress', 'JPEG2000',
                '-quality', '50%', jp2_path,
            ])
            if r != 0:
                # r is now the exit status of the fallback convert run, not
                # of kdu_compress as the old message claimed.
                logging.warning(
                    'PID:%s DSID:%s JP2 creation failed '
                    '(convert return code:%d).', obj.pid, dsid, r)

        if r == 0:
            update_datastream(obj, jp2id, jp2_path,
                              label='Compressed JPEG2000', mimeType='image/jp2')
        return r
    finally:
        # Single cleanup point for every exit path, including exceptions.
        rmtree(directory, ignore_errors=True)