コード例 #1
0
def transform_abbyy_xml(obj, dsid):
    """Transform an ABBYY XML datastream into page-level TEI with Saxon.

    The datastream ``dsid`` of ``obj`` is fetched through
    ``get_datastream_as_file`` (used here as returning a
    ``(directory, filename)`` pair), run through the
    ``emic-simple-ABBYY2TEI.xsl`` stylesheet that lives next to this module,
    and, when Saxon exits with 0 and the output file exists, stored as the
    ``TEI`` datastream.

    Returns None. A non-zero Saxon exit status is logged as an error.
    """
    # Imported locally so the cleanup does not rely on a module-level import.
    from shutil import rmtree

    directory, filename = get_datastream_as_file(obj, dsid, 'abbyy')
    plugins_dir = os.path.dirname(__file__)
    source_path = directory + '/' + filename
    tei_path = source_path + '_tei'

    try:
        r = subprocess.call([
            "java", "-jar", saxon_home + '/saxon9.jar', '-t',
            '-s:' + source_path,
            '-xsl:' + plugins_dir + '/emic-simple-ABBYY2TEI.xsl',
            '-o:' + tei_path,
        ])
        logging.debug(os.listdir(directory))
        if r == 0:
            if os.path.exists(tei_path):
                update_datastream(obj, 'TEI', tei_path, label='PAGE LEVEL TEI', mimeType='text/xml')
        else:
            # The previous message had no conversion specifier, so the
            # return code was silently dropped from the log.
            logging.error("Error calling saxon. Error code %d.", r)
    finally:
        # Remove the working directory, as the sibling converters do.
        rmtree(directory, ignore_errors=True)
コード例 #2
0
def create_ogg(obj, dsid, oggid):
    """Transcode a WAV datastream to Ogg Vorbis and store it on the object.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file``,
    encoded by ffmpeg (libvorbis, 48k bitrate) into a file named ``oggid``
    and, when ffmpeg exits with 0, stored as datastream ``oggid``.

    Returns the ffmpeg exit status (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "wav")
    wav_path = directory + '/' + filename
    ogg_path = directory + '/' + oggid

    try:
        # Make OGG with ffmpeg
        r = subprocess.call(['ffmpeg', '-i', wav_path, '-acodec', 'libvorbis', '-ab', '48k', ogg_path])
        if r == 0:
            update_datastream(obj, oggid, ogg_path, label='compressed to ogg', mimeType='audio/ogg')
        else:
            logging.warning('PID:%s DSID:%s OGG creation failed (ffmpeg return code:%d).', obj.pid, dsid, r)
    finally:
        # Clean up even when ffmpeg cannot be started or the upload raises.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #3
0
def create_ogg(obj, dsid, oggid):
    """Create an Ogg Vorbis derivative of a WAV datastream.

    Fetches datastream ``dsid`` via ``get_datastream_as_file``, runs ffmpeg
    with the libvorbis codec at a 48k bitrate, and stores the output as
    datastream ``oggid`` when ffmpeg exits with 0. A non-zero exit status is
    logged as a warning.

    Returns the ffmpeg exit status (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "wav")
    source = directory + '/' + filename
    target = directory + '/' + oggid

    try:
        # Make OGG with ffmpeg
        r = subprocess.call([
            'ffmpeg', '-i', source,
            '-acodec', 'libvorbis', '-ab', '48k',
            target,
        ])
        if r == 0:
            update_datastream(obj, oggid, target, label='compressed to ogg', mimeType='audio/ogg')
        else:
            logging.warning('PID:%s DSID:%s OGG creation failed (ffmpeg return code:%d).', obj.pid, dsid, r)
    finally:
        # The working directory must go away on every exit path.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #4
0
def create_mp3(obj, dsid, mp3id):
    """Encode a WAV datastream to MP3 with lame and store it on the object.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file``,
    encoded by ``lame -mm --cbr -b48`` into a file named ``mp3id`` and, when
    lame exits with 0, stored as datastream ``mp3id``.

    Returns the lame exit status (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "wav")
    wav_path = directory + '/' + filename
    mp3_path = directory + '/' + mp3id

    try:
        # Make MP3 with lame
        r = subprocess.call(['lame', '-mm', '--cbr', '-b48', wav_path, mp3_path])
        if r == 0:
            update_datastream(obj, mp3id, mp3_path, label='compressed to mp3', mimeType='audio/mpeg')
        else:
            logging.warning('PID:%s DSID:%s MP3 creation failed (lame return code:%d).', obj.pid, dsid, r)
    finally:
        # Clean up even when lame cannot be started or the upload raises.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #5
0
def create_mp3(obj, dsid, mp3id):
    """Create an MP3 derivative of a WAV datastream.

    Fetches datastream ``dsid`` via ``get_datastream_as_file``, runs
    ``lame -mm --cbr -b48`` on it, and stores the output as datastream
    ``mp3id`` when lame exits with 0. A non-zero exit status is logged as a
    warning.

    Returns the lame exit status (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "wav")
    source = directory + '/' + filename
    target = directory + '/' + mp3id

    try:
        # Make MP3 with lame
        r = subprocess.call(['lame', '-mm', '--cbr', '-b48', source, target])
        if r == 0:
            update_datastream(obj, mp3id, target, label='compressed to mp3', mimeType='audio/mpeg')
        else:
            logging.warning('PID:%s DSID:%s MP3 creation failed (lame return code:%d).', obj.pid, dsid, r)
    finally:
        # The working directory must go away on every exit path.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #6
0
def create_swf(obj, dsid, swfid):
    """Convert a PDF datastream to SWF (for use with flexpaper) and store it.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file`` and
    converted by ``pdf2swf``. When the first attempt exits non-zero, the
    conversion is retried once with ``-s poly2bitmap`` appended. The result
    is stored as datastream ``swfid`` when either attempt exits with 0.

    Returns the exit status of the last ``pdf2swf`` run (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "pdf")
    swf_path = directory + '/' + swfid

    # NOTE(review): '-T 9' is passed as one argv element (flag and value
    # together); confirm pdf2swf parses it as intended.
    command = ['pdf2swf', directory + '/' + filename, '-o', swf_path,
               '-T 9', '-f', '-t', '-s', 'storeallcharacters', '-G']

    try:
        r = subprocess.call(command)
        if r != 0:
            logging.warning('PID:%s DSID:%s SWF creation failed. Trying alternative.', obj.pid, dsid)
            r = subprocess.call(command + ['-s', 'poly2bitmap'])
            if r != 0:
                logging.warning('PID:%s DSID:%s SWF creation failed (pdf2swf return code:%d).', obj.pid, dsid, r)

        if r == 0:
            update_datastream(obj, swfid, swf_path, label='pdf to swf', mimeType='application/x-shockwave-flash')
    finally:
        # Clean up even when pdf2swf cannot be started or the upload raises.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #7
0
def create_swf(obj, dsid, swfid):
    """Create a SWF rendition of a PDF datastream for use with flexpaper.

    Fetches datastream ``dsid`` via ``get_datastream_as_file`` and runs
    ``pdf2swf`` on it. A non-zero exit status triggers one retry with the
    extra ``-s poly2bitmap`` setting. When a run exits with 0 the output is
    stored as datastream ``swfid``; otherwise warnings are logged.

    Returns the exit status of the last ``pdf2swf`` run (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "pdf")
    source = directory + '/' + filename
    target = directory + '/' + swfid

    # NOTE(review): '-T 9' reaches pdf2swf as a single argument; verify that
    # this is the form the tool expects.
    primary = ['pdf2swf', source, '-o', target,
               '-T 9', '-f', '-t', '-s', 'storeallcharacters', '-G']
    fallback = primary + ['-s', 'poly2bitmap']

    try:
        r = subprocess.call(primary)
        if r != 0:
            logging.warning('PID:%s DSID:%s SWF creation failed. Trying alternative.', obj.pid, dsid)
            r = subprocess.call(fallback)
            if r != 0:
                logging.warning('PID:%s DSID:%s SWF creation failed (pdf2swf return code:%d).', obj.pid, dsid, r)

        if r == 0:
            update_datastream(obj, swfid, target, label='pdf to swf', mimeType='application/x-shockwave-flash')
    finally:
        # The working directory must go away on every exit path.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #8
0
def create_thumbnail(obj, dsid, tnid):
    """Create a JPEG thumbnail of a datastream and store it on the object.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file`` and
    its first frame/page is passed to ImageMagick ``convert -thumbnail``
    with a geometry built from the module-level ``tn_size``. On success the
    result is stored as datastream ``tnid``. The object's ``TN`` datastream
    is additionally overwritten when its label does not start with
    ``image`` and the source datastream's MIME type does, or when reading
    those attributes raises ``FedoraConnectionException``.

    Returns the ``convert`` exit status (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "tmp")
    tn_path = directory + '/' + tnid

    try:
        # Make a thumbnail with convert; '[0]' selects the first frame/page.
        r = subprocess.call(['convert', directory + '/' + filename + '[0]', '-thumbnail',
                             '%sx%s' % (tn_size[0], tn_size[1]), tn_path])

        if r == 0:
            update_datastream(obj, tnid, tn_path, label='thumbnail', mimeType='image/jpeg')

            # The TN label is set below to the MIME type of the datastream the
            # thumbnail was made from, so a label starting with 'image' means
            # TN already comes from an image and is left alone.
            try:
                if obj['TN'].label.split('/')[0] != 'image':
                    if obj[dsid].mimeType.split('/')[0] == 'image':
                        update_datastream(obj, 'TN', tn_path, label=obj[dsid].mimeType, mimeType='image/jpeg')
            except FedoraConnectionException:
                # NOTE(review): presumably raised when TN does not exist yet;
                # confirm against the Fedora client library.
                update_datastream(obj, 'TN', tn_path, label=obj[dsid].mimeType, mimeType='image/jpeg')
        else:
            logging.warning('PID:%s DSID:%s Thumbnail creation failed (return code:%d).', obj.pid, dsid, r)

        logging.debug(directory)
        logging.debug(filename)
        logging.debug(tnid)
        logging.debug(os.listdir(directory))
    finally:
        # Clean up even when convert cannot be started or an upload raises.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #9
0
def do_abbyy_ocr(obj, dsid):
    """Run the ABBYY FineReader CLI on a datastream and store its outputs.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file``. The
    CLI is asked for three outputs in one run (PDF, XML and UTF-8 text), and
    on exit status 0 each output file that exists is stored as the ``PDF``,
    ``ABBYY`` and ``OCR`` datastream respectively. A non-zero exit status is
    logged as an error.

    The process working directory is changed to ``abbyy_cli_home`` for the
    duration of the call and restored afterwards, including on exceptions.

    Returns None.
    """
    # Download a datastream as a temp file and get its location and filename.
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')

    txtfile = "%(dir)s/tmpfile.txt" % {'dir': directory}
    pdffile = "%(dir)s/tmpfile.pdf" % {'dir': directory}
    xmlfile = "%(dir)s/tmpfile.xml" % {'dir': directory}

    # ABBYY must be run while you are in the CLI directory.
    oldpwd = os.getcwd()
    try:
        os.chdir(abbyy_cli_home)
        r = subprocess.call([
            "./CLI", "-ics", "-if", "%(dir)s/%(file)s" % {'dir': directory, 'file': filename},
            "-f", "PDF", "-pem", "ImageOnText", "-pfpf", "Automatic", "-pfq", "90", "-pfpr", "150", "-of", pdffile,
            "-f", "XML", "-xaca", "-of", xmlfile,
            "-f", "Text", "-tel", "-tpb", "-tet", "UTF8", "-of", txtfile,
        ])
        logging.debug(os.listdir(directory))
        if r == 0:
            if os.path.exists(txtfile):
                update_datastream(obj, 'OCR', txtfile, label='OCR Text', mimeType='text/plain')
            if os.path.exists(xmlfile):
                update_datastream(obj, 'ABBYY', xmlfile, label='FineReader XML data', mimeType='application/xml')
            if os.path.exists(pdffile):
                update_datastream(obj, 'PDF', pdffile, label='Page PDF', mimeType='application/pdf')
        else:
            logging.error("Error calling ABBYY FineReader CLI. Error code %(err)d." % {'err': r})
    finally:
        # Runs on every exit path so a failure cannot leave the process in
        # the CLI directory or leave the working files behind.
        rmtree(directory, ignore_errors=True)
        # Go back to the previous working directory
        os.chdir(oldpwd)
コード例 #10
0
def update_fedora_add_datastreams(obj, tmpdir):
    """Push the six derivative files located in ``tmpdir`` onto ``obj``.

    Datastreams are written in a fixed order (tn, jp2, jp2lossless, xml,
    text, pdf). Processing stops at the first ``update_datastream`` call
    that returns a falsy value.

    Returns that falsy value, or the result of the last call when every
    call returned a truthy value.
    """
    # (datastream id, file name relative to tmpdir, label, MIME type)
    uploads = (
        ('tn', '/tmp.jpg', 'thumbnail image', 'image/jpeg'),
        ('jp2', '/tmp.jp2', 'jp2 image', 'image/jp2'),
        ('jp2lossless', '/tmp_lossless.jp2', 'jp2 image (lossless)', 'image/jp2'),
        ('xml', '/tmp.xml', 'ocr xml', 'text/xml'),
        ('text', '/tmp.txt', 'ocr text', 'text/plain'),
        ('pdf', '/tmp.pdf', 'pdf', 'application/pdf'),
    )

    outcome = None
    for ds_id, suffix, ds_label, mime in uploads:
        outcome = update_datastream(obj, ds_id, tmpdir + suffix, ds_label, mime)
        if not outcome:
            # Same short-circuit as an ``and`` chain: skip the remaining uploads.
            break
    return outcome
コード例 #11
0
def create_thumbnail(obj, dsid, tnid):
    """Render a JPEG thumbnail for a datastream and attach it to the object.

    Fetches datastream ``dsid`` via ``get_datastream_as_file`` and runs
    ImageMagick ``convert -thumbnail`` on its first frame/page, using a
    geometry built from the module-level ``tn_size``. When convert exits
    with 0 the output is stored as datastream ``tnid``. The ``TN``
    datastream is also replaced if its label does not start with ``image``
    while the source MIME type does, or if inspecting them raises
    ``FedoraConnectionException``.

    Returns the ``convert`` exit status (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, "tmp")
    source = directory + '/' + filename + '[0]'  # '[0]': first frame/page only
    target = directory + '/' + tnid
    geometry = '%sx%s' % (tn_size[0], tn_size[1])

    try:
        # Make a thumbnail with convert
        r = subprocess.call(['convert', source, '-thumbnail', geometry, target])

        if r == 0:
            update_datastream(obj, tnid, target, label='thumbnail', mimeType='image/jpeg')

            # TN's label holds the MIME type of the datastream it was derived
            # from (see the calls below), so only a TN that was not made from
            # an image gets replaced, and only by an image source.
            try:
                if obj['TN'].label.split('/')[0] != 'image':
                    if obj[dsid].mimeType.split('/')[0] == 'image':
                        update_datastream(obj, 'TN', target, label=obj[dsid].mimeType, mimeType='image/jpeg')
            except FedoraConnectionException:
                # NOTE(review): looks like this covers a missing TN
                # datastream; verify with the Fedora client library.
                update_datastream(obj, 'TN', target, label=obj[dsid].mimeType, mimeType='image/jpeg')
        else:
            logging.warning('PID:%s DSID:%s Thumbnail creation failed (return code:%d).', obj.pid, dsid, r)

        logging.debug(directory)
        logging.debug(filename)
        logging.debug(tnid)
        logging.debug(os.listdir(directory))
    finally:
        # The working directory must go away on every exit path.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #12
0
def create_jp2(obj, dsid, jp2id):
    """Create a lossy JPEG 2000 derivative of a TIFF datastream.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file`` and
    first rewritten uncompressed with ImageMagick ``convert +compress``.
    That copy is encoded with ``kdu_compress`` at rate 0.5. If kdu_compress
    exits non-zero, ``convert`` is used on the original file as a fallback
    encoder. The result is stored as datastream ``jp2id`` when either
    encoder exits with 0.

    Returns the exit status of the last external command (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    tiff_path = directory + '/' + filename
    uncompressed_path = directory + '/uncompressed.tiff'
    jp2_path = directory + '/tmpfile_lossy.jp2'

    try:
        r = subprocess.call(["convert", tiff_path, '+compress', uncompressed_path])
        if r != 0:
            logging.warning('PID:%s DSID:%s JP2 creation failed (convert return code:%d).', obj.pid, dsid, r)
            return r

        r = subprocess.call([
            "kdu_compress", "-i", uncompressed_path,
            "-o", jp2_path,
            "-rate", "0.5", "Clayers=1", "Clevels=7",
            "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},{64,64},{64,64},{32,32},{16,16}",
            "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}", "Cuse_sop=yes",
        ])
        if r != 0:
            logging.warning('PID:%s DSID:%s JP2 creation failed. Trying alternative.', obj.pid, dsid)
            r = subprocess.call(["convert", tiff_path, '-compress', 'JPEG2000', '-quality', '50%', jp2_path])
            if r != 0:
                # r now holds the fallback convert status, not kdu_compress's.
                logging.warning('PID:%s DSID:%s JP2 creation failed (convert return code:%d).', obj.pid, dsid, r)

        if r == 0:
            update_datastream(obj, jp2id, jp2_path, label='Compressed JPEG2000', mimeType='image/jp2')
    finally:
        # Single cleanup point for the early return, success and exceptions.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #13
0
def create_jp2(obj, dsid, jp2id):
    """Build a compressed (lossy) JPEG 2000 file from a TIFF datastream.

    Fetches datastream ``dsid`` via ``get_datastream_as_file``, writes an
    uncompressed copy with ImageMagick ``convert +compress`` and encodes
    that copy with ``kdu_compress`` (rate 0.5). A non-zero kdu_compress exit
    status triggers one fallback attempt with ``convert`` on the original
    file. The output is stored as datastream ``jp2id`` when an encoder
    exits with 0; failures are logged as warnings.

    Returns the exit status of the last external command (0 on success).
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    original = directory + '/' + filename
    flattened = directory + '/uncompressed.tiff'
    lossy_jp2 = directory + '/tmpfile_lossy.jp2'

    kdu_command = [
        "kdu_compress", "-i", flattened,
        "-o", lossy_jp2,
        "-rate", "0.5", "Clayers=1", "Clevels=7",
        "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},{64,64},{64,64},{32,32},{16,16}",
        "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}", "Cuse_sop=yes",
    ]
    convert_fallback = ["convert", original, '-compress', 'JPEG2000', '-quality', '50%', lossy_jp2]

    try:
        r = subprocess.call(["convert", original, '+compress', flattened])
        if r != 0:
            logging.warning('PID:%s DSID:%s JP2 creation failed (convert return code:%d).', obj.pid, dsid, r)
            return r

        r = subprocess.call(kdu_command)
        if r != 0:
            logging.warning('PID:%s DSID:%s JP2 creation failed. Trying alternative.', obj.pid, dsid)
            r = subprocess.call(convert_fallback)
            if r != 0:
                # The status reported here comes from the convert fallback.
                logging.warning('PID:%s DSID:%s JP2 creation failed (convert return code:%d).', obj.pid, dsid, r)

        if r == 0:
            update_datastream(obj, jp2id, lossy_jp2, label='Compressed JPEG2000', mimeType='image/jp2')
    finally:
        # Covers the early return above as well as any exception.
        rmtree(directory, ignore_errors=True)
    return r
コード例 #14
0
def create_jp2(obj, dsid):
    """Create lossless JP2, lossy JP2 and thumbnail derivatives of a TIFF.

    The datastream ``dsid`` is fetched with ``get_datastream_as_file`` and
    three independent conversions are attempted on it:

    * ``kdu_compress`` with ``Creversible=yes`` -> ``LosslessJP2`` datastream
    * ``kdu_compress`` at rate 0.5 -> ``JP2`` datastream
    * ``convert -thumbnail 200x200`` -> ``TN`` datastream

    Each result is stored only when its command exits with 0. A failing
    step is logged as a warning and does not prevent the later steps.

    Returns None.
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    tiff_path = directory + '/' + filename
    lossless_path = directory + '/tmpfile_lossless.jp2'
    lossy_path = directory + '/tmpfile_lossy.jp2'
    tn_path = directory + '/tmpfile_TN.jpg'

    try:
        # Make a lossless JP2
        # kdu_compress -i $i -o $bn.jp2 -rate -,0.5 Clayers=2 Creversible=yes Clevels=8 "Cprecincts={256,256},{256,256},{128,128}" Corder="RPCL" ORGgen_plt="yes" ORGtparts="R" Cblk="{32,32}"
        r = subprocess.call([
            "kdu_compress", "-i", tiff_path, "-o", lossless_path,
            "-rate", "-,0.5", "Clayers=2", "Creversible=yes", "Clevels=8",
            "Cprecincts={256,256},{256,256},{128,128}", "Corder=RPCL",
            "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
        ])
        if r == 0:
            update_datastream(obj, 'LosslessJP2', lossless_path,
                              label='Lossless JPEG2000', mimeType='image/jp2')
        else:
            logging.warning('PID:%s DSID:%s Lossless JP2 creation failed (kdu_compress return code:%d).', obj.pid, dsid, r)

        # Make a lossy JP2
        r2 = subprocess.call([
            "kdu_compress", "-i", tiff_path, "-o", lossy_path,
            "-rate", "0.5", "Clayers=1", "Clevels=7",
            "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},{64,64},{64,64},{32,32},{16,16}",
            "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
            "Cuse_sop=yes",
        ])
        if r2 == 0:
            update_datastream(obj, 'JP2', lossy_path,
                              label='Compressed JPEG2000', mimeType='image/jp2')
        else:
            logging.warning('PID:%s DSID:%s JP2 creation failed (kdu_compress return code:%d).', obj.pid, dsid, r2)

        # Make a thumbnail
        r3 = subprocess.call(["convert", "-thumbnail", "200x200", tiff_path, tn_path])
        if r3 == 0:
            # 'image/jpeg' is the registered JPEG media type ('image/jpg' is not).
            update_datastream(obj, 'TN', tn_path,
                              label='Thumbnail', mimeType='image/jpeg')
        else:
            logging.warning('PID:%s DSID:%s Thumbnail creation failed (return code:%d).', obj.pid, dsid, r3)

        logging.debug(os.listdir(directory))
    finally:
        # Clean up even when a tool cannot be started or an upload raises.
        rmtree(directory, ignore_errors=True)
コード例 #15
0
def create_jp2(obj, dsid):
    """Derive JPEG 2000 files and a thumbnail from a TIFF datastream.

    Fetches datastream ``dsid`` via ``get_datastream_as_file`` and runs
    three conversions, each independent of the others' outcome:

    1. ``kdu_compress`` with ``Creversible=yes``, stored as ``LosslessJP2``.
    2. ``kdu_compress`` at rate 0.5, stored as ``JP2``.
    3. ``convert -thumbnail 200x200``, stored as ``TN``.

    A datastream is only written when its command exits with 0; a non-zero
    exit status is logged as a warning.

    Returns None.
    """
    directory, filename = get_datastream_as_file(obj, dsid, 'tiff')
    source = directory + '/' + filename

    lossless_jp2 = directory + '/tmpfile_lossless.jp2'
    lossy_jp2 = directory + '/tmpfile_lossy.jp2'
    thumbnail = directory + '/tmpfile_TN.jpg'

    # Make a lossless JP2
    # kdu_compress -i $i -o $bn.jp2 -rate -,0.5 Clayers=2 Creversible=yes Clevels=8 "Cprecincts={256,256},{256,256},{128,128}" Corder="RPCL" ORGgen_plt="yes" ORGtparts="R" Cblk="{32,32}"
    lossless_cmd = [
        "kdu_compress", "-i", source, "-o", lossless_jp2,
        "-rate", "-,0.5", "Clayers=2", "Creversible=yes", "Clevels=8",
        "Cprecincts={256,256},{256,256},{128,128}", "Corder=RPCL",
        "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
    ]
    lossy_cmd = [
        "kdu_compress", "-i", source, "-o", lossy_jp2,
        "-rate", "0.5", "Clayers=1", "Clevels=7",
        "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},{64,64},{64,64},{32,32},{16,16}",
        "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
        "Cuse_sop=yes",
    ]
    thumbnail_cmd = ["convert", "-thumbnail", "200x200", source, thumbnail]

    try:
        r = subprocess.call(lossless_cmd)
        if r == 0:
            update_datastream(obj,
                              'LosslessJP2',
                              lossless_jp2,
                              label='Lossless JPEG2000',
                              mimeType='image/jp2')
        else:
            logging.warning('PID:%s DSID:%s Lossless JP2 creation failed (kdu_compress return code:%d).', obj.pid, dsid, r)

        r2 = subprocess.call(lossy_cmd)
        if r2 == 0:
            update_datastream(obj,
                              'JP2',
                              lossy_jp2,
                              label='Compressed JPEG2000',
                              mimeType='image/jp2')
        else:
            logging.warning('PID:%s DSID:%s JP2 creation failed (kdu_compress return code:%d).', obj.pid, dsid, r2)

        r3 = subprocess.call(thumbnail_cmd)
        if r3 == 0:
            # Use the registered JPEG media type; 'image/jpg' is not one.
            update_datastream(obj,
                              'TN',
                              thumbnail,
                              label='Thumbnail',
                              mimeType='image/jpeg')
        else:
            logging.warning('PID:%s DSID:%s Thumbnail creation failed (return code:%d).', obj.pid, dsid, r3)

        logging.debug(os.listdir(directory))
    finally:
        # The working directory must go away on every exit path.
        rmtree(directory, ignore_errors=True)