コード例 #1
0
    # === STATIC DATASTREAM SECTION ===
    # These datastreams are defined here; their source files are created as required.

    # Ingest the custom datastreams for this object.
    # First, create a JP2 datastream derived from the object's TIFF.
    # tifFile is used as a filesystem path below (basename and converter input).
    tifFile = objectData['datastreams']['TIFF']
    # TIFF file name without directory or extension; reused to name derived files.
    baseName = os.path.splitext(os.path.basename(tifFile))[0]

    # The JP2 is written to the temp directory, uploaded, then deleted.
    jp2File = os.path.join(config.tempDir, "%s.jp2" % baseName)
    # NOTE(review): the two 'default' arguments are interpreted by
    # converter.tif_to_jp2 - confirm their meaning there.
    converter.tif_to_jp2(tifFile, jp2File, 'default', 'default') # this will generate jp2File
    # Upload as "JP2" (presumably the datastream ID), labelled with the JP2 file name.
    fedoraLib.update_datastream(obj, u"JP2", jp2File, label=os.path.basename(jp2File), mimeType=misc.getMimeType("jp2"))
    os.remove(jp2File) # temporary JP2 is no longer needed once uploaded

    # Thumbnails are generated locally from the TIFF.
    # The temp file name is fixed ("tmp.jpg"), not derived from baseName.
    tnFile = os.path.join(config.tempDir, "tmp.jpg")
    converter.tif_to_jpg(tifFile, tnFile, imageMagicOpts='TN')
    # Add a TN datastream to the map object; the label is built from the collection name.
    fedoraLib.update_datastream(obj, u"TN", tnFile, label=unicode(config.myCollectionName+"_TN.jpg"), mimeType=misc.getMimeType("jpg"))
    os.remove(tnFile) # delete it so the same path can be recreated for the next thumbnail
    # At this point the temporary thumbnail file has been removed.

    # Only None disables this step: the test is "!= None", so any other value
    # (including an empty one) still enters the branch. Per the original author,
    # config.jhoveCmd is unset when jhove extraction cannot be completed.
    if config.jhoveCmd != None:
        # Extract MIX metadata.
        # Shell equivalent noted by the original author:
        #cmd= jhove -h xml $INFILE | xsltproc jhove2mix.xslt - > `basename ${$INFILE%.*}.mix`
        mixFile = os.path.join(config.tempDir, "%s.mix.xml" % baseName)
        # The string below is the author's refactoring note, kept as a bare string statement.
        """ extract this into tif_to_mix() """
        # NOTE(review): outfile is not closed in the lines visible here -
        # confirm it is closed after the pipeline finishes.
        outfile = open(mixFile, "w")
        # First stage: jhove with XML output (-h xml) for the TIFF.
        jhoveCmd1 = ["jhove", "-h", "xml", tifFile]
        # Second stage: presumably the xsltproc transform from the note above -
        # verify what config.jhoveCmd actually holds.
        jhoveCmd2 = config.jhoveCmd
        # Chain the two processes: p1's stdout feeds p2's stdin; p2 writes into mixFile.
        p1 = subprocess.Popen(jhoveCmd1, stdout=subprocess.PIPE)
        p2 = subprocess.Popen(jhoveCmd2, stdin=p1.stdout, stdout=outfile)
コード例 #2
0
    # Select every child element of the second-level mets:div entries in the
    # METS structMap; each selected element is treated as one page.
    pages = pageIndex.xpath("//mets:structMap/mets:div/mets:div/*", namespaces=nsmap)
    # Collects one (fileid, label, relative path) tuple per page.
    fullPageData = []
    for p in pages:
        label = p.attrib['LABEL']
        # FILEID of the page's first mets:fptr child.
        fileid = p.xpath("mets:fptr", namespaces=nsmap)[0].attrib['FILEID']
        # Resolve the FILEID to a file name: find the matching mets:file in the
        # fileSec and read the xlink:href of its first mets:FLocat.
        filename = pageIndex.xpath("//mets:fileSec/mets:fileGrp/mets:file[@ID='%s']/mets:FLocat" % fileid, namespaces=nsmap)[0].attrib['{%s}href' % nsmap['xlink']]
        # The path is "<filename without extension>/<filename>".
        # NOTE(review): presumably each page file lives in a folder named after
        # its own stem - confirm against the on-disk layout.
        tup = (fileid, label, os.path.join(os.path.splitext(filename)[0], filename))
        fullPageData.append(tup)
    # Order pages by FILEID.
    # NOTE(review): if FILEIDs are strings this is a lexicographic sort -
    # verify that they are zero-padded or otherwise sort correctly.
    fullPageData.sort(key=lambda tup: tup[0])
    count = len(fullPageData)

    # The book thumbnail is generated from the first page image (skipped on dry runs).
    # NOTE(review): fullPageData[0] raises IndexError when no pages were found.
    if not config.dryrun:
        tnFile = os.path.join(config.tempDir, "tmp.jpg")
        converter.tif_to_jpg(os.path.join(bookFolder, fullPageData[0][2]), tnFile, imageMagicOpts='TN')
        # Add a TN datastream to the book.
        # NOTE(review): tnFile is not removed in the lines visible here - confirm cleanup.
        fedoraLib.update_datastream(bookObj, u"TN", tnFile, label=unicode(config.myCollectionName+"_TN.jpg"), mimeType=misc.getMimeType("jpg"))

    print("Build book object with %d pages" % count)

    # The object's label names the combined PDF.
    baseName = objectData['label']

    # Path for a PDF in the temp directory; it is only computed here and is
    # not used in the lines visible in this section.
    fullPDF = os.path.join(config.tempDir, "%s.pdf" % baseName)

    # Process each page in sorted order; idx is 0-based, so progress is reported as idx+1.
    for idx, pageset in enumerate(fullPageData):
        # Third tuple element is the page's relative path.
        page = pageset[2]
        print("\n==========\nIngesting object %d of %d: %s" % (idx+1, count, page))

        # Page file name without directory or extension.
        basePage = os.path.splitext(os.path.basename(page))[0]