Beispiel #1
0
def create_csv(obj, dsid='OBJ', derivativeid='CSV', args=[]):
    """Convert a spreadsheet datastream to CSV with xls2csv, one datastream per sheet.

    The datastream ``dsid`` of ``obj`` is fetched to a temporary directory and
    fed to ``xls2csv -x`` together with any extra ``args``. The tool's stdout
    is split on form-feed characters and every non-empty piece is uploaded as
    its own datastream: the first one as ``derivativeid``, later ones as
    ``<derivativeid>_SHEET_<n>``.

    ``args`` is only read, never modified, so the shared default list is safe.

    Returns the xls2csv return code (0 on success). The temporary directory is
    removed even when an exception propagates.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.create_csv')
    directory, file = get_datastream_as_file(obj, dsid, "document")
    try:
        in_file = directory + '/' + file
        process = subprocess.Popen(['xls2csv', '-x', in_file] + list(args),
                                   stdout=subprocess.PIPE)
        output = process.communicate()[0]
        if process.returncode != 0:
            # The original referenced an undefined name here, so a failed
            # conversion raised NameError instead of being logged.
            logger.warning(
                'PID:%s DSID:%s CSV creation failed (xls2csv return code:%d).' %
                (obj.pid, dsid, process.returncode))
        else:
            num_sheet = 0
            # Every sheet is staged through the same scratch file before upload.
            out_file = directory + '/' + 'csv.csv'
            logger.warning('Output: ' + output)
            for sheet in output.split("\f"):
                if len(sheet) == 0:
                    # Empty pieces are skipped and do not consume a sheet number.
                    continue
                logger.warning('PID:%s DSID:%s CSV create sheet: %d.' %
                               (obj.pid, dsid, num_sheet))
                with open(out_file, 'w') as f:
                    f.write(sheet)
                if num_sheet > 0:
                    new_dsid = derivativeid + '_SHEET_' + str(num_sheet)
                else:
                    new_dsid = derivativeid
                update_datastream(obj, new_dsid, out_file,
                                  'CSV Generated Metadata', 'text/csv')
                num_sheet += 1
        return process.returncode
    finally:
        rmtree(directory, ignore_errors=True)
def handle_map_object(fedora_object, item):
    print '%s - handle map object' % (item.do_id,)
    # tiff image file
    tiff = workflow.core.models.Item_File.objects.get(item=item, use='MASTER')
    fedoraLib.update_datastream(fedora_object, 'TIFF', tiff.path, label=tiff.name, mimeType='image/tiff', controlGroup='M')
    handle_derived_jp2(fedora_object, tiff)
    return
Beispiel #3
0
def create_pdf(obj, dsid, pdfid):
    """Convert a document datastream to PDF with LibreOffice and store it as ``pdfid``.

    The datastream ``dsid`` of ``obj`` is fetched to a temporary directory and
    converted with ``soffice --headless``. Success is judged by whether the
    expected PDF file exists afterwards; soffice's own return code is not
    consulted.

    Returns 0 when the PDF was created and uploaded, 1 otherwise. The
    temporary directory is removed even when an exception propagates.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.create_pdf')
    # Receive the document and create a PDF with LibreOffice if possible.
    directory, file = get_datastream_as_file(obj, dsid, "document")
    try:
        subprocess.call([
            'soffice', '--headless', '-convert-to', 'pdf', '-outdir', directory,
            directory + '/' + file
        ])
        # Only the last extension is replaced, so "a.b.doc" is looked up as
        # "a.b.pdf". Cutting at the first dot (as before) looked for "a.pdf"
        # and reported a failure for multi-dot names.
        newfile = os.path.splitext(file)[0] + '.pdf'
        pdf_path = directory + '/' + newfile

        if os.path.isfile(pdf_path):
            update_datastream(obj,
                              pdfid,
                              pdf_path,
                              label='doc to pdf',
                              mimeType='application/pdf')
            # 0/1 instead of True/False to stay consistent with the other
            # converters, which return process exit codes.
            value = 0
        else:
            value = 1
            logger.warning('PID:%s DSID:%s PDF creation failed.' %
                           (obj.pid, dsid))

        logger.debug(os.listdir(directory))
        return value
    finally:
        rmtree(directory, ignore_errors=True)
Beispiel #4
0
def create_swf(obj, dsid, swfid, args=None):
    """Create a SWF from a PDF datastream with pdf2swf and store it as ``swfid``.

    With ``args`` left as None, pdf2swf runs with a built-in option set and,
    if that run exits non-zero, once more with ``-s poly2bitmap`` appended.
    When ``args`` is given it is appended to the pdf2swf command as-is and no
    retry takes place.

    Returns 0 on success. In the default-options path it returns 1 when the
    output file is missing, otherwise pdf2swf's return code; with explicit
    ``args`` it returns pdf2swf's return code. The temporary directory is
    removed even when an exception propagates.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.create_swf')
    # Receive a PDF and create a SWF for use with flexpaper.
    directory, file = get_datastream_as_file(obj, dsid, "pdf")
    try:
        swf_path = directory + '/' + swfid
        program = ['pdf2swf', directory + '/' + file, '-o', swf_path]
        if args is None:
            default_args = ['-T 9', '-f', '-t', '-s', 'storeallcharacters', '-G']
            pdf2swf = subprocess.Popen(program + default_args,
                                       stdout=subprocess.PIPE)
            # Only stdout is piped, so there is no stderr value to keep.
            out = pdf2swf.communicate()[0]
            # try with additional arguments
            if pdf2swf.returncode != 0:
                logger.warning(
                    'PID:%s DSID:%s SWF creation failed. Trying alternative.' %
                    (obj.pid, dsid))
                extra_args = ['-s', 'poly2bitmap']
                pdf2swf = subprocess.Popen(program + default_args + extra_args,
                                           stdout=subprocess.PIPE)
                out = pdf2swf.communicate()[0]
            swf_exists = os.path.isfile(swf_path)
            # Catch the case where pdf2swf reports success but fails to
            # create the file.
            if pdf2swf.returncode == 0 and swf_exists:
                update_datastream(obj,
                                  swfid,
                                  swf_path,
                                  label='pdf to swf',
                                  mimeType='application/x-shockwave-flash')
                r = 0
            elif not swf_exists:
                logger.warning(
                    'PID:%s DSID:%s SWF creation failed (pdf2swf returned: "%s").'
                    % (obj.pid, dsid, out))
                r = 1
            else:
                logger.warning(
                    'PID:%s DSID:%s SWF creation failed (pdf2swf return code:%d).'
                    % (obj.pid, dsid, pdf2swf.returncode))
                r = pdf2swf.returncode
        else:
            r = subprocess.call(program + args)
            if r != 0:
                logger.warning(
                    'PID:%s DSID:%s SWF creation failed (pdf2swf return code:%d).'
                    % (obj.pid, dsid, r))
            else:
                update_datastream(obj,
                                  swfid,
                                  swf_path,
                                  label='pdf to swf',
                                  mimeType='application/x-shockwave-flash')
        return r
    finally:
        rmtree(directory, ignore_errors=True)
def handle_derived_jp2(fedora_object, tiff):
    """Encode a JP2 from ``tiff`` and upload it as the JP2 datastream.

    ``tiff.path`` is copied into /tmp and the external encodeJp2 program is run
    on that copy. Whatever the encoder prints on stdout (stripped) is used as
    the path of the JP2 file to upload. Both the scratch copy and the JP2 file
    are removed afterwards, including when the encoder or the upload raises.
    Returns None.
    """
    shutil.copy(tiff.path, '/tmp/')
    # NOTE(review): the copy lands under basename(tiff.path) while the path
    # below is built from tiff.name - presumably these match; confirm.
    jp2_source = os.path.join('/tmp', tiff.name)
    encoder = '/usr/local/dlxs/prep/i/image/encodeJp2'
    try:
        jp2_file = subprocess.Popen([encoder, jp2_source], stdout=subprocess.PIPE).communicate()[0].strip()
    finally:
        # The scratch copy is no longer needed once the encoder has run.
        os.remove(jp2_source)
    try:
        fedoraLib.update_datastream(fedora_object, u"JP2", jp2_file, label=os.path.basename(jp2_file), mimeType=u'image/jp2', controlGroup='M')
    finally:
        os.remove(jp2_file) # finished with that
    return
def handle_image_object(fedora_object, item):
    print '%s - handle image object' % (item.do_id,)
    # tiff image file
    tiff = workflow.core.models.Item_File.objects.get(item=item, use='MASTER')
    fedoraLib.update_datastream(fedora_object, 'TIFF', tiff.path, label=tiff.name, mimeType='image/tiff', controlGroup='M')
    handle_derived_jp2(fedora_object, tiff)
    #handle_derived_mix(fedora_object, tiff)
    try:
        kml = workflow.core.models.Item_File.objects.get(item=item, use='KML')
        # activate this when ready
        # fedoraLib.update_datastream(fedora_object, 'KML', kml.path, label=kml.name, mimeType='text/xml', controlGroup='M')
    except:
        return 
    return 
Beispiel #7
0
def marcxml_to_mods(obj, dsid, dsidOut='MODS'):
    """Transform a MARCXML datastream with the bundled XSLT and store the result.

    The datastream ``dsid`` of ``obj`` is fetched to a temporary directory,
    parsed, run through ``__resources/marcxml2mods.xslt`` (located next to
    this module) and the result is uploaded as datastream ``dsidOut``.

    Returns whatever ``update_datastream`` returns. The temporary directory
    is removed even when parsing or the transform raises.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.marcxml_to_mods')
    directory, file = get_datastream_as_file(obj, dsid, 'MARCXML')
    try:
        logger.debug('Got datastream')
        marcxml = etree.parse(os.path.join(directory, file))
        logger.debug('Parsed datastream')
        transform = etree.XSLT(
            etree.parse(
                os.path.join(os.path.dirname(__file__),
                             '__resources/marcxml2mods.xslt')))
        logger.debug('Parsed XSLT')
        transformed = transform(marcxml)
        logger.debug('Transformed datastream')

        out_path = os.path.join(directory, dsidOut)
        # Binary mode: the serialized XML is written without newline
        # translation, and unbuffered binary files are accepted by both
        # Python 2 and 3 (unbuffered text mode is not).
        with open(out_path, 'wb', 0) as temp:
            transformed.write(temp)
            logger.debug('Wrote transformed DS to disk')

        return update_datastream(obj,
                                 dsidOut,
                                 out_path,
                                 label='MODS (translated from MARCXML)',
                                 mimeType="text/xml")
    finally:
        rmtree(directory, ignore_errors=True)
Beispiel #8
0
def create_jp2(obj, dsid, jp2id):
    """Create a lossy JPEG 2000 derivative of a TIFF datastream as ``jp2id``.

    Steps:
      1. ``convert`` writes an uncompressed RGB copy of the TIFF.
      2. ``kdu_compress`` encodes that copy at rate 0.5.
      3. If kdu_compress exits non-zero, ``convert`` is tried on the original
         TIFF with JPEG2000 compression at 50% quality.

    Returns the return code of the last command that ran (0 on success). The
    temporary directory is removed on every path, including exceptions.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.create_jp2')
    # We receive a TIFF and create a lossy JPEG 2000 file from it.
    directory, file = get_datastream_as_file(obj, dsid, 'tiff')
    try:
        source = directory + '/' + file
        uncompressed = directory + '/uncompressed.tiff'
        # Both encoders write to the same output name.
        jp2_path = directory + '/tmpfile_lossy.jp2'

        r = subprocess.call([
            "convert", source, '+compress', '-colorspace', 'RGB', uncompressed
        ])
        if r != 0:
            logger.warning(
                'PID:%s DSID:%s JP2 creation failed (convert return code:%d).' %
                (obj.pid, dsid, r))
            return r

        r = subprocess.call([
            "kdu_compress", "-i", uncompressed, "-o", jp2_path,
            "-rate", "0.5", "Clayers=1", "Clevels=7",
            "Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},{64,64},{64,64},{32,32},{16,16}",
            "Corder=RPCL", "ORGgen_plt=yes", "ORGtparts=R", "Cblk={32,32}",
            "Cuse_sop=yes"
        ])
        if r != 0:
            logger.warning(
                'PID:%s DSID:%s JP2 creation failed. Trying alternative.' %
                (obj.pid, dsid))
            r = subprocess.call([
                "convert", source, '-compress', 'JPEG2000', '-quality', '50%',
                jp2_path
            ])
            if r != 0:
                # The code reported here comes from the convert fallback, not
                # from kdu_compress as the old message claimed.
                logger.warning(
                    'PID:%s DSID:%s JP2 creation failed (convert fallback return code:%d).'
                    % (obj.pid, dsid, r))

        if r == 0:
            update_datastream(obj,
                              jp2id,
                              jp2_path,
                              label='Compressed JPEG2000',
                              mimeType='image/jp2')
        return r
    finally:
        rmtree(directory, ignore_errors=True)
Beispiel #9
0
def create_fits(obj, dsid, derivativeid='FITS', args=[]):
    """Run the ``fits`` tool on a datastream and store its XML report.

    The datastream ``dsid`` of ``obj`` is fetched to a temporary directory,
    ``fits -i <input> -o <dir>/FITS.xml`` is run with ``args`` appended, and
    on a zero return code the report is uploaded as ``derivativeid``.

    Returns the return code of ``fits``.
    """
    log = logging.getLogger('islandoraUtils.DSConverter.create_fits')
    workdir, source_name = get_datastream_as_file(obj, dsid, "document")
    source_path = workdir + '/' + source_name
    report_path = workdir + '/FITS.xml'
    command = ['fits', '-i', source_path, '-o', report_path]
    status = subprocess.call(command + args)
    if status == 0:
        update_datastream(obj,
                          derivativeid,
                          report_path,
                          label='FITS Generated Image Metadata',
                          mimeType='text/xml')
    else:
        log.warning(
            'PID:%s DSID:%s FITS creation failed (fits return code:%d).' %
            (obj.pid, dsid, status))
    # The scratch directory is discarded whether or not fits succeeded.
    rmtree(workdir, ignore_errors=True)
    return status
Beispiel #10
0
def create_mp3(obj, dsid, mp3id, args=None):
    """Encode a datastream to MP3 with ``lame`` and store it as ``mp3id``.

    The source is fetched with an ``mp3`` extension when its mimeType is
    ``audio/mpeg`` and with a ``wav`` extension otherwise. ``args`` holds the
    lame options; when None a built-in default set is used. The caller's list
    is not modified.

    Returns lame's return code (0 on success). The temporary directory is
    removed even when an exception propagates.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.create_mp3')

    # The mimeType lookup raises KeyError if the datastream doesn't exist.
    try:
        mime = obj[dsid].mimeType
    except KeyError:
        mime = None

    ext = 'mp3' if mime == 'audio/mpeg' else 'wav'

    directory, file = get_datastream_as_file(obj, dsid, ext)
    try:
        # I think we need more sensible defaults for web streaming
        if args is None:
            args = ['-mj', '-v', '-V6', '-B224', '--strictly-enforce-ISO']

        outpath = os.path.join(directory, mp3id)
        # Build a fresh command list. The original inserted into and appended
        # to ``args`` itself, so a list reused by the caller picked up 'lame'
        # and stale file paths on every call.
        command = ['lame'] + list(args) + [os.path.join(directory, file),
                                           outpath]

        # Make MP3 with lame
        r = subprocess.call(command)
        if r == 0:
            update_datastream(obj,
                              mp3id,
                              outpath,
                              label='compressed to mp3',
                              mimeType='audio/mpeg')
        else:
            logger.warning(
                'PID:%s DSID:%s MP3 creation failed (lame return code:%d).' %
                (obj.pid, dsid, r))
        return r
    finally:
        rmtree(directory, ignore_errors=True)
 def add_fedora_object_ds(self, ds_label = '', ds_mimeType = '', control_group = 'M', connect_tries = 3, ds_checksumType = None, ds_checksum = None):
     """Upload the converted file as this instance's datastream.

     Forwards ``self.fedora_object``, ``self.dsid`` and
     ``self.converted_file_path`` to ``update_datastream`` together with the
     label, mime type, control group, retry count and checksum settings given
     here, and returns its result.
     """
     # Map this method's parameter names onto update_datastream's keywords.
     datastream_options = {
         'label': ds_label,
         'mimeType': ds_mimeType,
         'controlGroup': control_group,
         'tries': connect_tries,
         'checksumType': ds_checksumType,
         'checksum': ds_checksum,
     }
     return update_datastream(self.fedora_object,
                              self.dsid,
                              self.converted_file_path,
                              **datastream_options)
    def fedoraMessage(self, message, obj, client):
        """Create an access copy and thumbnail for a newly ingested mezzanine object.

        Acts only when ``message`` lists the ``usc:mezzanineCModel`` content
        model, its method is ``ingest`` and ``obj`` has a ``PBCORE``
        datastream; any other message is ignored.

        The mezzanine file path is read from the PBCore instantiation
        identifier whose source is "filename". If that element is missing, a
        warning is logged and nothing else is done. Otherwise an access copy
        and a thumbnail are produced, their paths are posted to the Islandora
        endpoint (re-authenticating once on a 403), and the thumbnail is
        stored as the ``TN`` datastream and then deleted from disk.

        ``client`` is accepted but not used here. Returns None.
        """
        if not ('usc:mezzanineCModel' in message['content_models']
                and message['method'] == 'ingest'
                and 'PBCORE' in obj):
            return

        data = {
            'parent': obj.pid
        }
        # Get the mezz path from the PBCore.
        # /pb:pbcoreInstantiationDocument/pb:instantiationIdentifier[@source="filename"]
        pbcore = etree.fromstring(obj['PBCORE'].getContent().read())
        matches = pbcore.xpath(
            '/pb:pbcoreInstantiationDocument/pb:instantiationIdentifier[@source="filename"]',
            namespaces={
                'pb': 'http://www.pbcore.org/PBCore/PBCoreNamespace.html'
            })
        if len(matches) == 0:
            # Without a path there is nothing to transcode. The original
            # logged this and then carried on with an empty list as the path.
            self.logger.warning('Missing path in PBCore.')
            return
        path = matches[0].text

        # Throw the mezz path at the access copy function, and create a child as an access copy.
        data['video_path'] = self.produceVideoAccessCopy(path)
        # Throw the mezz path at the thumbnail function, and store the thumbnail somewhere.
        data['thumbnail_path'] = self.produceThumbnail(path)

        try:
            # Throw the paths to the access copy and thumbnail at Islandora.
            r = self.requests_session.post(self.islandora_create_access_endpoint, data=data)
            if r.status_code == requests.codes.forbidden:
                # First attempt might fail due to an expired session...
                # Authenticate, then try again.
                self.requests_session.post(self.islandora_url + '/user/login', data={
                    'name': self.islandora_username,
                    'pass': self.islandora_password,
                    'form_id': 'user_login',
                }, headers={'content-type': 'application/x-www-form-urlencoded'})

                r = self.requests_session.post(self.islandora_create_access_endpoint, data=data)

            update_datastream(obj, 'TN', data['thumbnail_path'], label='Thumbnail', mimeType='image/png')
        finally:
            # The thumbnail is a scratch file; remove it even if the post or
            # the upload raised.
            os.remove(data['thumbnail_path'])

        if r.status_code == requests.codes.created:
            self.logger.info('Islandora created new access variant.')
        else:
            self.logger.warning('Islandora failed to create the new access variant.')
Beispiel #13
0
def create_ogg(obj, dsid, oggid):
    """Encode a datastream to OGG (libvorbis, 96k) with ffmpeg as ``oggid``.

    The datastream ``dsid`` of ``obj`` is fetched with a ``wav`` extension,
    encoded by ffmpeg into the same temporary directory, and uploaded when
    ffmpeg exits with 0.

    Returns ffmpeg's return code.
    """
    log = logging.getLogger('islandoraUtils.DSConverter.create_ogg')
    # Receive a wav file and create an OGG from it.
    workdir, wav_name = get_datastream_as_file(obj, dsid, "wav")
    wav_path = workdir + '/' + wav_name
    ogg_path = workdir + '/' + oggid

    # Make OGG with ffmpeg
    command = [
        'ffmpeg', '-i', wav_path, '-acodec', 'libvorbis', '-ab', '96k',
        ogg_path
    ]
    status = subprocess.call(command)
    if status != 0:
        log.warning(
            'PID:%s DSID:%s OGG creation failed (ffmpeg return code:%d).' %
            (obj.pid, dsid, status))
    else:
        update_datastream(obj,
                          oggid,
                          ogg_path,
                          label='compressed to ogg',
                          mimeType='audio/ogg')
    rmtree(workdir, ignore_errors=True)
    return status
def handle_derived_mix(fedora_object, tiff):
    """
    Extract MIX metadata from the input tiff file.

    Runs ``jhove -h xml`` on the tiff and pipes its output through
    ``xsltproc data/jhove2mix.xslt`` into /tmp/<basename>.mix.xml. A non-empty
    result is uploaded as the MIX datastream; an empty one is reported as a
    failed conversion. The scratch file is removed in every case.
    Returns None.
    """
    # The original assigned ``basename`` but read ``baseName`` (NameError).
    base_name = os.path.splitext(tiff.name)[0]
    mix_file = os.path.join("/tmp", "%s.mix.xml" % base_name)
    #cmd= jhove -h xml $INFILE | xsltproc jhove2mix.xslt - > `basename ${$INFILE%.*}.mix`
    # NOTE(review): jhove is given tiff.name rather than tiff.path, so it only
    # finds the file if the working directory contains it - confirm intent.
    jhoveCmd1 = ["/opt/jhove/jhove", "-h", "xml", tiff.name]
    jhoveCmd2 = ["xsltproc", "data/jhove2mix.xslt", "-"] # complete cmd for xsltproc
    #jhoveCmd2 = ["xalan", "-xsl", "data/jhove2mix.xslt"] # complete cmd for xalan
    try:
        with open(mix_file, "w") as out_file:
            p1 = subprocess.Popen(jhoveCmd1, stdout=subprocess.PIPE)
            p2 = subprocess.Popen(jhoveCmd2, stdin=p1.stdout, stdout=out_file)
            # Close our copy of the pipe so jhove gets SIGPIPE if xsltproc
            # exits first, then reap both children.
            p1.stdout.close()
            p2.communicate()
            p1.wait()
        if os.path.getsize(mix_file) == 0:
            # failed for some reason
            print("jhove conversion failed")
        else:
            fedoraLib.update_datastream(fedora_object, u"MIX", mix_file, label=os.path.basename(mix_file), mimeType=misc.getMimeType("xml"))
    finally:
        # finished with the scratch file (it may not exist if open() failed)
        if os.path.exists(mix_file):
            os.remove(mix_file)
    return
def handle_page_object(fedora_client, fedora_object, page, ocr_path, label):
    """
    Create the Fedora object for one page of a book and attach its datastreams.

    The page pid is "<book pid>-<page file basename>" and the page carries
    isPageNumber / isPageOf relationships pointing at the book object. The
    TIFF is uploaded, a JP2 is derived from it, and the OCR text is uploaded
    when ``ocr_path`` is given. MODS and MIX are not produced here (the MIX
    call is disabled).
    """
    content_model = ITEM_TYPE_CM_MAP['page']
    basename = os.path.splitext(page.name)[0]
    book_pid = fedora_object.pid
    pid = '%s-%s' % (book_pid, basename)
    full_label = u'%s, %s' % (label, drl.utils.shorten_string(fedora_object.label, 205))
    namespaces = { 'pageNS' : 'info:islandora/islandora-system:def/pageinfo#' }
    # should the page number be a counter here instead of int(basename)?
    relationships = {
        fedora_relationships.rels_predicate('pageNS', 'isPageNumber') : str(int(basename)),
        fedora_relationships.rels_predicate('pageNS', 'isPageOf') : str(fedora_object.pid),
    }
    page_object = addObjectToFedora(
        fedora_client,
        full_label,
        pid,
        fedora_object.pid,
        content_model,
        extraNamespaces=namespaces,
        extraRelationships=relationships)
    fedoraLib.update_datastream(page_object, 'TIFF', page.path, label=page.name, mimeType='image/tiff', controlGroup='M')
    handle_derived_jp2(page_object, page)
    #handle_derived_mix(page_object, page)
    if not ocr_path:
        return
    ocr_label = unicode(os.path.basename(ocr_path))
    fedoraLib.update_datastream(page_object, u'OCR', ocr_path, label=ocr_label, mimeType=u'text/plain', controlGroup='M')
Beispiel #16
0
def create_mp4(obj, dsid, mp4id):
    """Transcode a video datastream to MP4 with ffmpeg and store it as ``mp4id``.

    The datastream ``dsid`` of ``obj`` is fetched to a temporary directory and
    encoded to ``output.mp4`` there (libx264 video, libfaac audio at 128k,
    two channels). The result is uploaded when ffmpeg exits with 0.

    Returns ffmpeg's return code.
    """
    log = logging.getLogger('islandoraUtils.DSConverter.create_mp4')
    workdir, source_name = get_datastream_as_file(obj, dsid, 'video')

    source_path = os.path.join(workdir, source_name)
    target_path = os.path.join(workdir, 'output.mp4')

    command = ['ffmpeg', '-i', source_path]
    command += ['-f', 'mp4']
    command += ['-vcodec', 'libx264', '-preset', 'medium']
    command += ['-acodec', 'libfaac', '-ab', '128k', '-ac', '2']
    command += ['-async', '1', '-movflags', 'faststart']
    command.append(target_path)

    status = subprocess.call(command)
    if status != 0:
        log.warning('PID:%s DSID:%s MP4 creation (ffmpeg) failed.' %
                    (obj.pid, dsid))
    else:
        update_datastream(obj,
                          mp4id,
                          target_path,
                          label='compressed mp4',
                          mimeType='video/mp4')

    rmtree(workdir, ignore_errors=True)
    return status
def handle_text_object(fedora_client, fedora_object, item):
    print '%s - handle text object' % (item.do_id,)
    # marcxml
    marcxml = workflow.core.models.Item_File.objects.get(item=item, use='MARCXML')
    fedoraLib.update_datastream(fedora_object, u'MARCXML', marcxml.path, label=marcxml.name, mimeType=u'text/xml', controlGroup='M')
    # mets 
    mets = workflow.core.models.Item_File.objects.get(item=item, use='METS')
    fedoraLib.update_datastream(fedora_object, u'METS', mets.path, label=mets.name, mimeType=u'text/xml', controlGroup='M')
    # ocr zip
    ocr_zipfile = workflow.core.models.Item_File.objects.get(item=item, use='OCR_ZIP')
    ocr_zip = zipfile.ZipFile(ocr_zipfile.path, 'r')
    # master pdf and ocr
    book_PDF_filename = os.path.join("/tmp", "%s.pdf" % item.do_id) 
    book_OCR_filename = os.path.join("/tmp", "%s-full.ocr" % item.do_id)
    ocr_page_list = []
    # pages
    page_label_dict = get_page_label_dict_from_mets(mets.path)
    cleaned_page_labels = clean_page_labels(page_label_dict)
    pages = workflow.core.models.Item_File.objects.filter(item=item, use='MASTER').order_by('name')
    for page in pages:
        ocr_filename = '%s.txt' % (os.path.splitext(page.name)[0],)
        ocr_path = None # initalize
        if ocr_filename in ocr_zip.namelist():
            ocr_file = ocr_zip.extract(ocr_filename, '/tmp') 
            ocr_path = os.path.join('/tmp', ocr_filename) 
            # add this page's ocr to the running total
            f = open(ocr_path, 'r')
            ocr_page_list.append(f.read())
            f.close()
        page_label = cleaned_page_labels[page.name]
        handle_page_object(fedora_client, fedora_object, page, ocr_path, page_label)
        if ocr_path:
            os.remove(ocr_path)

    ocr_book_data = ''.join(ocr_page_list)
    f = open(book_OCR_filename, "w")
    f.write(ocr_book_data)
    f.close()
    fedoraLib.update_datastream(fedora_object, u"BOOKOCR", book_OCR_filename, label=unicode(os.path.basename(book_OCR_filename)), mimeType="text/plain")
    os.remove(book_OCR_filename)

    return
Beispiel #18
0
    print("Attempt to create collection '%s' with pid=%s" % (myLabel, myPid))
    # validate the pid
    try:
        collection_object = fedora.getObject(myPid)
        print("Attempted to create already existing collection %s" % myPid)
        return collection_object
    except FedoraConnectionException, fcx:
        if not fcx.httpcode in [404]:
            raise fcx

    collection_object = fedora.createObject(myPid, label=myLabel)

    # this is the biggest difference between objects and collections - a collection policy
    # collection policy
    fedoraLib.update_datastream(collection_object, u"COLLECTION_POLICY", "collection_policy.xml", label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X')

    # thumnail, if one is supplied
    if tnUrl:
        # possibly check if tnUrl is a valid jpg
        #add a TN datastream to the object after creating it from the book cover
        fedoraLib.update_datastream(collection_object, 'TN', tnUrl, label=myLabel+'_TN.jpg', mimeType='image/jpeg')

    # rels-ext relations
    collection_relsext = createRelsExt(collection_object, parentPid, contentModel)

    return collection_object

def addObjectToFedora(fedora, myLabel, myPid, parentPid, contentModel, tnUrl=None, extraNamespaces={}, extraRelationships={}):
    """
    Add an object (not a collection) to fedora
    print("Attempt to create collection '%s' with pid=%s" % (myLabel, myPid))
    # validate the pid
    try:
        collection_object = fedora.getObject(myPid)
        print("Attempted to create already existing collection %s" % myPid)
        return collection_object
    except FedoraConnectionException, fcx:
        if not fcx.httpcode in [404]:
            raise fcx
        # if it is a 404, then we're ok - just make the object and continue

    collection_object = fedora.createObject(myPid, label=myLabel)

    # this is the biggest difference between objects and collections - a collection policy
    # collection policy
    fedoraLib.update_datastream(collection_object, u"COLLECTION_POLICY", "data/collection_policy.xml", label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X')

    # thumbnail, if one is supplied
    if tnUrl:
        tnExt = os.path.splitext(tnUrl)[1]
        fedoraLib.update_datastream(collection_object, u'TN', tnUrl, label=u"%s_TN%s" % (myLabel, tnExt), mimeType=misc.getMimeType(tnExt))

    # rels-ext relations
    collection_relsext = createRelsExt(collection_object, parentPid, contentModel)

    return collection_object

def addObjectToFedora(fedora, myLabel, myPid, parentPid, contentModel, tnUrl=None, state=u'A', extraNamespaces={}, extraRelationships={}):
    """
    Add an object (not a collection) to fedora
    @param fedora The fedora instance to add the object to
        pass
    # validate required objects, (for now) skip if not found
    try:
        mods = workflow.core.models.Item_File.objects.get(item=item, use='MODS')
        dc = workflow.core.models.Item_File.objects.get(item=item, use='DC')
        thumb = workflow.core.models.Item_File.objects.get(item=item, use='THUMB')
    except:
        return
    try:
        obj = addObjectToFedora(fedora_client, label, pid, parent_pid, cm)
    except Exception, ex:
        print 'connection error while trying to add fedora object %s: %s' % (pid, ex.message)
        return False
    # mods
    mods = workflow.core.models.Item_File.objects.get(item=item, use='MODS')
    fedoraLib.update_datastream(obj, u'MODS', mods.path, label=mods.name, mimeType=u'text/xml', controlGroup='X')
    # dc
    dc = workflow.core.models.Item_File.objects.get(item=item, use='DC')
    fedoraLib.update_datastream(obj, u'DC', dc.path, label=dc.name, mimeType=u'text/xml', controlGroup='M')
    # thumb
    thumb = workflow.core.models.Item_File.objects.get(item=item, use='THUMB')
    fedoraLib.update_datastream(obj, u'TN', thumb.path, label=thumb.name, mimeType=u'image/jpeg', controlGroup='M')
    return obj

def handle_derived_jp2(fedora_object, tiff):
    baseName = os.path.splitext(tiff.name)[0]
    #jp2_file = os.path.join('/tmp', '%s.jp2' % (baseName,))
    #converter.tif_to_jp2(tiff.path, jp2_file, 'default', 'default')
    shutil.copy(tiff.path, '/tmp/')
    jp2_source = os.path.join('/tmp', tiff.name)
    encoder = '/usr/local/dlxs/prep/i/image/encodeJp2'
Beispiel #21
0
    try:
        collection_object = fedora.getObject(myPid)
        print("Attempted to create already existing collection %s" % myPid)
        return collection_object
    except FedoraConnectionException, fcx:
        if not fcx.httpcode in [404]:
            raise fcx
        # if it is a 404, then we're ok - just make the object and continue

    collection_object = fedora.createObject(myPid, label=myLabel)

    # this is the biggest difference between objects and collections - a collection policy
    # collection policy
    fedoraLib.update_datastream(collection_object,
                                u"COLLECTION_POLICY",
                                "data/collection_policy.xml",
                                label=u'COLLECTION_POLICY',
                                mimeType=u'text/xml',
                                controlGroup=u'X')

    # thumbnail, if one is supplied
    if tnUrl:
        tnExt = os.path.splitext(tnUrl)[1]
        fedoraLib.update_datastream(collection_object,
                                    u'TN',
                                    tnUrl,
                                    label=u"%s_TN%s" % (myLabel, tnExt),
                                    mimeType=misc.getMimeType(tnExt))

    # rels-ext relations
    collection_relsext = createRelsExt(collection_object, parentPid,
                                       contentModel)
Beispiel #22
0
def _video_midpoint_seconds(ffmpeg_output):
    """Return half of the "Duration: HH:MM:SS.xx," value found in ffmpeg output, in seconds, or None if absent."""
    match = re.search(r"Duration:\s{1}(\d{2}):(\d{2}):(\d{2})\.\d{2},",
                      ffmpeg_output)
    if match is None:
        return None
    hours, minutes, seconds = [int(part) for part in match.groups()]
    # An hour is 3600 seconds; the original multiplied by 360 and so picked
    # the wrong frame for videos of an hour or more.
    return math.floor(((hours * 3600) + (minutes * 60) + seconds) / 2)


def create_thumbnail(obj, dsid, tnid):
    """Create a JPEG thumbnail of a datastream and store it as ``tnid``.

    For ``video/mp4`` sources a single frame is grabbed with ffmpeg from the
    middle of the video and then scaled with ``convert``; every other mime
    type is handed to ``convert`` directly (first page/frame only).

    The thumbnail geometry comes from ``tn_size``, which is not defined in
    this function - presumably a module-level (width, height) pair.

    Returns 0 on success, otherwise the return code of the failing command,
    or 1 when the video duration cannot be read from ffmpeg's output. The
    temporary directory is removed even when an exception propagates.
    """
    logger = logging.getLogger('islandoraUtils.DSConverter.create_thumbnail')

    # We receive a file and create a jpg thumbnail
    directory, file = get_datastream_as_file(obj, dsid, "tmp")
    try:
        # find out what mimetype the input file is
        try:
            mime = obj[dsid].mimeType
        except KeyError:
            mime = None

        infile = os.path.join(directory, file)
        tmpfile = os.path.join(directory, 'tmp.jpg')
        tnfile = os.path.join(directory, tnid)

        # make the thumbnail based on the mimetype of the input
        # right now we assume everything but video/mp4 can be handled
        if mime == 'video/mp4':
            # grab the 'middle' of the video for use in creating thumbnails from mp4s
            p = subprocess.Popen(['ffmpeg', '-i', infile],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            # ffmpeg expects an output argument too, but it still prints the
            # duration on stderr when given only the source.
            stderr = p.communicate()[1]
            midpoint = _video_midpoint_seconds(stderr)
            if midpoint is None:
                # Previously this crashed with AttributeError on the missing
                # regex match; report it as a failed conversion instead.
                logger.warning(
                    'PID:%s DSID:%s Could not determine video duration.' %
                    (obj.pid, dsid))
                r = 1
            else:
                r = subprocess.call([
                    'ffmpeg', '-itsoffset', '-4', '-ss', str(midpoint), '-i',
                    infile, '-vcodec', 'mjpeg', '-vframes', '1', '-an', '-f',
                    'rawvideo', tmpfile
                ])
                if r == 0:
                    r = subprocess.call([
                        'convert', '%s[0]' % tmpfile, '-thumbnail',
                        '%sx%s' % tn_size, '-colorspace', 'rgb',
                        'jpg:%s' % tnfile
                    ])
        else:
            # Make a thumbnail with convert
            r = subprocess.call([
                'convert', '%s[0]' % infile, '-thumbnail', '%sx%s' % tn_size,
                '-colorspace', 'rgb', '+profile', '*', 'jpg:%s' % tnfile
            ])
        if r == 0:
            update_datastream(obj,
                              tnid,
                              directory + '/' + tnid,
                              label='thumbnail',
                              mimeType='image/jpeg')
        else:
            logger.warning(
                'PID:%s DSID:%s Thumbnail creation failed (return code:%d).' %
                (obj.pid, dsid, r))

        logger.debug(directory)
        logger.debug(file)
        logger.debug(tnid)
        logger.debug(os.listdir(directory))
        return r
    finally:
        rmtree(directory, ignore_errors=True)