Ejemplo n.º 1
0
    def __processImage(self, el, firstImage):
        """
        Ingest one image element and relate it to the current concert.

        Reads the element's 'id' attribute and its 'ruta' (file path) and
        'pie' (caption) children.  When the id and path are present and the
        resolved path exists on disk, the Fedora object carrying that
        fjm-db:imageID is fetched (or a new one is created), its JPG
        datastream, RELS-EXT and DC are updated, and it is marked active.

        @param el The image element; get(), findtext() and sourceline are used.
        @param firstImage When true, the image is also related to the concert
               with atm-rel:isIconOf (used for thumbnails).
        @return True when the image was ingested, False when the element was
                skipped (a warning is logged in that case).
        """
        i_dict = dict(
            id=el.get('id', default=None),
            path=el.findtext('ruta'),
            description=el.findtext('pie'),
            line=el.sourceline
        )

        # Guard clause: nothing can be ingested without an id and a real file.
        usable = i_dict['id'] and i_dict['path'] and path.exists(self.getPath(i_dict['path']))
        if not usable:
            logger.warning('No ID or invalid path for image at line: %(line)s' % i_dict)
            return False

        # Reuse the object already tagged with this imageID; getPid signals
        # "unknown id" with a KeyError, in which case a new object is made.
        try:
            image_pid = FedoraWrapper.getPid(uri=ao.NS['fjm-db'].uri, predicate='imageID', obj="'%(id)s'" % i_dict)
            image = FedoraWrapper.client.getObject(image_pid)
        except KeyError:
            image = FedoraWrapper.getNextObject(self.prefix, label='Image: %(id)s' % i_dict)

        #FIXME:  Detect Mimetype, and create image accordingly?
        update_datastream(obj=image, dsid="JPG", filename=self.getPath(i_dict['path']), mimeType="image/jpeg")

        i_rels_ext = FR.rels_ext(obj=image, namespaces=ao.NS.values())

        model_rel = (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:imageCModel', FR.rels_object.PID)
        )
        id_rel = (
            FR.rels_predicate(alias='fjm-db', predicate='imageID'),
            FR.rels_object(el.get('id'), FR.rels_object.LITERAL)
        )
        # Relate the image to the concert as a general image...
        concert_rel = (
            FR.rels_predicate(alias='atm-rel', predicate='isImageOf'),
            FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
        )
        rels = [model_rel, id_rel, concert_rel]

        # ...and flag the first image as the "primary" one (thumbnails).
        if firstImage:
            icon_rel = (
                FR.rels_predicate(alias='atm-rel', predicate='isIconOf'),
                FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
            )
            rels.append(icon_rel)

        # Update and commit the rels_ext
        FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=i_rels_ext).update()

        # The DC dict is built from scratch here, so the caption ('pie') is
        # recorded as the description whenever the element supplies one.
        dc = {'type': [unicode('StillImage')]}
        if i_dict['description']:
            dc['description'] = [unicode('%(description)s' % i_dict)]
        Concert.save_dc(image, dc)

        image.state = unicode('A')
        return True
Ejemplo n.º 2
0
    def process(self):
        """
        Locate or create the Fedora object for this composer and prepare its
        RELS-EXT relationships and EAC-CPF record.

        Lookup order:
            1.  by normalized name (self[self.norm_name]),
            2.  by fjm-db:composerID (FedoraWrapper.getPid),
            3.  otherwise a new object is created under self.prefix.
        Both lookups report "not found" by raising KeyError.  The resulting
        object is stored in self.composer.

        Raises Exception when a lookup succeeds but yields an empty pid, and
        re-raises any FedoraConnectionException other than an HTTP 404 hit
        while reading the EAC-CPF datastream.
        """
        logger = self.logger
        logger.info('Starting to ingest: %(class)s %(id)s' % {'class': type(self), 'id': self.dbid})

        def use_existing(pid):
            # Shared handling for a pid returned by either lookup: log it,
            # reject an empty value, otherwise load the object it names.
            logger.info('Found %(pid)s' % {'pid': pid})
            if not pid:
                msg = 'Something went horribly wrong!  Found a pid (%(pid)s), but couldn\'t access it...' % {'pid': pid}
                logger.error(msg)
                raise Exception(msg)
            logger.warning('%(name)s already exists as pid %(pid)s! Overwriting DC DS!' % {'name': self.norm_name, 'pid': pid})
            self.composer = FedoraWrapper.client.getObject(pid)

        try:
            logger.info('Checking to see if %s already exists in Fedora' % self.norm_name)
            use_existing(self[self.norm_name])
        except KeyError:
            try:
                logger.debug('Not known by name, checking by composerID')
                use_existing(FedoraWrapper.getPid(uri=Composer.NS['fjm-db'].uri, predicate='composerID', obj="'%s'" % self.dbid))
            except KeyError:
                logger.info('Doesn\'t exist: creating a new Fedora Object')
                self.composer = FedoraWrapper.getNextObject(self.prefix, label='Composer %s' % self.dbid)

        rels_ext = FR.rels_ext(self.composer, namespaces=Composer.NS.values())
        rels = [
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:personCModel', FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fjm-db', predicate='composerID'),
                FR.rels_object(self.dbid, FR.rels_object.LITERAL)
            )
        ]

        FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
        FedoraWrapper.correlateDBEntry('composedBy', 'composerID')

        # Try to get an existing EAC-CPF, or create one if none is found.
        # A 404 on the datastream means "no EAC-CPF yet"; anything else is a
        # real failure and is re-raised with its original traceback intact.
        try:
            eaccpf = CPF.EACCPF(self.composer.pid, xml=self.composer['EAC-CPF'].getContent().read())
            event_type = "modified"
        except fcrepo.connection.FedoraConnectionException as e:
            if e.httpcode != 404:
                raise
            eaccpf = CPF.EACCPF(self.composer.pid)
            event_type = "created"
Ejemplo n.º 3
0
    def __processConferences(self):
        """
        Ingest every 'Eventos_Asociados/Evento_Asociado' element of this
        concert as a lecture object.

        For each element with an 'id' attribute the object carrying that
        fjm-db:lectureID is fetched (or created), made a member of the
        concert object, given the lecture content model, and updated with the
        MP3 named by 'ruta' (when that file exists) and a DC record.
        Elements without an id are skipped with a warning.
        """
        logger = logging.getLogger('ingest.atm_concert.__processConferences')
        for el in self.element.findall('Eventos_Asociados/Evento_Asociado'):
            e_dict = {
                'id': el.get('id'),
                'type': el.findtext('Tipo'),
                'description': el.findtext('descripcion'),
                'mp3_path': el.findtext('ruta'),
                'concert': self.dbid,
                'line': el.sourceline
            }

            if not e_dict['id']:
                # Nothing to key the object on; report it instead of
                # dropping the element silently.
                logger.warning('No ID for associated event at line: %(line)s' % e_dict)
                continue

            # getPid signals an unknown lectureID with a KeyError.
            try:
                pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate="lectureID", obj="'%(id)s'" % e_dict)
                conference = FedoraWrapper.client.getObject(pid)
            except KeyError:
                conference = FedoraWrapper.getNextObject(self.prefix, label="Conference %(id)s in %(concert)s" % e_dict)

            c_rels_ext = FR.rels_ext(obj=conference, namespaces=ao.NS.values())

            rels = [
                (
                    FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
                    FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
                ),
                (
                    FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                    FR.rels_object('atm:lectureCModel', FR.rels_object.PID)
                ),
                (
                    FR.rels_predicate(alias='fjm-db', predicate='lectureID'),
                    FR.rels_object(e_dict['id'], FR.rels_object.LITERAL)
                )
            ]

            #Add and commit relationships
            FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=c_rels_ext).update()

            if e_dict['mp3_path']:
                mp3_path = self.getPath(e_dict['mp3_path'])
                if path.exists(mp3_path):
                    update_datastream(obj=conference, dsid='MP3', filename=mp3_path, mimeType="audio/mpeg")
                else:
                    logger.error('MP3 specified (%(mp3_path)s), but doesn\'t exist for id %(id)s on line %(line)s' % e_dict)
            else:
                logger.warning('No MP3 indicated for id %(id)s on line %(line)s' % e_dict)

            dc = dict()
            dc['type'] = [unicode('Sound')]
            # findtext() gives None for a missing child, and unicode(None)
            # would store the literal text u'None'; only record what exists.
            if e_dict['description'] is not None:
                dc['description'] = [unicode(e_dict['description'])]
            if e_dict['type'] is not None:
                dc['subject'] = [unicode(e_dict['type'])]
            Concert.save_dc(conference, dc)
            conference.state = unicode('A')
Ejemplo n.º 4
0
def createRelsExt(childObject,
                  parentPid,
                  contentModel,
                  extraNamespaces=None,
                  extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid
    We set the default namespace for our interconnections, then apply the content model, and make
    childObject a member of the object:parentPid collection.  If object:parentPid doesn't have the
    collection content model then strange things might happen.
    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of alias -> uri to add to the RELS-EXT namespaces.
           Anything that is empty or not a dict is ignored.
    @param extraRelationships Optional dict of predicate -> value; each value is added to
           childObject as a literal.  Anything that is empty or not a dict is ignored.  By default
           the object gets: hasModel:contentModel and isMemberOfCollection:parentPid

    The update is retried for as long as Fedora reports that the object is being modified by
    another thread; any other FedoraConnectionException is printed and the function returns.
    """

    nsmap = [
        fedora_relationships.rels_namespace(
            'fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
    ]
    # isinstance() also accepts dict subclasses, which the former exact type test rejected.
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for alias, uri in extraNamespaces.items():
            nsmap.append(fedora_relationships.rels_namespace(alias, uri))

    #add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')

    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        [contentModel, "pid"])
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'),
        [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for predicate, value in extraRelationships.items():
            rels_ext.addRelationship(predicate, [value, "literal"])

    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(fedoraEXL.body):
                # Lock contention on the object: try the update again.
                print(
                    "Trouble (thread lock) updating obj(%s) RELS-EXT - retrying."
                    % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
Ejemplo n.º 5
0
 def process(self):
     """
     Ensure Fedora objects exist for this instrument and its instrument
     class, then relate the instrument to the class.

     Known pids are read from the Instrument class-level lookups; a missing
     key (KeyError) means the object has to be created and registered.
     Both objects are marked active.
     """
     # --- instrument class -------------------------------------------------
     try:
         class_pid = Instrument.__getClasses()[self.classID]
         class_obj = FedoraWrapper.client.getObject(class_pid)
     except KeyError:
         class_obj = FedoraWrapper.getNextObject(self.prefix, label='Instrument class %s' % self.classID)
         Instrument.__addInstrumentClass(self.classID, class_obj.pid)
         # Relationships and DC are only written when the class is created.
         class_id_rel = (
             FR.rels_predicate(alias='fjm-db', predicate='instrumentClassID'),
             FR.rels_object(self.classID, FR.rels_object.LITERAL)
         )
         class_model_rel = (
             FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
             FR.rels_object('atm:instrumentClassCModel', FR.rels_object.PID)
         )
         FedoraWrapper.addRelationshipsWithoutDup([class_id_rel, class_model_rel], fedora=class_obj).update()
         Instrument.save_dc(class_obj, {'title': [self.instrumentClass]})
     class_obj.state = unicode('A')

     # --- instrument -------------------------------------------------------
     try:
         instrument_pid = Instrument.__getInstruments()[self.instrumentName]
         instrument_obj = FedoraWrapper.client.getObject(instrument_pid)
     except KeyError:
         instrument_obj = FedoraWrapper.getNextObject(self.prefix, label='Instrument %s' % self.dbid)
         Instrument.__addInstrument(self.instrumentName, instrument_obj.pid)
         Instrument.save_dc(instrument_obj, {'title': [self.instrumentName]})

     # Unlike the class, the instrument's relationships are (re)applied on
     # every run, whether the object was found or just created.
     instrument_rels = [
         (
             FR.rels_predicate(alias='fjm-db', predicate='instrumentID'),
             FR.rels_object(self.dbid, FR.rels_object.LITERAL)
         ),
         (
             FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
             FR.rels_object('atm:instrumentCModel', FR.rels_object.PID)
         ),
         (
             FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
             FR.rels_object(class_obj.pid, FR.rels_object.PID)
         )
     ]
     FedoraWrapper.addRelationshipsWithoutDup(instrument_rels, fedora=instrument_obj).update()
     FedoraWrapper.correlateDBEntry('instrument', 'instrumentID')
     instrument_obj.state = unicode('A')
Ejemplo n.º 6
0
    def correlateDBEntry(predicate, idpred):
        '''
        This function is used to add relations involving PIDs to objects, based on relations to literals which were added during the original ingest.
        For example, in the original ingest, 'performances' are added with a relation to the score db id 'fjm-db:basedOn', which the scores are have relations to their DB id 'fjm-db:scoreID'.  This function uses a query which matches the two literals, and adds the relation 'atm-rel:basedOn' (note:  same predicate, different namespace) to the performance,  which relates directly to the score whose ID matched.
        
        NOTE:  SPARQL is bloody amazing.  That is all...
            (query description:
                1.  add prefixes,
                2.  select the object and subject of the relationship to resolve, based on matching the ID
                3.  optionally select any already existing relationships
                4.  keep results where step 3 returned nothing, or those where the selected $sub is not equal to anything found in step 3.)
        TODO (minor): I can see this being a little slow, as it is called fairly often...  Some method to streamline this might be nice, or to call it less frequently?...  Anyway.
        '''
        FedoraWrapper.init()
        for result in FedoraWrapper.client.searchTriples(query='\
PREFIX atm-rel: <%(atm-rel)s> \
PREFIX fjm-db: <%(fjm-db)s> \
SELECT $obj $sub \
FROM <#ri> \
WHERE { \
    $obj fjm-db:%(predicate)s $id . \
    $sub fjm-db:%(idpred)s $id . \
    OPTIONAL {$obj atm-rel:%(predicate)s $pid} . \
    FILTER(!bound($pid) || $sub != $pid) \
}' % {
        'fjm-db': ao.NS['fjm-db'].uri,
        'atm-rel': ao.NS['atm-rel'].uri,
        'predicate': predicate,
        'idpred': idpred
    }, lang='sparql', limit='1000000'):
            FedoraWrapper.addRelationshipWithoutDup((
                    FR.rels_predicate(alias='atm-rel', predicate=predicate),
                    FR.rels_object(result['sub']['value'].rpartition('/')[2], FR.rels_object.PID)
                ), fedora=FedoraWrapper.client.getObject(result['obj']['value'].rpartition('/')[2])).update()
def handle_still_mods(still_mods_parser, mods_file_name):
    '''
    This function will handle the creation of still image objects
    @param still_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file; the part between the first '-' and the last '.'
      is appended to the movie name to build the object label
    @return boolean
      True when the MODS names a still image (the PNG upload is attempted and
      the relationships are written), False when it does not

    Relies on module-level state: fedora, name_space, movie_name, movie_pid,
    mods_file_path, clips_to_pids and the two rels namespaces.
    '''
    still_path = get_file_path_from_xpath(still_mods_parser, "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Still Image']")

    still_pid = fedora.getNextPID(name_space)
    still_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-')+1:mods_file_name.rfind('.')])
    still_object = fedora.createObject(still_pid, label = still_label)
    still_object_RELS_EXT = fedora_relationships.rels_ext(still_object,[hamilton_rdf_name_space, fedora_model_namespace])

    #datastreams
    add_MODS_datastream(still_object, mods_file_path)

    # NOTE(review): the object and its MODS already exist at this point, so a
    # MODS without a still image leaves an object with no relationships.
    if not still_path:
        return False

    # 'with' closes the file even if something other than a
    # FedoraConnectionException escapes from the upload.
    with open(still_path, 'rb') as png_file_handle:
        try:
            still_object.addDataStream(u'PNG', u'aTmpStr', label=u'PNG',
            mimeType = u'image/png', controlGroup = u'M',
            logMessage = u'Added PNG datastream.')
            datastream = still_object['PNG']
            datastream.setContent(png_file_handle)
            logging.info('Added PNG datastream to:' + still_pid)
        except FedoraConnectionException:
            logging.error('Error in adding PNG datastream to:' + still_pid + '\n')

    #relationships
    # A still belongs to the clip named in the MODS when there is one,
    # otherwise to the movie itself.
    still_clip_element_list = still_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']")
    if still_clip_element_list:
        still_clip_file_name = still_clip_element_list[0].text
        still_of = clips_to_pids[still_clip_file_name]
    else:
        still_of = movie_pid
    still_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isStillOf'), still_of)

    still_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':benshiStill')
    still_object_RELS_EXT.update()

    return True
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None, extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid
    We set the default namespace for our interconnections, then apply the content model, and make
    childObject a member of the object:parentPid collection.  If object:parentPid doesn't have the
    collection content model then strange things might happen.
    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of alias -> uri to add to the RELS-EXT namespaces.
           Anything that is empty or not a dict is ignored.
    @param extraRelationships Optional dict of predicate -> value; each value is added to
           childObject as a pid.  Anything that is empty or not a dict is ignored.  By default
           the object gets: hasModel:contentModel and isMemberOfCollection:parentPid

    The update is retried for as long as Fedora reports that the object is being modified by
    another thread; any other FedoraConnectionException is printed and the function returns.
    """

    nsmap = [ fedora_relationships.rels_namespace('fedora', 'info:fedora/fedora-system:def/relations-external#'),
              fedora_relationships.rels_namespace('fedora-model', 'info:fedora/fedora-system:def/model#')
             ]
    # isinstance() also accepts dict subclasses, which the former exact type test rejected.
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for alias, uri in extraNamespaces.items():
            nsmap.append(fedora_relationships.rels_namespace(alias, uri))

    #add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')

    rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora-model', 'hasModel'), [contentModel, "pid"])
    rels_ext.addRelationship(fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'), [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for predicate, value in extraRelationships.items():
            rels_ext.addRelationship(predicate, [value, "pid"])

    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(fedoraEXL.body):
                # Lock contention on the object: try the update again.
                print("Trouble (thread lock) updating obj(%s) RELS-EXT - retrying." % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
Ejemplo n.º 9
0
    def process(self):
        """
        Locate or create the Fedora object for this group and write its
        RELS-EXT relationships and DC record.

        The object is looked up by fjm-db:groupID; a KeyError from the lookup
        means it does not exist yet and a new one is created under
        self.prefix.  Raises Exception if the lookup succeeds but yields an
        empty pid.  The object is marked active at the end.
        """
        logger = self.logger
        logger.info('Starting to ingest: Groupo %s' % self.dbid)

        try:
            pid = FedoraWrapper.getPid(uri=ao.NS['fjm-db'].uri, predicate='groupID', obj="'%s'" % self.dbid)
            if not pid:
                raise Exception('Something went horribly wrong!  Found a pid, but couldn\'t access it...')
            logger.warning('Group %(id)s already exists as pid %(pid)s! Overwriting DC DS!' % {'id': self.dbid, 'pid': pid})
            group = FedoraWrapper.client.getObject(pid)
        except KeyError:
            group = FedoraWrapper.getNextObject(self.prefix, label='Group %s' % self.dbid)

        rels_ext = FR.rels_ext(group, namespaces=ao.NS.values())
        model_rel = (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:groupCModel', FR.rels_object.PID)
        )
        id_rel = (
            FR.rels_predicate(alias='fjm-db', predicate='groupID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        )
        FedoraWrapper.addRelationshipsWithoutDup([model_rel, id_rel], rels_ext=rels_ext).update()

        # The title is the stripped text of the element's 'grupo' child.
        dc = {
            'type': [unicode('Collection')],
            'title': [self.element.findtext('grupo').strip()]
        }
        Group.save_dc(group, dc)

        FedoraWrapper.correlateDBEntry('group', 'groupID')
        group.state = unicode('A')
            
Ejemplo n.º 10
0
def handle_page_object(fedora_client, fedora_object, page, ocr_path, label):
    """
    Create the page object for one page of fedora_object and attach its files.

    The page is added through addObjectToFedora with fedora_object as parent
    and two extra pageNS relationships (isPageNumber, isPageOf).  It then
    receives:
        - TIFF (the page file itself)
        - JP2 (via handle_derived_jp2)
        - OCR, if ocr_path is given
    MIX derivation is currently disabled.

    @param fedora_client Client handed on to addObjectToFedora.
    @param fedora_object The parent object; its pid and label are used.
    @param page The page file; its name and path attributes are used.  The
           name without extension must parse as an integer.
    @param ocr_path Path of the OCR text file, or a false value for none.
    @param label Page label, prefixed to the shortened parent label.
    """
    content_model = ITEM_TYPE_CM_MAP['page']
    basename = os.path.splitext(page.name)[0]
    parent_pid = fedora_object.pid
    pid_for_page = '%s-%s' % (parent_pid, basename)
    label_for_page = u'%s, %s' % (label, drl.utils.shorten_string(fedora_object.label, 205))

    page_namespaces = { 'pageNS' : 'info:islandora/islandora-system:def/pageinfo#' }
    # should the page number be a counter here instead of int(page_basename)?
    page_relationships = {
        fedora_relationships.rels_predicate('pageNS', 'isPageNumber') : str(int(basename)),
        fedora_relationships.rels_predicate('pageNS', 'isPageOf') : str(fedora_object.pid),
    }

    page_object = addObjectToFedora(
        fedora_client,
        label_for_page,
        pid_for_page,
        fedora_object.pid,
        content_model,
        extraNamespaces=page_namespaces,
        extraRelationships=page_relationships)

    fedoraLib.update_datastream(page_object, 'TIFF', page.path, label=page.name, mimeType='image/tiff', controlGroup='M')
    handle_derived_jp2(page_object, page)
    #handle_derived_mix(page_object, page)

    if not ocr_path:
        return
    ocr_label = unicode(os.path.basename(ocr_path))
    fedoraLib.update_datastream(page_object, u'OCR', ocr_path, label=ocr_label, mimeType=u'text/plain', controlGroup='M')
Ejemplo n.º 11
0
    def test_two_namespace_literal(self):
        xmlStr = """
<rdf:RDF xmlns:coal="http://www.coalliance.org/ontologies/relsint" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:fedora="info:fedora/fedora-system:def/relations-external#" xmlns:jon="http://jebus/trainstation">
  <rdf:Description rdf:about="info:fedora/coccc:2040">
    <jon:feezle>JON</jon:feezle>
  </rdf:Description>
</rdf:RDF>
        """
        relationship = fedora_relationship([
            rels_namespace('coal',
                           'http://www.coalliance.org/ontologies/relsint'),
            rels_namespace('jon', 'http://jebus/trainstation')
        ])
        relationship.addRelationship('coccc:2040',
                                     rels_predicate('jon', 'feezle'),
                                     rels_object('JON', rels_object.LITERAL))
        result_string = XmlHelper.mangle(relationship.toString())
        expected_string = XmlHelper.mangle(xmlStr)
        self.assertEqual(result_string, expected_string,
                         'Generated XML Incorrect')
Ejemplo n.º 12
0
        if object_fetch_exception.httpcode in [404]:
            logging.info(name_space + ':itm missing, creating object.\n')
            collection_object = fedora.createObject(collection_pid, label = collection_label)
            #collection_policy
            try:
                collection_object.addDataStream(u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY',
                mimeType=u'text/xml', controlGroup=u'X',
                logMessage=u'Added basic COLLECTION_POLICY data.')
                logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid)
            except FedoraConnectionException:
                logging.error('Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n')
            
            #add relationships
            collection_object_RELS_EXT = fedora_relationships.rels_ext(collection_object, fedora_model_namespace)
            collection_object_RELS_EXT.addRelationship('isMemberOfCollection','islandora:root')
            collection_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'islandora:collectionCModel')
            collection_object_RELS_EXT.update()

    #loop through the mods folder
    for mods_file in mods_files:
        if mods_file.endswith('-MODS.xml'):
            #get mods file contents
            mods_file_path = os.path.join(source_directory, 'mods', mods_file)
            mods_file_handle = open(mods_file_path)
            mods_contents = mods_file_handle.read()
           
            #get map_label from mods title
            mods_tree = etree.parse(mods_file_path)
            map_label = mods_tree.xpath("*[local-name() = 'titleInfo']/*[local-name() = 'title']/text()")
            map_label = map_label[0]
            if len(map_label) > 255:
                    controlGroup=u'X',
                    logMessage=u'Added basic COLLECTION_POLICY data.')
                logging.info('Added COLLECTION_POLICY datastream to:' +
                             collection_pid)
            except FedoraConnectionException:
                logging.error(
                    'Error in adding COLLECTION_POLICY datastream to:' +
                    collection_pid + '\n')

            #add relationships
            collection_object_RELS_EXT = fedora_relationships.rels_ext(
                collection_object, fedora_model_namespace)
            collection_object_RELS_EXT.addRelationship('isMemberOf',
                                                       'islandora:root')
            collection_object_RELS_EXT.addRelationship(
                fedora_relationships.rels_predicate('fedora-model',
                                                    'hasModel'),
                'islandora:collectionCModel')
            collection_object_RELS_EXT.update()

    #loop through the mods folder
    for mods_file in mods_files:
        if mods_file.endswith('MODS.xml'):
            #get mods file contents
            mods_file_path = os.path.join(source_directory, 'mods-xml',
                                          mods_file)
            mods_file_handle = open(mods_file_path)
            mods_contents = mods_file_handle.read()

            #get book_label from mods title
            mods_tree = etree.parse(mods_file_path)
            book_label = mods_tree.xpath(
def _add_clip_video_datastream(clip_object, clip_pid, dsid, video_path):
    '''
    Upload the file at video_path to clip_object as the managed QuickTime
    datastream named dsid.  A FedoraConnectionException is logged rather than
    raised, and the file is closed whatever happens.
    '''
    with open(video_path, 'rb') as video_handle:
        try:
            clip_object.addDataStream(dsid,
                                      u'aTmpStr',
                                      label=dsid,
                                      mimeType=u'video/quicktime',
                                      controlGroup=u'M',
                                      logMessage=u'Added %s datastream.' % dsid)
            clip_object[dsid].setContent(video_handle)
            logging.info('Added %s datastream to:%s' % (dsid, clip_pid))
        except FedoraConnectionException:
            logging.error('Error in adding %s datastream to:%s\n' % (dsid, clip_pid))


def handle_clip_mods(clip_mods_parser, mods_file_name):
    '''
    This function will handle the creation of clip objects
    @param clip_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file.  The text between '-cp' and the last '.' (minus
      any '-sub') is the clip number; a name containing '-sub' marks a
      subtitled clip, which is related to the clip whose name is the same
      without '-sub'
    @return boolean
      Always True; datastream upload failures are logged, not reported

    Relies on module-level state: fedora, name_space, movie_name, movie_pid,
    mods_file_path and the two rels namespaces.  Records the new pid in
    clips_to_pids under mods_file_name.
    '''
    global clips_to_pids

    clip_pid = fedora.getNextPID(name_space)

    high_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='High Quality Video']"
    )
    low_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Web Quality Video']"
    )

    clip_number = mods_file_name[mods_file_name.find('-cp') +
                                 3:mods_file_name.rfind('.')]
    clip_number = clip_number.replace('-sub', '')

    clip_label = unicode(movie_name + '_' +
                         mods_file_name[mods_file_name.find('-') +
                                        1:mods_file_name.rfind('.')])
    clip_object = fedora.createObject(clip_pid, label=clip_label)

    #datastreams
    add_MODS_datastream(clip_object, mods_file_path)
    if high_resolution_mov_path:
        _add_clip_video_datastream(clip_object, clip_pid, u'HIGHRES',
                                   high_resolution_mov_path)
    if low_resolution_mov_path:
        _add_clip_video_datastream(clip_object, clip_pid, u'LOWRES',
                                   low_resolution_mov_path)

    #relationships
    clip_object_RELS_EXT = fedora_relationships.rels_ext(
        clip_object, [hamilton_rdf_name_space, fedora_model_namespace])
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipOf'), movie_pid)
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipNumber'),
        fedora_relationships.rels_object(
            str(clip_number), fedora_relationships.rels_object.LITERAL))

    clips_to_pids[mods_file_name] = clip_pid

    #this section handles the different types of clips (subs or not)
    if '-sub' not in mods_file_name:
        content_model = name_space + ':benshiClip'
    else:
        # A subtitled clip also points at its master clip, which must have
        # been handled (and stored in clips_to_pids) before this one.
        master_clip_file_name = mods_file_name.replace('-sub', '')
        clip_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isSubOf'),
            clips_to_pids[master_clip_file_name])
        content_model = name_space + ':benshiClipSubbed'

    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        content_model)
    clip_object_RELS_EXT.update()
    return True
def handle_transcript_mods(transcript_mods_parser, mods_file_name):
    '''
    This function will handle the creation of transcript objects.

    A new Fedora object is created, the MODS record and any referenced
    transcript files (time-synced XML and/or PDF) are attached as
    datastreams, and RELS-EXT relationships link the transcript to its
    clip (or to the whole movie) and to a language-specific content model.

    Relies on globals set up elsewhere in this file (fedora, name_space,
    movie_name, movie_pid, mods_file_path, clips_to_pids, ...).

    @param transcript_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file; the part between '-tr-' and the extension
      is used in the label and the '-jpneng' / '-jpn' / '-eng' marker
      selects the content model
    @return boolean
      True on success, False if mods_file_name contains none of the
      language markers (RELS-EXT is not written in that case)
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    transcript_pid = fedora.getNextPID(name_space)
    label_start = mods_file_name.find('-tr-') + 4
    label_end = mods_file_name.rfind('.')
    transcript_label = unicode(movie_name + '_' +
                               mods_file_name[label_start:label_end])
    transcript_object = fedora.createObject(transcript_pid,
                                            label=transcript_label)
    transcript_object_RELS_EXT = fedora_relationships.rels_ext(
        transcript_object, [hamilton_rdf_name_space, fedora_model_namespace])
    transcript_path = get_file_path_from_xpath(
        transcript_mods_parser, url_xpath + "[@displayLabel='Document']")
    time_synced_transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        url_xpath + "[@displayLabel='Document with time-sync encoding']")

    #datastreams
    add_MODS_datastream(transcript_object, mods_file_path)

    if time_synced_transcript_path:
        # 'with' closes the handle even if an unexpected exception escapes
        with open(time_synced_transcript_path, 'rb') as synced_handle:
            try:
                transcript_object.addDataStream(
                    u'TimeSyncedTranscript',
                    u'aTmpStr',
                    label=u'POPCORN',
                    mimeType=u'application/xml',
                    controlGroup=u'M',
                    logMessage=u'Added TimeSyncedTranscript datastream.')
                transcript_object['TimeSyncedTranscript'].setContent(
                    synced_handle)
                logging.info('Added TimeSyncedTranscript datastream to:' +
                             transcript_pid)
            except FedoraConnectionException:
                logging.error(
                    'Error in adding TimeSyncedTranscript datastream to:' +
                    transcript_pid + '\n')

    if transcript_path:
        with open(transcript_path, 'rb') as pdf_handle:
            try:
                transcript_object.addDataStream(
                    u'PDF',
                    u'aTmpStr',
                    label=u'PDF',
                    mimeType=u'application/pdf',
                    controlGroup=u'M',
                    logMessage=u'Added PDF datastream.')
                transcript_object['PDF'].setContent(pdf_handle)
                logging.info('Added PDF datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PDF datastream to:' +
                              transcript_pid + '\n')

    #relationships
    #handle is transcript of
    transcript_clip_element_list = transcript_mods_parser.xpath(
        url_xpath + "[@displayLabel='Video clip']")
    #there are multiple related items if the transcript is for the whole movie
    if len(transcript_clip_element_list) == 1:
        transcript_target = clips_to_pids[transcript_clip_element_list[0].text]
    else:
        transcript_target = movie_pid
    transcript_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isTranscriptOf'),
        transcript_target)

    #handle the 3 different transcript types
    #order matters: '-jpneng' must be tested before its prefix '-jpn'
    content_models = (
        ('-jpneng', ':EnglishJapaneseTranscript'),
        ('-jpn', ':JapaneseTranscript'),
        ('-eng', ':EnglishTranscript'),
    )
    for marker, model_suffix in content_models:
        if marker in mods_file_name:
            transcript_object_RELS_EXT.addRelationship(
                fedora_relationships.rels_predicate('fedora-model',
                                                    'hasModel'),
                name_space + model_suffix)
            break
    else:
        #unknown transcript type: bail out without writing RELS-EXT
        return False

    transcript_object_RELS_EXT.update()

    return True
def handle_still_mods(still_mods_parser, mods_file_name):
    '''
    This function will handle the creation of still image objects.

    A new Fedora object is created and the MODS record is attached. When
    the MODS record references a still image file, it is added as a PNG
    datastream and RELS-EXT relationships link the still to its clip (or
    to the whole movie) and to the benshiStill content model.

    Relies on globals set up elsewhere in this file (fedora, name_space,
    movie_name, movie_pid, mods_file_path, clips_to_pids, ...).

    @param still_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file; the part between the first '-' and the
      extension is used in the object label
    @return boolean
      True on success, False if no still image path was found (the
      object and its MODS datastream have already been created by then)
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    still_path = get_file_path_from_xpath(
        still_mods_parser, url_xpath + "[@displayLabel='Still Image']")

    still_pid = fedora.getNextPID(name_space)
    label_start = mods_file_name.find('-') + 1
    label_end = mods_file_name.rfind('.')
    still_label = unicode(movie_name + '_' +
                          mods_file_name[label_start:label_end])
    still_object = fedora.createObject(still_pid, label=still_label)
    still_object_RELS_EXT = fedora_relationships.rels_ext(
        still_object, [hamilton_rdf_name_space, fedora_model_namespace])

    #datastreams
    add_MODS_datastream(still_object, mods_file_path)
    if not still_path:
        return False

    # 'with' closes the handle even if an unexpected exception escapes
    with open(still_path, 'rb') as png_file_handle:
        try:
            still_object.addDataStream(u'PNG',
                                       u'aTmpStr',
                                       label=u'PNG',
                                       mimeType=u'image/png',
                                       controlGroup=u'M',
                                       logMessage=u'Added PNG datastream.')
            still_object['PNG'].setContent(png_file_handle)
            logging.info('Added PNG datastream to:' + still_pid)
        except FedoraConnectionException:
            logging.error('Error in adding PNG datastream to:' + still_pid +
                          '\n')

    #relationships
    still_clip_element_list = still_mods_parser.xpath(
        url_xpath + "[@displayLabel='Video clip']")
    if still_clip_element_list:
        #still belongs to a specific clip
        still_target = clips_to_pids[still_clip_element_list[0].text]
    else:
        #no clip referenced: attach the still to the movie itself
        still_target = movie_pid
    still_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isStillOf'),
        still_target)

    still_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + ':benshiStill')
    still_object_RELS_EXT.update()

    return True
def _add_managed_file_datastream(target_object, target_pid, dsid, mime_type,
                                 file_path):
    '''
    Attach the file at file_path to target_object as a managed ('M')
    datastream whose id and label are both dsid.

    A FedoraConnectionException is logged and swallowed (best effort);
    the file handle is always closed.
    '''
    with open(file_path, 'rb') as file_handle:
        try:
            target_object.addDataStream(
                unicode(dsid),
                u'aTmpStr',
                label=unicode(dsid),
                mimeType=mime_type,
                controlGroup=u'M',
                logMessage=unicode('Added ' + dsid + ' datastream.'))
            target_object[dsid].setContent(file_handle)
            logging.info('Added ' + dsid + ' datastream to:' + target_pid)
        except FedoraConnectionException:
            logging.error('Error in adding ' + dsid + ' datastream to:' +
                          target_pid + '\n')


def _create_misc_object(label_suffix):
    '''
    Create a new Fedora object labelled '<movie_name>_<label_suffix>' and
    return (pid, object, rels_ext) for it.
    '''
    misc_pid = fedora.getNextPID(name_space)
    misc_label = unicode(movie_name + '_' + label_suffix)
    misc_object = fedora.createObject(misc_pid, label=misc_label)
    misc_rels_ext = fedora_relationships.rels_ext(
        misc_object, [hamilton_rdf_name_space, fedora_model_namespace])
    return misc_pid, misc_object, misc_rels_ext


def handle_misc_mods(misc_mods_parser, mods_file_name):
    '''
    This function will handle the MODS records that are not clips,
    transcripts or stills. The local genre of the record decides what
    happens:

      'sound recording' - MODS and MP3 are added to the benshi object
      'essay'           - new object with MODS and DOCX
      'presentation'    - new object with MODS and PPTX
      'Motion Picture'  - MODS and an OPAC redirect are added to the
                          movie object
      'biography'       - new narrator object with MODS, DOCX and/or PDF

    Relies on globals set up elsewhere in this file (fedora, name_space,
    movie_name, movie_pid, movie_object, benshi_pid, benshi_object,
    mods_file_path, mods_directory, ...).

    @param misc_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file (not used by this function)
    @return boolean
      True on success or when the record has no local genre, False if
      the genre is not one of the handled types
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    misc_type_list = misc_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'genre'][@type='local']")
    if not misc_type_list:
        #no local genre: nothing to do, still reported as success
        return True

    misc_type = misc_type_list[0].text
    print(misc_type)

    if misc_type == 'sound recording':  #fix up benshi object
        #datastreams
        add_MODS_datastream(benshi_object, mods_file_path)
        audio_file_path = get_file_path_from_xpath(
            misc_mods_parser, url_xpath + "[@displayLabel='Audio']")
        if audio_file_path:
            _add_managed_file_datastream(benshi_object, benshi_pid, 'MP3',
                                         u'audio/mpeg', audio_file_path)
            print(audio_file_path)

    elif misc_type == 'essay':
        misc_pid, misc_object, misc_object_RELS_EXT = _create_misc_object(
            misc_type)
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        essay_file_path = get_file_path_from_xpath(
            misc_mods_parser, url_xpath + "[@displayLabel='Article']")
        if essay_file_path:
            _add_managed_file_datastream(
                misc_object, misc_pid, 'DOCX',
                u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                essay_file_path)

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isEssayOf'),
            movie_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiEssay')
        misc_object_RELS_EXT.update()

    elif misc_type == 'presentation':
        misc_pid, misc_object, misc_object_RELS_EXT = _create_misc_object(
            misc_type)
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        presentation_file_path = get_file_path_from_xpath(
            misc_mods_parser, url_xpath + "[@displayLabel='Presentation']")
        if presentation_file_path:
            _add_managed_file_datastream(
                misc_object, misc_pid, 'PPTX',
                u'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                presentation_file_path)

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton',
                                                'isPresentationOf'),
            movie_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiPresentation')
        misc_object_RELS_EXT.update()

    #movie gets the opac redirect it's special
    elif misc_type == 'Motion Picture':  #fix up movie object
        #datastreams
        add_MODS_datastream(movie_object, mods_file_path)

        opac_path_list = misc_mods_parser.xpath(url_xpath)
        #a record without any url must not crash the ingest
        opac_path = opac_path_list[0].text if opac_path_list else None
        if opac_path:
            try:
                movie_object.addDataStream(
                    u'OPAC',
                    u'aTmpStr',
                    label=u'OPAC',
                    mimeType=u'text/html',
                    controlGroup=u'R',
                    location=unicode(opac_path),
                    logMessage=u'Added OPAC datastream.')
                logging.info('Added OPAC datastream to:' + movie_pid)
            except FedoraConnectionException:
                logging.error('Error in adding OPAC datastream to:' +
                              movie_pid + '\n')

    #biography is special it has a docx and a pdf
    #can't use 'get_file_path_from_xpath' these are different then the rest, can change it or handle things here (handle things here, dev_speed)
    elif misc_type == 'biography':
        misc_pid, misc_object, misc_object_RELS_EXT = _create_misc_object(
            'Narrator')

        #get the paths for the pdf/docx
        docx_file_path = None
        pdf_file_path = None
        path_list = misc_mods_parser.xpath(
            url_xpath + "[@displayLabel='Article']")
        for path_element in path_list:
            #test the element's text: 'in' on the element itself only
            #checks child elements, so the old filter never matched
            path_text = path_element.text or ''
            if 'currently unavailable' in path_text:
                continue
            if '.docx' in path_text:
                docx_file_path = os.path.normpath(
                    os.path.join(mods_directory, path_text))
            elif 'pdf' in path_text:
                pdf_file_path = os.path.normpath(
                    os.path.join(mods_directory, path_text))

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)

        if docx_file_path:
            _add_managed_file_datastream(
                misc_object, misc_pid, 'DOCX',
                u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                docx_file_path)

        if pdf_file_path:
            _add_managed_file_datastream(misc_object, misc_pid, 'PDF',
                                         u'application/pdf', pdf_file_path)

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isNarratorOf'),
            benshi_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiNarrator')
        misc_object_RELS_EXT.update()

    else:
        return False

    return True
Ejemplo n.º 18
0
 def __processConcert(self):
     '''
     Create or update the Fedora object for this concert.

     Looks the concert up by its 'concertID' relationship (creating a new
     object on KeyError), stores the original XML as the CustomXML
     datastream, ingests the WAV and MARCXML files when they exist on
     disk, gets or creates the cycle object the concert belongs to,
     writes the RELS-EXT relationships and DC metadata, and finally
     stores the object on self.concert_obj and marks it active.
     '''
     logger = logging.getLogger('ingest.atm_concert.Concert.__processConcert')
     #Get the/an object
     # NOTE(review): if getPid returns a falsy pid without raising KeyError,
     # 'concert' is never bound and the code below fails -- confirm getPid's contract.
     try:
         pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate='concertID', obj="'%s'" % self.dbid)
         if pid:
             logger.warning('Concert %s found as %s.  Overwriting DSs!' % (self.dbid, pid))
             concert = FedoraWrapper.client.getObject(pid)
     except KeyError:
         concert = FedoraWrapper.getNextObject(prefix=self.prefix, label="concert %s" % self.dbid)
     
     
     logger.info('Adding CustomXML datastream')
     if Concert.save_etree(concert, self.element, 'CustomXML', 'Original XML', controlGroup='M'):
         logger.info('CustomXML added successfully')
     else:
         logger.error('Error while adding CustomXML!')
     
     #Ingest the WAV (if it exists...)
     wav_path = self.element.findtext('Grabacion/wav')
     if wav_path:
         wav_path = self.getPath(wav_path)
         if path.exists(wav_path):
             update_datastream(obj=concert, dsid='WAV', filename=wav_path, 
                 label='WAV', mimeType="audio/x-wav")
         else:
             logger.warning('WAV file specified (%s), but does not exist!', wav_path)
     else:
         logger.warning('No WAV found at %s!  Skipping...', wav_path)
     
     #Ingest the MARCXML, which is looked up next to the WAV file.
     #Without a WAV element there is no directory to look in; path.dirname(None)
     #would raise, so the lookup is skipped instead of aborting the concert.
     if wav_path is not None:
         marc_path = path.join(path.dirname(wav_path), '%s.xml' % self.dbid)
         if path.exists(marc_path):
             update_datastream(obj=concert, dsid='MARCXML', mimeType="application/xml", filename=marc_path)
             logger.debug('Added %s', marc_path)
         else:
             logger.debug('Couldn\'t find MARCXML at %s', marc_path)
     else:
         logger.debug('No WAV path for concert %s; skipping MARCXML lookup', self.dbid)
     
     cycle_info = {
         'norm_name': Concert.normalize_name([self.element.findtext('tipo')])
     }
     
     #Create cycle stuff
     try:
         pid = Concert.__cycles()[cycle_info['norm_name']]
         cycle = FedoraWrapper.client.getObject(pid)
     except KeyError:
         #Unknown cycle: create it, remember its pid and give it a DC title
         cycle = FedoraWrapper.getNextObject(prefix=self.prefix, label='Cycle %s' % (len(Concert.__cycles()) + 1))
         Concert.__cycles()[cycle_info['norm_name']] = cycle.pid
         c_dc = dict()
         c_dc['title'] = [cycle_info['norm_name']]
         Concert.save_dc(cycle, c_dc)
     FedoraWrapper.addRelationshipsWithoutDup(rels=[
         (
             FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
             FR.rels_object('atm:concertCycleCModel', FR.rels_object.PID)
         )
     ], fedora=cycle).update()
     
     
     #Add relations to concert object
     rels_ext = FR.rels_ext(obj=concert, namespaces=ao.NS.values())
     rels = [
         #Don't know that this one is necessary...  Oh well...
         (
             FR.rels_predicate(alias='fjm-db', predicate='concertID'),
             FR.rels_object(self.dbid, FR.rels_object.LITERAL)
         ),
         (
             FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOfCollection'),
             FR.rels_object('atm:concertCollection', FR.rels_object.PID)
         ),
         (
             FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
             FR.rels_object(cycle.pid, FR.rels_object.PID)
         ),
         (
             FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
             FR.rels_object('atm:concertCModel', FR.rels_object.PID)
         )
     ]
     
     #Write 'out' rels_ext
     FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
     
     #Dublin Core: type and title always, description only when present
     desc = self.element.findtext('Descripcion')
     dc = dict()
     dc['type'] = [unicode('Event')]
     if desc:
         dc['description'] = [unicode(desc)]
     dc['title'] = [unicode(Concert.normalize_name([self.element.findtext('titulo')]))]
     Concert.save_dc(concert, dc)
     
     self.concert_obj = concert
     concert.state = unicode('A')
Ejemplo n.º 19
0
    def process(self):
        '''
        Create or update the Fedora object for this performer.

        The performer is looked up first by normalized name (self[...]),
        then by its 'performerID' relationship; a new object is created
        when both lookups raise KeyError. When the name lookup fails, the
        DC title is (re)written. Afterwards the RELS-EXT relationships are
        written, the EAC-CPF datastream is created or updated, the
        name -> pid mapping is stored and the object is marked active.

        Raises Exception when a lookup succeeds but yields a falsy pid.
        Re-raises any FedoraConnectionException other than a 404 while
        reading the existing EAC-CPF datastream.
        '''
        logger = self.logger
        logger.info('Starting to ingest: Performer %s' % self.dbid)

        def load_existing(pid, warning_template):
            #Shared handling for a pid returned by either lookup
            logger.info('Found %(pid)s' % {'pid': pid})
            if not pid:
                msg = 'Something went horribly wrong!  Found a pid (%(pid)s), but couldn\'t access it...' % {'pid': pid}
                logger.error(msg)
                raise Exception(msg)
            logger.warning(warning_template % {'name': self.norm_name, 'pid': pid})
            self.performer = FedoraWrapper.client.getObject(pid)

        try:
            logger.info('Checking to see if %s already exists in Fedora' % self.norm_name)
            load_existing(self[self.norm_name],
                          '%(name)s already exists as pid %(pid)s!')
        except KeyError:
            try:
                logger.debug('Not known by name, checking by performerID')
                load_existing(
                    FedoraWrapper.getPid(uri=Performer.NS['fjm-db'].uri, predicate='performerID', obj="'%s'" % self.dbid),
                    '%(name)s already exists as pid %(pid)s! Overwriting DC DS!')
            except KeyError:
                logger.info('Doesn\'t exist: creating a new Fedora Object')
                self.performer = FedoraWrapper.getNextObject(self.prefix, label='Performer: %s' % self.dbid)

            #Not known by name: (re)write the DC title
            dc = dict()
            dc['title'] = [self.norm_name]
            Performer.save_dc(self.performer, dc)

        rels_ext = FR.rels_ext(self.performer, namespaces=Performer.NS.values())
        rels = [
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:personCModel', FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fjm-db', predicate='performerID'),
                FR.rels_object(self.dbid, FR.rels_object.LITERAL)
            )
        ]

        FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()

        #Yay Pythonic-ness?  Try to get an existing EAC-CPF, or create one if none is found
        try:
            eaccpf = CPF.EACCPF(self.performer.pid, xml=self.performer['EAC-CPF'].getContent().read())
            event_type = "modified"
        except fcrepo.connection.FedoraConnectionException as e:
            if e.httpcode != 404:
                #bare raise keeps the original traceback ('raise e' resets it in Python 2)
                raise
            eaccpf = CPF.EACCPF(self.performer.pid)
            event_type = "created"
        eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_performer.py")
        eaccpf.add_XML_source(caption='XML from database dump', xml=self.element)
        eaccpf.add_name_entry(name=self.name)

        Performer.save_etree(self.performer, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')

        self[self.norm_name] = self.performer.pid
        self.performer.state = unicode('A')

        FedoraWrapper.correlateDBEntry('player', 'performerID')
def handle_clip_mods(clip_mods_parser, mods_file_name):
    '''
    This function will handle the creation of clip objects.

    A new Fedora object is created, the MODS record and any referenced
    high/low resolution videos are attached as datastreams, and RELS-EXT
    relationships link the clip to the movie, record its clip number and
    set the content model. A file name containing '-sub' marks a
    subtitled clip, which is also linked to its master clip.

    Relies on globals set up elsewhere in this file (fedora, name_space,
    movie_name, movie_pid, mods_file_path, clips_to_pids, ...). The new
    pid is recorded in clips_to_pids under mods_file_name.

    @param clip_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      Name of the MODS file; the clip number is the part between '-cp'
      and the extension (without '-sub')
    @return boolean
      Always True; a subtitled clip whose master clip is not yet in
      clips_to_pids raises KeyError instead
    '''
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"
    extension_start = mods_file_name.rfind('.')

    clip_pid = fedora.getNextPID(name_space)

    high_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser, url_xpath + "[@displayLabel='High Quality Video']")
    low_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser, url_xpath + "[@displayLabel='Web Quality Video']")

    clip_number = mods_file_name[mods_file_name.find('-cp') + 3:extension_start]
    clip_number = clip_number.replace('-sub', '')

    clip_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-') + 1:extension_start])
    clip_object = fedora.createObject(clip_pid, label=clip_label)

    #datastreams
    add_MODS_datastream(clip_object, mods_file_path)
    video_datastreams = (
        ('HIGHRES', high_resolution_mov_path),
        ('LOWRES', low_resolution_mov_path),
    )
    for dsid, mov_path in video_datastreams:
        if not mov_path:
            continue
        # 'with' closes the handle even if an unexpected exception escapes
        with open(mov_path, 'rb') as mov_file_handle:
            try:
                clip_object.addDataStream(
                    unicode(dsid), u'aTmpStr', label=unicode(dsid),
                    mimeType=u'video/quicktime', controlGroup=u'M',
                    logMessage=unicode('Added ' + dsid + ' datastream.'))
                clip_object[dsid].setContent(mov_file_handle)
                logging.info('Added ' + dsid + ' datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding ' + dsid + ' datastream to:' + clip_pid + '\n')

    #relationships
    clip_object_RELS_EXT = fedora_relationships.rels_ext(clip_object, [hamilton_rdf_name_space, fedora_model_namespace])
    clip_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton', 'isClipOf'), movie_pid)
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipNumber'),
        fedora_relationships.rels_object(str(clip_number), fedora_relationships.rels_object.LITERAL))

    #item assignment mutates the module-level dict; no 'global' statement needed
    clips_to_pids[mods_file_name] = clip_pid

    #this section handles the different types of clips (subs or not)
    if '-sub' not in mods_file_name:
        model_suffix = ':benshiClip'
    else:
        #subtitled clip: link it to the clip it was derived from
        master_clip_file_name = mods_file_name.replace('-sub', '')
        clip_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isSubOf'),
            clips_to_pids[master_clip_file_name])
        model_suffix = ':benshiClipSubbed'

    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + model_suffix)
    clip_object_RELS_EXT.update()
    return True
def _add_misc_file_datastream(fedora_object, object_pid, dsid, mime_type, file_path):
    '''
    Helper for handle_misc_mods: upload one file as a managed datastream
    @param fedora_object
      The fedora object that receives the datastream
    @param object_pid
      The pid of fedora_object, only used in the log messages
    @param dsid
      The datastream id, it is also used as the datastream label
    @param mime_type
      The mime type recorded for the datastream
    @param file_path
      The path of the file whose content is uploaded
    '''
    #the with block closes the file even when something other than a
    #FedoraConnectionException is raised during the upload
    with open(file_path, 'rb') as file_handle:
        try:
            #the datastream is created with placeholder content and then
            #overwritten with the real file content
            fedora_object.addDataStream(dsid, u'aTmpStr', label=dsid,
                                        mimeType=mime_type, controlGroup=u'M',
                                        logMessage=u'Added ' + dsid + u' datastream.')
            fedora_object[dsid].setContent(file_handle)
            logging.info('Added ' + dsid + ' datastream to:' + object_pid)
        except FedoraConnectionException:
            logging.error('Error in adding ' + dsid + ' datastream to:' + object_pid + '\n')


def handle_misc_mods(misc_mods_parser, mods_file_name):
    '''
    This function will handle the MODS records that are not clips or
    transcripts, the local genre of the record decides what is done:
      'sound recording' adds MODS and MP3 to the existing benshi object
      'essay'           creates an essay object (MODS + DOCX)
      'presentation'    creates a presentation object (MODS + PPTX)
      'Motion Picture'  adds MODS and the OPAC redirect to the movie object
      'biography'       creates a narrator object (MODS + DOCX and/or PDF)
    @param misc_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      The name of the MODS file, it is not used in this function
    @return boolean
      True on success (also when the record has no local genre at all),
      False if the local genre is not one of the handled types
    '''
    docx_mime_type = u'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    pptx_mime_type = u'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    #every file reference in the MODS lives in a location/url element
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    misc_type_list = misc_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'genre'][@type='local']")
    if not misc_type_list:
        return True

    misc_type = misc_type_list[0].text
    print(misc_type)

    if misc_type == 'sound recording':#fix up benshi object
        #datastreams
        add_MODS_datastream(benshi_object, mods_file_path)
        audio_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='Audio']")

        if audio_file_path:
            _add_misc_file_datastream(benshi_object, benshi_pid, u'MP3', u'audio/mpeg', audio_file_path)
            print(audio_file_path)

    elif misc_type == 'essay':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_' + misc_type)
        misc_object = fedora.createObject(misc_pid, label = misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(misc_object,[hamilton_rdf_name_space, fedora_model_namespace])
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)

        essay_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='Article']")
        if essay_file_path:
            _add_misc_file_datastream(misc_object, misc_pid, u'DOCX', docx_mime_type, essay_file_path)

        #relationships
        misc_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isEssayOf'), movie_pid)
        misc_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':benshiEssay')
        misc_object_RELS_EXT.update()

    elif misc_type == 'presentation':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_' + misc_type)
        misc_object = fedora.createObject(misc_pid, label = misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(misc_object,[hamilton_rdf_name_space, fedora_model_namespace])
        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)

        presentation_file_path = get_file_path_from_xpath(misc_mods_parser, url_xpath + "[@displayLabel='Presentation']")
        if presentation_file_path:
            _add_misc_file_datastream(misc_object, misc_pid, u'PPTX', pptx_mime_type, presentation_file_path)

        #relationships
        misc_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isPresentationOf'), movie_pid)
        misc_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':benshiPresentation')
        misc_object_RELS_EXT.update()

    #movie gets the opac redirect it's special
    elif misc_type == 'Motion Picture':#fix up movie object
        #datastreams
        add_MODS_datastream(movie_object, mods_file_path)

        #a record without any url element simply gets no OPAC datastream
        opac_path_list = misc_mods_parser.xpath(url_xpath)
        opac_path = opac_path_list[0].text if opac_path_list else None
        if opac_path:
            try:
                #controlGroup R: the datastream only redirects to the location
                movie_object.addDataStream(u'OPAC', u'aTmpStr', label = u'OPAC',
                mimeType = u'text/html', controlGroup = u'R',
                location = unicode(opac_path),
                logMessage = u'Added OPAC datastream.')
                logging.info('Added OPAC datastream to:' + movie_pid)
            except FedoraConnectionException:
                logging.error('Error in adding OPAC datastream to:' + movie_pid + '\n')

    #biography is special it has a docx and a pdf
    #can't use 'get_file_path_from_xpath' these are different than the rest, so they are handled here
    elif misc_type == 'biography':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_Narrator')
        misc_object = fedora.createObject(misc_pid, label = misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(misc_object,[hamilton_rdf_name_space, fedora_model_namespace])

        #get the paths for the pdf/docx
        docx_file_path = False
        pdf_file_path = False
        for path_element in misc_mods_parser.xpath(url_xpath + "[@displayLabel='Article']"):
            #it is the text of the element that has to be inspected, and an
            #empty url element has no text at all
            path_text = path_element.text
            if not path_text or 'currently unavailable' in path_text:
                continue
            if '.docx' in path_text:
                docx_file_path = os.path.normpath(os.path.join(mods_directory, path_text))
            elif 'pdf' in path_text:
                pdf_file_path = os.path.normpath(os.path.join(mods_directory, path_text))

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)

        if docx_file_path:
            _add_misc_file_datastream(misc_object, misc_pid, u'DOCX', docx_mime_type, docx_file_path)

        if pdf_file_path:
            _add_misc_file_datastream(misc_object, misc_pid, u'PDF', u'application/pdf', pdf_file_path)

        #relationships (the narrator belongs to the benshi object, not to the movie)
        misc_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isNarratorOf'), benshi_pid)
        misc_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':benshiNarrator')
        misc_object_RELS_EXT.update()

    else:
        return False

    return True
Ejemplo n.º 22
0
    def __processProgram(self):
        """
        Ingest the concert program described by the 'programa' child of self.element.

        Gets the Fedora object whose 'programConcertID' matches self.dbid (or
        creates a new one), stores the program PDF on it, and builds the list
        of relationships for the program: membership in the concert, the
        program content model, the database ID and one 'authoredBy' relation
        per author of the program notes.  Author objects (and their EAC-CPF
        records) are created when they do not exist yet.

        NOTE(review): 'rels' and 'rels_ext' are built but never applied within
        the visible portion of this method -- confirm that the commit of the
        relationships has not been lost.
        """
        p_el = self.element.find('programa')
        
        #NOTE(review): the path is resolved before p_el is inspected; presumably find() gives None when there is no 'programa' element, in which case this line fails -- confirm against the input data.
        filename = self.getPath(p_el.findtext('ruta'))
        
        #Only proceed when the 'programa' element has child elements
        if len(p_el) != 0:
            try:
                #Reuse the program object already related to this concert's database ID...
                pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate='programConcertID', obj="'%s'" % self.dbid)
                program = FedoraWrapper.client.getObject(pid)
            except KeyError:
                #...or get a new Fedora Object for the program
                program = FedoraWrapper.getNextObject(self.prefix, 
                label='Program for concert %(dbid)s' % {'dbid': self.dbid})
        
            #Add the PDF to the program object...  Should probably do an "existence" check, but anyway...
            update_datastream(obj=program, dsid='PDF', 
                filename=filename,
                mimeType='application/pdf'
            )
            
            #Create the RELS-EXT datastream
            rels_ext = FR.rels_ext(obj=program, namespaces=ao.NS.values())
            #(predicate, object) pairs: concert membership, content model and database ID
            rels = [
                (
                    FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
                    FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
                ),
                (
                    FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                    FR.rels_object('atm:programCModel', FR.rels_object.PID)
                ),
                (
                    FR.rels_predicate(alias='fjm-db', predicate='programConcertID'),
                    FR.rels_object(self.dbid, FR.rels_object.LITERAL)
                )
            ]
            
            #One author object per 'AutorNotas' element that carries an id attribute
            for a_el in p_el.findall('AutorNotas[@id]'):
                fore, sur = a_el.findtext('Nombre'), a_el.findtext('Apellidos')
                normed = self.normalize_name([fore, sur])
                try:
                    #Look the author up by normalized name among the known people
                    pid = Person._people()[normed]
                    author = FedoraWrapper.client.getObject(pid)
                except KeyError:
                    #Unknown person: create an object whose DC title is the normalized name
                    author = FedoraWrapper.getNextObject(self.prefix, label="an author")
                    dc = dict()
                    dc['title'] = [normed]
                    Concert.save_dc(author, dc)
                    
                rels.append(
                    (
                        FR.rels_predicate(alias='atm-rel', predicate='authoredBy'),
                        FR.rels_object(author.pid, FR.rels_object.PID)
                    )
                )
                
                #Give the author the person content model
                FedoraWrapper.addRelationshipsWithoutDup([
                    (
                        FR.rels_predicate(alias='fedora-model', predicate="hasModel"),
                        FR.rels_object('atm:personCModel', FR.rels_object.PID)
                    )
                ], fedora=author).update()
                
                #Yay Pythonic-ness?  Try to get an existing EAC-CPF, or create one if none is found
                try:
                    #No point in updating if there's already one there...  This is really just a check?
                    eaccpf = CPF.EACCPF(author.pid, xml=author['EAC-CPF'].getContent().read())
                    event_type="modified"
                except fcrepo.connection.FedoraConnectionException, e:
                    #A 404 means there is no EAC-CPF datastream yet; anything else is re-raised
                    if e.httpcode == 404:
                        eaccpf = CPF.EACCPF(author.pid)
                        event_type="created"
                    else:
                        raise e
                    #The statements below are part of the except handler, so they only run when the record had to be created; an existing record is left untouched and the "modified" event type is not used.
                    #NOTE(review): confirm this nesting is intended (compare with the comment in the try block above).
                    eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_concert.py")
                    eaccpf.add_XML_source(caption='(Slightly modified (Put into an element)) XML from database dump', xml=a_el)
                    eaccpf.add_name_entry(name={'forename': fore, 'surname': sur})

                    Concert.save_etree(author, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')
                    author.state = unicode('A')
                
            #XXX: This is seeming particularly less-than-elegant at the moment, creating a 'placeholder' object for composer notes...  Anyway.
            if len(p_el.findall('Notas_Obras/Obra[@id]')) > 0:
                try:
                    #The placeholder has a fixed pid within this prefix
                    pid = '%s:composerText' % self.prefix
                    author = FedoraWrapper.client.getObject(pid)
                except:
                    #NOTE(review): bare except -- any failure of getObject (not only "object missing") leads to the creation attempt below.
                    author = FedoraWrapper.client.createObject(pid, label=unicode('Composer Text'))
                    #Yay Pythonic-ness?  Try to get an existing EAC-CPF, or create one if none is found
                    try:
                        eaccpf = CPF.EACCPF(author.pid, xml=author['EAC-CPF'].getContent().read())
                        event_type="modified"
                    except fcrepo.connection.FedoraConnectionException, e:
                        #A 404 means there is no EAC-CPF datastream yet; anything else is re-raised
                        if e.httpcode == 404:
                            eaccpf = CPF.EACCPF(author.pid)
                            event_type="created"
                        else:
                            raise e
                    #Still inside the outer except handler: the record is only written when the placeholder object has just been created
                    eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_concert.py")
                    name = {'forename': 'Texto', 'surname': 'Compositores'}
                    eaccpf.add_name_entry(name=name)
                    
                    dc = dict()
                    #NOTE(review): normalize_name receives a dict here but a list in the author loop above, and the title is not wrapped in a list as it is there -- confirm.
                    dc['title'] = Concert.normalize_name(name)
                    Concert.save_dc(author, dc)
                    Concert.save_etree(author, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')
            
                #The program is also 'authoredBy' the composer-text placeholder
                rels.append(
                    (
                        FR.rels_predicate(alias='atm-rel', predicate='authoredBy'),
                        FR.rels_object(author.pid, FR.rels_object.PID)
                    )
                )
                
                author.state = unicode('A')
def handle_transcript_mods(transcript_mods_parser, mods_file_name):
    '''
    This function will handle the creation of transcript objects
    @param transcript_mods_parser
      The etree xml parser to get ingest data from
    @param mods_file_name
      The name of the MODS file, the part between '-tr-' and the extension
      is used in the object label and the name also selects the transcript
      content model ('-jpneng', '-jpn' or '-eng')
    @return boolean
      True on success, False if the file name matches none of the three
      transcript types (the RELS-EXT is not written in that case)
    '''
    #every file reference in the MODS lives in a location/url element
    url_xpath = "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"

    transcript_pid = fedora.getNextPID(name_space)
    transcript_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-tr-') + 4:mods_file_name.rfind('.')])
    transcript_object = fedora.createObject(transcript_pid, label = transcript_label)
    transcript_object_RELS_EXT = fedora_relationships.rels_ext(transcript_object,[hamilton_rdf_name_space, fedora_model_namespace])
    transcript_path = get_file_path_from_xpath(transcript_mods_parser, url_xpath + "[@displayLabel='Document']")
    time_synced_transcript_path = get_file_path_from_xpath(transcript_mods_parser, url_xpath + "[@displayLabel='Document with time-sync encoding']")

    #datastreams
    add_MODS_datastream(transcript_object, mods_file_path)

    if time_synced_transcript_path:
        #the with block closes the file even when something other than a
        #FedoraConnectionException is raised during the upload
        with open(time_synced_transcript_path, 'rb') as time_synced_transcript_handle:
            try:
                transcript_object.addDataStream(u'TimeSyncedTranscript', u'aTmpStr', label=u'POPCORN',
                mimeType = u'application/xml', controlGroup = u'M',
                logMessage = u'Added TimeSyncedTranscript datastream.')
                datastream = transcript_object['TimeSyncedTranscript']
                datastream.setContent(time_synced_transcript_handle)
                logging.info('Added TimeSyncedTranscript datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding TimeSyncedTranscript datastream to:' + transcript_pid + '\n')

    if transcript_path:
        with open(transcript_path, 'rb') as pdf_file_handle:
            try:
                transcript_object.addDataStream(u'PDF', u'aTmpStr', label=u'PDF',
                mimeType = u'application/pdf', controlGroup = u'M',
                logMessage = u'Added PDF datastream.')
                datastream = transcript_object['PDF']
                datastream.setContent(pdf_file_handle)
                logging.info('Added PDF datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PDF datastream to:' + transcript_pid + '\n')

    #relationships
    #handle is transcript of
    transcript_clip_element_list = transcript_mods_parser.xpath(url_xpath + "[@displayLabel='Video clip']")
    if len(transcript_clip_element_list) == 1:#there are multiple related items if the transcript is for the whole movie
        #the clip has to have been ingested already, its pid is looked up by file name
        transcript_clip_file_name = transcript_clip_element_list[0].text
        transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isTranscriptOf'), clips_to_pids[transcript_clip_file_name])
    else:
        transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('hamilton','isTranscriptOf'), movie_pid)

    #handle the 3 different transcript types
    #'-jpneng' has to be tested first because it also contains '-jpn'
    if '-jpneng' in mods_file_name:
        transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':EnglishJapaneseTranscript')
    elif '-jpn' in mods_file_name:
        transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':JapaneseTranscript')
    elif '-eng' in mods_file_name:
        transcript_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'), name_space + ':EnglishTranscript')
    else:
        return False

    transcript_object_RELS_EXT.update()

    return True
                    controlGroup=u'X',
                    logMessage=u'Added basic COLLECTION_POLICY data.')
                logging.info('Added COLLECTION_POLICY datastream to:' +
                             collection_pid)
            except FedoraConnectionException:
                logging.error(
                    'Error in adding COLLECTION_POLICY datastream to:' +
                    collection_pid + '\n')

            #add relationships
            collection_object_RELS_EXT = fedora_relationships.rels_ext(
                collection_object, fedora_model_namespace)
            collection_object_RELS_EXT.addRelationship('isMemberOf',
                                                       'islandora:root')
            collection_object_RELS_EXT.addRelationship(
                fedora_relationships.rels_predicate('fedora-model',
                                                    'hasModel'),
                'islandora:collectionCModel')
            collection_object_RELS_EXT.update()
    #put in the benshi Islandora:BenshiMovie content model
    try:
        model_pid = u'islandora:benshiMovie'
        fedora.getObject(model_pid)
    except FedoraConnectionException, object_fetch_exception:
        if object_fetch_exception.httpcode in [404]:
            logging.info('islandora:BenshiMovie missing, creating object.\n')
            model_object = fedora.createObject(model_pid,
                                               label=u'BenshiMovieCModel')
            #add relationships
            model_object_RELS_EXT = fedora_relationships.rels_ext(
                model_object, fedora_model_namespace)
            model_object_RELS_EXT.addRelationship(
Ejemplo n.º 25
0
 def __processPerformance(self, p_el):
     """
     Ingest one performance of a piece within this concert.

     Gets or creates the performance object, attaches its MP3 (when the file
     exists) and its relationships, then does the same for every movement
     ('Movimientos/Movimiento') and every performer ('Interpretes/Interprete')
     found below p_el.

     p_el -- the element describing the performance; its 'id_obra' attribute,
             'Posicion' text and 'mp3_Obra' text are read here.

     Problems with individual movements or performers (missing position,
     missing ID, missing MP3 file) are logged and do not stop the ingest.
     """
     logger = logging.getLogger('ingest.atm_concert.Concert.__processPerformance')
     p_dict = {
         'piece': p_el.get('id_obra'), 
         'concert': self.dbid, 
         'order': p_el.findtext('Posicion')
     }
     
     #TODO:  Bloody well deduplicate (ensure that this object does not already exist in Fedora)
     try:
         pid = FedoraWrapper.getPid(tuples=[
             (Concert.NS['fjm-db'].uri, 'basedOn', "'%s'" % p_dict['piece']), #Not sure if this is really necessary with the other two conditions...
             ('fedora-rels-ext:', 'isMemberOf', "<fedora:%s>" % self.concert_obj.pid), #To ensure that the performance actually belongs to this concert...
             (Concert.NS['atm-rel'].uri, 'concertOrder', "'%s'" % p_dict['order']) #To eliminate the confusion if the same piece is played twice in the same concert.
         ])
         if not pid:
             #An empty result means nothing was found: take the same path as a failed lookup, so that 'performance' is always bound below.
             raise KeyError('No performance of %(piece)s in %(concert)s' % p_dict)
         performance = FedoraWrapper.client.getObject(pid)
     except KeyError:
         performance = FedoraWrapper.getNextObject(self.prefix, label='Performance of %(piece)s in %(concert)s' % p_dict)
     
     #Add MP3 to performance (if there is one to add)
     p_mp3 = p_el.findtext('mp3_Obra')
     if p_mp3:
         mp3_path = self.getPath(p_mp3)
         if path.exists(mp3_path):
             update_datastream(obj=performance, dsid='MP3', 
                 filename=mp3_path, mimeType='audio/mpeg')
         else:
             logger.warning('MP3 entry for performance of %(piece)s in concert %(concert)s, but the file does not exist!' % p_dict)
     else:
         logger.debug('No performance MP3 for %(concert)s/%(piece)s' % p_dict)
     
     #Add relationships
     #1  - To concert
     #2  - To CM
     #3  - Position in concert
     #4  - To score (as a database ID, correlated to the score object below)
     rels_ext = FR.rels_ext(obj=performance, namespaces=ao.NS.values())
     rels = [
         (
             FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
             FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
         ),
         (
             FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
             FR.rels_object('atm:performanceCModel', FR.rels_object.PID)
         ),
         (
             FR.rels_predicate(alias='atm-rel', predicate='concertOrder'),
             FR.rels_object(p_dict['order'], FR.rels_object.LITERAL)
         ),
         (
             FR.rels_predicate(alias='fjm-db', predicate='basedOn'),
             FR.rels_object(p_dict['piece'], FR.rels_object.LITERAL)
         )
     ]
     
     #Add relations and commit
     FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
     FedoraWrapper.correlateDBEntry('basedOn', 'scoreID')
     
     #Create objects for any movements within the piece
     for m_el in p_el.findall('Movimientos/Movimiento'):
         m_dict = {
             'concert': p_dict['concert'],
             'piece': p_dict['piece'],
             'id': m_el.get('id'),
             'corder': p_dict['order'],
             'porder': m_el.get('posicion'),
             'name': m_el.findtext('NOMBRE'),
             'MP3': m_el.findtext('mp3_Movimiento'),
             'line': m_el.sourceline,
             'file': self.file_name
         }
         
         #Sanity test: a movement without a position cannot be ordered within the piece
         if m_dict['porder']:
             #Get a Fedora Object for this movement
             try:
                 pid = FedoraWrapper.getPid(tuples=[
                     ('fedora-rels-ext:', 'isMemberOf', '<fedora:%s>' % performance.pid),
                     ('fedora-model:', 'hasModel', '<fedora:atm:movementCModel>'),
                     (Concert.NS['atm-rel'].uri, 'pieceOrder', "'%s'" % m_dict['porder'])
                 ])
                 mov = FedoraWrapper.client.getObject(pid)
             except KeyError:
                 mov = FedoraWrapper.getNextObject(self.prefix, label='Movement: %(concert)s/%(piece)s/%(id)s' % m_dict)
             
             #Get DC and set the title if we have a name.
             mov_dc = dict()
             mov_dc['type'] = [unicode('Event')]
             if m_dict['name']:
                 mov_dc['title'] = [unicode(m_dict['name'])]
             Concert.save_dc(mov, mov_dc)
             
             #Set the three required relations:
             #1 - To the performance
             #2 - To the content model
             #3 - The order this movement occurs within the piece
             m_rels_ext = FR.rels_ext(obj=mov, namespaces=Concert.NS.values())
             m_rels = [
                 (
                     FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
                     FR.rels_object(performance.pid, FR.rels_object.PID)
                 ),
                 (
                     FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                     FR.rels_object('atm:movementCModel', FR.rels_object.PID)
                 ),
                 (
                     FR.rels_predicate(alias='atm-rel', predicate='pieceOrder'),
                     FR.rels_object(m_dict['porder'], FR.rels_object.LITERAL)
                 )
             ]
             
             FedoraWrapper.addRelationshipsWithoutDup(m_rels, rels_ext=m_rels_ext).update()
             
             #Add the MP3 (if it exists)
             if m_dict['MP3']:
                 mp3_path = self.getPath(m_dict['MP3'])
                 if path.exists(mp3_path):
                     update_datastream(obj=mov, dsid='MP3', 
                         filename=mp3_path, mimeType='audio/mpeg')
                 else:
                     #Say what is wrong, like the performance-level warning above does
                     logger.warning("MP3 entry for movement %(id)s in performance of %(piece)s in %(concert)s on line %(line)s of %(file)s, but the file does not exist!" % m_dict)
             else:
                 logger.debug('No movement MP3 for %(concert)s/%(piece)s/%(id)s on line %(line)s of %(file)s' % m_dict)
         else:
             logger.error('Movement %(concert)s/%(piece)s/%(id)s does not have a position near line %(line)s of %(file)s!' % m_dict)
     #Done with movements
             
     #Create objects for the performers.
     for per_el in p_el.findall('Interpretes/Interprete'):
         perf = {
             'id': per_el.get('id'),
             'group': per_el.get('id_grupo', default=None),
             'line': per_el.sourceline,
             'file': self.file_name
         }
         perf.update(p_dict)
             
         if perf['id']:
             rels = [
                 #Relate performer to CModel
                 (
                     FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                     FR.rels_object('atm:performerCModel', FR.rels_object.PID)
                 ),
                 #Relate performer to performance
                 (
                     FR.rels_predicate(alias='atm-rel', predicate='performance'),
                     FR.rels_object(performance.pid, FR.rels_object.PID)
                 ),
                 #Relate perfomer to their 'person' entry
                 (
                     FR.rels_predicate(alias='fjm-db', predicate='player'),
                     FR.rels_object(perf['id'], FR.rels_object.LITERAL)
                 )
             ]
             
             try:
                 #Turn the three base relations into the query used to find an existing performer
                 t_list = list()
                 for pred, obj in rels:
                     if obj.type == FR.rels_object.LITERAL:
                         t_obj = "'%s'" % obj
                     else:
                         t_obj = "<fedora:%s>" % obj
                     t_list.append(("%s" % Concert.NS[pred.alias].uri, "%s" % pred.predicate, "%s" % t_obj))
                     
                 pid = FedoraWrapper.getPid(tuples=t_list)
                 if not pid:
                     #An empty result means nothing was found.  Without this, 'performer' would be unbound, or would still be the performer of the previous iteration, which would then receive this performer's relations.
                     raise KeyError('No performer %(concert)s/%(piece)s/%(id)s' % perf)
                 performer = FedoraWrapper.client.getObject(pid)
             except KeyError:
                 performer = FedoraWrapper.getNextObject(prefix = self.prefix, label = 'Performer: %(concert)s/%(piece)s/%(id)s in group %(group)s' % perf)
                 
             #Relate the performer to the listed group (or 'unaffiliated, if none)
             if perf['group'] is not None:
                 rels.append(
                     (
                         FR.rels_predicate(alias='fjm-db', predicate='group'),
                         FR.rels_object(perf['group'], FR.rels_object.LITERAL)
                     )
                 )
             else:
                 rels.append(
                     (
                         FR.rels_predicate(alias='atm-rel', predicate='group'),
                         FR.rels_object('atm:unaffiliatedPerfomer', FR.rels_object.PID)
                     )
                 )
                     
             #One 'instrument' relation per instrument the performer plays
             for i_el in per_el.findall('Instrumentos/Instrumento'):
                 inst_id = i_el.get('id')
                 rels.append(
                     (
                         FR.rels_predicate(alias='fjm-db', predicate='instrument'),
                         FR.rels_object(inst_id, FR.rels_object.LITERAL)
                     )
                 )
             
             FedoraWrapper.addRelationshipsWithoutDup(rels, fedora=performer).update()
             FedoraWrapper.correlateDBEntry('player', 'performerID')
             FedoraWrapper.correlateDBEntry('group', 'groupID')
             FedoraWrapper.correlateDBEntry('instrument', 'instrumentID')
         else:
             logger.error("Performer on line %(line)s of %(file)s does not have an ID!" % perf)
Ejemplo n.º 26
0
    def process(self):
        """
        Ingest this score into Fedora.

        Reuses the object whose 'scoreID' matches self.dbid or creates a new
        one, writes its relationships (content model, database ID and, when
        present, the TITN number and the composer ID), saves its Dublin Core
        and finally attaches the score PDF and the MARC XML file that sits
        next to it, when those files exist.
        """
        log = self.logger
        log.info('Starting to ingest: Score %s' % self.dbid)

        def literal_relation(alias, predicate, value):
            #Build a (predicate, object) pair whose object is a literal
            return (
                FR.rels_predicate(alias=alias, predicate=predicate),
                FR.rels_object(value, FR.rels_object.LITERAL)
            )

        #Get the existing score object, or a new one when the lookup fails
        try:
            existing_pid = FedoraWrapper.getPid(uri=ao.NS['fjm-db'].uri, predicate='scoreID', obj="'%s'" % self.dbid)
            if not existing_pid:
                raise Exception('Something went horribly wrong!  Found a pid, but couldn\'t access it...')
            log.warning('Score %(id)s already exists as pid %(pid)s! Overwriting PDF and DC DSs!' % {'id': self.dbid, 'pid': existing_pid})
            score = FedoraWrapper.client.getObject(existing_pid)
        except KeyError:
            score = FedoraWrapper.getNextObject(self.prefix, label='Score %s' % self.dbid)

        #Relations every score gets: its content model and its database ID
        rels_ext = FR.rels_ext(score, namespaces=ao.NS.values())
        rels = [
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:scoreCModel', FR.rels_object.PID)
            ),
            literal_relation('fjm-db', 'scoreID', self.dbid)
        ]

        #Optional relations, only added when the element carries the value
        titn = self.element.findtext('titn_partitura')
        if titn:
            rels.append(literal_relation('fjm-titn', 'score', titn))

        #FIXME:  'Direction' of composer relation...  From the score to the composer, from the composer to the score, or in both directions?
        composer = self.element.findtext('ID_COMPOSITOR')
        if composer:
            rels.append(literal_relation('fjm-db', 'composedBy', composer))

        FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
        for db_predicate, id_predicate in (('composedBy', 'composerID'), ('basedOn', 'scoreID')):
            FedoraWrapper.correlateDBEntry(db_predicate, id_predicate)

        #Dublin Core: fixed type plus the title from the database dump
        dc = {
            'type': [unicode('StillImage')],
            'title': [self.element.findtext('TITULO')]
        }
        Score.save_dc(score, dc)

        relative_pdf = self.element.findtext('Ruta_Partitura')
        if not relative_pdf:
            log.info('No PDF for %s', self.dbid)
            return

        pdf_path = self.getPath(relative_pdf)
        if not path.exists(pdf_path):
            log.error('PDF specified for score %(id)s, but file does not seem to exist!' % {'id': self.dbid})
        else:
            update_datastream(obj=score, dsid='PDF', label="Score PDF", filename=pdf_path, mimeType='application/pdf')

        #The MARC record is expected next to the PDF, named after the database ID
        marc_path = self.getPath(path.join(path.dirname(relative_pdf), '%s.xml' % self.dbid))
        if path.exists(marc_path):
            update_datastream(obj=score, dsid='MARCXML', label="MARC XML", filename=marc_path, mimeType='application/xml')
     if object_fetch_exception.httpcode in [404]:
         logging.info(name_space + ':JapaneseSilentFilmCollection missing, creating object.\n')
         collection_object = fedora.createObject(collection_pid, label = collection_label)
         #collection_policy
         try:
             collection_object.addDataStream(u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY',
             mimeType=u'text/xml', controlGroup=u'X',
             logMessage=u'Added basic COLLECTION_POLICY data.')
             logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid)
         except FedoraConnectionException:
             logging.error('Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n')
         
         #add relationships
         collection_object_RELS_EXT=fedora_relationships.rels_ext(collection_object,fedora_model_namespace)
         collection_object_RELS_EXT.addRelationship('isMemberOf','islandora:root')
         collection_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'islandora:collectionCModel')
         collection_object_RELS_EXT.update()
 # Ensure the islandora:benshiMovie content model object exists: probe for it
 # and create it only when the repository answers with HTTP 404.
 try:
     model_pid = u'islandora:benshiMovie'
     # Fetched only to test for existence; the returned object is discarded.
     fedora.getObject(model_pid)
 # NOTE(review): "except X, name" is Python 2-only syntax; "except X as name"
 # is needed before this can run on Python 3.
 except FedoraConnectionException, object_fetch_exception:
     # NOTE(review): within the lines visible here, any httpcode other than
     # 404 falls through with no handling or logging -- confirm that is
     # intended.
     if object_fetch_exception.httpcode in [404]:
         logging.info('islandora:BenshiMovie missing, creating object.\n')
         model_object = fedora.createObject(model_pid, label = u'BenshiMovieCModel')
         # Add relationships: mark the new object as a Fedora content model
         # via fedora-model:hasModel -> fedora-system:ContentModel-3.0.
         model_object_RELS_EXT=fedora_relationships.rels_ext(model_object,fedora_model_namespace)
         model_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'fedora-system:ContentModel-3.0')
         # presumably writes the RELS-EXT changes back to the repository -- TODO confirm
         model_object_RELS_EXT.update()