def add_policy_to_rels(obj):
    """Copy the viewing users and roles of an object's XACML policy into RELS-EXT.

    The ``POLICY`` datastream of ``obj`` is parsed as XACML. All existing
    ``isViewableByUser`` / ``isViewableByRole`` relationships are purged and
    one literal relationship per user and per role of the viewing rule is
    written back, after which the RELS-EXT is updated.

    @param obj The Fedora object; must be subscriptable by datastream id.
    @return False when the policy cannot be parsed (XacmlException);
            None otherwise.
    """
    # TODO: This isn't very efficient. If the users and roles are unchanged we
    # shouldn't bother updating the rels. It will do for now in testing.
    log = logging.getLogger(
        'IslandoraListener.coalliance.add_policy_to_rels')

    policy_stream = obj['POLICY']
    try:
        policy = Xacml(policy_stream.getContent().read())
    except XacmlException:
        return False

    islandora_ns = rels_namespace(
        'islandora', 'http://islandora.ca/ontology/relsext#')
    relationships = rels_ext(obj, islandora_ns, 'islandora')

    allowed_users = policy.viewingRule.getUsers()
    allowed_roles = policy.viewingRule.getRoles()
    log.debug("Users in policy: %s." % allowed_users)
    log.debug("Roles in policy: %s." % allowed_roles)

    grants = (
        ('isViewableByUser', allowed_users),
        ('isViewableByRole', allowed_roles),
    )

    # Drop every old entry first so re-adding cannot create duplicates.
    for predicate, _ in grants:
        relationships.purgeRelationships(predicate=predicate)

    for predicate, names in grants:
        for name in names:
            relationships.addRelationship(
                predicate, rels_object(name, rels_object.LITERAL))

    relationships.update()
def main(argv):
    """Move page objects from ``ilives:pageCModel`` to ``islandora:pageCModel``.

    Connects to Fedora, collects the pids that carry the old content model
    and, for each one, edits the ``fedora-model:hasModel`` entry of its
    RELS-EXT to the new model and commits it. Objects that cannot be fetched
    are reported and skipped.

    @param argv Command-line arguments; not used by this function.
    @return 1 when the Fedora connection could not be established,
            otherwise None.
    """
    import sys  # function-scope import: only needed for the progress output

    # NOTE(review): host and credentials are hard-coded here.
    fedora = connectToFedora("localhost", "fedoraAdmin", "password")
    if not fedora:
        print("Failed to connect to fedora instance")
        return 1

    ### SCAN FOR OBJECTS
    oldModel = "ilives:pageCModel"
    newModel = "islandora:pageCModel"
    objects = getPidsForContentModel(oldModel)
    print("Found %d objects to update" % len(objects))

    for pid in objects:
        strippedPid = pid.replace('info:fedora/', '')
        # Write "<pid> ... " without a newline so the outcome is printed on
        # the same line. The old `print(x) + " ...",` form only worked as a
        # Python 2 print statement.
        sys.stdout.write(strippedPid + " ... ")
        try:
            obj = fedora.getObject(strippedPid)
        except FedoraConnectionException:
            print("Failed to connect to object %s" % pid)
            continue

        nsmap = [
            fedora_relationships.rels_namespace(
                'fedora',
                'info:fedora/fedora-system:def/relations-external#'),
            fedora_relationships.rels_namespace(
                'fedora-model', 'info:fedora/fedora-system:def/model#')
        ]
        rels_ext = fedora_relationships.rels_ext(obj, nsmap, 'fedora')
        editRelsExt(rels_ext, ["fedora-model", "hasModel"], newModel)
        commitRelsExt(rels_ext)
        print("Done")
def add_policy_to_rels(obj):
    """Synchronise RELS-EXT viewing relationships with the XACML policy.

    Reads the ``POLICY`` datastream of ``obj``, parses it as XACML, purges
    the current ``isViewableByUser`` and ``isViewableByRole`` relationships
    and re-adds one literal relationship for every user and role named by
    the policy's viewing rule. The RELS-EXT is then updated.

    @param obj The Fedora object; must be subscriptable by datastream id.
    @return False when parsing raises XacmlException; None otherwise.
    """
    # TODO: This isn't very efficient. If the users and roles are unchanged we
    # shouldn't bother updating the rels. It will do for now in testing.
    log = logging.getLogger(
        'IslandoraListener.coalliance.add_policy_to_rels')

    policy_stream = obj['POLICY']
    try:
        raw_policy = policy_stream.getContent().read()
        policy = Xacml(raw_policy)
    except XacmlException:
        return False

    relationships = rels_ext(
        obj,
        rels_namespace('islandora', 'http://islandora.ca/ontology/relsext#'),
        'islandora')

    viewers = policy.viewingRule.getUsers()
    viewer_roles = policy.viewingRule.getRoles()
    log.debug("Users in policy: %s." % viewers)
    log.debug("Roles in policy: %s." % viewer_roles)

    user_predicate = 'isViewableByUser'
    role_predicate = 'isViewableByRole'

    # Remove the old users and roles before adding, to avoid duplicates.
    relationships.purgeRelationships(predicate=user_predicate)
    relationships.purgeRelationships(predicate=role_predicate)

    for viewer in viewers:
        literal = rels_object(viewer, rels_object.LITERAL)
        relationships.addRelationship(user_predicate, literal)
    for viewer_role in viewer_roles:
        literal = rels_object(viewer_role, rels_object.LITERAL)
        relationships.addRelationship(role_predicate, literal)

    relationships.update()
def main(argv):
    """Move book objects from ``archiveorg:bookCModel`` to ``islandora:bookCModel``.

    Connects to Fedora, collects the pids that carry the old content model
    and, for each one, edits the ``fedora-model:hasModel`` entry of its
    RELS-EXT to the new model and commits it. Objects that cannot be fetched
    are reported and skipped.

    Note: this script does not correct pageNS:pageProgression. We will need
    to detect if it is there before we try to fix it, since a change is only
    needed when it is present.

    @param argv Command-line arguments; not used by this function.
    @return 1 when the Fedora connection could not be established,
            otherwise None.
    """
    import sys  # function-scope import: only needed for the progress output

    # NOTE(review): the connection string is hard-coded here.
    fedora = connectToFedora("http://*****:*****@@dmin")
    if not fedora:
        print("Failed to connect to fedora instance")
        return 1

    ### SCAN FOR BOOK OBJECTS
    oldModel = "archiveorg:bookCModel"
    newModel = "islandora:bookCModel"
    books = getPidsForContentModel(oldModel)
    print("Found %d book objects to update" % len(books))

    for pid in books:
        strippedPid = pid.replace('info:fedora/', '')
        # Write "<pid> ... " without a newline so the outcome is printed on
        # the same line. The old `print(x) + " ...",` form only worked as a
        # Python 2 print statement.
        sys.stdout.write(strippedPid + " ... ")
        try:
            obj = fedora.getObject(strippedPid)
        except FedoraConnectionException:
            print("Failed to connect to object %s" % pid)
            continue

        nsmap = [
            fedora_relationships.rels_namespace(
                'fedora',
                'info:fedora/fedora-system:def/relations-external#'),
            fedora_relationships.rels_namespace(
                'fedora-model', 'info:fedora/fedora-system:def/model#')
        ]
        rels_ext = fedora_relationships.rels_ext(obj, nsmap, 'fedora')
        editRelsExt(rels_ext, ["fedora-model", "hasModel"], newModel)
        commitRelsExt(rels_ext)
        print("Done")
def __processImage(self, el, firstImage):
    """Ingest one image element and relate it to the current concert.

    The element's ``id`` attribute plus its ``ruta`` (file path) and ``pie``
    (caption) children describe the image. An existing Fedora object with a
    matching ``fjm-db:imageID`` is reused, otherwise a new one is created.
    The file is stored as the ``JPG`` datastream, the relationships are
    added without duplicates and the DC is saved.

    @param el The XML element describing the image.
    @param firstImage When true the image is also marked ``isIconOf`` the
                      concert (used for thumbnails).
    @return True when the image was processed; False when the id is missing
            or the resolved path does not exist.
    """
    info = {
        'id': el.get('id', default=None),
        'path': el.findtext('ruta'),
        'description': el.findtext('pie'),
        'line': el.sourceline
    }

    usable = (info['id'] and info['path']
              and path.exists(self.getPath(info['path'])))
    if not usable:
        logger.warning(
            'No ID or invalid path for image at line: %(line)s' % info)
        return False

    # Get the existing image object, or create a new one.
    try:
        existing_pid = FedoraWrapper.getPid(
            uri=ao.NS['fjm-db'].uri, predicate='imageID',
            obj="'%(id)s'" % info)
        image = FedoraWrapper.client.getObject(existing_pid)
    except KeyError:
        image = FedoraWrapper.getNextObject(
            self.prefix, label='Image: %(id)s' % info)

    # FIXME: Detect the mimetype and create the image accordingly?
    update_datastream(obj=image, dsid="JPG",
                      filename=self.getPath(info['path']),
                      mimeType="image/jpeg")

    image_rels = FR.rels_ext(obj=image, namespaces=ao.NS.values())
    concert_pid = self.concert_obj.pid
    relationships = [
        (FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
         FR.rels_object('atm:imageCModel', FR.rels_object.PID)),
        (FR.rels_predicate(alias='fjm-db', predicate='imageID'),
         FR.rels_object(el.get('id'), FR.rels_object.LITERAL)),
        # Relate the image to the concert as a general image.
        (FR.rels_predicate(alias='atm-rel', predicate='isImageOf'),
         FR.rels_object(concert_pid, FR.rels_object.PID)),
    ]
    if firstImage:
        # The first image doubles as the "primary" one (thumbnails).
        relationships.append(
            (FR.rels_predicate(alias='atm-rel', predicate='isIconOf'),
             FR.rels_object(concert_pid, FR.rels_object.PID)))

    # Update and commit the rels_ext.
    FedoraWrapper.addRelationshipsWithoutDup(
        relationships, rels_ext=image_rels).update()

    dc = {'type': [unicode('StillImage')]}
    # NOTE(review): dc was just created, so the second half of this check
    # is always true here.
    if info['description'] and 'description' not in dc:
        dc['description'] = [unicode('%(description)s' % info)]
    Concert.save_dc(image, dc)

    image.state = unicode('A')
    return True
def process(self):
    """Find or create the Fedora object for this composer.

    Lookup order: by normalised name (``self[self.norm_name]``), then by
    ``fjm-db:composerID``; a new object is created when both raise KeyError.
    The content-model and composerID relationships are then added without
    duplicates, and the object's EAC-CPF datastream is loaded, or a fresh
    EAC-CPF is started when Fedora answers 404 for it.

    NOTE(review): only the part of this method up to the EAC-CPF lookup is
    visible here; ``eaccpf`` and ``event_type`` are presumably consumed by
    code that follows.

    Raises Exception when a lookup yields a falsy pid, and re-raises any
    FedoraConnectionException other than a 404 on the EAC-CPF datastream.
    """
    logger = self.logger
    logger.info('Starting to ingest: %(class)s %(id)s' %
                {'class': type(self), 'id': self.dbid})

    def adopt_existing(pid):
        # Shared tail of both lookups: log the hit and load the object.
        logger.info('Found %(pid)s' % {'pid': pid})
        if not pid:
            msg = 'Something went horribly wrong! Found a pid (%(pid)s), but couldn\'t access it...' % {'pid': pid}
            logger.error(msg)
            raise Exception(msg)
        logger.warning(
            '%(name)s already exists as pid %(pid)s! Overwriting DC DS!' %
            {'name': self.norm_name, 'pid': pid})
        self.composer = FedoraWrapper.client.getObject(pid)

    try:
        logger.info('Checking to see if %s already exists in Fedora' %
                    self.norm_name)
        adopt_existing(self[self.norm_name])
    except KeyError:
        try:
            logger.debug('Not known by name, checking by composerID')
            adopt_existing(FedoraWrapper.getPid(
                uri=Composer.NS['fjm-db'].uri, predicate='composerID',
                obj="'%s'" % self.dbid))
        except KeyError:
            logger.info('Doesn\'t exist: creating a new Fedora Object')
            self.composer = FedoraWrapper.getNextObject(
                self.prefix, label='Composer %s' % self.dbid)

    rels_ext = FR.rels_ext(self.composer, namespaces=Composer.NS.values())
    rels = [
        (FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
         FR.rels_object('atm:personCModel', FR.rels_object.PID)),
        (FR.rels_predicate(alias='fjm-db', predicate='composerID'),
         FR.rels_object(self.dbid, FR.rels_object.LITERAL)),
    ]
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()
    FedoraWrapper.correlateDBEntry('composedBy', 'composerID')

    # Try to get an existing EAC-CPF, or create one if none is found.
    try:
        eaccpf = CPF.EACCPF(
            self.composer.pid,
            xml=self.composer['EAC-CPF'].getContent().read())
        event_type = "modified"
    except fcrepo.connection.FedoraConnectionException as e:
        if e.httpcode != 404:
            # Bare raise keeps the original traceback intact.
            raise
        eaccpf = CPF.EACCPF(self.composer.pid)
        event_type = "created"
def __processConferences(self):
    """Ingest every associated event (lecture) of the current concert.

    Each ``Eventos_Asociados/Evento_Asociado`` element that has an ``id``
    is mapped to a Fedora object, reused via ``fjm-db:lectureID`` when one
    exists and created otherwise. The object is related to the concert,
    given the lecture content model, optionally given an ``MP3``
    datastream, and has its DC saved. Elements without an id are skipped.
    """
    logger = logging.getLogger('ingest.atm_concert.__processConferences')

    for event in self.element.findall('Eventos_Asociados/Evento_Asociado'):
        details = {
            'id': event.get('id'),
            'type': event.findtext('Tipo'),
            'description': event.findtext('descripcion'),
            'mp3_path': event.findtext('ruta'),
            'concert': self.dbid,
            'line': event.sourceline
        }
        if not details['id']:
            continue

        try:
            known_pid = FedoraWrapper.getPid(
                uri=Concert.NS['fjm-db'].uri, predicate="lectureID",
                obj="'%(id)s'" % details)
            conference = FedoraWrapper.client.getObject(known_pid)
        except KeyError:
            conference = FedoraWrapper.getNextObject(
                self.prefix,
                label="Conference %(id)s in %(concert)s" % details)

        conference_rels = FR.rels_ext(obj=conference,
                                      namespaces=ao.NS.values())
        relationships = [
            (FR.rels_predicate(alias='fedora-rels-ext',
                               predicate='isMemberOf'),
             FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)),
            (FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
             FR.rels_object('atm:lectureCModel', FR.rels_object.PID)),
            (FR.rels_predicate(alias='fjm-db', predicate='lectureID'),
             FR.rels_object(details['id'], FR.rels_object.LITERAL)),
        ]
        # Add and commit the relationships.
        FedoraWrapper.addRelationshipsWithoutDup(
            relationships, rels_ext=conference_rels).update()

        if not details['mp3_path']:
            logger.warning(
                'No MP3 indicated for id %(id)s on line %(line)s' % details)
        else:
            audio_file = self.getPath(details['mp3_path'])
            if path.exists(audio_file):
                update_datastream(obj=conference, dsid='MP3',
                                  filename=audio_file,
                                  mimeType="audio/mpeg")
            else:
                logger.error('MP3 specified (%(mp3_path)s), but doesn\'t exist for id %(id)s on line %(line)s' % details)

        dc = {
            'type': [unicode('Sound')],
            'description': [unicode(details['description'])],
            'subject': [unicode(details['type'])],
        }
        Concert.save_dc(conference, dc)
        conference.state = unicode('A')
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None,
                  extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid

    We set the default namespace for our interconnections, then apply the
    content model, and make childObject a member of the object:parentPid
    collection. If object:parentPid doesn't have the collection content
    model then strange things might happen.

    The update is retried for as long as Fedora reports that the object is
    being modified by another thread; any other connection error is printed
    and the function gives up.

    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of alias -> uri to put in the
                           RELS-EXT data. Non-dict values are ignored.
    @param extraRelationships Optional dict of predicate -> value; each is
                              added to childObject as a "literal".
                              Non-dict values are ignored.
                              By default the object gets:
                              hasModel:contentModel and
                              isMemberOfCollection:parentPid
    """
    nsmap = [
        fedora_relationships.rels_namespace(
            'fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
    ]
    # isinstance (rather than an exact type match) so that dict subclasses
    # are honoured instead of being silently dropped.
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for k, v in extraNamespaces.items():
            nsmap.append(fedora_relationships.rels_namespace(k, v))

    # add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        [contentModel, "pid"])
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'),
        [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for k, v in extraRelationships.items():
            rels_ext.addRelationship(k, [v, "literal"])

    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(
                    fedoraEXL.body):
                print(
                    "Trouble (thread lock) updating obj(%s) RELS-EXT - retrying."
                    % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
def addRelationshipsWithoutDup(rels, fedora=None, rels_ext=None):
    '''
    Add each relationship in 'rels' via addRelationshipWithoutDup.

    'rels': an iterable of relationships, each passed on unchanged.
    'fedora': an object to build a rels_ext from when none is supplied.
    'rels_ext': the rels_ext to add to; takes precedence over 'fedora'.

    Returns the rels_ext that was used. Raises Exception when neither
    'fedora' nor 'rels_ext' is provided (or both are falsy).
    '''
    if not rels_ext:
        if not fedora:
            raise Exception('Either fedora or rels_ext must be provided!')
        rels_ext = FR.rels_ext(obj=fedora, namespaces=ao.NS.values())

    for relationship in rels:
        FedoraWrapper.addRelationshipWithoutDup(relationship,
                                                rels_ext=rels_ext)
    return rels_ext
def handle_still_mods(still_mods_parser, mods_file_name):
    '''
    This function will handle the creation of still image objects.

    A new object is always created and given the MODS datastream. When the
    MODS names a 'Still Image' file, that file is stored as the PNG
    datastream and the object is related to its clip (or to the movie when
    no 'Video clip' url is present) and given the benshiStill model.

    @param still_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name; used to build the label
    @return boolean True when a still image path was found and the
            relationships were written, False when no path was found
    '''
    still_path = get_file_path_from_xpath(still_mods_parser, "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Still Image']")
    still_pid = fedora.getNextPID(name_space)
    label_suffix = mods_file_name[mods_file_name.find('-')+1:mods_file_name.rfind('.')]
    still_label = unicode(movie_name + '_' + label_suffix)
    still_object = fedora.createObject(still_pid, label=still_label)
    still_object_RELS_EXT = fedora_relationships.rels_ext(still_object, [hamilton_rdf_name_space, fedora_model_namespace])

    #datastreams
    # NOTE(review): mods_file_path is not a parameter; it is read from the
    # enclosing module scope.
    add_MODS_datastream(still_object, mods_file_path)

    if not still_path:
        return False

    # 'with' closes the file even when something other than a
    # FedoraConnectionException is raised.
    with open(still_path, 'rb') as png_file_handle:
        try:
            still_object.addDataStream(u'PNG', u'aTmpStr', label=u'PNG',
                                       mimeType=u'image/png',
                                       controlGroup=u'M',
                                       logMessage=u'Added PNG datastream.')
            datastream = still_object['PNG']
            datastream.setContent(png_file_handle)
            logging.info('Added PNG datastream to:' + still_pid)
        except FedoraConnectionException:
            logging.error('Error in adding PNG datastream to:' + still_pid + '\n')

    #relationships
    still_clip_element_list = still_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']")
    is_still_of = fedora_relationships.rels_predicate('hamilton', 'isStillOf')
    if still_clip_element_list:
        still_clip_file_name = still_clip_element_list[0].text
        still_object_RELS_EXT.addRelationship(is_still_of, clips_to_pids[still_clip_file_name])
    else:
        still_object_RELS_EXT.addRelationship(is_still_of, movie_pid)

    still_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model', 'hasModel'), name_space + ':benshiStill')
    still_object_RELS_EXT.update()
    return True
def createRelsExt(childObject, parentPid, contentModel, extraNamespaces=None,
                  extraRelationships=None):
    """
    Create the RELS-EXT relationships between childObject and object:parentPid

    We set the default namespace for our interconnections, then apply the
    content model, and make childObject a member of the object:parentPid
    collection. If object:parentPid doesn't have the collection content
    model then strange things might happen.

    The update is retried for as long as Fedora reports that the object is
    being modified by another thread; any other connection error is printed
    and the function gives up.

    @param childObject The FedoraObject to attach the RELS-EXT to.
    @param parentPid The pid of the parent to assign to childObject.
    @param contentModel The @contentModel to give to childObject.
    @param extraNamespaces Optional dict of alias -> uri to put in the
                           RELS-EXT data. Non-dict values are ignored.
    @param extraRelationships Optional dict of predicate -> value; each is
                              added to childObject as a "pid".
                              Non-dict values are ignored.
                              By default the object gets:
                              hasModel:contentModel and
                              isMemberOfCollection:parentPid
    """
    nsmap = [
        fedora_relationships.rels_namespace(
            'fedora', 'info:fedora/fedora-system:def/relations-external#'),
        fedora_relationships.rels_namespace(
            'fedora-model', 'info:fedora/fedora-system:def/model#')
    ]
    # isinstance (rather than an exact type match) so that dict subclasses
    # are honoured instead of being silently dropped.
    if extraNamespaces and isinstance(extraNamespaces, dict):
        for k, v in extraNamespaces.items():
            nsmap.append(fedora_relationships.rels_namespace(k, v))

    # add relationships
    rels_ext = fedora_relationships.rels_ext(childObject, nsmap, 'fedora')
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        [contentModel, "pid"])
    rels_ext.addRelationship(
        fedora_relationships.rels_predicate('fedora', 'isMemberOfCollection'),
        [parentPid, "pid"])
    if extraRelationships and isinstance(extraRelationships, dict):
        for k, v in extraRelationships.items():
            rels_ext.addRelationship(k, [v, "pid"])

    while True:
        try:
            rels_ext.update()
        except FedoraConnectionException as fedoraEXL:
            if "is currently being modified by another thread" in str(
                    fedoraEXL.body):
                print(
                    "Trouble (thread lock) updating obj(%s) RELS-EXT - retrying."
                    % childObject.pid)
                continue
            print("Error updating obj(%s) RELS-EXT" % childObject.pid)
        break
def addRelationshipWithoutDup(rel, fedora=None, rels_ext=None):
    '''
    Add a single relationship unless an identical one is already present.

    'rel': a 2-tuple containing a rels_predicate and a rels_object, in that
        order.
    'fedora': a fcrepo FedoraObject (could probably use some testing...)
    'rels_ext': a islandoraUtils rels_ext object

    Only one of 'fedora' and 'rels_ext' is required. If both are given,
    only rels_ext will be used, whatever differences that might cause.
    Returns the rels_ext that was used; raises Exception when neither is
    provided.

    XXX: Should probably get the list of namespaces in a better manner, so
        as not to require the import of atm_object
    '''
    if not rels_ext:
        if not fedora:
            raise Exception('Either fedora or rels_ext must be provided!')
        rels_ext = FR.rels_ext(obj=fedora, namespaces=ao.NS.values())

    predicate, target = rel
    already_there = rels_ext.getRelationships(predicate=predicate,
                                              object=target)
    if len(already_there) == 0:
        rels_ext.addRelationship(predicate=predicate, object=target)
    return rels_ext
def process(self):
    """Find or create the Fedora object for this group and describe it.

    An object carrying a matching ``fjm-db:groupID`` is reused; when the
    lookup raises KeyError a new object is created. The content-model and
    groupID relationships are added without duplicates, the DC (type and
    title) is saved, the database entry is correlated and the object is
    set active.

    Raises Exception when the lookup yields a falsy pid.
    """
    logger = self.logger
    logger.info('Starting to ingest: Groupo %s' % self.dbid)

    try:
        pid = FedoraWrapper.getPid(uri=ao.NS['fjm-db'].uri,
                                   predicate='groupID',
                                   obj="'%s'" % self.dbid)
        if not pid:
            raise Exception('Something went horribly wrong! Found a pid, but couldn\'t access it...')
        logger.warning(
            'Group %(id)s already exists as pid %(pid)s! Overwriting DC DS!'
            % {'id': self.dbid, 'pid': pid})
        group = FedoraWrapper.client.getObject(pid)
    except KeyError:
        group = FedoraWrapper.getNextObject(self.prefix,
                                            label='Group %s' % self.dbid)

    group_rels = FR.rels_ext(group, namespaces=ao.NS.values())
    relationships = [
        (FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
         FR.rels_object('atm:groupCModel', FR.rels_object.PID)),
        (FR.rels_predicate(alias='fjm-db', predicate='groupID'),
         FR.rels_object(self.dbid, FR.rels_object.LITERAL)),
    ]
    FedoraWrapper.addRelationshipsWithoutDup(
        relationships, rels_ext=group_rels).update()

    dc = {
        'type': [unicode('Collection')],
        'title': [self.element.findtext('grupo').strip()],
    }
    Group.save_dc(group, dc)

    FedoraWrapper.correlateDBEntry('group', 'groupID')
    group.state = unicode('A')
collection_object.addDataStream( u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error( 'Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT = fedora_relationships.rels_ext( collection_object, fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOf', 'islandora:root') collection_object_RELS_EXT.addRelationship( fedora_relationships.rels_predicate('fedora-model', 'hasModel'), 'islandora:collectionCModel') collection_object_RELS_EXT.update() #loop through the mods folder for mods_file in mods_files: if mods_file.endswith('MODS.xml'): #get mods file contents mods_file_path = os.path.join(source_directory, 'mods-xml', mods_file) mods_file_handle = open(mods_file_path)
def process(self):
    """Create an authority record (and its photo object) from one row.

    The row's cells are matched positionally to the known column names.
    Empty cells are dropped, except the birth and death dates. Rows without
    a photo are skipped entirely. For the remaining rows an EAC-CPF record
    is assembled and saved on a new object, which receives the authority
    content model. If the photo file exists, a second object is created
    for it and related to the authority record.
    """
    columns = ('forename', 'surname', 'birth_date', 'death_date',
               'alt_forename', 'ceacs_member', 'academic_page',
               'ceacs_arrival', 'ceacs_depart', 'phd_date', 'photo')
    kept_even_if_empty = ('birth_date', 'death_date')

    #self.logger.debug('Received line: %s' % self.element)
    info = {}
    for part, cell in zip(columns, self.element):
        text = unicode(cell.strip(), 'UTF-8', 'replace')
        if text or part in kept_even_if_empty:
            info[part] = text

    if 'photo' not in info:
        return

    #self.logger.debug('info dictionary: %s' % info)
    info['full_name'] = "%(surname)s, %(forename)s" % info
    auth_record = FedoraWrapper.getNextObject(
        prefix=self.prefix,
        label=info['full_name'].encode('ascii', 'replace'))

    cpf = eaccpf(auth_record.pid)
    cpf.add_maintenance_event(agent="Adam Vessey, via ir_authority.py")
    cpf.add_bin_source(caption='Row in Excel spreadsheet',
                       obj=str(self.element))
    cpf.add_name_entry(name={
        'forename': info['forename'],
        'surname': info['surname']
    })
    if 'alt_forename' in info:
        cpf.add_name_entry(name={
            'forename': info['alt_forename'],
            'surname': info['surname']
        }, role='alternative')

    chronology = []
    if 'phd_date' in info:
        chronology.append({
            'date': info['phd_date'],
            'event': 'Achieved PhD'
        })

    # Whichever of the membership dates are present form the date range.
    date_range = {}
    if 'ceacs_arrival' in info:
        date_range['fromDate'] = info['ceacs_arrival']
    if 'ceacs_depart' in info:
        date_range['toDate'] = info['ceacs_depart']

    if chronology:
        cpf.add_chron_list(chronology)
    if date_range:
        cpf.add_relation(type='cpfRelation',
                         url='http://digital.march.es/ceacs',
                         elements={
                             'dateRange': date_range,
                             'event': 'CEACS membership'
                         })
    if 'academic_page' in info:
        cpf.add_relation(type="resourceRelation",
                         url=info['academic_page'],
                         elements={'descriptiveNote': 'Academic page'})

    cpf.add_exist_dates(info['birth_date'], info['death_date'])

    #print(cpf)
    Authority.save_etree(auth_record, cpf.element, 'EAC-CPF',
                         'EAC-CPF record', controlGroup='X',
                         hash='DISABLED')

    record_rels = FR.rels_ext(obj=auth_record,
                              namespaces=Authority.NS.values())
    record_rels.addRelationship(['fedora-model', 'hasModel'],
                                ['ir:authorityCModel', 'pid'])
    record_rels.update()

    # Add the image, related to the authority object.
    # NOTE(review): rows without a photo have already returned above, so
    # the final else branch is not reached.
    if 'photo' in info:
        photo_path = self.getPath(info['photo'])
        if not path.exists(photo_path):
            self.logger.warning(
                'photo: %s specified, but %s does not exist!' %
                (info['photo'], photo_path))
        else:
            # Create the object...
            photo_label = ('Photo of %s' % info['full_name']).encode(
                'ascii', 'replace')
            photo = FedoraWrapper.getNextObject(self.prefix,
                                                label=photo_label)
            # ... add the datastream ...
            update_datastream(photo, 'JPG', filename=photo_path,
                              label='Original image', checksumType='SHA-1',
                              mimeType='image/jpeg')
            # ... and relate the object.
            # The namespace is registered on Authority.NS itself.
            namespaces = Authority.NS
            namespaces['ir-rel'] = FR.rels_namespace(
                'ir-rel', 'http://digital.march.es/ceacs#')
            photo_rels = FR.rels_ext(photo, namespaces=namespaces.values())
            photo_rels.addRelationship(['fedora-model', 'hasModel'],
                                       ['ir:photoCModel', 'pid'])
            photo_rels.addRelationship(['ir-rel', 'iconOf'],
                                       [auth_record.pid, 'pid'])
            photo_rels.update()
    else:
        self.logger.debug('No photo specified.')
def handle_clip_mods(clip_mods_parser, mods_file_name):
    '''
    This function will handle the creation of clip objects.

    A new object is created, given the MODS datastream and, when the MODS
    names them, the HIGHRES and LOWRES movie datastreams. The clip is
    related to the movie, numbered, recorded in clips_to_pids under its
    MODS file name and given the benshiClip model, or benshiClipSubbed plus
    an isSubOf relationship to its master clip when the file name contains
    '-sub'.

    @param clip_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name; used for the label, the clip
           number and the subtitle detection
    @return boolean True once the relationships have been written
    '''
    clip_pid = fedora.getNextPID(name_space)
    high_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='High Quality Video']"
    )
    low_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Web Quality Video']"
    )

    extension_start = mods_file_name.rfind('.')
    clip_number = mods_file_name[mods_file_name.find('-cp') + 3:extension_start]
    clip_number = clip_number.replace('-sub', '')
    clip_label = unicode(
        movie_name + '_' +
        mods_file_name[mods_file_name.find('-') + 1:extension_start])
    clip_object = fedora.createObject(clip_pid, label=clip_label)

    #datastreams
    # NOTE(review): mods_file_path is not a parameter; it is read from the
    # enclosing module scope.
    add_MODS_datastream(clip_object, mods_file_path)

    # 'with' closes each file even when something other than a
    # FedoraConnectionException is raised.
    if high_resolution_mov_path:
        with open(high_resolution_mov_path, 'rb') as hires_file_handle:
            try:
                clip_object.addDataStream(
                    u'HIGHRES', u'aTmpStr', label=u'HIGHRES',
                    mimeType=u'video/quicktime', controlGroup=u'M',
                    logMessage=u'Added HIGHRES datastream.')
                datastream = clip_object['HIGHRES']
                datastream.setContent(hires_file_handle)
                logging.info('Added HIGHRES datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding HIGHRES datastream to:' +
                              clip_pid + '\n')

    if low_resolution_mov_path:
        with open(low_resolution_mov_path, 'rb') as lowres_file_handle:
            try:
                clip_object.addDataStream(
                    u'LOWRES', u'aTmpStr', label=u'LOWRES',
                    mimeType=u'video/quicktime', controlGroup=u'M',
                    logMessage=u'Added LOWRES datastream.')
                datastream = clip_object['LOWRES']
                datastream.setContent(lowres_file_handle)
                logging.info('Added LOWRES datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding LOWRES datastream to:' +
                              clip_pid + '\n')

    #relationships
    clip_object_RELS_EXT = fedora_relationships.rels_ext(
        clip_object, [hamilton_rdf_name_space, fedora_model_namespace])
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipOf'),
        movie_pid)
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipNumber'),
        fedora_relationships.rels_object(
            str(clip_number), fedora_relationships.rels_object.LITERAL))

    # Item assignment mutates the module-level dict in place, so no
    # 'global' declaration is needed.
    clips_to_pids[mods_file_name] = clip_pid

    #this section handles the different types of clips (subs or not)
    has_model = fedora_relationships.rels_predicate('fedora-model',
                                                    'hasModel')
    if '-sub' not in mods_file_name:
        clip_object_RELS_EXT.addRelationship(has_model,
                                             name_space + ':benshiClip')
    else:
        master_clip_file_name = mods_file_name.replace('-sub', '')
        clip_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isSubOf'),
            clips_to_pids[master_clip_file_name])
        clip_object_RELS_EXT.addRelationship(
            has_model, name_space + ':benshiClipSubbed')

    clip_object_RELS_EXT.update()
    return True
def handle_transcript_mods(transcript_mods_parser, mods_file_name):
    '''
    This function will handle the creation of transcript objects.

    A new object is created, given the MODS datastream and, when the MODS
    names them, the TimeSyncedTranscript and PDF datastreams. The
    transcript is related to its clip when exactly one 'Video clip' url is
    present, otherwise to the movie. The content model is chosen from the
    language marker in the file name.

    @param transcript_mods_parser The etree xml parser to get ingest data
           from
    @param mods_file_name The MODS file name; used for the label and the
           language detection
    @return boolean True on success, False when the file name carries none
            of the '-jpneng', '-jpn' or '-eng' markers (the RELS-EXT is not
            written in that case)
    '''
    transcript_pid = fedora.getNextPID(name_space)
    transcript_label = unicode(
        movie_name + '_' +
        mods_file_name[mods_file_name.find('-tr-') + 4:mods_file_name.rfind('.')])
    transcript_object = fedora.createObject(transcript_pid,
                                            label=transcript_label)
    transcript_object_RELS_EXT = fedora_relationships.rels_ext(
        transcript_object, [hamilton_rdf_name_space, fedora_model_namespace])

    transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Document']"
    )
    time_synced_transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Document with time-sync encoding']"
    )

    #datastreams
    # NOTE(review): mods_file_path is not a parameter; it is read from the
    # enclosing module scope.
    add_MODS_datastream(transcript_object, mods_file_path)

    # 'with' closes each file even when something other than a
    # FedoraConnectionException is raised.
    if time_synced_transcript_path:
        with open(time_synced_transcript_path,
                  'rb') as time_synced_transcript_handle:
            try:
                transcript_object.addDataStream(
                    u'TimeSyncedTranscript', u'aTmpStr', label=u'POPCORN',
                    mimeType=u'application/xml', controlGroup=u'M',
                    logMessage=u'Added TimeSyncedTranscript datastream.')
                datastream = transcript_object['TimeSyncedTranscript']
                datastream.setContent(time_synced_transcript_handle)
                logging.info('Added TimeSyncedTranscript datastream to:' +
                             transcript_pid)
            except FedoraConnectionException:
                logging.error(
                    'Error in adding TimeSyncedTranscript datastream to:' +
                    transcript_pid + '\n')

    if transcript_path:
        with open(transcript_path, 'rb') as pdf_file_handle:
            try:
                transcript_object.addDataStream(
                    u'PDF', u'aTmpStr', label=u'PDF',
                    mimeType=u'application/pdf', controlGroup=u'M',
                    logMessage=u'Added PDF datastream.')
                datastream = transcript_object['PDF']
                datastream.setContent(pdf_file_handle)
                logging.info('Added PDF datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PDF datastream to:' +
                              transcript_pid + '\n')

    #relationships
    #handle is transcript of
    transcript_clip_element_list = transcript_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']"
    )
    is_transcript_of = fedora_relationships.rels_predicate('hamilton',
                                                           'isTranscriptOf')
    # There are multiple related items if the transcript is for the whole
    # movie, so only a single match points at one clip.
    if len(transcript_clip_element_list) == 1:
        transcript_clip_file_name = transcript_clip_element_list[0].text
        transcript_object_RELS_EXT.addRelationship(
            is_transcript_of, clips_to_pids[transcript_clip_file_name])
    else:
        transcript_object_RELS_EXT.addRelationship(is_transcript_of,
                                                   movie_pid)

    #handle the 3 different transcript types
    # '-jpneng' must be tested before '-jpn', which it contains.
    if '-jpneng' in mods_file_name:
        model_suffix = ':EnglishJapaneseTranscript'
    elif '-jpn' in mods_file_name:
        model_suffix = ':JapaneseTranscript'
    elif '-eng' in mods_file_name:
        model_suffix = ':EnglishTranscript'
    else:
        return False

    transcript_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + model_suffix)
    transcript_object_RELS_EXT.update()
    return True
def handle_still_mods(still_mods_parser, mods_file_name):
    '''
    This function will handle the creation of still image objects.

    A new object is always created and given the MODS datastream. When the
    MODS names a 'Still Image' file, that file is stored as the PNG
    datastream and the object is related to its clip (or to the movie when
    no 'Video clip' url is present) and given the benshiStill model.

    @param still_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name; used to build the label
    @return boolean True when a still image path was found and the
            relationships were written, False when no path was found
    '''
    still_path = get_file_path_from_xpath(
        still_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Still Image']"
    )
    still_pid = fedora.getNextPID(name_space)
    still_label = unicode(
        movie_name + '_' +
        mods_file_name[mods_file_name.find('-') + 1:mods_file_name.rfind('.')])
    still_object = fedora.createObject(still_pid, label=still_label)
    still_object_RELS_EXT = fedora_relationships.rels_ext(
        still_object, [hamilton_rdf_name_space, fedora_model_namespace])

    #datastreams
    # NOTE(review): mods_file_path is not a parameter; it is read from the
    # enclosing module scope.
    add_MODS_datastream(still_object, mods_file_path)

    if not still_path:
        return False

    # 'with' closes the file even when something other than a
    # FedoraConnectionException is raised.
    with open(still_path, 'rb') as png_file_handle:
        try:
            still_object.addDataStream(u'PNG', u'aTmpStr', label=u'PNG',
                                       mimeType=u'image/png',
                                       controlGroup=u'M',
                                       logMessage=u'Added PNG datastream.')
            datastream = still_object['PNG']
            datastream.setContent(png_file_handle)
            logging.info('Added PNG datastream to:' + still_pid)
        except FedoraConnectionException:
            logging.error('Error in adding PNG datastream to:' + still_pid +
                          '\n')

    #relationships
    still_clip_element_list = still_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']"
    )
    is_still_of = fedora_relationships.rels_predicate('hamilton',
                                                      'isStillOf')
    if still_clip_element_list:
        still_clip_file_name = still_clip_element_list[0].text
        still_object_RELS_EXT.addRelationship(
            is_still_of, clips_to_pids[still_clip_file_name])
    else:
        still_object_RELS_EXT.addRelationship(is_still_of, movie_pid)

    still_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + ':benshiStill')
    still_object_RELS_EXT.update()
    return True
def __processConcert(self):
    '''
    Create or update the Fedora object for this concert.

    Looks the concert up by its database id (creating a new object on
    KeyError), stores the source XML as the CustomXML datastream, ingests the
    WAV and MARCXML files when they exist on disk, gets or creates the cycle
    object named by the 'tipo' element, writes the concert's RELS-EXT and DC,
    and finally stores the object on self.concert_obj and marks it active.
    '''
    logger = logging.getLogger('ingest.atm_concert.Concert.__processConcert')

    #Get the/an object
    # NOTE(review): if getPid returns a falsy pid without raising KeyError,
    # 'concert' is never bound and the code below fails - confirm getPid
    # always raises when nothing is found.
    try:
        pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate='concertID', obj="'%s'" % self.dbid)
        if pid:
            logger.warning('Concert %s found as %s. Overwriting DSs!' % (self.dbid, pid))
            concert = FedoraWrapper.client.getObject(pid)
    except KeyError:
        concert = FedoraWrapper.getNextObject(prefix=self.prefix, label="concert %s" % self.dbid)

    logger.info('Adding CustomXML datastream')
    if Concert.save_etree(concert, self.element, 'CustomXML', 'Original XML', controlGroup='M'):
        logger.info('CustomXML added successfully')
    else:
        logger.error('Error while adding CustomXML!')

    #Ingest the WAV (if it exists...)
    WAV = self.element.findtext('Grabacion/wav')
    if WAV:
        WAV = self.getPath(WAV)
        if path.exists(WAV):
            update_datastream(obj=concert, dsid='WAV', filename=WAV, label='WAV', mimeType="audio/x-wav")
        else:
            logger.warning('WAV file specified (%s), but does not exist!', WAV)
    else:
        logger.warning('No WAV found at %s! Skipping...', WAV)

    #Ingest the MARCXML...
    # The MARCXML file is looked up next to the WAV, so it can only be located
    # when a WAV path is known. Previously a missing WAV entry reached
    # path.dirname(None) and raised; now the step is skipped instead.
    if WAV:
        MARC = path.join(path.dirname(WAV), '%s.xml' % self.dbid)
        if path.exists(MARC):
            update_datastream(obj=concert, dsid='MARCXML', mimeType="application/xml", filename=MARC)
            logger.debug('Added %s', MARC)
        else:
            logger.debug('Couldn\'t find MARCXML at %s', MARC)
    else:
        logger.debug('No WAV path for concert %s, so no MARCXML lookup was attempted', self.dbid)

    cycle_info = {
        'norm_name': Concert.normalize_name([self.element.findtext('tipo')])
    }

    #Create cycle stuff
    # NOTE(review): the source had lost its indentation; the DC and RELS-EXT
    # writes are taken to belong to the "new cycle" branch so that an existing
    # cycle's metadata is not overwritten - confirm.
    try:
        pid = Concert.__cycles()[cycle_info['norm_name']]
        cycle = FedoraWrapper.client.getObject(pid)
    except KeyError:
        cycle = FedoraWrapper.getNextObject(prefix=self.prefix, label='Cycle %s' % (len(Concert.__cycles()) + 1))
        Concert.__cycles()[cycle_info['norm_name']] = cycle.pid
        c_dc = dict()
        c_dc['title'] = [cycle_info['norm_name']]
        Concert.save_dc(cycle, c_dc)
        FedoraWrapper.addRelationshipsWithoutDup(rels=[
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:concertCycleCModel', FR.rels_object.PID)
            )
        ], fedora=cycle).update()

    #Add relations to concert object
    rels_ext = FR.rels_ext(obj=concert, namespaces=ao.NS.values())
    rels = [
        #Don't know that this one is necessary... Oh well...
        (
            FR.rels_predicate(alias='fjm-db', predicate='concertID'),
            FR.rels_object(self.dbid, FR.rels_object.LITERAL)
        ),
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOfCollection'),
            FR.rels_object('atm:concertCollection', FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
            FR.rels_object(cycle.pid, FR.rels_object.PID)
        ),
        (
            FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
            FR.rels_object('atm:concertCModel', FR.rels_object.PID)
        )
    ]

    #Write 'out' rels_ext
    FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()

    # Dublin Core: fixed type, optional description, normalized title
    desc = self.element.findtext('Descripcion')
    dc = dict()
    dc['type'] = [unicode('Event')]
    if desc:
        dc['description'] = [unicode(desc)]
    dc['title'] = [unicode(Concert.normalize_name([self.element.findtext('titulo')]))]
    Concert.save_dc(concert, dc)

    self.concert_obj = concert
    concert.state = unicode('A')
def handle_transcript_mods(transcript_mods_parser, mods_file_name):
    '''
    Create a transcript object from a transcript MODS record.

    The object receives the MODS datastream, an optional time-synced
    transcript and an optional PDF, an 'isTranscriptOf' relationship to a clip
    (when exactly one video clip is referenced) or to the movie, and a content
    model chosen from the language suffix in the file name.

    @param transcript_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name; the part after '-tr-' and before
        the last '.' becomes part of the label, and a '-jpneng', '-jpn' or
        '-eng' marker selects the content model
    @return boolean True on success, False when the file name carries none of
        the known language markers (the object has already been created by
        then, but its RELS-EXT is not written)
    '''
    transcript_pid = fedora.getNextPID(name_space)
    label_suffix = mods_file_name[mods_file_name.find('-tr-') + 4:mods_file_name.rfind('.')]
    transcript_label = unicode(movie_name + '_' + label_suffix)
    transcript_object = fedora.createObject(transcript_pid, label=transcript_label)
    transcript_object_RELS_EXT = fedora_relationships.rels_ext(
        transcript_object, [hamilton_rdf_name_space, fedora_model_namespace])

    transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Document']")
    time_synced_transcript_path = get_file_path_from_xpath(
        transcript_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Document with time-sync encoding']")

    #datastreams
    add_MODS_datastream(transcript_object, mods_file_path)

    # 'with' closes each handle even when something other than a
    # FedoraConnectionException escapes from the upload.
    if time_synced_transcript_path:
        with open(time_synced_transcript_path, 'rb') as time_synced_transcript_handle:
            try:
                transcript_object.addDataStream(u'TimeSyncedTranscript', u'aTmpStr', label=u'POPCORN',
                                                mimeType=u'application/xml', controlGroup=u'M',
                                                logMessage=u'Added TimeSyncedTranscript datastream.')
                datastream = transcript_object['TimeSyncedTranscript']
                datastream.setContent(time_synced_transcript_handle)
                logging.info('Added TimeSyncedTranscript datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding TimeSyncedTranscript datastream to:' + transcript_pid + '\n')

    if transcript_path:
        with open(transcript_path, 'rb') as pdf_file_handle:
            try:
                transcript_object.addDataStream(u'PDF', u'aTmpStr', label=u'PDF',
                                                mimeType=u'application/pdf', controlGroup=u'M',
                                                logMessage=u'Added PDF datastream.')
                datastream = transcript_object['PDF']
                datastream.setContent(pdf_file_handle)
                logging.info('Added PDF datastream to:' + transcript_pid)
            except FedoraConnectionException:
                logging.error('Error in adding PDF datastream to:' + transcript_pid + '\n')

    #relationships
    #handle is transcript of
    transcript_clip_element_list = transcript_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Video clip']")
    if len(transcript_clip_element_list) == 1:
        transcript_clip_file_name = transcript_clip_element_list[0].text
        transcript_target = clips_to_pids[transcript_clip_file_name]
    else:
        #there are multiple related items if the transcript is for the whole movie
        transcript_target = movie_pid
    transcript_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isTranscriptOf'),
        transcript_target)

    #handle the 3 different transcript types
    # '-jpneng' must be tested before '-jpn' because the latter is a prefix of it
    if '-jpneng' in mods_file_name:
        transcript_model = ':EnglishJapaneseTranscript'
    elif '-jpn' in mods_file_name:
        transcript_model = ':JapaneseTranscript'
    elif '-eng' in mods_file_name:
        transcript_model = ':EnglishTranscript'
    else:
        return False
    transcript_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + transcript_model)

    transcript_object_RELS_EXT.update()
    return True
def process(self):
    '''
    Create or update the Fedora object for this performer.

    The performer is first looked up by normalized name (self[...]); on
    KeyError it is looked up by database id, and on a second KeyError a new
    object is created. DC, RELS-EXT and an EAC-CPF record are then written and
    the name -> pid mapping is recorded on self.

    Raises Exception when a lookup succeeds but yields a falsy pid, and
    re-raises any FedoraConnectionException other than a 404 while reading the
    existing EAC-CPF datastream.
    '''
    logger = self.logger
    logger.info('Starting to ingest: Performer %s' % self.dbid)
    try:
        logger.info('Checking to see if %s already exists in Fedora' % self.norm_name)
        pid = self[self.norm_name]
        logger.info('Found %(pid)s' % {'pid': pid})
        if pid:
            logger.warning('%(name)s already exists as pid %(pid)s!' % {'name': self.norm_name, 'pid': pid})
            self.performer = FedoraWrapper.client.getObject(pid)
        else:
            msg = 'Something went horribly wrong! Found a pid (%(pid)s), but couldn\'t access it...' % {'pid': pid}
            logger.error(msg)
            raise Exception(msg)
    except KeyError:
        # NOTE(review): the source had lost its indentation. Everything up to
        # the state change is taken to belong to this "not known by name"
        # branch, because only the by-id message announces that DC will be
        # overwritten and the name mapping is only missing here - confirm.
        try:
            logger.debug('Not known by name, checking by performerID')
            pid = FedoraWrapper.getPid(uri=Performer.NS['fjm-db'].uri, predicate='performerID', obj="'%s'" % self.dbid)
            logger.info('Found %(pid)s' % {'pid': pid})
            if pid:
                logger.warning('%(name)s already exists as pid %(pid)s! Overwriting DC DS!' % {'name': self.norm_name, 'pid': pid})
                self.performer = FedoraWrapper.client.getObject(pid)
            else:
                msg = 'Something went horribly wrong! Found a pid (%(pid)s), but couldn\'t access it...' % {'pid': pid}
                logger.error(msg)
                raise Exception(msg)
        except KeyError:
            logger.info('Doesn\'t exist: creating a new Fedora Object')
            self.performer = FedoraWrapper.getNextObject(self.prefix, label='Performer: %s' % self.dbid)

        dc = dict()
        dc['title'] = [self.norm_name]
        Performer.save_dc(self.performer, dc)

        rels_ext = FR.rels_ext(self.performer, namespaces=Performer.NS.values())
        rels = [
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:personCModel', FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fjm-db', predicate='performerID'),
                FR.rels_object(self.dbid, FR.rels_object.LITERAL)
            )
        ]
        FedoraWrapper.addRelationshipsWithoutDup(rels, rels_ext=rels_ext).update()

        #Yay Pythonic-ness? Try to get an existing EAC-CPF, or create one if none is found
        try:
            eaccpf = CPF.EACCPF(self.performer.pid, xml=self.performer['EAC-CPF'].getContent().read())
            event_type = "modified"
        except fcrepo.connection.FedoraConnectionException as e:
            if e.httpcode == 404:
                # no EAC-CPF datastream yet: start a fresh record
                eaccpf = CPF.EACCPF(self.performer.pid)
                event_type = "created"
            else:
                # bare raise keeps the original traceback ('raise e' resets it)
                raise

        eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_performer.py")
        eaccpf.add_XML_source(caption='XML from database dump', xml=self.element)
        eaccpf.add_name_entry(name=self.name)
        Performer.save_etree(self.performer, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')

        self[self.norm_name] = self.performer.pid
        self.performer.state = unicode('A')

    FedoraWrapper.correlateDBEntry('player', 'performerID')
def handle_misc_mods(misc_mods_parser, mods_file_name):
    '''
    Ingest a miscellaneous MODS record according to its local genre.

    'sound recording' and 'Motion Picture' add datastreams to the existing
    benshi and movie objects; 'essay', 'presentation' and 'biography' each
    create a new object with its own datastreams and relationships.

    @param misc_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name (not used by this handler)
    @return boolean True when the genre was recognised and handled, False
        when the record has no local genre or the genre is not one of the
        five handled here
    '''
    misc_type_list = misc_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'genre'][@type='local']")
    # NOTE(review): the source had lost its indentation, so it is unclear
    # whether the original 'else: return False' guarded the missing genre or
    # the unknown genre; both are reported as failure here - confirm.
    if not misc_type_list:
        return False

    misc_type = misc_type_list[0].text
    print(misc_type)

    if misc_type == 'sound recording':
        #fix up benshi object
        #datastreams
        add_MODS_datastream(benshi_object, mods_file_path)
        audio_file_path = get_file_path_from_xpath(
            misc_mods_parser,
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Audio']")
        if audio_file_path:
            # 'with' closes the handle even on unexpected exceptions
            with open(audio_file_path, 'rb') as audio_file_handle:
                try:
                    benshi_object.addDataStream(u'MP3', u'aTmpStr', label=u'MP3',
                                                mimeType=u'audio/mpeg', controlGroup=u'M',
                                                logMessage=u'Added MP3 datastream.')
                    datastream = benshi_object['MP3']
                    datastream.setContent(audio_file_handle)
                    logging.info('Added MP3 datastream to:' + benshi_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding MP3 datastream to:' + benshi_pid + '\n')
        print(audio_file_path)

    elif misc_type == 'essay':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_' + misc_type)
        misc_object = fedora.createObject(misc_pid, label=misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(
            misc_object, [hamilton_rdf_name_space, fedora_model_namespace])

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        essay_file_path = get_file_path_from_xpath(
            misc_mods_parser,
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Article']")
        if essay_file_path:
            with open(essay_file_path, 'rb') as essay_file_handle:
                try:
                    misc_object.addDataStream(u'DOCX', u'aTmpStr', label=u'DOCX',
                                              mimeType=u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                                              controlGroup=u'M',
                                              logMessage=u'Added DOCX datastream.')
                    datastream = misc_object['DOCX']
                    datastream.setContent(essay_file_handle)
                    logging.info('Added DOCX datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding DOCX datastream to:' + misc_pid + '\n')

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isEssayOf'), movie_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiEssay')
        misc_object_RELS_EXT.update()

    elif misc_type == 'presentation':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_' + misc_type)
        misc_object = fedora.createObject(misc_pid, label=misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(
            misc_object, [hamilton_rdf_name_space, fedora_model_namespace])

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        presentation_file_path = get_file_path_from_xpath(
            misc_mods_parser,
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Presentation']")
        if presentation_file_path:
            with open(presentation_file_path, 'rb') as presentation_file_handle:
                try:
                    misc_object.addDataStream(u'PPTX', u'aTmpStr', label=u'PPTX',
                                              mimeType=u'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                                              controlGroup=u'M',
                                              logMessage=u'Added PPTX datastream.')
                    datastream = misc_object['PPTX']
                    datastream.setContent(presentation_file_handle)
                    logging.info('Added PPTX datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding PPTX datastream to:' + misc_pid + '\n')

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isPresentationOf'), movie_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiPresentation')
        misc_object_RELS_EXT.update()

    #movie gets the opac redirect it's special
    elif misc_type == 'Motion Picture':
        #fix up movie object
        #datastreams
        add_MODS_datastream(movie_object, mods_file_path)
        opac_path_list = misc_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']")
        # a record without any location url used to raise IndexError here
        opac_path = opac_path_list[0].text if opac_path_list else None
        if opac_path:
            try:
                movie_object.addDataStream(u'OPAC', u'aTmpStr', label=u'OPAC',
                                           mimeType=u'text/html', controlGroup=u'R',
                                           location=unicode(opac_path),
                                           logMessage=u'Added OPAC datastream.')
                logging.info('Added OPAC datastream to:' + movie_pid)
            except FedoraConnectionException:
                logging.error('Error in adding OPAC datastream to:' + movie_pid + '\n')

    #biography is special it has a docx and a pdf
    #can't use 'get_file_path_from_xpath' these are different then the rest, can change it or handle things here (handle things here, dev_speed)
    elif misc_type == 'biography':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_Narrator')
        misc_object = fedora.createObject(misc_pid, label=misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(
            misc_object, [hamilton_rdf_name_space, fedora_model_namespace])

        #get the paths for the pdf/docx
        docx_file_path = False
        pdf_file_path = False
        path_list = misc_mods_parser.xpath("//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Article']")
        for path_element in path_list:
            # The marker is looked for in the element's text; the original
            # tested the element object itself, which never matches a string.
            # An empty url element has text None, treated here as ''.
            path_text = path_element.text or ''
            if 'currently unavailable' in path_text:
                continue
            if '.docx' in path_text:
                docx_file_path = os.path.normpath(os.path.join(mods_directory, path_text))
            elif 'pdf' in path_text:
                pdf_file_path = os.path.normpath(os.path.join(mods_directory, path_text))

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        if docx_file_path:
            with open(docx_file_path, 'rb') as docx_file_handle:
                try:
                    misc_object.addDataStream(u'DOCX', u'aTmpStr', label=u'DOCX',
                                              mimeType=u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                                              controlGroup=u'M',
                                              logMessage=u'Added DOCX datastream.')
                    datastream = misc_object['DOCX']
                    datastream.setContent(docx_file_handle)
                    logging.info('Added DOCX datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding DOCX datastream to:' + misc_pid + '\n')
        if pdf_file_path:
            with open(pdf_file_path, 'rb') as pdf_file_handle:
                try:
                    misc_object.addDataStream(u'PDF', u'aTmpStr', label=u'PDF',
                                              mimeType=u'application/pdf', controlGroup=u'M',
                                              logMessage=u'Added PDF datastream.')
                    datastream = misc_object['PDF']
                    datastream.setContent(pdf_file_handle)
                    logging.info('Added PDF datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding PDF datastream to:' + misc_pid + '\n')

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isNarratorOf'), benshi_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiNarrator')
        misc_object_RELS_EXT.update()

    else:
        return False

    return True
def handle_clip_mods(clip_mods_parser, mods_file_name):
    '''
    Create a clip object from a clip MODS record.

    The object receives the MODS datastream and, when the files are
    referenced, HIGHRES and LOWRES video datastreams. Its pid is recorded in
    the module-level clips_to_pids map under mods_file_name so that later
    records can refer to it.

    @param clip_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name; the text after '-cp' gives the
        clip number and a '-sub' marker identifies a subtitled clip
    @return boolean True; failures surface as exceptions (for example a
        KeyError when a subtitled clip's master clip has not been recorded in
        clips_to_pids)
    '''
    global clips_to_pids

    clip_pid = fedora.getNextPID(name_space)
    high_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='High Quality Video']")
    low_resolution_mov_path = get_file_path_from_xpath(
        clip_mods_parser,
        "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Web Quality Video']")

    extension_start = mods_file_name.rfind('.')
    clip_number = mods_file_name[mods_file_name.find('-cp') + 3:extension_start]
    clip_number = clip_number.replace('-sub', '')
    clip_label = unicode(movie_name + '_' + mods_file_name[mods_file_name.find('-') + 1:extension_start])
    clip_object = fedora.createObject(clip_pid, label=clip_label)

    #datastreams
    add_MODS_datastream(clip_object, mods_file_path)

    # 'with' closes each handle even when something other than a
    # FedoraConnectionException escapes from the upload.
    if high_resolution_mov_path:
        with open(high_resolution_mov_path, 'rb') as hires_file_handle:
            try:
                clip_object.addDataStream(u'HIGHRES', u'aTmpStr', label=u'HIGHRES',
                                          mimeType=u'video/quicktime', controlGroup=u'M',
                                          logMessage=u'Added HIGHRES datastream.')
                datastream = clip_object['HIGHRES']
                datastream.setContent(hires_file_handle)
                logging.info('Added HIGHRES datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding HIGHRES datastream to:' + clip_pid + '\n')

    if low_resolution_mov_path:
        with open(low_resolution_mov_path, 'rb') as lowres_file_handle:
            try:
                clip_object.addDataStream(u'LOWRES', u'aTmpStr', label=u'LOWRES',
                                          mimeType=u'video/quicktime', controlGroup=u'M',
                                          logMessage=u'Added LOWRES datastream.')
                datastream = clip_object['LOWRES']
                datastream.setContent(lowres_file_handle)
                logging.info('Added LOWRES datastream to:' + clip_pid)
            except FedoraConnectionException:
                logging.error('Error in adding LOWRES datastream to:' + clip_pid + '\n')

    #relationships
    clip_object_RELS_EXT = fedora_relationships.rels_ext(
        clip_object, [hamilton_rdf_name_space, fedora_model_namespace])
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipOf'), movie_pid)
    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('hamilton', 'isClipNumber'),
        fedora_relationships.rels_object(str(clip_number), fedora_relationships.rels_object.LITERAL))

    clips_to_pids[mods_file_name] = clip_pid

    #this section handles the different types of clips (subs or not)
    if '-sub' not in mods_file_name:
        clip_model = ':benshiClip'
    else:
        # a subtitled clip points back at its master clip, whose MODS file
        # name is the same name without the '-sub' marker
        master_clip_file_name = mods_file_name.replace('-sub', '')
        clip_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isSubOf'),
            clips_to_pids[master_clip_file_name])
        clip_model = ':benshiClipSubbed'

    clip_object_RELS_EXT.addRelationship(
        fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
        name_space + clip_model)
    clip_object_RELS_EXT.update()
    return True
def __processProgram(self):
    '''
    Create or update the program object for this concert and its note authors.

    When the 'programa' element has children, the program object is looked up
    by 'programConcertID' (created on KeyError), its PDF datastream is
    updated, and a person object with an EAC-CPF record is fetched or created
    for every 'AutorNotas[@id]' element. If any 'Notas_Obras/Obra[@id]'
    exists, a shared '<prefix>:composerText' placeholder author is fetched or
    created as well.

    Re-raises any FedoraConnectionException other than a 404 raised while
    reading an existing EAC-CPF datastream.

    NOTE(review): in the portion of the function visible here, 'rels' is
    built and extended but never written through 'rels_ext' - confirm that
    the write happens in code following this block.
    '''
    p_el = self.element.find('programa')
    # NOTE(review): this runs before the emptiness check below, so a missing
    # 'programa' element (find() returning None) fails here - confirm callers
    # guarantee the element exists.
    filename = self.getPath(p_el.findtext('ruta'))

    if len(p_el) != 0:
        try:
            pid = FedoraWrapper.getPid(uri=Concert.NS['fjm-db'].uri, predicate='programConcertID', obj="'%s'" % self.dbid)
            program = FedoraWrapper.client.getObject(pid)
        except KeyError:
            #Get a Fedora Object for the program
            program = FedoraWrapper.getNextObject(self.prefix, label='Program for concert %(dbid)s' % {'dbid': self.dbid})

        #Add the PDF to the program object... Should probably do an "existence" check, but anyway...
        update_datastream(obj=program, dsid='PDF', filename=filename, mimeType='application/pdf')

        #Create the RELS-EXT datastream
        rels_ext = FR.rels_ext(obj=program, namespaces=ao.NS.values())
        rels = [
            (
                FR.rels_predicate(alias='fedora-rels-ext', predicate='isMemberOf'),
                FR.rels_object(self.concert_obj.pid, FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fedora-model', predicate='hasModel'),
                FR.rels_object('atm:programCModel', FR.rels_object.PID)
            ),
            (
                FR.rels_predicate(alias='fjm-db', predicate='programConcertID'),
                FR.rels_object(self.dbid, FR.rels_object.LITERAL)
            )
        ]

        for a_el in p_el.findall('AutorNotas[@id]'):
            fore, sur = a_el.findtext('Nombre'), a_el.findtext('Apellidos')
            normed = self.normalize_name([fore, sur])
            # NOTE(review): the source had lost its indentation; the DC write
            # is taken to belong to the "new author" branch so that an
            # existing person's DC is not overwritten - confirm.
            try:
                pid = Person._people()[normed]
                author = FedoraWrapper.client.getObject(pid)
            except KeyError:
                author = FedoraWrapper.getNextObject(self.prefix, label="an author")
                dc = dict()
                dc['title'] = [normed]
                Concert.save_dc(author, dc)

            rels.append(
                (
                    FR.rels_predicate(alias='atm-rel', predicate='authoredBy'),
                    FR.rels_object(author.pid, FR.rels_object.PID)
                )
            )
            FedoraWrapper.addRelationshipsWithoutDup([
                (
                    FR.rels_predicate(alias='fedora-model', predicate="hasModel"),
                    FR.rels_object('atm:personCModel', FR.rels_object.PID)
                )
            ], fedora=author).update()

            #Yay Pythonic-ness? Try to get an existing EAC-CPF, or create one if none is found
            try:
                #No point in updating if there's already one there... This is really just a check?
                eaccpf = CPF.EACCPF(author.pid, xml=author['EAC-CPF'].getContent().read())
                event_type = "modified"
            except fcrepo.connection.FedoraConnectionException as e:
                if e.httpcode == 404:
                    # no EAC-CPF datastream yet: start a fresh record
                    eaccpf = CPF.EACCPF(author.pid)
                    event_type = "created"
                else:
                    # bare raise keeps the original traceback ('raise e' resets it)
                    raise

            eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_concert.py")
            eaccpf.add_XML_source(caption='(Slightly modified (Put into an element)) XML from database dump', xml=a_el)
            eaccpf.add_name_entry(name={'forename': fore, 'surname': sur})
            Concert.save_etree(author, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')
            author.state = unicode('A')

        #XXX: This is seeming particularly less-than-elegant at the moment, creating a 'placeholder' object for composer notes... Anyway.
        if len(p_el.findall('Notas_Obras/Obra[@id]')) > 0:
            try:
                pid = '%s:composerText' % self.prefix
                author = FedoraWrapper.client.getObject(pid)
            except Exception:
                # Any failure to fetch is treated as "does not exist yet".
                # 'except Exception' (not a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                author = FedoraWrapper.client.createObject(pid, label=unicode('Composer Text'))

            #Yay Pythonic-ness? Try to get an existing EAC-CPF, or create one if none is found
            try:
                eaccpf = CPF.EACCPF(author.pid, xml=author['EAC-CPF'].getContent().read())
                event_type = "modified"
            except fcrepo.connection.FedoraConnectionException as e:
                if e.httpcode == 404:
                    eaccpf = CPF.EACCPF(author.pid)
                    event_type = "created"
                else:
                    raise

            eaccpf.add_maintenance_event(type=event_type, time="now", agent_type="machine", agent="atm_concert.py")
            name = {'forename': 'Texto', 'surname': 'Compositores'}
            eaccpf.add_name_entry(name=name)

            dc = dict()
            # NOTE(review): for note authors above, normalize_name receives a
            # list and the title is stored wrapped in a list; here it receives
            # a dict and the result is stored bare - confirm this is intended.
            dc['title'] = Concert.normalize_name(name)
            Concert.save_dc(author, dc)
            Concert.save_etree(author, eaccpf.element, 'EAC-CPF', 'EAC-CPF record', controlGroup='M')

            rels.append(
                (
                    FR.rels_predicate(alias='atm-rel', predicate='authoredBy'),
                    FR.rels_object(author.pid, FR.rels_object.PID)
                )
            )
            author.state = unicode('A')
fedora.getObject(collection_pid) except FedoraConnectionException, object_fetch_exception: if object_fetch_exception.httpcode in [404]: logging.info(name_space + ':itm missing, creating object.\n') collection_object = fedora.createObject(collection_pid, label = collection_label) #collection_policy try: collection_object.addDataStream(u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error('Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT = fedora_relationships.rels_ext(collection_object, fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOfCollection','islandora:root') collection_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'islandora:collectionCModel') collection_object_RELS_EXT.update() #loop through the mods folder for mods_file in mods_files: if mods_file.endswith('-MODS.xml'): #get mods file contents mods_file_path = os.path.join(source_directory, 'mods', mods_file) mods_file_handle = open(mods_file_path) mods_contents = mods_file_handle.read() #get map_label from mods title mods_tree = etree.parse(mods_file_path) map_label = mods_tree.xpath("*[local-name() = 'titleInfo']/*[local-name() = 'title']/text()")
collection_object.addDataStream( u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error( 'Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT = fedora_relationships.rels_ext( collection_object, fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOf', 'islandora:root') collection_object_RELS_EXT.addRelationship( fedora_relationships.rels_predicate('fedora-model', 'hasModel'), 'islandora:collectionCModel') collection_object_RELS_EXT.update() #put in the benshi Islandora:BenshiMovie content model try: model_pid = u'islandora:benshiMovie' fedora.getObject(model_pid) except FedoraConnectionException, object_fetch_exception: if object_fetch_exception.httpcode in [404]: logging.info('islandora:BenshiMovie missing, creating object.\n') model_object = fedora.createObject(model_pid,
def handle_misc_mods(misc_mods_parser, mods_file_name):
    '''
    Ingest a miscellaneous MODS record according to its local genre.

    'sound recording' and 'Motion Picture' add datastreams to the existing
    benshi and movie objects; 'essay', 'presentation' and 'biography' each
    create a new object with its own datastreams and relationships.

    @param misc_mods_parser The etree xml parser to get ingest data from
    @param mods_file_name The MODS file name (not used by this handler)
    @return boolean True when the genre was recognised and handled, False
        when the record has no local genre or the genre is not one of the
        five handled here
    '''
    misc_type_list = misc_mods_parser.xpath(
        "//*[local-name() = 'mods']//*[local-name() = 'genre'][@type='local']")
    # NOTE(review): the source had lost its indentation, so it is unclear
    # whether the original 'else: return False' guarded the missing genre or
    # the unknown genre; both are reported as failure here - confirm.
    if not misc_type_list:
        return False

    misc_type = misc_type_list[0].text
    print(misc_type)

    if misc_type == 'sound recording':
        #fix up benshi object
        #datastreams
        add_MODS_datastream(benshi_object, mods_file_path)
        audio_file_path = get_file_path_from_xpath(
            misc_mods_parser,
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Audio']"
        )
        if audio_file_path:
            # 'with' closes the handle even on unexpected exceptions
            with open(audio_file_path, 'rb') as audio_file_handle:
                try:
                    benshi_object.addDataStream(
                        u'MP3',
                        u'aTmpStr',
                        label=u'MP3',
                        mimeType=u'audio/mpeg',
                        controlGroup=u'M',
                        logMessage=u'Added MP3 datastream.')
                    datastream = benshi_object['MP3']
                    datastream.setContent(audio_file_handle)
                    logging.info('Added MP3 datastream to:' + benshi_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding MP3 datastream to:' + benshi_pid + '\n')
        print(audio_file_path)

    elif misc_type == 'essay':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_' + misc_type)
        misc_object = fedora.createObject(misc_pid, label=misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(
            misc_object, [hamilton_rdf_name_space, fedora_model_namespace])

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        essay_file_path = get_file_path_from_xpath(
            misc_mods_parser,
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Article']"
        )
        if essay_file_path:
            with open(essay_file_path, 'rb') as essay_file_handle:
                try:
                    misc_object.addDataStream(
                        u'DOCX',
                        u'aTmpStr',
                        label=u'DOCX',
                        mimeType=u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                        controlGroup=u'M',
                        logMessage=u'Added DOCX datastream.')
                    datastream = misc_object['DOCX']
                    datastream.setContent(essay_file_handle)
                    logging.info('Added DOCX datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding DOCX datastream to:' + misc_pid + '\n')

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isEssayOf'),
            movie_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiEssay')
        misc_object_RELS_EXT.update()

    elif misc_type == 'presentation':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_' + misc_type)
        misc_object = fedora.createObject(misc_pid, label=misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(
            misc_object, [hamilton_rdf_name_space, fedora_model_namespace])

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        presentation_file_path = get_file_path_from_xpath(
            misc_mods_parser,
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Presentation']"
        )
        if presentation_file_path:
            with open(presentation_file_path, 'rb') as presentation_file_handle:
                try:
                    misc_object.addDataStream(
                        u'PPTX',
                        u'aTmpStr',
                        label=u'PPTX',
                        mimeType=u'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                        controlGroup=u'M',
                        logMessage=u'Added PPTX datastream.')
                    datastream = misc_object['PPTX']
                    datastream.setContent(presentation_file_handle)
                    logging.info('Added PPTX datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding PPTX datastream to:' + misc_pid + '\n')

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isPresentationOf'),
            movie_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiPresentation')
        misc_object_RELS_EXT.update()

    #movie gets the opac redirect it's special
    elif misc_type == 'Motion Picture':
        #fix up movie object
        #datastreams
        add_MODS_datastream(movie_object, mods_file_path)
        opac_path_list = misc_mods_parser.xpath(
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url']"
        )
        # a record without any location url used to raise IndexError here
        opac_path = opac_path_list[0].text if opac_path_list else None
        if opac_path:
            try:
                movie_object.addDataStream(
                    u'OPAC',
                    u'aTmpStr',
                    label=u'OPAC',
                    mimeType=u'text/html',
                    controlGroup=u'R',
                    location=unicode(opac_path),
                    logMessage=u'Added OPAC datastream.')
                logging.info('Added OPAC datastream to:' + movie_pid)
            except FedoraConnectionException:
                logging.error('Error in adding OPAC datastream to:' + movie_pid + '\n')

    #biography is special it has a docx and a pdf
    #can't use 'get_file_path_from_xpath' these are different then the rest, can change it or handle things here (handle things here, dev_speed)
    elif misc_type == 'biography':
        misc_pid = fedora.getNextPID(name_space)
        misc_label = unicode(movie_name + '_Narrator')
        misc_object = fedora.createObject(misc_pid, label=misc_label)
        misc_object_RELS_EXT = fedora_relationships.rels_ext(
            misc_object, [hamilton_rdf_name_space, fedora_model_namespace])

        #get the paths for the pdf/docx
        docx_file_path = False
        pdf_file_path = False
        path_list = misc_mods_parser.xpath(
            "//*[local-name() = 'mods']//*[local-name() = 'location']//*[local-name() = 'url'][@displayLabel='Article']"
        )
        for path_element in path_list:
            # The marker is looked for in the element's text; the original
            # tested the element object itself, which never matches a string.
            # An empty url element has text None, treated here as ''.
            path_text = path_element.text or ''
            if 'currently unavailable' in path_text:
                continue
            if '.docx' in path_text:
                docx_file_path = os.path.normpath(
                    os.path.join(mods_directory, path_text))
            elif 'pdf' in path_text:
                pdf_file_path = os.path.normpath(
                    os.path.join(mods_directory, path_text))

        #datastreams
        add_MODS_datastream(misc_object, mods_file_path)
        if docx_file_path:
            with open(docx_file_path, 'rb') as docx_file_handle:
                try:
                    misc_object.addDataStream(
                        u'DOCX',
                        u'aTmpStr',
                        label=u'DOCX',
                        mimeType=u'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                        controlGroup=u'M',
                        logMessage=u'Added DOCX datastream.')
                    datastream = misc_object['DOCX']
                    datastream.setContent(docx_file_handle)
                    logging.info('Added DOCX datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding DOCX datastream to:' + misc_pid + '\n')
        if pdf_file_path:
            with open(pdf_file_path, 'rb') as pdf_file_handle:
                try:
                    misc_object.addDataStream(
                        u'PDF',
                        u'aTmpStr',
                        label=u'PDF',
                        mimeType=u'application/pdf',
                        controlGroup=u'M',
                        logMessage=u'Added PDF datastream.')
                    datastream = misc_object['PDF']
                    datastream.setContent(pdf_file_handle)
                    logging.info('Added PDF datastream to:' + misc_pid)
                except FedoraConnectionException:
                    logging.error('Error in adding PDF datastream to:' + misc_pid + '\n')

        #relationships
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('hamilton', 'isNarratorOf'),
            benshi_pid)
        misc_object_RELS_EXT.addRelationship(
            fedora_relationships.rels_predicate('fedora-model', 'hasModel'),
            name_space + ':benshiNarrator')
        misc_object_RELS_EXT.update()

    else:
        return False

    return True
print("Found %d page objects to update" % len(pages)) for pid in pages: strippedPid = pid.replace('info:fedora/', '') print(strippedPid) + " ...", try: obj = fedora.getObject(strippedPid) except FedoraConnectionException, fcx: print("Failed to connect to object %s" % pid) continue nsmap = [ fedora_relationships.rels_namespace('fedora', 'info:fedora/fedora-system:def/relations-external#'), fedora_relationships.rels_namespace('fedora-model', 'info:fedora/fedora-system:def/model#'), fedora_relationships.rels_namespace('pageNS', 'info:islandora/islandora-system:def/pageinfo#') ] rels_ext = fedora_relationships.rels_ext(obj, nsmap, 'fedora') # get the page number value = rels_ext.getRelationships(predicate=["pageNS", "isPageNumber"]) number = int(str(value[0][2])) # get the parent object value = rels_ext.getRelationships(predicate=["fedora", "isMemberOf"]) if value == []: value = rels_ext.getRelationships(predicate=["fedora", "isMemberOfCollection"]) if value: parent = str(value[0][2]) else: parent = None print " page: %d // parent: %s ..." % (number, str(parent)),
def __processPerformance(self, p_el):
    """Ingest one performance element: the performance, its movements and performers.

    For the performance itself, for every ``Movimientos/Movimiento`` child and
    for every ``Interpretes/Interprete`` child, an existing Fedora object is
    looked up with ``FedoraWrapper.getPid``; when the lookup raises
    ``KeyError`` or yields an empty pid, a new object is requested with
    ``FedoraWrapper.getNextObject`` instead.  Each object then receives its
    relationships and, where the XML names an MP3 file that exists on disk,
    an ``MP3`` datastream via ``update_datastream``.

    Args:
        p_el: XML element describing the performance.  It is read through
            ``get``, ``findtext`` and ``findall``; its children are also asked
            for ``sourceline`` (presumably lxml elements -- confirm).

    Returns:
        None.  Everything this method does is a side effect of the
        ``FedoraWrapper`` / ``update_datastream`` / ``Concert.save_dc`` calls.
    """
    logger = logging.getLogger('ingest.atm_concert.Concert.__processPerformance')

    literal = FR.rels_object.LITERAL
    pid_ref = FR.rels_object.PID

    def _rel(alias, predicate, value, kind):
        # Build one (predicate, object) pair in the shape handed to
        # FedoraWrapper.addRelationshipsWithoutDup.
        return (
            FR.rels_predicate(alias=alias, predicate=predicate),
            FR.rels_object(value, kind),
        )

    p_dict = {
        'piece': p_el.get('id_obra'),
        'concert': self.dbid,
        'order': p_el.findtext('Posicion'),
    }

    #TODO: Bloody well deduplicate (ensure that this object does not already exist in Fedora)
    try:
        pid = FedoraWrapper.getPid(tuples=[
            #Not sure if this is really necessary with the other two conditions...
            (Concert.NS['fjm-db'].uri, 'basedOn', "'%s'" % p_dict['piece']),
            #To ensure that the performance actually belongs to this concert...
            ('fedora-rels-ext:', 'isMemberOf', "<fedora:%s>" % self.concert_obj.pid),
            #To eliminate the confusion if the same piece is played twice in the same concert.
            (Concert.NS['atm-rel'].uri, 'concertOrder', "'%s'" % p_dict['order']),
        ])
        if not pid:
            # An empty pid carries the same meaning as "not found".  Without
            # this, `performance` would stay unbound and the first use below
            # would fail with UnboundLocalError.
            raise KeyError(pid)
        performance = FedoraWrapper.client.getObject(pid)
    except KeyError:
        performance = FedoraWrapper.getNextObject(self.prefix, label='Performance of %(piece)s in %(concert)s' % p_dict)

    #Add MP3 to performance (if there is one to add)
    p_mp3 = p_el.findtext('mp3_Obra')
    if p_mp3:
        mp3_path = self.getPath(p_mp3)
        if path.exists(mp3_path):
            update_datastream(obj=performance, dsid='MP3', filename=mp3_path, mimeType='audio/mpeg')
        else:
            logger.warning('MP3 entry for performance of %(piece)s in concert %(concert)s, but the file does not exist!', p_dict)
    else:
        logger.debug('No performance MP3 for %(concert)s/%(piece)s', p_dict)

    #Add relationships
    #1 - To concert
    #2 - To CM
    #3 - Position in concert
    #4 - To score
    # NOTE(review): this uses ao.NS while the movement code below uses
    # Concert.NS -- presumably the same mapping; confirm.
    rels_ext = FR.rels_ext(obj=performance, namespaces=ao.NS.values())
    performance_rels = [
        _rel('fedora-rels-ext', 'isMemberOf', self.concert_obj.pid, pid_ref),
        _rel('fedora-model', 'hasModel', 'atm:performanceCModel', pid_ref),
        _rel('atm-rel', 'concertOrder', p_dict['order'], literal),
        _rel('fjm-db', 'basedOn', p_dict['piece'], literal),
    ]

    #Add relations and commit
    FedoraWrapper.addRelationshipsWithoutDup(performance_rels, rels_ext=rels_ext).update()
    FedoraWrapper.correlateDBEntry('basedOn', 'scoreID')

    #Create objects for any movements within the piece
    for m_el in p_el.findall('Movimientos/Movimiento'):
        m_dict = {
            'concert': p_dict['concert'],
            'piece': p_dict['piece'],
            'id': m_el.get('id'),
            'corder': p_dict['order'],
            'porder': m_el.get('posicion'),
            'name': m_el.findtext('NOMBRE'),
            'MP3': m_el.findtext('mp3_Movimiento'),
            'line': m_el.sourceline,
            'file': self.file_name,
        }

        #Sanity test: a movement without a position cannot be ordered, skip it.
        if not m_dict['porder']:
            logger.error('Movement %(concert)s/%(piece)s/%(id)s does not have a position near line %(line)s of %(file)s!', m_dict)
            continue

        #Get a Fedora Object for this movement
        try:
            pid = FedoraWrapper.getPid(tuples=[
                ('fedora-rels-ext:', 'isMemberOf', '<fedora:%s>' % performance.pid),
                ('fedora-model:', 'hasModel', '<fedora:atm:movementCModel>'),
                (Concert.NS['atm-rel'].uri, 'pieceOrder', "'%s'" % m_dict['porder']),
            ])
            if not pid:
                # Same handling as for the performance: empty means not found.
                raise KeyError(pid)
            mov = FedoraWrapper.client.getObject(pid)
        except KeyError:
            mov = FedoraWrapper.getNextObject(self.prefix, label='Movement: %(concert)s/%(piece)s/%(id)s' % m_dict)

        #Get DC and set the title if we have a name.
        mov_dc = {'type': [unicode('Event')]}
        if m_dict['name']:
            mov_dc['title'] = [unicode(m_dict['name'])]
        Concert.save_dc(mov, mov_dc)

        #Set the three required relations:
        #1 - To the performance
        #2 - To the content model
        #3 - The order this movement occurs within the piece
        m_rels_ext = FR.rels_ext(obj=mov, namespaces=Concert.NS.values())
        m_rels = [
            _rel('fedora-rels-ext', 'isMemberOf', performance.pid, pid_ref),
            _rel('fedora-model', 'hasModel', 'atm:movementCModel', pid_ref),
            _rel('atm-rel', 'pieceOrder', m_dict['porder'], literal),
        ]
        FedoraWrapper.addRelationshipsWithoutDup(m_rels, rels_ext=m_rels_ext).update()

        #Add the MP3 (if it exists)
        if m_dict['MP3']:
            mp3_path = self.getPath(m_dict['MP3'])
            if path.exists(mp3_path):
                update_datastream(obj=mov, dsid='MP3', filename=mp3_path, mimeType='audio/mpeg')
            else:
                # The message now states the actual problem (missing file).
                logger.warning("MP3 entry for movement %(id)s in performance of %(piece)s in %(concert)s on line %(line)s of %(file)s, but the file does not exist!", m_dict)
        else:
            logger.debug('No movement MP3 for %(concert)s/%(piece)s/%(id)s on line %(line)s of %(file)s', m_dict)
    #Done with movements

    #Create objects for the performers.
    for per_el in p_el.findall('Interpretes/Interprete'):
        perf = {
            'id': per_el.get('id'),
            'group': per_el.get('id_grupo', default=None),
            'line': per_el.sourceline,
            'file': self.file_name,
        }
        perf.update(p_dict)

        if not perf['id']:
            logger.error("Performer on line %(line)s of %(file)s does not have an ID!", perf)
            continue

        performer_rels = [
            #Relate performer to CModel
            _rel('fedora-model', 'hasModel', 'atm:performerCModel', pid_ref),
            #Relate performer to performance
            _rel('atm-rel', 'performance', performance.pid, pid_ref),
            #Relate perfomer to their 'person' entry
            _rel('fjm-db', 'player', perf['id'], literal),
        ]

        try:
            # The same three relations double as the search criteria: literals
            # are quoted, PIDs are wrapped as <fedora:...> references.
            t_list = []
            for pred, obj in performer_rels:
                if obj.type == literal:
                    t_obj = "'%s'" % obj
                else:
                    t_obj = "<fedora:%s>" % obj
                t_list.append(("%s" % Concert.NS[pred.alias].uri, "%s" % pred.predicate, "%s" % t_obj))
            pid = FedoraWrapper.getPid(tuples=t_list)
            if not pid:
                # Without this, an empty pid would leave `performer` pointing
                # at the object from the previous loop iteration (or unbound
                # on the first one) and the relations below would be written
                # to the wrong performer.
                raise KeyError(pid)
            performer = FedoraWrapper.client.getObject(pid)
        except KeyError:
            performer = FedoraWrapper.getNextObject(prefix=self.prefix, label='Performer: %(concert)s/%(piece)s/%(id)s in group %(group)s' % perf)

        #Relate the performer to the listed group (or 'unaffiliated, if none)
        if perf['group'] is not None:
            performer_rels.append(_rel('fjm-db', 'group', perf['group'], literal))
        else:
            # NOTE(review): the 'Perfomer' spelling is kept as-is because it
            # has to match the PID used in the repository -- confirm.
            performer_rels.append(_rel('atm-rel', 'group', 'atm:unaffiliatedPerfomer', pid_ref))

        for i_el in per_el.findall('Instrumentos/Instrumento'):
            inst_id = i_el.get('id')
            performer_rels.append(_rel('fjm-db', 'instrument', inst_id, literal))

        FedoraWrapper.addRelationshipsWithoutDup(performer_rels, fedora=performer).update()
        FedoraWrapper.correlateDBEntry('player', 'performerID')
        FedoraWrapper.correlateDBEntry('group', 'groupID')
        FedoraWrapper.correlateDBEntry('instrument', 'instrumentID')
def process(self):
    """Ingest this score: find or create its Fedora object, relate it, fill it.

    Steps, in order:
      1. Look the score up by its ``scoreID``; a ``KeyError`` from the lookup
         means it does not exist yet and a new object is requested.
      2. Add the content-model and ``scoreID`` relationships, plus the
         optional ``score`` (titn) and ``composedBy`` ones when the XML
         carries a value for them.
      3. Save the Dublin Core ``type`` and ``title``.
      4. When ``Ruta_Partitura`` is set, attach the PDF (if the file exists)
         and a ``<dbid>.xml`` MARC record from the same directory (if that
         file exists).

    Returns:
        None.

    Raises:
        Exception: when the lookup returns without error but the pid is empty.
    """
    log = self.logger
    log.info('Starting to ingest: Score %s' % self.dbid)

    # Find the existing object, or fall through to creating one on KeyError.
    try:
        pid = FedoraWrapper.getPid(uri=ao.NS['fjm-db'].uri, predicate='scoreID', obj="'%s'" % self.dbid)
        if not pid:
            raise Exception('Something went horribly wrong! Found a pid, but couldn\'t access it...')
        log.warning('Score %(id)s already exists as pid %(pid)s! Overwriting PDF and DC DSs!' % {'id': self.dbid, 'pid': pid})
        score = FedoraWrapper.client.getObject(pid)
    except KeyError:
        score = FedoraWrapper.getNextObject(self.prefix, label='Score %s' % self.dbid)

    rels_ext = FR.rels_ext(score, namespaces=ao.NS.values())

    def pair(alias, predicate, value, kind):
        # One (predicate, object) tuple for addRelationshipsWithoutDup.
        return (
            FR.rels_predicate(alias=alias, predicate=predicate),
            FR.rels_object(value, kind),
        )

    relationships = [
        pair('fedora-model', 'hasModel', 'atm:scoreCModel', FR.rels_object.PID),
        pair('fjm-db', 'scoreID', self.dbid, FR.rels_object.LITERAL),
    ]

    # Optional literal relations, added only when the XML tag has text.
    #FIXME: 'Direction' of composer relation... Should I go from the score to the composer, or (as I think I do in my hand-made objects) from the composer to the score... Or should I make the relationships go in both directions?
    optional_relations = (
        ('fjm-titn', 'score', 'titn_partitura'),
        ('fjm-db', 'composedBy', 'ID_COMPOSITOR'),
    )
    for alias, predicate, tag in optional_relations:
        text = self.element.findtext(tag)
        if text:
            relationships.append(pair(alias, predicate, text, FR.rels_object.LITERAL))

    FedoraWrapper.addRelationshipsWithoutDup(relationships, rels_ext=rels_ext).update()
    FedoraWrapper.correlateDBEntry('composedBy', 'composerID')
    FedoraWrapper.correlateDBEntry('basedOn', 'scoreID')

    Score.save_dc(score, {
        'type': [unicode('StillImage')],
        'title': [self.element.findtext('TITULO')],
    })

    pdf_entry = self.element.findtext('Ruta_Partitura')
    if not pdf_entry:
        log.info('No PDF for %s', self.dbid)
        return

    pdf_file = self.getPath(pdf_entry)
    if not path.exists(pdf_file):
        log.error('PDF specified for score %(id)s, but file does not seem to exist!' % {'id': self.dbid})
    else:
        update_datastream(obj=score, dsid='PDF', label="Score PDF", filename=pdf_file, mimeType='application/pdf')

    # The MARC record is looked for next to the PDF, named after the score's id.
    marc_file = self.getPath(path.join(path.dirname(pdf_entry), '%s.xml' % self.dbid))
    if path.exists(marc_file):
        update_datastream(obj=score, dsid='MARCXML', label="MARC XML", filename=marc_file, mimeType='application/xml')
fedora.getObject(collection_pid) except FedoraConnectionException, object_fetch_exception: if object_fetch_exception.httpcode in [404]: logging.info(name_space + ':JapaneseSilentFilmCollection missing, creating object.\n') collection_object = fedora.createObject(collection_pid, label = collection_label) #collection_policy try: collection_object.addDataStream(u'COLLECTION_POLICY', collection_policy, label=u'COLLECTION_POLICY', mimeType=u'text/xml', controlGroup=u'X', logMessage=u'Added basic COLLECTION_POLICY data.') logging.info('Added COLLECTION_POLICY datastream to:' + collection_pid) except FedoraConnectionException: logging.error('Error in adding COLLECTION_POLICY datastream to:' + collection_pid + '\n') #add relationships collection_object_RELS_EXT=fedora_relationships.rels_ext(collection_object,fedora_model_namespace) collection_object_RELS_EXT.addRelationship('isMemberOf','islandora:root') collection_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'islandora:collectionCModel') collection_object_RELS_EXT.update() #put in the benshi Islandora:BenshiMovie content model try: model_pid = u'islandora:benshiMovie' fedora.getObject(model_pid) except FedoraConnectionException, object_fetch_exception: if object_fetch_exception.httpcode in [404]: logging.info('islandora:BenshiMovie missing, creating object.\n') model_object = fedora.createObject(model_pid, label = u'BenshiMovieCModel') #add relationships model_object_RELS_EXT=fedora_relationships.rels_ext(model_object,fedora_model_namespace) model_object_RELS_EXT.addRelationship(fedora_relationships.rels_predicate('fedora-model','hasModel'),'fedora-system:ContentModel-3.0') model_object_RELS_EXT.update()