def _parse_record_metadata_identifier_element(self, element, object_builder):
    """Map one identifier element of a record's metadata onto object_builder.

    The element's ``qualifier`` attribute selects the handling:

    * ``ARK``, ``LOCAL-CONT-NO``, ``OTHER``: recognized, nothing is mapped.
    * ``itemURL``: the element text is appended to
      ``object_builder.textrefs`` as an "Item URL" textref.
    * ``thumbnailURL``: an image is appended to ``object_builder.images``.
      Its thumbnail URL is the element text; the full-size, original and
      square-thumbnail URLs are derived from
      ``object_builder.record_identifier``.
    * any other non-None qualifier: a warning is logged and the element is
      ignored.

    Elements without text (missing, empty, or whitespace-only) and elements
    without a qualifier are skipped silently.
    """
    # ElementTree-style elements report text=None when the element is empty
    # (e.g. <identifier/>), so guard before stripping.
    text = (element.text or '').strip()
    if not text:
        return

    qualifier = element.attrib.get('qualifier', None)
    if qualifier is None or qualifier in ('ARK', 'LOCAL-CONT-NO', 'OTHER'):
        return

    if qualifier == 'itemURL':
        object_builder.textrefs.append(
            Textref.Builder()
                .setName(
                    TextrefName.Builder()
                        .setText("Item URL")
                        .setType(TextrefNameType.ELECTRONIC)
                        .build()
                )
                .setRefid(
                    TextrefRefid.Builder()
                        .setHref(Url.parse(text))
                        .setText(text)
                        .setType(TextrefRefidType.URI)
                        .build()
                )
                .build()
        )
        return

    if qualifier == 'thumbnailURL':
        # The record identifier is sliced past its 'info:ark' prefix and the
        # remainder is appended to the digital library's 'ark:' base URL.
        # Every derived rendition shares this prefix.
        ark_url_prefix = \
            "http://digital.library.unt.edu/ark:" + \
            object_builder.record_identifier[len('info:ark'):]
        object_builder.images.append(
            Image.Builder()
                .setFullSize(
                    ImageVersion.Builder()
                        .setUrl(Url.parse(ark_url_prefix + '/m1/1/med_res/'))
                        .build()
                )
                .setOriginal(
                    ImageVersion.Builder()
                        .setUrl(Url.parse(ark_url_prefix + '/m1/1/high_res/'))
                        .build()
                )
                .setSquareThumbnail(
                    ImageVersion.Builder()
                        .setHeightPx(self._SQUARE_THUMBNAIL_HEIGHT_PX)
                        .setUrl(Url.parse(ark_url_prefix + '/m1/1/square/'))
                        .setWidthPx(self._SQUARE_THUMBNAIL_WIDTH_PX)
                        .build()
                )
                .setThumbnail(
                    ImageVersion.Builder()
                        .setUrl(Url.parse(text))
                        .build()
                )
                .build()
        )
        return

    self._logger.warn("ignoring unknown identifier qualifier '%s' on record %s", qualifier, object_builder.record_identifier)
def _map_omeka_item_files(
    self,
    object_builder,
    omeka_item,
    omeka_item_files,
    square_thumbnail_height_px,
    square_thumbnail_width_px
):
    """Append one Image to object_builder.images per image file of an item.

    Does nothing when ``omeka_item.files_count`` is None or 0. Files whose
    MIME type does not start with ``image/`` are skipped.

    For each remaining file:

    * the original version uses ``file_urls.original``; its pixel height
      and width are taken from the 'Height' / 'Width' elements of the
      'Omeka Image File' element set when present;
    * full-size, square-thumbnail and thumbnail versions are added only
      when the corresponding entry of ``file_urls`` is not None;
    * the square thumbnail carries the caller-supplied
      ``square_thumbnail_height_px`` / ``square_thumbnail_width_px``.
    """
    if omeka_item.files_count is None or omeka_item.files_count == 0:
        return

    for omeka_file in omeka_item_files:
        if not omeka_file.mime_type.startswith('image/'):
            continue

        original_image_height = original_image_width = None
        for element_text in omeka_file.element_texts:
            if element_text.element_set.name != 'Omeka Image File':
                continue
            if element_text.element.name == 'Height':
                original_image_height = int(element_text.text)
            elif element_text.element.name == 'Width':
                original_image_width = int(element_text.text)

        file_urls = omeka_file.file_urls
        image_builder = Image.builder()

        original_version_builder = ImageVersion.builder().setUrl(Url.parse(file_urls.original))
        if original_image_height is not None:
            original_version_builder.setHeightPx(UnsignedInteger.valueOf(original_image_height))
        if original_image_width is not None:
            original_version_builder.setWidthPx(UnsignedInteger.valueOf(original_image_width))
        image_builder.setOriginal(original_version_builder.build())

        if file_urls.fullsize is not None:
            image_builder.setFullSize(ImageVersion.builder().setUrl(Url.parse(file_urls.fullsize)).build())

        if file_urls.square_thumbnail is not None:
            # The square thumbnail must point at its own URL; this previously
            # used file_urls.fullsize, which is the wrong rendition and may
            # be None even when a square thumbnail exists.
            image_builder.setSquareThumbnail(
                ImageVersion.builder()
                    .setHeightPx(UnsignedInteger.valueOf(square_thumbnail_height_px))
                    .setUrl(Url.parse(file_urls.square_thumbnail))
                    .setWidthPx(UnsignedInteger.valueOf(square_thumbnail_width_px))
                    .build()
            )

        if file_urls.thumbnail is not None:
            image_builder.setThumbnail(ImageVersion.builder().setUrl(Url.parse(file_urls.thumbnail)).build())

        object_builder.images.append(image_builder.build())
def __init__(
    self,
    endpoint_url,
    logger,
    object_id,
    omeka_item
):
    """Initialize empty mapping state for one Omeka item.

    Stores the logger, object id and Omeka item, starts an Object builder
    carrying the collection and institution ids taken from ``object_id``,
    creates an empty list for every kind of mapped value, and seeds
    ``textrefs`` with a textref pointing at
    ``<endpoint_url>items/show/<omeka_item.id>``.
    """
    self.__logger = logger
    self.__object_id = object_id
    self.__omeka_item = omeka_item

    self._object_builder = \
        Object.builder()\
            .setCollectionId(object_id.getCollectionId())\
            .setInstitutionId(object_id.getInstitutionId())

    # Date state: the builder is pre-typed as a creation date.
    self.dc_date_builder = Date.builder().setType(DateType.CREATION)
    self.dc_date_certainty = None

    # Accumulators, one list per kind of mapped value.
    self.agents = []
    self.categories = []
    self.cultural_contexts = []
    self.dates = []
    self.descriptions = []
    self.identifiers = []
    self.images = []
    self.inscriptions = []
    self.locations = []
    self.materials = []
    self.measurements = []
    self.relation_builders = []
    self.subjects = []
    self.techniques = []
    self.titles = []
    self.work_types = []

    # The endpoint URL is concatenated as-is with the item path.
    item_url = str(endpoint_url) + 'items/show/' + str(omeka_item.id)
    item_textref_name = \
        TextrefName.builder()\
            .setText("Omeka item URL")\
            .setType(TextrefNameType.ELECTRONIC)\
            .build()
    item_textref_refid = \
        TextrefRefid.builder()\
            .setHref(Url.parse(item_url))\
            .setText(item_url)\
            .setType(TextrefRefidType.URI)\
            .build()
    self.textrefs = [
        Textref.builder()
            .setName(item_textref_name)
            .setRefid(item_textref_refid)
            .build()
    ]
assert os.path.isdir(data_dir_path) put_institution( data_rights=\ RightsSet.Builder() .setElements(ImmutableList.of( Rights.Builder() .setRightsHolder('University of North Texas') .setText("The contents of Texas Fashion Collection, hosted by the University of North Texas Libraries (digital content including images, text, and sound and video recordings) are made publicly available by the collection-holding partners for use in research, teaching, and private study. For the full terms of use, see http://digital.library.unt.edu/terms-of-use/") .setType(RightsType.COPYRIGHTED) .build() )) .build(), institution_id=InstitutionId.parse('untvca'), institution_title='Texas Fashion Collection', institution_url=Url.parse('http://digital.library.unt.edu/explore/collections/TXFC/'), store_parameters=ImmutableMap.of( 'record_mapper', TxfcOaiPmhRecordMapper.__module__ + '.' + TxfcOaiPmhRecordMapper.__name__ # @UndefinedVariable ), ) put_collection( collection_id=CollectionId.parse('untvca/txfc'), institution_id=InstitutionId.parse('untvca'), object_store_uri=Uri.parse(OaiPmhFsObjectStore.URI_SCHEME + ':/' + os.path.join(data_dir_path, 'untvca', 'txfc').replace(os.path.sep, '/')), title='Texas Fashion Collection' ) put_institution( collection_store_uri=Uri.parse(OmekaFsCollectionStore.URI_SCHEME + ':/' + data_dir_path.replace(os.path.sep, '/')), institution_id=InstitutionId.parse('vccc'), institution_title='Vassar College Costume Collection',
def map_omeka_item(self, collection_id, endpoint_url, omeka_item, omeka_item_files, square_thumbnail_height_px, square_thumbnail_width_px):
    """Map an Omeka item describing a feature value to a hidden detail object.

    The feature name is the key of ``self.OMEKA_COLLECTIONS`` whose value
    equals the unqualified collection id (as an int). The feature value is
    the first non-empty Dublin Core 'Title' of the item. The resulting
    object has a single structure (feature name / feature value), a
    descriptive title equal to the feature value, and one image per usable
    image file.

    An image file is usable when it has a credit line (file-level Dublin
    Core 'Provenance', falling back to the item's 'Image Creator'), a
    license (file-level Dublin Core 'License', falling back to the item's
    'Image License'), and a square thumbnail URL. Unusable files are logged
    and skipped.

    ``endpoint_url`` is accepted but not used by this method.

    Returns an ``ObjectEntry`` whose object id is
    ``<collection_id>/<URL-quoted feature value>``.

    Raises ``AssertionError`` when the collection id is not among the values
    of ``self.OMEKA_COLLECTIONS``, and ``ValueError`` when a 'CC BY-SA '
    license is not followed by a numeric version.
    """
    vocab_ref = VocabRef.Builder().setVocab(Vocab.COSTUME_CORE).build()

    # Reverse lookup: OMEKA_COLLECTIONS maps feature name -> Omeka collection id.
    omeka_collection_id = int(collection_id.getUnqualifiedCollectionId())
    feature_name = None
    for candidate_name, candidate_collection_id in self.OMEKA_COLLECTIONS.iteritems():
        if candidate_collection_id == omeka_collection_id:
            feature_name = candidate_name
            break
    assert feature_name is not None, "no feature name for Omeka collection %d" % omeka_collection_id

    feature_value = None
    item_image_credit_line = item_image_license = None
    for element_text in omeka_item.element_texts:
        # Falsy check so that a missing (None) text is skipped like an empty one.
        if not element_text.text:
            continue

        element_set_name = element_text.element_set.name
        if element_set_name == 'Dublin Core':
            # Only the first title is used as the feature value.
            if element_text.element.name == 'Title' and feature_value is None:
                feature_value = element_text.text
        elif element_set_name == 'Item Type Metadata':
            if element_text.element.name == 'Image Creator':
                item_image_credit_line = element_text.text
            elif element_text.element.name == 'Image License':
                item_image_license = element_text.text
        else:
            self._logger.warn("Omeka item %d has unknown element set name '%s'", omeka_item.id, element_set_name)

    # NOTE(review): an item without a Dublin Core Title leaves feature_value
    # as None; nothing here handles that case and urllib.quote below rejects
    # None. Confirm whether such items can occur and how callers want them
    # treated.

    object_builder = \
        Object.Builder()\
            .setCollectionId(collection_id)\
            .setHidden(True)\
            .setInstitutionId(collection_id.getInstitutionId())\
            .setStructures(\
                StructureSet.Builder().setElements(ImmutableList.of(
                    Structure.Builder()
                        .setText(feature_value)
                        .setType(
                            StructureType.Builder()
                                .setText(feature_name)
                                .setVocabRef(vocab_ref)
                                .build()
                        )
                        .build()
                ))
                .build()
            )\
            .setTitles(
                TitleSet.Builder().setElements(ImmutableList.of(
                    Title.Builder()
                        .setText("%s" % (feature_value,))
                        .setType(TitleType.DESCRIPTIVE)
                        .build()
                ))
                .build()
            )\
            .setViewType(ViewType.DETAIL)

    images = []
    for file_ in omeka_item_files:
        if not file_.mime_type.startswith('image/'):
            continue

        # File-level metadata overrides the item-level defaults.
        image_credit_line = item_image_credit_line
        image_license = item_image_license
        for element_text in file_.element_texts:
            if element_text.element_set.name == 'Dublin Core':
                if element_text.element.name == 'License':
                    image_license = element_text.text
                elif element_text.element.name == 'Provenance':
                    image_credit_line = element_text.text

        if image_credit_line is None or len(image_credit_line) == 0:
            self._logger.warn("Omeka item %d has a file %d missing a Provenance", omeka_item.id, file_.id)
            continue
        if image_license is None or len(image_license) == 0:
            self._logger.warn("Omeka item %d has a file %d missing a License", omeka_item.id, file_.id)
            continue

        license_vocab_ref = None
        if image_license.lower() == 'public domain':
            rights_type = RightsType.PUBLIC_DOMAIN
        elif image_license == 'CC0':
            rights_type = RightsType.LICENSED
            license_vocab_ref = \
                VocabRef.Builder()\
                    .setVocab(Vocab.CREATIVE_COMMONS)\
                    .setUri(Uri.parse('https://creativecommons.org/publicdomain/zero/1.0/'))\
                    .build()
        elif image_license.startswith('CC BY-SA '):
            rights_type = RightsType.LICENSED
            version = image_license[len('CC BY-SA '):]
            # Validation only: raises ValueError unless the suffix is numeric.
            float(version)
            license_vocab_ref = \
                VocabRef.Builder()\
                    .setVocab(Vocab.CREATIVE_COMMONS)\
                    .setUri(Uri.parse("https://creativecommons.org/licenses/by-sa/%s/" % version))\
                    .build()
        else:
            # Any other license text is treated as licensed, with no vocab ref.
            rights_type = RightsType.LICENSED

        file_urls = file_.file_urls
        if file_urls.square_thumbnail is None:
            self._logger.warn("Omeka item %d has a file %d missing a square thumbnail", omeka_item.id, file_.id)
            continue

        image_builder = Image.Builder() # @UndefinedVariable
        image_builder.setOriginal(ImageVersion.Builder().setUrl(Url.parse(file_urls.original)).build())
        image_builder.setRights(
            RightsSet.Builder().setElements(ImmutableList.of(
                Rights.Builder()
                    .setLicenseVocabRef(Optional.fromNullable(license_vocab_ref))
                    .setRightsHolder(image_credit_line)
                    .setText(image_license)
                    .setType(rights_type)
                    .build()
            ))
            .build()
        )
        image_builder.setSquareThumbnail(
            ImageVersion.Builder()
                .setHeightPx(UnsignedInteger.valueOf(square_thumbnail_height_px))
                .setUrl(Url.parse(file_urls.square_thumbnail))
                .setWidthPx(UnsignedInteger.valueOf(square_thumbnail_width_px))
                .build()
        )
        images.append(image_builder.build())

    if len(images) > 0:
        object_builder.setImages(ImmutableList.copyOf(images))
    else:
        self._logger.warn("Omeka item %d has no valid images", omeka_item.id)

    object_ = object_builder.build()

    # Python 2's urllib.quote raises KeyError on unicode text containing
    # non-ASCII characters, so quote the UTF-8 bytes instead. Byte strings
    # (and None) are passed through unchanged.
    quotable_feature_value = feature_value
    if feature_value is not None and not isinstance(feature_value, str):
        quotable_feature_value = feature_value.encode('utf-8')
    object_id = ObjectId.parse(str(collection_id) + '/' + urllib.quote(quotable_feature_value, ''))

    return ObjectEntry(object_id, object_)