def map_oai_pmh_record(self, collection_id, record_etree):
    """
    Map a single OAI-PMH record element to an ObjectEntry.

    The object id is the collection id joined with '/' to the record's
    header identifier, percent-quoted with no safe characters. Every child
    of the UNTL metadata element is dispatched, by its tag with the UNTL
    namespace prefix removed, to a method on this mapper named
    _parse_record_metadata_<tag>_element. Children with no such method are
    logged and skipped.

    :param collection_id: id of the owning collection; its str() form
        prefixes the object id and it must provide getInstitutionId()
    :param record_etree: record element with header/identifier and
        metadata children
    :return: ObjectEntry pairing the object id with the built object
    """
    header_etree = record_etree.find('header')
    record_identifier = header_etree.find('identifier').text

    # safe='' so characters such as '/' inside the identifier are quoted too.
    quoted_identifier = urllib.quote(record_identifier, '')
    object_id = ObjectId.parse('/'.join((str(collection_id), quoted_identifier)))

    metadata_etree = record_etree.find('metadata')

    object_builder = self._ObjectBuilder(
        collection_id=collection_id,
        institution_id=collection_id.getInstitutionId(),
        record_identifier=record_identifier
    )

    untl_ns = self._UNTL_NS
    for child_etree in metadata_etree.find(untl_ns + 'metadata'):
        assert child_etree.tag.startswith(untl_ns)
        local_name = child_etree.tag[len(untl_ns):]
        handler_name = '_parse_record_metadata_' + local_name + '_element'
        try:
            handler = getattr(self, handler_name)
        except AttributeError:
            # No parser for this tag: report it and move on to the next child.
            self._logger.warn("no such method '%s' for record %s with text '%s'", handler_name, record_identifier, child_etree.text)
        else:
            handler(element=child_etree, object_builder=object_builder)

    return ObjectEntry(object_id, object_builder.build())
def map_omeka_item(self, collection_id, endpoint_url, omeka_item, omeka_item_files, square_thumbnail_height_px, square_thumbnail_width_px):
    """
    Map an Omeka item, with its files, item type and tags, to an ObjectEntry.

    The object id is "<collection_id>/<omeka_item.id>". Mapping is delegated
    to the _map_omeka_item_* hooks of this mapper, in this order: element
    texts, files, item type, tags.

    :param collection_id: id of the owning collection (its str() form
        prefixes the object id)
    :param endpoint_url: passed through to the object builder
    :param omeka_item: item providing id, element_texts, item_type and tags
    :param omeka_item_files: files of the item, passed to the file hook
    :param square_thumbnail_height_px: passed to the file hook
    :param square_thumbnail_width_px: passed to the file hook
    :return: ObjectEntry pairing the object id with the built object
    """
    object_id = ObjectId.parse('/'.join((str(collection_id), str(omeka_item.id))))

    object_builder = self._ObjectBuilder(
        endpoint_url=endpoint_url,
        logger=self._logger,
        object_id=object_id,
        omeka_item=omeka_item
    )

    for item_element_text in omeka_item.element_texts:
        stripped_text = item_element_text.text.strip()
        # Element texts that are empty after stripping are not mapped.
        if stripped_text:
            self._map_omeka_item_element(
                element_name=item_element_text.element.name,
                element_set_name=item_element_text.element_set.name,
                object_builder=object_builder,
                text=stripped_text
            )

    self._map_omeka_item_files(
        object_builder=object_builder,
        omeka_item=omeka_item,
        omeka_item_files=omeka_item_files,
        square_thumbnail_height_px=square_thumbnail_height_px,
        square_thumbnail_width_px=square_thumbnail_width_px
    )

    # The item type is only consulted when nothing above produced a work type.
    if len(object_builder.work_types) == 0:
        if omeka_item.item_type is not None:
            self._map_omeka_item_type(
                object_builder=object_builder,
                omeka_item_type=omeka_item.item_type
            )

    tag_names = tuple(tag.name for tag in omeka_item.tags)
    if tag_names:
        self._map_omeka_item_tags(
            object_builder=object_builder,
            tag_names=tag_names
        )

    return ObjectEntry(object_id, object_builder.build())
def map_omeka_item(self, collection_id, endpoint_url, omeka_item, omeka_item_files, square_thumbnail_height_px, square_thumbnail_width_px):
    """
    Map an Omeka item to a hidden, detail-view object describing one feature value.

    The feature name is found by reverse lookup of the collection's
    unqualified id in self.OMEKA_COLLECTIONS. The feature value is the first
    non-empty Dublin Core Title of the item. The resulting object carries one
    structure (text = feature value, type = feature name with a Costume Core
    vocab ref), one descriptive title, and one image per usable image file.

    An image file is skipped, with a warning, when it has no credit line
    (file-level Dublin Core Provenance, else item-level Image Creator), no
    license (file-level Dublin Core License, else item-level Image License),
    or no square thumbnail URL.

    The object id is "<collection_id>/<percent-quoted feature value>".

    :param collection_id: id of the owning collection; must provide
        getUnqualifiedCollectionId() and getInstitutionId()
    :param endpoint_url: unused by this mapper; kept for interface compatibility
    :param omeka_item: item providing id and element_texts
    :param omeka_item_files: files of the item; only image/* MIME types are used
    :param square_thumbnail_height_px: height recorded on each square thumbnail
    :param square_thumbnail_width_px: width recorded on each square thumbnail
    :return: ObjectEntry pairing the object id with the built object
    :raises ValueError: if the collection is not in OMEKA_COLLECTIONS, if the
        item has no Dublin Core Title, or if a 'CC BY-SA <version>' license
        has a non-numeric version
    """
    # OMEKA_COLLECTIONS maps feature name -> Omeka collection id; look it up in reverse.
    omeka_collection_id = int(collection_id.getUnqualifiedCollectionId())
    feature_name = None
    for candidate_name, candidate_collection_id in self.OMEKA_COLLECTIONS.iteritems():
        if candidate_collection_id == omeka_collection_id:
            feature_name = candidate_name
            break
    if feature_name is None:
        # Explicit raise rather than assert, so the check survives python -O.
        raise ValueError("Omeka collection %d is not in OMEKA_COLLECTIONS" % omeka_collection_id)

    feature_value = None
    item_image_credit_line = item_image_license = None
    for element_text in omeka_item.element_texts:
        if len(element_text.text) == 0:
            continue
        element_set_name = element_text.element_set.name
        element_name = element_text.element.name
        if element_set_name == 'Dublin Core':
            # Only the first non-empty Title is used as the feature value.
            if element_name == 'Title' and feature_value is None:
                feature_value = element_text.text
        elif element_set_name == 'Item Type Metadata':
            if element_name == 'Image Creator':
                item_image_credit_line = element_text.text
            elif element_name == 'Image License':
                item_image_license = element_text.text
        else:
            self._logger.warn("Omeka item %d has unknown element set name '%s'", omeka_item.id, element_set_name)

    if feature_value is None:
        # Fail fast: without a title there is neither structure text nor an object id.
        raise ValueError("Omeka item %d has no Dublin Core Title" % omeka_item.id)

    structure_type = \
        StructureType.Builder()\
            .setText(feature_name)\
            .setVocabRef(VocabRef.Builder().setVocab(Vocab.COSTUME_CORE).build())\
            .build()
    structure = \
        Structure.Builder()\
            .setText(feature_value)\
            .setType(structure_type)\
            .build()
    title = \
        Title.Builder()\
            .setText(feature_value)\
            .setType(TitleType.DESCRIPTIVE)\
            .build()
    object_builder = \
        Object.Builder()\
            .setCollectionId(collection_id)\
            .setHidden(True)\
            .setInstitutionId(collection_id.getInstitutionId())\
            .setStructures(StructureSet.Builder().setElements(ImmutableList.of(structure)).build())\
            .setTitles(TitleSet.Builder().setElements(ImmutableList.of(title)).build())\
            .setViewType(ViewType.DETAIL)

    images = []
    for file_ in omeka_item_files:
        if not file_.mime_type.startswith('image/'):
            continue

        # File-level Dublin Core metadata overrides the item-level defaults.
        image_credit_line = item_image_credit_line
        image_license = item_image_license
        for element_text in file_.element_texts:
            if element_text.element_set.name != 'Dublin Core':
                continue
            if element_text.element.name == 'License':
                image_license = element_text.text
            elif element_text.element.name == 'Provenance':
                image_credit_line = element_text.text

        if not image_credit_line:
            self._logger.warn("Omeka item %d has a file %d missing a Provenance", omeka_item.id, file_.id)
            continue
        if not image_license:
            self._logger.warn("Omeka item %d has a file %d missing a License", omeka_item.id, file_.id)
            continue

        # Anything that is not explicitly public domain is treated as licensed;
        # only the recognized Creative Commons licenses get a vocab ref URI.
        rights_type = RightsType.LICENSED
        license_uri = None
        if image_license.lower() == 'public domain':
            rights_type = RightsType.PUBLIC_DOMAIN
        elif image_license == 'CC0':
            license_uri = 'https://creativecommons.org/publicdomain/zero/1.0/'
        elif image_license.startswith('CC BY-SA '):
            version = image_license[len('CC BY-SA '):]
            float(version)  # validation only: raises ValueError on a non-numeric version
            license_uri = "https://creativecommons.org/licenses/by-sa/%s/" % version

        license_vocab_ref = None
        if license_uri is not None:
            license_vocab_ref = \
                VocabRef.Builder()\
                    .setVocab(Vocab.CREATIVE_COMMONS)\
                    .setUri(Uri.parse(license_uri))\
                    .build()

        file_urls = file_.file_urls
        image_builder = Image.Builder()  # @UndefinedVariable
        image_builder.setOriginal(ImageVersion.Builder().setUrl(Url.parse(file_urls.original)).build())
        image_builder.setRights(
            RightsSet.Builder().setElements(ImmutableList.of(
                Rights.Builder()
                    .setLicenseVocabRef(Optional.fromNullable(license_vocab_ref))
                    .setRightsHolder(image_credit_line)
                    .setText(image_license)
                    .setType(rights_type)
                    .build()
            ))
            .build()
        )

        if file_urls.square_thumbnail is None:
            self._logger.warn("Omeka item %d has a file %d missing a square thumbnail", omeka_item.id, file_.id)
            continue
        image_builder.setSquareThumbnail(
            ImageVersion.Builder()
                .setHeightPx(UnsignedInteger.valueOf(square_thumbnail_height_px))
                .setUrl(Url.parse(file_urls.square_thumbnail))
                .setWidthPx(UnsignedInteger.valueOf(square_thumbnail_width_px))
                .build()
        )

        images.append(image_builder.build())

    if images:
        object_builder.setImages(ImmutableList.copyOf(images))
    else:
        self._logger.warn("Omeka item %d has no valid images", omeka_item.id)

    object_ = object_builder.build()

    # The object id is keyed on the feature value, not on the Omeka item id.
    # NOTE(review): Python 2 urllib.quote raises KeyError for unicode text with
    # non-ASCII characters -- confirm titles are ASCII or encode to UTF-8 first.
    object_id = ObjectId.parse(str(collection_id) + '/' + urllib.quote(feature_value, ''))
    return ObjectEntry(object_id, object_)