def map_oai_pmh_record(self, collection_id, record_etree):
        record_identifier = record_etree.find('header').find('identifier').text
        object_id = ObjectId.parse(str(collection_id) + '/' + urllib.quote(record_identifier, ''))
        metadata_etree = record_etree.find('metadata')

        object_builder = \
            self._ObjectBuilder(
                collection_id=collection_id,
                institution_id=collection_id.getInstitutionId(),
                record_identifier=record_identifier
            )

        for etree in metadata_etree.find(self._UNTL_NS + 'metadata'):
            assert etree.tag.startswith(self._UNTL_NS)
            tag = etree.tag[len(self._UNTL_NS):]
            method_name = '_parse_record_metadata_' + tag + '_element'
            try:
                method = getattr(self, method_name)
            except AttributeError:
                self._logger.warn("no such method '%s' for record %s with text '%s'", method_name, record_identifier, etree.text)
                continue
            method(
                element=etree,
                object_builder=object_builder
            )

        return ObjectEntry(object_id, object_builder.build())
Ejemplo n.º 2
0
    def map_omeka_item(self, collection_id, endpoint_url, omeka_item, omeka_item_files, square_thumbnail_height_px, square_thumbnail_width_px):
        object_id = ObjectId.parse(str(collection_id) + '/' + str(omeka_item.id))

        object_builder = \
            self._ObjectBuilder(
                endpoint_url=endpoint_url,
                logger=self._logger,
                object_id=object_id,
                omeka_item=omeka_item
            )

        for element_text in omeka_item.element_texts:
            text = element_text.text.strip()
            if len(text) == 0:
                continue

            self._map_omeka_item_element(
                element_name=element_text.element.name,
                element_set_name=element_text.element_set.name,
                object_builder=object_builder,
                text=text
            )

        self._map_omeka_item_files(
            object_builder=object_builder,
            omeka_item=omeka_item,
            omeka_item_files=omeka_item_files,
            square_thumbnail_height_px=square_thumbnail_height_px,
            square_thumbnail_width_px=square_thumbnail_width_px
        )

        if len(object_builder.work_types) == 0 and omeka_item.item_type is not None:
            self._map_omeka_item_type(
                object_builder=object_builder,
                omeka_item_type=omeka_item.item_type
            )

        tag_names = [tag.name for tag in omeka_item.tags]
        if len(tag_names) > 0:
            self._map_omeka_item_tags(
                object_builder=object_builder,
                tag_names=tuple(tag_names)
            )

        return ObjectEntry(object_id, object_builder.build())
    def map_omeka_item(self, collection_id, endpoint_url, omeka_item, omeka_item_files, square_thumbnail_height_px, square_thumbnail_width_px):
        object_id = ObjectId.parse(str(collection_id) + '/' + str(omeka_item.id))

        vocab_ref = VocabRef.Builder().setVocab(Vocab.COSTUME_CORE).build()

        feature_name = None
        omeka_collection_id = int(collection_id.getUnqualifiedCollectionId())
        for item in self.OMEKA_COLLECTIONS.iteritems():
            if item[1] == omeka_collection_id:
                feature_name = item[0]
                break
        assert feature_name is not None

        feature_value = None
        item_image_credit_line = item_image_license = None
        for element_text in omeka_item.element_texts:
            if len(element_text.text) == 0:
                continue

            if element_text.element_set.name == 'Dublin Core':
                if element_text.element.name == 'Title':
                    if feature_value is None:
                        feature_value = element_text.text
            elif element_text.element_set.name == 'Item Type Metadata':
                if element_text.element.name == 'Image Creator':
                    item_image_credit_line = element_text.text
                elif element_text.element.name == 'Image License':
                    item_image_license = element_text.text
            else:
                self._logger.warn("Omeka item %d has unknown element set name '%s'", omeka_item.id, element_text.element_set.name)

        object_builder = \
            Object.Builder()\
                .setCollectionId(collection_id)\
                .setHidden(True)\
                .setInstitutionId(collection_id.getInstitutionId())\
                .setStructures(\
                    StructureSet.Builder().setElements(ImmutableList.of(
                        Structure.Builder()
                            .setText(feature_value)
                            .setType(
                                StructureType.Builder()
                                    .setText(feature_name)
                                    .setVocabRef(vocab_ref)
                                    .build()
                            )
                            .build()
                        ))
                        .build()
                )\
                .setTitles(
                    TitleSet.Builder().setElements(ImmutableList.of(
                        Title.Builder()
                            .setText("%(feature_value)s" % locals())
                            .setType(TitleType.DESCRIPTIVE)
                            .build()
                    ))
                    .build()
                )\
                .setViewType(ViewType.DETAIL)

        images = []
        for file_ in omeka_item_files:
            if not file_.mime_type.startswith('image/'):
                continue

            image_credit_line = item_image_credit_line
            image_license = item_image_license
            for element_text in file_.element_texts:
                if element_text.element_set.name == 'Dublin Core':
                    if element_text.element.name == 'License':
                        image_license = element_text.text
                    elif element_text.element.name == 'Provenance':
                        image_credit_line = element_text.text

            if image_credit_line is None or len(image_credit_line) == 0:
                self._logger.warn("Omeka item %d has a file %d missing a Provenance", omeka_item.id, file_.id)
                continue

            if image_license is None or len(image_license) == 0:
                self._logger.warn("Omeka item %d has a file %d missing a License", omeka_item.id, file_.id)
                continue

            license_vocab_ref = None
            if image_license.lower() == 'public domain':
                rights_type = RightsType.PUBLIC_DOMAIN
            elif image_license == 'CC0':
                rights_type = RightsType.LICENSED
                license_vocab_ref = \
                    VocabRef.Builder()\
                        .setVocab(Vocab.CREATIVE_COMMONS)\
                        .setUri(Uri.parse('https://creativecommons.org/publicdomain/zero/1.0/'))\
                        .build()
            elif image_license.startswith('CC BY-SA '):
                rights_type = RightsType.LICENSED
                version = image_license[len('CC BY-SA '):]
                float(version)
                license_vocab_ref = \
                    VocabRef.Builder()\
                        .setVocab(Vocab.CREATIVE_COMMONS)\
                        .setUri(Uri.parse("https://creativecommons.org/licenses/by-sa/%s/" % version))\
                        .build()
            else:
                rights_type = RightsType.LICENSED

            image_builder = Image.Builder()  # @UndefinedVariable
            file_urls = file_.file_urls
            image_builder.setOriginal(ImageVersion.Builder().setUrl(Url.parse(file_urls.original)).build())
            image_builder.setRights(
                RightsSet.Builder().setElements(ImmutableList.of(
                    Rights.Builder()
                        .setLicenseVocabRef(Optional.fromNullable(license_vocab_ref))
                        .setRightsHolder(image_credit_line)
                        .setText(image_license)
                        .setType(rights_type)
                        .build()
                ))
                .build()
            )
            if file_urls.square_thumbnail is None:
                self._logger.warn("Omeka item %d has a file %d missing a square thumbnail", omeka_item.id, file_.id)
                continue
            image_builder.setSquareThumbnail(
                ImageVersion.Builder()
                    .setHeightPx(UnsignedInteger.valueOf(square_thumbnail_height_px))
                    .setUrl(Url.parse(file_urls.square_thumbnail))
                    .setWidthPx(UnsignedInteger.valueOf(square_thumbnail_width_px))
                    .build()
            )
            images.append(image_builder.build())
        if len(images) > 0:
            object_builder.setImages(ImmutableList.copyOf(images))
        else:
            self._logger.warn("Omeka item %d has no valid images", omeka_item.id)

        object_ = object_builder.build()

        object_id = ObjectId.parse(str(collection_id) + '/' + urllib.quote(feature_value, ''))

        return ObjectEntry(object_id, object_)