class FunderRelationSchema(Schema):
    """Funder schema.

    Accepts either a controlled-vocabulary ``id`` or a free-text ``name``;
    at least one of the two must be provided.
    """

    # Free-text funder name; if given, it must not be blank.
    name = SanitizedUnicode(
        validate=validate.Length(min=1, error=_('Name cannot be blank.')))
    # Vocabulary identifier of the funder.
    id = SanitizedUnicode()

    @validates_schema
    def validate_funder(self, data, **kwargs):
        """Validate that either id or name is present.

        :raises ValidationError: if neither ``id`` nor ``name`` is given.
        """
        # NOTE: the previous implementation also rebound the local ``data``
        # to {"id": ...} / {"name": ...}; that had no effect outside this
        # method and was removed as dead code.
        if not data.get("id") and not data.get("name"):
            raise ValidationError(
                _("An existing id or a free text name must be present"),
                "funder")
class CommunityMetadataSchema(Schema):
    """Community metadata schema."""

    # Allowed values for the ``type`` field below.
    COMMUNITY_TYPES = [
        'organization',
        'event',
        'topic',
        'project',
    ]

    # Human-readable community fields; blank values are rejected.
    title = SanitizedUnicode(required=True, validate=_not_blank(max=250))
    description = SanitizedUnicode(validate=_not_blank(max=2000))
    # HTML is allowed (but sanitized) in the policy and page fields.
    curation_policy = SanitizedHTML(validate=_not_blank(max=2000))
    page = SanitizedHTML(validate=_not_blank(max=2000))
    # TODO: Use general small vocabularies
    type = SanitizedUnicode(validate=validate.OneOf(COMMUNITY_TYPES))
    website = fields.Url(validate=_not_blank())
    funding = fields.List(fields.Nested(FundingSchema))
    organizations = fields.List(fields.Nested(AffiliationSchema))
class AffiliationSchema(Schema):
    """Affiliation of a creator/contributor.

    Accepts either a vocabulary ``id`` or a free-text ``name``; at least
    one of the two must be provided.
    """

    id = SanitizedUnicode()
    name = SanitizedUnicode()

    @validates_schema
    def validate_affiliation(self, data, **kwargs):
        """Validate that either id or name is present.

        :raises ValidationError: if neither ``id`` nor ``name`` is given.
        """
        # NOTE: the previous implementation also rebound the local ``data``
        # to {"id": ...} / {"name": ...}; that had no effect outside this
        # method and was removed as dead code.
        if not data.get("id") and not data.get("name"):
            raise ValidationError(
                _("An existing id or a free text name must be present"),
                "affiliations"
            )
class CreatorSchema(Schema):
    """Creator schema."""

    person_or_org = fields.Nested(PersonOrOrganizationSchema, required=True)
    role = SanitizedUnicode()
    affiliations = fields.List(fields.Nested(AffiliationSchema))

    @validates_schema
    def validate_role(self, data, **kwargs):
        """Validate role.

        Only runs the vocabulary check when a role was provided.
        """
        if 'role' not in data:
            return
        validate_entry('creators.role', data)
class ReferenceSchema(IdentifierSchema):
    """Reference schema."""

    # Identifier schemes accepted for a reference.
    SCHEMES = ["isni", "grid", "crossreffunderid", "other"]

    # The free-text reference itself is mandatory.
    reference = SanitizedUnicode(required=True)

    def __init__(self, **kwargs):
        """Refer schema constructor."""
        super().__init__(
            identifier_required=False,
            allowed_schemes=self.SCHEMES,
            **kwargs,
        )
class AwardRelationSchema(Schema):
    """Award relation schema."""

    id = SanitizedUnicode()
    number = SanitizedUnicode()
    title = i18n_strings
    identifiers = IdentifierSet(
        fields.Nested(
            partial(
                IdentifierSchema,
                allowed_schemes=award_schemes,
                identifier_required=False,
            )
        )
    )

    @validates_schema
    def validate_data(self, data, **kwargs):
        """Validate either id or number/title are present."""
        has_id = bool(data.get("id"))
        has_number_and_title = bool(
            data.get("number")) and bool(data.get("title"))
        # A vocabulary id on its own is enough; otherwise both a number
        # and a title are required.
        if not (has_id or has_number_and_title):
            raise ValidationError(
                _("An existing id or number/title must be present."), "award")
class FunderSchema(IdentifierSchema):
    """Funder schema."""

    def __init__(self, **kwargs):
        """Constructor.

        The funder identifier is optional and unknown identifier schemes
        are tolerated (``fail_on_unknown=False``).
        """
        super().__init__(
            fail_on_unknown=False, identifier_required=False, **kwargs)

    # Human-readable funder name; required and must not be blank.
    name = SanitizedUnicode(
        required=True,
        validate=_not_blank(_('Name cannot be blank.'))
    )
class AccessSchema(Schema):
    """Access schema."""

    metadata = SanitizedUnicode(required=True)
    files = SanitizedUnicode(required=True)
    embargo = NestedAttribute(EmbargoSchema)
    status = SanitizedUnicode(dump_only=False)
    owned_by = List(Nested(Agent))

    def validate_protection_value(self, value, field_name):
        """Check that the protection value is valid."""
        valid_values = AccessStatusEnum.list()
        if value in valid_values:
            return
        message = _("'{}' must be either '{}', '{}' or '{}'").format(
            field_name, *valid_values
        )
        raise ValidationError(message, "record")

    @validates("metadata")
    def validate_record_protection(self, value):
        """Validate the record protection value."""
        self.validate_protection_value(value, "metadata")

    @validates("files")
    def validate_files_protection(self, value):
        """Validate the files protection value."""
        self.validate_protection_value(value, "files")

    @validates_schema
    def validate_embargo(self, data, **kwargs):
        """Validate that the properties are consistent with each other."""
        is_embargoed = (
            data.get("metadata", "") == AccessStatusEnum.EMBARGOED.value
        )
        if is_embargoed and not data.get("embargo", ""):
            raise ValidationError(
                _("Embargo must be set if metadata is Embargoed"),
                field_name="embargo",
            )
class AwardSchema(BaseVocabularySchema):
    """Award schema."""

    identifiers = IdentifierSet(
        fields.Nested(
            partial(
                IdentifierSchema,
                allowed_schemes=award_schemes,
                identifier_required=False)))
    number = SanitizedUnicode(required=True, validate=validate.Length(
        min=1, error=_('Number cannot be blank.')))
    funder = fields.Nested(FunderRelationSchema)
    acronym = SanitizedUnicode()
    # Exposed as ``id`` on the API; moved to/from ``pid`` internally below.
    id = SanitizedUnicode(
        validate=validate.Length(min=1, error=_('Pid cannot be blank.')))

    @validates_schema
    def validate_id(self, data, **kwargs):
        """Validates ID.

        On create (no "record" in the schema context) an id is mandatory;
        on update any incoming id is discarded so the PID cannot change.
        """
        is_create = "record" not in self.context
        if is_create and "id" not in data:
            raise ValidationError(_("Missing PID."), "id")
        if not is_create:
            # Intentionally mutates the validated data in place.
            data.pop("id", None)

    @post_load(pass_many=False)
    def move_id(self, data, **kwargs):
        """Moves id to pid."""
        if "id" in data:
            data["pid"] = data.pop("id")
        return data

    @pre_dump(pass_many=False)
    def extract_pid_value(self, data, **kwargs):
        """Extracts the PID value."""
        # ``data`` is a record-like object exposing a ``pid`` attribute.
        data['id'] = data.pid.pid_value
        return data
class SubjectSchema(Schema):
    """Subject schema.

    Accepts either a vocabulary ``id`` or a free-text ``subject``; at
    least one of the two must be provided.
    """

    id = SanitizedUnicode()
    subject = SanitizedUnicode()
    scheme = SanitizedUnicode()

    @validates_schema
    def validate_subject(self, data, **kwargs):
        """Validate that either id or subject is present.

        :raises ValidationError: if neither ``id`` nor ``subject`` is given.
        """
        # NOTE: the previous implementation also rebound the local ``data``
        # to {"id": ...} / {"subject": ...}; that had no effect outside
        # this method and was removed as dead code.
        if not data.get("id") and not data.get("subject"):
            raise ValidationError(
                _("An existing id or a free text subject must be present"),
                "subjects"
            )
class LanguageSchema(Schema):
    """Language schema."""

    class Meta:
        """Meta class to discard unknown fields."""

        unknown = EXCLUDE

    # Only the vocabulary id is accepted on load; the rest is dump-only.
    id = SanitizedUnicode(required=True)
    # TODO: replace ".en" with UI serialization
    title = fields.Raw(attribute="metadata.title.en", dump_only=True)
    description = fields.Raw(attribute="metadata.description.en",
                             dump_only=True)
    props = fields.Raw(attribute="metadata.props", dump_only=True)
class SecretLink(Schema):
    """Schema for a secret link."""

    # Server-assigned link id; never accepted on load.
    id = fields.String(dump_only=True)
    created_at = TZDateTime(timezone=timezone.utc, format='iso',
                            required=False, dump_only=True)
    # Optional expiry; may be set by the client.
    expires_at = TZDateTime(timezone=timezone.utc, format='iso',
                            required=False)
    permission = fields.String(required=False)
    # The secret token itself is only ever dumped, never loaded.
    token = SanitizedUnicode(dump_only=True)
class AccessSchema(Schema):
    """Access schema."""

    record = SanitizedUnicode(required=True)
    files = SanitizedUnicode(required=True)
    embargo = fields.Nested(EmbargoSchema)

    def validate_protection_value(self, value, field_name):
        """Check that the protection value is valid."""
        allowed = ["public", "restricted"]
        if value not in allowed:
            message = _("'{}' must be either 'public' or 'restricted'").format(
                field_name)
            raise ValidationError(message, "record")

    @validates("record")
    def validate_record_protection(self, value):
        """Validate the record protection value."""
        self.validate_protection_value(value, "record")

    @validates("files")
    def validate_files_protection(self, value):
        """Validate the files protection value."""
        self.validate_protection_value(value, "files")
class MetadataSchema(Schema): """Schema for the record metadata.""" # Metadata fields resource_type = fields.Nested(VocabularySchema, required=True) creators = fields.List(fields.Nested(CreatorSchema), required=True, validate=validate.Length( min=1, error=_("Missing data for required field."))) title = SanitizedUnicode(required=True, validate=validate.Length(min=3)) additional_titles = fields.List(fields.Nested(TitleSchema)) publisher = SanitizedUnicode() publication_date = EDTFDateString(required=True) subjects = fields.List(fields.Nested(SubjectSchema)) contributors = fields.List(fields.Nested(ContributorSchema)) dates = fields.List(fields.Nested(DateSchema)) languages = fields.List(fields.Nested(VocabularySchema)) # alternate identifiers identifiers = IdentifierSet( fields.Nested( partial(IdentifierSchema, allowed_schemes=record_identifiers_schemes))) related_identifiers = fields.List(fields.Nested(RelatedIdentifierSchema)) sizes = fields.List( SanitizedUnicode( validate=_not_blank(_('Size cannot be a blank string.')))) formats = fields.List( SanitizedUnicode( validate=_not_blank(_('Format cannot be a blank string.')))) version = SanitizedUnicode() rights = fields.List(fields.Nested(RightsSchema)) description = SanitizedHTML(validate=validate.Length(min=3)) additional_descriptions = fields.List(fields.Nested(DescriptionSchema)) locations = fields.Nested(FeatureSchema) funding = fields.List(fields.Nested(FundingSchema)) references = fields.List(fields.Nested(ReferenceSchema))
class DescriptionSchema(Schema):
    """Schema for the additional descriptions."""

    # Allowed values for the ``type`` field below.
    DESCRIPTION_TYPES = [
        "abstract",
        "methods",
        "seriesinformation",
        "tableofcontents",
        "technicalinfo",
        "other"
    ]

    description = SanitizedHTML(required=True,
                                validate=validate.Length(min=3))
    type = SanitizedUnicode(
        required=True,
        validate=validate.OneOf(
            choices=DESCRIPTION_TYPES,
            error=_(
                'Invalid description type. {input} not one of {choices}.')))
    lang = fields.Nested(LanguageSchema)
class AffiliationSchema(Schema):
    """Affiliation of a creator/contributor."""

    name = SanitizedUnicode(required=True)
    # Mapping of identifier scheme (e.g. "orcid") -> identifier value.
    identifiers = fields.Dict()

    @validates("identifiers")
    def validate_identifiers(self, value):
        """Validate well-formed identifiers are passed.

        Each key is an identifier scheme; when ``idutils`` ships a
        ``is_<scheme>`` validator, the value must pass it.

        :raises ValidationError: on an empty dict, an empty scheme key,
            or a value rejected by the scheme's validator.
        """
        if not value:
            raise ValidationError(_("Invalid identifier."))
        for identifier, identifier_value in value.items():
            validator = getattr(idutils, 'is_' + identifier, None)
            # NOTE: identifier key cannot be empty string
            if not identifier or (validator
                                  and not validator(identifier_value)):
                # Use .format() after _() so the untranslated message id
                # (not an interpolated f-string) is looked up for i18n.
                raise ValidationError(
                    _("Invalid identifier ({identifier}).").format(
                        identifier=identifier))
class AccessSchema(Schema):
    """Access schema."""

    metadata = fields.Bool(required=True)
    owned_by = fields.List(fields.Nested(Agent))
    access_right = SanitizedUnicode(required=True)
    embargo_date = ISODateString()
    access_condition = fields.Nested(AccessConditionSchema)

    @validates("embargo_date")
    def validate_embargo_date(self, value):
        """Validate that embargo date is in the future."""
        today = arrow.utcnow().date()
        if arrow.get(value).date() <= today:
            raise ValidationError(
                _("Embargo date must be in the future."),
                field_names=["embargo_date"],
            )

    @validates_schema
    def validate_access_right(self, data, **kwargs):
        """Validate that access right is one of the allowed ones."""
        validate_entry("access_right", data)
class FilesOptionsSchema(Schema):
    """Basic files options schema class."""

    enabled = fields.Bool(missing=True)
    # allow unsetting
    default_preview = SanitizedUnicode(allow_none=True)

    def get_attribute(self, obj, attr, default):
        """Override how attributes are retrieved when dumping.

        NOTE: We have to access by attribute because although we are
        loading from an external pure dict, but we are dumping from a
        data-layer object whose fields should be accessed by attributes
        and not keys. Access by key runs into FilesManager key access
        protection and raises.
        """
        value = getattr(obj, attr, default)
        if attr == "default_preview" and not value:
            # A falsy preview (e.g. None or "") is treated as unset.
            return default
        return value
class MetadataSchema(Schema):
    """Schema for the record metadata."""

    # Per-field permissions; both still to be wired up.
    field_load_permissions = {
        # TODO: define "can_admin" action
    }

    field_dump_permissions = {
        # TODO: define "can_admin" action
    }

    class Meta:
        """Meta class to accept unknwon fields."""

        unknown = INCLUDE

    # Metadata fields
    resource_type = fields.Nested(ResourceTypeSchema, required=True)
    creators = fields.List(fields.Nested(CreatorSchema), required=True)
    title = SanitizedUnicode(required=True, validate=validate.Length(min=3))
    additional_titles = fields.List(fields.Nested(TitleSchema))
    publisher = SanitizedUnicode()
    publication_date = EDTFDateString(required=True)
    subjects = fields.List(fields.Nested(SubjectSchema))
    contributors = fields.List(fields.Nested(ContributorSchema))
    dates = fields.List(fields.Nested(DateSchema))
    languages = fields.List(fields.Nested(LanguageSchema))
    # alternate identifiers
    identifiers = fields.List(fields.Nested(IdentifierSchema))
    # NOTE(review): ``error=`` is not a ``fields.List`` parameter in
    # marshmallow 3 — it is stored as field metadata; confirm intent.
    related_identifiers = fields.List(
        fields.Nested(RelatedIdentifierSchema),
        validate=_no_duplicates,
        error=_('Invalid related identifiers cannot contain duplicates.'))
    sizes = fields.List(
        SanitizedUnicode(
            validate=_not_blank(_('Size cannot be a blank string.'))))
    formats = fields.List(
        SanitizedUnicode(
            validate=_not_blank(_('Format cannot be a blank string.'))))
    version = SanitizedUnicode()
    rights = fields.List(fields.Nested(RightsSchema))
    description = SanitizedUnicode(validate=validate.Length(min=3))
    additional_descriptions = fields.List(fields.Nested(DescriptionSchema))
    locations = fields.List(fields.Nested(LocationSchema))
    funding = fields.List(fields.Nested(FundingSchema))
    references = fields.List(fields.Nested(ReferenceSchema))
class AccessSchema(Schema):
    """Access schema."""

    metadata = fields.Bool(required=True)
    files = fields.Bool(required=True)
    owned_by = fields.List(fields.Integer,
                           validate=validate.Length(min=1),
                           required=True)
    access_right = SanitizedUnicode(required=True)
    embargo_date = ISODateString()
    access_condition = fields.Nested(AccessConditionSchema)

    @validates('embargo_date')
    def validate_embargo_date(self, value):
        """Validate that embargo date is in the future."""
        today = arrow.utcnow().date()
        if arrow.get(value).date() <= today:
            raise ValidationError(
                _('Embargo date must be in the future.'),
                field_names=['embargo_date'],
            )

    @validates_schema
    def validate_access_right(self, data, **kwargs):
        """Validate that access right is one of the allowed ones."""
        validate_entry('access_right', data)
class FileSchema(Schema):
    """Service schema for files."""

    key = SanitizedUnicode(dump_only=True)
    created = TZDateTime(timezone=timezone.utc, format='iso', dump_only=True)
    updated = TZDateTime(timezone=timezone.utc, format='iso', dump_only=True)
    status = GenMethod('dump_status')
    metadata = Dict(dump_only=True)
    checksum = Str(dump_only=True, attribute='file.checksum')
    storage_class = Str(dump_only=True, attribute='file.storage_class')
    mimetype = Str(dump_only=True, attribute='file.mimetype')
    size = Number(attribute='file.size')
    version_id = UUID(attribute='file.version_id')
    file_id = UUID(attribute='file.file_id')
    bucket_id = UUID(attribute='file.bucket_id')
    links = Links()

    def dump_status(self, obj):
        """Dump file status.

        A file record without an attached file object is still pending.
        """
        if obj.file:
            return 'completed'
        return 'pending'
class DataCite43Schema(Schema):
    """DataCite 4.3 Marshmallow Schema.

    Dumps an internal record dict (with ``metadata`` and ``pids`` keys)
    into the DataCite 4.3 JSON structure.
    """

    # PIDS-FIXME: What about versioning links and related ids
    types = fields.Method("get_type")
    titles = fields.Method("get_titles")
    creators = fields.List(fields.Nested(CreatorSchema43),
                           attribute="metadata.creators")
    contributors = fields.List(fields.Nested(ContributorSchema43),
                               attribute="metadata.contributors")
    publisher = fields.Str(attribute="metadata.publisher")
    publicationYear = fields.Method("get_publication_year")
    subjects = fields.List(fields.Nested(SubjectSchema43),
                           attribute="metadata.subjects")
    dates = fields.Method("get_dates")
    language = fields.Method("get_language")
    identifiers = fields.Method("get_identifiers")
    relatedIdentifiers = fields.Method("get_related_identifiers")
    sizes = fields.List(SanitizedUnicode(), attribute="metadata.sizes")
    formats = fields.List(SanitizedUnicode(), attribute="metadata.formats")
    version = SanitizedUnicode(attribute="metadata.version")
    rightsList = fields.List(fields.Nested(RightSchema43),
                             attribute="metadata.rights")
    descriptions = fields.Method("get_descriptions")
    geoLocations = fields.Method("get_locations")
    fundingReferences = fields.List(fields.Nested(FundingSchema43),
                                    attribute="metadata.funding")
    schemaVersion = fields.Constant("http://datacite.org/schema/kernel-4")

    def get_type(self, obj):
        """Get resource type."""
        # NOTE(review): still a placeholder — always emits "FIXME".
        resource_type = obj["metadata"]["resource_type"]
        return {
            "resourceTypeGeneral": "FIXME",
            "resourceType": "FIXME",
        }

    def get_titles(self, obj):
        """Get titles list."""
        metadata = obj["metadata"]
        # The main title first, then any additional (typed) titles.
        titles = [{"title": metadata.get("title")}]
        additional_titles = metadata.get("additional_titles", [])
        for add_title in additional_titles:
            title = {"title": add_title.get("title")}
            type_ = add_title.get("type")
            if type_:
                title["titleType"] = type_.capitalize()
            lang = add_title.get("lang")
            if lang:
                title["lang"] = lang
            titles.append(title)
        return titles

    def get_publication_year(self, obj):
        """Get publication year from edtf date."""
        # PIDS-FIXME: Make the EDTFDateString somehow access the year?
        return missing

    def get_dates(self, obj):
        """Get dates."""
        # The publication date is always emitted as "Issued".
        dates = [{
            "date": obj["metadata"]["publication_date"],
            "dateType": "Issued"
        }]
        for date in obj["metadata"].get("dates", []):
            to_append = {
                "date": date["date"],
                "dateType": date["type"].capitalize()
            }
            desc = date.get("description")
            if desc:
                to_append["dateInformation"] = desc
            dates.append(to_append)
        return dates or missing

    def get_language(self, obj):
        """Get language."""
        metadata = obj["metadata"]
        languages = metadata.get("languages")
        if languages:
            # PIDS-FIXME: How to choose? the first?
            return languages[0]["id"]
        return missing

    def get_identifiers(self, obj):
        """Get identifiers list."""
        serialized_identifiers = []
        # Identifiers field
        metadata = obj["metadata"]
        identifiers = metadata.get("identifiers", [])
        for id_ in identifiers:
            serialized_identifiers.append({
                "identifier": id_["identifier"],
                "identifierType": id_["scheme"]
            })
        # PIDs field
        pids = obj["pids"]
        for scheme, id_ in pids.items():
            serialized_identifiers.append({
                "identifier": id_["identifier"],
                "identifierType": scheme.upper()
            })
        return serialized_identifiers or missing

    def get_related_identifiers(self, obj):
        """Get related identifiers."""
        # PIDS-FIXME: This might get much more complex depending on the id
        serialized_identifiers = []
        metadata = obj["metadata"]
        identifiers = metadata.get("related_identifiers", [])
        for rel_id in identifiers:
            serialized_identifiers.append({
                "relatedIdentifier": rel_id["identifier"],
                "relatedIdentifierType": rel_id["scheme"].upper(),
                "relationType": rel_id["relation_type"].capitalize(),
                "resourceTypeGeneral": rel_id["resource_type"]["type"],
            })
        return serialized_identifiers or missing

    def get_descriptions(self, obj):
        """Get descriptions list."""
        metadata = obj["metadata"]
        descriptions = []
        # The main description is emitted as an "Abstract".
        description = metadata.get("description")
        if description:
            descriptions.append({
                "description": description,
                "descriptionType": "Abstract"
            })
        additional_descriptions = metadata.get("additional_descriptions", [])
        for add_desc in additional_descriptions:
            description = {
                "description": add_desc["description"],
                "descriptionType": add_desc["type"].capitalize(),
            }
            lang = add_desc.get("lang")
            if lang:
                description["lang"] = lang
            descriptions.append(description)
        return descriptions or missing

    def get_locations(self, obj):
        """Get locations."""
        locations = []
        for location in obj["metadata"].get("locations", []):
            place = location.get("place")
            serialized_location = {}
            if place:
                serialized_location["geoLocationPlace"] = place
            geometry = location.get("geometry")
            if geometry:
                geo_type = geometry["type"]
                # PIDS-FIXME: Scalable enough?
                # PIDS-FIXME: Implement Box and Polygon serialization
                if geo_type == "Point":
                    serialized_location["geoLocationPoint"] = {
                        "pointLatitude": geometry["coordinates"][0],
                        "pointLongitude": geometry["coordinates"][1],
                    }
            locations.append(serialized_location)
        return locations or missing
class AffiliationSchema(Schema):
    """Affiliation of a creator/contributor."""

    name = SanitizedUnicode(required=True)
    # De-duplicated set of nested identifiers.
    identifiers = IdentifierSet(fields.Nested(IdentifierSchema), )
class LanguageSchema(Schema):
    """Language schema."""

    # Only the vocabulary id is accepted on load; the rest is dump-only.
    id = SanitizedUnicode(required=True)
    title = fields.Raw(dump_only=True)
    description = fields.Raw(dump_only=True)
class ResourceTypeSchema(Schema):
    """Resource type schema."""

    # Only the vocabulary id is accepted on load; the title is dump-only.
    id = SanitizedUnicode(required=True)
    title = fields.Dict(dump_only=True)
class DataCite43Schema(Schema):
    """DataCite JSON 4.3 Marshmallow Schema.

    Dumps an internal record dict (with ``metadata`` and ``pids`` keys)
    into the DataCite 4.3 JSON structure, resolving types, dates, rights
    and subjects through the vocabulary services.
    """

    # PIDS-FIXME: What about versioning links and related ids
    types = fields.Method('get_type')
    titles = fields.Method('get_titles')
    creators = fields.List(fields.Nested(CreatorSchema43),
                           attribute='metadata.creators')
    contributors = fields.List(fields.Nested(ContributorSchema43),
                               attribute='metadata.contributors')
    publisher = fields.Str(attribute='metadata.publisher')
    publicationYear = fields.Method("get_publication_year")
    subjects = fields.Method("get_subjects")
    dates = fields.Method('get_dates')
    language = fields.Method('get_language')
    identifiers = fields.Method('get_identifiers')
    relatedIdentifiers = fields.Method('get_related_identifiers')
    sizes = fields.List(SanitizedUnicode(), attribute="metadata.sizes")
    formats = fields.List(SanitizedUnicode(), attribute="metadata.formats")
    version = SanitizedUnicode(attribute="metadata.version")
    rightsList = fields.Method('get_rights')
    descriptions = fields.Method('get_descriptions')
    geoLocations = fields.Method("get_locations")
    fundingReferences = fields.List(fields.Nested(FundingSchema43),
                                    attribute='metadata.funding')
    schemaVersion = fields.Constant("http://datacite.org/schema/kernel-4")

    def get_type(self, obj):
        """Get resource type."""
        # Resolve the DataCite general/specific type from the vocabulary.
        props = get_vocabulary_props(
            'resourcetypes',
            ['props.datacite_general', 'props.datacite_type'],
            obj["metadata"]["resource_type"]["id"],
        )
        return {
            'resourceTypeGeneral': props.get("datacite_general", "Other"),
            'resourceType': props.get("datacite_type", ""),
        }

    def _merge_main_and_additional(self, obj, field, default_type=None):
        """Return merged list of main + additional titles/descriptions."""
        result = []
        main_value = obj["metadata"].get(field)
        if main_value:
            item = {field: strip_html(main_value)}
            if default_type:
                item[f"{field}Type"] = default_type
            result.append(item)
        additional_values = obj["metadata"].get(f"additional_{field}s", [])
        for v in additional_values:
            item = {field: strip_html(v.get(field))}
            # Type
            type_id = v.get("type", {}).get("id")
            if type_id:
                props = get_vocabulary_props(
                    f"{field}types", ["props.datacite"], type_id)
                if "datacite" in props:
                    item[f"{field}Type"] = props["datacite"]
            # Language
            lang_id = v.get("lang", {}).get("id")
            if lang_id:
                item["lang"] = lang_id
            result.append(item)
        return result or missing

    def get_titles(self, obj):
        """Get titles list."""
        return self._merge_main_and_additional(obj, "title")

    def get_descriptions(self, obj):
        """Get descriptions list."""
        return self._merge_main_and_additional(
            obj, "description", default_type="Abstract")

    def get_publication_year(self, obj):
        """Get publication year from edtf date."""
        try:
            publication_date = obj["metadata"]["publication_date"]
            parsed_date = parse_edtf(publication_date)
            # Use the lower bound of the (possibly partial) EDTF date.
            return str(parsed_date.lower_strict().tm_year)
        except ParseException:
            # Should not fail since it was validated at service schema
            # NOTE(review): adjacent literals concatenate without a space
            # ("...field forrecord ...") — log message only; confirm.
            current_app.logger.error("Error parsing publication_date field for"
                                     f"record {obj['metadata']}")
            raise ValidationError(_("Invalid publication date value."))

    def get_dates(self, obj):
        """Get dates."""
        # The publication date is always emitted as "Issued".
        dates = [{
            "date": obj["metadata"]["publication_date"],
            "dateType": "Issued"
        }]
        for date in obj["metadata"].get("dates", []):
            date_type_id = date.get("type", {}).get("id")
            props = get_vocabulary_props(
                'datetypes', ["props.datacite"], date_type_id)
            to_append = {
                "date": date["date"],
                "dateType": props.get("datacite", "Other")
            }
            desc = date.get("description")
            if desc:
                to_append["dateInformation"] = desc
            dates.append(to_append)
        return dates or missing

    def get_language(self, obj):
        """Get language."""
        languages = obj["metadata"].get("languages", [])
        if languages:
            # DataCite support only one language, so we take the first.
            return languages[0]["id"]
        return missing

    def get_identifiers(self, obj):
        """Get (main and alternate) identifiers list."""
        serialized_identifiers = []
        # pids go first so the DOI from the record is included
        pids = obj["pids"]
        for scheme, id_ in pids.items():
            id_scheme = get_scheme_datacite(
                scheme,
                "RDM_RECORDS_IDENTIFIERS_SCHEMES",
                default=scheme,
            )
            if id_scheme:
                serialized_identifiers.append({
                    "identifier": id_["identifier"],
                    "identifierType": id_scheme,
                })
        # Identifiers field
        identifiers = obj["metadata"].get("identifiers", [])
        for id_ in identifiers:
            scheme = id_["scheme"]
            id_scheme = get_scheme_datacite(
                scheme, "RDM_RECORDS_IDENTIFIERS_SCHEMES", default=scheme)
            if id_scheme:
                # DataCite only accepts a DOI identifier that is the official
                # registered DOI - ones in the alternate identifier field are
                # dropped
                if id_scheme != 'DOI':
                    serialized_identifiers.append({
                        "identifier": id_["identifier"],
                        "identifierType": id_scheme,
                    })
        return serialized_identifiers or missing

    def get_related_identifiers(self, obj):
        """Get related identifiers."""
        serialized_identifiers = []
        metadata = obj["metadata"]
        identifiers = metadata.get("related_identifiers", [])
        for rel_id in identifiers:
            relation_type_id = rel_id.get("relation_type", {}).get("id")
            props = get_vocabulary_props(
                "relationtypes", ["props.datacite"], relation_type_id)
            scheme = rel_id["scheme"]
            id_scheme = get_scheme_datacite(
                scheme,
                "RDM_RECORDS_IDENTIFIERS_SCHEMES",
                default=scheme,
            )
            if id_scheme:
                serialized_identifier = {
                    "relatedIdentifier": rel_id["identifier"],
                    "relationType": props.get("datacite", ""),
                    "relatedIdentifierType": id_scheme,
                }
                resource_type_id = rel_id.get("resource_type", {}).get("id")
                if resource_type_id:
                    props = get_vocabulary_props(
                        "resourcetypes",
                        # Cache is on both keys so query datacite_type as well
                        # even though it's not accessed.
                        ["props.datacite_general", "props.datacite_type"],
                        resource_type_id)
                    serialized_identifier["resourceTypeGeneral"] = props.get(
                        "datacite_general", "Other")
                serialized_identifiers.append(serialized_identifier)
        return serialized_identifiers or missing

    def get_locations(self, obj):
        """Get locations."""
        locations = []
        loc_list = obj["metadata"].get("locations", {}).get("features", [])
        for location in loc_list:
            place = location.get("place")
            serialized_location = {}
            if place:
                serialized_location["geoLocationPlace"] = place
            geometry = location.get("geometry")
            if geometry:
                geo_type = geometry["type"]
                # PIDS-FIXME: Scalable enough?
                # PIDS-FIXME: Implement Box and Polygon serialization
                if geo_type == "Point":
                    serialized_location["geoLocationPoint"] = {
                        "pointLatitude": geometry["coordinates"][0],
                        "pointLongitude": geometry["coordinates"][1],
                    }
            locations.append(serialized_location)
        return locations or missing

    def get_subjects(self, obj):
        """Get datacite subjects."""
        subjects = obj["metadata"].get("subjects", [])
        if not subjects:
            return missing
        serialized_subjects = []
        ids = []
        # Free-text subjects serialize directly; id-based ones are
        # resolved in bulk through the subjects service below.
        for subject in subjects:
            sub_text = subject.get("subject")
            if sub_text:
                serialized_subjects.append({"subject": sub_text})
            else:
                ids.append(subject.get("id"))
        if ids:
            subjects_service = (current_service_registry.get("subjects"))
            subjects = subjects_service.read_many(system_identity, ids)
            validator = validate.URL()
            for subject in subjects:
                serialized_subj = {
                    "subject": subject.get("subject"),
                    "subjectScheme": subject.get("scheme"),
                }
                id_ = subject.get("id")
                # Only URL-shaped ids are exposed as valueURI.
                try:
                    validator(id_)
                    serialized_subj["valueURI"] = id_
                except ValidationError:
                    pass
                serialized_subjects.append(serialized_subj)
        return serialized_subjects if serialized_subjects else missing

    def get_rights(self, obj):
        """Get datacite rights."""
        rights = obj["metadata"].get("rights", [])
        if not rights:
            return missing
        serialized_rights = []
        ids = []
        # Custom (id-less) rights serialize directly; id-based ones are
        # resolved in bulk through the licenses vocabulary below.
        for right in rights:
            _id = right.get("id")
            if _id:
                ids.append(_id)
            else:
                serialized_right = {
                    "rights":
                        right.get("title").get(current_default_locale()),
                }
                link = right.get("link")
                if link:
                    serialized_right["rightsUri"] = link
                serialized_rights.append(serialized_right)
        if ids:
            rights = vocabulary_service.read_many(
                system_identity, "licenses", ids)
            for right in rights:
                serialized_right = {
                    "rights":
                        right.get("title").get(current_default_locale()),
                    "rightsIdentifierScheme":
                        right.get("props").get("scheme"),
                    "rightsIdentifier": right.get("id"),
                }
                link = right.get("props").get("url")
                if link:
                    serialized_right["rightsUri"] = link
                serialized_rights.append(serialized_right)
        return serialized_rights if serialized_rights else missing
) return "", 204 @request_search_args @request_view_args @response_handler(many=True) def search(self): """List secret links for a record.""" items = self.service.read_secret_links( id_=resource_requestctx.view_args["pid_value"], identity=g.identity, ) return items.to_dict(), 200 request_pid_args = request_parser({"client": SanitizedUnicode()}, location='args') class RDMManagedPIDProviderResource(RecordResource): """PID provider resource.""" def create_url_rules(self): """Create the URL rules for the pid provider resource.""" def p(route): """Prefix a route with the URL prefix.""" return f"{self.config.url_prefix}{route}" routes = self.config.routes return [ route("GET", p(routes["item"]), self.create), route("DELETE", p(routes["item"]), self.delete),
class IdentifierSchema(IS):
    """Identifier schema with optional status field."""

    # Extends the base identifier schema with a status value.
    status = SanitizedUnicode()
def test_extensions(app, minimal_record): """Test metadata extensions schema.""" # Setup metadata extensions RDM_RECORDS_METADATA_NAMESPACES = { 'dwc': { '@context': 'https://example.com/dwc/terms' }, 'nubiomed': { '@context': 'https://example.com/nubiomed/terms' } } RDM_RECORDS_METADATA_EXTENSIONS = { 'dwc:family': { 'elasticsearch': 'keyword', 'marshmallow': SanitizedUnicode(required=True) }, 'dwc:behavior': { 'elasticsearch': 'text', 'marshmallow': SanitizedUnicode() }, 'nubiomed:number_in_sequence': { 'elasticsearch': 'long', 'marshmallow': Integer() }, 'nubiomed:scientific_sequence': { 'elasticsearch': 'long', 'marshmallow': List(Integer()) }, 'nubiomed:original_presentation_date': { 'elasticsearch': 'date', 'marshmallow': ISODateString() }, 'nubiomed:right_or_wrong': { 'elasticsearch': 'boolean', 'marshmallow': Bool() } } orig_metadata_extensions = ( app.extensions['invenio-rdm-records'].metadata_extensions ) app.extensions['invenio-rdm-records'].metadata_extensions = ( MetadataExtensions( RDM_RECORDS_METADATA_NAMESPACES, RDM_RECORDS_METADATA_EXTENSIONS ) ) # Minimal if not absent valid_minimal = { 'dwc:family': 'Felidae' } minimal_record['extensions'] = valid_minimal data = MetadataSchema().load(minimal_record) assert valid_minimal == data.get('extensions') # Full valid_full = { 'dwc:family': 'Felidae', 'dwc:behavior': 'Plays with yarn, sleeps in cardboard box.', 'nubiomed:number_in_sequence': 3, 'nubiomed:scientific_sequence': [1, 1, 2, 3, 5, 8], 'nubiomed:original_presentation_date': '2019-02-14', 'nubiomed:right_or_wrong': True, } minimal_record['extensions'] = valid_full data = MetadataSchema().load(minimal_record) assert valid_full == data.get('extensions') # Invalid invalid_number_in_sequence = { 'dwc:family': 'Felidae', 'nubiomed:scientific_sequence': [1, 'l', 2, 3, 5, 8], } minimal_record['extensions'] = invalid_number_in_sequence with pytest.raises(ValidationError): data = MetadataSchema().load(minimal_record) 
app.extensions['invenio-rdm-records'].metadata_extensions = ( orig_metadata_extensions )
class PropsSchema(Schema):
    """Schema for the URL schema."""

    # Optional link; must be a well-formed URL when present.
    url = SanitizedUnicode(validate=_valid_url(_('Not a valid URL.')))
    scheme = SanitizedUnicode()