class DocumentSchemaV1(RecordMetadataSchemaJSONV1):
    """Marshmallow schema describing the metadata of a document record."""

    class Meta:
        """Schema options."""

        # Keys that are not declared below are dropped on load.
        unknown = EXCLUDE

    abstract = MultilingualStringV2()
    alternative_abstracts = MultilingualStringV2()
    alternative_identifiers = fields.List(fields.Nested(IdentifierSchema))
    alternative_titles = MultilingualStringV2()
    authors = fields.List(fields.Nested(AuthorSchema), required=True)
    conference_info = fields.Nested(ConferenceInfoSchema)
    cover_metadata = fields.Dict()
    curated = fields.Bool()
    document_type = fields.Str()
    edition = fields.Str()
    # Serialized/deserialized through the two hook methods defined below.
    extensions = fields.Method('dump_extensions', 'load_extensions')
    identifiers = fields.List(fields.Nested(IdentifierSchema))
    imprint = fields.Nested(ImprintSchema)
    internal_notes = fields.List(fields.Nested(InternalNoteSchema))
    keywords = fields.List(fields.Nested(KeywordSchema))
    languages = fields.List(fields.Str())
    licenses = fields.List(fields.Nested(LicenseSchema))
    note = MultilingualStringV2()
    number_of_pages = fields.Str()
    other_authors = fields.Bool()
    publication_info = fields.List(fields.Nested(PublicationInfoSchema))
    publication_year = fields.Str(required=True)
    restricted = fields.Bool(missing=False)
    source = fields.Str()
    subjects = fields.List(fields.Nested(SubjectSchema))
    table_of_content = fields.List(fields.Str())
    tags = fields.List(fields.Str())
    title = MultilingualStringV2(required=True)
    urls = fields.List(fields.Nested(UrlSchema))

    def dump_extensions(self, obj):
        """Serialize ``obj`` with the configured document extension schema.

        The schema class is built on every call from the
        ``document_metadata_extensions`` object of the ``invenio-app-ils``
        Flask extension.

        NOTE(review): marshmallow ``Method`` fields hand the whole object
        being serialized to this hook - confirm that is the intended input
        rather than only its ``extensions`` entry.

        :param obj: value received from the ``extensions`` Method field;
            passed to the extension schema's ``dump`` unchanged
        :returns: result of the extension schema's ``dump``
        """
        ils_state = current_app.extensions["invenio-app-ils"]
        extension_schema_cls = ils_state.document_metadata_extensions.to_schema()
        return extension_schema_cls().dump(obj)

    def load_extensions(self, value):
        """Deserialize ``value`` with the configured document extension schema.

        The schema class is built on every call from the
        ``document_metadata_extensions`` object of the ``invenio-app-ils``
        Flask extension.

        :param value: content of the input's ``extensions`` field
        :returns: result of the extension schema's ``load``
        """
        ils_state = current_app.extensions["invenio-app-ils"]
        extension_schema_cls = ils_state.document_metadata_extensions.to_schema()
        return extension_schema_cls().load(value)
class DCObjectSchemaV2Mixin(Schema):
    """Schema mixin declaring the DC object metadata fields.

    ``title``, ``creator``, ``created``, ``modified`` and ``identifier`` are
    required; every other field is optional.
    """

    # Multilingual text fields.
    title = MultilingualStringV2(required=True)
    alternative = MultilingualStringV2(required=False)
    abstract = MultilingualStringV2(required=False)

    # People.
    creator = SanitizedUnicode(required=True)
    contributor = SanitizedUnicode(required=False)

    # Dates.
    dateSubmitted = DateString(required=False)
    available = DateString(required=False)
    created = DateString(required=True)
    modified = DateString(required=True)

    description = MultilingualStringV2(required=False)
    identifier = SanitizedUnicode(required=True)
class UrlSchema(Schema):
    """Schema for a URL entry: a required ``value`` plus an optional
    multilingual ``description``."""

    class Meta:
        """Schema options."""

        # Keys that are not declared below are dropped on load.
        unknown = EXCLUDE

    description = MultilingualStringV2()
    value = fields.URL(required=True)
class OpenDefinitionLicenseSchema(Schema):
    """Schema for an Open Definition license record.

    Only ``id`` is required; ``title`` is a multilingual string.
    """

    class Meta:
        """Schema options."""

        # Keys that are not declared below are dropped on load.
        unknown = EXCLUDE

    id = fields.Str(required=True)
    maintainer = fields.Str()
    status = fields.Str()
    title = MultilingualStringV2()
    url = fields.Str()
class PublicationInfoSchema(Schema):
    """Schema for publication information.

    All fields are optional; ``journal_title`` is a multilingual string and
    ``year`` is an integer.
    """

    class Meta:
        """Schema options."""

        # Keys that are not declared below are dropped on load.
        unknown = EXCLUDE

    artid = fields.Str()
    journal_issue = fields.Str()
    journal_title = MultilingualStringV2()
    journal_volume = fields.Str()
    note = fields.Str()
    pages = fields.Str()
    year = fields.Int()
class ConferenceInfoSchema(Schema):
    """Schema for conference information.

    ``place`` and the multilingual ``title`` are required; the remaining
    fields are optional.
    """

    class Meta:
        """Schema options."""

        # Keys that are not declared below are dropped on load.
        unknown = EXCLUDE

    acronym = fields.Str()
    country = fields.Str()
    dates = fields.Str()
    identifiers = fields.List(fields.Nested(IdentifierSchema))
    place = fields.Str(required=True)
    series = fields.Str()
    title = MultilingualStringV2(required=True)
    year = fields.Int()
class PSHMixin:
    """Mixin contributing a multilingual ``altLabel`` field."""

    altLabel = MultilingualStringV2()
class TitledMixin:
    """Mixin contributing a multilingual ``title`` field."""

    title = MultilingualStringV2()
class DataSetMetadataSchemaV2(InvenioRecordMetadataFilesMixin,
                              InvenioRecordMetadataSchemaV1Mixin,
                              StrictKeysMixin):
    """DataSet metadata schema.

    Besides the field declarations, three ``pre_load`` hooks normalise the
    incoming data (HTML sanitisation of ``abstract``, a default ``created``
    date, a default ``creator``) and a field validator checks ``pids``.
    """

    resource_type = ResourceType(required=True)
    creators = fields.List(
        fields.Nested(CreatorSchema),
        required=True,
        validate=validate.Length(
            min=1, error=_("Missing data for required field.")))
    creator = SanitizedUnicode()
    title = MultilingualStringV2(required=True)
    additional_titles = List(MultilingualStringV2())
    publisher = SanitizedUnicode()
    publication_date = EDTFDateString(required=True)

    subjects = List(fields.Nested(SubjectSchema))
    contributors = List(fields.Nested(ContributorSchema))
    dates = List(fields.Nested(DateSchema))
    languages = TaxonomyField(mixins=[TitledMixin], many=True)
    # alternate identifiers
    identifiers = IdentifierSet(
        fields.Nested(
            partial(IdentifierSchema,
                    allowed_schemes=RDM_RECORDS_IDENTIFIERS_SCHEMES)))
    related_identifiers = List(fields.Nested(RelatedIdentifierSchema))
    version = SanitizedUnicode()
    rights = TaxonomyField(mixins=[TitledMixin, RightsMixin], many=True)
    abstract = MultilingualStringV2(
        required=True)  # WARNING: May contain user-input HTML
    additional_descriptions = fields.List(MultilingualStringV2())
    references = fields.List(fields.Nested(ReferenceSchema))
    pids = fields.Dict(keys=fields.String(), values=fields.Nested(PIDSchema))
    access = NestedAttribute(AccessSchema)
    keywords = List(SanitizedUnicode())

    @pre_load
    def sanitize_html_fields(self, data, **kwargs):
        """Sanitize fields that may contain user-input HTML strings.

        Every language entry of ``abstract`` is replaced in place by the
        result of ``SanitizedHTML()._deserialize``.

        :param data: raw input mapping being loaded; mutated in place
        :returns: the same ``data`` object
        """
        abstract = data.get('abstract')
        # Only a language -> text mapping can be walked here. Any other
        # shape (``None``, a bare string, ...) is left untouched for the
        # regular field handling instead of crashing this hook with an
        # AttributeError.
        if isinstance(abstract, dict):
            sanitizer = SanitizedHTML()
            # Values of existing keys are reassigned only, so the dict size
            # does not change while it is being iterated.
            for lang, raw in abstract.items():
                abstract[lang] = sanitizer._deserialize(
                    raw, 'abstract', data)
        return data

    @pre_load
    def set_created(self, data, **kwargs):
        """Set created timestamp if not already set.

        If no entry of ``dates`` with type ``created`` carries a date, one
        with today's date (``YYYY-MM-DD``) is appended.

        :param data: raw input mapping being loaded; mutated in place
        :returns: the same ``data`` object
        """
        dates = data.get('dates') or []
        created = None
        # The last entry typed 'created' wins, even if its date is empty.
        for dat in dates:
            if dat.get('type', '') == 'created':
                created = dat.get('date')

        if not created:
            dates.append({
                'date': datetime.today().strftime('%Y-%m-%d'),
                'type': 'created'
            })
            data['dates'] = dates
        return data

    @pre_load
    def set_creator(self, data, **kwargs):
        """Set creator to record metadata if not known.

        Uses the e-mail of the authenticated ``current_user``, or the
        literal ``'anonymous'`` when nobody is authenticated.

        :param data: raw input mapping being loaded; mutated in place
        :returns: the same ``data`` object
        """
        if not data.get('creator'):
            if current_user and current_user.is_authenticated:
                data['creator'] = current_user.email
            else:
                data['creator'] = 'anonymous'
        return data

    @validates('pids')
    def validate_pids(self, value):
        """Validate the keys of the pids are supported providers.

        Each ``scheme``/``identifier`` pair is loaded through
        ``IdentifierSchema``; any error raised by that load propagates.

        :param value: mapping of scheme name to pid attributes
        """
        # The required flag applies to the identifier value
        # It won't fail for empty allowing the components to reserve one
        # The schema does not depend on the pid, so build it once.
        id_schema = IdentifierSchema(
            identifier_required=True,
            allowed_schemes=RDM_RECORDS_IDENTIFIERS_SCHEMES)
        for scheme, pid_attrs in value.items():
            id_schema.load({
                "scheme": scheme,
                "identifier": pid_attrs.get("identifier")
            })
class MD(marshmallow.Schema):
    """Schema with a single multilingual ``title`` field."""

    title = MultilingualStringV2()
class TitledMixin:
    """Schema mixin that declares a multilingual ``title`` field."""

    title = MultilingualStringV2()
class CommonMetadataSchemaV2(InvenioRecordMetadataSchemaV1Mixin,
                             FSMRecordSchemaMixin,
                             OARepoCommunitiesMixin,
                             StrictKeysMixin):
    """Schema for the record metadata.

    One ``pre_load`` hook and several ``post_load`` hooks add cross-field
    checks on top of the field declarations.
    """

    abstract = MultilingualStringV2()
    accessibility = MultilingualStringV2()
    accessRights = TaxonomyField(mixins=[TitledMixin, AccessRightsMixin],
                                 required=True)
    creator = List(Nested(PersonSchema), required=True)
    contributor = List(Nested(ContributorSchema))
    dateIssued = NRDate(required=True)
    dateModified = NRDate()
    resourceType = TaxonomyField(mixins=[TitledMixin], required=True)
    # TODO: if there is no extent, compute it from the PDF - probably not
    # feasible
    extent = List(SanitizedUnicode())
    externalLocation = Url()
    control_number = SanitizedUnicode(required=True)
    recordIdentifiers = Nested(RecordIdentifier)
    workIdentifiers = Nested(WorkIdentifersSchema)
    isGL = Boolean()
    language = TaxonomyField(mixins=[TitledMixin], required=True)
    note = List(SanitizedUnicode())
    fundingReference = List(Nested(FundingReferenceSchema))
    provider = TaxonomyField(mixins=[TitledMixin, InstitutionsMixin],
                             required=True)
    entities = TaxonomyField(mixins=[TitledMixin, InstitutionsMixin],
                             many=True)
    publicationPlace = Nested(PublicationPlaceSchema)
    publisher = List(SanitizedUnicode())
    relatedItem = List(Nested(RelatedItemSchema))
    rights = TaxonomyField(mixins=[TitledMixin, RightsMixin], many=True)
    series = List(Nested(SeriesSchema))
    subject = TaxonomyField(
        mixins=[TitledMixin, SubjectMixin, PSHMixin, CZMeshMixin,
                MedvikMixin],
        many=True)
    keywords = List(MultilingualStringV2())
    title = List(MultilingualStringV2(required=True), required=True,
                 validate=Length(min=1))
    titleAlternate = List(MultilingualStringV2())
    rulesExceptions = List(Nested(RulesExceptionsSchema))

    @pre_load
    def check_keyword(self, data, **kwargs):
        """Reject input whose ``keywords`` is a dict carrying an ``error`` key.

        :raises ValidationError: with the value stored under ``error``
        :returns: ``data`` unchanged
        """
        raw_keywords = data.get("keywords", [])
        if isinstance(raw_keywords, dict) and "error" in raw_keywords:
            raise ValidationError(raw_keywords["error"])
        return data

    @post_load
    def check_language(self, data, **kwargs):
        """Require a non-empty ``language`` in the loaded data.

        :raises ValidationError: on field ``language`` when it is missing
            or empty
        :returns: ``data`` unchanged
        """
        if data.get("language"):
            return data
        raise ValidationError("Language is required field",
                              field_name="language")

    @post_load
    def validate_keywords_subjects(self, data, **kwargs):
        """Require at least three keywords and non-ancestor subjects combined.

        Subject entries whose ``is_ancestor`` is truthy are not counted.

        :raises ValidationError: on field ``keywords`` when fewer than three
            entries are present in total
        :returns: ``data`` unchanged
        """
        leaf_subjects = [
            term for term in data.get("subject", [])
            if not term["is_ancestor"]
        ]
        keyword_list = data.get("keywords", [])
        total = len(keyword_list) + len(leaf_subjects)
        if total < 3:
            raise ValidationError(
                "At least three subjects or keyword are required",
                field_name="keywords")
        return data

    @post_load
    def copy_to_entities(self, data, **kwargs):
        """Fill a missing or empty ``entities`` with the ``provider`` value.

        :returns: ``data``, possibly with ``entities`` set
        """
        if not data.get("entities"):
            data["entities"] = data["provider"]
        return data

    @post_load
    def rules_exceptions(self, data, **kwargs):
        """Fail the load whenever ``rulesExceptions`` is present in the data.

        :raises ValidationError: with the ``rulesExceptions`` content in the
            message
        :returns: ``data`` unchanged when the key is absent
        """
        if "rulesExceptions" not in data:
            return data
        raise ValidationError(f"Some rules raises exception: {data['rulesExceptions']}")