def test_languages_indexing(running_app, minimal_record):
    """Check that the languages relation is dereferenced when dumping.

    The dump must contain the expanded language entry, loading the dump must
    give back a record equal to the dumped draft, and a subsequent commit
    must reduce the entry to its bare id again.
    """
    minimal_record["metadata"]["languages"] = [{"id": "eng"}]
    draft = RDMDraft.create(minimal_record).commit()

    # dumps() dereferences the relations and, in doing so, also updates the
    # draft's own dict - hence dump and draft are expected to stay identical.
    dumped = draft.dumps()
    expected_languages = [
        {
            "id": "eng",
            "title": {"en": "English", "da": "Engelsk"},
            "@v": f"{running_app.languages_v._record.id}::1",
        }
    ]
    assert dumped["metadata"]["languages"] == expected_languages

    # Round trip: loading the dump should produce an identical record.
    reloaded = RDMDraft.loads(dumped)
    assert dict(draft) == dict(reloaded)

    # commit() clears the dereferenced relation data.
    reloaded.commit()
    assert reloaded["metadata"]["languages"] == [{"id": "eng"}]
def test_contributor_affiliations_indexing(
    running_app, minimal_record_with_contributor
):
    """Check that contributor affiliations are dereferenced when dumping."""
    draft = RDMDraft.create(minimal_record_with_contributor).commit()

    # dumps() dereferences the relations and, in doing so, also updates the
    # draft's own dict - hence dump and draft are expected to stay identical.
    dumped = draft.dumps()
    dumped_affiliations = dumped["metadata"]["contributors"][0]["affiliations"]
    expected_affiliations = [
        {
            "id": "cern",
            "name": "CERN",
            "@v": f"{running_app.affiliations_v._record.id}::1",
        }
    ]
    assert dumped_affiliations == expected_affiliations

    # Round trip: loading the dump should produce an identical record.
    reloaded = RDMDraft.loads(dumped)
    assert dict(draft) == dict(reloaded)

    # commit() clears the dereferenced relation data.
    reloaded.commit()
    first_contributor = reloaded["metadata"]["contributors"][0]
    assert first_contributor["affiliations"] == [{"id": "cern"}]
def test_subjects_validation(running_app, minimal_record):
    """Check that subject ids are validated when a draft is committed."""
    # A known subject id commits fine and is stored as given.
    minimal_record["metadata"]["subjects"] = [{"id": "A-D000007"}]
    valid_draft = RDMDraft.create(minimal_record)
    valid_draft.commit()
    assert valid_draft["metadata"]["subjects"] == [{"id": "A-D000007"}]

    # An unknown subject id is rejected at commit time.
    minimal_record["metadata"]["subjects"] = [{"id": "invalid"}]
    invalid_draft = RDMDraft.create(minimal_record)
    with pytest.raises(InvalidRelationValue):
        invalid_draft.commit()
def test_resource_type_validation(running_app, minimal_record):
    """Check that the resource type id is validated on commit."""
    # A known resource type id commits fine and is stored as given.
    minimal_record["metadata"]["resource_type"] = {"id": "image-photo"}
    valid_draft = RDMDraft.create(minimal_record)
    valid_draft.commit()
    db.session.commit()
    assert valid_draft["metadata"]["resource_type"] == {"id": "image-photo"}

    # An unknown resource type id is rejected at commit time.
    minimal_record["metadata"]["resource_type"] = {"id": "invalid"}
    invalid_draft = RDMDraft.create(minimal_record)
    with pytest.raises(InvalidRelationValue):
        invalid_draft.commit()
def test_languages_invalid(running_app, minimal_record, lang):
    """Languages with unknown ids or a malformed structure are rejected."""
    # The id "invalid" does not exist: creation works, commit fails.
    minimal_record["metadata"]["languages"] = [{"id": "invalid"}]
    unknown_id_draft = RDMDraft.create(minimal_record)
    with pytest.raises(InvalidRelationValue):
        unknown_id_draft.commit()

    # Structural problems are already caught when the draft is created.
    malformed_values = (
        # Not a list of objects
        {"id": "eng"},
        # No additional keys are allowed
        [{"test": "eng"}],
        # Non-string types are not allowed as id values
        [{"id": 1}],
        # Extra keys are not allowed
        [{"id": "eng", "title": "rm"}],
        # No duplicates
        [{"id": "eng"}, {"id": "eng"}],
    )
    for languages in malformed_values:
        minimal_record["metadata"]["languages"] = languages
        with pytest.raises(ValidationError):
            RDMDraft.create(minimal_record)
def test_idempotence_dumps_loads(running_app, minimal_record): """Idempotence of dumps and loads.""" # This simple test asserts a key property of the dumps and loads methods. # A record that's dumped, must when loaded produce exactly the same dict # representation of a record. This key property ensures that it doesn't # matter if a record is loaded from primary storage (database) or secondary # storages (index, files, ...). A record when loaded behaves like a normal # record. # If this tests fails likely either a system fields pre/post_dump/load # method is having an issue, or it might be an Elasticsearch dumper. # DO NOT CHANGE TEST UNLESS YOU ABSOLUTELY KNOW WHAT YOU'RE DOING draft = RDMDraft.create(minimal_record) loaded_draft = RDMDraft.loads(draft.dumps()) assert dict(draft) == dict(loaded_draft)
def test_creator_affiliations_invalid(running_app, minimal_record):
    """Affiliations with unknown ids or a malformed structure are rejected."""
    # The id "invalid" does not exist: creation works, commit fails.
    minimal_record["metadata"]["creators"][0]["affiliations"] = [
        {"id": "invalid"}
    ]
    unknown_id_draft = RDMDraft.create(minimal_record)
    with pytest.raises(InvalidRelationValue):
        unknown_id_draft.commit()

    # Structural problems are already caught when the draft is created.
    malformed_values = (
        # Not a list of objects
        {"id": "cern"},
        # No additional keys are allowed
        [{"test": "cern"}],
        # Non-string types are not allowed as id values
        [{"id": 1}],
        # No duplicates
        [{"id": "cern"}, {"id": "cern"}],
    )
    for affiliations in malformed_values:
        minimal_record["metadata"]["creators"][0]["affiliations"] = (
            affiliations
        )
        with pytest.raises(ValidationError):
            RDMDraft.create(minimal_record)
def test_languages_validation(running_app, minimal_record, lang):
    """A valid language id commits and resolves to a Vocabulary record."""
    minimal_record["metadata"]["languages"] = [{"id": "eng"}]

    draft = RDMDraft.create(minimal_record)
    draft.commit()
    db.session.commit()

    # The input data must still hold the bare id after the commit.
    assert minimal_record["metadata"]["languages"] == [{"id": "eng"}]

    # The relation resolves to the vocabulary record.
    resolved_languages = list(draft.relations.languages())
    first_language = resolved_languages[0]
    assert isinstance(first_language, Vocabulary)
def test_resource_types_indexing(running_app, minimal_record):
    """Check that the resource type relation is dereferenced when dumping."""
    minimal_record["metadata"]["resource_type"] = {"id": "image-photo"}
    draft = RDMDraft.create(minimal_record).commit()

    # TODO/WARNING: draft.dumps() modifies draft
    dumped = draft.dumps()
    expected_resource_type = {
        "id": "image-photo",
        "title": {"en": "Photo"},
        "@v": f"{running_app.resource_type_item._record.id}::1",
    }
    assert dumped["metadata"]["resource_type"] == expected_resource_type

    # Round trip: loading the dump should produce an identical record.
    reloaded = RDMDraft.loads(dumped)
    assert dict(draft) == dict(reloaded)

    # commit() clears the dereferenced relation data.
    reloaded.commit()
    assert reloaded["metadata"]["resource_type"] == {"id": "image-photo"}
def test_subjects_indexing(running_app, minimal_record):
    """Check that the subjects relation is dereferenced when dumping."""
    minimal_record["metadata"]["subjects"] = [{"id": "A-D000007"}]
    draft = RDMDraft.create(minimal_record).commit()

    # The dump carries the dereferenced representation.
    dumped = draft.dumps()
    expected_in_dump = [
        {
            "id": "A-D000007",
            "title": {"en": "Abdominal Injuries"},
            "@v": f"{running_app.subject_v._record.id}::1",
        }
    ]
    assert dumped["metadata"]["subjects"] == expected_in_dump

    # NOTE/WARNING: draft.dumps() modifies the draft too
    expected_in_draft = [
        {
            "id": "A-D000007",
            "title": {"en": "Abdominal Injuries"},
            "@v": f"{running_app.subject_v._record.id}::1",
        }
    ]
    assert draft["metadata"]["subjects"] == expected_in_draft

    # Round trip: loading the dump should produce an identical record.
    reloaded = RDMDraft.loads(dumped)
    assert dict(draft) == dict(reloaded)

    # commit() should clear the dereferenced relation data.
    draft.commit()
    assert draft["metadata"]["subjects"] == [{"id": "A-D000007"}]

    # The subject stays reachable through the relations accessor.
    first_subject = next(draft.relations.subjects())
    assert "A-D000007" == first_subject["id"]
def test_creator_affiliations_validation(
    running_app, minimal_record_with_creator
):
    """A valid creator affiliation commits and resolves via relations."""
    draft = RDMDraft.create(minimal_record_with_creator)
    draft.commit()
    db.session.commit()

    # The input data must not have been changed by the commit.
    first_creator = minimal_record_with_creator["metadata"]["creators"][0]
    assert list(first_creator["affiliations"]) == [{"id": "cern"}]

    # The affiliation was saved and can be resolved through the relation.
    per_creator = list(draft.relations.creator_affiliations())
    resolved = list(per_creator[0])
    first_affiliation = resolved[0]
    # Being loaded, it holds more fields than just the id.
    assert first_affiliation["id"] == "cern"
def test_community_integration(db, c, running_app, minimal_record):
    """Smoke test: a published record's parent can be put in a community."""
    # Create and persist a draft.
    draft = RDMDraft.create(minimal_record)
    draft.commit()
    db.session.commit()

    # Publish it.
    record = RDMRecord.publish(draft)
    record.commit()
    db.session.commit()

    # Add the community as the default one on the parent.
    parent = record.parent
    parent.communities.add(c, default=True)
    record.parent.commit()
    record.commit()

    dumped_communities = record.dumps()['parent']['communities']
    assert dumped_communities == {
        'default': str(c.id),
        'ids': [str(c.id)],
    }

    db.session.commit()
def test_metadata_component(minimal_record, parent, identity_simple, location):
    """Check what MetadataComponent.new_version leaves in the new draft."""
    record = RDMRecord.create(minimal_record, parent=parent)
    draft = RDMDraft.new_version(record)

    # Precondition: the source record holds both fields.
    for field in ('publication_date', 'title'):
        assert field in record.metadata

    MetadataComponent(RDMRecordService()).new_version(
        identity_simple, draft=draft, record=record
    )

    # publication_date must NOT have been copied, title must have been.
    assert 'publication_date' not in draft.metadata
    assert 'title' in draft.metadata

    # The record itself must be untouched (draft and record do not share
    # the same metadata reference).
    assert 'publication_date' in record.metadata
def check_subjects():
    """Check the migration readiness of subjects.

    Walks over every non-deleted draft and every record and collects the
    subjects that carry an ``identifier``. Those are reported as subjects
    that should become custom vocabularies and are written, as a list, to
    ``custom_subjects.yaml`` in the current working directory. A summary is
    printed in either case.
    """

    def _should_be_vocabulary(record):
        """Return the record's subjects that have an identifier.

        The result maps each identifier to a dict with the keys ``id``,
        ``scheme`` and ``subject``; subjects without identifier are skipped.
        """
        subjects = record.get("metadata").get("subjects", [])
        vocab_subjects = {}
        for subject in subjects:
            id_ = subject.get("identifier")
            if id_:
                vocab_subjects[id_] = {
                    "id": id_,
                    "scheme": subject["scheme"],
                    "subject": subject["subject"],
                }

        return vocab_subjects

    print("Checking for subject migration readiness...")
    subjects_to_dump = {}

    for draft_metadata in RDMDraft.model_cls.query.all():
        # Skipping deleted drafts because can't be committed
        if draft_metadata.is_deleted:
            continue

        draft = RDMDraft(draft_metadata.data, model=draft_metadata)
        # Accumulate across drafts; a plain assignment here would keep only
        # the subjects of the last draft.
        subjects_to_dump.update(_should_be_vocabulary(draft))

    for record_metadata in RDMRecord.model_cls.query.all():
        record = RDMRecord(record_metadata.data, model=record_metadata)
        # Records are merged after the drafts, so published record subjects
        # take precedence when an id is repeated.
        subjects_to_dump.update(_should_be_vocabulary(record))

    total = len(subjects_to_dump)
    if subjects_to_dump:
        print(f"Your instance has {total} subjects that " +
              "should be custom vocabularies.")
        with open('custom_subjects.yaml', 'w') as f:
            yaml.dump(list(subjects_to_dump.values()), f)
    else:
        print("All your instance's subjects are valid.")
def test_creator_affiliations_with_name_validation(
    running_app, minimal_record_with_creator
):
    """A free-text affiliation is accepted but not resolved as a relation."""
    first_creator = minimal_record_with_creator["metadata"]["creators"][0]
    first_creator["affiliations"].append({"name": "free-text"})

    draft = RDMDraft.create(minimal_record_with_creator)
    draft.commit()
    db.session.commit()

    # The input data must not have been changed by the commit.
    input_creators = minimal_record_with_creator["metadata"]["creators"]
    input_affiliations = input_creators[0]["affiliations"]
    assert list(input_affiliations) == [{"id": "cern"}, {"name": "free-text"}]

    # Length should be only 1, since free-text should not be saved
    per_creator = list(draft.relations.creator_affiliations())
    resolved = list(per_creator[0])
    assert len(resolved) == 1

    # Being loaded, the resolved affiliation holds more fields than the id.
    only_affiliation = resolved[0]
    assert only_affiliation["id"] == "cern"
def execute_upgrade():
    """Execute the upgrade from InvenioRDM 4.0 to 5.0.

    Please read the disclaimer on this module before thinking about
    executing this function!

    Every record and every non-deleted draft is rewritten in place to the
    ``local://records/record-v4.0.0.json`` schema and committed; the database
    session is committed once, after all of them were processed.
    """

    def update_roles(creatibutors):
        """Wrap every non-empty ``role`` value as ``{"id": role}``."""
        for creatibutor in creatibutors:
            role = creatibutor.get("role")
            if role:
                creatibutor["role"] = {"id": role}

        return creatibutors

    def update_creators_roles(record):
        """Update creator roles."""
        creators = record.get("metadata").get("creators", [])
        if creators:
            record["metadata"]["creators"] = update_roles(creators)

    def update_contributors_roles(record):
        """Update contributors roles."""
        contributors = record.get("metadata").get("contributors", [])
        if contributors:
            record["metadata"]["contributors"] = update_roles(contributors)

    def update_additional_titles(record):
        """Update additional titles type and language."""
        add_titles = record.get("metadata").get("additional_titles", [])
        for add_title in add_titles:
            type_ = add_title.get("type")
            # any other type either stays with the previous value or is not
            # supported and should fail
            if type_ == "alternativetitle":
                add_title["type"] = {"id": "alternative-title"}
            elif type_ == "translatedtitle":
                add_title["type"] = {"id": "translated-title"}
            else:
                add_title["type"] = {"id": type_}

            lang = add_title.get("lang")
            if lang:
                add_title["lang"] = {"id": lang}

    def update_additional_descriptions(record):
        """Update additional descriptions type and language."""
        metadata = record.get("metadata")
        add_descriptions = metadata.get("additional_descriptions", [])
        for add_desc in add_descriptions:
            type_ = add_desc.get("type")
            # any other type either stays with the previous value or is not
            # supported and should fail
            if type_ == "seriesinformation":
                add_desc["type"] = {"id": "series-information"}
            elif type_ == "tableofcontents":
                # Fixed id: it used to be misspelled "table-of-contentse".
                add_desc["type"] = {"id": "table-of-contents"}
            elif type_ == "technicalinfo":
                add_desc["type"] = {"id": "technical-info"}
            else:
                add_desc["type"] = {"id": type_}

            lang = add_desc.get("lang")
            if lang:
                add_desc["lang"] = {"id": lang}

    def update_list_field_vocabulary(record, parent, field):
        """Wrap ``field`` of every item of ``metadata[parent]`` as an id.

        Used for ``related_identifiers.relation_type`` and ``dates.type``.
        The field is accessed unconditionally, so an item without it raises
        ``KeyError``.
        """
        obj_list = record.get("metadata").get(parent, [])
        for obj in obj_list:
            obj[field] = {"id": obj[field]}

    def update_subjects(record):
        """Update subjects and keywords.

        Subjects with an identifier become ``{"id": identifier}`` (repeated
        identifiers are kept once); the others become
        ``{"subject": free_text}``. Nothing is written back when the
        resulting list is empty.
        """
        subjects = record.get("metadata").get("subjects", [])
        vocab_subjects = []
        vocab_subjects_ids = set()
        for subject in subjects:
            id_ = subject.get("identifier")
            if id_:
                if id_ not in vocab_subjects_ids:
                    vocab_subjects.append({"id": id_})
                    vocab_subjects_ids.add(id_)
            else:
                vocab_subjects.append({"subject": subject["subject"]})

        if vocab_subjects:
            record["metadata"]["subjects"] = vocab_subjects

    def update_affiliations(creatibutors):
        """Update the affiliations of creators/contributors.

        An affiliation with a ``ror`` identifier becomes
        ``{"id": ror, "name": name}``, any other one ``{"name": name}``.
        """
        for creatibutor in creatibutors:
            affiliations = creatibutor.get("affiliations", [])
            vocab_affs = []
            for aff in affiliations:
                ror = None
                for id_ in aff.get("identifiers", []):
                    if id_.get("scheme") == "ror":
                        ror = id_["identifier"]
                        break

                if ror:
                    vocab_affs.append({"id": ror, "name": aff["name"]})
                else:
                    vocab_affs.append({"name": aff["name"]})

            if vocab_affs:
                creatibutor["affiliations"] = vocab_affs

        return creatibutors

    def update_creators_affiliations(record):
        """Update creators affiliations."""
        creators = record.get("metadata").get("creators", [])
        if creators:
            record["metadata"]["creators"] = update_affiliations(creators)

    def update_contributors_affiliations(record):
        """Update contributors affiliations."""
        contributors = record.get("metadata").get("contributors", [])
        if contributors:
            record["metadata"]["contributors"] = \
                update_affiliations(contributors)

    def update_rights(record):
        """Update record rights.

        ``title`` and ``description`` are turned into dicts keyed by the
        ``BABEL_DEFAULT_LOCALE`` config value (``en`` when unset). A field
        that is absent from a right is left alone instead of raising
        ``KeyError``.
        """
        rights = record.get("metadata").get("rights", [])
        if not rights:
            return

        locale = current_app.config.get('BABEL_DEFAULT_LOCALE', 'en')
        for right in rights:
            for key in ("title", "description"):
                if key in right:
                    right[key] = {locale: right[key]}

    def migrate_record(record):
        """Migrates a record/draft to the new schema's values."""
        # Force the new jsonschema
        record["$schema"] = "local://records/record-v4.0.0.json"
        update_creators_roles(record)
        update_contributors_roles(record)
        update_additional_titles(record)
        update_additional_descriptions(record)
        update_list_field_vocabulary(record, "related_identifiers",
                                     "relation_type")
        update_list_field_vocabulary(record, "dates", "type")
        update_subjects(record)
        update_creators_affiliations(record)
        update_contributors_affiliations(record)
        update_rights(record)

        return record

    print("Migrating records...")
    for record_metadata in RDMRecord.model_cls.query.all():
        record = RDMRecord(record_metadata.data, model=record_metadata)
        record = migrate_record(record)
        record.commit()

    print("Migrating drafts...")
    for draft_metadata in RDMDraft.model_cls.query.all():
        # Skipping deleted drafts because can't be committed
        if draft_metadata.is_deleted:
            continue

        draft = RDMDraft(draft_metadata.data, model=draft_metadata)
        migrate_record(draft)
        draft.commit()

    db.session.commit()
    print("Records and drafts migrated.")
def execute_upgrade():
    """Execute the upgrade from InvenioRDM 3.0 to 4.0.

    Please read the disclaimer on this module before thinking about
    executing this function!

    Runs in two passes, each ended by a database session commit:

    1. records and non-deleted drafts get duplicate languages removed, their
       ``$schema`` dropped and empty ``pids`` added where missing;
    2. records and non-deleted drafts get their resource types and languages
       converted to vocabulary references, and parents get their ``$schema``
       dropped.
    """

    def remove_duplicate_languages(record):
        """Remove duplicate languages, keeping the first occurrence of each.

        The original order is preserved so that the result does not depend
        on set/hash ordering.
        """
        if "languages" not in record["metadata"]:
            return

        unique_languages = []
        for language in record["metadata"]["languages"]:
            # Equality based membership: works for dicts regardless of their
            # key order and needs no hashable values.
            if language not in unique_languages:
                unique_languages.append(language)

        record["metadata"]["languages"] = unique_languages

    def update_vocabularies(record):
        """Updates languages and resource_type to become vocabularies.

        The record is committed at the end.
        """

        def get_res_type_vocabulary(data):
            """Returns the id value of the resource type vocabulary.

            The ``subtype`` wins over the ``type``; ``None`` is returned when
            the resource type has neither.
            """
            resource_type = data["resource_type"]
            if "subtype" in resource_type:
                return resource_type["subtype"]
            elif "type" in resource_type:
                return resource_type["type"]
            return None

        def get_language_vocabulary(data):
            """Returns the language as vocabulary."""
            return dict(id=data)

        def migrate_language(field):
            """Migrates the ``lang`` of every item of ``metadata[field]``."""
            for val in record["metadata"].get(field, []):
                if "lang" in val:
                    val["lang"] = get_language_vocabulary(val["lang"])

        # Migrate resource_type
        if "resource_type" in record["metadata"]:
            res_type_vocab = get_res_type_vocabulary(record["metadata"])
            record["metadata"]["resource_type"] = dict(id=res_type_vocab)

        # Migrate resource_type of related_identifiers
        for val in record["metadata"].get("related_identifiers", []):
            if "resource_type" in val:
                res_type_vocab = get_res_type_vocabulary(val)
                val["resource_type"] = dict(id=res_type_vocab)

        # Migrate languages from additional_descriptions
        migrate_language("additional_descriptions")

        # Migrate languages from additional_titles
        migrate_language("additional_titles")

        record.commit()

    for record_metadata in RDMRecord.model_cls.query.all():
        record = RDMRecord(record_metadata.data, model=record_metadata)

        remove_duplicate_languages(record)

        # Updating to new $schema when eventually saved
        record.pop("$schema", None)

        # Adding empty pids
        if record.pids is None:
            record.pids = {}

        record.commit()

    for draft_metadata in RDMDraft.model_cls.query.all():
        # Skipping deleted drafts because can't be committed
        if draft_metadata.is_deleted:
            continue

        draft = RDMDraft(draft_metadata.data, model=draft_metadata)

        remove_duplicate_languages(draft)

        # Updating to new $schema when eventually saved
        draft.pop("$schema", None)

        # Adding empty pids
        if draft.pids is None:
            draft.pids = {}

        draft.commit()

    db.session.commit()

    # Need to loop again to update the resource type once the scheme is updated
    for record_metadata in RDMRecord.model_cls.query.all():
        record = RDMRecord(record_metadata.data, model=record_metadata)
        update_vocabularies(record)

    for draft_metadata in RDMDraft.model_cls.query.all():
        # Skipping deleted drafts because can't be committed
        if draft_metadata.is_deleted:
            continue

        draft = RDMDraft(draft_metadata.data, model=draft_metadata)
        update_vocabularies(draft)

    for parent_metadata in RDMParent.model_cls.query.all():
        parent = RDMParent(parent_metadata.data, model=parent_metadata)

        # Updating to new $schema when eventually saved
        parent.pop("$schema", None)

        parent.commit()

    db.session.commit()
def check_affiliations():
    """Check the migration readiness of creatibutor affiliations.

    Walks over every record and every non-deleted draft and looks at the
    affiliations of their creators and contributors. Affiliations that have
    identifiers but none with the ``ror`` scheme are reported as invalid:
    their names are written, grouped under ``record``/``draft`` and the
    record id, to ``invalid_affiliations.yaml`` in the current working
    directory. A hint is printed when any ROR identifier was found.
    """

    def _should_be_vocabulary(creatibutors, total, needs_ror):
        """Check the schema of the affiliations.

        If an affiliation has identifiers but no ROR, it is not valid.

        Returns the names of the invalid affiliations together with the
        updated running ``total`` of invalid affiliations and the updated
        ``needs_ror`` flag (set once any ROR identifier is seen).
        """
        vocab_affs = []
        for creatibutor in creatibutors:
            for aff in creatibutor.get("affiliations", []):
                ids = aff.get("identifiers", [])
                ror = None
                for id_ in ids:
                    if id_.get("scheme") == "ror":
                        needs_ror = True
                        ror = id_["identifier"]
                        break

                if not ror and ids:
                    total += 1
                    vocab_affs.append(aff["name"])

        return vocab_affs, total, needs_ror

    print("Checking for affiliations migration readiness...")

    invalid_affiliations_rec, total, needs_ror = {}, 0, False
    for record_metadata in RDMRecord.model_cls.query.all():
        record = RDMRecord(record_metadata.data, model=record_metadata)
        inv_affs_creators, total, needs_ror = _should_be_vocabulary(
            record.get("metadata").get("creators", []), total, needs_ror)
        inv_affs_contributors, total, needs_ror = _should_be_vocabulary(
            record.get("metadata").get("contributors", []), total, needs_ror)

        if inv_affs_creators or inv_affs_contributors:
            invalid_affiliations_rec[record["id"]] = {
                "creators": inv_affs_creators,
                "contributors": inv_affs_contributors,
            }

    invalid_affiliations_draft = {}
    for draft_metadata in RDMDraft.model_cls.query.all():
        # Skipping deleted drafts because can't be committed
        if draft_metadata.is_deleted:
            continue

        draft = RDMDraft(draft_metadata.data, model=draft_metadata)
        inv_affs_creators, total, needs_ror = _should_be_vocabulary(
            draft.get("metadata").get("creators", []), total, needs_ror)
        inv_affs_contributors, total, needs_ror = _should_be_vocabulary(
            draft.get("metadata").get("contributors", []), total, needs_ror)

        if inv_affs_creators or inv_affs_contributors:
            invalid_affiliations_draft[draft["id"]] = {
                "type": "draft",
                "creators": inv_affs_creators,
                "contributors": inv_affs_contributors,
            }

    invalid_affiliations = {}
    if invalid_affiliations_rec:
        invalid_affiliations["record"] = invalid_affiliations_rec
    if invalid_affiliations_draft:
        invalid_affiliations["draft"] = invalid_affiliations_draft

    if invalid_affiliations:
        print(
            f"Your instance has {total} affiliations that need to be " +
            "fixed. Check the invalid_affiliations.yaml file for more "
            "details.")
        with open('invalid_affiliations.yaml', 'w') as f:
            # Dump the whole mapping: list(invalid_affiliations) would only
            # contain its keys ("record"/"draft") and lose every detail.
            yaml.dump(invalid_affiliations, f)
    else:
        print("All your instance's affiliations are valid.")

    if needs_ror:
        print(
            "You have affiliations with ROR identifiers, you need to " +
            "add its vocabulary. Instructions to do so are available in "
            "https://inveniordm.docs.cern.ch/customize/vocabularies/affiliations/"  # noqa
        )
def execute_upgrade():
    """Execute the upgrade from InvenioRDM 2.0 to 3.0.

    Please read the disclaimer on this module before thinking about
    executing this function!

    Drops the ``$schema`` of records, non-deleted drafts and parents (and
    adds empty ``pids`` to the first two where missing), then cleans up the
    file records and buckets of deleted drafts and commits the session.
    """

    def _drop_schema_and_init_pids(entity):
        """Remove ``$schema``, default ``pids`` to ``{}`` and commit."""
        # Updating to new $schema when eventually saved
        entity.pop("$schema", None)

        # Adding empty pids
        if entity.pids is None:
            entity.pids = {}

        entity.commit()

    for record_row in RDMRecord.model_cls.query.all():
        _drop_schema_and_init_pids(
            RDMRecord(record_row.data, model=record_row)
        )

    for draft_row in RDMDraft.model_cls.query.all():
        # Deleted drafts are skipped because they can't be committed
        if not draft_row.is_deleted:
            _drop_schema_and_init_pids(
                RDMDraft(draft_row.data, model=draft_row)
            )

    for parent_row in RDMParent.model_cls.query.all():
        parent = RDMParent(parent_row.data, model=parent_row)

        # Updating to new $schema when eventually saved
        parent.pop("$schema", None)

        parent.commit()

    # Cleanup associated deleted drafts.
    deleted_drafts = RDMDraftMetadata.query.filter(
        RDMDraftMetadata.is_deleted == True).all()  # noqa
    for deleted in deleted_drafts:
        # Delete all file draft records
        RDMFileDraftMetadata.query.filter_by(record_id=deleted.id).delete()

        # Detach the bucket from the draft before deciding on its removal
        bucket = deleted.bucket
        deleted.bucket = None
        deleted.bucket_id = None

        # Object and bucket must not be removed if the bucket is also
        # associated with the record.
        published = RDMRecordMetadata.query.filter_by(
            id=deleted.id).one_or_none()
        shared_with_record = (
            published is not None and published.bucket_id == bucket.id
        )
        if not shared_with_record:
            bucket.remove()

    db.session.commit()