def get_linked_book(self, data):
    """Return the first title of the parent record together with a ``$ref`` link.

    Returns ``None`` when there is no parent, or the parent lacks ``titles``
    or ``control_number``.
    """
    parent = get_parent_record(data)
    if not parent or "titles" not in parent or "control_number" not in parent:
        return None
    pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
    endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
    # Prefer the parent's own self-link; fall back to building the REST URL.
    ref = get_value(parent, "self.$ref")
    if not ref:
        ref = url_for(
            f"invenio_records_rest.{endpoint}_item",
            pid_value=parent["control_number"],
            _external=True,
        )
    return {**parent["titles"][0], "record": {"$ref": ref}}
def find_record_endpoint(pid, record_hit=None, **kwargs):
    """Resolve the REST endpoint for ``pid``.

    Non-``recid`` pids map directly through the default endpoint prefixes.
    For ``recid`` pids the endpoint is derived from the ``$schema`` of the
    search hit when available (all pid_types coming from search results are
    ``recid``); otherwise it is resolved from the pid value, which costs an
    additional db query.
    """
    if pid.pid_type != "recid":
        return current_records_rest.default_endpoint_prefixes[pid.pid_type]
    source = (record_hit or {}).get("_source", {})
    if "$schema" in source:
        pid_type = PidStoreBase.get_pid_type_from_schema(source["$schema"])
    else:
        pid_type = PidStoreBase.get_pid_type_from_recid(pid.pid_value)
    return PidStoreBase.get_endpoint_from_pid_type(pid_type)
def get_facet_author_name(self, record):
    """Prepare record for ``facet_author_name`` field.

    Authors that have a linked author record get their facet value from
    ``get_facet_author_name_for_author``; the remaining ones are prefixed
    with ``NOREC_`` followed by their display name.
    """
    authors_with_record = list(
        InspireRecord.get_linked_records_from_dict_field(record, "authors.record")
    )
    # Idiom fix: set comprehension instead of set([...]) over a list.
    found_authors_control_numbers = {
        author["control_number"]
        for author in authors_with_record
        if author.get("control_number")
    }
    authors_without_record = [
        author
        for author in record.get("authors", [])
        if "record" not in author
        or int(
            PidStoreBase.get_pid_from_record_uri(author["record"].get("$ref"))[1]
        )
        not in found_authors_control_numbers
    ]
    result = [
        get_facet_author_name_for_author(author) for author in authors_with_record
    ]
    result.extend(
        f"NOREC_{get_display_name_for_author_name(author['full_name'])}"
        for author in authors_without_record
    )
    return result
def _get_linked_pids_from_field(cls, data, path):
    """Return ``(pid_type, pid_value)`` tuples for all records referenced at ``path``.

    Args:
        data (dict): record metadata to extract references from.
        path (str): dotted path of the linked records (where ``$ref`` is located).

    Returns:
        list: tuples containing ``(pid_type, pid_value)`` of the linked records.

    Examples:
        >>> data = {
        ...     'references': [
        ...         {
        ...             'record': {
        ...                 '$ref': 'http://localhost/literature/1'
        ...             }
        ...         }
        ...     ]
        ... }
        >>> record = InspireRecord(data)
        >>> records = record.get_linked_pids_from_field("references.record")
        ('lit', 1)
    """
    ref_path = f"{path}.$ref"
    refs = flatten_list(get_value(data, ref_path, []))
    return [PidStoreBase.get_pid_from_record_uri(ref) for ref in refs]
def redirect_pids(self, pids):
    """Redirect every pid referenced in ``pids`` when the feature flag is enabled."""
    if not current_app.config.get("FEATURE_FLAG_ENABLE_REDIRECTION_OF_PIDS"):
        return pids
    for pid in pids:
        pid_type, pid_value = PidStoreBase.get_pid_from_record_uri(pid["$ref"])
        self.redirect_pid(pid_type, pid_value)
    return pids
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from
    Invenio-Records because, despite the name, at that point we are not yet
    sure whether the record has been really committed to the DB.

    Args:
        sender: signal sender (unused).
        changes: iterable of ``(model_instance, change)`` pairs, where
            ``change`` is one of ``insert``/``update``/``delete``.

    Raises:
        RuntimeError: when a RecordMetadata instance carries an unknown
            change operation.
    """
    for model_instance, change in changes:
        if not isinstance(model_instance, RecordMetadata):
            continue
        if change not in ("insert", "update", "delete"):
            # Bug fix: the original passed extra args to RuntimeError without
            # formatting, so the message was never interpolated.
            raise RuntimeError(
                "Wrong operation `%s` on record %r" % (change, model_instance.id)
            )
        LOGGER.debug(
            "Record committed", change=change, uuid=str(model_instance.id)
        )
        pid_type = PidStoreBase.get_pid_type_from_schema(
            model_instance.json.get("$schema")
        )
        # Bug fix: ``changes`` is a list of (instance, change) tuples, so the
        # original ``"delete" in changes`` could never be true; the operation
        # for THIS instance is ``change``.
        delete = change == "delete"
        arguments = InspireRecord.get_subclasses()[pid_type]._record_index(
            model_instance.json, _id=str(model_instance.id), force_delete=delete
        )
        arguments["record_version"] = model_instance.version_id
        LOGGER.debug(
            "Record sending to index",
            uuid=str(model_instance.id),
            delete=delete,
        )
        current_celery_app.send_task(
            "inspirehep.records.indexer.tasks.index_record", kwargs=arguments
        )
def get_class_for_record(cls, data):
    """Return the record subclass registered for the ``$schema`` of ``data``.

    Raises:
        WrongRecordSubclass: when no subclass matches the schema's pid type.
    """
    type_from_schema = PidStoreBase.get_pid_type_from_schema(data["$schema"])
    subclass = cls.get_subclasses().get(type_from_schema)
    if subclass is None:
        raise WrongRecordSubclass(
            f"Wrong subclass {cls} used for record of type {type_from_schema}"
        )
    return subclass
def redirect_references_to_merged_record(self, uuid):
    """Point every reference at a merged (deleted) record to its replacement."""
    record = InspireRecord.get_record(uuid, with_deleted=True)
    new_ref = record["new_record"]["$ref"]
    old_ref = record["self"]["$ref"]
    schema_name = PidStoreBase.get_schema_name_from_uri(record["$schema"])
    # Only fields that can legally hold a $ref to this schema need updating.
    possible_refs = get_refs_to_schemas()[schema_name]
    update_references_pointing_to_merged_record(possible_refs, old_ref, new_ref)
def test_get_pid_type_from_schema(
    mock_get_config_pid_types_to_endpointsck_get,
    mock_get_config_pid_types_to_schema,
    schema,
    expected,
):
    # NOTE: fixture parameter names are matched by pytest and must not change.
    assert PidStoreBase.get_pid_type_from_schema(schema) == expected
def redirect_record(record_path):
    """Redirect a legacy ``record/<recid>`` path to the matching endpoint.

    ``data`` records go to the legacy site with a temporary redirect; all
    other endpoints get a permanent redirect. Unknown recids abort with 404.
    """
    recid = record_path.split("/")[0]
    endpoint = PidStoreBase.get_endpoint_for_recid(recid)
    if not endpoint:
        abort(404)
    if endpoint == "data":
        legacy_base = current_app.config["LEGACY_BASE_URL"]
        return redirect(f"{legacy_base}/record/{recid}", 302)
    return redirect(f"/{endpoint}/{recid}", 301)
def get_record_data_from_es(record):
    """Queries Elastic Search for this record and returns it as dictionary

    Returns:
        dict: This record in a way it is represented in Elastic Search
    """
    endpoints = PidStoreBase._get_config_pid_types_to_endpoints()
    search_conf = current_app.config["RECORDS_REST_ENDPOINTS"][
        endpoints[record.pid_type]
    ]
    search = search_conf["search_class"]()
    return search.get_source(record.id)
def _create_record(data, save_to_file=False):
    """Create or update a record from ``data``, commit and index it.

    Args:
        data (dict): record metadata; must contain ``control_number`` and,
            when ``save_to_file`` is used, ``$schema``.
        save_to_file (bool): when True, also dump the metadata under
            ``data/records/<endpoint>/<control_number>.json``.
    """
    control_number = data["control_number"]
    click.echo(f"Creating record {control_number}.")
    record = InspireRecord.create_or_update(data)
    db.session.commit()
    record.index(delay=False)
    message = (f"Record created uuid:{record.id} with "
               f"pid:{control_number} has been created.")
    click.echo(click.style(message, fg="green"))
    if save_to_file:
        pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
        endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
        # os.path.join with a single argument was a no-op; build the path directly.
        file_path = f"data/records/{endpoint}/{control_number}.json"
        click.echo(click.style(f"Writing to {file_path}", fg="green"))
        # Bug fix: orjson.dumps returns bytes, so the file must be opened in
        # binary mode — text-mode "w+" raised TypeError on write.
        with open(file_path, "wb") as file:
            file.write(orjson.dumps(data))
def delete_records_from_deleted_records(cls, data):
    """Delete every record listed in ``data["deleted_records"]``.

    Hack for the migrator: a new record may take over pids from records
    which should be deleted but are not deleted yet.
    """
    for ref in data.get("deleted_records", []):
        pid_type, pid_value = PidStoreBase.get_pid_from_record_uri(ref["$ref"])
        try:
            record_to_delete = cls.get_record_by_pid_value(
                pid_value, pid_type, original_record=True
            )
        except PIDDoesNotExistError:
            LOGGER.warning(
                "This pid is missing while still is marked as deleted by another record.",
                marked_by=data.get("control_number"),
                marked_to_delete=(pid_type, pid_value),
            )
        else:
            record_to_delete.delete()
def resolve_conference_record_as_root(self, pub_info_item):
    """Merge the linked conference record into ``pub_info_item``.

    Returns an empty dict when no conference is linked, the pid does not
    exist, or the conference record has no titles.
    """
    conference_ref = pub_info_item.get("conference_record")
    if conference_ref is None:
        return {}
    _, recid = PidStoreBase.get_pid_from_record_uri(conference_ref.get("$ref"))
    try:
        conference = InspireRecord.get_record_by_pid_value(
            pid_value=recid, pid_type="con"
        )
    except PIDDoesNotExistError:
        return {}
    if not conference.get("titles"):
        return {}
    pub_info_item.update(conference)
    return pub_info_item
def test_get_pid_type_from_endpoint(
    momock_get_config_pid_types_to_endpointsck_get, endpoint, expected
):
    # NOTE: fixture parameter names are matched by pytest and must not change.
    assert PidStoreBase.get_pid_type_from_endpoint(endpoint) == expected
def _schema_type(self):
    """Pid type derived from this record's ``$schema``."""
    schema = self["$schema"]
    return PidStoreBase.get_pid_type_from_schema(schema)
def test_get_endpoint_from_pid_type(
    mock_get_config_pid_types_to_endpoints, pid_type, expected
):
    # NOTE: fixture parameter name is matched by pytest and must not change.
    assert PidStoreBase.get_endpoint_from_pid_type(pid_type) == expected
def test_get_pid_from_record_uri(url, expected):
    assert PidStoreBase.get_pid_from_record_uri(url) == expected
def test_get_config_for_schema(appctx):
    config = PidStoreBase._get_config_pid_types_to_schema()
    assert config is not None
def migrate_record_from_mirror(prod_record,
                               disable_orcid_push=True,
                               disable_citation_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.

    Returns:
        dict: the migrated record metadata, which is also inserted into the database.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None
    if "$schema" in json_record:
        ensure_valid_schema(json_record)
    pid_type = PidStoreBase.get_pid_type_from_schema(json_record.get("$schema"))
    if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
        # Bug fix: the f-string used JS-style ``${...}`` placeholders, so
        # literal dollar signs leaked into the message; use ``{...}`` like
        # the sibling implementation does.
        prod_record.error = Exception(
            f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
        )
        db.session.merge(prod_record)
        return
    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            # Records superseded by this one must release their external pids
            # before create_or_update can take them over.
            for deleted_record in cls.get_linked_records_from_dict_field(
                json_record, "deleted_records"
            ):
                deleted_record.pidstore_handler(
                    deleted_record.id, deleted_record
                ).delete_external_pids()
            record = cls.create_or_update(
                json_record,
                disable_orcid_push=disable_orcid_push,
                disable_citation_update=disable_citation_update,
            )
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        logger.warn(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror", msg=message)
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
def migrate_record_from_mirror(prod_record,
                               disable_external_push=True,
                               disable_relations_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.

    Returns:
        dict: the migrated record metadata, which is also inserted into the database.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except NotSupportedError as exc:
        # Unsupported record types are not migration failures: mark the
        # mirror row as valid and skip it.
        logger.warning(str(exc), recid=prod_record.recid)
        prod_record.valid = True
        db.session.merge(prod_record)
        return
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return
    if "$schema" in json_record:
        ensure_valid_schema(json_record)
    pid_type = PidStoreBase.get_pid_type_from_schema(
        json_record.get("$schema"))
    if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
        prod_record.error = Exception(
            f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
        )
        db.session.merge(prod_record)
        return
    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            # Rewrite AFS file URLs to local ones before creating the
            # record; keep the originals so the cache can be cleaned up
            # if the download fails below.
            original_urls = replace_afs_file_locations_with_local(json_record)
            record = cls.create_or_update(
                json_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
            cache_afs_file_locations(record)
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        logger.warn(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except DownloadFileError as exc:
        # A failed download may be caused by stale cached file locations:
        # drop them and retry once; if nothing was cached, record the error.
        removed_cached_files = remove_cached_afs_file_locations(original_urls)
        if not removed_cached_files:
            logger.exception("DownloadFileError while migrate from mirror")
            prod_record.error = exc
            db.session.merge(prod_record)
        else:
            return migrate_record_from_mirror(
                prod_record=prod_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror", msg=message)
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except ThreadsTimeoutError:
        # Timeouts from the download thread pool must propagate to the caller.
        raise
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
def get_ref_from_pid(pid_type, pid_value):
    """Return full $ref for record with pid_type and pid_value"""
    endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
    return get_record_ref(pid_value, endpoint)
def get_endpoint_from_schema(schema):
    """Return the REST endpoint corresponding to a ``$schema`` URI."""
    return PidStoreBase.get_endpoint_from_pid_type(
        PidStoreBase.get_pid_type_from_schema(schema)
    )