def create_relation(pid):
    serial = series_class.get_record_by_pid(pid)
    create_parent_child_relation(serial, record, SERIAL_RELATION,
                                 volume=None)
    RecordRelationIndexer().index(record, serial)
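# `create_relation` above is a nested helper: `series_class` and `record`
# are free variables captured from the enclosing migration function. A
# sketch of the assumed surrounding usage (the loop and the pid source are
# illustrative, not from the source):
#
#   record = record_cls.get_record_by_pid(pid_value)
#   for serial_pid in migrated_serial_pids:  # hypothetical iterable
#       create_relation(serial_pid)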
def link_records_and_serial(record_cls, search):
    click.echo(f"FOUND {search.count()} serial related records.")
    for hit in search.params(scroll='1h').scan():
        try:
            click.echo(f"Processing record {hit.pid}.")
            # Skip linking if the hit doesn't have a legacy recid since it
            # means it's a volume of a multipart
            if "legacy_recid" not in hit:
                continue
            record = record_cls.get_record_by_pid(hit.pid)
            check_for_special_series(record)
            for serial in get_serials_by_child_recid(hit.legacy_recid):
                volume = get_migrated_volume_by_serial_title(
                    record, serial["title"])
                create_parent_child_relation(serial, record,
                                             SERIAL_RELATION, volume)
                RecordRelationIndexer().index(record, serial)
            # mark done
            record["_migration"]["has_serial"] = False
            record.commit()
            db.session.commit()
        except Exception as exc:
            handler = relation_exception_handlers.get(exc.__class__)
            if handler:
                legacy_recid = None
                if hasattr(hit, "legacy_recid"):
                    legacy_recid = hit.legacy_recid
                handler(exc, new_pid=hit.pid, legacy_id=legacy_recid)
            else:
                raise exc
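# A minimal sketch of the assumed shape of `relation_exception_handlers`
# used above: a mapping from exception class to a callable invoked with
# the failing exception plus the new and legacy identifiers. The handler
# signature matches the call site; the body and the registered exception
# types are assumptions.
def _report_relation_error(exc, new_pid=None, legacy_id=None):
    # log and continue, so one broken record doesn't stop the migration
    click.secho(
        f"Relation error on record {new_pid} (legacy {legacy_id}): {exc}",
        fg="red",
    )


relation_exception_handlers = {
    PIDDoesNotExistError: _report_relation_error,  # assumed registration
}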
def post(self, record, **kwargs):
    """Create a new relation."""

    def create(payload):
        try:
            relation_type = payload.pop("relation_type")
        except KeyError as key:
            return abort(
                400, "The `{}` is a required field".format(key)
            )

        rt = Relation.get_relation_by_name(relation_type)

        if rt in PARENT_CHILD_RELATION_TYPES:
            modified, first, second = self._create_parent_child_relation(
                record, rt, payload
            )
        elif rt in SIBLINGS_RELATION_TYPES:
            modified, first, second = self._create_sibling_relation(
                record, rt, payload
            )
        elif rt in SEQUENCE_RELATION_TYPES:
            first, second = self._create_sequence_relation(
                record, rt, payload
            )
            modified = second
        else:
            raise RecordRelationsError(
                "Invalid relation type `{}`".format(rt.name)
            )

        db.session.commit()
        records_to_index.append(first)
        records_to_index.append(second)

        def is_modified(x, r):
            return x.pid == r.pid and x._pid_type == r._pid_type

        # NOTE: modified can be a record or a list of records, if one
        # matches our record return the modified one.
        _modified = modified if isinstance(modified, list) else [modified]
        for mod_record in _modified:
            if is_modified(mod_record, record):
                return mod_record
        return record

    records_to_index = []
    actions = request.get_json()
    if not isinstance(actions, list):
        actions = [actions]
    for action in actions:
        record = create(action)

    # Index both parent/child (or first/second)
    RecordRelationIndexer().index(record, *records_to_index)
    return self.make_response(record.pid, record, 201)
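# Example request body for the POST handler above (a sketch; only
# `relation_type` is confirmed by the code, the remaining keys depend on
# the concrete `_create_*_relation` helpers and are assumptions). A single
# object or a list of objects is accepted:
#
#   [
#       {
#           "relation_type": "multipart_monograph",
#           "pid": "docid-1",
#           "pid_type": "docid",
#           "volume": "2"
#       }
#   ]
#
# `relation_type` is popped and resolved with
# `Relation.get_relation_by_name`; whatever remains in the payload is
# forwarded untouched to the matching relation helper.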
def delete(self, record, **kwargs):
    """Delete an existing relation."""

    def delete(payload):
        try:
            relation_type = payload.pop("relation_type")
        except KeyError as key:
            return abort(
                400, "The `{}` is a required field".format(key)
            )

        rt = Relation.get_relation_by_name(relation_type)

        if rt in PARENT_CHILD_RELATION_TYPES:
            modified, first, second = self._delete_parent_child_relation(
                record, rt, payload
            )
        elif rt in SIBLINGS_RELATION_TYPES:
            modified, first, second = self._delete_sibling_relation(
                record, rt, payload
            )
        elif rt in SEQUENCE_RELATION_TYPES:
            first, second = self._delete_sequence_relation(
                record, rt, payload
            )
            modified = first
        else:
            raise RecordRelationsError(
                "Invalid relation type `{}`".format(rt.name)
            )

        db.session.commit()
        records_to_index.append(first)
        records_to_index.append(second)

        # if the record is the modified, return the modified version
        if (
            modified.pid == record.pid
            and modified._pid_type == record._pid_type
        ):
            return modified
        return record

    records_to_index = []
    actions = request.get_json()
    if not isinstance(actions, list):
        actions = [actions]
    for action in actions:
        record = delete(action)

    # Index both parent/child (or first/second)
    RecordRelationIndexer().index(record, *records_to_index)
    return self.make_response(record.pid, record, 200)
def link_records_and_serial(record_cls, search):
    for hit in search.scan():
        # Skip linking if the hit doesn't have a legacy recid since it
        # means it's a volume of a multipart
        if "legacy_recid" not in hit:
            continue
        record = record_cls.get_record_by_pid(hit.pid)
        check_for_special_series(record)
        for serial in get_serials_by_child_recid(hit.legacy_recid):
            volume = get_migrated_volume_by_serial_title(
                record, serial["title"])
            create_parent_child_relation(serial, record,
                                         SERIAL_RELATION, volume)
            RecordRelationIndexer().index(record, serial)
def import_multivolume(json_record):
    """Import multivolume type of multipart."""
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    series_cls, series_pid_provider = model_provider_by_rectype("multipart")
    document_cls, document_pid_provider = model_provider_by_rectype(
        "document")

    legacy_recid = json_record["legacy_recid"]

    # build multipart dict - leave the legacy_recid attached
    multipart_json = clean_document_json_for_multipart(
        json_record, include_keys=["legacy_recid"])

    # prepare json for each volume
    document_json_template = exclude_multipart_fields(
        json_record, exclude_keys=["legacy_recid"])

    volume_list = json_record["_migration"]["volumes"]

    try:
        get_record_by_legacy_recid(series_cls, legacy_recid)
        raise MultipartMigrationError(
            f"Multipart {legacy_recid} was already processed. Aborting.")
    except PIDDoesNotExistError as e:
        multipart_record = import_record(
            multipart_json,
            series_cls,
            series_pid_provider,
            legacy_id_key="title",
        )

        volumes_items_list = json_record["_migration"]["items"]
        volumes_identifiers_list = \
            json_record["_migration"]["volumes_identifiers"]
        volumes_urls_list = json_record["_migration"]["volumes_urls"]

        lists_lengths = [
            len(entry)
            for entry in [
                volumes_urls_list,
                volumes_items_list,
                volumes_identifiers_list,
            ]
        ]

        # none of the per-volume lists may be longer than the volume list
        too_many_volumes = any(
            length > len(volume_list) for length in lists_lengths
        )
        if too_many_volumes:
            raise ManualImportRequired(
                "Record has more additional volume information "
                "entries than the number of indicated volumes")

        for volume in volume_list:
            replace_fields_in_volume(document_json_template, volume,
                                     json_record)
            document_record = import_record(
                document_json_template,
                document_cls,
                document_pid_provider,
                legacy_id_key="title",
            )
            document_indexer.index(document_record)
            series_indexer.index(multipart_record)
            create_parent_child_relation(
                multipart_record,
                document_record,
                MULTIPART_MONOGRAPH_RELATION,
                volume.get("volume"),
            )
            RecordRelationIndexer().index(document_record, multipart_record)
        return multipart_record
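# A minimal sketch of the assumed behaviour of the two json-splitting
# helpers used above (the real implementations live elsewhere in the
# migrator; the field set below is illustrative, not the actual list).
_MULTIPART_ONLY_FIELDS = {"title", "number_of_volumes"}  # assumption


def clean_document_json_for_multipart(json_record, include_keys=()):
    """Keep series-level fields, plus any explicitly included keys."""
    return {
        k: v for k, v in json_record.items()
        if k in _MULTIPART_ONLY_FIELDS or k in include_keys
    }


def exclude_multipart_fields(json_record, exclude_keys=()):
    """Drop series-level fields to build the per-volume document template."""
    return {
        k: v for k, v in json_record.items()
        if k not in _MULTIPART_ONLY_FIELDS and k not in exclude_keys
    }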
def import_multipart(json_record):
    """Import multipart record."""
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    series_cls, series_pid_provider = model_provider_by_rectype("multipart")
    document_cls, document_pid_provider = model_provider_by_rectype(
        "document")

    multipart_record = None
    multipart_id = json_record["_migration"].get("multipart_id")

    # split json for multipart (series rectype) and
    # document (common data for all volumes, to be stored on document rectype)
    multipart_json = clean_document_json_for_multipart(json_record)
    document_json = exclude_multipart_fields(json_record)

    # volume specific information
    volumes = json_record["_migration"]["volumes"]

    if multipart_id:
        # try to check if the multipart already exists
        # (from previous dump file)
        multipart_record = get_multipart_by_multipart_id(multipart_id)

    # series with record per volume shouldn't have more than one volume
    # in the list
    if len(volumes) != 1:
        raise ManualImportRequired("Matched volumes number incorrect.")

    # series with separate record per volume
    # (identified together with multipart id)
    if not multipart_record:
        multipart_record = import_record(
            multipart_json,
            series_cls,
            series_pid_provider,
            legacy_id_key="title",
        )
    try:
        # check if the document already exists
        document_record = get_record_by_legacy_recid(
            document_cls, document_json["legacy_recid"])
        # try to create relation (should fail if already exists)
        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        return multipart_record
    except PIDDoesNotExistError as e:
        document_record = import_record(document_json, document_cls,
                                        document_pid_provider)
        document_indexer.index(document_record)
        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        # the multipart needs to be indexed immediately,
        # because we search multipart_id to match next volumes
        series_indexer.index(multipart_record)
        RecordRelationIndexer().index(document_record, multipart_record)
        return multipart_record
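# A sketch of the lookup assumed behind `get_multipart_by_multipart_id`:
# search the series index for an already-imported multipart carrying the
# same `_migration.multipart_id` (which is why the code above indexes the
# multipart immediately after import). The index name, field path and
# response handling are assumptions based on elasticsearch-dsl.
from elasticsearch_dsl import Search


def get_multipart_by_multipart_id(multipart_id):
    search = Search(index="series").filter(
        "term", **{"_migration.multipart_id": multipart_id}
    )
    results = search.execute()
    if results.hits.total.value > 0:
        series_cls = current_app_ils.series_record_cls
        return series_cls.get_record_by_pid(results[0].pid)
    return None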
def import_multivolume(json_record):
    """Import multivolume type of multipart."""
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    series_cls = current_app_ils.series_record_cls

    legacy_recid = json_record["legacy_recid"]

    # build multipart dict - leave the legacy_recid attached
    multipart_json = clean_document_json_for_multipart(
        json_record,
        include_keys=[
            "legacy_recid",
            "alternative_titles",
            "publication_year",
            "identifiers",
        ])

    # prepare json for each volume
    document_json_template = exclude_multipart_fields(
        json_record, exclude_keys=["legacy_recid", "alternative_titles"])

    volume_list = json_record["_migration"]["volumes"]

    try:
        legacy_pid_type = current_app.config["CDS_ILS_SERIES_LEGACY_PID_TYPE"]
        get_record_by_legacy_recid(series_cls, legacy_pid_type, legacy_recid)
        raise MultipartMigrationError(
            f"Multipart {legacy_recid} was already processed. Aborting.")
    except PIDDoesNotExistError as e:
        add_cds_url(multipart_json)

        multipart_record = import_record(
            multipart_json,
            rectype="multipart",
            legacy_id=multipart_json["legacy_recid"],
        )
        series_indexer.index(multipart_record)

        volumes_items_list = json_record["_migration"]["items"]
        volumes_identifiers_list = \
            json_record["_migration"]["volumes_identifiers"]
        volumes_urls_list = json_record["_migration"]["volumes_urls"]

        lists_lengths = [
            len(entry)
            for entry in [
                volumes_urls_list,
                volumes_items_list,
                volumes_identifiers_list,
            ]
        ]

        # none of the per-volume lists may be longer than the volume list
        too_many_volumes = any(
            length > len(volume_list) for length in lists_lengths
        )
        if too_many_volumes:
            raise ManualImportRequired(
                "Record has more additional volume information "
                "entries than the number of indicated volumes")

        for volume in volume_list:
            replace_fields_in_volume(document_json_template, volume,
                                     json_record)
            document_record = import_record(
                document_json_template,
                rectype="document",
                legacy_id=json_record["legacy_recid"],
                # we don't mint the legacy pid for these documents, since
                # they never were records on legacy, only their parent
                # multipart was
                mint_legacy_pid=False,
            )
            document_indexer.index(document_record)
            create_parent_child_relation(
                multipart_record,
                document_record,
                MULTIPART_MONOGRAPH_RELATION,
                volume.get("volume"),
            )
            db.session.commit()
            RecordRelationIndexer().index(document_record, multipart_record)
        return multipart_record
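# A sketch of what `replace_fields_in_volume` is assumed to do: overwrite
# the volume-specific fields of the shared document template in place
# before each per-volume import. The exact fields handled here are
# assumptions.
def replace_fields_in_volume(document_json_template, volume, json_record):
    volume_number = volume.get("volume")
    if "title" in volume:
        document_json_template["title"] = volume["title"]
    # keep only the migration leftovers that belong to this volume
    migration = json_record["_migration"]
    document_json_template["_migration"] = {
        "items": [
            item for item in migration.get("items", [])
            if item.get("volume") == volume_number
        ],
    }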
def import_multipart(json_record):
    """Import multipart record."""
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    document_cls = current_app_ils.document_record_cls

    multipart_record = None
    multipart_id = json_record["_migration"].get("multipart_id")

    # volume specific information
    volumes = json_record["_migration"]["volumes"]

    if multipart_id:
        # try to check if the multipart already exists
        # (from previous dump file)
        multipart_record = get_multipart_by_multipart_id(multipart_id.upper())

    # series with record per volume shouldn't have more than one volume
    # in the list
    if len(volumes) != 1:
        raise ManualImportRequired("Matched volumes number incorrect.")

    # split json for multipart (series rectype) and
    # document (common data for all volumes, to be stored on document rectype)
    multipart_json = clean_document_json_for_multipart(
        json_record,
        include_keys=[
            "publication_year",
        ])
    publisher = json_record.get("imprint", {}).get("publisher")
    if publisher:
        multipart_json["publisher"] = publisher

    document_json = exclude_multipart_fields(json_record)
    document_json["title"] = volumes[0]["title"]
    add_cds_url(document_json)

    # series with separate record per volume
    # (identified together with multipart id)
    if not multipart_record:
        multipart_record = import_record(
            multipart_json,
            rectype="multipart",
            legacy_id=json_record["legacy_recid"],
            # we don't mint the legacy pid for these series, since they
            # never were records on legacy, only their volumes were
            mint_legacy_pid=False,
        )
    try:
        # check if the document already exists
        legacy_pid_type = current_app.config["CDS_ILS_RECORD_LEGACY_PID_TYPE"]
        document_record = get_record_by_legacy_recid(
            document_cls, legacy_pid_type, document_json["legacy_recid"])
        # try to create relation (should fail if already exists)
        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        db.session.commit()
        return multipart_record
    except PIDDoesNotExistError as e:
        document_record = import_record(
            document_json,
            rectype="document",
            legacy_id=document_json["legacy_recid"])
        document_indexer.index(document_record)
        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        db.session.commit()
        # the multipart needs to be indexed immediately,
        # because we search multipart_id to match next volumes
        series_indexer.index(multipart_record)
        RecordRelationIndexer().index(document_record, multipart_record)
        return multipart_record