Example #1
0
 def create_relation(pid):
     """Attach the serial identified by ``pid`` as parent of ``record``."""
     parent_serial = series_class.get_record_by_pid(pid)
     create_parent_child_relation(
         parent_serial, record, SERIAL_RELATION, volume=None
     )
     # refresh both sides of the relation in the search index
     RecordRelationIndexer().index(record, parent_serial)
Example #2
0
 def link_records_and_serial(record_cls, search):
     """Link migrated records to their parent serials.

     Iterates over ``search`` hits, creates a parent-child relation for
     every serial referencing the record's legacy recid, and marks the
     record's migration flag as done. Known relation errors are routed to
     ``relation_exception_handlers``; anything else is re-raised.
     """
     click.echo(f"FOUND {search.count()} serial related records.")
     for hit in search.params(scroll='1h').scan():
         try:
             click.echo(f"Processing record {hit.pid}.")
             # Skip linking if the hit doesn't have a legacy recid since it
             # means it's a volume of a multipart
             if "legacy_recid" not in hit:
                 continue
             record = record_cls.get_record_by_pid(hit.pid)
             check_for_special_series(record)
             for serial in get_serials_by_child_recid(hit.legacy_recid):
                 volume = get_migrated_volume_by_serial_title(
                     record, serial["title"])
                 create_parent_child_relation(serial, record,
                                              SERIAL_RELATION, volume)
                 RecordRelationIndexer().index(record, serial)
             # mark done
             record["_migration"]["has_serial"] = False
             record.commit()
             db.session.commit()
         except Exception as exc:
             handler = relation_exception_handlers.get(exc.__class__)
             if handler:
                 handler(
                     exc,
                     new_pid=hit.pid,
                     # hits before the legacy-recid check may lack the field
                     legacy_id=getattr(hit, "legacy_recid", None),
                 )
             else:
                 # bare re-raise preserves the original traceback
                 raise
Example #3
0
    def post(self, record, **kwargs):
        """Create a new relation."""
        to_index = []

        def _apply(payload):
            # `relation_type` is mandatory in every action payload
            try:
                relation_type = payload.pop("relation_type")
            except KeyError as key:
                return abort(400, "The `{}` is a required field".format(key))

            relation = Relation.get_relation_by_name(relation_type)
            if relation in PARENT_CHILD_RELATION_TYPES:
                touched, rec_a, rec_b = self._create_parent_child_relation(
                    record, relation, payload
                )
            elif relation in SIBLINGS_RELATION_TYPES:
                touched, rec_a, rec_b = self._create_sibling_relation(
                    record, relation, payload
                )
            elif relation in SEQUENCE_RELATION_TYPES:
                rec_a, rec_b = self._create_sequence_relation(
                    record, relation, payload
                )
                touched = rec_b
            else:
                raise RecordRelationsError(
                    "Invalid relation type `{}`".format(relation.name)
                )

            db.session.commit()

            to_index.extend([rec_a, rec_b])

            # `touched` may be one record or a list; if any of them is our
            # record, hand back that (refreshed) version instead.
            candidates = touched if isinstance(touched, list) else [touched]
            for candidate in candidates:
                matches = (
                    candidate.pid == record.pid
                    and candidate._pid_type == record._pid_type
                )
                if matches:
                    return candidate
            return record

        payloads = request.get_json()
        if not isinstance(payloads, list):
            payloads = [payloads]

        for payload in payloads:
            record = _apply(payload)

        # Index both parent/child (or first/second)
        RecordRelationIndexer().index(record, *to_index)

        return self.make_response(record.pid, record, 201)
Example #4
0
    def delete(self, record, **kwargs):
        """Delete an existing relation."""
        to_index = []

        def _remove(payload):
            # `relation_type` is mandatory in every action payload
            try:
                relation_type = payload.pop("relation_type")
            except KeyError as key:
                return abort(400, "The `{}` is a required field".format(key))

            relation = Relation.get_relation_by_name(relation_type)

            if relation in PARENT_CHILD_RELATION_TYPES:
                touched, rec_a, rec_b = self._delete_parent_child_relation(
                    record, relation, payload
                )
            elif relation in SIBLINGS_RELATION_TYPES:
                touched, rec_a, rec_b = self._delete_sibling_relation(
                    record, relation, payload
                )
            elif relation in SEQUENCE_RELATION_TYPES:
                rec_a, rec_b = self._delete_sequence_relation(
                    record, relation, payload
                )
                touched = rec_a
            else:
                raise RecordRelationsError(
                    "Invalid relation type `{}`".format(relation.name)
                )

            db.session.commit()

            to_index.extend([rec_a, rec_b])

            # if our record was the one modified, return its refreshed version
            matches = (
                touched.pid == record.pid
                and touched._pid_type == record._pid_type
            )
            return touched if matches else record

        payloads = request.get_json()
        if not isinstance(payloads, list):
            payloads = [payloads]

        for payload in payloads:
            record = _remove(payload)

        # Index both parent/child (or first/second)
        RecordRelationIndexer().index(record, *to_index)

        return self.make_response(record.pid, record, 200)
Example #5
0
 def link_records_and_serial(record_cls, search):
     """Create serial parent-child relations for every matching hit."""
     for hit in search.scan():
         # A hit without a legacy recid is a volume of a multipart:
         # nothing to link here.
         if "legacy_recid" not in hit:
             continue
         child = record_cls.get_record_by_pid(hit.pid)
         check_for_special_series(child)
         for serial in get_serials_by_child_recid(hit.legacy_recid):
             volume = get_migrated_volume_by_serial_title(
                 child, serial["title"]
             )
             create_parent_child_relation(
                 serial, child, SERIAL_RELATION, volume
             )
             RecordRelationIndexer().index(child, serial)
Example #6
0
def import_multivolume(json_record):
    """Import multivolume type of multipart.

    Creates one series record for the multipart plus one document record
    per entry in ``_migration.volumes``, linking each document to the
    series. Raises ``MultipartMigrationError`` if the multipart was
    already migrated and ``ManualImportRequired`` when the per-volume
    metadata lists outnumber the declared volumes.
    """
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    series_cls, series_pid_provider = model_provider_by_rectype("multipart")
    document_cls, document_pid_provider = model_provider_by_rectype("document")

    legacy_recid = json_record["legacy_recid"]

    # build multipart dict - leave the legacy_recid attached
    multipart_json = clean_document_json_for_multipart(
        json_record, include_keys=["legacy_recid"])

    # prepare json for each volume
    document_json_template = exclude_multipart_fields(
        json_record, exclude_keys=["legacy_recid"])

    volume_list = json_record["_migration"]["volumes"]

    try:
        get_record_by_legacy_recid(series_cls, legacy_recid)
        raise MultipartMigrationError(f"Multipart {legacy_recid} was already "
                                      f"processed. Aborting.")
    except PIDDoesNotExistError:
        # not migrated yet: create the parent series record
        multipart_record = import_record(
            multipart_json,
            series_cls,
            series_pid_provider,
            legacy_id_key="title",
        )
    volumes_items_list = json_record["_migration"]["items"]
    volumes_identifiers_list = json_record["_migration"]["volumes_identifiers"]
    volumes_urls_list = json_record["_migration"]["volumes_urls"]

    lists_lengths = [
        len(entry) for entry in [
            volumes_urls_list,
            volumes_items_list,
            volumes_identifiers_list,
        ]
    ]

    # BUG FIX: the original computed `any(lengths) > len(volume_list)`,
    # comparing a bool with an int, so the guard almost never fired.
    # Compare the largest metadata list against the volume count instead.
    too_many_volumes = max(lists_lengths) > len(volume_list)

    if too_many_volumes:
        raise ManualImportRequired(
            "Record has more additional volume information "
            "entries than the number of indicated volumes")

    for volume in volume_list:
        # fill the shared template with this volume's specific fields
        replace_fields_in_volume(document_json_template, volume, json_record)
        document_record = import_record(
            document_json_template,
            document_cls,
            document_pid_provider,
            legacy_id_key="title",
        )
        document_indexer.index(document_record)
        series_indexer.index(multipart_record)

        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volume.get("volume"),
        )

        RecordRelationIndexer().index(document_record, multipart_record)
    return multipart_record
Example #7
0
def import_multipart(json_record):
    """Import multipart record."""
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    series_cls, series_pid_provider = model_provider_by_rectype("multipart")
    document_cls, document_pid_provider = model_provider_by_rectype("document")

    parent = None
    multipart_id = json_record["_migration"].get("multipart_id")

    # Split the payload: series-level data becomes the multipart record,
    # the volume-agnostic remainder becomes the document record.
    multipart_json = clean_document_json_for_multipart(json_record)
    document_json = exclude_multipart_fields(json_record)

    # volume specific information
    volumes = json_record["_migration"]["volumes"]

    if multipart_id:
        # reuse a multipart already created from a previous dump file
        parent = get_multipart_by_multipart_id(multipart_id)
    # a series with a record per volume must declare exactly one volume
    if len(volumes) != 1:
        raise ManualImportRequired("Matched volumes number incorrect.")

    # series with separate record per volume
    # (identified together with multipart id)
    if not parent:
        parent = import_record(
            multipart_json,
            series_cls,
            series_pid_provider,
            legacy_id_key="title",
        )
    try:
        # the document may already exist from an earlier run
        child = get_record_by_legacy_recid(
            document_cls, document_json["legacy_recid"])
        # creating the relation again should fail if it already exists
        create_parent_child_relation(
            parent,
            child,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        return parent
    except PIDDoesNotExistError:
        child = import_record(document_json, document_cls,
                              document_pid_provider)
        document_indexer.index(child)

        create_parent_child_relation(
            parent,
            child,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        # index the multipart immediately: the next volumes are matched
        # by searching for its multipart_id
        series_indexer.index(parent)

        RecordRelationIndexer().index(child, parent)

        return parent
Example #8
0
def import_multivolume(json_record):
    """Import multivolume type of multipart.

    Creates the parent multipart series, then one unminted document per
    entry in ``_migration.volumes``, committing and linking each one.
    Raises ``MultipartMigrationError`` when the multipart was already
    migrated and ``ManualImportRequired`` when the per-volume metadata
    lists outnumber the declared volumes.
    """
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    series_cls = current_app_ils.series_record_cls

    legacy_recid = json_record["legacy_recid"]

    # build multipart dict - leave the legacy_recid attached
    multipart_json = clean_document_json_for_multipart(
        json_record,
        include_keys=[
            "legacy_recid", "alternative_titles", "publication_year",
            "identifiers"
        ])

    # prepare json for each volume
    document_json_template = exclude_multipart_fields(
        json_record, exclude_keys=["legacy_recid", "alternative_titles"])

    volume_list = json_record["_migration"]["volumes"]

    try:
        legacy_pid_type = current_app.config["CDS_ILS_SERIES_LEGACY_PID_TYPE"]
        get_record_by_legacy_recid(series_cls, legacy_pid_type, legacy_recid)
        raise MultipartMigrationError(
            f"Multipart {legacy_recid} was already processed. Aborting.")
    except PIDDoesNotExistError:
        # not migrated yet: create and index the parent series record
        add_cds_url(multipart_json)
        multipart_record = import_record(
            multipart_json,
            rectype="multipart",
            legacy_id=multipart_json["legacy_recid"],
        )
        series_indexer.index(multipart_record)
    volumes_items_list = json_record["_migration"]["items"]
    volumes_identifiers_list = json_record["_migration"]["volumes_identifiers"]
    volumes_urls_list = json_record["_migration"]["volumes_urls"]

    lists_lengths = [
        len(entry) for entry in [
            volumes_urls_list,
            volumes_items_list,
            volumes_identifiers_list,
        ]
    ]

    # BUG FIX: the original computed `any(lists_lengths) > len(volume_list)`,
    # comparing a bool with an int, so the guard almost never fired.
    # Compare the largest metadata list against the volume count instead.
    too_many_volumes = max(lists_lengths) > len(volume_list)

    if too_many_volumes:
        raise ManualImportRequired(
            "Record has more additional volume information "
            "entries than the number of indicated volumes")

    for volume in volume_list:
        # fill the shared template with this volume's specific fields
        replace_fields_in_volume(document_json_template, volume, json_record)
        document_record = import_record(
            document_json_template,
            rectype="document",
            legacy_id=json_record["legacy_recid"],
            # we don't mint the legacy pid for these documents, since they
            # never were records on legacy, only it's parent multipart was
            mint_legacy_pid=False,
        )

        document_indexer.index(document_record)

        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volume.get("volume"),
        )
        db.session.commit()

        RecordRelationIndexer().index(document_record, multipart_record)
    return multipart_record
Example #9
0
def import_multipart(json_record):
    """Import multipart record.

    Creates (or reuses) the parent multipart series and its single volume
    document, links them with a MULTIPART_MONOGRAPH relation and returns
    the multipart record. Raises ``ManualImportRequired`` when the dump
    does not declare exactly one volume.

    NOTE(review): commit/index ordering below is load-bearing — the series
    must be searchable by ``multipart_id`` before the next volume's dump
    file is processed; keep statement order intact.
    """
    document_indexer = current_app_ils.document_indexer
    series_indexer = current_app_ils.series_indexer
    document_cls = current_app_ils.document_record_cls

    multipart_record = None
    # multipart_id groups volumes of the same series across dump files
    multipart_id = json_record["_migration"].get("multipart_id")

    # volume specific information
    volumes = json_record["_migration"]["volumes"]

    if multipart_id:
        # try to check if the multipart already exists
        # (from previous dump file); id is uppercased for the lookup —
        # presumably the stored ids are normalized to uppercase
        multipart_record = get_multipart_by_multipart_id(multipart_id.upper())
    # series with record per volume shouldn't have more than one volume
    # in the list
    if len(volumes) != 1:
        raise ManualImportRequired("Matched volumes number incorrect.")

    # split json for multipart (series rectype) and
    # document (common data for all volumes, to be stored on document rectype)
    multipart_json = clean_document_json_for_multipart(json_record,
                                                       include_keys=[
                                                           "publication_year",
                                                       ])
    # propagate the imprint publisher onto the series, when present
    publisher = json_record.get("imprint", {}).get("publisher")
    if publisher:
        multipart_json["publisher"] = publisher

    document_json = exclude_multipart_fields(json_record)
    # the document takes the volume-specific title, not the series title
    document_json["title"] = volumes[0]["title"]
    add_cds_url(document_json)

    # series with separate record per volume
    # (identified together with multipart id)
    if not multipart_record:
        multipart_record = import_record(
            multipart_json,
            rectype="multipart",
            legacy_id=json_record["legacy_recid"],
            # we don't mint the legacy pid for these series, since they
            # never were records on legacy, only it's volumes were
            mint_legacy_pid=False,
        )
    try:
        # check if the document already exists
        legacy_pid_type = current_app.config["CDS_ILS_RECORD_LEGACY_PID_TYPE"]
        document_record = get_record_by_legacy_recid(
            document_cls, legacy_pid_type, document_json["legacy_recid"])
        # try to create relation (should fail if already exists)
        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        db.session.commit()
        return multipart_record
    except PIDDoesNotExistError as e:
        # document not migrated yet: create, link and index it
        document_record = import_record(
            document_json,
            rectype="document",
            legacy_id=document_json["legacy_recid"])
        document_indexer.index(document_record)

        create_parent_child_relation(
            multipart_record,
            document_record,
            MULTIPART_MONOGRAPH_RELATION,
            volumes[0]["volume"],
        )
        db.session.commit()
        # the multipart needs to be indexed immediately,
        # because we search multipart_id to match next volumes
        series_indexer.index(multipart_record)

        RecordRelationIndexer().index(document_record, multipart_record)

        return multipart_record