예제 #1
0
    def run(self):
        """Run the search, check & do job over every record the search yields.

        The results of ``self.search()`` are passed through ``chunker``
        together with ``self.size``.  For each chunk the hit IDs are collected,
        the records are loaded with ``InspireRecord.get_records`` and every
        record is re-wrapped in the class returned by
        ``InspireRecord.get_class_for_record``.

        For each record ``self.check`` is called with a logger bound to the
        record's ``control_number`` and a fresh ``state`` dict.  Only when the
        check is truthy is ``self.do`` called (with the same logger and state),
        followed by ``record.update``.

        The session is committed after a chunk when
        ``self.commit_after_each_batch`` is truthy, and once more after the
        last chunk.
        """
        n_checked = 0
        n_modified = 0
        self.logger.info(
            "Starting search, check & do job",
            reason=self.__doc__,
        )

        for hits in chunker(self.search(), self.size):
            record_ids = [hit.meta.id for hit in hits]
            self.logger.info(
                "Received record IDs from ES",
                num_records=len(record_ids),
            )
            fetched = InspireRecord.get_records(record_ids)
            self.logger.info(
                "Fetched chunk of records from DB",
                num_records=len(fetched),
            )

            for raw_record in fetched:
                # check() and do() communicate through this per-record dict.
                shared_state = {}
                record_logger = self.logger.bind(
                    recid=raw_record["control_number"])
                n_checked += 1

                # Re-wrap the fetched record in its specific record class.
                record_cls = InspireRecord.get_class_for_record(raw_record)
                typed_record = record_cls(raw_record, model=raw_record.model)

                if self.check(typed_record,
                              logger=record_logger,
                              state=shared_state):
                    n_modified += 1
                    record_logger.info("Modifying record, check positive")
                    self.do(typed_record,
                            logger=record_logger,
                            state=shared_state)
                    typed_record.update(dict(typed_record))
                else:
                    record_logger.info("Not modifying record, check negative")

            if self.commit_after_each_batch:
                db.session.commit()

        db.session.commit()
        self.logger.info(
            "Search, check & do job finished successfully.",
            num_records_checked=n_checked,
            num_records_modified=n_modified,
        )
예제 #2
0
            "value": "GAMBIT"
        },
        "long_name": "GAMBIT : Global And Modular BSM Inference Tool",
        "inspire_classification": ["Non-experimental|Simulation tools"],
        "description":
        "GAMBIT is a global fitting code for generic Beyond the Standard Model theories, designed to allow fast and easy definition of new models, observables, likelihoods, scanners and backend physics codes.",
        "legacy_name": "GAMBIT",
        "experiment": {
            "value": "GAMBIT",
            "short_name": "GAMBIT"
        },
        "$schema": "https://inspirebeta.net/schemas/records/experiments.json",
        "_collections": ["Experiments"],
    }

    cls = InspireRecord.get_class_for_record(json_record)
    record = cls.create_or_update(json_record,
                                  disable_external_push=True,
                                  disable_relations_update=True)
    pid = PersistentIdentifier.query.filter_by(pid_value="1775082").one()
    assert record.id
    assert InspireRecord.get_record_by_pid_value("1775082", "exp")
    assert pid.status == PIDStatus.DELETED


def test_creating_record_with_id_provided_properly_mints_identifiers(
        inspire_app):
    record_data = {
        "$schema": "https://inspirebeta.net/schemas/records/hep.json",
        "control_number": 1_234_567,
        "arxiv_eprints": [{
예제 #3
0
def migrate_record_from_mirror(prod_record,
                               disable_external_push=True,
                               disable_relations_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    The MARCXML held by ``prod_record`` is converted with ``marcxml2record``
    and handed to ``create_or_update`` of the class chosen by
    ``InspireRecord.get_class_for_record``.  The outcome is stored on
    ``prod_record`` (``valid = True`` or ``error = <exception>``), which is
    then merged into the database session.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_external_push(bool): forwarded to ``create_or_update``.
        disable_relations_update(bool): forwarded to ``create_or_update``.

    Returns:
        The record returned by ``create_or_update`` when the migration
        succeeds.  ``None`` when the conversion is not supported or fails,
        when the pid type is blacklisted, or when creating the record fails.
        After a ``DownloadFileError`` whose cached files could be removed,
        the result of the retried migration is returned.

    Raises:
        ThreadsTimeoutError: re-raised unchanged instead of being stored on
            ``prod_record``.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except NotSupportedError as exc:
        # Unsupported records are logged and flagged as valid, not as errors.
        logger.warning(str(exc), recid=prod_record.recid)
        prod_record.valid = True
        db.session.merge(prod_record)
        return None
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    try:
        # The nested transaction wraps the record creation and file caching.
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            original_urls = replace_afs_file_locations_with_local(json_record)
            record = cls.create_or_update(
                json_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
            cache_afs_file_locations(record)
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        # Use ``warning`` like the NotSupportedError branch above; ``warn`` is
        # only an alias for it.
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except DownloadFileError as exc:
        # NOTE(review): ``original_urls`` is only bound once
        # replace_afs_file_locations_with_local() has returned.  If that helper
        # can itself raise DownloadFileError, this line fails with
        # UnboundLocalError -- confirm against the helper.
        removed_cached_files = remove_cached_afs_file_locations(original_urls)
        if not removed_cached_files:
            logger.exception("DownloadFileError while migrate from mirror")
            prod_record.error = exc
            db.session.merge(prod_record)
        else:
            # Cached files were removed, so retry the migration.
            return migrate_record_from_mirror(
                prod_record=prod_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the exception args so the stored error carries the pid info.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except ThreadsTimeoutError:
        # Must be listed before the generic handler so it is not swallowed.
        raise
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record

    # Reached only from the error handlers above.
    return None
예제 #4
0
def migrate_record_from_mirror(prod_record,
                               disable_orcid_push=True,
                               disable_citation_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    The MARCXML held by ``prod_record`` is converted with ``marcxml2record``
    and handed to ``create_or_update`` of the class chosen by
    ``InspireRecord.get_class_for_record``.  Before that,
    ``delete_external_pids`` is called on the pidstore handler of every record
    linked from the ``deleted_records`` field.  The outcome is stored on
    ``prod_record`` (``valid = True`` or ``error = <exception>``), which is
    then merged into the database session.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_orcid_push(bool): forwarded to ``create_or_update``.
        disable_citation_update(bool): forwarded to ``create_or_update``.

    Returns:
        The record returned by ``create_or_update`` when the migration
        succeeds, otherwise ``None`` (conversion failure, blacklisted pid
        type, or any error while creating the record).
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
            # Plain ``{...}`` placeholders: a ``$`` in front of the braces is
            # not f-string syntax and would end up in the message literally.
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    try:
        # The nested transaction wraps the pid cleanup and record creation.
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            for deleted_record in cls.get_linked_records_from_dict_field(
                    json_record, "deleted_records"):
                deleted_record.pidstore_handler(
                    deleted_record.id, deleted_record).delete_external_pids()
            record = cls.create_or_update(
                json_record,
                disable_orcid_push=disable_orcid_push,
                disable_citation_update=disable_citation_update,
            )
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        # ``warning`` is the canonical method name; ``warn`` is only an alias.
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the exception args so the stored error carries the pid info.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record

    # Reached only from the error handlers above.
    return None