def test_migrate_records_with_all_makes_records_references_process_enabled(
        proecess_references_mock, inspire_app):
    """Check that a default mirror migration schedules reference processing.

    Loads ``fixtures/dummy.xml`` into the mirror, runs
    ``migrate_from_mirror`` without arguments and expects the ``.s``
    attribute of the injected mock to have been called exactly once.
    """
    fixture_relpath = os.path.join("fixtures", "dummy.xml")
    dummy_xml_path = pkg_resources.resource_filename(__name__, fixture_relpath)
    populate_mirror_from_file(dummy_xml_path)

    migrate_from_mirror()

    signature_mock = proecess_references_mock.s
    signature_mock.assert_called_once()
Esempio n. 2
0
def test_migrate_records_with_all_makes_records_references_process_disabled(
        proecess_references_mock, base_app, db, es_clear, datadir,
        create_record):
    """Check that ``also_migrate="all"`` does not trigger reference processing.

    Loads ``fixtures/dummy.xml`` into the mirror, runs
    ``migrate_from_mirror(also_migrate="all")`` and expects the injected
    mock to have been left untouched.
    """
    record_fixture_path = pkg_resources.resource_filename(
        __name__, os.path.join("fixtures", "dummy.xml"))
    populate_mirror_from_file(record_fixture_path)

    migrate_from_mirror(also_migrate="all")

    proecess_references_mock.assert_not_called()
    # The "process_enabled" counterpart of this test asserts on the ``.s``
    # child mock, so that is presumably how the task gets scheduled. Calls
    # made on a child mock are not recorded as calls of the parent, so the
    # assertion above alone would pass even if ``.s`` had been invoked.
    proecess_references_mock.s.assert_not_called()
Esempio n. 3
0
def test_migrate_record_from_specified_date_only(inspire_app):
    """Check that ``date_from`` restricts migration to newer mirror rows.

    Two mirror rows are stored: recid 666 last updated in 2010 and recid 667
    last updated in 2020. After ``migrate_from_mirror(date_from="2015-01-01")``
    only 667 is expected to be marked valid and to exist as a literature
    record; 666 must stay unprocessed (``valid is None``).
    """
    raw_record_1 = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">On the validity of INSPIRE records</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>"
    )

    raw_record_2 = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">On the validity of INSPIRE records</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>"
    )

    # Older than ``date_from``: must be skipped by the migration.
    prod_record_1 = LegacyRecordsMirror.from_marcxml(raw_record_1)
    prod_record_1.last_updated = datetime.datetime(2010, 1, 1)
    db.session.merge(prod_record_1)

    # Newer than ``date_from``: must be migrated. The timestamp has to be set
    # on record 2 itself and before the merge, otherwise the row is stored
    # with whatever ``last_updated`` value ``from_marcxml`` left on it.
    prod_record_2 = LegacyRecordsMirror.from_marcxml(raw_record_2)
    prod_record_2.last_updated = datetime.datetime(2020, 1, 1)
    db.session.merge(prod_record_2)

    migrate_from_mirror(date_from="2015-01-01")

    rec_1 = LegacyRecordsMirror.query.filter_by(recid=666).one()
    rec_2 = LegacyRecordsMirror.query.filter_by(recid=667).one()
    # Only record 2 should be valid as rec_1 migration should not run.
    assert rec_1.valid is None
    assert rec_2.valid is True

    with pytest.raises(PIDDoesNotExistError):
        LiteratureRecord.get_record_by_pid_value("666")
    assert LiteratureRecord.get_record_by_pid_value("667")
Esempio n. 4
0
def continuous_migration():
    """Move queued legacy records into the mirror and migrate them on ``END``.

    Reads messages from ``QUEUE`` one at a time. Every message other than
    ``b"END"`` is zlib-decompressed and inserted into the mirror, then
    removed from the queue. When ``b"END"`` is seen it is removed, a
    mirror migration is started (with ORCID push enabled) and awaited, and
    the task stops. If the queue runs dry before ``END`` arrives the task
    just logs that it is waiting and returns.

    The work is guarded by a non-blocking Redis lock named
    ``continuous_migration``; when the lock is already held the task returns
    immediately. The lock is always released on the way out.
    """
    # XXX: temp redis url when we use continuous migration in kb8s
    app_config = current_app.config
    redis_url = app_config.get("MIGRATION_REDIS_URL")
    if redis_url is None:
        redis_url = app_config.get("CACHE_REDIS_URL")

    LOGGER.debug("Connected to REDIS", redis_url=redis_url)

    redis_client = StrictRedis.from_url(redis_url)
    migration_lock = Lock(
        redis_client, "continuous_migration", expire=120, auto_renewal=True
    )

    # The queue entry is only removed by the explicit ``lpop`` calls below,
    # so ``_next_message`` presumably peeks without popping -- TODO confirm.
    message = _next_message(redis_client)
    if not message:
        LOGGER.debug("No records to migrate.")
        return

    lock_acquired = migration_lock.acquire(blocking=False)
    if not lock_acquired:
        LOGGER.info("Continuous_migration already executed. Skipping.")
        return

    try:
        queue_length = redis_client.llen(QUEUE)
        LOGGER.info("Starting migration of records.",
                    records_total=queue_length)

        while True:
            if not message:
                # Queue drained without an END marker.
                LOGGER.info("Waiting for more records...")
                break

            if message == b"END":
                redis_client.lpop(QUEUE)
                migration_task = migrate_from_mirror(disable_orcid_push=False)
                wait_for_all_tasks(migration_task)
                LOGGER.info("Migration finished.")
                break

            decompressed_record = zlib.decompress(message)
            (recid, ) = insert_into_mirror([decompressed_record])
            LOGGER.debug("Inserted record into mirror.", recid=recid)
            # Drop the message only after it has been stored in the mirror.
            redis_client.lpop(QUEUE)
            message = _next_message(redis_client)
    finally:
        migration_lock.release()
Esempio n. 5
0
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy.

    Takes a non-blocking Redis lock named ``continuous_migration``; if it is
    already held the task logs and returns. Otherwise every entry of the
    ``legacy_records`` list is zlib-decompressed, inserted into the mirror
    and popped from the list. Afterwards, if the last insertion returned a
    non-empty result, a mirror migration (with ORCID push enabled) is started
    and awaited -- this also happens when the loop was interrupted by an
    exception, as in the original implementation.

    The lock is released in an outer ``finally`` so that a failure inside
    ``migrate_from_mirror`` / ``wait_for_all_tasks`` cannot leave it held.
    """
    # XXX: temp redis url when we use continuous migration in kb8s
    redis_url = current_app.config.get("MIGRATION_REDIS_URL")
    queue = "continuous_migration"

    if redis_url is None:
        redis_url = current_app.config.get("CACHE_REDIS_URL")

    LOGGER.debug("Connected to REDIS", redis_url=redis_url, queue=queue)

    r = StrictRedis.from_url(redis_url)
    lock = Lock(r, queue, expire=120, auto_renewal=True)

    if not lock.acquire(blocking=False):
        LOGGER.info("Continuous_migration already executed. Skipping.")
        return

    migrated_records = None
    try:
        try:
            num_of_records = r.llen("legacy_records")
            LOGGER.info(f"Starting migration of {num_of_records} records.")

            while r.llen("legacy_records"):
                # Read the head first and pop it only after the insert, so a
                # failing insert does not lose the record.
                raw_record = r.lrange("legacy_records", 0, 0)
                if raw_record:
                    migrated_records = insert_into_mirror(
                        [zlib.decompress(raw_record[0])]
                    )
                    LOGGER.debug(f"Migrated {len(migrated_records)} records.")
                r.lpop("legacy_records")
        finally:
            # Migrate whatever reached the mirror, even on a partial run.
            if migrated_records:
                task = migrate_from_mirror(disable_orcid_push=False)
                wait_for_all_tasks(task)
    finally:
        # Must run even if the migration above raised: the lock is created
        # with auto_renewal=True, so it should not be left to expire.
        lock.release()
        LOGGER.info("Migration terminated.")
Esempio n. 6
0
def test_migrate_recids_from_mirror_all_only_with_literature_author_and_invalid(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Migrate a mixed mirror (literature, author, invalid) with ``all``.

    Four rows are put into the mirror: two HEP records (666 carries a 999C5
    datafield whose subfield ``0`` is 667), one HEP record with an
    unparsable 260 ``c`` value (668) and one HEPNAMES record (669). After
    ``migrate_from_mirror(also_migrate="all")`` the test polls until:

    * 667 has a citation count of 1,
    * 666, 667 and 669 are returned by the search backend with the expected
      control numbers,
    * no ``lit`` PID exists for 668.
    """
    citer_control_number = 666
    citing_control_number = 667
    invalid_control_number = 668
    author_control_number = 669

    citer_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")

    citing_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    invalid_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">668</controlfield>'
        b'  <datafield tag="260" ind1=" " ind2=" ">'
        b'    <subfield code="c">Definitely not a date</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    author_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">669</controlfield>'
        b'  <datafield tag="100" ind1=" " ind2=" ">'
        b'    <subfield code="a">Jessica Jones</subfield>'
        b'    <subfield code="q">Jones Jessica</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEPNAMES</subfield>'
        b"  </datafield>"
        b"</record>")

    # Build and stage each mirror row in turn, then commit them together.
    mirror_payloads = (
        citer_marcxml,
        citing_marcxml,
        invalid_marcxml,
        author_marcxml,
    )
    for marcxml in mirror_payloads:
        mirror_row = LegacyRecordsMirror.from_marcxml(marcxml)
        db.session.add(mirror_row)
    db.session.commit()

    migrate_from_mirror(also_migrate="all")

    def assert_migrator_task():
        fetch_record = InspireRecord.get_record_by_pid_value

        citer = fetch_record(citer_control_number, "lit")
        citing = fetch_record(citing_control_number, "lit")
        author = fetch_record(author_control_number, "aut")

        assert citing.citation_count == 1

        # Each migrated record must be retrievable from the search backend
        # under its own control number.
        expected_in_es = (
            (citer_control_number, citer),
            (citing_control_number, citing),
            (author_control_number, author),
        )
        for expected_control_number, record in expected_in_es:
            es_data = InspireSearch.get_record_data_from_es(record)
            assert expected_control_number == es_data["control_number"]

        with pytest.raises(PIDDoesNotExistError):
            fetch_record(invalid_control_number, "lit")

    retry_until_pass(assert_migrator_task)
Esempio n. 7
0
def test_migrate_recids_from_mirror_all_only_with_literature(
        app, celery_app_with_context, celery_session_worker):
    """Migrate two literature mirror rows with ``also_migrate="all"``.

    Record 666 carries a 999C5 datafield whose subfield ``0`` is 667. After
    the migration the test expects record 667 to have a citation count of 1
    and both records to be returned by the search backend with their own
    control numbers.

    The assertions are polled with ``retry_until_pass`` instead of sleeping
    for a fixed time, so the test finishes as soon as the task chain is done
    and does not depend on the chain completing within an arbitrary delay.
    """
    raw_record_citer = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")
    valid_record_literature_citer = LegacyRecordsMirror.from_marcxml(
        raw_record_citer)
    citer_control_number = 666

    db.session.add(valid_record_literature_citer)

    raw_record_citing = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")

    valid_record_literature_citing = LegacyRecordsMirror.from_marcxml(
        raw_record_citing)
    citing_control_number = 667
    db.session.add(valid_record_literature_citing)
    db.session.commit()

    migrate_from_mirror(also_migrate="all")

    def assert_migrator_task():
        record_citer = InspireRecord.get_record_by_pid_value(
            citer_control_number, "lit")
        record_citing = InspireRecord.get_record_by_pid_value(
            citing_control_number, "lit")

        assert record_citing.citation_count == 1

        record_citer_es = InspireSearch.get_record_data_from_es(record_citer)
        result_citer_control_number = record_citer_es["control_number"]

        assert citer_control_number == result_citer_control_number

        record_citing_es = InspireSearch.get_record_data_from_es(record_citing)
        result_citing_control_number = record_citing_es["control_number"]

        assert citing_control_number == result_citing_control_number

    # Same polling helper the literature/author/invalid variant of this test
    # uses; presumably it re-runs the callable until it stops raising.
    retry_until_pass(assert_migrator_task)