def test_migrate_records_with_all_makes_records_references_process_enabled(
    proecess_references_mock, inspire_app
):
    """Check that a default migration invokes the references mock once.

    The mirror is populated from ``fixtures/dummy.xml`` and
    ``migrate_from_mirror`` is run with its default arguments; afterwards the
    ``.s`` attribute of the injected mock must have been called exactly once.
    """
    fixture_relpath = os.path.join("fixtures", "dummy.xml")
    dummy_xml = pkg_resources.resource_filename(__name__, fixture_relpath)

    populate_mirror_from_file(dummy_xml)
    migrate_from_mirror()

    # NOTE(review): ``.s`` is presumably the Celery signature shortcut of the
    # patched task -- confirm against the patch decorator for this test.
    proecess_references_mock.s.assert_called_once()
def test_migrate_records_with_all_makes_records_references_process_disabled(
    proecess_references_mock, base_app, db, es_clear, datadir, create_record
):
    """Check that migrating with ``also_migrate="all"`` leaves the mock unused.

    The mirror is populated from ``fixtures/dummy.xml`` and the whole mirror
    is migrated; the injected references mock must not have been called.
    """
    fixture_relpath = os.path.join("fixtures", "dummy.xml")
    dummy_xml = pkg_resources.resource_filename(__name__, fixture_relpath)

    populate_mirror_from_file(dummy_xml)
    migrate_from_mirror(also_migrate="all")

    proecess_references_mock.assert_not_called()
def test_migrate_record_from_specified_date_only(inspire_app):
    """Check that ``date_from`` restricts which mirror records are migrated.

    Two literature records are stored in the mirror: 666 with
    ``last_updated`` in 2010 and 667 with ``last_updated`` in 2020.  After
    ``migrate_from_mirror(date_from="2015-01-01")`` only 667 is expected to be
    marked valid and to be retrievable as a literature record.
    """
    raw_record_1 = (
        b"<record>"
        b' <controlfield tag="001">666</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">On the validity of INSPIRE records</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )
    raw_record_2 = (
        b"<record>"
        b' <controlfield tag="001">667</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">On the validity of INSPIRE records</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )

    prod_record_1 = LegacyRecordsMirror.from_marcxml(raw_record_1)
    prod_record_1.last_updated = datetime.datetime(2010, 1, 1)
    db.session.merge(prod_record_1)

    prod_record_2 = LegacyRecordsMirror.from_marcxml(raw_record_2)
    # Stamp record 667 itself with a date after ``date_from`` so the outcome
    # does not depend on whatever default ``last_updated`` would get.
    prod_record_2.last_updated = datetime.datetime(2020, 1, 1)
    db.session.merge(prod_record_2)

    migrate_from_mirror(date_from="2015-01-01")

    rec_1 = LegacyRecordsMirror.query.filter_by(recid=666).one()
    rec_2 = LegacyRecordsMirror.query.filter_by(recid=667).one()

    # Only record 2 should be valid as rec_1 migration should not run.
    assert rec_1.valid is None
    assert rec_2.valid is True

    with pytest.raises(PIDDoesNotExistError):
        LiteratureRecord.get_record_by_pid_value("666")
    assert LiteratureRecord.get_record_by_pid_value("667")
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy.

    Reads compressed records from the Redis list ``QUEUE``, inserts each one
    into the mirror and removes it from the list.  When the ``b"END"`` marker
    is reached, it is removed, ``migrate_from_mirror`` is started and the task
    waits for the resulting tasks before returning.  If the list runs out
    before an ``END`` marker is seen, the function returns and leaves the
    final migration to a later invocation.

    Only one invocation works at a time: a non-blocking Redis lock named
    ``continuous_migration`` guards the loop and is always released on exit.
    """
    # XXX: temp redis url when we use continuous migration in k8s
    redis_url = current_app.config.get("MIGRATION_REDIS_URL")
    if redis_url is None:
        redis_url = current_app.config.get("CACHE_REDIS_URL")

    LOGGER.debug("Connected to REDIS", redis_url=redis_url)
    r = StrictRedis.from_url(redis_url)
    lock = Lock(r, "continuous_migration", expire=120, auto_renewal=True)

    # Cheap pre-check so an empty queue never touches the lock.
    if not _next_message(r):
        LOGGER.debug("No records to migrate.")
        return

    if not lock.acquire(blocking=False):
        LOGGER.info("Continuous_migration already executed. Skipping.")
        return

    try:
        # Re-read the head of the queue now that the lock is held.  The value
        # seen by the pre-check may already have been consumed by the previous
        # lock holder; acting on it would re-insert that record and then pop
        # (and lose) a different, still unprocessed entry.
        # NOTE(review): assumes _next_message peeks without removing, which is
        # what the explicit lpop calls below suggest -- confirm.
        message = _next_message(r)
        if not message:
            LOGGER.debug("No records to migrate.")
            return

        num_of_records = r.llen(QUEUE)
        LOGGER.info("Starting migration of records.", records_total=num_of_records)

        while message:
            if message == b"END":
                r.lpop(QUEUE)
                task = migrate_from_mirror(disable_orcid_push=False)
                wait_for_all_tasks(task)
                LOGGER.info("Migration finished.")
                break

            raw_record = zlib.decompress(message)
            (recid,) = insert_into_mirror([raw_record])
            LOGGER.debug("Inserted record into mirror.", recid=recid)
            # Remove the entry only after it has been stored in the mirror.
            r.lpop(QUEUE)
            message = _next_message(r)
        else:
            # Queue drained without an END marker.
            LOGGER.info("Waiting for more records...")
    finally:
        lock.release()
def continuous_migration():
    """Task to continuously migrate what is pushed up by Legacy.

    Takes a non-blocking Redis lock, then moves every entry of the
    ``legacy_records`` list into the mirror, one record at a time.  If at
    least one record was inserted, ``migrate_from_mirror`` is run and the task
    waits for the resulting tasks.  When the lock is already held, the call
    only logs and returns.

    The lock is released even if the final migration step raises.
    """
    # XXX: temp redis url when we use continuous migration in k8s
    redis_url = current_app.config.get("MIGRATION_REDIS_URL")
    # Name of the Redis lock (the records themselves live in "legacy_records").
    queue = "continuous_migration"
    if redis_url is None:
        redis_url = current_app.config.get("CACHE_REDIS_URL")

    LOGGER.debug("Connected to REDIS", redis_url=redis_url, queue=queue)
    r = StrictRedis.from_url(redis_url)
    lock = Lock(r, queue, expire=120, auto_renewal=True)

    if not lock.acquire(blocking=False):
        LOGGER.info("Continuous_migration already executed. Skipping.")
        return

    # Result of the most recent insert; only its truthiness is used below.
    migrated_records = None
    try:
        num_of_records = r.llen("legacy_records")
        LOGGER.info(f"Starting migration of {num_of_records} records.")
        while r.llen("legacy_records"):
            # Read the head first and pop it only after it has been handled.
            raw_record = r.lrange("legacy_records", 0, 0)
            if raw_record:
                migrated_records = insert_into_mirror(
                    [zlib.decompress(raw_record[0])]
                )
                LOGGER.debug(f"Migrated {len(migrated_records)} records.")
            r.lpop("legacy_records")
    finally:
        try:
            # Runs even when the loop above failed part-way, so records that
            # did reach the mirror are still migrated.
            if migrated_records:
                task = migrate_from_mirror(disable_orcid_push=False)
                wait_for_all_tasks(task)
        finally:
            # Must not be skipped when the migration step raises, otherwise
            # the lock stays held and later invocations keep skipping.
            lock.release()
        LOGGER.info("Migration terminated.")
def test_migrate_recids_from_mirror_all_only_with_literature_author_and_invalid(
    inspire_app, celery_app_with_context, celery_session_worker
):
    """Migrate a mixed mirror and check the outcome for every record.

    The mirror holds two literature records (666, whose reference field
    points at 667, and 667 itself), one literature record with the value
    ``Definitely not a date`` in field 260 (668) and one author record (669).
    After ``migrate_from_mirror(also_migrate="all")`` the test retries until:
    667 has a citation count of 1; 666, 667 and 669 are found in the search
    index under their control numbers; and 668 has no literature PID.
    """
    citer_control_number = 666
    citing_control_number = 667
    invalid_control_number = 668
    author_control_number = 669

    citer_marcxml = (
        b"<record>"
        b' <controlfield tag="001">666</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">This is a citer record</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b' <datafield tag="999" ind1="C" ind2="5">'
        b' <subfield code="0">667</subfield>'
        b' <subfield code="h">Achasov, M.N.</subfield>'
        b' <subfield code="k">snd-2018</subfield>'
        b' <subfield code="m">(SND Collaboration)</subfield>'
        b' <subfield code="o">2</subfield>'
        b' <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b' <subfield code="x">'
        b" [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b" </subfield>"
        b' <subfield code="y">2018</subfield>'
        b' <subfield code="z">0</subfield>'
        b' <subfield code="z">1</subfield>'
        b" </datafield>"
        b"</record>"
    )
    cited_marcxml = (
        b"<record>"
        b' <controlfield tag="001">667</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">This is a citing record</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )
    invalid_marcxml = (
        b"<record>"
        b' <controlfield tag="001">668</controlfield>'
        b' <datafield tag="260" ind1=" " ind2=" ">'
        b' <subfield code="c">Definitely not a date</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )
    author_marcxml = (
        b"<record>"
        b' <controlfield tag="001">669</controlfield>'
        b' <datafield tag="100" ind1=" " ind2=" ">'
        b' <subfield code="a">Jessica Jones</subfield>'
        b' <subfield code="q">Jones Jessica</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEPNAMES</subfield>'
        b" </datafield>"
        b"</record>"
    )

    # Parse and stage the records one by one, then commit them together.
    for marcxml in (citer_marcxml, cited_marcxml, invalid_marcxml, author_marcxml):
        db.session.add(LegacyRecordsMirror.from_marcxml(marcxml))
    db.session.commit()

    migrate_from_mirror(also_migrate="all")

    def assert_migrator_task():
        citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
        cited = InspireRecord.get_record_by_pid_value(citing_control_number, "lit")
        author = InspireRecord.get_record_by_pid_value(author_control_number, "aut")

        assert cited.citation_count == 1

        # Every migrated record must be indexed under its own control number.
        expectations = (
            (citer_control_number, citer),
            (citing_control_number, cited),
            (author_control_number, author),
        )
        for expected_control_number, record in expectations:
            indexed = InspireSearch.get_record_data_from_es(record)
            assert expected_control_number == indexed["control_number"]

        # The invalid record must not have been given a literature PID.
        with pytest.raises(PIDDoesNotExistError):
            InspireRecord.get_record_by_pid_value(invalid_control_number, "lit")

    retry_until_pass(assert_migrator_task)
def test_migrate_recids_from_mirror_all_only_with_literature(
    app, celery_app_with_context, celery_session_worker
):
    """Migrate two literature records and check citations and indexing.

    Record 666 carries a reference field pointing at record 667.  After
    ``migrate_from_mirror(also_migrate="all")`` record 667 must report a
    citation count of 1 and both records must be found in the search index
    under their control numbers.

    The checks are polled with a deadline rather than run once after a fixed
    sleep, so the test finishes as soon as the migration chain is done and
    tolerates slower runs.
    """
    raw_record_citer = (
        b"<record>"
        b' <controlfield tag="001">666</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">This is a citer record</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b' <datafield tag="999" ind1="C" ind2="5">'
        b' <subfield code="0">667</subfield>'
        b' <subfield code="h">Achasov, M.N.</subfield>'
        b' <subfield code="k">snd-2018</subfield>'
        b' <subfield code="m">(SND Collaboration)</subfield>'
        b' <subfield code="o">2</subfield>'
        b' <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b' <subfield code="x">'
        b" [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b" </subfield>"
        b' <subfield code="y">2018</subfield>'
        b' <subfield code="z">0</subfield>'
        b' <subfield code="z">1</subfield>'
        b" </datafield>"
        b"</record>"
    )
    valid_record_literature_citer = LegacyRecordsMirror.from_marcxml(raw_record_citer)
    citer_control_number = 666
    db.session.add(valid_record_literature_citer)

    raw_record_citing = (
        b"<record>"
        b' <controlfield tag="001">667</controlfield>'
        b' <datafield tag="245" ind1=" " ind2=" ">'
        b' <subfield code="a">This is a citing record</subfield>'
        b" </datafield>"
        b' <datafield tag="980" ind1=" " ind2=" ">'
        b' <subfield code="a">HEP</subfield>'
        b" </datafield>"
        b"</record>"
    )
    valid_record_literature_citing = LegacyRecordsMirror.from_marcxml(
        raw_record_citing
    )
    citing_control_number = 667
    db.session.add(valid_record_literature_citing)
    db.session.commit()

    migrate_from_mirror(also_migrate="all")

    def assert_migrator_task():
        record_citer = InspireRecord.get_record_by_pid_value(
            citer_control_number, "lit"
        )
        record_citing = InspireRecord.get_record_by_pid_value(
            citing_control_number, "lit"
        )

        assert record_citing.citation_count == 1

        record_citer_es = InspireSearch.get_record_data_from_es(record_citer)
        assert citer_control_number == record_citer_es["control_number"]

        record_citing_es = InspireSearch.get_record_data_from_es(record_citing)
        assert citing_control_number == record_citing_es["control_number"]

    # The migration chain finishes asynchronously.  Any failure (missing PID,
    # record not indexed yet, assertion) is retried until the deadline, after
    # which the last error is re-raised so the real cause is reported.
    timeout_seconds = 30
    retry_interval_seconds = 0.3
    deadline = time.monotonic() + timeout_seconds
    while True:
        try:
            assert_migrator_task()
            break
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_interval_seconds)