Esempio n. 1
0
def test_create_records_from_mirror_recids_with_different_types_of_record(inspire_app):
    """Migrate three mirror entries of different kinds in a single task call.

    Mirror entries for recids 666 (``HEP`` collection), 667 (``HEP``
    collection with a 260$c value that is not a date) and 668 (``HEPNAMES``
    collection) are added to the session and passed together to
    ``create_records_from_mirror_recids``.  The ids of the records found
    under the ``lit`` pid 666 and the ``aut`` pid 668 must be part of the
    task results, while looking up 667 as ``lit`` must raise
    ``PIDDoesNotExistError``.
    """
    literature_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">On the validity of INSPIRE records</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>"
    )
    # The 260$c subfield below does not contain a date.
    broken_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="260" ind1=" " ind2=" ">'
        b'    <subfield code="c">Definitely not a date</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>"
    )
    author_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">668</controlfield>'
        b'  <datafield tag="100" ind1=" " ind2=" ">'
        b'    <subfield code="a">Jessica Jones</subfield>'
        b'    <subfield code="q">Jones Jessica</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEPNAMES</subfield>'
        b"  </datafield>"
        b"</record>"
    )

    # Parse and stage each payload in the same order as the recids below.
    for marcxml in (literature_marcxml, broken_marcxml, author_marcxml):
        db.session.add(LegacyRecordsMirror.from_marcxml(marcxml))

    migrated_ids = create_records_from_mirror_recids([666, 667, 668])

    literature = InspireRecord.get_record_by_pid_value(666, "lit")
    assert str(literature.id) in migrated_ids

    author = InspireRecord.get_record_by_pid_value(668, "aut")
    assert str(author.id) in migrated_ids

    with pytest.raises(PIDDoesNotExistError):
        InspireRecord.get_record_by_pid_value(667, "lit")
Esempio n. 2
0
def test_inspire_prod_records_from_marcxml_raises_for_invalid_recid():
    """``from_marcxml`` must raise ``ValueError`` when field 001 holds ``foo``."""
    marcxml_with_bad_recid = """
        <record>
          <controlfield tag="001">foo</controlfield>
          <controlfield tag="005">20171011194718.0</controlfield>
          <datafield tag="100" ind1=" " ind2=" ">
            <subfield code="a">Chetyrkin, K.G.</subfield>
          </datafield>
        </record>
        """

    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        LegacyRecordsMirror.from_marcxml(marcxml_with_bad_recid)
def test_create_record_from_mirror_recids_retries_on_timeout_error(
        retry_mock, inspire_app, s3):
    """Expect ``Retry`` when the S3 URL check outlasts the upload timeout.

    A mirror entry for recid 666 carrying an ``FFT`` datafield is staged,
    ``FILES_UPLOAD_THREAD_TIMEOUT`` is patched to 1 and
    ``current_s3_instance.is_s3_url`` is replaced by a mock that sleeps for
    two seconds.  Under those conditions
    ``create_records_from_mirror_recids`` must raise ``Retry``.

    NOTE(review): ``retry_mock`` is presumably injected by a patch decorator
    that is not visible in this snippet -- confirm against the full file.
    """
    marcxml_with_attachment = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">On the validity of INSPIRE records</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'  <datafield tag="FFT" ind1=" " ind2=" ">'
        b'    <subfield code="a">/opt/cds-invenio/var/data/files/g97/1940001/content.pdf;2</subfield>'
        b'    <subfield code="d"></subfield>'
        b'    <subfield code="f">.pdf</subfield>'
        b'    <subfield code="n">arXiv:1409.0794</subfield>'
        b'    <subfield code="r"></subfield>'
        b'    <subfield code="s">2015-01-12 03:41:58</subfield>'
        b'    <subfield code="v">2</subfield>'
        b'    <subfield code="z"></subfield>'
        b"  </datafield>"
        b"</record>")
    mirror_entry = LegacyRecordsMirror.from_marcxml(marcxml_with_attachment)
    db.session.add(mirror_entry)

    def outlast_timeout(*args):
        # Sleep longer than the 1 configured as upload timeout below.
        sleep(2)

    with patch.dict(current_app.config, {"FILES_UPLOAD_THREAD_TIMEOUT": 1}):
        with patch.object(current_s3_instance, "is_s3_url") as is_s3_url_mock:
            is_s3_url_mock.side_effect = outlast_timeout
            with pytest.raises(Retry):
                create_records_from_mirror_recids([666])
Esempio n. 4
0
def migrate_and_insert_record(raw_record,
                              disable_external_push=False,
                              disable_relations_update=False):
    """Mirror a raw MARCXML record and hand it over to the migration step.

    The payload is parsed with ``LegacyRecordsMirror.from_marcxml``, the
    resulting mirror entry is merged into the current database session and
    then passed to ``migrate_record_from_mirror``.

    Args:
        raw_record: MARCXML payload of a single record.
        disable_external_push: forwarded positionally, unchanged, to
            ``migrate_record_from_mirror``.
        disable_relations_update: forwarded positionally, unchanged, to
            ``migrate_record_from_mirror``.

    Returns:
        Whatever ``migrate_record_from_mirror`` returns for the entry.
    """
    mirror_entry = LegacyRecordsMirror.from_marcxml(raw_record)
    db.session.merge(mirror_entry)
    migration_result = migrate_record_from_mirror(
        mirror_entry, disable_external_push, disable_relations_update)
    return migration_result
Esempio n. 5
0
def test_inspire_prod_records_error():
    """An exception assigned to ``error`` reads back as ``"<type>: <message>"``.

    The message contains a non-ASCII character on purpose.
    """
    marcxml = b"""
        <record>
          <controlfield tag="001">12345</controlfield>
          <controlfield tag="005">20171011194718.0</controlfield>
          <datafield tag="100" ind1=" " ind2=" ">
            <subfield code="a">Chetyrkin, K.G.</subfield>
          </datafield>
        </record>
        """

    mirror_entry = LegacyRecordsMirror(recid="12345", _marcxml=marcxml)
    mirror_entry.error = ValueError("This is an error with ùnicode")

    expected_text = "ValueError: This is an error with ùnicode"
    assert mirror_entry.error == expected_text
Esempio n. 6
0
def test_migrating_deleted_record_registers_control_number_with_deleted_status(
        inspire_app, datadir):
    """Migrating ``dummy_deleted.xml`` leaves pid ``12345`` in DELETED status."""
    fixture_path = datadir / "dummy_deleted.xml"
    mirror_entry = LegacyRecordsMirror.from_marcxml(fixture_path.read_text())
    db.session.add(mirror_entry)

    create_records_from_mirror_recids([12345])

    registered_pid = PersistentIdentifier.query.filter_by(
        pid_value="12345").one()
    assert registered_pid.status == PIDStatus.DELETED
Esempio n. 7
0
def insert_into_mirror(raw_records):
    """Merge raw MARCXML records into the legacy mirror and commit once.

    Every payload is parsed with ``LegacyRecordsMirror.from_marcxml`` and
    merged into the current session; the session is committed a single time
    after all payloads have been processed.

    Args:
        raw_records: iterable of MARCXML payloads, one per record.

    Returns:
        list: ``recid`` of each truthy mirror entry, in input order.
    """
    recids = []
    for marcxml in raw_records:
        mirror_entry = LegacyRecordsMirror.from_marcxml(marcxml)
        db.session.merge(mirror_entry)
        if not mirror_entry:
            continue
        recids.append(mirror_entry.recid)
    db.session.commit()
    return recids
Esempio n. 8
0
def test_migrate_record_from_specified_date_only(inspire_app):
    """``migrate_from_mirror(date_from=...)`` skips entries updated earlier.

    Two literature mirror entries are staged: 666 with ``last_updated`` set
    to 2010-01-01 and 667 with ``last_updated`` set to 2020-01-01.  After
    migrating with ``date_from="2015-01-01"`` only 667 must have been
    processed: 666 keeps ``valid is None`` and has no literature record,
    while 667 is valid and retrievable.
    """
    raw_record_1 = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">On the validity of INSPIRE records</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>"
    )

    raw_record_2 = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">On the validity of INSPIRE records</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>"
    )

    # Entry dated before the cut-off: must be left untouched by the migration.
    prod_record_1 = LegacyRecordsMirror.from_marcxml(raw_record_1)
    prod_record_1.last_updated = datetime.datetime(2010, 1, 1)
    db.session.merge(prod_record_1)

    # Entry dated after the cut-off.  The timestamp has to be set on
    # prod_record_2 itself, before it is merged; assigning it to the already
    # merged prod_record_1 would leave 667 without an explicit date.
    prod_record_2 = LegacyRecordsMirror.from_marcxml(raw_record_2)
    prod_record_2.last_updated = datetime.datetime(2020, 1, 1)
    db.session.merge(prod_record_2)

    migrate_from_mirror(date_from="2015-01-01")

    rec_1 = LegacyRecordsMirror.query.filter_by(recid=666).one()
    rec_2 = LegacyRecordsMirror.query.filter_by(recid=667).one()
    # Only record 2 should be valid as rec_1 migration should not run.
    assert rec_1.valid is None
    assert rec_2.valid is True

    with pytest.raises(PIDDoesNotExistError):
        LiteratureRecord.get_record_by_pid_value("666")
    assert LiteratureRecord.get_record_by_pid_value("667")
Esempio n. 9
0
def test_migrating_deleted_record_registers_control_number_regression(inspire_app):
    """A deleted experiment record is created and its pid is marked DELETED.

    The payload for recid 1775082 carries ``EXPERIMENT`` and ``DELETED`` in
    its 980 fields.  After migration the record must be retrievable under
    the ``exp`` pid type and the pid with value ``1775082`` must have
    ``PIDStatus.DELETED``.
    """
    deleted_experiment_marcxml = b'<record>\n  <controlfield tag="001">1775082</controlfield>\n  <controlfield tag="005">20200131230810.0</controlfield>\n  <datafield tag="856" ind1="4" ind2=" ">\n    <subfield code="u">https://gambit.hepforge.org/</subfield>\n  </datafield>\n  <datafield tag="909" ind1="C" ind2="O">\n    <subfield code="o">oai:inspirehep.net:1775082</subfield>\n    <subfield code="q">INSPIRE:Experiments</subfield>\n  </datafield>\n  <datafield tag="961" ind1=" " ind2=" ">\n    <subfield code="x">2020-01-13</subfield>\n    <subfield code="c">2020-01-31</subfield>\n  </datafield>\n  <datafield tag="980" ind1=" " ind2=" ">\n    <subfield code="a">CORE</subfield>\n  </datafield>\n  <datafield tag="980" ind1=" " ind2=" ">\n    <subfield code="a">EXPERIMENT</subfield>\n  </datafield>\n  <datafield tag="980" ind1=" " ind2=" ">\n    <subfield code="c">DELETED</subfield>\n  </datafield>\n  <datafield tag="710" ind1=" " ind2=" ">\n    <subfield code="g">GAMBIT</subfield>\n  </datafield>\n  <datafield tag="245" ind1=" " ind2=" ">\n    <subfield code="a">GAMBIT : Global And Modular BSM Inference Tool</subfield>\n  </datafield>\n  <datafield tag="372" ind1=" " ind2=" ">\n    <subfield code="9">INSPIRE</subfield>\n    <subfield code="a">9.2</subfield>\n  </datafield>\n  <datafield tag="520" ind1=" " ind2=" ">\n    <subfield code="a">GAMBIT is a global fitting code for generic Beyond the Standard Model theories, designed to allow fast and easy definition of new models, observables, likelihoods, scanners and backend physics codes.</subfield>\n  </datafield>\n  <datafield tag="119" ind1=" " ind2=" ">\n    <subfield code="a">GAMBIT</subfield>\n    <subfield code="c">GAMBIT</subfield>\n    <subfield code="d">GAMBIT</subfield>\n  </datafield>\n</record>'

    mirror_entry = LegacyRecordsMirror.from_marcxml(deleted_experiment_marcxml)
    db.session.add(mirror_entry)

    create_records_from_mirror_recids([1775082])
    control_number_pid = PersistentIdentifier.query.filter_by(
        pid_value="1775082").one()

    assert InspireRecord.get_record_by_pid_value("1775082", "exp")
    assert control_number_pid.status == PIDStatus.DELETED
Esempio n. 10
0
def test_inspire_prod_records_from_marcxml():
    """``from_marcxml`` fills recid and marcxml, leaving valid/error unset.

    The recid is read as the integer 1591551, ``marcxml`` equals the payload
    that was passed in, and both ``valid`` and ``error`` are ``None``.
    """
    marcxml = b"""
        <record>
          <controlfield tag="001">1591551</controlfield>
          <controlfield tag="005">20171011194718.0</controlfield>
          <datafield tag="100" ind1=" " ind2=" ">
            <subfield code="a">Chetyrkin, K.G.</subfield>
          </datafield>
        </record>
        """

    mirror_entry = LegacyRecordsMirror.from_marcxml(marcxml)

    assert mirror_entry.recid == 1591551
    assert mirror_entry.marcxml == marcxml
    assert mirror_entry.valid is None
    assert mirror_entry.error is None
Esempio n. 11
0
def test_migrate_recids_from_mirror_all_only_with_literature_author_and_invalid(
        inspire_app, celery_app_with_context, celery_session_worker):
    """Run ``migrate_from_mirror(also_migrate="all")`` on four mirror entries.

    Staged entries: literature 666 whose 999C5 reference points at 667,
    literature 667, literature 668 with a 260$c value that is not a date,
    and author 669.  The checks are retried through ``retry_until_pass``
    and require that 667 ends up with a citation count of 1, that 666, 667
    and 669 expose their control number in the search index data, and that
    668 cannot be found as a ``lit`` record.
    """
    citer_control_number = 666
    citing_control_number = 667
    invalid_control_number = 668
    author_control_number = 669

    # Record 666: its 999C5 reference carries recid 667 in subfield 0.
    citer_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")
    db.session.add(LegacyRecordsMirror.from_marcxml(citer_marcxml))

    # Record 667: the one referenced by 666.
    citing_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")
    db.session.add(LegacyRecordsMirror.from_marcxml(citing_marcxml))

    # Record 668: 260$c does not contain a date.
    invalid_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">668</controlfield>'
        b'  <datafield tag="260" ind1=" " ind2=" ">'
        b'    <subfield code="c">Definitely not a date</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")
    db.session.add(LegacyRecordsMirror.from_marcxml(invalid_marcxml))

    # Record 669: author record (HEPNAMES collection).
    author_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">669</controlfield>'
        b'  <datafield tag="100" ind1=" " ind2=" ">'
        b'    <subfield code="a">Jessica Jones</subfield>'
        b'    <subfield code="q">Jones Jessica</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEPNAMES</subfield>'
        b"  </datafield>"
        b"</record>")
    db.session.add(LegacyRecordsMirror.from_marcxml(author_marcxml))

    db.session.commit()

    migrate_from_mirror(also_migrate="all")

    def assert_everything_migrated():
        # Passed to retry_until_pass below, which re-runs these checks.
        citer = InspireRecord.get_record_by_pid_value(
            citer_control_number, "lit")
        citing = InspireRecord.get_record_by_pid_value(
            citing_control_number, "lit")
        author = InspireRecord.get_record_by_pid_value(
            author_control_number, "aut")

        assert citing.citation_count == 1

        # Each migrated record must expose its control number in the data
        # returned by InspireSearch.
        expectations = (
            (citer_control_number, citer),
            (citing_control_number, citing),
            (author_control_number, author),
        )
        for expected_control_number, record in expectations:
            es_data = InspireSearch.get_record_data_from_es(record)
            assert expected_control_number == es_data["control_number"]

        with pytest.raises(PIDDoesNotExistError):
            InspireRecord.get_record_by_pid_value(invalid_control_number,
                                                  "lit")

    retry_until_pass(assert_everything_migrated)
Esempio n. 12
0
def test_migrate_recids_from_mirror_all_only_with_literature(
        app, celery_app_with_context, celery_session_worker):
    """Run ``migrate_from_mirror(also_migrate="all")`` on two literature entries.

    Record 666 carries a 999C5 reference pointing at recid 667.  After the
    migration and a fixed five second wait, 667 must have a citation count
    of 1 and both records must expose their control number in the data
    returned by ``InspireSearch.get_record_data_from_es``.
    """
    citer_control_number = 666
    citing_control_number = 667

    # Record 666: its 999C5 reference carries recid 667 in subfield 0.
    citer_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">666</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citer record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b'   <datafield tag="999" ind1="C" ind2="5">'
        b'    <subfield code="0">667</subfield>'
        b'    <subfield code="h">Achasov, M.N.</subfield>'
        b'    <subfield code="k">snd-2018</subfield>'
        b'    <subfield code="m">(SND Collaboration)</subfield>'
        b'    <subfield code="o">2</subfield>'
        b'    <subfield code="s">Phys.Rev.,D97,012008</subfield>'
        b'    <subfield code="x">'
        b"    [2] M. N. Achasov (SND Collaboration), Phys. Rev. D 97, 012008 (2018)."
        b"    </subfield>"
        b'    <subfield code="y">2018</subfield>'
        b'    <subfield code="z">0</subfield>'
        b'    <subfield code="z">1</subfield>'
        b"    </datafield>"
        b"</record>")
    db.session.add(LegacyRecordsMirror.from_marcxml(citer_marcxml))

    # Record 667: the one referenced by 666.
    citing_marcxml = (
        b"<record>"
        b'  <controlfield tag="001">667</controlfield>'
        b'  <datafield tag="245" ind1=" " ind2=" ">'
        b'    <subfield code="a">This is a citing record</subfield>'
        b"  </datafield>"
        b'  <datafield tag="980" ind1=" " ind2=" ">'
        b'    <subfield code="a">HEP</subfield>'
        b"  </datafield>"
        b"</record>")
    db.session.add(LegacyRecordsMirror.from_marcxml(citing_marcxml))
    db.session.commit()

    migrate_from_mirror(also_migrate="all")

    # Fixed wait: per the original author there is no other way to wait for
    # the migration task chain to finish.
    time.sleep(5)

    citer = InspireRecord.get_record_by_pid_value(citer_control_number, "lit")
    citing = InspireRecord.get_record_by_pid_value(
        citing_control_number, "lit")

    assert citing.citation_count == 1

    # Both records must expose their control number in the search index data.
    expectations = (
        (citer_control_number, citer),
        (citing_control_number, citing),
    )
    for expected_control_number, record in expectations:
        es_data = InspireSearch.get_record_data_from_es(record)
        assert expected_control_number == es_data["control_number"]