Exemple #1
0
def test_kb_writer_multiple_runs(tmpdir):
    """Check that re-opening a KB file replaces its previous content.

    Two separate ``KbWriter`` sessions target the same path; only the entry
    written during the second session is expected to remain in the file.
    """
    kb_path = tmpdir.join('multiple_flushes.kb')
    sessions = (
        ('Journal of Testing', 'J.Testing'),
        ('Second Journal of Testing', 'Sec.J.Testing'),
    )

    # Each entry gets its own writer, so the file goes through a complete
    # open/close cycle between the two writes.
    for title, abbreviation in sessions:
        with KbWriter(str(kb_path)) as kb:
            kb.add_entry(title, abbreviation)

    assert kb_path.readlines() == [
        'SECOND JOURNAL OF TESTING---Sec.J.Testing\n',
    ]
Exemple #2
0
def test_kb_writer_many_lines(tmpdir):
    """Check that a large number of entries all end up in the KB file.

    The same entry is added 100000 times within a single ``KbWriter``
    session and the resulting file must contain exactly that many lines.
    """
    kb_file = tmpdir.join('many_lines.kb')
    line_count = 100000

    with KbWriter(str(kb_file)) as writer:
        # ``range`` rather than ``xrange``: the latter does not exist on
        # Python 3, while ``range`` is available on both major versions.
        for _ in range(line_count):
            writer.add_entry('Journal of Testing', 'J.Testing')

    assert len(kb_file.readlines()) == line_count
Exemple #3
0
def test_kb_writer_unicode(tmpdir):
    """Check that a title with a non-ASCII character is written correctly.

    The expected line shows the accented letter kept (upper-cased) and the
    apostrophe of the input title replaced by a space.
    """
    kb_path = tmpdir.join('unicode.kb')

    with KbWriter(str(kb_path)) as kb:
        kb.add_entry(u'Journal de l\'Académie', 'J.Acad.')

    written = kb_path.readlines()

    assert written == [
        'JOURNAL DE L ACADÉMIE---J.Acad.\n',
    ]
def test_kb_writer_keeps_colons(tmpdir):
    """Check that a colon inside a title is preserved in the KB file."""
    kb_path = tmpdir.join('keeps_colons.kb')
    title = 'J PHYS G: NUCL PART PHYS'

    with KbWriter(str(kb_path)) as kb:
        kb.add_entry(title, 'J.Phys.')

    written = kb_path.readlines()

    # The title is expected to come out unchanged, colon included.
    assert written == [
        'J PHYS G: NUCL PART PHYS---J.Phys.\n',
    ]
Exemple #5
0
def create_journal_kb_file():
    """Write refextract's journal knowledge base from the database.

    The output path is read from the ``REFEXTRACT_JOURNAL_KB_PATH`` setting
    of the current application. Two raw, PostgreSQL-specific queries select
    data from the ``records_metadata`` rows whose ``_collections`` contains
    ``Journals``, and every result is handed to a ``KbWriter``, which
    produces the file refextract reads: a list of lines of the form::

        SOURCE---DESTINATION

    meaning that ``SOURCE`` is replaced by ``DESTINATION`` when found.

    For each journal the short title is always the ``DESTINATION``; the
    ``SOURCE`` values are the short title itself, the journal title and each
    of the title variants.

    Note that refextract expects ``SOURCE`` in a normalized, upper-cased
    form. The values are passed to ``KbWriter.add_entry`` exactly as they
    come out of the database; the normalization is presumably applied by the
    writer (to be confirmed against ``KbWriter``).
    """
    kb_path = current_app.config['REFEXTRACT_JOURNAL_KB_PATH']

    # One row per journal: its short title and its full title.
    titles_sql = """
        SELECT
            r.json -> 'short_title' AS short_title,
            r.json -> 'journal_title' -> 'title' AS journal_title
        FROM
            records_metadata AS r
        WHERE
            (r.json -> '_collections')::jsonb ? 'Journals'
    """

    # One row per (journal, title variant) pair.
    title_variants_sql = """
        SELECT
            r.json -> 'short_title' AS short_title,
            jsonb_array_elements((r.json -> 'title_variants')::jsonb) AS title_variant
        FROM
            records_metadata AS r
        WHERE
            (r.json -> '_collections')::jsonb ? 'Journals'
    """

    # Both queries are issued before the KB file is opened for writing.
    journal_rows = db.session.execute(titles_sql)
    variant_rows = db.session.execute(title_variants_sql)

    with KbWriter(kb_path=kb_path) as writer:
        for journal in journal_rows:
            short_title = journal['short_title']
            # The short title maps to itself, then the full title maps to it.
            writer.add_entry(value=short_title, kb_key=short_title)
            writer.add_entry(
                value=journal['journal_title'],
                kb_key=short_title,
            )

        for variant in variant_rows:
            writer.add_entry(
                value=variant['title_variant'],
                kb_key=variant['short_title'],
            )
Exemple #6
0
def test_kb_writer_two_entries(tmpdir):
    """Check that two entries added in one session are written in order.

    Both titles map to the same abbreviation; the expected lines show the
    titles upper-cased and the dot of ``J.Testing`` turned into a space on
    the source side only.
    """
    kb_path = tmpdir.join('two_entries.kb')
    abbreviation = 'J.Testing'

    with KbWriter(str(kb_path)) as kb:
        for title in ('Journal of Testing', 'J.Testing'):
            kb.add_entry(title, abbreviation)

    assert kb_path.readlines() == [
        'JOURNAL OF TESTING---J.Testing\n',
        'J TESTING---J.Testing\n',
    ]