Ejemplo n.º 1
0
def test_require_title_and_author(models, mock_hlom):

    """
    insert_records() should leave out any MARC record that is missing a
    title, an author, or both.
    """

    # Incomplete records, added in order: neither field, title only,
    # author only.
    incomplete = [
        ('', ''),
        ('War and Peace', ''),
        ('', 'Leo Tolstoy'),
    ]

    for title, author in incomplete:
        mock_hlom.add_marc(title=title, author=author)

    # The one record that carries both fields.
    complete = mock_hlom.add_marc(
        title='War and Peace',
        author='Leo Tolstoy'
    )

    HLOM_Record.insert_records()

    # Exactly one row should exist afterwards...
    row_count = HLOM_Record.select().count()
    assert row_count == 1

    # ...and it should be the row for the complete record.
    matches_complete = (
        HLOM_Record.control_number == complete.control_number()
    )
    assert HLOM_Record.get(matches_complete)
Ejemplo n.º 2
0
Archivo: hlom.py Proyecto: overview/osp
def queue_queries():

    """
    Enqueue one `query` job per HLOM record, passing the record's id as
    the job argument.
    """

    # The select is wrapped in ServerSide, as in the original; presumably
    # this streams rows through a server-side cursor - TODO confirm.
    all_records = ServerSide(HLOM_Record.select())

    for row in all_records:
        config.rq.enqueue(query, row.id)
Ejemplo n.º 3
0
    def copy_records(cls, min_rank=1000):

        """
        Create a row on this model for each cited HLOM record that passes
        the title / author term filters.

        Args:
            min_rank (int): Threshold compared against
                `Counts.max_rank()` for the title terms and for the
                author terms. A record is skipped when either value is
                below it.
        """

        # Records joined to their citations, grouped per record.
        query = HLOM_Record.select().join(HLOM_Citation)
        query = query.group_by(HLOM_Record.id)

        # Collapse duplicates: distinct on the deduping hash, ordered by
        # that hash and then by record id.
        query = query.distinct([HLOM_Record.metadata['deduping_hash']])
        query = query.order_by(
            HLOM_Record.metadata['deduping_hash'],
            HLOM_Record.id
        )

        counts = Counts()

        for record in query:

            title_terms = termify(record.marc.title())
            author_terms = termify(record.marc.author())

            # Skip when, checked in this order:
            #   - the title or the author yields no terms;
            #   - title and author share at least one term;
            #   - the title terms' max rank is below `min_rank`;
            #   - the author terms' max rank is below `min_rank`.
            skip = (
                not title_terms
                or not author_terms
                or set.intersection(title_terms, author_terms)
                or counts.max_rank(title_terms) < min_rank
                or counts.max_rank(author_terms) < min_rank
            )

            if not skip:
                cls.create(**record._data)
Ejemplo n.º 4
0
def test_insert_records(models, mock_hlom):

    """
    After HLOM_Record.insert_records(), every mocked MARC record should
    have a matching row whose stored body equals the source record.
    """

    # 10 data files ('0' .. '9') with 10 records apiece, 100 in total.
    marcs = [
        mock_hlom.add_marc(
            data_file=str(segment),
            title='title',
            author='author'
        )
        for segment in range(10)
        for _ in range(10)
    ]

    # Load the rows.
    HLOM_Record.insert_records()

    # One row per MARC record.
    total = HLOM_Record.select().count()
    assert total == 100

    for source in marcs:

        # Look the row up by control number.
        stored = HLOM_Record.get(
            HLOM_Record.control_number == source.control_number()
        )

        # The serialized record body should be unchanged.
        assert stored.marc.as_marc() == source.as_marc()