Esempio n. 1
0
def test_read_db():
    """Exercise DatabaseReader with the 'unread' and 'all' reading modes.

    Three passes are made over every text content id in the test database,
    all with the first SPARSER reader and the default batch size:

    1. 'unread': one new reading is expected per text content id, and after
       dumping, the Reading table must hold exactly that many rows.
    2. 'all': no extant readings may be reported, and the number of new
       readings must equal that of the first pass.
    3. 'unread' again: no new readings may be produced, and the number of
       extant readings must equal the number produced by the first pass.
    """
    # Gather the inputs: the database, all content ids, and the reader.
    db = get_db_with_pubmed_content()
    content_ids = {tcid for (tcid,) in db.select_all(db.TextContent.id)}
    sparser = get_readers('SPARSER')[0]

    # Pass 1: reading_mode 'unread'; everything should get read.
    first_pass = rdb.DatabaseReader(content_ids, sparser, db=db,
                                    reading_mode='unread')
    first_pass.get_readings()
    num_first = len(first_pass.new_readings)
    num_expected = len(content_ids)
    assert num_first == num_expected, (
        'Expected %d readings, but got %d.' % (num_expected, num_first))
    first_pass.dump_readings_to_db()
    num_in_db = len(db.select_all(db.Reading))
    assert num_in_db == num_first, (
        'Expected %d readings to be copied to db, only %d found.'
        % (num_first, num_in_db))

    # Pass 2: reading_mode 'all'; new readings are expected even though
    # readings are already stored.
    sparser.reset()
    second_pass = rdb.DatabaseReader(content_ids, sparser, db=db,
                                     reading_mode='all')
    second_pass.get_readings()

    num_second_old = len(second_pass.extant_readings)
    num_second_new = len(second_pass.new_readings)
    print(num_second_old, num_second_new, num_first, num_in_db)
    assert num_second_old == 0, (
        "Got %d old readings despite reading_mode set to 'all'."
        % num_second_old)
    assert num_first == num_second_new, (
        "Got %d readings from run 1 but %d from run 2."
        % (num_first, num_second_new))

    # Pass 3: reading_mode 'unread' once more; nothing new should be
    # produced, only the existing readings picked up.
    sparser.reset()
    third_pass = rdb.DatabaseReader(content_ids, sparser, db=db,
                                    reading_mode='unread')
    third_pass.get_readings()

    num_third_new = len(third_pass.new_readings)
    num_third_old = len(third_pass.extant_readings)

    assert num_third_new == 0, (
        "Got new readings when reading_mode was 'unread' and readings existed.")
    assert num_third_old == num_first, (
        "Missed old readings when reading_mode was 'unread' and readings "
        "existed: expected %d, but got %d." % (num_first, num_third_old))
Esempio n. 2
0
def test_get_content():
    """Check the content iteration of DatabaseReader for each reader.

    For every reader returned by get_readers, a worker built with all text
    content ids must iterate over as many items as the TextContent query
    counts, and a worker built with no ids must yield nothing truthy.
    """
    db = get_db_with_pubmed_content()
    content_ids = {tcid for (tcid,) in db.select_all(db.TextContent.id)}
    for reader in get_readers():
        full_worker = rdb.DatabaseReader(content_ids, reader, db=db)

        # The iteration should cover every row counted by the query.
        num_expected = db.filter_query(db.TextContent).count()
        num_found = len(list(full_worker.iter_over_content()))
        assert num_found == num_expected, (
            "Expected %d results in our query, got %d."
            % (num_expected, num_found))

        # An empty collection of ids should produce no content at all.
        empty_worker = rdb.DatabaseReader([], reader, db=db)
        leftovers = list(empty_worker.iter_over_content())
        assert not any(leftovers), (
            "Expected no results when passing no ids.")
Esempio n. 3
0
def test_reading_content_insert():
    """Test the primary content through-put: read, store, make statements.

    The test proceeds in three stages:

    1. One DatabaseReader per reader from get_readers reads all text content
       ids; each worker must produce one new reading per id.
    2. Every worker dumps its readings to the database, and each produced
       reading must match at least one row selected from the Reading table.
    3. Every worker generates and dumps its statements; the RawStatements id
       count in the database must equal the total number of statement
       outputs, and at least one RawAgents row must exist.
    """
    db = get_db_with_pubmed_content()

    print("Test reading")
    tcids = {tcid for tcid, in db.select_all(db.TextContent.id)}
    readers = get_readers()
    workers = [
        rdb.DatabaseReader(tcids, reader, verbose=True, db=db)
        for reader in readers
    ]
    # Every worker is expected to read each piece of text content once.
    expected_output_len = len(tcids)
    reading_output = []
    for worker in workers:
        worker.get_readings()

        N_new = len(worker.new_readings)
        reading_output.extend(worker.new_readings)
        # NOTE: the trailing space after the first sentence keeps the two
        # concatenated literals from running together in the message.
        assert N_new == expected_output_len, \
            ("Not all text content successfully read by %s. "
             "Expected %d outputs, but got %d."
             % (worker.reader.name, expected_output_len, N_new))

    print("Test reading insert")
    for worker in workers:
        worker.dump_readings_to_db()
    r_list = db.select_all(db.Reading)

    def is_complete_match(r_list, reading_output):
        """Return True if each reading output matches some entry of r_list."""
        # Generator expressions let any/all stop at the first decisive
        # result instead of evaluating every pairing up front.
        return all(
            any(rd.matches(r) for r in r_list) for rd in reading_output)

    assert is_complete_match(r_list, reading_output), \
        "Not all reading output posted."

    print("Test making statements")
    num_stmts = 0
    for worker in workers:
        worker.get_statements()
        num_stmts += len(worker.statement_outputs)

        worker.dump_statements_to_db()

    num_db_sids = db.count(db.RawStatements.id)
    assert num_db_sids == num_stmts, \
        "Only %d/%d statements added." % (num_db_sids, num_stmts)
    assert len(db.select_all(db.RawAgents)), "No agents added."