Example #1
0
def test_sparser_parallel():
    "Test running sparser in parallel."
    db = get_db_with_pubmed_content()
    sparser_reader = SparserReader(n_proc=2)
    tc_list = db.select_all(db.TextContent)
    result = sparser_reader.read([process_content(tc) for tc in tc_list],
                                 verbose=True, log=True)
    N_exp = len(tc_list)
    N_res = len(result)
    assert N_exp == N_res, \
        "Expected to get %d results, but got %d." % (N_exp, N_res)
Example #2
0
def test_sparser_parallel_one_batch():
    "Test that sparser runs with multiple procs with batches of 1."
    db = get_db_with_pubmed_content()
    sparser_reader = SparserReader(n_proc=2)
    tc_list = db.select_all(db.TextContent)
    result = sparser_reader.read([process_content(tc) for tc in tc_list],
                                 verbose=True, n_per_proc=1)
    N_exp = len(tc_list)
    N_res = len(result)
    assert N_exp == N_res, \
        "Expected to get %d results, but got %d." % (N_exp, N_res)
Example #3
0
def test_reading_content_insert():
    "Test the content primary through-put of make_db_readings."
    db = get_db_with_pubmed_content()

    print("Test reading")
    tc_list = db.select_all(db.TextContent)
    readers = get_readers()
    reading_output = []
    for reader in readers:
        reading_output += reader.read([process_content(tc) for tc in tc_list],
                                      verbose=True)
    expected_output_len = len(tc_list)*len(readers)
    assert len(reading_output) == expected_output_len, \
        ("Not all text content successfully read."
         "Expected %d outputs, but got %d.") % (expected_output_len,
                                                len(reading_output))

    print("Test reading insert")
    rdb.upload_readings(reading_output, db=db)
    r_list = db.select_all(db.Readings)

    def is_complete_match(r_list, reading_output):
        return all([any([rd.matches(r) for r in r_list])
                    for rd in reading_output])

    assert is_complete_match(r_list, reading_output), \
        "Not all reading output posted."
    rdb.upload_readings(reading_output, db=db)
    assert is_complete_match(r_list, reading_output), \
        "Uniqueness constraints failed."

    print("Test enrichement")
    assert all([rd.reading_id is None for rd in reading_output]), \
        "No readings should have reading_ids already."
    rdb._enrich_reading_data(reading_output, db=db)
    assert all([rd.reading_id is not None for rd in reading_output]),\
        "Some reading data objects didn't have reading_ids after enrichment."

    print("Test making statements")
    stmts = rdb.produce_statements(reading_output, db=db)
    assert len(stmts), 'No statements created.'
    db_stmts = db.select_all(db.Statements)
    assert len(db_stmts) == len(stmts), \
        "Only %d/%d statements added." % (len(db_stmts), len(stmts))
    assert len(db.select_all(db.Agents)), "No agents added."