예제 #1
0
def test_authors_in_cache():
    create_cache(drop=True, file=test_cache)
    # Variables
    expected_auth = ["53164702100", "57197093438"]
    search_auth = ["55317901900"]
    # Test empty cache
    df1 = pd.DataFrame(expected_auth, columns=["auth_id"], dtype="int64")
    incache, tosearch = authors_in_cache(df1, file=test_cache)
    expected_cols = ['auth_id', 'eid', 'surname', 'initials', 'givenname',
                     'affiliation', 'documents', 'affiliation_id', 'city',
                     'country', 'areas']
    expected_auth = [int(au) for au in expected_auth]
    assert_equal(tosearch, expected_auth)
    assert_equal(len(incache), 0)
    assert_equal(incache.columns.tolist(), expected_cols)
    # Test partial retrieval
    q = "AU-ID({})".format(') OR AU-ID('.join([str(a) for a in expected_auth]))
    res = pd.DataFrame(AuthorSearch(q).authors, dtype="int64")
    res["auth_id"] = res["eid"].str.split("-").str[-1]
    res = res[expected_cols]
    cache_insert(res, table="authors", file=test_cache)
    df2 = pd.DataFrame(expected_auth + search_auth, columns=["auth_id"],
                       dtype="int64")
    incache, tosearch = authors_in_cache(df2, file=test_cache)
    assert_equal(tosearch, [55317901900])
    assert_equal(len(incache), 2)
    # Test full retrieval
    incache, tosearch = authors_in_cache(df1, file=test_cache)
    assert_equal(tosearch, [])
    assert_equal(len(incache), 2)
예제 #2
0
def test_sources_afids_in_cache_empty():
    create_cache(drop=True, file=test_cache)
    df = pd.DataFrame(list(product([22900], [2010, 2005])),
                      columns=["source_id", "year"], dtype="int64")
    sa_incache, sa_search = sources_in_cache(df, file=test_cache, afid=True)
    assert_frame_equal(sa_search, df)
    assert_true(sa_incache.empty)
예제 #3
0
def test_sources_afids_in_sources_cache():
    create_cache(drop=True, file=test_cache)
    # Variables
    expected_sources = [22900]
    expected_years = [2010, 2005]
    df = pd.DataFrame(list(product(expected_sources, expected_years)),
                      columns=["source_id", "year"], dtype="int64")
    # Populate cache
    res = query_year(expected_years[0], expected_sources, False, False, afid=True)
    cache_insert(res, table="sources", file=test_cache)
    # Retrieve from cache
    sources_ys_incache, sources_ys_search = sources_in_cache(df, file=test_cache)
    expected_sources = [int(s) for s in expected_sources]
    assert_equal(sources_ys_incache.source_id.tolist(), expected_sources)
    assert_equal(sources_ys_incache.year.tolist(), [expected_years[0]])
    assert_equal(sources_ys_search.source_id.tolist(), expected_sources)
    assert_equal(sources_ys_search.year.tolist(), [expected_years[1]])
예제 #4
0
def test_sources_afids_in_cache_partial():
    create_cache(drop=True, file=test_cache)
    # Variables
    expected_sources = [22900]
    expected_years = [2010, 2005]
    df = pd.DataFrame(list(product(expected_sources, expected_years)),
                      columns=["source_id", "year"], dtype="int64")
    sa_incache, sa_search = sources_in_cache(df, file=test_cache, afid=True)
    # Populate cache
    res = query_year(expected_years[0], expected_sources, False, False, afid=True)
    cache_insert(res, table="sources_afids", file=test_cache)
    # Retrieve from cache
    sa_incache, sa_search = sources_in_cache(df, file=test_cache, afid=True)
    expected_sources = set([int(s) for s in expected_sources])
    assert_equal(set(sa_incache.source_id.tolist()), set(expected_sources))
    assert_equal(set(sa_incache.year.tolist()), set([expected_years[0]]))
    assert_equal(set(sa_search.source_id.tolist()), set(expected_sources))
    assert_equal(set(sa_search.year.tolist()), set([expected_years[1]]))
    expected = range(182-5, 182+5)
    assert_true(len(sa_incache) in expected)
    assert_true(len(sa_incache.afid.drop_duplicates()) in expected)
예제 #5
0
def test_author_year_in_cache():
    create_cache(drop=True, file=test_cache)
    # Variables
    expected_auth = ["53164702100", "57197093438"]
    search_auth = ["55317901900"]
    year = 2016
    # Test empty cache
    df1 = pd.DataFrame(expected_auth, columns=["auth_id"],
                       dtype="int64")
    df1["year"] = year
    auth_y_incache, auth_y_search = author_year_in_cache(df1, file=test_cache)
    assert_frame_equal(auth_y_search, df1)
    assert_equal(len(auth_y_incache), 0)
    # Test partial retrieval
    fill = ') OR AU-ID('.join([str(a) for a in expected_auth])
    q = "(AU-ID({})) AND PUBYEAR BEF {}".format(fill, year+1)
    res = build_dict(ScopusSearch(q).results, expected_auth)
    res = pd.DataFrame.from_dict(res, orient="index", dtype="int64")
    res["year"] = year
    cols = ["year", "first_year", "n_pubs", "n_coauth"]
    res = res[cols].reset_index().rename(columns={"index": "auth_id"})
    cache_insert(res, table="author_year", file=test_cache)
    df2 = pd.DataFrame(expected_auth + search_auth,
                       columns=["auth_id"], dtype="int64")
    df2["year"] = year
    auth_y_incache, auth_y_search = author_year_in_cache(df2, file=test_cache)
    expected_auth = [int(au) for au in expected_auth]
    search_auth = [int(au) for au in search_auth]
    assert_equal(sorted(auth_y_incache.auth_id.tolist()), expected_auth)
    assert_equal(auth_y_incache.year.tolist(), [year, year])
    assert_equal(auth_y_search.auth_id.tolist(), search_auth)
    assert_equal(auth_y_search.year.tolist(), [year])
    # Test full retrieval
    auth_year_incache, auth_year_search = author_year_in_cache(df1, file=test_cache)
    assert_equal(sorted(auth_year_incache.auth_id.tolist()), expected_auth)
    assert_equal(auth_year_incache.year.tolist(), [year, year])
    assert_true(auth_year_search.empty)
예제 #6
0
def test_author_size_in_cache():
    create_cache(drop=True, file=test_cache)
    # Variables
    expected_auth = 53164702100
    expected_years = [2010, 2017]
    pubs1 = 0
    pubs2 = 6
    cols = ["auth_id", "year"]
    df = pd.DataFrame(list(product([expected_auth], expected_years)),
                      columns=cols, dtype="int64")
    # Test empty cache
    size = author_size_in_cache(df, file=test_cache)
    assert_equal(len(size), 0)
    assert_true(isinstance(size, pd.DataFrame))
    # Test adding to and retrieving from cache
    tp1 = (expected_auth, expected_years[0], pubs1)
    cache_insert(tp1, table="author_size", file=test_cache)
    tp2 = (expected_auth, expected_years[1], pubs2)
    cache_insert(tp2, table="author_size", file=test_cache)
    size = author_size_in_cache(df, file=test_cache)
    assert_equal(len(size), 2)
    assert_frame_equal(size[cols], df)
    assert_equal(size[size.year == expected_years[0]]["n_pubs"][0], pubs1)
    assert_equal(size[size.year == expected_years[1]]["n_pubs"][1], pubs2)