Example #1
    def get_by_eid(self, eid):
        """Return the h-index of an author by a EID if found, None otherwise.
        """

        au = AuthorSearch('AU-ID({})'.format(eid))
        if au.get_results_size() == 0:
            return None

        assert au.get_results_size() == 1
        res = AuthorRetrieval(au.authors[0][0])
        return res.h_index
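For context, `au.authors` is a list of namedtuples whose first field is the author EID (compare the fields listed in the test module further below), and `AuthorRetrieval` also accepts that EID directly. A minimal standalone sketch of the same lookup, assuming a configured pybliometrics installation and reusing the author ID from the test data below:

from pybliometrics.scopus import AuthorRetrieval, AuthorSearch

au = AuthorSearch("AU-ID(6602907525)")      # author ID taken from the test data below
if au.get_results_size() == 1:
    author = au.authors[0]                  # namedtuple: eid, surname, initials, ...
    print(author.eid)                       # e.g. '9-s2.0-6602907525'
    print(AuthorRetrieval(author.eid).h_index)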
Example #2
    def get_by_name(self, first, last):
        """Return a table of EID, affiliation, town, country otherwise."""

        au = AuthorSearch("AUTHLAST({}) and AUTHFIRST({})".format(last, first))

        if au.get_results_size() == 0:
            return None

        df = pd.DataFrame(au.authors)
        ret = []
        for x in zip(df["eid"], df["affiliation"], df["city"], df["country"]):
            tokens = x[0].split("-")
            ret.append([tokens[-1], x[1], x[2], x[3]])

        return ret
Example #3
def test_authors_in_cache():
    create_cache(drop=True, file=test_cache)
    # Variables
    expected_auth = ["53164702100", "57197093438"]
    search_auth = ["55317901900"]
    # Test empty cache
    df1 = pd.DataFrame(expected_auth, columns=["auth_id"], dtype="int64")
    incache, tosearch = authors_in_cache(df1, file=test_cache)
    expected_cols = ['auth_id', 'eid', 'surname', 'initials', 'givenname',
                     'affiliation', 'documents', 'affiliation_id', 'city',
                     'country', 'areas']
    expected_auth = [int(au) for au in expected_auth]
    assert_equal(tosearch, expected_auth)
    assert_equal(len(incache), 0)
    assert_equal(incache.columns.tolist(), expected_cols)
    # Test partial retrieval
    q = "AU-ID({})".format(') OR AU-ID('.join([str(a) for a in expected_auth]))
    res = pd.DataFrame(AuthorSearch(q).authors, dtype="int64")
    res["auth_id"] = res["eid"].str.split("-").str[-1]
    res = res[expected_cols]
    cache_insert(res, table="authors", file=test_cache)
    df2 = pd.DataFrame(expected_auth + search_auth, columns=["auth_id"],
                       dtype="int64")
    incache, tosearch = authors_in_cache(df2, file=test_cache)
    assert_equal(tosearch, [55317901900])
    assert_equal(len(incache), 2)
    # Test full retrieval
    incache, tosearch = authors_in_cache(df1, file=test_cache)
    assert_equal(tosearch, [])
    assert_equal(len(incache), 2)
Example #4
def create_obj(params):
    # q_type, fields and view are captured from the enclosing scope (closure)
    if q_type == "author":
        return AuthorSearch(**params)
    elif q_type == "docs":
        params["integrity_fields"] = fields
        params["view"] = view
        return ScopusSearch(**params)
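`create_obj` is evidently a nested helper: `q_type`, `fields`, and `view` are not arguments but names captured from an enclosing function. A hedged sketch of how such an enclosing wrapper might look; the outer function name and its defaults are assumptions, not part of the original example:

from pybliometrics.scopus import AuthorSearch, ScopusSearch

def run_query(q_type, query, fields=None, view="STANDARD"):
    """Hypothetical wrapper whose locals are captured by create_obj."""
    def create_obj(params):
        if q_type == "author":
            return AuthorSearch(**params)
        elif q_type == "docs":
            params["integrity_fields"] = fields
            params["view"] = view
            return ScopusSearch(**params)

    return create_obj({"query": query})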
Example #5
    def get_by_name(self, first, last):
        """Return the h-index of an author if there is only one matching, None if none is
        found, or a table with EID, affiliation, town, country otherwise.
        """

        au = AuthorSearch('AUTHLAST({}) and AUTHFIRST({})'.format(last, first))

        if au.get_results_size() == 0:
            return [None, False]

        elif au.get_results_size() == 1:
            res = AuthorRetrieval(au.authors[0][0])
            return [res.h_index, False]

        else:
            df = pd.DataFrame(au.authors)
            ret = []
            for x in zip(df['eid'], df['affiliation'], df['city'],
                         df['country']):
                tokens = x[0].split('-')
                ret.append([tokens[-1], x[1], x[2], x[3]])
            return [ret, True]
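Because this method returns a two-element list whose second item is True only when a table of candidates comes back, callers have to branch on that flag. A brief usage sketch; `finder` stands for an instance of whatever class defines `get_by_name` and is purely illustrative:

value, is_table = finder.get_by_name("Reinhard", "Selten")  # `finder` is hypothetical
if value is None:
    print("No matching author found")
elif is_table:
    # Several candidates: each row is [author_id, affiliation, city, country]
    for author_id, affiliation, city, country in value:
        print(author_id, affiliation, city, country)
else:
    print("h-index:", value)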
Example #6
def test_retrieve_authors_insert():
    make_database(test_cache, drop=True)
    conn = connect_database(test_cache)
    # Variables
    expected_auth = [53164702100, 57197093438]
    search_auth = [55317901900]
    expected_cols = [
        'auth_id', 'eid', 'surname', 'initials', 'givenname', 'affiliation',
        'documents', 'affiliation_id', 'city', 'country', 'areas'
    ]
    # Insert data
    q = f"AU-ID({robust_join(expected_auth, sep=') OR AU-ID(')})"
    res = pd.DataFrame(AuthorSearch(q, refresh=refresh).authors, dtype="int64")
    res["auth_id"] = res["eid"].str.split("-").str[-1]
    res = res[expected_cols]
    insert_data(res, conn, table="authors")
    # Retrieve data
    df = pd.DataFrame(expected_auth + search_auth,
                      columns=["auth_id"],
                      dtype="int64")
    incache, missing = retrieve_authors(df, conn)
    assert_equal(incache.shape[0], 2)
    assert_equal(missing, [55317901900])
Example #7
    def search_data(self):
        # Interactive debugging aid: inspect `s` from the second pdb prompt
        pdb.set_trace()
        s = AuthorSearch('AUTHLAST(Selten) and AUTHFIRST(Reinhard)',
                         refresh=True)
        pdb.set_trace()
Example #8
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for `scopus.AuthorSearch` module."""

from collections import namedtuple
from nose.tools import assert_equal, assert_true

from pybliometrics.scopus import AuthorSearch

s1 = AuthorSearch('authlast(selten) and authfirst(reinhard)', refresh=True)
s2 = AuthorSearch('authlast(selten)', download=False)


def test_authors():
    order = 'eid surname initials givenname affiliation documents '\
            'affiliation_id city country areas'
    Author = namedtuple('Author', order)
    expected = [
        Author(eid='9-s2.0-6602907525',
               surname='Selten',
               initials='R.',
               givenname='Reinhard',
               affiliation='Universität Bonn',
               documents='74',
               affiliation_id='60007493',
               city='Bonn',
               country='Germany',
               areas='ECON (73); MATH (19); BUSI (16)')
    ]
    assert_equal(s1.authors, expected)
Example #9
def base_query(q_type, query, refresh=False, fields=None, size_only=False):
    """Wrapper function to perform a particular search query.

    Parameters
    ----------
    q_type : str
        Determines the query search that will be used.  Allowed values:
        "author", "docs".

    query : str
        The query string.

    refresh : bool (optional, default=False)
        Whether to refresh cached files if they exist, or not.

    fields : list of field names (optional, default=None)
        Fields in the Scopus query that must always be present.  To be
        passed on to pybliometrics.scopus.ScopusSearch.  Will be ignored
        when q_type = "author".

    size_only : bool (optional, default=False)
        Whether to skip downloading the results and return only the
        number of results instead.

    Returns
    -------
    res : list of namedtuples (if size_only is False) or int
        Documents represented by namedtuples as returned from scopus or
        number of search results.

    Raises
    ------
    ValueError:
        If q_type is none of the allowed values.
    """
    params = {"query": query, "refresh": refresh, "download": not size_only}
    # Download query until server is available
    try:
        if q_type == "author":
            obj = AuthorSearch(**params)
        elif q_type == "docs":
            params["integrity_fields"] = fields
            obj = ScopusSearch(**params)
    except (AttributeError, Scopus500Error, KeyError, HTTPError):
        # The broad catch is kept because of non-replicable errors
        # (e.g. 'cursor' KeyError, HTTPError): pause, then query again
        # with refresh
        sleep(2.0)
        return base_query(q_type,
                          query,
                          refresh=True,
                          fields=None,
                          size_only=size_only)
    if size_only:
        return obj.get_results_size()
    # Parse results, refresh once if integrity check fails or when server
    # sends bad results (in this case pause querying for a while)
    try:
        if q_type == "author":
            res = obj.authors or []
        elif q_type == "docs":
            res = obj.results or []
    except (AttributeError, Scopus500Error, KeyError, HTTPError):
        # The broad catch is kept because of non-replicable errors
        # (e.g. 'cursor' KeyError, HTTPError): query again with refresh
        return base_query(q_type,
                          query,
                          refresh=True,
                          fields=None,
                          size_only=size_only)
    return res
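A short usage sketch of this wrapper, reusing the Selten query from the test module above; the module-level imports of `AuthorSearch`, `ScopusSearch`, `sleep`, and the Scopus error classes are assumed to be in place:

# Number of matching author profiles, without downloading the records
n = base_query("author", "AUTHLAST(Selten) AND AUTHFIRST(Reinhard)",
               size_only=True)

# Full download: a list of author namedtuples (or an empty list)
authors = base_query("author", "AUTHLAST(Selten) AND AUTHFIRST(Reinhard)")
for author in authors:
    print(author.eid, author.surname, author.city)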
Example #10
def base_query(q_type,
               query,
               refresh=False,
               view="COMPLETE",
               fields=None,
               size_only=False):
    """Wrapper function to perform a particular search query.

    Parameters
    ----------
    q_type : str
        Determines the query search that will be used.  Allowed values:
        "author", "docs".

    query : str
        The query string.

    refresh : bool (optional, default=False)
        Whether to refresh cached files if they exist, or not.

    view : str (optional, default="COMPLETE")
        The view of the ScopusSearch results to download.  Will be ignored
        when q_type = "author".

    fields : list of field names (optional, default=None)
        Fields in the Scopus query that must always be present.  To be
        passed on to pybliometrics.scopus.ScopusSearch.  Will be ignored
        when q_type = "author".

    size_only : bool (optional, default=False)
        Whether to skip downloading the results and return only the
        number of results instead.

    Returns
    -------
    res : list of namedtuples (if size_only is False) or int
        Documents represented by namedtuples as returned from scopus or
        number of search results.

    Raises
    ------
    ValueError:
        If q_type is none of the allowed values.
    """

    from pybliometrics.scopus import AuthorSearch, ScopusSearch

    params = {"query": query, "refresh": refresh, "download": not size_only}

    if q_type == "author":
        au = AuthorSearch(**params)
        if size_only:
            return au.get_results_size()
        else:
            return au.authors or []
    elif q_type == "docs":
        params["integrity_fields"] = fields
        params["view"] = view
        if size_only:
            return ScopusSearch(**params).get_results_size()
        try:
            return ScopusSearch(**params).results or []
        except AttributeError:
            params.pop("integrity_fields")
            params["refresh"] = True
            return ScopusSearch(**params).results or []
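For the "docs" branch of this variant, `fields` lists the result attributes that must be populated; if the integrity check fails with an AttributeError, the query is repeated once with a refresh and without the check. A hedged usage sketch; the author ID is the one from the test data above and serves only as an illustration:

# All documents of one author; require that every record carries an EID
docs = base_query("docs", "AU-ID(6602907525)", view="COMPLETE", fields=["eid"])
print(len(docs))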
Example #11
        names = []
        largest_name = 0
        largest_title = 0
        with open(args.input, "r") as infile:
            for line in infile:
                tokens = line.rstrip().split(";")
                if len(tokens) == 4 and tokens[3]:
                    initials = []
                    for firstname in tokens[1].split(" "):
                        initials.append(firstname[0] + ".")

                    hindex = 0
                    if args.fake:
                        hindex = int(random.expovariate(1 / 20.0))
                    else:
                        au = AuthorSearch(f"AU-ID({tokens[3]})")
                        if au.get_results_size() > 0:
                            assert au.get_results_size() == 1
                            hindex = int(
                                AuthorRetrieval(au.authors[0][0]).h_index)

                    fullname = tokens[0] + " " + " ".join(initials)
                    largest_name = max(largest_name, len(fullname))
                    largest_title = max(largest_title, len(tokens[2]))

                    names.append([fullname, tokens[2], hindex])

        for name in sorted(names, key=itemgetter(2), reverse=True):
            pad_name = " " * (largest_name - len(name[0]))
            pad_title = " " * (largest_title - len(name[1]))
            print(f'{name[0]}{pad_name} ({name[1]}) {pad_title} '
                  f'{"▇" * name[2]} {name[2]}')
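The snippet relies on an `args` object, a `random` import, `itemgetter`, and a semicolon-separated input file, none of which are shown. A plausible preamble for the surrounding script, given here purely as an assumption:

import argparse
import random
from operator import itemgetter

from pybliometrics.scopus import AuthorRetrieval, AuthorSearch

parser = argparse.ArgumentParser()
parser.add_argument("input",
                    help="semicolon-separated file: name;first names;title;Scopus author ID")
parser.add_argument("--fake", action="store_true",
                    help="draw random h-indices instead of querying Scopus")
args = parser.parse_args()

# Hypothetical input line matching the parsing above:
# Selten;Reinhard;Professor emeritus;6602907525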
Example #12
with open('cis_academics.csv', 'r') as readFile:
    reader = csv.reader(readFile, delimiter=',')
    for row in reader:
        if first_line_flag:
            first_line_flag = False
            continue

        given_name = row[0]
        family_name = row[1]
        orcid_id = row[2]
        print(family_name + " " + given_name)

        authors = None  # reset between rows; avoids reusing the previous row's result
        if len(orcid_id) > 1:
            print("orcid:" + orcid_id)
            authorSearch = AuthorSearch('ORCID(' + orcid_id + ')')
            authors = authorSearch.authors

        if authors is None:
            print("no result with orcid!")
            authorSearch = AuthorSearch('AUTHLAST(' + family_name +
                                        ') and AUTHFIRST(' + given_name +
                                        ') and AFFIL(University)')
            authors = authorSearch.authors

        if authors is None:
            print("no result with name; retrying with given and family names swapped")
            authorSearch = AuthorSearch('AUTHLAST(' + given_name +
                                        ') and AUTHFIRST(' + family_name +
                                        ') and AFFIL(University)')
            authors = authorSearch.authors
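The loop above also depends on setup that is not shown: the `csv` module, the pybliometrics import, and an initialised `first_line_flag` used to skip the header row. A minimal preamble, assuming `cis_academics.csv` starts with a header line:

import csv

from pybliometrics.scopus import AuthorSearch

first_line_flag = True  # set to False once the header row has been skipped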