Ejemplo n.º 1
0
def test_find_location():
    """Verify the output of find_location() for one known Scopus author.

    The author's publications are fetched through ScopusSearch and passed
    to find_location(); the three returned values (presumably country,
    affiliation ID and affiliation name) are compared to fixed values.
    """
    author = 6701809842
    query = "AU-ID({})".format(author)
    publications = ScopusSearch(query).results
    country, affiliation_id, affiliation_name = find_location(
        [str(author)], publications, 2000, refresh=False)
    assert_equal(country, "Germany")
    assert_equal(affiliation_id, "60028717")
    assert_equal(affiliation_name, "University of Munich")
Ejemplo n.º 2
0
def test_retrieve_author_info_authoryear():
    """Check retrieve_author_info() against the "author_year" table.

    Rows for two authors are inserted into the database and a third author
    is only requested.  The inserted rows are expected back as the cached
    frame, and the third author is expected in the missing frame.
    """
    make_database(test_cache, drop=True)
    conn = connect_database(test_cache)
    # Fixtures
    table = "author_year"
    year = 2016
    expected_auth = [53164702100, 57197093438]
    search_auth = [55317901900]
    requested = pd.DataFrame(expected_auth + search_auth,
                             columns=["auth_id"],
                             dtype="int64")
    requested["year"] = year
    # Build the rows to store from a ScopusSearch query, then insert them
    fill = robust_join(expected_auth, sep=') OR AU-ID(')
    q = f"(AU-ID({fill})) AND PUBYEAR BEF {year+1}"
    pubs = ScopusSearch(q, refresh=refresh).results
    info = build_dict(pubs, expected_auth)
    expected = pd.DataFrame.from_dict(info, orient="index", dtype="int64")
    expected = expected.sort_index()
    expected = expected.rename_axis('auth_id')
    expected = expected.reset_index()
    expected["year"] = year
    column_order = ['auth_id', 'year', 'first_year', 'n_pubs', 'n_coauth']
    expected = expected[column_order]
    insert_data(expected, conn, table=table)
    # Query the table and compare cached / missing parts
    incache, missing = retrieve_author_info(requested, conn, table)
    assert_frame_equal(incache, expected)
    assert_equal(missing['auth_id'].tolist(), search_auth)
    assert_equal(missing['year'].tolist(), [year])
Ejemplo n.º 3
0
    def performSearch(self, searchWords):
        """Search Scopus for documents matching all given words and store them.

        The words are AND-combined inside a single ``TITLE-ABS-KEY(...)``
        clause.  Searching in TITLE-ABStract-KEYwords is the default search
        mode on scopus.

        :param searchWords: sequence of search terms (strings).

        Sets ``self.searchResult`` to the ``ScopusSearch`` object and
        ``self.searchWords`` to the given words, then calls
        ``self.storeResultsInDB()``.
        """
        # str.join keeps the parentheses balanced even when searchWords is
        # empty, and avoids repeated string concatenation.
        searchString = 'TITLE-ABS-KEY(' + ' AND '.join(searchWords) + ')'

        self.searchResult = ScopusSearch(searchString)
        self.searchWords = searchWords

        self.storeResultsInDB()
Ejemplo n.º 4
0
 def create_obj(params):
     """Build the search object selected by the free variable ``q_type``.

     For ``"docs"`` the ``params`` mapping is extended in place with
     ``integrity_fields`` and ``view`` before a ``ScopusSearch`` is
     created; for ``"author"`` an ``AuthorSearch`` is created from
     ``params`` as given.  Any other ``q_type`` yields ``None``.
     """
     if q_type == "docs":
         params["integrity_fields"] = fields
         params["view"] = view
         return ScopusSearch(**params)
     if q_type == "author":
         return AuthorSearch(**params)
     return None
Ejemplo n.º 5
0
def test_expand_affiliation():
    """Check row count, columns and value types of expand_affiliation()."""
    search = ScopusSearch("AU-ID(6701809842)", refresh=refresh)
    expanded = expand_affiliation(pd.DataFrame(search.results))
    assert_true(len(expanded) >= 180)
    expect_columns = ['source_id', 'author_ids', 'afid']
    assert_equal(set(expanded.columns), set(expect_columns))
    # At least one row must list several authors separated by ";"
    has_multi_author = any(expanded['author_ids'].str.contains(";"))
    assert_true(has_multi_author)
    # Every distinct affiliation ID must be numeric
    numeric_afids = all(
        isinstance(afid, (int, float)) for afid in expanded['afid'].unique())
    assert_true(numeric_afids)
Ejemplo n.º 6
0
def test_expand_affiliation():
    """Check row count, columns and separators of expand_affiliation()."""
    auth_id = 6701809842
    query = "AU-ID({})".format(auth_id)
    frame = pd.DataFrame(ScopusSearch(query).results)
    expanded = expand_affiliation(frame)
    assert_equal(len(expanded), 185)
    expect_columns = ['source_id', 'author_ids', 'afid']
    assert_equal(set(expanded.columns.tolist()), set(expect_columns))
    # Author IDs may still be ";"-joined, affiliation IDs must not be
    assert_true(any(expanded["author_ids"].str.contains(";")))
    assert_false(any(expanded["afid"].str.contains(";")))
Ejemplo n.º 7
0
    def get_data_from_doi(self, doi, title):
        """Look up PubMed data and publication metadata for a DOI.

        Scopus is queried first.  If that yields no PubMed id, the document
        is read through ``FullDoc``; if the document cannot be read or its
        data holds no ``pubmed-id``, PubMed is searched through Entrez using
        the document title.

        :param doi: DOI of the publication.
        :param title: title used as the Entrez search term when the document
            cannot be read through ``FullDoc``.
        :return: a 4-tuple ``(data, affil, pub_name, pub_type)``.  ``data`` is
            the result of ``self.fetch_data_from_pubmed`` when a PubMed id
            was found, otherwise ``doi`` itself.  The other three items are
            ``None`` when they could not be determined.
        """
        pubmed_id = None
        affil = None
        pub_name = None
        pub_type = None
        try:
            doi_doc = ScopusSearch(doi, subscriber=False)
            entry = doi_doc._json[0]
            if 'pubmed-id' in entry:
                pubmed_id = entry["pubmed-id"]
            if 'affiliation' in entry:
                affil = entry['affiliation']
            pub_name = entry['prism:publicationName']
            pub_type = entry['subtypeDescription']
        except Exception:
            # Best-effort lookup: any Scopus failure falls through to the
            # fallbacks below.  KeyboardInterrupt/SystemExit are deliberately
            # not caught so the process can still be stopped.
            print("failed with scopus")
        if pubmed_id is None:
            doi_doc = FullDoc(doi=doi)
            if doi_doc.read(self.client):
                doi_doc.write()
                coredata = doi_doc.data['coredata']
                pub_name = coredata['prism:publicationName']
                if 'pubType' in coredata:
                    pub_type = str(coredata['pubType']).strip()
            else:
                print(
                    "Read document failed. no id for doi {}. trying with title"
                    .format(doi))
                doi_doc = None
            if doi_doc is None or 'pubmed-id' not in doi_doc._data:
                print("trying with title")
                Entrez.email = '*****@*****.**'
                # Prefer the title of the document that was read, if any.
                query = title if doi_doc is None else doi_doc.title
                handle = Entrez.esearch(db='pubmed', retmode='xml', term=query)
                try:
                    results = Entrez.read(handle)
                finally:
                    # Always release the network handle, even if parsing fails.
                    handle.close()
                if int(results['Count']) > 0:
                    # NOTE(review): this is the whole IdList, whereas the
                    # other branches yield a single id -- confirm that
                    # fetch_data_from_pubmed accepts both.
                    pubmed_id = results['IdList']
            else:
                pubmed_id = doi_doc._data['pubmed-id']
        if pubmed_id is None:
            print("no pubmed id found for doi {}".format(doi))
            return doi, affil, pub_name, pub_type
        return self.fetch_data_from_pubmed(pubmed_id), affil, pub_name, pub_type
Ejemplo n.º 8
0
def query_scopus_by_doi(doi, verbose=True):
    """
    Get the Scopus record of a paper by its DOI.

    :param doi: (str) doi of a paper
    :param verbose: (bool) emit a warning when the search returns nothing
    :return: (dict) first Scopus search result converted with ``_asdict()``,
        or None when the search returned no result
    """
    # query scopus
    query_results = ScopusSearch('DOI({})'.format(doi),
                                 max_entries=None,
                                 cursor=True)

    results = query_results.results
    # ``results`` may be None (or empty) when nothing matched, so guard
    # before indexing the first entry.
    if results:
        return results[0]._asdict()

    if verbose:
        warnings.warn(
            'Empty result from scopus when searching doi: {}'.format(doi))
    return None
Ejemplo n.º 9
0
# Maps each author's scopus_id to the author's "first last" display name.
AUTHORS = {}
meta_authors = pubtrack.meta_author.get()['results']
for meta_author in meta_authors:
    for author in meta_author['authors']:
        # Only authors with a truthy scopus_id are registered.
        if not author['scopus_id']:
            continue
        full_name = '{} {}'.format(author['first_name'], author['last_name'])
        AUTHORS[author['scopus_id']] = full_name
        logger.info(
            ' * Adding author {}({}) to be processed'.format(full_name, author['scopus_id'])
        )
logger.info('==> Processing total of {} authors'.format(len(AUTHORS)))


# First of January of the SINCE year, at midnight.
DATE_LIMIT = datetime.datetime(SINCE, 1, 1)

# For every collected author, run a Scopus author-ID search and walk through
# its results.
for author_id, full_name in AUTHORS.items():
    # Per-author counter; it is not updated in the lines visible here.
    publication_count = 0
    search = ScopusSearch(f'AU-ID ( {author_id} )')
    logger.info('STARTING SEARCH FOR AUTHOR {}({})'.format(full_name, author_id))

    for result in search.results:

        # We'll only take publications, which have a DOI
        if result.doi is None:
            continue

        # Requesting the detailed information from the scopus database for the current publication from the search
        # results
        try:
            abstract_retrieval = AbstractRetrieval(result.doi)
            logger.info(' * FETCHED publication {}'.format(result.doi))
        except Exception as e:
            # NOTE(review): this branch only logs; `abstract_retrieval` keeps
            # its value from an earlier iteration (or stays undefined), so any
            # code that follows must not rely on it -- confirm a `continue`
            # is not missing here.
            logger.error(' ! Could not retrieve scopus abstract for DOI "{}". ERROR: {}'.format(result.doi, str(e)))