def test_find_location():
    """Resolving the main affiliation of the test author must yield Munich."""
    auth_id = 6701809842
    pubs = ScopusSearch(f"AU-ID({auth_id})").results
    ctry, aid, aff = find_location([str(auth_id)], pubs, 2000, refresh=False)
    assert_equal(ctry, "Germany")
    assert_equal(aid, "60028717")
    assert_equal(aff, "University of Munich")
def test_retrieve_author_info_authoryear():
    """retrieve_author_info() must split cached author-year rows from missing ones."""
    make_database(test_cache, drop=True)
    conn = connect_database(test_cache)
    # Fixture parameters
    table = "author_year"
    cached_authors = [53164702100, 57197093438]
    uncached_authors = [55317901900]
    year = 2016
    queried = pd.DataFrame(cached_authors + uncached_authors,
                           columns=["auth_id"], dtype="int64")
    queried["year"] = year
    # Seed the cache with pre-computed stats for the two cached authors
    joined = robust_join(cached_authors, sep=') OR AU-ID(')
    query = f"(AU-ID({joined})) AND PUBYEAR BEF {year+1}"
    stats = build_dict(ScopusSearch(query, refresh=refresh).results, cached_authors)
    expected = (pd.DataFrame.from_dict(stats, orient="index", dtype="int64")
                .sort_index()
                .rename_axis('auth_id')
                .reset_index())
    expected["year"] = year
    expected = expected[['auth_id', 'year', 'first_year', 'n_pubs', 'n_coauth']]
    insert_data(expected, conn, table=table)
    # Cached authors come back as hits, the remaining one as missing
    incache, missing = retrieve_author_info(queried, conn, table)
    assert_frame_equal(incache, expected)
    assert_equal(missing['auth_id'].tolist(), uncached_authors)
    assert_equal(missing['year'].tolist(), [year])
def performSearch(self, searchWords):
    """Run a Scopus search for all words (AND-combined) and store the results.

    :param searchWords: iterable of search terms, combined with AND.

    Side effects: sets self.searchResult and self.searchWords, then
    persists the results via self.storeResultsInDB().
    """
    # Searching in TITLE-ABS-KEY (title, abstract, keywords) is the default
    # search mode on Scopus.
    # str.join replaces the original index-counting loop; it also closes the
    # parenthesis correctly when searchWords is empty (the loop left the
    # query unterminated in that case).
    searchString = 'TITLE-ABS-KEY(' + ' AND '.join(searchWords) + ')'
    self.searchResult = ScopusSearch(searchString)
    self.searchWords = searchWords
    self.storeResultsInDB()
def create_obj(params):
    """Build the search object matching the enclosing scope's q_type.

    Returns an AuthorSearch for "author" queries, a ScopusSearch (with
    integrity_fields and view injected into params) for "docs" queries,
    and None for any other q_type.
    """
    if q_type == "author":
        return AuthorSearch(**params)
    if q_type == "docs":
        params["integrity_fields"] = fields
        params["view"] = view
        return ScopusSearch(**params)
def test_expand_affiliation():
    """expand_affiliation() should return >= 180 rows with one numeric afid each."""
    # Fix: the original literal carried an f-prefix with no placeholders (F541).
    pubs = ScopusSearch("AU-ID(6701809842)", refresh=refresh).results
    res = pd.DataFrame(pubs)
    res = expand_affiliation(res)
    assert_true(len(res) >= 180)
    expect_columns = ['source_id', 'author_ids', 'afid']
    assert_equal(set(res.columns), set(expect_columns))
    # Some papers are co-authored, so at least one row joins author IDs with ";"
    assert_true(any(res['author_ids'].str.contains(";")))
    # After expansion each row carries a single numeric affiliation ID
    assert_true(all(isinstance(x, (int, float)) for x in res['afid'].unique()))
def test_expand_affiliation():
    """expand_affiliation() must explode multi-affiliation rows into single-afid rows."""
    auth_id = 6701809842
    frame = pd.DataFrame(ScopusSearch("AU-ID({})".format(auth_id)).results)
    frame = expand_affiliation(frame)
    assert_equal(len(frame), 185)
    assert_equal(set(frame.columns.tolist()),
                 {'source_id', 'author_ids', 'afid'})
    # Co-authored papers keep ";"-joined author IDs ...
    assert_true(any(frame.author_ids.str.contains(";")))
    # ... but each expanded row must hold exactly one affiliation ID
    assert_false(any(frame.afid.str.contains(";")))
def get_data_from_doi(self, doi, title):
    """Resolve publication metadata for *doi*, with PubMed fallback by title.

    Tries three sources in order:
      1. a Scopus search on the raw DOI (free tier, subscriber=False),
      2. an Elsevier FullDoc retrieval via self.client,
      3. an Entrez/PubMed search (by title if no document was retrieved,
         otherwise by the retrieved document's title).

    Returns a 4-tuple: (pubmed_data_or_doi, affiliation, publication_name,
    publication_type). The first element is self.fetch_data_from_pubmed(id)
    when a PubMed ID was found, else the original doi.
    """
    # NOTE(review): `id` shadows the builtin; a rename (e.g. pmid) would be safer.
    id = None
    affil = None
    pub_name = None
    pub_type = None
    # try:
    try:
        # Attempt 1: Scopus search on the DOI string.
        doi_doc = ScopusSearch(doi, subscriber=False)
        # NOTE(review): relies on the private _json attribute and assumes the
        # first hit is the right one — verify against pybliometrics version.
        if 'pubmed-id' in doi_doc._json[0].keys():
            id = doi_doc._json[0]["pubmed-id"]
        if 'affiliation' in doi_doc._json[0].keys():
            affil = doi_doc._json[0]['affiliation']
        pub_name = doi_doc._json[0]['prism:publicationName']
        pub_type = doi_doc._json[0]['subtypeDescription']
    except:
        # NOTE(review): bare except deliberately swallows any Scopus failure
        # (network error, empty result) and falls through to FullDoc.
        print("failed with scopus")
    if id == None:
        # Attempt 2: full-text document retrieval through the Elsevier client.
        doi_doc = FullDoc(doi=doi)
        if doi_doc.read(self.client):
            # print("doi_doc.title: ", doi_doc.title)
            doi_doc.write()
            pub_name = doi_doc.data['coredata']['prism:publicationName']
            if 'pubType' in doi_doc.data['coredata'].keys():
                pub_type = str(doi_doc.data['coredata']['pubType']).strip()
        else:
            print(
                "Read document failed. no id for doi {}. trying with title"
                .format(doi))
            doi_doc = None
            # return doi, affil
        id = None
        # NOTE(review): this branch reads the private _data attribute — if the
        # Scopus attempt above succeeded but had no pubmed-id, doi_doc may be a
        # ScopusSearch without _data; confirm this path cannot be reached then.
        if doi_doc == None or (not 'pubmed-id' in doi_doc._data.keys()):
            print("trying with title")
            # Attempt 3: Entrez/PubMed search by title.
            Entrez.email = '*****@*****.**'
            if doi_doc == None:
                query = title
            else:
                query = doi_doc.title
            handle = Entrez.esearch(db='pubmed', retmode='xml', term=query)
            results = Entrez.read(handle)
            if int(results['Count']) > 0:
                # NOTE(review): IdList is a list of IDs, not a single ID —
                # confirm fetch_data_from_pubmed accepts a list.
                id = results['IdList']
        else:
            id = doi_doc._data['pubmed-id']
    if id != None:
        return self.fetch_data_from_pubmed(id), affil, pub_name, pub_type
    else:
        print("no pubmed id found for doi {}".format(doi))
        return doi, affil, pub_name, pub_type
def query_scopus_by_doi(doi, verbose=True):
    """
    Get the Scopus record for a paper by its DOI.

    Fix: the original docstring/comments said "crossref", but this function
    queries the Scopus API via ScopusSearch.

    :param doi: (str) doi of a paper
    :param verbose: (bool) print diagnosis message or not
        (NOTE: currently unused; kept for interface compatibility)
    :return: (dict or None) first matching Scopus record as a dict, or None
        if the search returned no results (a warning is emitted in that case)
    """
    scopus_results = None
    # query scopus
    query_results = ScopusSearch('DOI({})'.format(doi),
                                 max_entries=None,
                                 cursor=True)
    # filter out empty query results
    if query_results.results is not None:
        scopus_results = query_results.results[0]._asdict()
    else:
        warnings.warn(
            'Empty result from scopus when searching doi: {}'.format(doi))
    return scopus_results
AUTHORS = {} meta_authors = pubtrack.meta_author.get()['results'] for meta_author in meta_authors: for author in meta_author['authors']: if author['scopus_id']: full_name = '{} {}'.format(author['first_name'], author['last_name']) AUTHORS[author['scopus_id']] = full_name logger.info(' * Adding author {}({}) to be processed'.format(full_name, author['scopus_id'])) logger.info('==> Processing total of {} authors'.format(len(AUTHORS))) DATE_LIMIT = datetime.datetime(year=SINCE, month=1, day=1) for author_id, full_name in AUTHORS.items(): publication_count = 0 search = ScopusSearch(f'AU-ID ( {author_id} )') logger.info('STARTING SEARCH FOR AUTHOR {}({})'.format(full_name, author_id)) for result in search.results: # We'll only take publications, which have a DOI if result.doi is None: continue # Requesting the detailed information from the scopus database for the current publication from the search # results try: abstract_retrieval = AbstractRetrieval(result.doi) logger.info(' * FETCHED publication {}'.format(result.doi)) except Exception as e: logger.error(' ! Could not retrieve scopus abstract for DOI "{}". ERROR: {}'.format(result.doi, str(e)))