def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    refresh_aff : bool (optional, default=False)
        Whether to refresh the cached corresponding affiliation views
        (if they exist) or not.

    level : int (optional, default=1)
        Number of * to print in property __str__.

    Notes
    -----
    The files are cached in ~/.scopus/author/{author_id} (without
    eventually leading '9-s2.0-').
    """
    # Accept either a plain ID or an EID ('9-s2.0-nnnnnnnn'): keep only
    # the part after the last hyphen and normalize to a digit string.
    trailing = str(author_id).split('-')[-1]
    author_id = str(int(trailing))
    self.level = level
    # Fetch (or read from cache) the ENHANCED author view as XML.
    cache_file = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
    endpoint = 'https://api.elsevier.com/content/author/author_id/{}'.format(author_id)
    query = {'author_id': author_id, 'view': 'ENHANCED'}
    content = get_content(cache_file, url=endpoint, refresh=refresh,
                          params=query)
    self.xml = ET.fromstring(content)
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.  Optionally expressed
        as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation/{aff_id}.
    """
    # One-shot deprecation notice pointing users at the replacement class.
    if config.getboolean('Warnings', 'Affiliation'):
        message = config.get('Warnings', 'Text').format('ContentAffiliationRetrieval')
        warnings.warn(message, DeprecationWarning)
        config.set('Warnings', 'Affiliation', '0')
    # Accept either a plain ID or an EID such as 10-s2.0-nnnnnnnn.
    numeric_part = str(aff_id).split('-')[-1]
    aff_id = str(int(numeric_part))
    cache_file = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id)
    endpoint = 'https://api.elsevier.com/content/affiliation/affiliation_id/{}'.format(aff_id)
    raw = get_content(cache_file, url=endpoint, refresh=refresh)
    self.xml = ET.fromstring(raw)
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.  Optionally expressed
        as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation_retrieval/{aff_id}.
    """
    # Reduce an eventual EID of the form 10-s2.0-nnnnnnnn to digits only.
    aff_id = str(int(str(aff_id).split('-')[-1]))
    cache_path = join(config.get('Directories', 'ContentAffiliationRetrieval'),
                      aff_id)
    endpoint = 'https://api.elsevier.com/content/affiliation/affiliation_id/{}'.format(aff_id)
    # Fetch the JSON view and unwrap the response envelope.
    payload = get_content(cache_path, url=endpoint, refresh=refresh,
                          accept='json')
    self._json = loads(payload.decode('utf-8'))['affiliation-retrieval-response']
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.  Optionally expressed
        as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation/{aff_id}.
    """
    # Warn (once per session) that this class is deprecated.
    deprecated = config.getboolean('Warnings', 'Affiliation')
    if deprecated:
        warnings.warn(
            config.get('Warnings', 'Text').format('ContentAffiliationRetrieval'),
            DeprecationWarning)
        config.set('Warnings', 'Affiliation', '0')
    # Normalize an eventual '10-s2.0-' prefixed EID to a plain ID.
    aff_id = str(int(str(aff_id).split('-')[-1]))
    # Download (or load cached) XML for this affiliation.
    target = 'https://api.elsevier.com/content/affiliation/affiliation_id/{}'.format(aff_id)
    cached = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id)
    self.xml = ET.fromstring(get_content(cached, url=target, refresh=refresh))
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation/{aff_id}.
    """
    self._affiliation_id = aff_id
    qfile = os.path.join(SCOPUS_AFFILIATION_DIR, str(aff_id))
    url = ('http://api.elsevier.com/content/affiliation/'
           'affiliation_id/{}'.format(aff_id))
    xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))
    # public url
    self._url = xml.find('coredata/link[@rel="scopus-affiliation"]')
    # NOTE(review): this reads the `url` property rather than the `_url`
    # Element just assigned -- presumably that property returns _url;
    # confirm against the class body (not visible in this chunk).
    if self._url is not None:
        self._url = self.url.get('href')
    self.api_url = get_encoded_text(xml, 'coredata/prism:url')
    # Remaining fields as returned by get_encoded_text (may be None
    # when the corresponding element is absent from the response).
    self._nauthors = get_encoded_text(xml, 'coredata/author-count')
    self._ndocuments = get_encoded_text(xml, 'coredata/document-count')
    self._name = get_encoded_text(xml, 'affiliation-name')
    self._address = get_encoded_text(xml, 'address')
    self._city = get_encoded_text(xml, 'city')
    self._country = get_encoded_text(xml, 'country')
def __init__(self, eid, start, end=None, refresh=False):
    """Class to represent the results from a Scopus Citation Overview.
    See https://api.elsevier.com/documentation/guides/AbstractCitationViews.htm.

    Parameters
    ----------
    eid : str
        The EID of the abstract.

    start : str or int
        The first year for which the citation count should be loaded

    end : str or int (optional, default=None)
        The last year for which the citation count should be loaded.
        Defaults to the current year.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/citation_overview/{eid}.

    Your API Key needs to be approved by Elsevier to access this view.

    The files are cached in ~/.scopus/citation_overview/{eid}.
    """
    # BUGFIX: the previous default `end=datetime.now().year` was
    # evaluated only once, at import time, and went stale across a year
    # boundary.  Compute the default at call time instead.
    if end is None:
        end = datetime.now().year
    # Get file content
    scopus_id = eid.split('0-')[-1]
    qfile = os.path.join(CITATION_OVERVIEW_DIR, eid)
    url = "https://api.elsevier.com/content/abstract/citations/{}".format(
        scopus_id)
    params = {'scopus_id': scopus_id, 'date': '{}-{}'.format(start, end)}
    res = get_content(qfile, url=url, refresh=refresh, params=params,
                      accept='json')
    data = loads(res.decode('utf-8'))['abstract-citations-response']
    self.start = int(start)
    self.end = int(end)
    # citeInfoMatrix: keys arrive namespaced ("prism:doi"), strip prefix
    m = data['citeInfoMatrix']['citeInfoMatrixXML']['citationMatrix'][
        'citeInfo'][0]
    self.citeInfoMatrix = {k.split(":", 1)[-1]: v for k, v in m.items()}
    # h-index
    self.hindex = data['h-index']
    # identifier-legend (renamed from ambiguous `l`)
    legend = data['identifier-legend']['identifier'][0]
    self.identifierlegend = {k.split(":", 1)[-1]: v
                             for k, v in legend.items()}
    # citeColumnTotalXML
    self.citeColumnTotalXML = data['citeColumnTotalXML']  # not used
def __init__(self, EID, view='META_ABS', refresh=False):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    EID : str
        The Scopus ID (EID) of an abstract.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Supported values: META,
        META_ABS, FULL, where FULL includes all information of META_ABS
        view and META_ABS includes all information of the META view.  See
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
        for details.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Raises
    ------
    ValueError
        If the view parameter is not one of the allowed values.

    Notes
    -----
    The files are cached in ~/.scopus/xml/{eid}.
    """
    # One-shot deprecation notice.
    if config.getboolean('Warnings', 'Abstract'):
        text = config.get('Warnings', 'Text').format('AbstractRetrieval')
        warnings.warn(text, DeprecationWarning)
        config.set('Warnings', 'Abstract', '0')
    allowed_views = ('META', 'META_ABS', 'FULL')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))
    # Get file content
    qfile = os.path.join(SCOPUS_XML_DIR, EID)
    url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID)
    params = {'view': view}
    self.xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh,
                                         params=params))
    # Remove default namespace if present
    remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}'
    nsl = len(remove)
    # BUGFIX: Element.getiterator() was deprecated and removed in
    # Python 3.9; Element.iter() is the supported equivalent.
    for elem in self.xml.iter():
        if elem.tag.startswith(remove):
            elem.tag = elem.tag[nsl:]
    if self.xml.tag == 'service-error':
        raise Exception('\n{0}\n{1}'.format(EID, self.xml))
    self.coredata = self.xml.find('coredata', ns)
    self.items = self.xml.find('item', ns)
def __init__(self, EID, view='META_ABS', refresh=False):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    EID : str
        The Scopus ID (EID) of an abstract.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Supported values: META,
        META_ABS, FULL, where FULL includes all information of META_ABS
        view and META_ABS includes all information of the META view.  See
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
        for details.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Raises
    ------
    ValueError
        If the view parameter is not one of the allowed values.

    Notes
    -----
    The files are cached in ~/.scopus/xml/{eid}.
    """
    # One-shot deprecation notice.
    if config.getboolean('Warnings', 'Abstract'):
        text = config.get('Warnings', 'Text').format('AbstractRetrieval')
        warnings.warn(text, DeprecationWarning)
        config.set('Warnings', 'Abstract', '0')
    allowed_views = ('META', 'META_ABS', 'FULL')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))
    # Get file content
    qfile = os.path.join(SCOPUS_XML_DIR, EID)
    url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID)
    params = {'view': view}
    self.xml = ET.fromstring(
        get_content(qfile, url=url, refresh=refresh, params=params))
    # Remove default namespace if present
    remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}'
    nsl = len(remove)
    # BUGFIX: Element.getiterator() was deprecated and removed in
    # Python 3.9; Element.iter() is the supported equivalent.
    for elem in self.xml.iter():
        if elem.tag.startswith(remove):
            elem.tag = elem.tag[nsl:]
    if self.xml.tag == 'service-error':
        raise Exception('\n{0}\n{1}'.format(EID, self.xml))
    self.coredata = self.xml.find('coredata', ns)
    self.items = self.xml.find('item', ns)
def __init__(self, aff_id, refresh=False):
    """Class to represent an Affiliation in Scopus.

    Parameters
    ----------
    aff_id : str or int
        The Scopus Affiliation ID.  Optionally expressed
        as an Elsevier EID (i.e., in the form 10-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Notes
    -----
    The files are cached in ~/.scopus/affiliation/{aff_id}.
    """
    # Strip an eventual '10-s2.0-' EID prefix and normalize to digits.
    aff_id = str(int(str(aff_id).split('-')[-1]))
    qfile = os.path.join(SCOPUS_AFFILIATION_DIR, aff_id)
    url = ('https://api.elsevier.com/content/affiliation/'
           'affiliation_id/{}'.format(aff_id))
    xml = ET.fromstring(get_content(qfile, url=url, refresh=refresh))
    # coredata
    self._url = xml.find('coredata/link[@rel="scopus-affiliation"]')
    _aff_id = get_encoded_text(xml, 'coredata/dc:identifier')
    self._aff_id = _aff_id.split(":")[-1]
    # NOTE(review): reads the `url` property rather than the `_url`
    # Element assigned above -- presumably that property returns _url;
    # confirm against the class body (not visible in this chunk).
    if self._url is not None:
        self._url = self.url.get('href')
    self._api_url = get_encoded_text(xml, 'coredata/prism:url')
    self._nauthors = get_encoded_text(xml, 'coredata/author-count')
    self._ndocuments = get_encoded_text(xml, 'coredata/document-count')
    self._name = get_encoded_text(xml, 'affiliation-name')
    self._address = get_encoded_text(xml, 'address')
    self._city = get_encoded_text(xml, 'city')
    self._country = get_encoded_text(xml, 'country')
    # institution-profile
    date_created = xml.find('institution-profile/date-created')
    if date_created is not None:
        # (year, month, day) tuple of ints
        self._date_created = (int(date_created.attrib['year']),
                              int(date_created.attrib['month']),
                              int(date_created.attrib['day']))
    else:
        self._date_created = (None, None, None)
    self._org_type = get_encoded_text(xml, 'institution-profile/org-type')
    self._org_domain = get_encoded_text(xml,
                                        'institution-profile/org-domain')
    self._org_url = get_encoded_text(xml, 'institution-profile/org-URL')
def __init__(self, author_id, refresh=False):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    Notes
    -----
    The files are cached in ~/.scopus/author_retrieval/{author_id}
    (without eventually leading '9-s2.0-').
    """
    # Strip an eventual '9-s2.0-' EID prefix and normalize to digits.
    self._id = str(int(str(author_id).split('-')[-1]))
    qfile = join(config.get('Directories', 'AuthorRetrieval'), self._id)
    url = ('https://api.elsevier.com/content/author/'
           'author_id/{}').format(self._id)
    params = {'author_id': self._id, 'view': 'ENHANCED'}
    res = get_content(qfile, url=url, refresh=refresh, accept='json',
                      params=params)
    self._json = loads(res.decode('utf-8'))['author-retrieval-response']
    try:
        # Normally the response payload is a one-element list.
        self._json = self._json[0]
    except KeyError:
        # For a merged (defunct) profile the payload is instead a dict;
        # indexing a dict with 0 raises KeyError, which lands here.  The
        # 'alias' entry names the surviving profile(s).
        alias_json = self._json['alias']['prism:url']
        if not isinstance(alias_json, list):
            alias_json = [alias_json]
        alias = ', '.join([d['$'].split(':')[-1] for d in alias_json])
        text = 'Author profile with ID {} has been merged and the main '\
               'profile is now one of {}. Please update your records '\
               'manually. Functionality of this object is '\
               'reduced.'.format(author_id, alias)
        warn(text, UserWarning)
def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.  Optionally expressed
        as an Elsevier EID (i.e., in the form 9-s2.0-nnnnnnnn).

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    refresh_aff : bool (optional, default=False)
        Whether to refresh the cached corresponding affiliation views
        (if they exist) or not.

    level : int (optional, default=1)
        Number of * to print in property __str__.

    Notes
    -----
    The files are cached in ~/.scopus/author/{author_id} (without
    eventually leading '9-s2.0-').
    """
    # Emit the one-shot deprecation notice for this legacy class.
    if config.getboolean('Warnings', 'Author'):
        notice = config.get('Warnings', 'Text').format('AuthorRetrieval')
        warnings.warn(notice, DeprecationWarning)
        config.set('Warnings', 'Author', '0')
    # Reduce an eventual '9-s2.0-' prefixed EID to a plain digit string.
    author_id = str(int(str(author_id).split('-')[-1]))
    self.level = level
    # Retrieve the ENHANCED author view (cached or fresh) and parse it.
    cache_file = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
    endpoint = 'https://api.elsevier.com/content/author/author_id/{}'.format(author_id)
    query = {'author_id': author_id, 'view': 'ENHANCED'}
    raw = get_content(cache_file, url=endpoint, refresh=refresh,
                      params=query)
    self.xml = ET.fromstring(raw)
def __init__(self, query, filepath, url, refresh, count=200, start=0,
             max_entries=5000):
    """Class intended for use a superclass to perform a search query.

    Parameters
    ----------
    query : str
        A string of the query.

    filepath : str
        The complete filepath and -name of the cached file.

    url : str
        The API access point.

    refresh : bool
        Whether to refresh the cached file if it exists or not.

    count : int (optional, default=200)
        The number of entries to be displayed at once.  A smaller
        number means more queries with each query having less results.

    start : int (optional, default=0)
        The entry number of the first search item to start with.

    max_entries : int (optional, default=5000)
        Raise error when the number of results is beyond this number.
        The Scopus Search Engine does not allow more than 5000 entries.

    Raises
    ------
    Exception
        If the number of search results exceeds max_entries.
    """
    if not refresh and exists(filepath):
        # Serve from cache: the file stores one JSON record per line.
        with open(filepath) as fh:
            self._json = [loads(line) for line in fh.readlines()]
    else:
        # Ask for zero results first, just to learn the total hit count.
        params = {'query': query, 'count': 0, 'start': 0}
        res = get_content(filepath, url=url, refresh=refresh,
                          params=params, accept='json')
        data = loads(res.decode('utf-8'))['search-results']
        remaining = int(data.get('opensearch:totalResults', 0))
        if remaining > max_entries:
            raise Exception(('Found {} matches. '
                             'Set max_entries to a higher number or '
                             'change your query ({})').format(remaining,
                                                              query))
        # Page through the result set, `count` entries per request.
        self._json = []
        while remaining > 0:
            params = {'query': query, 'count': count, 'start': start}
            page = download(url=url, params=params, accept="json").json()
            hits = page.get('search-results', [])
            if 'entry' in hits:
                self._json.extend(dict(entry) for entry in hits['entry'])
            start += count
            remaining -= count
        # Persist the collected records, one JSON document per line.
        with open(filepath, 'wb') as fh:
            fh.writelines('{}\n'.format(dumps(rec)).encode('utf-8')
                          for rec in self._json)
def __init__(self, EID, view='META_ABS', refresh=False, id_type=None):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    EID : str
        The Scopus ID (EID) of an abstract.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Allowed values: META,
        META_ABS, FULL, where FULL includes all information of META_ABS
        view and META_ABS includes all information of the META view.  See
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
        for details.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    id_type : str (optional, default=None)
        The type of used ID.  Allowed values: None, 'eid', 'pii',
        'scopus_id', 'pubmed_id', 'doi'.  If the value is None, the
        function tries to infer the ID type itself.

    Raises
    ------
    ValueError
        If the view or id_type parameter contains invalid entries.

    Notes
    -----
    The files are cached in ~/.scopus/abstract_retrieval/{eid}.  A DOI
    always contains a '/', which is a path separator on some operating
    systems, so '/' is replaced by '_' in the cache filename.
    """
    EID = str(EID)
    # Validate view, then resolve (or validate) the identifier type.
    allowed_views = ('META', 'META_ABS', 'FULL')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))
    if id_type is None:
        id_type = detect_id_type(EID)
    else:
        allowed_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi')
        if id_type not in allowed_id_types:
            raise ValueError('id_type parameter must be one of ' +
                             ', '.join(allowed_id_types))
    # Fetch (or load cached) JSON and unwrap the response envelope.
    cache_file = join(config.get('Directories', 'AbstractRetrieval'),
                      EID.replace('/', '_'))
    endpoint = "https://api.elsevier.com/content/abstract/{}/{}".format(
        id_type, EID)
    raw = get_content(cache_file, url=endpoint, refresh=refresh,
                      accept='json', params={'view': view})
    self._json = loads(raw.decode('utf-8'))['abstracts-retrieval-response']
    # Convenience pointers into the bibrecord; default to {} at each
    # level so attribute access never has to special-case missing keys.
    bibrecord = self._json.get('item', {}).get('bibrecord', {})
    self._head = bibrecord.get('head', {})
    tail = bibrecord.get('tail', {})
    self._tail = {} if tail is None else tail
    self._confinfo = self._head.get('source', {})\
                               .get('additional-srcinfo', {})\
                               .get('conferenceinfo', {})
def __init__(self, ISSN, refresh=False):
    """Class to represent a journal from the Scopus Serial Title API,
    looked up by ISSN.

    Parameters
    ----------
    ISSN : str or int
        The ISSN of the journal.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.
    """
    ISSN = str(ISSN)
    self.issn = ISSN
    qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN)
    url = ("http://api.elsevier.com/content/serial/title/issn:" + ISSN)
    # NOTE(review): unlike sibling classes there is no ET.fromstring()
    # here, yet self.xml is queried with find() below -- presumably this
    # (older) get_content already returns a parsed element; confirm its
    # signature (called positionally: qfile, refresh, url).
    self.xml = get_content(qfile, refresh, url)
    self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher')
    self.title = get_encoded_text(self.xml, 'entry/dc:title')
    self.aggregationType = get_encoded_text(self.xml,
                                            'entry/prism:aggregationType')
    self.prism_url = get_encoded_text(self.xml, 'entry/prism:url')
    # Impact factors: for each metric read the value and the 'year'
    # attribute of the same element (-1 when the element is missing).
    SNIP = get_encoded_text(self.xml, 'entry/SNIPList/SNIP')
    SNIP_year = self.xml.find('entry/SNIPList/SNIP', ns)
    if SNIP_year is not None:
        SNIP_year = SNIP_year.get('year')
    else:
        SNIP_year = -1
    IPP = get_encoded_text(self.xml, 'entry/IPPList/IPP')
    IPP_year = self.xml.find('entry/IPPList/IPP', ns)
    if IPP_year is not None:
        IPP_year = IPP_year.get('year')
    else:
        IPP_year = -1
    SJR = get_encoded_text(self.xml, 'entry/SJRList/SJR')
    SJR_year = self.xml.find('entry/SJRList/SJR', ns)
    if SJR_year is not None:
        SJR_year = SJR_year.get('year')
    else:
        SJR_year = -1
    # Expose each metric as (float value, int year), or (None, None)
    # when the metric is absent or empty.
    if SNIP:
        self.SNIP = float(SNIP)
        self.SNIP_year = int(SNIP_year)
    else:
        self.SNIP = None
        self.SNIP_year = None
    if IPP:
        self.IPP = float(IPP)
        self.IPP_year = int(IPP_year)
    else:
        self.IPP = None
        self.IPP_year = None
    if SJR:
        self.SJR = float(SJR)
        self.SJR_year = int(SJR_year)
    else:
        self.SJR = None
        self.SJR_year = None
    # Public Scopus page and journal homepage links, when present.
    scopus_url = self.xml.find('entry/link[@ref="scopus-source"]')
    if scopus_url is not None:
        self.scopus_url = scopus_url.attrib['href']
    else:
        self.scopus_url = None
    homepage = self.xml.find('entry/link[@ref="homepage"]')
    if homepage is not None:
        self.homepage = homepage.attrib['href']
    else:
        self.homepage = None
def __init__(self, EID, view='META_ABS', refresh=False):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    EID : str
        The Scopus ID of an abstract.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Currently
        supported values: META, META_ABS, FULL.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Raises
    ------
    ValueError
        If the view parameter is not one of the allowed values.

    Notes
    -----
    The files are cached in ~/.scopus/xml/{eid}.
    """
    allowed_views = ('META', 'META_ABS', 'FULL')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))
    # Get file content
    qfile = os.path.join(SCOPUS_XML_DIR, EID)
    url = "http://api.elsevier.com/content/abstract/eid/{}".format(EID)
    params = {'view': view}
    xml = ET.fromstring(
        get_content(qfile, url=url, refresh=refresh, params=params))
    self.xml = xml
    if xml.tag == 'service-error':
        raise Exception('\n{0}\n{1}'.format(EID, self.xml))
    # Parse coredata
    coredata = xml.find('dtd:coredata', ns)
    self._url = get_encoded_text(coredata, 'prism:url')
    self.identifier = get_encoded_text(coredata, 'dc:identifier')
    self.eid = get_encoded_text(coredata, 'dtd:eid')
    self._doi = get_encoded_text(coredata, 'prism:doi')
    self._title = get_encoded_text(coredata, 'dc:title')
    self._aggregationType = get_encoded_text(coredata,
                                             'prism:aggregationType')
    self._publicationName = get_encoded_text(coredata,
                                             'prism:publicationName')
    self._srctype = get_encoded_text(coredata, 'dtd:srctype')
    self._citedby_count = get_encoded_text(coredata, 'dtd:citedby-count')
    self._publisher = get_encoded_text(coredata, 'dc:publisher')
    self._source_id = get_encoded_text(coredata, 'dtd:source-id')
    self._issn = get_encoded_text(coredata, 'prism:issn')
    self._volume = get_encoded_text(coredata, 'prism:volume')
    self._issueIdentifier = get_encoded_text(coredata,
                                             'prism:issueIdentifier')
    self._article_number = get_encoded_text(coredata, 'dtd:article-number')
    self._startingPage = get_encoded_text(coredata, 'prism:startingPage')
    self._endingPage = get_encoded_text(coredata, 'prism:endingPage')
    self._pageRange = get_encoded_text(coredata, 'prism:pageRange')
    self._coverDate = get_encoded_text(coredata, 'prism:coverDate')
    self.creator = get_encoded_text(coredata, 'dc:creator')
    self.description = get_encoded_text(coredata, 'dc:description')
    sl = coredata.find('dtd:link[@rel="scopus"]', ns).get('href')
    self_link = coredata.find('dtd:link[@rel="self"]', ns).get('href')
    cite_link = coredata.find('dtd:link[@rel="cited-by"]', ns)
    # BUGFIX: the truthiness of an Element is its number of children
    # (and deprecated); a childless <link> element is falsy, so the
    # previous `if cite_link:` never extracted the href.  Compare
    # against None instead.
    if cite_link is not None:
        cite_link = cite_link.get('href')
    self.scopus_link = sl
    self.self_link = self_link
    self.cite_link = cite_link
    # Parse subject-areas
    subjectAreas = xml.find('dtd:subject-areas', ns)
    try:
        self._subjectAreas = [a.text for a in subjectAreas]
    except TypeError:  # subject-areas element absent (find returned None)
        self._subjectAreas = None
    # Parse authors
    authors = xml.find('dtd:authors', ns)
    self._authors = [_ScopusAuthor(author) for author in authors]
    self._affiliations = [_ScopusAffiliation(aff) for aff in
                          xml.findall('dtd:affiliation', ns)]
    # Parse items
    items = xml.find('item', ns)
    self._website = get_encoded_text(
        items, 'bibrecord/head/source/website/ce:e-address')
    try:
        self._citationType = items.find(
            'bibrecord/head/citation-info/citation-type').get("code")
    except AttributeError:  # element absent
        self._citationType = None
    try:
        self._citationLang = items.find(
            'bibrecord/head/citation-info/citation-language').get(
            "language")
    except AttributeError:  # element absent
        self._citationLang = None
    # BUGFIX: previously this read `tail.find(...)`, but no name `tail`
    # exists in this scope; the NameError was swallowed by a bare
    # except, so self._references was always None.  Search from `items`.
    try:
        self._references = items.find('bibrecord/tail/bibliography', ns)
    except AttributeError:  # 'item' element absent (items is None)
        self._references = None
def __init__(self, identifier, api, refresh, id_type=None, view=None,
             date=None):
    """Class intended as superclass to perform retrievals.

    Parameters
    ----------
    identifier : str or int
        A string of the query.

    api : str
        The name of the Scopus API to be accessed.  Allowed values:
        AbstractRetrieval, AuthorRetrieval, CitationOverview,
        ContentAffiliationRetrieval.

    refresh : bool
        Whether to refresh the cached file if it exists or not.

    id_type : str (optional, default=None)
        The type of used ID.
        Note: Will only take effect for the AbstractRetrieval API.

    view : str (optional, default=None)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Allowed values: STANDARD,
        COMPLETE.
        Note: Will only take effect for the AbstractRetrieval API.

    date : str (optional, default=None)
        A string combining two years with a hyphen for which citations
        should be looked up for.
        Note: Will only take effect for the CitationOverview API.

    Raises
    ------
    ValueError
        If the api parameter or view parameter is an invalid entry.
    """
    # Checks
    if api not in URL:
        raise ValueError('api parameter must be one of ' +
                         ', '.join(URL.keys()))
    if not config.has_section('Directories'):
        create_config()
    # BUGFIX: identifier may be an int (see docstring), but the string
    # operations below (split, concatenation, replace) require str.
    identifier = str(identifier)
    # Construct parameters
    url = URL[api]
    if api == "AbstractRetrieval":
        url += id_type + "/"
    elif api == "AuthorRetrieval":
        view = 'ENHANCED'
    params = {'view': view}
    if api == 'CitationOverview':
        params.update({'date': date,
                       'scopus_id': identifier.split('0-')[-1]})
    url += identifier
    # Parse file contents ('/' in DOIs is not a valid filename character)
    qfile = join(config.get('Directories', api),
                 identifier.replace('/', '_'))
    res = get_content(qfile, refresh, url=url, accept='json',
                      params=params)
    self._json = loads(res.decode('utf-8'))
def __init__(self, author_id, refresh=False, refresh_aff=False, level=1):
    """Class to represent a Scopus Author query by the scopus-id.

    Parameters
    ----------
    author_id : str or int
        The ID of the author to search for.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file (if it exists) or not.

    refresh_aff : bool (optional, default=False)
        Whether to refresh the cached corresponding affiliation views
        (if they exist) or not.

    level : int (optional, default=1)
        Number of * to print in property __str__.

    Notes
    -----
    The files are cached in ~/.scopus/author/{author_id}.
    """
    author_id = str(int(author_id))
    self.level = level
    # Download (or read cached) ENHANCED author view as XML.
    qfile = os.path.join(SCOPUS_AUTHOR_DIR, author_id)
    url = ('http://api.elsevier.com/content/author/'
           'author_id/{}').format(author_id)
    params = {'author_id': author_id, 'view': 'ENHANCED'}
    xml = ET.fromstring(
        get_content(qfile, url=url, refresh=refresh, params=params))
    self.xml = xml
    # Simple scalar metrics; counts default to 0 when absent.
    self._orcid = get_encoded_text(xml, 'coredata/orcid')
    hindex = get_encoded_text(xml, 'h-index')
    self._hindex = int(hindex) if hindex is not None else 0
    ndocuments = get_encoded_text(xml, 'coredata/document-count')
    self._ndocuments = int(ndocuments) if ndocuments is not None else 0
    _author_id = get_encoded_text(xml, 'coredata/dc:identifier')
    self._author_id = _author_id.split(":")[-1]
    citation_count = get_encoded_text(xml, 'coredata/citation-count')
    self._citation_count = int(
        citation_count) if citation_count is not None else 0
    ncited_by = get_encoded_text(xml, 'coredata/cited-by-count')
    self._ncited_by = int(ncited_by) if ncited_by is not None else 0
    ncoauthors = get_encoded_text(xml, 'coauthor-count')
    self._ncoauthors = int(ncoauthors) if ncoauthors is not None else 0
    self._current_affiliation = get_encoded_text(
        xml,
        'author-profile/affiliation-current/affiliation/ip-doc/afdispname')
    # affiliation history (sort out faulty historic affiliations):
    # entries whose ip-doc subtree holds only a single node are skipped.
    aff_ids = [
        el.attrib.get('affiliation-id')
        for el in xml.findall(
            'author-profile/affiliation-history/affiliation')
        if el is not None and len(list(el.find("ip-doc").iter())) > 1
    ]
    # One ScopusAffiliation lookup (possibly network-bound) per ID.
    affs = [
        ScopusAffiliation(aff_id, refresh=refresh_aff)
        for aff_id in aff_ids
    ]
    self._affiliation_history = affs
    date_created = xml.find('author-profile/date-created', ns)
    if date_created is not None:
        # (year, month, day) tuple of ints
        self._date_created = (int(date_created.attrib['year']),
                              int(date_created.attrib['month']),
                              int(date_created.attrib['day']))
    else:
        self._date_created = (None, None, None)
    # Research areas
    self._area_elements = xml.findall('subject-areas/subject-area')
    # {code: name}
    d = {int(ae.attrib['code']): ae.text for ae in self._area_elements}
    freqs = xml.findall('author-profile/classificationgroup/'
                        'classifications[@type="ASJC"]/classification')
    # {code: frequency}
    c = {int(cls.text): int(cls.attrib['frequency']) for cls in freqs}
    self._subject_freq = c
    # (name, frequency) pairs, most frequent first.
    categories = [(d[code], c[code]) for code in d]
    categories.sort(reverse=True, key=itemgetter(1))
    self.categories = categories
    self._firstname = (get_encoded_text(
        xml, 'author-profile/preferred-name/given-name') or '')
    self._lastname = (get_encoded_text(
        xml, 'author-profile/preferred-name/surname') or '')
    self._name = (
        (get_encoded_text(xml,
                          'author-profile/preferred-name/given-name') or
         '') + ' ' +
        (get_encoded_text(xml,
                          'author-profile/preferred-name/surname') or ''))
    # Real website for the author
    self._scopus_url = xml.find('coredata/link[@rel="scopus-author"]')
    if self._scopus_url is not None:
        self._scopus_url = self._scopus_url.get('href')
    # API URL for coauthors
    self._coauthor_url = xml.find('coredata/link[@rel="coauthor-search"]')
    if self._coauthor_url is not None:
        self._coauthor_url = self._coauthor_url.get('href')
    # Publication history
    pub_hist_elements = self.xml.findall('author-profile/journal-history/')
    self._pub_hist = pub_hist_elements
def __init__(self, query, filepath, url, refresh, count=200, start=0,
             max_entries=5000, view='STANDARD'):
    """Class intended as superclass to perform a search query.

    Parameters
    ----------
    query : str
        A string of the query.

    filepath : str
        The complete filepath and -name of the cached file.

    url : str
        The API access point.

    refresh : bool
        Whether to refresh the cached file if it exists or not.

    count : int (optional, default=200)
        The number of entries to be displayed at once.  A smaller
        number means more queries with each query having less results.

    start : int (optional, default=0)
        The entry number of the first search item to start with.

    max_entries : int (optional, default=5000)
        Raise error when the number of results is beyond this number.
        The Scopus Search Engine does not allow more than 5000 entries.

    view : str (optional, default=STANDARD)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Allowed values: STANDARD,
        COMPLETE.
        Note: Only the Scopus search API additionally uses view COMPLETE.

    Raises
    ------
    Exception
        If the number of search results exceeds max_entries.

    ValueError
        If the view parameter contains invalid entries.
    """
    allowed_views = ('STANDARD', 'COMPLETE')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))
    if not refresh and exists(filepath):
        # Cache hit: the file stores one JSON record per line.
        with open(filepath) as fh:
            self._json = [loads(line) for line in fh.readlines()]
        return
    # Cache miss (or forced refresh): ask for zero results first,
    # just to learn the total number of hits.
    params = {'query': query, 'count': 0, 'start': 0, 'view': view}
    res = get_content(filepath, url=url, refresh=refresh, params=params,
                      accept='json')
    data = loads(res.decode('utf-8'))['search-results']
    total = int(data.get('opensearch:totalResults', 0))
    if total > max_entries:
        raise Exception(('Found {} matches. '
                         'Set max_entries to a higher number or '
                         'change your query ({})').format(total, query))
    # Page through the hits, `count` entries per request.
    self._json = []
    remaining = total
    while remaining > 0:
        params.update({'count': count, 'start': start})
        page = download(url=url, params=params, accept="json").json()
        hits = page.get('search-results', [])
        if 'entry' in hits:
            self._json.extend(dict(entry) for entry in hits['entry'])
        start += count
        remaining -= count
    # Persist the collected records, one JSON document per line.
    with open(filepath, 'wb') as fh:
        fh.writelines('{}\n'.format(dumps(rec)).encode('utf-8')
                      for rec in self._json)
def __init__(self, identifier, api, refresh, id_type=None, view=None,
             date=None):
    """Class intended as superclass to perform retrievals.

    Parameters
    ----------
    identifier : str or int
        A string of the query.

    api : str
        The name of the Scopus API to be accessed.  Allowed values:
        AbstractRetrieval, AuthorRetrieval, CitationOverview,
        ContentAffiliationRetrieval.

    refresh : bool
        Whether to refresh the cached file if it exists or not.

    id_type : str (optional, default=None)
        The type of used ID.
        Note: Will only take effect for the AbstractRetrieval API.

    view : str (optional, default=None)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Allowed values: STANDARD,
        COMPLETE.
        Note: Will only take effect for the AbstractRetrieval API.

    date : str (optional, default=None)
        A string combining two years with a hyphen for which citations
        should be looked up for.
        Note: Will only take effect for the CitationOverview API.

    Raises
    ------
    ValueError
        If the api parameter or view parameter is an invalid entry.
    """
    # Checks
    if api not in URL:
        raise ValueError('api parameter must be one of ' +
                         ', '.join(URL.keys()))
    if not config.has_section('Directories'):
        create_config()
    # BUGFIX: identifier may be an int (see docstring), but the string
    # operations below (split, concatenation, replace) require str.
    identifier = str(identifier)
    # Construct parameters
    url = URL[api]
    if api == "AbstractRetrieval":
        url += id_type + "/"
    elif api == "AuthorRetrieval":
        view = 'ENHANCED'
    params = {'view': view}
    if api == 'CitationOverview':
        params.update({
            'date': date,
            'scopus_id': identifier.split('0-')[-1]
        })
    url += identifier
    # Parse file contents ('/' in DOIs is not a valid filename character)
    qfile = join(config.get('Directories', api),
                 identifier.replace('/', '_'))
    res = get_content(qfile, refresh, url=url, accept='json',
                      params=params)
    self._json = loads(res.decode('utf-8'))
def __init__(self, ISSN, refresh=False):
    """Class to represent a Scopus Serial Title (journal) by its ISSN.

    Parameters
    ----------
    ISSN : str or int
        The ISSN of the serial title to look up.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.
    """
    ISSN = str(ISSN)
    self.issn = ISSN
    qfile = os.path.join(SCOPUS_ISSN_DIR, ISSN)
    url = ("https://api.elsevier.com/content/serial/title/issn:" + ISSN)
    self.xml = ET.fromstring(get_content(qfile, refresh, url))
    self.publisher = get_encoded_text(self.xml, 'entry/dc:publisher')
    self.title = get_encoded_text(self.xml, 'entry/dc:title')
    self.aggregationType = get_encoded_text(self.xml,
                                            'entry/prism:aggregationType')
    self.prism_url = get_encoded_text(self.xml, 'entry/prism:url')

    def parse_metric(label):
        # Return (value, year) for one impact metric (SNIP, IPP or SJR),
        # or (None, None) when the metric is absent from the response.
        path = 'entry/{0}List/{0}'.format(label)
        value = get_encoded_text(self.xml, path)
        elem = self.xml.find(path, ns)
        year = elem.get('year') if elem is not None else -1
        if value:
            return float(value), int(year)
        return None, None

    # Impact factors
    self.SNIP, self.SNIP_year = parse_metric('SNIP')
    self.IPP, self.IPP_year = parse_metric('IPP')
    self.SJR, self.SJR_year = parse_metric('SJR')

    def parse_link(ref):
        # Return the href of the entry/link element with the given @ref
        # attribute, or None if there is no such link.
        elem = self.xml.find('entry/link[@ref="{}"]'.format(ref))
        return elem.attrib['href'] if elem is not None else None

    self.scopus_url = parse_link('scopus-source')
    self.homepage = parse_link('homepage')
def __init__(self, EID, view='META_ABS', refresh=False):
    """Class to represent the results from a Scopus abstract.

    Parameters
    ----------
    EID : str
        The Scopus ID (EID) of an abstract.

    view : str (optional, default=META_ABS)
        The view of the file that should be downloaded.  Will not take
        effect for already cached files.  Supported values: META,
        META_ABS, FULL, where FULL includes all information of the
        META_ABS view and META_ABS includes all information of the META
        view.  See
        https://dev.elsevier.com/guides/AbstractRetrievalViews.htm
        for details.

    refresh : bool (optional, default=False)
        Whether to refresh the cached file if it exists or not.

    Raises
    ------
    ValueError
        If the view parameter is not one of the supported values.

    Notes
    -----
    The files are cached in ~/.scopus/xml/{eid}.
    """
    allowed_views = ('META', 'META_ABS', 'FULL')
    if view not in allowed_views:
        raise ValueError('view parameter must be one of ' +
                         ', '.join(allowed_views))
    # Get file content
    qfile = os.path.join(SCOPUS_XML_DIR, EID)
    url = "https://api.elsevier.com/content/abstract/eid/{}".format(EID)
    params = {'view': view}
    xml = ET.fromstring(
        get_content(qfile, url=url, refresh=refresh, params=params))
    # Remove default namespace if present
    remove = u'{http://www.elsevier.com/xml/svapi/abstract/dtd}'
    nsl = len(remove)
    # iter() replaces getiterator(), which was removed in Python 3.9.
    for elem in xml.iter():
        if elem.tag.startswith(remove):
            elem.tag = elem.tag[nsl:]
    self.xml = xml
    if xml.tag == 'service-error':
        raise Exception('\n{0}\n{1}'.format(EID, self.xml))

    # Parse coredata
    coredata = xml.find('coredata', ns)
    self._url = get_encoded_text(coredata, 'prism:url')
    self.identifier = get_encoded_text(coredata, 'dc:identifier')
    self.eid = get_encoded_text(coredata, 'eid')
    self._doi = get_encoded_text(coredata, 'prism:doi')
    self._title = get_encoded_text(coredata, 'dc:title')
    self._aggregationType = get_encoded_text(coredata,
                                             'prism:aggregationType')
    self._publicationName = get_encoded_text(coredata,
                                             'prism:publicationName')
    self._srctype = get_encoded_text(coredata, 'srctype')
    self._citedby_count = get_encoded_text(coredata, 'citedby-count')
    self._publisher = get_encoded_text(coredata, 'dc:publisher')
    self._source_id = get_encoded_text(coredata, 'source-id')
    self._issn = get_encoded_text(coredata, 'prism:issn')
    self._volume = get_encoded_text(coredata, 'prism:volume')
    self._issueIdentifier = get_encoded_text(coredata,
                                             'prism:issueIdentifier')
    self._article_number = get_encoded_text(coredata, 'article-number')
    self._startingPage = get_encoded_text(coredata, 'prism:startingPage')
    self._endingPage = get_encoded_text(coredata, 'prism:endingPage')
    self._pageRange = get_encoded_text(coredata, 'prism:pageRange')
    self._coverDate = get_encoded_text(coredata, 'prism:coverDate')
    self.creator = get_encoded_text(coredata, 'dc:creator')
    self._description = get_encoded_text(coredata, 'dc:description')
    self._abstract = get_encoded_text(coredata,
                                      'dc:description/abstract/ce:para')
    self.scopus_link = coredata.find('link[@rel="scopus"]', ns).get('href')
    self.self_link = coredata.find('link[@rel="self"]', ns).get('href')
    cite_link = coredata.find('link[@rel="cited-by"]', ns)
    # Explicit None check: an Element without children is falsy, so a bare
    # `if cite_link:` would never extract the href even when the link exists.
    if cite_link is not None:
        cite_link = cite_link.get('href')
    self.cite_link = cite_link
    # Parse authkeywords
    author_keywords = xml.find('authkeywords', ns)
    try:
        self._authkeywords = [a.text for a in author_keywords]
    except TypeError:  # element missing: iterating None
        self._authkeywords = None
    # Parse subject-areas
    subjectAreas = xml.find('subject-areas', ns)
    try:
        self._subjectAreas = [a.text for a in subjectAreas]
    except TypeError:  # element missing: iterating None
        self._subjectAreas = None
    # Parse authors
    authors = xml.find('authors', ns)
    try:
        self._authors = [_ScopusAuthor(author) for author in authors]
    except TypeError:  # element missing: iterating None
        self._authors = None
    self._affiliations = [_ScopusAffiliation(aff)
                          for aff in xml.findall('affiliation', ns)]
    # Parse items
    items = xml.find('item', ns)
    self._website = get_encoded_text(
        items, 'bibrecord/head/source/website/ce:e-address')
    try:
        self._citationType = items.find(
            'bibrecord/head/citation-info/citation-type').get("code")
    except AttributeError:  # find() returned None, or items is None
        self._citationType = None
    try:
        self._citationLang = items.find(
            'bibrecord/head/citation-info/citation-language').get(
                "language")
    except AttributeError:  # find() returned None, or items is None
        self._citationLang = None
    try:
        self._references = items.find('bibrecord/tail/bibliography', ns)
    except AttributeError:  # items is None
        self._references = None