def refine_scopus(docs: DocumentSet, *, search_title=True) -> Tuple[DocumentSet, DocumentSet]:
    """Attempt to fetch Scopus metadata for each document in the given set.

    Returns a tuple containing two sets: the documents available on Scopus
    and the remaining documents not found on Scopus.

    Documents are retrieved based on their identifier (DOI, Pubmed ID, or
    Scopus ID). Documents without a unique identifier are retrieved by
    performing a fuzzy search based on their title. This is not ideal and
    can lead to false positives (i.e., another document is found having the
    same title), thus it can be disabled if necessary.

    :param search_title: Flag to toggle searching by title.
    """
    from pybliometrics.scopus import ScopusSearch

    def callback(doc):
        doc_id = doc.id  # renamed from `id` to avoid shadowing the builtin
        if isinstance(doc, ScopusDocument):
            # Already Scopus metadata; nothing to fetch.
            return doc

        if doi := doc_id.doi:
            try:
                return ScopusDocument.from_doi(doi)
            except Exception as e:
                # `logging.warn` is deprecated; `logging.warning` is the
                # documented spelling.
                logging.warning(f'no document found for DOI {doi}: {e}')
                return None

        title = canonical(doc_id.title)
        # Fall back to a fuzzy title search only when enabled and the title
        # is long enough to make accidental matches unlikely.
        if search_title and len(title) > 10:
            query = f'TITLE({title})'
            response = ScopusSearch(query, view='STANDARD', download=False)
            nresults = response.get_results_size()

            # Only download the records when the hit count is small enough
            # to be plausibly unambiguous.
            if 0 < nresults < 10:
                response = ScopusSearch(query, view='STANDARD')

                for record in response.results or []:
                    if canonical(record.title) == title:
                        return ScopusDocument.from_eid(record.eid)

        return None
# Takes an advanced Scopus search as input from the console.
# =====================================================================
# `input()` already returns a str; the original `str(...)` wrapper was
# redundant.
query_main = input('Enter Scopus Advanced Search query:')

# API Query
# Downloads results database for search query
# =====================================================================

# Gets the number of results from Scopus without downloading the records.
dout_tmp = Scopus(query_main, refresh=False, subscriber=True, view=None,
                  download=False)
dout_size = dout_tmp.get_results_size()
print('Found', dout_size, 'paper(s) for search query: >>', query_main, '<<')

# Gets results from Scopus as a list of namedtuples.
# `start` is taken before the try block so the finally clause can always
# compute an elapsed time, even if the download raises.
start = time.time()
print('Downloading data for query >>', query_main, '<<')
try:
    dout_dld = Scopus(query_main, refresh=False, subscriber=True, view=None,
                      download=True, verbose=True)
finally:
    # NOTE: this also prints on failure (best-effort timing report).
    # The stray trailing comma after the original print() built and
    # discarded a tuple; removed.
    end = time.time()
    print('Download complete, elapsed time=', end - start, 'seconds')
    print()
# Uncomment the next line to configure the API access key on the program's
# first run.
# create_config()

# Search criteria.
query = 'TITLE-ABS-KEY("protected area" OR "conservation" OR "ecology" OR "marine protected" OR "national forest")' \
        ' AND TITLE-ABS-KEY("remote sensing" OR "earth observation" OR "Landsat" OR "Lidar" OR "MODIS" OR "Radar")' \
        ' AND TITLE-ABS-KEY("Brazil" OR "Brasil")' \
        ' AND PUBYEAR BEF 2021 AND PUBYEAR AFT 1999' \
        ' AND LANGUAGE(english OR portuguese)'

# Build a ScopusSearch object holding the information for the query.
scopus = ScopusSearch(query, max_entries=None, subscriber=False, verbose=True)

# Report the number of records collected through the API.
print("Número total de publicações: {}.".format(scopus.get_results_size()))

# All digital identifiers (EIDs) retrieved by the API during the search.
eids_documentos = scopus.get_eids()

# Collect the article metadata for every EID via the helper function.
df = coletar_artigos(eids_documentos, api_view)

# Persist every record to a .csv file for later inspection.
df.to_csv("data/resultado_pesquisa_scopus.csv", index=False, quoting=csv.QUOTE_ALL)

"""-------------------------------------------------------------
EXTRA: Plotando o histórico temporal de publicações
-------------------------------------------------------------"""

# Convert the "data_publicacao" column to the datetime dtype.
def main():
    """Run a sample ISSN query against Scopus and print the hit count."""
    search = ScopusSearch('ISSN ( 0022-3514 )')
    result_count = search.get_results_size()
    print(result_count)
from pybliometrics.scopus import AbstractRetrieval
from pybliometrics.scopus import AuthorRetrieval
from lxml import etree as et
import os
import requests
from config import dcmappings

# Root directory where the SAF (Simple Archive Format) package is written.
saf_root_directory = 'saf'
# Base URL for fetching full-text articles by DOI from the Elsevier API.
science_direct_base_url = 'https://api.elsevier.com/content/article/doi/'

# Both environment lookups raise KeyError when unset, failing fast before
# any API call is made.
apiKey = os.environ['SCOPUS_API_KEY']
scopus_search_string = os.environ['SCOPUS_SEARCH_STRING']

# NOTE(review): `ScopusSearch` is not imported in this chunk — presumably it
# comes from `pybliometrics.scopus` like the retrievals above; confirm the
# import exists elsewhere in the file.
s = ScopusSearch(scopus_search_string, refresh=True, view='COMPLETE')
print(s.get_results_size())
eids = s.get_eids()
counter = 0

# Metadata descriptor for the ORCID field (local schema,
# contributor.author_orcid_id element/qualifier).
orcid_mapping = {
    'schema': 'local',
    'attributes': {
        'element': 'contributor',
        'qualifier': 'author_orcid_id'
    }
}


def GetOrcidFromScopusID(scopus_id):
    # Look up the author's profile by Scopus ID.
    # NOTE(review): this definition continues beyond the visible chunk — the
    # `try` block's handler and the return value are not shown here.
    try:
        author = AuthorRetrieval(scopus_id)
def _save_abstract(eid):
    """Fetch the FULL-view abstract for *eid* and write it to a .txt file
    named after the EID."""
    ab = AbstractRetrieval(eid, view='FULL')  # searches for abstracts using eid
    path = os.path.join('/home/benjamin/Python_Codes/Abstracts', eid + '.txt')
    # One txt file per abstract, titled by its EID.
    with open(path, 'w') as f:
        f.write("%s\n" % ab.abstract)


def lookup():
    """Interactively search Scopus and optionally download result abstracts.

    Prompts for search terms and an exact/inexact mode, prints the hit
    count, and — on request — downloads the results and saves one or all
    abstracts to disk.
    """
    search = input('Enter Search Terms\n')
    option = input('Enter 1 for Exact search, 0 for inexact search\n')
    if option == '1':
        query = '{' + search + '}'  # exact search
    else:
        query = 'TITLE-ABS-KEY( ' + search + ')'  # inexact search
    s = ScopusSearch(query, download=False)
    print('Number of results: ')
    length = s.get_results_size()
    print(length)
    if length > 0:
        dl = input('Would you like to download the results y/n\n')
        if dl == 'y':
            s = ScopusSearch(query, download=True)
            # Fix: the original wrapped the results in pd.DataFrame twice;
            # one constructor call is sufficient.
            dataframe = pd.DataFrame(s.results)
            pd.options.display.max_colwidth = 150
            pd.options.display.max_rows = None
            print(dataframe[['eid', 'title']])
            # Ensure the eid column (column 0) is plain strings for
            # filename construction.
            dataframe.iloc[:, 0] = dataframe.iloc[:, 0].astype(str)
            option2 = input(
                '\n Enter the row of the abstract you want to download, or enter ALL to download all\n'
            )
            if option2 == 'ALL':
                for i in progressbar(range(length), "Download Progress ", 40):
                    _save_abstract(dataframe.iloc[i, 0])
            else:
                try:
                    val = int(option2)
                    print('Attempting to download abstract with eid '
                          + dataframe.iloc[val, 0])
                    _save_abstract(dataframe.iloc[val, 0])
                    print('Success!\n')
                except ValueError:
                    print('Invalid row number\n')
    else:
        print('No results found, please try again\n')
class ScopusMiner:
    """Runs Scopus searches for keyword combinations and stores the results.

    For each combination produced by ``CombinationIterator``, performs a
    TITLE-ABS-KEY search, stores up to ``noOfResults`` previously-unseen
    documents in the database, and finally dumps everything to an .xlsx file.
    """

    def __init__(self):
        # Number of new (non-duplicate) results to keep per search.
        self.noOfResults = 3
        self.database = Database()

    def setNoOfResults(self, noOfResults):
        """Set how many new results each search should contribute."""
        self.noOfResults = noOfResults

    def performSearch(self, searchWords):
        """Search Scopus for all of *searchWords* and store the results.

        Searching in TITLE-ABStract-KEYwords is the default search mode on
        Scopus; all words must match (AND).
        """
        # `join` replaces the original manual concatenation loop and also
        # closes the parenthesis for an empty word list.
        searchString = 'TITLE-ABS-KEY(' + ' AND '.join(searchWords) + ')'
        self.searchResult = ScopusSearch(searchString)
        self.searchWords = searchWords
        self.storeResultsInDB()

    def storeResultsInDB(self):
        """Store up to ``noOfResults`` new documents from the last search."""
        stored = 0
        i = 0
        while stored < self.noOfResults:
            # Stop when the results are exhausted. The None-check comes
            # first so an empty result set (results is None) can never be
            # indexed below.
            if (self.searchResult.results is None
                    or i >= self.searchResult.get_results_size()):
                break
            record = self.searchResult.results[i]
            doc = Document()
            # NOTE(review): positional indices into the result namedtuple —
            # presumably 27=abstract, 13=authors, 1=DOI, 4=title for the
            # installed pybliometrics version; confirm against its docs.
            doc.setAbstract(record[27])
            doc.setAuthor(record[13])
            doc.setDOI(record[1])
            doc.setEntryNo(i)
            doc.setKeywords(self.searchWords)
            doc.setTitle(record[4])
            i += 1
            # addItem returns falsy for duplicates; only new items count.
            if self.database.addItem(doc):
                stored += 1

    def run(self, *keywords):
        """Search every keyword combination, then export to Excel."""
        self.permIterator = CombinationIterator(*keywords)
        for _ in tqdm(range(self.permIterator.noOfIterations)):
            self.performSearch(self.permIterator.getNextCombination())
        self.writeToExcel()

    def writeToExcel(self):
        """Dump every stored entry to a timestamped .xlsx workbook."""
        date_time = datetime.now().strftime("%Y%m%d_%H%M")
        workbook = xlsxwriter.Workbook(
            'CombinatorialSearchResults' + date_time + '.xlsx')
        worksheet = workbook.add_worksheet()
        row = 0
        for key in self.database.entries:
            entry = self.database.entries.get(key)
            # Keywords occupy the first columns; the metadata fields follow
            # in the columns after the last keyword.
            for i, word in enumerate(entry.keywords):
                worksheet.write(row, i, word)
            worksheet.write(row, i + 1, entry.doi)
            worksheet.write(row, i + 2, entry.author)
            worksheet.write(row, i + 3, entry.title)
            worksheet.write(row, i + 4, entry.entryNo)
            worksheet.write(row, i + 5, entry.abstract)
            row += 1
        workbook.close()