Beispiel #1
0
def refine_scopus(docs: DocumentSet, *, search_title=True
                  ) -> Tuple[DocumentSet, DocumentSet]:
    """Attempt to fetch Scopus metadata for each document in the given
    set. Returns a tuple containing two sets: the documents available on
    Scopus and the remaining documents not found on Scopus.

    Documents are retrieved based on their identifier (DOI, Pubmed ID, or
    Scopus ID). Documents without a unique identifier are retrieved by
    performing a fuzzy search based on their title. This is not ideal
    and can lead to false positives (i.e., another document is found having
    the same title), thus it can be disabled if necessary.

    :param docs: The documents to look up on Scopus.
    :param search_title: Flag to toggle searching by title.
    :returns: Tuple of (documents found on Scopus, documents not found).
    """
    from pybliometrics.scopus import ScopusSearch

    def callback(doc):
        """Return the Scopus counterpart of ``doc``, or ``None`` if no
        match could be found."""
        doc_id = doc.id

        # Already a Scopus document: nothing to refine.
        if isinstance(doc, ScopusDocument):
            return doc

        # A DOI is authoritative: if the lookup fails we do NOT fall back to
        # the title search, we report the document as not found.
        if doi := doc_id.doi:
            try:
                return ScopusDocument.from_doi(doi)
            except Exception as e:
                logging.warning('no document found for DOI %s: %s', doi, e)
                return None

        title = canonical(doc_id.title)

        # Very short titles are skipped; the title search is only attempted
        # for canonical titles longer than 10 characters.
        if search_title and len(title) > 10:
            query = f'TITLE({title})'

            # First ask only for the number of hits (download=False) ...
            response = ScopusSearch(query, view='STANDARD', download=False)
            nresults = response.get_results_size()

            # ... and download the records only for a small, non-empty
            # result set (1 to 9 hits).
            if 0 < nresults < 10:
                response = ScopusSearch(query, view='STANDARD')

                # Accept only a record whose canonical title matches exactly.
                for record in response.results or []:
                    if canonical(record.title) == title:
                        return ScopusDocument.from_eid(record.eid)

        return None

    # NOTE(review): the code that applies `callback` to `docs` and builds the
    # returned tuple is not visible in this excerpt.
# Read an advanced Scopus search query from the console.
# =====================================================================

# input() already returns a str, so no extra conversion is needed.
query_main = input('Enter Scopus Advanced Search query:')

# API query
# Downloads the results for the search query.
# =====================================================================

# Ask Scopus only for the number of results first (download=False).
dout_tmp = Scopus(query_main,
                  refresh=False,
                  subscriber=True,
                  view=None,
                  download=False)
dout_size = dout_tmp.get_results_size()
print('Found', dout_size, 'paper(s) for search query: >>', query_main, '<<')

# Download the results themselves (download=True) and time the request.
start = time.time()
print('Downloading data for query >>', query_main, '<<')
try:
    dout_dld = Scopus(query_main,
                      refresh=False,
                      subscriber=True,
                      view=None,
                      download=True,
                      verbose=True)
except Exception:
    # Report the failure instead of claiming completion, then let the
    # original error propagate unchanged.
    end = time.time()
    print('Download failed, elapsed time=', end - start, 'seconds')
    raise
else:
    end = time.time()
    print('Download complete, elapsed time=', end - start, 'seconds')
    print()
Beispiel #3
0
# Uncomment the next line to configure the access key during the first run of the program.
# create_config()

# Search criteria.
query = (
    'TITLE-ABS-KEY("protected area" OR "conservation" OR "ecology" OR "marine protected" OR "national forest")'
    ' AND TITLE-ABS-KEY("remote sensing" OR "earth observation" OR "Landsat" OR "Lidar" OR "MODIS" OR "Radar")'
    ' AND TITLE-ABS-KEY("Brazil" OR "Brasil")'
    ' AND PUBYEAR BEF 2021 AND PUBYEAR AFT 1999'
    ' AND LANGUAGE(english OR portuguese)'
)

# Create the ScopusSearch object that carries the search parameters.
scopus = ScopusSearch(
    query,
    max_entries=None,
    subscriber=False,
    verbose=True,
)

# Print the number of records reported for the search.
print("Número total de publicações: {}.".format(scopus.get_results_size()))

# List of the electronic identifiers (EIDs) returned by the search.
eids_documentos = scopus.get_eids()

# Collect the article information from the EIDs using the helper function.
df = coletar_artigos(eids_documentos, api_view)

# Store every entry in a .csv file for later reference.
df.to_csv("data/resultado_pesquisa_scopus.csv", index=False, quoting=csv.QUOTE_ALL)
"""-------------------------------------------------------------
    EXTRA: Plotando o histórico temporal de publicações
-------------------------------------------------------------"""
# Convert the data type of the "data_publicacao" column to datetime
def main():
    """Print the number of Scopus results for the query ``ISSN ( 0022-3514 )``."""
    result_count = ScopusSearch('ISSN ( 0022-3514 )').get_results_size()
    print(result_count)
Beispiel #5
0
from pybliometrics.scopus import AbstractRetrieval
from pybliometrics.scopus import AuthorRetrieval
from lxml import etree as et
import os
import requests
from config import dcmappings

# Name of the output root directory and URL prefix of the Elsevier
# article-by-DOI endpoint.
saf_root_directory = 'saf'
science_direct_base_url = 'https://api.elsevier.com/content/article/doi/'

# Both settings are required: a missing environment variable raises KeyError
# at import time.
apiKey = os.environ['SCOPUS_API_KEY']
scopus_search_string = os.environ['SCOPUS_SEARCH_STRING']

# Run the configured search; refresh=True and view='COMPLETE' are passed
# straight through to ScopusSearch.
s = ScopusSearch(scopus_search_string, refresh=True, view='COMPLETE')

# Print the result count, then keep the EIDs of the results.
print(s.get_results_size())
eids = s.get_eids()
# NOTE(review): presumably a running count of processed records; its use is
# not visible in this excerpt.
counter = 0

# Metadata field description (schema / element / qualifier) for an author's
# ORCID iD.
orcid_mapping = {
    'schema': 'local',
    'attributes': {
        'element': 'contributor',
        'qualifier': 'author_orcid_id'
    }
}


def GetOrcidFromScopusID(scopus_id):
    try:
        author = AuthorRetrieval(scopus_id)
def _save_abstract(eid, output_dir):
    """Retrieve the abstract for ``eid`` and write it to ``<output_dir>/<eid>.txt``."""
    ab = AbstractRetrieval(eid, view='FULL')  # searches for abstracts using eid
    # Explicit UTF-8 so non-ASCII characters in an abstract are written the
    # same way regardless of the platform's default encoding.
    with open(os.path.join(output_dir, eid + '.txt'), 'w',
              encoding='utf-8') as f:
        f.write("%s\n" % ab.abstract)


def lookup(output_dir='/home/benjamin/Python_Codes/Abstracts'):
    """Interactively search Scopus and optionally save abstracts to disk.

    Prompts on the console for search terms and search mode, prints the
    number of results, and - if the user agrees - downloads the results,
    lists their EIDs and titles, and saves either one selected abstract or
    all of them as ``<eid>.txt`` files.

    :param output_dir: Directory the abstract text files are written to.
        Defaults to the previously hard-coded location.
    """
    search = input('Enter Search Terms\n')
    option = input('Enter 1 for Exact search, 0 for inexact search\n')

    if option == '1':
        query = '{' + search + '}'  # exact search
    else:
        query = 'TITLE-ABS-KEY( ' + search + ')'  # inexact search

    # Count-only request first, so nothing is downloaded without consent.
    s = ScopusSearch(query, download=False)

    print('Number of results: ')
    length = s.get_results_size()
    print(length)

    if length <= 0:
        print('No results found, please try again\n')
        return

    dl = input('Would you like to download the results y/n\n')
    if dl != 'y':
        return

    s = ScopusSearch(query, download=True)
    # `results` may be None when nothing was downloaded.
    dataframe = pd.DataFrame(s.results or [])  # converts results into a dataframe
    if dataframe.empty:
        print('No results found, please try again\n')
        return

    pd.options.display.max_colwidth = 150
    pd.options.display.max_rows = None
    print(dataframe[['eid', 'title']])

    # EIDs as plain strings; they are used to build the file names.
    dataframe['eid'] = dataframe['eid'].astype(str)
    eids = dataframe['eid'].tolist()

    option2 = input(
        '\n Enter the row of the abstract you want to download, or enter ALL to download all\n'
    )

    if option2 == 'ALL':
        # Iterate over the rows actually downloaded rather than the count
        # reported by the earlier count-only query, which may differ.
        for i in progressbar(range(len(eids)), "Download Progress ", 40):
            _save_abstract(eids[i], output_dir)
        return

    try:
        row = int(option2)
    except ValueError:
        print('Invalid row number\n')
        return

    # Reject rows outside the printed table; this also stops negative
    # numbers from silently selecting rows counted from the end.
    if not 0 <= row < len(eids):
        print('Invalid row number\n')
        return

    eid = eids[row]
    print('Attempting to download abstract with eid ' + eid)
    _save_abstract(eid, output_dir)
    print('Success!\n')
Beispiel #7
0
class ScopusMiner:
    """Run combinatorial keyword searches on Scopus and export the hits.

    For every keyword combination a ``TITLE-ABS-KEY`` search is performed,
    up to ``noOfResults`` new documents per search are stored in the
    database, and finally all stored entries are written to an Excel file.
    """

    def __init__(self):
        # Maximum number of documents stored per search.
        self.noOfResults = 3
        self.database = Database()

    def setNoOfResults(self, noOfResults):
        """Set the maximum number of documents stored per search."""
        self.noOfResults = noOfResults

    @staticmethod
    def _buildSearchString(searchWords):
        """Return the query ``TITLE-ABS-KEY(w1 AND w2 AND ...)``.

        :raises ValueError: if ``searchWords`` is empty, since that would
            yield a malformed query.
        """
        words = list(searchWords)
        if not words:
            raise ValueError(
                'searchWords must contain at least one search term')
        return 'TITLE-ABS-KEY(' + ' AND '.join(words) + ')'

    def performSearch(self, searchWords):
        """Search Scopus for documents matching all ``searchWords`` and
        store the results in the database.

        :param searchWords: Sequence of search terms combined with AND.
        :raises ValueError: if ``searchWords`` is empty.
        """
        # Searching in TITLE-ABStract-KEYwords is the default search mode on scopus
        searchString = self._buildSearchString(searchWords)

        self.searchResult = ScopusSearch(searchString)
        self.searchWords = searchWords

        self.storeResultsInDB()

    def storeResultsInDB(self):
        """Store up to ``noOfResults`` documents of the last search.

        Walks the results in order and stops once ``noOfResults`` documents
        have been accepted by ``database.addItem`` or the results run out.
        """
        if self.noOfResults <= 0:
            return

        results = self.searchResult.results
        if results is None:
            return

        # NOTE(review): positional indices into each result record;
        # presumably DOI, title, author names and abstract, judging by the
        # setters they feed - confirm against the pybliometrics version used.
        doiIdx, titleIdx, authorIdx, abstractIdx = 1, 4, 13, 27

        noOfResultsStored = 0
        # Iterate over the records actually present, so a reported result
        # count larger than the downloaded list cannot cause an IndexError.
        for entryNo, record in enumerate(results):
            if noOfResultsStored >= self.noOfResults:
                break
            doc = Document()
            doc.setAbstract(record[abstractIdx])
            doc.setAuthor(record[authorIdx])
            doc.setDOI(record[doiIdx])
            doc.setEntryNo(entryNo)
            doc.setKeywords(self.searchWords)
            doc.setTitle(record[titleIdx])

            # Only documents the database accepts count toward the limit.
            if self.database.addItem(doc):
                noOfResultsStored += 1

    def run(self, *keywords):
        """Search every keyword combination, then export to Excel."""
        self.permIterator = CombinationIterator(*keywords)
        for _ in tqdm(range(self.permIterator.noOfIterations)):
            self.performSearch(self.permIterator.getNextCombination())

        self.writeToExcel()

    def writeToExcel(self):
        """Write all database entries to a timestamped ``.xlsx`` file.

        Each row holds the entry's keywords followed by DOI, author, title,
        entry number and abstract.
        """
        date_time = datetime.now().strftime("%Y%m%d_%H%M")
        workbook = xlsxwriter.Workbook(
            'CombinatorialSearchResults' + date_time + '.xlsx')
        worksheet = workbook.add_worksheet()

        for row, key in enumerate(self.database.entries):
            entry = self.database.entries.get(key)

            keywords = list(entry.keywords)
            for col, word in enumerate(keywords):
                worksheet.write(row, col, word)

            # First column after the keywords. Derived from the keyword
            # count rather than a leftover loop variable, so entries without
            # keywords are handled too.
            col = len(keywords)
            worksheet.write(row, col, entry.doi)
            worksheet.write(row, col + 1, entry.author)
            worksheet.write(row, col + 2, entry.title)
            worksheet.write(row, col + 3, entry.entryNo)
            worksheet.write(row, col + 4, entry.abstract)

        workbook.close()