def get_data_from_doi(self, doi):
        """Look up PubMed data for the article identified by ``doi``.

        The ScienceDirect full-text record is read through ``self.client`` and
        written out with ``doi_doc.write()``. Its ``'pubmed-id'`` entry is used
        when present; otherwise PubMed is searched with the article title and
        the returned id list is used instead.

        Parameters:
        -----------
        doi: The DOI string of the article to resolve.

        Returns:
        --------
        Whatever ``self.fetch_data_from_pubmed`` returns for the resolved
        PubMed id(s), or ``doi`` unchanged when the document cannot be read
        or no PubMed id can be found.
        """
        doi_doc = FullDoc(doi=doi)
        if not doi_doc.read(self.client):
            print("Read document failed.")
            return doi
        doi_doc.write()

        pubmed_id = None
        if 'pubmed-id' in doi_doc._data:
            pubmed_id = doi_doc._data['pubmed-id']
        else:
            print("no pubmed-id, trying with title")
            # Fall back to a PubMed title search.
            # NOTE(review): the address below is a redacted placeholder;
            # presumably a real contact e-mail is configured in production.
            Entrez.email = '*****@*****.**'
            handle = Entrez.esearch(db='pubmed', retmode='xml',
                                    term=doi_doc.title)
            try:
                results = Entrez.read(handle)
            finally:
                # Release the connection even if parsing fails.
                handle.close()
            if int(results['Count']) > 0:
                # The whole id list is forwarded, not just the first hit.
                pubmed_id = results['IdList']

        if pubmed_id is None:
            print("no pubmed id")
            return doi
        return self.fetch_data_from_pubmed(pubmed_id)
 def readFullDocWithDOI(self, doiID='10.1016/S1525-1578(10)60571-5'):
     """Read a ScienceDirect full-text document by DOI, print its title and write it.

     Parameters:
     -----------
     doiID: DOI of the document to read through ``self.client``.

     Prints a failure message instead when the read does not succeed.
     """
     document = FullDoc(doi=doiID)
     read_ok = document.read(self.client)
     if not read_ok:
         print("Read document failed.")
         return
     print("doi_doc.title: ", document.title)
     document.write()
 def readFullDocWithPII(self, sd_piiID='S1270963817323015'):
     """Read a ScienceDirect full-text document by PII, print it and write it.

     Parameters:
     -----------
     sd_piiID: PII of the document to read through ``self.client``.

     On success the document object and its title are printed before the
     document is written; otherwise a failure message is printed.
     """
     document = FullDoc(sd_pii=sd_piiID)
     read_ok = document.read(self.client)
     if not read_ok:
         print("Read document failed.")
         return
     print(document)
     print("pii_doc.title: ", document.title)
     document.write()
Beispiel #4
0
def find_abstract(doi):
    """Print the title and abstract of the ScienceDirect document with this DOI.

    A client is obtained from ``elsevier_auth()``. When the full-text read
    succeeds, the title and the ``coredata`` / ``dc:description`` entry are
    printed and the document is written; otherwise a failure message is
    printed. Nothing is returned.
    """
    els_client = elsevier_auth()
    document = FullDoc(doi=doi)
    if not document.read(els_client):
        print("Read document failed.")
    else:
        print("doi_doc.title: ", document.title)
        abstract_text = document.data['coredata']['dc:description']
        print("doi_doc.abstract: ", abstract_text)
        document.write()
    def get_authors_data_by_doi(self, doi):
        """Return the PubMed author list for the article identified by ``doi``.

        The ScienceDirect full-text record is read through ``self.client`` and
        written out with ``doi_doc.write()``. Its ``'pubmed-id'`` entry is used
        when present; otherwise PubMed is searched with the article title.
        The resolved id(s) are then fetched from PubMed and the ``AuthorList``
        of the first returned article is extracted.

        Parameters:
        -----------
        doi: The DOI string of the article to resolve.

        Returns:
        --------
        The ``AuthorList`` entry of the first ``PubmedArticle`` in the fetch
        result, or ``doi`` unchanged when the document cannot be read, no
        PubMed id is found, or the fetched record lacks the expected keys.
        """
        doi_doc = FullDoc(doi=doi)
        if not doi_doc.read(self.client):
            print("Read document failed.")
            return doi
        print("doi_doc.title: ", doi_doc.title)
        doi_doc.write()

        pubmed_id = None
        if 'pubmed-id' in doi_doc._data:
            pubmed_id = doi_doc._data['pubmed-id']
        else:
            print("no pubmed-id, trying with title")
            # Fall back to a PubMed title search.
            # NOTE(review): the address below is a redacted placeholder;
            # presumably a real contact e-mail is configured in production.
            Entrez.email = '*****@*****.**'
            handle = Entrez.esearch(db='pubmed', retmode='xml',
                                    term=doi_doc.title)
            try:
                results = Entrez.read(handle)
            finally:
                # Release the connection even if parsing fails.
                handle.close()
            if int(results['Count']) > 0:
                # The whole id list is forwarded, not just the first hit.
                pubmed_id = results['IdList']

        if pubmed_id is None:
            print("no pubmed id")
            return doi

        Entrez.email = '*****@*****.**'
        handle = Entrez.efetch(db='pubmed', retmode='xml', id=pubmed_id)
        try:
            results = Entrez.read(handle)
        finally:
            handle.close()
        print(results)

        articles = results['PubmedArticle']
        if (not articles
                or 'MedlineCitation' not in articles[0]
                or 'Article' not in articles[0]['MedlineCitation']):
            print("missing keys")
            return doi

        article = articles[0]['MedlineCitation']['Article']
        if 'AuthorList' not in article:
            print("no authors list {}".format(article))
            return doi

        return article['AuthorList']
    def get_doc(self, dtype, identity):
        """
        Retrieve a 'FullDoc' object from the Elsevier API for a PII or a DOI.

        The document is read with a client built from the first entry of
        ``self.API_list``. On a successful read it is also written out with
        ``doc.write()``; on failure a message is printed. The doc object is
        returned in both cases.

        Parameters:
        -----------
        dtype(str,required): The type of identification string being used to
            access the document: 'pii' or 'doi'.

        identity: The actual identification string (PII or DOI) to query.

        Returns:
        --------
        The 'FullDoc' object that was built for ``identity``.

        Raises:
        -------
        ValueError: if ``dtype`` is neither 'pii' nor 'doi'.
        """
        if dtype == 'pii':
            doc = FullDoc(sd_pii=identity)
        elif dtype == 'doi':
            doc = FullDoc(doi=identity)
        else:
            # Fail loudly here instead of hitting an unbound ``doc`` below.
            raise ValueError(
                "Unsupported dtype {!r}; expected 'pii' or 'doi'.".format(dtype))

        if doc.read(ElsClient(self.API_list[0])):
            doc.write()
        else:
            print ("Read document failed.")

        return doc
    config = json.load(config_file)

# Search switches read from the loaded configuration.
GET_ALL = config['get_all']  # False gets one chunk (25); True gets all or max (5000)
FULL_TEXT = config['full_text']  # Save the full text of every hit
OPEN_ACCESS = config['open_access']  # Search only open-access documents (so the full text is available)

# Example query:
# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']
query = ("openaccess(1) AND " + query) if OPEN_ACCESS else query

client = ElsClient(config['api_key'])

# Run the ScienceDirect search.
doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)

for doc in doc_srch.results:
    doi = doc['dc:identifier']
    print(doi)
    if not FULL_TEXT:
        continue
    # Fetch the ScienceDirect full-text document for this identifier.
    doi_doc = FullDoc(doi=doi)
    if not doi_doc.read(client):
        print("Read full-text failed for DOI", doi)
        continue
    doi_doc.write()

print("# Found", len(doc_srch.results), "results.")
Beispiel #8
0
    print ("Read affiliation failed.")

## Scopus (Abstract) document example
# The document is initialized with its Scopus ID given as an integer.
scp_doc = AbsDoc(scp_id=84872135457)
if not scp_doc.read(client):
    print ("Read document failed.")
else:
    print ("scp_doc.title: ", scp_doc.title)
    scp_doc.write()

## ScienceDirect (full-text) document example using PII
pii_doc = FullDoc(sd_pii='S1674927814000082')
if not pii_doc.read(client):
    print ("Read document failed.")
else:
    print ("pii_doc.title: ", pii_doc.title)
    pii_doc.write()

## ScienceDirect (full-text) document example using DOI
doi_doc = FullDoc(doi='10.1016/S1525-1578(10)60571-5')
if not doi_doc.read(client):
    print ("Read document failed.")
else:
    print ("doi_doc.title: ", doi_doc.title)
    doi_doc.write()


## Load list of documents from the API into affiliation and author objects.
# Since a document list is retrieved for 25 entries at a time, this is
#  a potentially lengthy operation - hence the prompt.
                    else:
                        n += 1
                    #   get url
                    url = pii_doc.data["coredata"]["link"][1]["@href"]
                    #   save data
                    abs = pd.DataFrame([url, title, text])
                    abstract.loc[len(abstract)] = [url, title, text]
                    abs.to_csv(r'abstracts1.txt',
                               sep='\t',
                               mode='a',
                               header=False,
                               index=False)
                    print(str(j) + ") pii_doc.title: ", title)
                    print("pii_doc.description: ", text)
                    print("pii_doc.url: ", url)
                    pii_doc.write()
                    j += 1
                else:
                    print("Read document failed.")
            except (ValueError, Exception) as e:
                print(str(j) + ") " + str(e))
                append_report(
                    reportName, "Fail reading document " + str(j) + ", pii: " +
                    str(p) + "\n")
                append_report(reportName, "Error message: " + str(e) + "\n")
                err += 1
                j += 1
        append_report(reportName, "Total " + str(n) + " null abstract \n\n")
        ntotal += n

#   output results