Example #1
0
class Tester(TestCase):
  """Smoke tests for ArticleDownloader.

  Apart from test_entitlement, the tests only check that each call completes
  without raising; no return values are inspected.
  """

  def setUp(self):
    """Build the downloader and the temporary files shared by the tests.

    Each temporary file is registered with addCleanup so its handle is
    closed after every test, including when setUp itself fails part-way
    (tearDown is not run in that case, cleanups are).
    """
    self.downloader = ArticleDownloader(environ.get('ELS_API_KEY'))
    self.doi = '10.1016/j.nantod.2008.10.014'
    self.pdf_file = TemporaryFile(mode='wb')
    self.addCleanup(self.pdf_file.close)

    # NOTE(review): the two files below are opened in binary mode but are
    # given str literals; on Python 3 that raises TypeError -- confirm the
    # target interpreter before changing the literals or the mode.
    self.txt_file = TemporaryFile(mode='rb+')
    self.addCleanup(self.txt_file.close)
    self.txt_file.write('10.1016/j.nantod.2008.10.014')

    self.csv_file = TemporaryFile(mode='rb+')
    self.addCleanup(self.csv_file.close)
    self.csv_file.write('nanomaterial+synthesis,')
    self.csv_file.write('battery+electrode,')

  def test_download(self):
    """Request the PDF for the sample DOI from each of the three sources."""
    #Single download test
    for source in ('elsevier', 'crossref', 'rsc'):
      self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, source)

  def test_abstract_download(self):
    """Request the abstract for the sample DOI from the 'elsevier' source."""
    self.downloader.get_abstract_from_doi(self.doi, 'elsevier')

  def test_entitlement(self):
    """Check that the entitlement call returns one of True / False."""
    #Test entitlement - want to check this works, but pass/fail depends on IP addr
    self.assertIn(self.downloader.check_els_entitlement(self.doi), [True, False])

  def test_search(self):
    """Run a DOI search for every query loaded from the CSV temp file."""
    #Search test
    queries = self.downloader.load_queries_from_csv(self.csv_file)
    for query in queries:
      self.downloader.get_dois_from_search(query, rows=1000)

  def tearDown(self):
    """Nothing to do here: setUp registers the file closes as cleanups."""
    pass
Example #2
0
    def setUp(self):
        """Prepare the downloader under test and its scratch files.

        Sets self.downloader, self.doi, and three temporary files:
        self.pdf_file (write-only binary), self.txt_file (holding one DOI)
        and self.csv_file (holding two comma-terminated search terms).
        """
        api_key = environ.get('ELS_API_KEY')
        self.downloader = ArticleDownloader(api_key)
        self.doi = '10.1016/j.nantod.2008.10.014'
        self.pdf_file = TemporaryFile(mode='wb')

        # Bind the attribute first so it exists even if the write fails.
        self.txt_file = doi_file = TemporaryFile(mode='rb+')
        doi_file.write('10.1016/j.nantod.2008.10.014')

        self.csv_file = query_file = TemporaryFile(mode='rb+')
        for term in ('nanomaterial+synthesis,', 'battery+electrode,'):
            query_file.write(term)
Example #3
0
class Tester(TestCase):
    """Smoke tests for ArticleDownloader.

    The tests only check that each call completes without raising; no
    return values are inspected.
    """

    def setUp(self):
        """Build the downloader and the temporary files shared by the tests.

        Each temporary file is registered with addCleanup so its handle is
        closed after every test, including when setUp itself fails part-way
        (tearDown is not run in that case, cleanups are).
        """
        self.downloader = ArticleDownloader(environ.get('ELS_API_KEY'))
        self.doi = '10.1016/j.nantod.2008.10.014'
        self.pdf_file = TemporaryFile(mode='wb')
        self.addCleanup(self.pdf_file.close)

        # NOTE(review): the two files below are opened in binary mode but
        # are given str literals; on Python 3 that raises TypeError --
        # confirm the target interpreter before changing literals or mode.
        self.txt_file = TemporaryFile(mode='rb+')
        self.addCleanup(self.txt_file.close)
        self.txt_file.write('10.1016/j.nantod.2008.10.014')

        self.csv_file = TemporaryFile(mode='rb+')
        self.addCleanup(self.csv_file.close)
        self.csv_file.write('nanomaterial+synthesis,')
        self.csv_file.write('battery+electrode,')

    def test_download(self):
        """Request the PDF for the sample DOI from the 'elsevier' source."""
        #Single download test
        self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, 'elsevier')

    def test_abstract_download(self):
        """Request the abstract for the sample DOI from 'elsevier'."""
        self.downloader.get_abstract_from_doi(self.doi, 'elsevier')

    def test_search(self):
        """Run a DOI search for every query loaded from the CSV temp file."""
        #Search test
        queries = self.downloader.load_queries_from_csv(self.csv_file)
        for query in queries:
            self.downloader.get_dois_from_search(query, rows=10)

    def tearDown(self):
        """Nothing to do here: setUp registers the file closes as cleanups."""
        pass
Example #4
0
class Tester(TestCase):
  """Smoke tests for ArticleDownloader.

  The tests only check that each call completes without raising; no return
  values are inspected.
  """

  def setUp(self):
    """Build the downloader and the temporary files shared by the tests.

    Each temporary file is registered with addCleanup so its handle is
    closed after every test, including when setUp itself fails part-way
    (tearDown is not run in that case, cleanups are).
    """
    self.downloader = ArticleDownloader(environ.get('ELS_API_KEY'))
    self.doi = '10.1016/j.nantod.2008.10.014'
    self.pdf_file = TemporaryFile(mode='wb')
    self.addCleanup(self.pdf_file.close)

    # NOTE(review): the two files below are opened in binary mode but are
    # given str literals; on Python 3 that raises TypeError -- confirm the
    # target interpreter before changing the literals or the mode.
    self.txt_file = TemporaryFile(mode='rb+')
    self.addCleanup(self.txt_file.close)
    self.txt_file.write('10.1016/j.nantod.2008.10.014')

    self.csv_file = TemporaryFile(mode='rb+')
    self.addCleanup(self.csv_file.close)
    self.csv_file.write('nanomaterial+synthesis,')
    self.csv_file.write('battery+electrode,')

  def test_download(self):
    """Request the PDF for the sample DOI from the 'elsevier' source."""
    #Single download test
    self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, 'elsevier')

  def test_abstract_download(self):
    """Request the abstract for the sample DOI from the 'elsevier' source."""
    self.downloader.get_abstract_from_doi(self.doi, 'elsevier')

  def test_search(self):
    """Run a DOI search for every query loaded from the CSV temp file."""
    #Search test
    queries = self.downloader.load_queries_from_csv(self.csv_file)
    for query in queries:
      self.downloader.get_dois_from_search(query, rows=10)

  def tearDown(self):
    """Nothing to do here: setUp registers the file closes as cleanups."""
    pass
Example #5
0
  def setUp(self):
    """Create the fixtures: a downloader built with the 'NO_API_KEY'
    placeholder, the sample DOI, and three temporary files (PDF target,
    DOI list, query list).
    """
    self.downloader = ArticleDownloader('NO_API_KEY')
    self.doi = '10.1016/j.nantod.2008.10.014'
    self.pdf_file = TemporaryFile(mode='wb')

    # Attributes are bound before writing so they exist even if a write fails.
    self.txt_file = doi_handle = TemporaryFile(mode='rb+')
    doi_handle.write('10.1016/j.nantod.2008.10.014')

    self.csv_file = csv_handle = TemporaryFile(mode='rb+')
    search_terms = ['nanomaterial+synthesis,', 'battery+electrode,']
    for search_term in search_terms:
      csv_handle.write(search_term)
Example #6
0
class Tester(TestCase):
  """Smoke tests for ArticleDownloader built with the 'NO_API_KEY' placeholder.

  Apart from test_entitlement, the tests only check that each call completes
  without raising; no return values are inspected.
  """

  def setUp(self):
    """Build the downloader and the temporary files shared by the tests.

    Each temporary file is registered with addCleanup so its handle is
    closed after every test, including when setUp itself fails part-way
    (tearDown is not run in that case, cleanups are).
    """
    self.downloader = ArticleDownloader('NO_API_KEY')
    self.doi = '10.1016/j.nantod.2008.10.014'
    self.pdf_file = TemporaryFile(mode='wb')
    self.addCleanup(self.pdf_file.close)

    # NOTE(review): the two files below are opened in binary mode but are
    # given str literals; on Python 3 that raises TypeError -- confirm the
    # target interpreter before changing the literals or the mode.
    self.txt_file = TemporaryFile(mode='rb+')
    self.addCleanup(self.txt_file.close)
    self.txt_file.write('10.1016/j.nantod.2008.10.014')

    self.csv_file = TemporaryFile(mode='rb+')
    self.addCleanup(self.csv_file.close)
    self.csv_file.write('nanomaterial+synthesis,')
    self.csv_file.write('battery+electrode,')

  def test_download(self):
    """Request the PDF for the sample DOI from 'elsevier' then 'crossref'."""
    #Single download test
    for source in ('elsevier', 'crossref'):
      self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, source)

  def test_entitlement(self):
    """Expect the entitlement check to be falsy for the placeholder key."""
    #Test entitlement
    self.assertFalse(self.downloader.check_els_entitlement(self.doi))

  def test_search(self):
    """Run a DOI search for every query loaded from the CSV temp file."""
    #Search test
    queries = self.downloader.load_queries_from_csv(self.csv_file)
    for query in queries:
      self.downloader.get_dois_from_search(query, rows=1200)

  def tearDown(self):
    """Nothing to do here: setUp registers the file closes as cleanups."""
    pass
Example #7
0
class Tester(TestCase):
    """Smoke tests for ArticleDownloader built with the 'NO_API_KEY' placeholder.

    Apart from test_entitlement, the tests only check that each call
    completes without raising; no return values are inspected.
    """

    def setUp(self):
        """Build the downloader and the temporary files shared by the tests.

        Each temporary file is registered with addCleanup so its handle is
        closed after every test, including when setUp itself fails part-way
        (tearDown is not run in that case, cleanups are).
        """
        self.downloader = ArticleDownloader('NO_API_KEY')
        self.doi = '10.1016/j.nantod.2008.10.014'
        self.pdf_file = TemporaryFile(mode='wb')
        self.addCleanup(self.pdf_file.close)

        # NOTE(review): the two files below are opened in binary mode but
        # are given str literals; on Python 3 that raises TypeError --
        # confirm the target interpreter before changing literals or mode.
        self.txt_file = TemporaryFile(mode='rb+')
        self.addCleanup(self.txt_file.close)
        self.txt_file.write('10.1016/j.nantod.2008.10.014')

        self.csv_file = TemporaryFile(mode='rb+')
        self.addCleanup(self.csv_file.close)
        self.csv_file.write('nanomaterial+synthesis,')
        self.csv_file.write('battery+electrode,')

    def test_download(self):
        """Request the PDF for the sample DOI from 'elsevier' then 'crossref'."""
        #Single download test
        for source in ('elsevier', 'crossref'):
            self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, source)

    def test_entitlement(self):
        """Expect the entitlement check to be falsy for the placeholder key."""
        #Test entitlement
        self.assertFalse(self.downloader.check_els_entitlement(self.doi))

    def test_search(self):
        """Run a DOI search for every query loaded from the CSV temp file."""
        #Search test
        queries = self.downloader.load_queries_from_csv(self.csv_file)
        for query in queries:
            self.downloader.get_dois_from_search(query, rows=1200)

    def tearDown(self):
        """Nothing to do here: setUp registers the file closes as cleanups."""
        pass
class DownloadManager:
    """Gathers DOIs into ``self.dl_dois`` from 'mp', 'cr' or 'issn' queries.

    'mp' queries go through MPRester; 'cr' and 'issn' queries go through the
    shared ArticleDownloader (``ad``). Failures for individual queries are
    logged as warnings and do not stop the remaining queries.
    """
    # Shared clients, created once when the class body is executed.
    ad = ArticleDownloader(environ.get('ELS_API_KEY'),
                           environ.get('CRF_API_KEY'))
    connection = MongoClient()
    # Class-level defaults are kept so existing attribute access still works;
    # __init__ rebinds the mutable ones so instances do not share state.
    dl_doi_pdf_map = {}
    doi_fails = []
    dl_dois = []
    rows_per_query = 0

    def __init__(self, db):
        """Store ``db`` and give this instance its own containers and logger.

        Args:
            db: stored unchanged on ``self.db``; not used by the methods
                visible in this class.
        """
        # Local import: this file's import block is not visible from here.
        import logging

        self.db = db
        self.dl_doi_pdf_map = {}
        self.doi_fails = []
        self.dl_dois = []
        self.__logger = logging.getLogger(__name__)

    def set_dois_per_query(self, num_docs):
        """Set how many rows each 'cr' / 'issn' query requests.

        Args:
            num_docs: value convertible with ``int()``.
        """
        self.rows_per_query = int(num_docs)

    def get_dois(self, queries, mode, wait_time=0):
        """Run ``queries`` in the given ``mode`` and add results to ``self.dl_dois``.

        Args:
            queries: iterable of queries; each is passed to the backend as-is.
            mode: 'mp', 'cr' or 'issn'. Any other value does nothing.
            wait_time: seconds to sleep after each 'cr' / 'issn' query
                (not applied in 'mp' mode).

        Returns:
            None. Results accumulate in ``self.dl_dois``.
        """
        if mode == 'mp':
            self.__logger.info('Searching with MP queries')
            self._collect_mp_dois(queries)
        elif mode == 'cr':
            self.__logger.info('Searching with CR queries')
            self._collect_search_dois(
                queries,
                lambda query: self.ad.get_dois_from_search(
                    query, rows=self.rows_per_query),
                'FAILURE: Failed to get DOIs from CR: ',
                wait_time)
        elif mode == 'issn':
            self.__logger.info('Searching with ISSN queries')
            self._collect_search_dois(
                queries,
                lambda query: self.ad.get_dois_from_journal_issn(
                    query, rows=self.rows_per_query, pub_after=1900),
                'FAILURE: Failed to get DOIs from CR by ISSN: ',
                wait_time)

    def _collect_mp_dois(self, queries):
        """Add the not-yet-seen DOIs referenced by each MP query's materials."""
        mpr = MPRester(environ.get('MAPI_KEY'),
                       endpoint="https://www.materialsproject.org/rest")

        for query in queries:
            try:
                # Reset per query so task ids from earlier queries are not
                # fetched again for every later query.
                task_ids = []
                for entry in mpr.get_entries(query):
                    task_ids.extend(entry.data['task_ids'])
                for task_id in task_ids:
                    materials_id = mpr.get_materials_id_from_task_id(
                        task_id)['materials_id']
                    bibtex = mpr.get_materials_id_references(materials_id)
                    for item in bibtexparser.loads(bibtex).entries:
                        if 'doi' in item and item['doi'] not in self.dl_dois:
                            self.dl_dois.append(item['doi'])
            except Exception as e:
                # Best-effort per query: log and move on to the next one.
                self.__logger.warning(
                    'FAILURE: Failed to get DOIs from MP:' + str(query))
                self.__logger.warning('EXCEPTION: ' + str(e))

    def _collect_search_dois(self, queries, search, failure_prefix, wait_time):
        """Call ``search(query)`` per query, extend ``self.dl_dois``, then sleep."""
        for query in queries:
            found = []
            try:
                found = search(query)
            except Exception as e:
                self.__logger.warning(failure_prefix + str(query))
                self.__logger.warning('EXCEPTION: ' + str(e))

            # The pause applies after failed queries as well.
            sleep(wait_time)
            self.dl_dois.extend(found)
Example #9
0
                                for sec in c.iter():
                                    if sec.tag.find('section-title') != -1:
                                        #print(sec.text)
                                        rawtext += sec.text + '\n'
                                    if sec.tag.find('para') != -1:
                                        #print(sec.text)
                                        rawtext += sec.text + '\n'
                    #print(rawtext)
                    return rawtext


restclient.add_resource(resource_name='thesis')
#text = findText('./elsevier/A quick method for the simultaneous determination of ascorbic acid and sorbic acid in fruit juices by capillary zone el.xml')
#sys.exit(0)
try:
    downloader = ArticleDownloader(
        els_api_key='e88e30b8118b3ed42ca752c0d6b59686')
    #https://api.elsevier.com/content/search/sciencedirect?query=nutrition&APIKey=e88e30b8118b3ed42ca752c0d6b59686
    #dois = downloader.get_dois_from_journal_issn('1476-4686', rows=500, pub_after=2000)
    filetype = 'xml'
    #78 is for elsevier
    records = downloader.get_dict_from_search(
        'ascorbic acid+extraction+fruit&filter=member:78', 3000)
    for i, record in enumerate(records):
        print(i)
        cur_title = re.sub('[\[\]\'\.\/]', '', str(record['title']))
        replaced_doi = re.sub('[\[\]\'\.\/()]', '', str(record['doi']))

        print(replaced_doi)
        cur_filename = './elsevier/' + replaced_doi + '.' + filetype
        try:
            my_file = open(cur_filename, 'wb')  # Need to use 'wb' on Windows
Example #10
0
import bibtexparser
import requests
import os
from articledownloader.articledownloader import ArticleDownloader
import time
from sys import platform

# NOTE(review): an API key is committed in source. It is kept only as the
# fallback so existing runs behave the same; prefer setting ELS_API_KEY in the
# environment, then rotate and remove the literal.
downloader = ArticleDownloader(
    els_api_key=os.environ.get('ELS_API_KEY',
                               '11acc1dbb49e1a44d49d46d48469a2f7'))

# Pick the bundled cpdf binary for this OS. On any other platform `cpdf` is
# left undefined, as before.
if platform.startswith("linux"):
    cpdf = './cpdf-binaries/Linux-Intel-64bit/cpdf'
elif platform == "darwin":
    cpdf = './cpdf-binaries/OSX-Intel/cpdf'
elif platform == "win32":
    cpdf = './cpdf-binaries/Windows32bit/cpdf.exe'

# Lines of the .aux file, read once up front.
with open('publist_biobib.aux') as aux_file:
    aux_lines = aux_file.readlines()

# Parsed BibTeX database of publications.
with open('bib_publications.bib') as bibtex_file:
    bib_database = bibtexparser.load(bibtex_file)

# Directory for article output; created if missing.
article_dir = 'Research_Articles'
os.makedirs(article_dir, exist_ok=True)

for l in bib_database.entries:
    print('Checking %s' % l['ID'])
    if 'doi' in l and 'keywords' in l and 'recent' in l['keywords']:
        name = ''
        for line in aux_lines: