Example #1
def get_paper_info(doi=None, url=None):
    """Build a PaperInfo object for an article given its DOI and/or URL.

    The DOI/URL is resolved through ``pub_resolve`` to a publisher base URL
    and a full article URL; the publisher's site-info dict then supplies the
    scraper (its ``'object'`` entry) handed to ``PaperInfo``.

    Parameters
    ----------
    doi : str, optional
        DOI of the article.
    url : str, optional
        URL of the article.

    Returns
    -------
    PaperInfo
        Object constructed with the given DOI, the resolved scraper object
        and the resolved full URL.
    """
    # Step 1: resolve the identifiers to publisher-level and article-level URLs.
    base_url, article_url = pub_resolve.get_publisher_urls(doi=doi, url=url)

    # Step 2: look up the publisher entry, which names the scraper to use.
    site_info = pub_resolve.get_publisher_site_info(base_url)

    # Step 3: PaperInfo holds all information and calls the scraper.
    return PaperInfo(doi=doi, scraper_obj=site_info['object'], url=article_url)
Example #2
    def science_direct(self):
        """Pickle a PaperInfo for a sample ScienceDirect article.

        The object is built from the live site with the
        'sciencedirect_selenium' scraper, its ``publisher_interface`` is reset
        to None, and the result is written to ``sd_info.txt`` inside
        ``self.dirname``.
        """
        # Sample journal article
        article_url = 'http://www.sciencedirect.com/science/article/pii/S0006899313013048'

        # Live scrape, then drop the publisher interface before pickling
        paper = PaperInfo(url=article_url, scraper_obj='sciencedirect_selenium')
        paper.publisher_interface = None

        # Persist the object for later comparison
        target_path = os.path.join(self.dirname, 'sd_info.txt')
        with open(target_path, 'wb') as out_file:
            pickle.dump(paper, out_file)
Example #3
    def springer(self):
        """Pickle a PaperInfo for a sample Springer article.

        The object is built from the live site with the 'springer' scraper,
        its ``publisher_interface`` is reset to None, and the result is
        written to ``sp_info.txt`` inside ``self.dirname``.
        """
        # Sample journal article
        article_doi = '10.1007/s10237-015-0706-9'
        article_url = 'http://link.springer.com/article/10.1007/s10237-015-0706-9'

        # Live scrape, then drop the publisher interface before pickling
        paper = PaperInfo(url=article_url, doi=article_doi, scraper_obj='springer')
        paper.publisher_interface = None

        # Persist the object for later comparison
        target_path = os.path.join(self.dirname, 'sp_info.txt')
        with open(target_path, 'wb') as out_file:
            pickle.dump(paper, out_file)
Example #4
    def nature_nrg(self):
        """Pickle a PaperInfo for a sample Nature Reviews Genetics article.

        The object is built from the live site with the 'nature_nrg' scraper,
        its ``publisher_interface`` is reset to None, and the result is
        written to ``nrg_info.txt`` inside ``self.dirname``.
        """
        # Sample journal article
        article_doi = '10.1038/nrg3686'
        article_url = 'http://www.nature.com/nrg/journal/v15/n5/full/nrg3686.html'

        # Live scrape, then drop the publisher interface before pickling
        paper = PaperInfo(url=article_url, doi=article_doi, scraper_obj='nature_nrg')
        paper.publisher_interface = None

        # Persist the object for later comparison
        target_path = os.path.join(self.dirname, 'nrg_info.txt')
        with open(target_path, 'wb') as out_file:
            pickle.dump(paper, out_file)
def doi_to_info(doi=None, url=None):
    """
    Gets entry and references information for an article DOI.

    Uses saved dicts matching DOI prefixes to publishers and web scrapers
    to retrieve information. Will fail if DOI prefix hasn't been saved
    with a publisher link or if a scraper for a specific publisher
    site hasn't been built.

    Parameters
    ----------
    doi : str
        Unique ID assigned to a journal article.
    url : str
        The CrossRef URL to the article page.
        I.e. http://dx.doi.org/10.######

    Returns
    -------
    paper_info : PaperInfo
        Class containing parameters including the following:

        entry_dict : dict
            Contains information about the paper referenced by the DOI.
            Includes title, authors, affiliations, publish date, journal
            title, volume, and pages, and keywords. Some values are other
            dicts (for example, the author info with affiliation values).
            Formatted to be JSON serializable.

        refs_dicts : list of dicts
            Each list item is a dict corresponding to an individual reference
            from the article's reference list. Includes title, authors,
            publishing date, journal title, volume, and pages (if listed),
            and any external URL links available (i.e. to where it is hosted
            on other sites, or pdf links).

        full_url : str
            URL to the journal article page on publisher's website.
    """
    # Resolve DOI or URL through PyPub pub_resolve methods
    publisher_base_url, full_url = pub_resolve.get_publisher_urls(doi=doi, url=url)
    pub_dict = pub_resolve.get_publisher_site_info(publisher_base_url)

    # Create a PaperInfo object to hold all information and call appropriate scraper
    paper_info = PaperInfo(doi=doi, scraper_obj=pub_dict['object'], url=full_url)

    return paper_info
Example #6
    def taylor_francis(self):
        """Pickle a PaperInfo for a sample Taylor & Francis article.

        The object is built from the live site with the 'taylorfrancis'
        scraper, its ``publisher_interface`` is reset to None, and the result
        is written to ``tf_info.txt`` inside ``self.dirname``.

        NOTE: The current version of the T&F scraper is for a deprecated
        version of the site. All of the HTML tags need to be changed.
        """
        # Sample journal article
        article_doi = '10.1080/21624054.2016.1184390'
        article_url = 'http://www.tandfonline.com/doi/full/10.1080/21624054.2016.1184390'

        # Live scrape, then drop the publisher interface before pickling
        paper = PaperInfo(url=article_url, doi=article_doi, scraper_obj='taylorfrancis')
        paper.publisher_interface = None

        # Persist the object for later comparison
        target_path = os.path.join(self.dirname, 'tf_info.txt')
        with open(target_path, 'wb') as out_file:
            pickle.dump(paper, out_file)
Example #7
    def wiley(self):
        """Pickle a PaperInfo for a sample Wiley article.

        The object is built from the live site with the 'wiley' scraper and
        written to ``wy_info.txt`` inside ``self.dirname``.
        """
        # Sample journal article
        article_doi = '10.1002/biot.201400046'
        article_url = 'http://onlinelibrary.wiley.com/doi/10.1002/biot.201400046/references'

        # Live scrape. publisher_interface has to be cleared afterwards,
        # otherwise the object could not be saved.
        paper = PaperInfo(url=article_url, doi=article_doi, scraper_obj='wiley')
        paper.publisher_interface = None

        # Persist the object for later comparison
        target_path = os.path.join(self.dirname, 'wy_info.txt')
        with open(target_path, 'wb') as out_file:
            pickle.dump(paper, out_file)
Example #8
    def __init__(self):
        """Set up live and saved PaperInfo data for a ScienceDirect article.

        Tries to build a PaperInfo from the live site; if that raises,
        ``self.pi`` and ``self.entry_dict`` are set to None instead of
        propagating the error. A pickled PaperInfo is then loaded from the
        ``saved_info`` directory next to this file.

        Raises
        ------
        OSError
            If the saved pickle file cannot be opened.
        """
        self.curpath = str(os.path.dirname(os.path.abspath(__file__)))
        self.link = 'http://www.sciencedirect.com/science/article/pii/S0006899313013048'
        # NOTE(review): this value matches the PII in the link rather than a
        # DOI of the form 10.xxxx/... -- confirm this is intended.
        self.doi = 'S0006899313013048'

        # Make a PaperInfo object from the live site information.
        # Best-effort: a scraping failure leaves the live fields as None.
        try:
            pi = PaperInfo(url=self.link, doi=self.doi, scraper_obj='sciencedirect')
        except Exception:
            self.pi = None
            self.entry_dict = None
        else:
            self.pi = pi
            self.entry_dict = self.pi.entry.__dict__

        # Load saved version of the PaperInfo object
        # NOTE(review): 'sp_info.txt' is the name the springer() saver writes;
        # science_direct() writes 'sd_info.txt'. Presumably a copy-paste slip --
        # confirm which saved file this fixture should compare against.
        saved_dir = os.path.join(self.curpath, 'saved_info')
        saved_file_path = os.path.join(saved_dir, 'sp_info.txt')
        # Use a context manager so the file handle is closed after loading.
        with open(saved_file_path, 'rb') as saved_file:
            self.saved_pi = pickle.load(saved_file)

        # Make the saved versions into dicts
        self.saved_entry_dict = self.saved_pi.entry.__dict__
Example #9
    def __init__(self):
        """Set up live and saved PaperInfo data for a Nature (NRG) article.

        Tries to build a PaperInfo from the live site; if that raises,
        ``self.pi`` and ``self.entry_dict`` are set to None instead of
        propagating the error. A pickled PaperInfo is then loaded from the
        ``saved_info`` directory next to this file.

        Raises
        ------
        OSError
            If the saved pickle file cannot be opened.
        """
        self.curpath = str(os.path.dirname(os.path.abspath(__file__)))
        self.link = 'http://www.nature.com/nrg/journal/v15/n5/full/nrg3686.html'
        self.doi = '10.1038/nrg3686'

        # Make a PaperInfo object from the live site information.
        # Best-effort: a scraping failure leaves the live fields as None.
        try:
            pi = PaperInfo(url=self.link, doi=self.doi, scraper_obj='nature')
        except Exception:
            self.pi = None
            self.entry_dict = None
        else:
            self.pi = pi
            self.entry_dict = self.pi.entry.__dict__

        # Load saved version of the PaperInfo object
        # NOTE(review): 'sp_info.txt' is the name the springer() saver writes;
        # nature_nrg() writes 'nrg_info.txt'. Presumably a copy-paste slip --
        # confirm which saved file this fixture should compare against.
        saved_dir = os.path.join(self.curpath, 'saved_info')
        saved_file_path = os.path.join(saved_dir, 'sp_info.txt')
        # Use a context manager so the file handle is closed after loading.
        with open(saved_file_path, 'rb') as saved_file:
            self.saved_pi = pickle.load(saved_file)

        # Make the saved versions into dicts
        self.saved_entry_dict = self.saved_pi.entry.__dict__
Example #10
    def __init__(self):
        """Set up live and saved PaperInfo data for a Springer article.

        Tries to build a PaperInfo from the live site; if that raises,
        ``self.pi`` and ``self.entry_dict`` are set to None instead of
        propagating the error. A pickled PaperInfo is then loaded from
        ``saved_info/sp_info.txt`` next to this file.

        Raises
        ------
        OSError
            If the saved pickle file cannot be opened.
        """
        self.curpath = str(os.path.dirname(os.path.abspath(__file__)))
        self.link = 'http://link.springer.com/article/10.1186/s12984-016-0150-9'
        self.doi = '10.1186/s12984-016-0150-9'

        # Make a PaperInfo object from the live site information.
        # Best-effort: a scraping failure leaves the live fields as None.
        # NOTE(review): the scraper name is capitalised here ('Springer') but
        # lower-case in the springer() saver -- confirm the lookup ignores case.
        try:
            pi = PaperInfo(url=self.link, doi=self.doi, scraper_obj='Springer')
        except Exception:
            self.pi = None
            self.entry_dict = None
        else:
            self.pi = pi
            self.entry_dict = self.pi.entry.__dict__

        # Load saved version of the PaperInfo object
        saved_dir = os.path.join(self.curpath, 'saved_info')
        saved_file_path = os.path.join(saved_dir, 'sp_info.txt')
        # Use a context manager so the file handle is closed after loading.
        with open(saved_file_path, 'rb') as saved_file:
            self.saved_pi = pickle.load(saved_file)

        # Make the saved versions into dicts
        self.saved_entry_dict = self.saved_pi.entry.__dict__
Example #11
    def __init__(self):
        """Set up live and saved PaperInfo data for a Wiley article.

        Tries to build a PaperInfo from the live site; if that raises,
        ``self.pi`` and ``self.entry_dict`` are set to None instead of
        propagating the error. A pickled PaperInfo is then loaded from
        ``saved_info/wy_info.txt`` next to this file.

        Raises
        ------
        OSError
            If the saved pickle file cannot be opened.
        """
        self.curpath = str(os.path.dirname(os.path.abspath(__file__)))
        self.link = 'http://onlinelibrary.wiley.com/doi/10.1002/biot.201400046/references'
        self.doi = '10.1002/biot.201400046'

        # Make a PaperInfo object from the live site information.
        # Best-effort: a scraping failure leaves the live fields as None.
        try:
            pi = PaperInfo(url=self.link, doi=self.doi, scraper_obj='wiley')
        except Exception:
            self.pi = None
            self.entry_dict = None
        else:
            self.pi = pi
            self.entry_dict = self.pi.entry.__dict__

        # Load saved version of the PaperInfo object
        saved_dir = os.path.join(self.curpath, 'saved_info')
        saved_file_path = os.path.join(saved_dir, 'wy_info.txt')
        # Use a context manager so the file handle is closed after loading.
        with open(saved_file_path, 'rb') as saved_file:
            self.saved_pi = pickle.load(saved_file)

        # Make the saved versions into dicts
        self.saved_entry_dict = self.saved_pi.entry.__dict__