def get_all_info(input, verbose=False):
    """Return entry information, references, and the PDF link in one call.

    Convenience wrapper around get_entry_info(), get_references(), and
    get_pdf_link() so callers don't have to invoke all three themselves.
    For the Wiley implementation the underlying request count can't be
    reduced (entry and reference data live on separate pages); this
    function only organizes the return values.

    Parameters
    ----------
    input : str
        Article identifier accepted by the scraper functions (e.g. a DOI
        or article URL — assumed; confirm against the scraper functions).
    verbose : bool, optional
        Passed through to get_entry_info() and get_references().

    Returns
    -------
    dict
        Keys: 'entry_info' (entry metadata as a dict), 'references'
        (reference list as returned by get_references()), and 'pdf_link'.
    """
    entry_info = get_entry_info(input, verbose)
    references = get_references(input, verbose)
    pdf_link = get_pdf_link(input)
    entry_dict = convert_to_dict(entry_info)
    # Fix: the original computed all of the above and then fell through a
    # bare `pass`, returning None and discarding the results.
    return {
        'entry_info': entry_dict,
        'references': references,
        'pdf_link': pdf_link,
    }
import json
import os

# Directory containing this script; used to locate the saved fixture files.
curpath = str(os.path.dirname(os.path.abspath(__file__)))

# Sample journal article
link = 'http://onlinelibrary.wiley.com/doi/10.1002/biot.201400046/references'
pii = '10.1002/biot.201400046'

# Run scraper on live site
entry = wy.get_entry_info(link)
refs = wy.get_references(pii)

# Make scraped entry into a dict
entry_dict = convert_to_dict(entry)

# Make a list of refs as dict objects
# (comprehension replaces the original `for x in range(len(refs))` loop)
refs_dicts = [convert_to_dict(ref) for ref in refs]

# Load saved version of content and references
with open(curpath + '/saved_sites/wy_entry.txt') as fe:
    saved_entry = fe.read()
with open(curpath + '/saved_sites/wy_references.txt') as fr:
    saved_refs = fr.read()

# Make the saved versions into dicts
def populate_info(self):
    """Fetch and cache entry info, references, and the PDF link on self.

    Builds the scraper input via self._make_input(), then delegates to
    self.publisher_interface for each piece of data, normalizing the
    results with the utils helpers before storing them as attributes
    (self.entry, self.references, self.pdf_link).
    """
    # Renamed from `input`, which shadowed the builtin of the same name.
    scraper_input = self._make_input()
    self.entry = utils.convert_to_dict(
        self.publisher_interface.get_entry_info(scraper_input))
    self.references = utils.refs_to_list(
        self.publisher_interface.get_references(scraper_input))
    self.pdf_link = self.publisher_interface.get_pdf_link(scraper_input)