def __init__(self, filename, mode="rb"):
    """
    Open ``filename`` in binary mode and, when reading, decode its
    header and index.

    Parameters
    ----------
    filename: String
        Path of the file to open.
    mode: String
        Either "rb" (read) or "wb" (write).

    Raises
    ------
    IOError
        If mode is neither "rb" nor "wb".
    """
    if mode not in ("rb", "wb"):
        # Build one message string: two positional arguments would be
        # interpreted by IOError/OSError as (errno, strerror).
        raise IOError("mode not supported: {!r}".format(mode))

    self._file = builtin_open(filename, mode)
    self.filename = filename

    if mode == "rb":
        try:
            self.header = self._decode_header()
            # size is the rounded square root of the header's count.
            self.size = int(round(math.sqrt(self.header.count)))
            self.index = self._decode_index()
        except Exception:
            # Do not leak the open handle when the file cannot be decoded.
            self._file.close()
            raise
def fetch(bibcode, filename=None):
    """
    Try to download the PDF of an entry through ADS and store it.

    The journal, ADS, and ArXiv sources are requested in that order
    until one responds with status code 200.  If the bibcode is in the
    Bibmanager database, the PDF is handed to set_pdf(); otherwise it
    is written to ``filename``.

    Parameters
    ----------
    bibcode: String
        ADS bibcode of entry to update.
    filename: String
        Filename to assign to the PDF file.  If None and the entry is
        not in the database, '{bibcode}.pdf' is used; for entries in
        the database the None is passed on to set_pdf().
    """
    sources = (
        ('journal', 'Fetching PDF file from Journal website:'),
        ('ads', 'Fetching PDF file from ADS website:'),
        ('arxiv', 'Fetching PDF file from ArXiv website:'),
    )
    for source, banner in sources:
        print(banner)
        response = request_ads(bibcode, source=source)
        if response is None:
            return
        if response.status_code == 200:
            break
    else:
        # Every source answered, none of them with a 200.
        print('Could not fetch PDF from any source.')
        return

    # Only PDFs obtained from ArXiv are stored without forcing a
    # replacement of a pre-existing PDF.
    from_arxiv = source == 'arxiv'

    if bm.find(bibcode=bibcode) is not None:
        set_pdf(
            bibcode,
            bin_pdf=response.content,
            filename=filename,
            arxiv=from_arxiv,
            replace=not from_arxiv,
        )
        return

    if filename is None:
        filename = f'{bibcode}.pdf'
    with builtin_open(filename, 'wb') as pdf_file:
        pdf_file.write(response.content)
    print(f"Saved PDF to: '{filename}'.\n"
          "(Note that BibTex entry is not in the Bibmanager database)")
def set_pdf(bib, pdf=None, bin_pdf=None, filename=None, arxiv=False,
            replace=False):
    """
    Update the PDF file of the given BibTex entry in database.
    If pdf is not None, move the file into the database pdf folder.

    Parameters
    ----------
    bib: String or Bib() instance
        Entry to be updated (must exist in the Bibmanager database).
        If string, the key ID or the ADS bibcode of the entry (the key
        is looked up first).
    pdf: String
        Path to an existing PDF file.
        Only one of pdf and bin_pdf must be not None.
    bin_pdf: Bytes
        PDF content in binary format (e.g., as in req.content).
        Only one of pdf and bin_pdf must be not None.
    filename: String
        Filename to assign to the PDF file.  If None, take name from
        pdf input argument, or else from guess_name().
    arxiv: Bool
        Flag indicating the source of the PDF.  It is forwarded to
        guess_name() when guessing a filename.
    replace: Bool
        Replace without asking if the entry already has a PDF assigned;
        else, ask the user.

    Returns
    -------
    filename: String
        Path of the stored PDF (u.BM_PDF() followed by the file name),
        or None if the user declined to replace or overwrite a file.

    Raises
    ------
    ValueError
        If the entry is not in the database, if not exactly one of pdf
        and bin_pdf is given, or if filename lacks a .pdf extension or
        contains a path.
    """
    if isinstance(bib, str):
        by_key = bm.find(key=bib)
        bib = bm.find(bibcode=bib) if by_key is None else by_key
    if bib is None:
        raise ValueError('BibTex entry is not in Bibmanager database')

    if (pdf is None) + (bin_pdf is None) != 1:
        raise ValueError('Exactly one of pdf or bin_pdf must be not None')

    # Let's have a guess, if needed:
    guess_filename = guess_name(bib, arxiv=arxiv)
    if filename is None:
        filename = os.path.basename(pdf) if pdf is not None else guess_filename

    if not filename.lower().endswith('.pdf'):
        raise ValueError('Invalid filename, must have a .pdf extension')
    if os.path.dirname(filename) != '':
        raise ValueError('filename must not have a path')

    # The code concatenates this value directly with file names.
    pdf_dir = u.BM_PDF()

    if pdf is not None and bib.pdf is not None:
        pdf_is_not_bib_pdf = os.path.abspath(pdf) != f'{pdf_dir}{bib.pdf}'
    else:
        pdf_is_not_bib_pdf = True

    # Files that must not count as name collisions: the entry's current
    # PDF and, if the input file already sits at the target location,
    # the input file itself.  Each exclusion is applied independently,
    # so a missing bib.pdf does not disable the second one.
    own_files = {bib.pdf}
    if pdf == f'{pdf_dir}{filename}':
        own_files.add(filename)

    # PDF files in BM_PDF (except for the entry being fetched):
    pdf_names = [
        name
        for name in os.listdir(pdf_dir)
        if os.path.splitext(name)[1].lower() == '.pdf'
        and name not in own_files
    ]

    if not replace and bib.pdf is not None and pdf_is_not_bib_pdf:
        rep = u.req_input(
            f"Bibtex entry already has a PDF file: '{bib.pdf}'  "
            "Replace?\n[]yes, [n]o.\n",
            options=['', 'y', 'yes', 'n', 'no'])
        if rep in ['n', 'no']:
            return None

    while filename in pdf_names:
        overwrite = input(
            f"A filename '{filename}' already exists.  Overwrite?\n"
            f"[]yes, [n]o, or type new file name (e.g., {guess_filename}).\n")
        if overwrite in ['n', 'no']:
            return None
        elif overwrite in ['', 'y', 'yes']:
            break
        elif overwrite.lower().endswith('.pdf'):
            # Loop again to check the new name for collisions.
            filename = overwrite

    # Delete pre-existing file only if not merely renaming (and only if
    # the entry actually has a PDF assigned):
    if bib.pdf is not None and (pdf is None or pdf_is_not_bib_pdf):
        with u.ignored(OSError):
            os.remove(f"{pdf_dir}{bib.pdf}")

    saved_path = f"{pdf_dir}{filename}"
    if pdf is not None:
        shutil.move(pdf, saved_path)
    else:
        with builtin_open(saved_path, 'wb') as f:
            f.write(bin_pdf)
    print(f"Saved PDF to: '{saved_path}'.")

    # Update entry and database:
    bibs = bm.load()
    index = bibs.index(bib)
    bib.pdf = filename
    bibs[index] = bib
    bm.save(bibs)
    bm.export(bibs, meta=True)

    return saved_path
def fetch(bibcode, filename=None, replace=None):
    """
    Try to download the PDF of an entry through ADS and store it.

    The journal, ADS, and ArXiv sources are requested in that order
    until one responds with status code 200.  If the bibcode is in the
    Bibmanager database, the PDF is handed to set_pdf(); otherwise it
    is written to ``filename``.

    Parameters
    ----------
    bibcode: String
        ADS bibcode of entry to update.
    filename: String
        Filename to assign to the PDF file.  If None and the entry is
        not in the database, '{bibcode}.pdf' is used; for entries in
        the database the None is passed on to set_pdf().
    replace: Bool
        Value passed to set_pdf() as its replace argument.  If None
        (default), it is set to False when the PDF was requested from
        ArXiv and to True otherwise.

    Returns
    -------
    filename: String
        For entries in the database, whatever set_pdf() returns.
        For entries not in the database, the name of the written file.
        None if no PDF could be fetched.
    """
    attempts = (
        ('journal', 'Fetching PDF file from Journal website:'),
        ('ads', 'Fetching PDF file from ADS website:'),
        ('arxiv', 'Fetching PDF file from ArXiv website:'),
    )
    for source, banner in attempts:
        print(banner)
        req = request_ads(bibcode, source=source)
        if req is None:
            return
        if req.status_code == 200:
            break
    else:
        # Every source answered, none of them with a 200.
        print('Could not fetch PDF from any source.')
        return

    arxiv = source == 'arxiv'
    if replace is None:
        replace = not arxiv

    in_database = bm.find(bibcode=bibcode) is not None
    if in_database:
        return set_pdf(
            bibcode,
            bin_pdf=req.content,
            filename=filename,
            arxiv=arxiv,
            replace=replace,
        )

    if filename is None:
        filename = f'{bibcode}.pdf'
    with builtin_open(filename, 'wb') as pdf_file:
        pdf_file.write(req.content)
    print(f"Saved PDF to: '{filename}'.\n"
          "(Note that BibTex entry is not in the Bibmanager database)")
    return filename