def rename_file(pdf, bibitem):
    """Offer to rename ``pdf`` to a name derived from ``bibitem``.

    The suggested name is ``<author><yy><word>.pdf``: the author field up to
    the first comma (spaces removed), the last two characters of the year and
    the first word of the title, passed through ``secure_filename``.  The file
    stays in its current directory.  The old and new paths are printed and the
    rename happens only when the user answers exactly ``y``; any other answer
    aborts without touching the file.
    """
    # NOTE(review): the previous code guarded the author with ``if author:``,
    # so ``_get_bib_element`` presumably yields None/'' for a missing field --
    # confirm.  Missing fields are treated as empty strings so they neither
    # crash the slicing/splitting below nor show up as the text "None".
    year = _get_bib_element(bibitem, "year") or ''
    author = _get_bib_element(bibitem, "author") or ''
    title = _get_bib_element(bibitem, "title") or ''

    author = author.split(",")[0].replace(' ', '')

    # pick one word for the title (a blank title has no words to pick from)
    words = title.split()
    title = words[0] if words else ''

    filename = secure_filename('%s%s%s.pdf' % (author, year[-2:], title))

    # Replace only the last path component.  A plain str.replace() on the
    # whole path would also rewrite directory names containing the basename.
    newfile = os.path.join(os.path.dirname(pdf), filename)

    # Single-argument parenthesised prints emit the same text whether print
    # is a statement or a function.
    prompt = (
        "",
        "Will rename:",
        "",
        " %s" % pdf,
        "",
        "to",
        "",
        " %s" % newfile,
        "",
        "Proceed? [y/N]",
    )
    for line in prompt:
        print(line)

    answer = raw_input()
    if answer == 'y':
        print("Renaming %s to %s" % (pdf, newfile))
        os.rename(pdf, newfile)
    else:
        print("Aborting.")
def download(url, usecache=True, cached=None, cachedir='cache~/', cachedonly=False, **opts):
    """
    Fetch ``url`` into a local file, reusing a previously stored copy when
    allowed.

    Returns the name of the file holding the contents, or None when nothing
    was stored (including URLs that start with neither ``ftp`` nor ``http``).

    ``cached`` names the target file explicitly; when it is not given the
    name is ``secure_filename(url)`` inside ``cachedir`` (created via
    ``mkdir``).  With ``usecache`` an existing target is returned without
    downloading.  With ``cachedonly`` nothing is ever downloaded when the
    target is missing: None is returned instead.

    Extra keyword options are forwarded to ``robust_download`` for ``http``
    URLs -- presumably where ``tries``, ``pause`` and ``timeout`` are
    interpreted; confirm against that helper.  ``ftp`` URLs go through
    ``wget`` and receive no options.
    """
    if not cached:
        if not cachedir:
            # No explicit target and no directory to derive one from.
            assert not usecache, 'must specify cachedir'
        else:
            mkdir(cachedir)
            cached = os.path.join(cachedir, secure_filename(url))

    # Only touch the filesystem when one of the cache flags asks for it.
    if cachedonly or usecache:
        on_disk = os.path.exists(cached)
        if cachedonly and not on_disk:
            # only return something for cached files
            return None
        if usecache and on_disk:
            return cached

    if url.startswith('ftp'):
        # use wget for ftp files
        return wget(url, cached)
    elif url.startswith('http'):
        return robust_download(url, cached, **opts)
    return None
def cache_document(src, dest=None):
    """Store ``src`` (a URL or a file on disk) under CACHE; return its path.

    ``dest`` is the name to use inside CACHE.  When omitted, a URL is named
    with ``secure_filename`` and a local file keeps its basename.

    Raises ``SkidFileExists`` if the destination is already present and
    ``SkidError`` if ``src`` is neither a URL nor an existing file.
    """
    # TODO: use a staging area in case something breaks in the middle of adding.
    source = Path(src)

    # Find a reasonable filename if dest isn't specified
    if dest is not None:
        target = CACHE / dest
    elif is_url(source):
        target = CACHE / secure_filename(source)
    else:
        target = CACHE / source.basename()

    if target.exists():
        # TODO: Suggest update methods or renaming the file
        raise SkidFileExists(target)

    if is_url(source):
        cache_url(source, target)
        return target

    # Not a URL, so it has to be something on disk.
    if not source.exists():
        raise SkidError(
            "cache_document doesn't know what to do with source %r\n"
            "Trying to add a nonexistent file?" % str(source))

    source.copy2(target)
    print('copy:', source, '->', target)
    return target
def cache_document(src, dest=None): "Cache a document, return filename of the dest file." # TODO: use a staging area in case something breaks in the middle of adding. src = path(src) if dest is None: # Find a reasonable filename if dest isn't specified if is_url(src): dest = CACHE / secure_filename(src) else: dest = CACHE / src.basename() else: dest = CACHE / dest if dest.exists(): # TODO: Suggest update methods or renaming the file raise SkidFileExists(dest) if is_url(src): cache_url(src, dest) elif src.exists(): # is this something on disk? src.copy2(dest) print 'copy:', src, '->', dest else: raise SkidError("cache_document doesn't know what to do with source %r\n" "Trying to add a nonexistent file?" % str(src)) return dest
def rename_file(pdf, bibitem):
    """Attempt to rename pdf according to bibitem.

    Builds ``<author><yy><word>.pdf`` from the bibitem (author text before the
    first comma with spaces stripped, last two characters of the year, first
    word of the title), sanitises it with ``secure_filename`` and proposes it
    as the new name in the directory ``pdf`` already lives in.  The user must
    answer exactly ``y`` for the rename to be carried out.
    """
    # NOTE(review): ``_get_bib_element`` presumably returns None/'' when the
    # field is absent (the earlier ``if author:`` guard suggests so) --
    # confirm.  Absent fields become '' so that they cannot crash the string
    # handling below or end up in the filename as the text "None".
    year = _get_bib_element(bibitem, "year") or ''
    author = _get_bib_element(bibitem, "author") or ''
    title = _get_bib_element(bibitem, "title") or ''

    first_author = author.split(",")[0].replace(' ', '')

    # pick one word for the title; split() on a blank title gives no words
    title_words = title.split()
    first_word = title_words[0] if title_words else ''

    filename = secure_filename(
        '%s%s%s.pdf' % (first_author, year[-2:], first_word))

    # Build the new path from the directory part instead of substituting the
    # basename textually: str.replace() would also hit a directory whose name
    # contains the basename.
    newfile = os.path.join(os.path.dirname(pdf), filename)

    # One-argument parenthesised prints behave the same under the print
    # statement and the print function.
    print("")
    print("Will rename:")
    print("")
    print(" %s" % pdf)
    print("")
    print("to")
    print("")
    print(" %s" % newfile)
    print("")
    print("Proceed? [y/N]")

    answer = raw_input()
    if answer != 'y':
        print("Aborting.")
        return

    print("Renaming %s to %s" % (pdf, newfile))
    os.rename(pdf, newfile)
def cache_url(url, dest=None):
    """
    Download url, write contents to file. Return filename of contents.

    ``dest`` is the file to write.  When omitted, the file is
    ``CACHE / secure_filename(url)``.  ``dest`` must offer an ``exists()``
    method (e.g. a path built as ``CACHE / name``); a plain string will not
    work.

    Raises ``Exception`` when ``download`` reports failure by returning a
    false value.  The target is asserted not to exist beforehand; that check
    disappears when Python runs with ``-O``.
    """
    # Accepting an explicit destination lets callers that have already picked
    # a cache filename pass it in instead of relying on the same convention.
    cached = CACHE / secure_filename(url) if dest is None else dest

    assert not cached.exists(), 'File %s already exists.' % cached

    # The target is handed to download explicitly and the cache is bypassed,
    # so the file is fetched afresh.
    if not download(url, timeout=60, usecache=False, cached=cached):
        raise Exception('Failed to download %s.' % url)

    return cached