def get(self, ifile):
    """Build a BeautifulSoup object from ``ifile`` and store it on ``self.bsdata``.

    Accepted inputs, checked in this order:

    * ``FileInfo`` -- the file at ``ifile.path`` is read and parsed.
    * ``PosixPath`` / ``Path`` -- the file is read and parsed.
    * ``str`` -- wrapped in ``FileInfo``; if ``isFile()`` is true the
      extension decides how it is loaded (``.p`` through ``PickleIO``,
      ``.html`` / ``.htm`` read as text); otherwise the string itself is
      parsed as markup.
    * ``bytes`` -- parsed directly.

    Every parse uses ``features="lxml"``.

    Returns:
        The BeautifulSoup object, which is also stored in ``self.bsdata``.

    Raises:
        ValueError: if ``ifile`` is an existing file with an unrecognized
            extension, or is of an unsupported type.
    """
    # Kept from the original: bsdata holds the raw input until a branch
    # below replaces it (and still holds it if a ValueError is raised).
    self.bsdata = ifile

    if isinstance(ifile, FileInfo):
        # Context manager so the file handle is closed deterministically.
        with open(ifile.path) as fh:
            self.bsdata = BeautifulSoup(fh.read(), features="lxml")
    elif isinstance(ifile, (PosixPath, Path)):
        with open(ifile) as fh:
            self.bsdata = BeautifulSoup(fh.read(), features="lxml")
    elif isinstance(ifile, str):
        finfo = FileInfo(ifile)
        if finfo.isFile():
            if finfo.ext == ".p":
                # NOTE(review): PickleIO presumably unpickles the file;
                # only load pickles from trusted sources -- confirm.
                self.bsdata = BeautifulSoup(PickleIO().get(ifile),
                                            features="lxml")
            elif finfo.ext in [".html", ".htm"]:
                with open(ifile) as fh:
                    self.bsdata = BeautifulSoup(fh.read(), features="lxml")
            else:
                raise ValueError(
                    "File extention [{0}] is not recognized".format(
                        finfo.ext))
        else:
            # Not a file on disk: treat the string itself as the markup.
            self.bsdata = BeautifulSoup(StringIO(ifile), features="lxml")
    elif isinstance(ifile, bytes):
        self.bsdata = BeautifulSoup(ifile, features="lxml")
    else:
        raise ValueError(
            "Can't create BeautifulSoup() from [{0}] input".format(
                type(ifile)))

    return self.bsdata
def get(self, ifile, **kwargs):
    """Load ``ifile`` through the IO handler returned by ``self.getIO``.

    Args:
        ifile: file location; wrapped in ``FileInfo``.
        **kwargs: forwarded unchanged to the handler's ``get``.

    Returns:
        Whatever the handler's ``get`` returns, or ``None`` (after printing
        a message) when the file does not exist.

    Raises:
        ValueError: if ``self.getIO`` returns ``None`` for this file.
    """
    finfo = FileInfo(ifile)
    handler = self.getIO(finfo)
    if handler is None:
        message = "Unknown file extension: [{0}] [{1}]".format(
            ifile, finfo.ext)
        raise ValueError(message)

    # The handler lookup comes first, so an unknown extension raises even
    # when the file is missing.
    if not finfo.exists():
        print("[{0}] does not exist. Returning None".format(ifile))
        return None

    loaded = handler.get(finfo.str, **kwargs)
    if self.debug:
        # NOTE(review): tsGet is not assigned anywhere in this method;
        # presumably a timer defined at module level -- confirm, otherwise
        # this raises NameError whenever debug is on.
        tsGet.stop()
    return loaded
def __init__(self, ifile, now=None, lastModified=None):
    """Record how far ``ifile``'s time is from ``now`` and ``lastModified``.

    Args:
        ifile: file location handed to ``FileInfo``.
        now: reference time; defaults to ``Timestamp.today()`` rounded to
            the second.
        lastModified: optional reference time to compare against.

    Attributes set:
        lastModified, err (always ``False`` here), now,
        modTime: ``Timestamp`` of ``FileInfo(ifile).time()`` or ``None``,
        deltaLastMod: ``lastModified - modTime`` rounded to seconds, or
            ``None`` when either side is missing,
        deltaNow: ``now - modTime`` rounded to seconds, or ``None`` when
            ``modTime`` is missing.
    """
    self.lastModified = lastModified
    self.err = False
    if now is None:
        now = Timestamp.today().round('s')
    self.now = now

    # File time as reported by FileInfo (presumably the modification
    # time -- confirm against FileInfo.time()).
    rawTime = FileInfo(ifile).time()
    fileTime = None if rawTime is None else Timestamp(rawTime)

    # Raw differences; both stay None when the file time is unavailable.
    sinceLastMod = None
    sinceNow = None
    if fileTime is not None:
        if self.lastModified is not None:
            sinceLastMod = self.lastModified - fileTime
        sinceNow = self.now - fileTime

    self.modTime = fileTime

    if sinceLastMod is None:
        self.deltaLastMod = None
    else:
        self.deltaLastMod = Timedelta(sinceLastMod).round('s')

    if sinceNow is None:
        self.deltaNow = None
    else:
        self.deltaNow = Timedelta(sinceNow).round('s')
def getFilesByRecency(self, expr, debug=False):
    """Return the files whose delta-to-now satisfies ``expr``.

    Calls ``self.setFileTimeDelta(expr)`` and ``self.setFileModTimes()``,
    then keeps every key of ``self.fileModTimes`` for which
    ``self.ftd.isTrue(entry.getDeltaNow())`` is truthy.

    Args:
        expr: passed to ``self.setFileTimeDelta``.
        debug: when true, show ``self.ftd`` and print one line per file
            before filtering.

    Returns:
        list of the matching keys, in ``self.fileModTimes`` order.
    """
    self.setFileTimeDelta(expr)
    self.setFileModTimes()

    if debug:
        self.ftd.show()
        total = len(self.fileModTimes)
        for idx, (path, entry) in enumerate(self.fileModTimes.items()):
            name = FileInfo(path).basename
            stamp = entry.modTime
            age = entry.getDeltaNow()
            keep = self.ftd.isTrue(entry.getDeltaNow())
            line = "{0}/{1}\t{2: <60}{3} {4} {5}".format(
                idx, total, name, stamp, age, keep)
            print(line)

    recent = []
    for path, entry in self.fileModTimes.items():
        if self.ftd.isTrue(entry.getDeltaNow()):
            recent.append(path)
    return recent
def save(self, ifile, idata):
    """Write ``idata`` to ``ifile`` through the handler from ``self.getIO``.

    Args:
        ifile: destination; wrapped in ``FileInfo``.
        idata: object handed to the handler's ``save``.

    Raises:
        ValueError: if ``self.getIO`` returns ``None`` for this file,
            instead of failing with an AttributeError on ``None``.
    """
    finfo = FileInfo(ifile)
    extIO = self.getIO(finfo)
    if extIO is None:
        # Report the unsupported extension explicitly.
        raise ValueError("Unknown file extension: [{0}] [{1}]".format(
            ifile, finfo.ext))
    extIO.save(finfo.str, idata)
def setLastModTimeFromFile(self, ifile):
    """Set ``self.lastModified`` from ``FileInfo(ifile).time()``.

    The value is wrapped in ``Timestamp`` unless it is ``None``, in which
    case ``self.lastModified`` is left as ``None``.
    """
    rawTime = FileInfo(ifile).time()
    self.lastModified = rawTime
    if rawTime is not None:
        self.lastModified = Timestamp(rawTime)