def openfile(*paths, **kwargs):
    '''
    Returns a read-only file-like object even if the path terminates inside
    a container file.

    Args:
        *paths: Path parts, handed unchanged to parsePath().

    Keyword Args:
        req (bool): Defaults to True.  When true, NoSuchPath is raised if the
            path cannot be resolved, or if it resolves to something that is
            not a file (for example a directory).  When false, None is
            returned in those cases instead.
        mode (str): Accepted for backward compatibility.  This function does
            not forward it; the resolved path is wrapped in FpOpener as-is.

    Returns:
        An FpOpener wrapping the resolved path, or None (only when req is
        false and the path is missing or is not a file).

    Raises:
        NoSuchPath: see ``req`` above.

    Example:

        fd = openfile('/foo/bar/baz.egg/path/inside/zip/to/file')
        if fd is None:
            return
        fbuf = fd.read()
        fd.close()
    '''
    reqd = kwargs.get('req', True)

    fpath = parsePath(*paths)
    if not fpath:
        if reqd:
            # Only the non-empty parts are reported in the error.
            raise s_common.NoSuchPath(path='/'.join(p for p in paths if p))
        return None

    if not fpath.isfile():
        # The resolved path object is never handed to the caller on this
        # branch, so release it whether we raise or return None.
        fpath.close()
        if reqd:
            raise s_common.NoSuchPath(path=fpath.path())
        return None

    return FpOpener(fpath)
def openfiles(*paths, **kwargs):
    '''
    Yields a read-only file-like object for each matching path, even if the
    path terminates inside a container file.

    Path expansion is delegated to parsePaths(); the original documentation
    states that python's fnmatch glob matching may be used.

    Args:
        *paths: Path parts, handed unchanged to parsePaths().

    Keyword Args:
        req (bool): Defaults to False.  When true, NoSuchPath is raised if
            nothing matches at all, or as soon as ANY matching path turns out
            not to be a file (for example a directory).  When false, such
            paths are silently skipped.
        mode (str): Accepted for backward compatibility.  This function does
            not forward it; each match is wrapped in FpOpener as-is.

    Yields:
        An FpOpener for every matching path that is a file.

    Raises:
        NoSuchPath: see ``req`` above.

    Example:

        for fd in openfiles('/foo/bar/*.egg/dir0/zz*/nest.zip'):
            fbuf = fd.read()
    '''
    reqd = kwargs.get('req', False)

    nopaths = True
    for fpath in parsePaths(*paths):
        nopaths = False

        if fpath.isfile():
            yield FpOpener(fpath)
            continue

        if not reqd:
            # NOTE(review): a skipped non-file entry is not closed here.
            # Confirm against parsePaths() whether closing it is safe
            # (entries may share parent containers) before adding a close().
            continue

        fpath.close()
        raise s_common.NoSuchPath(path=fpath.path())

    if nopaths and reqd:
        # Drop empty/None parts so building the message cannot itself fail;
        # this matches how openfile() reports the path.
        raise s_common.NoSuchPath(path='/'.join(p for p in paths if p))
def next(self):
    '''
    Resolve as much of the remaining path as possible inside this container
    and return the object responsible for whatever is left over.

    The scan extends the "inner path" (the parts after self.idx) one part at
    a time for as long as innrExists() accepts it.

    Returns None when:
      * this object already sits on the last path part,
      * the whole remaining path was consumed inside this container, or
      * the extracted inner file has no handler class (per _fdClass).
    Otherwise returns a new handler built around the extracted inner file.

    Raises NoSuchPath (after closing this object) when not even the first
    inner part exists, or when the longest existing inner path is a
    directory but path parts still remain.

    At a minimum, each container should override:
        innrOpen(path)
        innrEnum(path)
    '''
    total = len(self.pparts)

    # already on the final path part: nothing left to descend into
    if self.idx == total - 1:
        return None

    # only the parts *after* this container make up the inner path
    first = self.idx + 1
    longest = None
    stop = total
    for pos in range(first, total):
        candidate = normpath(*self.pparts[first:pos + 1])
        if not self.innrExists(candidate):
            stop = pos
            break
        longest = candidate

    if longest is None:
        self.close()
        raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

    self.maxidx = stop
    consumed_all = self.maxidx == total

    # a directory can only ever be the final stop
    if self.innrIsdir(longest):
        self._isfile = False
        if not consumed_all:
            self.close()
            raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

    if consumed_all:
        return None

    # parts remain, so the inner file must itself be a supported container
    inner_fd = self.innrTmpExtract(longest)
    handler = _fdClass(inner_fd)
    if not handler:
        return None

    inner_fd.seek(0)
    return handler(self.pparts, self.maxidx - 1, parent=self, fd=inner_fd)
def _pathClass(*paths):
    '''
    Look up the handler class for the item located at the given path.

    The parts are joined with s_common.genpath().  A directory maps to the
    'fs.reg.file' entry of path_ctors; anything else is opened in binary
    mode and looked up by the value _mimeFile() reports for it.

    This function only operates on regular os accessible paths.

    Returns:
        Whatever path_ctors.get() yields for the chosen key (None when the
        key is not registered).

    Raises:
        NoSuchPath: if the joined path does not exist.
    '''
    target = s_common.genpath(*paths)

    if not os.path.exists(target):
        raise s_common.NoSuchPath(path=target)

    if os.path.isdir(target):
        ctor_key = 'fs.reg.file'
    else:
        with open(target, 'rb') as fobj:
            ctor_key = _mimeFile(fobj)

    return path_ctors.get(ctor_key)
def next(self):
    '''
    This is the workhorse method that can contain path specific processing
    of children.  The object should consume as much as possible of the path
    before creating the child class.

    Starting after self.idx, the path is extended one part at a time for as
    long as the result exists on the filesystem.

    Returns None when this object already sits on the last path part, or
    when the whole remaining path exists on disk.  Otherwise returns an
    instance of the class _pathClass() selects for the longest existing
    path, which takes over the remaining parts.

    Raises NoSuchPath (after closing this object) when the very next path
    part does not exist, or when the longest existing path is a directory
    but path parts still remain.

    NOTE: Override for container formats
    '''
    partlen = len(self.pparts)

    # the end of the line
    if self.idx == partlen - 1:
        if os.path.isdir(s_common.genpath(*self.pparts[:self.idx + 1])):
            self._isfile = False
        return

    # get longest consistent path
    maxpath = None
    cidx = self.idx + 1
    while cidx < partlen:
        checkpath = s_common.genpath(*self.pparts[:cidx + 1])
        if not os.path.exists(checkpath):
            break
        maxpath = checkpath
        cidx += 1

    # Nothing beyond the current part exists.  Without this guard the
    # os.path.isdir(None) call below raises TypeError instead of the
    # documented NoSuchPath.
    if maxpath is None:
        self.close()
        raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

    self.maxidx = cidx

    # if the max path is a dir, we're finished
    if os.path.isdir(maxpath):
        self._isfile = False
        if self.maxidx != partlen:
            self.close()
            raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

    # if end of the path we're finished
    if self.maxidx == partlen:
        return

    # supported container file
    cls = _pathClass(*self.pparts[:self.maxidx])
    return cls(self.pparts, self.maxidx - 1, parent=self)
def addPyPath(self, path, name=None, datfiles=False):
    '''
    Add a path full of python code to the mind meld archive.
    If a directory is specified, it is treated as a package.

    Args:
        path (str): A python source file or a package directory.
        name (str): Module name to register a single file under.  Defaults
            to the file's basename without its extension.  Ignored when
            path is a directory (the directory name is used).
        datfiles (bool): When true, every other regular file in a package
            directory (except *.pyc and dot-files) is stored in
            self.info['datfiles'] under '<pkgname>/<filename>'.

    Raises:
        NoSuchPath: if path is neither a file nor a directory.

    Example:

        meld.addPyPath('/home/visi/foobar/')
        meld.addPyPath('/home/visi/grok.py')

    Notes:

        * specify datfiles=True to pick up all binary files
          and allow access via synapse.datfile API.
        * directories without an __init__.py are skipped, along with
          everything beneath them.
    '''
    if os.path.isfile(path):
        if name is None:
            name = os.path.basename(path).rsplit('.', 1)[0]
        with open(path, 'rb') as fd:
            sorc = fd.read()
        self.addPySource(name, sorc)
        return

    if os.path.isdir(path):
        # basename() of a path with a trailing separator is '', which would
        # register the package (and all its modules) under an empty name.
        # abspath() normalises the path first so '/x/foobar/' -> 'foobar'.
        rootname = os.path.basename(os.path.abspath(path))

        todo = collections.deque([(path, rootname)])
        while todo:
            dirpath, pkgname = todo.popleft()

            # only real packages are walked
            pkgfile = os.path.join(dirpath, '__init__.py')
            if not os.path.isfile(pkgfile):
                continue

            self.addPyPath(pkgfile, name=pkgname)

            for subname in os.listdir(dirpath):

                # __init__.py was handled above; hidden entries (which also
                # covers '.' and '..') and bytecode caches are never added
                if subname in ('__init__.py', '__pycache__'):
                    continue
                if subname.startswith('.'):
                    continue

                subpath = os.path.join(dirpath, subname)

                if os.path.isdir(subpath):
                    todo.append((subpath, '%s.%s' % (pkgname, subname)))
                    continue

                if not os.path.isfile(subpath):
                    continue

                # handle basic python module first...
                if subname.endswith('.py'):
                    modname = subname.rsplit('.', 1)[0]
                    modpath = '%s.%s' % (pkgname, modname)
                    self.addPyPath(subpath, name=modpath)
                    continue

                # always skip pyc files for now...
                if subname.endswith('.pyc'):
                    continue

                # should we allow datfiles?
                if not datfiles:
                    continue

                # save up binary data into the meld info
                with open(subpath, 'rb') as fd:
                    datpath = '%s/%s' % (pkgname, subname)
                    self.info['datfiles'][datpath] = fd.read()

        return

    raise s_common.NoSuchPath(path=path)