Example #1
0
def openfile(*paths, **kwargs):
    '''
    Returns a read-only file-like object even if the path terminates inside a container file.

    Args:
        *paths: Path components; they are handed to parsePath() unchanged.
        req (bool): Keyword only.  If True (the default), NoSuchPath is raised
                    when the path can not be parsed or when it is not a file
                    (for example, it exists but is a directory).  If False,
                    None is returned in those cases instead.
        mode (str): Keyword only.  Accepted for compatibility with callers
                    that pass it; this function does not use it.

    Returns:
        An FpOpener wrapping the parsed path, or None when req=False and the
        path could not be opened as a file.

    Raises:
        NoSuchPath: If req=True and the path does not parse or is not a file.

    Example:
        fd = openfile('/foo/bar/baz.egg/path/inside/zip/to/file', req=False)
        if fd is None:
            return
        fbuf = fd.read()
        fd.close()
    '''
    reqd = kwargs.get('req', True)
    fpath = parsePath(*paths)

    if not fpath:
        if reqd:
            # only the non-empty components are reported in the error
            raise s_common.NoSuchPath(path='/'.join(p for p in paths if p))
        return None

    if not fpath.isfile():
        # The path object is never handed to the caller on this branch, so
        # release it whether we raise or quietly return None.
        fpath.close()
        if reqd:
            raise s_common.NoSuchPath(path=fpath.path())
        return None

    return FpOpener(fpath)
Example #2
0
def openfiles(*paths, **kwargs):
    '''
    Yields a read-only file-like object for each path even if the path terminates inside a container file.
    Paths may use python's fnmatch glob matching

    Args:
        *paths: Path components; they are handed to parsePaths() unchanged.
        req (bool): Keyword only.  If False (the default), matching paths that
                    are not files are silently skipped and an empty match set
                    simply yields nothing.  If True, NoSuchPath is raised if
                    ANY matching path is not a file (for example, a directory)
                    or if nothing matched at all.
        mode (str): Keyword only.  Accepted for compatibility with callers
                    that pass it; this function does not use it.

    Yields:
        An FpOpener for every matching path that is a file.

    Raises:
        NoSuchPath: Only when req=True; see above.

    Example:
        for fd in openfiles('/foo/bar/*.egg/dir0/zz*/nest.zip'):
            fbuf = fd.read()
    '''
    reqd = kwargs.get('req', False)

    nopaths = True
    for fpath in parsePaths(*paths):
        nopaths = False

        if not fpath.isfile():
            # This path object is never yielded, so release it here whether
            # it is skipped or reported as an error.
            fpath.close()
            if not reqd:
                continue
            raise s_common.NoSuchPath(path=fpath.path())

        yield FpOpener(fpath)

    if nopaths and reqd:
        # Drop empty/None components before joining, as openfile() does, so
        # the error is NoSuchPath rather than a TypeError from str.join().
        raise s_common.NoSuchPath(path='/'.join(p for p in paths if p))
Example #3
0
    def next(self):
        '''
        This is the workhorse method for path specific processing of container children.
        The object should consume as much as possible of the path before instantiating a new
        object

        The remaining path parts (those after self.idx) are joined one at a time and
        probed with self.innrExists(); the longest prefix that exists becomes the
        "inner path".  self.maxidx is set to the index of the first part that was
        not consumed, and self._isfile is cleared if the inner path is a directory
        according to self.innrIsdir().

        At a minimum, each container should override:
        innrOpen(path)
        innrEnum(path)

        Returns:
            None if this object is the last path part, if the inner path consumes
            the rest of the path, or if _fdClass() has no class for the extracted
            file.  Otherwise a new child object built from the class returned by
            _fdClass(), positioned at the last consumed path part.

        Raises:
            NoSuchPath: If no remaining part exists inside the container, or if
                        the inner path is a directory but parts remain after it.
        '''

        # get longest consistent path
        partlen = len(self.pparts)

        # the end of the line
        if self.idx == partlen - 1:
            return

        # since it's a container we only care about the "inner path"
        maxpath = None
        cidx = self.idx + 1

        while cidx < partlen:
            checkpath = normpath(*self.pparts[self.idx + 1:cidx + 1])
            if not self.innrExists(checkpath):
                break
            maxpath = checkpath
            cidx += 1

        # not even the first inner part exists
        if maxpath is None:
            self.close()
            raise s_common.NoSuchPath(path=os.path.join(*self.pparts))
        self.maxidx = cidx

        # if the max path is a dir, we're finished
        if self.innrIsdir(maxpath):
            self._isfile = False

            # a directory can not have further (unconsumed) parts below it
            if self.maxidx != partlen:
                self.close()
                raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

        # if end of the path we're finished
        if self.maxidx == partlen:
            return

        # supported file
        tempfd = self.innrTmpExtract(maxpath)

        cls = _fdClass(tempfd)
        if not cls:
            # No child object will take ownership of the extracted file
            # object, so close it here instead of leaking it.
            tempfd.close()
            return

        # rewind before handing the file object to the child
        tempfd.seek(0)

        return cls(self.pparts, self.maxidx - 1, parent=self, fd=tempfd)
Example #4
0
def _pathClass(*paths):
    '''
    Look up the class registered in path_ctors for the item located at path.

    The components are joined with s_common.genpath().  Directories are looked
    up under the 'fs.reg.file' key; anything else is opened in binary mode and
    looked up under whatever _mimeFile() reports for it.  This function only
    operates on regular os.accessible paths.

    Raises:
        NoSuchPath: If the joined path does not exist.
    '''
    target = s_common.genpath(*paths)

    if os.path.exists(target):
        if os.path.isdir(target):
            ctorname = 'fs.reg.file'
        else:
            with open(target, 'rb') as fd:
                ctorname = _mimeFile(fd)
        return path_ctors.get(ctorname)

    raise s_common.NoSuchPath(path=target)
Example #5
0
    def next(self):
        '''
        This is the workhorse method that can contain path specific processing of children.
        The object should consume as much as possible of the path before creating the
        child class

        Path parts after self.idx are appended one at a time and checked with
        os.path.exists(); the longest existing path wins.  self.maxidx is set to the
        index of the first part that was not consumed, and self._isfile is cleared
        when the path found is a directory.

        NOTE: Override for container formats

        Returns:
            None if this object is the last path part or if the whole path exists
            on the filesystem.  Otherwise a new child object built from the class
            returned by _pathClass() for the longest existing path.

        Raises:
            NoSuchPath: If the first remaining part does not exist, or if the
                        longest existing path is a directory but parts remain
                        after it.
        '''

        # get longest consistent path
        partlen = len(self.pparts)
        # the end of the line
        if self.idx == partlen - 1:
            if os.path.isdir(s_common.genpath(*self.pparts[:self.idx + 1])):
                self._isfile = False
            return

        maxpath = None
        cidx = self.idx + 1

        while cidx < partlen:
            checkpath = s_common.genpath(*self.pparts[:cidx + 1])
            if not os.path.exists(checkpath):
                break
            maxpath = checkpath
            cidx += 1

        # Not even the first remaining part exists.  Report it the same way
        # the container implementation does; previously maxpath stayed None
        # here and was handed to os.path.isdir(), which raises TypeError.
        if maxpath is None:
            self.close()
            raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

        self.maxidx = cidx

        # if the max path is a dir, we're finished
        if os.path.isdir(maxpath):
            self._isfile = False

            # a directory can not have further (unconsumed) parts below it
            if self.maxidx != partlen:
                self.close()
                raise s_common.NoSuchPath(path=os.path.join(*self.pparts))

        # if end of the path we're finished
        if self.maxidx == partlen:
            return

        # supported container file
        cls = _pathClass(*self.pparts[:self.maxidx])
        return cls(self.pparts, self.maxidx - 1, parent=self)
Example #6
0
    def addPyPath(self, path, name=None, datfiles=False):
        '''
        Add a path full of python code to the mind meld archive.
        If a directory is specified, it is treated as a package.

        Args:
            path (str): A python source file, or a package directory (a
                        trailing path separator is allowed).
            name (str): Module name to register a single file under.  Defaults
                        to the file name without its extension.  It is not
                        used when path is a directory; the package is named
                        after the directory itself.
            datfiles (bool): Also store non-python files found in the package.

        Raises:
            NoSuchPath: If path is neither a file nor a directory.

        Example:

            meld.addPyPath('/home/visi/foobar/')
            meld.addPyPath('/home/visi/grok.py')

        Notes:

            * specify datfiles=True to pick up all binary files
              and allow access via synapse.datfile API.
            * directories without an __init__.py are skipped, along with
              everything below them.

        '''
        if os.path.isfile(path):
            if name is None:
                name = os.path.basename(path).rsplit('.', 1)[0]

            with open(path, 'rb') as fd:
                sorc = fd.read()

            self.addPySource(name, sorc)
            return

        if os.path.isdir(path):
            # normpath() strips any trailing separator; without it basename()
            # returns '' for '/home/visi/foobar/' and every module in the
            # package would be registered under a name starting with '.'.
            pkgname = os.path.basename(os.path.normpath(path))

            # breadth first walk over the package and its sub packages
            todo = collections.deque([(path, pkgname)])
            while todo:

                pkgdir, pkgname = todo.popleft()
                pkgfile = os.path.join(pkgdir, '__init__.py')
                if not os.path.isfile(pkgfile):
                    continue

                # the package itself is registered from its __init__.py
                self.addPyPath(pkgfile, name=pkgname)
                for subname in os.listdir(pkgdir):
                    if subname in ('__init__.py', '__pycache__'):
                        continue

                    # skip hidden entries (this covers '.' and '..' as well)
                    if subname.startswith('.'):
                        continue

                    subpath = os.path.join(pkgdir, subname)
                    if os.path.isdir(subpath):
                        todo.append((subpath, '%s.%s' % (pkgname, subname)))
                        continue

                    if not os.path.isfile(subpath):
                        continue

                    # handle basic python module first...
                    if subname.endswith('.py'):
                        modname = subname.rsplit('.', 1)[0]
                        modpath = '%s.%s' % (pkgname, modname)
                        self.addPyPath(subpath, name=modpath)
                        continue

                    # always skip pyc files for now...
                    if subname.endswith('.pyc'):
                        continue

                    # should we allow datfiles?
                    if not datfiles:
                        continue

                    # save up binary data into the meld info
                    datpath = '%s/%s' % (pkgname, subname)
                    with open(subpath, 'rb') as fd:
                        self.info['datfiles'][datpath] = fd.read()

            return

        raise s_common.NoSuchPath(path=path)