예제 #1
0
def default_docs(func):
    """
    A decorator which automatically takes care of default parameter
    documentation for common pipeline factory parameters
    """
    docs = func.__doc__
    new_docs = ""
    signature = funcsigs.signature(func)

    try:
        buf = StringIO(docs)
        line = True
        while line:
            line = buf.readline()

            if "Parameters" in line:
                artificial_param_docs = [line, buf.readline()]
                # Include the `-----` line
                for param in signature.parameters.keys():
                    doc = _PARAMETER_MAPPING.get(param, None)
                    if doc:
                        if not doc.endswith("\n"):
                            doc += "\n"
                        if doc.startswith("\n"):
                            doc = doc[1:]
                        artificial_param_docs.append(doc)
                new_docs += "".join(artificial_param_docs)
                continue
            new_docs = "".join([new_docs, line])
        func.__doc__ = new_docs
    except Exception as ex:
        func.__doc__ = str(ex)
    return func
예제 #2
0
class _FileWrapper(object):
    def set_file(self, filename):
        self.f = open(filename, 'r')

    def set_stdin(self):
        self.f = sys.stdin

    def set_text(self, text):
        from six.moves import cStringIO as StringIO
        self.f = StringIO(text)

    def readline(self):
        return self.f.readline()

    def close(self):
        self.f.close()
class _FileWrapper(object):            
    def set_file(self, filename):
        self.f = open(filename, 'r')
        
    def set_stdin(self):
        self.f = sys.stdin
        
    def set_text(self, text):
        from six.moves import cStringIO as StringIO
        self.f = StringIO(text)

    def readline(self):
        return self.f.readline()

    def close(self):
        self.f.close()
예제 #4
0
class FileCache( Iterator ):
    """
    Wrapper for a file that cache blocks of data in memory. 
    
    **NOTE:** this is currently an incomplete file-like object, it only
    supports seek, tell, and readline (plus iteration). Reading bytes is
    currently not implemented.
    """
    def __init__( self, file, size, cache_size=DEFAULT_CACHE_SIZE, 
                                    block_size=DEFAULT_BLOCK_SIZE ):
        """
        Create a new `FileCache` wrapping the file-like object `file` that
        has total size `size` and caching blocks of size `block_size`.
        """
        self.file = file
        self.size = size
        self.cache_size = cache_size
        self.block_size = block_size
        # Setup the cache
        self.nblocks = ( self.size // self.block_size ) + 1
        self.cache = LRUCache( self.cache_size )
        # Position in file
        self.dirty = True
        self.at_eof = False
        self.file_pos = 0
        self.current_block_index = -1
        self.current_block = None
    def fix_dirty( self ):
        chunk, offset = self.get_block_and_offset( self.file_pos )
        if self.current_block_index != chunk:
            self.current_block = StringIO( self.load_block( chunk ) )
            self.current_block.read( offset )
            self.current_block_index = chunk
        else:
            self.current_block.seek( offset )
        self.dirty = False
    def get_block_and_offset( self, index ):
        return int( index // self.block_size ), int( index % self.block_size )
    def load_block( self, index ):
        if index in self.cache:
            return self.cache[index]
        else:
            real_offset = index * self.block_size
            self.file.seek( real_offset )
            block = self.file.read( self.block_size )
            self.cache[index] = block
            return block
    def seek( self, offset, whence=0 ):
        """
        Move the file pointer to a particular offset.
        """
        # Determine absolute target position
        if whence == 0:
            target_pos = offset
        elif whence == 1:
            target_pos = self.file_pos + offset
        elif whence == 2:
            target_pos = self.size - offset
        else:
            raise Exception( "Invalid `whence` argument: %r", whence )
        # Check if this is a noop
        if target_pos == self.file_pos:
            return    
        # Verify it is valid
        assert 0 <= target_pos < self.size, "Attempt to seek outside file"
        # Move the position
        self.file_pos = target_pos
        # Mark as dirty, the next time a read is done we need to actually
        # move the position in the bzip2 file
        self.dirty = True
    def readline( self ):
        if self.dirty:
            self.fix_dirty()
        if self.at_eof:
            return ""
        rval = []
        while 1:
            line = self.current_block.readline()
            rval.append( line )
            if len( line ) > 0 and line[-1] == '\n':
                break
            elif self.current_block_index == self.nblocks - 1:
                self.at_eof = True
                break
            else:
                self.current_block_index += 1
                self.current_block = StringIO( self.load_block( self.current_block_index ) )      
        return "".join( rval )     
    def __next__( self ):
        line = self.readline()
        if line == "":
            raise StopIteration
    def __iter__( self ):
        return self
    def close( self ):
        self.file.close()
예제 #5
0
class SeekableLzopFile(Iterator):
    """
    Filelike object supporting read-only semi-random access to bz2 compressed
    files for which an offset table (bz2t) has been generated by `bzip-table`.
    """
    def __init__(self, filename, table_filename, block_cache_size=0, **kwargs):
        self.filename = filename
        self.table_filename = table_filename
        self.init_table()
        self.file = open(self.filename, "r")
        self.dirty = True
        self.at_eof = False
        self.file_pos = 0
        self.current_block_index = -1
        self.current_block = None
        if block_cache_size > 0:
            self.cache = lrucache.LRUCache(block_cache_size)
        else:
            self.cache = None

    def init_table(self):
        self.block_size = None
        self.block_info = []
        # Position of corresponding block in compressed file (in bytes)
        for line in open(self.table_filename):
            fields = line.split()
            if fields[0] == "s":
                self.block_size = int(fields[1])
            if fields[0] == "o":
                offset = int(fields[1])
                compressed_size = int(fields[2])
                size = int(fields[3])
                self.block_info.append((offset, compressed_size, size))
        self.nblocks = len(self.block_info)

    def close(self):
        self.file.close()

    def load_block(self, index):
        if self.cache is not None and index in self.cache:
            return self.cache[index]
        else:
            offset, csize, size = self.block_info[index]
            # Get the block of compressed data
            self.file.seek(offset)
            data = self.file.read(csize)
            # Need to prepend a header for python-lzo module (silly)
            data = ''.join(('\xf0', struct.pack("!I", size), data))
            value = lzo.decompress(data)
            if self.cache is not None:
                self.cache[index] = value
            return value

    def fix_dirty(self):
        chunk, offset = self.get_block_and_offset(self.file_pos)
        if self.current_block_index != chunk:
            self.current_block = StringIO(self.load_block(chunk))
            self.current_block.read(offset)
            self.current_block_index = chunk
        else:
            self.current_block.seek(offset)
        self.dirty = False

    def get_block_and_offset(self, index):
        return int(index // self.block_size), int(index % self.block_size)

    def seek(self, offset, whence=0):
        """
        Move the file pointer to a particular offset.
        """
        # Determine absolute target position
        if whence == 0:
            target_pos = offset
        elif whence == 1:
            target_pos = self.file_pos + offset
        elif whence == 2:
            raise Exception("seek from end not supported")
            ## target_pos = self.size - offset
        else:
            raise Exception("Invalid `whence` argument: %r", whence)
        # Check if this is a noop
        if target_pos == self.file_pos:
            return
        # Verify it is valid
        ## assert 0 <= target_pos < self.size, "Attempt to seek outside file"
        # Move the position
        self.file_pos = target_pos
        # Mark as dirty, the next time a read is done we need to actually
        # move the position in the bzip2 file
        self.dirty = True

    def tell(self):
        return self.file_pos

    def readline(self):
        if self.dirty:
            self.fix_dirty()
        if self.at_eof:
            return ""
        rval = []
        while 1:
            line = self.current_block.readline()
            self.file_pos += len(line)
            rval.append(line)
            if len(line) > 0 and line[-1] == '\n':
                break
            elif self.current_block_index == self.nblocks - 1:
                self.at_eof = True
                break
            else:
                self.current_block_index += 1
                self.current_block = StringIO(
                    self.load_block(self.current_block_index))
        return "".join(rval)

    def __next__(self):
        line = self.readline()
        if line == "":
            raise StopIteration

    def __iter__(self):
        return self
예제 #6
0
class SeekableLzopFile( Iterator ):
    """
    Filelike object supporting read-only semi-random access to bz2 compressed
    files for which an offset table (bz2t) has been generated by `bzip-table`.
    """
    
    def __init__( self, filename, table_filename, block_cache_size=0, **kwargs ):
        self.filename = filename
        self.table_filename = table_filename
        self.init_table()
        self.file = open( self.filename, "r" )
        self.dirty = True
        self.at_eof = False
        self.file_pos = 0
        self.current_block_index = -1
        self.current_block = None
        if block_cache_size > 0:
            self.cache = lrucache.LRUCache( block_cache_size )
        else:
            self.cache = None
        
    def init_table( self ):
        self.block_size = None
        self.block_info = []
        # Position of corresponding block in compressed file (in bytes)
        for line in open( self.table_filename ):
            fields = line.split()
            if fields[0] == "s":
                self.block_size = int( fields[1] )
            if fields[0] == "o":
                offset = int( fields[1] )
                compressed_size = int( fields[2] )
                size = int( fields[3] )
                self.block_info.append( ( offset, compressed_size, size ) )
        self.nblocks = len( self.block_info )
        
    def close( self ):
        self.file.close()
        
    def load_block( self, index ):
        if self.cache is not None and index in self.cache:
            return self.cache[index]
        else:      
            offset, csize, size = self.block_info[ index ]
            # Get the block of compressed data
            self.file.seek( offset )
            data = self.file.read( csize )
            # Need to prepend a header for python-lzo module (silly)
            data = ''.join( ( '\xf0', struct.pack( "!I", size ), data ) )
            value = lzo.decompress( data )
            if self.cache is not None:
                self.cache[index] = value
            return value
        
    def fix_dirty( self ):
        chunk, offset = self.get_block_and_offset( self.file_pos )
        if self.current_block_index != chunk:
            self.current_block = StringIO( self.load_block( chunk ) )
            self.current_block.read( offset )
            self.current_block_index = chunk
        else:
            self.current_block.seek( offset )
        self.dirty = False
        
    def get_block_and_offset( self, index ):
        return int( index // self.block_size ), int( index % self.block_size )

    def seek( self, offset, whence=0 ):
        """
        Move the file pointer to a particular offset.
        """
        # Determine absolute target position
        if whence == 0:
            target_pos = offset
        elif whence == 1:
            target_pos = self.file_pos + offset
        elif whence == 2:
            raise Exception( "seek from end not supported" )
            ## target_pos = self.size - offset
        else:
            raise Exception( "Invalid `whence` argument: %r", whence )
        # Check if this is a noop
        if target_pos == self.file_pos:
            return    
        # Verify it is valid
        ## assert 0 <= target_pos < self.size, "Attempt to seek outside file"
        # Move the position
        self.file_pos = target_pos
        # Mark as dirty, the next time a read is done we need to actually
        # move the position in the bzip2 file
        self.dirty = True
        
    def tell( self ):
        return self.file_pos
        
    def readline( self ):
        if self.dirty:
            self.fix_dirty()
        if self.at_eof:
            return ""
        rval = []
        while 1:
            line = self.current_block.readline()
            self.file_pos += len( line )
            rval.append( line )
            if len( line ) > 0 and line[-1] == '\n':
                break
            elif self.current_block_index == self.nblocks - 1:
                self.at_eof = True
                break
            else:
                self.current_block_index += 1
                self.current_block = StringIO( self.load_block( self.current_block_index ) )      
        return "".join( rval ) 
            
    def __next__( self ):
        line = self.readline()
        if line == "":
            raise StopIteration
            
    def __iter__( self ):
        return self