Example #1
0
 def __getitem__(self, index):
     '''
     Index or slice the view.

     [n] gives the single item at offset n.
     [n:] gives a StreamView starting at offset n (this same instance
     when n is 0 and the view's own offset is 0).
     [n:m] gives the m - n items from offset n, gathered across as many
     lines as necessary and combined with the source's join.

     IndexError is raised for a slice with a step, a slice without a
     start, or a [n:m] range that asks for more items than the lines
     provide.
     '''

     # [n] - a single item
     if isinstance(index, int):
         (found, position) = self.__at(index, True)
         return found.line[position]

     # anything else is treated as a slice; reject unsupported shapes
     if index.step is not None:
         raise IndexError('Slice step not supported')
     if index.start is None:
         raise IndexError('Slice start must be specified')
     begin = index.start

     # [n:] - a view on the rest of the stream
     if open_stop(index):
         if begin == 0 and self.__offset == 0:
             return self
         return StreamView(*self.__at(begin))

     # [n:m] - a bounded run of items
     wanted = index.stop - begin
     if not wanted:
         return self.__line.source.join([])

     (current, first) = self.__at(begin, True)
     size = len(current.line)
     chunks = [current.line[first:min(first + wanted, size)]]
     missing = wanted - (size - first)

     # Take only the required part of each following line.  Whole lines
     # are not accumulated and sliced after joining because a join need
     # not preserve length (consider SOL/EOL).
     while current.line and missing > 0:
         current = current.next
         if not current.line:
             # an empty line ends the walk (the loop test sees it next)
             continue
         size = len(current.line)
         chunks.append(current.line[0:min(missing, size)])
         missing -= size

     if missing > 0:
         raise IndexError(format('Missing {0:d} items', missing))
     return current.source.join(chunks)
Example #2
0
    def __getitem__(self, index):
        '''
        Index or slice the view.

        [n] gives the single item at offset n.
        [n:] gives a StreamView starting at offset n (this same instance
        when n is 0 and the view's own offset is 0).
        [n:m] gives the m - n items from offset n, gathered across as many
        lines as necessary and combined with the source's join.

        IndexError is raised for a slice with a step, a slice without a
        start, or a [n:m] range that asks for more items than the lines
        provide.
        '''

        # [n] - a single item
        if isinstance(index, int):
            (found, position) = self.__at(index, True)
            return found.line[position]

        # anything else is treated as a slice; reject unsupported shapes
        if index.step is not None:
            raise IndexError('Slice step not supported')
        if index.start is None:
            raise IndexError('Slice start must be specified')
        begin = index.start

        # [n:] - a view on the rest of the stream
        if open_stop(index):
            if begin == 0 and self.__offset == 0:
                return self
            return StreamView(*self.__at(begin))

        # [n:m] - a bounded run of items
        wanted = index.stop - begin
        if not wanted:
            return self.__line.source.join([])

        (current, first) = self.__at(begin, True)
        size = len(current.line)
        chunks = [current.line[first:min(first + wanted, size)]]
        missing = wanted - (size - first)

        # Take only the required part of each following line.  Whole lines
        # are not accumulated and sliced after joining because a join need
        # not preserve length (consider SOL/EOL).
        while current.line and missing > 0:
            current = current.next
            if not current.line:
                # an empty line ends the walk (the loop test sees it next)
                continue
            size = len(current.line)
            chunks.append(current.line[0:min(missing, size)])
            missing -= size

        if missing > 0:
            raise IndexError(format('Missing {0:d} items', missing))
        return current.source.join(chunks)
Example #3
0
    def __getitem__(self, indices):
        '''
        **self[start:stop:algorithm, separator, ...]** - Repetition and lists.

        Builds a matcher that applies the current matcher repeatedly.  The
        subscript may hold up to three kinds of entry, in any order: one
        count or slice, one separator, and an ellipsis.

        count or slice
          Controls how many repeats are made and the order in which
          alternative numbers of repeats are tried.

          [start]
            Exactly *start* repeats.

          [start:stop]
            Between *start* and *stop* repeats, trying the largest number
            first and decreasing as necessary.

          [start:stop:algorithm]
            As above, with *algorithm* selecting the search strategy:

            'b' (BREADTH_FIRST)
              Breadth first search, which tends to give shorter matches
              before longer ones.  Every alternative from the sub-matcher
              is tried before the sub-matcher is called again to consume
              more of the stream.  If the sub-matcher does not backtrack,
              the number of repeats never decreases on backtracking.

            'd' (DEPTH_FIRST)
              Depth first search, which tends to give longer matches
              before shorter ones.  The sub-matcher is repeated, consuming
              as much of the stream as possible, before alternatives are
              explored.  If the sub-matcher does not backtrack, the number
              of repeats never increases on backtracking.

            'g' (GREEDY)
              Exhaustive search: all results are found (breadth first)
              and then returned longest first.  The number of repeats
              never increases on backtracking, but this can consume a lot
              of resources.

            'n' (NON_GREEDY)
              As 'g' (GREEDY), but results are returned shortest first, so
              the number of repeats never decreases on backtracking.  This
              can also consume a lot of resources.

          Omitted values default to *start* = 0, *stop* = infinity and
          *algorithm* = 'd' (DEPTH_FIRST).

        separator
          If given, this must appear between repeated values.  Matched
          separators are part of the result (unless the separator is a
          matcher that returns nothing).  A string separator is converted
          to a literal match.

        ...
          If an ellipsis is given the results are joined together with
          "+".  The join is dropped when *stop* is 1.

        A TypeError is raised if more than one count or slice is given, or
        if a second separator is given.

        Examples
        --------

        Any()[0:3,...] matches 3 or fewer characters, joined into a single
        string.

        Word()[:,','] matches a comma-separated list of words.

        value[:] or value[0:] or value[0::'d'] is a "greedy" match that, if
        value does not backtrack, is equivalent to "*" in a regular
        expression.  value[::'n'] is the "non-greedy" equivalent
        (preferring as short a match as possible) and value[::'g'] is
        greedy even when value does provide alternatives on backtracking.
        '''
        entries = indices if isinstance(indices, tuple) else [indices]
        lower, upper, algorithm = 0, None, DEPTH_FIRST
        between = None
        join = False
        seen_range = False
        for entry in entries:
            is_count = isinstance(entry, int)
            if is_count or isinstance(entry, slice):
                # only one count or slice is accepted per subscript
                if seen_range:
                    raise TypeError(
                        fmt('Multiple slices not supported: {0!r}', entry))
                seen_range = True
                if is_count:
                    lower, upper, algorithm = entry, entry, DEPTH_FIRST
                else:
                    lower = 0 if entry.start is None else entry.start
                    upper = None if open_stop(entry) else entry.stop
                    algorithm = \
                        DEPTH_FIRST if entry.step is None else entry.step
                continue
            if entry == Ellipsis:
                join = True
            elif between is None:
                between = entry
            else:
                raise TypeError(entry)
        # important for rewriting
        if upper == 1:
            join = False
        repeat = self._lookup(REPEAT)
        return repeat(self, lower, upper, algorithm, between, join,
                      self._lookup(REDUCE))
Example #4
0
    def __getitem__(self, indices):
        '''
        **self[start:stop:algorithm, separator, ...]** - Repetition and lists.

        This is a complex statement that modifies the current matcher so
        that it matches several times.  A separator may be specified
        (eg for comma-separated lists) and the results may be combined with
        "+" (so repeated matching of characters would give a word).

        start:stop:algorithm
          This controls the number of matches made and the order in which
          different numbers of matches are returned.

          [start]
            Repeat exactly *start* times

          [start:stop]
            Repeat *start* to *stop* times (starting with as many matches
            as possible, and then decreasing as necessary).

          [start:stop:algorithm]
            *algorithm* selects the algorithm for searching.

            'b' (BREADTH_FIRST)
              A breadth first search is used, which tends to give shorter
              matches before longer ones.  This tries all possible matches
              for the sub-matcher first (before repeating calls to consume
              more of the stream).  If the sub-matcher does not backtrack
              then this guarantees that the number of matches returned will
              not decrease (ie will monotonically increase) on backtracking.

            'd' (DEPTH_FIRST)
              A depth first search is used, which tends to give longer
              matches before shorter ones.  This tries to repeat matches
              with the sub-matcher, consuming as much of the stream as
              possible, before backtracking to find alternative matchers.
              If the sub-matcher does not backtrack then this guarantees
              that the number of matches returned will not increase (ie will
              monotonically decrease) on backtracking.

            'g' (GREEDY)
              An exhaustive search is used, which finds all results (by
              breadth first search) and orders them by length before
              returning them ordered from longest to shortest.  This
              guarantees that the number of matches returned will not
              increase (ie will monotonically decrease) on backtracking,
              but can consume a lot of resources.

            'n' (NON_GREEDY)
              As for 'g' (GREEDY), but results are ordered shortest to
              longest.  This guarantees that the number of matches returned
              will not decrease (ie will monotonically increase) on
              backtracking, but can consume a lot of resources.

          Values may be omitted; the defaults are: *start* = 0, *stop* =
          infinity, *algorithm* = 'd' (DEPTH_FIRST).

        separator
          If given, this must appear between repeated values.  Matched
          separators are returned as part of the result (unless, of course,
          they are implemented with a matcher that returns nothing).  If
          *separator* is a string it is converted to a literal match.

        ...
          If ... (an ellipsis) is given then the results are joined together
          with "+".

        A TypeError is raised if more than one count or slice is given
        (previously a later one silently replaced an earlier one) or if a
        second separator is given.

        Examples
        --------

        Any()[0:3,...] will match 3 or fewer characters, joining them
        together so that the result is a single string.

        Word()[:,','] will match a comma-separated list of words.

        value[:] or value[0:] or value[0::'d'] is a "greedy" match that,
        if value does not backtrack, is equivalent to the "*" in a regular
        expression.
        value[::'n'] is the "non-greedy" equivalent (preferring as short a
        match as possible) and value[::'g'] is greedy even when value does
        provide alternative matches on backtracking.
        '''
        start = 0
        stop = None
        step = DEPTH_FIRST
        separator = None
        add = False
        have_index = False
        # a single (non-tuple) subscript is handled as a one-entry list
        if not isinstance(indices, tuple):
            indices = [indices]
        for index in indices:
            if isinstance(index, (int, slice)):
                # reject a second count/slice rather than letting it
                # silently overwrite the first
                if have_index:
                    raise TypeError(
                        'Multiple slices not supported: {0!r}'.format(index))
                have_index = True
                if isinstance(index, int):
                    # [n] means exactly n repeats
                    start = stop = index
                    step = DEPTH_FIRST
                else:
                    if index.start is not None:
                        start = index.start
                    else:
                        start = 0
                    stop = None if open_stop(index) else index.stop
                    if index.step is not None:
                        step = index.step
                    else:
                        step = DEPTH_FIRST
            elif index is Ellipsis:
                # identity test: never invokes a custom __eq__ on index
                add = True
            elif separator is None:
                separator = index
            else:
                raise TypeError(index)
        return self._lookup(REPEAT)(self, start, stop, step, separator, add)
Example #5
0
 def __getitem__(self, spec):
     '''
     Delegate indexing to the wrapped stream.

     An open-ended slice (as judged by open_stop) has its result wrapped
     in a new Facade that is given this instance's memory; the result of
     any other index or slice is returned as it comes from the stream.
     '''
     rewrap = isinstance(spec, slice) and open_stop(spec)
     result = self.__stream.__getitem__(spec)
     if rewrap:
         return Facade(result, self.__memory)
     return result
Example #6
0
 def __getitem__(self, spec):
     '''
     Delegate indexing to the wrapped stream.

     An open-ended slice (as judged by open_stop) has its result wrapped
     in a new Facade that is given this instance's memory; the result of
     any other index or slice is returned as it comes from the stream.
     '''
     rewrap = isinstance(spec, slice) and open_stop(spec)
     result = self.__stream.__getitem__(spec)
     if rewrap:
         return Facade(result, self.__memory)
     return result