Example #1
0
class S3Iterable(object):
  '''
  Base class for iterating over the records of objects stored in a bucket.

  Every read goes through ``self.cache``: ``s3listcontents`` lists a bucket
  and ``directhandle`` opens one object (a "subset"). The handle is turned
  into an iterable of raw records by ``self.iterator`` and each record is
  optionally converted by ``self.parser``.
  '''

  def __init__(self):
    '''
    Subclasses must handle setting up config including:
    * bucketname
    * parser
    '''
    self.bucketname = None
    # Callable applied to each raw record; None means records are returned
    # unchanged.
    self.parser = None
    # NOTE(review): Cache is defined outside this class; only its
    # s3listcontents() and directhandle() methods are used here.
    self.cache = Cache()
    # Callable that turns a handle into an iterable of raw records.
    # Defaults to the builtin iter().
    self.iterator = iter
    # Forwarded untouched to Cache.directhandle(decompress=...).
    self.decompress = None

  def _lines(self, subset):
    '''Open `subset` through the cache and return its raw record iterable.'''
    handle = self.cache.directhandle(
        self.bucketname, subset, decompress=self.decompress)
    return self.iterator(handle)

  def _parse(self, line):
    '''Return `line` run through self.parser, or unchanged if no parser is set.'''
    if self.parser is None:
      return line
    return self.parser(line)

  def subsets(self):
    '''
    Return a list with the `key` attribute of every entry that the cache
    lists for this bucket.
    '''
    return [entry.key for entry in self.cache.s3listcontents(self.bucketname)]

  def iter(self, subset):
    '''
    Generate every record of `subset`, parsed when a parser is configured.

    This is a generator: the handle is not opened until the first record
    is requested.
    '''
    for line in self._lines(subset):
      yield self._parse(line)

  def filter(self, subset, f):
    '''
    Generate the (parsed) records of `subset` for which `f(record)` is true.

    The predicate `f` receives the record after parsing, not the raw line.
    '''
    for line in self._lines(subset):
      record = self._parse(line)
      if f(record):
        yield record

  def byid(self, index):
    '''
    Return the record addressed by `index`, a `(subset, position)` pair
    where `position` is the zero-based offset of the record in `subset`.

    Records are read sequentially from the start of the subset; only the
    matching record is parsed. Returns None when the subset ends before
    `position` is reached.
    '''
    (subset, wanted) = index
    for position, line in enumerate(self._lines(subset)):
      if position == wanted:
        return self._parse(line)
    return None

  def display(self, items):
    '''Print each element of `items` on its own line.'''
    for item in items:
      # Parenthesised single-argument form prints identically on Python 2
      # and Python 3; the bare `print item` statement is a SyntaxError on 3.
      print(item)