Ejemplo n.º 1
0
  def __init__(self, library_name, parser, **kwargs):
    """Store the library name, build its parser and collect the file list.

    Args:
      library_name: Name kept on the instance and passed to ``parser``.
      parser: Callable invoked with ``library_name``; its result is stored
        as ``self.parser``.
      **kwargs: Either ``files`` (used as the file list as given), or
        ``index`` together with ``xpath``, ``regexp`` and ``base`` to
        build the list from an index document.

    Raises:
      KeyError: If ``index`` is absent and ``files`` is missing. The other
        index-related keys are only looked up when actually needed.
    """
    self.library_name = library_name
    self.parser = parser(self.library_name)
    self.files = []

    if 'index' not in kwargs:
      # Explicit list from the caller: stored as-is, neither copied nor sorted.
      self.files = kwargs['files']
    else:
      # NOTE(review): `html` is presumably lxml.html and `resolver.cache`
      # presumably returns a local copy of the index -- confirm.
      index_copy = resolver.cache(kwargs['index'])
      document = html.parse(index_copy)
      candidates = document.xpath(kwargs['xpath'])

      # Keep only results matching the pattern from their start (re.match),
      # each prefixed with the base.
      selected = [
        kwargs['base'] + uri
        for uri in candidates
        if re.match(kwargs['regexp'], uri)
      ]
      self.files.extend(selected)
      self.files.sort()

    for entry in self.files:
      log.debug(self.library_name + ': ' + entry)
Ejemplo n.º 2
0
 def parse(self):
   """Run the parser over every entry of ``self.files``, in list order.

   Each entry is passed through ``resolver.cache`` first, and the value
   that call returns is what ``self.parser.parse`` receives.
   """
   for location in self.files:
     # NOTE(review): resolver.cache presumably yields a local path for the
     # given location -- confirm against the resolver module.
     self.parser.parse(resolver.cache(location))