Ejemplo n.º 1
0
class Hose:
    """Walk a set of base URLs and classify every file URL found.

    The constructor takes an iterable of filter patterns, each exposing a
    ``base_url`` attribute.  The same iterable is handed to a ``Filter``,
    and the patterns' base URLs (minus any that are nested inside another
    base URL) become the starting points for the walk.

    Iterating over an instance yields ``(key, url)`` for each file URL
    discovered, where ``key`` is whatever ``Filter.check`` returns for
    that URL (presumably the matching pattern's key, or None if nothing
    matched -- confirm against Filter).
    """

    def __init__(self, filters=(), log_parent=None):
        """Set up the logger, the URL filter and the reduced URL list.

        :param filters: iterable of pattern objects with a ``base_url``
            attribute; it is passed to ``Filter`` and then iterated here.
        :param log_parent: optional parent logger for the "Hose" logger.
        """
        self.log = log.get_logger("Hose", log_parent)
        self.filter = Filter(filters, log_parent=self.log)
        self.urls = self.reduceWork([pattern.base_url for pattern in filters])

    @staticmethod
    def _isSameOrChild(url, parent_url):
        """Return True if url equals parent_url or lies beneath it.

        The prefix comparison is anchored at a path separator so that
        ``http://host/foobar`` is not mistaken for a child of
        ``http://host/foo``.
        """
        if url == parent_url:
            return True
        if not parent_url.endswith('/'):
            parent_url += '/'
        return url.startswith(parent_url)

    def reduceWork(self, url_list):
        """Simplify URL list to remove children of other elements.

        Reduces the amount of work we need to do by removing any URL from
        the list whose parent (or an identical URL) also appears in the
        list.  Returns the reduced list; the argument is not modified.

        :param url_list: iterable of URL strings.
        :return: new list of the URLs that are not covered by another
            entry, in their original relative order.
        """
        self.log.info("Reducing URL list.")
        pending = list(url_list)
        urls = []
        for index, url in enumerate(pending):
            # Compare against the URLs already kept plus those not yet
            # examined, so exactly one copy of a duplicate survives.
            for check_url in urls + pending[index + 1:]:
                if self._isSameOrChild(url, check_url):
                    self.log.debug("Discarding %s as have %s", url, check_url)
                    break
            else:
                urls.append(url)

        return urls

    def run(self):
        """Run over the URL list.

        Generator yielding ``(key, url)`` for every file reported by the
        walker beneath each base URL, where ``key`` is the result of
        ``self.filter.check(url)``.
        """
        self.log.info("Identifying URLs")
        for base_url in self.urls:
            for dirpath, dirnames, filenames in walk(base_url, self.log):
                for filename in filenames:
                    url = combine_url(base_url, dirpath, filename)
                    key = self.filter.check(url)
                    yield (key, url)
                # To affect which directories the walker descends
                # into, we must update the dirnames list in place.
                wanted = []
                for dirname in dirnames:
                    url = combine_url(base_url, dirpath, dirname)
                    if self.filter.isPossibleParent(url):
                        wanted.append(dirname)
                    else:
                        self.log.info('Skipping %s', url)
                dirnames[:] = wanted

    __iter__ = run
Ejemplo n.º 2
0
class Hose:
    """Walk a set of base URLs and classify every file URL found.

    The constructor takes an iterable of filter patterns, each exposing a
    ``base_url`` attribute.  The same iterable is handed to a ``Filter``,
    and the patterns' base URLs (minus any that are nested inside another
    base URL) become the starting points for the walk.

    Iterating over an instance yields ``(key, url)`` for each file URL
    discovered, where ``key`` is whatever ``Filter.check`` returns for
    that URL (presumably the matching pattern's key, or None if nothing
    matched -- confirm against Filter).
    """

    def __init__(self, filters=(), log_parent=None):
        """Set up the logger, the URL filter and the reduced URL list.

        :param filters: iterable of pattern objects with a ``base_url``
            attribute; it is passed to ``Filter`` and then iterated here.
        :param log_parent: optional parent logger for the "Hose" logger.
        """
        self.log = log.get_logger("Hose", log_parent)
        self.filter = Filter(filters, log_parent=self.log)
        self.urls = self.reduceWork([pattern.base_url for pattern in filters])

    @staticmethod
    def _isSameOrChild(url, parent_url):
        """Return True if url equals parent_url or lies beneath it.

        The prefix comparison is anchored at a path separator so that
        ``http://host/foobar`` is not mistaken for a child of
        ``http://host/foo``.
        """
        if url == parent_url:
            return True
        if not parent_url.endswith('/'):
            parent_url += '/'
        return url.startswith(parent_url)

    def reduceWork(self, url_list):
        """Simplify URL list to remove children of other elements.

        Reduces the amount of work we need to do by removing any URL from
        the list whose parent (or an identical URL) also appears in the
        list.  Returns the reduced list; the argument is not modified.

        :param url_list: iterable of URL strings.
        :return: new list of the URLs that are not covered by another
            entry, in their original relative order.
        """
        self.log.info("Reducing URL list.")
        pending = list(url_list)
        urls = []
        for index, url in enumerate(pending):
            # Compare against the URLs already kept plus those not yet
            # examined, so exactly one copy of a duplicate survives.
            for check_url in urls + pending[index + 1:]:
                if self._isSameOrChild(url, check_url):
                    self.log.debug("Discarding %s as have %s", url, check_url)
                    break
            else:
                urls.append(url)

        return urls

    def run(self):
        """Run over the URL list.

        Generator yielding ``(key, url)`` for every file reported by the
        walker beneath each base URL, where ``key`` is the result of
        ``self.filter.check(url)``.
        """
        self.log.info("Identifying URLs")
        for base_url in self.urls:
            for dirpath, dirnames, filenames in walk(base_url, self.log):
                for filename in filenames:
                    url = combine_url(base_url, dirpath, filename)
                    key = self.filter.check(url)
                    yield (key, url)
                # To affect which directories the walker descends
                # into, we must update the dirnames list in place.
                wanted = []
                for dirname in dirnames:
                    url = combine_url(base_url, dirpath, dirname)
                    if self.filter.isPossibleParent(url):
                        wanted.append(dirname)
                    else:
                        self.log.info('Skipping %s', url)
                dirnames[:] = wanted

    __iter__ = run
Ejemplo n.º 3
0
 def testCreatesChildLogger(self):
     """Filter creates a child logger if given a parent."""
     from lp.registry.scripts.productreleasefinder.filter import (Filter)
     from logging import getLogger
     parent = getLogger("foo")
     f = Filter(log_parent=parent)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(f.log.parent, parent)
Ejemplo n.º 4
0
 def __init__(self, filters=(), log_parent=None):
     """Create the "Hose" logger, the URL filter and the reduced URL list.

     ``filters`` is handed to ``Filter`` and then iterated for each
     pattern's ``base_url``; ``log_parent`` is passed to
     ``log.get_logger`` as the parent of this object's logger.
     """
     logger = log.get_logger("Hose", log_parent)
     # The logger must be in place before reduceWork is called below.
     self.log = logger
     self.filter = Filter(filters, log_parent=logger)
     base_urls = [entry.base_url for entry in filters]
     self.urls = self.reduceWork(base_urls)
Ejemplo n.º 5
0
 def testCreatesDefaultLogger(self):
     """Filter creates a default logger."""
     from lp.registry.scripts.productreleasefinder.filter import (Filter)
     from logging import Logger
     f = Filter()
     # assertTrue replaces the deprecated failUnless alias, which was
     # removed from unittest in Python 3.12.
     self.assertTrue(isinstance(f.log, Logger))
Ejemplo n.º 6
0
 def makeFilter(self, key, urlglob):
     """Return a Filter wrapping one FilterPattern built from key and urlglob."""
     from lp.registry.scripts.productreleasefinder.filter import (
         Filter, FilterPattern)
     return Filter([FilterPattern(key, urlglob)])
Ejemplo n.º 7
0
 def testNoFilters(self):
     """Filter.check returns None if there are no filters."""
     from lp.registry.scripts.productreleasefinder.filter import (Filter)
     f = Filter()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(f.check("file:///subdir/file"), None)
Ejemplo n.º 8
0
 def testFiltersPropertyGiven(self):
     """Filter constructor accepts argument to set filters property."""
     from lp.registry.scripts.productreleasefinder.filter import (Filter)
     f = Filter(["wibble"])
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(len(f.filters), 1)
     self.assertEqual(f.filters[0], "wibble")
Ejemplo n.º 9
0
 def testDefaultFiltersProperty(self):
     """Filter constructor initializes filters property to an empty list."""
     from lp.registry.scripts.productreleasefinder.filter import (Filter)
     f = Filter()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(f.filters, [])
Ejemplo n.º 10
0
 def __init__(self, filters=(), log_parent=None):
     """Create the "Hose" logger, the URL filter and the reduced URL list.

     ``filters`` is handed to ``Filter`` and then iterated for each
     pattern's ``base_url``; ``log_parent`` is passed to
     ``log.get_logger`` as the parent of this object's logger.
     """
     logger = log.get_logger("Hose", log_parent)
     # The logger must be in place before reduceWork is called below.
     self.log = logger
     self.filter = Filter(filters, log_parent=logger)
     base_urls = [entry.base_url for entry in filters]
     self.urls = self.reduceWork(base_urls)
Ejemplo n.º 11
0
 def testNoFilters(self):
     """Filter.check returns None if there are no filters."""
     from lp.registry.scripts.productreleasefinder.filter import (
         Filter)
     f = Filter()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(f.check("file:///subdir/file"), None)