Ejemplo n.º 1
0
    def qualify_urls(self, ref_url, urls, rule, newqueue):
        """Classify each url against `rule` and queue the ones worth visiting.

        Every url is tested against the rule's "dump", "fetch" and "spider"
        masks; the spider match additionally has to pass the rule's
        "host_filter".  Urls not yet present in ``self.session.wb`` are
        written out (dump) and/or appended to `newqueue` as a
        ``{"url": ..., "mode": ...}`` record (fetch / spider).  Any url that
        matched at least one mask is registered in the web as a reference
        from `ref_url`, whether or not it was already known.

        Returns the same `newqueue` object that was passed in.
        """
        for url in urls:
            # evaluate the rule's masks in a fixed order: dump, fetch, spider
            wants_dump = bool(recipe.apply_mask(rule.get("dump"), url))
            wants_fetch = bool(recipe.apply_mask(rule.get("fetch"), url))
            wants_spider = bool(
                recipe.apply_mask(rule.get("spider"), url)
                and recipe.apply_hostfilter(rule.get("host_filter"), url)
            )

            # only urls that are new to the web get dumped or queued
            is_new = url not in self.session.wb
            if is_new:
                if wants_dump:
                    ioutils.write_out("%s\n" % url)

                if wants_fetch or wants_spider:
                    if not wants_spider:
                        mode = fetch.Fetcher.FETCH
                    elif not wants_fetch:
                        mode = fetch.Fetcher.SPIDER
                    else:
                        mode = fetch.Fetcher.SPIDER_FETCH
                    newqueue.append({"url": url, "mode": mode})

            # anything matched by at least one mask is recorded in the web
            if any((wants_dump, wants_fetch, wants_spider)):
                self.session.wb.add_url(ref_url, [url])

        return newqueue
Ejemplo n.º 2
0
 def print_refs(self, url, out=True):
     """Write the references of `url` to the output, one per line.

     With a truthy `out` the node's ``outgoing`` collection is listed;
     otherwise its ``incoming`` collection is listed instead.
     ``assert_in_web(url)`` is called first, before the index lookup.
     """
     self.assert_in_web(url)
     node = self.index.get(url)
     if out:
         refs = node.outgoing
     else:
         refs = node.incoming
     for ref in refs:
         line = "%s\n" % ref
         ioutils.write_out(line)
Ejemplo n.º 3
0
 def print_aliases(self, url):
     """Write every alias recorded for `url` to the output, one per line.

     ``assert_in_web(url)`` is called first, before the index lookup.
     """
     self.assert_in_web(url)
     node = self.index.get(url)
     for alias in node.aliases:
         line = "%s\n" % alias
         ioutils.write_out(line)
Ejemplo n.º 4
0
 def dump(self):
     """Write every entry yielded by iterating ``self.index``, one per line."""
     for entry in self.index:
         line = "%s\n" % entry
         ioutils.write_out(line)