def __init__(self, linktrees):
    """Combine several per-type link trees into ``self.linkstree``.

    Args:
        linktrees: Collection of mappings keyed by ``Links.Type``
            members. It is traversed more than once, so it must be
            re-iterable (e.g. a list).
    """
    self.linkstree = RecursiveDict()

    # Each link type is paired with the class handed to ``buildtree``
    # for that type.
    type_to_class = (
        (Links.Type.ANCHOR, AbstractAnchor),
        (Links.Type.FORM, AbstractForm),
        (Links.Type.REDIRECT, AbstractRedirect),
    )

    for linktype, abstractcls in type_to_class:
        # Skip link types that none of the input trees contain.
        if not any(linktype in tree for tree in linktrees):
            continue

        # NOTE(review): every tree is indexed here, including trees that
        # lack ``linktype``; presumably the tree type yields a default
        # for missing keys -- confirm.
        subtrees = [tree[linktype] for tree in linktrees]
        self.buildtree(self.linkstree, linktype, subtrees, abstractcls)
def __init__(self, anchors=(), forms=(), redirects=()):
    """Build the links tree from the anchors, forms and redirects given.

    Every link is filed under a path made of its link type, its
    ``dompath`` (only when truthy) and the elements of its
    ``linkvector``. When no link is supplied at all, a single
    ``"<EMPTY>"`` entry is stored instead.

    Args:
        anchors: Iterable of anchor links; each must expose ``dompath``
            and ``linkvector``.
        forms: Iterable of form links with the same attributes.
        redirects: Iterable of redirect links with the same attributes.

    The defaults are immutable empty tuples rather than ``[]`` so that no
    mutable object is shared between calls.
    """
    self.logger = logging.getLogger(self.__class__.__name__)

    # Leaves in linkstree record how many times a url occurred, therefore
    # use the leaf length when computing the number of urls with "nleaves".
    linkstree = RecursiveDict(len)

    typed_links = (
        (Links.Type.ANCHOR, anchors),
        (Links.Type.FORM, forms),
        (Links.Type.REDIRECT, redirects),
    )
    for ltype, links in typed_links:
        for link in links:
            urlv = [ltype]
            if link.dompath:
                urlv.append(link.dompath)
            urlv.extend(link.linkvector)
            # NOTE(review): presumably applypath invokes the callback on
            # the value stored at ``urlv`` before returning -- confirm.
            linkstree.applypath(urlv, lambda x: self.addlink(x, link))

    if not linkstree:
        # All pages with no links will end up in the same special bin.
        linkstree.setapplypathvalue(("<EMPTY>", ), [None], lambda x: x + [None])

    self.linkstree = linkstree