for x in nodes for a in x.leaves()}), len(nodes) def toboolvec(length, indices): """Convert a list of indices into a list of booleans.""" return [n in indices for n in range(length)] # Functions that accept item object with item.tree and item.sent members; # return tuple (wordhighlights, sentweight). FILTERS = { 'average dependency length': getdeplen, 'd-level': lambda i: (None, treebanktransforms.dlevel(i.tree)), 'rare words': lambda i: ( list(~pandas.Index(t.lower() for t in i.sent).isin(WORDLIST) & pandas.Series([ # filter names 'eigen' not in n.source[treebank.MORPH] for n in sorted(i.tree.subtrees(lambda n: isinstance( n[0], int)), key=lambda n: n[0]) ])), None), 'PP/REL modifiers': getmodifiers, 'punctuation': lambda i: (None, max('.,\'"?!(:;'.find(t) + 1 for t in i.sent)), 'direct speech':
def getmodifiers(item): nodes = list(item.tree.subtrees(lambda n: n.label in ('REL', 'PP') and treebanktransforms.function(n) == 'mod')) return toboolvec(len(item.sent), {a for x in nodes for a in x.leaves()}), len(nodes) def toboolvec(length, indices): return [n in indices for n in range(length)] # Functions that accept item object with item.tree and item.sent members; # return tuple (wordhighlights, sentweight). FILTERS = { 'average dependency length': getdeplen, 'd-level': lambda i: (None, treebanktransforms.dlevel(i.tree)), 'rare words': lambda i: (list(~pandas.Index( t.lower() for t in i.sent ).isin(WORDLIST) & pandas.Series([ # filter names 'eigen' not in n.source[treebank.MORPH] for n in sorted(i.tree.subtrees(lambda n: isinstance(n[0], int)), key=lambda n: n[0])]) ), None), 'PP/REL modifiers': getmodifiers, 'punctuation': lambda i: ( max('.,\'"?!(:;'.find(t) + 1 for t in i.sent)), 'direct speech': lambda i: (None, re.match(r"^- .*$|(?:^|.* )['\"](?: .*|$)", ' '.join(i.sent)) is not None),