Example #1
0
                                      for x in nodes
                                      for a in x.leaves()}), len(nodes)


def toboolvec(length, indices):
    """Convert a collection of indices into a list of booleans.

    Returns a list of ``length`` booleans whose n-th element is True iff
    ``n`` occurs in ``indices``. Values in ``indices`` that fall outside
    ``range(length)`` have no effect on the result.

    ``indices`` may be any iterable of hashable values, including a
    one-shot iterator.
    """
    # Materialize once: membership tests against a list are linear scans,
    # and testing against an iterator would consume it after the first hits.
    members = frozenset(indices)
    return [n in members for n in range(length)]


# Each function accepts an item object with item.tree and item.sent members
# and returns a tuple (wordhighlights, sentweight).
FILTERS = {
    'average dependency length':
    getdeplen,
    'd-level':
    lambda i: (None, treebanktransforms.dlevel(i.tree)),
    'rare words':
    lambda i: (
        list(~pandas.Index(t.lower() for t in i.sent).isin(WORDLIST)
             & pandas.Series([  # filter names
                 'eigen' not in n.source[treebank.MORPH]
                 for n in sorted(i.tree.subtrees(lambda n: isinstance(
                     n[0], int)),
                                 key=lambda n: n[0])
             ])),
        None),
    'PP/REL modifiers':
    getmodifiers,
    'punctuation':
    lambda i: (None, max('.,\'"?!(:;'.find(t) + 1 for t in i.sent)),
    'direct speech':
Example #2
0
def getmodifiers(item):
	"""Highlight the words covered by REL/PP modifier constituents.

	Selects the subtrees of ``item.tree`` whose label is 'REL' or 'PP' and
	for which ``treebanktransforms.function`` returns 'mod'.

	Returns a tuple ``(wordhighlights, sentweight)``: a list of
	``len(item.sent)`` booleans marking the positions that occur among the
	leaves of the selected subtrees, and the number of selected subtrees.
	"""
	def ismodifier(node):
		# The label check comes first so that function() is only consulted
		# for REL/PP nodes.
		return (node.label in ('REL', 'PP')
				and treebanktransforms.function(node) == 'mod')

	modifiers = list(item.tree.subtrees(ismodifier))
	covered = set()
	for node in modifiers:
		covered.update(node.leaves())
	return toboolvec(len(item.sent), covered), len(modifiers)


def toboolvec(length, indices):
	"""Convert a collection of indices into a list of booleans.

	The result has ``length`` elements; element n is True iff ``n`` occurs
	in ``indices``. Values outside ``range(length)`` are ignored.
	``indices`` may be any iterable of hashable values.
	"""
	# Build the lookup set once so each membership test is O(1) and a
	# one-shot iterator is not drained by the first few tests.
	wanted = set(indices)
	return [n in wanted for n in range(length)]


# Each function accepts an item object with item.tree and item.sent members
# and returns a tuple (wordhighlights, sentweight).
FILTERS = {
		'average dependency length': getdeplen,
		'd-level': lambda i: (None, treebanktransforms.dlevel(i.tree)),
		'rare words': lambda i: (list(~pandas.Index(
			t.lower() for t in i.sent
			).isin(WORDLIST)
			& pandas.Series([  # filter names
			'eigen' not in n.source[treebank.MORPH]
			for n in
			sorted(i.tree.subtrees(lambda n: isinstance(n[0], int)),
				key=lambda n: n[0])])
			), None),
		'PP/REL modifiers': getmodifiers,
		'punctuation': lambda i: (
			max('.,\'"?!(:;'.find(t) + 1 for t in i.sent)),
		'direct speech': lambda i:
			(None, re.match(r"^- .*$|(?:^|.* )['\"](?: .*|$)",
			' '.join(i.sent)) is not None),