def not_head(num):
    """
    Print every tree from stdin whose 1-based position is at least `num`.

    Trees are read with conll.read_trees_conll and written to stdout in
    their original order; the first `num` - 1 trees are dropped.
    """
    trees = conll.read_trees_conll(sys.stdin)
    # Count from 1 so the position can be compared with `num` directly.
    for position, tree in enumerate(trees, 1):
        if position >= num:
            conll.write_tree_conll(sys.stdout, tree)
def tail(num):
    """
    Print the last `num` trees read from stdin.

    All of stdin is consumed, but at most `num` trees are held in memory at
    any time. With `num` == 0 nothing is printed.
    """
    # A deque bounded by maxlen discards its oldest entry on its own when it
    # is full. Popping manually is redundant, and it raised IndexError for
    # num == 0 because the deque is then always empty.
    queue = collections.deque(conll.read_trees_conll(sys.stdin), maxlen=num)
    for tree in queue:
        conll.write_tree_conll(sys.stdout, tree)
def sed(scripts_filename):
    """
    Apply the tree scripts stored in a file to every tree read from stdin.

    scripts_filename -- path of the script file; its contents are decoded as
    UTF-8 and parsed with tree_script.parse_scripts.

    Each input tree is passed to tree_script.run_tree_scripts and the
    resulting trees are written to stdout through a UTF-8 stream writer.
    """
    # Read scripts.
    with open(scripts_filename, 'rt') as f:
        scripts = tree_script.parse_scripts(f.read().decode('utf-8'))

    # Encode output; a single writer serves the whole run.
    out = codecs.getwriter('utf8')(sys.stdout)

    # Edit trees.
    for tree in conll.read_trees_conll(sys.stdin):
        # Bind the result to the name the inner loop iterates over; leaving
        # `list_of_trees` unassigned raises NameError on the first tree.
        # NOTE(review): assumes run_tree_scripts returns an iterable of
        # trees, as the name `list_of_trees` suggests -- confirm against
        # tree_script.
        list_of_trees = tree_script.run_tree_scripts(tree, scripts)
        for edited in list_of_trees:
            conll.write_tree_conll(out, edited)
def _grep_text(pattern):
    """
    Copy to stdout each tree from stdin in which at least one node matches.

    pattern -- pattern source, compiled with tree_script.parse_pattern.
    """
    compiled = tree_script.parse_pattern(pattern)
    make_writer = codecs.getwriter('utf8')

    for tree in conll.read_trees_conll(sys.stdin):
        # Candidate node indices run from 1 to len(tree); any() stops at the
        # first node that matches.
        node_ids = range(1, len(tree) + 1)
        if any(compiled.match(tree, node, {}) for node in node_ids):
            conll.write_tree_conll(make_writer(sys.stdout), tree)
def shuf():
    """
    Print all trees from stdin in a random order.

    Every tree is loaded into memory before anything is written, because
    the shuffle needs the complete list.
    """
    reader = conll.read_trees_conll(sys.stdin)
    shuffled = list(reader)
    random.shuffle(shuffled)  # in place
    for item in shuffled:
        conll.write_tree_conll(sys.stdout, item)
def head(num):
    """
    Print the first `num` trees read from stdin.

    Reading stops once `num` trees have been written, so the rest of the
    input is not parsed. Nothing is printed when `num` is 0 or negative.
    """
    for i, tree in enumerate(conll.read_trees_conll(sys.stdin)):
        if i >= num:
            # Everything requested is out; do not drain the rest of stdin.
            break
        conll.write_tree_conll(sys.stdout, tree)
def nth(num):
    """
    Print the tree at 1-based position `num` in stdin.

    Nothing is printed when the input holds fewer than `num` trees or when
    `num` is smaller than 1.
    """
    for position, tree in enumerate(conll.read_trees_conll(sys.stdin), 1):
        if position == num:
            conll.write_tree_conll(sys.stdout, tree)
            # Only one tree can sit at this position; stop reading.
            break