Ejemplo n.º 1
0
    def test_incrementaltreereader(self):
        data = '''
		(top (smain (noun 0=Het) (verb 1=had) (inf (verb 5=kunnen)
				(inf (np (det 2=een) (adj 3=prachtige) (noun 4=dag))
				(verb 6=zijn) (pp (prep 7=in) (noun 8=Londen))))) (punct 9=.))
		'''
        result = list(incrementaltreereader([data]))
        assert len(result) == 1
        _tree, sent, _rest = result[0]
        assert sent[0] == u'Het', sent[0]
        assert len(sent) == 10
        assert sent == (
            u'Het had een prachtige dag kunnen zijn in Londen .'.split())

        data = '''
#BOS 0
is	VB	--	--	500
John	NP	--	--	0
rich	JJ	--	--	500
?	?	--	--	0
#500	VP	--	--	0
#EOS 0
		'''
        result = list(incrementaltreereader(data.splitlines()))
        assert len(result) == 1
        _tree, sent, _rest = result[0]
        assert sent[0] == u'is', sent[0]
        assert len(sent) == 4

        data = '''(S (NP Mary) (VP
			(VB is) (JJ rich)) (. .))'''
        result = list(incrementaltreereader(data.splitlines()))
        assert len(result) == 1
Ejemplo n.º 2
0
	def test_incrementaltreereader(self):
		data = '''
		(top (smain (noun 0) (verb 1) (inf (verb 5) (inf (np (det 2)
				(adj 3) (noun 4)) (verb 6) (pp (prep 7) (noun 8))))) (punct 9))
		Het had een prachtige dag kunnen zijn in Londen .
		'''
		result = list(incrementaltreereader([data]))
		assert len(result) == 1
		_tree, sent, _rest = result[0]
		assert sent[0] == u'Het', sent[0]
		assert len(sent) == 10

		data = '''
#BOS 0
is	VB	--	--	500
John	NP	--	--	0
rich	JJ	--	--	500
?	?	--	--	0
#500	VP	--	--	0
#EOS 0
		'''
		result = list(incrementaltreereader(data.splitlines()))
		assert len(result) == 1
		_tree, sent, _rest = result[0]
		assert sent[0] == u'is', sent[0]
		assert len(sent) == 4

		data = '''(S (NP Mary) (VP
			(VB is) (JJ rich)) (. .))'''
		result = list(incrementaltreereader(data.splitlines()))
		assert len(result) == 1
Ejemplo n.º 3
0
	def test_incrementaltreereader(self):
		data = '''
		(top (smain (noun 0=Het) (verb 1=had) (inf (verb 5=kunnen)
				(inf (np (det 2=een) (adj 3=prachtige) (noun 4=dag))
				(verb 6=zijn) (pp (prep 7=in) (noun 8=Londen))))) (punct 9=.))
		'''
		result = list(incrementaltreereader([data]))
		assert len(result) == 1
		_tree, sent, _rest = result[0]
		assert sent[0] == u'Het', sent[0]
		assert len(sent) == 10
		assert sent == (
				u'Het had een prachtige dag kunnen zijn in Londen .'.split())

		data = '''
#BOS 0
is	VB	--	--	500
John	NP	--	--	0
rich	JJ	--	--	500
?	?	--	--	0
#500	VP	--	--	0
#EOS 0
		'''
		result = list(incrementaltreereader(data.splitlines()))
		assert len(result) == 1
		_tree, sent, _rest = result[0]
		assert sent[0] == u'is', sent[0]
		assert len(sent) == 4

		data = '''(S (NP Mary) (VP
			(VB is) (JJ rich)) (. .))'''
		result = list(incrementaltreereader(data.splitlines()))
		assert len(result) == 1
Ejemplo n.º 4
0
def draw():
	"""Parse the trees in the ``tree`` request argument and draw them.

	Returns a short message when the argument is longer than ``LIMIT``;
	otherwise returns whatever ``drawtrees`` produces for the request
	arguments and the ``DrawTree`` objects built from the parsed input.
	"""
	if len(request.args['tree']) > LIMIT:
		return 'Too much data. Limit: %d bytes' % LIMIT
	dts = []
	# The reader is consumed line by line; in this version each item it
	# yields is unpacked as a (tree, sent) pair.
	for tree, sent in incrementaltreereader(
			request.args['tree'].splitlines()):
		dts.append(DrawTree(tree, sent, abbr='abbr' in request.args))
	return drawtrees(request.args, dts)
Ejemplo n.º 5
0
def main():
	"""Text-based tree viewer.

	Command line entry point. Recognized options are ``--test``,
	``--help``, ``--abbr``, ``--plain``, ``--fmt``, ``--encoding``,
	``--functions``, ``--morphology`` and ``--numtrees`` / ``-n``.

	When file arguments are given and ``--fmt`` is not ``auto``, every file
	is loaded with the corpus reader selected from ``READERS`` and the
	trees of all corpora are printed per sentence id of the first corpus.
	Otherwise the input (the given files, or standard input when there are
	none) is passed to ``incrementaltreereader``.

	Exits with status 2 when the command line cannot be parsed.
	"""
	from getopt import gnu_getopt, GetoptError
	flags = ('test', 'help', 'abbr', 'plain')
	options = ('fmt=', 'encoding=', 'functions=', 'morphology=', 'numtrees=')
	try:
		opts, args = gnu_getopt(sys.argv[1:], 'n:', flags + options)
	except GetoptError as err:
		print('error: %s\n%s' % (err, USAGE))
		sys.exit(2)
	opts = dict(opts)
	if '--test' in opts:
		test()
		return
	elif '--help' in opts:
		print(USAGE)
		return
	limit = opts.get('--numtrees', opts.get('-n'))
	limit = int(limit) if limit else None
	fmt = opts.get('--fmt', 'export')
	encoding = opts.get('--encoding', 'utf8')
	abbr = '--abbr' in opts
	ansi = '--plain' not in opts
	if args and fmt != 'auto':
		reader = READERS[fmt]
		corpora = []
		for path in args:
			corpus = reader(
					path,
					encoding=encoding,
					functions=opts.get('--functions'),
					morphology=opts.get('--morphology'))
			corpora.append((corpus.trees(), corpus.sents()))
		# The sentence ids of the first corpus drive the loop, so the
		# reported total is taken from that same corpus.
		firsttrees, firstsents = corpora[0]
		numsents = len(firstsents)
		print('Viewing:', ' '.join(args))
		for n, sentid in enumerate(islice(firsttrees, 0, limit), 1):
			print('%d of %s (sentid=%s; len=%d):' % (
					n, numsents, sentid, len(firstsents[sentid])))
			for trees, sents in corpora:
				tree, sent = trees[sentid], sents[sentid]
				print(DrawTree(tree, sent, abbr=abbr).text(
						unicodelines=True, ansi=ansi))
	else:  # read from files or stdin + detect format
		reader = codecs.getreader(encoding)
		if args:
			stdin = _decodedlines(args, reader)
		else:
			# A codecs stream reader decodes bytes; where sys.stdin is a
			# text stream, wrap its underlying binary buffer instead.
			stdin = reader(getattr(sys.stdin, 'buffer', sys.stdin))
		trees = islice(incrementaltreereader(stdin,
				morphology=opts.get('--morphology'),
				functions=opts.get('--functions')),
				0, limit)
		try:
			for n, (tree, sent, rest) in enumerate(trees, 1):
				print('%d. (len=%d): %s' % (n, len(sent), rest))
				print(DrawTree(tree, sent, abbr=abbr).text(
						unicodelines=True, ansi=ansi))
		except (IOError, KeyboardInterrupt):
			# Deliberately silent; presumably so that a closed output
			# pipe or an interrupt ends the viewer without a traceback.
			pass


def _decodedlines(paths, reader):
	"""Yield the decoded lines of each file in ``paths``, in order.

	Files are opened in binary mode because ``reader`` (a codecs stream
	reader factory) decodes bytes itself, and each file is closed as soon
	as it is exhausted or the generator is finalized."""
	for path in paths:
		with open(path, 'rb') as inp:
			for line in reader(inp):
				yield line
Ejemplo n.º 6
0
def draw():
	"""Parse the trees in the ``tree`` request argument and draw them.

	Input longer than ``LIMIT`` is refused with a short message. An
	exception raised while reading the input or while constructing a
	``DrawTree`` is reported as a plain-text response carrying the
	exception message, and so is input that yields no trees at all.
	Otherwise the result of ``drawtrees`` is returned.
	"""
	if len(request.args['tree']) > LIMIT:
		return 'Too much data. Limit: %d bytes' % LIMIT
	try:
		lines = request.args['tree'].splitlines()
		morphology = 'between' if 'morph' in request.args else None
		functions = 'between' if 'func' in request.args else None
		# list() consumes the reader inside this try, so errors raised
		# while reading are reported below.
		trees = list(incrementaltreereader(
				lines, morphology=morphology, functions=functions))
	except Exception as err:
		return Response(str(err), mimetype='text/plain')
	dts = []
	for tree, sent, _rest in trees:
		try:
			drawn = DrawTree(tree, sent, abbr='abbr' in request.args)
		except Exception as err:
			return Response(str(err), mimetype='text/plain')
		else:
			dts.append(drawn)
	if not dts:
		return Response('No trees!', mimetype='text/plain')
	return drawtrees(request.args, dts)
Ejemplo n.º 7
0
def draw():
	"""Parse the trees in the ``tree`` request argument and draw them.

	Input longer than ``LIMIT`` is refused with a short message. An
	exception raised while reading the input or while constructing a
	``DrawTree`` is reported as a plain-text response carrying the
	exception message, and so is input that yields no trees at all.
	Otherwise the result of ``drawtrees`` is returned.
	"""
	if len(request.args['tree']) > LIMIT:
		return 'Too much data. Limit: %d bytes' % LIMIT
	try:
		lines = request.args['tree'].splitlines()
		morphology = 'add' if 'morph' in request.args else None
		functions = 'add' if 'func' in request.args else None
		# list() consumes the reader inside this try, so errors raised
		# while reading are reported below.
		trees = list(incrementaltreereader(
				lines, morphology=morphology, functions=functions))
	except Exception as err:  # pylint: disable=broad-except
		return Response(str(err), mimetype='text/plain')
	dts = []
	for tree, sent, _rest in trees:
		try:
			drawn = DrawTree(tree, sent, abbr='abbr' in request.args)
		except Exception as err:  # pylint: disable=broad-except
			return Response(str(err), mimetype='text/plain')
		else:
			dts.append(drawn)
	if not dts:
		return Response('No trees!', mimetype='text/plain')
	return drawtrees(request.args, dts)