def test_multiple_files_multiple_readers(self):
    """Read two files through a CompoundReader that holds one reader per file.

    Asserts that ten sentences are returned, that the first is a
    ``sent.CompoundSentence``, and that its two sub-sentences map back to
    the expected English and Japanese token strings.
    """
    en_vocab = vocabs.Vocab(vocab_file="examples/data/head.en.vocab")
    ja_vocab = vocabs.Vocab(vocab_file="examples/data/head.ja.vocab")
    sub_readers = [
        input_readers.PlainTextReader(en_vocab),
        input_readers.PlainTextReader(ja_vocab),
    ]
    compound_reader = input_readers.CompoundReader(readers=sub_readers)
    paths = ["examples/data/head.en", "examples/data/head.ja"]
    sentences = list(compound_reader.read_sents(filename=paths))

    self.assertEqual(len(sentences), 10)
    self.assertIsInstance(sentences[0], sent.CompoundSentence)

    # Map word ids back to surface tokens, one sub-sentence at a time.
    en_text = " ".join(en_vocab.i2w[idx] for idx in sentences[0].sents[0].words)
    self.assertEqual(en_text, "can you do it in one day ? </s>")
    ja_text = " ".join(ja_vocab.i2w[idx] for idx in sentences[0].sents[1].words)
    self.assertEqual(ja_text, "君 は 1 日 で それ が でき ま す か 。 </s>")
def test_one_file_multiple_readers(self):
    """Read a single file through a CompoundReader holding two readers.

    Asserts that ten sentences are returned, that the first is a
    ``sent.CompoundSentence``, that its first sub-sentence maps back to
    the expected token string, and that the second sub-sentence's
    ``value`` equals the token count of that sentence (without ``</s>``).
    """
    en_vocab = vocabs.Vocab(vocab_file="examples/data/head.en.vocab")
    sub_readers = [
        input_readers.PlainTextReader(en_vocab),
        input_readers.LengthTextReader(),
    ]
    compound_reader = input_readers.CompoundReader(readers=sub_readers)
    sentences = list(compound_reader.read_sents(filename="examples/data/head.en"))

    self.assertEqual(len(sentences), 10)
    self.assertIsInstance(sentences[0], sent.CompoundSentence)

    text = " ".join(en_vocab.i2w[idx] for idx in sentences[0].sents[0].words)
    self.assertEqual(text, "can you do it in one day ? </s>")

    observed_length = sentences[0].sents[1].value
    expected_length = len("can you do it in one day ?".split())
    self.assertEqual(observed_length, expected_length)
def test_read_tree(self):
    """Check the action sequence read from ``test/data/dep_tree.conll``.

    The first sentence's ``actions`` must equal a fixed list of GEN and
    REDUCE actions built from the same vocabulary.
    """
    # NOTE(review): another ``test_read_tree`` definition is visible nearby;
    # if both end up in the same class the later one shadows this one — confirm.
    vocab = vocabs.Vocab(vocab_file="test/data/dep_tree.vocab")
    reader = input_readers.CoNLLToRNNGActionsReader(vocab, vocab)
    trees = list(reader.read_sents(filename="test/data/dep_tree.conll"))

    def gen(word):
        # GEN action carrying the vocabulary conversion of ``word``.
        return sent.RNNGAction(sent.RNNGAction.Type.GEN, vocab.convert(word))

    def reduce(flag):
        # REDUCE action; ``flag`` is passed through as the second argument.
        return sent.RNNGAction(sent.RNNGAction.Type.REDUCE, flag)

    expected_actions = [
        gen("David"),
        gen("Gallo"),
        reduce(True),
        gen(":"),
        reduce(False),
        gen("This"),
        gen("is"),
        gen("Bill"),
        gen("Lange"),
        reduce(True),
        reduce(True),
        reduce(True),
        reduce(False),
        gen("."),
        reduce(False),
    ]
    self.assertListEqual(trees[0].actions, expected_actions)
def test_read_tree(self):
    """Check the action sequence read from ``examples/data/parse/head.en.conll``.

    The first sentence's ``actions`` must equal a fixed list of GEN,
    REDUCE_LEFT and REDUCE_RIGHT actions, ending with a GEN of ``vocab.ES``
    followed by a final REDUCE_RIGHT.
    """
    vocab = vocabs.Vocab(vocab_file="examples/data/head.en.vocab")
    reader = input_readers.CoNLLToRNNGActionsReader(vocab, vocab, None)
    trees = list(reader.read_sents(filename="examples/data/parse/head.en.conll"))

    def gen(token_id):
        # GEN action for an already-converted vocabulary entry.
        return sent.RNNGAction(sent.RNNGAction.Type.GEN, token_id)

    def word(token):
        # GEN action for a surface token, converted through the vocabulary.
        return gen(vocab.convert(token))

    def left():
        return sent.RNNGAction(sent.RNNGAction.Type.REDUCE_LEFT)

    def right():
        return sent.RNNGAction(sent.RNNGAction.Type.REDUCE_RIGHT)

    expected_actions = [
        word("can"),
        word("you"),
        word("do"),
        left(),
        left(),
        word("it"),
        right(),
        word("in"),
        word("one"),
        word("day"),
        left(),
        left(),
        right(),
        word("?"),
        right(),
        gen(vocab.ES),
        right(),
    ]
    self.assertListEqual(trees[0].actions, expected_actions)
def __init__(self):
    """Set ``self.vocab`` to a vocabulary built from the words READ and WRITE."""
    action_words = ["READ", "WRITE"]
    self.vocab = vocabs.Vocab(i2w=action_words)
import argparse
import xnmt.vocabs as vocabs
import xnmt.input_readers as input_readers

# Command-line script: reads the input file with CoNLLToRNNGActionsReader,
# using three vocabulary files, and prints every result followed by " NONE()".
parser = argparse.ArgumentParser()
for positional in ("input", "surface_vocab_file", "nt_vocab_file", "edg_vocab_file"):
    parser.add_argument(positional)
args = parser.parse_args()

# Vocabularies are loaded in the same order the reader receives them.
surface_vocab = vocabs.Vocab(vocab_file=args.surface_vocab_file)
nt_vocab = vocabs.Vocab(vocab_file=args.nt_vocab_file)
edg_vocab = vocabs.Vocab(vocab_file=args.edg_vocab_file)
reader = input_readers.CoNLLToRNNGActionsReader(
    surface_vocab=surface_vocab,
    nt_vocab=nt_vocab,
    edg_vocab=edg_vocab,
)

for tree in reader.read_sents(args.input):
    # print's default single-space separator yields "<tree> NONE()".
    print(str(tree), "NONE()")