Example #1
0
 def test_multiple_files_multiple_readers(self):
   """Read an English and a Japanese file through one CompoundReader.

   Each file gets its own PlainTextReader and vocabulary. The test checks
   that ten items are produced, that the first one is a CompoundSentence,
   and that its two sub-sentences decode to the expected token strings.
   """
   en_vocab = vocabs.Vocab(vocab_file="examples/data/head.en.vocab")
   ja_vocab = vocabs.Vocab(vocab_file="examples/data/head.ja.vocab")
   sub_readers = [input_readers.PlainTextReader(en_vocab),
                  input_readers.PlainTextReader(ja_vocab)]
   compound = input_readers.CompoundReader(readers=sub_readers)
   paths = ["examples/data/head.en", "examples/data/head.ja"]
   sentences = list(compound.read_sents(filename=paths))
   self.assertEqual(len(sentences), 10)
   first = sentences[0]
   self.assertIsInstance(first, sent.CompoundSentence)
   # Map word ids back to surface tokens with the vocabulary of the matching reader.
   en_text = " ".join(en_vocab.i2w[idx] for idx in first.sents[0].words)
   self.assertEqual(en_text, "can you do it in one day ? </s>")
   ja_text = " ".join(ja_vocab.i2w[idx] for idx in first.sents[1].words)
   self.assertEqual(ja_text, "君 は 1 日 で それ が でき ま す か 。 </s>")
Example #2
0
 def test_one_file_multiple_readers(self):
   """Read a single file through a CompoundReader with two sub-readers.

   The same English file is passed to a PlainTextReader and a
   LengthTextReader. The test checks that ten items are produced, that the
   first one is a CompoundSentence, that its first sub-sentence decodes to
   the expected tokens, and that the second sub-sentence's ``value`` equals
   the whitespace token count of the raw line.
   """
   vocab = vocabs.Vocab(vocab_file="examples/data/head.en.vocab")
   text_reader = input_readers.PlainTextReader(vocab)
   length_reader = input_readers.LengthTextReader()
   compound = input_readers.CompoundReader(readers=[text_reader, length_reader])
   sentences = list(compound.read_sents(filename="examples/data/head.en"))
   self.assertEqual(len(sentences), 10)
   first = sentences[0]
   self.assertIsInstance(first, sent.CompoundSentence)
   decoded = " ".join(vocab.i2w[idx] for idx in first.sents[0].words)
   self.assertEqual(decoded, "can you do it in one day ? </s>")
   # The expected length counts the raw tokens only (no "</s>" marker).
   expected_length = len("can you do it in one day ?".split())
   self.assertEqual(first.sents[1].value, expected_length)
Example #3
0
 def test_read_tree(self):
   """Compare the actions read from the dep_tree CoNLL fixture with a fixed list.

   The same vocabulary is passed for both reader arguments. Only the first
   sentence returned by the reader is checked.
   """
   vocab = vocabs.Vocab(vocab_file="test/data/dep_tree.vocab")
   reader = input_readers.CoNLLToRNNGActionsReader(vocab, vocab)
   parsed = list(reader.read_sents(filename="test/data/dep_tree.conll"))
   action_type = sent.RNNGAction.Type

   def gen(word):
     # GEN action carrying the vocabulary id of ``word``.
     return sent.RNNGAction(action_type.GEN, vocab.convert(word))

   def reduce_action(flag):
     # REDUCE action with a boolean argument
     # (NOTE(review): presumably the reduce direction - confirm against RNNGAction).
     return sent.RNNGAction(action_type.REDUCE, flag)

   expected = [
     gen("David"),
     gen("Gallo"),
     reduce_action(True),
     gen(":"),
     reduce_action(False),
     gen("This"),
     gen("is"),
     gen("Bill"),
     gen("Lange"),
     reduce_action(True),
     reduce_action(True),
     reduce_action(True),
     reduce_action(False),
     gen("."),
     reduce_action(False),
   ]
   self.assertListEqual(parsed[0].actions, expected)
Example #4
0
 def test_read_tree(self):
   """Compare the actions read from the head.en CoNLL file with a fixed list.

   The English vocabulary is passed for the first two reader arguments and
   ``None`` for the third. Only the first sentence returned by the reader is
   checked; the expected sequence ends with a GEN of ``vocab.ES`` followed by
   a final REDUCE_RIGHT.
   """
   vocab = vocabs.Vocab(vocab_file="examples/data/head.en.vocab")
   reader = input_readers.CoNLLToRNNGActionsReader(vocab, vocab, None)
   parsed = list(reader.read_sents(filename="examples/data/parse/head.en.conll"))
   action_type = sent.RNNGAction.Type

   def gen(word):
     # GEN action carrying the vocabulary id of ``word``.
     return sent.RNNGAction(action_type.GEN, vocab.convert(word))

   def reduce_left():
     return sent.RNNGAction(action_type.REDUCE_LEFT)

   def reduce_right():
     return sent.RNNGAction(action_type.REDUCE_RIGHT)

   expected = [
     gen("can"),
     gen("you"),
     gen("do"),
     reduce_left(),
     reduce_left(),
     gen("it"),
     reduce_right(),
     gen("in"),
     gen("one"),
     gen("day"),
     reduce_left(),
     reduce_left(),
     reduce_right(),
     gen("?"),
     reduce_right(),
     # The end-of-sentence id is used directly, not looked up by token.
     sent.RNNGAction(action_type.GEN, vocab.ES),
     reduce_right(),
   ]
   self.assertListEqual(parsed[0].actions, expected)
Example #5
0
 def __init__(self):
     """Create the instance's vocabulary from the two entries READ and WRITE."""
     action_names = ["READ", "WRITE"]
     self.vocab = vocabs.Vocab(i2w=action_names)
Example #6
0
import argparse
import xnmt.vocabs as vocabs
import xnmt.input_readers as input_readers

# Command line: four required positionals - the input file, followed by the
# surface, non-terminal ("nt") and edge ("edg") vocabulary files.
parser = argparse.ArgumentParser()
for positional in ("input", "surface_vocab_file", "nt_vocab_file", "edg_vocab_file"):
    parser.add_argument(positional)
args = parser.parse_args()

# Load the three vocabularies in the order surface, nt, edg.
surface_vocab = vocabs.Vocab(vocab_file=args.surface_vocab_file)
nt_vocab = vocabs.Vocab(vocab_file=args.nt_vocab_file)
edg_vocab = vocabs.Vocab(vocab_file=args.edg_vocab_file)

reader = input_readers.CoNLLToRNNGActionsReader(
    surface_vocab=surface_vocab,
    nt_vocab=nt_vocab,
    edg_vocab=edg_vocab)

# Print the string form of every item read, with a literal " NONE()" appended.
for tree in reader.read_sents(args.input):
    print(f"{tree!s} NONE()")