Example #1
import tensorflow as tf

from opennmt.utils.misc import count_lines


def load_vocab(vocab_file):
  """Returns a lookup table and the vocabulary size."""
  vocab_size = count_lines(vocab_file) + 1  # Add 1 for the UNK token.
  vocab = tf.contrib.lookup.index_table_from_file(
      vocab_file,
      vocab_size=vocab_size - 1,  # Number of tokens in the file itself.
      num_oov_buckets=1)          # A single OOV bucket catches UNK tokens.
  return vocab, vocab_size
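
A minimal usage sketch (assumed, not part of the original snippet), in TensorFlow 1.x graph mode with a hypothetical one-token-per-line vocab.txt:

vocab, vocab_size = load_vocab("vocab.txt")
ids = vocab.lookup(tf.constant(["hello", "world", "never-seen"]))

with tf.Session() as sess:
  sess.run(tf.tables_initializer())  # Lookup tables must be initialized first.
  print(sess.run(ids))  # Out-of-vocabulary tokens map to id vocab_size - 1.
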

def _initialize(self, metadata):
  """Resolves the labels vocabulary file from the metadata and counts the labels."""
  self.inputter.initialize(metadata)
  self.labels_vocabulary_file = metadata[self.labels_vocabulary_file_key]
  self.num_labels = count_lines(self.labels_vocabulary_file)
Example #3
      "length": tf.shape(x)[0]})
  dataset = dataset.padded_batch(64, {
      "ids": [None],
      "length": []})
  return dataset.make_initializable_iterator()

if args.direction == 1:
  src_file, tgt_file = args.src, args.tgt
  src_vocab_file, tgt_vocab_file = args.src_vocab, args.tgt_vocab
else:
  src_file, tgt_file = args.tgt, args.src
  src_vocab_file, tgt_vocab_file = args.tgt_vocab, args.src_vocab

from opennmt.utils.misc import count_lines

tgt_vocab_size = count_lines(tgt_vocab_file) + 1  # +1 for the UNK bucket.
src_vocab_size = count_lines(src_vocab_file) + 1
src_vocab = tf.contrib.lookup.index_table_from_file(
    src_vocab_file,
    vocab_size=src_vocab_size - 1,  # File size only; UNK is the OOV bucket.
    num_oov_buckets=1)

with tf.device("cpu:0"):
  src_iterator = load_data(src_file, src_vocab)

src = src_iterator.get_next()
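
A minimal sketch (assumed, not part of the original) of consuming the pipeline above in a TF 1.x session:

with tf.Session() as sess:
  sess.run(tf.tables_initializer())   # Initialize the vocabulary lookup table.
  sess.run(src_iterator.initializer)  # Initialize the dataset iterator.
  batch = sess.run(src)               # Dict with "ids" [batch, max_len] and "length" [batch].
  print(batch["ids"].shape, batch["length"])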


# Step 2

Example #4
import tensorflow as tf


def load_data(data_file, input_vocab):
    # Head reconstructed from context (the original snippet was truncated).
    dataset = tf.data.TextLineDataset(data_file)
    dataset = dataset.map(lambda x: tf.string_split([x]).values)
    dataset = dataset.map(input_vocab.lookup)
    dataset = dataset.map(lambda x: {"ids": x, "length": tf.shape(x)[0]})
    dataset = dataset.padded_batch(64, {"ids": [None], "length": []})
    return dataset.make_initializable_iterator()

if args.direction == 1:
    src_file, tgt_file = args.src, args.tgt
    src_vocab_file, tgt_vocab_file = args.src_vocab, args.tgt_vocab
else:
    src_file, tgt_file = args.tgt, args.src
    src_vocab_file, tgt_vocab_file = args.tgt_vocab, args.src_vocab

from opennmt.utils.misc import count_lines
from tensorflow.contrib import lookup

tgt_vocab_size = count_lines(tgt_vocab_file) + 1
src_vocab_size = count_lines(src_vocab_file) + 1
src_vocab = lookup.index_table_from_file(src_vocab_file,
                                         vocab_size=src_vocab_size - 1,
                                         num_oov_buckets=1)

with tf.device("cpu:0"):
    src_iterator = load_data(src_file, src_vocab)

src = src_iterator.get_next()

# Step 2

from bert.modeling import BertModel, BertConfig

hidden_size = 768  # Hidden size of the BERT-Base encoder.
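
A minimal sketch (assumed, not from the original) of building the encoder with the google-research/bert API; the config path and placeholder shapes are hypothetical:

bert_config = BertConfig.from_json_file("bert_config.json")
input_ids = tf.placeholder(tf.int32, shape=[None, None])   # [batch, time]
input_mask = tf.placeholder(tf.int32, shape=[None, None])  # 1 for real tokens.
model = BertModel(
    config=bert_config,
    is_training=False,
    input_ids=input_ids,
    input_mask=input_mask)
sequence_output = model.get_sequence_output()  # [batch, time, hidden_size]
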
def get_dataset_size(self, data_file):
    return count_lines(data_file)

def _initialize(self, metadata, asset_dir=None):
    assets = super(SequenceClassifier,
                   self)._initialize(metadata, asset_dir=asset_dir)
    self.labels_vocabulary_file = metadata[self.labels_vocabulary_file_key]
    self.num_labels = count_lines(self.labels_vocabulary_file)
    return assets
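
count_lines, imported from opennmt.utils.misc throughout, simply counts the lines of a text file. A rough self-contained sketch of its assumed behavior (not the library's actual source):

def count_lines(filename):
    # Count the number of lines in a text file.
    with open(filename, "rb") as f:
        return sum(1 for _ in f)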