def __init__(self, session, bilm_params):
    self.params = bilm_params
    # Create a Batcher to map text to character ids.
    self.batcher = Batcher(self.params.vocab_file, self.params.max_char_len)
    # Input placeholders to the biLM.
    self.sentence_character_ids = tf.placeholder(
        'int32', shape=(None, None, self.params.max_char_len))
    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(
        self.params.options_file,
        self.params.weights_file,
    )
    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(self.sentence_character_ids)
    self.elmo_sentence_input = weight_layers('input',
                                             sentence_embeddings_op,
                                             l2_coef=0.0,
                                             use_top_only=True)
    self.sess = session
    self.sess.run(tf.global_variables_initializer())

def list_to_token_embeddings(self, outfile_to_dump=None):
    '''
    Given an input vocabulary file, dump all the token embeddings to the
    outfile. The result can be used as the embedding_weight_file when
    constructing a BidirectionalLanguageModel.
    '''
    # batcher = TokenBatcher(vocab_file)
    vocab = UnicodeCharsVocabulary(self.voc_file_path, self.max_word_length)
    batcher = Batcher(self.voc_file_path, self.max_word_length)
    embedding_op = self.ops['token_embeddings']
    n_tokens = vocab.size
    embed_dim = int(embedding_op.shape[2])
    embeddings = np.zeros((n_tokens, embed_dim), dtype=DTYPE)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for k in tqdm(range(n_tokens)):
            token = vocab.id_to_word(k)
            char_ids = batcher.batch_sentences([[token]])[0, 1, :].reshape(1, 1, -1)
            embeddings[k, :] = sess.run(
                embedding_op, feed_dict={self.ids_placeholder: char_ids})

    with h5py.File(outfile_to_dump, 'w') as fout:
        ds = fout.create_dataset('embedding',
                                 embeddings.shape,
                                 dtype='float32',
                                 data=embeddings)
    return embeddings, vocab._word_to_id

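The docstring above notes that the dumped file can later serve as an embedding_weight_file. A minimal sketch of that follow-up step, assuming the embeddings were written to token_embeddings.hdf5 and reusing the same vocabulary, options and weight files (all paths here are placeholders):

import tensorflow as tf
from bilm import TokenBatcher, BidirectionalLanguageModel, weight_layers

# Placeholder paths; point these at your own model files.
vocab_file = 'vocab.txt'
options_file = 'options.json'
weight_file = 'weights.hdf5'
token_embedding_file = 'token_embeddings.hdf5'  # file written by list_to_token_embeddings

# Token-level inputs: word ids instead of character-id matrices.
batcher = TokenBatcher(vocab_file)
token_ids = tf.placeholder('int32', shape=(None, None))

# Reuse the pre-computed token embeddings instead of running the char CNN.
bilm = BidirectionalLanguageModel(
    options_file,
    weight_file,
    use_character_inputs=False,
    embedding_weight_file=token_embedding_file)
embeddings_op = bilm(token_ids)
elmo_input = weight_layers('input', embeddings_op, l2_coef=0.0)
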
def __init__(self, train_corpus_fname, test_corpus_fname,
             vocab_fname, options_fname, pretrain_model_fname,
             model_save_path, max_characters_per_token=30,
             batch_size=32, num_labels=2):
    # Load a corpus.
    super().__init__(train_corpus_fname=train_corpus_fname,
                     tokenized_train_corpus_fname=train_corpus_fname + ".elmo-tokenized",
                     test_corpus_fname=test_corpus_fname,
                     tokenized_test_corpus_fname=test_corpus_fname + ".elmo-tokenized",
                     model_name="elmo",
                     vocab_fname=vocab_fname,
                     model_save_path=model_save_path,
                     batch_size=batch_size)
    # configurations
    self.options_fname = options_fname
    self.pretrain_model_fname = pretrain_model_fname
    self.max_characters_per_token = max_characters_per_token
    self.num_labels = 2  # positive, negative
    self.num_train_steps = (int((len(self.train_data) - 1) / self.batch_size) + 1) * self.num_epochs
    self.eval_every = int(self.num_train_steps / self.num_epochs)  # evaluate once per epoch
    # Create a Batcher to map text to character ids.
    # lm_vocab_file = ELMo can build input ids on the fly without a token vocab,
    #   but pre-mapping frequent character sequences (i.e. the vocab) to ids makes training faster.
    # max_token_length = the maximum number of characters in each token
    self.batcher = Batcher(lm_vocab_file=vocab_fname,
                           max_token_length=self.max_characters_per_token)
    self.training = tf.placeholder(tf.bool)
    # build train graph
    (self.ids_placeholder, self.labels_placeholder, self.dropout_keep_prob,
     self.logits, self.loss) = make_elmo_graph(options_fname,
                                               pretrain_model_fname,
                                               max_characters_per_token,
                                               num_labels, tune=True)

def __init__(self,
             tune_model_fname="/notebooks/embedding/data/sentence-embeddings/elmo/tune-ckpt",
             pretrain_model_fname="/notebooks/embedding/data/sentence-embeddings/elmo/pretrain-ckpt/elmo.model",
             options_fname="/notebooks/embedding/data/sentence-embeddings/elmo/pretrain-ckpt/options.json",
             vocab_fname="/notebooks/embedding/data/sentence-embeddings/elmo/pretrain-ckpt/elmo-vocab.txt",
             max_characters_per_token=30, dimension=256, num_labels=2, use_notebook=False):
    # configurations
    super().__init__("elmo", dimension, use_notebook)
    self.tokenizer = get_tokenizer("mecab")
    self.batcher = Batcher(lm_vocab_file=vocab_fname,
                           max_token_length=max_characters_per_token)
    self.ids_placeholder, self.elmo_embeddings, self.probs = make_elmo_graph(
        options_fname, pretrain_model_fname,
        max_characters_per_token, num_labels, tune=False)
    # restore model
    saver = tf.train.Saver(tf.global_variables())
    self.sess = tf.Session()
    checkpoint_path = tf.train.latest_checkpoint(tune_model_fname)
    saver.restore(self.sess, checkpoint_path)

def __init__(self, FLAGS, id2word, word2id, emb_matrix, id2char, char2id):
    self.FLAGS = FLAGS
    self.id2word = id2word
    self.word2id = word2id
    self.emb_matrix = emb_matrix
    self.id2char = id2char
    self.char2id = char2id
    self.batcher = Batcher(
        "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/elmo_vocab.txt", 50)
    self.filters = [(5, 10)]  # change back to 100 after
    self.options_file = "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/elmo.json"
    self.weight_file = "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/lm_weight.hdf5"

    with tf.variable_scope(
            "QAModel",
            initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, uniform=True)):
        self.add_placeholders()
        self.add_embedding_layer(emb_matrix)
        self.add_elmo_embedding_layer(self.options_file, self.weight_file)
    with tf.variable_scope(
            "QAModel",
            initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, uniform=True)):
        self.build_graph()
        self.add_loss()

    # Define trainable parameters, gradient, gradient norm, and clip by gradient norm
    params = tf.trainable_variables("QAModel")  # since only one scope "QAModel"
    gradients = tf.gradients(self.loss, params)  # d(loss)/d(params), a list of length len(params)
    self.gradient_norm = tf.global_norm(gradients)
    clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)  # returns (list_clipped, global_norm); global_norm unused here
    self.param_norm = tf.global_norm(params)

    # Define optimizer and updates
    # (updates is what you need to fetch in session.run to do a gradient update)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    # This will increment the global step if global_step is not None
    opt = tf.train.AdamOptimizer(learning_rate=0.001)  # you can try other optimizers
    self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                       global_step=self.global_step)
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    self.bestmodel_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    self.summaries = tf.summary.merge_all()

def __init__(self, model_path):
    vocab_file = os.path.join(model_path, 'vocabs.txt')
    options_file = os.path.join(model_path, 'options.json')
    weight_file = os.path.join(model_path, 'weights.hdf5')
    with open(options_file, "r") as fj:
        options = json.load(fj)
    self.max_characters_per_token = options['char_cnn']['max_characters_per_token']
    # Create a Batcher to map text to character ids.
    self.batcher = Batcher(vocab_file, self.max_characters_per_token)
    # Build the biLM graph.
    self.bilm = BidirectionalLanguageModel(options_file, weight_file)

def __init__(self,
             spec: str,
             vocab_file='./datar/vocab/vocab.txt',
             max_word_length=50,
             elmo_output_names: Optional[List] = None,
             dim: Optional[int] = None,
             pad_zero: bool = False,
             concat_last_axis: bool = True,
             max_token: Optional[int] = None,
             mini_batch_size: int = 32,
             **kwargs) -> None:
    self.spec = spec if '://' in spec else str(expand_path(spec))
    self.max_word_length = max_word_length
    self.vocab_file = vocab_file
    self.batcher = Batcher(self.vocab_file, self.max_word_length)
    self.pad_zero = pad_zero
    self.concat_last_axis = concat_last_axis
    self.max_token = max_token
    self.mini_batch_size = mini_batch_size
    self.elmo_outputs, self.sess, self.ids_placeholder = self._load()

def __init__(self, params):
    self.data_path = params.data_path
    self.params = params

    if params.IS_DEBUG:
        print('debug mode')
        # load data for debugging
        self.train = self.load_data(self.data_path + self.params.DATA_DEBUG)
        self.dev = self.load_data(self.data_path + self.params.DATA_DEBUG)
        self.test = self.load_data(self.data_path + self.params.DATA_DEBUG)
    else:
        # load data
        self.train = self.load_data(self.data_path + self.params.DATA_TRAIN)
        self.dev = self.load_data(self.data_path + self.params.DATA_DEV)
        self.test = self.load_data(self.data_path + self.params.DATA_TEST)

    # batcher for ELMo
    if self.params.USE_CHAR_ELMO:
        print('[INFO] character-level ELMo')
        self.batcher = Batcher(self.data_path + self.params.DIC, 50)
    else:
        print('[INFO] cached-token-level ELMo')
        self.batcher = TokenBatcher(self.data_path + self.params.DIC)

    self.dic_size = 0
    with open(self.data_path + self.params.DIC, 'r') as f:
        self.dic = f.readlines()
        self.dic = [x.strip() for x in self.dic]
        self.dic_size = len(self.dic)

    print('[completed] load data, dic_size: ', self.dic_size)

def load_elmo_embeddings(directory, top=False):
    """
    :param directory: directory with an ELMo model
                      ('model.hdf5', 'options.json' and 'vocab.txt.gz')
    :param top: use only the top ELMo layer
    :return: ELMo batcher, character id placeholders, op object
    """
    if os.path.isfile(os.path.join(directory, 'vocab.txt.gz')):
        vocab_file = os.path.join(directory, 'vocab.txt.gz')
    elif os.path.isfile(os.path.join(directory, 'vocab.txt')):
        vocab_file = os.path.join(directory, 'vocab.txt')
    else:
        raise SystemExit('Error: no vocabulary file found in the directory.')
    options_file = os.path.join(directory, 'options.json')
    weight_file = os.path.join(directory, 'model.hdf5')

    with open(options_file, 'r') as f:
        m_options = json.load(f)
    max_chars = m_options['char_cnn']['max_characters_per_token']

    # Create a Batcher to map text to character ids.
    batcher = Batcher(vocab_file, max_chars)

    # Input placeholders to the biLM.
    sentence_character_ids = tf.compat.v1.placeholder('int32', shape=(None, None, max_chars))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=128)

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(sentence_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    elmo_sentence_input = weight_layers('input', sentence_embeddings_op, use_top_only=top)
    return batcher, sentence_character_ids, elmo_sentence_input

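A minimal usage sketch of the three objects returned above (the model directory and example sentences are placeholders): map tokenized sentences to character ids with the batcher, then feed them through the placeholder to get the weighted ELMo vectors.

import tensorflow as tf

batcher, character_ids, elmo_op = load_elmo_embeddings('path/to/elmo_model')
sentences = [['Consider', 'this', 'sentence', '.'], ['And', 'another', 'one']]

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    ids = batcher.batch_sentences(sentences)            # (batch, max_len, max_chars)
    vectors = sess.run(elmo_op['weighted_op'],
                       feed_dict={character_ids: ids})  # (batch, max_len, elmo_dim)
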
def load_elmo_embeddings(directory, top=True):
    if os.path.isfile(os.path.join(directory, 'vocab.txt.gz')):
        vocab_file = os.path.join(directory, 'vocab.txt.gz')
    elif os.path.isfile(os.path.join(directory, 'vocab.txt')):
        vocab_file = os.path.join(directory, 'vocab.txt')
    else:
        raise SystemExit('Error: no vocabulary file found in the directory.')
    options_file = os.path.join(directory, 'options.json')
    weight_file = os.path.join(directory, 'model.hdf5')

    # Create a Batcher to map text to character ids.
    batcher = Batcher(vocab_file, 50)

    # Input placeholders to the biLM.
    sentence_character_ids = tf.placeholder('int32', shape=(None, None, 50))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=300)

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(sentence_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    # Our model includes ELMo at both the input and output layers of the task GRU,
    # so we need 2x ELMo representations at each of the input and output.
    elmo_sentence_input = weight_layers('input', sentence_embeddings_op, use_top_only=top)
    return batcher, sentence_character_ids, elmo_sentence_input

def load_elmo_embeddings(directory, top=False):
    """
    :param directory: directory with an ELMo model
                      ('model.hdf5', 'options.json' and 'vocab.txt.gz')
    :param top: use only the top ELMo layer
    :return: ELMo batcher, character id placeholders, op object
    """
    vocab_file = os.path.join(directory, 'vocab.txt.gz')
    options_file = os.path.join(directory, 'options.json')
    weight_file = os.path.join(directory, 'model.hdf5')

    # Create a Batcher to map text to character ids.
    batcher = Batcher(vocab_file, 50)

    # Input placeholders to the biLM.
    sentence_character_ids = tf.placeholder('int32', shape=(None, None, 50))

    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=300)

    # Get ops to compute the LM embeddings.
    sentence_embeddings_op = bilm(sentence_character_ids)

    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    elmo_sentence_input = weight_layers('input', sentence_embeddings_op, use_top_only=top)
    return batcher, sentence_character_ids, elmo_sentence_input

def get_feed_dict(self, words, words_raw, labels=None, lr=None, dropout=None):
    char_ids, word_ids = zip(*words)
    self.word = word_ids
    word_ids, sequence_lengths = pad_sequences(
        word_ids, self.config.vocab_words['$pad$'],
        self.max_word_lengths, self.max_sequence_lengths)
    char_ids, word_lengths = pad_sequences(
        char_ids, self.config.vocab_chars['$pad$'],
        self.max_word_lengths, self.max_sequence_lengths, nlevels=2)

    if self.config.use_emlo:
        batcher = Batcher("model_emlo/vocab.txt", 50)
        elmo_char_ids = batcher.batch_sentences(words_raw, self.max_sequence_lengths)

    # build feed dictionary
    feed = {
        self.word_ids: word_ids,
        self.sequence_lengths: sequence_lengths
    }
    if self.config.use_char_cnn or self.config.use_char_lstm:
        feed[self.char_ids] = char_ids
        feed[self.word_lengths] = word_lengths
    if self.config.use_emlo:
        feed[self.char_ids_elmo] = elmo_char_ids
    if labels is not None:
        labels, _ = pad_sequences(labels, 0, self.max_word_lengths,
                                  self.max_sequence_lengths)
        feed[self.labels] = labels
    if lr is not None:
        feed[self.lr] = lr
    if dropout is not None:
        feed[self.dropout] = dropout

    return feed, sequence_lengths

def __init__(self, config):
    super(NERModel, self).__init__(config)
    self.idx_to_tag = {
        idx: tag
        for tag, idx in list(self.config.vocab_tags.items())
    }
    if self.config.use_elmo:
        # self.elmo_inputs = []
        self.batcher = Batcher(self.config.filename_words, 50)
        self.bilm = BidirectionalLanguageModel(
            self.config.filename_elmo_options,
            self.config.filename_elmo_weights)
        self.elmo_token_ids = tf.placeholder('int32', shape=(None, None, 50))
        self.elmo_embeddings_op = self.bilm(self.elmo_token_ids)
        self.elmo_embeddings_input = weight_layers('input',
                                                   self.elmo_embeddings_op,
                                                   l2_coef=0.0)

def get_batcher():
    with open(FLAGS.elmo_options, 'r') as fin:
        options = json.load(fin)
    max_word_length = options['char_cnn']['max_characters_per_token']
    elmo_batcher = Batcher(FLAGS.elmo_vocab, max_word_length)
    return elmo_batcher

def get_elmo_embeddings(config):
    batcher = Batcher(config.filename_words, 50)
    token_ids = tf.placeholder('int32', shape=(None, None, 50))
    bilm = BidirectionalLanguageModel(
        config.filename_elmo_options,
        config.filename_elmo_weights,
    )
    elmo_embeddings_op = bilm(token_ids)
    elmo_context_input = weight_layers('input', elmo_embeddings_op, l2_coef=0.0)

    with tf.Session() as sess:
        # It is necessary to initialize variables once before running inference.
        sess.run(tf.global_variables_initializer())

        # Create batches of data.
        train = CoNLLDataset(config.filename_train)
        sents_train = [entry[0] for entry in train]
        sent_ids_train = batcher.batch_sentences(sents_train)

        # Compute ELMo representations (here for the input only, for simplicity),
        # one sentence at a time, concatenated along the token axis.
        elmo_input = sess.run(elmo_context_input['weighted_op'],
                              feed_dict={token_ids: sent_ids_train[0:1]})
        for batch in sent_ids_train[1:]:
            elmo_input_ = sess.run(elmo_context_input['weighted_op'],
                                   feed_dict={token_ids: batch[np.newaxis, ...]})
            elmo_input = np.hstack((elmo_input, elmo_input_))

        test = CoNLLDataset(config.filename_test)
        sents_test = [entry[0] for entry in test]
        sent_ids_test = batcher.batch_sentences(sents_test)
        elmo_context_output_ = sess.run(elmo_context_input['weighted_op'],
                                        feed_dict={token_ids: sent_ids_test})

    return elmo_input, elmo_context_output_

def list_to_embeddings_with_dump(self, batch: List[List[str]], outfile_to_dump=None):
    """
    Parameters
    ----------
    batch : ``List[List[str]]``, required
        A list of tokenized sentences.
    """
    document_embeddings = []

    if batch == [[]]:
        raise ValueError('Batch should not be empty')
    else:
        if self.word_embedding_file is None:
            batcher = Batcher(self.voc_file_path, self.max_word_length)
        else:
            batcher = TokenBatcher(self.voc_file_path)
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            ids_list = batcher.batch_sentences(batch)
            with h5py.File(outfile_to_dump, 'w') as fout:
                for i, ids in enumerate(tqdm(ids_list, total=len(ids_list))):
                    _ops = sess.run(
                        self.ops, feed_dict={self.ids_placeholder: [ids]})
                    mask = _ops['mask']
                    lm_embeddings = _ops['lm_embeddings'][0, :]
                    token_embeddings = _ops['token_embeddings']
                    lengths = _ops['lengths']
                    length = int(mask.sum())
                    document_embeddings.append(lm_embeddings)
                    ds = fout.create_dataset('{}'.format(i),
                                             lm_embeddings.shape,
                                             dtype='float32',
                                             data=lm_embeddings)
        document_embeddings = np.asarray(document_embeddings)
    return document_embeddings

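For completeness, a small sketch of reading such a dump back, assuming it was written to dump.hdf5 by the method above (each dataset key is the sentence index and each array holds the biLM layers for that sentence):

import h5py
import numpy as np

with h5py.File('dump.hdf5', 'r') as fin:
    first = fin['0'][...]                      # layers for the first sentence: (n_layers, n_tokens, dim)
    top_layer = first[-1]                      # top biLM layer: (n_tokens, dim)
    sentence_vec = np.mean(top_layer, axis=0)  # simple mean-pooled sentence vector
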
def list_to_embeddings(self, batch: List[List[str]], slice=None):
    """
    Parameters
    ----------
    batch : ``List[List[str]]``, required
        A list of tokenized sentences.
    """
    elmo_embeddings = []

    if batch == [[]]:
        if slice is None:
            elmo_embeddings.append(empty_embedding(self.dims))
        else:
            if slice > 2:
                raise ValueError('Slice can not be larger than 3')
            elmo_embeddings.append(empty_embedding(self.dims, True))
    else:
        batcher = Batcher(self.voc_file_path, self.max_word_length)
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            for i, _contents in enumerate(tqdm(batch, total=len(batch))):
                char_ids = batcher.batch_sentences([_contents])
                _ops = sess.run(self.ops,
                                feed_dict={self.ids_placeholder: char_ids})
                mask = _ops['mask']
                lm_embeddings = _ops['lm_embeddings']
                token_embeddings = _ops['token_embeddings']
                lengths = _ops['lengths']
                length = int(mask.sum())
                if slice is None:
                    lm_embeddings_mean = np.apply_over_axes(
                        np.mean, lm_embeddings[0], (0, 1))
                else:
                    lm_embeddings_mean = np.apply_over_axes(
                        np.mean, lm_embeddings[0][slice], (0))
                elmo_embeddings.append(lm_embeddings_mean)
    return elmo_embeddings

def _load_embeddings(self, vocab="vocab.txt", options="elmo_options.json",
                     weights="elmo_weights.hdf5"):
    self.elmo_model = BidirectionalLanguageModel(options, weights)
    self.batcher = Batcher(vocab, 50)
    self.character_ids = tf.placeholder('int32', shape=(None, None, 50))
    context_embeddings_op = self.elmo_model(self.character_ids)
    self.elmo_context_output = weight_layers('output',
                                             context_embeddings_op,
                                             l2_coef=0.0)
    tf.global_variables_initializer().run()

def __init__(self,
             vocab_file,
             max_seq_length,
             max_token_length=None,
             stroke_vocab_file=None,
             tran2sim=False,
             sim2tran=False):
    self.vocab_file = vocab_file
    self.max_seq_length = max_seq_length
    self.max_token_length = max_token_length

    max_seq_length = self.max_seq_length - 2  # subtract 2 because <bos> and <eos> will be added
    self.token_batcher = TokenBatcher(self.vocab_file, max_seq_length)
    if max_token_length:
        self.batcher = Batcher(self.vocab_file, self.max_token_length,
                               max_seq_length, stroke_vocab_file)

    self.convert_config = None
    if tran2sim and sim2tran:
        assert tran2sim != sim2tran
    elif tran2sim:
        self.convert_config = "t2s.json"
    elif sim2tran:
        self.convert_config = "s2t.json"

def prepro(config):
    word_counter, char_counter = Counter(), Counter()
    train_examples, train_eval = process_file(config.train_file, "train",
                                              word_counter, char_counter)
    dev_examples, dev_eval = process_file(config.dev_file, "dev",
                                          word_counter, char_counter)
    test_examples, test_eval = process_file(config.test_file, "test",
                                            word_counter, char_counter)

    word_emb_file = config.fasttext_file if config.fasttext else config.glove_word_file
    char_emb_file = config.glove_char_file if config.pretrained_char else None
    char_emb_size = config.glove_char_size if config.pretrained_char else None
    char_emb_dim = config.glove_dim if config.pretrained_char else config.char_dim

    word_emb_mat, word2idx_dict = get_embedding(word_counter, "word",
                                                emb_file=word_emb_file,
                                                size=config.glove_word_size,
                                                vec_size=config.glove_dim)
    char_emb_mat, char2idx_dict = get_embedding(char_counter, "char",
                                                emb_file=char_emb_file,
                                                size=char_emb_size,
                                                vec_size=char_emb_dim)

    batcher = Batcher(config.elmo_vocab_file, config.cont_char_limit)

    build_features(config, train_examples, "train", config.train_record_file,
                   word2idx_dict, char2idx_dict, False, batcher)
    dev_meta = build_features(config, dev_examples, "dev", config.dev_record_file,
                              word2idx_dict, char2idx_dict, False, batcher)
    test_meta = build_features(config, test_examples, "test", config.test_record_file,
                               word2idx_dict, char2idx_dict, True, batcher)

    save(config.word_emb_file, word_emb_mat, message="word embedding")
    save(config.char_emb_file, char_emb_mat, message="char embedding")
    save(config.train_eval_file, train_eval, message="train eval")
    save(config.dev_eval_file, dev_eval, message="dev eval")
    save(config.test_eval_file, test_eval, message="test eval")
    save(config.dev_meta, dev_meta, message="dev meta")
    save(config.test_meta, test_meta, message="test meta")
    save(config.word_dictionary, word2idx_dict, message="word dictionary")
    save(config.char_dictionary, char2idx_dict, message="char dictionary")

class ElmoEmbedding:

    def __init__(self, model_path):
        vocab_file = os.path.join(model_path, 'vocabs.txt')
        options_file = os.path.join(model_path, 'options.json')
        weight_file = os.path.join(model_path, 'weights.hdf5')
        with open(options_file, "r") as fj:
            options = json.load(fj)
        self.max_characters_per_token = options['char_cnn']['max_characters_per_token']
        # Create a Batcher to map text to character ids.
        self.batcher = Batcher(vocab_file, self.max_characters_per_token)
        # Build the biLM graph.
        self.bilm = BidirectionalLanguageModel(options_file, weight_file)

    def __call__(self, tokenized_sentences_lst):
        # Input placeholders to the biLM.
        context_character_ids = tf.placeholder(
            'int32', shape=(None, None, self.max_characters_per_token))
        # Get ops to compute the LM embeddings.
        context_embeddings_op = self.bilm(context_character_ids)
        # Get an op to compute ELMo (weighted average of the internal biLM layers).
        elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
        elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)

        # Now we can compute embeddings.
        context_tokens = [sentence.split() for sentence in tokenized_sentences_lst]
        with tf.Session() as sess:
            # It is necessary to initialize variables once before running inference.
            sess.run(tf.global_variables_initializer())
            # Create batches of data.
            context_ids = self.batcher.batch_sentences(context_tokens)
            # Compute ELMo representations (here for the input only, for simplicity).
            elmo_context_vecs = sess.run(
                [elmo_context_input['weighted_op']],
                feed_dict={context_character_ids: context_ids})
        return elmo_context_vecs[0]  # , context_tokens, context_ids

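A minimal usage sketch for the class above; the model path and example sentences are placeholders, and each call builds its own placeholders and session exactly as written in __call__:

elmo = ElmoEmbedding('/path/to/elmo_model')
vecs = elmo(['this is the first sentence', 'and here is a second one'])
# vecs has shape (2, max_sentence_length, elmo_dim)
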
class ELMoRunner:

    def __init__(self, session, bilm_params):
        self.params = bilm_params
        # Create a Batcher to map text to character ids.
        self.batcher = Batcher(self.params.vocab_file, self.params.max_char_len)
        # Input placeholders to the biLM.
        self.sentence_character_ids = tf.placeholder(
            'int32', shape=(None, None, self.params.max_char_len))
        # Build the biLM graph.
        bilm = BidirectionalLanguageModel(
            self.params.options_file,
            self.params.weights_file,
        )
        # Get ops to compute the LM embeddings.
        sentence_embeddings_op = bilm(self.sentence_character_ids)
        self.elmo_sentence_input = weight_layers('input',
                                                 sentence_embeddings_op,
                                                 l2_coef=0.0,
                                                 use_top_only=True)
        self.sess = session
        self.sess.run(tf.global_variables_initializer())

    def preprocess(self, sentences_words):
        return self.batcher.batch_sentences(sentences_words)

    def __call__(self, batch_sentence_ids):
        (elmo_sentence_input_, ) = self.sess.run(
            [self.elmo_sentence_input['weighted_op']],
            feed_dict={self.sentence_character_ids: batch_sentence_ids})
        return elmo_sentence_input_

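And a minimal sketch of driving ELMoRunner, assuming bilm_params is a simple namespace carrying the four fields read in __init__ (the paths are placeholders):

from types import SimpleNamespace
import tensorflow as tf

bilm_params = SimpleNamespace(vocab_file='vocab.txt',
                              max_char_len=50,
                              options_file='options.json',
                              weights_file='weights.hdf5')

sess = tf.Session()
runner = ELMoRunner(sess, bilm_params)
batch_ids = runner.preprocess([['First', 'sentence'], ['Second', 'one']])
embeddings = runner(batch_ids)  # (batch, max_len, elmo_dim)
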
def __init__(
        self,
        request_names=['train', 'valid', 'test'],
        new_names=['train', 'valid', 'test'],
        classes_name='classes',
        op_type='vectorizer',
        op_name='elmo',
        dimension=1024,
        file_type='bin',  # TODO: ?
        options_file='./embeddingsruwiki_pp_1.0_elmo/options.json',  # TODO: ?
        weights_file='./embeddingsruwiki_pp_1.0_elmo/weights.hdf5',  # TODO: ?
        vocab_file='./embeddingsruwiki_pp_1.0_elmo/vocab.txt'  # TODO: ?
):
    super().__init__(request_names, new_names, op_type, op_name)
    self.file_type = file_type
    self.classes_name = classes_name
    self.dimension = dimension

    # Location of pretrained LM.
    self.options_file = options_file
    self.weights_file = weights_file
    self.vocab_file = vocab_file

    # Create a Batcher to map text to character ids.
    char_per_token = 50
    self.batcher = Batcher(self.vocab_file, char_per_token)
    # Input placeholders to the biLM.
    self.character_ids = tf.placeholder('int32', shape=(None, None, char_per_token))
    # Build the biLM graph.
    bilm = BidirectionalLanguageModel(self.options_file, self.weights_file)
    # Get ops to compute the LM embeddings.
    embeddings_op = bilm(self.character_ids)
    # Get an op to compute ELMo (weighted average of the internal biLM layers).
    self.elmo_output = weight_layers('elmo_output', embeddings_op, l2_coef=0.0)

def __init__(self, args, is_training=True, emb_class='glove', use_crf=True):
    self.emb_path = args.emb_path
    self.embvec = pkl.load(open(self.emb_path, 'rb'))  # resources (glove, vocab, path, etc)
    self.wrd_dim = args.wrd_dim                  # size of word embedding (glove)
    self.chr_dim = 25                            # size of character embedding
    self.pos_dim = 7                             # size of part-of-speech embedding
    self.class_size = len(self.embvec.tag_vocab)  # number of classes (tags)
    self.word_length = args.word_length          # maximum character size of a word for convolution
    self.restore = args.restore                  # checkpoint path if available
    self.use_crf = use_crf                       # use crf decoder or not
    self.emb_class = emb_class                   # class of embedding (glove, elmo, bert)
    self.keep_prob = 0.7                         # keep probability for dropout
    self.chr_conv_type = 'conv1d'                # conv1d | conv2d
    self.filter_sizes = [3]                      # filter sizes
    self.num_filters = 53                        # number of filters
    self.highway_used = False                    # use highway network on the concatenated input
    self.rnn_used = True                         # use rnn layer or not
    self.rnn_num_layers = 2                      # number of RNN layers
    self.rnn_type = 'fused'                      # normal | fused
    self.rnn_size = 200                          # size of RNN hidden unit
    self.tf_used = False                         # use transformer encoder layer or not
    if self.tf_used:                             # modified for transformer
        self.starter_learning_rate = 0.0003
        self.tf_num_layers = 4                   # number of layers for transformer encoder
        self.tf_keep_prob = 0.8                  # keep probability for transformer encoder
        self.tf_mh_num_heads = 4                 # number of heads for multi-head attention
        self.tf_mh_num_units = 64                # Q,K,V dimension for multi-head attention
        self.tf_mh_keep_prob = 0.8               # keep probability for multi-head attention
        self.tf_ffn_kernel_size = 3              # conv1d kernel size for feed-forward net
        self.tf_ffn_keep_prob = 0.8              # keep probability for feed-forward net
    self.starter_learning_rate = 0.001           # default learning rate
    self.decay_steps = 12000
    self.decay_rate = 0.9
    self.clip_norm = 10
    self.is_training = is_training
    if self.is_training:
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.dev_batch_size = 2 * self.batch_size
        self.checkpoint_dir = args.checkpoint_dir
        self.summary_dir = args.summary_dir
    '''
    modified for glove(300, 6B), self.tf_used == False
    self.rnn_size = 276
    self.keep_prob = 0.32
    '''
    if self.emb_class == 'elmo':
        from bilm import Batcher, BidirectionalLanguageModel
        self.word_length = 50  # replace with the fixed word length of the pre-trained ELMo: 'max_characters_per_token'
        self.elmo_batcher = Batcher(self.embvec.elmo_vocab_path,
                                    self.word_length)  # map text to character ids
        self.elmo_bilm = BidirectionalLanguageModel(
            self.embvec.elmo_options_path,
            self.embvec.elmo_weight_path)  # biLM graph
        self.elmo_keep_prob = 0.7                # modified for elmo
        self.highway_used = False
    if self.emb_class == 'bert':
        from bert import modeling
        from bert import tokenization
        self.bert_config = modeling.BertConfig.from_json_file(
            self.embvec.bert_config_path)
        self.bert_tokenizer = tokenization.FullTokenizer(
            vocab_file=self.embvec.bert_vocab_path,
            do_lower_case=self.embvec.bert_do_lower_case)
        self.bert_init_checkpoint = self.embvec.bert_init_checkpoint
        self.bert_max_seq_length = self.embvec.bert_max_seq_length
        self.bert_keep_prob = 0.8                # modified for bert
        self.highway_used = False
        self.rnn_size = 256
        self.starter_learning_rate = 2e-5
        self.decay_steps = 5000
        self.decay_rate = 0.9
        self.clip_norm = 1.5
        self.use_bert_optimization = False
        self.num_train_steps = 0                 # number of total training steps
        self.num_warmup_steps = 0                # number of warmup steps
        self.warmup_proportion = 0.1             # proportion of training to perform linear learning rate warmup for
        if self.is_training:
            self.dev_batch_size = self.batch_size  # set batch_size == dev_batch_size

Below, we show usage for SQuAD where each input example consists of both
a question and a paragraph of context.
'''
import tensorflow as tf
import os
from bilm import Batcher, BidirectionalLanguageModel, weight_layers

# Location of pretrained LM.  Here we use the test fixtures.
datadir = os.path.join('tests', 'fixtures', 'model')
vocab_file = os.path.join(datadir, 'vocab_test.txt')
options_file = os.path.join(datadir, 'options.json')
weight_file = os.path.join(datadir, 'lm_weights.hdf5')

# Create a Batcher to map text to character ids.
batcher = Batcher(vocab_file, 50)

# Input placeholders to the biLM.
context_character_ids = tf.placeholder('int32', shape=(None, None, 50))
question_character_ids = tf.placeholder('int32', shape=(None, None, 50))

# Build the biLM graph.
bilm = BidirectionalLanguageModel(options_file, weight_file)

# Get ops to compute the LM embeddings.
context_embeddings_op = bilm(context_character_ids)
question_embeddings_op = bilm(question_character_ids)

# Get an op to compute ELMo (weighted average of the internal biLM layers)
# Our SQuAD model includes ELMo at both the input and output layers
# of the task GRU, so we need 4x ELMo representations for the question
# and context at each of the input and output.

class QAModel(object):

    def __init__(self, FLAGS, id2word, word2id, emb_matrix, id2char, char2id):
        self.FLAGS = FLAGS
        self.id2word = id2word
        self.word2id = word2id
        self.emb_matrix = emb_matrix
        self.id2char = id2char
        self.char2id = char2id
        self.batcher = Batcher(
            "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/elmo_vocab.txt", 50)
        self.filters = [(5, 10)]  # change back to 100 after
        self.options_file = "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/elmo.json"
        self.weight_file = "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad/data/elmo/lm_weight.hdf5"

        with tf.variable_scope(
                "QAModel",
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    factor=1.0, uniform=True)):
            self.add_placeholders()
            self.add_embedding_layer(emb_matrix)
            self.add_elmo_embedding_layer(self.options_file, self.weight_file)
        with tf.variable_scope(
                "QAModel",
                initializer=tf.contrib.layers.variance_scaling_initializer(
                    factor=1.0, uniform=True)):
            self.build_graph()
            self.add_loss()

        # Define trainable parameters, gradient, gradient norm, and clip by gradient norm
        params = tf.trainable_variables("QAModel")  # since only one scope "QAModel"
        gradients = tf.gradients(self.loss, params)  # d(loss)/d(params), a list of length len(params)
        self.gradient_norm = tf.global_norm(gradients)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)  # returns (list_clipped, global_norm); global_norm unused here
        self.param_norm = tf.global_norm(params)

        # Define optimizer and updates
        # (updates is what you need to fetch in session.run to do a gradient update)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        # This will increment the global step if global_step is not None
        opt = tf.train.AdamOptimizer(learning_rate=0.001)  # you can try other optimizers
        self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                           global_step=self.global_step)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        self.bestmodel_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        self.summaries = tf.summary.merge_all()

    def add_placeholders(self):
        self.context_ids = tf.placeholder(tf.int32)
        self.context_mask = tf.placeholder(tf.int32)
        self.qn_ids = tf.placeholder(tf.int32)
        self.qn_mask = tf.placeholder(tf.int32)
        self.ans_span = tf.placeholder(tf.int32, shape=[None, 2])
        # NOTE: CHANGE
        # self.context_char = tf.placeholder(tf.int32, shape=[None, self.FLAGS.context_len, self.FLAGS.max_word_len])
        # self.qn_char = tf.placeholder(tf.int32, shape=[None, self.FLAGS.question_len, self.FLAGS.max_word_len])
        # The following two may not be necessary
        # self.context_char_mask = tf.placeholder(tf.int32, shape=[None, self.FLAGS.context_len, self.FLAGS.max_word_len])
        # self.qn_char_mask = tf.placeholder(tf.int32, shape=[None, self.FLAGS.question_len, self.FLAGS.max_word_len])
        self.context_elmo = tf.placeholder('int32', shape=[None, None, 50])
        self.qn_elmo = tf.placeholder('int32', shape=[None, None, 50])
        # Add a placeholder to feed in the keep probability (for dropout).
        # This is necessary so that we can instruct the model to use dropout
        # when training, but not when testing.
        self.keep_prob = tf.placeholder_with_default(1.0, shape=())

    def add_embedding_layer(self, emb_matrix):
        with tf.variable_scope("embeddings"):
            # Set to a constant so it is untrainable; shape (400002, embedding_size).
            embedding_matrix = tf.constant(emb_matrix, dtype=tf.float32, name="emb_matrix")
            # Get the word embeddings for the context and question.
            self.context_embs = tf.nn.embedding_lookup(embedding_matrix, self.context_ids)
            self.qn_embs = tf.nn.embedding_lookup(embedding_matrix, self.qn_ids)
            # self.add_char_embedding_layer()

    def add_elmo_embedding_layer(self, options_file, weight_file, output_use=False):
        """
        Adds ELMo lstm embeddings to the graph.

        Inputs:
          options_file: json file for the pretrained model
          weight_file: weights hdf5 file for the pretrained model
          output_use: whether to use ELMo at the output of the biRNN (default False)
        """
        # Build biLM graph
        bilm = BidirectionalLanguageModel(options_file, weight_file)
        context_embeddings_op = bilm(self.context_elmo)
        question_embeddings_op = bilm(self.qn_elmo)

        # Get an op to compute ELMo (weighted average of the internal biLM layers).
        # Our SQuAD model includes ELMo at both the input and output layers
        # of the task GRU, so we need 4x ELMo representations for the question
        # and context at each of the input and output.
        # We use the same ELMo weights for both the question and context
        # at each of the input and output.
        # Compute the final ELMo representations.
        self.elmo_context_input = weight_layers(
            'input', context_embeddings_op, l2_coef=0.001)['weighted_op']  # (batch size, context size, ????)
        with tf.variable_scope('', reuse=True):
            # the reuse=True scope reuses weights from the context for the question
            self.elmo_question_input = weight_layers(
                'input', question_embeddings_op, l2_coef=0.001)['weighted_op']
        if output_use:
            self.elmo_context_output = weight_layers(
                'output', context_embeddings_op, l2_coef=0.001)['weighted_op']
            with tf.variable_scope('', reuse=True):
                # the reuse=True scope reuses weights from the context for the question
                self.elmo_question_output = weight_layers(
                    'output', question_embeddings_op, l2_coef=0.001)['weighted_op']

    def build_graph(self):
        context_embs_concat = tf.concat(
            [self.elmo_context_input, self.context_embs], 2)  # (batch_size, qn_len, 1024 + self.FLAGS.embedding_size)
        context_embs_concat.set_shape((None, None, 1024 + self.FLAGS.embedding_size))
        # qn_embs_concat.set_shape((None, None, 1024 + self.FLAGS.embedding_size))
        self.qn_mask.set_shape((None, None))
        self.context_mask.set_shape((None, None))

        with tf.variable_scope("start"):
            softmax_start = SimpleSoftmaxLayer()
            self.logits_start, self.probdist_start = softmax_start.build_graph(
                context_embs_concat, self.context_mask)
        with tf.variable_scope("end"):
            softmax_end = SimpleSoftmaxLayer()
            self.logits_end, self.probdist_end = softmax_end.build_graph(
                context_embs_concat, self.context_mask)

    def add_loss(self):
        with tf.variable_scope("loss"):
            # Calculate loss for prediction of start position
            loss_start = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits_start, labels=self.ans_span[:, 0])  # loss_start has shape (batch_size)
            self.loss_start = tf.reduce_mean(loss_start)
            tf.summary.scalar('loss_start', self.loss_start)

            # Calculate loss for prediction of end position
            loss_end = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits_end, labels=self.ans_span[:, 1])
            self.loss_end = tf.reduce_mean(loss_end)
            tf.summary.scalar('loss_end', self.loss_end)

            # Add two losses
            self.loss = self.loss_start + self.loss_end
            tf.summary.scalar('loss', self.loss)

    def run_train_iter(self, session, batch, summary_writer):
        input_feed = {}
        input_feed[self.context_ids] = batch.context_ids
        input_feed[self.context_mask] = batch.context_mask
        # NOTE: CHANGE added context_char
        # input_feed[self.context_char] = batch.context_char
        input_feed[self.context_elmo] = self.batcher.batch_sentences(batch.context_tokens)
        input_feed[self.qn_ids] = batch.qn_ids
        input_feed[self.qn_mask] = batch.qn_mask
        # NOTE: CHANGE added qn_char
        # input_feed[self.qn_char] = batch.qn_char
        input_feed[self.qn_elmo] = self.batcher.batch_sentences(batch.qn_tokens)
        input_feed[self.ans_span] = batch.ans_span
        input_feed[self.keep_prob] = 1.0 - self.FLAGS.dropout  # apply dropout

        output_feed = [
            self.updates, self.summaries, self.loss, self.global_step,
            self.param_norm, self.gradient_norm
        ]
        # output_feed = [self.elmo_context_input]
        [_, summaries, loss, global_step, param_norm,
         gradient_norm] = session.run(output_feed, feed_dict=input_feed)
        print("FINISHED")

    def train(self, session, train_context_path, train_qn_path, train_ans_path,
              dev_qn_path, dev_context_path, dev_ans_path):
        summary_writer = tf.summary.FileWriter(
            "/Users/lam/Desktop/Lam-cs224n/Projects/qa/squad", session.graph)
        for batch in get_batch_generator(self.word2id, self.char2id,
                                         train_context_path, train_qn_path,
                                         train_ans_path, self.FLAGS.batch_size,
                                         self.FLAGS.context_len,
                                         self.FLAGS.question_len,
                                         self.FLAGS.max_word_len,
                                         discard_long=True):
            self.sample_batch = batch
            self.run_train_iter(session, batch, summary_writer)
            break

args = parse_args()
dtypes = args.dtypes.split(':')
trial_num = max(1, args.trial_num)

###
# args.exptdir = pwd/data
# args.datadir = trial
# We will use "${args.exptdir}/alltrain.epitope.elmo" as the model directory
model_dir = join(args.exptdir, 'alltrain.epitope.elmo', 'best_model')
vocab_file = join(args.exptdir, 'alltrain.epitope.vocab')
options_file = join(model_dir, 'pred.options.json')
weight_file = join(model_dir, 'weights.h5')

# Create a Batcher to map text to character ids.
batcher = Batcher(vocab_file, 50)

# Input placeholders to the biLM.
context_character_ids = tf.placeholder('int32', shape=(None, None, 50))

bilm = BidirectionalLanguageModel(options_file, weight_file)
context_embeddings_op = bilm(context_character_ids)
elmo_context_input = weight_layers('input', context_embeddings_op, l2_coef=0.0)
elmo_context_output = weight_layers('output', context_embeddings_op, l2_coef=0.0)

with tf.Session() as sess:
    # It is necessary to initialize variables once before running inference.
    sess.run(tf.global_variables_initializer())

def build_features(config, examples, data_type, out_file, word2idx_dict, is_test=False):
    para_limit = config.para_limit
    ques_limit = config.ques_limit
    turn_limit = config.turn_limit

    def filter_func(example):
        return len(example["tokenized_context"]) > para_limit  # or len(example["ques_tokens"]) > ques_limit

    print("Processing {} examples...".format(data_type))
    writer = tf.python_io.TFRecordWriter(out_file)
    total = 0
    total_ = 0
    meta = {}
    max_char_length = config.max_char_length
    batcher = Batcher(config.elmo_vocab_file, max_char_length)
    for example in tqdm(examples):
        total_ += 1
        if filter_func(example):
            continue
        total += 1

        context_idxs = np.zeros([para_limit], dtype=np.int32)
        questions_idxs = np.zeros([turn_limit, ques_limit], dtype=np.int32)
        context_char_idxs = np.zeros([para_limit + 2, max_char_length], dtype=np.int32)
        questions_char_idxs = np.zeros([turn_limit, ques_limit + 2, max_char_length], dtype=np.int32)
        starts = np.zeros([turn_limit, para_limit], dtype=np.float32)
        ends = np.zeros([turn_limit, para_limit], dtype=np.float32)
        em = np.zeros([turn_limit, para_limit], dtype=np.int32)
        yes_answers = np.zeros([turn_limit], dtype=np.int32)
        no_answers = np.zeros([turn_limit], dtype=np.int32)
        unk_answers = np.zeros([turn_limit], dtype=np.int32)
        span_flag = np.zeros([turn_limit], dtype=np.int32)

        def _get_word(word):
            for each in (word, word.lower(), word.capitalize(), word.upper()):
                if each in word2idx_dict:
                    return word2idx_dict[each]
            return 1

        def _check_word_in_question(word, question):
            for token in question:
                if word.lower() == token.lower():
                    return True
            return False

        # type: List[str]
        tokenized_context = example["tokenized_context"]
        length = len(tokenized_context) + 2
        context_char_idxs_without_mask = batcher._lm_vocab.encode_chars(
            tokenized_context, split=False)
        context_char_idxs[:length, :] = context_char_idxs_without_mask + 1

        for k, sent in enumerate(example["tokenized_questions"]):
            length = len(sent) + 2
            question_char_idxs_without_mask = batcher._lm_vocab.encode_chars(
                sent, split=False)
            questions_char_idxs[k, :length, :] = question_char_idxs_without_mask + 1

        # get em and context indexes vector
        for i, token in enumerate(tokenized_context):
            context_idxs[i] = _get_word(token)
            for j, tokenized_question in enumerate(example["tokenized_questions"]):
                if _check_word_in_question(token, tokenized_question):
                    em[j, i] = 1

        # get question indexes vector
        for i, tokenized_question in enumerate(example["tokenized_questions"]):
            for j, token in enumerate(tokenized_question):
                questions_idxs[i, j] = _get_word(token)

        # get start vector
        for i, idx in enumerate(example["starts"]):
            starts[i, idx] = 1.0
        # get end vector
        for i, idx in enumerate(example["ends"]):
            ends[i, idx] = 1.0

        # get label of yes/no questions
        length = len(example["yes_answers"])
        yes_answers[:length] = example["yes_answers"]
        no_answers[:length] = example["no_answers"]
        unk_answers[:length] = example["unk_answers"]
        span_flag[:length] = example["span_flag"]

        feature_dict = {
            "context_idxs":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[context_idxs.tostring()])),
            "questions_idxs":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[questions_idxs.tostring()])),
            "context_char_idxs":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[context_char_idxs.tostring()])),
            "questions_char_idxs":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[questions_char_idxs.tostring()])),
            "starts":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[starts.tostring()])),
            "ends":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[ends.tostring()])),
            "em":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[em.tostring()])),
            "yes_answers":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[yes_answers.tostring()])),
            "no_answers":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[no_answers.tostring()])),
            "unk_answers":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[unk_answers.tostring()])),
            "span_flag":
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[span_flag.tostring()]))
        }
        record = tf.train.Example(features=tf.train.Features(feature=feature_dict))
        writer.write(record.SerializeToString())

    print("Build {} / {} instances of features in total".format(total, total_))
    meta["total"] = total
    writer.close()
    return meta

if os.path.isfile(os.path.join(datadir, 'vocab.txt.gz')):
    vocab_file = os.path.join(datadir, 'vocab.txt.gz')
elif os.path.isfile(os.path.join(datadir, 'vocab.txt')):
    vocab_file = os.path.join(datadir, 'vocab.txt')
else:
    print('No vocabulary file found. Exiting...')
    exit()
options_file = os.path.join(datadir, 'options.json')
weight_file = os.path.join(datadir, 'model.hdf5')

with open(options_file, 'r') as f:
    m_options = json.load(f)
max_chars = m_options['char_cnn']['max_characters_per_token']

# Create a Batcher to map text to character ids.
batcher = Batcher(vocab_file, max_chars)

# Input placeholders to the biLM.
sentence_character_ids = tf.compat.v1.placeholder('int32', shape=(None, None, max_chars))

# Build the biLM graph.
bilm = BidirectionalLanguageModel(options_file, weight_file, max_batch_size=200)

# Get ops to compute the LM embeddings.
sentence_embeddings_op = bilm(sentence_character_ids)

# Get an op to compute ELMo (weighted average of the internal biLM layers)

class ELMoTuner(Tuner):

    def __init__(self, train_corpus_fname, test_corpus_fname,
                 vocab_fname, options_fname, pretrain_model_fname,
                 model_save_path, max_characters_per_token=30,
                 batch_size=32, num_labels=2):
        # Load a corpus.
        super().__init__(train_corpus_fname=train_corpus_fname,
                         tokenized_train_corpus_fname=train_corpus_fname + ".elmo-tokenized",
                         test_corpus_fname=test_corpus_fname,
                         tokenized_test_corpus_fname=test_corpus_fname + ".elmo-tokenized",
                         model_name="elmo",
                         vocab_fname=vocab_fname,
                         model_save_path=model_save_path,
                         batch_size=batch_size)
        # configurations
        self.options_fname = options_fname
        self.pretrain_model_fname = pretrain_model_fname
        self.max_characters_per_token = max_characters_per_token
        self.num_labels = 2  # positive, negative
        self.num_train_steps = (int((len(self.train_data) - 1) / self.batch_size) + 1) * self.num_epochs
        self.eval_every = int(self.num_train_steps / self.num_epochs)  # evaluate once per epoch
        # Create a Batcher to map text to character ids.
        # lm_vocab_file = ELMo can build input ids on the fly without a token vocab,
        #   but pre-mapping frequent character sequences (i.e. the vocab) to ids makes training faster.
        # max_token_length = the maximum number of characters in each token
        self.batcher = Batcher(lm_vocab_file=vocab_fname,
                               max_token_length=self.max_characters_per_token)
        self.training = tf.placeholder(tf.bool)
        # build train graph
        (self.ids_placeholder, self.labels_placeholder, self.dropout_keep_prob,
         self.logits, self.loss) = make_elmo_graph(options_fname,
                                                   pretrain_model_fname,
                                                   max_characters_per_token,
                                                   num_labels, tune=True)

    def tune(self):
        global_step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        grads_and_vars = optimizer.compute_gradients(self.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        output_feed = [train_op, global_step, self.logits, self.loss]
        saver = tf.train.Saver(max_to_keep=1)
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        self.train(sess, saver, global_step, output_feed)

    def make_input(self, sentences, labels, is_training):
        current_input = self.batcher.batch_sentences(sentences)
        current_output = np.array(labels)
        if is_training:
            input_feed = {
                self.ids_placeholder: current_input,
                self.labels_placeholder: current_output,
                self.dropout_keep_prob: self.dropout_keep_prob_rate,
                self.training: True
            }
        else:
            input_feed_ = {
                self.ids_placeholder: current_input,
                self.labels_placeholder: current_output,
                self.dropout_keep_prob: 1.0,
                self.training: False
            }
            input_feed = [input_feed_, current_output]
        return input_feed

test_datas, test_sample_num = transform_data(test_datas, all_tokens, batch_size)
test_batch_num = len(test_datas[0])
test_m_datas, test_m_sample_num = transform_data(test_m_datas, all_tokens, batch_size)
test_m_batch_num = len(test_m_datas[0])
test_h_datas, test_h_sample_num = transform_data(test_h_datas, all_tokens, batch_size)
test_h_batch_num = len(test_h_datas[0])

# build and save vocab file
with open(vocab_file, 'w') as fout:
    fout.write('\n'.join(all_tokens))

# Create a Batcher to map text to character ids.
batcher = Batcher(vocab_file, 50, max_context_length)
batcher2 = Batcher(vocab_file, 50, max_q_o_length)

# *** build models ***
# Input placeholders to the biLM.
context_character_ids = tf.placeholder('int32', shape=(None, None, 50))
question_character_ids = tf.placeholder('int32', shape=(None, None, 50))
options_character_ids = tf.placeholder('int32', shape=(None, None, 50))
context_lengths = tf.placeholder('int32', shape=(None, ))
question_lengths = tf.placeholder('int32', shape=(None, ))
options_lengths = tf.placeholder('int32', shape=(None, ))
labels = tf.placeholder('int32', shape=(None, ))

# Build the biLM graph.
bilm = BidirectionalLanguageModel(options_file, weight_file)