# Assumed TensorFlow 1.x imports for the tf.contrib-era APIs used below.
import tensorflow as tf
from tensorflow.contrib import layers, rnn, seq2seq
from tensorflow.python.layers import core as layers_core


def wrapper_model(desc, source_dir, dest_dir, sents):
    if not isConcat:
        # Look up word embeddings for the raw sentences and checkpoint them.
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            get_sents_embeddings(sents, word_embeddings, vocab, sess, dest_dir)
        # Restore the checkpointed sentence embeddings and group them into batches.
        with tf.Session(graph=tf.Graph()) as sess:
            saver = tf.train.import_meta_graph(dest_dir + 'temp.ckpt.meta')
            saver.restore(sess, dest_dir + 'temp.ckpt')
            sent_embed = sess.run("embeds:0")
            n_examples, n_visible, embd_dim = sent_embed.shape
            batched = batching(sent_embed, n_examples, sess, dest_dir)
    else:
        batched = sents
    format_file(STATUS_LOG, display_batch)
    processed_sent = run_model(desc, batched, source_dir, dest_dir)
    return processed_sent
def wrapper_model(desc, source_dir, dest_dir, sents):
    # Variant of the wrapper above (same name, so it shadows the first
    # definition if both live in one module): keys off isDim and keeps the
    # embeddings in memory instead of restoring them from the temp checkpoint.
    if not isDim:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            sent_embed = get_sent_embeddings(sents, word_embeddings, vocab, sess, dest_dir)
            n_examples, n_visible, embd_dim = sent_embed.shape
            sents = batching(sent_embed, n_examples, sess, dest_dir)
    processed_docs = run_model(desc, sents, source_dir, dest_dir)
    return processed_docs
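# --- Usage sketch (illustrative, not part of the pipeline) ---
# A minimal example of driving wrapper_model, assuming the globals
# (isConcat/isDim, STATUS_LOG, display_batch) are already configured.
# The directories and the sentence list are hypothetical; desc is just
# the description string logged to STATUS_LOG.
def _demo_wrapper_model():
    sample_sents = ["the cat sat on the mat", "dogs chase cats"]
    processed = wrapper_model(desc="demo run",
                              source_dir="./models/",
                              dest_dir="./tmp/",
                              sents=sample_sents)
    print(processed)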
def lookUp_batch_embeddings(dest_dir, sents, extra_pad=False):
    global vocab_size, embd_dim, seq_len
    # Map each sentence to token ids and checkpoint the looked-up embeddings.
    with tf.Session(graph=tf.Graph()) as sess:
        vocab, word_embeddings = load_processed_embeddings(sess)
        vocab_size = len(vocab)
        ids = get_sents_embedding(sents, word_embeddings, vocab, sess, dest_dir, extra_pad)
    # Restore the embeddings and record the dataset dimensions in the globals.
    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(dest_dir + 'temp.ckpt.meta')
        saver.restore(sess, dest_dir + 'temp.ckpt')
        sent_embed = sess.run("embeds:0")
        n_examples, seq_len, embd_dim = sent_embed.shape
    return ids, sent_embed
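# --- Usage sketch (illustrative) ---
# lookUp_batch_embeddings returns the per-sentence token ids together with a
# dense embedding tensor of shape (n_examples, seq_len, embd_dim); as a side
# effect it sets the vocab_size/seq_len/embd_dim globals consumed by
# rbmE_gruD below. The directory and sentences here are hypothetical.
def _demo_lookup(dest_dir="./tmp/"):
    ids, sent_embed = lookUp_batch_embeddings(dest_dir,
                                              ["a short sentence", "another one"],
                                              extra_pad=True)
    print(len(ids), sent_embed.shape)  # e.g. 2 (2, seq_len, embd_dim)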
def wrapper_rbm(desc, dest_dir, num_epochs, raw_sents=None):
    global DIMENSION
    # Preprocess raw sentences, look up their embeddings and group them in
    # batches; raw_sents=None reuses embeddings already checkpointed in dest_dir.
    if raw_sents is not None:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            get_sents_embeddings(raw_sents, word_embeddings, vocab, sess, dest_dir)
    train_writer = tf.summary.FileWriter(dest_dir)
    write_to_file(STATUS_LOG, desc)
    error = 0
    for step in range(num_epochs):
        # Each epoch runs in a fresh graph; resume from the last checkpoint
        # when one exists, otherwise start from scratch.
        with tf.Session(graph=tf.Graph()) as sess:
            if is_exists_saved_model(dest_dir):
                print("training from saved model")
                model = step_restore_model(dest_dir)
                cost, err = train_from_saved(model, dest_dir, train_writer, step)
            else:
                print("training from scratch")
                cost, err = train_from_scratch(dest_dir, train_writer, step)
            error += err
            print("Step ", step, " Cost: ", "{:.5f}".format(err))
    # Append the average cost over all epochs to the status log, right-aligned
    # to match the per-batch columns written by format_file.
    m = int(n / display_batch)
    width = 7 + (10 * m)
    format_av_cost = '{0: >' + str(width) + '}'
    av_cost = format_av_cost.format('Av.Cost')
    av_err = error / num_epochs
    status = av_cost + '{0: >10}'.format("{:.5f}".format(av_err)) + '\n'
    write_to_file(STATUS_LOG, status)
    path_to_trained_model = dest_dir + 'model.ckpt'
    train_writer.close()
    return path_to_trained_model
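# --- Usage sketch (illustrative) ---
# Trains the RBM for a few epochs and returns the checkpoint path. The
# sentences and directory are hypothetical stand-ins.
def _demo_wrapper_rbm(dest_dir="./tmp/"):
    ckpt = wrapper_rbm(desc="rbm demo", dest_dir=dest_dir, num_epochs=3,
                       raw_sents=["the cat sat on the mat"])
    print("trained model at", ckpt)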
def rbmE_gruD(mode, features, labels, params):
    # Estimator model_fn: RBM-produced sentence embeddings feed an RNN
    # encoder-decoder. `state` is a module-level flag ("Training",
    # "Evaluating", "Testing" or "Infering") set by the caller.
    inp = features["x"]
    if state != "Infering":
        ids = features["ids"]
        weights = features["weights"]
    batch_size = params["batch_size"]

    # Encoder
    enc_cell = rnn.NASCell(num_units=NUM_UNITS)
    enc_out, enc_state = tf.nn.dynamic_rnn(enc_cell, inp, time_major=False,
                                           dtype=tf.float32)

    # Decoder
    cell = rnn.NASCell(num_units=NUM_UNITS)
    _, embeddings = load_processed_embeddings(sess=tf.InteractiveSession())
    out_lengths = tf.constant(seq_len, shape=[batch_size])
    # Start tokens are needed by both the greedy helper and the beam-search
    # decoder, so define them for every state (the original defined them only
    # outside "Infering", which raised a NameError at inference time).
    start_tokens = tf.zeros([batch_size], dtype=tf.int32)
    if state != "Infering":
        # Sampling method for training: feed the ground-truth labels.
        train_helper = seq2seq.TrainingHelper(labels, out_lengths, time_major=False)
        # Sampling method for evaluation: feed back the decoder's own argmax.
        infer_helper = seq2seq.GreedyEmbeddingHelper(embedding=embeddings,
                                                     start_tokens=start_tokens,
                                                     end_token=END)
    projection_layer = layers_core.Dense(vocab_size, use_bias=False)

    def decode(helper):
        decoder = seq2seq.BasicDecoder(cell=cell, helper=helper,
                                       initial_state=enc_state,
                                       output_layer=projection_layer)
        (dec_outputs, _, _) = seq2seq.dynamic_decode(decoder,
                                                     maximum_iterations=seq_len)
        dec_ids = dec_outputs.sample_id
        logits = dec_outputs.rnn_output
        return dec_ids, logits

    # Equalize logits, label and weight lengths in case the decoder
    # finishes early.
    def norm_logits_loss(logts, ids, weights):
        current_ts = tf.to_int32(tf.minimum(tf.shape(ids)[1], tf.shape(logts)[1]))
        logts = tf.slice(logts, begin=[0, 0, 0], size=[-1, current_ts, -1])
        ids = tf.slice(ids, begin=[0, 0], size=[-1, current_ts])
        weights = tf.slice(weights, begin=[0, 0], size=[-1, current_ts])
        return logts, ids, weights

    # Training mode
    if state == "Training":
        # Some sample_id entries are overwritten with -1 by the helper.
        dec_ids, logits = decode(train_helper)
        tf.identity(dec_ids, name="predictions")
        logits, ids, weights = norm_logits_loss(logits, ids, weights)
        loss = tf.contrib.seq2seq.sequence_loss(logits, ids, weights=weights)
        learning_rate = 0.001
        tf.identity(learning_rate, name="learning_rate")

    # Evaluation mode
    if state == "Evaluating" or state == "Testing":
        eval_dec_ids, eval_logits = decode(infer_helper)
        tf.identity(eval_dec_ids, name="predictions")
        eval_logits, ids, weights = norm_logits_loss(eval_logits, ids, weights)
        eval_loss = tf.contrib.seq2seq.sequence_loss(eval_logits, ids, weights=weights)

    # Beam-search decoder for inference: keep the highest-probability beam.
    init_state = tf.contrib.seq2seq.tile_batch(enc_state, multiplier=5)
    beamSearch_decoder = seq2seq.BeamSearchDecoder(cell, embeddings, start_tokens,
                                                   end_token=END,
                                                   initial_state=init_state,
                                                   beam_width=5,
                                                   output_layer=projection_layer)
    (infer_outputs, _, _) = seq2seq.dynamic_decode(beamSearch_decoder,
                                                   maximum_iterations=seq_len)
    infer_ids = infer_outputs.predicted_ids
    infer_probs = infer_outputs.beam_search_decoder_output.scores
    infer_probs = tf.reduce_prod(infer_probs, axis=1)
    infer_pos = tf.argmax(infer_probs, axis=1)
    infers = {"ids": infer_ids, "pos": infer_pos}

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = layers.optimize_loss(loss, tf.train.get_global_step(),
                                        optimizer='Adam',
                                        learning_rate=learning_rate,
                                        clip_gradients=5.0)
        spec = tf.estimator.EstimatorSpec(mode=mode, predictions=dec_ids,
                                          loss=loss, train_op=train_op)
    # Evaluation mode
    elif mode == tf.estimator.ModeKeys.EVAL:
        spec = tf.estimator.EstimatorSpec(mode=mode, loss=eval_loss,
                                          predictions=eval_dec_ids)
    else:
        spec = tf.estimator.EstimatorSpec(mode=mode, predictions=infers)
    return spec
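# --- Usage sketch (illustrative) ---
# How rbmE_gruD might plug into the tf.estimator API (TF 1.x). The feature
# arrays here are random placeholders with the shapes the model_fn expects;
# in the real pipeline they come from lookUp_batch_embeddings, which also
# sets the seq_len/embd_dim/vocab_size globals. Assumes the module-level
# `state` flag has been set to "Training" to match the TRAIN mode.
def _demo_estimator(dest_dir="./tmp/"):
    import numpy as np
    n, bs = 8, 4
    features = {
        "x": np.random.rand(n, seq_len, embd_dim).astype(np.float32),
        "ids": np.random.randint(0, vocab_size, (n, seq_len)).astype(np.int32),
        "weights": np.ones((n, seq_len), dtype=np.float32),
    }
    labels = np.random.rand(n, seq_len, embd_dim).astype(np.float32)
    estimator = tf.estimator.Estimator(model_fn=rbmE_gruD, model_dir=dest_dir,
                                       params={"batch_size": bs})
    input_fn = tf.estimator.inputs.numpy_input_fn(x=features, y=labels,
                                                  batch_size=bs,
                                                  num_epochs=1, shuffle=False)
    estimator.train(input_fn=input_fn)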