def wrapper_model(desc, source_dir, dest_dir, sents):
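    """Embed the raw sentences, reload the cached embeddings from dest_dir +
    'temp.ckpt', group them into batches and pass the batches to run_model.
    When the module-level isConcat flag is set, sents is assumed to be
    batch-ready already and is forwarded unchanged."""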
    if not isConcat:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            get_sents_embeddings(sents, word_embeddings, vocab, sess, dest_dir)

        with tf.Session(graph=tf.Graph()) as sess:
            saver = tf.train.import_meta_graph(dest_dir + 'temp.ckpt.meta')
            saver.restore(sess, dest_dir + 'temp.ckpt')

            sent_embed = sess.run("embeds:0")
            n_examples, n_visible, embd_dim = sent_embed.shape
            batched = batching(sent_embed, n_examples, sess, dest_dir)
    else:
        batched = sents

    format_file(STATUS_LOG, display_batch)
    processed_sent = run_model(desc, batched, source_dir, dest_dir)

    return processed_sent
Example #2
def wrapper_model(desc, source_dir, dest_dir, sents):
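    """Variant of wrapper_model: unless the module-level isDim flag is set, the
    sentences are embedded and batched in a fresh session before being handed
    to run_model."""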
    if not isDim:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            sent_embed = get_sent_embeddings(sents, word_embeddings, vocab,
                                             sess, dest_dir)
            n_examples, n_visible, embd_dim = sent_embed.shape
            sents = batching(sent_embed, n_examples, sess, dest_dir)

    processed_docs = run_model(desc, sents, source_dir, dest_dir)
    return processed_docs
def lookUp_batch_embeddings(dest_dir, sents, extra_pad=False):
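    """Look up token ids and padded embeddings for sents. The embedding tensor
    saved under dest_dir + 'temp.ckpt' during the lookup is reloaded here so
    the module-level vocab_size, seq_len and embd_dim globals can be set from
    the vocabulary and the embedding shape."""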
    global vocab_size, embd_dim, seq_len

    with tf.Session(graph=tf.Graph()) as sess:
        vocab, word_embeddings = load_processed_embeddings(sess)
        vocab_size = len(vocab)
        ids = get_sents_embedding(sents, word_embeddings, vocab, sess,
                                  dest_dir, extra_pad)

    with tf.Session(graph=tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(dest_dir + 'temp.ckpt.meta')
        saver.restore(sess, dest_dir + 'temp.ckpt')

        sent_embed = sess.run("embeds:0")
        n_examples, seq_len, embd_dim = sent_embed.shape
    return ids, sent_embed
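# Example usage (a sketch): dest_dir must end with a path separator, since the
# checkpoint is addressed as dest_dir + 'temp.ckpt', e.g.
#   ids, sent_embed = lookUp_batch_embeddings('./tmp/', sentences)
# where `sentences` is whatever sentence collection get_sents_embedding expects.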
def wrapper_rbm(desc, dest_dir, num_epochs, raw_sents=None):
    """Train the RBM for num_epochs, resuming from a saved model in dest_dir
    when one exists, and write the average cost over all epochs to STATUS_LOG."""
    global DIMENSION
    # preprocess raw sentences, look up their embeddings and group them in batches
    if raw_sents is not None:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            get_sents_embeddings(raw_sents, word_embeddings, vocab, sess, dest_dir)

    train_writer = tf.summary.FileWriter(dest_dir)
    write_to_file(STATUS_LOG, desc)
    error = 0

    for step in range(num_epochs):
        with tf.Session(graph=tf.Graph()) as sess:
            if is_exists_saved_model(dest_dir):
                print("training from saved model")
                model = step_restore_model(dest_dir)
                cost, err = train_from_saved(model, dest_dir, train_writer, step)
                error += err
            else:
                print("training from scratch")
                cost, err = train_from_scratch(dest_dir, train_writer, step)
                error += err
            print("Step ", step, " Cost: ", "{:.5f}".format(err))

    # write the average cost across epochs to the status log
    # (n and display_batch are module-level globals)
    m = int(n / display_batch)
    width = 7 + (10 * m)
    format_av_cost = '{0: >' + str(width) + '}'
    av_cost = format_av_cost.format('Av.Cost')
    av_err = error / num_epochs
    _status = str("{:.5f}".format(av_err))
    status = av_cost + '{0: >10}'.format(_status)
    status += '\n'
    write_to_file(STATUS_LOG, status)

    path_to_trained_model = dest_dir + 'model.ckpt'
    train_writer.close()

    return path_to_trained_model
def rbmE_gruD(mode, features, labels, params):
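    """tf.estimator model_fn: encode the batched sentence representations in
    features["x"] with a NASCell RNN, then decode with a second NASCell using a
    TrainingHelper during training, a GreedyEmbeddingHelper for evaluation and
    testing, and a beam-search decoder for inference. Branching relies on the
    module-level state flag in addition to mode."""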
    inp = features["x"]

    if state != "Infering":
        ids = features["ids"]
        weights = features["weights"]

    batch_size = params["batch_size"]

    #Encoder
    enc_cell = rnn.NASCell(num_units=NUM_UNITS)
    enc_out, enc_state = tf.nn.dynamic_rnn(enc_cell,
                                           inp,
                                           time_major=False,
                                           dtype=tf.float32)

    #Decoder
    cell = rnn.NASCell(num_units=NUM_UNITS)

    _, embeddings = load_processed_embeddings(sess=tf.InteractiveSession())
    out_lengths = tf.constant(seq_len, shape=[batch_size])
    if state != "Infering":
        #sampling method for training
        train_helper = seq2seq.TrainingHelper(labels,
                                              out_lengths,
                                              time_major=False)
    #sampling method for evaluation
    start_tokens = tf.zeros([batch_size], dtype=tf.int32)
    infer_helper = seq2seq.GreedyEmbeddingHelper(embedding=embeddings,
                                                 start_tokens=start_tokens,
                                                 end_token=END)
    projection_layer = layers_core.Dense(vocab_size, use_bias=False)

    def decode(helper):
        decoder = seq2seq.BasicDecoder(cell=cell,
                                       helper=helper,
                                       initial_state=enc_state,
                                       output_layer=projection_layer)
        #decoder.tracks_own_finished=True
        (dec_outputs, _,
         _) = seq2seq.dynamic_decode(decoder, maximum_iterations=seq_len)
        #(dec_outputs,_,_) = seq2seq.dynamic_decode(decoder)
        dec_ids = dec_outputs.sample_id
        logits = dec_outputs.rnn_output
        return dec_ids, logits

    #equalize logits, label and weight lengths in case the decoder finishes early
    def norm_logits_loss(logts, ids, weights):
        current_ts = tf.to_int32(
            tf.minimum(tf.shape(ids)[1],
                       tf.shape(logts)[1]))
        logts = tf.slice(logts, begin=[0, 0, 0], size=[-1, current_ts, -1])
        ids = tf.slice(ids, begin=[0, 0], size=[-1, current_ts])
        weights = tf.slice(weights, begin=[0, 0], size=[-1, current_ts])
        return logts, ids, weights

    #training mode
    if state == "Training":
        dec_ids, logits = decode(train_helper)
        # some sample_id are overwritten with '-1's
        #dec_ids = tf.argmax(logits, axis=2)
        tf.identity(dec_ids, name="predictions")
        logits, ids, weights = norm_logits_loss(logits, ids, weights)
        loss = tf.contrib.seq2seq.sequence_loss(logits, ids, weights=weights)
        learning_rate = 0.001  #0.0001

        tf.identity(learning_rate, name="learning_rate")

    #evaluation mode
    if state == "Evaluating" or state == "Testing":
        eval_dec_ids, eval_logits = decode(infer_helper)
        #eval_dec_ids = tf.argmax(eval_logits, axis=2)
        tf.identity(eval_dec_ids, name="predictions")

        #equalize logits, label and weight lengths in case the decoder finishes early
        eval_logits, ids, weights = norm_logits_loss(eval_logits, ids, weights)
        eval_loss = tf.contrib.seq2seq.sequence_loss(eval_logits,
                                                     ids,
                                                     weights=weights)

    #beamSearch decoder
    init_state = tf.contrib.seq2seq.tile_batch(enc_state, multiplier=5)
    beamSearch_decoder = seq2seq.BeamSearchDecoder(
        cell,
        embeddings,
        start_tokens,
        end_token=END,
        initial_state=init_state,
        beam_width=5,
        output_layer=projection_layer)
    (infer_outputs, _, _) = seq2seq.dynamic_decode(beamSearch_decoder,
                                                   maximum_iterations=seq_len)
    infer_ids = infer_outputs.predicted_ids
    infer_probs = infer_outputs.beam_search_decoder_output.scores
    infer_probs = tf.reduce_prod(infer_probs, axis=1)
    infer_pos = tf.argmax(infer_probs, axis=1)
    infers = {"ids": infer_ids, "pos": infer_pos}

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = layers.optimize_loss(loss,
                                        tf.train.get_global_step(),
                                        optimizer='Adam',
                                        learning_rate=learning_rate,
                                        clip_gradients=5.0)

        spec = tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=dec_ids,
                                          loss=loss,
                                          train_op=train_op)
    #evaluation mode
    elif mode == tf.estimator.ModeKeys.EVAL:
        spec = tf.estimator.EstimatorSpec(mode=mode,
                                          loss=eval_loss,
                                          predictions=eval_dec_ids)
    else:
        spec = tf.estimator.EstimatorSpec(mode=mode, predictions=infers)
    return spec
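
# A minimal sketch of wiring the model_fn above into a TF1 Estimator.
# Names below are assumptions, not part of the original source: train_x,
# train_ids, train_weights and train_labels are numpy arrays produced by the
# embedding-lookup / batching helpers, and BATCH_SIZE is the module constant
# matching params["batch_size"]. The module-level `state` flag must be set to
# "Training" before calling train(), since the TRAIN branch reads it.
def example_train(model_dir, train_x, train_ids, train_weights, train_labels):
    estimator = tf.estimator.Estimator(model_fn=rbmE_gruD,
                                       model_dir=model_dir,
                                       params={"batch_size": BATCH_SIZE})
    # batch_size here must equal params["batch_size"], because the decoder
    # builds its start_tokens with that static batch size.
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_x, "ids": train_ids, "weights": train_weights},
        y=train_labels,
        batch_size=BATCH_SIZE,
        num_epochs=None,
        shuffle=True)
    return estimator.train(input_fn=input_fn, steps=100)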