Example #1
def generator_train():
    # Yield training samples one document at a time: BERT embeddings,
    # label, and the document-/sentence-level sequence lengths.
    for data in train_data:
        x, y = data[0], data[1]
        x_emb = embedding_bert.get_batch_emb([x], doc_len, sen_len,
                                             tokenizer, estimator)
        doc_seq_len, sen_seq_len = embedding_bert.get_batch_seq([x], doc_len,
                                                                sen_len,
                                                                tokenizer,
                                                                tol=2)
        yield x_emb[0], y, doc_seq_len, sen_seq_len
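Nothing in this excerpt consumes the generator; below is a minimal sketch of how it could feed a tf.data input pipeline, assuming TensorFlow 1.x (consistent with the sess.run usage in Example #2) and assuming the yielded embeddings are float32 while the label and length arrays are int32.

import tensorflow as tf

# Sketch only: the dtypes and the one-shot iterator wiring are assumptions,
# not taken from the original code.
dataset = tf.data.Dataset.from_generator(
    generator_train,
    output_types=(tf.float32, tf.int32, tf.int32, tf.int32))
dataset = dataset.batch(batch_size)
iterator = dataset.make_one_shot_iterator()            # TF 1.x iterator API
x_emb_batch, y_batch, doc_len_batch, sen_len_batch = iterator.get_next()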
Example #2
        # Average the accumulated training metrics over the epoch's batches.
        #acc_train /= n_batch
        loss_train /= n_batch
        rank_train /= n_batch

        # testing
        n_batch = len(X_test_doc) // batch_size
        # Loop over the test batches with a progress bar.
        for i in tqdm(range(n_batch), total=n_batch):
            X_batch, y_batch = utils.get_batch_test(X_test_doc, y_test, i,
                                                    batch_size)
            X_batch_emb = embedding_bert.get_batch_emb(X_batch, doc_len,
                                                       sen_len, tokenizer,
                                                       estimator)
            doc_seq_len, sen_seq_len = embedding_bert.get_batch_seq(X_batch,
                                                                    doc_len,
                                                                    sen_len,
                                                                    tokenizer,
                                                                    tol=2)
            # Evaluate loss and predictions for this test batch.
            loss, y_pred_val = sess.run(
                [cost, y_pred],
                feed_dict={
                    X_emb: X_batch_emb,
                    y: y_batch,
                    doc_seq_length: doc_seq_len,
                    # the original excerpt is truncated here; the remaining
                    # feed entry and closing brackets are assumed:
                    sen_seq_length: sen_seq_len,
                })
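The excerpt stops inside the test loop; presumably the per-batch loss is then accumulated and averaged over n_batch, mirroring the training metrics above. A minimal sketch of that pattern, with loss_test as an assumed accumulator name that does not appear in the original:

            loss_test += loss       # inside the loop: accumulate (assumed name)
        loss_test /= n_batch        # after the loop: average over the test batches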
Example #3
doc_len = 5    # document length, in sentences
sen_len = 10   # sentence length, in tokens

batch_size = 32

# In[63]:

# Build the BERT tokenizer and the estimator used to compute embeddings.
tokenizer, estimator = embedding_bert.prepare_bert(bert_vocab_file,
                                                   bert_config_file,
                                                   init_checkpoint, sen_len,
                                                   select_layers, batch_size,
                                                   graph_file, model_dir)

# In[11]:

# Embed a single sample document.
xx = embedding_bert.get_batch_emb([data_part[0][0]], doc_len, sen_len,
                                  tokenizer, estimator)

# In[12]:

xx.shape
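The returned embeddings are presumably a four-dimensional array laid out as (batch, doc_len, sen_len, hidden_size); the slice X_batch_emb[:, :, 0, :] commented out in Example #2 is consistent with that layout, but the exact shape depends on select_layers and on embedding_bert.get_batch_emb, which is not shown here.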

# In[9]:

# Compute document- and sentence-level sequence lengths for two sample documents.
a, b = embedding_bert.get_batch_seq([data_part[0][0], data_part[1][0]],
                                    doc_len,
                                    sen_len,
                                    tokenizer,
                                    tol=2)
a, b
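Here a and b play the same role as doc_seq_len and sen_seq_len in Examples #1 and #2: the document-level and sentence-level sequence lengths that get_batch_seq computes for the two sample documents. A hedged sketch of how they would be fed to the model, with the placeholder names taken from Example #2 (sen_seq_length is an assumed name):

feed_dict = {doc_seq_length: a, sen_seq_length: b}   # plus X_emb and y, as in Example #2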