Example #1
def generator_train():
    # Yield the document-level and sentence-level sequence lengths for each training example.
    for data in train_data:
        x, y = data[0], data[1]
        #x_emb = embedding_bert.get_batch_emb(x, doc_len, sen_len, tokenizer, estimator)
        doc_seq_len, sen_seq_len = embedding_bert.get_batch_seq([x],
                                                                doc_len,
                                                                sen_len,
                                                                tokenizer,
                                                                tol=2)
        yield doc_seq_len, sen_seq_len
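
A minimal sketch (assumed usage, not part of the original example) of how a generator like this could feed a TF 1.x tf.data input pipeline; the dtypes and the batch size are placeholders:

import tensorflow as tf  # TF 1.x, matching the session-based code in Example #2

dataset = tf.data.Dataset.from_generator(
    generator_train,
    output_types=(tf.int32, tf.int32))   # (doc_seq_len, sen_seq_len)
dataset = dataset.batch(32)              # hypothetical batch size
iterator = dataset.make_one_shot_iterator()
next_doc_len, next_sen_len = iterator.get_next()
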
Example #2
        #print('Epoch:', '%d' % (epoch + 0), 'cost =', '{:.6f}'.format(loss))

        # testing: evaluate on the held-out test set in mini-batches
        n_batch = len(X_test_doc) // batch_size
        #for i in range(n_batch):
        for i in tqdm(range(n_batch), total=n_batch):
            X_batch, y_batch = utils.get_batch_test(X_test_doc, y_test, i,
                                                    batch_size)
            X_batch_emb = embedding_bert.get_batch_emb(X_batch, doc_len,
                                                       sen_len, tokenizer,
                                                       estimator)
            #X_batch_emb = X_batch_emb[:, :, 0, :]
            #X_batch_seq, _ = embedding_bert.get_batch_seq(X_batch, doc_len, sen_len, tokenizer, tol = 2)
            doc_seq_len, sen_seq_len = embedding_bert.get_batch_seq(X_batch,
                                                                    doc_len,
                                                                    sen_len,
                                                                    tokenizer,
                                                                    tol=2)
            #batch_seq_len = np.array([list(x).index(0) + 1 for x in X_batch])
            loss, y_pred_val = sess.run(
                [cost, y_pred],
                feed_dict={
                    X_emb: X_batch_emb,
                    y: y_batch,
                    doc_seq_length: doc_seq_len,
                    sen_seq_length: sen_seq_len,
                    is_training: False
                })  #seq_length: batch_seq_len
            #acc_test += acc
            # loss_train = loss_tr * DELTA + loss_train * (1 - DELTA), delta = 0.5??
            loss_test += loss
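
        # Hedged sketch (not part of the original snippet): assuming loss_test
        # accumulates the per-batch loss above, the epoch-level test loss is
        # the mean over the n_batch test batches.
        avg_loss_test = loss_test / n_batch
        print('test loss = {:.6f}'.format(avg_loss_test))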

### test


# In[19]:


X_train[:3]


# In[20]:


# Embed the first three training documents and get their sequence lengths.
a = embedding_bert.get_batch_emb(X_train[:3], doc_len, sen_len, tokenizer, estimator)
b, c = embedding_bert.get_batch_seq(X_train[:3], doc_len, sen_len, tokenizer, tol=2)


# In[21]:


a.shape, b.shape, c.shape
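
A hedged reading of those shapes, inferred from how the same outputs are unpacked as doc_seq_len and sen_seq_len in Examples #1 and #2 (not from embedding_bert's documentation):

# Hypothetical expectation for a batch of 3 documents (shapes not verified):
#   a -> per-token BERT embeddings for the batch
#   b -> doc_seq_len, one sentence count per document
#   c -> sen_seq_len, token counts per sentence of each document
assert a.shape[0] == b.shape[0] == c.shape[0] == 3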


# In[22]:
Example #4

prepare_bert(bert_vocab_file, bert_config_file, init_checkpoint, sen_len, select_layers, batch_size, graph_file)
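
The call above assumes the configuration names are defined earlier in the notebook; a hypothetical set of values (placeholder paths and numbers, not from the original source) could look like:

bert_vocab_file  = './uncased_L-12_H-768_A-12/vocab.txt'         # placeholder path
bert_config_file = './uncased_L-12_H-768_A-12/bert_config.json'  # placeholder path
init_checkpoint  = './uncased_L-12_H-768_A-12/bert_model.ckpt'   # placeholder path
sen_len          = 64          # max tokens per sentence (hypothetical)
select_layers    = [-1, -2]    # BERT layers to extract (hypothetical)
batch_size       = 32          # hypothetical
graph_file       = './bert_graph.pb'                             # placeholder path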


# In[17]:


### test


# In[18]:


# Embed the first four training documents and get their sequence lengths.
a = embedding_bert.get_batch_emb(X_train[:4], doc_len, sen_len, tokenizer, estimator)
b, c = embedding_bert.get_batch_seq(X_train[:4], doc_len, sen_len, tokenizer, tol=2)


# In[19]:


a.shape, b.shape, c.shape


# In[20]:


c


# In[22]:
Example #5
# In[11]:

xx = embedding_bert.get_batch_emb([data_part[0][0]], doc_len, sen_len,
                                  tokenizer, estimator)

# In[12]:

xx.shape

# In[9]:

a, b = embedding_bert.get_batch_seq([data_part[0][0], data_part[1][0]],
                                    doc_len,
                                    sen_len,
                                    tokenizer,
                                    tol=2)
a, b

# In[10]:

a[0], b[0]

# In[11]:

xx[0].shape

# In[24]:

embedding_bert.get_batch_seq([data_part[4][0]],