def generator_train():
    # yield per-example (document-level, sentence-level) sequence lengths for the training data
    for data in train_data:
        x, y = data[0], data[1]
        #x_emb = embedding_bert.get_batch_emb(x, doc_len, sen_len, tokenizer, estimator)
        doc_seq_len, sen_seq_len = embedding_bert.get_batch_seq([x], doc_len, sen_len, tokenizer, tol=2)
        yield doc_seq_len, sen_seq_len
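# Illustrative sketch (not from the original notebook): one way generator_train could feed a
# TF 1.x tf.data input pipeline. The output dtypes are assumptions; the generator as written
# yields only the two length arrays returned by embedding_bert.get_batch_seq.
import tensorflow as tf

train_dataset = tf.data.Dataset.from_generator(
    generator_train,
    output_types=(tf.int32, tf.int32))  # (doc_seq_len, sen_seq_len)
train_dataset = train_dataset.batch(batch_size)
next_lengths = train_dataset.make_one_shot_iterator().get_next()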
#print('Epoch:', '%d' % (epoch + 0), 'cost =', '{:.6f}'.format(loss))

# testing
n_batch = len(X_test_doc) // batch_size
#for i in range(n_batch):
for i in tqdm(range(n_batch), total=n_batch):
    X_batch, y_batch = utils.get_batch_test(X_test_doc, y_test, i, batch_size)
    X_batch_emb = embedding_bert.get_batch_emb(X_batch, doc_len, sen_len, tokenizer, estimator)
    #X_batch_emb = X_batch_emb[:, :, 0, :]
    #X_batch_seq, _ = embedding_bert.get_batch_seq(X_batch, doc_len, sen_len, tokenizer, tol=2)
    doc_seq_len, sen_seq_len = embedding_bert.get_batch_seq(X_batch, doc_len, sen_len, tokenizer, tol=2)
    #batch_seq_len = np.array([list(x).index(0) + 1 for x in X_batch])
    loss, y_pred_val = sess.run(
        [cost, y_pred],
        feed_dict={
            X_emb: X_batch_emb,
            y: y_batch,
            doc_seq_length: doc_seq_len,
            sen_seq_length: sen_seq_len,
            is_training: False
        })
        #seq_length: batch_seq_len
    #acc_test += acc
    # loss_train = loss_tr * DELTA + loss_train * (1 - DELTA), delta = 0.5??
    loss_test += loss
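# Illustrative follow-up (assumption, not in the original notebook): average the accumulated
# test loss over the number of mini-batches before reporting it.
loss_test_mean = loss_test / n_batch
print('test loss = {:.6f}'.format(loss_test_mean))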
### test

# In[19]:

X_train[:3]

# In[20]:

a = embedding_bert.get_batch_emb(X_train[:3], doc_len, sen_len, tokenizer, estimator)
b, c = embedding_bert.get_batch_seq(X_train[:3], doc_len, sen_len, tokenizer, tol=2)

# In[ ]:

# In[21]:

a.shape, b.shape, c.shape

# In[22]:
prepare_bert(bert_vocab_file, bert_config_file, init_checkpoint, sen_len, select_layers, batch_size, graph_file)

# In[17]:

### test

# In[18]:

a = embedding_bert.get_batch_emb(X_train[:4], doc_len, sen_len, tokenizer, estimator)
b, c = embedding_bert.get_batch_seq(X_train[:4], doc_len, sen_len, tokenizer, tol=2)

# In[19]:

a.shape, b.shape, c.shape

# In[20]:

c

# In[22]:
# In[11]:

xx = embedding_bert.get_batch_emb([data_part[0][0]], doc_len, sen_len, tokenizer, estimator)

# In[12]:

xx.shape

# In[ ]:

# In[9]:

a, b = embedding_bert.get_batch_seq([data_part[0][0], data_part[1][0]], doc_len, sen_len, tokenizer, tol=2)
a, b

# In[10]:

a[0], b[0]

# In[11]:

xx[0].shape

# In[24]:

embedding_bert.get_batch_seq([data_part[4][0]],