def main(_):
    model = BERT_NER(param)
    model.build(input_shape=(4, param.batch_size, param.maxlen))
    model.summary()

    # Write the data; check_exist=True writes it only on the first call
    writer = TFWriter(param.maxlen, vocab_file, data_dir=FLAGS.data_dir,
                      modes=["test"], check_exist=True)
    ner_load = TFLoader(param.maxlen, param.batch_size, data_dir=FLAGS.data_dir)

    # Metrics
    f1score = Metric.SparseF1Score(average="macro", predict_sparse=True)
    precisionscore = Metric.SparsePrecisionScore(average="macro", predict_sparse=True)
    recallscore = Metric.SparseRecallScore(average="macro", predict_sparse=True)
    accuracyscore = Metric.SparseAccuracy(predict_sparse=True)

    # Restore the saved model
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore(tf.train.latest_checkpoint('./save'))

    # Test the model
    f1s = []
    precisions = []
    recalls = []
    accuracies = []
    for X, token_type_id, input_mask, Y in ner_load.load_test():
        predict = model.predict([X, token_type_id, input_mask, Y])  # [batch_size, max_length, label_size]
        f1s.append(f1score(Y, predict))
        precisions.append(precisionscore(Y, predict))
        recalls.append(recallscore(Y, predict))
        accuracies.append(accuracyscore(Y, predict))
        print("Sentence", writer.convert_id_to_vocab(tf.reshape(X, [-1]).numpy()))
        print("Label", writer.convert_id_to_label(tf.reshape(predict, [-1]).numpy()))
        # Running means over the batches seen so far
        print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}\n".format(
            np.mean(f1s), np.mean(precisions), np.mean(recalls), np.mean(accuracies)))
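# Entry point -- a minimal sketch, not part of the original excerpt.
# `def main(_)` and the FLAGS.data_dir reference suggest absl is used;
# the flag's default value here is a placeholder.
from absl import app, flags

flags.DEFINE_string("data_dir", "Input/data", "directory holding the TFRecord data")
FLAGS = flags.FLAGS

if __name__ == "__main__":
    app.run(main)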
model = BERT_NER(param)
model.build(input_shape=(3, param.batch_size, param.maxlen))
model.summary()

# Write the data; check_exist=True writes it only on the first call
writer = TFWriter(param.maxlen, vocab_file, modes=["valid"], check_exist=False)
ner_load = TFLoader(param.maxlen, param.batch_size, epoch=3)

# Metrics
f1score = Metric.SparseF1Score(average="macro")
precisionscore = Metric.SparsePrecisionScore(average="macro")
recallscore = Metric.SparseRecallScore(average="macro")
accuracyscore = Metric.SparseAccuracy()

# Restore the saved model
checkpoint = tf.train.Checkpoint(model=model)
checkpoint.restore(tf.train.latest_checkpoint('./save'))

# Validate the model
f1s = []
precisions = []
recalls = []
accuracies = []
for X, token_type_id, input_mask, Y in ner_load.load_valid():
    predict = model.predict([X, token_type_id, input_mask])  # [batch_size, max_length, label_size]
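    # Sketch (not in the original excerpt): accumulate per-batch metrics and
    # report the means, mirroring the test script above.
    f1s.append(f1score(Y, predict))
    precisions.append(precisionscore(Y, predict))
    recalls.append(recallscore(Y, predict))
    accuracies.append(accuracyscore(Y, predict))
print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}".format(
    np.mean(f1s), np.mean(precisions), np.mean(recalls), np.mean(accuracies)))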
# Initialize the parameters from the pretrained BERT checkpoint
bert_init_weights_from_checkpoint(model, model_path, param.num_hidden_layers, pooler=False)

# Write the data; check_exist=True writes it only on the first call
writer = TFWriter(param.maxlen, vocab_file, modes=["train"], check_exist=False)
ner_load = TFLoader(param.maxlen, param.batch_size, epoch=5)

# Train the model, logging to TensorBoard
summary_writer = tf.summary.create_file_writer("./tensorboard")

# Metrics
f1score = Metric.SparseF1Score(average="macro", predict_sparse=True)
precisionscore = Metric.SparsePrecisionScore(average="macro", predict_sparse=True)
recallscore = Metric.SparseRecallScore(average="macro", predict_sparse=True)
accuracyscore = Metric.SparseAccuracy(predict_sparse=True)

# Save the model
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint, directory="./save",
                                     checkpoint_name="model.ckpt", max_to_keep=3)

# Train the model
Batch = 0
for X, token_type_id, input_mask, Y in ner_load.load_train():
    # persistent=True so two gradients can be taken from the same tape
    with tf.GradientTape(persistent=True) as tape:
        # Forward pass returns the CRF loss and the predictions; the loop
        # continues as in the full CRF training example further down
        loss, predict = model([X, token_type_id, input_mask, Y])
patience = 100
# penalty = 0.0005  # for cora and citeseer
penalty = 0.001  # for pubmed

loader = graphloader.GCNLoader(dataset="pubmed", loop=True, features_norm=True)
features, adj, y_train, y_val, y_test, train_mask, val_mask, test_mask = loader.load()

model = GAT.GATLayer(hidden_dim=hidden_dim, num_class=num_class, dropout_rate=drop_rate)
optimizer = tf.keras.optimizers.Adam(0.01)
crossentropy = Losess.MaskCategoricalCrossentropy()
accscore = Metric.MaskAccuracy()
stop_monitor = EarlyStopping(monitor="loss", patience=patience)

# ---------------------------------------------------------
# For train
for p in range(epoch):
    t = time.time()
    with tf.GradientTape() as tape:
        predict = model(features, adj, training=True)
        loss = crossentropy(y_train, predict, train_mask)
        # L2 penalty on all non-bias variables
        loss += tf.add_n([tf.nn.l2_loss(v) for v in model.variables
                          if "bias" not in v.name]) * penalty
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
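    # Sketch (not in the original excerpt): per-epoch validation and early
    # stopping. The MaskAccuracy and EarlyStopping call signatures below are
    # assumptions -- check the fennlp source for the exact interfaces.
    predict = model(features, adj, training=False)
    val_loss = crossentropy(y_val, predict, val_mask)
    val_acc = accscore(y_val, predict, val_mask)
    print("Epoch {} | val loss {:.4f} | val acc {:.4f} | time {:.4f}s".format(
        p, val_loss.numpy(), val_acc, time.time() - t))
    if stop_monitor(loss=val_loss.numpy()):  # assumed: True once patience is exhausted
        break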
writer = TFWriter(param.maxlen, vocab_file, modes=["train"], check_exist=False,
                  task='cls', tokenizer="sentencepiece",
                  spm_model_file=spm_model_file)
ner_load = TFLoader(param.maxlen, param.batch_size, task='cls', epoch=3)

# Train the model, logging to TensorBoard
summary_writer = tf.summary.create_file_writer("./tensorboard")

# Metrics
accuracyscore = Metric.SparseAccuracy()

# Save the model
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint, directory="./save",
                                     checkpoint_name="model.ckpt", max_to_keep=3)

# Train the model
Batch = 0
for X, token_type_id, input_mask, Y in ner_load.load_train():
    with tf.GradientTape() as tape:
        predict = model([X, token_type_id, input_mask])
        loss = sparse_categotical_loss(Y, predict)
        accuracy = accuracyscore(Y, predict)
    if Batch % 100 == 0:
        print("Batch:{}\tloss:{:.4f}\tacc:{:.4f}".format(Batch, loss.numpy(), accuracy))
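    # Sketch (not in the original excerpt): the rest of the training step.
    # Assumes an `optimizer` -- e.g. tf.keras.optimizers.Adam -- was built
    # earlier in the script, which this excerpt does not show.
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(zip(grads, model.variables))
    with summary_writer.as_default():
        tf.summary.scalar("loss", loss, step=Batch)
        tf.summary.scalar("acc", accuracy, step=Batch)
    manager.save(checkpoint_number=Batch)
    Batch += 1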
def main(_):
    model = BERT_NER(param)
    model.build(input_shape=(4, param.batch_size, param.maxlen))
    model.summary()

    # Build the optimizers: a small learning rate for BERT, a larger one for the CRF
    optimizer_bert = optim.AdamWarmup(learning_rate=2e-5,  # important parameter
                                      decay_steps=10000,   # important parameter
                                      warmup_steps=1000)
    optimizer_crf = optim.AdamWarmup(learning_rate=1e-3,
                                     decay_steps=10000,    # important parameter
                                     warmup_steps=1000)

    # Initialize the parameters from the pretrained BERT checkpoint
    bert_init_weights_from_checkpoint(model, model_path, param.num_hidden_layers, pooler=False)

    # Write the data; check_exist=True writes it only on the first call
    writer = TFWriter(param.maxlen, vocab_file, data_dir=FLAGS.data_dir,
                      modes=["train"], check_exist=False)
    ner_load = TFLoader(param.maxlen, param.batch_size, data_dir=FLAGS.data_dir, epoch=5)

    # Train the model, logging to TensorBoard
    summary_writer = tf.summary.create_file_writer("./tensorboard")

    # Metrics
    f1score = Metric.SparseF1Score(average="macro", predict_sparse=True)
    precisionscore = Metric.SparsePrecisionScore(average="macro", predict_sparse=True)
    recallscore = Metric.SparseRecallScore(average="macro", predict_sparse=True)
    accuracyscore = Metric.SparseAccuracy(predict_sparse=True)

    # Save the model
    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(checkpoint, directory="./save",
                                         checkpoint_name="model.ckpt", max_to_keep=3)

    # Train the model
    Batch = 0
    for X, token_type_id, input_mask, Y in ner_load.load_train():
        # persistent=True so the tape can produce gradients for both optimizers
        with tf.GradientTape(persistent=True) as tape:
            loss, predict = model([X, token_type_id, input_mask, Y])
            f1 = f1score(Y, predict)
            precision = precisionscore(Y, predict)
            recall = recallscore(Y, predict)
            accuracy = accuracyscore(Y, predict)
            if Batch % 101 == 0:
                print("Batch:{}\tloss:{:.4f}".format(Batch, loss.numpy()))
                print("Batch:{}\tacc:{:.4f}".format(Batch, accuracy))
                print("Batch:{}\tprecision:{:.4f}".format(Batch, precision))
                print("Batch:{}\trecall:{:.4f}".format(Batch, recall))
                print("Batch:{}\tf1score:{:.4f}".format(Batch, f1))
                print("Sentence", writer.convert_id_to_vocab(tf.reshape(X, [-1]).numpy()))
                print("predict", writer.convert_id_to_label(tf.reshape(predict, [-1]).numpy()))
                print("label", writer.convert_id_to_label(tf.reshape(Y, [-1]).numpy()))
                manager.save(checkpoint_number=Batch)
                with summary_writer.as_default():
                    tf.summary.scalar("loss", loss, step=Batch)
                    tf.summary.scalar("acc", accuracy, step=Batch)
                    tf.summary.scalar("f1", f1, step=Batch)
                    tf.summary.scalar("precision", precision, step=Batch)
                    tf.summary.scalar("recall", recall, step=Batch)
        # Update the BERT (+ dense) and CRF variables with their own optimizers
        grads_bert = tape.gradient(loss, model.bert.variables + model.dense.variables)
        grads_crf = tape.gradient(loss, model.crf.variables)
        optimizer_bert.apply_gradients(grads_and_vars=zip(grads_bert,
                                                          model.bert.variables + model.dense.variables))
        optimizer_crf.apply_gradients(grads_and_vars=zip(grads_crf, model.crf.variables))
        Batch += 1
    model.save("model_save")
epoch = 300
lr = 0.001
split = 10  # 10-fold cross validation
tf.random.set_seed(1124)

accs_all = []
dataloader = TUDataset(name="PROTEINS", split=split)
for block_index in range(split):
    model = GINLayer(dim, num_class, drop_rate)
    optimize = tf.optimizers.Adam(lr)
    cross_entropy = Losess.MaskSparseCategoricalCrossentropy()
    acc_score = Metric.SparseAccuracy()
    train_data, test_data = dataloader.load(batch_size=128, block_index=block_index)
    for i in range(epoch):
        t = time.time()
        loss_all = []
        acc_all = []
        for x, y, edge_index, edge_attr, batch in train_data:
            # Merge the graphs in this batch into one disjoint union graph
            x, y, edge_index, edge_attr, batch = merge_batch_graph(
                x, y, edge_index, edge_attr, batch)
            with tf.GradientTape() as tape:
                predict = model(x, edge_index, batch, training=True)
                loss = cross_entropy(y, predict)
                acc = acc_score(y, predict)
            # Gradient update, as in the other training loops
            grads = tape.gradient(loss, model.variables)
            optimize.apply_gradients(zip(grads, model.variables))
            loss_all.append(loss.numpy())
            acc_all.append(acc)
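    # Sketch (not in the original excerpt): evaluate this fold on its held-out
    # block, then report the mean and std over all folds. Assumes np is
    # imported as numpy.
    fold_accs = []
    for x, y, edge_index, edge_attr, batch in test_data:
        x, y, edge_index, edge_attr, batch = merge_batch_graph(
            x, y, edge_index, edge_attr, batch)
        predict = model(x, edge_index, batch, training=False)
        fold_accs.append(acc_score(y, predict))
    accs_all.append(np.mean(fold_accs))
print("{}-fold accuracy: {:.4f} (+/- {:.4f})".format(split, np.mean(accs_all), np.std(accs_all)))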
from fennlp.metrics import Losess, Metric

_HIDDEN_DIM = 64
_NUM_CLASS = 7
_DROP_OUT_RATE = 0.5
_EPOCH = 100

loader = graphloader.GCNLoader()
features, adj, labels, idx_train, idx_val, idx_test = loader.load()

model = GCN.GCN2Layer(_HIDDEN_DIM, _NUM_CLASS, _DROP_OUT_RATE)
optimizer = tf.keras.optimizers.Adam(0.01)
crossentropy = Losess.MaskSparseCategoricalCrossentropy(from_logits=False)
accscore = Metric.SparseAccuracy()
f1score = Metric.SparseF1Score(average="macro")

# ---------------------------------------------------------
# For train
for epoch in range(_EPOCH):
    with tf.GradientTape() as tape:
        output = model(features, adj, training=True)
        predict = tf.gather(output, list(idx_train))
        label = tf.gather(labels, list(idx_train))
        loss = crossentropy(label, predict, use_mask=False)
        acc = accscore(label, predict)
        f1 = f1score(label, predict)
        print("Epoch {} | Loss {:.4f} | Acc {:.4f} | F1 {:.4f}".format(
            epoch, loss.numpy(), acc, f1))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
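# Sketch (not in the original excerpt): evaluate on the validation and test
# splits with the same tf.gather pattern used for training.
output = model(features, adj, training=False)
for name, idx in [("Val", idx_val), ("Test", idx_test)]:
    predict = tf.gather(output, list(idx))
    label = tf.gather(labels, list(idx))
    print("{} | Acc {:.4f} | F1 {:.4f}".format(name, accscore(label, predict),
                                               f1score(label, predict)))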
import numpy as np
import tensorflow as tf

from fennlp.datas.graphloader import TuckERLoader
from fennlp.metrics import Metric
from fennlp.models import tucker

lr = 0.005
label_smoothing = 0.1
batch_size = 128
training = True

loader = TuckERLoader(base_path="data")
er_vocab, er_vocab_pairs = loader.data_dump("train")
evaluate = Metric.HitN_MR_MRR(loader, mode="valid")

model = tucker.TuckER(loader)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(lr, decay_steps=2000,
                                                             decay_rate=0.995)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Build the loss function
binary_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False,
                                                 label_smoothing=label_smoothing)

# Save the model (directory and name arguments assumed, matching the other examples)
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint, directory="./save",
                                     checkpoint_name="model.ckpt", max_to_keep=3)
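# Sketch (not in the original excerpt): what the ExponentialDecay schedule
# above yields -- lr * 0.995 ** (step / 2000), since staircase defaults to False.
for step in [0, 2000, 10000]:
    print("step {:>5d} -> lr {:.6f}".format(step, float(lr_schedule(step))))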