Example #1
def main(_):
    model = BERT_NER(param)

    model.build(input_shape=(4, param.batch_size, param.maxlen))

    model.summary()

    # Write the data; the check_exist=True flag makes the writer only write the files on the first call
    writer = TFWriter(param.maxlen,
                      vocab_file,
                      data_dir=FLAGS.data_dir,
                      modes=["test"],
                      check_exist=True)

    ner_load = TFLoader(param.maxlen,
                        param.batch_size,
                        data_dir=FLAGS.data_dir)

    # Metrics
    f1score = Metric.SparseF1Score("macro", predict_sparse=True)
    precisionscore = Metric.SparsePrecisionScore("macro", predict_sparse=True)
    recallscore = Metric.SparseRecallScore("macro", predict_sparse=True)
    accuracyscore = Metric.SparseAccuracy(predict_sparse=True)

    # Restore the model
    checkpoint = tf.train.Checkpoint(model=model)
    checkpoint.restore(tf.train.latest_checkpoint('./save'))
    # Evaluate the model
    Batch = 0
    f1s = []
    precisions = []
    recalls = []
    accuracies = []
    for X, token_type_id, input_mask, Y in ner_load.load_test():
        predict = model.predict([X, token_type_id, input_mask,
                                 Y])  # [batch_size, max_length, label_size]

        f1s.append(f1score(Y, predict))
        precisions.append(precisionscore(Y, predict))
        recalls.append(recallscore(Y, predict))
        accuracies.append(accuracyscore(Y, predict))
        print("Sentence",
              writer.convert_id_to_vocab(tf.reshape(X, [-1]).numpy()))

        print("Label",
              writer.convert_id_to_label(tf.reshape(predict, [-1]).numpy()))
    print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}\n".format(
        np.mean(f1s), np.mean(precisions), np.mean(recalls),
        np.mean(accuracies)))
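
Since main(_) takes the conventional unused argument and the script reads FLAGS.data_dir, it is presumably launched through absl's flag-parsing entry point; a minimal sketch of the invocation (an assumption, not shown in the original):

if __name__ == "__main__":
    from absl import app
    app.run(main)  # parses the flags, then calls main(argv)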
Example #2

model = BERT_NER(param)

model.build(input_shape=(3, param.batch_size, param.maxlen))

model.summary()

# Write the data; the check_exist=True flag makes the writer only write the files on the first call
writer = TFWriter(param.maxlen, vocab_file,
                  modes=["valid"], check_exist=False)

ner_load = TFLoader(param.maxlen, param.batch_size, epoch=3)

# Metrics
f1score = Metric.SparseF1Score(average="macro")
precisionscore = Metric.SparsePrecisionScore(average="macro")
recallscore = Metric.SparseRecallScore(average="macro")
accuracyscore = Metric.SparseAccuracy()

# Restore the model
checkpoint = tf.train.Checkpoint(model=model)
checkpoint.restore(tf.train.latest_checkpoint('./save'))
# Validate the model
Batch = 0
f1s = []
precisions = []
recalls = []
accuracies = []
for X, token_type_id, input_mask, Y in ner_load.load_valid():
    predict = model.predict([X, token_type_id, input_mask])  # [batch_size, max_length, label_size]
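
The example is truncated here; a minimal sketch of how the loop might continue, mirroring the metric accumulation and summary printing of Example #1 (the continuation is an assumption):

    # Assumed continuation, following the pattern of Example #1.
    f1s.append(f1score(Y, predict))
    precisions.append(precisionscore(Y, predict))
    recalls.append(recallscore(Y, predict))
    accuracies.append(accuracyscore(Y, predict))

print("f1:{}\tprecision:{}\trecall:{}\taccuracy:{}".format(
    np.mean(f1s), np.mean(precisions), np.mean(recalls), np.mean(accuracies)))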
Example #3
bert_init_weights_from_checkpoint(model,
                                  model_path,
                                  param.num_hidden_layers,
                                  pooler=False)

# Write the data; the check_exist=True flag makes the writer only write the files on the first call
writer = TFWriter(param.maxlen, vocab_file, modes=["train"], check_exist=False)

ner_load = TFLoader(param.maxlen, param.batch_size, epoch=5)

# Train the model
# Use TensorBoard for logging
summary_writer = tf.summary.create_file_writer("./tensorboard")

# Metrics
f1score = Metric.SparseF1Score(average="macro", predict_sparse=True)
precisionscore = Metric.SparsePrecisionScore(average="macro",
                                             predict_sparse=True)
recallscore = Metric.SparseRecallScore(average="macro", predict_sparse=True)
accuracyscore = Metric.SparseAccuracy(predict_sparse=True)

# Save the model
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint,
                                     directory="./save",
                                     checkpoint_name="model.ckpt",
                                     max_to_keep=3)
# Train the model
Batch = 0
for X, token_type_id, input_mask, Y in ner_load.load_train():
    with tf.GradientTape(persistent=True) as tape:
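The snippet breaks off inside the gradient tape; a minimal sketch of how the training step might continue, following the pattern of Example #6 (optimizer_bert and optimizer_crf are assumed to be defined earlier in the script):

        loss, predict = model([X, token_type_id, input_mask, Y])
        f1 = f1score(Y, predict)
        accuracy = accuracyscore(Y, predict)
    # Assumed continuation: separate warmup optimizers for the BERT body and the CRF head.
    grads_bert = tape.gradient(loss, model.bert.variables + model.dense.variables)
    grads_crf = tape.gradient(loss, model.crf.variables)
    optimizer_bert.apply_gradients(grads_and_vars=zip(grads_bert, model.bert.variables + model.dense.variables))
    optimizer_crf.apply_gradients(grads_and_vars=zip(grads_crf, model.crf.variables))
    Batch += 1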
Example #4
patience = 100
# penalty = 0.0005 # for cora and citeseer
penalty = 0.001  # for pubmed

loader = graphloader.GCNLoader(dataset="pubmed", loop=True, features_norm=True)

features, adj, y_train, y_val, y_test, train_mask, val_mask, test_mask = loader.load()

model = GAT.GATLayer(hidden_dim=hidden_dim,
                     num_class=num_class,
                     dropout_rate=drop_rate)

optimizer = tf.keras.optimizers.Adam(0.01)
crossentropy = Losess.MaskCategoricalCrossentropy()
accscore = Metric.MaskAccuracy()
stop_monitor = EarlyStopping(monitor="loss", patience=patience)

# ---------------------------------------------------------
# Train the model
for p in range(epoch):
    t = time.time()
    with tf.GradientTape() as tape:
        predict = model(features, adj, training=True)
        loss = crossentropy(y_train, predict, train_mask)
        loss += tf.add_n([
            tf.nn.l2_loss(v) for v in model.variables if "bias" not in v.name
        ]) * penalty
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
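
The published snippet ends before the validation step; a minimal sketch of how the EarlyStopping monitor defined above might be used (the MaskAccuracy call and the EarlyStopping signature are assumptions):

    # Assumed continuation: validate and stop early once the monitored loss plateaus.
    predict_val = model(features, adj, training=False)
    val_loss = crossentropy(y_val, predict_val, val_mask)
    val_acc = accscore(y_val, predict_val, val_mask)
    print("Epoch {} | val_loss {:.4f} | val_acc {:.4f} | time {:.2f}s".format(
        p, val_loss.numpy(), val_acc, time.time() - t))
    if stop_monitor(loss=val_loss):  # assumed signature for fennlp's EarlyStopping
        break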
Example #5
writer = TFWriter(param.maxlen,
                  vocab_file,
                  modes=["train"],
                  check_exist=False,
                  task='cls',
                  tokenizer="sentencepiece",
                  spm_model_file=spm_model_file)

ner_load = TFLoader(param.maxlen, param.batch_size, task='cls', epoch=3)

# Train the model
# Use TensorBoard for logging
summary_writer = tf.summary.create_file_writer("./tensorboard")

# Metrics
accuracyscore = Metric.SparseAccuracy()

# Save the model
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint,
                                     directory="./save",
                                     checkpoint_name="model.ckpt",
                                     max_to_keep=3)
# Train the model
Batch = 0
for X, token_type_id, input_mask, Y in ner_load.load_train():
    with tf.GradientTape() as tape:
        predict = model([X, token_type_id, input_mask])
        loss = sparse_categorical_loss(Y, predict)
        accuracy = accuracyscore(Y, predict)
        if Batch % 100 == 0:
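The example is cut off inside the logging branch; a minimal sketch of how the step might finish, modeled on Example #6 (the single optimizer is an assumption, since its construction is not shown):

            print("Batch:{}\tloss:{:.4f}".format(Batch, loss.numpy()))
            print("Batch:{}\tacc:{:.4f}".format(Batch, accuracy))
            manager.save(checkpoint_number=Batch)
        with summary_writer.as_default():
            tf.summary.scalar("loss", loss, step=Batch)
            tf.summary.scalar("acc", accuracy, step=Batch)
    # Assumed continuation: one optimizer step per batch.
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
    Batch += 1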
Example #6
def main(_):
    model = BERT_NER(param)

    model.build(input_shape=(4, param.batch_size, param.maxlen))

    model.summary()

    # Build the optimizers

    optimizer_bert = optim.AdamWarmup(learning_rate=2e-5,  # important parameter
                                      decay_steps=10000,  # important parameter
                                      warmup_steps=1000)
    optimizer_crf = optim.AdamWarmup(learning_rate=1e-3,
                                     decay_steps=10000,  # important parameter
                                     warmup_steps=1000)
    # Initialize the parameters from the pretrained checkpoint
    bert_init_weights_from_checkpoint(model,
                                      model_path,
                                      param.num_hidden_layers,
                                      pooler=False)

    # Write the data; the check_exist=True flag makes the writer only write the files on the first call
    writer = TFWriter(param.maxlen, vocab_file, data_dir=FLAGS.data_dir,
                      modes=["train"], check_exist=False)

    ner_load = TFLoader(param.maxlen, param.batch_size, data_dir=FLAGS.data_dir, epoch=5)

    # Train the model
    # Use TensorBoard for logging
    summary_writer = tf.summary.create_file_writer("./tensorboard")

    # Metrics
    f1score = Metric.SparseF1Score(average="macro", predict_sparse=True)
    precisionscore = Metric.SparsePrecisionScore(average="macro", predict_sparse=True)
    recallscore = Metric.SparseRecallScore(average="macro", predict_sparse=True)
    accuracyscore = Metric.SparseAccuracy(predict_sparse=True)

    # Save the model
    checkpoint = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(checkpoint, directory="./save",
                                         checkpoint_name="model.ckpt",
                                         max_to_keep=3)
    # Train the model
    Batch = 0
    for X, token_type_id, input_mask, Y in ner_load.load_train():
        with tf.GradientTape(persistent=True) as tape:
            loss, predict = model([X, token_type_id, input_mask, Y])

            f1 = f1score(Y, predict)
            precision = precisionscore(Y, predict)
            recall = recallscore(Y, predict)
            accuracy = accuracyscore(Y, predict)
            if Batch % 101 == 0:
                print("Batch:{}\tloss:{:.4f}".format(Batch, loss.numpy()))
                print("Batch:{}\tacc:{:.4f}".format(Batch, accuracy))
                print("Batch:{}\tprecision{:.4f}".format(Batch, precision))
                print("Batch:{}\trecall:{:.4f}".format(Batch, recall))
                print("Batch:{}\tf1score:{:.4f}".format(Batch, f1))

                print("Sentence", writer.convert_id_to_vocab(tf.reshape(X, [-1]).numpy()))
                print("predict", writer.convert_id_to_label(tf.reshape(predict, [-1]).numpy()))
                print("label", writer.convert_id_to_label(tf.reshape(Y, [-1]).numpy()))
                manager.save(checkpoint_number=Batch)

            with summary_writer.as_default():
                tf.summary.scalar("loss", loss, step=Batch)
                tf.summary.scalar("acc", accuracy, step=Batch)
                tf.summary.scalar("f1", f1, step=Batch)
                tf.summary.scalar("precision", precision, step=Batch)
                tf.summary.scalar("recall", recall, step=Batch)

        grads_bert = tape.gradient(loss, model.bert.variables + model.dense.variables)
        grads_crf = tape.gradient(loss, model.crf.variables)
        optimizer_bert.apply_gradients(grads_and_vars=zip(grads_bert, model.bert.variables + model.dense.variables))
        optimizer_crf.apply_gradients(grads_and_vars=zip(grads_crf, model.crf.variables))
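        # The tape was created with persistent=True so it can be queried twice;
        # dropping it explicitly here releases the resources it holds (an addition,
        # recommended by the tf.GradientTape docs, not part of the original).
        del tape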
        Batch += 1
    
    model.save("model_save")
Example #7
epoch = 300
lr = 0.001
split = 10  # 10-fold

tf.random.set_seed(1124)
accs_all = []
dataloader = TUDataset(name="PROTEINS", split=split)

for block_index in range(split):

    model = GINLayer(dim, num_class, drop_rate)

    optimizer = tf.optimizers.Adam(lr)

    cross_entropy = Losess.MaskSparseCategoricalCrossentropy()
    acc_score = Metric.SparseAccuracy()

    train_data, test_data = dataloader.load(batch_size=128,
                                            block_index=block_index)
    for i in range(epoch):
        t = time.time()
        loss_all = []
        acc_all = []
        for x, y, edge_index, edge_attr, batch in train_data:
            x, y, edge_index, edge_attr, batch = merge_batch_graph(
                x, y, edge_index, edge_attr, batch)

            with tf.GradientTape() as tape:
                predict = model(x, edge_index, batch, training=True)
                loss = cross_entropy(y, predict)
                acc = acc_score(y, predict)
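
The inner loop is truncated; a minimal sketch of the optimizer step and per-epoch logging that would typically follow (the continuation is an assumption; numpy as np is assumed to be imported):

            # Assumed continuation of the training step.
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            loss_all.append(loss.numpy())
            acc_all.append(acc)
        print("Fold {} | Epoch {} | loss {:.4f} | acc {:.4f} | time {:.2f}s".format(
            block_index, i, np.mean(loss_all), np.mean(acc_all), time.time() - t))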
Example #8
from fennlp.metrics import Losess, Metric

_HIDDEN_DIM = 64
_NUM_CLASS = 7
_DROP_OUT_RATE = 0.5
_EPOCH = 100

loader = graphloader.GCNLoader()
features, adj, labels, idx_train, idx_val, idx_test = loader.load()

model = GCN.GCN2Layer(_HIDDEN_DIM, _NUM_CLASS, _DROP_OUT_RATE)

optimizer = tf.keras.optimizers.Adam(0.01)

crossentropy = Losess.MaskSparseCategoricalCrossentropy(from_logits=False)
accscore = Metric.SparseAccuracy()
f1score = Metric.SparseF1Score(average="macro")
# ---------------------------------------------------------
# Train the model
for epoch in range(_EPOCH):
    with tf.GradientTape() as tape:
        output = model(features, adj, training=True)
        predict = tf.gather(output, list(idx_train))
        label = tf.gather(labels, list(idx_train))
        loss = crossentropy(label, predict, use_mask=False)
        acc = accscore(label, predict)
        f1 = f1score(label, predict)
        print("Epoch {} | Loss {:.4f} | Acc {:.4f} | F1 {:.4f}".format(
            epoch, loss.numpy(), acc, f1))
    grads = tape.gradient(loss, model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
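
A matching evaluation pass on the held-out indices might look like the following sketch, reusing the tf.gather pattern from the training loop (an addition, not part of the original):

# Assumed evaluation on the test split.
output = model(features, adj, training=False)
predict = tf.gather(output, list(idx_test))
label = tf.gather(labels, list(idx_test))
print("Test | Acc {:.4f} | F1 {:.4f}".format(
    accscore(label, predict), f1score(label, predict)))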
Example #9
import numpy as np
import tensorflow as tf

from fennlp.datas.graphloader import TuckERLoader
from fennlp.metrics import Metric
from fennlp.models import tucker

lr = 0.005
label_smoothing = 0.1
batch_size = 128
training = True

loader = TuckERLoader(base_path="data")
er_vocab, er_vocab_pairs = loader.data_dump("train")

evaluate = Metric.HitN_MR_MRR(loader, mode="valid")

model = tucker.TuckER(loader)

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(lr,
                                                             decay_steps=2000,
                                                             decay_rate=0.995)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
# Build the loss function
binary_loss = tf.keras.losses.BinaryCrossentropy(
    from_logits=False, label_smoothing=label_smoothing)

# Save the model
checkpoint = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(checkpoint,
                                     directory="./save",
                                     checkpoint_name="model.ckpt",
                                     max_to_keep=3)
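
The example stops here; a minimal sketch of a training epoch in the usual TuckER setup (the loader.batch_data helper and the model call signature are assumptions, not fennlp's documented API):

# Assumed training loop: score every entity for each (head, relation) pair
# and train against the multi-hot targets with the label-smoothed BCE above.
for e1_idx, r_idx, targets in loader.batch_data(er_vocab, er_vocab_pairs, batch_size):
    with tf.GradientTape() as tape:
        predict = model(e1_idx, r_idx, training=training)
        loss = binary_loss(targets, predict)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
manager.save()
evaluate(model)  # assumed usage of the HitN_MR_MRR metric defined above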