def predict(self, data, pretrained_model = ""): tf.reset_default_graph() if pretrained_model == "": ner = GCNNer(ner_filename = self.model_file, trans_prob_file = self.trans_prob_file_name) else: ner = pretrained_model file = open(data, "r") d = file.readlines()[2:] sentence = self.convertData(d) x = sentence.strip().split("\n") entities = [] for each in x: entity_tuples = ner.get_entity_tuples_from_text(each) entities.append(entity_tuples) start = entities[0][0][2] final_list = [] for each in entities: for i in each: a = (i[0], i[1], start, i[3]) start = start + i[3] + 1 final_list.append(a) new_list = [] print("*************Predicted entity types: [word, predicted_type, start_position, span]**************") for each in final_list: a = (self.ground_truth[final_list.index(each)][0], each[1], each[2], each[3]) new_list.append(a) print(a) output_list = [] for each in new_list: a = (self.ground_truth[new_list.index(each)][0], self.ground_truth[new_list.index(each)][1], each[1]) output_list.append(a) file.close() return output_list
def evaluate(self, predictions, groundTruths, pretrained_model = ""): tf.reset_default_graph() if pretrained_model == "": ner = GCNNer(ner_filename = self.model_file, trans_prob_file = self.trans_prob_file_name) else: ner = pretrained_model (precision, recall, f1) = ner.test(predictions, groundTruths, self.test_data) return (precision, recall, f1)
import numpy as np
from numpy import genfromtxt

from gcn_ner import GCNNer

# Fine-tune from the unlabeled_50 checkpoint on random samples only.
# GCNNer.train_and_save(dataset='./data/labeled.conll', saving_dir='./data/unlabeled_50_random', epochs=20, al_args=al_list, load_ckpt="./data/unlabeled_50/ner-gcn-9.tf")

# Fine-tune from the unlabeled_50 checkpoint on the most uncertain samples only.
# my_data = genfromtxt('unlabeled_50_scores_sorted.csv', delimiter=',')
# al_length = 3750
# al_list = list(my_data[:al_length, 0].astype(int))
# print("Total finetuning samples: {}".format(len(al_list)))
# GCNNer.train_and_save(dataset='./data/labeled.conll', saving_dir='./data/unlabeled_50_uncertain_2', epochs=20, al_args=al_list, load_ckpt="./data/unlabeled_50/ner-gcn-9.tf")

# Fine-tune on the most uncertain unlabeled samples combined with the labeled set.
my_data = genfromtxt('unlabeled_50_scores_sorted.csv', delimiter=',')
al_length = 3750
al_list = list(my_data[:al_length, 0].astype(int))
al_list.extend(range(45112, 45112 + 15177))
print("Total finetuning samples: {}".format(len(al_list)))
GCNNer.train_and_save(dataset='./data/labeled_and_unlabeled_50.conll', saving_dir='./data/unlabeled_50_uncertain_combined', epochs=20, al_args=al_list, load_ckpt="./data/unlabeled_50/ner-gcn-9.tf")

# Fine-tune on random unlabeled samples combined with the labeled set.
# al_length = 3750
# al_list = list(np.random.randint(0, 45112, al_length))
# al_list.extend(range(45112, 45112 + 15177))
# print("Total finetuning samples: {}".format(len(al_list)))
# GCNNer.train_and_save(dataset='./data/labeled_and_unlabeled_50.conll', saving_dir='./data/unlabeled_50_random_combined', epochs=20, al_args=al_list, load_ckpt="./data/unlabeled_50/ner-gcn-9.tf")

# Train from scratch on the most uncertain unlabeled samples combined with the labeled set.
# my_data = genfromtxt('unlabeled_50_scores_sorted.csv', delimiter=',')
# al_length = 3750
# al_list = list(my_data[:al_length, 0].astype(int))
# al_list.extend(range(45112, 45112 + 15177))
# print("Total finetuning samples: [UC] {}".format(len(al_list)))
# GCNNer.train_and_save(dataset='./data/labeled_and_unlabeled_50.conll', saving_dir='./data/unlabeled_50_uncertain_combined_scratch', epochs=30, al_args=al_list)
from gcn_ner import GCNNer

if __name__ == '__main__':
    ner = GCNNer(ner_filename='./data/ner-gcn-21.tf', trans_prob_file='./data/trans_prob.pickle')
    ner.test('./data/dev.conll')
from gcn_ner import GCNNer

if __name__ == '__main__':
    GCNNer.train_and_save(dataset='./data/train.conll', saving_dir='./data/', epochs=31)
def train(self, data, saving_dir='./data/', epochs=2, bucket_size=10):
    tf.reset_default_graph()
    # Train a GCN-NER model on the given dataset and persist the checkpoint.
    (file, gcn_model) = GCNNer.train_and_save(dataset=data, saving_dir=saving_dir, epochs=epochs, bucket_size=bucket_size)
    self.save_model(file, gcn_model)
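# A minimal usage sketch for train(); `GCNNerWrapper` is a hypothetical
# placeholder for the enclosing class, and './data/train.conll' is the training
# file used by the standalone training script above.
#
# wrapper = GCNNerWrapper()
# wrapper.train('./data/train.conll', saving_dir='./data/', epochs=20)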
def load_model(self, file):
    # Restore a GCN-NER model from the given checkpoint file, using the
    # default transition-probability pickle.
    ner = GCNNer(file, './data/trans_prob.pickle')
    print("Loaded model from", file)
    return ner
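# A minimal sketch of reusing a loaded checkpoint through the pretrained_model
# argument of predict()/evaluate(); `GCNNerWrapper` is a hypothetical
# placeholder, and './data/ner-gcn-21.tf' is the checkpoint used elsewhere above.
#
# wrapper = GCNNerWrapper()
# ner = wrapper.load_model('./data/ner-gcn-21.tf')
# predictions = wrapper.predict('./data/dev.conll', pretrained_model=ner)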
def _get_entity_tuples_from_sentence(sentence):
    from gcn_ner import GCNNer

    # Run the pretrained GCN-NER model on a single sentence and return its
    # entity tuples.
    ner = GCNNer(ner_filename='./data/ner-gcn-21.tf', trans_prob_file='./data/trans_prob.pickle')
    entity_tuples = ner.get_entity_tuples_from_text(sentence)
    return entity_tuples
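# A minimal sketch of calling the helper on a single sentence. The example
# sentence is illustrative only; the tuple layout (word, predicted_type,
# start_position, span) is inferred from the header printed in predict() above.
#
# for word, predicted_type, start, span in _get_entity_tuples_from_sentence("John Smith works in London."):
#     print(word, predicted_type, start, span)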
import sys

from gcn_ner import GCNNer

if __name__ == '__main__':
    # ner = GCNNer(ner_filename='./data/unlabeled_50/ner-gcn-9.tf', trans_prob_file='./data/trans_prob.pickle')
    model_path = './data/{}/ner-gcn-{}.tf'.format(sys.argv[1], sys.argv[2])
    ner = GCNNer(ner_filename=model_path, trans_prob_file='./data/trans_prob.pickle')
    print(model_path)
    ner.test('./data/dev.conll')