# NOTE(review): whitespace-collapsed paste — this single physical line holds
# (a) the TAIL of a chunk-tag post-processing function whose `def` header and
# opening branches lie outside this view (the visible part rewrites a tag to
# "I-" + type when it does not continue the previous token's chunk type, then
# returns `chunktags`), followed by (b) an evaluation script that: loads a
# saved Keras model from `model_path`; for every test sentence builds
# tri-gram features with BIOES tags via prepare.prepare_ner(batch=[each],
# gram='tri', form='BIOES'); one-hot encodes and zero-pads the POS/chunk
# inputs out to step_length; pads gazetteer features the same way using
# their own lengths (length_2); runs model.predict_on_batch([embed_index,
# hash_index, pos, chunk, gazetteer]); and decodes each sequence's argmax
# classes through the IOB lookup table, truncated to the true length l.
# The original indentation is unrecoverable here, so the code is left
# byte-identical; it must be re-indented against the original source before
# it can run (as written this line is not valid Python — everything after
# the inline `# convert` comment is also commented out on a single line).
# NOTE(review): `word_pos_chunk = list(zip(*each))` immediately followed by
# `list(zip(*word_pos_chunk))` transposes and then transposes back — for
# equal-length rows this only re-wraps each row as a tuple before the final
# `[list(x) for x in ...]`; presumably a single transpose (or none) was
# intended — TODO confirm against the original file.
elif q[2:]!=chunktags[p-1][2:]: chunktags[p] = "I-" + q[2:] elif q.startswith("B-"): if p==0: chunktags[p] = "I-" + q[2:] else: if q[2:]!=chunktags[p-1][2:]: chunktags[p] = "I-" + q[2:] return chunktags print('loading model...') model = load_model(model_path) print('loading model finished.') for each in test_data: embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(batch=[each], gram='tri', form='BIOES') pos = np.array([(np.concatenate([np_utils.to_categorical(p, pos_length), np.zeros((step_length-length[l], pos_length))])) for l,p in enumerate(pos)]) chunk = np.array([(np.concatenate([np_utils.to_categorical(c, chunk_length), np.zeros((step_length-length[l], chunk_length))])) for l,c in enumerate(chunk)]) gazetteer, length_2 = prepare.prepare_gazetteer(batch=[each]) gazetteer = np.array([(np.concatenate([a, np.zeros((step_length-length_2[l], gazetteer_length))])) for l,a in enumerate(gazetteer)]) prob = model.predict_on_batch([embed_index, hash_index, pos, chunk, gazetteer]) for i, l in enumerate(length): predict_label = np_utils.categorical_probas_to_classes(prob[i]) chunktags = [IOB[j] for j in predict_label][:l] word_pos_chunk = list(zip(*each)) # convert word_pos_chunk = list(zip(*word_pos_chunk)) word_pos_chunk = [list(x) for x in word_pos_chunk]
# One training epoch for the gazetteer-augmented NER model (bi-gram
# features, BIOES tag scheme): log the epoch start, shuffle the training
# data, then stream it through the model in mini-batches.
# NOTE(review): the collapsed source cuts off mid-batch-loop; the actual
# train_on_batch call follows outside this span in the original file.
start = datetime.now()
print('-' * 60)
print('epoch %d start at %s' % (epoch, str(start)))
log.write('-' * 60 + '\n')
log.write('epoch %d start at %s\n' % (epoch, str(start)))
train_loss = 0
dev_loss = 0
np.random.shuffle(train_data)
for i in range(number_of_train_batches):
    train_batch = train_data[i * batch_size:(i + 1) * batch_size]
    (embed_index, hash_index, pos, chunk, label,
     length, sentence) = prepare.prepare_ner(batch=train_batch,
                                             form='BIOES', gram='bi')

    # One-hot encode the POS and chunk tag sequences, zero-padding every
    # sentence out to step_length so the batch tensors are rectangular.
    pos = np.array([
        np.concatenate([np_utils.to_categorical(tags, pos_length),
                        np.zeros((step_length - length[idx], pos_length))])
        for idx, tags in enumerate(pos)
    ])
    chunk = np.array([
        np.concatenate([np_utils.to_categorical(tags, chunk_length),
                        np.zeros((step_length - length[idx], chunk_length))])
        for idx, tags in enumerate(chunk)
    ])

    # Gazetteer features report their own per-sentence lengths (length_2);
    # pad them to step_length the same way.
    gazetteer, length_2 = prepare.prepare_gazetteer(batch=train_batch)
    gazetteer = np.array([
        np.concatenate([feat,
                        np.zeros((step_length - length_2[idx],
                                  gazetteer_length))])
        for idx, feat in enumerate(gazetteer)
    ])

    # Targets: one-hot encoded label sequences.
    y = np.array([np_utils.to_categorical(lab, output_length)
                  for lab in label])
# NOTE(review): whitespace-collapsed paste — this single physical line holds
# (a) the TAIL of a chunk-tag post-processing function whose `def` header
# lies outside this view (the visible part rewrites "B-"-prefixed tags to
# "I-" + type when the tag does not continue the previous token's chunk
# type — or when it is sentence-initial — then returns `chunktags`),
# followed by (b) an evaluation script: load a saved Keras model from
# `model_path`; for each test sentence build features via
# prepare.prepare_ner(batch=[each]) (default gram/form, unlike the tri-gram
# BIOES variant elsewhere in this project); one-hot encode and zero-pad the
# POS/chunk inputs out to step_length; run
# model.predict_on_batch([embed_index, pos, chunk]) — note hash_index is
# unpacked but NOT fed to this model — and decode each sequence's argmax
# classes through the IOB lookup table, truncated to the true length l.
# The original indentation is unrecoverable here, so the code is left
# byte-identical; it must be re-indented against the original source before
# it can run (as written this line is not valid Python).
chunktags[p] = "I-" + q[2:] elif q.startswith("B-"): if p == 0: chunktags[p] = "I-" + q[2:] else: if q[2:] != chunktags[p - 1][2:]: chunktags[p] = "I-" + q[2:] return chunktags print('loading model...') model = load_model(model_path) print('loading model finished.') for each in test_data: embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner( batch=[each]) pos = np.array([(np.concatenate([ np_utils.to_categorical(p, pos_length), np.zeros((step_length - length[l], pos_length)) ])) for l, p in enumerate(pos)]) chunk = np.array([(np.concatenate([ np_utils.to_categorical(c, chunk_length), np.zeros((step_length - length[l], chunk_length)) ])) for l, c in enumerate(chunk)]) prob = model.predict_on_batch([embed_index, pos, chunk]) for i, l in enumerate(length): predict_label = np_utils.categorical_probas_to_classes(prob[i]) chunktags = [IOB[j] for j in predict_label][:l] word_pos_chunk = list(zip(*each))
# NOTE(review): whitespace-collapsed paste — one training epoch for the
# plain (no-gazetteer) NER model on a single physical line: log the epoch
# start time to stdout and to `log`, reset the loss accumulators, shuffle
# the training data, then for each mini-batch build features via
# prepare.prepare_ner(batch=train_batch) (default gram/form), one-hot
# encode and zero-pad the POS/chunk inputs out to step_length, build
# one-hot targets y, and call model.train_on_batch([embed_index, pos,
# chunk], y), accumulating train_metrics[0] into train_loss.
# NOTE(review): hash_index is unpacked from prepare_ner but not fed to this
# model, and dev_loss is initialised but unused within this span —
# presumably both are used further down the original file; verify there.
# NOTE(review): whether `all_train_loss.append(train_loss)` belongs inside
# the batch loop (per-batch running loss) or after it (one value per epoch)
# cannot be recovered from the collapsed text — the code is therefore left
# byte-identical; it must be re-indented against the original source before
# it can run (as written this line is not valid Python).
start = datetime.now() print('-' * 60) print('epoch %d start at %s' % (epoch, str(start))) log.write('-' * 60 + '\n') log.write('epoch %d start at %s\n' % (epoch, str(start))) train_loss = 0 dev_loss = 0 np.random.shuffle(train_data) for i in range(number_of_train_batches): train_batch = train_data[i * batch_size:(i + 1) * batch_size] embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner( batch=train_batch) pos = np.array([(np.concatenate([ np_utils.to_categorical(p, pos_length), np.zeros((step_length - length[l], pos_length)) ])) for l, p in enumerate(pos)]) chunk = np.array([(np.concatenate([ np_utils.to_categorical(c, chunk_length), np.zeros((step_length - length[l], chunk_length)) ])) for l, c in enumerate(chunk)]) y = np.array( [np_utils.to_categorical(each, output_length) for each in label]) train_metrics = model.train_on_batch([embed_index, pos, chunk], y) train_loss += train_metrics[0] all_train_loss.append(train_loss)