def convert(chunktags):
    # Rewrite B- prefixes into IOB1 style: a B- tag is kept only when the
    # chunk directly follows another chunk of the same type.
    for p, q in enumerate(chunktags):
        if q.startswith('B-'):
            if p == 0:
                chunktags[p] = 'I-' + q[2:]
            else:
                if q[2:] != chunktags[p-1][2:]:
                    chunktags[p] = 'I-' + q[2:]
    return chunktags


print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=[each], gram='bi', form='BIOES')
    # one-hot encode the POS and chunk features, zero-padded out to step_length
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    chunk = np.array([np.concatenate([np_utils.to_categorical(c, chunk_length),
                                      np.zeros((step_length - length[l], chunk_length))])
                      for l, c in enumerate(chunk)])
    gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(batch=[each], gazetteer='conll')
    gazetteer = np.array([np.concatenate([a,
                                          np.zeros((step_length - length_2[l], gazetteer_length))])
                          for l, a in enumerate(gazetteer)])

    prob = model.predict_on_batch([embed_index, hash_index, pos, chunk, gazetteer])

    for i, l in enumerate(length):
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]
        word_pos_chunk = list(zip(*each))  # token rows: (word, pos, chunk, gold tag)

        # convert: transpose to field columns, rewrite the gold-tag column
        # (index 3) to IOB1, then transpose back into token rows
        word_pos_chunk = list(zip(*word_pos_chunk))
        word_pos_chunk = [list(x) for x in word_pos_chunk]
        word_pos_chunk[3] = convert(word_pos_chunk[3])
        word_pos_chunk = list(zip(*word_pos_chunk))

        for ind, chunktag in enumerate(chunktags):
            result.write(' '.join(word_pos_chunk[ind]) + ' ' + chunktag + '\n')
        result.write('\n')
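# Illustration only (not part of the pipeline): what convert() does to a toy
# tag sequence, assuming the PER/LOC types used here. A B- prefix survives
# only when the chunk directly follows another chunk of the same type, as
# IOB1 requires; everywhere else it is rewritten to I-.
#
#     convert(['B-PER', 'I-PER', 'B-LOC'])  ->  ['I-PER', 'I-PER', 'I-LOC']
#     convert(['B-PER', 'B-PER'])           ->  ['I-PER', 'B-PER']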
print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=[each], gram='bi')
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    chunk = np.array([np.concatenate([np_utils.to_categorical(c, chunk_length),
                                      np.zeros((step_length - length[l], chunk_length))])
                      for l, c in enumerate(chunk)])
    gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(batch=[each])
    gazetteer = np.array([np.concatenate([a,
                                          np.zeros((step_length - length_2[l], gazetteer_length))])
                          for l, a in enumerate(gazetteer)])

    prob = model.predict_on_batch([embed_index, hash_index, pos, chunk, gazetteer])

    for i, l in enumerate(length):
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]
        word_pos_chunk = list(zip(*each))  # token rows: (word, pos, chunk, ...)

        # write one "token features + predicted tag" line per token, with a
        # blank line between sentences (conlleval-style output)
        for ind, chunktag in enumerate(chunktags):
            result.write(' '.join(word_pos_chunk[ind]) + ' ' + chunktag + '\n')
        result.write('\n')
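# Compatibility note: np_utils.categorical_probas_to_classes() is a Keras 1.x
# helper that was removed in Keras 2. If this script is run under Keras 2, a
# drop-in replacement (an assumption, not part of the original code) is:
#
#     predict_label = np.argmax(prob[i], axis=-1)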
log.write('-'*60 + '\n')
log.write('epoch %d start at %s\n' % (epoch, str(start)))

train_loss = 0
dev_loss = 0
np.random.shuffle(train_data)

for i in range(number_of_train_batches):
    train_batch = train_data[i*batch_size: (i+1)*batch_size]
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=train_batch, gram='bi')
    # one-hot encode the POS and chunk features, zero-padded out to step_length
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    chunk = np.array([np.concatenate([np_utils.to_categorical(c, chunk_length),
                                      np.zeros((step_length - length[l], chunk_length))])
                      for l, c in enumerate(chunk)])
    gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(batch=train_batch, gazetteer='conll')
    gazetteer = np.array([np.concatenate([a,
                                          np.zeros((step_length - length_2[l], gazetteer_length))])
                          for l, a in enumerate(gazetteer)])
    y = np.array([np_utils.to_categorical(each, output_length) for each in label])

    train_metrics = model.train_on_batch([embed_index, hash_index, pos, chunk, gazetteer], y)
    train_loss += train_metrics[0]
all_train_loss.append(train_loss)

correct_predict = 0
all_predict = 0

for j in range(number_of_dev_batches):
    dev_batch = dev_data[j*batch_size: (j+1)*batch_size]
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=dev_batch, gram='bi')
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
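# Illustration only: the padding pattern used above, on a toy feature row.
# Assuming step_length = 5 and pos_length = 3, a length-3 sentence whose POS
# ids are [0, 2, 1] becomes a (5, 3) matrix: three one-hot rows followed by
# two all-zero padding rows, so every sentence in the batch shares the same
# time dimension.
#
#     np.concatenate([np_utils.to_categorical([0, 2, 1], 3), np.zeros((2, 3))])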
log.write('-'*60 + '\n')
log.write('epoch %d start at %s\n' % (epoch, str(start)))

train_loss = 0
dev_loss = 0
np.random.shuffle(train_data)

for i in range(number_of_train_batches):
    train_batch = train_data[i*batch_size: (i+1)*batch_size]
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=train_batch, form='BIOES', gram='bi')
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    chunk = np.array([np.concatenate([np_utils.to_categorical(c, chunk_length),
                                      np.zeros((step_length - length[l], chunk_length))])
                      for l, c in enumerate(chunk)])
    gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(batch=train_batch)
    gazetteer = np.array([np.concatenate([a,
                                          np.zeros((step_length - length_2[l], gazetteer_length))])
                          for l, a in enumerate(gazetteer)])
    y = np.array([np_utils.to_categorical(each, output_length) for each in label])

    train_metrics = model.train_on_batch([embed_index, hash_index, pos, chunk, gazetteer], y)
    train_loss += train_metrics[0]
all_train_loss.append(train_loss)

correct_predict = 0
all_predict = 0

for j in range(number_of_dev_batches):
    dev_batch = dev_data[j*batch_size: (j+1)*batch_size]
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=dev_batch, form='BIOES', gram='bi')
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])