def convert(chunktags):
    # Head reconstructed from the surrounding calls: rewrite IOB2 tags to
    # IOB1, where a "B-" tag survives only when a chunk of the same type
    # immediately precedes it.
    for p, q in enumerate(chunktags):
        if q.startswith("B-"):
            if p == 0:
                chunktags[p] = "I-" + q[2:]
            else:
                if q[2:] != chunktags[p-1][2:]:
                    chunktags[p] = "I-" + q[2:]
    return chunktags
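
# Usage sketch with made-up tags (not from the original data):
#   convert(['B-PER', 'I-PER', 'B-LOC'])  ->  ['I-PER', 'I-PER', 'I-LOC']
#   convert(['B-PER', 'B-PER'])           ->  ['I-PER', 'B-PER']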

print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=[each], gram='bi', form='BIOES')
    # one-hot encode the POS and chunk features, zero-padding every
    # sentence out to step_length
    pos = np.array([(np.concatenate([
        np_utils.to_categorical(p, pos_length),
        np.zeros((step_length - length[l], pos_length))
    ])) for l, p in enumerate(pos)])
    chunk = np.array([(np.concatenate([
        np_utils.to_categorical(c, chunk_length),
        np.zeros((step_length - length[l], chunk_length))
    ])) for l, c in enumerate(chunk)])
    gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(
        batch=[each], gazetteer='conll')
    gazetteer = np.array([(np.concatenate(
        [a, np.zeros((step_length - length_2[l], gazetteer_length))]))
        for l, a in enumerate(gazetteer)])
    prob = model.predict_on_batch(
        [embed_index, hash_index, pos, chunk, gazetteer])

    for i, l in enumerate(length):
        # categorical_probas_to_classes (Keras 1.x, equivalent to
        # np.argmax(..., axis=1)) picks a class id per token; cut at the
        # real sentence length to drop the padding
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]

    # regroup the sentence into per-token rows
    word_pos_chunk = list(zip(*each))

    # transpose to columns, convert the gold tag column (index 3) from
    # IOB2 to IOB1, then transpose back to rows
    word_pos_chunk = list(zip(*word_pos_chunk))
    word_pos_chunk = [list(x) for x in word_pos_chunk]
    word_pos_chunk[3] = convert(word_pos_chunk[3])
    word_pos_chunk = list(zip(*word_pos_chunk))

    for ind, chunktag in enumerate(chunktags):
        result.write(' '.join(word_pos_chunk[ind]) + ' ' + chunktag + '\n')
    result.write('\n')
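
# The feature blocks above all repeat one pattern: one-hot encode a
# sequence, then zero-pad it to step_length. A minimal sketch of that
# pattern as a helper (pad_one_hot is a hypothetical name, not part of
# the original code):
def pad_one_hot(sequences, num_classes, lengths, step_length):
    return np.array([
        np.concatenate([np_utils.to_categorical(s, num_classes),
                        np.zeros((step_length - lengths[i], num_classes))])
        for i, s in enumerate(sequences)
    ])
# e.g.: pos = pad_one_hot(pos, pos_length, length, step_length)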
Example #3
    log.write('-'*60+'\n')
    log.write('epoch %d start at %s\n'%(epoch, str(start)))

    train_loss = 0
    dev_loss = 0

    np.random.shuffle(train_data)

    for i in range(number_of_train_batches):
        train_batch = train_data[i*batch_size: (i+1)*batch_size]
        embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
            batch=train_batch, gram='bi')

        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
        chunk = np.array([(np.concatenate([
            np_utils.to_categorical(c, chunk_length),
            np.zeros((step_length - length[l], chunk_length))
        ])) for l, c in enumerate(chunk)])
        gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(
            batch=train_batch, gazetteer='conll')
        gazetteer = np.array([(np.concatenate(
            [a, np.zeros((step_length - length_2[l], gazetteer_length))]))
            for l, a in enumerate(gazetteer)])
        y = np.array([np_utils.to_categorical(each, output_length) for each in label])

        # train_on_batch returns [loss, *metrics] for a model compiled with
        # extra metrics; index 0 is the batch loss
        train_metrics = model.train_on_batch(
            [embed_index, hash_index, pos, chunk, gazetteer], y)
        train_loss += train_metrics[0]
    all_train_loss.append(train_loss)

    correct_predict = 0
    all_predict = 0

    for j in range(number_of_dev_batches):
        dev_batch = dev_data[j*batch_size: (j+1)*batch_size]
        embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
            batch=dev_batch, gram='bi')

        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
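
        # A hedged sketch (assumed, not in the original source) of how the
        # correct_predict / all_predict counters above are typically
        # updated for a dev batch:
        #   prob = model.predict_on_batch([embed_index, hash_index, pos, chunk, gazetteer])
        #   for i, l in enumerate(length):
        #       pred = np_utils.categorical_probas_to_classes(prob[i])[:l]
        #       correct_predict += np.sum(pred == np.asarray(label[i][:l]))
        #       all_predict += l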
Example #4
    log.write('-'*60+'\n')
    log.write('epoch %d start at %s\n'%(epoch, str(start)))

    train_loss = 0
    dev_loss = 0

    np.random.shuffle(train_data)

    for i in range(number_of_train_batches):
        train_batch = train_data[i*batch_size: (i+1)*batch_size]
        embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
            batch=train_batch, form='BIOES', gram='bi')

        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
        chunk = np.array([(np.concatenate([
            np_utils.to_categorical(c, chunk_length),
            np.zeros((step_length - length[l], chunk_length))
        ])) for l, c in enumerate(chunk)])
        gazetteer, length_2 = prepare.prepare_gazetteer_BIOES(batch=train_batch)
        gazetteer = np.array([(np.concatenate(
            [a, np.zeros((step_length - length_2[l], gazetteer_length))]))
            for l, a in enumerate(gazetteer)])
        y = np.array([np_utils.to_categorical(each, output_length) for each in label])

        train_metrics = model.train_on_batch([embed_index, hash_index, pos, chunk, gazetteer], y)
        train_loss += train_metrics[0]
    all_train_loss.append(train_loss)

    correct_predict = 0
    all_predict = 0

    for j in range(number_of_dev_batches):
        dev_batch = dev_data[j*batch_size: (j+1)*batch_size]
        embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
            batch=dev_batch, form='BIOES', gram='bi')

        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])