Example #1
0
            # A tag whose chunk type differs from the previous token's type
            # starts a new chunk; rewrite it as I- (IOB1-style, where I-
            # opens a chunk when the types differ).
            # NOTE(review): fragment starts mid-function -- the preceding
            # branches of this if/elif chain are not visible here.
            elif q[2:]!=chunktags[p-1][2:]:
                chunktags[p] = "I-" + q[2:]
        elif q.startswith("B-"):
            # B- at sentence start has no preceding chunk to separate from,
            # so it is demoted to I-.
            if p==0:
                chunktags[p] = "I-" + q[2:]
            else:
                # B- is kept only when it splits two adjacent chunks of the
                # same type; when the types differ, I- already marks the
                # boundary.
                if q[2:]!=chunktags[p-1][2:]:
                    chunktags[p] = "I-" + q[2:]
    return chunktags

# Load the trained Keras model from disk, reporting progress on stdout.
print('loading model...')
model = load_model(model_path)
print('loading model finished.')

# Run inference on each test sentence individually (batch size 1).
for each in test_data:
    # prepare_ner yields index/feature arrays, gold labels, the true
    # sentence length and the raw sentence (BIOES tag scheme, trigram
    # hash features for this model variant).
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(batch=[each], gram='tri', form='BIOES')
    # One-hot encode the POS tags and zero-pad each sequence up to
    # step_length timesteps.
    pos = np.array([(np.concatenate([np_utils.to_categorical(p, pos_length), np.zeros((step_length-length[l], pos_length))])) for l,p in enumerate(pos)])
    # Same one-hot + padding treatment for the chunk tags.
    chunk = np.array([(np.concatenate([np_utils.to_categorical(c, chunk_length), np.zeros((step_length-length[l], chunk_length))])) for l,c in enumerate(chunk)])
    # Gazetteer features arrive pre-encoded with their own length vector;
    # pad them the same way.
    gazetteer, length_2 = prepare.prepare_gazetteer(batch=[each])
    gazetteer = np.array([(np.concatenate([a, np.zeros((step_length-length_2[l], gazetteer_length))])) for l,a in enumerate(gazetteer)])
    prob = model.predict_on_batch([embed_index, hash_index, pos, chunk, gazetteer])

    # Decode per-timestep class probabilities into tag strings, truncated
    # to the real sentence length.  NOTE(review): with a batch of one this
    # loop runs once; otherwise chunktags would be overwritten each pass.
    for i, l in enumerate(length):
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]

    # Transpose the sentence's (word, pos, chunk) tuples.
    word_pos_chunk = list(zip(*each))
    
    # convert
    # NOTE(review): this zip undoes the transpose above -- presumably code
    # between the two was elided from this fragment; verify against the
    # full script.
    word_pos_chunk = list(zip(*word_pos_chunk))
    word_pos_chunk = [list(x) for x in word_pos_chunk]
Example #2
0
    # Mark the start of this epoch on stdout and in the run log.
    # NOTE(review): this fragment is the body of an outer epoch loop whose
    # header is not visible here.
    start = datetime.now()

    print('-' * 60)
    print('epoch %d start at %s' % (epoch, str(start)))

    log.write('-' * 60 + '\n')
    log.write('epoch %d start at %s\n' % (epoch, str(start)))

    # Loss accumulators for this epoch.
    train_loss = 0
    dev_loss = 0

    # Reshuffle the training set at the start of every epoch.
    np.random.shuffle(train_data)

    for i in range(number_of_train_batches):
        # Slice out the next mini-batch and build its feature arrays
        # (BIOES tag scheme, bigram hash features for this model variant).
        train_batch = train_data[i * batch_size:(i + 1) * batch_size]
        embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
            batch=train_batch, form='BIOES', gram='bi')

        # One-hot encode POS tags and zero-pad every sequence up to
        # step_length timesteps.
        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
        # Same one-hot + padding treatment for the chunk tags.
        chunk = np.array([(np.concatenate([
            np_utils.to_categorical(c, chunk_length),
            np.zeros((step_length - length[l], chunk_length))
        ])) for l, c in enumerate(chunk)])
        # Gazetteer features arrive pre-encoded with their own length
        # vector; pad them the same way.
        gazetteer, length_2 = prepare.prepare_gazetteer(batch=train_batch)
        gazetteer = np.array([(np.concatenate(
            [a, np.zeros((step_length - length_2[l], gazetteer_length))]))
                              for l, a in enumerate(gazetteer)])
        # One-hot training targets over the output tag vocabulary.
        y = np.array(
            [np_utils.to_categorical(each, output_length) for each in label])
Example #3
0
                # Differing chunk type -> rewrite as I- (IOB1-style).
                # NOTE(review): fragment starts mid-function; the enclosing
                # condition is not visible here.
                chunktags[p] = "I-" + q[2:]
        elif q.startswith("B-"):
            # B- at sentence start has no preceding chunk to separate from,
            # so it is demoted to I-.
            if p == 0:
                chunktags[p] = "I-" + q[2:]
            else:
                # B- is kept only when it splits two adjacent chunks of the
                # same type; when the types differ, I- already marks the
                # boundary.
                if q[2:] != chunktags[p - 1][2:]:
                    chunktags[p] = "I-" + q[2:]
    return chunktags


# Load the trained Keras model from disk, reporting progress on stdout.
print('loading model...')
model = load_model(model_path)
print('loading model finished.')

# Run inference on each test sentence individually (batch size 1).
for each in test_data:
    # prepare_ner yields index/feature arrays, gold labels, the true
    # sentence length and the raw sentence (defaults for tag scheme /
    # n-gram -- see prepare.prepare_ner).
    embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
        batch=[each])
    # One-hot encode the POS tags and zero-pad each sequence up to
    # step_length timesteps.
    pos = np.array([(np.concatenate([
        np_utils.to_categorical(p, pos_length),
        np.zeros((step_length - length[l], pos_length))
    ])) for l, p in enumerate(pos)])
    # Same one-hot + padding treatment for the chunk tags.
    chunk = np.array([(np.concatenate([
        np_utils.to_categorical(c, chunk_length),
        np.zeros((step_length - length[l], chunk_length))
    ])) for l, c in enumerate(chunk)])
    # This model variant consumes only embeddings, POS and chunk features;
    # hash_index is produced but unused here.
    prob = model.predict_on_batch([embed_index, pos, chunk])

    # Decode per-timestep class probabilities into tag strings, truncated
    # to the real sentence length.  NOTE(review): with a batch of one this
    # loop runs once; otherwise chunktags would be overwritten each pass.
    for i, l in enumerate(length):
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]

    # Transpose the sentence's (word, pos, chunk) tuples.
    word_pos_chunk = list(zip(*each))
Example #4
0
    # Mark the start of this epoch on stdout and in the run log.
    # NOTE(review): this fragment is the body of an outer epoch loop whose
    # header is not visible here.
    start = datetime.now()

    print('-' * 60)
    print('epoch %d start at %s' % (epoch, str(start)))

    log.write('-' * 60 + '\n')
    log.write('epoch %d start at %s\n' % (epoch, str(start)))

    # Loss accumulators for this epoch.
    train_loss = 0
    dev_loss = 0

    # Reshuffle the training set at the start of every epoch.
    np.random.shuffle(train_data)

    for i in range(number_of_train_batches):
        # Slice out the next mini-batch and build its feature arrays.
        train_batch = train_data[i * batch_size:(i + 1) * batch_size]
        embed_index, hash_index, pos, chunk, label, length, sentence = prepare.prepare_ner(
            batch=train_batch)

        # One-hot encode POS tags and zero-pad every sequence up to
        # step_length timesteps.
        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
        # Same one-hot + padding treatment for the chunk tags.
        chunk = np.array([(np.concatenate([
            np_utils.to_categorical(c, chunk_length),
            np.zeros((step_length - length[l], chunk_length))
        ])) for l, c in enumerate(chunk)])
        # One-hot training targets over the output tag vocabulary.
        y = np.array(
            [np_utils.to_categorical(each, output_length) for each in label])

        # This model variant trains on embeddings, POS and chunk features
        # only; hash_index is produced but unused here.  train_on_batch
        # returns scalar metrics; index 0 is the loss.
        train_metrics = model.train_on_batch([embed_index, pos, chunk], y)
        train_loss += train_metrics[0]
    all_train_loss.append(train_loss)