# NOTE(review): fragment of a per-epoch training loop; the enclosing
# function / epoch-loop header is not visible here, and this first line's
# indentation is inconsistent with the body below (likely a paste artifact).
start = datetime.now()

    # Announce the epoch start on stdout and mirror it to the log file.
    print('-' * 60)
    print('epoch %d start at %s' % (epoch, str(start)))

    log.write('-' * 60 + '\n')
    log.write('epoch %d start at %s\n' % (epoch, str(start)))

    # Accumulated batch losses for this epoch.
    train_loss = 0
    dev_loss = 0

    # Reshuffle training samples in place every epoch.
    np.random.shuffle(train_data)

    for i in range(number_of_train_batches):
        # Slice the next mini-batch and run it through the preprocessing
        # pipeline (embedding ids, hash ids, POS tags, labels, lengths).
        train_batch = train_data[i * batch_size:(i + 1) * batch_size]
        embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
            batch=train_batch)

        # One-hot encode the POS tags and zero-pad each sequence up to
        # step_length rows so the batch is rectangular.
        pos = np.array([(np.concatenate([
            np_utils.to_categorical(p, pos_length),
            np.zeros((step_length - length[l], pos_length))
        ])) for l, p in enumerate(pos)])
        # One-hot encode the target chunk labels.
        y = np.array(
            [np_utils.to_categorical(each, output_length) for each in label])

        # train_on_batch returns [loss, ...metrics]; accumulate the loss.
        train_metrics = model.train_on_batch([embed_index, hash_index, pos], y)
        train_loss += train_metrics[0]
    all_train_loss.append(train_loss)

    # Counters for the dev-set evaluation that follows (cut off in this view).
    correct_predict = 0
    all_predict = 0
# Inference fragment: load the checkpoint selected on the command line and
# run the trigram model over the test set.
# NOTE(review): the loop body is cut off below; downstream (unseen) code
# presumably consumes chunktags / word_pos_chunk, so those names are kept.
best_epoch = sys.argv[1]

# Model name is this script's filename with a 9-character prefix and the
# trailing '.py' stripped; artifacts live under ./model/<name>.
model_name = os.path.basename(__file__)[9:-3]
folder_path = f'./model/{model_name}'

model_path = f'{folder_path}/model_epoch_{best_epoch}.h5'
result = open(f'{folder_path}/predict.txt', 'w')

print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    # Preprocess a single-sentence batch with trigram features enabled.
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=[each], trigram=True)

    # Left / centre / right shifted views of the token indices give the
    # model its trigram context.
    embed_index_1 = embed_index[:, :-2]
    embed_index_2 = embed_index[:, 1:-1]
    embed_index_3 = embed_index[:, 2:]

    # One-hot the three shifted POS windows and stack them side by side,
    # then zero-pad every sequence up to step_length rows.
    trigram_pos = []
    for p in pos:
        trigram_pos.append(np.concatenate(
            [np_utils.to_categorical(p[:-2], pos_length),
             np_utils.to_categorical(p[1:-1], pos_length),
             np_utils.to_categorical(p[2:], pos_length)],
            axis=1))
    pos = np.array([
        np.concatenate([p, np.zeros((step_length - length[l], pos_length * 3))])
        for l, p in enumerate(trigram_pos)
    ])

    prob = model.predict_on_batch([embed_index_1, embed_index_2, embed_index_3, pos])

    # Decode per-token class probabilities to IOB tags, truncated to the
    # sentence's true length.
    for i, l in enumerate(length):
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]

    word_pos_chunk = list(zip(*each))
# ---- Exemple #3 (score: 0) — scraped example separator; the fragments
# above and below come from different source files ----
    # NOTE(review): fragment of a per-epoch training loop (bigram variant);
    # the enclosing function / epoch-loop header is outside this view.
    start = datetime.now()

    # Announce the epoch start on stdout and mirror it to the log file.
    print('-'*60)
    print('epoch %d start at %s'%(epoch, str(start)))

    log.write('-'*60+'\n')
    log.write('epoch %d start at %s\n'%(epoch, str(start)))

    # Accumulated batch losses for this epoch.
    train_loss = 0
    dev_loss = 0

    # Reshuffle training samples in place every epoch.
    np.random.shuffle(train_data)

    for i in range(number_of_train_batches):
        # Next mini-batch through the preprocessing pipeline; gram='bi'
        # selects bigram features, chunk_type="ALL" keeps every chunk label.
        train_batch = train_data[i*batch_size: (i+1)*batch_size]
        embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(batch=train_batch, gram='bi', chunk_type="ALL")

        # One-hot the POS tags, zero-padded to step_length rows per sequence,
        # and one-hot the target labels.
        pos = np.array([(np.concatenate([np_utils.to_categorical(p, pos_length), np.zeros((step_length-length[l], pos_length))])) for l,p in enumerate(pos)])
        y = np.array([np_utils.to_categorical(each, output_length) for each in label])

        # train_on_batch returns [loss, ...metrics]; accumulate the loss.
        train_metrics = model.train_on_batch([embed_index, hash_index, pos], y)
        train_loss += train_metrics[0]
    all_train_loss.append(train_loss)

    # Counters for the dev-set evaluation below.
    correct_predict = 0
    all_predict = 0

    # Dev-set evaluation loop (cut off mid-body in this fragment).
    for j in range(number_of_dev_batches):
        dev_batch = dev_data[j*batch_size: (j+1)*batch_size]
        embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(batch=dev_batch, gram='bi', chunk_type="ALL")
# ---- Exemple #4 (score: 0) — scraped example separator; the fragments
# above and below come from different source files ----
# Inference script: report test-set size, load the selected checkpoint and
# write per-token chunk predictions to predict.txt.
# NOTE(review): `data` and `best_epoch` are free names defined elsewhere in
# the original file — TODO confirm against the full source.

# Token count (first field of every sample) and number of test sentences.
tokens = [len(sample[0]) for sample in test_data]
print(sum(tokens))
print('%s shape:' % data, len(test_data))

# Model name is this script's filename with a 9-character prefix and the
# trailing '.py' stripped; artifacts live under ./model/<name>.
model_name = os.path.basename(__file__)[9:-3]
folder_path = './model/%s' % model_name

model_path = '%s/model_epoch_%s.h5' % (folder_path, best_epoch)
result = open('%s/predict.txt' % folder_path, 'w')

print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    # One-sentence batch through the preprocessing pipeline.
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=[each], chunk_type="ALL")

    # One-hot the POS tags and zero-pad every sequence up to step_length
    # rows so the model input is rectangular.
    padded = []
    for idx, p in enumerate(pos):
        one_hot = np_utils.to_categorical(p, pos_length)
        pad = np.zeros((step_length - length[idx], pos_length))
        padded.append(np.concatenate([one_hot, pad]))
    pos = np.array(padded)

    prob = model.predict_on_batch([embed_index, hash_index, pos])

    # Decode per-token class probabilities to IOB tags, truncated to the
    # sentence's true length.
    for i, l in enumerate(length):
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]

    word_pos_chunk = list(zip(*each))

    # Emit "word pos chunk predicted_tag" per token, blank line after each
    # sentence (CoNLL-style output).
    for idx, tag in enumerate(chunktags):
        result.write(' '.join(word_pos_chunk[idx]) + ' ' + tag + '\n')
    result.write('\n')