# Epoch bookkeeping: log the start time to stdout and to the log file.
start = datetime.now()
print('-' * 60)
print('epoch %d start at %s' % (epoch, str(start)))
log.write('-' * 60 + '\n')
log.write('epoch %d start at %s\n' % (epoch, str(start)))

train_loss = 0
dev_loss = 0
np.random.shuffle(train_data)

# Mini-batch training loop for one epoch.
for i in range(number_of_train_batches):
    train_batch = train_data[i * batch_size:(i + 1) * batch_size]
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=train_batch)
    # One-hot encode the POS ids and zero-pad each sentence to step_length rows.
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    # One-hot encode the chunk labels.
    y = np.array([np_utils.to_categorical(each, output_length) for each in label])
    train_metrics = model.train_on_batch([embed_index, hash_index, pos], y)
    train_loss += train_metrics[0]
all_train_loss.append(train_loss)

correct_predict = 0
all_predict = 0
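# Hedged illustration (not part of the original script): how one POS id sequence is
# one-hot encoded and zero-padded to step_length rows, with made-up sizes.
def _pos_padding_sketch():
    import numpy as np
    from keras.utils import np_utils
    step_length, pos_length = 5, 3               # assumed padded length and POS tag count
    p = [0, 2, 1]                                # POS ids for one 3-token sentence
    padded = np.concatenate([np_utils.to_categorical(p, pos_length),
                             np.zeros((step_length - len(p), pos_length))])
    return padded                                # shape (step_length, pos_length)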
# Load the model saved at the chosen epoch and write its test-set predictions.
best_epoch = sys.argv[1]
model_name = os.path.basename(__file__)[9:-3]
folder_path = './model/%s' % model_name
model_path = '%s/model_epoch_%s.h5' % (folder_path, best_epoch)
result = open('%s/predict.txt' % folder_path, 'w')

print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=[each], trigram=True)
    # Trigram input: shift the embedding index matrix to get left, centre and right windows.
    embed_index_1 = embed_index[:, :-2]
    embed_index_2 = embed_index[:, 1:-1]
    embed_index_3 = embed_index[:, 2:]
    # Concatenate the one-hot POS vectors of the three window positions, then zero-pad.
    pos = [np.concatenate([np_utils.to_categorical(p[:-2], pos_length),
                           np_utils.to_categorical(p[1:-1], pos_length),
                           np_utils.to_categorical(p[2:], pos_length)], axis=1)
           for p in pos]
    pos = np.array([np.concatenate([p, np.zeros((step_length - length[l], pos_length * 3))])
                    for l, p in enumerate(pos)])
    prob = model.predict_on_batch([embed_index_1, embed_index_2, embed_index_3, pos])
    for i, l in enumerate(length):
        # Convert class probabilities to IOB chunk tags, truncated to the real sentence length.
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]
        word_pos_chunk = list(zip(*each))
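# Hedged illustration (not part of the original script): the trigram slicing above turns a
# padded index matrix of width step_length into three aligned views of width step_length - 2,
# so column k carries the previous, current and next word index for the same position.
def _trigram_slicing_sketch():
    import numpy as np
    embed_index = np.array([[1, 2, 3, 4, 0]])    # one padded sentence of word indices
    left = embed_index[:, :-2]                   # [[1, 2, 3]]
    centre = embed_index[:, 1:-1]                # [[2, 3, 4]]
    right = embed_index[:, 2:]                   # [[3, 4, 0]]
    return left, centre, right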
# Epoch bookkeeping: log the start time to stdout and to the log file.
start = datetime.now()
print('-' * 60)
print('epoch %d start at %s' % (epoch, str(start)))
log.write('-' * 60 + '\n')
log.write('epoch %d start at %s\n' % (epoch, str(start)))

train_loss = 0
dev_loss = 0
np.random.shuffle(train_data)

# Mini-batch training loop for one epoch (bigram features, all chunk types).
for i in range(number_of_train_batches):
    train_batch = train_data[i * batch_size:(i + 1) * batch_size]
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=train_batch, gram='bi', chunk_type="ALL")
    # One-hot encode the POS ids and zero-pad each sentence to step_length rows.
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    y = np.array([np_utils.to_categorical(each, output_length) for each in label])
    train_metrics = model.train_on_batch([embed_index, hash_index, pos], y)
    train_loss += train_metrics[0]
all_train_loss.append(train_loss)

correct_predict = 0
all_predict = 0

# Evaluate on the development set after each epoch.
for j in range(number_of_dev_batches):
    dev_batch = dev_data[j * batch_size:(j + 1) * batch_size]
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=dev_batch, gram='bi', chunk_type="ALL")
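# Hedged sketch (hypothetical continuation, not from the original script): one way the dev
# loop above could fill correct_predict / all_predict with per-token counts, ignoring the
# zero-padding beyond each sentence's real length.
def _dev_accuracy_sketch(model, inputs, label, length):
    import numpy as np
    prob = model.predict_on_batch(inputs)
    for i, l in enumerate(length):
        pred = np.argmax(prob[i], axis=-1)[:l]   # predicted class per real token
        gold = np.asarray(label[i])[:l]          # gold class per real token
        yield int((pred == gold).sum()), int(l)  # (correct tokens, total tokens)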
# Report test-set size, then load the best model and write its predictions.
tokens = [len(x[0]) for x in test_data]
print(sum(tokens))
print('%s shape:' % data, len(test_data))

model_name = os.path.basename(__file__)[9:-3]
folder_path = './model/%s' % model_name
model_path = '%s/model_epoch_%s.h5' % (folder_path, best_epoch)
result = open('%s/predict.txt' % folder_path, 'w')

print('loading model...')
model = load_model(model_path)
print('loading model finished.')

for each in test_data:
    embed_index, hash_index, pos, label, length, sentence = prepare.prepare_chunk(
        batch=[each], chunk_type="ALL")
    # One-hot encode the POS ids and zero-pad each sentence to step_length rows.
    pos = np.array([np.concatenate([np_utils.to_categorical(p, pos_length),
                                    np.zeros((step_length - length[l], pos_length))])
                    for l, p in enumerate(pos)])
    prob = model.predict_on_batch([embed_index, hash_index, pos])
    for i, l in enumerate(length):
        # Convert class probabilities to IOB chunk tags, truncated to the real sentence length.
        predict_label = np_utils.categorical_probas_to_classes(prob[i])
        chunktags = [IOB[j] for j in predict_label][:l]
        word_pos_chunk = list(zip(*each))
        # Write one token per line (original columns plus predicted tag), blank line between sentences.
        for ind, chunktag in enumerate(chunktags):
            result.write(' '.join(word_pos_chunk[ind]) + ' ' + chunktag + '\n')
        result.write('\n')
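# Hedged note (assumption about the data layout): each line of predict.txt holds the token's
# original columns from test_data (e.g. word, POS and, if present, the gold chunk tag) followed
# by the predicted chunk tag, with a blank line between sentences -- the layout expected by the
# CoNLL-2000 conlleval script.  Illustrative contents only:
#
#   Confidence NN B-NP B-NP
#   in IN B-PP B-PP
#   the DT B-NP B-NP
#   pound NN I-NP I-NP
#   . . O O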