def valid(in_path, slot_path, intent_path):
    """Evaluate the model on the split given by the three label files.

    Feeds batches through the TF session, collects intent and slot
    predictions, and computes intent accuracy, sentence-level semantic
    accuracy and slot F1.  When the path looks like a test split, the
    predictions are also dumped to per-epoch output files.

    Returns:
        (f1, accuracy, semantic_acc, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words, gate_seq)
    """
    data_processor_valid = DataProcessor(in_path, slot_path, intent_path,
                                         in_vocab, slot_vocab, intent_vocab)
    pred_intents = []
    correct_intents = []
    slot_outputs = []
    correct_slots = []
    input_words = []
    gate_seq = []
    while True:
        in_data, slot_data, slot_weight, length, intents, in_seq, \
            slot_seq, intent_seq = data_processor_valid.get_batch(
                arg.batch_size)
        if len(in_data) <= 0:
            # Empty batch: the reader is exhausted.
            break
        feed_dict = {
            input_data.name: in_data,
            sequence_length.name: length
        }
        ret = sess.run(inference_outputs, feed_dict)
        # ret[0]: per-example intent logits; ret[1]: slot outputs.
        for i in ret[0]:
            pred_intents.append(np.argmax(i))
        for i in intents:
            correct_intents.append(i)
        pred_slots = ret[1].reshape(
            (slot_data.shape[0], slot_data.shape[1], -1))
        for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
            if arg.use_crf:
                # CRF decoding already yields label ids per position.
                p = p.reshape([-1])
            else:
                p = np.argmax(p, 1)
            tmp_pred = []
            tmp_correct = []
            tmp_input = []
            for j in range(l):
                tmp_pred.append(slot_vocab['rev'][p[j]])
                tmp_correct.append(slot_vocab['rev'][t[j]])
                tmp_input.append(in_vocab['rev'][i[j]])
            slot_outputs.append(tmp_pred)
            correct_slots.append(tmp_correct)
            input_words.append(tmp_input)
        if data_processor_valid.end == 1:
            break
    pred_intents = np.array(pred_intents)
    correct_intents = np.array(correct_intents)
    accuracy = (pred_intents == correct_intents)
    # Explicit copy so marking semantic errors below cannot alias the
    # intent-accuracy array (the original relied on `astype` rebinding).
    semantic_acc = accuracy.copy()
    accuracy = accuracy.astype(float)
    accuracy = np.mean(accuracy) * 100.0
    index = 0
    for t, p in zip(correct_slots, slot_outputs):
        # A sentence counts as semantically correct only when the intent
        # AND every slot label are correct.
        if len(t) != len(p):
            raise ValueError('Error!!')
        for j in range(len(t)):
            if p[j] != t[j]:
                semantic_acc[index] = False
                break
        index += 1
    semantic_acc = semantic_acc.astype(float)
    semantic_acc = np.mean(semantic_acc) * 100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    if "test" in in_path:
        # Fix: report the file names that are actually written (the old
        # messages claimed "result_intent.out" / "slot.out").
        intent_out_path = str(epochs) + "intent.out"
        print("save " + intent_out_path)
        with open(intent_out_path, "w") as outfile:
            for true, pred in zip(correct_intents, pred_intents):
                outfile.write("{} {}\n".format(true, pred))
        slot_out_path = str(epochs) + "-slot.out"
        print("save " + slot_out_path)
        with open(in_path) as infile:
            data = infile.readlines()
        lines = [line.split() for line in data]
        with open(slot_out_path, "w") as outfile:
            print(len(lines), len(correct_slots), len(slot_outputs))
            for i in range(len(lines)):
                for w, true, pred in zip(
                        lines[i], correct_slots[i], slot_outputs[i]):
                    outfile.write("{} {} {}\n".format(w, true, pred))
                outfile.write("\n")
    logging.info('slot f1: ' + str(f1))
    logging.info('intent accuracy: ' + str(accuracy))
    logging.info('semantic Acc(intent, slots are all correct): ' +
                 str(semantic_acc))
    data_processor_valid.close()
    return f1, accuracy, semantic_acc, pred_intents, correct_intents, \
        slot_outputs, correct_slots, input_words, gate_seq
def valid(in_path, slot_path, intent_path):
    """Evaluate the model on one dataset split.

    Computes intent accuracy, sentence-level semantic accuracy and slot
    F1 over every batch produced by the data processor.

    Returns:
        (f1, accuracy, semantic_error, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words, gate_seq)
    """
    data_processor_valid = DataProcessor(in_path, slot_path, intent_path,
                                         in_vocab, slot_vocab, intent_vocab)
    pred_intents = []
    correct_intents = []
    slot_outputs = []
    correct_slots = []
    input_words = []
    # used to gate
    gate_seq = []
    while True:
        (
            in_data,
            slot_data,
            slot_weight,
            length,
            intents,
            in_seq,
            slot_seq,
            intent_seq,
        ) = data_processor_valid.get_batch(arg.batch_size)
        # Fix: guard against an empty final batch, as the sibling
        # implementations do; otherwise sess.run / reshape below fail on
        # zero-length input.
        if len(in_data) <= 0:
            break
        feed_dict = {
            input_data.name: in_data,
            sequence_length.name: length
        }
        ret = sess.run(inference_outputs, feed_dict)
        for i in ret[0]:
            pred_intents.append(np.argmax(i))
        for i in intents:
            correct_intents.append(i)
        pred_slots = ret[1].reshape(
            (slot_data.shape[0], slot_data.shape[1], -1))
        for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
            p = np.argmax(p, 1)
            tmp_pred = []
            tmp_correct = []
            tmp_input = []
            for j in range(l):
                tmp_pred.append(slot_vocab["rev"][p[j]])
                tmp_correct.append(slot_vocab["rev"][t[j]])
                tmp_input.append(in_vocab["rev"][i[j]])
            slot_outputs.append(tmp_pred)
            correct_slots.append(tmp_correct)
            input_words.append(tmp_input)
        if data_processor_valid.end == 1:
            break
    pred_intents = np.array(pred_intents)
    correct_intents = np.array(correct_intents)
    accuracy = pred_intents == correct_intents
    # NOTE: despite its name, this array tracks per-sentence semantic
    # accuracy (True = intent and all slots correct).
    semantic_error = accuracy
    accuracy = accuracy.astype(float)
    accuracy = np.mean(accuracy) * 100.0
    index = 0
    for t, p in zip(correct_slots, slot_outputs):
        # Process Semantic Error
        if len(t) != len(p):
            raise ValueError("Error!!")
        for j in range(len(t)):
            if p[j] != t[j]:
                semantic_error[index] = False
                break
        index += 1
    semantic_error = semantic_error.astype(float)
    semantic_error = np.mean(semantic_error) * 100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    logging.info("slot f1: " + str(f1))
    logging.info("intent accuracy: " + str(accuracy))
    logging.info(
        "semantic error(intent, slots are all correct): " +
        str(semantic_error))
    data_processor_valid.close()
    return (
        f1,
        accuracy,
        semantic_error,
        pred_intents,
        correct_intents,
        slot_outputs,
        correct_slots,
        input_words,
        gate_seq,
    )
def valid(in_path, slot_path, intent_path):
    """Evaluate the (optionally BERT-embedded) model on one split.

    Logs a per-intent classification report plus intent accuracy,
    sentence-level semantic accuracy and slot F1.

    Returns:
        (f1, accuracy, semantic_error, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words)
    """
    data_processor_valid = DataProcessor(in_path, slot_path, intent_path,
                                         in_vocab, slot_vocab, intent_vocab,
                                         use_bert=arg.use_bert)
    pred_intents = []
    correct_intents = []
    slot_outputs = []
    correct_slots = []
    input_words = []
    while True:
        in_data, slot_data, slot_weight, length, intents, in_seq, \
            slot_seq, intent_seq = data_processor_valid.get_batch(
                arg.batch_size)
        if len(in_data) != 0:
            # Fix: build the embeddings and feed dict only for non-empty
            # batches; previously get_bert_embeddings could run on empty
            # input before the emptiness check.
            input_seq_embeddings = np.empty(shape=[0, 0, arg.embed_dim])
            if arg.use_bert:
                input_seq_embeddings = get_bert_embeddings(in_seq)
            feed_dict = {
                input_data.name: in_data,
                sequence_length.name: length,
                input_sequence_embeddings.name: input_seq_embeddings
            }
            ret = sess.run(inference_outputs, feed_dict)
            # ret[2]: intent logits; ret[3]: stacked slot outputs, of
            # which the last layer is used.
            for i in ret[2]:
                pred_intents.append(np.argmax(i))
            for i in intents:
                correct_intents.append(i)
            pred_slots = ret[3][-1, :, :, :].reshape(
                (slot_data.shape[0], slot_data.shape[1], -1))
            for p, t, i, l, s in zip(pred_slots, slot_data, in_data,
                                     length, slot_seq):
                p = np.argmax(p, 1)
                tmp_pred = []
                tmp_correct = []
                tmp_input = []
                for j in range(l):
                    tmp_pred.append(slot_vocab['rev'][p[j]])
                    tmp_correct.append(slot_vocab['rev'][t[j]])
                    tmp_input.append(in_vocab['rev'][i[j]])
                slot_outputs.append(tmp_pred)
                correct_slots.append(tmp_correct)
                input_words.append(tmp_input)
        if data_processor_valid.end == 1:
            break
    pred_intents = np.array(pred_intents)
    correct_intents = np.array(correct_intents)
    from sklearn.metrics import classification_report
    logging.info(
        classification_report(y_true=correct_intents,
                              y_pred=pred_intents,
                              digits=4))
    accuracy = (pred_intents == correct_intents)
    # NOTE: despite its name, this tracks per-sentence semantic accuracy.
    semantic_error = accuracy
    accuracy = accuracy.astype(float)
    accuracy = np.mean(accuracy) * 100.0
    index = 0
    for t, p in zip(correct_slots, slot_outputs):
        # Process Semantic Error
        if len(t) != len(p):
            raise ValueError('Error!!')
        for j in range(len(t)):
            if p[j] != t[j]:
                semantic_error[index] = False
                break
        index += 1
    semantic_error = semantic_error.astype(float)
    semantic_error = np.mean(semantic_error) * 100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    logging.info('slot f1: ' + str(f1))
    logging.info('intent accuracy: ' + str(accuracy))
    logging.info(
        'semantic error(intent, slots are all correct): ' +
        str(semantic_error))
    # Fix: release the reader like the other valid() variants do.
    data_processor_valid.close()
    return f1, accuracy, semantic_error, pred_intents, correct_intents, \
        slot_outputs, correct_slots, input_words
def valid(in_path, slot_path, intent_path):
    """Score the model on one split and return metrics plus raw sequences.

    Returns:
        (f1, accuracy, semantic_acc, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words, gate_seq)
    """
    reader = DataProcessor(in_path, slot_path, intent_path, in_vocab,
                           slot_vocab, intent_vocab, use_bert=arg.use_bert)
    pred_intents, correct_intents = [], []
    slot_outputs, correct_slots, input_words = [], [], []
    gate_seq = []
    while True:
        (batch_in, batch_slot, slot_weight, seq_lens, batch_intents,
         raw_in, raw_slot, raw_intent) = reader.get_batch(arg.batch_size)
        if len(batch_in) <= 0:
            break
        # Placeholder embeddings unless BERT is enabled for this run.
        bert_embeds = np.empty(shape=[0, 0, arg.embed_dim])
        if arg.use_bert:
            bert_embeds = get_bert_embeddings(raw_in)
        outputs = sess.run(
            inference_outputs, {
                input_data.name: batch_in,
                sequence_length.name: seq_lens,
                input_sequence_embeddings.name: bert_embeds
            })
        # outputs[0]: intent logits; outputs[1]: slot outputs.
        pred_intents.extend(np.argmax(logits) for logits in outputs[0])
        correct_intents.extend(batch_intents)
        slot_scores = outputs[1].reshape(
            (batch_slot.shape[0], batch_slot.shape[1], -1))
        for scores, gold, word_ids, n in zip(slot_scores, batch_slot,
                                             batch_in, seq_lens):
            if arg.use_crf:
                labels = scores.reshape([-1])
            else:
                labels = np.argmax(scores, 1)
            slot_outputs.append(
                [slot_vocab['rev'][labels[j]] for j in range(n)])
            correct_slots.append(
                [slot_vocab['rev'][gold[j]] for j in range(n)])
            input_words.append(
                [in_vocab['rev'][word_ids[j]] for j in range(n)])
        if reader.end == 1:
            break
    pred_intents = np.array(pred_intents)
    correct_intents = np.array(correct_intents)
    accuracy = (pred_intents == correct_intents)
    semantic_acc = accuracy
    accuracy = np.mean(accuracy.astype(float)) * 100.0
    # A sentence is semantically correct only if its intent and every
    # slot label are correct.
    for idx, (gold_seq, pred_seq) in enumerate(
            zip(correct_slots, slot_outputs)):
        if len(gold_seq) != len(pred_seq):
            raise ValueError('Error!!')
        if any(g != q for g, q in zip(gold_seq, pred_seq)):
            semantic_acc[idx] = False
    semantic_acc = np.mean(semantic_acc.astype(float)) * 100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    logging.info('slot f1: ' + str(f1))
    logging.info('intent accuracy: ' + str(accuracy))
    logging.info('semantic Acc(intent, slots are all correct): ' +
                 str(semantic_acc))
    reader.close()
    return f1, accuracy, semantic_acc, pred_intents, correct_intents, \
        slot_outputs, correct_slots, input_words, gate_seq
def run_validate(in_path, slot_path, intent_path):
    """Validate the model on one split and log intent/slot/semantic metrics.

    Returns:
        (f1, accuracy, semantic_acc, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words, gate_seq)
    """
    data_processor_valid = DataProcessor(in_path, slot_path, intent_path,
                                         in_vocab, slot_vocab, intent_vocab)
    pred_intents = []
    correct_intents = []
    slot_outputs = []
    correct_slots = []
    input_words = []
    # used to gate
    gate_seq = []
    while True:
        in_data, slot_data, slot_weight, length, \
            intents, in_seq, slot_seq, intent_seq = \
            data_processor_valid.get_batch(arg.batch_size)
        if len(in_data) <= 0:
            break
        feed_dict = {
            input_data.name: in_data,
            sequence_length.name: length
        }
        [infer_intent_out, infer_slot_out] = sess.run(
            [inference_intent_output, inference_slot_output], feed_dict)
        # infer_intent_output: [batch, intent_size]
        for input_seq in infer_intent_out:
            # pred_intents: list(max_intent_id)
            pred_intents.append(np.argmax(input_seq))
        # intent label
        for input_seq in intents:
            correct_intents.append(input_seq)
        # infer_slot_out, crf: [batch, max_seq_length]
        #             softmax: [batch, max_seq_length, slot_size]
        # pred_slots,     crf: [batch, max_seq_length, 1]
        #             softmax: [batch, max_seq_length, slot_size]
        pred_slots = infer_slot_out.reshape(
            (slot_data.shape[0], slot_data.shape[1], -1))
        # Fix: loop variable renamed from `length` to `seq_len` so it no
        # longer shadows the batch-level `length` array it iterates over.
        for pred_slot, target_slot, input_seq, seq_len in zip(
                pred_slots, slot_data, in_data, length):
            if arg.use_crf:
                # [max_seq_length, 1] => [max_seq_length]
                pred_slot = pred_slot.reshape([-1])
            else:  # use_batch or other
                # [max_seq_length, slot_size] => [max_seq_length]
                pred_slot = np.argmax(pred_slot, axis=1)
            tmp_pred = []
            tmp_correct = []
            tmp_input = []
            for j in range(seq_len):
                tmp_pred.append(slot_vocab['rev'][
                    pred_slot[j]])  # id -> slot word
                tmp_correct.append(
                    slot_vocab['rev'][target_slot[j]])
                tmp_input.append(in_vocab['rev'][input_seq[j]])
            slot_outputs.append(tmp_pred)
            correct_slots.append(tmp_correct)
            input_words.append(tmp_input)
        if data_processor_valid.end == 1:
            break
    # Compute metrics over all N evaluated examples.
    pred_intents = np.array(pred_intents)  # [N]
    correct_intents = np.array(correct_intents)  # [N]
    accuracy = (pred_intents == correct_intents)  # [N]
    semantic_acc = accuracy  # intent_acc, [N]
    accuracy = accuracy.astype(float)
    accuracy = np.mean(accuracy) * 100.0
    index = 0
    # correct_slots: [N, input_seq_length]
    # slot_outputs: [N, input_seq_length]
    for target_slot, pred_slot in zip(correct_slots, slot_outputs):
        # Process Semantic Error
        if len(target_slot) != len(pred_slot):
            raise ValueError('Error!!')
        # target_slot: [input_seq_length]
        for j in range(len(target_slot)):
            # TODO: this semantic-accuracy criterion may be overly strict.
            if pred_slot[j] != target_slot[j]:
                # One wrong slot marks the whole sentence as a semantic
                # error.
                semantic_acc[index] = False
                break
        index += 1
    semantic_acc = semantic_acc.astype(float)
    semantic_acc = np.mean(semantic_acc) * 100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    logging.info('slot f1: ' + str(f1))
    logging.info('intent accuracy: ' + str(accuracy))
    logging.info('semantic Acc(intent, slots are all correct): ' +
                 str(semantic_acc))
    data_processor_valid.close()
    return f1, accuracy, semantic_acc, pred_intents, correct_intents, \
        slot_outputs, correct_slots, input_words, gate_seq
def valid(in_path, slot_path, intent_path):
    """Evaluate on one split; log slot F1, intent and semantic accuracy.

    Returns:
        (f1, accuracy, semantic_error, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words, gate_seq)
    """
    data_processor_valid = DataProcessor(in_path, slot_path, intent_path,
                                         in_vocab, slot_vocab, intent_vocab)
    pred_intents = []
    correct_intents = []
    slot_outputs = []
    correct_slots = []
    input_words = []
    #used to gate
    gate_seq = []
    while True:
        in_data, slot_data, slot_weight, length, intents, in_seq, \
            slot_seq, intent_seq = data_processor_valid.get_batch(
                arg.batch_size)
        # Fix: skip an empty batch before feeding the session; the other
        # valid() variants in this file already guard this case, and
        # reshape/sess.run below fail on zero-length input.
        if len(in_data) <= 0:
            break
        feed_dict = {input_data.name: in_data, sequence_length.name: length}
        ret = sess.run(inference_outputs, feed_dict)
        for i in ret[0]:
            pred_intents.append(np.argmax(i))
        for i in intents:
            correct_intents.append(i)
        pred_slots = ret[1].reshape(
            (slot_data.shape[0], slot_data.shape[1], -1))
        for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
            p = np.argmax(p, 1)
            tmp_pred = []
            tmp_correct = []
            tmp_input = []
            for j in range(l):
                tmp_pred.append(slot_vocab['rev'][p[j]])
                tmp_correct.append(slot_vocab['rev'][t[j]])
                tmp_input.append(in_vocab['rev'][i[j]])
            slot_outputs.append(tmp_pred)
            correct_slots.append(tmp_correct)
            input_words.append(tmp_input)
        if data_processor_valid.end == 1:
            break
    pred_intents = np.array(pred_intents)
    correct_intents = np.array(correct_intents)
    accuracy = (pred_intents==correct_intents)
    # NOTE: despite its name, this tracks per-sentence semantic accuracy.
    semantic_error = accuracy
    accuracy = accuracy.astype(float)
    accuracy = np.mean(accuracy)*100.0
    index = 0
    for t, p in zip(correct_slots, slot_outputs):
        # Process Semantic Error
        if len(t) != len(p):
            raise ValueError('Error!!')
        for j in range(len(t)):
            if p[j] != t[j]:
                semantic_error[index] = False
                break
        index += 1
    semantic_error = semantic_error.astype(float)
    semantic_error = np.mean(semantic_error)*100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    logging.info('slot f1: ' + str(f1))
    logging.info('intent accuracy: ' + str(accuracy))
    logging.info('semantic error(intent, slots are all correct): ' +
                 str(semantic_error))
    data_processor_valid.close()
    return f1, accuracy, semantic_error, pred_intents, correct_intents, \
        slot_outputs, correct_slots, input_words, gate_seq
def valid(in_path, slot_path, intent_path, intent_path_2):  # changed: second intent file added
    """Evaluate a model with two intent heads on one split.

    accuracy_1 scores only the first intent head; accuracy requires both
    heads to be correct.  Slot F1 and sentence-level semantic accuracy
    are computed as in the single-intent variants.

    Returns:
        (f1, accuracy, semantic_error, pred_intents, correct_intents,
         slot_outputs, correct_slots, input_words, gate_seq)
    """
    data_processor_valid = DataProcessor(
        in_path, slot_path, intent_path, intent_path_2, in_vocab,
        slot_vocab, intent_one_vocab, intent_two_vocab)
    pred_intents = []
    pred_intents_2 = []
    correct_intents = []
    correct_intents_2 = []
    slot_outputs = []
    correct_slots = []
    input_words = []
    # used to gate
    gate_seq = []
    while True:
        in_data, slot_data, slot_weight, length, intents, intents_2, \
            in_seq, slot_seq, intent_seq, intent_seq_2 = \
            data_processor_valid.get_batch(arg.batch_size)
        # Fix: skip empty batches (same guard the sibling variants use);
        # reshape/sess.run below fail on zero-length input.
        if len(in_data) <= 0:
            break
        feed_dict = {
            input_data.name: in_data,
            sequence_length.name: length
        }
        ret = sess.run(inference_outputs, feed_dict)
        # ret[0]/ret[2]: logits of the first/second intent head.
        for i in ret[0]:
            pred_intents.append(np.argmax(i))
        for i in intents:
            correct_intents.append(i)
        for i in ret[2]:
            pred_intents_2.append(np.argmax(i))
        for i in intents_2:
            correct_intents_2.append(i)
        pred_slots = ret[1].reshape(
            (slot_data.shape[0], slot_data.shape[1], -1))
        for p, t, i, l in zip(pred_slots, slot_data, in_data, length):
            p = np.argmax(p, 1)
            tmp_pred = []
            tmp_correct = []
            tmp_input = []
            for j in range(l):
                tmp_pred.append(slot_vocab['rev'][p[j]])
                tmp_correct.append(slot_vocab['rev'][t[j]])
                tmp_input.append(in_vocab['rev'][i[j]])
            slot_outputs.append(tmp_pred)
            correct_slots.append(tmp_correct)
            input_words.append(tmp_input)
        if data_processor_valid.end == 1:
            break
    # accuracy_1: first intent head only; accuracy: both heads correct.
    pred_intents = np.array(pred_intents)
    correct_intents = np.array(correct_intents)
    pred_intents_2 = np.array(pred_intents_2)
    correct_intents_2 = np.array(correct_intents_2)
    accuracy_1 = (pred_intents == correct_intents)
    accuracy = ((pred_intents == correct_intents) &
                (pred_intents_2 == correct_intents_2))
    semantic_error = accuracy
    accuracy_1 = accuracy_1.astype(float)
    accuracy_1 = np.mean(accuracy_1) * 100.0
    accuracy = accuracy.astype(float)
    accuracy = np.mean(accuracy) * 100.0
    index = 0
    for t, p in zip(correct_slots, slot_outputs):
        # Process Semantic Error
        if len(t) != len(p):
            raise ValueError('Error!!')
        for j in range(len(t)):
            if p[j] != t[j]:
                semantic_error[index] = False
                break
        index += 1
    semantic_error = semantic_error.astype(float)
    semantic_error = np.mean(semantic_error) * 100.0
    f1, precision, recall = computeF1Score(correct_slots, slot_outputs)
    logging.info('slot f1: ' + str(f1))
    logging.info('앞에거만 따졌을때: ' + str(accuracy_1))
    logging.info('모두 따졌을때 (2개 다): ' + str(accuracy))
    logging.info(
        'semantic error(intent, slots are all correct): ' +
        str(semantic_error))
    logging.info('\n')
    data_processor_valid.close()
    return f1, accuracy, semantic_error, pred_intents, correct_intents, \
        slot_outputs, correct_slots, input_words, gate_seq