def iteration_model(model, dataset, parameter, train=True):
    """Run one pass of `model` over `dataset` (training or evaluation).

    Args:
        model: graph wrapper exposing placeholders (morph, ne_dict, character,
            sequence, character_len, label, dropout/keep-prob feeds,
            learning_rate) and ops (cost, viterbi_sequence, train_op).
        dataset: provides get_data_batch_size(batch_size, train) yielding
            (morph, ne_dict, character, seq_len, char_len, label, step).
        parameter: dict of hyperparameters (batch_size, sentence_length,
            keep-prob values, learning_rate, ...).
        train: when True, runs model.train_op and feeds training keep-probs;
            when False, evaluates only.

    Returns:
        (avg_cost, accuracy_percent, precision_count, recall_count) where the
        count pairs are accumulated by diff_model_label.

    Note: uses the module-global TF session `sess`.
    """
    precision_count = np.array([0., 0.])
    recall_count = np.array([0., 0.])
    # Training/eval accumulators.
    avg_cost = 0.0
    avg_correct = 0.0
    total_labels = 0.0

    # FIX: dropout must be disabled at evaluation time. The original fed the
    # training keep-probabilities unconditionally, leaving dropout active when
    # train=False. A keep-probability of 1.0 makes every dropout a no-op
    # (model.dropout_rate is fed keep_prob despite its name — same convention
    # as the ensemble variant in this file).
    if train:
        keep_prob = parameter["keep_prob"]
        weight_keep = parameter["weight_keep_prob"]
        lstm_keep = parameter["lstm_keep_prob"]
        emb_keep = parameter["emb_keep_prob"]
        dense_keep = parameter["dense_keep_prob"]
    else:
        keep_prob = weight_keep = lstm_keep = emb_keep = dense_keep = 1.0

    for morph, ne_dict, character, seq_len, char_len, label, step in \
            dataset.get_data_batch_size(parameter["batch_size"], train):
        feed_dict = {
            model.morph: morph,
            model.ne_dict: ne_dict,
            model.character: character,
            model.sequence: seq_len,
            model.character_len: char_len,
            model.label: label,
            model.dropout_rate: keep_prob,
            model.weight_dropout_keep_prob: weight_keep,
            model.lstm_dropout_keep_prob: lstm_keep,
            model.emb_dropout_keep_prob: emb_keep,
            model.dense_dropout_keep_prob: dense_keep,
            model.learning_rate: parameter["learning_rate"]
        }
        if train:
            cost, tf_viterbi_sequence, _ = sess.run(
                [model.cost, model.viterbi_sequence, model.train_op],
                feed_dict=feed_dict)
        else:
            cost, tf_viterbi_sequence = sess.run(
                [model.cost, model.viterbi_sequence], feed_dict=feed_dict)
        avg_cost += cost
        # Mask out padded positions beyond each sentence's true length.
        mask = (np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)
                < np.expand_dims(seq_len, axis=1))
        total_labels += np.sum(seq_len)
        correct_labels = np.sum((label == tf_viterbi_sequence) * mask)
        avg_correct += correct_labels
        precision_count, recall_count = diff_model_label(
            dataset, precision_count, recall_count, tf_viterbi_sequence,
            label, seq_len)
        if train and step % 5 == 0:
            print('[Train step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'.format(
                step + 1, avg_cost / (step + 1),
                100.0 * avg_correct / float(total_labels)))
        else:
            if step % 5 == 0:
                print(
                    '[Dev step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'.format(
                        step + 1, avg_cost / (step + 1),
                        100.0 * avg_correct / float(total_labels)))
            # NOTE(review): early break truncates the dev pass after a few
            # batches — looks like leftover debug code; confirm intent.
            if step > 5:
                break
    return avg_cost / (step + 1), 100.0 * avg_correct / float(
        total_labels), precision_count, recall_count
def iteration_model(models, dataset, parameter, train=True):
    """Run one pass of an ensemble of models over `dataset`.

    Each model in `models` is fed the same batch; per-model cost/accuracy/
    precision/recall are accumulated, and an ensemble prediction is formed by
    an element-wise majority vote over the models' viterbi sequences.

    Args:
        models: iterable of graph wrappers (len presumably equals
            parameter["num_ensemble"] — confirm against caller).
        dataset: provides shuffle_data(), __len__, and
            get_data_batch_size(batch_size, train).
        parameter: hyperparameter dict (num_ensemble, batch_size,
            sentence_length, keep_prob, ...).
        train: when True, runs each model's train_op and feeds the training
            keep-probability; when False, feeds 1.0 (dropout disabled).

    Returns:
        (per-model avg cost array, per-model accuracy array,
         per-model precision counts, per-model recall counts,
         ensemble accuracy, ensemble precision counts, ensemble recall counts)

    Note: uses the module-global TF session `sess`.
    """
    # Per-model accumulators, one row/slot per ensemble member.
    precision_count = np.zeros((parameter["num_ensemble"], 2))
    recall_count = np.zeros((parameter["num_ensemble"], 2))
    avg_cost = np.zeros(parameter["num_ensemble"])
    avg_correct = np.zeros(parameter["num_ensemble"])
    total_labels = np.zeros(parameter["num_ensemble"])
    correct_labels = np.zeros(parameter["num_ensemble"])
    dataset.shuffle_data()
    # Ensemble (majority-vote) accumulators.
    e_precision_count = np.array([0., 0.])
    e_recall_count = np.array([0., 0.])
    e_avg_correct = 0.0
    e_total_labels = 0.0
    # Dropout is disabled (keep prob 1.0) when not training.
    if train:
        keep_prob = parameter["keep_prob"]
    else:
        keep_prob = 1.0
    batch_gen = dataset.get_data_batch_size(parameter["batch_size"], train)
    total_iter = int(len(dataset) / parameter["batch_size"])
    for morph, ne_dict, character, seq_len, char_len, label, step in tqdm(
            batch_gen, total=total_iter):
        ensemble = []  # collects each model's predicted sequence for voting
        for i, model in enumerate(models):
            feed_dict = {model.morph: morph,
                         model.ne_dict: ne_dict,
                         model.character: character,
                         model.sequence: seq_len,
                         model.character_len: char_len,
                         model.label: label,
                         model.dropout_rate: keep_prob
                         }
            if train:
                cost, tf_viterbi_sequence, _ = sess.run(
                    [model.cost, model.viterbi_sequence, model.train_op],
                    feed_dict=feed_dict)
            else:
                cost, tf_viterbi_sequence = sess.run(
                    [model.cost, model.viterbi_sequence],
                    feed_dict=feed_dict)
            ensemble.append(tf_viterbi_sequence)
            avg_cost[i] += cost
            # Mask padded positions beyond each sentence's true length.
            mask = (np.expand_dims(np.arange(parameter["sentence_length"]),
                                   axis=0)
                    < np.expand_dims(seq_len, axis=1))
            total_labels[i] += np.sum(seq_len)
            correct_labels[i] = np.sum((label == tf_viterbi_sequence) * mask)
            avg_correct[i] += correct_labels[i]
            precision_count[i], recall_count[i] = diff_model_label(
                dataset, precision_count[i], recall_count[i],
                tf_viterbi_sequence, label, seq_len)
        # Calculation for ensemble measure: per-position majority vote across
        # models. NOTE(review): stats.mode result indexing is scipy-version
        # sensitive (keepdims behavior changed in scipy 1.11) — verify.
        ensemble = np.array(stats.mode(ensemble)[0][0])
        mask = (np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)
                < np.expand_dims(seq_len, axis=1))
        e_total_labels += np.sum(seq_len)
        e_correct_labels = np.sum((label == ensemble) * mask)
        e_avg_correct += e_correct_labels
        e_precision_count, e_recall_count = diff_model_label(
            dataset, e_precision_count, e_recall_count, ensemble, label,
            seq_len)
    # `step` is the last batch index from the loop; averages are over steps.
    # NOTE(review): e_total_labels starts as a Python float — .astype works
    # only because np.sum returns a NumPy scalar; fails if the loop never ran.
    return avg_cost / (step + 1), 100.0 * avg_correct / total_labels.astype(float), precision_count, recall_count, \
        100.0 * e_avg_correct / e_total_labels.astype(float), e_precision_count, e_recall_count
def _run_epoch(model, dataset, parameter, train, valid):
    """One pass over a data split; shared by the train and valid phases.

    Returns (avg_cost, accuracy_percent, precision_cnt, recall_cnt, last_step).
    Uses the module-global TF session `sess`.
    """
    avg_cost = 0.0
    avg_correct = 0.0
    total_labels = 0.0
    precision_cnt = np.array([0., 0.])
    recall_cnt = np.array([0., 0.])
    if valid:
        # FIX: dropout must be disabled during validation. The original fed
        # parameter["keep_prob"] here too, leaving dropout active at eval
        # time; 1.0 makes dropout a no-op (model.dropout_rate is fed a
        # keep-probability despite its name).
        keep_prob = 1.0
        tag = 'valid'
        batches = dataset.get_data_batch_size(
            parameter["batch_size"], train, valid=True)
    else:
        keep_prob = parameter["keep_prob"]
        tag = 'Train'
        batches = dataset.get_data_batch_size(parameter["batch_size"], train)
    for morph, ne_dict, character, seq_len, char_len, label, step in batches:
        feed_dict = {
            model.morph: morph,
            model.ne_dict: ne_dict,
            model.character: character,
            model.sequence: seq_len,
            model.character_len: char_len,
            model.label: label,
            model.dropout_rate: keep_prob,
        }
        if valid:
            cost, tf_viterbi_sequence = sess.run(
                [model.cost, model.viterbi_sequence], feed_dict=feed_dict)
        else:
            cost, tf_viterbi_sequence, _ = sess.run(
                [model.cost, model.viterbi_sequence, model.train_op],
                feed_dict=feed_dict)
        avg_cost += cost
        # Mask out padded positions beyond each sentence's true length.
        mask = (np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)
                < np.expand_dims(seq_len, axis=1))
        total_labels += np.sum(seq_len)
        correct_labels = np.sum((label == tf_viterbi_sequence) * mask)
        avg_correct += correct_labels
        precision_cnt, recall_cnt = diff_model_label(
            dataset, precision_cnt, recall_cnt, tf_viterbi_sequence, label,
            seq_len)
        if step % 100 == 0:
            print('[{} step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'.format(
                tag, step + 1, avg_cost / (step + 1),
                100.0 * avg_correct / float(total_labels)))
    return (avg_cost / (step + 1),
            100.0 * avg_correct / float(total_labels),
            precision_cnt, recall_cnt, step)


def iteration_model(model, dataset, parameter, train=True):
    """Run one training pass followed by one validation pass.

    Args:
        model: graph wrapper exposing the input placeholders and the
            cost / viterbi_sequence / train_op ops.
        dataset: provides get_data_batch_size(batch_size, train[, valid]).
        parameter: hyperparameter dict (batch_size, sentence_length,
            keep_prob, ...).
        train: forwarded to dataset.get_data_batch_size for both phases.

    Returns:
        ([train avg cost, train accuracy, train precision counts,
          train recall counts, last train step],
         [valid avg cost, valid accuracy, valid precision counts,
          valid recall counts, last valid step])
    """
    # Train phase (runs train_op, training dropout active).
    tr_stats = _run_epoch(model, dataset, parameter, train, valid=False)
    # Validation phase (no train_op, dropout disabled).
    te_stats = _run_epoch(model, dataset, parameter, train, valid=True)
    return list(tr_stats), list(te_stats)
def iteration_model(model, dataset, parameter, train=True):
    """Run one pass of `model` over `dataset` (training or evaluation).

    Args:
        model: graph wrapper exposing placeholders (morph, ne_dict, character,
            sequence, character_len, label, dropout/keep-prob feeds,
            learning_rate) and ops (cost, viterbi_sequence, train_op).
        dataset: provides get_data_batch_size(batch_size, train) yielding
            (morph, ne_dict, character, seq_len, char_len, label, step).
        parameter: dict of hyperparameters (batch_size, sentence_length,
            keep-prob values, learning_rate, ...).
        train: when True, runs model.train_op and feeds training keep-probs;
            when False, evaluates only.

    Returns:
        (avg_cost, accuracy_percent, precision_count, recall_count) where the
        count pairs are accumulated by diff_model_label.

    Note: uses the module-global TF session `sess`. Dead commented-out
    test-export code was removed.
    """
    precision_count = np.array([0., 0.])
    recall_count = np.array([0., 0.])
    # Training/eval accumulators.
    avg_cost = 0.0
    avg_correct = 0.0
    total_labels = 0.0

    # FIX: dropout must be disabled at evaluation time. The original fed the
    # training keep-probabilities unconditionally, leaving dropout active when
    # train=False. A keep-probability of 1.0 makes every dropout a no-op
    # (model.dropout_rate is fed keep_prob despite its name — same convention
    # as the ensemble variant in this file).
    if train:
        keep_prob = parameter["keep_prob"]
        weight_keep = parameter["weight_keep_prob"]
        lstm_keep = parameter["lstm_keep_prob"]
        emb_keep = parameter["emb_keep_prob"]
        dense_keep = parameter["dense_keep_prob"]
    else:
        keep_prob = weight_keep = lstm_keep = emb_keep = dense_keep = 1.0

    for morph, ne_dict, character, seq_len, char_len, label, step in \
            dataset.get_data_batch_size(parameter["batch_size"], train):
        feed_dict = {
            model.morph: morph,
            model.ne_dict: ne_dict,
            model.character: character,
            model.sequence: seq_len,
            model.character_len: char_len,
            model.label: label,
            model.dropout_rate: keep_prob,
            model.weight_dropout_keep_prob: weight_keep,
            model.lstm_dropout_keep_prob: lstm_keep,
            model.emb_dropout_keep_prob: emb_keep,
            model.dense_dropout_keep_prob: dense_keep,
            model.learning_rate: parameter["learning_rate"]
        }
        if train:
            cost, tf_viterbi_sequence, _ = sess.run(
                [model.cost, model.viterbi_sequence, model.train_op],
                feed_dict=feed_dict)
        else:
            cost, tf_viterbi_sequence = sess.run(
                [model.cost, model.viterbi_sequence], feed_dict=feed_dict)
        avg_cost += cost
        # Mask out padded positions beyond each sentence's true length.
        mask = (np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)
                < np.expand_dims(seq_len, axis=1))
        total_labels += np.sum(seq_len)
        correct_labels = np.sum((label == tf_viterbi_sequence) * mask)
        avg_correct += correct_labels
        precision_count, recall_count = diff_model_label(
            dataset, precision_count, recall_count, tf_viterbi_sequence,
            label, seq_len)
        if train and step % 5 == 0:
            print('[Train step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'.format(
                step + 1, avg_cost / (step + 1),
                100.0 * avg_correct / float(total_labels)))
        else:
            if step % 5 == 0:
                print(
                    '[Dev step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'.format(
                        step + 1, avg_cost / (step + 1),
                        100.0 * avg_correct / float(total_labels)))
            # NOTE(review): early break truncates the dev pass after a few
            # batches — looks like leftover debug code; confirm intent.
            if step > 10:
                break
    return avg_cost / (step + 1), 100.0 * avg_correct / float(
        total_labels), precision_count, recall_count