def lookup_JM(WIDTH, DEPTH):
    """Build the jaso (Korean consonant/vowel) lookup mapping from dict.csv."""
    MAPPING_PATH = './Ch01_Data_load/data/dict.csv'
    lookup = pd.read_csv(MAPPING_PATH, encoding='cp949')
    keys = list(lookup.iloc[:, 0])
    values = list(lookup.iloc[:, 1])
    JM = jmu.JasoMapping(WIDTH=WIDTH,
                         DEPTH=DEPTH,
                         MAPPING_KEY=keys,
                         MAPPING_VALUE=values)
    return JM
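# Usage sketch (hypothetical FLAGS values; mirrors the call made in main() below):
#   JM = lookup_JM(WIDTH=FLAGS.INPUT_WIDTH, DEPTH=FLAGS.INPUT_DEPTH)
#   JM.init_table(sess)  # the underlying TF lookup table must be initialized in a session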
def generate_batch_jaso(INDEX, MODEL, DOC, LABEL, MAXLEN, SESS):
    """Split the selected documents into jaso units, one-hot encode them,
    and drop samples that end up empty after splitting."""
    jaso_splitted = jmu.jaso_split(DOC[INDEX], MAXLEN=MAXLEN)
    _input = SESS.run(MODEL.jaso_Onehot, {MODEL.X_Onehot: jaso_splitted})
    _, del_list = length(_input)  # indices of empty (all-zero) samples
    _label = LABEL[INDEX]
    batch_input = np.delete(_input, del_list, axis=0)
    batch_label = np.delete(_label, del_list, axis=0)
    return batch_input, batch_label
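# `length` is defined elsewhere in the repo. A minimal sketch of the behavior
# assumed here (per-sample sequence length of a one-hot batch plus the indices
# of all-zero samples to drop); the actual implementation may differ:
#
# def length(batch):
#     used = np.sign(np.amax(np.abs(batch), axis=2))   # 1 where a jaso is present
#     seq_len = np.sum(used, axis=1).astype(np.int32)  # non-empty steps per sample
#     del_list = np.where(seq_len == 0)[0]             # samples with no content
#     return seq_len, del_list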
def lookup_WM(MAXLEN, IMAGE_WIDTH, IMAGE_DEPTH):
    """Build the word-level lookup mapping from vocab.npy."""
    MAPPING_PATH = './Ch01_Data_load/data/vocab.npy'
    lookup = np.load(MAPPING_PATH)
    # np.load returns an ndarray, not a DataFrame, so plain indexing is used
    # here instead of .iloc (which only exists on pandas objects).
    keys = list(lookup[:, 0])
    values = list(lookup[:, 1])
    WM = jmu.JasoMapping(MAXLEN=MAXLEN,
                         WIDTH=IMAGE_WIDTH,
                         DEPTH=IMAGE_DEPTH,
                         MAPPING_KEY=keys,
                         MAPPING_VALUE=values)
    return WM
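# Sanity-check sketch for the layout this function assumes: vocab.npy holding an
# (N, 2) array of (word, index) pairs. The layout is inferred from the column
# indexing above, not confirmed by the repo:
#   vocab = np.load('./Ch01_Data_load/data/vocab.npy')
#   assert vocab.ndim == 2 and vocab.shape[1] == 2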
def generate_batch_jaso(INDEX, MODEL, DOC, LABEL, MAXLEN, SESS, ATTENTION):
    if ATTENTION:
        jaso_splitted = jmu.jaso_split(DOC[INDEX], MAXLEN=MAXLEN)
        # Raw jaso lengths, shrunk to the time resolution that remains
        # after the model's max-pooling layers.
        seq_len = np.array(list(map(lambda x: find_length(x), jaso_splitted)))
        seq_len = np.ceil(seq_len / (2**MODEL.MAX_POOL_TIME)).astype(np.int32)
        batch_input = SESS.run(MODEL.jaso_Onehot, {MODEL.X_Onehot: jaso_splitted})
        batch_label = LABEL[INDEX]
        return batch_input, batch_label, seq_len
    else:
        jaso_splitted = jmu.jaso_split(DOC[INDEX], MAXLEN=MAXLEN)
        _input = SESS.run(MODEL.jaso_Onehot, {MODEL.X_Onehot: jaso_splitted})
        _, del_list = length(_input)
        _label = LABEL[INDEX]
        batch_input = np.delete(_input, del_list, axis=0)
        batch_label = np.delete(_label, del_list, axis=0)
        return batch_input, batch_label
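# Worked example of the pooled-length computation above: with MAX_POOL_TIME = 3
# (three 2x max-pool stages, an assumed value), a sentence of 100 jaso units maps
# to ceil(100 / 2**3) = 13 valid time steps for the attention/RNN stage.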
def generate_batch_jaso(INDEX, MODEL, DOC, LABEL, MAXLEN, SESS):
    jaso_splitted = jmu.jaso_split(DOC[INDEX], MAXLEN=MAXLEN)
    _input = SESS.run(MODEL.jaso_Onehot, {MODEL.X_Onehot: jaso_splitted})
    _, del_list = length(_input)
    _label = LABEL[INDEX].reshape(INDEX.shape[0], -1)
    if len(del_list) > 0:
        # Debug output for samples that became empty after jaso splitting.
        print(jaso_splitted[del_list[0]], del_list)
        print(_input.shape, _label.shape, del_list)
    batch_input = np.delete(_input, del_list, axis=0)
    batch_label = np.delete(_label, del_list, axis=0)
    # Return the filtered batch; the original returned the unfiltered
    # (_input, _label), reintroducing the empty samples it had just removed.
    return batch_input, batch_label
def main():
    args = parse_args()
    if args is None:
        exit()

    # test file
    filename = args.inputdir + args.input

    # init model config
    #TRAIN_DOC, TRAIN_LABEL, TEST_DOC, TEST_LABEL, LABEL_IDX = data_load.digi_data_load()
    TRAIN_DOC, TRAIN_LABEL, TEST_DOC, TEST_LABEL, LABEL_IDX = data_load.testcase_shuffle_data_load()  #data_load.testcase_add_data_load()
    class_num = TRAIN_LABEL.shape[1]
    FLAGS.NUM_OF_CLASS = class_num
    JM = utils.lookup_JM(FLAGS.INPUT_WIDTH, FLAGS.INPUT_DEPTH)

    # Start Session
    sess = tf.Session()
    print("Session Ready!")
    model = MODEL(sess=sess, JM=JM, FLAGS=FLAGS)

    # Initialization
    sess.run(tf.global_variables_initializer())
    model.JM.init_table(sess)

    # Restore parameter
    saver = tf.train.Saver()
    saver.restore(sess, "./Saver/{}/{}.ckpt".format(FLAGS.WRITER, FLAGS.WRITER))

    if args.printtest == 'True':
        index = np.arange(len(TEST_DOC))
        batch_input, batch_label = utils.generate_batch_jaso(
            INDEX=index, MODEL=model, DOC=TEST_DOC, LABEL=TEST_LABEL,
            MAXLEN=FLAGS.INPUT_WIDTH, SESS=sess)
        proba, ts_loss, ts_acc, ts_merged = sess.run(
            [model.y_proba, model.cross_entropy, model.accuracy, model.merge],
            feed_dict={
                model.X: batch_input,
                model.Y: batch_label,
                model.LEARNING_RATE: FLAGS.lr_value,
                model.TRAIN_PH: False
            })
        pred_idx = np.argmax(proba, axis=1)
        real_idx = np.argmax(batch_label, axis=1)
        pos_idx = np.where(pred_idx == real_idx)[0]
        neg_idx = np.where(pred_idx != real_idx)[0]
        print('[ TEST ]')
        desc = """ size:{}, correct:{}, wrong:{}, acc:{}, f1_score:{}, ts_loss:{}, ts_acc:{} """.format(
            index.shape[0], pos_idx.shape[0], neg_idx.shape[0],
            round(pos_idx.shape[0] / index.shape[0] * 100, 3),
            round(f1_score(real_idx, pred_idx, average='weighted'), 4),
            ts_loss, ts_acc)
        print(desc)
        for idx in pos_idx:
            print('Positive Case:\t', TEST_DOC[index[idx]], '\t->\t',
                  LABEL_IDX[np.argmax(proba[idx])],
                  '({0:.2f})\t'.format(round(max(proba[idx]), 3)),
                  LABEL_IDX[np.argmax(batch_label[idx])])
        for idx in neg_idx:
            print('Negative Case:\t', TEST_DOC[index[idx]], '\t->\t',
                  LABEL_IDX[np.argmax(proba[idx])],
                  '({0:.2f})\t'.format(round(max(proba[idx]), 3)),
                  LABEL_IDX[np.argmax(batch_label[idx])],
                  '({0:.2f})\t'.format(proba[idx][np.argmax(batch_label[idx])]))
        print()

    if args.input == 'a.txt':
        testfile = open(filename)
        for line in testfile:
            line = line.rstrip('\n\r').lower()
            jaso_splitted = jmu.jaso_split([line], MAXLEN=FLAGS.INPUT_WIDTH)
            batch_input = sess.run(model.jaso_Onehot,
                                   {model.X_Onehot: jaso_splitted})
            y_proba = sess.run(model.y_proba,
                               feed_dict={
                                   model.X: batch_input,
                                   model.TRAIN_PH: False
                               })
            label = LABEL_IDX[np.argmax(y_proba[0])]
            if round(max(y_proba[0]), 3) > 0.8:
                print(line, '\t->\t', label, round(max(y_proba[0]), 3))

    if args.input == 'tmp.txt':
        tmp_df = pd.read_csv(filename, sep='\t', header=None)
        tmp_df['class'] = tmp_df[1].apply(lambda x: json2intent(x))
        sentences = tmp_df[0].tolist()
        labels = []
        probas = []
        for sent in sentences:
            jaso_splitted = jmu.jaso_split([sent], MAXLEN=FLAGS.INPUT_WIDTH)
            batch_input = sess.run(model.jaso_Onehot,
                                   {model.X_Onehot: jaso_splitted})
            y_proba = sess.run(model.y_proba,
                               feed_dict={
                                   model.X: batch_input,
                                   model.TRAIN_PH: False
                               })
            labels.append(LABEL_IDX[np.argmax(y_proba[0])])
            probas.append(round(max(y_proba[0]), 3))
        tmp_df['pred'] = labels
        tmp_df['proba'] = probas
        for index, row in tmp_df.iterrows():
            # Only report confident predictions for rows with no existing label.
            if row['class'] is not None:
                continue
            if round(row['proba'], 3) > 0.8:
                print(row[0], '\t->\t', row['pred'], round(row['proba'], 3),
                      '\t', row['class'])

    if args.input == 'tmp_not_under.txt':
        tmp_df = pd.read_csv(filename, sep='\t', header=None)
        tmp_df['class'] = tmp_df[1].apply(lambda x: json2intent(x))
        sentences = tmp_df[0].tolist()
        labels = []
        probas = []
        for sent in sentences:
            jaso_splitted = jmu.jaso_split([sent], MAXLEN=FLAGS.INPUT_WIDTH)
            batch_input = sess.run(model.jaso_Onehot,
                                   {model.X_Onehot: jaso_splitted})
            y_proba = sess.run(model.y_proba,
                               feed_dict={
                                   model.X: batch_input,
                                   model.TRAIN_PH: False
                               })
            labels.append(LABEL_IDX[np.argmax(y_proba[0])])
            probas.append(round(max(y_proba[0]), 3))
        tmp_df['pred'] = labels
        tmp_df['proba'] = probas
        for index, row in tmp_df.iterrows():
            if round(row['proba'], 3) > 0.1:
                print(row[1])
                print(row[0], '\t->\t', row['pred'], round(row['proba'], 3), '\t')
                print()
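# `parse_args` is defined elsewhere in the repo. A minimal sketch of the flags
# this script reads (names inferred from the usage above; defaults are assumptions):
#
# def parse_args():
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--inputdir', type=str, default='./')
#     parser.add_argument('--input', type=str, default='a.txt')
#     parser.add_argument('--printtest', type=str, default='False')
#     return parser.parse_args()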
################################################################################
# Get performance scores
################################################################################
from sklearn import metrics
from sklearn.metrics import confusion_matrix

# Collect logits over the test set, batch by batch.
LOGIT_list = np.empty([0, 2])
LABEL_list = np.empty([0, 2])
for i in range(int(len(TEST_DOC) / FLAGS.TEST_BATCH) + 1):
    index = np.unique(
        np.clip(np.arange(i * FLAGS.TEST_BATCH, (i + 1) * FLAGS.TEST_BATCH),
                a_min=0,
                a_max=len(TEST_DOC) - 1))
    jaso_splitted = jmu.jaso_split(TEST_DOC[index], MAXLEN=FLAGS.INPUT_WIDTH)
    batch_input = sess.run(model.jaso_Onehot, {model.X_Onehot: jaso_splitted})
    batch_label = TEST_LABEL[index]
    ts_acc, y_logit = sess.run([model.accuracy, model.y_logits],
                               feed_dict={
                                   model.X: batch_input,
                                   model.Y: batch_label,
                                   model.TRAIN_PH: False
                               })
    LOGIT_list = np.concatenate([LOGIT_list, y_logit])
    LABEL_list = np.concatenate([LABEL_list, batch_label])
    print(i, '||', ts_acc)

# Calculate AUROC, accuracy from confusion matrix
# Softmax over the collected logits (max-subtracted for numerical stability;
# mathematically identical to exp(x) / sum(exp(x))).
softmax_logit = np.array(
    list(map(lambda x: np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x))),
             LOGIT_list)))
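# The block above stops before computing the promised scores. A minimal sketch of
# the missing step, assuming binary classification with one-hot labels whose
# second column is the positive class (that column convention is an assumption):
auroc = metrics.roc_auc_score(LABEL_list[:, 1], softmax_logit[:, 1])
conf_mat = confusion_matrix(np.argmax(LABEL_list, axis=1),
                            np.argmax(softmax_logit, axis=1))
cm_acc = np.trace(conf_mat) / np.sum(conf_mat)  # accuracy from the confusion matrix
print('AUROC: {:.4f} | ACC(conf. matrix): {:.4f}'.format(auroc, cm_acc))
print(conf_mat)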