# Imports used across the three evaluate() variants below. `dataset`,
# `create_model`, `sentence_cutter`, and `SEED` are project-local helpers
# assumed to be defined elsewhere in this repo.
import random
import sys

import pandas as pd
import tensorflow as tf
from sklearn.metrics import roc_auc_score


def evaluate():
    """Score the SAD corpus and yield the ROC-AUC for the train and val splits."""
    vocab_map, _ = dataset.read_map('corpus/mapping')
    sess = tf.Session()
    Model = create_model(sess, 'test')
    Model.batch_size = 1

    # Load the labelled corpus and reproduce the same shuffled 90/10
    # train/validation split used during training.
    df = pd.read_csv('corpus/SAD.csv', header=None)
    df = df.dropna()
    idx = list(df.index)
    random.seed(SEED)
    random.shuffle(idx)
    df = df.loc[idx]  # .ix is removed in modern pandas; .loc works on index labels
    cut_by = int(0.9 * df.shape[0])
    train_df = df.iloc[:cut_by]
    val_df = df.iloc[cut_by:]

    for split_df in [train_df, val_df]:
        sentences = split_df[3]  # column 3 holds the sentence text
        answers = split_df[1]    # column 1 holds the binary label
        scores = []
        for i, sentence in enumerate(sentences):
            if i % 1000 == 0:
                print(i)  # progress indicator
            token_ids = dataset.convert_to_token(sentence, vocab_map)
            encoder_input, encoder_length, _ = Model.get_batch(
                [(0, token_ids)], shuffle=False)
            score = Model.step(sess, encoder_input, encoder_length)
            scores.append(score)
        scores = [s[0][0] for s in scores]
        yield roc_auc_score(answers, scores)
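
# A minimal usage sketch for the batch variant above (assumption: the split
# order inside evaluate() is train first, then validation). `report_auc` is
# a hypothetical wrapper, not part of the original code.
def report_auc():
    for split_name, auc in zip(('train', 'val'), evaluate()):
        print('{} AUC: {:.4f}'.format(split_name, auc))
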
def evaluate(cut_mode):
    """Interactively score sentences read from stdin, segmenting them first."""
    if cut_mode == "word":
        import jieba_fast as jieba
        jieba.load_userdict("dict_fasttext.txt")
    vocab_map, _ = dataset.read_map('corpus/mapping')
    sess = tf.Session()
    Model = create_model(sess, 'test')
    Model.batch_size = 1

    sys.stdout.write('>')
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    sentence = sentence_cutter(sentence, cut_mode)
    while sentence:
        print('sentence: ', sentence)
        token_ids = dataset.convert_to_token(sentence, vocab_map)
        print('token_ids: ', token_ids)
        encoder_input, encoder_length, _ = Model.get_batch([(0, token_ids)])
        print('encoder_input: ', encoder_input, encoder_input.shape)
        print('encoder_length: ', encoder_length)
        score = Model.step(sess, encoder_input, encoder_length)
        print('Score: ', score[0][0])
        print('>', end='')
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        sentence = sentence_cutter(sentence, cut_mode)
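
# `sentence_cutter` is referenced above but not defined in this section.
# A hypothetical sketch of what it might look like, assuming "word" mode
# segments with jieba and any other mode falls back to character-level
# splitting; the repo's actual implementation may differ.
def sentence_cutter(sentence, cut_mode):
    sentence = sentence.strip()
    if cut_mode == 'word':
        import jieba_fast as jieba
        return ' '.join(jieba.cut(sentence))  # jieba.cut yields word tokens
    return ' '.join(sentence)  # character-level fallback (assumption)
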
def evaluate():
    """Interactively score pre-segmented sentences read from stdin."""
    vocab_map, _ = dataset.read_map('corpus/mapping')
    sess = tf.Session()
    Model = create_model(sess, 'test')
    Model.batch_size = 1

    sys.stdout.write('>')
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
        token_ids = dataset.convert_to_token(sentence, vocab_map)
        encoder_input, encoder_length, _ = Model.get_batch([(0, token_ids)])
        score = Model.step(sess, encoder_input, encoder_length)
        print('Score: ' + str(score[0][0]))
        print('>', end='')
        sys.stdout.flush()
        sentence = sys.stdin.readline()
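
# A minimal entry-point sketch, assuming the plain interactive variant above
# is the one to run directly. The hypothetical wiring below is not shown in
# this section; the loop reads one sentence per line until EOF.
if __name__ == '__main__':
    evaluate()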