# Code example #1
0
# Load the test split. The file is decoded as latin-1 and missing cells are
# forward-filled from the previous row.
# `DataFrame.ffill()` is used instead of `fillna(method="ffill")`, which newer
# pandas releases deprecate; the result is the same.
data_te = pd.read_csv("df_concat_test.csv", encoding="latin1").ffill()

# Replace every word that is not in `words` with the single "UNK" token.
# NOTE(review): `words` is defined elsewhere; presumably the training
# vocabulary -- confirm.
data_te.loc[~data_te["Word"].isin(words), "Word"] = "UNK"

# Debugging aid: uncomment to evaluate on the first 20000 rows only.
# data_te = data_te[:20000]

# Distinct publication ids found in the test split. They pass through a set,
# so their order is arbitrary; this one list is handed to both the sub-data
# getter and the generator, and is iterated again when collecting labels.
te_pub = [*set(data_te["Pub_id"].values)]

# Per-publication data for the test split, and the generator that feeds it to
# the model in 'test' mode.
test = subdata_getter(te_pub, data_te)
te_generator = DataGenerator(
    te_pub,
    test,
    Type="test",
)

# Collect the ground-truth tags publication by publication, in `te_pub` order.
# Each publication's tag list is truncated to a whole number of batches.
# NOTE(review): this presumably mirrors DataGenerator dropping the trailing
# partial batch of every publication, and assumes the generator yields
# publications in `te_pub` order without shuffling -- confirm.
y_temp = test["tags"]
y_te = []
for pub_id in te_pub:
    pub_tags = y_temp[pub_id]
    # Largest multiple of BATCH_SIZE that fits; integer floor division avoids
    # the float round trip through np.floor.
    length = int(len(pub_tags) // BATCH_SIZE * BATCH_SIZE)
    # extend() grows the list in place; rebuilding it with `+` on every
    # iteration would copy all labels gathered so far.
    y_te.extend(pub_tags[:length])

# Eval
# `Model.predict_generator` is deprecated in recent tf.keras releases and does
# not exist in Keras 3, where `Model.predict` accepts a generator directly.
# Use the legacy method when the model still provides it, otherwise fall back
# to `predict`; the generator is passed positionally so both signatures match.
predict_fn = getattr(model, "predict_generator", model.predict)
pred_cat = predict_fn(te_generator, verbose=0)

# Reduce the last axis to the index of its largest value, for the model output
# and for the reference labels alike.
# NOTE(review): presumably `y_te` holds one-hot encoded, equally padded
# sequences so that it converts to a regular array -- confirm.
pred = np.argmax(pred_cat, axis=-1)
y_te_true = np.argmax(y_te, -1)

# Translate class indices back into tag names through `idx2tag`, keeping the
# nested one-list-per-sequence structure.
pred_tag = [
    [idx2tag[tag_idx] for tag_idx in sequence]
    for sequence in pred
]
y_te_true_tag = [
    [idx2tag[tag_idx] for tag_idx in sequence]
    for sequence in y_te_true
]

# Build the classification report from the reference and predicted tag
# sequences, then print it.
report = flat_classification_report(y_true=y_te_true_tag, y_pred=pred_tag)
print(report)