# ---------------------------------------------------------------------------
# Evaluate the trained model on the test set and print a per-tag report.
# ---------------------------------------------------------------------------

# Load the test table; empty cells are forward-filled from the row above.
data_te = pd.read_csv('df_concat_test.csv', encoding="latin1").ffill()

# Every word that is not contained in `words` is replaced by the "UNK" token.
data_te.loc[~data_te['Word'].isin(words), 'Word'] = "UNK"
# (Slice `data_te` here to evaluate on a subset for a quick smoke test.)

# Distinct publication ids. The same list drives both the generator and the
# ground-truth collection below, so both iterate publications in one order.
te_pub = list(set(data_te["Pub_id"].values))
test = subdata_getter(te_pub, data_te)
te_generator = DataGenerator(te_pub, test, Type='test')

# Collect the true tags, keeping per publication only the largest multiple of
# BATCH_SIZE entries.
# NOTE(review): presumably DataGenerator drops the trailing partial batch, so
# the labels are trimmed to stay aligned with the predictions -- confirm.
y_temp = test['tags']
y_te = []
for pub_id in te_pub:
    pub_tags = y_temp[pub_id]
    # Integer floor division: exact, no float round trip.
    length = (len(pub_tags) // BATCH_SIZE) * BATCH_SIZE
    # extend() appends in place; `y_te = y_te + ...` copied the whole list on
    # every iteration.
    y_te.extend(pub_tags[:length])

# Predict on the test generator.
# NOTE(review): `predict_generator` is deprecated in newer Keras releases in
# favour of `model.predict`; kept as-is to match the Keras version in use.
pred_cat = model.predict_generator(generator=te_generator, verbose=0)

# Reduce the last axis to the index of its largest entry, for predictions and
# for the true tags alike.
# NOTE(review): assumes the true tags are one-hot encoded along the last
# axis -- confirm against subdata_getter.
pred = np.argmax(pred_cat, axis=-1)
y_te_true = np.argmax(y_te, -1)

# Convert tag indices back to tag names via `idx2tag`.
pred_tag = [[idx2tag[i] for i in row] for row in pred]
y_te_true_tag = [[idx2tag[i] for i in row] for row in y_te_true]

report = flat_classification_report(y_pred=pred_tag, y_true=y_te_true_tag)
print(report)