def testSem():
    """Evaluate keyword predictions against human semantic-relevance ratings.

    Reads the semantic test-set ids, maps each full test utterance id onto its
    semantic-set id (by dropping the leading field before the first "_"),
    collects the network's keyword probabilities restricted to the evaluation
    keywords, and reports EER / AP / P@10 / P@N / Spearman's rho against the
    subjective value and count BoW targets.  Results are printed and appended
    to `saveLog`.

    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `utils`,
    `getKWprob`, `saveLog`.
    """
    network.eval()
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_test_ids_fn
    # Materialise as lists: the code below needs len(), indexing and
    # ids_sem.index(...), which a Python 3 `map` object does not support.
    with open(ids_fn, "r") as fo:
        ids = [line.strip('\n') for line in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [line.strip('\n') for line in fo.readlines()]
    mapping = data_io.get_mapping(ddir.flickr8k_keywords, ddir.keywords_test)
    value_bow = data_io.get_semValues(ddir.labels_csv, ddir.keywords_test)
    count_bow = data_io.get_semCounts(ddir.counts_csv, ddir.keywords_test)
    pred_multi = np.zeros([len(ids_sem), len(mapping)])
    for i in range(len(ids)):
        idx = [ids[i]]
        # Drop the leading "<field>_" prefix to obtain the semantic-set id.
        z = idx[0].split("_")
        del z[0]
        idxnew = "_".join(z)
        if idxnew in ids_sem:
            Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
            Xs = np.transpose(Xs, (0, 2, 1))  # -> (batch, feat, time)
            # (Unused `Ys` / `caption_Ys` lookups from the original removed.)
            pred = getKWprob(Xs)
            # Keep only the evaluation keywords, in semantic-set order.
            predMapped = pred[0][mapping]
            pred_multi[ids_sem.index(idxnew)] = predMapped
    eer, ap, spearman, prec10, precN = utils.get_metrics(
        pred_multi, value_bow, count_bow)
    pcont = "Subjective ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f, Spearman's rho: %f" % (
        eer, ap, prec10, precN, spearman)
    print(pcont)
    with open(saveLog, "a+") as fo:
        fo.write(pcont + "\n")
def get_attn_weights(subset):
    """Dump per-utterance attention weights for `subset` to .mat files.

    For each utterance in the "dev" (or otherwise train) id list, runs the
    network on a batch of one, upsamples the attention map to the padded input
    length, strips the symmetric padding, renormalises the weights to sum to 1
    and saves them under `args.attn_dir`.

    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `run_net`,
    `vision_bow_vec`.  Returns 0.
    """
    network.eval()
    if subset == "dev":
        ids_fn = ddir.sp_dev_ids_fn
    else:
        ids_fn = ddir.sp_train_ids_fn
    # Materialise as a list so len()/slicing also work on Python 3.
    with open(ids_fn, "r") as fo:
        ids = [line.strip() for line in fo.readlines()]
    for i in range(len(ids)):
        idx = ids[i:i + 1]  # batch of one utterance
        Xs, lengths = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
        Xs = np.transpose(Xs, (0, 2, 1))  # -> (batch, feat, time)
        Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        _, _, attn_weights = run_net(Xs, Ys)
        # NOTE(review): F.upsample is deprecated in newer torch in favour of
        # F.interpolate; kept as-is for the pinned torch version.
        attn_weights = F.upsample(
            attn_weights, size=(1, Xs.shape[2]),
            mode="bilinear").cpu().data.numpy().reshape((1, -1))
        attn_name = os.path.join(args.attn_dir, idx[0])
        # MFCCs are centre-padded to n_pad frames; recover the unpadded span.
        lb = int(args.n_pad / 2 - lengths[0] / 2)
        rb = lb + int(lengths[0])
        span = attn_weights[:, lb:rb]
        unpadded_attn_weights = span / span.sum()  # renormalise to sum to 1
        sio.savemat(attn_name, {"weight": unpadded_attn_weights})
    return 0
def test(epoch, subset):
    """Evaluate exact-keyword (BoW) spotting on `subset` ("dev" or "test").

    Batches the utterances, collects keyword predictions and the two caption
    ground truths (exact-match and BoW-loss variants), and — in multitask mode
    on the test set — reports per-keyword EER / AP / P@10 / P@N and appends a
    CSV row to "aux_exact.csv".

    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `utils`,
    `run_net`, `vision_bow_vec`, `caption_bow_vec1`, `caption_bow_vec2`.
    Returns 0.
    """
    network.eval()
    ids_fn = ddir.sp_dev_ids_fn if subset == "dev" else ddir.sp_test_ids_fn
    # Materialise as a list so len()/slicing also work on Python 3.
    with open(ids_fn, "r") as fo:
        ids = [line.strip() for line in fo.readlines()]
    # BUG FIX: the original unpacked SEVEN empty lists into five targets
    # ("pred_multi, ..., vis_multi, = [], [], [], [], [], [], []"), which
    # raises ValueError on every call.  Only the four lists actually used
    # below are kept.
    pred_multi, grt_multi, pred_multiBoW, grt_multiBoW = [], [], [], []
    for i in range(0, len(ids), args.test_batch_size):
        idx = ids[i:i + args.test_batch_size]
        Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
        Xs = np.transpose(Xs, (0, 2, 1))
        # GT from vision model
        vision_Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        # GT for evaluating exact-match keyword prediction metrics
        caption_Ys1 = np.stack([caption_bow_vec1[x] for x in idx], axis=0)
        # GT for the BoW loss
        caption_Ys2 = np.stack([caption_bow_vec2[x] for x in idx], axis=0)
        if args.mt:
            l, lBoW, pred, predBoW = run_net(Xs, vision_Ys, caption_Ys2)
        else:
            l, pred = run_net(Xs, vision_Ys)
        pred_multi.append(pred)
        grt_multi.append(caption_Ys1)
        if args.mt:
            pred_multiBoW.append(predBoW)
            grt_multiBoW.append(caption_Ys2)
    if args.mt:
        pred_multiBoW = np.concatenate(pred_multiBoW, axis=0)
        grt_multiBoW = np.concatenate(grt_multiBoW, axis=0)
        # Zero-pad predictions up to the ground-truth vocabulary width.
        pad = np.zeros((pred_multiBoW.shape[0],
                        grt_multiBoW.shape[1] -
                        pred_multiBoW.shape[1])).astype(np.float32)
        pred_multiBoW = np.concatenate((pred_multiBoW, pad), axis=1)
        if subset == 'test':
            # Per-keyword spotting metrics (hence the transposes).
            eer, ap, prec10, precN = utils.get_metrics(pred_multiBoW.T,
                                                       grt_multiBoW.T)
            pcont5 = "Overall ratings (on BoW): EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f" % (
                eer, ap, prec10, precN)
            with open("aux_exact.csv", "a+") as fo:
                fo.write(args.mtType + ',' + str(args.alpha) + ',' +
                         str(args.n_bow2) + ',' + str(prec10 * 100) + ',' +
                         str(precN * 100) + ',' + str(eer * 100) + ',' +
                         str(ap * 100) + '\n')
            print(pcont5 + "\n")
    return 0
def testSem():
    """Evaluate the BoW-head predictions against subjective semantic ratings.

    Collects, per semantic-test utterance, the keyword-head probabilities
    (`pred_multi`), the BoW-head probabilities (`pred_multiBoW`, multitask
    mode only) and the vision-model targets (`vis_multi`), all restricted to
    the evaluation keywords, then scores the BoW predictions against the
    subjective value/count targets and appends a CSV row to "aux_sem.csv".

    NOTE(review): `mapping` is read from module scope here (unlike the sibling
    testSem variants, which compute it locally via data_io.get_mapping) —
    confirm a global `mapping` is defined before this is called.
    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `utils`,
    `getKWprob`, `vision_bow_vec`, `mapping`.
    """
    network.eval()
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_test_ids_fn
    # Materialise as lists: len(), indexing and .index() are needed below.
    with open(ids_fn, "r") as fo:
        ids = [line.strip('\n') for line in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [line.strip('\n') for line in fo.readlines()]
    predKwList = []
    value_bow, gtKwDict, captionsDict = data_io.get_semValues(
        ddir.labels_csv, ddir.keywords_test)
    count_bow = data_io.get_semCounts(ddir.counts_csv, ddir.keywords_test)
    pred_multi = np.zeros([len(ids_sem), len(mapping)])
    pred_multiBoW = np.zeros([len(ids_sem), len(mapping)])
    vis_multi = np.zeros([len(ids_sem), len(mapping)])
    for i in range(len(ids)):
        idx = [ids[i]]
        # Drop the leading "<field>_" prefix to obtain the semantic-set id.
        z = idx[0].split("_")
        del z[0]
        idxnew = "_".join(z)
        if idxnew in ids_sem:
            row = ids_sem.index(idxnew)
            Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
            Xs = np.transpose(Xs, (0, 2, 1))
            Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
            visMapped = Ys[0][mapping]
            if args.mt:
                pred, predBoW = getKWprob(Xs)
                # BUG FIX: only write the BoW row when the BoW head exists.
                # The original referenced `predBoWMapped` unconditionally,
                # raising NameError whenever args.mt was False.
                pred_multiBoW[row] = predBoW[0][mapping]
            else:
                pred = getKWprob(Xs)
            predMapped = pred[0][mapping]
            pred_multi[row] = predMapped
            vis_multi[row] = visMapped
    eer, ap, spearman, prec10, precN = utils.get_metrics(
        pred_multiBoW, value_bow, count_bow)
    pcont = "Subjective ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f, Spearman's rho: %f" % (
        eer, ap, prec10, precN, spearman)
    print(pcont)
    with open("aux_sem.csv", "a+") as fo:
        fo.write(args.mtType + ',' + str(args.alpha) + ',' +
                 str(args.n_bow2) + ',' + str(spearman) + ',' +
                 str(prec10 * 100) + ',' + str(precN * 100) + ',' +
                 str(eer * 100) + ',' + str(ap * 100) + '\n')
def getNegFeatures(train_ids_sp, idx):
    """Embed a batch of negative examples in both modalities.

    `idx` holds the current speech ids; `getNegEx` draws B*N negative speech
    ids from `train_ids_sp`.  The negatives' MFCCs are embedded with the
    speech model and their matching precomputed image representations (looked
    up in `repDict`) with the vision model.

    Returns a pair of (batch_size x nNegEx x feature_dim) tensors:
    speech features and vision features.
    """
    neg_ids = getNegEx(train_ids_sp, idx)  # B*N negative speech ids
    # Speech branch: load MFCCs, reorder to (batch, feat, time), add a
    # channel dimension and move to the target device.
    mfcc, _ = data_io.load_mfcc(ddir.mfcc_dir, neg_ids, args.n_pad)
    sp_in = torch.from_numpy(np.transpose(mfcc, (0, 2, 1)))
    sp_in = sp_in.to(args.device).unsqueeze(dim=1)
    # Vision branch: recover each image key from its speech id and fetch the
    # precomputed representation (B*N x 2048).
    img_keys = ['_'.join(one.split('_')[1:-1]) for one in neg_ids]
    vs_in = torch.tensor(itemgetter(*img_keys)(repDict)).squeeze(1).to(
        args.device)
    sp_feats = modelSp(sp_in)[0].view(args.batch_size, args.nNegEx, -1)  # B x N x 1024
    vs_feats = modelVs(vs_in).view(args.batch_size, args.nNegEx, -1)     # B x N x 1024
    return sp_feats, vs_feats
def test(epoch, subset):
    """Compute dev/test loss and precision/recall/F-score for keyword spotting.

    Batches the utterances, runs the network (attention, multitask, or plain
    variant depending on args), accumulates losses and predictions, zero-pads
    predictions up to the ground-truth vocabulary width, and prints/logs the
    resulting scores to `saveLog`.

    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `utils`,
    `run_net`, `vision_bow_vec`, `caption_bow_vec`, `saveLog`.
    Returns the keyword F-score.
    """
    network.eval()
    # NOTE(review): ids_sem is read but never used in this variant; the read
    # is kept so a missing file still fails loudly, as before.
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_dev_ids_fn if subset == "dev" else ddir.sp_test_ids_fn
    # Materialise as lists so len()/slicing also work on Python 3.
    with open(ids_fn, "r") as fo:
        ids = [line.strip() for line in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [line.strip() for line in fo.readlines()]
    lt, pred_multi, grt_multi, pred_multiBoW, ltKW, ltBoW = [], [], [], [], [], []
    for i in range(0, len(ids), args.test_batch_size):
        idx = ids[i:i + args.test_batch_size]
        Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
        Xs = np.transpose(Xs, (0, 2, 1))
        Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        caption_Ys = np.stack([caption_bow_vec[x] for x in idx], axis=0)
        if args.attn:
            l, pred, _ = run_net(Xs, Ys)
        elif args.mt:
            l, lBoW, pred, predBoW = run_net(Xs, Ys, caption_Ys)
        else:
            l, pred = run_net(Xs, Ys)
        pred_multi.append(pred)
        grt_multi.append(caption_Ys)
        if args.mt:
            pred_multiBoW.append(predBoW)
            # Weighted combination of keyword and BoW losses.
            lossTot = args.alpha * l + (1 - args.alpha) * lBoW
            ltKW.append(l.cpu().item())
            ltBoW.append(lBoW.cpu().item())
        else:
            lossTot = torch.tensor(l, requires_grad=True)
        lt.append(lossTot.cpu().item())
    pred_multi = np.concatenate(pred_multi, axis=0)
    grt_multi = np.concatenate(grt_multi, axis=0)
    # Zero-pad predictions up to the ground-truth vocabulary width.
    pred_multi = np.concatenate(
        (pred_multi,
         np.zeros((pred_multi.shape[0],
                   grt_multi.shape[1] -
                   pred_multi.shape[1])).astype(np.float32)),
        axis=1)
    if args.mt:
        pred_multiBoW = np.concatenate(pred_multiBoW, axis=0)
        pred_multiBoW = np.concatenate(
            (pred_multiBoW,
             np.zeros((pred_multiBoW.shape[0],
                       grt_multi.shape[1] -
                       pred_multiBoW.shape[1])).astype(np.float32)),
            axis=1)
    precision, recall, fscore = utils.get_fscore(pred_multi >= args.threshold,
                                                 grt_multi)
    if args.mt:
        precisionBoW, recallBoW, fscoreBoW = utils.get_fscore(
            pred_multiBoW >= args.threshold, grt_multi)
        pcont1 = "epoch: %d, dev loss: %.3f" % (epoch, sum(lt) / len(lt))
        pcont2 = "dev loss KW: %.3f, precision KW: %.3f, recall KW: %.3f, fscore KW: %.3f" % (
            sum(ltKW) / len(ltKW), precision, recall, fscore)
        pcont3 = "dev loss BoW: %.3f, precision BoW: %.3f, recall BoW: %.3f, fscore BoW: %.3f" % (
            sum(ltBoW) / len(ltBoW), precisionBoW, recallBoW, fscoreBoW)
        print(pcont1)
        print(pcont2)
        print(pcont3)
    else:
        pcont = "epoch: %d, dev loss: %.3f, precision: %.3f, recall: %.3f, fscore: %.3f" % (
            epoch, sum(lt) / len(lt), precision, recall, fscore)
        print(pcont)
    with open(saveLog, "a+") as fo:
        if args.mt:
            fo.write("\n" + pcont1 + "\n" + pcont2 + "\n" + pcont3 + "\n")
        else:
            fo.write(pcont + "\n")
    return fscore
def train(epoch):
    """Run one training epoch over the speech training ids.

    Per batch: loads MFCCs, builds vision / caption BoW targets, runs the
    network (attention, multitask, or plain variant), backpropagates the
    (optionally alpha-weighted) loss with gradient clipping, and every
    `args.print_interval` batches prints/logs running precision, recall and
    F-score before resetting the accumulators.

    Relies on module globals: `network`, `optimizer`, `ddir`, `args`,
    `data_io`, `utils`, `run_net`, `vision_bow_vec`, `caption_bow_vec`,
    `saveLog`.  Returns 0.
    """
    network.train()
    # Materialise as a list so len()/indexing also work on Python 3.
    with open(ddir.sp_train_ids_fn, "r") as fo:
        train_ids = [line.strip() for line in fo.readlines()]
    # NOTE(review): the random permutation is immediately overwritten, so the
    # training order is deterministic; delete the second assignment to
    # re-enable shuffling.
    perm = np.random.permutation(len(train_ids)).tolist()
    perm = range(len(train_ids))
    lt, pred_multi, grt_multi, pred_multiBoW, ltKW, ltBoW = [], [], [], [], [], []
    for i in range(0, len(perm), args.batch_size):
        optimizer.zero_grad()
        # Must be a list: idx is iterated several times below, which would
        # exhaust a Python 3 `map` object after the first pass.
        idx = [train_ids[p] for p in perm[i:i + args.batch_size]]
        train_Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
        train_Xs = np.transpose(train_Xs, (0, 2, 1))
        train_Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        caption_Ys = np.stack([caption_bow_vec[x] for x in idx], axis=0)
        if args.attn:
            l, pred, _ = run_net(train_Xs, train_Ys)
        elif args.mt:
            l, lBoW, pred, predBoW = run_net(train_Xs, train_Ys,
                                             YsBoW=caption_Ys)
        else:
            l, pred = run_net(train_Xs, train_Ys)
        pred_multi.append(pred)
        grt_multi.append(caption_Ys)
        if args.mt:
            pred_multiBoW.append(predBoW)
            # Weighted combination of keyword and BoW losses.
            lossTot = args.alpha * l + (1 - args.alpha) * lBoW
            ltKW.append(l.cpu().item())
            ltBoW.append(lBoW.cpu().item())
        else:
            lossTot = torch.tensor(l, requires_grad=True)
        lossTot.backward()
        # NOTE(review): clip_grad_norm is deprecated in newer torch in favour
        # of clip_grad_norm_; kept as-is for the pinned torch version.
        torch.nn.utils.clip_grad_norm(network.parameters(), 5.0)
        optimizer.step()
        lt.append(lossTot.cpu().item())
        if len(lt) % args.print_interval == 0:
            pred_multi = np.concatenate(pred_multi, axis=0)
            grt_multi = np.concatenate(grt_multi, axis=0)
            # Zero-pad predictions up to the ground-truth vocabulary width.
            pred_multi = np.concatenate(
                (pred_multi,
                 np.zeros((pred_multi.shape[0],
                           grt_multi.shape[1] -
                           pred_multi.shape[1])).astype(np.float32)),
                axis=1)
            if args.mt:
                pred_multiBoW = np.concatenate(pred_multiBoW, axis=0)
                pred_multiBoW = np.concatenate(
                    (pred_multiBoW,
                     np.zeros((pred_multiBoW.shape[0],
                               grt_multi.shape[1] -
                               pred_multiBoW.shape[1])).astype(np.float32)),
                    axis=1)
            # Consistency fix: use utils.get_fscore like every other caller
            # in this module (the bare get_fscore name was used only here).
            precision, recall, fscore = utils.get_fscore(
                pred_multi >= args.threshold, grt_multi)
            if args.mt:
                precisionBoW, recallBoW, fscoreBoW = utils.get_fscore(
                    pred_multiBoW >= args.threshold, grt_multi)
                pcont1 = "epoch: %d, train loss: %.3f" % (epoch,
                                                          sum(lt) / len(lt))
                pcont2 = "train loss KW: %.3f, precision KW: %.3f, recall KW: %.3f, fscore KW: %.3f" % (
                    sum(ltKW) / len(ltKW), precision, recall, fscore)
                pcont3 = "train loss BoW: %.3f, precision BoW: %.3f, recall BoW: %.3f, fscore BoW: %.3f" % (
                    sum(ltBoW) / len(ltBoW), precisionBoW, recallBoW, fscoreBoW)
                print(pcont1)
                print(pcont2)
                print(pcont3)
            else:
                pcont = "epoch: %d, train loss: %.3f, precision: %.3f, recall: %.3f, fscore: %.3f" % (
                    epoch, sum(lt) / len(lt), precision, recall, fscore)
                print(pcont)
            with open(saveLog, "a+") as fo:
                if args.mt:
                    fo.write("\n" + pcont1 + "\n" + pcont2 + "\n" + pcont3 +
                             "\n")
                else:
                    fo.write(pcont + "\n")
            # Reset the running accumulators for the next print window.
            lt, pred_multi, grt_multi, pred_multiBoW, ltKW, ltBoW = [], [], [], [], [], []
    return 0
def testSem():
    """Evaluate against subjective ratings and log a per-utterance analysis.

    Like the other testSem variants, but additionally writes to `saveLog`,
    for every semantic-test utterance: the keywords above the decision
    threshold, the top-n predicted keywords (n = number of ground-truth
    keywords), the probabilities of the ground-truth keywords, and the ground
    truth itself.  Final metrics are printed, appended to "tempcsv.csv" and to
    `saveLog`.

    NOTE(review): `mapping` and `kwList` are read from module scope — confirm
    they are defined globally before this is called.
    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `utils`,
    `getKWprob`, `mapping`, `kwList`, `saveLog`.
    """
    network.eval()
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_test_ids_fn
    # Materialise as lists: len(), indexing and .index() are needed below.
    with open(ids_fn, "r") as fo:
        ids = [line.strip('\n') for line in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [line.strip('\n') for line in fo.readlines()]
    predKwList = []
    value_bow, gtKwList = data_io.get_semValues(ddir.labels_csv,
                                                ddir.keywords_test)
    count_bow = data_io.get_semCounts(ddir.counts_csv, ddir.keywords_test)
    pred_multi = np.zeros([len(ids_sem), len(mapping)])
    for i in range(len(ids)):
        idx = [ids[i]]
        # Drop the leading "<field>_" prefix to obtain the semantic-set id.
        z = idx[0].split("_")
        del z[0]
        idxnew = "_".join(z)
        if idxnew in ids_sem:
            row = ids_sem.index(idxnew)
            with open(saveLog, "a+") as fo:
                fo.write(str(idxnew) + "\n")
            Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
            Xs = np.transpose(Xs, (0, 2, 1))
            # (Unused `Ys` / `caption_Ys` lookups from the original removed.)
            pred = getKWprob(Xs)
            predMapped = pred[0][mapping]
            pred_multi[row] = predMapped
            ## Printing results for analysis purposes
            # 1) Keywords whose probability exceeds the decision threshold.
            over = np.argwhere(predMapped > args.threshold)
            temp = ', '.join(kwList[k[0]] + '(' + str(predMapped[k[0]]) + ')'
                             for k in over)
            out_str2 = 'Pred (' + str(args.threshold) + '): ' + temp + '\n'
            # 2) Top-n keywords, n = number of ground-truth keywords.
            n = len(gtKwList[row])
            top = np.argpartition(predMapped, -n)[-n:]
            temp = ', '.join(kwList[k] + '(' + str(predMapped[k]) + ')'
                             for k in top)
            out_str3 = 'Pred (top n): ' + temp + '\n'
            # 3) Probabilities assigned to the ground-truth keywords.
            temp = ', '.join(word + '(' +
                             str(predMapped[kwList.index(word)]) + ')'
                             for word in gtKwList[row])
            out_str4 = 'GT probabilities: ' + temp + '\n'
            out_str1 = 'GT: ' + ', '.join(gtKwList[row]) + '\n'
            with open(saveLog, "a+") as fo:
                fo.write(out_str1 + out_str2 + out_str3 + out_str4 + "\n")
            predKwList = []
    eer, ap, spearman, prec10, precN = utils.get_metrics(
        pred_multi, value_bow, count_bow)
    pcont = "Subjective ratings: EER: %f, Average precision: %f, Precision@10: %f, Precision@N: %f, Spearman's rho: %f" % (
        eer, ap, prec10, precN, spearman)
    print(pcont)
    with open("tempcsv.csv", "a+") as fo:
        fo.write(args.mtType + ',' + str(args.alpha) + ',' +
                 str(100 * spearman) + ',' + str(100 * prec10) + ',' +
                 str(100 * precN) + ',' + str(100 * eer) + ',' +
                 str(100 * ap) + '\n')
    with open(saveLog, "a+") as fo:
        fo.write(pcont + "\n")
def test(epoch, subset):
    """Evaluate keyword spotting on `subset`, incl. keyword-subset metrics.

    In addition to the full-vocabulary precision/recall/F-score, projects the
    predictions and caption ground truth onto the evaluation keyword subsets
    and reports their F-score; on the last epoch or on the test set it also
    computes EER / AP / P@10 / P@N and appends a CSV row to "tempcsvap.csv".
    All reports are printed and appended to `saveLog`.

    NOTE(review): `mapping` and `mapping1` are read from module scope —
    confirm they are defined globally before this is called.
    Relies on module globals: `network`, `ddir`, `args`, `data_io`, `utils`,
    `run_net`, `vision_bow_vec`, `caption_bow_vec`, `saveLog`.
    Returns the keyword-subset F-score.
    """
    network.eval()
    pcont4 = " "  # placeholder so the final print/log works when not set
    # NOTE(review): ids_sem is read but never used in this variant; the read
    # is kept so a missing file still fails loudly, as before.
    ids_fn_sem = ddir.sp_testSem_ids_fn
    ids_fn = ddir.sp_dev_ids_fn if subset == "dev" else ddir.sp_test_ids_fn
    # Materialise as lists so len()/slicing also work on Python 3.
    with open(ids_fn, "r") as fo:
        ids = [line.strip() for line in fo.readlines()]
    with open(ids_fn_sem, "r") as fo:
        ids_sem = [line.strip() for line in fo.readlines()]
    (lt, pred_multi, grt_multi, pred_multiBoW, ltKW, ltBoW,
     pred_multi_kw, grt_multi_kw) = [], [], [], [], [], [], [], []
    for i in range(0, len(ids), args.test_batch_size):
        idx = ids[i:i + args.test_batch_size]
        Xs, _ = data_io.load_mfcc(ddir.mfcc_dir, idx, args.n_pad)
        Xs = np.transpose(Xs, (0, 2, 1))
        Ys = np.stack([vision_bow_vec[x[4:-2]] for x in idx], axis=0)
        caption_Ys = np.stack([caption_bow_vec[x] for x in idx], axis=0)
        if args.mt:
            l, lBoW, pred, predBoW, _ = run_net(Xs, Ys, caption_Ys)
        else:
            l, pred, _ = run_net(Xs, Ys)
        pred_multi.append(pred)
        grt_multi.append(caption_Ys)
        if args.mt:
            pred_multiBoW.append(predBoW)
            # Weighted combination of keyword and BoW losses.
            lossTot = args.alpha * l + (1 - args.alpha) * lBoW
            ltKW.append(l.cpu().item())
            ltBoW.append(lBoW.cpu().item())
        else:
            lossTot = torch.tensor(l, requires_grad=True)
        lt.append(lossTot.cpu().item())
        # Project predictions / ground truth onto the evaluation keyword
        # subsets (column selection via transpose-index-transpose).
        predMapped = (pred.T[mapping]).T
        pred_multi_kw.append(predMapped)
        grtMapped = (caption_Ys.T[mapping1]).T
        grt_multi_kw.append(grtMapped)
    pred_multi = np.concatenate(pred_multi, axis=0)
    grt_multi = np.concatenate(grt_multi, axis=0)
    # Zero-pad predictions up to the ground-truth vocabulary width.
    pred_multi = np.concatenate(
        (pred_multi,
         np.zeros((pred_multi.shape[0],
                   grt_multi.shape[1] -
                   pred_multi.shape[1])).astype(np.float32)),
        axis=1)
    pred_multi_kw = np.concatenate(pred_multi_kw, axis=0)
    grt_multi_kw = np.concatenate(grt_multi_kw, axis=0)
    pred_multi_kw = np.concatenate(
        (pred_multi_kw,
         np.zeros((pred_multi_kw.shape[0],
                   grt_multi_kw.shape[1] -
                   pred_multi_kw.shape[1])).astype(np.float32)),
        axis=1)
    precision1, recall1, fscore1 = utils.get_fscore(
        pred_multi_kw >= args.threshold, grt_multi_kw)
    # Expensive ranking metrics only on the last epoch or the test set.
    if epoch == args.epoch - 1 or subset == "test":
        ap1 = utils.get_metrics(pred_multi, grt_multi, flag='onlyAP')
        eer, ap, prec10, precN = utils.get_metrics(pred_multi_kw,
                                                   grt_multi_kw)
        pcont4 = "Overall ratings: EER: %f, Average precision: %f, Average precision on all keywords: %f, Precision@10: %f, Precision@N: %f" % (
            eer, ap, ap1, prec10, precN)
        with open("tempcsvap.csv", "a+") as fo:
            fo.write(args.mtType + ',' + str(args.alpha) + ',' +
                     str(ap1 * 100) + ',' + str(ap * 100) + '\n')
    if args.mt:
        pred_multiBoW = np.concatenate(pred_multiBoW, axis=0)
        pred_multiBoW = np.concatenate(
            (pred_multiBoW,
             np.zeros((pred_multiBoW.shape[0],
                       grt_multi.shape[1] -
                       pred_multiBoW.shape[1])).astype(np.float32)),
            axis=1)
    precision, recall, fscore = utils.get_fscore(pred_multi >= args.threshold,
                                                 grt_multi)
    if args.mt:
        precisionBoW, recallBoW, fscoreBoW = utils.get_fscore(
            pred_multiBoW >= args.threshold, grt_multi)
        pcont1 = "epoch: %d, dev loss: %.3f" % (epoch, sum(lt) / len(lt))
        pcont2 = "dev loss KW: %.3f, precision KW: %.3f, recall KW: %.3f, fscore KW: %.3f" % (
            sum(ltKW) / len(ltKW), precision, recall, fscore)
        pcont3 = "dev loss BoW: %.3f, precision BoW: %.3f, recall BoW: %.3f, fscore BoW: %.3f" % (
            sum(ltBoW) / len(ltBoW), precisionBoW, recallBoW, fscoreBoW)
        print(pcont1)
        print(pcont2)
        print(pcont3)
    else:
        pcont = "epoch: %d, dev loss: %.3f, precision: %.3f, recall: %.3f, fscore: %.3f" % (
            epoch, sum(lt) / len(lt), precision, recall, fscore)
        print(pcont)
    print(pcont4)
    with open(saveLog, "a+") as fo:
        if args.mt:
            fo.write("\n" + pcont1 + "\n" + pcont2 + "\n" + pcont3 + "\n" +
                     pcont4 + "\n")
        else:
            fo.write(pcont + "\n" + pcont4 + "\n")
    return fscore1