import numpy

import SVRankerSoft
import TrainEM

# GetResults, PrintResults and PrintResultsStats are assumed to be helper
# functions defined elsewhere in this module.


def PerformCV(qid_file, diff_feat_dir, feat_file, true_labels_file, noisy_labels_dir, batch_size, count_annts):
    """
    qid_file: the qid file location
    diff_feat_dir: directory where the diff features are stored
    feat_file: the features file
    true_labels_file: the true labels file used for test set evaluation
    noisy_labels_dir: directory containing noisy pairwise preferences
        corresponding to the features in diff_feat_dir
    batch_size: number of unique qids per cross-validation batch
    count_annts: number of annotators

    Example values:
    qid_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/qids'
    diff_feat_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_features/labels/'
    feat_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/features'
    true_labels_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/labels'
    noisy_labels_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_labels_pairwise'
    batch_size=1
    count_annts=6
    """

    qids = numpy.genfromtxt(qid_file, dtype='int')
    qids_unique = numpy.unique(qids)
    features = numpy.genfromtxt(feat_file, delimiter=',')
    labels = numpy.genfromtxt(true_labels_file, delimiter=',')

    if numpy.remainder(len(qids_unique), batch_size):
        print "Please provide a batch size that divides the number of unique qids"
        return

    num_batches = len(qids_unique)/batch_size
    mean_result_storage = numpy.zeros((4, 5+count_annts))
    # 5 columns for the True, EM, EMRelEst, Borda and Majority vote results,
    # plus one column per annotator
    # 2 rows for correct pairwise identification: first for test, second for dev
    # 2 rows for spearman correlation: first for test, second for dev

    for i in range(num_batches):

        # Determine the qids in the test, dev and train sets
        test_id = i
        test_batch_qids = qids_unique[numpy.arange(batch_size*test_id, batch_size*(test_id+1))]
        dev_id = numpy.remainder(i+1, num_batches)
        dev_batch_qids = qids_unique[numpy.arange(batch_size*dev_id, batch_size*(dev_id+1))]
        train_batch_qids = numpy.setdiff1d(qids_unique, numpy.union1d(test_batch_qids, dev_batch_qids))

        # Find the features and labels for the test and dev sets
        test_features = features[numpy.in1d(qids, test_batch_qids), :]
        test_labels = numpy.matrix(labels[numpy.in1d(qids, test_batch_qids)]).T
        test_diff_features = numpy.empty([0, test_features.shape[1]])
        for test_batch_qid in test_batch_qids:
            feature_diff_file = diff_feat_dir + '/labels/' + str(int(test_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            test_diff_features = numpy.vstack((test_diff_features, feature_diff))

        dev_features = features[numpy.in1d(qids, dev_batch_qids), :]
        dev_labels = numpy.matrix(labels[numpy.in1d(qids, dev_batch_qids)]).T
        dev_diff_features = numpy.empty([0, dev_features.shape[1]])
        for dev_batch_qid in dev_batch_qids:
            feature_diff_file = diff_feat_dir + '/labels/' + str(int(dev_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            dev_diff_features = numpy.vstack((dev_diff_features, feature_diff))

        # Get all train set features together
        train_diff_features = numpy.empty([0, test_features.shape[1]])
        annt_labels = numpy.empty([count_annts, 0]).tolist()
        for train_batch_qid in train_batch_qids:
            feature_diff_file = diff_feat_dir + '/labels/' + str(int(train_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            train_diff_features = numpy.vstack((train_diff_features, feature_diff))

            # Get the labels on the train set from the different annotators
            for annt_id in range(count_annts):
                cur_annt_labels = annt_labels[annt_id]
                annt_labels_for_qid_file = noisy_labels_dir + '/' + str(train_batch_qid) + '.noisy_labels' + str(annt_id+1)
                annt_labels_for_qid = numpy.genfromtxt(annt_labels_for_qid_file)
                cur_annt_labels = numpy.hstack((cur_annt_labels, annt_labels_for_qid))
                annt_labels[annt_id] = cur_annt_labels

        print annt_labels[0].shape
        # NOTE: the loop below throws away the loaded annotator labels and
        # replaces them with random binary labels (roughly 70% ones); it looks
        # like a leftover debugging/sanity check
        for annt_id in range(count_annts):
            annt_labels[annt_id] = numpy.ravel(numpy.random.uniform(0, 1, size=(train_diff_features.shape[0], 1)) > .3)*1
        print annt_labels[0].shape

        ext_diff_feats = numpy.hstack((train_diff_features, numpy.ones((train_diff_features.shape[0], 1))))

        # Training model using the TrainEM function
        max_iter = 20
        w, k = TrainEM.TrainModel(ext_diff_feats, annt_labels, max_iter)
        print numpy.mean(k > 0.5)

        w = .01*numpy.ones((1, 1+train_diff_features.shape[1]))
        n_epochs, learning_rate, lambda_w = 2800, .01, .001
        last_dev_result = 0
        for train_epoch in range(n_epochs):
            w = SVRankerSoft.svr_optimization(train_diff_features, 1*(k > 0.5), w, .01, 1, .001)
            cur_dev_result = GetResults(w, dev_diff_features)
            delta_performance = cur_dev_result - last_dev_result
            # early stopping: break once the dev performance stops improving
            if delta_performance < 0:
                print 'break at iter ', train_epoch
                break
            last_dev_result = cur_dev_result

        print 'Basic multiple annotator Test Results:'
        mean_result_storage[2, 1] += PrintResultsStats(w, test_features, test_labels)
        mean_result_storage[0, 1] += PrintResults(w, test_diff_features)

        print 'Basic multiple annotator Dev Results:'
        mean_result_storage[3, 1] += PrintResultsStats(w, dev_features, dev_labels)
        mean_result_storage[1, 1] += PrintResults(w, dev_diff_features)

        print 'Correct identifications on train set: %f' % (numpy.mean(k > .5))
        print '-----------------------------'
        print ''

        # # Training model using the TrainEMRelEst function
        # w, k = TrainEMRelEst.TrainModel(ext_diff_feats, annt_labels, max_iter)
        # w = SVRankerSoft.svr_optimization(train_diff_features, numpy.around(k), w, .02, 2000, .001)
        #
        # print 'Rel est multiple annotator Test Results:'
        # mean_result_storage[2, 2] += PrintResultsStats(w, test_features, test_labels)
        # mean_result_storage[0, 2] += PrintResults(w, test_diff_features)
        #
        # print 'Rel est multiple annotator Dev Results:'
        # mean_result_storage[3, 2] += PrintResultsStats(w, dev_features, dev_labels)
        # mean_result_storage[1, 2] += PrintResults(w, dev_diff_features)
        #
        # print 'Correct identifications on train set: %f' % (numpy.mean(k > .5))
        #
        # print '-----------------------------'
        # print ''

        # Getting results using majority vote
        majority_vote = ((numpy.mean(numpy.matrix(annt_labels), axis=0) > .5)*1).T
        w = .01*numpy.ones((1, 1+train_diff_features.shape[1]))
        #w = SVRankerSoft.svr_optimization(train_diff_features, majority_vote, w, .02, 2000, .001)
        last_dev_result = 0
        for train_epoch in range(n_epochs):
            w = SVRankerSoft.svr_optimization(train_diff_features, majority_vote, w, .01, 1, .001)
            cur_dev_result = GetResults(w, dev_diff_features)
            delta_performance = cur_dev_result - last_dev_result
            # early stopping: break once the dev performance stops improving
            if delta_performance < 0:
                print 'break at iter ', train_epoch
                break
            last_dev_result = cur_dev_result

        print 'Majority vote Test Results:'
        mean_result_storage[2, 4] += PrintResultsStats(w, test_features, test_labels)
        mean_result_storage[0, 4] += PrintResults(w, test_diff_features)

        print 'Majority vote Dev Results:'
        mean_result_storage[3, 4] += PrintResultsStats(w, dev_features, dev_labels)
        mean_result_storage[1, 4] += PrintResults(w, dev_diff_features)

        print 'Correct identifications on train set: %f' % (numpy.mean(majority_vote))
        print '-----------------------------'
        print ''

        # Getting results using true labels during training
        w = .01*numpy.ones((1, 1+train_diff_features.shape[1]))
        print 'training true baseline model for iter ... %d' % (i)
        n_epochs, learning_rate, lambda_w = 2000, .01, .001
        #w = SVRanker3.svr_optimization(train_diff_features, w, learning_rate, n_epochs, lambda_w)
        # the diff features are oriented so that the true pairwise label is
        # always 1, hence the all-ones label vector here
        w = SVRankerSoft.svr_optimization(train_diff_features, numpy.ones(majority_vote.shape), w, learning_rate, n_epochs, lambda_w)

        print 'True Baseline Test Results:'
        mean_result_storage[2, 0] += PrintResultsStats(w, test_features, test_labels)
        mean_result_storage[0, 0] += PrintResults(w, test_diff_features)

        print 'True Baseline Dev Results:'
        mean_result_storage[3, 0] += PrintResultsStats(w, dev_features, dev_labels)
        mean_result_storage[1, 0] += PrintResults(w, dev_diff_features)
        print '-----------------------------'
        print ''

        print 'Getting results for each annotator'
        # Getting results on each annotator
        w_borda = numpy.zeros((1, 1+train_diff_features.shape[1]))
        for noisy_annt_id in range(count_annts):
            print 'at noisy annotator id: %d' % (noisy_annt_id)
            # train_diff_features = numpy.empty([0, test_features.shape[1]])
            # for train_batch_qid in train_batch_qids:
            #     feature_diff_file = diff_feat_dir + '/noisy_labels' + str(noisy_annt_id+1) + '/' + str(int(train_batch_qid)) + '.features'
            #     feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            #     train_diff_features = numpy.vstack((train_diff_features, feature_diff))

            cur_annt_labels = numpy.matrix(annt_labels[noisy_annt_id]).T
            w = .01*numpy.ones((1, 1+train_diff_features.shape[1]))  # initial w
            w = SVRankerSoft.svr_optimization(train_diff_features, cur_annt_labels, w, learning_rate, n_epochs, lambda_w)

            print 'Annotator %d Test Results:' % (noisy_annt_id)
            mean_result_storage[2, 5+noisy_annt_id] += PrintResultsStats(w, test_features, test_labels)
            mean_result_storage[0, 5+noisy_annt_id] += PrintResults(w, test_diff_features)

            print 'Annotator %d Dev Results:' % (noisy_annt_id)
            mean_result_storage[3, 5+noisy_annt_id] += PrintResultsStats(w, dev_features, dev_labels)
            mean_result_storage[1, 5+noisy_annt_id] += PrintResults(w, dev_diff_features)

            # Borda-style combination: sum the L2-normalized annotator models
            w_borda = w_borda + w/numpy.linalg.norm(w, 2)
            print 'Correct identifications on train set: %f' % (numpy.mean(cur_annt_labels))

        print 'Borda count Test Results:'
        mean_result_storage[2, 3] += PrintResultsStats(w_borda, test_features, test_labels)
        mean_result_storage[0, 3] += PrintResults(w_borda, test_diff_features)

        print 'Borda count Dev Results:'
        mean_result_storage[3, 3] += PrintResultsStats(w_borda, dev_features, dev_labels)
        mean_result_storage[1, 3] += PrintResults(w_borda, dev_diff_features)

        print 'Running mean of performances:'
        print mean_result_storage
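# Usage sketch, not from the original source: it assumes this function lives
# in its own script together with the GetResults/PrintResults/PrintResultsStats
# helpers, and reuses the example paths from the docstring above. Note that
# the loaders append '/labels/' to diff_feat_dir themselves, so the trailing
# 'labels/' from the docstring example is omitted here.
if __name__ == '__main__':
    PerformCV(qid_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/qids',
              diff_feat_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_features',
              feat_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/features',
              true_labels_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/labels',
              noisy_labels_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_labels_pairwise',
              batch_size=1,
              count_annts=6)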
import numpy

import SVRankerSoft

# InitializeK, InitializeW, InitializeA, SigmoidProb and ComputeA are assumed
# to be helper functions defined elsewhere in this module.


def TrainModel(ext_diff_feats, annt_comparison_labels, max_iter=100):
    """
    ext_diff_feats: differences between features, extended with a column of ones
    annt_comparison_labels: comparison labels (0/1) showing whether an annotator
    said x_g > x_l (labeled 1) or x_g < x_l (labeled 0)
    """

    N = ext_diff_feats.shape[0]  # number of data point comparisons
    R = len(annt_comparison_labels)  # number of annotators
    D = ext_diff_feats.shape[1]  # feature dimensionality

    # Initialization
    k = InitializeK(N)
    w = InitializeW(D)
    A = numpy.empty((R, 0)).tolist()
    for i in range(R):
        A[i] = InitializeA(N)

    convergence_flag = 1
    iter_counter = 0
    while convergence_flag:
        iter_counter = iter_counter + 1

        # E step: estimating k
        model_probs = SigmoidProb(ext_diff_feats, w)
        prod_probs_E1 = model_probs  # probability that the assumed diff is correct
        prod_probs_E0 = numpy.ones(model_probs.shape) - model_probs  # probability that the assumed diff is incorrect
        for i in range(R):
            A_cur = A[i]
            cur_annt_labels = annt_comparison_labels[i]
            cur_label_mat = numpy.vstack((numpy.logical_not(cur_annt_labels), cur_annt_labels))
            # For E1: if an annotator said 0, the flipping probability is
            # multiplied in, and otherwise the non-flipping probability
            cur_annt_probs_E1 = numpy.matrix(numpy.sum(numpy.multiply(A_cur, cur_label_mat), axis=0))
            prod_probs_E1 = numpy.multiply(prod_probs_E1, cur_annt_probs_E1.T)
            # For E0: if an annotator said 1, the flipping probability is
            # multiplied in, and otherwise the non-flipping probability
            cur_annt_probs_E0 = numpy.matrix(numpy.sum(numpy.multiply(A_cur, numpy.logical_not(cur_label_mat)), axis=0))
            prod_probs_E0 = numpy.multiply(prod_probs_E0, cur_annt_probs_E0.T)
        k_term1 = prod_probs_E1
        k_term2 = prod_probs_E1 + prod_probs_E0  #+ .001*numpy.ones(prod_probs_E1.shape)
        k = numpy.divide(k_term1, k_term2)

        # M step
        # Estimating w
        diff_feats = ext_diff_feats[:, :-1]  # unfortunately the ones are appended again in SVRankerSoft
        learning_rate = 0.02
        n_epochs = 20
        lambda_w = .001
        w = SVRankerSoft.svr_optimization(diff_feats, k, w, learning_rate, n_epochs, lambda_w)

        # Estimating the A's
        for i in range(R):
            cur_annt_labels = annt_comparison_labels[i]
            A[i] = ComputeA(k, cur_annt_labels, ext_diff_feats)

        if iter_counter > max_iter:
            convergence_flag = 0

    print 'Finished training'
    for i in range(R):
        print numpy.mean(A[i], axis=1)

    return w, k
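# Minimal sketch of the E-step posterior above for a single comparison,
# simplified (as an assumption, not from the original code) to one scalar
# reliability a_i = P(annotator label == true preference) per annotator,
# whereas TrainModel keeps per-comparison probabilities A[i]. The helper
# names sigmoid and posterior_k are illustrative.

def sigmoid(x):
    return 1.0/(1.0 + numpy.exp(-x))

def posterior_k(w, x, annotator_labels, reliabilities):
    p1 = sigmoid(numpy.dot(w, x))  # ranker prior: P(true preference = 1)
    like1, like0 = p1, 1.0 - p1
    for y, a in zip(annotator_labels, reliabilities):
        like1 *= a if y == 1 else (1.0 - a)  # P(y | true preference = 1)
        like0 *= a if y == 0 else (1.0 - a)  # P(y | true preference = 0)
    return like1/(like1 + like0)

# Example: two fairly reliable annotators agreeing on label 1 pull the
# posterior well above the ranker prior of sigmoid(0.3) ~ 0.57
print posterior_k(numpy.array([.5, -.2]), numpy.array([1., 1.]), [1, 1], [.8, .7])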
# ---------------------------------------------------------------------------
# Variant of PerformCV used as a sanity check: it drops the annotator models
# and repeatedly trains the true-label baseline with synthetic labels whose
# correctness is swept from 100% down to 55%.

import numpy

import SVRankerSoft


def PerformCV(qid_file, diff_feat_dir, feat_file, true_labels_file, noisy_labels_dir, batch_size, count_annts):
    """
    qid_file: the qid file location
    diff_feat_dir: directory where the diff features are stored
    feat_file: the features file
    true_labels_file: the true labels file used for test set evaluation
    noisy_labels_dir: directory containing noisy pairwise preferences
        corresponding to the features in diff_feat_dir
    batch_size: number of unique qids per cross-validation batch
    count_annts: number of annotators

    Example values:
    qid_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/qids'
    diff_feat_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_features/labels/'
    feat_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/features'
    true_labels_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/labels'
    noisy_labels_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_labels_pairwise'
    batch_size=1
    count_annts=6
    """

    qids = numpy.genfromtxt(qid_file, dtype='int')
    qids_unique = numpy.unique(qids)
    features = numpy.genfromtxt(feat_file, delimiter=',')
    labels = numpy.genfromtxt(true_labels_file, delimiter=',')

    if numpy.remainder(len(qids_unique), batch_size):
        print "Please provide a batch size that divides the number of unique qids"
        return

    num_batches = len(qids_unique)/batch_size
    mean_result_storage = numpy.zeros((4, 5+count_annts))
    # 5 columns for the True, EM, EMRelEst, Borda and Majority vote results,
    # plus one column per annotator
    # 2 rows for correct pairwise identification: first for test, second for dev
    # 2 rows for spearman correlation: first for test, second for dev

    for i in range(num_batches):

        # Determine the qids in the test, dev and train sets
        test_id = i
        test_batch_qids = qids_unique[numpy.arange(batch_size*test_id, batch_size*(test_id+1))]
        dev_id = numpy.remainder(i+1, num_batches)
        dev_batch_qids = qids_unique[numpy.arange(batch_size*dev_id, batch_size*(dev_id+1))]
        train_batch_qids = numpy.setdiff1d(qids_unique, numpy.union1d(test_batch_qids, dev_batch_qids))

        # Find the features and labels for the test and dev sets
        test_features = features[numpy.in1d(qids, test_batch_qids), :]
        test_labels = numpy.matrix(labels[numpy.in1d(qids, test_batch_qids)]).T
        test_diff_features = numpy.empty([0, test_features.shape[1]])
        for test_batch_qid in test_batch_qids:
            feature_diff_file = diff_feat_dir + '/labels/' + str(int(test_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            test_diff_features = numpy.vstack((test_diff_features, feature_diff))

        dev_features = features[numpy.in1d(qids, dev_batch_qids), :]
        dev_labels = numpy.matrix(labels[numpy.in1d(qids, dev_batch_qids)]).T
        dev_diff_features = numpy.empty([0, dev_features.shape[1]])
        for dev_batch_qid in dev_batch_qids:
            feature_diff_file = diff_feat_dir + '/labels/' + str(int(dev_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            dev_diff_features = numpy.vstack((dev_diff_features, feature_diff))

        # Get all train set features together
        train_diff_features = numpy.empty([0, test_features.shape[1]])
        annt_labels = numpy.empty([count_annts, 0]).tolist()
        for train_batch_qid in train_batch_qids:
            feature_diff_file = diff_feat_dir + '/labels/' + str(int(train_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file, delimiter=',')
            train_diff_features = numpy.vstack((train_diff_features, feature_diff))

            # Get the labels on the train set from the different annotators
            for annt_id in range(count_annts):
                cur_annt_labels = annt_labels[annt_id]
                annt_labels_for_qid_file = noisy_labels_dir + '/' + str(train_batch_qid) + '.noisy_labels' + str(annt_id+1)
                annt_labels_for_qid = numpy.genfromtxt(annt_labels_for_qid_file)
                cur_annt_labels = numpy.hstack((cur_annt_labels, annt_labels_for_qid))
                annt_labels[annt_id] = cur_annt_labels

        ext_diff_feats = numpy.hstack((train_diff_features, numpy.ones((train_diff_features.shape[0], 1))))

        # Getting results using labels of varying correctness during training:
        # the true pairwise label is always 1, so random_scores supplies labels
        # that are correct for roughly a (1 - .05*check_id) fraction of pairs
        for check_id in range(10):
            w = .01*numpy.ones((1, 1+train_diff_features.shape[1]))
            print 'training true baseline model for iter ... %d' % (i)
            n_epochs, learning_rate, lambda_w = 2000, .01, .001
            random_scores = (numpy.random.uniform(0, 1, size=(train_diff_features.shape[0], 1)) > (.05*check_id))*1
            print 'correct supplied: ', numpy.mean(random_scores)
            w = SVRankerSoft.svr_optimization(train_diff_features, random_scores, w, learning_rate, n_epochs, lambda_w)

            print 'True Baseline Test Results:'
            mean_result_storage[2, 0] += PrintResultsStats(w, test_features, test_labels)
            mean_result_storage[0, 0] += PrintResults(w, test_diff_features)

            print 'True Baseline Dev Results:'
            mean_result_storage[3, 0] += PrintResultsStats(w, dev_features, dev_labels)
            mean_result_storage[1, 0] += PrintResults(w, dev_diff_features)
            print '-----------------------------'
            print ''
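# A quick standalone check of the sweep above (illustrative, not from the
# original source): since the true pairwise label is always 1, the expected
# fraction of correct labels supplied at step check_id is 1 - .05*check_id.
if __name__ == '__main__':
    for check_id in range(10):
        print 'check_id %d -> expected fraction of correct labels: %.2f' % (check_id, 1 - .05*check_id)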