def PerformCV(qid_file,diff_feat_dir,feat_file,labels_file_dir,cnt_noisy_labels_file,batch_size=5):
   """
   Cross-validated training and evaluation of per-annotator ranking models.

   Given the list of qids, training features are taken from diff_feat_dir
   (one subdirectory per noisy annotator); test and dev features come from
   feat_file.  One model is trained per annotator on the pairwise-difference
   features and evaluated on feat_file features with Kendall tau, Spearman
   and Pearson correlations — per annotator and as an unweighted
   (Borda-count style) score fusion.

   qid_file: file with one qid per row, parallel to the rows of feat_file
   diff_feat_dir: directory containing 'noisy_labels<k>/' subdirs with
      '<qid>.features' pairwise-difference files per annotator
   feat_file: comma-separated feature matrix for all qids
   labels_file_dir: comma-separated true-labels file, parallel to feat_file
   cnt_noisy_labels_file: number of noisy annotators
   batch_size: number of unique qids per CV fold

   Make sure that batch_size is a divisor of the number of unique qids;
   otherwise the function prints a message and returns without training.
   """

   qids = numpy.genfromtxt(qid_file)
   qids_unique = numpy.unique(qids)

   features = numpy.genfromtxt(feat_file,delimiter=',')

   # creating labels list
   # BUG FIX: the original referenced an undefined name 'labels_file';
   # the parameter is called labels_file_dir.
   true_labels = numpy.genfromtxt(labels_file_dir,delimiter=',')

   if numpy.remainder(len(qids_unique),batch_size):
      print("Please provide a split that divides number of unique qids")
      return

   # floor division keeps num_batches an integer under Python 3 as well
   num_batches = len(qids_unique)//batch_size

   for i in range(num_batches):
      # Determine the qids in the test, dev and train sets: fold i is the
      # test fold and the cyclically-next fold is the dev fold.
      test_id = i
      test_batch_qids = qids_unique[numpy.arange(batch_size*test_id,batch_size*(test_id+1))]

      dev_id = numpy.remainder(i+1,num_batches)
      dev_batch_qids = qids_unique[numpy.arange(batch_size*dev_id,batch_size*(dev_id+1))]

      train_batch_qids = numpy.setdiff1d(qids_unique,numpy.union1d(test_batch_qids,dev_batch_qids))

      # find the features and labels for the test and the dev set
      # (the .T on the 1-D boolean mask in the original was a no-op)
      test_features = features[numpy.in1d(qids,test_batch_qids),:]
      test_labels = numpy.matrix(true_labels[numpy.in1d(qids,test_batch_qids)]).T
      dev_features = features[numpy.in1d(qids,dev_batch_qids),:]
      dev_labels = numpy.matrix(true_labels[numpy.in1d(qids,dev_batch_qids)]).T

      # get all train set features together from all the annotators and
      # train one ranker per annotator
      w_per_annt = numpy.empty((cnt_noisy_labels_file,0)).tolist()
      print('training model for iter ... %d' % (i))
      for noisy_annt_id in range(cnt_noisy_labels_file):
         print('at noisy annotor id: %d' % (noisy_annt_id))
         train_diff_features = numpy.empty([0,test_features.shape[1]])
         for train_batch_qid in train_batch_qids:
            feature_diff_file = diff_feat_dir + '/noisy_labels' + str(noisy_annt_id+1) + '/' + str(int(train_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file,delimiter=',')
            train_diff_features = numpy.vstack((train_diff_features,feature_diff))

         w = numpy.ones((1,1+train_diff_features.shape[1])) # initial w (+1 for the bias column)
         # single optimization round; loop variable renamed so the builtin
         # 'iter' is not shadowed
         for opt_round in range(1):
            n_epochs, learning_rate, lambda_w = 2400, .02, .001
            w = SVRanker3.svr_optimization(train_diff_features,w,learning_rate,n_epochs,lambda_w)

         w_per_annt[noisy_annt_id] = w

      print('Model Trained.')
      print('Results:')
      # perform unweighted fusion to get results on test and dev set
      test_features_ext = numpy.hstack((test_features,numpy.ones((test_features.shape[0],1))))
      test_scores = 0
      dev_features_ext = numpy.hstack((dev_features,numpy.ones((dev_features.shape[0],1))))
      dev_scores = 0
      for noisy_annt_id in range(cnt_noisy_labels_file):
         cur_test_scores = numpy.dot(test_features_ext,w_per_annt[noisy_annt_id].T)
         print('Annotator specific results for annotator %d' % (noisy_annt_id))
         print('TEST: Kendall Tau: %f, Spearman correlation: %f, Pearson correlation: %f' \
         %(stats.kendalltau(cur_test_scores,test_labels)[0], stats.spearmanr(cur_test_scores,test_labels)[0], \
         numpy.corrcoef(cur_test_scores.T,test_labels.T)[0,1]))
         # reuse the already-computed scores instead of repeating the dot product
         test_scores += cur_test_scores

         cur_dev_scores = numpy.dot(dev_features_ext,w_per_annt[noisy_annt_id].T)
         print('DEV: Kendall Tau: %f, Spearman correlation: %f, Pearson correlation: %f' \
         %(stats.kendalltau(cur_dev_scores,dev_labels)[0], stats.spearmanr(cur_dev_scores,dev_labels)[0], \
         numpy.corrcoef(cur_dev_scores.T,dev_labels.T)[0,1]))
         dev_scores += cur_dev_scores

      print('')
      print('Borda count results')
      print('TEST: Kendall Tau: %f, Spearman correlation: %f, Pearson correlation: %f' \
      %(stats.kendalltau(test_scores,test_labels)[0], stats.spearmanr(test_scores,test_labels)[0], \
      numpy.corrcoef(test_scores.T,test_labels.T)[0,1]))

      print('DEV: Kendall Tau: %f, Spearman correlation: %f, Pearson correlation: %f' \
      %(stats.kendalltau(dev_scores,dev_labels)[0], stats.spearmanr(dev_scores,dev_labels)[0], \
      numpy.corrcoef(dev_scores.T,dev_labels.T)[0,1]))
# Example #2
def TrainOnAll(qid_file, diff_feat_dir, feat_file, true_labels_file, noisy_labels_dir, batch_size, count_annts):
   """
   Train ranking models on ALL unique qids (no held-out folds) and print
   train-set identification accuracy for several strategies: TrainEM,
   TrainEMRelEst, majority vote, one SVR ranker per annotator, and a
   Borda-count (norm-weighted sum) fusion of the annotator models.

   qid_file: the qid file location
   diff_feat_dir: directory where the diff features are stored; expected to
      contain a 'labels/' subdir with '<qid>.features' files plus one
      'noisy_labels<k>/' subdir per annotator
   feat_file: comma-separated feature matrix (used below only for its
      column count)
   true_labels_file: the true labels file used for test set evaluation
   noisy_labels_dir: directory containing noisy preferences
      ('<qid>.noisy_labels<k>') corresponding to features in diff_feat_dir
   batch_size: must divide the number of unique qids (sanity check only)
   count_annts: number of annotators

   Example values:
   qid_file= '/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/qids'
   diff_feat_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_features/labels/'
   feat_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/features'
   true_labels_file='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/labels'
   noisy_labels_dir='/auto/rcf-proj/pg/guptarah/RankingExp/data/wine_quality/noisy_labels_pairwise'
   batch_size=1
   count_annts=6
   """

   qids = numpy.genfromtxt(qid_file,dtype='int')
   qids_unique = numpy.unique(qids)

   features = numpy.genfromtxt(feat_file,delimiter=',')
   labels = numpy.genfromtxt(true_labels_file,delimiter=',')

   if numpy.remainder(len(qids_unique),batch_size):
      print "Please provide a split that divides number of unique qids"
      return

   # a single "batch" covering every qid — no cross-validation here
   num_batches = 1 
   mean_result_storage = numpy.zeros((2,5+count_annts))
   # columns: True, EM, EMRelEst, Borda, Majority vote, plus one per annotator
   # 2 rows: first for dev, second for test (NOTE(review): the original
   # comment said 'dev' twice — presumed typo)
   # NOTE(review): mean_result_storage is never filled in the visible code
   
   for i in range(num_batches):
      train_batch_qids = qids_unique  
      # get all train set features together
      train_diff_features = numpy.empty([0,features.shape[1]])
      # one growing 1-D label array per annotator
      annt_labels = numpy.empty([count_annts,0]).tolist()
      for train_batch_qid in train_batch_qids:
         feature_diff_file = diff_feat_dir + '/labels/' + str(int(train_batch_qid)) + '.features' 
         feature_diff = numpy.genfromtxt(feature_diff_file,delimiter=',')
         train_diff_features = numpy.vstack((train_diff_features,feature_diff))

         # getting the labels on train set from different annotators
         # (annotator files are 1-indexed on disk)
         for annt_id in range(count_annts):
            cur_annt_labels = annt_labels[annt_id]
            annt_lables_for_qid_file = noisy_labels_dir + '/' + str(train_batch_qid) + '.noisy_labels' + str(annt_id+1)
            annt_lables_for_qid = numpy.genfromtxt(annt_lables_for_qid_file)
            cur_annt_labels = numpy.hstack((cur_annt_labels,annt_lables_for_qid))
            annt_labels[annt_id] = cur_annt_labels

      # append a bias column of ones to the diff features
      ext_diff_feats = numpy.hstack((train_diff_features,numpy.ones((train_diff_features.shape[0],1))))
      max_iter = 20
      # Training model using the TrainEM function; k appears to hold
      # per-pair scores where > .5 counts as a correct identification
      w,k = TrainEM.TrainModel(ext_diff_feats,annt_labels,max_iter)
      print 'TrainEM Results:'
      print 'Correct identifications on train set: %f' %(numpy.mean(k>.5))

      print '-----------------------------'
      print ''

      # Training model using TrainEMRelEst function
      # (ties at exactly .5 are counted as half-correct)
      w,k = TrainEMRelEst.TrainModel(ext_diff_feats,annt_labels,max_iter)
      print 'TrainEMRelEst Results:'
      print 'Correct identifications on train set: %f' %(numpy.mean(k>.5)+(.5*numpy.mean(k==.5)))

      print '-----------------------------'
      print ''

      # Getting results using majority vote 
      # (mean over annotators > .5 wins; exact .5 ties count as half)
      majority_vote = (((numpy.mean(numpy.matrix(annt_labels),axis=0) > .5)+.5*(numpy.mean(numpy.matrix(annt_labels),axis=0) == .5))*1).T
      print 'Majority vote Results:'
      print 'Correct identifications on train set: %f' %(numpy.mean(majority_vote))
      
 
      print '-----------------------------'
      print ''
      
      print 'Getting results for each annotator'
      # Getting results on each annotator: train a separate SVR ranker on
      # that annotator's diff features and accumulate a norm-weighted sum
      # of the weight vectors for the Borda-count fusion below
      w_borda = numpy.zeros((1,1+train_diff_features.shape[1]))
      for noisy_annt_id in range(count_annts):
         print 'at noisy annotor id: %d' %(noisy_annt_id)
         train_diff_features = numpy.empty([0,features.shape[1]])
         for train_batch_qid in train_batch_qids:
            feature_diff_file = diff_feat_dir + '/noisy_labels' + str(noisy_annt_id+1) + '/' + str(int(train_batch_qid)) + '.features'
            feature_diff = numpy.genfromtxt(feature_diff_file,delimiter=',')
            train_diff_features = numpy.vstack((train_diff_features,feature_diff)) 
         
         w = numpy.ones((1,1+train_diff_features.shape[1])) # initial w
         n_epochs, learning_rate, lambda_w = 2000, .02, .001
         w = SVRanker3.svr_optimization(train_diff_features,w,learning_rate,n_epochs,lambda_w)
         print 'Annotator %d results:' %(noisy_annt_id)
         PrintResults(w,train_diff_features)
 
         print 'Annotator correct identification:', numpy.mean(annt_labels[noisy_annt_id])
 
         # each annotator contributes its L2-normalized weight vector
         w_borda = w_borda + w/numpy.linalg.norm(w,2) 

      print 'Borda count Results:' 
      PrintResults(w_borda,train_diff_features)