def _score_one_vs_rest(self, Kyx, cy):
    """Return the mean average precision, in percent, over the scored classes.

    For every class index ``ii`` in ``range(self.nr_classes - 1)`` the
    classifier ``self.clf[ii]`` is evaluated on the samples whose label is
    either ``ii`` (treated as +1) or ``self.null_class_idx`` (treated as
    -1), i.e. "each class vs NULL".

    Note kept from the original author: "I don't have a NULL class for the
    testing set at the moment."

    Args:
        Kyx: 2-D array whose rows are indexed by the samples in `cy` and
            whose columns are indexed by ``self.cx_idxs[ii]``; presumably
            the test-vs-train kernel matrix -- TODO confirm.
        cy: sequence of labels, one per row of `Kyx`.

    Returns:
        The mean of the per-class average-precision scores times 100.
    """
    cy = np.array(cy)
    nr_scored = self.nr_classes - 1
    ap_scores = np.zeros(nr_scored)
    print("")  # Blank separator line before the per-class report.
    for ii in range(nr_scored):
        # Scenario 2: each class vs NULL. (Scenario 1, each class vs all
        # the remaining classes, would select `cy != self.null_class_idx`
        # instead.)
        good_idxs = (cy == ii) | (cy == self.null_class_idx)
        K_good_idxs = np.ix_(good_idxs, self.cx_idxs[ii])
        # Get a +1, -1 vector of labels. Built as a real list so that it
        # can be measured and indexed on Python 3 as well, where `map`
        # returns a one-shot iterator.
        cy_ = [+1 if label == ii else -1 for label in cy[good_idxs]]
        # Predict; column 1 of predict_proba is used as the confidence of
        # the positive class (presumably the +1 label -- TODO confirm).
        confidence_values = self.clf[ii].predict_proba(
            Kyx[K_good_idxs])[:, 1]
        ap_scores[ii] = rff.get_ap(confidence_values, cy_)
        print("Score for class %d as positive is %2.4f MAP." % (
            ii, ap_scores[ii]))
    return np.mean(ap_scores) * 100
def _crossvalidate_C_one_vs_rest(self, K, cc, idx_clf):
    """Pick the regularization constant C for one binary classifier.

    The samples are split at random, per class, into a validation part
    (a proportion of 0.3 of each class, rounded up) and a training part.
    For every candidate ``C = 3 ** k`` with ``k`` in ``-2 .. 7`` the
    classifier ``self.clf[idx_clf]`` is fitted on the training part and
    scored with ``rff.get_ap`` on the validation part.

    Side effects: the classifier is refitted for every candidate and is
    left fitted with the LAST candidate, not with the returned value; the
    best score is printed.

    Args:
        K: square Gram matrix over the samples described by `cc`.
        cc: sequence of labels with exactly two distinct values. The
            sample weighting below compares against +1 and -1, so the
            labels are expected to be +1 / -1.
        idx_clf: index into ``self.clf`` of the classifier to tune.

    Returns:
        The candidate C with the highest validation score; ties go to the
        larger C. Stays 0 if no candidate's score compares ``>= -inf``
        (e.g. every score is NaN).

    Raises:
        AssertionError: if `K` is not square or `cc` does not contain
            exactly two distinct labels.
    """
    # TODO Try to avoid duplication of some of this code.

    # 1. Split Gram matrix and labels into a training set and a validation
    # set.
    pp = 0.3  # Proportion of examples used for cross-validation.
    M, N = K.shape
    assert M == N, 'K is not Gram matrix.'
    classes = list(set(cc))
    nr_classes = len(classes)
    assert nr_classes == 2, 'Number of classes is not two.'
    labels = np.asarray(cc)

    # Randomly pick a subset of the data for cross-validation, but enforce
    # to get a proportion of pp points from each of the two classes.
    cv_parts = []
    tr_parts = []
    for label in classes:
        shuffled = np.random.permutation(np.flatnonzero(labels == label))
        # ceil() yields a float; slice bounds must be integers.
        nr_cv = int(np.ceil(pp * len(shuffled)))
        cv_parts.append(shuffled[:nr_cv])
        tr_parts.append(shuffled[nr_cv:])
    cv_idxs = np.hstack(cv_parts)
    tr_idxs = np.hstack(tr_parts)

    # Slice Gram matrix K: validation rows vs training columns, and
    # training rows vs training columns.
    cv_K = K[np.ix_(cv_idxs, tr_idxs)]
    tr_K = K[np.ix_(tr_idxs, tr_idxs)]
    # Get corresponding labels.
    cv_cc = labels[cv_idxs]
    tr_cc = labels[tr_idxs]

    # Every positive training sample is weighted by the number of negative
    # training samples. This does not depend on C, so compute it once.
    weight = np.ones(len(tr_cc))
    weight[tr_cc == +1] *= np.count_nonzero(tr_cc == -1)

    # 2. Try different values for the regularization term C and pick the
    # one that yields the best score on the cross-validation set.
    clf = self.clf[idx_clf]
    best_score = -np.inf
    best_C = 0
    for log3c in range(-2, 8):  # Vary C on an exponential scale.
        # Float base: an integer base with a negative NumPy integer
        # exponent raises ValueError on current NumPy.
        clf.C = 3.0 ** log3c
        clf.fit(tr_K, tr_cc, sample_weight=weight)
        confidence_values = clf.predict_proba(cv_K)[:, 1]
        score = rff.get_ap(confidence_values, cv_cc)
        if score >= best_score:
            best_score = score
            best_C = clf.C
    print("Best score for class %d as positive is %2.4f MAP." % (
        idx_clf, best_score))
    return best_C
def _crossvalidate_C_one_vs_rest(self, K, cc, idx_clf):
    """Pick the regularization constant C for one binary classifier.

    The samples are split at random, per class, into a validation part
    (a proportion of 0.3 of each class, rounded up) and a training part.
    For every candidate ``C = 3 ** k`` with ``k`` in ``-2 .. 7`` the
    classifier ``self.clf[idx_clf]`` is fitted on the training part and
    scored with ``rff.get_ap`` on the validation part.

    Side effects: the classifier is refitted for every candidate and is
    left fitted with the LAST candidate, not with the returned value; the
    best score is printed.

    Args:
        K: square Gram matrix over the samples described by `cc`.
        cc: sequence of labels with exactly two distinct values. The
            sample weighting below compares against +1 and -1, so the
            labels are expected to be +1 / -1.
        idx_clf: index into ``self.clf`` of the classifier to tune.

    Returns:
        The candidate C with the highest validation score; ties go to the
        larger C. Stays 0 if no candidate's score compares ``>= -inf``
        (e.g. every score is NaN).

    Raises:
        AssertionError: if `K` is not square or `cc` does not contain
            exactly two distinct labels.
    """
    # TODO Try to avoid duplication of some of this code.

    # 1. Split Gram matrix and labels into a training set and a validation
    # set.
    pp = 0.3  # Proportion of examples used for cross-validation.
    M, N = K.shape
    assert M == N, "K is not Gram matrix."
    classes = list(set(cc))
    nr_classes = len(classes)
    assert nr_classes == 2, "Number of classes is not two."
    labels = np.asarray(cc)

    # Randomly pick a subset of the data for cross-validation, but enforce
    # to get a proportion of pp points from each of the two classes.
    cv_parts = []
    tr_parts = []
    for label in classes:
        shuffled = np.random.permutation(np.flatnonzero(labels == label))
        # ceil() yields a float; slice bounds must be integers.
        nr_cv = int(np.ceil(pp * len(shuffled)))
        cv_parts.append(shuffled[:nr_cv])
        tr_parts.append(shuffled[nr_cv:])
    cv_idxs = np.hstack(cv_parts)
    tr_idxs = np.hstack(tr_parts)

    # Slice Gram matrix K: validation rows vs training columns, and
    # training rows vs training columns.
    cv_K = K[np.ix_(cv_idxs, tr_idxs)]
    tr_K = K[np.ix_(tr_idxs, tr_idxs)]
    # Get corresponding labels.
    cv_cc = labels[cv_idxs]
    tr_cc = labels[tr_idxs]

    # Every positive training sample is weighted by the number of negative
    # training samples. This does not depend on C, so compute it once.
    weight = np.ones(len(tr_cc))
    weight[tr_cc == +1] *= np.count_nonzero(tr_cc == -1)

    # 2. Try different values for the regularization term C and pick the
    # one that yields the best score on the cross-validation set.
    clf = self.clf[idx_clf]
    best_score = -np.inf
    best_C = 0
    for log3c in range(-2, 8):  # Vary C on an exponential scale.
        # Float base: an integer base with a negative NumPy integer
        # exponent raises ValueError on current NumPy.
        clf.C = 3.0 ** log3c
        clf.fit(tr_K, tr_cc, sample_weight=weight)
        confidence_values = clf.predict_proba(cv_K)[:, 1]
        score = rff.get_ap(confidence_values, cv_cc)
        if score >= best_score:
            best_score = score
            best_C = clf.C
    print("Best score for class %d as positive is %2.4f MAP." % (
        idx_clf, best_score))
    return best_C
def _score_one_vs_rest(self, Kyx, cy):
    """Return the mean average precision, in percent, over the scored classes.

    For every class index ``ii`` in ``range(self.nr_classes - 1)`` the
    classifier ``self.clf[ii]`` is evaluated on the samples whose label is
    either ``ii`` (treated as +1) or ``self.null_class_idx`` (treated as
    -1), i.e. "each class vs NULL".

    Note kept from the original author: "I don't have a NULL class for the
    testing set at the moment."

    Args:
        Kyx: 2-D array whose rows are indexed by the samples in `cy` and
            whose columns are indexed by ``self.cx_idxs[ii]``; presumably
            the test-vs-train kernel matrix -- TODO confirm.
        cy: sequence of labels, one per row of `Kyx`.

    Returns:
        The mean of the per-class average-precision scores times 100.
    """
    cy = np.array(cy)
    nr_scored = self.nr_classes - 1
    ap_scores = np.zeros(nr_scored)
    print("")  # Blank separator line before the per-class report.
    for ii in range(nr_scored):
        # Scenario 2: each class vs NULL. (Scenario 1, each class vs all
        # the remaining classes, would select `cy != self.null_class_idx`
        # instead.)
        good_idxs = (cy == ii) | (cy == self.null_class_idx)
        K_good_idxs = np.ix_(good_idxs, self.cx_idxs[ii])
        # Get a +1, -1 vector of labels. Built as a real list so that it
        # can be measured and indexed on Python 3 as well, where `map`
        # returns a one-shot iterator.
        cy_ = [+1 if label == ii else -1 for label in cy[good_idxs]]
        # Predict; column 1 of predict_proba is used as the confidence of
        # the positive class (presumably the +1 label -- TODO confirm).
        confidence_values = self.clf[ii].predict_proba(
            Kyx[K_good_idxs])[:, 1]
        ap_scores[ii] = rff.get_ap(confidence_values, cy_)
        print("Score for class %d as positive is %2.4f MAP." % (
            ii, ap_scores[ii]))
    return np.mean(ap_scores) * 100
def average_precision(y_true, y_pred):
    """Average precision with an sklearn-style ``(y_true, y_pred)`` signature.

    Thin adapter around ``result_file_functions.get_ap`` (Adrien's
    function), which takes the predictions first and the ground truth
    second; the arguments are simply forwarded in swapped order.
    """
    # get_ap expects (predictions, ground truth).
    ap = result_file_functions.get_ap(y_pred, y_true)
    return ap