def rank(self):
    """Grid-search RankSVM hyper-parameters and report every candidate.

    Features and targets come from ``self.makeFeatures()``. A fixed-seed
    split keeps 25% of the samples (``test_size=0.75``) as the development
    set, on which a 5-fold ``GridSearchCV`` explores an RBF grid (``gamma``
    and ``C``) and a linear grid (``C`` only). The best parameter set and
    the mean test score (+/- two standard deviations) of each candidate
    are printed.

    Returns:
        ``clf.best_estimator_``, the estimator selected by the search, so
        callers can reuse the tuned model instead of re-running the search.
    """
    X, Y = self.makeFeatures()
    X = np.asarray(X, dtype=np.float32)
    Y = np.asarray(Y, dtype=np.int32)

    # Only the development portion is used here; the held-out 75% is
    # discarded by this method.
    X_train, _, y_train, _ = train_test_split(
        X, Y, test_size=0.75, random_state=0)

    tuned_parameters = [
        {
            'kernel': ['rbf'],
            'gamma': [1e-3, 1e-2, 1e-1, 1, 10],
            'C': [1e-2, 1e-1, 1, 10, 100, 1000],
        },
        {
            'kernel': ['linear'],
            'C': [1, 10, 100, 1000],
        },
    ]

    clf = GridSearchCV(ranking.RankSVM(C=1), tuned_parameters, cv=5)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print(clf.best_params_)
    print("Grid scores on development set:")
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

    # best_estimator_ exists because GridSearchCV refits by default
    # (refit=True); hand it back rather than dropping it.
    return clf.best_estimator_
def featureSelect(self):
    """Run recursive feature elimination with CV and print the result.

    Features and targets come from ``self.makeFeatures()``; only column 0
    of the targets is used as ``y`` (presumably the label column -- confirm
    against ``makeFeatures``). A linear ``RankSVM`` with ``C=10`` is wrapped
    in ``RFECV`` (one feature removed per step, 2-fold stratified CV) and
    the selected feature count is printed. Nothing is returned.
    """
    raw_features, raw_targets = self.makeFeatures()
    features = np.asarray(raw_features, dtype=np.float32)
    labels = np.asarray(raw_targets, dtype=np.int32)[:, 0]

    selector = RFECV(
        estimator=ranking.RankSVM(kernel='linear', C=10),
        step=1,
        cv=StratifiedKFold(2),
    )
    selector.fit(features, labels)

    print("Optimal number of features : %d" % selector.n_features_)
def rankSVM(self):
    """Print the RankSVM score of each fold of a 10-fold grouped CV.

    Features and targets come from ``self.makeFeatures()``. Column 1 of
    the targets supplies the group ids for ``GroupKFold``, so samples of
    one group never appear in both the train and test side of a fold. The
    full target array (all columns) is passed to ``fit`` and ``score``.
    A fresh RBF ``RankSVM`` (``C=0.1``, ``gamma=10``) is trained per fold.
    Nothing is returned.
    """
    raw_features, raw_targets = self.makeFeatures()
    features = np.asarray(raw_features, dtype=np.float32)
    targets = np.asarray(raw_targets, dtype=np.int32)
    fold_groups = targets[:, 1]

    splitter = GroupKFold(n_splits=10)
    for train_idx, test_idx in splitter.split(features, targets, groups=fold_groups):
        model = ranking.RankSVM(C=1e-1, kernel='rbf', gamma=10)
        model.fit(features[train_idx], targets[train_idx])
        fold_score = model.score(features[test_idx], targets[test_idx])
        print(fold_score)
def svmRankingTrain(self):
    """Train a RankSVM on part of the data and append a rank to each pair.

    Features and targets come from ``self.makeFeatures()``; training labels
    are derived from the raw targets with ``self.setLabel``. A fixed-seed
    split trains an RBF ``RankSVM`` (``C=10``, ``gamma=1``) on 35% of the
    samples (``test_size=0.65``). The model then predicts for *all*
    samples, the predictions overwrite column 0 of the target array, and
    ``self.relativeToAbsHalf`` converts that into ranks. The i-th rank is
    appended to ``self.pairs[i]``. Nothing is returned.
    """
    X, Y = self.makeFeatures()
    # Labels are computed from the raw targets, before the array conversion.
    y = self.setLabel(Y)
    X = np.asarray(X, dtype=np.float64)
    Y = np.asarray(Y, dtype=np.int32)

    # The held-out part of the split is not used by this method.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.65, random_state=0)

    svc = ranking.RankSVM(C=1e1, kernel='rbf', gamma=1)
    svc.fit(X_train, y_train)

    # NOTE: Y is int32, so the predictions are cast on assignment; and
    # np.asarray may return its input uncopied, so this write can be
    # visible through whatever makeFeatures() returned.
    Y[:, 0] = svc.predict(X)

    ranks = self.relativeToAbsHalf(X, Y)
    for i, j in enumerate(ranks):
        self.pairs[i].append(j)