def __init__(self, X_valid, Y_valid, qids_valid=None, measure=sqmprank, maxiter=10):
    """Store the validation set and reset the tracking state.

    X_valid    -- validation inputs, converted with array_tools.as_matrix.
    Y_valid    -- validation labels, converted with array_tools.as_2d_array.
    qids_valid -- query ids of the validation examples, passed through
                  qids_to_splits.
                  NOTE(review): the default None is handed to
                  qids_to_splits unchanged -- confirm the helper accepts it.
    measure    -- performance measure, stored as given.
    maxiter    -- stored as given; presumably an iteration budget used by
                  the callback logic -- confirm against the caller.
    """
    # Validation data, normalised by the project's conversion helpers.
    self.X_valid = array_tools.as_matrix(X_valid)
    self.Y_valid = array_tools.as_2d_array(Y_valid)
    self.qids_valid = qids_to_splits(qids_valid)

    # Configuration supplied by the caller.
    self.measure, self.maxiter = measure, maxiter

    # Nothing has been evaluated yet: no best score, no best coefficients.
    self.bestperf = self.bestA = None

    # Counters start from zero.
    self.iter = self.last_update = 0
def __init__(self, X, Y, regparam=1.0, qids=None, callbackfun=None, **kwargs):
    """Fit a linear ranking model by solving a linear system with cg.

    The system operator is applied implicitly (see ``mv`` below): the data
    matrix times its transpose, minus the same product with the averaging
    projection P inserted in between, plus ``regparam`` times the identity.
    The solution is stored in ``self.A`` and wrapped in a
    ``predictor.LinearPredictor`` together with a zero bias ``self.b``.

    X           -- training inputs with one row per entry of Y (the
                   transpose is stored as a CSC matrix in ``self.X``).
    Y           -- training labels; converted with
                   array_tools.as_2d_array. Only one column is supported.
    regparam    -- coefficient of the ``regparam * v`` term.
    qids        -- optional query ids. When given, they are mapped with
                   map_qids, grouped with qids_to_splits, and P averages
                   within each group; otherwise P averages over all
                   examples.
    callbackfun -- optional object; its ``callback(self)`` is invoked after
                   every cg iteration with ``self.A`` / ``self.b`` updated.
    **kwargs    -- accepted and ignored.

    Raises Exception if the converted Y has more than one column.
    """
    self.regparam = regparam
    self.callbackfun = callbackfun
    self.Y = array_tools.as_2d_array(Y)
    # Number of training examples. Taken from the converted labels so that
    # inputs without a ``shape`` attribute (e.g. plain lists) work too.
    self.size = self.Y.shape[0]
    if self.Y.shape[1] > 1:
        raise Exception('CGRankRLS does not currently work in multi-label mode')
    self.learn_from_labels = True
    self.X = csc_matrix(X.T)
    if qids is not None:
        self.qids = map_qids(qids)
        self.splits = qids_to_splits(self.qids)
    else:
        self.qids = None
    regparam = self.regparam
    qids = self.qids

    # Build P (and its transpose) so that P * PT averages a vector either
    # within each query group or over the whole training set.
    if qids is not None:
        P = sp.lil_matrix((self.size, len(set(qids))))
        for qidind, inds in enumerate(self.splits):
            qsize = len(inds)
            for i in inds:
                P[i, qidind] = 1. / sqrt(qsize)
        P = P.tocsr()
        PT = P.tocsc().T
    else:
        P = 1. / sqrt(self.size) * (np.mat(np.ones((self.size, 1), dtype=np.float64)))
        PT = P.T

    X = self.X.tocsc()
    X_csr = X.tocsr()

    def mv(v):
        # Matrix-free application of the system operator to v.
        v = np.mat(v).T
        XTv = X.T * v  # shared by both data terms, so compute it once
        return X_csr * XTv - X_csr * (P * (PT * XTv)) + regparam * v

    G = LinearOperator((X.shape[0], X.shape[0]), matvec=mv, dtype=np.float64)
    Y = self.Y

    if self.callbackfun is not None:
        def cb(v):
            # Expose the current iterate as the model before notifying.
            self.A = np.mat(v).T
            self.b = np.mat(np.zeros((1, 1)))
            self.callbackfun.callback(self)
    else:
        cb = None

    XLY = X_csr * Y - X_csr * (P * (PT * Y))
    try:
        self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
    except Finished:
        # Presumably raised from the callback to stop the solver early; in
        # that case self.A already holds the last iterate assigned in cb.
        pass
    self.b = np.mat(np.zeros((1, 1)))
    self.predictor = predictor.LinearPredictor(self.A, self.b)
def __init__(self, X_valid, Y_valid, qids_valid=None, measure=sqmprank, maxiter=10):
    """Keep the validation data and initialise the bookkeeping fields.

    X_valid    -- validation inputs; run through array_tools.as_matrix.
    Y_valid    -- validation labels; run through array_tools.as_2d_array.
    qids_valid -- validation query ids; run through qids_to_splits.
                  NOTE(review): None (the default) is forwarded to
                  qids_to_splits as is -- verify the helper copes with it.
    measure    -- performance measure, kept unchanged.
    maxiter    -- kept unchanged; presumably a limit on iterations used
                  elsewhere in the class -- confirm.
    """
    # Converted validation data.
    self.X_valid = array_tools.as_matrix(X_valid)
    self.Y_valid = array_tools.as_2d_array(Y_valid)
    self.qids_valid = qids_to_splits(qids_valid)

    # Caller-provided settings.
    self.measure, self.maxiter = measure, maxiter

    # No best result recorded so far.
    self.bestperf = self.bestA = None

    # Both counters begin at zero.
    self.iter = self.last_update = 0
def __init__(self, X, Y, qids, regparam=1.0, kernel='LinearKernel', basis_vectors=None, **kwargs):
    """Set up the SVD adapter and the query structure, then train.

    X             -- training inputs, forwarded to the SVD adapter.
    Y             -- training labels; converted with
                     array_tools.as_2d_array and wrapped in np.mat.
    qids          -- query ids; mapped with map_qids and grouped with
                     qids_to_splits.
    regparam      -- regularization parameter handed to self.solve.
    kernel        -- kernel name forwarded to the SVD adapter.
    basis_vectors -- optional; forwarded to the SVD adapter only when
                     it is not None.
    **kwargs      -- further options forwarded to
                     adapter.createSVDAdapter; "bias" is always
                     overridden with 0.
    """
    # Adapter options: the bias is forced to zero, kernel and data are
    # taken from the explicit arguments.
    kwargs.update(bias=0., kernel=kernel, X=X)
    if basis_vectors is not None:
        kwargs['basis_vectors'] = basis_vectors

    svd_adapter = adapter.createSVDAdapter(**kwargs)
    self.svdad = svd_adapter

    self.Y = np.mat(array_tools.as_2d_array(Y))
    self.regparam = regparam

    # Decomposition results exposed by the adapter.
    self.svals = np.mat(svd_adapter.svals)
    self.svecs = svd_adapter.rsvecs

    # Number of rows in the converted label matrix.
    self.size = self.Y.shape[0]

    # Query bookkeeping.
    self.qids = map_qids(qids)
    self.qidlist = qids_to_splits(self.qids)

    self.solve(self.regparam)
def __init__(self, X, Y, qids, regparam=1.0, kernel='LinearKernel', basis_vectors=None, **kwargs):
    """Create the SVD adapter, record the query groups and run the solver.

    X             -- training inputs, passed on to the SVD adapter.
    Y             -- training labels; converted with
                     array_tools.as_2d_array, then wrapped in np.mat.
    qids          -- query ids; mapped with map_qids, then grouped with
                     qids_to_splits.
    regparam      -- regularization parameter given to self.solve.
    kernel        -- kernel name passed on to the SVD adapter.
    basis_vectors -- optional; passed on to the SVD adapter only when it
                     is not None.
    **kwargs      -- extra options passed on to
                     adapter.createSVDAdapter; "bias" is always replaced
                     by 0.
    """
    # Options for the adapter; explicit arguments override kwargs entries.
    kwargs.update(bias=0., kernel=kernel, X=X)
    if basis_vectors is not None:
        kwargs['basis_vectors'] = basis_vectors

    svd_adapter = adapter.createSVDAdapter(**kwargs)
    self.svdad = svd_adapter

    self.Y = np.mat(array_tools.as_2d_array(Y))
    self.regparam = regparam

    # Values read back from the adapter.
    self.svals = np.mat(svd_adapter.svals)
    self.svecs = svd_adapter.rsvecs

    # Row count of the converted labels.
    self.size = self.Y.shape[0]

    # Query ids and their grouping.
    self.qids = map_qids(qids)
    self.qidlist = qids_to_splits(self.qids)

    self.solve(self.regparam)
# Example script: select the regularization parameter of
# LeaveQueryOutRankRLS on the training queries, then report the mean
# per-query cindex on the test set.
from rlscore.measure.measure_utilities import UndefinedPerformance
from rlscore.measure.measure_utilities import qids_to_splits

# Load the training and test data.
train_labels = np.loadtxt("./examples/data/rank_train.labels")
test_labels = np.loadtxt("./examples/data/rank_test.labels")
train_qids = read_qids("./examples/data/rank_train.qids")
test_features = read_sparse("./examples/data/rank_test.features")
train_features = read_sparse("./examples/data/rank_train.features")
test_qids = read_qids("./examples/data/rank_test.qids")

# Learner configuration; the grid covers 2**-10 ... 2**10.
kwargs = {
    'measure': cindex,
    'regparams': [2**i for i in range(-10, 11)],
    "Y": train_labels,
    "X": train_features,
    "qids": train_qids,
}
learner = LeaveQueryOutRankRLS(**kwargs)

# Report the cross-validation performance recorded for each grid value.
grid = kwargs['regparams']
perfs = learner.cv_performances
for regparam, cv_perf in zip(grid, perfs):
    # A parenthesised single argument prints identically on Python 2 and 3.
    print("parameter %f cv_performance %f" % (regparam, cv_perf))

P = learner.predict(test_features)

# Evaluate each test query separately; queries for which cindex raises
# UndefinedPerformance are left out of the average.
test_qids = qids_to_splits(test_qids)
perfs = []
for query in test_qids:
    try:
        perfs.append(cindex(test_labels[query], P[query]))
    except UndefinedPerformance:
        continue
test_perf = np.mean(perfs)
print("test set performance: %f" % test_perf)
# Example script: train QueryRankRLS with a fixed regularization parameter
# and report the mean per-query cindex on the test set.
import numpy as np

from rlscore.learner.query_rankrls import QueryRankRLS
from rlscore.utilities.reader import read_qids
from rlscore.utilities.reader import read_sparse
from rlscore.measure import cindex
from rlscore.measure.measure_utilities import UndefinedPerformance
from rlscore.measure.measure_utilities import qids_to_splits

# Training data.
train_labels = np.loadtxt("./legacy_tests/data/rank_train.labels")
train_qids = read_qids("./legacy_tests/data/rank_train.qids")
train_features = read_sparse("./legacy_tests/data/rank_train.features")

# Test data.
test_labels = np.loadtxt("./legacy_tests/data/rank_test.labels")
test_features = read_sparse("./legacy_tests/data/rank_test.features")
test_qids = read_qids("./legacy_tests/data/rank_test.qids")

# Train the learner and predict the test examples.
kwargs = {
    "Y": train_labels,
    "X": train_features,
    "qids": train_qids,
    "regparam": 1,
}
learner = QueryRankRLS(**kwargs)
P = learner.predict(test_features)

# Score every test query on its own; queries for which cindex raises
# UndefinedPerformance do not contribute to the average.
test_qids = qids_to_splits(test_qids)
perfs = []
for query in test_qids:
    try:
        perfs.append(cindex(test_labels[query], P[query]))
    except UndefinedPerformance:
        continue

test_perf = np.mean(perfs)
print("test set performance: %f" % test_perf)
def __init__(self, X, Y, regparam=1.0, qids=None, callbackfun=None, **kwargs):
    """Train a linear ranking model with the conjugate gradient solver.

    A matrix-free operator (``mv`` below) applies the data matrix times its
    transpose, subtracts the same product with the averaging projection P
    in the middle, and adds ``regparam`` times the input vector. ``cg``
    solves the resulting system; the solution becomes ``self.A`` and is
    wrapped, with a zero bias ``self.b``, in ``predictor.LinearPredictor``.

    X           -- training inputs, one row per entry of Y (stored
                   transposed as a CSC matrix in ``self.X``).
    Y           -- training labels; converted with
                   array_tools.as_2d_array. A single column only.
    regparam    -- coefficient of the ``regparam * v`` term.
    qids        -- optional query ids. If given they are mapped with
                   map_qids and grouped with qids_to_splits, and P
                   averages inside each group; if omitted P averages over
                   all training examples.
    callbackfun -- optional object whose ``callback(self)`` is called
                   after each cg iteration, once ``self.A`` and ``self.b``
                   reflect the current iterate.
    **kwargs    -- accepted and ignored.

    Raises Exception when the converted Y has more than one column.
    """
    self.regparam = regparam
    self.callbackfun = callbackfun
    self.Y = array_tools.as_2d_array(Y)
    # Number of training examples. Use the converted labels rather than the
    # raw argument, which need not have a ``shape`` attribute.
    self.size = self.Y.shape[0]
    if self.Y.shape[1] > 1:
        raise Exception(
            'CGRankRLS does not currently work in multi-label mode')
    self.learn_from_labels = True
    self.X = csc_matrix(X.T)
    if qids is not None:
        self.qids = map_qids(qids)
        self.splits = qids_to_splits(self.qids)
    else:
        self.qids = None
    regparam = self.regparam
    qids = self.qids

    # P * PT averages a vector within each query group, or over the whole
    # training set when no query ids were supplied.
    if qids is not None:
        P = sp.lil_matrix((self.size, len(set(qids))))
        for qidind, inds in enumerate(self.splits):
            qsize = len(inds)
            for i in inds:
                P[i, qidind] = 1. / sqrt(qsize)
        P = P.tocsr()
        PT = P.tocsc().T
    else:
        P = 1. / sqrt(self.size) * (np.mat(
            np.ones((self.size, 1), dtype=np.float64)))
        PT = P.T

    X = self.X.tocsc()
    X_csr = X.tocsr()

    def mv(v):
        # Apply the system operator to v without forming it explicitly.
        v = np.mat(v).T
        XTv = X.T * v  # needed by both data terms; compute once
        return X_csr * XTv - X_csr * (P * (PT * XTv)) + regparam * v

    G = LinearOperator((X.shape[0], X.shape[0]),
                       matvec=mv,
                       dtype=np.float64)
    Y = self.Y

    if self.callbackfun is not None:
        def cb(v):
            # Publish the current iterate as the model, then notify.
            self.A = np.mat(v).T
            self.b = np.mat(np.zeros((1, 1)))
            self.callbackfun.callback(self)
    else:
        cb = None

    XLY = X_csr * Y - X_csr * (P * (PT * Y))
    try:
        self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
    except Finished:
        # Presumably raised by the callback to end the iteration early;
        # self.A then keeps the last iterate assigned inside cb.
        pass
    self.b = np.mat(np.zeros((1, 1)))
    self.predictor = predictor.LinearPredictor(self.A, self.b)