def svm_train(X, y, b, alpha, n_samples, n_features, learner, loop, eta,
              max_iter=100, step_probability=0.5):
    """Train a linear model through the ``_sofia_ml`` backend.

    Minimizes an expression of the form

        Loss(X, y, b) + 0.5 * alpha * (||w|| ** 2)

    The backend entry point is chosen from the type of ``X``:

    * ``bstring`` instance: ``X`` is handed unchanged to
      ``_sofia_ml.train`` (presumably the path of an existing data file --
      confirm against the extension).
    * ``np.ndarray``: ``X`` and ``y`` are converted with ``np.float64`` and
      passed in memory to ``_sofia_ml.train_fast``. ``b`` is not forwarded
      on this path.
    * anything else: ``X``, ``y`` and ``b`` are written to a temporary file
      with ``datasets.dump_svmlight_file`` and ``_sofia_ml.train`` is run on
      that file.

    Parameters
    ----------
    X : input data (``bstring``, ``np.ndarray`` or any object with a
        ``shape`` that ``datasets.dump_svmlight_file`` accepts)
    y : target labels
    b : blocks (aka query_id); only used when the data is dumped to a
        temporary file
    alpha : float
        Regularization weight, forwarded to the backend.
    n_samples : int or None
        Only read for ``np.ndarray`` input; ``None`` means ``X.shape[0]``.
    n_features : int or None
        ``None`` means ``2 ** 17`` for ``bstring`` input and ``X.shape[1]``
        otherwise.
    learner, loop, eta : objects exposing a ``.value`` attribute
        (presumably enum members); only ``.value`` is sent to the backend.
        The original documentation lists 'rank', 'combined-ranking', 'roc',
        'stochastic' and 'balanced-stochastic' as loop types.
    max_iter : int
        Forwarded to the backend.
    step_probability : float
        Forwarded to the backend.

    Returns
    -------
    coef : the value returned by the ``_sofia_ml`` call
    """

    def _fit_from_file(path, dim):
        # Single place for the file-based backend call. The fifth positional
        # argument is hard-coded to False; its meaning is defined by the
        # _sofia_ml extension.
        return _sofia_ml.train(
            path, dim, alpha, max_iter, False,
            learner.value, loop.value, eta.value, step_probability)

    if isinstance(X, bstring):
        # 2 ** 17 is the default in sofia-ml. TODO: parse file to see
        dim = 2 ** 17 if n_features is None else n_features
        return _fit_from_file(X, dim)

    if isinstance(X, np.ndarray):
        dim = X.shape[1] if n_features is None else n_features
        rows = X.shape[0] if n_samples is None else n_samples
        features = np.float64(X)
        targets = np.float64(y)
        return _sofia_ml.train_fast(
            features, targets, rows, dim, alpha, max_iter, False,
            learner.value, loop.value, eta.value, step_probability)

    dim = X.shape[1] if n_features is None else n_features
    with tempfile.NamedTemporaryFile() as tmp:
        datasets.dump_svmlight_file(X, y, tmp.name, query_id=b)
        # Train while the temporary file still exists: it is removed as soon
        # as the with-block exits.
        return _fit_from_file(tmp.name, dim)
def sgd_train(X, y, b, alpha, n_features=None, model='rank', max_iter=100,
              step_probability=0.5):
    """Train a linear model through ``_sofia_ml.train``.

    Minimizes an expression of the form

        Loss(X, y, b) + 0.5 * alpha * (||w|| ** 2)

    where Loss is a hinge loss defined on pairs of samples.

    Parameters
    ----------
    X : input data
        A ``bstring`` instance is handed unchanged to the backend
        (presumably the path of an existing data file). Anything else is
        written to a temporary file with ``datasets.dump_svmlight_file``
        before training.
    y : target labels
    b : blocks (aka query_id); only used when the data is dumped to a
        temporary file
    alpha : float
        Regularization weight, forwarded to the backend.
    n_features : int or None
        Dimensionality passed to the backend. ``None`` means ``2 ** 17``
        for ``bstring`` input and ``X.shape[1]`` otherwise.
    model : {'rank', 'combined-ranking', 'roc'}
    max_iter : int
        Forwarded to the backend.
    step_probability : float
        Forwarded to the backend.

    Returns
    -------
    coef : the value returned by ``_sofia_ml.train``
    None : second tuple element, always ``None``
    """
    if isinstance(X, bstring):
        if n_features is None:
            # the default in sofia-ml TODO: parse file to see
            n_features = 2 ** 17
        w = _sofia_ml.train(X, n_features, alpha, max_iter, False, model,
                            step_probability)
    else:
        # Honor an explicit n_features on this path too; fall back to the
        # width of X only when the caller did not provide one.
        if n_features is None:
            n_features = X.shape[1]
        with tempfile.NamedTemporaryFile() as f:
            datasets.dump_svmlight_file(X, y, f.name, query_id=b)
            # Must run inside the with-block: the temporary file is deleted
            # when the block exits.
            w = _sofia_ml.train(f.name, n_features, alpha, max_iter, False,
                                model, step_probability)
    return w, None
def train(X, y, alpha, query_id, max_iter=100, model='rank',
          step_probability=0.5):
    """Dump ``X``/``y`` to a temporary file and train on it.

    The data is written with ``datasets.dump_svmlight_file`` and the
    resulting file is passed to ``_sofia_ml.train`` together with
    ``X.shape[1]`` as the dimensionality.

    Parameters
    ----------
    X : input data; must expose ``shape``
    y : target labels; must expose ``size`` when ``query_id`` is ``None``
    alpha : float
        Forwarded to the backend.
    query_id : array-like or None
        Per-sample query ids written to the file. ``None`` is replaced by
        ``np.ones(y.size)``, i.e. every sample gets the same id.
    max_iter : int
        Forwarded to the backend.
    model : {'rank', 'combined-ranking'}
    step_probability : float
        Forwarded to the backend.

    Returns
    -------
    The value returned by ``_sofia_ml.train``.
    """
    groups = np.ones(y.size) if query_id is None else query_id
    with tempfile.NamedTemporaryFile() as tmp:
        datasets.dump_svmlight_file(X, y, tmp.name, query_id=groups)
        # Train before leaving the with-block; the temporary file is removed
        # on exit.
        return _sofia_ml.train(tmp.name, X.shape[1], alpha, max_iter, False,
                               model, step_probability)
def sgd_train(X, y, b, alpha, n_features=None, model='rank', max_iter=100,
              step_probability=0.5):
    """Run ``_sofia_ml.train`` on a data file or on in-memory data.

    Minimizes an expression of the form

        Loss(X, y, b) + 0.5 * alpha * (||w|| ** 2)

    where Loss is a hinge loss defined on pairs of samples.

    Parameters
    ----------
    X : input data
        If it is a ``bstring`` instance it is passed straight to the backend
        (presumably a path to an existing data file). Otherwise it is
        serialized to a temporary file via ``datasets.dump_svmlight_file``.
    y : target labels
    b : blocks (aka query_id); written to the temporary file, unused for
        ``bstring`` input
    alpha : float
        Forwarded to the backend.
    n_features : int or None
        Dimensionality given to the backend. When ``None`` it defaults to
        ``2 ** 17`` for ``bstring`` input and to ``X.shape[1]`` otherwise.
    model : {'rank', 'combined-ranking', 'roc'}
    max_iter : int
        Forwarded to the backend.
    step_probability : float
        Forwarded to the backend.

    Returns
    -------
    (coef, None) : tuple
        ``coef`` is whatever ``_sofia_ml.train`` returns; the second element
        is always ``None``.
    """
    if isinstance(X, bstring):
        # 2 ** 17 is the default in sofia-ml. TODO: parse file to see
        dim = 2 ** 17 if n_features is None else n_features
        coef = _sofia_ml.train(X, dim, alpha, max_iter, False, model,
                               step_probability)
        return coef, None

    # An explicitly requested dimensionality takes precedence over the
    # width of X.
    dim = X.shape[1] if n_features is None else n_features
    with tempfile.NamedTemporaryFile() as tmp:
        datasets.dump_svmlight_file(X, y, tmp.name, query_id=b)
        # The temporary file disappears when the with-block ends, so the
        # backend has to read it here.
        coef = _sofia_ml.train(tmp.name, dim, alpha, max_iter, False, model,
                               step_probability)
    return coef, None