def fit(self, X, Y):
    """Fits linear model on data points.

    Warning: serializing the classifier object won't save the model;
    specifying a model file is necessary.

    Parameters
    ----------
    X: numpy array, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
    Y: numpy array
        Target Values.

    Returns
    -------
    None
        The feature count is stored in ``self._dim`` and the command
        built from ``self.fit_argv`` is handed to ``self._default``.
    """
    # Remember the number of features seen during training.
    self._dim = X.shape[1]
    data = make_svmlight(X, Y).name
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # cannot be extended with ``+=``.
    argv = [str(arg) for arg in self.fit_argv]
    argv += ["--training_file", data]
    self._default(argv)
def predict(self, X, Y=None):
    """Returns model prediction of data points.

    Parameters
    ----------
    X: numpy array, shape = [n_samples, n_features]
        Testing vectors, where n_samples is the number of samples and
        n_features is the number of features.
    Y: Not used for computation, optional
        Forwarded to ``make_svmlight`` together with ``X``.

    Returns
    -------
    numpy array
        First column of the tab-separated results file.

    Raises
    ------
    AssertionError
        If the number of columns of ``X`` differs from ``self._dim``.
    """
    # NOTE: kept as an assert so callers see the same exception type;
    # be aware it is stripped when Python runs with -O.
    assert self._dim == X.shape[1]
    data = make_svmlight(X, Y).name
    results_file = tempfile().name
    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # cannot be extended with ``+=``.
    argv = [str(arg) for arg in self.pred_argv]
    argv += ["--test_file", data]
    argv += ["--results_file", results_file]
    self._default(argv)
    results = read_csv(results_file, sep='\t', header=None, index_col=False)
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
    # yields the same array on old and new pandas versions.
    return results.values[:, 0]
def _preprocess(self, X):
    """Stage ``X`` for later use and record its width.

    ``X`` is converted with ``make_matrix`` and handed to
    ``make_svmlight``; the ``.name`` of the returned object (presumably
    the path of a temporary file) is stored in ``self._data``. The
    number of columns is stored in ``self._dim`` as a string.
    """
    matrix = make_matrix(X)
    svmlight_path = make_svmlight(matrix).name
    # Both right-hand values are computed before either attribute is set.
    self._dim, self._data = str(matrix.shape[1]), svmlight_path