def __init__(self,
             n_estimators=10,       # 30
             max_depth=0,
             max_leaf_nodes=0,
             max_features=0,
             min_samples_leaf=1,    # 5
             # random_state=123456,
             # feature_type="float",
             # n_jobs=1,
             model_file=None,
             verbose=False
             ):
    if model_file is None:
        model_file = tempfile().name
    self.model_file = model_file
    # Stored for later use; not passed on the command line here.
    self.fte_per_split = max_features
    self.verbose = verbose
    # Command line used by fit(); values are stringified before the call.
    self.fit_argv = [WiseRFRegressor.wiserf_src]
    self.fit_argv += ["learn-regress"]
    self.fit_argv += ["--model-file", self.model_file]
    self.fit_argv += ["--criteria", "variance"]
    self.fit_argv += ["--num-trees", n_estimators]
    self.fit_argv += ["--max-depth", max_depth]
    self.fit_argv += ["--max-leaf-nodes", max_leaf_nodes]
    self.fit_argv += ["--min-instances-per-node", min_samples_leaf]
    # Command line used by predict().
    self.pred_argv = [WiseRFRegressor.wiserf_src]
    self.pred_argv += ["test-regress"]
    self.pred_argv += ["--model-file", self.model_file]
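# A minimal usage sketch, for illustration only: it assumes WiseRFRegressor.wiserf_src
# points at a local WiseRF binary and that the class exposes a fit(X, y) method
# alongside the predict() defined further below.
#
#     rf = WiseRFRegressor(n_estimators=30, min_samples_leaf=5, verbose=True)
#     rf.fit(X_train, y_train)       # hypothetical fit(); runs "learn-regress"
#     y_pred = rf.predict(X_test)    # runs "test-regress" against self.model_file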
def _make_wiserf_csvs(X, Y):
    infile = tempfile().name
    descfile = tempfile().name
    tmpX = DataFrame(X).add_prefix("x")
    if Y is not None:
        tmpY = DataFrame(Y).add_prefix("y")
        tmpX = tmpX.join(tmpY, how='outer')
    columns = tmpX.columns
    tmpX.to_csv(infile, header=False, index=False)
    with open(descfile, 'w') as outfile:
        outfile.write("label-type regression\n")
        outfile.write("num-features %d\n" % WiseRFRegressor._max_fte(X))
        for indx, column in enumerate(columns):
            if column.startswith('y'):
                outfile.write("class-column %d\n" % indx)
    return [infile, descfile]
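# For reference, a sketch of the files _make_wiserf_csvs writes for a 3-feature X and a
# single-column Y, assuming _max_fte returns the feature count (keywords taken from the
# code above, not from WiseRF documentation):
#
#     data CSV (no header, no index), one row per sample:  x0,x1,x2,y0
#     description file:
#         label-type regression
#         num-features 3
#         class-column 3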
def _default(self, model, fit=False):
    if fit:
        self._modelfile = tempfile().name
    model = model or self._modelfile
    assert model is not None
    argv = map(str, self._argv)
    argv += ["--dimensionality", self._dim]
    # The same binary handles both modes: training reads --training_file and writes
    # --model_out; testing reads --test_file and an existing --model_in.
    argv += [("--test_file", "--training_file")[fit], self._data]
    argv += [("--model_in", "--model_out")[fit], model]
    if not fit:
        outfile = tempfile().name
        argv += ["--cluster_mapping_out", outfile]
        call(argv, verbose=self.verbose)
        X, y = read_svmlight(outfile)
        return X.todense()
    else:
        call(argv, verbose=self.verbose)
        self._modelfile = model
def __init__(self,
             learner_type="pegasos",
             loop_type="stochastic",
             eta_type="pegasos",
             prediction_type="linear",
             bias_term=True,
             iterations=100000,
             lambda_=0.1,
             passive_aggressive_c=None,
             passive_aggressive_lambda=0,
             perceptron_margin_size=1.0,
             rank_step_probability=0.5,
             hash_mask_bits=0,
             random_state=0,
             model_file=None,
             verbose=False
             ):
    assert learner_type in ["pegasos", "sgd-svm", "passive-aggressive",
                            "margin-perceptron", "romma", "logreg-pegasos",
                            "logreg", "least-mean-squares"]
    assert loop_type in ["stochastic", "balanced-stochastic", "rank", "roc",
                         "query-norm-rank", "combined-ranking", "combined-roc"]
    assert eta_type in ["basic", "pegasos", "constant"]
    assert prediction_type in ["linear", "logistic"]
    if model_file is None:
        model_file = tempfile().name
    self.verbose = verbose
    # Command line used by fit(); values are stringified before the call.
    self.fit_argv = [SofiaML.sofia_src]
    self.fit_argv += ["--learner_type", learner_type]
    self.fit_argv += ["--loop_type", loop_type]
    self.fit_argv += ["--eta_type", eta_type]
    self.fit_argv += ["--iterations", iterations]
    self.fit_argv += ["--prediction_type", prediction_type]
    # Regularization / learner-specific options.
    if learner_type in ["pegasos", "sgd-svm", "logreg-pegasos"]:
        self.fit_argv += ["--lambda", lambda_]
    elif learner_type == "passive-aggressive":
        if passive_aggressive_c is not None:
            self.fit_argv += ["--passive_aggressive_c", passive_aggressive_c]
        self.fit_argv += ["--passive_aggressive_lambda", passive_aggressive_lambda]
    elif learner_type == "margin-perceptron":
        self.fit_argv += ["--perceptron_margin_size", perceptron_margin_size]
    if loop_type in ["combined-ranking", "combined-roc"]:
        self.fit_argv += ["--rank_step_probability", rank_step_probability]
    if not bias_term:
        self.fit_argv += ["--no_bias_term"]
    self.fit_argv += ["--hash_mask_bits", hash_mask_bits]
    self.fit_argv += ["--random_seed", random_state]
    self.fit_argv += ["--model_out", model_file]
    # Command line used by predict(); must match the fit-time feature options.
    self.pred_argv = [SofiaML.sofia_src]
    if not bias_term:
        self.pred_argv += ["--no_bias_term"]
    self.pred_argv += ["--hash_mask_bits", hash_mask_bits]
    self.pred_argv += ["--random_seed", random_state]
    self.pred_argv += ["--model_in", model_file]
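# A minimal usage sketch, for illustration only: it assumes SofiaML.sofia_src points at
# the sofia-ml binary and that the class exposes a fit(X, y) method alongside the
# predict() defined below.
#
#     clf = SofiaML(learner_type="sgd-svm", loop_type="balanced-stochastic",
#                   lambda_=0.01, iterations=200000, random_state=42)
#     clf.fit(X_train, y_train)      # hypothetical fit(); writes the model via --model_out
#     scores = clf.predict(X_test)   # scores read back from --results_file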
def predict(self, X, Y=None):
    """Returns model predictions for the data points.

    Parameters
    ----------
    X: numpy array, shape = [n_samples, n_features]
        Testing vectors, where n_samples is the number of samples
        and n_features is the number of features.

    Y: Not used for computation, optional
    """
    argv = map(str, self.pred_argv)
    pred_file = tempfile().name
    argv += ["--predictions-file", pred_file]
    self._default(argv, X)
    # WiseRF appears to write all predictions on a single space-separated line;
    # keep row 0 and drop the trailing empty field left by the final separator.
    tmpY = read_csv(pred_file, header=None, index_col=False, sep=" ")
    tmpY = tmpY.as_matrix()[0, :-1]
    if self.verbose:
        print X.shape
        print tmpY.shape
    return tmpY
def predict(self, X, Y=None):
    """Returns model predictions for the data points.

    Parameters
    ----------
    X: numpy array, shape = [n_samples, n_features]
        Testing vectors, where n_samples is the number of samples
        and n_features is the number of features.

    Y: Not used for computation, optional
    """
    assert self._dim == X.shape[1]
    data = make_svmlight(X, Y).name
    results_file = tempfile().name
    argv = map(str, self.pred_argv)
    argv += ["--test_file", data]
    argv += ["--results_file", results_file]
    self._default(argv)
    # sofia-ml writes one tab-separated line per example; the first column holds
    # the prediction.
    tmpY = read_csv(results_file, sep='\t', header=None, index_col=False)
    return tmpY.as_matrix()[:, 0]
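# For reference: a hypothetical two-example --results_file, read as tab-separated above
# (the second column is assumed to be the true label, which predict() discards):
#
#     0.8731<TAB>1.0
#     -0.2419<TAB>-1.0
#
# predict() would return array([ 0.8731, -0.2419]) for this file.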