Example 1
    def __init__(self,
                 n_estimators=10,  # 30
                 max_depth=0,
                 max_leaf_nodes=0,
                 max_features=0,
                 min_samples_leaf=1,  # 5
                 # random_state=123456,
                 # feature_type="float",
                 # n_jobs=1,
                 model_file=None,
                 verbose=False
                 ):
        if model_file is None:
            model_file = tempfile().name
        self.model_file = model_file
        self.fte_per_split = max_features
        self.verbose = verbose

        self.fit_argv = [WiseRFRegressor.wiserf_src]
        self.fit_argv += ["learn-regress"]
        self.fit_argv += ["--model-file", self.model_file]
        self.fit_argv += ["--criteria", "variance"]
        self.fit_argv += ["--num-trees", n_estimators]
        self.fit_argv += ["--max-depth", max_depth]
        self.fit_argv += ["--max-leaf-nodes", max_leaf_nodes]
        self.fit_argv += ["--min-instances-per-node", min_samples_leaf]

        self.pred_argv = [WiseRFRegressor.wiserf_src]
        self.pred_argv += ["test-regress"]
        self.pred_argv += ["--model-file", self.model_file]
Example 2
    def _make_wiserf_csvs(X, Y):
        infile = tempfile().name
        descfile = tempfile().name

        tmpX = DataFrame(X).add_prefix("x")
        if Y is not None:
            tmpY = DataFrame(Y).add_prefix("y")
            tmpX = tmpX.join(tmpY, how='outer')
        columns = tmpX.columns
        tmpX.to_csv(infile, header=False, index=False)
        with open(descfile, 'w') as outfile:
            outfile.write("label-type regression\n")
            outfile.write("num-features %d\n" % WiseRFRegressor._max_fte(X))
            for indx, column in enumerate(columns):
                if column.startswith('y'):
                    outfile.write("class-column %d\n" % indx)
        return [infile, descfile]
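The description file records which columns of the combined CSV are targets. For instance, with a three-column X and a single Y column, and assuming WiseRFRegressor._max_fte (not shown in these excerpts) returns the number of feature columns, the file written above would contain:

    label-type regression
    num-features 3
    class-column 3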
Example 3
    def _default(self, model, fit=False):
        if fit:
            self._modelfile = tempfile().name
        model = model or self._modelfile
        assert model is not None
        argv = map(str, self._argv)
        argv += ["--dimensionality", self._dim]
        # the boolean `fit` indexes into the tuple to pick the right flag pair:
        # training uses --training_file/--model_out, prediction --test_file/--model_in
        argv += [("--test_file", "--training_file")[fit], self._data]
        argv += [("--model_in", "--model_out")[fit], model]
        if not fit:
            outfile = tempfile().name
            argv += ["--cluster_mapping_out", outfile]
            call(argv, verbose=self.verbose)
            X, y = read_svmlight(outfile)
            return X.todense()
        else:
            call(argv, verbose=self.verbose)
            self._modelfile = model
Example 4
    def __init__(self,
                 learner_type="pegasos",
                 loop_type="stochastic",
                 eta_type="pegasos",
                 prediction_type="linear",
                 bias_term=True,
                 iterations=100000,
                 lambda_=0.1,
                 passive_aggressive_c=None,
                 passive_aggressive_lambda=0,
                 perceptron_margin_size=1.0,
                 rank_step_probability=0.5,
                 hash_mask_bits=0,
                 random_state=0,
                 model_file=None,
                 verbose=False
                 ):
        assert learner_type in ["pegasos", "sgd-svm", "passive-aggressive", "margin-perceptron", "romma", "logreg-pegasos", "logreg", "least-mean-squares"]
        assert loop_type in ["stochastic", "balanced-stochastic", "rank", "roc", "query-norm-rank", "combined-ranking", "combined-roc"]
        assert eta_type in ["basic", "pegasos", "constant"]
        assert prediction_type in ["linear", "logistic"]
        if model_file is None:
            model_file = tempfile().name
        self.verbose = verbose

        self.fit_argv = [SofiaML.sofia_src]
        self.fit_argv += ["--learner_type", learner_type]
        self.fit_argv += ["--loop_type", loop_type]
        self.fit_argv += ["--eta_type", eta_type]
        self.fit_argv += ["--iterations", iterations]
        self.fit_argv += ["--prediction_type", prediction_type]
        if learner_type in ["pegasos", "sgd-svm", "logreg-pegasos"]:
            self.fit_argv += ["--lambda", lambda_]
        elif learner_type == "passive-aggressive":
            if passive_aggressive_c is not None:
                self.fit_argv += ["--passive_aggressive_c", passive_aggressive_c]
            self.fit_argv += ["--passive_aggressive_lambda", passive_aggressive_lambda]
        elif learner_type == "margin-perceptron":
            self.fit_argv += ["--perceptron-margin-size", perceptron_margin_size]
        if loop_type in ["combined-ranking", "combined-roc"]:
            self.fit_argv += ["--rank_step_probability", rank_step_probability]
        if not bias_term:
            self.fit_argv += ["--no_bias_term"]

        self.fit_argv += ["--hash_mask_bits", hash_mask_bits]
        self.fit_argv += ["--random_seed", random_state]
        self.fit_argv += ["--model_out", model_file]

        self.pred_argv = [SofiaML.sofia_src]
        if not bias_term:
            self.pred_argv += ["--no_bias_term"]
        self.pred_argv += ["--hash_mask_bits", hash_mask_bits]
        self.pred_argv += ["--random_seed", random_state]
        self.pred_argv += ["--model_in", model_file]
Example 5
    def predict(self, X, Y=None):
        """Returns model prediction of data points.

        Parameters
        ----------
        X: numpy array, shape = [n_samples, n_features]
            Testing vectors, where n_samples is the number of samples and n_features is the number of features.
        Y: Not used for computation, optional
        """
        argv = map(str, self.pred_argv)
        pred_file = tempfile().name
        argv += ["--predictions-file", pred_file]
        self._default(argv, X)
        tmpY = read_csv(pred_file, header=None, index_col=False, sep=" ")
        tmpY = tmpY.as_matrix()[0, :-1]
        print X.shape
        print tmpY.shape
        return tmpY
Example 6
    def predict(self, X, Y=None):
        """Returns model prediction of data points.

        Parameters
        ----------
        X: numpy array, shape = [n_samples, n_features]
            Testing vectors, where n_samples is the number of samples and n_features is the number of features.
        Y: Not used for computation, optional
        """
        assert self._dim == X.shape[1]
        data = make_svmlight(X, Y).name
        results_file = tempfile().name
        argv = map(str, self.pred_argv)
        argv += ["--test_file", data]
        argv += ["--results_file", results_file]
        self._default(argv)
        Y = read_csv(results_file, sep='\t', header=None, index_col=False)
        return Y.as_matrix()[:, 0]
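Both predict methods follow the same pattern: write the test data to disk, allocate a fresh results file, append the appropriate flags, shell out, and read the predictions back with pandas. A self-contained sketch of that pattern using plain subprocess and tempfile in place of the project's call and make_svmlight helpers (the flag names are the sofia-ml ones already used above):

    import subprocess
    import tempfile

    from pandas import read_csv

    def cli_predict(pred_argv, test_file):
        # allocate a results file, append the sofia-ml test/results flags,
        # run the binary, then parse the tab-separated predictions
        results_file = tempfile.NamedTemporaryFile(delete=False).name
        argv = list(map(str, pred_argv))
        argv += ["--test_file", test_file]
        argv += ["--results_file", results_file]
        subprocess.check_call(argv)
        predictions = read_csv(results_file, sep="\t", header=None, index_col=False)
        return predictions.values[:, 0]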