Esempio n. 1
0
    def __getitem__(self, item):
        """
        Project this DDF onto a subset of its columns.

        :param item: a column index (int), a column name (string), a slice
            over column positions, or a list mixing indices and names
        :return: a new DistributedDataFrame built from the projection of the
            selected columns
        :raises ValueError: if item is of an unsupported type, if an index is
            negative or out of range, if a name is not one of the columns, or
            if a list element is neither an int nor a string
        """
        if not isinstance(item, (list, slice, int, basestring)):
            raise ValueError('Only support sub-setting on columns')

        col_names = self.colnames

        if isinstance(item, (int, basestring)):
            item = [item]
        elif isinstance(item, slice):
            # list() guarantees a real list even where range() is lazy
            item = list(range(*item.indices(len(col_names))))

        projected_cols = []
        for x in item:
            if isinstance(x, int):
                # Negative indices are rejected, not wrapped around
                if x < 0 or x >= len(col_names):
                    raise ValueError('Invalid column index: {}'.format(x))
                projected_cols.append(col_names[x])
            elif isinstance(x, basestring):
                if x not in col_names:
                    raise ValueError('Invalid column name: {}'.format(x))
                projected_cols.append(x)
            else:
                # Fail loudly instead of silently dropping the selector
                raise ValueError('Invalid column selector: {!r}'.format(x))

        return DistributedDataFrame(self._jddf.getViewHandler().project(
                util.to_java_array(projected_cols, self._gateway_client.jvm.String, self._gateway_client)),
                self._gateway_client)
Esempio n. 2
0
    def predict(self, data):
        """
        Run the wrapped Java model's ``predict`` on one sample.

        The sample is first converted to a Java ``double`` array through the
        gateway client, then handed to the underlying model.

        :param data: the sample to predict on; it is converted element-wise
            to a Java double array (presumably a numeric feature vector)
        :return: whatever the underlying Java model's ``predict`` returns
        """
        gateway = self._gateway_client
        java_sample = util.to_java_array(data, gateway.jvm.double, gateway)
        return self._jml_model.predict(java_sample)
Esempio n. 3
0
File: ml.py Progetto: datascibox/DDF
def logistic_regression_gd(data, step_size=1.0, max_iters=10):
    """
    Train a 'logisticRegressionWithSGD' model on a DDF.

    :param data: the DDF to train on; its ``_jddf``, ``_gateway_client`` and
        ``colnames`` attributes are used
    :param step_size: step size forwarded to the trainer
    :param max_iters: iteration count forwarded to the trainer
    :return: a LogisticRegressionModel wrapping the trained Java model, the
        gateway client and a one-row pandas DataFrame of weights whose
        columns are 'Intercept' followed by every column name but the last
    """
    ml_obj = java_gateway.get_field(data._jddf, 'ML')
    gateway = data._gateway_client
    # The trainer receives its parameters positionally: max_iters, then step_size
    model = ml_obj.train('logisticRegressionWithSGD',
                         util.to_java_array([max_iters, step_size],
                                            gateway.jvm.Object, gateway))

    # Fetch the raw model once to avoid a second round-trip to the JVM
    raw_model = model.getRawModel()
    weights = [float(raw_model.intercept())] + list(raw_model.weights().toArray())
    # NOTE(review): dropping the last column name presumably skips the label
    # column -- confirm against the DDF schema convention
    weights = pd.DataFrame(data=[weights], columns=['Intercept'] + data.colnames[:-1])
    return LogisticRegressionModel(model, gateway, weights)