Example #1
0
from sparktk.loggers import log_load; log_load(__name__); del log_load

from sparktk.propobj import PropertiesObject
from sparktk import TkContext

__all__ = ["train", "load", "KMeansModel"]

# NOTE(review): this function continues beyond this excerpt; only the start of
# the body is visible here.
def train(frame, columns, k=2, scalings=None, max_iter=20, epsilon=1e-4, seed=None, init_mode="k-means||"):
    """
    Creates a KMeansModel by training on the given frame

    :param frame: (Frame) frame of training data
    :param columns: (List[str]) names of columns containing the observations for training
    :param k: (Optional(int)) number of clusters
    :param scalings: (Optional(List[float])) column scalings for each of the observation columns.  The scaling value is multiplied by
     the corresponding value in the observation column
    :param max_iter: (Optional(int)) number of iterations for which the algorithm should run
    :param epsilon: (Optional(float)) distance threshold within which we consider k-means to have converged. Default is 1e-4.
     If all centers move less than this Euclidean distance, we stop iterating one run
    :param seed: (Optional(long)) seed for randomness
    :param init_mode: (Optional(str)) the initialization technique for the algorithm.   It can be either "random" to choose
     random points as initial clusters or "k-means||" to use a parallel variant of k-means++. Default is "k-means||"
    :return: (KMeansModel) trained KMeans model

    """
    # The frame carries the TkContext that bridges Python to the JVM.
    tc = frame._tc
    # Look up the JVM-side (Scala) KMeans companion object used for training.
    _scala_obj = get_scala_obj(tc)
    # Accept a single column name as shorthand for a one-element list.
    # NOTE(review): `basestring` means this module targets Python 2.
    if isinstance(columns, basestring):
        columns = [columns]
    # Convert the Python list of column names into a Scala Vector[String]
    # so it can be passed across the py4j boundary.
    scala_columns = tc.jutils.convert.to_scala_vector_string(columns)
Example #2
0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from sparktk.frame.ops.classification_metrics_value import ClassificationMetricsValue
from sparktk.models.logistic_regression_summary_table import LogisticRegressionSummaryTable
from sparktk.loggers import log_load
from sparktk.propobj import PropertiesObject
from sparktk import TkContext
from sparktk.arguments import affirm_type

log_load(__name__)
del log_load

__all__ = ["train", "LogisticRegressionModel"]

# NOTE(review): the parameter list and body of this function continue beyond
# this excerpt; only the leading parameters are visible here.
# Trains a LogisticRegressionModel on the given frame.
def train(frame,
          observation_columns,              # names of columns holding the observations
          label_column,                     # name of the column holding the labels
          frequency_column=None,            # optional column of per-row frequencies/weights -- TODO confirm semantics
          num_classes=2,                    # number of classes; defaults to binary classification
          optimizer="LBFGS",                # optimization algorithm; defaults to "LBFGS"
          compute_covariance=True,          # whether to compute the covariance matrix -- TODO confirm
          intercept=True,                   # whether to fit an intercept term
          feature_scaling=False,            # whether to scale features before training
          threshold=0.5,                    # classification decision threshold
          reg_type="L2",                    # regularization type; defaults to "L2"
Example #3
0
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from sparktk.loggers import log_load
log_load(__name__)
del log_load

from sparktk.propobj import PropertiesObject
from sparktk import TkContext

__all__ = ["train", "load", "PcaModel"]


# NOTE(review): the docstring and body of this function continue beyond this
# excerpt -- only its opening lines are visible here.
# Trains a PcaModel on the given frame; `mean_centered` controls mean-centering
# of the data and `k` is presumably the number of principal components -- verify
# against the full docstring.
def train(frame, columns, mean_centered=True, k=None):
    """
    Creates a PcaModel by training on the given frame

    Parameters
    ----------
Example #4
0
#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from sparktk.loggers import log_load; log_load(__name__); del log_load

from sparktk.propobj import PropertiesObject
from sparktk.frame.ops.classification_metrics_value import ClassificationMetricsValue
from sparktk import TkContext

__all__ = ["train", "load", "SvmModel"]

# NOTE(review): the parameter list and body of this function continue beyond
# this excerpt; only the leading parameters are visible here.
# Trains an SvmModel on the given frame.
def train(frame,
          label_column,                     # name of the column holding the labels
          observation_columns,              # names of columns holding the observations
          intercept = True,                 # whether to fit an intercept term
          num_iterations = 100,             # number of training iterations
          step_size = 1.0,                  # optimizer step size
          reg_type = None,                  # regularization type; defaults to none
          reg_param = 0.01,                 # regularization parameter