Esempio n. 1
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, String, Table, Bool, Float, ListOfString
from arguments import SklearnModel
from catboost import CatBoostRegressor


@dc.input(
    Table(
        key="inputData", table="inputTable", partition="inputPartition", required=True
    )
)
@dc.column(ListOfString(key="featureColumns", default=[]))
@dc.column(String(key="labelColumn", default="MEDV"))
@dc.param(
    Int(
        key="iterations",
        default=1000,
        help="The maximum number of trees that can be built when solving machine learning problems.",
    )
)
@dc.param(Float(key="learningRate", default=0.03, help="The learning rate."))
@dc.param(Int(key="depth", default=6, help="Depth of the tree."))
@dc.param(
    Float(
        key="l2LeafReg",
        default=3.0,
        help="Coefficient at the L2 regularization term of the cost function.",
    )
Esempio n. 2
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, String, Bool, Float, ListOfString, Table
import lightgbm as lgb
from arguments import SklearnModel


@dc.input(
    Table(key="inputData",
          table="inputTable",
          partition="inputPartition",
          required=True))
@dc.column(ListOfString(key="featureColumns", default=["f1", "f2", "f3",
                                                       "f4"]))
@dc.column(String(key="labelColumn", default="label"))
@dc.param(
    Int(key="maxDepth",
        default=-1,
        help="Maximum tree depth for base learners"))
@dc.param(
    String(
        key="boostingType",
        default="gbdt",
        help="Specify which booster to use: 'goss', 'rf' or 'dart'",
    ))
@dc.param(
    Int(key="numLeaves",
        default=31,
        help="Maximum tree leaves for base learners."))
Esempio n. 3
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Csv, ListOfString, String, Int
import statsmodels.api as sm
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    String(
        key="missing",
        default="none",
        help="Available options are ‘none’, ‘drop’, and ‘raise’.",
    )
)
@dc.param(
    String(
        key="method",
        default="lbfgs",
        help="‘newton’, ‘bfgs’, ‘lbfgs’, ‘powell’, ‘cg’, ‘ncg’, ‘basinhopping’,"
             " ‘minimize’",
    )
)
@dc.param(
    Int(key="maxiter", default=35, help="The maximum number of iterations to perform.")
)
@dc.param(Int(key="disp", default=1, help="Set to True to print convergence messages."))
Esempio n. 4
0
    if not predictColumns:
        predictColumns = [
            "prediction_{}".format(str(i)) for i in range(actualColumnCount)
        ]

    if len(predictColumns) != actualColumnCount:
        raise RecursionError(
            "Actual predict column count is: {}, but the length of predict columns given is: {}"
            .format(actualColumnCount, len(predictColumns)))

    return predictColumns


@dc.input(Csv(key="inputData"))
@dc.input(SklearnModel(key="inputModel"))
@dc.column(ListOfString(key="featureColumns", default=[]))
@dc.column(ListOfString(key="predictColumns", default="prediction"))
@dc.output(Csv(key="outputData"))
def SPPredict(context):
    args = context.args

    df = args.inputData

    model = common_util.encapsulateModel(args.inputModel)
    featureColumns = args.featureColumns
    predictColumns = args.predictColumns

    X = df[featureColumns].values if len(featureColumns) > 0 else df.values

    predictions = model.predict(X)
    isEstimator = model.isEstimator()