# coding=utf-8 from __future__ import absolute_import, print_function from suanpan.docker import DockerComponent as dc from suanpan.docker.arguments import Int, String, Table, Bool, Float, ListOfString from arguments import SklearnModel from catboost import CatBoostRegressor @dc.input( Table( key="inputData", table="inputTable", partition="inputPartition", required=True ) ) @dc.column(ListOfString(key="featureColumns", default=[])) @dc.column(String(key="labelColumn", default="MEDV")) @dc.param( Int( key="iterations", default=1000, help="The maximum number of trees that can be built when solving machine learning problems.", ) ) @dc.param(Float(key="learningRate", default=0.03, help="The learning rate.")) @dc.param(Int(key="depth", default=6, help="Depth of the tree.")) @dc.param( Float( key="l2LeafReg", default=3.0, help="Coefficient at the L2 regularization term of the cost function.", )
# coding=utf-8 from __future__ import absolute_import, print_function from suanpan.docker import DockerComponent as dc from suanpan.docker.arguments import Int, String, Bool, Float, ListOfString, Table import lightgbm as lgb from arguments import SklearnModel @dc.input( Table(key="inputData", table="inputTable", partition="inputPartition", required=True)) @dc.column(ListOfString(key="featureColumns", default=["f1", "f2", "f3", "f4"])) @dc.column(String(key="labelColumn", default="label")) @dc.param( Int(key="maxDepth", default=-1, help="Maximum tree depth for base learners")) @dc.param( String( key="boostingType", default="gbdt", help="Specify which booster to use: 'goss', 'rf' or 'dart'", )) @dc.param( Int(key="numLeaves", default=31, help="Maximum tree leaves for base learners."))
# coding=utf-8 from __future__ import absolute_import, print_function from suanpan.docker import DockerComponent as dc from suanpan.docker.arguments import Csv, ListOfString, String, Int import statsmodels.api as sm from arguments import SklearnModel @dc.input(Csv(key="inputData")) @dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"])) @dc.column(String(key="labelColumn", default="e")) @dc.param( String( key="missing", default="none", help="Available options are ‘none’, ‘drop’, and ‘raise’.", ) ) @dc.param( String( key="method", default="lbfgs", help="‘newton’, ‘bfgs’, ‘lbfgs’, ‘powell’, ‘cg’, ‘ncg’, ‘basinhopping’," " ‘minimize’", ) ) @dc.param( Int(key="maxiter", default=35, help="The maximum number of iterations to perform.") ) @dc.param(Int(key="disp", default=1, help="Set to True to print convergence messages."))
if not predictColumns: predictColumns = [ "prediction_{}".format(str(i)) for i in range(actualColumnCount) ] if len(predictColumns) != actualColumnCount: raise RecursionError( "Actual predict column count is: {}, but the length of predict columns given is: {}" .format(actualColumnCount, len(predictColumns))) return predictColumns @dc.input(Csv(key="inputData")) @dc.input(SklearnModel(key="inputModel")) @dc.column(ListOfString(key="featureColumns", default=[])) @dc.column(ListOfString(key="predictColumns", default="prediction")) @dc.output(Csv(key="outputData")) def SPPredict(context): args = context.args df = args.inputData model = common_util.encapsulateModel(args.inputModel) featureColumns = args.featureColumns predictColumns = args.predictColumns X = df[featureColumns].values if len(featureColumns) > 0 else df.values predictions = model.predict(X) isEstimator = model.isEstimator()