예제 #1
0
# coding=utf-8
from __future__ import absolute_import, print_function

import os
from suanpan.app import app
from suanpan.docker.arguments import Folder, String
from suanpan.storage import StorageProxy


@app.param(String(key="storageType", default="oss"))
@app.param(
    String(key="folder",
           default="man_face_25k",
           help="girl_face_50k  man_face_25k"))
@app.output(Folder(key="modelDir"))
def SPModels(context):
    """Download a facelab model folder from storage and return its local dir.

    Reads ``storageType``, ``folder`` and ``modelDir`` from ``context.args``,
    points a fresh ``StorageProxy`` at the requested backend type, and asks it
    to download ``common/model/facelab/<folder>`` to ``args.modelDir``.

    Returns:
        ``args.modelDir``, unchanged, as the value of the ``modelDir`` output.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import posixpath

    args = context.args

    storage = StorageProxy(None, None)
    storage.setBackend(type=args.storageType)

    # The download source is a storage-side path (presumably an object key for
    # the default "oss" backend), not a local filesystem path. Join it with
    # "/" explicitly: os.path.join would insert "\\" on a Windows host.
    remotePath = posixpath.join("common/model/facelab", args.folder)
    storage.download(remotePath, args.modelDir)

    return args.modelDir


if __name__ == "__main__":
    # Script entry point. No argument is passed explicitly; the context is
    # presumably injected by the @app decorators (hence the pylint disable).
    SPModels()  # pylint: disable=no-value-for-parameter
예제 #2
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, Csv, String, Bool
import statsmodels.api as sm
import pandas as pd
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(Bool(key="timestampIndex", default=False))
@dc.column(String(key="timestampColumn", default="date"))
@dc.column(String(key="labelColumn", default="y"))
@dc.param(
    String(
        key="missing",
        default="none",
        help="Available options are ‘none’, ‘drop’, and ‘raise’.",
    ))
@dc.param(
    String(
        key="trend",
        default="c",
        help=
        "Whether to include a constant or not. ‘c’ includes constant, ‘nc’ no constant.",
    ))
@dc.param(String(key="method", default="cmle", help="‘cmle’, ‘mle’"))
@dc.param(
    Int(key="maxiter",
        default=35,
예제 #3
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, String, Table, Bool, Float, ListOfString
from arguments import SklearnModel
from catboost import CatBoostRegressor


@dc.input(
    Table(
        key="inputData", table="inputTable", partition="inputPartition", required=True
    )
)
@dc.column(ListOfString(key="featureColumns", default=[]))
@dc.column(String(key="labelColumn", default="MEDV"))
@dc.param(
    Int(
        key="iterations",
        default=1000,
        help="The maximum number of trees that can be built when solving machine learning problems.",
    )
)
@dc.param(Float(key="learningRate", default=0.03, help="The learning rate."))
@dc.param(Int(key="depth", default=6, help="Depth of the tree."))
@dc.param(
    Float(
        key="l2LeafReg",
        default=3.0,
        help="Coefficient at the L2 regularization term of the cost function.",
    )
예제 #4
0
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, String, Bool, Float, ListOfString, Table
import lightgbm as lgb
from arguments import SklearnModel


@dc.input(
    Table(key="inputData",
          table="inputTable",
          partition="inputPartition",
          required=True))
@dc.column(ListOfString(key="featureColumns", default=["f1", "f2", "f3",
                                                       "f4"]))
@dc.column(String(key="labelColumn", default="label"))
@dc.param(
    Int(key="maxDepth",
        default=-1,
        help="Maximum tree depth for base learners"))
@dc.param(
    String(
        key="boostingType",
        default="gbdt",
        help="Specify which booster to use: 'goss', 'rf' or 'dart'",
    ))
@dc.param(
    Int(key="numLeaves",
        default=31,
        help="Maximum tree leaves for base learners."))
@dc.param(
예제 #5
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Csv, ListOfString, String, Int
import statsmodels.api as sm
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    String(
        key="missing",
        default="none",
        help="Available options are ‘none’, ‘drop’, and ‘raise’.",
    )
)
@dc.param(
    String(
        key="method",
        default="lbfgs",
        help="‘newton’, ‘bfgs’, ‘lbfgs’, ‘powell’, ‘cg’, ‘ncg’, ‘basinhopping’,"
             " ‘minimize’",
    )
)
@dc.param(
    Int(key="maxiter", default=35, help="The maximum number of iterations to perform.")
)
@dc.param(Int(key="disp", default=1, help="Set to True to print convergence messages."))
예제 #6
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, String, Csv, Bool, Float, ListOfString
from catboost import CatBoostClassifier
from arguments import SklearnModel


@dc.input(Csv(key="inputData", required=True))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    Int(
        key="iterations",
        default=1000,
        help=
        "The maximum number of trees that can be built when solving machine learning problems.",
    ))
@dc.param(Float(key="learningRate", default=0.03, help="The learning rate."))
@dc.param(Int(key="depth", default=6, help="Depth of the tree."))
@dc.param(
    Float(
        key="l2LeafReg",
        default=3.0,
        help="Coefficient at the L2 regularization term of the cost function.",
    ))
@dc.param(
    Float(
        key="rsm",
        default=1,
예제 #7
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Csv, ListOfString, String
import statsmodels.api as sm
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    String(
        key="missing",
        default="none",
        help="Available options are ‘none’, ‘drop’, and ‘raise’.",
    ))
@dc.param(String(key="method", default="pinv", help="Can be “pinv”, “qr”. "))
@dc.output(SklearnModel(key="outputModel"))
def SPGLS(context):
    # 从 Context 中获取相关数据
    args = context.args
    # 查看上一节点发送的 args.inputData 数据
    df = args.inputData

    featureColumns = args.featureColumns
    labelColumn = args.labelColumn

    features = df[featureColumns].values
    label = df[labelColumn].values
예제 #8
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Folder, String
from suanpan.storage import storage

DATESET_PATH_PREFIX = "common/data"


@dc.param(
    String(
        key="dataset",
        required=True,
        help=(
            "allowed values: ['boston_housing', 'breast_cancer', 'california_housing', "
            "'covertype', 'diabetes', 'digits', 'iris', 'kddcup', 'linnerud', 'wine', 'titanic'"
            ", 'sun_spots', 'macrodata']"
        ),
    )
)
@dc.output(Folder(key="outputDir"))
def SPClassicDatasets(context):
    """Download the dataset named by ``args.dataset`` into ``args.outputDir``.

    The storage-side source path is ``DATESET_PATH_PREFIX`` joined with the
    dataset name via ``storage.storagePathJoin``.

    Returns:
        ``args.outputDir``, unchanged, as the value of the ``outputDir`` output.
    """
    params = context.args

    # Build the storage-side source first, then hand both ends to the backend.
    source = storage.storagePathJoin(DATESET_PATH_PREFIX, params.dataset)
    destination = params.outputDir
    storage.download(source, destination)

    return destination


if __name__ == "__main__":
    # Script entry point. No argument is passed explicitly; the context is
    # presumably injected by the @dc decorators (hence the pylint disable).
    SPClassicDatasets()  # pylint: disable=no-value-for-parameter
예제 #9
0
# coding=utf-8
from __future__ import absolute_import, print_function

import os
from suanpan.app import app
from suanpan.docker.arguments import Folder, String
from suanpan.storage import StorageProxy


@app.param(String(key="storageType", default="oss"))
@app.param(
    String(key="folder", default="man_1", help="girl_0  man_0 girl_1 man_1"))
@app.output(Folder(key="outputData"))
def SPMaterial(context):
    """Download a facelab material video from storage into the output folder.

    Reads ``storageType``, ``folder`` and ``outputData`` from ``context.args``,
    points a fresh ``StorageProxy`` at the requested backend type, and asks it
    to download ``common/data/facelab_material/<folder>/data.mp4`` to
    ``<outputData>/data.mp4``.

    Returns:
        ``args.outputData``, unchanged, as the value of the ``outputData``
        output.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import posixpath

    args = context.args

    storage = StorageProxy(None, None)
    storage.setBackend(type=args.storageType)

    # The source is a storage-side path (presumably an object key for the
    # default "oss" backend), so it is joined with "/" explicitly; os.path.join
    # would insert "\\" on a Windows host.
    remotePath = posixpath.join("common/data/facelab_material", args.folder,
                                "data.mp4")
    # The destination is a real local path, so the host OS separator is right.
    localPath = os.path.join(args.outputData, "data.mp4")
    storage.download(remotePath, localPath)

    return args.outputData


if __name__ == "__main__":
    # Script entry point. No argument is passed explicitly; the context is
    # presumably injected by the @app decorators (hence the pylint disable).
    SPMaterial()  # pylint: disable=no-value-for-parameter
예제 #10
0
import numpy as np
from statsmodels.tsa.ar_model import ARResultsWrapper
from statsmodels.tsa.statespace.sarimax import SARIMAXResultsWrapper
from statsmodels.tsa.arima_model import ARMAResultsWrapper, ARIMAResultsWrapper
from statsmodels.regression.linear_model import RegressionResultsWrapper
from statsmodels.discrete.discrete_model import (
    BinaryResultsWrapper,
    MultinomialResultsWrapper,
)
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.input(SklearnModel(key="inputModel"))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="predictColumn", default="prediction"))
@dc.param(String(key="start", default="2000-11-30"))
@dc.param(String(key="end", default="2001-05-31"))
@dc.param(Bool(key="dynamic", default=True))
@dc.output(Csv(key="outputData"))
def SPStatsPredict(context):
    args = context.args

    model = args.inputModel
    if isinstance(
            model,
        (
            ARResultsWrapper,
            ARMAResultsWrapper,
            ARIMAResultsWrapper,
            SARIMAXResultsWrapper,
예제 #11
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, Csv, ListOfInt, String, Bool
import statsmodels.api as sm
import pandas as pd
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(Bool(key="timestampIndex", default=False))
@dc.column(String(key="timestampColumn", default="date"))
@dc.column(String(key="labelColumn", default="y"))
@dc.param(
    ListOfInt(
        key="order",
        default=[2, 0],
        help="The (p,q) order of the model for the number of AR parameters, differences, and MA parameters to use.",
    )
)
@dc.param(
    String(
        key="trend",
        default="c",
        help="Whether to include a constant or not. ‘c’ includes constant, ‘nc’ no constant.",
    )
)
@dc.param(
    String(
        key="method",
예제 #12
0
@dc.param(
    ListOfFloat(
        key="ma",
        default=[0.65, 0.35],
        help=
        "coefficient for moving-average lag polynomial, including zero lag",
    ))
@dc.param(
    Int(key="nsample", default=250, help="length of simulated time series"))
@dc.param(Float(key="sigma", default=1.0, help="standard deviation of noise"))
@dc.param(Int(key="randomSeed", default=12345, help="random seed"))
@dc.param(Bool(key="dateCol", default=True, help="date in dataset"))
@dc.param(
    String(
        key="startDate",
        default="19800131",
        help="The first abbreviated date, for instance, '1965q1' or '1965m1'",
    ))
@dc.param(String(key="freq", default="M", help="DateOffset"))
@dc.output(Csv(key="outputData"))
def SPARMASample(context):
    # 从 Context 中获取相关数据
    args = context.args
    # 查看上一节点发送的 args.inputData 数据
    np.random.seed(args.randomSeed)
    arparams = np.array(args.ar)
    maparams = np.array(args.ma)
    nobs = args.nsample
    sample = arma_generate_sample(arparams, maparams, nobs, sigma=args.sigma)
    if args.dateCol:
        dates = pd.date_range(start=args.startDate,
예제 #13
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, String, Csv, Bool, Float, ListOfString
from catboost import CatBoostClassifier
from arguments import SklearnModel


@dc.input(Csv(key="inputData", required=True))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    Int(
        key="iterations",
        default=1000,
        help=
        "The maximum number of trees that can be built when solving machine learning problems.",
    ))
@dc.param(Float(key="learningRate", default=0.03, help="The learning rate."))
@dc.param(Int(key="depth", default=6, help="Depth of the tree."))
@dc.param(
    Float(
        key="l2LeafReg",
        default=3.0,
        help="Coefficient at the L2 regularization term of the cost function.",
    ))
@dc.param(
    Float(
        key="rsm",
        default=1,
예제 #14
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Csv, ListOfString, String, Int
import statsmodels.api as sm
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    String(
        key="family",
        default="Gaussian",
        help=
        "The default is Gaussian. Binomial, Gamma, Gaussian, InverseGaussian"
        "NegativeBinomial, Poisson, Tweedie",
    ))
@dc.param(
    String(
        key="missing",
        default="none",
        help="Available options are ‘none’, ‘drop’, and ‘raise’.",
    ))
@dc.param(Int(key="maxiter", default=100, help="Default is 100."))
@dc.output(SklearnModel(key="outputModel"))
def SPGLM(context):
    # 从 Context 中获取相关数据
    args = context.args
예제 #15
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Csv, ListOfString, String, Int
import statsmodels.api as sm
import statsmodels
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(ListOfString(key="featureColumns", default=["a", "b", "c", "d"]))
@dc.column(String(key="labelColumn", default="e"))
@dc.param(
    String(
        key="M",
        default="HuberT",
        help=
        "The default is LeastSquares. HuberT, RamsayE, AndrewWave, TrimmedMean"
        "Hampel, TukeyBiweight",
    ))
@dc.param(
    String(
        key="missing",
        default="none",
        help="Available options are ‘none’, ‘drop’, and ‘raise’.",
    ))
@dc.param(
    Int(key="maxiter",
        default=50,
        help="The maximum number of iterations to try."))
예제 #16
0
# coding=utf-8
from __future__ import absolute_import, print_function

from suanpan.docker import DockerComponent as dc
from suanpan.docker.arguments import Int, Csv, ListOfInt, String, Bool
import statsmodels.api as sm
import pandas as pd
from arguments import SklearnModel


@dc.input(Csv(key="inputData"))
@dc.column(Bool(key="timestampIndex", default=False))
@dc.column(String(key="timestampColumn", default="date"))
@dc.column(String(key="labelColumn", default="y"))
@dc.param(
    ListOfInt(
        key="order",
        default=[1, 0, 0],
        help="The (p,d,q) order of the model for the number of AR parameters, "
        "differences, and MA parameters.",
    )
)
@dc.param(
    ListOfInt(
        key="seasonalOrder",
        default=[0, 0, 0, 0],
        help="The (P,D,Q,s) order of the seasonal component of the model for the"
        " AR parameters, differences, MA parameters, and periodicity.",
    )
)
@dc.param(