Code example #1
import os

import joblib
from aoa.stats import stats      # AOA SDK helpers; exact import paths may vary
from aoa.util import save_plot   # with the installed aoa version (assumed here)
from nyoka import xgboost_to_pmml
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from teradataml import DataFrame, create_context
from xgboost import XGBClassifier


def train(data_conf, model_conf, **kwargs):
    hyperparams = model_conf["hyperParameters"]

    create_context(host=os.environ["AOA_CONN_HOST"],
                   username=os.environ["AOA_CONN_USERNAME"],
                   password=os.environ["AOA_CONN_PASSWORD"],
                   database=data_conf["schema"] if "schema" in data_conf and data_conf["schema"] != "" else None)

    feature_names = ["NumTimesPrg", "PlGlcConc", "BloodP", "SkinThick", "TwoHourSerIns", "BMI", "DiPedFunc", "Age"]
    target_name = "HasDiabetes"

    # read training dataset from Teradata and convert to pandas
    train_df = DataFrame(data_conf["table"])
    train_df = train_df.select([feature_names + [target_name]])
    train_pdf = train_df.to_pandas()

    # split data into X and y
    X_train = train_pdf.drop(columns=[target_name])
    y_train = train_pdf[target_name]

    print("Starting training...")

    # fit model to training data
    model = Pipeline([('scaler', MinMaxScaler()),
                      ('xgb', XGBClassifier(eta=hyperparams["eta"],
                                            max_depth=hyperparams["max_depth"]))])
    # xgboost saves feature names itself, but let's store them on the pipeline for easy access later
    model.feature_names = feature_names
    model.target_name = target_name

    model.fit(X_train, y_train)

    print("Finished training")

    # export model artefacts
    joblib.dump(model, "artifacts/output/model.joblib")

    # we can also save as pmml so it can be used for In-Vantage scoring etc.
    xgboost_to_pmml(pipeline=model, col_names=feature_names, target_name=target_name,
                    pmml_f_name="artifacts/output/model.pmml")

    print("Saved trained model")

    from xgboost import plot_importance
    model["xgb"].get_booster().feature_names = feature_names
    plot_importance(model["xgb"].get_booster(), max_num_features=10)
    save_plot("feature_importance.png")

    feature_importance = model["xgb"].get_booster().get_score(importance_type="weight")
    stats.record_stats(train_df,
                       features=feature_names,
                       predictors=["HasDiabetes"],
                       categorical=["HasDiabetes"],
                       importance=feature_importance,
                       category_labels={"HasDiabetes": {0: "false", 1: "true"}})
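
save_plot comes from the AOA SDK and its body is not shown in the snippet. A minimal sketch of the behaviour it needs here, assuming matplotlib is the plotting backend (which xgboost's plot_importance uses); the helper name is real, but this body is an assumption:

import matplotlib.pyplot as plt


def save_plot(filename, path="artifacts/output"):
    # assumed stand-in for the aoa SDK helper: persist the current
    # matplotlib figure alongside the other model artefacts, then clear it
    plt.savefig(f"{path}/{filename}", dpi=150, bbox_inches="tight")
    plt.clf()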
Code example #2
import urllib.request

import joblib
from nyoka import xgboost_to_pmml
from pyspark.sql import SparkSession
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier

spark = SparkSession.builder.getOrCreate()   # the demo assumes a live SparkSession


def train(data_conf, model_conf, **kwargs):
    hyperparams = model_conf["hyperParameters"]

    feature_names = [
        "NumTimesPrg", "PlGlcConc", "BloodP", "SkinThick", "TwoHourSerIns",
        "BMI", "DiPedFunc", "Age"
    ]
    target_name = "HasDiabetes"

    # in a real-world scenario you would read from S3, HDFS, Teradata, etc.;
    # for this demo we fetch a URL. pandas.read_csv would also work, but this shows the PySpark route
    urllib.request.urlretrieve(data_conf["url"], "/tmp/data.csv")
    all_columns = feature_names + [target_name]
    train_df = spark.read.format("csv")\
        .option("inferSchema", "true")\
        .load("/tmp/data.csv")\
        .toDF(*all_columns)

    # do feature engineering in Spark (joins, etc.) for whatever reason you're using PySpark,
    # then split into train and test and keep the 70% training portion
    train_df = train_df.randomSplit([0.7, 0.3], 42)[0].toPandas()

    # split data into X and y
    X_train = train_df.drop(columns=[target_name])
    y_train = train_df[target_name]

    print("Starting training...")

    # fit model to training data
    model = Pipeline([('scaler', MinMaxScaler()),
                      ('xgb',
                       XGBClassifier(eta=hyperparams["eta"],
                                     max_depth=hyperparams["max_depth"]))])
    # xgboost saves feature names itself, but let's store them on the pipeline for easy access
    model.feature_names = feature_names

    model.fit(X_train, y_train)

    print("Finished training")

    # export model artefacts
    joblib.dump(model, "artifacts/output/model.joblib")

    # we can also save as pmml so it can be used for In-Vantage scoring etc.
    xgboost_to_pmml(pipeline=model,
                    col_names=feature_names,
                    target_name=target_name,
                    pmml_f_name="artifacts/output/model.pmml")

    print("Saved trained model")
Code example #3
import os

import joblib
from nyoka import xgboost_to_pmml
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from teradataml import DataFrame, create_context
from xgboost import XGBClassifier


def train(data_conf, model_conf, **kwargs):
    hyperparams = model_conf["hyperParameters"]

    create_context(host=os.environ["AOA_CONN_HOST"],
                   username=os.environ["AOA_CONN_USERNAME"],
                   password=os.environ["AOA_CONN_PASSWORD"])

    feature_names = [
        "NumTimesPrg", "PlGlcConc", "BloodP", "SkinThick", "TwoHourSerIns",
        "BMI", "DiPedFunc", "Age"
    ]
    target_name = "HasDiabetes"

    # read training dataset from Teradata and convert to pandas
    train_df = DataFrame(data_conf["table"])
    train_df = train_df.select([feature_names + [target_name]])
    train_df = train_df.to_pandas()

    # split data into X and y
    X_train = train_df.drop(columns=[target_name])
    y_train = train_df[target_name]

    print("Starting training...")

    # fit model to training data
    model = Pipeline([('scaler', MinMaxScaler()),
                      ('xgb',
                       XGBClassifier(eta=hyperparams["eta"],
                                     max_depth=hyperparams["max_depth"]))])
    # xgboost saves feature names itself, but let's store them on the pipeline for easy access later
    model.feature_names = feature_names
    model.target_name = target_name

    model.fit(X_train, y_train)

    print("Finished training")

    # export model artefacts
    joblib.dump(model, "artifacts/output/model.joblib")

    # we can also save as pmml so it can be used for In-Vantage scoring etc.
    xgboost_to_pmml(pipeline=model,
                    col_names=feature_names,
                    target_name=target_name,
                    pmml_f_name="artifacts/output/model.pmml")

    print("Saved trained model")
Code example #4
File: training.py / Project: trishlugtu/AoaDemoModels
import joblib
from nyoka import xgboost_to_pmml
from pyspark.sql import SparkSession
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBClassifier

spark = SparkSession.builder.getOrCreate()   # the demo assumes a live SparkSession


def train(data_conf, model_conf, **kwargs):
    hyperparams = model_conf["hyperParameters"]

    feature_names = [
        "NumTimesPrg", "PlGlcConc", "BloodP", "SkinThick", "TwoHourSerIns",
        "BMI", "DiPedFunc", "Age"
    ]
    target_name = "HasDiabetes"

    train_df = read_dataframe(spark, data_conf["url"])

    # do feature engineering in Spark (joins, etc.) for whatever reason you're using PySpark,
    # then split into train and test and keep the 70% training portion
    train_df = train_df.randomSplit([0.7, 0.3], 42)[0].toPandas()

    # split data into X and y
    X_train = train_df.drop(columns=[target_name])
    y_train = train_df[target_name]

    print("Starting training...")

    # fit model to training data
    model = Pipeline([('scaler', MinMaxScaler()),
                      ('xgb',
                       XGBClassifier(eta=hyperparams["eta"],
                                     max_depth=hyperparams["max_depth"]))])
    # xgboost saves feature names itself, but let's store them on the pipeline for easy access
    model.feature_names = feature_names

    model.fit(X_train, y_train)

    print("Finished training")

    # export model artefacts
    joblib.dump(model, "artifacts/output/model.joblib")

    # we can also save as pmml so it can be used for In-Vantage scoring etc.
    xgboost_to_pmml(pipeline=model,
                    col_names=feature_names,
                    target_name=target_name,
                    pmml_f_name="artifacts/output/model.pmml")

    print("Saved trained model")