Python RandomForestClassifier.label_identification_の例

プログラミング言語: Python

名前空間/パッケージ名: sklearn.ensemble

メソッド/関数: label_identification_

hotexamples.comのコード掲載数: 1

Python RandomForestClassifier.label_identification_ - 1件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsklearn.ensemble.RandomForestClassifier.label_identification_の実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

add(30)

fit(30)

RandomForestClassifier(30)

apply(30)

get_params(30)

decision_path(24)

compile(22)

decision_function(17)

fit_transform(12)

n_estimators(12)

n_classes_(12)

evaluate(11)

__init__(11)

classes_(10)

estimators_(7)

max_depth(4)

error(3)

min_samples_leaf(3)

class_weight(3)

lower(3)

eval(2)

input_features(2)

expand(2)

id(2)

encode(2)

feature_importances_(2)

feature_names(2)

fit_predict(2)

C(2)

criterion(2)

max_features(2)

mlinit(2)

compress(2)

Predict(2)

base_estimator(2)

append(2)

output_feature(2)

_get_param_names(2)

get_variable_value(1)

variable_feature_importances(1)

values(1)

get_weights(1)

get_xgb_params(1)

predicted_proba(1)

name1(1)

k_best(1)

min_weight_fraction_leaf(1)

make_classification(1)

kneighbors(1)

min_samples_split(1)

コード例 #1

ファイルを表示

ファイル: supervisedfit.py プロジェクト: ThomasRieutord/bl-classification

def train_sblc(
    idflabelspath,
    algo,
    outputDir="../working-directories/4-pre-trained-classifiers/",
    savePickle=False,
):
    """Train the specified algorithm on the specified dataset.

    The input features are standardized, the requested scikit-learn
    classifier is fitted on them, and a few extra attributes are attached to
    the fitted object (see Returns). Optionally the classifier is saved as a
    Pickle file.


    Parameters
    ----------
    idflabelspath: str
        Path to the dataset with identified labels

    algo: str
        Name of the supervised algorithm to use. Possible choices:
        {RandomForestClassifier, KNeighborsClassifier, DecisionTreeClassifier,
        AdaBoostClassifier, LabelSpreading}
        Short aliases are also accepted (e.g. "rf", "knn", "dt", "ab", "ls").
        For more details, refer to the documentation of scikit-learn 0.22

    outputDir: str
        Directory where will be stored the outputs

    savePickle: bool, default=False
        If False, the trained classifier is not saved


    Returns
    -------
    clf: sklearn Classifier object
        Trained classifier. To be used with `predict` method.
        The following attributes are added to it:
        `label_identification_` and `label_long_names_` (as loaded from the
        dataset), `scaler` (the fitted StandardScaler) and
        `training_class_centroids_` (mean of the standardized training
        samples of each class, one row per entry of `clf.classes_`).

    When `savePickle` is True, a file `<ClassifierName>.<prepkey>.pkl` is
    also written in `outputDir`, where `<prepkey>` is the input file name
    without its first dot-separated prefix and without its extension.


    Raises
    ------
    ValueError
        If `algo` is not one of the supported names.


    Example
    -------
    >>> from blusc.supervisedfit import train_sblc
    >>> inputDir = "../working-directories/3-identified-labels/"
    >>> idfname = "IDFLABELS_2015_0219.PASSY2015_BT-T_linear_dz40_dt30_zmax2000.nc"
    >>> clf = train_sblc(inputDir + idfname, algo ="KNeighborsClassifier")
    Classifier not saved because savePickle= False
    >>> clf.classes_
    array([0, 1, 2], dtype=int32)
    """
    import os

    # Load dataset
    # ------------
    # Altitude and time are returned by the loader but are not needed here.
    X_raw, _, _, rawlabl, lablid, lablnames = utils.load_dataset(
        idflabelspath,
        variables_to_load=["X_raw", "altitude", "time", "rawlabels"],
        fields_to_load=["label_identification", "label_long_names"],
    )

    # Normalization
    # -------------
    scaler = StandardScaler()
    X = scaler.fit_transform(X_raw)

    # Instantiate classifiers
    # -----------------------
    # Imports are kept local so that only the requested estimator is loaded.
    if algo in ["rf", "RandomForest", "RandomForestClassifier"]:
        from sklearn.ensemble import RandomForestClassifier

        clf = RandomForestClassifier(n_estimators=50, max_depth=3)
    elif algo in ["knn", "nearestneighbors", "KNeighborsClassifier"]:
        from sklearn.neighbors import KNeighborsClassifier

        clf = KNeighborsClassifier(n_neighbors=6)
    elif algo in ["dt", "DecisionTree", "DecisionTreeClassifier"]:
        from sklearn.tree import DecisionTreeClassifier

        clf = DecisionTreeClassifier(max_depth=5)
    elif algo in ["ab", "adab", "AdaBoost", "AdaBoostClassifier"]:
        from sklearn.ensemble import AdaBoostClassifier
        from sklearn.tree import DecisionTreeClassifier

        # The weak learner is passed positionally: its keyword name changed
        # from `base_estimator` to `estimator` in recent scikit-learn
        # releases, while its position as first argument did not.
        clf = AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=4), n_estimators=50
        )
    elif algo in ["ls", "LabelSpreading"]:
        from sklearn.semi_supervised import LabelSpreading

        clf = LabelSpreading(kernel="knn", alpha=0.2)
    else:
        raise ValueError("Not supported algorithm: {!r}".format(algo))

    # Fit supervised model
    # ---------------------
    clf.fit(X, rawlabl)

    # Exports
    # -----------
    clf.label_identification_ = lablid
    clf.label_long_names_ = lablnames
    clf.scaler = scaler

    # One centroid per fitted class, in the order of `clf.classes_`. Matching
    # on the actual class values (rather than on 0..n_classes-1) keeps the
    # rows aligned with `classes_` even when labels are not consecutive.
    labels = np.asarray(rawlabl)
    classes = clf.classes_
    centroids = np.zeros((classes.size, X.shape[1]))
    for row, label in enumerate(classes):
        idx = np.where(labels == label)[0]
        centroids[row, :] = np.mean(X[idx, :], axis=0)

    clf.training_class_centroids_ = centroids

    if savePickle:
        # Build the output name from the input file name: drop the extension
        # and the leading dot-separated prefix, keep whatever is in between.
        # Names with fewer dots fall back to the extension-less file name.
        parts = os.path.basename(idflabelspath).split(".")
        stem = parts[:-1] if len(parts) > 1 else parts
        prepkey = ".".join(stem[1:] if len(stem) > 1 else stem)
        dropfilename = type(clf).__name__ + "." + prepkey + ".pkl"
        droppath = os.path.join(outputDir, dropfilename)

        with open(droppath, "wb") as fc:
            pickle.dump(clf, fc)
        print("Trained classifier saved in ", droppath)
    else:
        print("Classifier not saved because savePickle=", savePickle)

    return clf