コード例 #1
0
def data():

    dataset_parameters = {
        'data_distribution': [0.2, 0.1, 0.7],
        'sample_size': 0.02,
        'sampling_strategy': None,
        'verbose': None
    }

    dataset = Poker(**dataset_parameters)

    X_train = dataset.X_train
    X_val = dataset.X_validate
    X_test = dataset.X_test
    y_train = dataset.y_train
    y_val = dataset.y_validate
    y_test = dataset.y_test

    return X_train, X_val, X_test, y_train, y_val, y_test
コード例 #2
0
    }


# Importing dataset
dataset_parameters = {
    'data_distribution': [0.2, 0.1, 0.7],
    'sample_size': 0.02,
    #'sampling_strategy': "SMOTE",
    #'sampling_strategy': "over_and_under_sampling",
    #'sampling_strategy': "4SMOTE",
    #'sampling_strategy': "WSMOTE",
    'sampling_strategy': None,
    'verbose': False
}

dataset = Poker(**dataset_parameters)

# Setting parameters
model_parameters = {
    'n_jobs':
    -1,  # None, The number of jobs to run in parallel for both fit and predict. None means 1 unless in a joblib.parallel_backend context. -1 means using all processors.
    'random_state': 42,
    'verbose': 0,
    #'n_estimators': 100, #200 # 100, The number of trees in the forest.
    #'max_depth': 25, # None, The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.
    #'min_samples_leaf': 3, # 1, The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least min_samples_leaf training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. If int, then consider min_samples_leaf as the minimum number. If float, then min_samples_leaf is a fraction and ceil(min_samples_leaf * n_samples) are the minimum number of samples for each node.
    #'min_samples_split': 2, # 2, The minimum number of samples required to split an internal node: If int, then consider min_samples_split as the minimum number. If float, then min_samples_split is a fraction and ceil(min_samples_split * n_samples) are the minimum number of samples for each split.
    #'min_weight_fraction_leaf': 0, # 0, The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.
    #'max_features': 'auto', # 'auto', The number of features to consider when looking for the best split: If int, then consider max_features features at each split. If float, then max_features is a fraction and int(max_features * n_features) features are considered at each split. If “auto”, then max_features=sqrt(n_features). If “sqrt”, then max_features=sqrt(n_features) (same as “auto”). If “log2”, then max_features=log2(n_features). If None, then max_features=n_features.
    #'max_leaf_nodes': None, # None, Grow trees with max_leaf_nodes in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.
    #'min_impurity_decrease': 0, # 0, A node will be split if this split induces a decrease of the impurity greater than or equal to this value.