def main(args):
    """Training entry point: builds a CNN feature extractor and a LightGBM
    classifier, optionally binds them to NSML, and (in train mode) fits the
    GBM on pre-extracted feature arrays with early stopping.

    Parameters
    ----------
    args : argparse.Namespace
        Expects at least ``mode``, ``pause`` and ``batch_size`` attributes.
    """
    cnn_model = build_cnn_model(backbone=MobileNetV2, use_imagenet=None)
    gbm_model = LGBMClassifier(
        boosting_type='gbdt',
        objective='binary',
        n_jobs=3,  # Updated from 'nthread'
        silent=False,
        max_depth=params['max_depth'],
        max_bin=params['max_bin'],
        subsample_for_bin=params['subsample_for_bin'],
        subsample=params['subsample'],
        subsample_freq=params['subsample_freq'],
        min_split_gain=params['min_split_gain'],
        min_child_weight=params['min_child_weight'],
        min_child_samples=params['min_child_samples'],
        scale_pos_weight=params['scale_pos_weight'])

    if use_nsml:
        bind_nsml(cnn_model, gbm_model)
    if args.pause:
        nsml.paused(scope=locals())

    if args.mode == 'train':
        # Return value intentionally unused; presumably called for its side
        # effects (producing the TrainX/TrainY .npy files loaded below) --
        # TODO confirm against get_data_loader's implementation.
        get_data_loader(root=os.path.join(DATASET_PATH, 'train', 'train_data',
                                          'train_data'),
                        phase='train',
                        batch_size=args.batch_size)

        TotalX = np.load('TrainX.npy')
        TotalY = np.load('TrainY.npy')
        print('TotalX.shape', TotalX.shape, 'TotalY.shape', TotalY.shape)
        X_train, X_test, Y_train, Y_test = train_test_split(TotalX,
                                                            TotalY,
                                                            test_size=0.05,
                                                            random_state=777)
        print('X_train.shape', X_train.shape, 'X_test.shape', X_test.shape,
              'Y_train.shape', Y_train.shape, 'Y_test.shape', Y_test.shape)

        # Bug fix: the original fit the model twice -- a plain fit whose
        # result was immediately overwritten by this one -- and computed
        # `get_params().keys()` / an unused `eval_set` tuple as no-ops.
        # One fit with a held-out eval set and early stopping is sufficient.
        gbm_model.fit(X_train,
                      Y_train,
                      eval_set=[(X_test, Y_test)],
                      eval_metric='binary_error',
                      early_stopping_rounds=50)

        nsml.save('last')
Exemple #2
0
def do_generate_metrics_lgbm_optimazed_model(X_train, y_train, X_test, y_test,
                                             grid):
    """Fit an LGBMClassifier with the best parameters found by ``grid``,
    compute evaluation metrics, and print both the model's and the search's
    parameters and scores.

    Returns
    -------
    tuple
        ``(model, metrics)`` -- the fitted classifier and whatever
        ``calculate_metrics`` returns for it on the test split.
    """
    model = LGBMClassifier(random_state=0)
    model.set_params(**grid.best_params_)
    model.fit(X_train, y_train)
    metrics = calculate_metrics(model, X_test, y_test)
    # Bug fix: the original printed the bound method object `model.score`
    # (e.g. "<bound method ...>") instead of calling it with the test data.
    print(model.get_params(), " ", model.score(X_test, y_test))
    print(grid.best_params_, " ", grid.best_score_)

    return model, metrics
def do_generate_metrics_lgbm_optimazed_model(X_train, y_train, X_test, y_test,
                                             grid):
    """Train an LGBM classifier with the grid search's best parameters,
    compute evaluation metrics, and write both the model's and the grid's
    parameters/scores to the log file.

    Returns
    -------
    tuple
        ``(model, metrics)`` -- the fitted classifier and the metrics
        computed on the test split.
    """
    file_operations.write_logs(FILENAME, "LGBM metrics calculation\n")

    model = LGBMClassifier(random_state=0)
    model.set_params(**grid.best_params_)
    model.fit(X_train, y_train)

    metrics = calculate_metrics(model, X_test, y_test)

    # Log the refit model's configuration and its test-set score.
    model_summary = ("Generated model params and results\n params:"
                     + str(model.get_params())
                     + "\nscore " + str(model.score(X_test, y_test)))
    file_operations.write_logs(FILENAME, model_summary)

    # Log what the search itself found, for comparison.
    grid_summary = ("Search grid best params and results\n params:"
                    + str(grid.best_params_)
                    + "\nscore " + str(grid.best_score_))
    file_operations.write_logs(FILENAME, grid_summary)

    return model, metrics
def train_lgb(model=False):
    """Build an LGBMClassifier configured via grid search.

    If ``model`` is truthy, return the unfitted classifier directly;
    otherwise append a summary of its hyperparameters to the global ``log``
    string and delegate training to ``train``.
    """
    global log

    best_params = grid_search_lgb(True)
    clf = LGBMClassifier().set_params(**best_params)

    if model:
        return clf

    # Record the effective hyperparameters in the shared log string.
    cfg = clf.get_params()
    summary = [
        'lgb',
        ', learning_rate: %.3f' % cfg['learning_rate'],
        ', n_estimators: %d' % cfg['n_estimators'],
        ', num_leaves: %d' % cfg['num_leaves'],
        ', min_split_gain: %.1f' % cfg['min_split_gain'],
        ', min_child_weight: %.4f' % cfg['min_child_weight'],
        ', min_child_samples: %d' % cfg['min_child_samples'],
        ', subsample: %.1f' % cfg['subsample'],
        ', colsample_bytree: %.1f' % cfg['colsample_bytree'],
        '\n\n',
    ]
    log += ''.join(summary)

    return train(clf)
# full_index = np.array([95,94,82,59,0])
# data_index = np.array([44,179,112,59,82,58,84])
# data_index = np.array([0])

# Selected feature/column indices from the raw dataset -- TODO confirm meaning.
data_index = np.array([0, 59, 94, 95, 84, 161, 44, 179, 82, 112, 58])
# classes = ['WWW', 'MAIL', 'FTP-CONTROL', 'FTP-PASV', 'ATTACK', 'P2P', 'DATABASE', 'FTP-DATA', 'MULTIMEDIA', 'SERVICES',
#            'INTERACTIVE', 'GAMES']
# Traffic class labels (the commented-out list above additionally had 'GAMES').
classes = ['WWW', 'MAIL', 'FTP-CONTROL', 'FTP-PASV', 'ATTACK', 'P2P', 'DATABASE', 'FTP-DATA',
           'MULTIMEDIA', 'SERVICES', 'INTERACTIVE']

# file used to train, who generates x_train,x_test,y_train,y_test
# I also resampled the file `entry12`
file = os.path.join(data_dir, filename)
test_file = os.path.join(data_dir, test_filename)

if __name__ == '__main__':
    # Sweep LightGBM's n_estimators from 50 to 145 (step 5) and record the
    # test-set accuracy for each setting.
    acc = []
    x_train, _, y_train, _ = get_data(file)
    _, x_test, _, y_test = get_data(test_file)
    np_dir = os.path.join(data_dir, 'estimators_100_150_5.txt')
    for i in range(50, 150, 5):
        clf = LGBMClassifier(n_estimators=i)
        clf.fit(x_train, y_train)
        print(clf.get_params())
        accuracy = clf.score(x_test, y_test)
        acc.append(accuracy)
    acc = np.array(acc)
    print(acc)
    # Persist the accuracy curve.  NOTE(review): the filename says 100_150
    # but the sweep actually runs 50..145 -- possibly stale; confirm.
    np.savetxt(np_dir, acc)
import matplotlib.lines as lines
# NOTE(review): this bare attribute access is a no-op at script level -- it
# neither prints nor stores `lines.lineStyles`; likely a leftover from an
# interactive (REPL/notebook) session.
lines.lineStyles
Exemple #6
0
from lightgbm import LGBMClassifier

# LightGBM binary classifier; the `_corr` suffix presumably denotes a
# correlation-filtered feature set -- TODO confirm against the caller.
classifier_lgbm_corr = LGBMClassifier(max_depth=500,
                                      learning_rate=0.01,
                                      num_leaves=1000,
                                      min_data_in_leaf=200,
                                      n_estimators=2000,
                                      objective='binary',
                                      metric='binary_logloss',
                                      random_state=42)

# Parameters currently used by the model

from pprint import pprint
print('Parameters Currently In Use:\n')
pprint(classifier_lgbm_corr.get_params())

# Fit and prediction (wall-clock timed)

import time
start = time.time()

classifier_lgbm_corr.fit(X_corr_train, Y_corr_train)

end = time.time()
print("Tempo de Execução: {} sec".format(end - start))

Y_pred_lgbm_corr = classifier_lgbm_corr.predict(X_corr_test)

# Metrics analysis
    eval_metric='auc',
    # base_score = proportion_2j,
    n_jobs=cpu_n_jobs,
    random_state=42,
    silent=True)

# LightGBM counterpart of the XGBoost estimator referenced below; same seed
# and job count, presumably for a fair comparison.
clf_org_lgb = LGBMClassifier(n_estimators=1000,
                             learning_rate=0.1,
                             objective='binary',
                             n_jobs=cpu_n_jobs,
                             random_state=42,
                             silent=True)

xgb_params = clf_org_xgb.get_xgb_params()

# Strip sklearn-wrapper-only keys before reuse; presumably the downstream CV
# call supplies the iteration count/verbosity itself -- TODO confirm at the
# call site.
lgb_params = clf_org_lgb.get_params()
lgb_params.pop('n_estimators')
lgb_params.pop('silent')

# Early-stopping triggers for the two boosters, configured to maximize the
# CV score.
xgb_cv_early_stopping = CV_EarlyStoppingTrigger(
    stopping_rounds=early_stopping_rounds, maximize_score=True, method='xgb')

lgb_cv_early_stopping = CV_EarlyStoppingTrigger(
    stopping_rounds=early_stopping_rounds, maximize_score=True, method='lgb')

from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import roc_auc_score
# from sklearn.model_selection import StratifiedKFold
import scipy.stats as sp_stats
# from scipy.stats import norm as sp_normal
from lightgbm import LGBMClassifier

# 28th test - Random Search

classifier_lgbm = LGBMClassifier( max_depth = 1880, 
                                  learning_rate = 0.1,
                                  num_leaves = 100,
                                  n_estimators = 4500,
                                  min_data_in_leaf = 140,
                                  n_jobs = 4 )

# Parameters currently used by the model

from pprint import pprint
print('Parameters Currently In Use:\n')
pprint(classifier_lgbm.get_params())

# Fit and prediction (wall-clock timed)

import time
start = time.time()

classifier_lgbm.fit(X, Y)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))
# Recorded runtime from a previous run: 11.01 min

# Learning curve - X, Y

import matplotlib.pyplot as plt
Exemple #9
0
def experiment(train=None, test=None, seed=None):
    """Run one LightGBM cross-validation experiment end to end.

    Loads (or rebuilds) the train/test feature frames, runs time-series
    cross validation, fits an LGBMClassifier with early stopping on each
    fold, logs AUC and feature importances, then predicts on the test set
    and writes a submission CSV.

    Parameters
    ----------
    train, test : optional
        Paths to pre-built feather files; when falsy, the frames are
        re-assembled from the CSVs under ``preprocessed/features``.
    seed : optional
        Random seed forwarded to the LGBMClassifier; also embedded in the
        submission filename.
    """

    # One log directory per run, named by timestamp.
    cv_name = now()
    cv_log_path = f'cv/LightGBM/{cv_name}/'
    Path(cv_log_path).mkdir(parents=True, exist_ok=True)

    log_fname = cv_log_path + 'cv.log'
    cv_logger = Logger('CV_log', log_fname)
    cv_logger.info("Experiment Start")

    with cv_logger.interval_timer('load data'):
        if train:
            train_df = load_feather(train)
            # train_df = train_df.sample(100000)
        else:
            # Rebuild the training frame from the individual feature CSVs.
            fs = Path('preprocessed/features').glob('train_*.csv')
            # fs = ['preprocessed/features/train_nextClick.csv',
            #       'preprocessed/features/train_ip_app_nextClick.csv']
            train_df = load_data(config.TRAIN_PATH,
                                 fs,
                                 cv_logger,
                                 dump='preprocessed/train.ftr')
            # offset = pd.to_datetime('2017-11-07 16:00:00')
        # train_df = train_df[train_df.click_time >= offset]
        gc.collect()
        if test:
            test_df = load_feather(test)

        else:
            fs = Path('preprocessed/features').glob('test_*.csv')
            # fs = ['preprocessed/features/test_nextClick.csv',
            #       'preprocessed/features/test_ip_app_nextClick.csv']
            test_df = load_data(config.TEST_PATH,
                                fs,
                                cv_logger,
                                dump='preprocessed/test.ftr')
            gc.collect()

    # Fresh integer indices so the positional fold indices below are valid.
    train_df = train_df.reset_index(drop=True)
    test_df = test_df.reset_index(drop=True)

    cv_logger.info(config.SEP_TIME)
    with cv_logger.interval_timer('split'):
        split_gen = enumerate(timeseries_cv(train_df, config.SEP_TIME))

    # dump configuration
    aucs = []

    # add ip_day_hour_nunique, app_device_channel_nextClick,
    # ip_os_device_nextClick,
    # Feature columns used for training.
    train_cols = [
        'app', 'app_channel_ce', 'channel', 'device', 'hour', 'ip_app_ce',
        'ip_app_channel_hour_mean', 'ip_app_channel_nextClick',
        'ip_app_device_os_channel_nextClick', 'ip_app_device_os_nextClick',
        'ip_app_nextClick', 'ip_app_nunique', 'ip_app_os_ce',
        'ip_app_os_nunique', 'ip_channel_nunique', 'ip_day_hour_ce',
        'ip_day_nunique', 'ip_day_hour_nunique', 'ip_device_nunique',
        'ip_device_os_app_cumcount', 'ip_nextClick', 'ip_os_device_nextClick',
        'app_device_channel_nextClick', 'ip_os_cumcount',
        'ip_os_device_app_nunique', 'os'
    ]
    # encode_list = config.ENCODE_LIST
    # threshold = config.TE_THR

    # Hours used to build the leaderboard-like validation subset.
    valid_time = [4, 5, 6, 9, 10, 11, 13, 14, 15]
    # public_time = [5, 6, 9, 10, 11, 13, 14, 15]
    train_df = proc_bf_cv(train_df)
    gc.collect()

    for num, (train_idx, valid_idx) in split_gen:
        cv_logger.kiritori()
        cv_logger.info(f"fold {num} start")

        with cv_logger.interval_timer('train test split'):
            cvtrain_df = train_df.loc[train_idx]
            valid_df = train_df.loc[valid_idx]
            # Restrict validation to the selected hours (see valid_time).
            valid_df2 = valid_df[valid_df.hour.isin(valid_time)]
            cv_logger.info(f'train size {cvtrain_df.shape}')
            cv_logger.info(f'valid size {valid_df2.shape}')
            # valid_df3 = valid_df[valid_df.hour == 4]
            # valid_df4 = valid_df[valid_df.hour.isin(public_time)]
            # with cv_logger.interval_timer('target encode'):
            # cvtrain_df, valid_df, tes = custom_encode(cvtrain_df,
            #                                           valid_df,
            #                                           encode_list,
            #                                           threshold,
            #                                           cv_logger)
            # cvtrain_df = proc_bf_cv(cvtrain_df)
            # valid_df = proc_bf_cv(valid_df)
            # train_cols += [c for c in cvtrain_df.columns if '_te' in c]

        cv_logger.info("LGBM Baseline validation")

        eval_names = ['valid_lb']
        train_X, train_y = cvtrain_df[train_cols], cvtrain_df.is_attributed
        eval_set = []
        with cv_logger.interval_timer('valid make'):
            for df in [valid_df2]:
                X, y = df[train_cols], df.is_attributed
                eval_set.append((X, y))
            cv_logger.info(f'train size {train_X.shape}')
            cv_logger.info(f'valid size {eval_set[0][0].shape}')

            cv_logger.info(list(train_X.columns))
            gc.collect()

        lgbm = LGBMClassifier(n_estimators=1000,
                              learning_rate=0.1,
                              num_leaves=31,
                              max_depth=-1,
                              min_child_samples=20,
                              min_child_weight=5,
                              max_bin=255,
                              scale_pos_weight=200,
                              colsample_bytree=0.3,
                              subsample=0.6,
                              subsample_freq=0,
                              random_state=seed,
                              n_jobs=24)

        cv_logger.info(lgbm.get_params())
        lgbm.fit(train_X,
                 train_y,
                 eval_metric="auc",
                 eval_set=eval_set,
                 eval_names=eval_names,
                 early_stopping_rounds=30,
                 verbose=10)
        auc = lgbm.best_score_
        aucs.append(auc)

        cv_logger.info(f"naive LGBM AUC : {auc}")
        cv_logger.info(pformat(lgbm.evals_result_))

        cv_logger.info("feature importance")
        fi = dict(zip(train_X.columns, lgbm.feature_importances_))
        cv_logger.info(pformat(fi))
        cv_logger.info(f"fold {num} end")

    del train_df
    cv_logger.double_kiritori()
    cv_logger.info("Cross Validation Done")
    cv_logger.info("Naive LGBM")
    # NOTE(review): this logs only the LAST fold's score; `aucs` collects all
    # folds but is never summarized -- confirm whether a mean was intended.
    cv_logger.info(f"AUC {auc}")

    cv_logger.info("Predict")

    # with cv_logger.interval_timer('all target encode'):
    #     for te in tes:
    #         test_df = te.transform(test_df)

    test_df = proc_bf_cv(test_df)

    # Predict with the model from the last fold, at its best iteration.
    test_X = test_df[train_cols]
    pred = lgbm.predict_proba(test_X, num_iteration=lgbm.best_iteration_)
    test_df['is_attributed'] = pred[:, 1]
    test_df['click_id'] = test_df.click_id.astype('uint32')
    sub = test_sup_merge(test_df)
    sub[['click_id', 'is_attributed']].to_csv(f'sub/{cv_name}_{seed}.csv',
                                              index=False)

    cv_logger.info("Experiment Done")
def fit_model(X_train,
              y_train,
              X_val,
              y_val,
              train_cols,
              bayes,
              model_type="lgb"):
    """Fit either a LightGBM classifier or a small Keras MLP.

    Parameters
    ----------
    X_train, y_train : training features/targets.
    X_val, y_val : validation features/targets (used as eval set).
    train_cols : column names selected from X_train/X_val.
    bayes : int; non-zero enables Bayesian hyperparameter optimization for
        the LightGBM branch (number of iterations -- TODO confirm against
        ``bayesian_opt``).
    model_type : "lgb" (default) or "keras".

    Returns
    -------
    list
        ``[model, history]``; for the lgb branch ``history`` is the model
        itself, for keras it is the Keras ``History`` object.

    Side effects: logs parameters/metadata to neptune; relies on a module
    -level ``metric`` name for the eval metric / loss.
    """

    if model_type == "lgb":
        from lightgbm import LGBMRegressor, LGBMClassifier
        import lightgbm as lgb

        if bayes != 0:

            # Fixed fit-time arguments passed alongside the search space.
            params = {
                'early_stopping_rounds':
                10,  #early stopping
                'eval_set': [(X_val[train_cols], y_val),
                             (X_train[train_cols], y_train)]
            }

            # Search ranges for the Bayesian optimizer.
            fit_params = {
                'n_estimators': (100, 500),  #number of trees.
                # 'num_leaves': (30, 50),
                'learning_rate': (0.01, 0.05),
                'num_boost_round': (3000, 3500),
                'subsample':
                (0.7,
                 0.75),  #part of the dataset used for training on each round
                'reg_alpha': (0, 0.1),  #reg alpha
                'reg_lambda': (0, 0.1),  #reg lambda
                'early_stopping_rounds': (10, 11),  #early stopping
                # 'min_data_in_leaf' :  (1000, 1001),  #important to prevent overfitting
            }

            estimator = LGBMClassifier(**params, eval_metric=metric)
            model = bayesian_opt(estimator, X_train[train_cols], y_train,
                                 X_val, y_val, fit_params, params, bayes)
            model = model.best_estimator_

        else:

            # Fixed hyperparameters for the non-optimized path.
            params = {
                'objective': "multiclass",
                'boosting_type': "gbdt",  #boosting algorithm: gbdt, dart, goss
                # 'n_estimators': 100,      #number of trees.
                # 'num_leaves' :  64,      #number of leaves, to control the complexity of the tree. Either set this parameter of max_depth
                'learning_rate': 0.01,  #learning rate
                'num_boost_round': 100,  #max number of iterations
                'subsample':
                0.7,  #part of the dataset used for training on each round
                'reg_alpha': 0.1,  #reg alpha
                'reg_lambda': 0.1,  #reg lambda
                'verbose_eval': 20,  #verbose
                'early_stopping_rounds': 10,  #early stopping
                # 'min_data_in_leaf' :  1000,  #important to prevent overfitting
                'n_jobs': -1
            }

            model = LGBMClassifier(**params, eval_metric=metric)

            model = model.fit(X=X_train[train_cols],
                              y=y_train,
                              eval_set=[(X_val[train_cols], y_val),
                                        (X_train[train_cols], y_train)])

        # Log all scalar/string hyperparameters to neptune.
        for k, v in model.get_params(False).items():
            if (type(v) == int) | (type(v) == float):
                neptune.log_text(k, str(v))
            elif (type(v) == str):
                neptune.log_text(k, v)
        history = model

    elif model_type == "keras":
        import tensorflow as tf
        from tensorflow.keras.constraints import max_norm
        import tensorflow.keras.backend as K

        # Small dense softmax classifier: input -> 10 -> BN -> 10 -> 51 -> out.
        inp = tf.keras.layers.Input(shape=(len(train_cols), ))
        x = tf.keras.layers.Dense(10, activation='relu')(inp)
        x = tf.keras.layers.BatchNormalization()(x)
        # x = tf.keras.layers.Dropout(0.4)(x)
        x = tf.keras.layers.Dense(10, activation='relu')(x)
        # x = tf.keras.layers.BatchNormalization()(x)
        # x = tf.keras.layers.Dropout(0.4)(x)
        # x = tf.keras.layers.Dense(24, activation = 'relu')(x)
        # x = tf.keras.layers.BatchNormalization()(x)
        # x = tf.keras.layers.Dropout(0.4)(x)
        x = tf.keras.layers.Dense(51, activation='relu')(x)
        # x = tf.keras.layers.BatchNormalization()(x)
        # x = tf.keras.layers.Dropout(0.4)(x)
        out = tf.keras.layers.Dense(len(np.unique(y_train)),
                                    activation='softmax')(x)
        model = tf.keras.models.Model(inputs=inp, outputs=out)

        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            mode='auto',
            patience=10,
            restore_best_weights=True)
        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                         mode='auto',
                                                         factor=0.5,
                                                         patience=5)

        adam = tf.keras.optimizers.Adam(learning_rate=0.001)
        neptune.log_text("init_lr", str(K.eval(adam.lr)))
        model.compile(optimizer=adam, loss=metric, metrics=['accuracy'])

        history = model.fit(X_train[train_cols],
                            pd.get_dummies(y_train),
                            validation_data=(X_val[train_cols],
                                             pd.get_dummies(y_val)),
                            epochs=100,
                            batch_size=4,
                            callbacks=[early_stopping, reduce_lr],
                            verbose=1)
        neptune.log_text("epochs", str(history.params["epochs"]))
        neptune.log_text("steps", str(history.params["steps"]))
        neptune.log_text("early_stopping_rounds",
                         str(K.eval(early_stopping.patience)))

        # Log the layer-type sequence, e.g. "InputLayer_Dense_...".
        layers = [
            str(type(l)).split()[1].split(">")[0].split(".")[-1][:-1]
            for l in model.layers
        ]
        neptune.log_text("Layers", "_".join(layers))

    gc.collect()
    return [model, history]
Exemple #11
0
class LGBMModel(Model):
    """LightGBM model wrapper with featurizer integration and pickle-based
    caching of both the model and its featurizer."""

    def __init__(self, config: Optional[Dict] = None):
        """Load a cached model if one exists at the configured path,
        otherwise create a fresh LGBMClassifier from ``config['params']``.

        NOTE(review): ``config`` defaults to None but is subscripted
        immediately below, so passing None would raise TypeError -- the
        default appears unusable; confirm intent.
        """
        self.config = config
        model_cache_path = os.path.join(config["model_output_path"],
                                        "model.pkl")
        self.featurizer = LGBMFeaturizer(
            os.path.join(config['featurizer_output_path'], 'featurizer.pkl'),
            config)

        # In evaluation mode a cached model is mandatory.
        if "evaluate" in config and config["evaluate"] and not os.path.exists(
                model_cache_path):
            raise ValueError(
                "Non Existant Model output path in Evaluation Mode!")

        if model_cache_path and os.path.exists(model_cache_path):
            logger.info("Loading Model from Cache")
            with open(model_cache_path, "rb") as f:
                self.model = pickle.load(f)

        else:
            logger.info("Initializing Model from scratch....")
            self.model = LGBMClassifier(**self.config['params'])

    def train(self,
              train_datapoints: List[Datapoint],
              val_datapoints: List[Datapoint],
              cache_featurizer: Optional[bool] = False) -> None:
        """Fit the featurizer on the training datapoints, optionally cache
        it (plus its feature names) to disk, then fit the classifier.

        NOTE(review): ``val_datapoints`` is currently unused -- confirm.
        """
        self.featurizer.fit(train_datapoints)

        # caching(if True) we don't have go through more featurizing steps in features/LGBMFeaturizer
        if cache_featurizer:
            feature_names = self.featurizer.get_all_feature_names()
            with open(
                    os.path.join(self.config['featurizer_output_path'],
                                 "feature_names.pkl"), "wb") as f:
                pickle.dump(feature_names, f)
            self.featurizer.save(
                os.path.join(self.config["featurizer_output_path"],
                             "featurizer.pkl"))

        logger.info("Featurizing From Scratch")
        train_features = self.featurizer.featurizer(
            train_datapoints)  #transform

        targets = [datapoint.target for datapoint in train_datapoints]

        self.model.fit(train_features, targets)

    def compute_metrics(self, eval_datapoints: List[Datapoint]) -> Dict:
        """Predict on the eval datapoints and return a dict of accuracy,
        F1, AUC and confusion-matrix counts (binary task assumed -- the
        confusion matrix is unpacked as tn/fp/fn/tp)."""
        expected_labels = [datapoint.target for datapoint in eval_datapoints]
        predicted_proba = self.predict(eval_datapoints)
        # Hard labels from the per-class probabilities.
        predicted_labels = np.argmax(predicted_proba, axis=1)

        accuracy = accuracy_score(expected_labels, predicted_labels)
        f1 = f1_score(expected_labels, predicted_labels)
        auc = roc_auc_score(expected_labels, predicted_labels)
        confusion_matrix_ = confusion_matrix(expected_labels, predicted_labels)
        tn, fp, fn, tp = confusion_matrix_.ravel()

        return {
            "Accuracy": accuracy,
            "f1": f1,
            "AUC": auc,
            "True Negative": tn,
            "False Positive": fp,
            "False Negative": fn,
            "True Positive": tp
        }

    def predict(self, datapoints: List[Datapoint]) -> np.array:
        """Featurize the datapoints and return class probabilities."""
        features = self.featurizer.featurizer(datapoints)
        return self.model.predict_proba(features)

    def get_params(self) -> Dict:
        """Return the underlying LGBMClassifier's parameters."""
        return self.model.get_params()

    def save(self, model_cache_path: str) -> None:
        """Pickle the fitted model to ``model_cache_path``."""
        logger.info("Saving Model To Disk")
        with open(model_cache_path, "wb") as f:
            pickle.dump(self.model, f)
Exemple #12
0
# Leftover (commented-out) hyperparameters from a previous configuration:
#                                       learning_rate = 0.1,
#                                       num_leaves = 2000,
#                                       min_data_in_leaf = 200,
#                                       n_estimators = 2000 )

# LightGBM classifier; `_smtk` presumably denotes SMOTE-Tomek-resampled
# training data -- TODO confirm against the data-prep step.
classifier_lgbm_smtk = LGBMClassifier(max_depth=1880,
                                      learning_rate=0.1,
                                      num_leaves=100,
                                      n_estimators=4500,
                                      min_data_in_leaf=140)

# Parameters currently used by the model

from pprint import pprint
print('Parameters Currently In Use:\n')
pprint(classifier_lgbm_smtk.get_params())

# Fit and prediction (wall-clock timed)

import time
start = time.time()

classifier_lgbm_smtk.fit(X_train_smtk, Y_train_smtk)

end = time.time()
print("Tempo de Execução: {:.2f} min".format((end - start)/60))

# Bug fix: the line below was a bare (non-comment) statement -- a pasted
# output line -- and made the whole file a SyntaxError; keep it as a comment.
# Tempo de Execução: 34.12 min

Y_pred_lgbm_smtk = classifier_lgbm_smtk.predict(X_test)
# saving hyperparameters and model
# Write params/model pickles into outputs/hyperparameters/, restoring the
# working directory afterwards.
cur_dir = os.getcwd()
os.chdir('outputs/hyperparameters/')
pickle.dump(params, open("hyperparameters.pkl", 'wb'))  # hyperparameters
pickle.dump(lgbm_cv_model, open("lightgbm_model.pkl", 'wb'))  # model
os.chdir(cur_dir)

print("Best hyperparameters", params)


# loading and prediction with model

# del lgbm_cv_model
# NOTE(review): hard-coded, user-specific absolute path -- will only work on
# the original author's machine.
cur_dir = os.getcwd()
os.chdir('/Users/mvahit/Documents/GitHub/home_credit/outputs/hyperparameters/')
model = pickle.load(open('lightgbm_model.pkl', 'rb'))
os.chdir(cur_dir)
# Smoke-test prediction; the result is intentionally discarded.
model.predict(X_train.head())

# loading hyperparameters
del model
del params
cur_dir = os.getcwd()
os.chdir('/Users/mvahit/Documents/GitHub/home_credit/outputs/hyperparameters/')
params = pickle.load(open('hyperparameters.pkl', 'rb'))
final_lgbm = LGBMClassifier(**params).fit(X_train, y_train)
# NOTE(review): the two calls below discard their results -- no-ops outside
# an interactive session.
final_lgbm.get_params()
final_lgbm.predict(X_train.head())

Exemple #14
0
# LightGBM classifier; `_fvalue` presumably denotes F-value (ANOVA) feature
# selection -- TODO confirm against the feature-selection step.
classifier_lgbm_fvalue = LGBMClassifier(max_depth=500,
                                        learning_rate=0.01,
                                        num_leaves=1000,
                                        min_data_in_leaf=200,
                                        n_estimators=2000,
                                        objective='binary',
                                        metric='binary_logloss',
                                        random_state=42)

# Parameters currently used by the model

from pprint import pprint

print('Parameters Currently In Use:\n')
pprint(classifier_lgbm_fvalue.get_params())

# Block 03: fit and prediction (wall-clock timed)

import time

start = time.time()

classifier_lgbm_fvalue.fit(X_train_fvalue, Y_train)

end = time.time()
print("Tempo de Execução: {} sec".format(end - start))

Y_pred_lgbm_fvalue = classifier_lgbm_fvalue.predict(X_test_fvalue)

# Block 04: metrics analysis
Exemple #15
0
from lightgbm import LGBMClassifier

# LightGBM classifier; `_chi2` presumably denotes chi-squared feature
# selection -- TODO confirm against the feature-selection step.
classifier_lgbm_chi2 = LGBMClassifier(max_depth=500,
                                      learning_rate=0.01,
                                      num_leaves=1000,
                                      min_data_in_leaf=200,
                                      n_estimators=2000,
                                      objective='binary',
                                      metric='binary_logloss',
                                      random_state=42)

# Parameters currently used by the model

from pprint import pprint
print('Parameters Currently In Use:\n')
pprint(classifier_lgbm_chi2.get_params())

# Block 03: fit and prediction (wall-clock timed)

import time
start = time.time()

classifier_lgbm_chi2.fit(X_train_chi2, Y_train)

end = time.time()
print("Tempo de Execução: {} sec".format(end - start))

Y_pred_lgbm_chi2 = classifier_lgbm_chi2.predict(X_test_chi2)

# Block 04: metrics analysis