def test_non_wait_fit(self):
        """Start training without blocking, then poll until a prediction exists.

        ``fit`` is called with ``wait_till_all_done=False`` and must return in
        under five minutes. ``predict`` is then retried up to ``max_trys``
        times, pausing 10 seconds between attempts, until it returns something
        other than ``None``.
        """
        model = Mljar(project=self.proj_title,
                      experiment=self.expt_title,
                      algorithms=['xgb'],
                      metric='logloss',
                      validation_kfolds=3,
                      tuning_mode='Normal',
                      single_algorithm_time_limit=1)
        self.assertIsNotNone(model)
        # Only start the computation; do not wait for the models to finish.
        start_time = time.time()
        model.fit(X=self.X, y=self.y, wait_till_all_done=False)
        end_time = time.time()
        # Initializing the models should not take more than 5 minutes.
        self.assertLess(end_time - start_time, 5 * 60)
        # A good model is not guaranteed this early, but at least one model
        # should become available within the retry budget.
        max_trys = 50
        pred = None
        for attempt in range(max_trys):
            pred = model.predict(self.X)
            if pred is not None:
                break
            # No model is ready yet. Pause before retrying, but skip the
            # pointless sleep once the retry budget is exhausted.
            if attempt < max_trys - 1:
                time.sleep(10)

        self.assertIsNotNone(pred)
        # Score the prediction with the test case's mse helper.
        score = self.mse(pred, self.y)
        self.assertLess(score, 0.99)
    def test_usage_with_train_split(self):
        """Train with an 80/20 train split and check score and validation scheme.

        Training is started with ``wait_till_all_done=False``; the test then
        sleeps for a fixed 80 seconds before asking for a prediction.
        """
        model = Mljar(project=self.proj_title,
                      experiment=self.expt_title,
                      validation_train_split=0.8,
                      algorithms=['xgb'],
                      tuning_mode='Normal',
                      single_algorithm_time_limit=1)
        self.assertIsNotNone(model)
        # Start training without blocking (wait_till_all_done=False).
        model.fit(X=self.X, y=self.y, wait_till_all_done=False)
        # Give training a fixed amount of time before predicting.
        time.sleep(80)
        pred = model.predict(self.X)
        # Fail with a clear message instead of an opaque error inside mse()
        # if no prediction came back after the wait.
        self.assertIsNotNone(pred, "predict returned None after waiting 80s")
        score = self.mse(pred, self.y)
        self.assertLess(score, 0.9)
        # The selected algorithm should report the split-based validation.
        self.assertEqual(model.selected_algorithm.validation_scheme,
                         "Split 80/20, Shuffle, Stratify")
Esempio n. 3
0
    experiment='Ex 1.5',  # as 1.4, but with validation_train_split=0.95
    metric='auc',
    algorithms=['rfc'],
    validation_kfolds=None,
    validation_shuffle=False,
    validation_stratify=True,
    validation_train_split=0.95,
    tuning_mode='Normal',  # Used Sport for experiments 1-3
    create_ensemble=False,
    single_algorithm_time_limit='1')

print("fit")
clf_mlj.fit(X, y)  # (commented-out argument: dataset_title="Ones and zeros")

print("predict")
# Squeeze the prediction and keep only its raw values.
pred_proba_mlj = clf_mlj.predict(X).squeeze().values
print("pred_proba_mlj", pred_proba_mlj)  # shows values = 0 or 1
# Map a predicted 1 to label 2; every other value becomes 0.
pred_mlj = [0 if x != 1 else 2 for x in pred_proba_mlj]
print("prediction mljar == actual", (pred_mlj == y).all())  # returns True

# Commented-out example parameter dicts:
# mljar_fit_params = {'max_features': 0.5, 'min_samples_split': 50, 'criterion': "gini",    'min_samples_leaf': 1}
# mljar_fit_params = {'max_features': 0.7, 'min_samples_split':  4, 'criterion': "entropy", 'min_samples_leaf': 2}
# Read the fit parameters of the algorithm that was selected.
mljar_fit_params = (
    clf_mlj.selected_algorithm.params['model_params']['fit_params'])
print("mljar_fit_params", mljar_fit_params)

########################
print("Random forest with same params")
clf_skl = RandomForestClassifier(
    max_features=mljar_fit_params['max_features'],
 def test_predict_without_fit(self):
     """Calling predict before fit should return None."""
     model = Mljar(project=self.proj_title, experiment=self.expt_title)
     pred = model.predict(self.X)
     # assertIsNone reports the unexpected value if the check fails.
     self.assertIsNone(pred)
# The target is the column at position 14 of the data frame.
target_col = df.columns[14]

# Let's define MLJAR project
mljar = Mljar(
    # project's title that we will use to find it among our projects in MLJAR
    project='UCI-Adult',
    experiment='Try tree methods',  # experiment's title
    validation_kfolds=5,  # we will use 5-fold CV with stratify and shuffle
    validation_shuffle=True,
    validation_stratify=True,
    metric='auc',  # we will use Area Under ROC Curve (AUC) as a metric
    tuning_mode='Normal',  # select tuning mode
    # we want to tune Random Forest, Xgboost and LightGBM models
    algorithms=['rfc', 'xgb', 'lgb'],
    create_ensemble=True,  # create ensemble of all models
    # time limit for single algorithm training in minutes
    single_algorithm_time_limit=5
)

# Train the models
mljar.fit(df[input_cols], df[target_col])

# Print out the selected algorithm.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(str(mljar.selected_algorithm))

# Run prediction on train dataset just for example
pred = mljar.predict(df[input_cols])

print('Please go to your mljar account and check details of all models')