def test_non_wait_fit(self):
    # Test the non-wait fit.
    model = Mljar(
        project=self.proj_title,
        experiment=self.expt_title,
        algorithms=['xgb'],
        metric='logloss',
        validation_kfolds=3,
        tuning_mode='Normal',
        single_algorithm_time_limit=1)
    self.assertTrue(model is not None)
    # fit models: just start the computation and do not wait
    start_time = time.time()
    model.fit(X=self.X, y=self.y, wait_till_all_done=False)
    end_time = time.time()
    # time to initialize models should not be greater than 5 minutes
    self.assertTrue(end_time - start_time < 5 * 60)
    # run prediction: a good model is not guaranteed,
    # but there should be at least one
    max_trys = 50
    pred = None
    while True:
        pred = model.predict(self.X)
        if pred is None:
            # no model is ready yet, wait and retry
            time.sleep(10)
        else:
            break
        max_trys -= 1
        if max_trys <= 0:
            break
    self.assertTrue(pred is not None)
    # get MSE
    score = self.mse(pred, self.y)
    self.assertTrue(score < 0.99)
def test_usage_with_train_split(self):
    # Test usage with a train/validation split.
    model = Mljar(
        project=self.proj_title,
        experiment=self.expt_title,
        validation_train_split=0.8,
        algorithms=['xgb'],
        tuning_mode='Normal',
        single_algorithm_time_limit=1)
    self.assertTrue(model is not None)
    # start fitting models, do not wait till all models are trained
    model.fit(X=self.X, y=self.y, wait_till_all_done=False)
    # wait some time
    time.sleep(80)
    # run prediction
    pred = model.predict(self.X)
    # get MSE
    score = self.mse(pred, self.y)
    self.assertTrue(score < 0.9)
    # check default validation
    self.assertEqual(model.selected_algorithm.validation_scheme,
                     "Split 80/20, Shuffle, Stratify")
    experiment='Ex 1.5',  # as 1.4, but with validation_train_split=0.95
    metric='auc',
    algorithms=['rfc'],
    validation_kfolds=None,
    validation_shuffle=False,
    validation_stratify=True,
    validation_train_split=0.95,
    tuning_mode='Normal',  # used 'Sport' for experiments 1-3
    create_ensemble=False,
    single_algorithm_time_limit='1')

print("fit")
clf_mlj.fit(X, y)  # , dataset_title="Ones and zeros")
print("predict")
pred_proba_mlj = clf_mlj.predict(X)
pred_proba_mlj = pred_proba_mlj.squeeze().values
print("pred_proba_mlj", pred_proba_mlj)  # shows values = 0 or 1
pred_mlj = [2 if x == 1 else 0 for x in pred_proba_mlj]
print("prediction mljar == actual", (pred_mlj == y).all())  # returns True

# mljar_fit_params = {'max_features': 0.5, 'min_samples_split': 50,
#                     'criterion': "gini", 'min_samples_leaf': 1}
# mljar_fit_params = {'max_features': 0.7, 'min_samples_split': 4,
#                     'criterion': "entropy", 'min_samples_leaf': 2}
mljar_fit_params = clf_mlj.selected_algorithm.params['model_params']['fit_params']
print("mljar_fit_params", mljar_fit_params)

########################
print("Random forest with same params")
clf_skl = RandomForestClassifier(
    max_features=mljar_fit_params['max_features'],
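# NOTE: the RandomForestClassifier call above is truncated in the source.
# A hedged sketch of how it might continue, assuming the remaining
# fit_params keys (min_samples_split, criterion, min_samples_leaf, as in
# the commented-out example dicts above) map directly onto the sklearn
# constructor arguments of the same names:
#
#     min_samples_split=mljar_fit_params['min_samples_split'],
#     criterion=mljar_fit_params['criterion'],
#     min_samples_leaf=mljar_fit_params['min_samples_leaf'])
# clf_skl.fit(X, y)
# print("sklearn prediction == actual", (clf_skl.predict(X) == y).all())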
def test_predict_without_fit(self):
    # Calling predict without calling fit first should return None.
    model = Mljar(project=self.proj_title, experiment=self.expt_title)
    pred = model.predict(self.X)
    self.assertTrue(pred is None)
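# The test methods above (test_non_wait_fit, test_usage_with_train_split,
# test_predict_without_fit) rely on fixtures created in setUp(). A minimal
# sketch of what that might look like -- the class name and the concrete
# values of self.proj_title, self.expt_title, self.X, self.y and self.mse
# are assumptions for illustration, not copied from the source:
import time
import unittest

import numpy as np


class MljarClientTest(unittest.TestCase):  # hypothetical test-case name
    def setUp(self):
        self.proj_title = 'Test project'
        self.expt_title = 'Test experiment'
        # tiny synthetic binary-classification dataset
        self.X = np.random.rand(100, 5)
        self.y = (self.X[:, 0] > 0.5).astype(int)

    def mse(self, pred, true):
        # mean squared error between predictions and targets
        return np.mean((np.array(pred).ravel() - np.array(true).ravel()) ** 2)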
target_col = df.columns[14]

# Let's define the MLJAR project
mljar = Mljar(
    project='UCI-Adult',  # project title that we will use to find it among our MLJAR projects
    experiment='Try tree methods',  # experiment title
    validation_kfolds=5,  # we will use 5-fold CV with stratify and shuffle
    validation_shuffle=True,
    validation_stratify=True,
    metric='auc',  # we will use Area Under ROC Curve (AUC) as the metric
    tuning_mode='Normal',  # select tuning mode
    algorithms=['rfc', 'xgb', 'lgb'],  # we want to tune Random Forest, Xgboost and LightGBM models
    create_ensemble=True,  # create an ensemble of all models
    single_algorithm_time_limit=5)  # time limit for single algorithm training, in minutes

# Fit the models
mljar.fit(df[input_cols], df[target_col])

# Print out the most useful algorithm
print(str(mljar.selected_algorithm))

# Run prediction on the train dataset, just as an example
pred = mljar.predict(df[input_cols])
print('Please go to your mljar account and check the details of all models')
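# The example above references df and input_cols without defining them. A
# minimal sketch of the assumed setup, loading the UCI Adult data with pandas
# (the URL and the 15-column layout are assumptions based on the standard
# UCI Adult dataset, where column 14 is the income label):
import pandas as pd

df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
    header=None,
    skipinitialspace=True)
input_cols = df.columns[:14]  # the 14 feature columns
target_col = df.columns[14]   # the income label used above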