def test_local_seriesprocessing(self):
    """Create three named logistic regressions, call ``run_models`` in series,
    and check that three models are stored and none remain queued.

    NOTE(review): every model is created with ``run=True``; confirm whether
    anything is actually left in the queue for ``run_models`` to process.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(
        x_train=frame,
        target_field="col3",
        test_split_percentage=0.5,
        report_name="modelweights",
    )

    # Same hyper-parameters for each model; only the registered name differs.
    for name in ("l1", "l2", "l3"):
        model.logistic_regression(
            random_state=2, penalty="l2", model_name=name, run=True
        )

    model.run_models(method="series")

    has_three_models = len(model._models) == 3
    self.assertTrue(has_three_models and len(model._queued_models) == 0)
def test_unsupervisedcv(self):
    """Smoke test: call ``kmeans`` with ``cv=2`` and ``learning_curve=True``.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.2)

    model.kmeans(cv=2, random_state=2, learning_curve=True)

    self.assertTrue(True)
def test_interpretmodel_behaviour_dependence(self):
    """Smoke test: call ``interpret_model_behavior(method="dependence")`` on a
    fitted logistic regression.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.4)
    model.logistic_regression(random_state=2, run=True)

    model.log_reg.interpret_model_behavior(method="dependence", show=False)

    self.assertTrue(True)
def test_model_dbscan(self):
    """Run ``dbscan`` on a six-row, two-column frame and assert that
    ``model.dbscan_clusters`` is not None."""
    points = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]
    frame = pd.DataFrame(data=points, columns=["col1", "col2"])
    model = Model(x_train=frame, split=False)

    model.dbscan(eps=3, min_samples=2, run=True)

    self.assertTrue(model.dbscan_clusters is not None)
def test_model_kmeans(self):
    """Run ``kmeans`` with three clusters on a six-row, two-column frame and
    assert that ``model.kmeans_clusters`` is not None."""
    points = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]
    frame = pd.DataFrame(data=points, columns=["col1", "col2"])
    model = Model(x_train=frame, split=False)

    model.kmeans(n_clusters=3, random_state=0, run=True)

    self.assertTrue(model.kmeans_clusters is not None)
def test_plot_clusters3d(self):
    """Smoke test: fit ``kmeans`` on three-column data and call
    ``plot_clusters(dim=3)``.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    points = [
        [1, 2, 3],
        [2, 2, 3],
        [2, 3, 4],
        [8, 7, 5],
        [8, 8, 5],
        [25, 80, 4],
    ]
    frame = pd.DataFrame(data=points, columns=["col1", "col2", "col3"])
    model = Model(x_train=frame, split=False)
    model.kmeans(n_clusters=3, random_state=0, run=True)

    model.km.plot_clusters(dim=3)

    self.assertTrue(True)
def test_model_gaussianmixture(self):
    """Run ``gaussian_mixture_clustering`` on a six-row, two-column frame and
    assert that ``model.gm_cluster`` is not None."""
    points = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]
    frame = pd.DataFrame(data=points, columns=["col1", "col2"])
    model = Model(x_train=frame, split=False)

    model.gaussian_mixture_clustering(run=True)

    self.assertTrue(model.gm_cluster is not None)
def test_model_meanshift(self):
    """Run ``mean_shift`` on a six-row, two-column frame and assert that
    ``model.mshift`` is not None."""
    points = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]
    frame = pd.DataFrame(data=points, columns=["col1", "col2"])
    model = Model(x_train=frame, split=False)

    model.mean_shift(run=True)

    self.assertTrue(model.mshift is not None)
def test_model_xgbc(self):
    """Run ``xgboost_classification`` on random 0/1 data and assert that
    ``model.xgb_cls`` is not None."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3")

    model.xgboost_classification(run=True)

    self.assertTrue(model.xgb_cls is not None)
def test_model_report_modelweights(self):
    """Smoke test: call ``model_weights`` on a fitted logistic regression of a
    ``Model`` created with ``report_name="modelweights"``.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", report_name="modelweights")
    model.logistic_regression(random_state=2, penalty="l2", run=True)

    model.log_reg.model_weights()

    self.assertTrue(True)
def test_interpretmodel_predictions_all(self):
    """Smoke test: call ``interpret_predictions(show=False)`` on a fitted
    logistic regression.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.6)
    model.logistic_regression(random_state=2, run=True)

    model.log_reg.interpret_predictions(show=False)

    self.assertTrue(True)
def test_model_dtregression(self):
    """Run ``decision_tree_regression`` on random 0/1 data and assert that
    ``model.dt_reg`` is not None."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3")

    model.decision_tree_regression(random_state=2, run=True)

    self.assertTrue(model.dt_reg is not None)
def test_model_linearsvc(self):
    """Run ``linearsvc`` on random 0/1 data and assert that ``model.linsvc``
    is not None."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3")

    model.linearsvc(random_state=2, run=True)

    self.assertTrue(model.linsvc is not None)
def test_model_isoforest(self):
    """Run ``isolation_forest`` on random 0/1 data (no target field given) and
    assert that ``model.iso_forest`` is not None."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame)

    model.isolation_forest(random_state=2, run=True)

    self.assertTrue(model.iso_forest is not None)
def test_model_oneclasssvm(self):
    """Run ``oneclass_svm`` on random 0/1 data (no target field given) and
    assert that ``model.ocsvm`` is not None."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame)

    model.oneclass_svm(run=True)

    self.assertTrue(model.ocsvm is not None)
def test_stratified_cv(self):
    """Call ``logistic_regression`` with ``cv="strat-kfold"`` and check that
    the returned value has five entries.

    NOTE(review): five is the count the original assertion compared against;
    confirm it matches the number of folds the library uses here.
    """
    data = np.random.randint(0, 2, size=(1000, 3))
    data = pd.DataFrame(data=data, columns=["col1", "col2", "col3"])

    model = Model(x_train=data, target_field="col3", test_split_percentage=0.2)
    cv_values = model.logistic_regression(
        cv="strat-kfold", random_state=2, learning_curve=True, run=False
    )

    # The previous assertIsNotNone(len(cv_values) == 5) could never fail:
    # the comparison always yields a bool, which is never None.
    self.assertEqual(len(cv_values), 5)
def test_model_confusionmatrix(self):
    """Smoke test: call ``confusion_matrix`` on a fitted logistic regression.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3")
    model.logistic_regression(random_state=2, penalty="l2", run=True)

    model.log_reg.confusion_matrix()

    self.assertTrue(True)
def test_model_agglom(self):
    """Run ``agglomerative_clustering`` with two clusters on a six-row,
    two-column frame and assert that ``model.agglom`` is not None."""
    points = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]
    frame = pd.DataFrame(data=points, columns=["col1", "col2"])
    model = Model(x_train=frame, split=False)

    model.agglomerative_clustering(n_clusters=2, run=True)

    self.assertTrue(model.agglom is not None)
def test_model_svr(self):
    """Run ``svr`` on random 0/1 data and assert that ``model.svr_reg`` is not
    None."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3")

    model.svr(run=True)

    self.assertTrue(model.svr_reg is not None)
def test_dependence_plot(self):
    """Smoke test: call ``dependence_plot("col1")`` on a fitted logistic
    regression.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.5)
    model.logistic_regression(random_state=2, penalty="l2", run=True)

    model.log_reg.dependence_plot("col1")

    self.assertTrue(True)
def test_force_plot_misclassified(self):
    """Smoke test: call ``force_plot(misclassified=True)`` on a fitted logistic
    regression.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.6)
    model.logistic_regression(random_state=2, run=True)

    model.log_reg.force_plot(misclassified=True)

    self.assertTrue(True)
def test_model_cluster_filter(self):
    """Call ``filter_cluster(0)`` on the object returned by ``dbscan`` and
    assert every ``dbscan_clusters`` value in the result equals 0."""
    points = [[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]
    frame = pd.DataFrame(data=points, columns=["col1", "col2"])

    # filter_cluster is called on dbscan's return value, not on the Model itself.
    clustered = Model(x_train=frame, split=False).dbscan(
        eps=3, min_samples=2, run=True
    )
    subset = clustered.filter_cluster(0)

    self.assertTrue(all(subset.dbscan_clusters == 0))
def test_model_unsupervised_defaultgridsearch(self):
    """Smoke test: call ``kmeans`` with a ``gridsearch`` parameter grid and
    ``cv=2``.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", report_name="gridsearch_test")

    param_grid = {"max_iter": [300, 200]}
    model.kmeans(gridsearch=param_grid, cv=2, run=True)

    self.assertTrue(True)
def test_interpretmodel_behaviour_all(self):
    """Smoke test: call ``interpret_model_behavior(show=False)`` on a logistic
    regression fitted to two float feature columns and a 0/1 label column.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    # Draw features before labels so the random stream is consumed in the same order.
    features = np.random.random_sample(size=(1000, 2))
    labels = np.random.randint(0, 2, size=(1000, 1))

    frame = pd.DataFrame(data=features, columns=["col1", "col2"])
    frame["col3"] = labels

    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.2)
    model.logistic_regression(random_state=2, run=True)

    model.log_reg.interpret_model_behavior(show=False)

    self.assertTrue(True)
def test_model_getattr(self):
    """Register a model under ``model_name="model1"`` and assert it is not None
    through both attribute access and item access on the ``Model``."""
    sentences = [
        "Hi my name is PyAutoML. Please split me.",
        "This function is going to split by sentence. Automation is great.",
    ]
    frame = pd.DataFrame(data=sentences, columns=["data"])
    model = Model(x_train=frame, split=False)

    model.extract_keywords_gensim("data", ratio=0.5, model_name="model1", run=True)

    via_attribute = model.model1 is not None
    # Item access is only evaluated when attribute access succeeded.
    self.assertTrue(via_attribute and model["model1"] is not None)
def test_text_gensim_summarize(self):
    """Run ``summarize_gensim`` on the ``data`` column and assert that
    ``model.data_summarized`` is not None."""
    sentences = [
        "Hi my name is PyAutoML. Please split me.",
        "This function is going to split by sentence. Automation is great.",
    ]
    frame = pd.DataFrame(data=sentences, columns=["data"])
    model = Model(x_train=frame, split=False)

    model.summarize_gensim("data", ratio=0.5, run=True)

    self.assertTrue(model.data_summarized is not None)
def test_text_d2v(self):
    """Run ``doc2vec`` on the ``data`` column with ``prep=True`` and assert
    that ``model.d2v`` is not None."""
    sentences = [
        "Hi my name is PyAutoML. Please split me.",
        "This function is going to split by sentence. Automation is great.",
    ]
    frame = pd.DataFrame(data=sentences, columns=["data"])
    model = Model(x_train=frame, split=False)

    model.doc2vec("data", prep=True, run=True, min_count=1)

    self.assertTrue(model.d2v is not None)
def test_decision_plot_sameaxis(self):
    """Smoke test: draw a second ``decision_plot`` using the ``feature_idx``
    and ``xlim`` taken from the first plot's return value.

    The closing assertion is trivially true, so the test only fails if one of
    the calls raises.
    """
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3", test_split_percentage=0.5)
    model.logistic_regression(random_state=2, penalty="l2", run=True)

    first_plot = model.log_reg.decision_plot(sample_no=1)
    model.log_reg.decision_plot(
        sample_no=2,
        feature_order=first_plot.feature_idx,
        xlim=first_plot.xlim,
    )

    self.assertTrue(True)
def test_model_logisticregression(self):
    """Run ``logistic_regression`` and assert that ``log_predictions`` is not
    None on both ``x_train_results`` and ``x_test_results``."""
    frame = pd.DataFrame(
        data=np.random.randint(0, 2, size=(1000, 3)),
        columns=["col1", "col2", "col3"],
    )
    model = Model(x_train=frame, target_field="col3")

    model.logistic_regression(random_state=2, penalty="l2", run=True)

    train_has_predictions = model.x_train_results.log_predictions is not None
    # The test-side lookup is only evaluated when the train-side check passed.
    self.assertTrue(
        train_has_predictions
        and model.x_test_results.log_predictions is not None
    )
def test_text_w2vprep(self):
    """Run ``word2vec`` on a ``prep`` column of whitespace-split tokens and
    assert that ``model.w2v`` is not None."""
    sentences = [
        "Hi my name is PyAutoML. Please split me.",
        "This function is going to split by sentence. Automation is great.",
    ]
    frame = pd.DataFrame(data=sentences, columns=["data"])

    # One token list per sentence, produced by str.split with no arguments.
    tokenized = [sentence.split() for sentence in sentences]
    frame["prep"] = pd.Series(tokenized)

    model = Model(x_train=frame, split=False)
    model.word2vec("prep", run=True, min_count=1)

    self.assertTrue(model.w2v is not None)