def main():
    """Plot every column of the Boston housing DataFrame.

    Loads the data through ``DatasetsTools``, prints the tool's ``info``
    attribute, relabels the ``CHAS`` column and then calls
    ``DataPlots.plot_column`` once per column with a per-column figure size.
    """
    # NOTE(review): scikit-learn deprecated load_boston in 1.0 and removed it
    # in 1.2 -- confirm the pinned scikit-learn version still provides it.
    boston_dstl = DatasetsTools(datasets.load_boston)
    boston_df = boston_dstl.data_as_df()
    print(boston_dstl.info)

    def set_CHAS(n):
        """Return ``"bounds river"`` when *n* equals 1, otherwise ``None``."""
        # The original spelled the fallback as a bare ``None`` expression;
        # the result is the same, but the return is now explicit.
        return "bounds river" if n == 1 else None

    boston_df.CHAS = boston_df.CHAS.apply(set_CHAS)

    plotter = DataPlots(df=boston_df, ggplot=True, cmap=cm.jet)
    # Figure sizes are matched to the columns by position.
    sizes_lst = [
        (4, 4), (6, 4), (6, 4), (12, 8), (7, 7), (6, 6), (6, 6),
        (12, 12), (12, 12), (12, 12), (12, 12),
        (12, 12), (12, 12), (12, 12), (12, 12),
    ]
    for i, col in enumerate(boston_df):
        print(col)
        plotter.plot_column(data_column=boston_df[col], figsize=sizes_lst[i])
    # NOTE(review): the original indentation was lost; show() is assumed to
    # run once after all columns were plotted -- confirm.
    plt.show()
def main():
    """Display a scatter matrix of the iris data colored by ``Target``."""
    # Build the iris DataFrame and report what was loaded.
    iris_dtst = DatasetsTools(datasets.load_iris)
    iris_df = iris_dtst.data_as_df()
    column_names = list(iris_df)
    print("columns: {}".format(column_names))
    print(iris_dtst.info)

    # The plotting itself: construct the plotter, draw, show.
    plotter = DataPlots(df=iris_df, ggplot=True, is_verbose=True)
    # The returned figure is kept in a local so it can be saved if wanted.
    fig = plotter.colored_scatter_matrix(
        df=iris_df,
        colored_column_name="Target",
    )
    plt.show()
def setUp(self):
    """Create the DataFrames, classifier and ``ModelUtils`` used by the tests."""
    # DataFrames: iris with a custom target column name, boston with defaults.
    iris_tools = DatasetsTools(datasets.load_iris)
    self.iris_df = iris_tools.data_as_df(target_column_name="IrisClass")
    boston_tools = DatasetsTools(datasets.load_boston)
    self.boton_df = boston_tools.data_as_df()

    # Classifier under test.
    self.tree_clf = DecisionTreeClassifier(
        max_depth=5,
        min_samples_split=10,
        min_samples_leaf=10,
    )

    # Column labels for predicted and actual values.
    self.prd_lbl = "PrdictedIrisClass"
    self.actl_lbl = "IrisClass"

    # All iris columns except the last one.
    feature_columns = list(self.iris_df)
    del feature_columns[-1]
    self.columns_lst = feature_columns

    self.mu = ModelUtils(
        df=self.iris_df,
        model=self.tree_clf,
        columns_lst=self.columns_lst,
        predicted_lbl=self.prd_lbl,
        actual_lbl=self.actl_lbl,
    )
def test__set_test_df(self):
    """Check the columns of the DataFrame returned by ``_set_test_df``.

    With ``None`` the result must have the same columns as ``self.iris_df``;
    with an explicit DataFrame the result must have that DataFrame's columns.
    """
    mu = self.mu

    # No DataFrame given: columns must match the iris fixture.
    df = mu._set_test_df(None)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(list(df), list(self.iris_df))

    # Explicit DataFrame given: columns must match what was passed in.
    boton_df = DatasetsTools(datasets.load_boston).data_as_df()
    df = mu._set_test_df(boton_df)
    self.assertEqual(list(df), list(boton_df))
def main():
    """Scatter every iris column against ``sepal_length_cm``, colored by ``Target``.

    One ``DataPlots.colored_scatter`` call is made per column of the iris
    DataFrame, each with its own figure size taken from ``sizes_lst``.
    """
    iris_dtst = DatasetsTools(datasets.load_iris)
    iris_df = iris_dtst.data_as_df()
    # Assuming iris_df is a pandas DataFrame: info() prints its own report and
    # returns None, so the original print(iris_df.info()) also emitted a stray
    # "None" line.
    iris_df.info()

    plotter = DataPlots(df=iris_df, ggplot=True, cmap=cm.jet)
    # Figure sizes are matched to the columns by position.
    sizes_lst = [(4, 4), (6, 4), (6, 4), (12, 8),
                 (7, 7), (6, 6), (6, 6), (12, 12)]
    # The returned axes are collected as the original did; nothing in this
    # function reads them afterwards.
    tr_axes = [
        plotter.colored_scatter(
            x=iris_df[col],
            y=iris_df.sepal_length_cm,
            z2color=iris_df.Target,
            figsize=sizes_lst[i],
        )
        for i, col in enumerate(iris_df)
    ]
    # NOTE(review): the original indentation was lost; show() is assumed to
    # run once after all scatters were drawn -- confirm.
    plt.show()
class DatasetsToolsTestCase(unittest.TestCase):
    """Check the shape and column headers produced by ``DatasetsTools.data_as_df``."""

    def setUp(self):
        """Wrap the boston and iris loaders in ``DatasetsTools`` instances."""
        # NOTE(review): scikit-learn deprecated load_boston in 1.0 and removed
        # it in 1.2 -- confirm the pinned version still provides it.
        self.boston_dtst = DatasetsTools(datasets.load_boston)
        self.iris_dtst = DatasetsTools(datasets.load_iris)

    def test_boston(self):
        """Boston data must come back as 506 rows x 14 named columns."""
        self.assertIsInstance(self.boston_dtst, DatasetsTools)
        df = self.boston_dtst.data_as_df()
        self.assertEqual(list(df.shape), [506, 14])
        # The unused local ``l = list(df)`` from the original was removed.
        headrs = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                  'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'Target']
        self.assertEqual(list(df), headrs)

    def test_iris(self):
        """Iris data must come back as 150 rows x 5 named columns."""
        self.assertIsInstance(self.iris_dtst, DatasetsTools)
        df = self.iris_dtst.data_as_df()
        self.assertEqual(list(df.shape), [150, 5])
        headrs = ['sepal_length_cm', 'sepal_width_cm', 'petal_length_cm',
                  'petal_width_cm', 'Target']
        self.assertEqual(list(df), headrs)
def main():
    """Draw ``Target``-colored scatter matrices for the boston and iris data.

    Both matrices are drawn first; the only display call is the single
    ``plt.show()`` at the end.
    """
    # Boston housing data.
    boston_dtst = DatasetsTools(datasets.load_boston)
    boston_df = boston_dtst.data_as_df()
    print(boston_dtst.info)
    plotter = DataPlots(df=boston_df, ggplot=True, cmap=cm.jet)
    plotter.colored_scatter_matrix(df=boston_df, colored_column_name="Target")

    # Iris data.
    iris_dtst = DatasetsTools(datasets.load_iris)
    iris_df = iris_dtst.data_as_df()
    # Assuming iris_df is a pandas DataFrame: info() prints its own report and
    # returns None, so the original print(iris_df.info()) also emitted a stray
    # "None" line.
    iris_df.info()
    plotter = DataPlots(df=iris_df, ggplot=True, cmap=cm.jet)
    plotter.colored_scatter_matrix(df=iris_df, colored_column_name="Target")

    plt.show()
def setUp(self):
    """Load the boston and iris data as DataFrames for each test."""
    boston_tools = DatasetsTools(datasets.load_boston)
    self.boston_df = boston_tools.data_as_df()

    iris_tools = DatasetsTools(datasets.load_iris)
    self.iris_df = iris_tools.data_as_df()
from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier

from MachineLearningUtils.DatasetTools import DatasetsTools
from MachineLearningUtils.ModelsUtils import ModelUtils
from MachineLearningUtils.UsefulPlots import EvaluationPlots

# Column labels: predicted class and actual class.
prd_lbl = "PrdictedIrisClass"
actl_lbl = "IrisClass"

# Iris data as a DataFrame whose target column carries the actual-class label.
iris_df = DatasetsTools(datasets.load_iris).data_as_df(
    target_column_name=actl_lbl,
)

# Classifier to be trained.
tree_clf = DecisionTreeClassifier(
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=10,
)

# Minimal ModelUtils workflow: split + train, then test.
mu = ModelUtils(
    df=iris_df,
    model=tree_clf,
    predicted_lbl=prd_lbl,
    actual_lbl=actl_lbl,
)
mu.split_and_train()
results_df = mu.test_model()

# Print the confusion matrix and set up EvaluationPlots on the test results.
print(mu.confusion_matrix_as_dataframe())
evp = EvaluationPlots(
    df=results_df,
    actual_lbl=actl_lbl,
    predicted_lbl=prd_lbl,
)
def setUp(self):
    """Prepare DataFrames, a matplotlib figure and a ``_BasePlot`` on the iris data."""
    iris_tools = DatasetsTools(datasets.load_iris)
    self.iris_df = iris_tools.data_as_df(target_column_name="target")

    boston_tools = DatasetsTools(datasets.load_boston)
    self.boston_df = boston_tools.data_as_df()

    self.fig = plt.figure()
    self.iris___BasePlot = _BasePlot(df=self.iris_df)
from matplotlib import pyplot as plt from sklearn import datasets import pandas as pd from sklearn.linear_model import LinearRegression from MachineLearningUtils.DatasetTools import DatasetsTools from MachineLearningUtils.LinearModelUtils import LinearModelUtils from MachineLearningUtils.UsefulPlots import EvaluationPlots pd.set_option('expand_frame_repr', False) # load boston data into DataFrame prd_lbl, target = "PrdictedPrice", "Price" boston_dtst = DatasetsTools( datasets.load_boston).data_as_df(target_column_name=target) print(boston_dtst.describe()) boston_df = boston_dtst.data_as_df(target_column_name=target) print(boston_df.head()) # set linear model lm = LinearRegression() # simple usage mu = LinearModelUtils(df=boston_df, lm=lm, predicted_lbl=prd_lbl, actual_lbl=target) mu.split_and_train() results_df = mu.test_model() # evaluate results using plot_confusion_matrix print(mu.get_formula()) evp = EvaluationPlots(df=results_df, actual_lbl=mu.actual_lbl,
def setUp(self):
    """Create one ``DatasetsTools`` wrapper per dataset loader."""
    boston_loader = datasets.load_boston
    iris_loader = datasets.load_iris
    self.boston_dtst = DatasetsTools(boston_loader)
    self.iris_dtst = DatasetsTools(iris_loader)