def main():
    """Plot every column of the Boston DataFrame, one figure per column.

    Loads the dataset through ``DatasetsTools``, prints its ``info``
    attribute, relabels the ``CHAS`` column with ``set_CHAS`` and then calls
    ``DataPlots.plot_column`` followed by ``plt.show()`` for each column.
    """
    # NOTE(review): datasets.load_boston was removed in scikit-learn 1.2 --
    # confirm the scikit-learn version this example is pinned to.
    boston_dstl = DatasetsTools(datasets.load_boston)
    boston_df = boston_dstl.data_as_df()
    print(boston_dstl.info)

    def set_CHAS(n):
        """Return "bounds river" when *n* equals 1, otherwise None."""
        if n == 1:
            return "bounds river"
        # Explicit: every other value is mapped to None.
        return None

    boston_df["CHAS"] = boston_df["CHAS"].apply(set_CHAS)
    plotter = DataPlots(df=boston_df, ggplot=True, cmap=cm.jet)
    # One figure size per column, matched by position; a frame with more
    # columns than entries raises IndexError in the loop below.
    sizes_lst = [(4, 4), (6, 4), (6, 4), (12, 8), (7, 7), (6, 6), (6, 6),
                 (12, 12), (12, 12), (12, 12), (12, 12), (12, 12), (12, 12),
                 (12, 12), (12, 12)]

    for i, col in enumerate(boston_df):
        print(col)
        plotter.plot_column(data_column=boston_df[col], figsize=sizes_lst[i])
        plt.show()
Example #2
0
def main():
    """Build Target-colored scatter matrices for the Boston and Iris frames.

    Each dataset is loaded through ``DatasetsTools`` and handed to a fresh
    ``DataPlots`` instance. ``plt.show()`` is called once, after both
    matrices have been built.
    """

    def draw_scatter_matrix(frame):
        # Same plotter configuration for both datasets.
        matrix_plotter = DataPlots(df=frame, ggplot=True, cmap=cm.jet)
        matrix_plotter.colored_scatter_matrix(df=frame,
                                              colored_column_name="Target")

    # Boston: print the wrapper's info attribute, then build the matrix.
    boston_tools = DatasetsTools(datasets.load_boston)
    boston_frame = boston_tools.data_as_df()
    print(boston_tools.info)
    draw_scatter_matrix(boston_frame)

    # Iris: print the result of the frame's own info() call, then build
    # the matrix.
    iris_tools = DatasetsTools(datasets.load_iris)
    iris_frame = iris_tools.data_as_df()
    print(iris_frame.info())
    draw_scatter_matrix(iris_frame)

    plt.show()
class DatasetsToolsTestCase(unittest.TestCase):
    """Check the shape and column names of the frames DatasetsTools builds."""

    def setUp(self):
        """Wrap the Boston and Iris loaders in DatasetsTools instances."""
        self.boston_dtst = DatasetsTools(datasets.load_boston)
        self.iris_dtst = DatasetsTools(datasets.load_iris)

    def test_boston(self):
        """Boston frame is expected to be 506 x 14 with 'Target' last."""
        self.assertIsInstance(self.boston_dtst, DatasetsTools)
        df = self.boston_dtst.data_as_df()
        self.assertEqual(list(df.shape), [506, 14])
        expected_columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
                            'Target']
        # list(df) yields the column labels in order.
        self.assertEqual(list(df), expected_columns)

    def test_iris(self):
        """Iris frame is expected to be 150 x 5 with 'Target' last."""
        self.assertIsInstance(self.iris_dtst, DatasetsTools)
        df = self.iris_dtst.data_as_df()
        self.assertEqual(list(df.shape), [150, 5])
        expected_columns = ['sepal_length_cm', 'sepal_width_cm',
                            'petal_length_cm', 'petal_width_cm', 'Target']
        self.assertEqual(list(df), expected_columns)
Example #4
0
def main():
    """Print the Iris columns and dataset info, then show a scatter matrix.

    The matrix is produced by ``DataPlots.colored_scatter_matrix`` with the
    "Target" column passed as ``colored_column_name``.
    """
    # Load the iris data into a DataFrame.
    iris_tools = DatasetsTools(datasets.load_iris)
    iris_frame = iris_tools.data_as_df()

    column_names = list(iris_frame)
    print("columns: {}".format(column_names))
    print(iris_tools.info)

    # Colored scatter matrix of the iris data.
    matrix_plotter = DataPlots(df=iris_frame, ggplot=True, is_verbose=True)
    matrix_fig = matrix_plotter.colored_scatter_matrix(
        df=iris_frame,
        colored_column_name="Target",
    )
    # To keep the figure on disk:
    # matrix_fig.savefig("iris-colored_scatter_matrix.png")
    plt.show()
Example #5
0
 def setUp(self):
     """Prepare the frames, the classifier and the ModelUtils under test.

     The iris frame is built with its target column named "IrisClass";
     every column except the last one is passed to ModelUtils as
     ``columns_lst``.
     """
     iris_tools = DatasetsTools(datasets.load_iris)
     self.iris_df = iris_tools.data_as_df(target_column_name="IrisClass")
     boston_tools = DatasetsTools(datasets.load_boston)
     self.boton_df = boston_tools.data_as_df()

     tree_params = {
         "max_depth": 5,
         "min_samples_split": 10,
         "min_samples_leaf": 10,
     }
     self.tree_clf = DecisionTreeClassifier(**tree_params)

     self.prd_lbl = "PrdictedIrisClass"
     self.actl_lbl = "IrisClass"

     # Column labels of the iris frame, minus the last one.
     feature_columns = list(self.iris_df)
     del feature_columns[-1]
     self.columns_lst = feature_columns

     self.mu = ModelUtils(
         df=self.iris_df,
         model=self.tree_clf,
         columns_lst=self.columns_lst,
         predicted_lbl=self.prd_lbl,
         actual_lbl=self.actl_lbl,
     )
def main():
    """Scatter each Iris column against sepal_length_cm, colored by Target.

    One ``DataPlots.colored_scatter`` call is made per column of the frame;
    the returned values are collected and ``plt.show()`` is called once at
    the end.
    """
    iris_frame = DatasetsTools(datasets.load_iris).data_as_df()
    print(iris_frame.info())

    scatter_plotter = DataPlots(df=iris_frame, ggplot=True, cmap=cm.jet)
    # Figure size per column, matched by position.
    fig_sizes = [
        (4, 4), (6, 4), (6, 4), (12, 8),
        (7, 7), (6, 6), (6, 6), (12, 12),
    ]

    scatter_axes = [
        scatter_plotter.colored_scatter(
            x=iris_frame[column],
            y=iris_frame.sepal_length_cm,
            z2color=iris_frame.Target,
            figsize=fig_sizes[position],
        )
        for position, column in enumerate(list(iris_frame))
    ]

    plt.show()
Example #7
0
from matplotlib import pyplot as plt
from sklearn import datasets
import pandas as pd
from sklearn.linear_model import LinearRegression
from MachineLearningUtils.DatasetTools import DatasetsTools
from MachineLearningUtils.LinearModelUtils import LinearModelUtils
from MachineLearningUtils.UsefulPlots import EvaluationPlots

pd.set_option('expand_frame_repr', False)

# Load the Boston data into a DataFrame whose target column is named "Price".
# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2 --
# confirm the scikit-learn version this example is pinned to.
prd_lbl, target = "PrdictedPrice", "Price"
# data_as_df() is called on the DatasetsTools wrapper, so the wrapper and the
# DataFrame it returns are kept under separate names.
boston_dtst = DatasetsTools(datasets.load_boston)
boston_df = boston_dtst.data_as_df(target_column_name=target)
print(boston_df.describe())
print(boston_df.head())

# Set the linear model.
lm = LinearRegression()

# Simple usage: split, train, then collect the test results.
mu = LinearModelUtils(df=boston_df,
                      lm=lm,
                      predicted_lbl=prd_lbl,
                      actual_lbl=target)
mu.split_and_train()
results_df = mu.test_model()
# evaluate results using plot_confusion_matrix
print(mu.get_formula())
evp = EvaluationPlots(df=results_df,
                      actual_lbl=mu.actual_lbl,