Beispiel #1
0
    def test_most_common_int(self):
        """Smoke test: ``most_common`` on an integer column with plotting enabled.

        The ``Data`` object is built with ``test_split_percentage=0.5`` and
        ``most_common`` is called with ``use_test=True``.
        """
        values = pd.Series([1, 1, 2, 4, 2, 5])
        frame = pd.DataFrame(values, columns=['col1'])

        dataset = Data(frame, test_split_percentage=0.5)
        dataset.most_common('col1', plot=True, use_test=True)

        # Nothing is inspected; reaching this line without an exception is the check.
        self.assertTrue(True)
Beispiel #2
0
    def test_most_common_list(self):
        """Smoke test: ``most_common`` on a column whose cells are lists of strings.

        One of the rows is an empty list.
        """
        token_lists = pd.Series(
            [
                ['hi', 'aethos'],
                ['hi', 'py-automl'],
                [],
                ['hi'],
            ]
        )
        frame = pd.DataFrame(token_lists, columns=['col1'])

        dataset = Data(frame, split=False)
        dataset.most_common('col1', plot=True)

        # Passing means the calls above completed without raising.
        self.assertTrue(True)
Beispiel #3
0
    def test_compare_dist_predict(self):
        """Smoke test: ``predict_data_sample`` runs on a random 1000x3 frame of 0/1 values."""
        column_names = ["col1", "col2", "col3"]
        values = np.random.randint(0, 2, size=(1000, 3))
        frame = pd.DataFrame(data=values, columns=column_names)

        dataset = Data(frame, target_field="col3", report_name="test")
        dataset.predict_data_sample()

        # No output is checked; the test only guards against exceptions.
        self.assertTrue(True)
Beispiel #4
0
    def test_plot_clusters_pca(self):
        """Smoke test: ``plot_dim_reduction`` with ``algo='pca'`` and ``dim=2``.

        Input is a ``make_blobs(100, 4, centers=3)`` sample with its labels
        stored in a ``'label'`` column.
        """
        features, cluster_ids = make_blobs(100, 4, centers=3)

        # Append the generated labels as the last column of the feature frame.
        frame = pd.DataFrame(features).assign(label=cluster_ids)

        dataset = Data(frame, split=False)
        dataset.plot_dim_reduction('label', algo='pca', dim=2)

        # Passing means the plot call completed without raising.
        self.assertTrue(True)
Beispiel #5
0
    def test_compare_dist_ks(self):
        """Smoke test: ``ks_feature_distribution`` runs after adding a ``col4`` column.

        ``col4`` is assigned twice, from normal samples with different
        parameters and different lengths (800, then 200).
        """
        column_names = ["col1", "col2", "col3"]
        values = np.random.randint(0, 2, size=(1000, 3))
        frame = pd.DataFrame(data=values, columns=column_names)

        dataset = Data(frame, target_field="col3", report_name="test")

        # NOTE(review): 800 + 200 == 1000 rows, so these two assignments
        # presumably target the train and test splits respectively -- confirm
        # against Data.__setitem__.
        first_sample = np.random.normal(1, 2, size=(1, 800))[0]
        dataset["col4"] = first_sample
        second_sample = np.random.normal(10, 20, size=(1, 200))[0]
        dataset["col4"] = second_sample

        dataset.ks_feature_distribution()

        # No output is checked; the test only guards against exceptions.
        self.assertTrue(True)
Beispiel #6
0
    def test_most_common_str(self):
        """Smoke test: ``most_common`` on a column of free-text strings, without plotting."""
        sentences = [
            'hi aethos',
            'aethos is awesome',
            'hi',
            'py-automl is the old name',
            'hi everyone',
        ]
        frame = pd.DataFrame(pd.Series(sentences), columns=['col1'])

        dataset = Data(frame, split=False)
        dataset.most_common('col1')

        # Passing means the calls above completed without raising.
        self.assertTrue(True)
Beispiel #7
0
    def test_lineplot(self):
        """Smoke test: ``lineplot`` of two seeded random series against a date column."""
        # Fixed seed; the draw order (Google first, then Apple) is kept so the
        # generated values are reproducible.
        np.random.seed(42)
        google = np.random.randn(1000) + 0.2
        apple = np.random.randn(1000) + 0.17
        dates = pd.date_range("1/1/2000", periods=1000)

        frame = pd.DataFrame({"Google": google, "Apple": apple, "date": dates})

        plotter = Data(x_train=frame, split=False)
        plotter.lineplot(x="date", y=["Google", "Apple"], show_figure=False)

        # No output is checked; the test only guards against exceptions.
        self.assertTrue(True)
Beispiel #8
0
    def test_jointplot(self):
        """Smoke test: ``jointplot`` of sepal width against sepal length on the iris dataset."""
        iris = sns.load_dataset("iris")

        # Same keyword arguments as a direct call, gathered in one place.
        data_options = {
            "x_train": iris,
            "x_test": None,
            "split": True,
            "target_field": "species",
            "report_name": "test",
            "test_split_percentage": 0.5,
        }
        dataset = Data(**data_options)

        dataset.jointplot(x="sepal_width", y="sepal_length")

        # Passing means the plot call completed without raising.
        self.assertTrue(True)
Beispiel #9
0
    def test_pairplot_custom(self):
        """Smoke test: ``pairplot`` on iris with explicit diagonal, upper and lower plot kinds."""
        iris = sns.load_dataset("iris")

        dataset = Data(
            x_train=iris, x_test=None, split=True,
            target_field="species", report_name="test", test_split_percentage=0.5,
        )

        plot_kinds = {
            "diag_kind": 'hist',
            "upper_kind": 'scatter',
            "lower_kind": 'kde',
        }
        dataset.pairplot(**plot_kinds)

        # No output is checked; the test only guards against exceptions.
        self.assertTrue(True)
Beispiel #10
0
    def test_pairplot(self):
        """Smoke test: ``pairplot`` on the iris dataset with no arguments."""
        iris = sns.load_dataset("iris")

        dataset = Data(
            x_train=iris, x_test=None, split=True,
            target_field="species", report_name="test", test_split_percentage=0.5,
        )
        dataset.pairplot()

        # Passing means the plot call completed without raising.
        self.assertTrue(True)
Beispiel #11
0
    def test_histogram_1(self):
        """Smoke test: ``histogram`` of the iris ``sepal_length`` column."""
        iris = sns.load_dataset("iris")

        # Same keyword arguments as a direct call, gathered in one place.
        data_options = dict(
            x_train=iris,
            x_test=None,
            split=True,
            target_field="species",
            report_name="test",
            test_split_percentage=0.5,
        )
        dataset = Data(**data_options)

        dataset.histogram("sepal_length")

        # No output is checked; the test only guards against exceptions.
        self.assertTrue(True)
Beispiel #12
0
    def test_correlation_plot(self):
        """Smoke test: ``correlation_matrix`` on a 100x10 frame of uniform random values."""
        random_values = np.random.rand(100, 10)
        frame = pd.DataFrame(random_values)

        # NOTE(review): the frame has default integer column labels (0..9), so
        # target_field="col3" does not name an existing column -- confirm this
        # is intended.
        dataset = Data(
            x_train=frame, x_test=None, split=True,
            target_field="col3", report_name="test", test_split_percentage=0.5,
        )

        dataset.correlation_matrix(data_labels=True, hide_mirror=True)

        # Passing means the calls above completed without raising.
        self.assertTrue(True)
Beispiel #13
0
    def test_barplot(self):
        """Smoke test: grouped mean ``barplot`` on iris, horizontal then vertical."""
        iris = sns.load_dataset("iris")

        dataset = Data(
            x_train=iris, x_test=None, split=True,
            target_field="species", report_name="test", test_split_percentage=0.5,
        )

        # Only the orientation differs between the two calls; "h" runs first.
        for orientation in ("h", "v"):
            dataset.barplot(
                x="species",
                y=["sepal_length", "sepal_width"],
                method="mean",
                orient=orientation,
                barmode="group",
            )

        # No output is checked; the test only guards against exceptions.
        self.assertTrue(True)